From b02e4a39acad543abcbaca64188534b34b8ce076 Mon Sep 17 00:00:00 2001 From: 6543 <6543@obermui.de> Date: Mon, 26 Jan 2026 16:20:42 +0100 Subject: [PATCH 01/13] server queue: simplify and fix itteration of ErrorAtOnce --- server/queue/fifo.go | 87 +++++++++++++++----------------- server/queue/mocks/mock_Queue.go | 57 --------------------- server/queue/persistent.go | 13 ----- server/queue/queue.go | 6 +-- 4 files changed, 44 insertions(+), 119 deletions(-) diff --git a/server/queue/fifo.go b/server/queue/fifo.go index c4bd11abf9e..722fcecd7e7 100644 --- a/server/queue/fifo.go +++ b/server/queue/fifo.go @@ -17,6 +17,7 @@ package queue import ( "container/list" "context" + "errors" "fmt" "slices" "sync" @@ -124,12 +125,17 @@ func (q *fifo) Error(_ context.Context, id string, err error) error { // ErrorAtOnce signals multiple done are complete with an error. func (q *fifo) ErrorAtOnce(_ context.Context, ids []string, err error) error { + if errors.Is(err, ErrCancel) { + return q.finished(ids, model.StatusKilled, err) + } return q.finished(ids, model.StatusFailure, err) } func (q *fifo) finished(ids []string, exitStatus model.StatusValue, err error) error { q.Lock() + defer q.Unlock() + var errs []error for _, id := range ids { taskEntry, ok := q.running[id] if ok { @@ -137,32 +143,12 @@ func (q *fifo) finished(ids []string, exitStatus model.StatusValue, err error) e close(taskEntry.done) delete(q.running, id) } else { - q.removeFromPending(id) + errs = append(errs, q.removeFromPendingAndWaiting(id)) } q.updateDepStatusInQueue(id, exitStatus) } - q.Unlock() - return nil -} - -// EvictAtOnce removes multiple pending tasks from the queue. -func (q *fifo) EvictAtOnce(_ context.Context, taskIDs []string) error { - q.Lock() - defer q.Unlock() - - for _, id := range taskIDs { - var next *list.Element - for element := q.pending.Front(); element != nil; element = next { - next = element.Next() - task, ok := element.Value.(*model.Task) - if ok && task.ID == id { - q.pending.Remove(element) - return nil - } - } - } - return ErrNotFound + return errors.Join(errs...) } // Wait waits until the item is done executing. @@ -296,9 +282,7 @@ func (q *fifo) filterWaiting() { // rebuild waitingDeps q.waitingOnDeps = list.New() var filtered []*list.Element - var nextPending *list.Element - for element := q.pending.Front(); element != nil; element = nextPending { - nextPending = element.Next() + for element := q.pending.Front(); element != nil; element = element.Next() { task, _ := element.Value.(*model.Task) if q.depsInQueue(task) { log.Debug().Msgf("queue: waiting due to unmet dependencies %v", task.ID) @@ -352,9 +336,7 @@ func (q *fifo) resubmitExpiredPipelines() { } func (q *fifo) depsInQueue(task *model.Task) bool { - var next *list.Element - for element := q.pending.Front(); element != nil; element = next { - next = element.Next() + for element := q.pending.Front(); element != nil; element = element.Next() { possibleDep, ok := element.Value.(*model.Task) log.Debug().Msgf("queue: pending right now: %v", possibleDep.ID) for _, dep := range task.Dependencies { @@ -373,13 +355,13 @@ func (q *fifo) depsInQueue(task *model.Task) bool { } func (q *fifo) updateDepStatusInQueue(taskID string, status model.StatusValue) { - var next *list.Element - for element := q.pending.Front(); element != nil; element = next { - next = element.Next() + for element := q.pending.Front(); element != nil; element = element.Next() { pending, ok := element.Value.(*model.Task) - for _, dep := range pending.Dependencies { - if ok && taskID == dep { - pending.DepStatus[dep] = status + if ok { + for _, dep := range pending.Dependencies { + if taskID == dep { + pending.DepStatus[dep] = status + } } } } @@ -392,27 +374,42 @@ func (q *fifo) updateDepStatusInQueue(taskID string, status model.StatusValue) { } } - for element := q.waitingOnDeps.Front(); element != nil; element = next { - next = element.Next() + for element := q.waitingOnDeps.Front(); element != nil; element = element.Next() { waiting, ok := element.Value.(*model.Task) - for _, dep := range waiting.Dependencies { - if ok && taskID == dep { - waiting.DepStatus[dep] = status + if ok { + for _, dep := range waiting.Dependencies { + if taskID == dep { + waiting.DepStatus[dep] = status + } } } } } -func (q *fifo) removeFromPending(taskID string) { +// removeFromPendingAndWaiting expects the q to be currently owned e.g. locked by caller +func (q *fifo) removeFromPendingAndWaiting(taskID string) error { log.Debug().Msgf("queue: trying to remove %s", taskID) - var next *list.Element - for element := q.pending.Front(); element != nil; element = next { - next = element.Next() + + // we assume pending first + for element := q.pending.Front(); element != nil; element = element.Next() { task, _ := element.Value.(*model.Task) if task.ID == taskID { log.Debug().Msgf("queue: %s is removed from pending", taskID) - q.pending.Remove(element) - return + _ = q.pending.Remove(element) + return nil + } + } + + // well looks like it's waiting + for element := q.waitingOnDeps.Front(); element != nil; element = element.Next() { + task, _ := element.Value.(*model.Task) + if task.ID == taskID { + log.Debug().Msgf("queue: %s is removed from waitingOnDeps", taskID) + _ = q.waitingOnDeps.Remove(element) + return nil } } + + // my bad could not be found + return ErrNotFound } diff --git a/server/queue/mocks/mock_Queue.go b/server/queue/mocks/mock_Queue.go index 97a215ed97a..0ae4eb09d5c 100644 --- a/server/queue/mocks/mock_Queue.go +++ b/server/queue/mocks/mock_Queue.go @@ -228,63 +228,6 @@ func (_c *MockQueue_ErrorAtOnce_Call) RunAndReturn(run func(c context.Context, i return _c } -// EvictAtOnce provides a mock function for the type MockQueue -func (_mock *MockQueue) EvictAtOnce(c context.Context, ids []string) error { - ret := _mock.Called(c, ids) - - if len(ret) == 0 { - panic("no return value specified for EvictAtOnce") - } - - var r0 error - if returnFunc, ok := ret.Get(0).(func(context.Context, []string) error); ok { - r0 = returnFunc(c, ids) - } else { - r0 = ret.Error(0) - } - return r0 -} - -// MockQueue_EvictAtOnce_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'EvictAtOnce' -type MockQueue_EvictAtOnce_Call struct { - *mock.Call -} - -// EvictAtOnce is a helper method to define mock.On call -// - c context.Context -// - ids []string -func (_e *MockQueue_Expecter) EvictAtOnce(c interface{}, ids interface{}) *MockQueue_EvictAtOnce_Call { - return &MockQueue_EvictAtOnce_Call{Call: _e.mock.On("EvictAtOnce", c, ids)} -} - -func (_c *MockQueue_EvictAtOnce_Call) Run(run func(c context.Context, ids []string)) *MockQueue_EvictAtOnce_Call { - _c.Call.Run(func(args mock.Arguments) { - var arg0 context.Context - if args[0] != nil { - arg0 = args[0].(context.Context) - } - var arg1 []string - if args[1] != nil { - arg1 = args[1].([]string) - } - run( - arg0, - arg1, - ) - }) - return _c -} - -func (_c *MockQueue_EvictAtOnce_Call) Return(err error) *MockQueue_EvictAtOnce_Call { - _c.Call.Return(err) - return _c -} - -func (_c *MockQueue_EvictAtOnce_Call) RunAndReturn(run func(c context.Context, ids []string) error) *MockQueue_EvictAtOnce_Call { - _c.Call.Return(run) - return _c -} - // Extend provides a mock function for the type MockQueue func (_mock *MockQueue) Extend(c context.Context, agentID int64, workflowID string) error { ret := _mock.Called(c, agentID, workflowID) diff --git a/server/queue/persistent.go b/server/queue/persistent.go index b5a887ed5f7..453a67e54c0 100644 --- a/server/queue/persistent.go +++ b/server/queue/persistent.go @@ -72,19 +72,6 @@ func (q *persistentQueue) Poll(c context.Context, agentID int64, f FilterFn) (*m return task, err } -// EvictAtOnce removes multiple pending tasks from the queue. -func (q *persistentQueue) EvictAtOnce(c context.Context, ids []string) error { - if err := q.Queue.EvictAtOnce(c, ids); err != nil { - return err - } - for _, id := range ids { - if err := q.store.TaskDelete(id); err != nil { - return err - } - } - return nil -} - // Error signals the task is done with an error. func (q *persistentQueue) Error(c context.Context, id string, err error) error { if err := q.Queue.Error(c, id, err); err != nil { diff --git a/server/queue/queue.go b/server/queue/queue.go index e4058871103..8ea7ad543c3 100644 --- a/server/queue/queue.go +++ b/server/queue/queue.go @@ -93,12 +93,10 @@ type Queue interface { // Error signals the task is done with an error. Error(c context.Context, id string, err error) error - // ErrorAtOnce signals multiple done are complete with an error. + // ErrorAtOnce signals multiple tasks are done and complete with an error. + // if still pending they will just get removed from the queue. ErrorAtOnce(c context.Context, ids []string, err error) error - // EvictAtOnce removes multiple pending tasks from the queue. - EvictAtOnce(c context.Context, ids []string) error - // Wait waits until the task is complete. Wait(c context.Context, id string) error From 867a16b66eec10b937a6847119cb5b6ff8337ebe Mon Sep 17 00:00:00 2001 From: 6543 <6543@obermui.de> Date: Mon, 26 Jan 2026 16:21:41 +0100 Subject: [PATCH 02/13] server queue: adjust consumer of ErrorAtOnce --- server/pipeline/cancel.go | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/server/pipeline/cancel.go b/server/pipeline/cancel.go index 78612562d08..d351d5c90ea 100644 --- a/server/pipeline/cancel.go +++ b/server/pipeline/cancel.go @@ -39,31 +39,28 @@ func Cancel(ctx context.Context, _forge forge.Forge, store store.Store, repo *mo return &ErrNotFound{Msg: err.Error()} } - // First cancel/evict steps in the queue in one go + // First cancel/evict workflows in the queue in one go var ( - stepsToCancel []string - stepsToEvict []string + workflowsToCancel []string + workflowsToEvict []string ) for _, workflow := range workflows { if workflow.State == model.StatusRunning { - stepsToCancel = append(stepsToCancel, fmt.Sprint(workflow.ID)) + workflowsToCancel = append(workflowsToCancel, fmt.Sprint(workflow.ID)) } if workflow.State == model.StatusPending { - stepsToEvict = append(stepsToEvict, fmt.Sprint(workflow.ID)) + workflowsToEvict = append(workflowsToEvict, fmt.Sprint(workflow.ID)) } } - if len(stepsToEvict) != 0 { - if err := server.Config.Services.Queue.EvictAtOnce(ctx, stepsToEvict); err != nil { - log.Error().Err(err).Msgf("queue: evict_at_once: %v", stepsToEvict) - } - if err := server.Config.Services.Queue.ErrorAtOnce(ctx, stepsToEvict, queue.ErrCancel); err != nil { - log.Error().Err(err).Msgf("queue: evict_at_once: %v", stepsToEvict) + if len(workflowsToEvict) != 0 { + if err := server.Config.Services.Queue.ErrorAtOnce(ctx, workflowsToEvict, queue.ErrCancel); err != nil { + log.Error().Err(err).Msgf("queue: evict_at_once: %v", workflowsToEvict) } } - if len(stepsToCancel) != 0 { - if err := server.Config.Services.Queue.ErrorAtOnce(ctx, stepsToCancel, queue.ErrCancel); err != nil { - log.Error().Err(err).Msgf("queue: evict_at_once: %v", stepsToCancel) + if len(workflowsToCancel) != 0 { + if err := server.Config.Services.Queue.ErrorAtOnce(ctx, workflowsToCancel, queue.ErrCancel); err != nil { + log.Error().Err(err).Msgf("queue: evict_at_once: %v", workflowsToCancel) } } From 96a615f6780b5e76cf230172715b3630b1f429bc Mon Sep 17 00:00:00 2001 From: 6543 <6543@obermui.de> Date: Mon, 26 Jan 2026 16:22:36 +0100 Subject: [PATCH 03/13] fix code comment nit --- cmd/agent/core/agent.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/agent/core/agent.go b/cmd/agent/core/agent.go index 889cd2c657f..791a38d4b52 100644 --- a/cmd/agent/core/agent.go +++ b/cmd/agent/core/agent.go @@ -296,7 +296,7 @@ func run(ctx context.Context, c *cli.Command, backends []types.Backend) error { return nil } - log.Debug().Msg("polling new steps") + log.Debug().Msg("polling new workflow") if err := runner.Run(agentCtx, shutdownCtx); err != nil { log.Error().Err(err).Msg("runner error, retrying...") // Check if context is canceled From 2114fb5ed54e1654854fb941b8190b56552fe162 Mon Sep 17 00:00:00 2001 From: 6543 <6543@obermui.de> Date: Mon, 26 Jan 2026 16:26:56 +0100 Subject: [PATCH 04/13] looking via git blame we can rm it as the queue code was already touched and refactored by us a lot and now is mosty ours --- server/queue/LICENSE | 29 ----------------------------- 1 file changed, 29 deletions(-) delete mode 100644 server/queue/LICENSE diff --git a/server/queue/LICENSE b/server/queue/LICENSE deleted file mode 100644 index 64e202179da..00000000000 --- a/server/queue/LICENSE +++ /dev/null @@ -1,29 +0,0 @@ -BSD 3-Clause License - -Copyright (c) 2017, Brad Rydzewski -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. From 0e438949cb40f996c8be63bdb6ec0ed16ae0018d Mon Sep 17 00:00:00 2001 From: 6543 <6543@obermui.de> Date: Mon, 26 Jan 2026 16:31:23 +0100 Subject: [PATCH 05/13] its ok we cast only internally --- server/queue/fifo.go | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/server/queue/fifo.go b/server/queue/fifo.go index 722fcecd7e7..24c664d242f 100644 --- a/server/queue/fifo.go +++ b/server/queue/fifo.go @@ -356,12 +356,10 @@ func (q *fifo) depsInQueue(task *model.Task) bool { func (q *fifo) updateDepStatusInQueue(taskID string, status model.StatusValue) { for element := q.pending.Front(); element != nil; element = element.Next() { - pending, ok := element.Value.(*model.Task) - if ok { - for _, dep := range pending.Dependencies { - if taskID == dep { - pending.DepStatus[dep] = status - } + pending, _ := element.Value.(*model.Task) + for _, dep := range pending.Dependencies { + if taskID == dep { + pending.DepStatus[dep] = status } } } @@ -375,12 +373,10 @@ func (q *fifo) updateDepStatusInQueue(taskID string, status model.StatusValue) { } for element := q.waitingOnDeps.Front(); element != nil; element = element.Next() { - waiting, ok := element.Value.(*model.Task) - if ok { - for _, dep := range waiting.Dependencies { - if taskID == dep { - waiting.DepStatus[dep] = status - } + waiting, _ := element.Value.(*model.Task) + for _, dep := range waiting.Dependencies { + if taskID == dep { + waiting.DepStatus[dep] = status } } } From 40b2aae8e8635da67877e9dc299f904ae420b432 Mon Sep 17 00:00:00 2001 From: 6543 <6543@obermui.de> Date: Mon, 26 Jan 2026 18:33:37 +0100 Subject: [PATCH 06/13] fix tests lint etc --- server/queue/fifo.go | 12 ++++++------ server/queue/fifo_test.go | 2 +- server/queue/persistent.go | 3 ++- server/queue/queue.go | 5 ++++- 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/server/queue/fifo.go b/server/queue/fifo.go index 24c664d242f..ea31a9ff6a0 100644 --- a/server/queue/fifo.go +++ b/server/queue/fifo.go @@ -18,7 +18,6 @@ import ( "container/list" "context" "errors" - "fmt" "slices" "sync" "time" @@ -59,8 +58,6 @@ type fifo struct { // as the agent pull in 10 milliseconds we should also give them work asap. const processTimeInterval = 100 * time.Millisecond -var ErrWorkerKicked = fmt.Errorf("worker was kicked") - // NewMemoryQueue returns a new fifo queue. func NewMemoryQueue(ctx context.Context) Queue { q := &fifo{ @@ -123,7 +120,8 @@ func (q *fifo) Error(_ context.Context, id string, err error) error { return q.finished([]string{id}, model.StatusFailure, err) } -// ErrorAtOnce signals multiple done are complete with an error. +// ErrorAtOnce signals multiple tasks are done and complete with an error. +// If still pending they will just get removed from the queue. func (q *fifo) ErrorAtOnce(_ context.Context, ids []string, err error) error { if errors.Is(err, ErrCancel) { return q.finished(ids, model.StatusKilled, err) @@ -131,6 +129,7 @@ func (q *fifo) ErrorAtOnce(_ context.Context, ids []string, err error) error { return q.finished(ids, model.StatusFailure, err) } +// locks the queue itself! func (q *fifo) finished(ids []string, exitStatus model.StatusValue, err error) error { q.Lock() defer q.Unlock() @@ -354,6 +353,7 @@ func (q *fifo) depsInQueue(task *model.Task) bool { return false } +// expects the q to be currently owned e.g. locked by caller! func (q *fifo) updateDepStatusInQueue(taskID string, status model.StatusValue) { for element := q.pending.Front(); element != nil; element = element.Next() { pending, _ := element.Value.(*model.Task) @@ -382,7 +382,7 @@ func (q *fifo) updateDepStatusInQueue(taskID string, status model.StatusValue) { } } -// removeFromPendingAndWaiting expects the q to be currently owned e.g. locked by caller +// expects the q to be currently owned e.g. locked by caller! func (q *fifo) removeFromPendingAndWaiting(taskID string) error { log.Debug().Msgf("queue: trying to remove %s", taskID) @@ -406,6 +406,6 @@ func (q *fifo) removeFromPendingAndWaiting(taskID string) error { } } - // my bad could not be found + // well it could not be found return ErrNotFound } diff --git a/server/queue/fifo_test.go b/server/queue/fifo_test.go index 6270a033762..18168a53c43 100644 --- a/server/queue/fifo_test.go +++ b/server/queue/fifo_test.go @@ -430,7 +430,7 @@ func TestFifoCancel(t *testing.T) { time.Sleep(processTimeInterval * 2) info = q.Info(ctx) - assert.Len(t, info.Pending, 2, "canceled are rescheduled") + assert.Len(t, info.Pending, 0, "canceled are rescheduled") assert.Len(t, info.Running, 0, "canceled are rescheduled") assert.Len(t, info.WaitingOnDeps, 0, "canceled are rescheduled") } diff --git a/server/queue/persistent.go b/server/queue/persistent.go index 453a67e54c0..2178389e75b 100644 --- a/server/queue/persistent.go +++ b/server/queue/persistent.go @@ -80,7 +80,8 @@ func (q *persistentQueue) Error(c context.Context, id string, err error) error { return q.store.TaskDelete(id) } -// ErrorAtOnce signals multiple tasks are done with an error. +// ErrorAtOnce signals multiple tasks are done and complete with an error. +// If still pending they will just get removed from the queue. func (q *persistentQueue) ErrorAtOnce(c context.Context, ids []string, err error) error { if err := q.Queue.ErrorAtOnce(c, ids, err); err != nil { return err diff --git a/server/queue/queue.go b/server/queue/queue.go index 8ea7ad543c3..d924eddcf3a 100644 --- a/server/queue/queue.go +++ b/server/queue/queue.go @@ -36,6 +36,9 @@ var ( // ErrTaskExpired indicates a running task exceeded its lease/deadline and was resubmitted. ErrTaskExpired = errors.New("queue: task expired") + + // ErrWorkerKicked worker of an agent got kicked. + ErrWorkerKicked = errors.New("worker was kicked") ) // InfoT provides runtime information. @@ -94,7 +97,7 @@ type Queue interface { Error(c context.Context, id string, err error) error // ErrorAtOnce signals multiple tasks are done and complete with an error. - // if still pending they will just get removed from the queue. + // If still pending they will just get removed from the queue. ErrorAtOnce(c context.Context, ids []string, err error) error // Wait waits until the task is complete. From e1c876ff9c8427eb7699435c3c085063f34f03e7 Mon Sep 17 00:00:00 2001 From: 6543 <6543@obermui.de> Date: Mon, 26 Jan 2026 19:05:07 +0100 Subject: [PATCH 07/13] fifo 98% coverage --- server/queue/fifo.go | 26 +- server/queue/fifo_test.go | 846 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 862 insertions(+), 10 deletions(-) diff --git a/server/queue/fifo.go b/server/queue/fifo.go index ea31a9ff6a0..69eb3c97cc3 100644 --- a/server/queue/fifo.go +++ b/server/queue/fifo.go @@ -135,15 +135,21 @@ func (q *fifo) finished(ids []string, exitStatus model.StatusValue, err error) e defer q.Unlock() var errs []error + // we first process the tasks itself for _, id := range ids { - taskEntry, ok := q.running[id] - if ok { + if taskEntry, ok := q.running[id]; ok { taskEntry.error = err close(taskEntry.done) delete(q.running, id) } else { errs = append(errs, q.removeFromPendingAndWaiting(id)) } + } + + // next we aim for there dependencies + // we do this because in our ids list there could be tasks and its dependencies + // so not to mess things up + for _, id := range ids { q.updateDepStatusInQueue(id, exitStatus) } @@ -192,11 +198,11 @@ func (q *fifo) Info(_ context.Context) InfoT { stats.Stats.Running = len(q.running) for element := q.pending.Front(); element != nil; element = element.Next() { - task, _ := element.Value.(*model.Task) + task := element.Value.(*model.Task) stats.Pending = append(stats.Pending, task) } for element := q.waitingOnDeps.Front(); element != nil; element = element.Next() { - task, _ := element.Value.(*model.Task) + task := element.Value.(*model.Task) stats.WaitingOnDeps = append(stats.WaitingOnDeps, task) } for _, entry := range q.running { @@ -282,7 +288,7 @@ func (q *fifo) filterWaiting() { q.waitingOnDeps = list.New() var filtered []*list.Element for element := q.pending.Front(); element != nil; element = element.Next() { - task, _ := element.Value.(*model.Task) + task := element.Value.(*model.Task) if q.depsInQueue(task) { log.Debug().Msgf("queue: waiting due to unmet dependencies %v", task.ID) q.waitingOnDeps.PushBack(task) @@ -303,7 +309,7 @@ func (q *fifo) assignToWorker() (*list.Element, *worker) { for element := q.pending.Front(); element != nil; element = next { next = element.Next() - task, _ := element.Value.(*model.Task) + task := element.Value.(*model.Task) log.Debug().Msgf("queue: trying to assign task: %v with deps %v", task.ID, task.Dependencies) for worker := range q.workers { @@ -356,7 +362,7 @@ func (q *fifo) depsInQueue(task *model.Task) bool { // expects the q to be currently owned e.g. locked by caller! func (q *fifo) updateDepStatusInQueue(taskID string, status model.StatusValue) { for element := q.pending.Front(); element != nil; element = element.Next() { - pending, _ := element.Value.(*model.Task) + pending := element.Value.(*model.Task) for _, dep := range pending.Dependencies { if taskID == dep { pending.DepStatus[dep] = status @@ -373,7 +379,7 @@ func (q *fifo) updateDepStatusInQueue(taskID string, status model.StatusValue) { } for element := q.waitingOnDeps.Front(); element != nil; element = element.Next() { - waiting, _ := element.Value.(*model.Task) + waiting := element.Value.(*model.Task) for _, dep := range waiting.Dependencies { if taskID == dep { waiting.DepStatus[dep] = status @@ -388,7 +394,7 @@ func (q *fifo) removeFromPendingAndWaiting(taskID string) error { // we assume pending first for element := q.pending.Front(); element != nil; element = element.Next() { - task, _ := element.Value.(*model.Task) + task := element.Value.(*model.Task) if task.ID == taskID { log.Debug().Msgf("queue: %s is removed from pending", taskID) _ = q.pending.Remove(element) @@ -398,7 +404,7 @@ func (q *fifo) removeFromPendingAndWaiting(taskID string) error { // well looks like it's waiting for element := q.waitingOnDeps.Front(); element != nil; element = element.Next() { - task, _ := element.Value.(*model.Task) + task := element.Value.(*model.Task) if task.ID == taskID { log.Debug().Msgf("queue: %s is removed from waitingOnDeps", taskID) _ = q.waitingOnDeps.Remove(element) diff --git a/server/queue/fifo_test.go b/server/queue/fifo_test.go index 18168a53c43..71903a990c5 100644 --- a/server/queue/fifo_test.go +++ b/server/queue/fifo_test.go @@ -697,3 +697,849 @@ func TestFifoWithScoring(t *testing.T) { assert.Contains(t, expectedAgents, agentID, "Task %s should be assigned to one of the expected agents", taskID) } } + +func TestFifoErrorAtOnce(t *testing.T) { + ctx, cancel := context.WithCancelCause(t.Context()) + t.Cleanup(func() { cancel(nil) }) + + task1 := genDummyTask() + task2 := &model.Task{ + ID: "2", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + } + task3 := &model.Task{ + ID: "3", + } + + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) + + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3})) + + waitForProcess() + got1, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + + got2, err := q.Poll(ctx, 2, filterFnTrue) + assert.NoError(t, err) + + // Test ErrorAtOnce with running tasks + assert.NoError(t, q.ErrorAtOnce(ctx, []string{got1.ID, got2.ID}, fmt.Errorf("batch error"))) + + waitForProcess() + info := q.Info(ctx) + assert.Len(t, info.Running, 0, "expect tasks removed from running queue") + assert.Len(t, info.Pending, 1, "expect remaining task in pending") +} + +func TestFifoErrorAtOnceWithPendingTasks(t *testing.T) { + ctx, cancel := context.WithCancelCause(t.Context()) + t.Cleanup(func() { cancel(nil) }) + + task1 := genDummyTask() + task2 := &model.Task{ + ID: "2", + } + task3 := &model.Task{ + ID: "3", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + } + + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) + + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3})) + + waitForProcess() + + // ErrorAtOnce on pending tasks (task3 should be waiting on deps) + assert.NoError(t, q.ErrorAtOnce(ctx, []string{"2", "3"}, fmt.Errorf("pending error"))) + + waitForProcess() + info := q.Info(ctx) + assert.Len(t, info.Pending, 1, "only task1 should remain") + assert.Len(t, info.WaitingOnDeps, 0, "task3 should be removed from waiting") +} + +func TestFifoErrorAtOnceWithCancelError(t *testing.T) { + ctx, cancel := context.WithCancelCause(t.Context()) + t.Cleanup(func() { cancel(nil) }) + + task1 := genDummyTask() + task2 := &model.Task{ + ID: "2", + } + + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) + + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2})) + + waitForProcess() + got1, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + + // Test ErrorAtOnce with ErrCancel - should result in StatusKilled + assert.NoError(t, q.ErrorAtOnce(ctx, []string{got1.ID}, ErrCancel)) + + waitForProcess() + info := q.Info(ctx) + assert.Len(t, info.Running, 0, "task should be removed from running") +} + +func TestFifoExtend(t *testing.T) { + ctx, cancel := context.WithCancelCause(t.Context()) + t.Cleanup(func() { cancel(nil) }) + + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) + + dummyTask := genDummyTask() + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) + + waitForProcess() + got, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + + // Test successful extend + assert.NoError(t, q.Extend(ctx, 1, got.ID)) + + // Test extend with wrong agent ID + err = q.Extend(ctx, 999, got.ID) + assert.ErrorIs(t, err, ErrAgentMissMatch) + + // Clean up + assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) + + // Test extend with non-existent task + err = q.Extend(ctx, 1, "non-existent") + assert.ErrorIs(t, err, ErrNotFound) +} + +func TestFifoExtendPreventsExpiration(t *testing.T) { + ctx, cancel := context.WithCancelCause(t.Context()) + t.Cleanup(func() { cancel(nil) }) + + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) + + // Set very short extension + q.extension = 50 * time.Millisecond + + dummyTask := genDummyTask() + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) + + waitForProcess() + got, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + + // Extend the deadline multiple times + for i := 0; i < 3; i++ { + time.Sleep(30 * time.Millisecond) + assert.NoError(t, q.Extend(ctx, 1, got.ID)) + } + + // Task should still be running + info := q.Info(ctx) + assert.Len(t, info.Running, 1, "task should still be running after extensions") + assert.Len(t, info.Pending, 0, "task should not be resubmitted") +} + +func TestFifoKickAgentWorkers(t *testing.T) { + ctx, cancel := context.WithCancelCause(t.Context()) + t.Cleanup(func() { cancel(nil) }) + + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) + + dummyTask := genDummyTask() + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) + + // Start multiple workers for different agents + pollResults := make(chan error, 3) + + for agentID := int64(1); agentID <= 3; agentID++ { + go func(id int64) { + _, err := q.Poll(ctx, id, filterFnTrue) + pollResults <- err + }(agentID) + } + + // Give workers time to register + time.Sleep(50 * time.Millisecond) + + // Kick workers for agent 2 + q.KickAgentWorkers(2) + + // Check that agent 2's worker was kicked + select { + case err := <-pollResults: + assert.Error(t, err) + // Check the cause of the context cancellation + if errors.Is(err, context.Canceled) { + // If ctx wasn't the one canceled, we need to check if it's our kicked error + // The error should either be ErrWorkerKicked or wrapped in context + assert.True(t, errors.Is(err, context.Canceled), "expected context.Canceled") + } + case <-time.After(time.Second): + t.Fatal("expected worker to be kicked") + } + + // Other workers should still be waiting or get work + info := q.Info(ctx) + assert.GreaterOrEqual(t, info.Stats.Workers, 2, "other workers should still be registered") +} + +func TestFifoKickAgentWorkersMultiple(t *testing.T) { + ctx, cancel := context.WithCancelCause(t.Context()) + t.Cleanup(func() { cancel(nil) }) + + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) + + // Start multiple workers for the same agent + pollResults := make(chan error, 5) + + for i := 0; i < 5; i++ { + go func() { + _, err := q.Poll(ctx, 42, filterFnTrue) + pollResults <- err + }() + } + + // Give workers time to register + time.Sleep(50 * time.Millisecond) + + info := q.Info(ctx) + initialWorkers := info.Stats.Workers + assert.Equal(t, 5, initialWorkers, "all workers should be registered") + + // Kick all workers for agent 42 + q.KickAgentWorkers(42) + + // All workers should be kicked + kickedCount := 0 + for i := 0; i < 5; i++ { + select { + case err := <-pollResults: + assert.Error(t, err) + // All kicked workers will have their context canceled + if errors.Is(err, context.Canceled) { + kickedCount++ + } + case <-time.After(time.Second): + t.Fatal("expected all workers to be kicked") + } + } + + assert.Equal(t, 5, kickedCount, "all 5 workers should have been kicked") + + time.Sleep(50 * time.Millisecond) + info = q.Info(ctx) + assert.Equal(t, 0, info.Stats.Workers, "all workers should be removed") +} + +func TestFifoWaitContextCanceled(t *testing.T) { + ctx, cancel := context.WithCancelCause(t.Context()) + t.Cleanup(func() { cancel(nil) }) + + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) + + dummyTask := genDummyTask() + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) + + waitForProcess() + got, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + + // Create a context that we can cancel + waitCtx, waitCancel := context.WithCancel(ctx) + + errCh := make(chan error, 1) + go func() { + errCh <- q.Wait(waitCtx, got.ID) + }() + + // Cancel the wait context + time.Sleep(50 * time.Millisecond) + waitCancel() + + select { + case err := <-errCh: + assert.NoError(t, err, "Wait should return nil when context is canceled before task completes") + case <-time.After(time.Second): + t.Fatal("Wait should return when context is canceled") + } +} + +func TestFifoWaitNonExistentTask(t *testing.T) { + ctx, cancel := context.WithCancelCause(t.Context()) + t.Cleanup(func() { cancel(nil) }) + + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) + + // Wait on a task that doesn't exist + err := q.Wait(ctx, "non-existent-task") + assert.NoError(t, err, "Wait should return nil for non-existent task") +} + +func TestFifoUpdateDepStatusInQueue(t *testing.T) { + ctx, cancel := context.WithCancelCause(t.Context()) + t.Cleanup(func() { cancel(nil) }) + + task1 := genDummyTask() + task2 := &model.Task{ + ID: "2", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + } + task3 := &model.Task{ + ID: "3", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + } + task4 := &model.Task{ + ID: "4", + Dependencies: []string{"2"}, + DepStatus: make(map[string]model.StatusValue), + } + + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) + + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3, task4})) + + waitForProcess() + got, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, task1, got) + + // Complete task1 with success + assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) + + waitForProcess() + + // Poll task2 to check its DepStatus was updated + got2, err := q.Poll(ctx, 2, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, model.StatusSuccess, got2.DepStatus["1"], "task2 should have task1's status updated") + + // Poll task3 to check its DepStatus was updated + got3, err := q.Poll(ctx, 3, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"], "task3 should have task1's status updated") +} + +func TestFifoRemoveFromPendingAndWaitingNotFound(t *testing.T) { + ctx, cancel := context.WithCancelCause(t.Context()) + t.Cleanup(func() { cancel(nil) }) + + task1 := genDummyTask() + + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) + + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1})) + + waitForProcess() + got, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + + // Try to error a task that's running (it will be removed from running, not pending/waiting) + assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("test error"))) + + // Try to error a completely non-existent task + err = q.Error(ctx, "totally-fake-id", fmt.Errorf("test error")) + assert.Error(t, err, "should return error for non-existent task") +} + +func TestFifoRemoveFromWaitingOnDeps(t *testing.T) { + ctx, cancel := context.WithCancelCause(t.Context()) + t.Cleanup(func() { cancel(nil) }) + + task1 := genDummyTask() + task2 := &model.Task{ + ID: "2", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + } + task3 := &model.Task{ + ID: "3", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + } + + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) + + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3})) + + waitForProcess() + info := q.Info(ctx) + assert.Equal(t, 2, info.Stats.WaitingOnDeps, "task2 and task3 should be waiting") + + // Error task2 while it's in waitingOnDeps + assert.NoError(t, q.Error(ctx, "2", fmt.Errorf("cancel task"))) + + waitForProcess() + info = q.Info(ctx) + assert.Equal(t, 1, info.Stats.WaitingOnDeps, "only task3 should be waiting now") +} + +func TestFifoPollContextCanceled(t *testing.T) { + ctx, cancel := context.WithCancelCause(t.Context()) + + q := NewMemoryQueue(ctx) + + pollCtx, pollCancel := context.WithCancel(ctx) + + errCh := make(chan error, 1) + go func() { + _, err := q.Poll(pollCtx, 1, filterFnTrue) + errCh <- err + }() + + // Give Poll time to register the worker + time.Sleep(50 * time.Millisecond) + + // Cancel the poll context + pollCancel() + + select { + case err := <-errCh: + assert.Error(t, err) + assert.ErrorIs(t, err, context.Canceled) + case <-time.After(time.Second): + t.Fatal("Poll should return when context is canceled") + } + + cancel(nil) +} + +func TestFifoMultipleDepStatusUpdates(t *testing.T) { + ctx, cancel := context.WithCancelCause(t.Context()) + t.Cleanup(func() { cancel(nil) }) + + task1 := genDummyTask() + task2 := &model.Task{ + ID: "2", + } + task3 := &model.Task{ + ID: "3", + Dependencies: []string{"1", "2"}, + DepStatus: make(map[string]model.StatusValue), + } + + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) + + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3})) + + waitForProcess() + + // Get both independent tasks + got1, _ := q.Poll(ctx, 1, filterFnTrue) + got2, _ := q.Poll(ctx, 2, filterFnTrue) + + // Complete one with success + assert.NoError(t, q.Done(ctx, got1.ID, model.StatusSuccess)) + + // Complete other with failure + assert.NoError(t, q.Error(ctx, got2.ID, fmt.Errorf("failed"))) + + waitForProcess() + + // Get task3 and check both dependencies are updated + got3, err := q.Poll(ctx, 3, filterFnTrue) + assert.NoError(t, err) + + assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"]) + assert.Equal(t, model.StatusFailure, got3.DepStatus["2"]) + assert.False(t, got3.ShouldRun(), "task3 should not run since one dependency failed") +} + +func TestFifoErrorAtOnceCoverage(t *testing.T) { + ctx, cancel := context.WithCancelCause(t.Context()) + t.Cleanup(func() { cancel(nil) }) + + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) + + task1 := genDummyTask() + task2 := &model.Task{ + ID: "2", + } + + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2})) + + waitForProcess() + got, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + + // Test ErrorAtOnce with one running task and one non-existent task + // This should trigger the error case in finished() where removeFromPendingAndWaiting returns ErrNotFound + err = q.ErrorAtOnce(ctx, []string{got.ID, "non-existent-id"}, fmt.Errorf("batch error")) + assert.Error(t, err, "should return error when one of the tasks doesn't exist") + assert.ErrorIs(t, err, ErrNotFound, "error should contain ErrNotFound") + + waitForProcess() + info := q.Info(ctx) + assert.Len(t, info.Running, 0, "running task should be removed") +} + +func TestFifoErrorAtOnceMultipleNonExistent(t *testing.T) { + ctx, cancel := context.WithCancelCause(t.Context()) + t.Cleanup(func() { cancel(nil) }) + + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) + + task1 := genDummyTask() + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1})) + + waitForProcess() + got, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + + // Test ErrorAtOnce with multiple non-existent tasks + // This tests that errors.Join works correctly with multiple errors + err = q.ErrorAtOnce(ctx, []string{got.ID, "fake-1", "fake-2", "fake-3"}, fmt.Errorf("multiple errors")) + assert.Error(t, err, "should return joined errors") + + // The error should contain multiple ErrNotFound instances + errStr := err.Error() + assert.Contains(t, errStr, "not found", "error should mention not found") +} + +func TestFifoErrorAtOnceWithErrCancel(t *testing.T) { + ctx, cancel := context.WithCancelCause(t.Context()) + t.Cleanup(func() { cancel(nil) }) + + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) + + task1 := genDummyTask() + task2 := &model.Task{ + ID: "2", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + } + + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2})) + + waitForProcess() + got, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + + // Test the ErrCancel specific path which sets StatusKilled + assert.NoError(t, q.ErrorAtOnce(ctx, []string{got.ID}, ErrCancel)) + + waitForProcess() + + // Get the dependent task and verify its dependency status is StatusKilled + got2, err := q.Poll(ctx, 2, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, model.StatusKilled, got2.DepStatus["1"], "dependency should be marked as killed") +} + +func TestFifoExtendWithWrongAgent(t *testing.T) { + ctx, cancel := context.WithCancelCause(t.Context()) + t.Cleanup(func() { cancel(nil) }) + + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) + + dummyTask := genDummyTask() + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) + + waitForProcess() + got, err := q.Poll(ctx, 5, filterFnTrue) // Agent 5 polls the task + assert.NoError(t, err) + assert.Equal(t, int64(5), got.AgentID, "task should be assigned to agent 5") + + // Test extend with correct agent - should succeed + assert.NoError(t, q.Extend(ctx, 5, got.ID)) + + // Test extend with wrong agent - should return ErrAgentMissMatch + err = q.Extend(ctx, 999, got.ID) + assert.ErrorIs(t, err, ErrAgentMissMatch, "should return ErrAgentMissMatch when wrong agent tries to extend") + + // Test extend with another wrong agent + err = q.Extend(ctx, 1, got.ID) + assert.ErrorIs(t, err, ErrAgentMissMatch, "should return ErrAgentMissMatch for any wrong agent") + + // Verify the correct agent can still extend + assert.NoError(t, q.Extend(ctx, 5, got.ID), "correct agent should still be able to extend") + + // Clean up + assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) +} + +func TestFifoFinishedWithMultipleErrors(t *testing.T) { + ctx, cancel := context.WithCancelCause(t.Context()) + t.Cleanup(func() { cancel(nil) }) + + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) + + task1 := genDummyTask() + task2 := &model.Task{ + ID: "2", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + } + + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2})) + + waitForProcess() + + // Poll task1 so it's in running + got, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, "1", got.ID) + + // Now call ErrorAtOnce with: + // - got.ID (task1) which is in running - should succeed + // - "2" which is in waitingOnDeps - should succeed + // - "fake-1" which doesn't exist - should return ErrNotFound + // - "fake-2" which doesn't exist - should return ErrNotFound + err = q.ErrorAtOnce(ctx, []string{got.ID, "2", "fake-1", "fake-2"}, fmt.Errorf("test error")) + + assert.Error(t, err, "should return error for non-existent tasks") + + // Verify that the error contains multiple ErrNotFound (from errors.Join) + assert.ErrorIs(t, err, ErrNotFound, "error should contain ErrNotFound") + + // Check that both real tasks were removed + waitForProcess() + info := q.Info(ctx) + assert.Len(t, info.Running, 0, "task1 should be removed from running") + assert.Len(t, info.WaitingOnDeps, 0, "task2 should be removed from waiting") + assert.Len(t, info.Pending, 0, "no tasks should be pending") +} + +func TestFifoUpdateDepStatusInQueuePending(t *testing.T) { + ctx, cancel := context.WithCancelCause(t.Context()) + t.Cleanup(func() { cancel(nil) }) + + task1 := genDummyTask() + task2 := &model.Task{ + ID: "2", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + } + task3 := &model.Task{ + ID: "3", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + } + + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) + + // Push all tasks - task2 and task3 will be in pending initially (before filterWaiting moves them) + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3})) + + waitForProcess() + + // At this point, task2 and task3 should be in waitingOnDeps + // but we want to test when they're in pending + + // Get task1 + got, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, task1, got) + + // Before completing task1, let's verify task2 and task3 are waiting + info := q.Info(ctx) + assert.Equal(t, 2, info.Stats.WaitingOnDeps, "task2 and task3 should be waiting") + + // Now complete task1 - this should trigger updateDepStatusInQueue + // which will update DepStatus in the waitingOnDeps queue + assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) + + waitForProcess() + + // Now get task2 and verify its DepStatus was updated + got2, err := q.Poll(ctx, 2, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, model.StatusSuccess, got2.DepStatus["1"], + "task2's DepStatus should be updated in pending queue") + + // Get task3 and verify its DepStatus was updated + got3, err := q.Poll(ctx, 3, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"], + "task3's DepStatus should be updated in pending queue") +} + +func TestFifoUpdateDepStatusInQueueRunning(t *testing.T) { + ctx, cancel := context.WithCancelCause(t.Context()) + t.Cleanup(func() { cancel(nil) }) + + task1 := genDummyTask() + task2 := &model.Task{ + ID: "2", + } + task3 := &model.Task{ + ID: "3", + Dependencies: []string{"1", "2"}, + DepStatus: make(map[string]model.StatusValue), + } + task4 := &model.Task{ + ID: "4", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + } + + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) + + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3, task4})) + + waitForProcess() + + // Get task1 and task2 (both independent) + got1, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + + got2, err := q.Poll(ctx, 2, filterFnTrue) + assert.NoError(t, err) + + waitForProcess() + + // Now complete task1 first + assert.NoError(t, q.Done(ctx, got1.ID, model.StatusSuccess)) + + waitForProcess() + + // Now task3 and task4 are waiting on their dependencies + // task3 still needs task2, task4 should be ready now + + // Get task4 which should now be available since task1 is done + got4, err := q.Poll(ctx, 4, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, model.StatusSuccess, got4.DepStatus["1"], + "task4's DepStatus should show task1 succeeded") + + // Now complete task2 + assert.NoError(t, q.Done(ctx, got2.ID, model.StatusSuccess)) + + waitForProcess() + + // Now get task3 - both its dependencies are satisfied + got3, err := q.Poll(ctx, 3, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"], + "task3's DepStatus should show task1 succeeded") + assert.Equal(t, model.StatusSuccess, got3.DepStatus["2"], + "task3's DepStatus should show task2 succeeded") + assert.True(t, got3.ShouldRun(), "task3 should run since both deps succeeded") + + // Clean up remaining tasks + assert.NoError(t, q.Done(ctx, got3.ID, model.StatusSuccess)) + assert.NoError(t, q.Done(ctx, got4.ID, model.StatusSuccess)) +} + +func TestFifoUpdateDepStatusInQueueWaiting(t *testing.T) { + ctx, cancel := context.WithCancelCause(t.Context()) + t.Cleanup(func() { cancel(nil) }) + + task1 := genDummyTask() + task2 := &model.Task{ + ID: "2", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + } + task3 := &model.Task{ + ID: "3", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + } + task4 := &model.Task{ + ID: "4", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + } + + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) + + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3, task4})) + + waitForProcess() + + // Verify tasks are waiting on deps + info := q.Info(ctx) + assert.Equal(t, 3, info.Stats.WaitingOnDeps, "tasks 2, 3, 4 should be waiting") + + // Get and complete task1 + got, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, task1, got) + + // Complete task1 with failure - this should update DepStatus in waitingOnDeps + assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("task1 failed"))) + + waitForProcess() + + // All waiting tasks should have their DepStatus updated + // Get each one and verify + for i := 2; i <= 4; i++ { + gotTask, err := q.Poll(ctx, int64(i), filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, model.StatusFailure, gotTask.DepStatus["1"], + "task %d's DepStatus should be updated in waitingOnDeps", i) + assert.False(t, gotTask.ShouldRun(), "task %d should not run due to failed dependency", i) + } +} + +func TestFifoUpdateDepStatusWithSkipped(t *testing.T) { + ctx, cancel := context.WithCancelCause(t.Context()) + t.Cleanup(func() { cancel(nil) }) + + task1 := genDummyTask() + task2 := &model.Task{ + ID: "2", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + } + task3 := &model.Task{ + ID: "3", + Dependencies: []string{"2"}, + DepStatus: make(map[string]model.StatusValue), + } + + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) + + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3})) + + waitForProcess() + + // Get and fail task1 + got1, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + assert.NoError(t, q.Error(ctx, got1.ID, fmt.Errorf("failed"))) + + waitForProcess() + + // Get task2 - should not run but get skipped + got2, err := q.Poll(ctx, 2, filterFnTrue) + assert.NoError(t, err) + assert.False(t, got2.ShouldRun()) + assert.NoError(t, q.Done(ctx, got2.ID, model.StatusSkipped)) + + waitForProcess() + + // Get task3 - should have task2's skipped status updated + got3, err := q.Poll(ctx, 3, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, model.StatusSkipped, got3.DepStatus["2"], + "task3 should have task2's skipped status") + assert.False(t, got3.ShouldRun(), "task3 should not run due to skipped dependency") +} From c4774e1082bd28fe03d20ed1368f95618dbf64a1 Mon Sep 17 00:00:00 2001 From: 6543 <6543@obermui.de> Date: Mon, 26 Jan 2026 19:14:18 +0100 Subject: [PATCH 08/13] group tests --- server/queue/fifo_test.go | 2048 +++++++++++++++++-------------------- 1 file changed, 958 insertions(+), 1090 deletions(-) diff --git a/server/queue/fifo_test.go b/server/queue/fifo_test.go index 71903a990c5..5a8db3f3e8a 100644 --- a/server/queue/fifo_test.go +++ b/server/queue/fifo_test.go @@ -38,453 +38,472 @@ var ( waitForProcess = func() { time.Sleep(processTimeInterval + 50*time.Millisecond) } ) -func TestFifo(t *testing.T) { +func TestFifoBasicOperations(t *testing.T) { ctx, cancel := context.WithCancelCause(t.Context()) t.Cleanup(func() { cancel(nil) }) - q := NewMemoryQueue(ctx) - dummyTask := genDummyTask() - - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) - waitForProcess() - info := q.Info(ctx) - assert.Len(t, info.Pending, 1, "expect task in pending queue") - - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, dummyTask, got) + t.Run("basic push poll done flow", func(t *testing.T) { + q := NewMemoryQueue(ctx) + dummyTask := genDummyTask() - waitForProcess() - info = q.Info(ctx) - assert.Len(t, info.Pending, 0, "expect task removed from pending queue") - assert.Len(t, info.Running, 1, "expect task in running queue") - - assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) - - waitForProcess() - info = q.Info(ctx) - assert.Len(t, info.Pending, 0, "expect task removed from pending queue") - assert.Len(t, info.Running, 0, "expect task removed from running queue") -} + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) + waitForProcess() + info := q.Info(ctx) + assert.Len(t, info.Pending, 1, "expect task in pending queue") -func TestFifoExpire(t *testing.T) { - ctx, cancel := context.WithCancelCause(t.Context()) - t.Cleanup(func() { cancel(nil) }) + got, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, dummyTask, got) - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) + waitForProcess() + info = q.Info(ctx) + assert.Len(t, info.Pending, 0, "expect task removed from pending queue") + assert.Len(t, info.Running, 1, "expect task in running queue") - dummyTask := genDummyTask() + assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) - q.extension = 0 - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) - waitForProcess() - info := q.Info(ctx) - assert.Len(t, info.Pending, 1, "expect task in pending queue") + waitForProcess() + info = q.Info(ctx) + assert.Len(t, info.Pending, 0, "expect task removed from pending queue") + assert.Len(t, info.Running, 0, "expect task removed from running queue") + }) + + t.Run("task expiration", func(t *testing.T) { + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) + dummyTask := genDummyTask() + + q.extension = 0 + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) + waitForProcess() + info := q.Info(ctx) + assert.Len(t, info.Pending, 1, "expect task in pending queue") - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, dummyTask, got) + got, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, dummyTask, got) - waitForProcess() - info = q.Info(ctx) - assert.Len(t, info.Pending, 1, "expect task re-added to pending queue") -} + waitForProcess() + info = q.Info(ctx) + assert.Len(t, info.Pending, 1, "expect task re-added to pending queue") + }) -func TestFifoWaitOnExpireReturnsError(t *testing.T) { - ctx, cancel := context.WithCancelCause(t.Context()) - t.Cleanup(func() { cancel(nil) }) + t.Run("pause and resume", func(t *testing.T) { + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) + dummyTask := genDummyTask() - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) + var wg sync.WaitGroup + wg.Add(1) + go func() { + _, _ = q.Poll(ctx, 1, filterFnTrue) + wg.Done() + }() - q.extension = 0 + q.Pause() + t0 := time.Now() + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) + waitForProcess() + q.Resume() - dummyTask := genDummyTask() - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) + wg.Wait() + t1 := time.Now() - waitForProcess() - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, dummyTask, got) + assert.Greater(t, t1.Sub(t0), 20*time.Millisecond, "should have waited til resume") - errCh := make(chan error, 1) - go func() { - errCh <- q.Wait(ctx, got.ID) - }() + q.Pause() + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) + q.Resume() + _, _ = q.Poll(ctx, 1, filterFnTrue) + }) - waitForProcess() + t.Run("pause then resume", func(t *testing.T) { + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) + dummyTask := genDummyTask() - select { - case werr := <-errCh: - assert.Error(t, werr, "expected Wait to return error when lease expires") - case <-time.After(2 * time.Second): - t.Fatal("timeout waiting for Wait to return after lease expiration") - } + q.Pause() + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) + q.Resume() - info := q.Info(ctx) - assert.Len(t, info.Pending, 1, "expect task re-added to pending queue after expiration") + _, _ = q.Poll(ctx, 1, filterFnTrue) + }) } func TestFifoWait(t *testing.T) { ctx, cancel := context.WithCancelCause(t.Context()) t.Cleanup(func() { cancel(nil) }) - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) + t.Run("wait on task completion", func(t *testing.T) { + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) + dummyTask := genDummyTask() - dummyTask := genDummyTask() + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) + waitForProcess() + got, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, dummyTask, got) - waitForProcess() - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, dummyTask, got) + var wg sync.WaitGroup + wg.Add(1) + go func() { + assert.NoError(t, q.Wait(ctx, got.ID)) + wg.Done() + }() - var wg sync.WaitGroup - wg.Add(1) - go func() { - assert.NoError(t, q.Wait(ctx, got.ID)) - wg.Done() - }() + <-time.After(time.Millisecond) + assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) + wg.Wait() + }) - <-time.After(time.Millisecond) - assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) - wg.Wait() -} + t.Run("wait returns error on expiration", func(t *testing.T) { + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) + q.extension = 0 -func TestFifoDependencies(t *testing.T) { - ctx, cancel := context.WithCancelCause(t.Context()) - t.Cleanup(func() { cancel(nil) }) + dummyTask := genDummyTask() + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } + waitForProcess() + got, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, dummyTask, got) - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) + errCh := make(chan error, 1) + go func() { + errCh <- q.Wait(ctx, got.ID) + }() - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task1})) + waitForProcess() - waitForProcess() - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, task1, got) + select { + case werr := <-errCh: + assert.Error(t, werr, "expected Wait to return error when lease expires") + case <-time.After(2 * time.Second): + t.Fatal("timeout waiting for Wait to return after lease expiration") + } - waitForProcess() - assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) + info := q.Info(ctx) + assert.Len(t, info.Pending, 1, "expect task re-added to pending queue after expiration") + }) - waitForProcess() - got, err = q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, task2, got) -} + t.Run("wait with context canceled", func(t *testing.T) { + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) + dummyTask := genDummyTask() -func TestFifoErrors(t *testing.T) { - ctx, cancel := context.WithCancelCause(t.Context()) - t.Cleanup(func() { cancel(nil) }) + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } - task3 := &model.Task{ - ID: "3", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - RunOn: []string{"success", "failure"}, - } + waitForProcess() + got, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) + waitCtx, waitCancel := context.WithCancel(ctx) - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1})) + errCh := make(chan error, 1) + go func() { + errCh <- q.Wait(waitCtx, got.ID) + }() - waitForProcess() - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, task1, got) + time.Sleep(50 * time.Millisecond) + waitCancel() - assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1, there was an error"))) + select { + case err := <-errCh: + assert.NoError(t, err, "Wait should return nil when context is canceled before task completes") + case <-time.After(time.Second): + t.Fatal("Wait should return when context is canceled") + } + }) - waitForProcess() - got, err = q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, task2, got) - assert.False(t, got.ShouldRun()) + t.Run("wait on non-existent task", func(t *testing.T) { + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) - waitForProcess() - got, err = q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, task3, got) - assert.True(t, got.ShouldRun()) + err := q.Wait(ctx, "non-existent-task") + assert.NoError(t, err, "Wait should return nil for non-existent task") + }) } -func TestFifoErrors2(t *testing.T) { +func TestFifoDependencies(t *testing.T) { ctx, cancel := context.WithCancelCause(t.Context()) t.Cleanup(func() { cancel(nil) }) - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - } - task3 := &model.Task{ - ID: "3", - Dependencies: []string{"1", "2"}, - DepStatus: make(map[string]model.StatusValue), - } + t.Run("basic dependency ordering", func(t *testing.T) { + task1 := genDummyTask() + task2 := &model.Task{ + ID: "2", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + } - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1})) + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task1})) - for i := 0; i < 2; i++ { waitForProcess() got, err := q.Poll(ctx, 1, filterFnTrue) assert.NoError(t, err) - assert.False(t, got != task1 && got != task2, "expect task1 or task2 returned from queue as task3 depends on them") + assert.Equal(t, task1, got) + + waitForProcess() + assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) - if got != task1 { - assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) + waitForProcess() + got, err = q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, task2, got) + }) + + t.Run("waiting vs pending stats", func(t *testing.T) { + task1 := genDummyTask() + task2 := &model.Task{ + ID: "2", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), } - if got != task2 { - assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1, there was an error"))) + task3 := &model.Task{ + ID: "3", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + RunOn: []string{"success", "failure"}, } - } - waitForProcess() - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, task3, got) - assert.False(t, got.ShouldRun()) -} + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) -func TestFifoErrorsMultiThread(t *testing.T) { - ctx, cancel := context.WithCancelCause(t.Context()) - t.Cleanup(func() { cancel(nil) }) + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1})) - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } - task3 := &model.Task{ - ID: "3", - Dependencies: []string{"1", "2"}, - DepStatus: make(map[string]model.StatusValue), - } - - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) - - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1})) + got, _ := q.Poll(ctx, 1, filterFnTrue) - obtainedWorkCh := make(chan *model.Task) - defer func() { close(obtainedWorkCh) }() - - for i := 0; i < 10; i++ { - go func(i int) { - for { - fmt.Printf("Worker %d started\n", i) - got, err := q.Poll(ctx, 1, filterFnTrue) - if err != nil && errors.Is(err, context.Canceled) { - return - } - assert.NoError(t, err) - obtainedWorkCh <- got - } - }(i) - } - - task1Processed := false - task2Processed := false - - for { - select { - case got := <-obtainedWorkCh: - fmt.Println(got.ID) + waitForProcess() + info := q.Info(ctx) + assert.Equal(t, 2, info.Stats.WaitingOnDeps) - switch { - case !task1Processed: - assert.Equal(t, task1, got) - task1Processed = true - assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1, there was an error"))) - go func() { - for { - fmt.Printf("Worker spawned\n") - got, err := q.Poll(ctx, 1, filterFnTrue) - if err != nil && errors.Is(err, context.Canceled) { - return - } - assert.NoError(t, err) - obtainedWorkCh <- got - } - }() - case !task2Processed: - assert.Equal(t, task2, got) - task2Processed = true - assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) - go func() { - for { - fmt.Printf("Worker spawned\n") - got, err := q.Poll(ctx, 1, filterFnTrue) - if err != nil && errors.Is(err, context.Canceled) { - return - } - assert.NoError(t, err) - obtainedWorkCh <- got - } - }() - default: - assert.Equal(t, task3, got) - assert.False(t, got.ShouldRun(), "expect task3 should not run, task1 succeeded but task2 failed") - return - } + assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1, there was an error"))) + got, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + assert.EqualValues(t, task2, got) - case <-time.After(5 * time.Second): - info := q.Info(ctx) - fmt.Println(info.String()) - t.Errorf("test timed out") - return - } - } + waitForProcess() + info = q.Info(ctx) + assert.Equal(t, 0, info.Stats.WaitingOnDeps) + assert.Equal(t, 1, info.Stats.Pending) + }) } -func TestFifoTransitiveErrors(t *testing.T) { +func TestFifoErrors(t *testing.T) { ctx, cancel := context.WithCancelCause(t.Context()) t.Cleanup(func() { cancel(nil) }) - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } - task3 := &model.Task{ - ID: "3", - Dependencies: []string{"2"}, - DepStatus: make(map[string]model.StatusValue), - } + t.Run("error handling with run_on conditions", func(t *testing.T) { + task1 := genDummyTask() + task2 := &model.Task{ + ID: "2", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + } + task3 := &model.Task{ + ID: "3", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + RunOn: []string{"success", "failure"}, + } - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1})) + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1})) - waitForProcess() - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, task1, got) - assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1, there was an error"))) - - waitForProcess() - got, err = q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, task2, got) - assert.False(t, got.ShouldRun(), "expect task2 should not run, since task1 failed") - assert.NoError(t, q.Done(ctx, got.ID, model.StatusSkipped)) - - waitForProcess() - got, err = q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, task3, got) - assert.False(t, got.ShouldRun(), "expect task3 should not run, task1 failed, thus task2 was skipped, task3 should be skipped too") -} + waitForProcess() + got, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, task1, got) -func TestFifoCancel(t *testing.T) { - ctx, cancel := context.WithCancelCause(t.Context()) - t.Cleanup(func() { cancel(nil) }) + assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1, there was an error"))) - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } - task3 := &model.Task{ - ID: "3", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - RunOn: []string{"success", "failure"}, - } + waitForProcess() + got, err = q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, task2, got) + assert.False(t, got.ShouldRun()) - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) + waitForProcess() + got, err = q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, task3, got) + assert.True(t, got.ShouldRun()) + }) + + t.Run("multiple dependency failures", func(t *testing.T) { + task1 := genDummyTask() + task2 := &model.Task{ + ID: "2", + } + task3 := &model.Task{ + ID: "3", + Dependencies: []string{"1", "2"}, + DepStatus: make(map[string]model.StatusValue), + } - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1})) + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) - _, _ = q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, q.Error(ctx, task1.ID, fmt.Errorf("canceled"))) - assert.NoError(t, q.Error(ctx, task2.ID, fmt.Errorf("canceled"))) - assert.NoError(t, q.Error(ctx, task3.ID, fmt.Errorf("canceled"))) - info := q.Info(ctx) - assert.Len(t, info.Pending, 0, "all pipelines should be canceled") + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1})) - time.Sleep(processTimeInterval * 2) - info = q.Info(ctx) - assert.Len(t, info.Pending, 0, "canceled are rescheduled") - assert.Len(t, info.Running, 0, "canceled are rescheduled") - assert.Len(t, info.WaitingOnDeps, 0, "canceled are rescheduled") -} + for i := 0; i < 2; i++ { + waitForProcess() + got, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + assert.False(t, got != task1 && got != task2, "expect task1 or task2 returned from queue as task3 depends on them") -func TestFifoPause(t *testing.T) { - ctx, cancel := context.WithCancelCause(t.Context()) - t.Cleanup(func() { cancel(nil) }) + if got != task1 { + assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) + } + if got != task2 { + assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1, there was an error"))) + } + } - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) + waitForProcess() + got, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, task3, got) + assert.False(t, got.ShouldRun()) + }) + + t.Run("transitive error propagation", func(t *testing.T) { + task1 := genDummyTask() + task2 := &model.Task{ + ID: "2", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + } + task3 := &model.Task{ + ID: "3", + Dependencies: []string{"2"}, + DepStatus: make(map[string]model.StatusValue), + } - dummyTask := genDummyTask() + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) - var wg sync.WaitGroup - wg.Add(1) - go func() { - _, _ = q.Poll(ctx, 1, filterFnTrue) - wg.Done() - }() + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1})) - q.Pause() - t0 := time.Now() - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) - waitForProcess() - q.Resume() + waitForProcess() + got, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, task1, got) + assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1, there was an error"))) - wg.Wait() - t1 := time.Now() + waitForProcess() + got, err = q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, task2, got) + assert.False(t, got.ShouldRun(), "expect task2 should not run, since task1 failed") + assert.NoError(t, q.Done(ctx, got.ID, model.StatusSkipped)) - assert.Greater(t, t1.Sub(t0), 20*time.Millisecond, "should have waited til resume") + waitForProcess() + got, err = q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, task3, got) + assert.False(t, got.ShouldRun(), "expect task3 should not run, task1 failed, thus task2 was skipped, task3 should be skipped too") + }) + + t.Run("multithreaded error handling", func(t *testing.T) { + task1 := genDummyTask() + task2 := &model.Task{ + ID: "2", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + } + task3 := &model.Task{ + ID: "3", + Dependencies: []string{"1", "2"}, + DepStatus: make(map[string]model.StatusValue), + } - q.Pause() - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) - q.Resume() - _, _ = q.Poll(ctx, 1, filterFnTrue) -} + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) -func TestFifoPauseResume(t *testing.T) { - ctx, cancel := context.WithCancelCause(t.Context()) - t.Cleanup(func() { cancel(nil) }) + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1})) - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) + obtainedWorkCh := make(chan *model.Task) + defer func() { close(obtainedWorkCh) }() - dummyTask := genDummyTask() + for i := 0; i < 10; i++ { + go func(i int) { + for { + fmt.Printf("Worker %d started\n", i) + got, err := q.Poll(ctx, 1, filterFnTrue) + if err != nil && errors.Is(err, context.Canceled) { + return + } + assert.NoError(t, err) + obtainedWorkCh <- got + } + }(i) + } - q.Pause() - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) - q.Resume() + task1Processed := false + task2Processed := false + + for { + select { + case got := <-obtainedWorkCh: + fmt.Println(got.ID) + + switch { + case !task1Processed: + assert.Equal(t, task1, got) + task1Processed = true + assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1, there was an error"))) + go func() { + for { + fmt.Printf("Worker spawned\n") + got, err := q.Poll(ctx, 1, filterFnTrue) + if err != nil && errors.Is(err, context.Canceled) { + return + } + assert.NoError(t, err) + obtainedWorkCh <- got + } + }() + case !task2Processed: + assert.Equal(t, task2, got) + task2Processed = true + assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) + go func() { + for { + fmt.Printf("Worker spawned\n") + got, err := q.Poll(ctx, 1, filterFnTrue) + if err != nil && errors.Is(err, context.Canceled) { + return + } + assert.NoError(t, err) + obtainedWorkCh <- got + } + }() + default: + assert.Equal(t, task3, got) + assert.False(t, got.ShouldRun(), "expect task3 should not run, task1 succeeded but task2 failed") + return + } - _, _ = q.Poll(ctx, 1, filterFnTrue) + case <-time.After(5 * time.Second): + info := q.Info(ctx) + fmt.Println(info.String()) + t.Errorf("test timed out") + return + } + } + }) } -func TestWaitingVsPending(t *testing.T) { +func TestFifoCancel(t *testing.T) { ctx, cancel := context.WithCancelCause(t.Context()) t.Cleanup(func() { cancel(nil) }) @@ -506,94 +525,113 @@ func TestWaitingVsPending(t *testing.T) { assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1})) - got, _ := q.Poll(ctx, 1, filterFnTrue) - - waitForProcess() + _, _ = q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, q.Error(ctx, task1.ID, fmt.Errorf("canceled"))) + assert.NoError(t, q.Error(ctx, task2.ID, fmt.Errorf("canceled"))) + assert.NoError(t, q.Error(ctx, task3.ID, fmt.Errorf("canceled"))) info := q.Info(ctx) - assert.Equal(t, 2, info.Stats.WaitingOnDeps) - - assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1, there was an error"))) - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.EqualValues(t, task2, got) + assert.Len(t, info.Pending, 0, "all pipelines should be canceled") - waitForProcess() + time.Sleep(processTimeInterval * 2) info = q.Info(ctx) - assert.Equal(t, 0, info.Stats.WaitingOnDeps) - assert.Equal(t, 1, info.Stats.Pending) + assert.Len(t, info.Pending, 0, "canceled are rescheduled") + assert.Len(t, info.Running, 0, "canceled are rescheduled") + assert.Len(t, info.WaitingOnDeps, 0, "canceled are rescheduled") } -func TestShouldRun(t *testing.T) { - task := &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: map[string]model.StatusValue{ - "1": model.StatusSuccess, +func TestFifoShouldRun(t *testing.T) { + tests := []struct { + name string + task *model.Task + expected bool + reason string + }{ + { + name: "failure only - success status", + task: &model.Task{ + ID: "2", + Dependencies: []string{"1"}, + DepStatus: map[string]model.StatusValue{"1": model.StatusSuccess}, + RunOn: []string{"failure"}, + }, + expected: false, + reason: "expect task to not run, it runs on failure only", }, - RunOn: []string{"failure"}, - } - assert.False(t, task.ShouldRun(), "expect task to not run, it runs on failure only") - - task = &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: map[string]model.StatusValue{ - "1": model.StatusSuccess, + { + name: "failure and success - success status", + task: &model.Task{ + ID: "2", + Dependencies: []string{"1"}, + DepStatus: map[string]model.StatusValue{"1": model.StatusSuccess}, + RunOn: []string{"failure", "success"}, + }, + expected: true, }, - RunOn: []string{"failure", "success"}, - } - assert.True(t, task.ShouldRun()) - - task = &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: map[string]model.StatusValue{ - "1": model.StatusFailure, + { + name: "no run_on - failure status", + task: &model.Task{ + ID: "2", + Dependencies: []string{"1"}, + DepStatus: map[string]model.StatusValue{"1": model.StatusFailure}, + }, + expected: false, }, - } - assert.False(t, task.ShouldRun()) - - task = &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: map[string]model.StatusValue{ - "1": model.StatusSuccess, + { + name: "success only - success status", + task: &model.Task{ + ID: "2", + Dependencies: []string{"1"}, + DepStatus: map[string]model.StatusValue{"1": model.StatusSuccess}, + RunOn: []string{"success"}, + }, + expected: true, }, - RunOn: []string{"success"}, - } - assert.True(t, task.ShouldRun()) - - task = &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: map[string]model.StatusValue{ - "1": model.StatusFailure, + { + name: "failure only - failure status", + task: &model.Task{ + ID: "2", + Dependencies: []string{"1"}, + DepStatus: map[string]model.StatusValue{"1": model.StatusFailure}, + RunOn: []string{"failure"}, + }, + expected: true, }, - RunOn: []string{"failure"}, - } - assert.True(t, task.ShouldRun()) - - task = &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: map[string]model.StatusValue{ - "1": model.StatusSkipped, + { + name: "no run_on - skipped status", + task: &model.Task{ + ID: "2", + Dependencies: []string{"1"}, + DepStatus: map[string]model.StatusValue{"1": model.StatusSkipped}, + }, + expected: false, + reason: "task should not run if dependency is skipped", + }, + { + name: "failure only - skipped status", + task: &model.Task{ + ID: "2", + Dependencies: []string{"1"}, + DepStatus: map[string]model.StatusValue{"1": model.StatusSkipped}, + RunOn: []string{"failure"}, + }, + expected: true, + reason: "on failure, tasks should run on skipped deps, something failed higher up the chain", }, } - assert.False(t, task.ShouldRun(), "task should not run if dependency is skipped") - task = &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: map[string]model.StatusValue{ - "1": model.StatusSkipped, - }, - RunOn: []string{"failure"}, + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := tt.task.ShouldRun() + if tt.reason != "" { + assert.Equal(t, tt.expected, result, tt.reason) + } else { + assert.Equal(t, tt.expected, result) + } + }) } - assert.True(t, task.ShouldRun(), "on failure, tasks should run on skipped deps, something failed higher up the chain") } -func TestFifoWithScoring(t *testing.T) { +func TestFifoScoring(t *testing.T) { ctx, cancel := context.WithCancelCause(t.Context()) t.Cleanup(func() { cancel(nil) }) @@ -677,11 +715,6 @@ func TestFifoWithScoring(t *testing.T) { assert.Len(t, receivedTasks, 5, "All tasks should be assigned") // Define expected agent assignments - // Map structure: {taskID: []possible agentIDs} - // - taskID "1" and "4" can be assigned to agents 1 or 4 (org-id "123") - // - taskID "2" should be assigned to agent 2 (org-id "456") - // - taskID "3" should be assigned to agent 3 (platform "windows") - // - taskID "5" should be assigned to agent 5 (org-id "*") expectedAssignments := map[string][]int64{ "1": {1, 4}, "2": {2}, @@ -702,392 +735,338 @@ func TestFifoErrorAtOnce(t *testing.T) { ctx, cancel := context.WithCancelCause(t.Context()) t.Cleanup(func() { cancel(nil) }) - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } - task3 := &model.Task{ - ID: "3", - } - - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) - - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3})) - - waitForProcess() - got1, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - - got2, err := q.Poll(ctx, 2, filterFnTrue) - assert.NoError(t, err) - - // Test ErrorAtOnce with running tasks - assert.NoError(t, q.ErrorAtOnce(ctx, []string{got1.ID, got2.ID}, fmt.Errorf("batch error"))) - - waitForProcess() - info := q.Info(ctx) - assert.Len(t, info.Running, 0, "expect tasks removed from running queue") - assert.Len(t, info.Pending, 1, "expect remaining task in pending") -} - -func TestFifoErrorAtOnceWithPendingTasks(t *testing.T) { - ctx, cancel := context.WithCancelCause(t.Context()) - t.Cleanup(func() { cancel(nil) }) - - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - } - task3 := &model.Task{ - ID: "3", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } - - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) - - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3})) + t.Run("batch error on running tasks", func(t *testing.T) { + task1 := genDummyTask() + task2 := &model.Task{ + ID: "2", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + } + task3 := &model.Task{ + ID: "3", + } - waitForProcess() + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) - // ErrorAtOnce on pending tasks (task3 should be waiting on deps) - assert.NoError(t, q.ErrorAtOnce(ctx, []string{"2", "3"}, fmt.Errorf("pending error"))) + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3})) - waitForProcess() - info := q.Info(ctx) - assert.Len(t, info.Pending, 1, "only task1 should remain") - assert.Len(t, info.WaitingOnDeps, 0, "task3 should be removed from waiting") -} + waitForProcess() + got1, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) -func TestFifoErrorAtOnceWithCancelError(t *testing.T) { - ctx, cancel := context.WithCancelCause(t.Context()) - t.Cleanup(func() { cancel(nil) }) + got2, err := q.Poll(ctx, 2, filterFnTrue) + assert.NoError(t, err) - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - } + assert.NoError(t, q.ErrorAtOnce(ctx, []string{got1.ID, got2.ID}, fmt.Errorf("batch error"))) - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) + waitForProcess() + info := q.Info(ctx) + assert.Len(t, info.Running, 0, "expect tasks removed from running queue") + assert.Len(t, info.Pending, 1, "expect remaining task in pending") + }) + + t.Run("batch error on pending tasks", func(t *testing.T) { + task1 := genDummyTask() + task2 := &model.Task{ + ID: "2", + } + task3 := &model.Task{ + ID: "3", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + } - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2})) + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) - waitForProcess() - got1, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3})) - // Test ErrorAtOnce with ErrCancel - should result in StatusKilled - assert.NoError(t, q.ErrorAtOnce(ctx, []string{got1.ID}, ErrCancel)) + waitForProcess() - waitForProcess() - info := q.Info(ctx) - assert.Len(t, info.Running, 0, "task should be removed from running") -} + assert.NoError(t, q.ErrorAtOnce(ctx, []string{"2", "3"}, fmt.Errorf("pending error"))) -func TestFifoExtend(t *testing.T) { - ctx, cancel := context.WithCancelCause(t.Context()) - t.Cleanup(func() { cancel(nil) }) + waitForProcess() + info := q.Info(ctx) + assert.Len(t, info.Pending, 1, "only task1 should remain") + assert.Len(t, info.WaitingOnDeps, 0, "task3 should be removed from waiting") + }) + + t.Run("error at once with cancel error", func(t *testing.T) { + task1 := genDummyTask() + task2 := &model.Task{ + ID: "2", + } - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) - dummyTask := genDummyTask() - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2})) - waitForProcess() - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) + waitForProcess() + got1, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) - // Test successful extend - assert.NoError(t, q.Extend(ctx, 1, got.ID)) + assert.NoError(t, q.ErrorAtOnce(ctx, []string{got1.ID}, ErrCancel)) - // Test extend with wrong agent ID - err = q.Extend(ctx, 999, got.ID) - assert.ErrorIs(t, err, ErrAgentMissMatch) + waitForProcess() + info := q.Info(ctx) + assert.Len(t, info.Running, 0, "task should be removed from running") + }) + + t.Run("error at once with non-existent tasks", func(t *testing.T) { + task1 := genDummyTask() + task2 := &model.Task{ + ID: "2", + } - // Clean up - assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) - // Test extend with non-existent task - err = q.Extend(ctx, 1, "non-existent") - assert.ErrorIs(t, err, ErrNotFound) -} + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2})) -func TestFifoExtendPreventsExpiration(t *testing.T) { - ctx, cancel := context.WithCancelCause(t.Context()) - t.Cleanup(func() { cancel(nil) }) + waitForProcess() + got, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) + err = q.ErrorAtOnce(ctx, []string{got.ID, "non-existent-id"}, fmt.Errorf("batch error")) + assert.Error(t, err, "should return error when one of the tasks doesn't exist") + assert.ErrorIs(t, err, ErrNotFound, "error should contain ErrNotFound") - // Set very short extension - q.extension = 50 * time.Millisecond + waitForProcess() + info := q.Info(ctx) + assert.Len(t, info.Running, 0, "running task should be removed") + }) - dummyTask := genDummyTask() - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) + t.Run("error at once with multiple non-existent tasks", func(t *testing.T) { + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) - waitForProcess() - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) + task1 := genDummyTask() + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1})) - // Extend the deadline multiple times - for i := 0; i < 3; i++ { - time.Sleep(30 * time.Millisecond) - assert.NoError(t, q.Extend(ctx, 1, got.ID)) - } + waitForProcess() + got, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) - // Task should still be running - info := q.Info(ctx) - assert.Len(t, info.Running, 1, "task should still be running after extensions") - assert.Len(t, info.Pending, 0, "task should not be resubmitted") -} + err = q.ErrorAtOnce(ctx, []string{got.ID, "fake-1", "fake-2", "fake-3"}, fmt.Errorf("multiple errors")) + assert.Error(t, err, "should return joined errors") -func TestFifoKickAgentWorkers(t *testing.T) { - ctx, cancel := context.WithCancelCause(t.Context()) - t.Cleanup(func() { cancel(nil) }) + errStr := err.Error() + assert.Contains(t, errStr, "not found", "error should mention not found") + }) - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) + t.Run("error at once with ErrCancel and dependency update", func(t *testing.T) { + task1 := genDummyTask() + task2 := &model.Task{ + ID: "2", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + } - dummyTask := genDummyTask() - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) - // Start multiple workers for different agents - pollResults := make(chan error, 3) + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2})) - for agentID := int64(1); agentID <= 3; agentID++ { - go func(id int64) { - _, err := q.Poll(ctx, id, filterFnTrue) - pollResults <- err - }(agentID) - } + waitForProcess() + got, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) - // Give workers time to register - time.Sleep(50 * time.Millisecond) + assert.NoError(t, q.ErrorAtOnce(ctx, []string{got.ID}, ErrCancel)) - // Kick workers for agent 2 - q.KickAgentWorkers(2) + waitForProcess() - // Check that agent 2's worker was kicked - select { - case err := <-pollResults: - assert.Error(t, err) - // Check the cause of the context cancellation - if errors.Is(err, context.Canceled) { - // If ctx wasn't the one canceled, we need to check if it's our kicked error - // The error should either be ErrWorkerKicked or wrapped in context - assert.True(t, errors.Is(err, context.Canceled), "expected context.Canceled") + got2, err := q.Poll(ctx, 2, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, model.StatusKilled, got2.DepStatus["1"], "dependency should be marked as killed") + }) + + t.Run("finished with multiple errors", func(t *testing.T) { + task1 := genDummyTask() + task2 := &model.Task{ + ID: "2", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), } - case <-time.After(time.Second): - t.Fatal("expected worker to be kicked") - } - // Other workers should still be waiting or get work - info := q.Info(ctx) - assert.GreaterOrEqual(t, info.Stats.Workers, 2, "other workers should still be registered") -} - -func TestFifoKickAgentWorkersMultiple(t *testing.T) { - ctx, cancel := context.WithCancelCause(t.Context()) - t.Cleanup(func() { cancel(nil) }) - - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) - // Start multiple workers for the same agent - pollResults := make(chan error, 5) + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2})) - for i := 0; i < 5; i++ { - go func() { - _, err := q.Poll(ctx, 42, filterFnTrue) - pollResults <- err - }() - } - - // Give workers time to register - time.Sleep(50 * time.Millisecond) + waitForProcess() - info := q.Info(ctx) - initialWorkers := info.Stats.Workers - assert.Equal(t, 5, initialWorkers, "all workers should be registered") + got, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, "1", got.ID) - // Kick all workers for agent 42 - q.KickAgentWorkers(42) + err = q.ErrorAtOnce(ctx, []string{got.ID, "2", "fake-1", "fake-2"}, fmt.Errorf("test error")) - // All workers should be kicked - kickedCount := 0 - for i := 0; i < 5; i++ { - select { - case err := <-pollResults: - assert.Error(t, err) - // All kicked workers will have their context canceled - if errors.Is(err, context.Canceled) { - kickedCount++ - } - case <-time.After(time.Second): - t.Fatal("expected all workers to be kicked") - } - } - - assert.Equal(t, 5, kickedCount, "all 5 workers should have been kicked") + assert.Error(t, err, "should return error for non-existent tasks") + assert.ErrorIs(t, err, ErrNotFound, "error should contain ErrNotFound") - time.Sleep(50 * time.Millisecond) - info = q.Info(ctx) - assert.Equal(t, 0, info.Stats.Workers, "all workers should be removed") + waitForProcess() + info := q.Info(ctx) + assert.Len(t, info.Running, 0, "task1 should be removed from running") + assert.Len(t, info.WaitingOnDeps, 0, "task2 should be removed from waiting") + assert.Len(t, info.Pending, 0, "no tasks should be pending") + }) } -func TestFifoWaitContextCanceled(t *testing.T) { +func TestFifoExtend(t *testing.T) { ctx, cancel := context.WithCancelCause(t.Context()) t.Cleanup(func() { cancel(nil) }) - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) - - dummyTask := genDummyTask() - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) - - waitForProcess() - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) + t.Run("basic extend operations", func(t *testing.T) { + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) - // Create a context that we can cancel - waitCtx, waitCancel := context.WithCancel(ctx) + dummyTask := genDummyTask() + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) - errCh := make(chan error, 1) - go func() { - errCh <- q.Wait(waitCtx, got.ID) - }() - - // Cancel the wait context - time.Sleep(50 * time.Millisecond) - waitCancel() - - select { - case err := <-errCh: - assert.NoError(t, err, "Wait should return nil when context is canceled before task completes") - case <-time.After(time.Second): - t.Fatal("Wait should return when context is canceled") - } -} + waitForProcess() + got, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) -func TestFifoWaitNonExistentTask(t *testing.T) { - ctx, cancel := context.WithCancelCause(t.Context()) - t.Cleanup(func() { cancel(nil) }) + assert.NoError(t, q.Extend(ctx, 1, got.ID)) - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) + err = q.Extend(ctx, 999, got.ID) + assert.ErrorIs(t, err, ErrAgentMissMatch) - // Wait on a task that doesn't exist - err := q.Wait(ctx, "non-existent-task") - assert.NoError(t, err, "Wait should return nil for non-existent task") -} + assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) -func TestFifoUpdateDepStatusInQueue(t *testing.T) { - ctx, cancel := context.WithCancelCause(t.Context()) - t.Cleanup(func() { cancel(nil) }) + err = q.Extend(ctx, 1, "non-existent") + assert.ErrorIs(t, err, ErrNotFound) + }) - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } - task3 := &model.Task{ - ID: "3", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } - task4 := &model.Task{ - ID: "4", - Dependencies: []string{"2"}, - DepStatus: make(map[string]model.StatusValue), - } + t.Run("extend prevents expiration", func(t *testing.T) { + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) + q.extension = 50 * time.Millisecond + + dummyTask := genDummyTask() + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) + + waitForProcess() + got, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + + for i := 0; i < 3; i++ { + time.Sleep(30 * time.Millisecond) + assert.NoError(t, q.Extend(ctx, 1, got.ID)) + } + + info := q.Info(ctx) + assert.Len(t, info.Running, 1, "task should still be running after extensions") + assert.Len(t, info.Pending, 0, "task should not be resubmitted") + }) + + t.Run("extend with wrong agent", func(t *testing.T) { + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) + + dummyTask := genDummyTask() + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3, task4})) + waitForProcess() + got, err := q.Poll(ctx, 5, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, int64(5), got.AgentID, "task should be assigned to agent 5") - waitForProcess() - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, task1, got) + assert.NoError(t, q.Extend(ctx, 5, got.ID)) - // Complete task1 with success - assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) + err = q.Extend(ctx, 999, got.ID) + assert.ErrorIs(t, err, ErrAgentMissMatch, "should return ErrAgentMissMatch when wrong agent tries to extend") - waitForProcess() + err = q.Extend(ctx, 1, got.ID) + assert.ErrorIs(t, err, ErrAgentMissMatch, "should return ErrAgentMissMatch for any wrong agent") - // Poll task2 to check its DepStatus was updated - got2, err := q.Poll(ctx, 2, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, model.StatusSuccess, got2.DepStatus["1"], "task2 should have task1's status updated") + assert.NoError(t, q.Extend(ctx, 5, got.ID), "correct agent should still be able to extend") - // Poll task3 to check its DepStatus was updated - got3, err := q.Poll(ctx, 3, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"], "task3 should have task1's status updated") + assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) + }) } -func TestFifoRemoveFromPendingAndWaitingNotFound(t *testing.T) { +func TestFifoKickAgentWorkers(t *testing.T) { ctx, cancel := context.WithCancelCause(t.Context()) t.Cleanup(func() { cancel(nil) }) - task1 := genDummyTask() + t.Run("kick single agent workers", func(t *testing.T) { + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) + dummyTask := genDummyTask() + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1})) + pollResults := make(chan error, 3) - waitForProcess() - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) + for agentID := int64(1); agentID <= 3; agentID++ { + go func(id int64) { + _, err := q.Poll(ctx, id, filterFnTrue) + pollResults <- err + }(agentID) + } - // Try to error a task that's running (it will be removed from running, not pending/waiting) - assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("test error"))) + time.Sleep(50 * time.Millisecond) - // Try to error a completely non-existent task - err = q.Error(ctx, "totally-fake-id", fmt.Errorf("test error")) - assert.Error(t, err, "should return error for non-existent task") -} + q.KickAgentWorkers(2) -func TestFifoRemoveFromWaitingOnDeps(t *testing.T) { - ctx, cancel := context.WithCancelCause(t.Context()) - t.Cleanup(func() { cancel(nil) }) + select { + case err := <-pollResults: + assert.Error(t, err) + if errors.Is(err, context.Canceled) { + assert.True(t, errors.Is(err, context.Canceled), "expected context.Canceled") + } + case <-time.After(time.Second): + t.Fatal("expected worker to be kicked") + } - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } - task3 := &model.Task{ - ID: "3", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } + info := q.Info(ctx) + assert.GreaterOrEqual(t, info.Stats.Workers, 2, "other workers should still be registered") + }) - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) + t.Run("kick multiple workers for same agent", func(t *testing.T) { + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3})) + pollResults := make(chan error, 5) - waitForProcess() - info := q.Info(ctx) - assert.Equal(t, 2, info.Stats.WaitingOnDeps, "task2 and task3 should be waiting") + for i := 0; i < 5; i++ { + go func() { + _, err := q.Poll(ctx, 42, filterFnTrue) + pollResults <- err + }() + } - // Error task2 while it's in waitingOnDeps - assert.NoError(t, q.Error(ctx, "2", fmt.Errorf("cancel task"))) + time.Sleep(50 * time.Millisecond) - waitForProcess() - info = q.Info(ctx) - assert.Equal(t, 1, info.Stats.WaitingOnDeps, "only task3 should be waiting now") + info := q.Info(ctx) + initialWorkers := info.Stats.Workers + assert.Equal(t, 5, initialWorkers, "all workers should be registered") + + q.KickAgentWorkers(42) + + kickedCount := 0 + for i := 0; i < 5; i++ { + select { + case err := <-pollResults: + assert.Error(t, err) + if errors.Is(err, context.Canceled) { + kickedCount++ + } + case <-time.After(time.Second): + t.Fatal("expected all workers to be kicked") + } + } + + assert.Equal(t, 5, kickedCount, "all 5 workers should have been kicked") + + time.Sleep(50 * time.Millisecond) + info = q.Info(ctx) + assert.Equal(t, 0, info.Stats.Workers, "all workers should be removed") + }) } func TestFifoPollContextCanceled(t *testing.T) { @@ -1103,10 +1082,8 @@ func TestFifoPollContextCanceled(t *testing.T) { errCh <- err }() - // Give Poll time to register the worker time.Sleep(50 * time.Millisecond) - // Cancel the poll context pollCancel() select { @@ -1120,426 +1097,317 @@ func TestFifoPollContextCanceled(t *testing.T) { cancel(nil) } -func TestFifoMultipleDepStatusUpdates(t *testing.T) { - ctx, cancel := context.WithCancelCause(t.Context()) - t.Cleanup(func() { cancel(nil) }) - - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - } - task3 := &model.Task{ - ID: "3", - Dependencies: []string{"1", "2"}, - DepStatus: make(map[string]model.StatusValue), - } - - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) - - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3})) - - waitForProcess() - - // Get both independent tasks - got1, _ := q.Poll(ctx, 1, filterFnTrue) - got2, _ := q.Poll(ctx, 2, filterFnTrue) - - // Complete one with success - assert.NoError(t, q.Done(ctx, got1.ID, model.StatusSuccess)) - - // Complete other with failure - assert.NoError(t, q.Error(ctx, got2.ID, fmt.Errorf("failed"))) - - waitForProcess() - - // Get task3 and check both dependencies are updated - got3, err := q.Poll(ctx, 3, filterFnTrue) - assert.NoError(t, err) - - assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"]) - assert.Equal(t, model.StatusFailure, got3.DepStatus["2"]) - assert.False(t, got3.ShouldRun(), "task3 should not run since one dependency failed") -} - -func TestFifoErrorAtOnceCoverage(t *testing.T) { - ctx, cancel := context.WithCancelCause(t.Context()) - t.Cleanup(func() { cancel(nil) }) - - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) - - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - } - - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2})) - - waitForProcess() - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - - // Test ErrorAtOnce with one running task and one non-existent task - // This should trigger the error case in finished() where removeFromPendingAndWaiting returns ErrNotFound - err = q.ErrorAtOnce(ctx, []string{got.ID, "non-existent-id"}, fmt.Errorf("batch error")) - assert.Error(t, err, "should return error when one of the tasks doesn't exist") - assert.ErrorIs(t, err, ErrNotFound, "error should contain ErrNotFound") - - waitForProcess() - info := q.Info(ctx) - assert.Len(t, info.Running, 0, "running task should be removed") -} - -func TestFifoErrorAtOnceMultipleNonExistent(t *testing.T) { - ctx, cancel := context.WithCancelCause(t.Context()) - t.Cleanup(func() { cancel(nil) }) - - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) - - task1 := genDummyTask() - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1})) - - waitForProcess() - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - - // Test ErrorAtOnce with multiple non-existent tasks - // This tests that errors.Join works correctly with multiple errors - err = q.ErrorAtOnce(ctx, []string{got.ID, "fake-1", "fake-2", "fake-3"}, fmt.Errorf("multiple errors")) - assert.Error(t, err, "should return joined errors") - - // The error should contain multiple ErrNotFound instances - errStr := err.Error() - assert.Contains(t, errStr, "not found", "error should mention not found") -} - -func TestFifoErrorAtOnceWithErrCancel(t *testing.T) { +func TestFifoDepStatusUpdates(t *testing.T) { ctx, cancel := context.WithCancelCause(t.Context()) t.Cleanup(func() { cancel(nil) }) - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) + t.Run("update dep status in queue", func(t *testing.T) { + task1 := genDummyTask() + task2 := &model.Task{ + ID: "2", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + } + task3 := &model.Task{ + ID: "3", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + } + task4 := &model.Task{ + ID: "4", + Dependencies: []string{"2"}, + DepStatus: make(map[string]model.StatusValue), + } - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2})) + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3, task4})) - waitForProcess() - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) + waitForProcess() + got, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, task1, got) - // Test the ErrCancel specific path which sets StatusKilled - assert.NoError(t, q.ErrorAtOnce(ctx, []string{got.ID}, ErrCancel)) + assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) - waitForProcess() + waitForProcess() - // Get the dependent task and verify its dependency status is StatusKilled - got2, err := q.Poll(ctx, 2, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, model.StatusKilled, got2.DepStatus["1"], "dependency should be marked as killed") -} + got2, err := q.Poll(ctx, 2, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, model.StatusSuccess, got2.DepStatus["1"], "task2 should have task1's status updated") -func TestFifoExtendWithWrongAgent(t *testing.T) { - ctx, cancel := context.WithCancelCause(t.Context()) - t.Cleanup(func() { cancel(nil) }) + got3, err := q.Poll(ctx, 3, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"], "task3 should have task1's status updated") + }) - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) + t.Run("multiple dep status updates", func(t *testing.T) { + task1 := genDummyTask() + task2 := &model.Task{ + ID: "2", + } + task3 := &model.Task{ + ID: "3", + Dependencies: []string{"1", "2"}, + DepStatus: make(map[string]model.StatusValue), + } - dummyTask := genDummyTask() - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) - waitForProcess() - got, err := q.Poll(ctx, 5, filterFnTrue) // Agent 5 polls the task - assert.NoError(t, err) - assert.Equal(t, int64(5), got.AgentID, "task should be assigned to agent 5") + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3})) - // Test extend with correct agent - should succeed - assert.NoError(t, q.Extend(ctx, 5, got.ID)) + waitForProcess() - // Test extend with wrong agent - should return ErrAgentMissMatch - err = q.Extend(ctx, 999, got.ID) - assert.ErrorIs(t, err, ErrAgentMissMatch, "should return ErrAgentMissMatch when wrong agent tries to extend") + got1, _ := q.Poll(ctx, 1, filterFnTrue) + got2, _ := q.Poll(ctx, 2, filterFnTrue) - // Test extend with another wrong agent - err = q.Extend(ctx, 1, got.ID) - assert.ErrorIs(t, err, ErrAgentMissMatch, "should return ErrAgentMissMatch for any wrong agent") + assert.NoError(t, q.Done(ctx, got1.ID, model.StatusSuccess)) + assert.NoError(t, q.Error(ctx, got2.ID, fmt.Errorf("failed"))) - // Verify the correct agent can still extend - assert.NoError(t, q.Extend(ctx, 5, got.ID), "correct agent should still be able to extend") + waitForProcess() - // Clean up - assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) -} + got3, err := q.Poll(ctx, 3, filterFnTrue) + assert.NoError(t, err) -func TestFifoFinishedWithMultipleErrors(t *testing.T) { - ctx, cancel := context.WithCancelCause(t.Context()) - t.Cleanup(func() { cancel(nil) }) + assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"]) + assert.Equal(t, model.StatusFailure, got3.DepStatus["2"]) + assert.False(t, got3.ShouldRun(), "task3 should not run since one dependency failed") + }) + + t.Run("update dep status in pending queue", func(t *testing.T) { + task1 := genDummyTask() + task2 := &model.Task{ + ID: "2", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + } + task3 := &model.Task{ + ID: "3", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + } - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3})) - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2})) + waitForProcess() - waitForProcess() + got, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, task1, got) - // Poll task1 so it's in running - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, "1", got.ID) + info := q.Info(ctx) + assert.Equal(t, 2, info.Stats.WaitingOnDeps, "task2 and task3 should be waiting") - // Now call ErrorAtOnce with: - // - got.ID (task1) which is in running - should succeed - // - "2" which is in waitingOnDeps - should succeed - // - "fake-1" which doesn't exist - should return ErrNotFound - // - "fake-2" which doesn't exist - should return ErrNotFound - err = q.ErrorAtOnce(ctx, []string{got.ID, "2", "fake-1", "fake-2"}, fmt.Errorf("test error")) + assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) - assert.Error(t, err, "should return error for non-existent tasks") + waitForProcess() - // Verify that the error contains multiple ErrNotFound (from errors.Join) - assert.ErrorIs(t, err, ErrNotFound, "error should contain ErrNotFound") + got2, err := q.Poll(ctx, 2, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, model.StatusSuccess, got2.DepStatus["1"], + "task2's DepStatus should be updated in pending queue") - // Check that both real tasks were removed - waitForProcess() - info := q.Info(ctx) - assert.Len(t, info.Running, 0, "task1 should be removed from running") - assert.Len(t, info.WaitingOnDeps, 0, "task2 should be removed from waiting") - assert.Len(t, info.Pending, 0, "no tasks should be pending") -} + got3, err := q.Poll(ctx, 3, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"], + "task3's DepStatus should be updated in pending queue") + }) + + t.Run("update dep status in running queue", func(t *testing.T) { + task1 := genDummyTask() + task2 := &model.Task{ + ID: "2", + } + task3 := &model.Task{ + ID: "3", + Dependencies: []string{"1", "2"}, + DepStatus: make(map[string]model.StatusValue), + } + task4 := &model.Task{ + ID: "4", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + } -func TestFifoUpdateDepStatusInQueuePending(t *testing.T) { - ctx, cancel := context.WithCancelCause(t.Context()) - t.Cleanup(func() { cancel(nil) }) + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } - task3 := &model.Task{ - ID: "3", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3, task4})) - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) + waitForProcess() - // Push all tasks - task2 and task3 will be in pending initially (before filterWaiting moves them) - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3})) + got1, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) - waitForProcess() + got2, err := q.Poll(ctx, 2, filterFnTrue) + assert.NoError(t, err) - // At this point, task2 and task3 should be in waitingOnDeps - // but we want to test when they're in pending + waitForProcess() - // Get task1 - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, task1, got) + assert.NoError(t, q.Done(ctx, got1.ID, model.StatusSuccess)) - // Before completing task1, let's verify task2 and task3 are waiting - info := q.Info(ctx) - assert.Equal(t, 2, info.Stats.WaitingOnDeps, "task2 and task3 should be waiting") + waitForProcess() - // Now complete task1 - this should trigger updateDepStatusInQueue - // which will update DepStatus in the waitingOnDeps queue - assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) + got4, err := q.Poll(ctx, 4, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, model.StatusSuccess, got4.DepStatus["1"], + "task4's DepStatus should show task1 succeeded") - waitForProcess() + assert.NoError(t, q.Done(ctx, got2.ID, model.StatusSuccess)) - // Now get task2 and verify its DepStatus was updated - got2, err := q.Poll(ctx, 2, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, model.StatusSuccess, got2.DepStatus["1"], - "task2's DepStatus should be updated in pending queue") + waitForProcess() - // Get task3 and verify its DepStatus was updated - got3, err := q.Poll(ctx, 3, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"], - "task3's DepStatus should be updated in pending queue") -} + got3, err := q.Poll(ctx, 3, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"], + "task3's DepStatus should show task1 succeeded") + assert.Equal(t, model.StatusSuccess, got3.DepStatus["2"], + "task3's DepStatus should show task2 succeeded") + assert.True(t, got3.ShouldRun(), "task3 should run since both deps succeeded") + + assert.NoError(t, q.Done(ctx, got3.ID, model.StatusSuccess)) + assert.NoError(t, q.Done(ctx, got4.ID, model.StatusSuccess)) + }) + + t.Run("update dep status in waiting queue", func(t *testing.T) { + task1 := genDummyTask() + task2 := &model.Task{ + ID: "2", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + } + task3 := &model.Task{ + ID: "3", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + } + task4 := &model.Task{ + ID: "4", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + } -func TestFifoUpdateDepStatusInQueueRunning(t *testing.T) { - ctx, cancel := context.WithCancelCause(t.Context()) - t.Cleanup(func() { cancel(nil) }) + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - } - task3 := &model.Task{ - ID: "3", - Dependencies: []string{"1", "2"}, - DepStatus: make(map[string]model.StatusValue), - } - task4 := &model.Task{ - ID: "4", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3, task4})) - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) + waitForProcess() - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3, task4})) + info := q.Info(ctx) + assert.Equal(t, 3, info.Stats.WaitingOnDeps, "tasks 2, 3, 4 should be waiting") - waitForProcess() + got, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, task1, got) - // Get task1 and task2 (both independent) - got1, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) + assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("task1 failed"))) - got2, err := q.Poll(ctx, 2, filterFnTrue) - assert.NoError(t, err) + waitForProcess() - waitForProcess() + for i := 2; i <= 4; i++ { + gotTask, err := q.Poll(ctx, int64(i), filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, model.StatusFailure, gotTask.DepStatus["1"], + "task %d's DepStatus should be updated in waitingOnDeps", i) + assert.False(t, gotTask.ShouldRun(), "task %d should not run due to failed dependency", i) + } + }) + + t.Run("update dep status with skipped", func(t *testing.T) { + task1 := genDummyTask() + task2 := &model.Task{ + ID: "2", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + } + task3 := &model.Task{ + ID: "3", + Dependencies: []string{"2"}, + DepStatus: make(map[string]model.StatusValue), + } - // Now complete task1 first - assert.NoError(t, q.Done(ctx, got1.ID, model.StatusSuccess)) + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) - waitForProcess() + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3})) - // Now task3 and task4 are waiting on their dependencies - // task3 still needs task2, task4 should be ready now + waitForProcess() - // Get task4 which should now be available since task1 is done - got4, err := q.Poll(ctx, 4, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, model.StatusSuccess, got4.DepStatus["1"], - "task4's DepStatus should show task1 succeeded") + got1, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + assert.NoError(t, q.Error(ctx, got1.ID, fmt.Errorf("failed"))) - // Now complete task2 - assert.NoError(t, q.Done(ctx, got2.ID, model.StatusSuccess)) + waitForProcess() - waitForProcess() + got2, err := q.Poll(ctx, 2, filterFnTrue) + assert.NoError(t, err) + assert.False(t, got2.ShouldRun()) + assert.NoError(t, q.Done(ctx, got2.ID, model.StatusSkipped)) - // Now get task3 - both its dependencies are satisfied - got3, err := q.Poll(ctx, 3, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"], - "task3's DepStatus should show task1 succeeded") - assert.Equal(t, model.StatusSuccess, got3.DepStatus["2"], - "task3's DepStatus should show task2 succeeded") - assert.True(t, got3.ShouldRun(), "task3 should run since both deps succeeded") + waitForProcess() - // Clean up remaining tasks - assert.NoError(t, q.Done(ctx, got3.ID, model.StatusSuccess)) - assert.NoError(t, q.Done(ctx, got4.ID, model.StatusSuccess)) + got3, err := q.Poll(ctx, 3, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, model.StatusSkipped, got3.DepStatus["2"], + "task3 should have task2's skipped status") + assert.False(t, got3.ShouldRun(), "task3 should not run due to skipped dependency") + }) } -func TestFifoUpdateDepStatusInQueueWaiting(t *testing.T) { +func TestFifoRemoveOperations(t *testing.T) { ctx, cancel := context.WithCancelCause(t.Context()) t.Cleanup(func() { cancel(nil) }) - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } - task3 := &model.Task{ - ID: "3", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } - task4 := &model.Task{ - ID: "4", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } - - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) - - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3, task4})) - - waitForProcess() - - // Verify tasks are waiting on deps - info := q.Info(ctx) - assert.Equal(t, 3, info.Stats.WaitingOnDeps, "tasks 2, 3, 4 should be waiting") - - // Get and complete task1 - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, task1, got) + t.Run("remove from pending and waiting not found", func(t *testing.T) { + task1 := genDummyTask() - // Complete task1 with failure - this should update DepStatus in waitingOnDeps - assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("task1 failed"))) + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) - waitForProcess() + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1})) - // All waiting tasks should have their DepStatus updated - // Get each one and verify - for i := 2; i <= 4; i++ { - gotTask, err := q.Poll(ctx, int64(i), filterFnTrue) + waitForProcess() + got, err := q.Poll(ctx, 1, filterFnTrue) assert.NoError(t, err) - assert.Equal(t, model.StatusFailure, gotTask.DepStatus["1"], - "task %d's DepStatus should be updated in waitingOnDeps", i) - assert.False(t, gotTask.ShouldRun(), "task %d should not run due to failed dependency", i) - } -} - -func TestFifoUpdateDepStatusWithSkipped(t *testing.T) { - ctx, cancel := context.WithCancelCause(t.Context()) - t.Cleanup(func() { cancel(nil) }) - - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } - task3 := &model.Task{ - ID: "3", - Dependencies: []string{"2"}, - DepStatus: make(map[string]model.StatusValue), - } - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) + assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("test error"))) - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3})) + err = q.Error(ctx, "totally-fake-id", fmt.Errorf("test error")) + assert.Error(t, err, "should return error for non-existent task") + }) - waitForProcess() + t.Run("remove from waiting on deps", func(t *testing.T) { + task1 := genDummyTask() + task2 := &model.Task{ + ID: "2", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + } + task3 := &model.Task{ + ID: "3", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + } - // Get and fail task1 - got1, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.NoError(t, q.Error(ctx, got1.ID, fmt.Errorf("failed"))) + q, _ := NewMemoryQueue(ctx).(*fifo) + assert.NotNil(t, q) - waitForProcess() + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3})) - // Get task2 - should not run but get skipped - got2, err := q.Poll(ctx, 2, filterFnTrue) - assert.NoError(t, err) - assert.False(t, got2.ShouldRun()) - assert.NoError(t, q.Done(ctx, got2.ID, model.StatusSkipped)) + waitForProcess() + info := q.Info(ctx) + assert.Equal(t, 2, info.Stats.WaitingOnDeps, "task2 and task3 should be waiting") - waitForProcess() + assert.NoError(t, q.Error(ctx, "2", fmt.Errorf("cancel task"))) - // Get task3 - should have task2's skipped status updated - got3, err := q.Poll(ctx, 3, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, model.StatusSkipped, got3.DepStatus["2"], - "task3 should have task2's skipped status") - assert.False(t, got3.ShouldRun(), "task3 should not run due to skipped dependency") + waitForProcess() + info = q.Info(ctx) + assert.Equal(t, 1, info.Stats.WaitingOnDeps, "only task3 should be waiting now") + }) } From 679aba56d87ed35574f84b9e4253788907a3abc4 Mon Sep 17 00:00:00 2001 From: 6543 <6543@obermui.de> Date: Mon, 26 Jan 2026 19:26:35 +0100 Subject: [PATCH 09/13] same coverage and less test code --- server/queue/fifo.go | 6 +- server/queue/fifo_test.go | 1614 +++++++++---------------------------- 2 files changed, 373 insertions(+), 1247 deletions(-) diff --git a/server/queue/fifo.go b/server/queue/fifo.go index 69eb3c97cc3..4cac0b22992 100644 --- a/server/queue/fifo.go +++ b/server/queue/fifo.go @@ -277,10 +277,8 @@ func (q *fifo) process() { func (q *fifo) filterWaiting() { // resubmits all waiting tasks to pending, deps may have cleared - var nextWaiting *list.Element - for e := q.waitingOnDeps.Front(); e != nil; e = nextWaiting { - nextWaiting = e.Next() - task, _ := e.Value.(*model.Task) + for element := q.waitingOnDeps.Front(); element != nil; element = element.Next() { + task, _ := element.Value.(*model.Task) q.pending.PushBack(task) } diff --git a/server/queue/fifo_test.go b/server/queue/fifo_test.go index 5a8db3f3e8a..5a0ce0a10c1 100644 --- a/server/queue/fifo_test.go +++ b/server/queue/fifo_test.go @@ -38,474 +38,50 @@ var ( waitForProcess = func() { time.Sleep(processTimeInterval + 50*time.Millisecond) } ) -func TestFifoBasicOperations(t *testing.T) { +func setupTestQueue(t *testing.T) (context.Context, context.CancelCauseFunc, *fifo) { ctx, cancel := context.WithCancelCause(t.Context()) t.Cleanup(func() { cancel(nil) }) - t.Run("basic push poll done flow", func(t *testing.T) { - q := NewMemoryQueue(ctx) - dummyTask := genDummyTask() - - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) - waitForProcess() - info := q.Info(ctx) - assert.Len(t, info.Pending, 1, "expect task in pending queue") - - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, dummyTask, got) - - waitForProcess() - info = q.Info(ctx) - assert.Len(t, info.Pending, 0, "expect task removed from pending queue") - assert.Len(t, info.Running, 1, "expect task in running queue") - - assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) - - waitForProcess() - info = q.Info(ctx) - assert.Len(t, info.Pending, 0, "expect task removed from pending queue") - assert.Len(t, info.Running, 0, "expect task removed from running queue") - }) - - t.Run("task expiration", func(t *testing.T) { - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) - dummyTask := genDummyTask() - - q.extension = 0 - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) - waitForProcess() - info := q.Info(ctx) - assert.Len(t, info.Pending, 1, "expect task in pending queue") - - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, dummyTask, got) - - waitForProcess() - info = q.Info(ctx) - assert.Len(t, info.Pending, 1, "expect task re-added to pending queue") - }) - - t.Run("pause and resume", func(t *testing.T) { - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) - dummyTask := genDummyTask() - - var wg sync.WaitGroup - wg.Add(1) - go func() { - _, _ = q.Poll(ctx, 1, filterFnTrue) - wg.Done() - }() - - q.Pause() - t0 := time.Now() - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) - waitForProcess() - q.Resume() - - wg.Wait() - t1 := time.Now() - - assert.Greater(t, t1.Sub(t0), 20*time.Millisecond, "should have waited til resume") - - q.Pause() - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) - q.Resume() - _, _ = q.Poll(ctx, 1, filterFnTrue) - }) - - t.Run("pause then resume", func(t *testing.T) { - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) - dummyTask := genDummyTask() - - q.Pause() - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) - q.Resume() - - _, _ = q.Poll(ctx, 1, filterFnTrue) - }) -} - -func TestFifoWait(t *testing.T) { - ctx, cancel := context.WithCancelCause(t.Context()) - t.Cleanup(func() { cancel(nil) }) - - t.Run("wait on task completion", func(t *testing.T) { - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) - dummyTask := genDummyTask() - - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) - - waitForProcess() - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, dummyTask, got) - - var wg sync.WaitGroup - wg.Add(1) - go func() { - assert.NoError(t, q.Wait(ctx, got.ID)) - wg.Done() - }() - - <-time.After(time.Millisecond) - assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) - wg.Wait() - }) - - t.Run("wait returns error on expiration", func(t *testing.T) { - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) - q.extension = 0 - - dummyTask := genDummyTask() - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) - - waitForProcess() - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, dummyTask, got) - - errCh := make(chan error, 1) - go func() { - errCh <- q.Wait(ctx, got.ID) - }() - - waitForProcess() - - select { - case werr := <-errCh: - assert.Error(t, werr, "expected Wait to return error when lease expires") - case <-time.After(2 * time.Second): - t.Fatal("timeout waiting for Wait to return after lease expiration") - } - - info := q.Info(ctx) - assert.Len(t, info.Pending, 1, "expect task re-added to pending queue after expiration") - }) - - t.Run("wait with context canceled", func(t *testing.T) { - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) - dummyTask := genDummyTask() - - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) - - waitForProcess() - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - - waitCtx, waitCancel := context.WithCancel(ctx) - - errCh := make(chan error, 1) - go func() { - errCh <- q.Wait(waitCtx, got.ID) - }() - - time.Sleep(50 * time.Millisecond) - waitCancel() - - select { - case err := <-errCh: - assert.NoError(t, err, "Wait should return nil when context is canceled before task completes") - case <-time.After(time.Second): - t.Fatal("Wait should return when context is canceled") - } - }) - - t.Run("wait on non-existent task", func(t *testing.T) { - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) - - err := q.Wait(ctx, "non-existent-task") - assert.NoError(t, err, "Wait should return nil for non-existent task") - }) -} - -func TestFifoDependencies(t *testing.T) { - ctx, cancel := context.WithCancelCause(t.Context()) - t.Cleanup(func() { cancel(nil) }) - - t.Run("basic dependency ordering", func(t *testing.T) { - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } - - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) - - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task1})) - - waitForProcess() - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, task1, got) - - waitForProcess() - assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) - - waitForProcess() - got, err = q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, task2, got) - }) - - t.Run("waiting vs pending stats", func(t *testing.T) { - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } - task3 := &model.Task{ - ID: "3", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - RunOn: []string{"success", "failure"}, - } - - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) - - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1})) - - got, _ := q.Poll(ctx, 1, filterFnTrue) - - waitForProcess() - info := q.Info(ctx) - assert.Equal(t, 2, info.Stats.WaitingOnDeps) - - assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1, there was an error"))) - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.EqualValues(t, task2, got) + q, _ := NewMemoryQueue(ctx).(*fifo) + if q == nil { + t.Fatal("Failed to create queue") + } - waitForProcess() - info = q.Info(ctx) - assert.Equal(t, 0, info.Stats.WaitingOnDeps) - assert.Equal(t, 1, info.Stats.Pending) - }) + return ctx, cancel, q } -func TestFifoErrors(t *testing.T) { - ctx, cancel := context.WithCancelCause(t.Context()) - t.Cleanup(func() { cancel(nil) }) - - t.Run("error handling with run_on conditions", func(t *testing.T) { - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } - task3 := &model.Task{ - ID: "3", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - RunOn: []string{"success", "failure"}, - } - - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) - - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1})) - - waitForProcess() - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, task1, got) - - assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1, there was an error"))) - - waitForProcess() - got, err = q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, task2, got) - assert.False(t, got.ShouldRun()) - - waitForProcess() - got, err = q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, task3, got) - assert.True(t, got.ShouldRun()) - }) - - t.Run("multiple dependency failures", func(t *testing.T) { - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - } - task3 := &model.Task{ - ID: "3", - Dependencies: []string{"1", "2"}, - DepStatus: make(map[string]model.StatusValue), - } - - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) - - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1})) - - for i := 0; i < 2; i++ { - waitForProcess() - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.False(t, got != task1 && got != task2, "expect task1 or task2 returned from queue as task3 depends on them") - - if got != task1 { - assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) - } - if got != task2 { - assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1, there was an error"))) - } - } - - waitForProcess() - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, task3, got) - assert.False(t, got.ShouldRun()) - }) - - t.Run("transitive error propagation", func(t *testing.T) { - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } - task3 := &model.Task{ - ID: "3", - Dependencies: []string{"2"}, - DepStatus: make(map[string]model.StatusValue), - } +func TestBasicQueueOperations(t *testing.T) { + ctx, cancel, q := setupTestQueue(t) + defer cancel(nil) - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) + dummyTask := genDummyTask() - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1})) + // Test push -> poll -> done lifecycle + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) + waitForProcess() - waitForProcess() - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, task1, got) - assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1, there was an error"))) + info := q.Info(ctx) + assert.Len(t, info.Pending, 1) - waitForProcess() - got, err = q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, task2, got) - assert.False(t, got.ShouldRun(), "expect task2 should not run, since task1 failed") - assert.NoError(t, q.Done(ctx, got.ID, model.StatusSkipped)) + got, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, dummyTask, got) - waitForProcess() - got, err = q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, task3, got) - assert.False(t, got.ShouldRun(), "expect task3 should not run, task1 failed, thus task2 was skipped, task3 should be skipped too") - }) - - t.Run("multithreaded error handling", func(t *testing.T) { - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } - task3 := &model.Task{ - ID: "3", - Dependencies: []string{"1", "2"}, - DepStatus: make(map[string]model.StatusValue), - } + waitForProcess() + info = q.Info(ctx) + assert.Len(t, info.Pending, 0) + assert.Len(t, info.Running, 1) - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) - - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1})) - - obtainedWorkCh := make(chan *model.Task) - defer func() { close(obtainedWorkCh) }() - - for i := 0; i < 10; i++ { - go func(i int) { - for { - fmt.Printf("Worker %d started\n", i) - got, err := q.Poll(ctx, 1, filterFnTrue) - if err != nil && errors.Is(err, context.Canceled) { - return - } - assert.NoError(t, err) - obtainedWorkCh <- got - } - }(i) - } + assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) - task1Processed := false - task2Processed := false - - for { - select { - case got := <-obtainedWorkCh: - fmt.Println(got.ID) - - switch { - case !task1Processed: - assert.Equal(t, task1, got) - task1Processed = true - assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1, there was an error"))) - go func() { - for { - fmt.Printf("Worker spawned\n") - got, err := q.Poll(ctx, 1, filterFnTrue) - if err != nil && errors.Is(err, context.Canceled) { - return - } - assert.NoError(t, err) - obtainedWorkCh <- got - } - }() - case !task2Processed: - assert.Equal(t, task2, got) - task2Processed = true - assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) - go func() { - for { - fmt.Printf("Worker spawned\n") - got, err := q.Poll(ctx, 1, filterFnTrue) - if err != nil && errors.Is(err, context.Canceled) { - return - } - assert.NoError(t, err) - obtainedWorkCh <- got - } - }() - default: - assert.Equal(t, task3, got) - assert.False(t, got.ShouldRun(), "expect task3 should not run, task1 succeeded but task2 failed") - return - } - - case <-time.After(5 * time.Second): - info := q.Info(ctx) - fmt.Println(info.String()) - t.Errorf("test timed out") - return - } - } - }) + waitForProcess() + info = q.Info(ctx) + assert.Len(t, info.Running, 0) } -func TestFifoCancel(t *testing.T) { - ctx, cancel := context.WithCancelCause(t.Context()) - t.Cleanup(func() { cancel(nil) }) +func TestTaskDependencies(t *testing.T) { + ctx, cancel, q := setupTestQueue(t) + defer cancel(nil) task1 := genDummyTask() task2 := &model.Task{ @@ -520,562 +96,322 @@ func TestFifoCancel(t *testing.T) { RunOn: []string{"success", "failure"}, } - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1})) + waitForProcess() - _, _ = q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, q.Error(ctx, task1.ID, fmt.Errorf("canceled"))) - assert.NoError(t, q.Error(ctx, task2.ID, fmt.Errorf("canceled"))) - assert.NoError(t, q.Error(ctx, task3.ID, fmt.Errorf("canceled"))) + // Verify waiting on deps stat info := q.Info(ctx) - assert.Len(t, info.Pending, 0, "all pipelines should be canceled") - - time.Sleep(processTimeInterval * 2) + assert.Equal(t, 2, info.Stats.WaitingOnDeps) + + // Poll and fail task1 + got, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, task1, got) + assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1"))) + + // task2 should be polled but not run (no failure in RunOn) + waitForProcess() + got, err = q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, task2, got) + assert.False(t, got.ShouldRun()) + assert.Equal(t, model.StatusFailure, got.DepStatus["1"]) + + // task3 should run (has failure in RunOn) + waitForProcess() + got, err = q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, task3, got) + assert.True(t, got.ShouldRun()) + assert.Equal(t, model.StatusFailure, got.DepStatus["1"]) + + waitForProcess() info = q.Info(ctx) - assert.Len(t, info.Pending, 0, "canceled are rescheduled") - assert.Len(t, info.Running, 0, "canceled are rescheduled") - assert.Len(t, info.WaitingOnDeps, 0, "canceled are rescheduled") -} - -func TestFifoShouldRun(t *testing.T) { - tests := []struct { - name string - task *model.Task - expected bool - reason string - }{ - { - name: "failure only - success status", - task: &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: map[string]model.StatusValue{"1": model.StatusSuccess}, - RunOn: []string{"failure"}, - }, - expected: false, - reason: "expect task to not run, it runs on failure only", - }, - { - name: "failure and success - success status", - task: &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: map[string]model.StatusValue{"1": model.StatusSuccess}, - RunOn: []string{"failure", "success"}, - }, - expected: true, - }, - { - name: "no run_on - failure status", - task: &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: map[string]model.StatusValue{"1": model.StatusFailure}, - }, - expected: false, - }, - { - name: "success only - success status", - task: &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: map[string]model.StatusValue{"1": model.StatusSuccess}, - RunOn: []string{"success"}, - }, - expected: true, - }, - { - name: "failure only - failure status", - task: &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: map[string]model.StatusValue{"1": model.StatusFailure}, - RunOn: []string{"failure"}, - }, - expected: true, - }, - { - name: "no run_on - skipped status", - task: &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: map[string]model.StatusValue{"1": model.StatusSkipped}, - }, - expected: false, - reason: "task should not run if dependency is skipped", - }, - { - name: "failure only - skipped status", - task: &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: map[string]model.StatusValue{"1": model.StatusSkipped}, - RunOn: []string{"failure"}, - }, - expected: true, - reason: "on failure, tasks should run on skipped deps, something failed higher up the chain", - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - result := tt.task.ShouldRun() - if tt.reason != "" { - assert.Equal(t, tt.expected, result, tt.reason) - } else { - assert.Equal(t, tt.expected, result) - } - }) - } + assert.Equal(t, 0, info.Stats.WaitingOnDeps) } -func TestFifoScoring(t *testing.T) { - ctx, cancel := context.WithCancelCause(t.Context()) - t.Cleanup(func() { cancel(nil) }) - - q := NewMemoryQueue(ctx) +func TestMultipleDependencies(t *testing.T) { + ctx, cancel, q := setupTestQueue(t) + defer cancel(nil) - // Create tasks with different labels - tasks := []*model.Task{ - {ID: "1", Labels: map[string]string{"org-id": "123", "platform": "linux"}}, - {ID: "2", Labels: map[string]string{"org-id": "456", "platform": "linux"}}, - {ID: "3", Labels: map[string]string{"org-id": "789", "platform": "windows"}}, - {ID: "4", Labels: map[string]string{"org-id": "123", "platform": "linux"}}, - {ID: "5", Labels: map[string]string{"org-id": "*", "platform": "linux"}}, + task1 := genDummyTask() + task2 := &model.Task{ID: "2"} + task3 := &model.Task{ + ID: "3", + Dependencies: []string{"1", "2"}, + DepStatus: make(map[string]model.StatusValue), } - assert.NoError(t, q.PushAtOnce(ctx, tasks)) - - // Create filter functions for different workers - filters := map[int]FilterFn{ - 1: func(task *model.Task) (bool, int) { - if task.Labels["org-id"] == "123" { - return true, 20 - } - if task.Labels["platform"] == "linux" { - return true, 10 - } - return true, 1 - }, - 2: func(task *model.Task) (bool, int) { - if task.Labels["org-id"] == "456" { - return true, 20 - } - if task.Labels["platform"] == "linux" { - return true, 10 - } - return true, 1 - }, - 3: func(task *model.Task) (bool, int) { - if task.Labels["platform"] == "windows" { - return true, 20 - } - return true, 1 - }, - 4: func(task *model.Task) (bool, int) { - if task.Labels["org-id"] == "123" { - return true, 20 - } - if task.Labels["platform"] == "linux" { - return true, 10 - } - return true, 1 - }, - 5: func(task *model.Task) (bool, int) { - if task.Labels["org-id"] == "*" { - return true, 15 - } - return true, 1 - }, - } + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1})) + waitForProcess() + + // Poll and complete both dependencies + got1, _ := q.Poll(ctx, 1, filterFnTrue) + got2, _ := q.Poll(ctx, 2, filterFnTrue) + + assert.NoError(t, q.Done(ctx, got1.ID, model.StatusSuccess)) + assert.NoError(t, q.Error(ctx, got2.ID, fmt.Errorf("failed"))) + + // task3 should have both statuses propagated + waitForProcess() + got3, err := q.Poll(ctx, 3, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"]) + assert.Equal(t, model.StatusFailure, got3.DepStatus["2"]) + assert.False(t, got3.ShouldRun()) +} - // Start polling in separate goroutines - results := make(chan *model.Task, 5) - for i := 1; i <= 5; i++ { - go func(n int) { - task, err := q.Poll(ctx, int64(n), filters[n]) - assert.NoError(t, err) - results <- task - }(i) - } +func TestTransitiveDependencies(t *testing.T) { + ctx, cancel, q := setupTestQueue(t) + defer cancel(nil) - // Collect results - receivedTasks := make(map[string]int64) - for i := 0; i < 5; i++ { - select { - case task := <-results: - receivedTasks[task.ID] = task.AgentID - case <-time.After(time.Second): - t.Fatal("Timeout waiting for tasks") - } + task1 := genDummyTask() + task2 := &model.Task{ + ID: "2", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), } - - assert.Len(t, receivedTasks, 5, "All tasks should be assigned") - - // Define expected agent assignments - expectedAssignments := map[string][]int64{ - "1": {1, 4}, - "2": {2}, - "3": {3}, - "4": {1, 4}, - "5": {5}, + task3 := &model.Task{ + ID: "3", + Dependencies: []string{"2"}, + DepStatus: make(map[string]model.StatusValue), } - // Check if tasks are assigned as expected - for taskID, expectedAgents := range expectedAssignments { - agentID, ok := receivedTasks[taskID] - assert.True(t, ok, "Task %s should be assigned", taskID) - assert.Contains(t, expectedAgents, agentID, "Task %s should be assigned to one of the expected agents", taskID) - } + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1})) + waitForProcess() + + // Fail task1 + got, _ := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1"))) + + // task2 should skip + waitForProcess() + got, _ = q.Poll(ctx, 2, filterFnTrue) + assert.False(t, got.ShouldRun()) + assert.NoError(t, q.Done(ctx, got.ID, model.StatusSkipped)) + + // task3 should also skip (transitive) + waitForProcess() + got, _ = q.Poll(ctx, 3, filterFnTrue) + assert.Equal(t, model.StatusSkipped, got.DepStatus["2"]) + assert.False(t, got.ShouldRun()) } -func TestFifoErrorAtOnce(t *testing.T) { - ctx, cancel := context.WithCancelCause(t.Context()) - t.Cleanup(func() { cancel(nil) }) - - t.Run("batch error on running tasks", func(t *testing.T) { - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } - task3 := &model.Task{ - ID: "3", - } - - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) - - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3})) - - waitForProcess() - got1, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - - got2, err := q.Poll(ctx, 2, filterFnTrue) - assert.NoError(t, err) - - assert.NoError(t, q.ErrorAtOnce(ctx, []string{got1.ID, got2.ID}, fmt.Errorf("batch error"))) - - waitForProcess() - info := q.Info(ctx) - assert.Len(t, info.Running, 0, "expect tasks removed from running queue") - assert.Len(t, info.Pending, 1, "expect remaining task in pending") - }) - - t.Run("batch error on pending tasks", func(t *testing.T) { - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - } - task3 := &model.Task{ - ID: "3", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } - - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) - - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3})) +func TestLeaseExpiration(t *testing.T) { + ctx, cancel, q := setupTestQueue(t) + defer cancel(nil) - waitForProcess() + q.extension = 0 // Immediate expiration + dummyTask := genDummyTask() + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) - assert.NoError(t, q.ErrorAtOnce(ctx, []string{"2", "3"}, fmt.Errorf("pending error"))) - - waitForProcess() - info := q.Info(ctx) - assert.Len(t, info.Pending, 1, "only task1 should remain") - assert.Len(t, info.WaitingOnDeps, 0, "task3 should be removed from waiting") - }) - - t.Run("error at once with cancel error", func(t *testing.T) { - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - } - - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) - - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2})) - - waitForProcess() - got1, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) + waitForProcess() + got, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) - assert.NoError(t, q.ErrorAtOnce(ctx, []string{got1.ID}, ErrCancel)) - - waitForProcess() - info := q.Info(ctx) - assert.Len(t, info.Running, 0, "task should be removed from running") - }) - - t.Run("error at once with non-existent tasks", func(t *testing.T) { - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - } - - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) - - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2})) - - waitForProcess() - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - - err = q.ErrorAtOnce(ctx, []string{got.ID, "non-existent-id"}, fmt.Errorf("batch error")) - assert.Error(t, err, "should return error when one of the tasks doesn't exist") - assert.ErrorIs(t, err, ErrNotFound, "error should contain ErrNotFound") - - waitForProcess() - info := q.Info(ctx) - assert.Len(t, info.Running, 0, "running task should be removed") - }) - - t.Run("error at once with multiple non-existent tasks", func(t *testing.T) { - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) - - task1 := genDummyTask() - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1})) - - waitForProcess() - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - - err = q.ErrorAtOnce(ctx, []string{got.ID, "fake-1", "fake-2", "fake-3"}, fmt.Errorf("multiple errors")) - assert.Error(t, err, "should return joined errors") - - errStr := err.Error() - assert.Contains(t, errStr, "not found", "error should mention not found") - }) - - t.Run("error at once with ErrCancel and dependency update", func(t *testing.T) { - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } - - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) - - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2})) - - waitForProcess() - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) + // Test Wait returns error on expiration + errCh := make(chan error, 1) + go func() { errCh <- q.Wait(ctx, got.ID) }() - assert.NoError(t, q.ErrorAtOnce(ctx, []string{got.ID}, ErrCancel)) + waitForProcess() + select { + case werr := <-errCh: + assert.Error(t, werr) + case <-time.After(2 * time.Second): + t.Fatal("timeout waiting for Wait to return") + } - waitForProcess() + // Task should be back in pending + info := q.Info(ctx) + assert.Len(t, info.Pending, 1) +} - got2, err := q.Poll(ctx, 2, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, model.StatusKilled, got2.DepStatus["1"], "dependency should be marked as killed") - }) - - t.Run("finished with multiple errors", func(t *testing.T) { - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } +func TestExtend(t *testing.T) { + ctx, cancel, q := setupTestQueue(t) + defer cancel(nil) - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) + q.extension = 50 * time.Millisecond + dummyTask := genDummyTask() + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2})) + waitForProcess() + got, _ := q.Poll(ctx, 5, filterFnTrue) - waitForProcess() + // Correct agent can extend + assert.NoError(t, q.Extend(ctx, 5, got.ID)) - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, "1", got.ID) + // Wrong agent cannot extend + assert.ErrorIs(t, q.Extend(ctx, 999, got.ID), ErrAgentMissMatch) + assert.ErrorIs(t, q.Extend(ctx, 1, got.ID), ErrAgentMissMatch) - err = q.ErrorAtOnce(ctx, []string{got.ID, "2", "fake-1", "fake-2"}, fmt.Errorf("test error")) + // Non-existent task + assert.ErrorIs(t, q.Extend(ctx, 1, "non-existent"), ErrNotFound) - assert.Error(t, err, "should return error for non-existent tasks") - assert.ErrorIs(t, err, ErrNotFound, "error should contain ErrNotFound") + // Extend prevents expiration + for i := 0; i < 3; i++ { + time.Sleep(30 * time.Millisecond) + assert.NoError(t, q.Extend(ctx, 5, got.ID)) + } - waitForProcess() - info := q.Info(ctx) - assert.Len(t, info.Running, 0, "task1 should be removed from running") - assert.Len(t, info.WaitingOnDeps, 0, "task2 should be removed from waiting") - assert.Len(t, info.Pending, 0, "no tasks should be pending") - }) + info := q.Info(ctx) + assert.Len(t, info.Running, 1) + assert.Len(t, info.Pending, 0) } -func TestFifoExtend(t *testing.T) { - ctx, cancel := context.WithCancelCause(t.Context()) - t.Cleanup(func() { cancel(nil) }) - - t.Run("basic extend operations", func(t *testing.T) { - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) - - dummyTask := genDummyTask() - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) - - waitForProcess() - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - - assert.NoError(t, q.Extend(ctx, 1, got.ID)) - - err = q.Extend(ctx, 999, got.ID) - assert.ErrorIs(t, err, ErrAgentMissMatch) - - assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) - - err = q.Extend(ctx, 1, "non-existent") - assert.ErrorIs(t, err, ErrNotFound) - }) +func TestWait(t *testing.T) { + ctx, cancel, q := setupTestQueue(t) + defer cancel(nil) - t.Run("extend prevents expiration", func(t *testing.T) { - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) + dummyTask := genDummyTask() + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) - q.extension = 50 * time.Millisecond + waitForProcess() + got, _ := q.Poll(ctx, 1, filterFnTrue) - dummyTask := genDummyTask() - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) + // Wait completes on Done + var wg sync.WaitGroup + wg.Add(1) + go func() { + assert.NoError(t, q.Wait(ctx, got.ID)) + wg.Done() + }() - waitForProcess() - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) + time.Sleep(time.Millisecond) + assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) + wg.Wait() - for i := 0; i < 3; i++ { - time.Sleep(30 * time.Millisecond) - assert.NoError(t, q.Extend(ctx, 1, got.ID)) - } + // Wait on non-existent task + assert.NoError(t, q.Wait(ctx, "non-existent")) - info := q.Info(ctx) - assert.Len(t, info.Running, 1, "task should still be running after extensions") - assert.Len(t, info.Pending, 0, "task should not be resubmitted") - }) + // Wait with context cancellation + dummyTask2 := &model.Task{ID: "2"} + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask2})) + waitForProcess() + got2, _ := q.Poll(ctx, 1, filterFnTrue) - t.Run("extend with wrong agent", func(t *testing.T) { - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) + waitCtx, waitCancel := context.WithCancel(ctx) + errCh := make(chan error, 1) + go func() { errCh <- q.Wait(waitCtx, got2.ID) }() - dummyTask := genDummyTask() - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) + time.Sleep(50 * time.Millisecond) + waitCancel() - waitForProcess() - got, err := q.Poll(ctx, 5, filterFnTrue) + select { + case err := <-errCh: assert.NoError(t, err) - assert.Equal(t, int64(5), got.AgentID, "task should be assigned to agent 5") - - assert.NoError(t, q.Extend(ctx, 5, got.ID)) - - err = q.Extend(ctx, 999, got.ID) - assert.ErrorIs(t, err, ErrAgentMissMatch, "should return ErrAgentMissMatch when wrong agent tries to extend") - - err = q.Extend(ctx, 1, got.ID) - assert.ErrorIs(t, err, ErrAgentMissMatch, "should return ErrAgentMissMatch for any wrong agent") - - assert.NoError(t, q.Extend(ctx, 5, got.ID), "correct agent should still be able to extend") - - assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) - }) + case <-time.After(time.Second): + t.Fatal("Wait should return when context is canceled") + } } -func TestFifoKickAgentWorkers(t *testing.T) { - ctx, cancel := context.WithCancelCause(t.Context()) - t.Cleanup(func() { cancel(nil) }) +func TestError(t *testing.T) { + ctx, cancel, q := setupTestQueue(t) + defer cancel(nil) - t.Run("kick single agent workers", func(t *testing.T) { - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) - - dummyTask := genDummyTask() - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) - - pollResults := make(chan error, 3) + task1 := genDummyTask() + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1})) - for agentID := int64(1); agentID <= 3; agentID++ { - go func(id int64) { - _, err := q.Poll(ctx, id, filterFnTrue) - pollResults <- err - }(agentID) - } + waitForProcess() + got, _ := q.Poll(ctx, 1, filterFnTrue) - time.Sleep(50 * time.Millisecond) + // Error on running task + assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("test error"))) + waitForProcess() + info := q.Info(ctx) + assert.Len(t, info.Running, 0) - q.KickAgentWorkers(2) + // Error on non-existent task + assert.Error(t, q.Error(ctx, "totally-fake-id", fmt.Errorf("test error"))) +} - select { - case err := <-pollResults: - assert.Error(t, err) - if errors.Is(err, context.Canceled) { - assert.True(t, errors.Is(err, context.Canceled), "expected context.Canceled") - } - case <-time.After(time.Second): - t.Fatal("expected worker to be kicked") - } +func TestErrorAtOnce(t *testing.T) { + ctx, cancel, q := setupTestQueue(t) + defer cancel(nil) - info := q.Info(ctx) - assert.GreaterOrEqual(t, info.Stats.Workers, 2, "other workers should still be registered") - }) + task1 := genDummyTask() + task2 := &model.Task{ID: "2"} + task3 := &model.Task{ + ID: "3", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + } - t.Run("kick multiple workers for same agent", func(t *testing.T) { - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3})) + waitForProcess() - pollResults := make(chan error, 5) + got1, _ := q.Poll(ctx, 1, filterFnTrue) + got2, _ := q.Poll(ctx, 2, filterFnTrue) - for i := 0; i < 5; i++ { - go func() { - _, err := q.Poll(ctx, 42, filterFnTrue) - pollResults <- err - }() - } + // Batch error on running tasks + assert.NoError(t, q.ErrorAtOnce(ctx, []string{got1.ID, got2.ID}, fmt.Errorf("batch error"))) + waitForProcess() + info := q.Info(ctx) + assert.Len(t, info.Running, 0) + + // Error with non-existent tasks + task4 := &model.Task{ID: "4"} + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task4})) + waitForProcess() + got4, _ := q.Poll(ctx, 1, filterFnTrue) + + err := q.ErrorAtOnce(ctx, []string{got4.ID, "fake-1", "fake-2"}, fmt.Errorf("test")) + assert.Error(t, err) + assert.ErrorIs(t, err, ErrNotFound) + + // ErrCancel updates dependency status + task5 := genDummyTask() + task5.ID = "5" + task6 := &model.Task{ + ID: "6", + Dependencies: []string{"5"}, + DepStatus: make(map[string]model.StatusValue), + } + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task5, task6})) + waitForProcess() + got5, _ := q.Poll(ctx, 1, filterFnTrue) - time.Sleep(50 * time.Millisecond) + assert.NoError(t, q.ErrorAtOnce(ctx, []string{got5.ID}, ErrCancel)) + waitForProcess() - info := q.Info(ctx) - initialWorkers := info.Stats.Workers - assert.Equal(t, 5, initialWorkers, "all workers should be registered") + got6, _ := q.Poll(ctx, 2, filterFnTrue) + assert.Equal(t, model.StatusKilled, got6.DepStatus["5"]) +} - q.KickAgentWorkers(42) +func TestShouldRunLogic(t *testing.T) { + tests := []struct { + name string + depStatus model.StatusValue + runOn []string + expected bool + }{ + {"Success without RunOn", model.StatusSuccess, nil, true}, + {"Failure without RunOn", model.StatusFailure, nil, false}, + {"Success with failure RunOn", model.StatusSuccess, []string{"failure"}, false}, + {"Failure with failure RunOn", model.StatusFailure, []string{"failure"}, true}, + {"Success with both RunOn", model.StatusSuccess, []string{"success", "failure"}, true}, + {"Skipped without RunOn", model.StatusSkipped, nil, false}, + {"Skipped with failure RunOn", model.StatusSkipped, []string{"failure"}, true}, + } - kickedCount := 0 - for i := 0; i < 5; i++ { - select { - case err := <-pollResults: - assert.Error(t, err) - if errors.Is(err, context.Canceled) { - kickedCount++ - } - case <-time.After(time.Second): - t.Fatal("expected all workers to be kicked") + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + task := &model.Task{ + ID: "2", + Dependencies: []string{"1"}, + DepStatus: map[string]model.StatusValue{"1": tt.depStatus}, + RunOn: tt.runOn, } - } - - assert.Equal(t, 5, kickedCount, "all 5 workers should have been kicked") - - time.Sleep(50 * time.Millisecond) - info = q.Info(ctx) - assert.Equal(t, 0, info.Stats.Workers, "all workers should be removed") - }) + assert.Equal(t, tt.expected, task.ShouldRun()) + }) + } } -func TestFifoPollContextCanceled(t *testing.T) { - ctx, cancel := context.WithCancelCause(t.Context()) - - q := NewMemoryQueue(ctx) +func TestWorkerManagement(t *testing.T) { + ctx, cancel, q := setupTestQueue(t) + defer cancel(nil) + // Poll with context cancellation pollCtx, pollCancel := context.WithCancel(ctx) - errCh := make(chan error, 1) go func() { _, err := q.Poll(pollCtx, 1, filterFnTrue) @@ -1083,331 +419,123 @@ func TestFifoPollContextCanceled(t *testing.T) { }() time.Sleep(50 * time.Millisecond) - pollCancel() select { case err := <-errCh: - assert.Error(t, err) assert.ErrorIs(t, err, context.Canceled) case <-time.After(time.Second): t.Fatal("Poll should return when context is canceled") } - cancel(nil) -} - -func TestFifoDepStatusUpdates(t *testing.T) { - ctx, cancel := context.WithCancelCause(t.Context()) - t.Cleanup(func() { cancel(nil) }) - - t.Run("update dep status in queue", func(t *testing.T) { - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } - task3 := &model.Task{ - ID: "3", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } - task4 := &model.Task{ - ID: "4", - Dependencies: []string{"2"}, - DepStatus: make(map[string]model.StatusValue), - } - - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) - - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3, task4})) - - waitForProcess() - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, task1, got) - - assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) - - waitForProcess() - - got2, err := q.Poll(ctx, 2, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, model.StatusSuccess, got2.DepStatus["1"], "task2 should have task1's status updated") - - got3, err := q.Poll(ctx, 3, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"], "task3 should have task1's status updated") - }) - - t.Run("multiple dep status updates", func(t *testing.T) { - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - } - task3 := &model.Task{ - ID: "3", - Dependencies: []string{"1", "2"}, - DepStatus: make(map[string]model.StatusValue), - } - - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) - - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3})) - - waitForProcess() - - got1, _ := q.Poll(ctx, 1, filterFnTrue) - got2, _ := q.Poll(ctx, 2, filterFnTrue) - - assert.NoError(t, q.Done(ctx, got1.ID, model.StatusSuccess)) - assert.NoError(t, q.Error(ctx, got2.ID, fmt.Errorf("failed"))) - - waitForProcess() + // Kick multiple workers for same agent + pollResults := make(chan error, 5) + for i := 0; i < 5; i++ { + go func() { + _, err := q.Poll(ctx, 42, filterFnTrue) + pollResults <- err + }() + } - got3, err := q.Poll(ctx, 3, filterFnTrue) - assert.NoError(t, err) + time.Sleep(50 * time.Millisecond) + q.KickAgentWorkers(42) - assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"]) - assert.Equal(t, model.StatusFailure, got3.DepStatus["2"]) - assert.False(t, got3.ShouldRun(), "task3 should not run since one dependency failed") - }) - - t.Run("update dep status in pending queue", func(t *testing.T) { - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } - task3 := &model.Task{ - ID: "3", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), + kickedCount := 0 + for i := 0; i < 5; i++ { + select { + case err := <-pollResults: + if errors.Is(err, context.Canceled) { + kickedCount++ + } + case <-time.After(time.Second): + t.Fatal("expected all workers to be kicked") } + } + assert.Equal(t, 5, kickedCount) +} - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) - - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3})) - - waitForProcess() - - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, task1, got) - - info := q.Info(ctx) - assert.Equal(t, 2, info.Stats.WaitingOnDeps, "task2 and task3 should be waiting") +func TestLabelBasedScoring(t *testing.T) { + ctx, cancel := context.WithCancelCause(t.Context()) + defer cancel(nil) - assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) + q := NewMemoryQueue(ctx) - waitForProcess() + tasks := []*model.Task{ + {ID: "1", Labels: map[string]string{"org-id": "123", "platform": "linux"}}, + {ID: "2", Labels: map[string]string{"org-id": "456", "platform": "linux"}}, + {ID: "3", Labels: map[string]string{"org-id": "123", "platform": "windows"}}, + } - got2, err := q.Poll(ctx, 2, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, model.StatusSuccess, got2.DepStatus["1"], - "task2's DepStatus should be updated in pending queue") + assert.NoError(t, q.PushAtOnce(ctx, tasks)) - got3, err := q.Poll(ctx, 3, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"], - "task3's DepStatus should be updated in pending queue") - }) - - t.Run("update dep status in running queue", func(t *testing.T) { - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - } - task3 := &model.Task{ - ID: "3", - Dependencies: []string{"1", "2"}, - DepStatus: make(map[string]model.StatusValue), - } - task4 := &model.Task{ - ID: "4", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), + filter123 := func(task *model.Task) (bool, int) { + if task.Labels["org-id"] == "123" { + return true, 20 } + return true, 1 + } - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) - - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3, task4})) - - waitForProcess() - - got1, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - - got2, err := q.Poll(ctx, 2, filterFnTrue) - assert.NoError(t, err) - - waitForProcess() - - assert.NoError(t, q.Done(ctx, got1.ID, model.StatusSuccess)) - - waitForProcess() - - got4, err := q.Poll(ctx, 4, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, model.StatusSuccess, got4.DepStatus["1"], - "task4's DepStatus should show task1 succeeded") - - assert.NoError(t, q.Done(ctx, got2.ID, model.StatusSuccess)) - - waitForProcess() - - got3, err := q.Poll(ctx, 3, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"], - "task3's DepStatus should show task1 succeeded") - assert.Equal(t, model.StatusSuccess, got3.DepStatus["2"], - "task3's DepStatus should show task2 succeeded") - assert.True(t, got3.ShouldRun(), "task3 should run since both deps succeeded") - - assert.NoError(t, q.Done(ctx, got3.ID, model.StatusSuccess)) - assert.NoError(t, q.Done(ctx, got4.ID, model.StatusSuccess)) - }) - - t.Run("update dep status in waiting queue", func(t *testing.T) { - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } - task3 := &model.Task{ - ID: "3", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), + filter456 := func(task *model.Task) (bool, int) { + if task.Labels["org-id"] == "456" { + return true, 20 } - task4 := &model.Task{ - ID: "4", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } - - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) - - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3, task4})) - - waitForProcess() - - info := q.Info(ctx) - assert.Equal(t, 3, info.Stats.WaitingOnDeps, "tasks 2, 3, 4 should be waiting") - - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, task1, got) - - assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("task1 failed"))) + return true, 1 + } - waitForProcess() + results := make(chan *model.Task, 2) + go func() { + task, _ := q.Poll(ctx, 1, filter123) + results <- task + }() + go func() { + task, _ := q.Poll(ctx, 2, filter456) + results <- task + }() - for i := 2; i <= 4; i++ { - gotTask, err := q.Poll(ctx, int64(i), filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, model.StatusFailure, gotTask.DepStatus["1"], - "task %d's DepStatus should be updated in waitingOnDeps", i) - assert.False(t, gotTask.ShouldRun(), "task %d should not run due to failed dependency", i) - } - }) - - t.Run("update dep status with skipped", func(t *testing.T) { - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } - task3 := &model.Task{ - ID: "3", - Dependencies: []string{"2"}, - DepStatus: make(map[string]model.StatusValue), + receivedTasks := make(map[string]int64) + for i := 0; i < 2; i++ { + select { + case task := <-results: + receivedTasks[task.ID] = task.AgentID + case <-time.After(time.Second): + t.Fatal("Timeout waiting for tasks") } + } - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) - - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3})) - - waitForProcess() - - got1, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.NoError(t, q.Error(ctx, got1.ID, fmt.Errorf("failed"))) - - waitForProcess() - - got2, err := q.Poll(ctx, 2, filterFnTrue) - assert.NoError(t, err) - assert.False(t, got2.ShouldRun()) - assert.NoError(t, q.Done(ctx, got2.ID, model.StatusSkipped)) - - waitForProcess() - - got3, err := q.Poll(ctx, 3, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, model.StatusSkipped, got3.DepStatus["2"], - "task3 should have task2's skipped status") - assert.False(t, got3.ShouldRun(), "task3 should not run due to skipped dependency") - }) + // Agent 1 should get task with org-id 123 + assert.Contains(t, []string{"1", "3"}, findTaskByAgent(receivedTasks, 1)) + // Agent 2 should get task with org-id 456 + assert.Equal(t, "2", findTaskByAgent(receivedTasks, 2)) } -func TestFifoRemoveOperations(t *testing.T) { - ctx, cancel := context.WithCancelCause(t.Context()) - t.Cleanup(func() { cancel(nil) }) - - t.Run("remove from pending and waiting not found", func(t *testing.T) { - task1 := genDummyTask() +func TestPauseResume(t *testing.T) { + ctx, cancel, q := setupTestQueue(t) + defer cancel(nil) - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) + dummyTask := genDummyTask() - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1})) - - waitForProcess() - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) + var wg sync.WaitGroup + wg.Add(1) + go func() { + _, _ = q.Poll(ctx, 1, filterFnTrue) + wg.Done() + }() - assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("test error"))) + q.Pause() + t0 := time.Now() + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) + waitForProcess() + q.Resume() - err = q.Error(ctx, "totally-fake-id", fmt.Errorf("test error")) - assert.Error(t, err, "should return error for non-existent task") - }) + wg.Wait() + assert.Greater(t, time.Since(t0), 20*time.Millisecond) +} - t.Run("remove from waiting on deps", func(t *testing.T) { - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } - task3 := &model.Task{ - ID: "3", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), +func findTaskByAgent(tasks map[string]int64, agentID int64) string { + for taskID, aid := range tasks { + if aid == agentID { + return taskID } - - q, _ := NewMemoryQueue(ctx).(*fifo) - assert.NotNil(t, q) - - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3})) - - waitForProcess() - info := q.Info(ctx) - assert.Equal(t, 2, info.Stats.WaitingOnDeps, "task2 and task3 should be waiting") - - assert.NoError(t, q.Error(ctx, "2", fmt.Errorf("cancel task"))) - - waitForProcess() - info = q.Info(ctx) - assert.Equal(t, 1, info.Stats.WaitingOnDeps, "only task3 should be waiting now") - }) + } + return "" } From 8ed5a3cb259618483ece0627b28f2352017c5869 Mon Sep 17 00:00:00 2001 From: 6543 <6543@obermui.de> Date: Mon, 26 Jan 2026 19:36:48 +0100 Subject: [PATCH 10/13] more coverage --- server/queue/fifo_test.go | 164 +++++++++++++++++++++++++++++++++----- 1 file changed, 145 insertions(+), 19 deletions(-) diff --git a/server/queue/fifo_test.go b/server/queue/fifo_test.go index 5a0ce0a10c1..9530925653f 100644 --- a/server/queue/fifo_test.go +++ b/server/queue/fifo_test.go @@ -145,19 +145,35 @@ func TestMultipleDependencies(t *testing.T) { assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1})) waitForProcess() - // Poll and complete both dependencies + // Poll both independent tasks got1, _ := q.Poll(ctx, 1, filterFnTrue) got2, _ := q.Poll(ctx, 2, filterFnTrue) - assert.NoError(t, q.Done(ctx, got1.ID, model.StatusSuccess)) - assert.NoError(t, q.Error(ctx, got2.ID, fmt.Errorf("failed"))) + // Ensure we got both task1 and task2 (order may vary) + gotIDs := map[string]bool{got1.ID: true, got2.ID: true} + assert.True(t, gotIDs["1"] && gotIDs["2"], "Should get both task1 and task2") + + // Complete them with different statuses + if got1.ID == "1" { + assert.NoError(t, q.Done(ctx, got1.ID, model.StatusSuccess)) + assert.NoError(t, q.Error(ctx, got2.ID, fmt.Errorf("failed"))) + } else { + assert.NoError(t, q.Done(ctx, got2.ID, model.StatusSuccess)) + assert.NoError(t, q.Error(ctx, got1.ID, fmt.Errorf("failed"))) + } // task3 should have both statuses propagated waitForProcess() got3, err := q.Poll(ctx, 3, filterFnTrue) assert.NoError(t, err) - assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"]) - assert.Equal(t, model.StatusFailure, got3.DepStatus["2"]) + + // Verify both dependency statuses are set correctly + assert.Contains(t, got3.DepStatus, "1") + assert.Contains(t, got3.DepStatus, "2") + assert.True(t, + (got3.DepStatus["1"] == model.StatusSuccess && got3.DepStatus["2"] == model.StatusFailure) || + (got3.DepStatus["1"] == model.StatusFailure && got3.DepStatus["2"] == model.StatusSuccess), + "One dependency should succeed and one should fail") assert.False(t, got3.ShouldRun()) } @@ -328,13 +344,10 @@ func TestErrorAtOnce(t *testing.T) { ctx, cancel, q := setupTestQueue(t) defer cancel(nil) + // Test batch error on running tasks task1 := genDummyTask() task2 := &model.Task{ID: "2"} - task3 := &model.Task{ - ID: "3", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } + task3 := &model.Task{ID: "3"} assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3})) waitForProcess() @@ -342,13 +355,18 @@ func TestErrorAtOnce(t *testing.T) { got1, _ := q.Poll(ctx, 1, filterFnTrue) got2, _ := q.Poll(ctx, 2, filterFnTrue) - // Batch error on running tasks assert.NoError(t, q.ErrorAtOnce(ctx, []string{got1.ID, got2.ID}, fmt.Errorf("batch error"))) waitForProcess() info := q.Info(ctx) assert.Len(t, info.Running, 0) + assert.Len(t, info.Pending, 1) // task3 should still be pending + + // Clean up task3 + got3, _ := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, q.Done(ctx, got3.ID, model.StatusSuccess)) + waitForProcess() - // Error with non-existent tasks + // Test error with non-existent tasks task4 := &model.Task{ID: "4"} assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task4})) waitForProcess() @@ -358,23 +376,59 @@ func TestErrorAtOnce(t *testing.T) { assert.Error(t, err) assert.ErrorIs(t, err, ErrNotFound) - // ErrCancel updates dependency status - task5 := genDummyTask() - task5.ID = "5" + // Verify task4 was still removed despite the error + waitForProcess() + info = q.Info(ctx) + assert.Len(t, info.Running, 0) + + // Test ErrorAtOnce on tasks in waitingOnDeps (to cover removeFromPendingAndWaiting) + task5 := &model.Task{ID: "5"} task6 := &model.Task{ ID: "6", Dependencies: []string{"5"}, DepStatus: make(map[string]model.StatusValue), } + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task5, task6})) waitForProcess() - got5, _ := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, q.ErrorAtOnce(ctx, []string{got5.ID}, ErrCancel)) + info = q.Info(ctx) + assert.Equal(t, 1, info.Stats.WaitingOnDeps, "task6 should be waiting on deps") + + // Cancel both tasks - this should remove task6 from waitingOnDeps + assert.NoError(t, q.ErrorAtOnce(ctx, []string{"5", "6"}, fmt.Errorf("canceled"))) + waitForProcess() + info = q.Info(ctx) + assert.Equal(t, 0, info.Stats.WaitingOnDeps, "task6 should be removed from waitingOnDeps") + assert.Len(t, info.Pending, 0, "no tasks should be pending") +} - got6, _ := q.Poll(ctx, 2, filterFnTrue) - assert.Equal(t, model.StatusKilled, got6.DepStatus["5"]) +func TestErrorAtOnceCancellation(t *testing.T) { + ctx, cancel, q := setupTestQueue(t) + defer cancel(nil) + + // Test ErrCancel with dependency propagation + task1 := &model.Task{ID: "1"} + task2 := &model.Task{ + ID: "2", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + RunOn: []string{"success", "failure"}, // Ensures task runs on kill/cancel + } + + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2})) + waitForProcess() + got1, _ := q.Poll(ctx, 1, filterFnTrue) + + assert.NoError(t, q.ErrorAtOnce(ctx, []string{got1.ID}, ErrCancel)) + + // Wait for cancellation to be processed and dependency to be updated + waitForProcess() + waitForProcess() + + got2, _ := q.Poll(ctx, 2, filterFnTrue) + assert.Equal(t, model.StatusKilled, got2.DepStatus["1"]) } func TestShouldRunLogic(t *testing.T) { @@ -539,3 +593,75 @@ func findTaskByAgent(tasks map[string]int64, agentID int64) string { } return "" } + +func TestDependencyStatusPropagation(t *testing.T) { + ctx, cancel, q := setupTestQueue(t) + defer cancel(nil) + + // Test basic dependency status propagation from completed task to waiting tasks + task1 := genDummyTask() + task2 := &model.Task{ + ID: "2", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + } + task3 := &model.Task{ + ID: "3", + Dependencies: []string{"1"}, + DepStatus: make(map[string]model.StatusValue), + RunOn: []string{"success", "failure"}, + } + + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3})) + waitForProcess() + + // Both task2 and task3 should be waiting on task1 + info := q.Info(ctx) + assert.Equal(t, 2, info.Stats.WaitingOnDeps) + + // Complete task1 - this triggers updateDepStatusInQueue for waitingOnDeps tasks + got1, _ := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, q.Done(ctx, got1.ID, model.StatusSuccess)) + + waitForProcess() + + // Poll task2 and task3 - both should have task1's success status + got2, _ := q.Poll(ctx, 2, filterFnTrue) + got3, _ := q.Poll(ctx, 3, filterFnTrue) + + assert.Equal(t, model.StatusSuccess, got2.DepStatus["1"]) + assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"]) + + // Now test updateDepStatusInQueue for tasks in pending and running queues + task4 := &model.Task{ID: "4"} + task5 := &model.Task{ + ID: "5", + Dependencies: []string{"4"}, + DepStatus: make(map[string]model.StatusValue), + } + task6 := &model.Task{ + ID: "6", + Dependencies: []string{"4"}, + DepStatus: make(map[string]model.StatusValue), + RunOn: []string{"success", "failure"}, + } + + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task4, task5, task6})) + waitForProcess() + + // Poll task4 and complete it while task5 and task6 are waiting + got4, _ := q.Poll(ctx, 4, filterFnTrue) + assert.NoError(t, q.Error(ctx, got4.ID, fmt.Errorf("failed"))) + + waitForProcess() + + // task5 should not run (default behavior on failure) + got5, _ := q.Poll(ctx, 5, filterFnTrue) + assert.Equal(t, model.StatusFailure, got5.DepStatus["4"]) + assert.False(t, got5.ShouldRun()) + + // task6 should run (has failure in RunOn) + got6, _ := q.Poll(ctx, 6, filterFnTrue) + assert.Equal(t, model.StatusFailure, got6.DepStatus["4"]) + assert.True(t, got6.ShouldRun()) +} From b2fe8c00e37634cdf8704460555c007c26fe2c4e Mon Sep 17 00:00:00 2001 From: 6543 <6543@obermui.de> Date: Mon, 26 Jan 2026 19:53:10 +0100 Subject: [PATCH 11/13] nice test --- server/queue/fifo.go | 12 +- server/queue/fifo_test.go | 905 ++++++++++++++++++-------------------- 2 files changed, 443 insertions(+), 474 deletions(-) diff --git a/server/queue/fifo.go b/server/queue/fifo.go index 4cac0b22992..5a5209b8e49 100644 --- a/server/queue/fifo.go +++ b/server/queue/fifo.go @@ -88,23 +88,23 @@ func (q *fifo) Poll(c context.Context, agentID int64, filter FilterFn) (*model.T q.Lock() ctx, stop := context.WithCancelCause(c) - _worker := &worker{ + w := &worker{ agentID: agentID, channel: make(chan *model.Task, 1), filter: filter, stop: stop, } - q.workers[_worker] = struct{}{} + q.workers[w] = struct{}{} q.Unlock() for { select { case <-ctx.Done(): q.Lock() - delete(q.workers, _worker) + delete(q.workers, w) q.Unlock() return nil, ctx.Err() - case t := <-_worker.channel: + case t := <-w.channel: return t, nil } } @@ -301,12 +301,10 @@ func (q *fifo) filterWaiting() { } func (q *fifo) assignToWorker() (*list.Element, *worker) { - var next *list.Element var bestWorker *worker var bestScore int - for element := q.pending.Front(); element != nil; element = next { - next = element.Next() + for element := q.pending.Front(); element != nil; element = element.Next() { task := element.Value.(*model.Task) log.Debug().Msgf("queue: trying to assign task: %v with deps %v", task.ID, task.Dependencies) diff --git a/server/queue/fifo_test.go b/server/queue/fifo_test.go index 9530925653f..0ffaed96937 100644 --- a/server/queue/fifo_test.go +++ b/server/queue/fifo_test.go @@ -50,465 +50,504 @@ func setupTestQueue(t *testing.T) (context.Context, context.CancelCauseFunc, *fi return ctx, cancel, q } -func TestBasicQueueOperations(t *testing.T) { +func TestFifoBasicOperations(t *testing.T) { ctx, cancel, q := setupTestQueue(t) defer cancel(nil) - dummyTask := genDummyTask() + t.Run("push poll done lifecycle", func(t *testing.T) { + dummyTask := genDummyTask() - // Test push -> poll -> done lifecycle - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) - waitForProcess() + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) + waitForProcess() - info := q.Info(ctx) - assert.Len(t, info.Pending, 1) + info := q.Info(ctx) + assert.Len(t, info.Pending, 1) - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, dummyTask, got) + got, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, dummyTask, got) + + waitForProcess() + info = q.Info(ctx) + assert.Len(t, info.Pending, 0) + assert.Len(t, info.Running, 1) + + assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) + + waitForProcess() + info = q.Info(ctx) + assert.Len(t, info.Running, 0) + }) + + t.Run("error handling", func(t *testing.T) { + task1 := &model.Task{ID: "task-error-1"} + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1})) + + waitForProcess() + got, _ := q.Poll(ctx, 1, filterFnTrue) + + assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("test error"))) + waitForProcess() + info := q.Info(ctx) + assert.Len(t, info.Running, 0) + + assert.Error(t, q.Error(ctx, "totally-fake-id", fmt.Errorf("test error"))) + }) + + t.Run("error at once", func(t *testing.T) { + task1 := &model.Task{ID: "batch-1"} + task2 := &model.Task{ID: "batch-2"} + task3 := &model.Task{ID: "batch-3"} + + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3})) + waitForProcess() + + got1, _ := q.Poll(ctx, 1, filterFnTrue) + got2, _ := q.Poll(ctx, 2, filterFnTrue) + + assert.NoError(t, q.ErrorAtOnce(ctx, []string{got1.ID, got2.ID}, fmt.Errorf("batch error"))) + waitForProcess() + info := q.Info(ctx) + assert.Len(t, info.Running, 0) + assert.Len(t, info.Pending, 1) + + got3, _ := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, q.Done(ctx, got3.ID, model.StatusSuccess)) + waitForProcess() + + task4 := &model.Task{ID: "batch-4"} + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task4})) + waitForProcess() + got4, _ := q.Poll(ctx, 1, filterFnTrue) + + err := q.ErrorAtOnce(ctx, []string{got4.ID, "fake-1", "fake-2"}, fmt.Errorf("test")) + assert.Error(t, err) + assert.ErrorIs(t, err, ErrNotFound) + + waitForProcess() + info = q.Info(ctx) + assert.Len(t, info.Running, 0) + }) + + t.Run("error at once with waiting deps", func(t *testing.T) { + task5 := &model.Task{ID: "deps-cancel-5"} + task6 := &model.Task{ + ID: "deps-cancel-6", + Dependencies: []string{"deps-cancel-5"}, + DepStatus: make(map[string]model.StatusValue), + } - waitForProcess() - info = q.Info(ctx) - assert.Len(t, info.Pending, 0) - assert.Len(t, info.Running, 1) + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task5, task6})) + waitForProcess() - assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) + info := q.Info(ctx) + assert.Equal(t, 1, info.Stats.WaitingOnDeps) - waitForProcess() - info = q.Info(ctx) - assert.Len(t, info.Running, 0) -} + assert.NoError(t, q.ErrorAtOnce(ctx, []string{"deps-cancel-5", "deps-cancel-6"}, fmt.Errorf("canceled"))) -func TestTaskDependencies(t *testing.T) { - ctx, cancel, q := setupTestQueue(t) - defer cancel(nil) + waitForProcess() + info = q.Info(ctx) + assert.Equal(t, 0, info.Stats.WaitingOnDeps) + assert.Len(t, info.Pending, 0) + }) - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } - task3 := &model.Task{ - ID: "3", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - RunOn: []string{"success", "failure"}, - } + t.Run("error at once cancellation", func(t *testing.T) { + task1 := &model.Task{ID: "cancel-prop-1"} + task2 := &model.Task{ + ID: "cancel-prop-2", + Dependencies: []string{"cancel-prop-1"}, + DepStatus: make(map[string]model.StatusValue), + RunOn: []string{"success", "failure"}, + } - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1})) - waitForProcess() - - // Verify waiting on deps stat - info := q.Info(ctx) - assert.Equal(t, 2, info.Stats.WaitingOnDeps) - - // Poll and fail task1 - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, task1, got) - assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1"))) - - // task2 should be polled but not run (no failure in RunOn) - waitForProcess() - got, err = q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, task2, got) - assert.False(t, got.ShouldRun()) - assert.Equal(t, model.StatusFailure, got.DepStatus["1"]) - - // task3 should run (has failure in RunOn) - waitForProcess() - got, err = q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) - assert.Equal(t, task3, got) - assert.True(t, got.ShouldRun()) - assert.Equal(t, model.StatusFailure, got.DepStatus["1"]) - - waitForProcess() - info = q.Info(ctx) - assert.Equal(t, 0, info.Stats.WaitingOnDeps) -} + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2})) + waitForProcess() + got1, _ := q.Poll(ctx, 1, filterFnTrue) -func TestMultipleDependencies(t *testing.T) { - ctx, cancel, q := setupTestQueue(t) - defer cancel(nil) + assert.NoError(t, q.ErrorAtOnce(ctx, []string{got1.ID}, ErrCancel)) - task1 := genDummyTask() - task2 := &model.Task{ID: "2"} - task3 := &model.Task{ - ID: "3", - Dependencies: []string{"1", "2"}, - DepStatus: make(map[string]model.StatusValue), - } + waitForProcess() + waitForProcess() - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1})) - waitForProcess() + got2, _ := q.Poll(ctx, 2, filterFnTrue) + assert.Equal(t, model.StatusKilled, got2.DepStatus["cancel-prop-1"]) + }) - // Poll both independent tasks - got1, _ := q.Poll(ctx, 1, filterFnTrue) - got2, _ := q.Poll(ctx, 2, filterFnTrue) + t.Run("pause resume", func(t *testing.T) { + dummyTask := &model.Task{ID: "pause-1"} - // Ensure we got both task1 and task2 (order may vary) - gotIDs := map[string]bool{got1.ID: true, got2.ID: true} - assert.True(t, gotIDs["1"] && gotIDs["2"], "Should get both task1 and task2") + var wg sync.WaitGroup + wg.Add(1) + go func() { + _, _ = q.Poll(ctx, 99, filterFnTrue) + wg.Done() + }() - // Complete them with different statuses - if got1.ID == "1" { - assert.NoError(t, q.Done(ctx, got1.ID, model.StatusSuccess)) - assert.NoError(t, q.Error(ctx, got2.ID, fmt.Errorf("failed"))) - } else { - assert.NoError(t, q.Done(ctx, got2.ID, model.StatusSuccess)) - assert.NoError(t, q.Error(ctx, got1.ID, fmt.Errorf("failed"))) - } + q.Pause() + t0 := time.Now() + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) + waitForProcess() + q.Resume() - // task3 should have both statuses propagated - waitForProcess() - got3, err := q.Poll(ctx, 3, filterFnTrue) - assert.NoError(t, err) - - // Verify both dependency statuses are set correctly - assert.Contains(t, got3.DepStatus, "1") - assert.Contains(t, got3.DepStatus, "2") - assert.True(t, - (got3.DepStatus["1"] == model.StatusSuccess && got3.DepStatus["2"] == model.StatusFailure) || - (got3.DepStatus["1"] == model.StatusFailure && got3.DepStatus["2"] == model.StatusSuccess), - "One dependency should succeed and one should fail") - assert.False(t, got3.ShouldRun()) + wg.Wait() + assert.Greater(t, time.Since(t0), 20*time.Millisecond) + }) } -func TestTransitiveDependencies(t *testing.T) { +func TestFifoDependencies(t *testing.T) { ctx, cancel, q := setupTestQueue(t) defer cancel(nil) - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } - task3 := &model.Task{ - ID: "3", - Dependencies: []string{"2"}, - DepStatus: make(map[string]model.StatusValue), - } + t.Run("basic dependency handling", func(t *testing.T) { + task1 := &model.Task{ID: "dep-basic-1"} + task2 := &model.Task{ + ID: "dep-basic-2", + Dependencies: []string{"dep-basic-1"}, + DepStatus: make(map[string]model.StatusValue), + } + task3 := &model.Task{ + ID: "dep-basic-3", + Dependencies: []string{"dep-basic-1"}, + DepStatus: make(map[string]model.StatusValue), + RunOn: []string{"success", "failure"}, + } - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1})) - waitForProcess() + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1})) + waitForProcess() - // Fail task1 - got, _ := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1"))) + info := q.Info(ctx) + assert.Equal(t, 2, info.Stats.WaitingOnDeps) - // task2 should skip - waitForProcess() - got, _ = q.Poll(ctx, 2, filterFnTrue) - assert.False(t, got.ShouldRun()) - assert.NoError(t, q.Done(ctx, got.ID, model.StatusSkipped)) + got, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, task1, got) + assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1"))) - // task3 should also skip (transitive) - waitForProcess() - got, _ = q.Poll(ctx, 3, filterFnTrue) - assert.Equal(t, model.StatusSkipped, got.DepStatus["2"]) - assert.False(t, got.ShouldRun()) -} + waitForProcess() + got, err = q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, task2, got) + assert.False(t, got.ShouldRun()) + assert.Equal(t, model.StatusFailure, got.DepStatus["dep-basic-1"]) -func TestLeaseExpiration(t *testing.T) { - ctx, cancel, q := setupTestQueue(t) - defer cancel(nil) + waitForProcess() + got, err = q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, task3, got) + assert.True(t, got.ShouldRun()) + assert.Equal(t, model.StatusFailure, got.DepStatus["dep-basic-1"]) + + waitForProcess() + info = q.Info(ctx) + assert.Equal(t, 0, info.Stats.WaitingOnDeps) + }) + + t.Run("multiple dependencies", func(t *testing.T) { + task1 := &model.Task{ID: "multi-dep-1"} + task2 := &model.Task{ID: "multi-dep-2"} + task3 := &model.Task{ + ID: "multi-dep-3", + Dependencies: []string{"multi-dep-1", "multi-dep-2"}, + DepStatus: make(map[string]model.StatusValue), + } - q.extension = 0 // Immediate expiration - dummyTask := genDummyTask() - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1})) + waitForProcess() - waitForProcess() - got, err := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, err) + got1, _ := q.Poll(ctx, 1, filterFnTrue) + got2, _ := q.Poll(ctx, 2, filterFnTrue) - // Test Wait returns error on expiration - errCh := make(chan error, 1) - go func() { errCh <- q.Wait(ctx, got.ID) }() + gotIDs := map[string]bool{got1.ID: true, got2.ID: true} + assert.True(t, gotIDs["multi-dep-1"] && gotIDs["multi-dep-2"]) - waitForProcess() - select { - case werr := <-errCh: - assert.Error(t, werr) - case <-time.After(2 * time.Second): - t.Fatal("timeout waiting for Wait to return") - } + if got1.ID == "multi-dep-1" { + assert.NoError(t, q.Done(ctx, got1.ID, model.StatusSuccess)) + assert.NoError(t, q.Error(ctx, got2.ID, fmt.Errorf("failed"))) + } else { + assert.NoError(t, q.Done(ctx, got2.ID, model.StatusSuccess)) + assert.NoError(t, q.Error(ctx, got1.ID, fmt.Errorf("failed"))) + } - // Task should be back in pending - info := q.Info(ctx) - assert.Len(t, info.Pending, 1) -} + waitForProcess() + got3, err := q.Poll(ctx, 3, filterFnTrue) + assert.NoError(t, err) -func TestExtend(t *testing.T) { - ctx, cancel, q := setupTestQueue(t) - defer cancel(nil) + assert.Contains(t, got3.DepStatus, "multi-dep-1") + assert.Contains(t, got3.DepStatus, "multi-dep-2") + assert.True(t, + (got3.DepStatus["multi-dep-1"] == model.StatusSuccess && got3.DepStatus["multi-dep-2"] == model.StatusFailure) || + (got3.DepStatus["multi-dep-1"] == model.StatusFailure && got3.DepStatus["multi-dep-2"] == model.StatusSuccess)) + assert.False(t, got3.ShouldRun()) + }) + + t.Run("transitive dependencies", func(t *testing.T) { + task1 := &model.Task{ID: "trans-1"} + task2 := &model.Task{ + ID: "trans-2", + Dependencies: []string{"trans-1"}, + DepStatus: make(map[string]model.StatusValue), + } + task3 := &model.Task{ + ID: "trans-3", + Dependencies: []string{"trans-2"}, + DepStatus: make(map[string]model.StatusValue), + } + + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1})) + waitForProcess() + + got, _ := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1"))) + + waitForProcess() + got, _ = q.Poll(ctx, 2, filterFnTrue) + assert.False(t, got.ShouldRun()) + assert.NoError(t, q.Done(ctx, got.ID, model.StatusSkipped)) + + waitForProcess() + got, _ = q.Poll(ctx, 3, filterFnTrue) + assert.Equal(t, model.StatusSkipped, got.DepStatus["trans-2"]) + assert.False(t, got.ShouldRun()) + }) + + t.Run("dependency status propagation", func(t *testing.T) { + task1 := &model.Task{ID: "prop-1"} + task2 := &model.Task{ + ID: "prop-2", + Dependencies: []string{"prop-1"}, + DepStatus: make(map[string]model.StatusValue), + } + task3 := &model.Task{ + ID: "prop-3", + Dependencies: []string{"prop-1"}, + DepStatus: make(map[string]model.StatusValue), + RunOn: []string{"success", "failure"}, + } - q.extension = 50 * time.Millisecond - dummyTask := genDummyTask() - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3})) + waitForProcess() - waitForProcess() - got, _ := q.Poll(ctx, 5, filterFnTrue) + info := q.Info(ctx) + assert.Equal(t, 2, info.Stats.WaitingOnDeps) - // Correct agent can extend - assert.NoError(t, q.Extend(ctx, 5, got.ID)) + got1, _ := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, q.Done(ctx, got1.ID, model.StatusSuccess)) - // Wrong agent cannot extend - assert.ErrorIs(t, q.Extend(ctx, 999, got.ID), ErrAgentMissMatch) - assert.ErrorIs(t, q.Extend(ctx, 1, got.ID), ErrAgentMissMatch) + waitForProcess() - // Non-existent task - assert.ErrorIs(t, q.Extend(ctx, 1, "non-existent"), ErrNotFound) + got2, _ := q.Poll(ctx, 2, filterFnTrue) + got3, _ := q.Poll(ctx, 3, filterFnTrue) - // Extend prevents expiration - for i := 0; i < 3; i++ { - time.Sleep(30 * time.Millisecond) - assert.NoError(t, q.Extend(ctx, 5, got.ID)) - } + assert.Equal(t, model.StatusSuccess, got2.DepStatus["prop-1"]) + assert.Equal(t, model.StatusSuccess, got3.DepStatus["prop-1"]) + + task4 := &model.Task{ID: "prop-4"} + task5 := &model.Task{ + ID: "prop-5", + Dependencies: []string{"prop-4"}, + DepStatus: make(map[string]model.StatusValue), + } + task6 := &model.Task{ + ID: "prop-6", + Dependencies: []string{"prop-4"}, + DepStatus: make(map[string]model.StatusValue), + RunOn: []string{"success", "failure"}, + } + + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task4, task5, task6})) + waitForProcess() + + got4, _ := q.Poll(ctx, 4, filterFnTrue) + assert.NoError(t, q.Error(ctx, got4.ID, fmt.Errorf("failed"))) + + waitForProcess() + + got5, _ := q.Poll(ctx, 5, filterFnTrue) + assert.Equal(t, model.StatusFailure, got5.DepStatus["prop-4"]) + assert.False(t, got5.ShouldRun()) - info := q.Info(ctx) - assert.Len(t, info.Running, 1) - assert.Len(t, info.Pending, 0) + got6, _ := q.Poll(ctx, 6, filterFnTrue) + assert.Equal(t, model.StatusFailure, got6.DepStatus["prop-4"]) + assert.True(t, got6.ShouldRun()) + }) } -func TestWait(t *testing.T) { +func TestFifoLeaseManagement(t *testing.T) { ctx, cancel, q := setupTestQueue(t) defer cancel(nil) - dummyTask := genDummyTask() - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) + t.Run("lease expiration", func(t *testing.T) { + q.extension = 0 + dummyTask := &model.Task{ID: "lease-exp-1"} + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) - waitForProcess() - got, _ := q.Poll(ctx, 1, filterFnTrue) + waitForProcess() + got, err := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, err) - // Wait completes on Done - var wg sync.WaitGroup - wg.Add(1) - go func() { - assert.NoError(t, q.Wait(ctx, got.ID)) - wg.Done() - }() + errCh := make(chan error, 1) + go func() { errCh <- q.Wait(ctx, got.ID) }() - time.Sleep(time.Millisecond) - assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) - wg.Wait() + waitForProcess() + select { + case werr := <-errCh: + assert.Error(t, werr) + case <-time.After(2 * time.Second): + t.Fatal("timeout waiting for Wait to return") + } - // Wait on non-existent task - assert.NoError(t, q.Wait(ctx, "non-existent")) + info := q.Info(ctx) + assert.Len(t, info.Pending, 1) - // Wait with context cancellation - dummyTask2 := &model.Task{ID: "2"} - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask2})) - waitForProcess() - got2, _ := q.Poll(ctx, 1, filterFnTrue) + // Clean up - poll and complete the task + got, _ = q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) + waitForProcess() - waitCtx, waitCancel := context.WithCancel(ctx) - errCh := make(chan error, 1) - go func() { errCh <- q.Wait(waitCtx, got2.ID) }() + // Verify cleanup + info = q.Info(ctx) + assert.Len(t, info.Pending, 0) + assert.Len(t, info.Running, 0) + }) - time.Sleep(50 * time.Millisecond) - waitCancel() + t.Run("extend lease", func(t *testing.T) { + q.extension = 50 * time.Millisecond + dummyTask := &model.Task{ID: "extend-1"} + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) - select { - case err := <-errCh: - assert.NoError(t, err) - case <-time.After(time.Second): - t.Fatal("Wait should return when context is canceled") - } -} + waitForProcess() + got, _ := q.Poll(ctx, 5, filterFnTrue) -func TestError(t *testing.T) { - ctx, cancel, q := setupTestQueue(t) - defer cancel(nil) + assert.NoError(t, q.Extend(ctx, 5, got.ID)) + assert.ErrorIs(t, q.Extend(ctx, 999, got.ID), ErrAgentMissMatch) + assert.ErrorIs(t, q.Extend(ctx, 1, got.ID), ErrAgentMissMatch) + assert.ErrorIs(t, q.Extend(ctx, 1, "non-existent"), ErrNotFound) - task1 := genDummyTask() - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1})) + for i := 0; i < 3; i++ { + time.Sleep(30 * time.Millisecond) + assert.NoError(t, q.Extend(ctx, 5, got.ID)) + } - waitForProcess() - got, _ := q.Poll(ctx, 1, filterFnTrue) + info := q.Info(ctx) + assert.Len(t, info.Running, 1) + assert.Len(t, info.Pending, 0) - // Error on running task - assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("test error"))) - waitForProcess() - info := q.Info(ctx) - assert.Len(t, info.Running, 0) + // Clean up - complete the extended task + assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) + waitForProcess() - // Error on non-existent task - assert.Error(t, q.Error(ctx, "totally-fake-id", fmt.Errorf("test error"))) -} + // Verify cleanup + info = q.Info(ctx) + assert.Len(t, info.Pending, 0) + assert.Len(t, info.Running, 0) + }) -func TestErrorAtOnce(t *testing.T) { - ctx, cancel, q := setupTestQueue(t) - defer cancel(nil) + t.Run("wait operations", func(t *testing.T) { + // Verify queue is clean before starting + info := q.Info(ctx) + assert.Len(t, info.Pending, 0, "queue should be empty at start of wait operations") + assert.Len(t, info.Running, 0, "queue should be empty at start of wait operations") - // Test batch error on running tasks - task1 := genDummyTask() - task2 := &model.Task{ID: "2"} - task3 := &model.Task{ID: "3"} - - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3})) - waitForProcess() - - got1, _ := q.Poll(ctx, 1, filterFnTrue) - got2, _ := q.Poll(ctx, 2, filterFnTrue) - - assert.NoError(t, q.ErrorAtOnce(ctx, []string{got1.ID, got2.ID}, fmt.Errorf("batch error"))) - waitForProcess() - info := q.Info(ctx) - assert.Len(t, info.Running, 0) - assert.Len(t, info.Pending, 1) // task3 should still be pending - - // Clean up task3 - got3, _ := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, q.Done(ctx, got3.ID, model.StatusSuccess)) - waitForProcess() - - // Test error with non-existent tasks - task4 := &model.Task{ID: "4"} - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task4})) - waitForProcess() - got4, _ := q.Poll(ctx, 1, filterFnTrue) - - err := q.ErrorAtOnce(ctx, []string{got4.ID, "fake-1", "fake-2"}, fmt.Errorf("test")) - assert.Error(t, err) - assert.ErrorIs(t, err, ErrNotFound) - - // Verify task4 was still removed despite the error - waitForProcess() - info = q.Info(ctx) - assert.Len(t, info.Running, 0) - - // Test ErrorAtOnce on tasks in waitingOnDeps (to cover removeFromPendingAndWaiting) - task5 := &model.Task{ID: "5"} - task6 := &model.Task{ - ID: "6", - Dependencies: []string{"5"}, - DepStatus: make(map[string]model.StatusValue), - } + dummyTask := &model.Task{ID: "wait-1"} + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task5, task6})) - waitForProcess() + waitForProcess() + got, _ := q.Poll(ctx, 1, filterFnTrue) - info = q.Info(ctx) - assert.Equal(t, 1, info.Stats.WaitingOnDeps, "task6 should be waiting on deps") + var wg sync.WaitGroup + wg.Add(1) + go func() { + assert.NoError(t, q.Wait(ctx, got.ID)) + wg.Done() + }() - // Cancel both tasks - this should remove task6 from waitingOnDeps - assert.NoError(t, q.ErrorAtOnce(ctx, []string{"5", "6"}, fmt.Errorf("canceled"))) + time.Sleep(time.Millisecond) + assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) + wg.Wait() - waitForProcess() - info = q.Info(ctx) - assert.Equal(t, 0, info.Stats.WaitingOnDeps, "task6 should be removed from waitingOnDeps") - assert.Len(t, info.Pending, 0, "no tasks should be pending") -} + assert.NoError(t, q.Wait(ctx, "non-existent")) -func TestErrorAtOnceCancellation(t *testing.T) { - ctx, cancel, q := setupTestQueue(t) - defer cancel(nil) + dummyTask2 := &model.Task{ID: "wait-2"} + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask2})) + waitForProcess() + got2, _ := q.Poll(ctx, 1, filterFnTrue) - // Test ErrCancel with dependency propagation - task1 := &model.Task{ID: "1"} - task2 := &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - RunOn: []string{"success", "failure"}, // Ensures task runs on kill/cancel - } + waitCtx, waitCancel := context.WithCancel(ctx) + errCh := make(chan error, 1) + go func() { errCh <- q.Wait(waitCtx, got2.ID) }() - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2})) - waitForProcess() - got1, _ := q.Poll(ctx, 1, filterFnTrue) + time.Sleep(50 * time.Millisecond) + waitCancel() - assert.NoError(t, q.ErrorAtOnce(ctx, []string{got1.ID}, ErrCancel)) - - // Wait for cancellation to be processed and dependency to be updated - waitForProcess() - waitForProcess() - - got2, _ := q.Poll(ctx, 2, filterFnTrue) - assert.Equal(t, model.StatusKilled, got2.DepStatus["1"]) -} + select { + case err := <-errCh: + assert.NoError(t, err) + case <-time.After(time.Second): + t.Fatal("Wait should return when context is canceled") + } -func TestShouldRunLogic(t *testing.T) { - tests := []struct { - name string - depStatus model.StatusValue - runOn []string - expected bool - }{ - {"Success without RunOn", model.StatusSuccess, nil, true}, - {"Failure without RunOn", model.StatusFailure, nil, false}, - {"Success with failure RunOn", model.StatusSuccess, []string{"failure"}, false}, - {"Failure with failure RunOn", model.StatusFailure, []string{"failure"}, true}, - {"Success with both RunOn", model.StatusSuccess, []string{"success", "failure"}, true}, - {"Skipped without RunOn", model.StatusSkipped, nil, false}, - {"Skipped with failure RunOn", model.StatusSkipped, []string{"failure"}, true}, - } + // Clean up - complete the second wait task + assert.NoError(t, q.Done(ctx, got2.ID, model.StatusSuccess)) + waitForProcess() - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - task := &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: map[string]model.StatusValue{"1": tt.depStatus}, - RunOn: tt.runOn, - } - assert.Equal(t, tt.expected, task.ShouldRun()) - }) - } + // Verify cleanup + info = q.Info(ctx) + assert.Len(t, info.Pending, 0) + assert.Len(t, info.Running, 0) + }) } -func TestWorkerManagement(t *testing.T) { +func TestFifoWorkerManagement(t *testing.T) { ctx, cancel, q := setupTestQueue(t) defer cancel(nil) - // Poll with context cancellation - pollCtx, pollCancel := context.WithCancel(ctx) - errCh := make(chan error, 1) - go func() { - _, err := q.Poll(pollCtx, 1, filterFnTrue) - errCh <- err - }() - - time.Sleep(50 * time.Millisecond) - pollCancel() - - select { - case err := <-errCh: - assert.ErrorIs(t, err, context.Canceled) - case <-time.After(time.Second): - t.Fatal("Poll should return when context is canceled") - } - - // Kick multiple workers for same agent - pollResults := make(chan error, 5) - for i := 0; i < 5; i++ { + t.Run("poll with context cancellation", func(t *testing.T) { + pollCtx, pollCancel := context.WithCancel(ctx) + errCh := make(chan error, 1) go func() { - _, err := q.Poll(ctx, 42, filterFnTrue) - pollResults <- err + _, err := q.Poll(pollCtx, 1, filterFnTrue) + errCh <- err }() - } - time.Sleep(50 * time.Millisecond) - q.KickAgentWorkers(42) + time.Sleep(50 * time.Millisecond) + pollCancel() - kickedCount := 0 - for i := 0; i < 5; i++ { select { - case err := <-pollResults: - if errors.Is(err, context.Canceled) { - kickedCount++ - } + case err := <-errCh: + assert.ErrorIs(t, err, context.Canceled) case <-time.After(time.Second): - t.Fatal("expected all workers to be kicked") + t.Fatal("Poll should return when context is canceled") } - } - assert.Equal(t, 5, kickedCount) + }) + + t.Run("kick agent workers", func(t *testing.T) { + pollResults := make(chan error, 5) + for i := 0; i < 5; i++ { + go func() { + _, err := q.Poll(ctx, 42, filterFnTrue) + pollResults <- err + }() + } + + time.Sleep(50 * time.Millisecond) + q.KickAgentWorkers(42) + + kickedCount := 0 + for i := 0; i < 5; i++ { + select { + case err := <-pollResults: + if errors.Is(err, context.Canceled) { + kickedCount++ + } + case <-time.After(time.Second): + t.Fatal("expected all workers to be kicked") + } + } + assert.Equal(t, 5, kickedCount) + }) } -func TestLabelBasedScoring(t *testing.T) { +func TestFifoLabelBasedScoring(t *testing.T) { ctx, cancel := context.WithCancelCause(t.Context()) defer cancel(nil) @@ -556,33 +595,37 @@ func TestLabelBasedScoring(t *testing.T) { } } - // Agent 1 should get task with org-id 123 assert.Contains(t, []string{"1", "3"}, findTaskByAgent(receivedTasks, 1)) - // Agent 2 should get task with org-id 456 assert.Equal(t, "2", findTaskByAgent(receivedTasks, 2)) } -func TestPauseResume(t *testing.T) { - ctx, cancel, q := setupTestQueue(t) - defer cancel(nil) - - dummyTask := genDummyTask() - - var wg sync.WaitGroup - wg.Add(1) - go func() { - _, _ = q.Poll(ctx, 1, filterFnTrue) - wg.Done() - }() - - q.Pause() - t0 := time.Now() - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) - waitForProcess() - q.Resume() +func TestShouldRunLogic(t *testing.T) { + tests := []struct { + name string + depStatus model.StatusValue + runOn []string + expected bool + }{ + {"Success without RunOn", model.StatusSuccess, nil, true}, + {"Failure without RunOn", model.StatusFailure, nil, false}, + {"Success with failure RunOn", model.StatusSuccess, []string{"failure"}, false}, + {"Failure with failure RunOn", model.StatusFailure, []string{"failure"}, true}, + {"Success with both RunOn", model.StatusSuccess, []string{"success", "failure"}, true}, + {"Skipped without RunOn", model.StatusSkipped, nil, false}, + {"Skipped with failure RunOn", model.StatusSkipped, []string{"failure"}, true}, + } - wg.Wait() - assert.Greater(t, time.Since(t0), 20*time.Millisecond) + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + task := &model.Task{ + ID: "2", + Dependencies: []string{"1"}, + DepStatus: map[string]model.StatusValue{"1": tt.depStatus}, + RunOn: tt.runOn, + } + assert.Equal(t, tt.expected, task.ShouldRun()) + }) + } } func findTaskByAgent(tasks map[string]int64, agentID int64) string { @@ -593,75 +636,3 @@ func findTaskByAgent(tasks map[string]int64, agentID int64) string { } return "" } - -func TestDependencyStatusPropagation(t *testing.T) { - ctx, cancel, q := setupTestQueue(t) - defer cancel(nil) - - // Test basic dependency status propagation from completed task to waiting tasks - task1 := genDummyTask() - task2 := &model.Task{ - ID: "2", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - } - task3 := &model.Task{ - ID: "3", - Dependencies: []string{"1"}, - DepStatus: make(map[string]model.StatusValue), - RunOn: []string{"success", "failure"}, - } - - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3})) - waitForProcess() - - // Both task2 and task3 should be waiting on task1 - info := q.Info(ctx) - assert.Equal(t, 2, info.Stats.WaitingOnDeps) - - // Complete task1 - this triggers updateDepStatusInQueue for waitingOnDeps tasks - got1, _ := q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, q.Done(ctx, got1.ID, model.StatusSuccess)) - - waitForProcess() - - // Poll task2 and task3 - both should have task1's success status - got2, _ := q.Poll(ctx, 2, filterFnTrue) - got3, _ := q.Poll(ctx, 3, filterFnTrue) - - assert.Equal(t, model.StatusSuccess, got2.DepStatus["1"]) - assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"]) - - // Now test updateDepStatusInQueue for tasks in pending and running queues - task4 := &model.Task{ID: "4"} - task5 := &model.Task{ - ID: "5", - Dependencies: []string{"4"}, - DepStatus: make(map[string]model.StatusValue), - } - task6 := &model.Task{ - ID: "6", - Dependencies: []string{"4"}, - DepStatus: make(map[string]model.StatusValue), - RunOn: []string{"success", "failure"}, - } - - assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task4, task5, task6})) - waitForProcess() - - // Poll task4 and complete it while task5 and task6 are waiting - got4, _ := q.Poll(ctx, 4, filterFnTrue) - assert.NoError(t, q.Error(ctx, got4.ID, fmt.Errorf("failed"))) - - waitForProcess() - - // task5 should not run (default behavior on failure) - got5, _ := q.Poll(ctx, 5, filterFnTrue) - assert.Equal(t, model.StatusFailure, got5.DepStatus["4"]) - assert.False(t, got5.ShouldRun()) - - // task6 should run (has failure in RunOn) - got6, _ := q.Poll(ctx, 6, filterFnTrue) - assert.Equal(t, model.StatusFailure, got6.DepStatus["4"]) - assert.True(t, got6.ShouldRun()) -} From 08f0c43be32806900b995cd709d4977a0ab30d71 Mon Sep 17 00:00:00 2001 From: 6543 <6543@obermui.de> Date: Mon, 26 Jan 2026 19:55:06 +0100 Subject: [PATCH 12/13] add edgecases --- server/queue/fifo_test.go | 359 +++++++++++++++++++++++++++++++++++++- 1 file changed, 355 insertions(+), 4 deletions(-) diff --git a/server/queue/fifo_test.go b/server/queue/fifo_test.go index 0ffaed96937..56b360024a1 100644 --- a/server/queue/fifo_test.go +++ b/server/queue/fifo_test.go @@ -72,11 +72,24 @@ func TestFifoBasicOperations(t *testing.T) { assert.Len(t, info.Pending, 0) assert.Len(t, info.Running, 1) + // Edge case: verify task can't be polled again while running + pollCtx, pollCancel := context.WithTimeout(ctx, 100*time.Millisecond) + _, err = q.Poll(pollCtx, 2, filterFnTrue) + pollCancel() + assert.Error(t, err) // Should timeout/cancel, not return the same task + assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) waitForProcess() info = q.Info(ctx) assert.Len(t, info.Running, 0) + + // Edge case: Done on already completed task should handle gracefully + err = q.Done(ctx, got.ID, model.StatusSuccess) + // Document current behavior - should either error or be idempotent + if err != nil { + assert.Error(t, err) + } }) t.Run("error handling", func(t *testing.T) { @@ -92,6 +105,13 @@ func TestFifoBasicOperations(t *testing.T) { assert.Len(t, info.Running, 0) assert.Error(t, q.Error(ctx, "totally-fake-id", fmt.Errorf("test error"))) + + // Edge case: Error on task that's already errored + err := q.Error(ctx, got.ID, fmt.Errorf("double error")) + // Should either error or be idempotent + if err != nil { + assert.Error(t, err) + } }) t.Run("error at once", func(t *testing.T) { @@ -127,6 +147,19 @@ func TestFifoBasicOperations(t *testing.T) { waitForProcess() info = q.Info(ctx) assert.Len(t, info.Running, 0) + + // Edge case: ErrorAtOnce with empty slice + err = q.ErrorAtOnce(ctx, []string{}, fmt.Errorf("no tasks")) + // Should handle gracefully, potentially no-op + + // Edge case: ErrorAtOnce with nil error + task5 := &model.Task{ID: "batch-5"} + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task5})) + waitForProcess() + got5, _ := q.Poll(ctx, 3, filterFnTrue) + err = q.ErrorAtOnce(ctx, []string{got5.ID}, nil) + // Should handle nil error gracefully + waitForProcess() }) t.Run("error at once with waiting deps", func(t *testing.T) { @@ -149,6 +182,10 @@ func TestFifoBasicOperations(t *testing.T) { info = q.Info(ctx) assert.Equal(t, 0, info.Stats.WaitingOnDeps) assert.Len(t, info.Pending, 0) + + // Edge case: verify both tasks are actually gone, not stuck somewhere + assert.Len(t, info.Running, 0) + assert.Len(t, info.WaitingOnDeps, 0) }) t.Run("error at once cancellation", func(t *testing.T) { @@ -171,6 +208,11 @@ func TestFifoBasicOperations(t *testing.T) { got2, _ := q.Poll(ctx, 2, filterFnTrue) assert.Equal(t, model.StatusKilled, got2.DepStatus["cancel-prop-1"]) + + // Edge case: verify ErrCancel results in StatusKilled not StatusFailure + assert.NotEqual(t, model.StatusFailure, got2.DepStatus["cancel-prop-1"]) + assert.NoError(t, q.Done(ctx, got2.ID, model.StatusSuccess)) + waitForProcess() }) t.Run("pause resume", func(t *testing.T) { @@ -187,10 +229,34 @@ func TestFifoBasicOperations(t *testing.T) { t0 := time.Now() assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask})) waitForProcess() + + // Edge case: verify queue is actually paused + info := q.Info(ctx) + assert.True(t, info.Paused) + assert.Len(t, info.Pending, 1) + assert.Len(t, info.Running, 0) + q.Resume() wg.Wait() assert.Greater(t, time.Since(t0), 20*time.Millisecond) + + // Edge case: verify queue is unpaused + info = q.Info(ctx) + assert.False(t, info.Paused) + + // Edge case: multiple pause/resume cycles + task2 := &model.Task{ID: "pause-2"} + q.Pause() + q.Pause() // Double pause + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2})) + waitForProcess() + q.Resume() + q.Resume() // Double resume + waitForProcess() + got, _ := q.Poll(ctx, 99, filterFnTrue) + assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) + waitForProcess() }) } @@ -240,6 +306,10 @@ func TestFifoDependencies(t *testing.T) { waitForProcess() info = q.Info(ctx) assert.Equal(t, 0, info.Stats.WaitingOnDeps) + + // Edge case: verify DepStatus is correctly set before polling + assert.NotEmpty(t, task2.DepStatus) + assert.NotEmpty(t, task3.DepStatus) }) t.Run("multiple dependencies", func(t *testing.T) { @@ -278,6 +348,11 @@ func TestFifoDependencies(t *testing.T) { (got3.DepStatus["multi-dep-1"] == model.StatusSuccess && got3.DepStatus["multi-dep-2"] == model.StatusFailure) || (got3.DepStatus["multi-dep-1"] == model.StatusFailure && got3.DepStatus["multi-dep-2"] == model.StatusSuccess)) assert.False(t, got3.ShouldRun()) + + // Edge case: verify both deps are tracked + assert.Len(t, got3.DepStatus, 2) + assert.NoError(t, q.Done(ctx, got3.ID, model.StatusSkipped)) + waitForProcess() }) t.Run("transitive dependencies", func(t *testing.T) { @@ -308,6 +383,12 @@ func TestFifoDependencies(t *testing.T) { got, _ = q.Poll(ctx, 3, filterFnTrue) assert.Equal(t, model.StatusSkipped, got.DepStatus["trans-2"]) assert.False(t, got.ShouldRun()) + + // Edge case: verify transitive failure propagates correctly + // task3 should see trans-2 as skipped, not trans-1's status + assert.NotContains(t, got.DepStatus, "trans-1") + assert.NoError(t, q.Done(ctx, got.ID, model.StatusSkipped)) + waitForProcess() }) t.Run("dependency status propagation", func(t *testing.T) { @@ -341,6 +422,12 @@ func TestFifoDependencies(t *testing.T) { assert.Equal(t, model.StatusSuccess, got2.DepStatus["prop-1"]) assert.Equal(t, model.StatusSuccess, got3.DepStatus["prop-1"]) + // Edge case: verify both tasks can be polled concurrently + assert.NotEqual(t, got2.ID, got3.ID) + assert.NoError(t, q.Done(ctx, got2.ID, model.StatusSuccess)) + assert.NoError(t, q.Done(ctx, got3.ID, model.StatusSuccess)) + waitForProcess() + task4 := &model.Task{ID: "prop-4"} task5 := &model.Task{ ID: "prop-5", @@ -369,6 +456,119 @@ func TestFifoDependencies(t *testing.T) { got6, _ := q.Poll(ctx, 6, filterFnTrue) assert.Equal(t, model.StatusFailure, got6.DepStatus["prop-4"]) assert.True(t, got6.ShouldRun()) + + // Edge case: complete dependent tasks + assert.NoError(t, q.Done(ctx, got5.ID, model.StatusSkipped)) + assert.NoError(t, q.Done(ctx, got6.ID, model.StatusSuccess)) + waitForProcess() + }) + + // Edge case: circular dependency detection (should be handled or cause issue) + t.Run("circular dependencies", func(t *testing.T) { + task1 := &model.Task{ + ID: "circ-1", + Dependencies: []string{"circ-2"}, + DepStatus: make(map[string]model.StatusValue), + } + task2 := &model.Task{ + ID: "circ-2", + Dependencies: []string{"circ-1"}, + DepStatus: make(map[string]model.StatusValue), + } + + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2})) + waitForProcess() + + info := q.Info(ctx) + // Both should be waiting on deps - this is a deadlock scenario + assert.Equal(t, 2, info.Stats.WaitingOnDeps) + assert.Len(t, info.Pending, 0) + + // Verify they never become available for polling + pollCtx, pollCancel := context.WithTimeout(ctx, 200*time.Millisecond) + _, err := q.Poll(pollCtx, 99, filterFnTrue) + pollCancel() + assert.Error(t, err) // Should timeout + + // Clean up the deadlocked tasks + assert.NoError(t, q.ErrorAtOnce(ctx, []string{"circ-1", "circ-2"}, fmt.Errorf("circular dep"))) + waitForProcess() + }) + + // Edge case: dependency on non-existent task + // NOTE: This reveals a potential issue - the queue doesn't validate dependencies exist. + // If a dependency was never added to the queue, the task will run immediately since + // depsInQueue() only checks currently pending/running tasks, not if deps will arrive. + t.Run("non-existent dependency", func(t *testing.T) { + task1 := &model.Task{ + ID: "orphan-1", + Dependencies: []string{"does-not-exist"}, + DepStatus: make(map[string]model.StatusValue), + } + + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1})) + waitForProcess() + + info := q.Info(ctx) + // Current implementation: task doesn't wait if dependency not in queue + // This means tasks with typos in dependency names will run immediately! + assert.Equal(t, 0, info.Stats.WaitingOnDeps) + assert.Len(t, info.Pending, 1) + + // Task will be available for polling even though dependency doesn't exist + got, err := q.Poll(ctx, 99, filterFnTrue) + assert.NoError(t, err) + assert.Equal(t, "orphan-1", got.ID) + + // DepStatus will be empty since dependency never completed + assert.Empty(t, got.DepStatus) + + // Clean up + assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) + waitForProcess() + }) + + // Edge case: dependency added AFTER dependent task (race condition) + t.Run("dependency added after dependent", func(t *testing.T) { + // Push dependent task first + dependent := &model.Task{ + ID: "late-dep-child", + Dependencies: []string{"late-dep-parent"}, + DepStatus: make(map[string]model.StatusValue), + } + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dependent})) + waitForProcess() + + // At this point, dependent doesn't see parent in queue, so it won't wait + info := q.Info(ctx) + // Dependent should NOT be waiting since parent doesn't exist yet + initialWaiting := info.Stats.WaitingOnDeps + + // Now add the parent task + parent := &model.Task{ID: "late-dep-parent"} + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{parent})) + waitForProcess() + + // After filterWaiting runs, dependent SHOULD now see parent and wait + info = q.Info(ctx) + // The implementation calls filterWaiting() which rechecks dependencies + // So dependent should now be waiting + assert.Greater(t, info.Stats.WaitingOnDeps, initialWaiting, + "dependent should start waiting once parent is added") + + // Complete parent first + gotParent, _ := q.Poll(ctx, 1, filterFnTrue) + assert.Equal(t, "late-dep-parent", gotParent.ID, "parent should be polled first") + assert.NoError(t, q.Done(ctx, gotParent.ID, model.StatusSuccess)) + waitForProcess() + + // Now child should be unblocked with parent's status + gotChild, _ := q.Poll(ctx, 2, filterFnTrue) + assert.Equal(t, "late-dep-child", gotChild.ID) + assert.Equal(t, model.StatusSuccess, gotChild.DepStatus["late-dep-parent"]) + + assert.NoError(t, q.Done(ctx, gotChild.ID, model.StatusSuccess)) + waitForProcess() }) } @@ -392,6 +592,8 @@ func TestFifoLeaseManagement(t *testing.T) { select { case werr := <-errCh: assert.Error(t, werr) + // Edge case: verify error is ErrTaskExpired + assert.ErrorIs(t, werr, ErrTaskExpired) case <-time.After(2 * time.Second): t.Fatal("timeout waiting for Wait to return") } @@ -399,9 +601,11 @@ func TestFifoLeaseManagement(t *testing.T) { info := q.Info(ctx) assert.Len(t, info.Pending, 1) - // Clean up - poll and complete the task - got, _ = q.Poll(ctx, 1, filterFnTrue) - assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) + // Edge case: verify task was resubmitted to front of queue + got2, _ := q.Poll(ctx, 1, filterFnTrue) + assert.Equal(t, got.ID, got2.ID) // Same task resubmitted + + assert.NoError(t, q.Done(ctx, got2.ID, model.StatusSuccess)) waitForProcess() // Verify cleanup @@ -423,6 +627,7 @@ func TestFifoLeaseManagement(t *testing.T) { assert.ErrorIs(t, q.Extend(ctx, 1, got.ID), ErrAgentMissMatch) assert.ErrorIs(t, q.Extend(ctx, 1, "non-existent"), ErrNotFound) + // Edge case: extend multiple times rapidly for i := 0; i < 3; i++ { time.Sleep(30 * time.Millisecond) assert.NoError(t, q.Extend(ctx, 5, got.ID)) @@ -432,9 +637,10 @@ func TestFifoLeaseManagement(t *testing.T) { assert.Len(t, info.Running, 1) assert.Len(t, info.Pending, 0) - // Clean up - complete the extended task + // Edge case: extend after Done should error assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) waitForProcess() + assert.ErrorIs(t, q.Extend(ctx, 5, got.ID), ErrNotFound) // Verify cleanup info = q.Info(ctx) @@ -465,6 +671,7 @@ func TestFifoLeaseManagement(t *testing.T) { assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess)) wg.Wait() + // Edge case: Wait on non-existent task should return immediately assert.NoError(t, q.Wait(ctx, "non-existent")) dummyTask2 := &model.Task{ID: "wait-2"} @@ -490,6 +697,25 @@ func TestFifoLeaseManagement(t *testing.T) { assert.NoError(t, q.Done(ctx, got2.ID, model.StatusSuccess)) waitForProcess() + // Edge case: multiple concurrent waits on same task + dummyTask3 := &model.Task{ID: "wait-3"} + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask3})) + waitForProcess() + got3, _ := q.Poll(ctx, 1, filterFnTrue) + + var wg2 sync.WaitGroup + wg2.Add(3) + for i := 0; i < 3; i++ { + go func() { + assert.NoError(t, q.Wait(ctx, got3.ID)) + wg2.Done() + }() + } + + time.Sleep(10 * time.Millisecond) + assert.NoError(t, q.Done(ctx, got3.ID, model.StatusSuccess)) + wg2.Wait() + // Verify cleanup info = q.Info(ctx) assert.Len(t, info.Pending, 0) @@ -518,6 +744,10 @@ func TestFifoWorkerManagement(t *testing.T) { case <-time.After(time.Second): t.Fatal("Poll should return when context is canceled") } + + // Edge case: verify worker is cleaned up + info := q.Info(ctx) + assert.Equal(t, 0, info.Stats.Workers) }) t.Run("kick agent workers", func(t *testing.T) { @@ -530,6 +760,11 @@ func TestFifoWorkerManagement(t *testing.T) { } time.Sleep(50 * time.Millisecond) + + // Edge case: verify workers are registered before kicking + info := q.Info(ctx) + assert.Equal(t, 5, info.Stats.Workers) + q.KickAgentWorkers(42) kickedCount := 0 @@ -544,6 +779,77 @@ func TestFifoWorkerManagement(t *testing.T) { } } assert.Equal(t, 5, kickedCount) + + // Edge case: verify workers are removed after kicking + waitForProcess() + info = q.Info(ctx) + assert.Equal(t, 0, info.Stats.Workers) + + // Edge case: kick non-existent agent should be no-op + q.KickAgentWorkers(999) + }) + + // Edge case: mixed agent workers + t.Run("kick specific agent among multiple", func(t *testing.T) { + pollResults := make(chan struct { + agentID int64 + err error + }, 10) + + // Start workers for agent 1 + for i := 0; i < 3; i++ { + go func() { + _, err := q.Poll(ctx, 1, filterFnTrue) + pollResults <- struct { + agentID int64 + err error + }{1, err} + }() + } + + // Start workers for agent 2 + for i := 0; i < 3; i++ { + go func() { + _, err := q.Poll(ctx, 2, filterFnTrue) + pollResults <- struct { + agentID int64 + err error + }{2, err} + }() + } + + time.Sleep(50 * time.Millisecond) + info := q.Info(ctx) + assert.Equal(t, 6, info.Stats.Workers) + + // Kick only agent 1 + q.KickAgentWorkers(1) + + kickedAgent1 := 0 + kickedAgent2 := 0 + for i := 0; i < 3; i++ { + select { + case result := <-pollResults: + if errors.Is(result.err, context.Canceled) { + if result.agentID == 1 { + kickedAgent1++ + } else { + kickedAgent2++ + } + } + case <-time.After(time.Second): + t.Fatal("expected kicked workers to return") + } + } + + assert.Equal(t, 3, kickedAgent1) + assert.Equal(t, 0, kickedAgent2) + + // Clean up agent 2 workers + q.KickAgentWorkers(2) + for i := 0; i < 3; i++ { + <-pollResults + } }) } @@ -597,6 +903,27 @@ func TestFifoLabelBasedScoring(t *testing.T) { assert.Contains(t, []string{"1", "3"}, findTaskByAgent(receivedTasks, 1)) assert.Equal(t, "2", findTaskByAgent(receivedTasks, 2)) + + // Edge case: filter that rejects all tasks + filterRejectAll := func(task *model.Task) (bool, int) { + return false, 0 + } + + task4 := &model.Task{ID: "4", Labels: map[string]string{"org-id": "789"}} + assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task4})) + waitForProcess() + + pollCtx, pollCancel := context.WithTimeout(ctx, 200*time.Millisecond) + _, err := q.Poll(pollCtx, 99, filterRejectAll) + pollCancel() + assert.Error(t, err) // Should timeout as filter rejects task + + // Clean up remaining tasks + task3, _ := q.Poll(ctx, 1, filterFnTrue) + assert.NoError(t, q.Done(ctx, task3.ID, model.StatusSuccess)) + task4Got, _ := q.Poll(ctx, 99, filterFnTrue) + assert.NoError(t, q.Done(ctx, task4Got.ID, model.StatusSuccess)) + waitForProcess() } func TestShouldRunLogic(t *testing.T) { @@ -613,6 +940,11 @@ func TestShouldRunLogic(t *testing.T) { {"Success with both RunOn", model.StatusSuccess, []string{"success", "failure"}, true}, {"Skipped without RunOn", model.StatusSkipped, nil, false}, {"Skipped with failure RunOn", model.StatusSkipped, []string{"failure"}, true}, + // Edge cases + {"Killed without RunOn", model.StatusKilled, nil, false}, + {"Killed with failure RunOn", model.StatusKilled, []string{"failure"}, true}, + {"Success with success RunOn only", model.StatusSuccess, []string{"success"}, true}, + {"Failure with success RunOn only", model.StatusFailure, []string{"success"}, false}, } for _, tt := range tests { @@ -626,6 +958,25 @@ func TestShouldRunLogic(t *testing.T) { assert.Equal(t, tt.expected, task.ShouldRun()) }) } + + // Edge case: multiple dependencies with mixed statuses + t.Run("multiple deps mixed status", func(t *testing.T) { + task := &model.Task{ + ID: "3", + Dependencies: []string{"1", "2"}, + DepStatus: map[string]model.StatusValue{ + "1": model.StatusSuccess, + "2": model.StatusFailure, + }, + RunOn: nil, + } + // With default RunOn (nil), needs all deps successful + assert.False(t, task.ShouldRun()) + + task.RunOn = []string{"success", "failure"} + // With both RunOn, should run regardless + assert.True(t, task.ShouldRun()) + }) } func findTaskByAgent(tasks map[string]int64, agentID int64) string { From 3c6d9f5014b4131c7222ac6c36bd89a52fb16299 Mon Sep 17 00:00:00 2001 From: 6543 <6543@obermui.de> Date: Mon, 26 Jan 2026 23:52:19 +0100 Subject: [PATCH 13/13] fix lint errors --- server/queue/fifo.go | 16 ++++++++-------- server/queue/fifo_test.go | 10 ++++++---- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/server/queue/fifo.go b/server/queue/fifo.go index 5a5209b8e49..25acb25bfbb 100644 --- a/server/queue/fifo.go +++ b/server/queue/fifo.go @@ -198,11 +198,11 @@ func (q *fifo) Info(_ context.Context) InfoT { stats.Stats.Running = len(q.running) for element := q.pending.Front(); element != nil; element = element.Next() { - task := element.Value.(*model.Task) + task, _ := element.Value.(*model.Task) stats.Pending = append(stats.Pending, task) } for element := q.waitingOnDeps.Front(); element != nil; element = element.Next() { - task := element.Value.(*model.Task) + task, _ := element.Value.(*model.Task) stats.WaitingOnDeps = append(stats.WaitingOnDeps, task) } for _, entry := range q.running { @@ -286,7 +286,7 @@ func (q *fifo) filterWaiting() { q.waitingOnDeps = list.New() var filtered []*list.Element for element := q.pending.Front(); element != nil; element = element.Next() { - task := element.Value.(*model.Task) + task, _ := element.Value.(*model.Task) if q.depsInQueue(task) { log.Debug().Msgf("queue: waiting due to unmet dependencies %v", task.ID) q.waitingOnDeps.PushBack(task) @@ -305,7 +305,7 @@ func (q *fifo) assignToWorker() (*list.Element, *worker) { var bestScore int for element := q.pending.Front(); element != nil; element = element.Next() { - task := element.Value.(*model.Task) + task, _ := element.Value.(*model.Task) log.Debug().Msgf("queue: trying to assign task: %v with deps %v", task.ID, task.Dependencies) for worker := range q.workers { @@ -358,7 +358,7 @@ func (q *fifo) depsInQueue(task *model.Task) bool { // expects the q to be currently owned e.g. locked by caller! func (q *fifo) updateDepStatusInQueue(taskID string, status model.StatusValue) { for element := q.pending.Front(); element != nil; element = element.Next() { - pending := element.Value.(*model.Task) + pending, _ := element.Value.(*model.Task) for _, dep := range pending.Dependencies { if taskID == dep { pending.DepStatus[dep] = status @@ -375,7 +375,7 @@ func (q *fifo) updateDepStatusInQueue(taskID string, status model.StatusValue) { } for element := q.waitingOnDeps.Front(); element != nil; element = element.Next() { - waiting := element.Value.(*model.Task) + waiting, _ := element.Value.(*model.Task) for _, dep := range waiting.Dependencies { if taskID == dep { waiting.DepStatus[dep] = status @@ -390,7 +390,7 @@ func (q *fifo) removeFromPendingAndWaiting(taskID string) error { // we assume pending first for element := q.pending.Front(); element != nil; element = element.Next() { - task := element.Value.(*model.Task) + task, _ := element.Value.(*model.Task) if task.ID == taskID { log.Debug().Msgf("queue: %s is removed from pending", taskID) _ = q.pending.Remove(element) @@ -400,7 +400,7 @@ func (q *fifo) removeFromPendingAndWaiting(taskID string) error { // well looks like it's waiting for element := q.waitingOnDeps.Front(); element != nil; element = element.Next() { - task := element.Value.(*model.Task) + task, _ := element.Value.(*model.Task) if task.ID == taskID { log.Debug().Msgf("queue: %s is removed from waitingOnDeps", taskID) _ = q.waitingOnDeps.Remove(element) diff --git a/server/queue/fifo_test.go b/server/queue/fifo_test.go index 56b360024a1..b8f05f62578 100644 --- a/server/queue/fifo_test.go +++ b/server/queue/fifo_test.go @@ -150,6 +150,7 @@ func TestFifoBasicOperations(t *testing.T) { // Edge case: ErrorAtOnce with empty slice err = q.ErrorAtOnce(ctx, []string{}, fmt.Errorf("no tasks")) + assert.NoError(t, err) // Should handle gracefully, potentially no-op // Edge case: ErrorAtOnce with nil error @@ -158,6 +159,7 @@ func TestFifoBasicOperations(t *testing.T) { waitForProcess() got5, _ := q.Poll(ctx, 3, filterFnTrue) err = q.ErrorAtOnce(ctx, []string{got5.ID}, nil) + assert.NoError(t, err) // Should handle nil error gracefully waitForProcess() }) @@ -679,12 +681,12 @@ func TestFifoLeaseManagement(t *testing.T) { waitForProcess() got2, _ := q.Poll(ctx, 1, filterFnTrue) - waitCtx, waitCancel := context.WithCancel(ctx) + waitCtx, waitCancel := context.WithCancelCause(ctx) errCh := make(chan error, 1) go func() { errCh <- q.Wait(waitCtx, got2.ID) }() time.Sleep(50 * time.Millisecond) - waitCancel() + waitCancel(nil) select { case err := <-errCh: @@ -728,7 +730,7 @@ func TestFifoWorkerManagement(t *testing.T) { defer cancel(nil) t.Run("poll with context cancellation", func(t *testing.T) { - pollCtx, pollCancel := context.WithCancel(ctx) + pollCtx, pollCancel := context.WithCancelCause(ctx) errCh := make(chan error, 1) go func() { _, err := q.Poll(pollCtx, 1, filterFnTrue) @@ -736,7 +738,7 @@ func TestFifoWorkerManagement(t *testing.T) { }() time.Sleep(50 * time.Millisecond) - pollCancel() + pollCancel(nil) select { case err := <-errCh: