From b02e4a39acad543abcbaca64188534b34b8ce076 Mon Sep 17 00:00:00 2001
From: 6543 <6543@obermui.de>
Date: Mon, 26 Jan 2026 16:20:42 +0100
Subject: [PATCH 01/13] server queue: simplify and fix itteration of
 ErrorAtOnce

---
 server/queue/fifo.go             | 87 +++++++++++++++-----------------
 server/queue/mocks/mock_Queue.go | 57 ---------------------
 server/queue/persistent.go       | 13 -----
 server/queue/queue.go            |  6 +--
 4 files changed, 44 insertions(+), 119 deletions(-)

diff --git a/server/queue/fifo.go b/server/queue/fifo.go
index c4bd11abf9e..722fcecd7e7 100644
--- a/server/queue/fifo.go
+++ b/server/queue/fifo.go
@@ -17,6 +17,7 @@ package queue
 import (
 	"container/list"
 	"context"
+	"errors"
 	"fmt"
 	"slices"
 	"sync"
@@ -124,12 +125,17 @@ func (q *fifo) Error(_ context.Context, id string, err error) error {
 
 // ErrorAtOnce signals multiple done are complete with an error.
 func (q *fifo) ErrorAtOnce(_ context.Context, ids []string, err error) error {
+	if errors.Is(err, ErrCancel) {
+		return q.finished(ids, model.StatusKilled, err)
+	}
 	return q.finished(ids, model.StatusFailure, err)
 }
 
 func (q *fifo) finished(ids []string, exitStatus model.StatusValue, err error) error {
 	q.Lock()
+	defer q.Unlock()
 
+	var errs []error
 	for _, id := range ids {
 		taskEntry, ok := q.running[id]
 		if ok {
@@ -137,32 +143,12 @@ func (q *fifo) finished(ids []string, exitStatus model.StatusValue, err error) e
 			close(taskEntry.done)
 			delete(q.running, id)
 		} else {
-			q.removeFromPending(id)
+			errs = append(errs, q.removeFromPendingAndWaiting(id))
 		}
 		q.updateDepStatusInQueue(id, exitStatus)
 	}
 
-	q.Unlock()
-	return nil
-}
-
-// EvictAtOnce removes multiple pending tasks from the queue.
-func (q *fifo) EvictAtOnce(_ context.Context, taskIDs []string) error {
-	q.Lock()
-	defer q.Unlock()
-
-	for _, id := range taskIDs {
-		var next *list.Element
-		for element := q.pending.Front(); element != nil; element = next {
-			next = element.Next()
-			task, ok := element.Value.(*model.Task)
-			if ok && task.ID == id {
-				q.pending.Remove(element)
-				return nil
-			}
-		}
-	}
-	return ErrNotFound
+	return errors.Join(errs...)
 }
 
 // Wait waits until the item is done executing.
@@ -296,9 +282,7 @@ func (q *fifo) filterWaiting() {
 	// rebuild waitingDeps
 	q.waitingOnDeps = list.New()
 	var filtered []*list.Element
-	var nextPending *list.Element
-	for element := q.pending.Front(); element != nil; element = nextPending {
-		nextPending = element.Next()
+	for element := q.pending.Front(); element != nil; element = element.Next() {
 		task, _ := element.Value.(*model.Task)
 		if q.depsInQueue(task) {
 			log.Debug().Msgf("queue: waiting due to unmet dependencies %v", task.ID)
@@ -352,9 +336,7 @@ func (q *fifo) resubmitExpiredPipelines() {
 }
 
 func (q *fifo) depsInQueue(task *model.Task) bool {
-	var next *list.Element
-	for element := q.pending.Front(); element != nil; element = next {
-		next = element.Next()
+	for element := q.pending.Front(); element != nil; element = element.Next() {
 		possibleDep, ok := element.Value.(*model.Task)
 		log.Debug().Msgf("queue: pending right now: %v", possibleDep.ID)
 		for _, dep := range task.Dependencies {
@@ -373,13 +355,13 @@ func (q *fifo) depsInQueue(task *model.Task) bool {
 }
 
 func (q *fifo) updateDepStatusInQueue(taskID string, status model.StatusValue) {
-	var next *list.Element
-	for element := q.pending.Front(); element != nil; element = next {
-		next = element.Next()
+	for element := q.pending.Front(); element != nil; element = element.Next() {
 		pending, ok := element.Value.(*model.Task)
-		for _, dep := range pending.Dependencies {
-			if ok && taskID == dep {
-				pending.DepStatus[dep] = status
+		if ok {
+			for _, dep := range pending.Dependencies {
+				if taskID == dep {
+					pending.DepStatus[dep] = status
+				}
 			}
 		}
 	}
@@ -392,27 +374,42 @@ func (q *fifo) updateDepStatusInQueue(taskID string, status model.StatusValue) {
 		}
 	}
 
-	for element := q.waitingOnDeps.Front(); element != nil; element = next {
-		next = element.Next()
+	for element := q.waitingOnDeps.Front(); element != nil; element = element.Next() {
 		waiting, ok := element.Value.(*model.Task)
-		for _, dep := range waiting.Dependencies {
-			if ok && taskID == dep {
-				waiting.DepStatus[dep] = status
+		if ok {
+			for _, dep := range waiting.Dependencies {
+				if taskID == dep {
+					waiting.DepStatus[dep] = status
+				}
 			}
 		}
 	}
 }
 
-func (q *fifo) removeFromPending(taskID string) {
+// removeFromPendingAndWaiting expects the q to be currently owned e.g. locked by caller
+func (q *fifo) removeFromPendingAndWaiting(taskID string) error {
 	log.Debug().Msgf("queue: trying to remove %s", taskID)
-	var next *list.Element
-	for element := q.pending.Front(); element != nil; element = next {
-		next = element.Next()
+
+	// we assume pending first
+	for element := q.pending.Front(); element != nil; element = element.Next() {
 		task, _ := element.Value.(*model.Task)
 		if task.ID == taskID {
 			log.Debug().Msgf("queue: %s is removed from pending", taskID)
-			q.pending.Remove(element)
-			return
+			_ = q.pending.Remove(element)
+			return nil
+		}
+	}
+
+	// well looks like it's waiting
+	for element := q.waitingOnDeps.Front(); element != nil; element = element.Next() {
+		task, _ := element.Value.(*model.Task)
+		if task.ID == taskID {
+			log.Debug().Msgf("queue: %s is removed from waitingOnDeps", taskID)
+			_ = q.waitingOnDeps.Remove(element)
+			return nil
 		}
 	}
+
+	// my bad could not be found
+	return ErrNotFound
 }
diff --git a/server/queue/mocks/mock_Queue.go b/server/queue/mocks/mock_Queue.go
index 97a215ed97a..0ae4eb09d5c 100644
--- a/server/queue/mocks/mock_Queue.go
+++ b/server/queue/mocks/mock_Queue.go
@@ -228,63 +228,6 @@ func (_c *MockQueue_ErrorAtOnce_Call) RunAndReturn(run func(c context.Context, i
 	return _c
 }
 
-// EvictAtOnce provides a mock function for the type MockQueue
-func (_mock *MockQueue) EvictAtOnce(c context.Context, ids []string) error {
-	ret := _mock.Called(c, ids)
-
-	if len(ret) == 0 {
-		panic("no return value specified for EvictAtOnce")
-	}
-
-	var r0 error
-	if returnFunc, ok := ret.Get(0).(func(context.Context, []string) error); ok {
-		r0 = returnFunc(c, ids)
-	} else {
-		r0 = ret.Error(0)
-	}
-	return r0
-}
-
-// MockQueue_EvictAtOnce_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'EvictAtOnce'
-type MockQueue_EvictAtOnce_Call struct {
-	*mock.Call
-}
-
-// EvictAtOnce is a helper method to define mock.On call
-//   - c context.Context
-//   - ids []string
-func (_e *MockQueue_Expecter) EvictAtOnce(c interface{}, ids interface{}) *MockQueue_EvictAtOnce_Call {
-	return &MockQueue_EvictAtOnce_Call{Call: _e.mock.On("EvictAtOnce", c, ids)}
-}
-
-func (_c *MockQueue_EvictAtOnce_Call) Run(run func(c context.Context, ids []string)) *MockQueue_EvictAtOnce_Call {
-	_c.Call.Run(func(args mock.Arguments) {
-		var arg0 context.Context
-		if args[0] != nil {
-			arg0 = args[0].(context.Context)
-		}
-		var arg1 []string
-		if args[1] != nil {
-			arg1 = args[1].([]string)
-		}
-		run(
-			arg0,
-			arg1,
-		)
-	})
-	return _c
-}
-
-func (_c *MockQueue_EvictAtOnce_Call) Return(err error) *MockQueue_EvictAtOnce_Call {
-	_c.Call.Return(err)
-	return _c
-}
-
-func (_c *MockQueue_EvictAtOnce_Call) RunAndReturn(run func(c context.Context, ids []string) error) *MockQueue_EvictAtOnce_Call {
-	_c.Call.Return(run)
-	return _c
-}
-
 // Extend provides a mock function for the type MockQueue
 func (_mock *MockQueue) Extend(c context.Context, agentID int64, workflowID string) error {
 	ret := _mock.Called(c, agentID, workflowID)
diff --git a/server/queue/persistent.go b/server/queue/persistent.go
index b5a887ed5f7..453a67e54c0 100644
--- a/server/queue/persistent.go
+++ b/server/queue/persistent.go
@@ -72,19 +72,6 @@ func (q *persistentQueue) Poll(c context.Context, agentID int64, f FilterFn) (*m
 	return task, err
 }
 
-// EvictAtOnce removes multiple pending tasks from the queue.
-func (q *persistentQueue) EvictAtOnce(c context.Context, ids []string) error {
-	if err := q.Queue.EvictAtOnce(c, ids); err != nil {
-		return err
-	}
-	for _, id := range ids {
-		if err := q.store.TaskDelete(id); err != nil {
-			return err
-		}
-	}
-	return nil
-}
-
 // Error signals the task is done with an error.
 func (q *persistentQueue) Error(c context.Context, id string, err error) error {
 	if err := q.Queue.Error(c, id, err); err != nil {
diff --git a/server/queue/queue.go b/server/queue/queue.go
index e4058871103..8ea7ad543c3 100644
--- a/server/queue/queue.go
+++ b/server/queue/queue.go
@@ -93,12 +93,10 @@ type Queue interface {
 	// Error signals the task is done with an error.
 	Error(c context.Context, id string, err error) error
 
-	// ErrorAtOnce signals multiple done are complete with an error.
+	// ErrorAtOnce signals multiple tasks are done and complete with an error.
+	// if still pending they will just get removed from the queue.
 	ErrorAtOnce(c context.Context, ids []string, err error) error
 
-	// EvictAtOnce removes multiple pending tasks from the queue.
-	EvictAtOnce(c context.Context, ids []string) error
-
 	// Wait waits until the task is complete.
 	Wait(c context.Context, id string) error
 

From 867a16b66eec10b937a6847119cb5b6ff8337ebe Mon Sep 17 00:00:00 2001
From: 6543 <6543@obermui.de>
Date: Mon, 26 Jan 2026 16:21:41 +0100
Subject: [PATCH 02/13] server queue: adjust consumer of ErrorAtOnce

---
 server/pipeline/cancel.go | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/server/pipeline/cancel.go b/server/pipeline/cancel.go
index 78612562d08..d351d5c90ea 100644
--- a/server/pipeline/cancel.go
+++ b/server/pipeline/cancel.go
@@ -39,31 +39,28 @@ func Cancel(ctx context.Context, _forge forge.Forge, store store.Store, repo *mo
 		return &ErrNotFound{Msg: err.Error()}
 	}
 
-	// First cancel/evict steps in the queue in one go
+	// First cancel/evict workflows in the queue in one go
 	var (
-		stepsToCancel []string
-		stepsToEvict  []string
+		workflowsToCancel []string
+		workflowsToEvict  []string
 	)
 	for _, workflow := range workflows {
 		if workflow.State == model.StatusRunning {
-			stepsToCancel = append(stepsToCancel, fmt.Sprint(workflow.ID))
+			workflowsToCancel = append(workflowsToCancel, fmt.Sprint(workflow.ID))
 		}
 		if workflow.State == model.StatusPending {
-			stepsToEvict = append(stepsToEvict, fmt.Sprint(workflow.ID))
+			workflowsToEvict = append(workflowsToEvict, fmt.Sprint(workflow.ID))
 		}
 	}
 
-	if len(stepsToEvict) != 0 {
-		if err := server.Config.Services.Queue.EvictAtOnce(ctx, stepsToEvict); err != nil {
-			log.Error().Err(err).Msgf("queue: evict_at_once: %v", stepsToEvict)
-		}
-		if err := server.Config.Services.Queue.ErrorAtOnce(ctx, stepsToEvict, queue.ErrCancel); err != nil {
-			log.Error().Err(err).Msgf("queue: evict_at_once: %v", stepsToEvict)
+	if len(workflowsToEvict) != 0 {
+		if err := server.Config.Services.Queue.ErrorAtOnce(ctx, workflowsToEvict, queue.ErrCancel); err != nil {
+			log.Error().Err(err).Msgf("queue: evict_at_once: %v", workflowsToEvict)
 		}
 	}
-	if len(stepsToCancel) != 0 {
-		if err := server.Config.Services.Queue.ErrorAtOnce(ctx, stepsToCancel, queue.ErrCancel); err != nil {
-			log.Error().Err(err).Msgf("queue: evict_at_once: %v", stepsToCancel)
+	if len(workflowsToCancel) != 0 {
+		if err := server.Config.Services.Queue.ErrorAtOnce(ctx, workflowsToCancel, queue.ErrCancel); err != nil {
+			log.Error().Err(err).Msgf("queue: evict_at_once: %v", workflowsToCancel)
 		}
 	}
 

From 96a615f6780b5e76cf230172715b3630b1f429bc Mon Sep 17 00:00:00 2001
From: 6543 <6543@obermui.de>
Date: Mon, 26 Jan 2026 16:22:36 +0100
Subject: [PATCH 03/13] fix code comment nit

---
 cmd/agent/core/agent.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmd/agent/core/agent.go b/cmd/agent/core/agent.go
index 889cd2c657f..791a38d4b52 100644
--- a/cmd/agent/core/agent.go
+++ b/cmd/agent/core/agent.go
@@ -296,7 +296,7 @@ func run(ctx context.Context, c *cli.Command, backends []types.Backend) error {
 					return nil
 				}
 
-				log.Debug().Msg("polling new steps")
+				log.Debug().Msg("polling new workflow")
 				if err := runner.Run(agentCtx, shutdownCtx); err != nil {
 					log.Error().Err(err).Msg("runner error, retrying...")
 					// Check if context is canceled

From 2114fb5ed54e1654854fb941b8190b56552fe162 Mon Sep 17 00:00:00 2001
From: 6543 <6543@obermui.de>
Date: Mon, 26 Jan 2026 16:26:56 +0100
Subject: [PATCH 04/13] looking via git blame we can rm it as the queue code
 was already touched and refactored by us a lot and now is mosty ours

---
 server/queue/LICENSE | 29 -----------------------------
 1 file changed, 29 deletions(-)
 delete mode 100644 server/queue/LICENSE

diff --git a/server/queue/LICENSE b/server/queue/LICENSE
deleted file mode 100644
index 64e202179da..00000000000
--- a/server/queue/LICENSE
+++ /dev/null
@@ -1,29 +0,0 @@
-BSD 3-Clause License
-
-Copyright (c) 2017, Brad Rydzewski
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-* Redistributions of source code must retain the above copyright notice, this
-  list of conditions and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright notice,
-  this list of conditions and the following disclaimer in the documentation
-  and/or other materials provided with the distribution.
-
-* Neither the name of the copyright holder nor the names of its
-  contributors may be used to endorse or promote products derived from
-  this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

From 0e438949cb40f996c8be63bdb6ec0ed16ae0018d Mon Sep 17 00:00:00 2001
From: 6543 <6543@obermui.de>
Date: Mon, 26 Jan 2026 16:31:23 +0100
Subject: [PATCH 05/13] its ok we cast only internally

---
 server/queue/fifo.go | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/server/queue/fifo.go b/server/queue/fifo.go
index 722fcecd7e7..24c664d242f 100644
--- a/server/queue/fifo.go
+++ b/server/queue/fifo.go
@@ -356,12 +356,10 @@ func (q *fifo) depsInQueue(task *model.Task) bool {
 
 func (q *fifo) updateDepStatusInQueue(taskID string, status model.StatusValue) {
 	for element := q.pending.Front(); element != nil; element = element.Next() {
-		pending, ok := element.Value.(*model.Task)
-		if ok {
-			for _, dep := range pending.Dependencies {
-				if taskID == dep {
-					pending.DepStatus[dep] = status
-				}
+		pending, _ := element.Value.(*model.Task)
+		for _, dep := range pending.Dependencies {
+			if taskID == dep {
+				pending.DepStatus[dep] = status
 			}
 		}
 	}
@@ -375,12 +373,10 @@ func (q *fifo) updateDepStatusInQueue(taskID string, status model.StatusValue) {
 	}
 
 	for element := q.waitingOnDeps.Front(); element != nil; element = element.Next() {
-		waiting, ok := element.Value.(*model.Task)
-		if ok {
-			for _, dep := range waiting.Dependencies {
-				if taskID == dep {
-					waiting.DepStatus[dep] = status
-				}
+		waiting, _ := element.Value.(*model.Task)
+		for _, dep := range waiting.Dependencies {
+			if taskID == dep {
+				waiting.DepStatus[dep] = status
 			}
 		}
 	}

From 40b2aae8e8635da67877e9dc299f904ae420b432 Mon Sep 17 00:00:00 2001
From: 6543 <6543@obermui.de>
Date: Mon, 26 Jan 2026 18:33:37 +0100
Subject: [PATCH 06/13] fix tests lint etc

---
 server/queue/fifo.go       | 12 ++++++------
 server/queue/fifo_test.go  |  2 +-
 server/queue/persistent.go |  3 ++-
 server/queue/queue.go      |  5 ++++-
 4 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/server/queue/fifo.go b/server/queue/fifo.go
index 24c664d242f..ea31a9ff6a0 100644
--- a/server/queue/fifo.go
+++ b/server/queue/fifo.go
@@ -18,7 +18,6 @@ import (
 	"container/list"
 	"context"
 	"errors"
-	"fmt"
 	"slices"
 	"sync"
 	"time"
@@ -59,8 +58,6 @@ type fifo struct {
 // as the agent pull in 10 milliseconds we should also give them work asap.
 const processTimeInterval = 100 * time.Millisecond
 
-var ErrWorkerKicked = fmt.Errorf("worker was kicked")
-
 // NewMemoryQueue returns a new fifo queue.
 func NewMemoryQueue(ctx context.Context) Queue {
 	q := &fifo{
@@ -123,7 +120,8 @@ func (q *fifo) Error(_ context.Context, id string, err error) error {
 	return q.finished([]string{id}, model.StatusFailure, err)
 }
 
-// ErrorAtOnce signals multiple done are complete with an error.
+// ErrorAtOnce signals multiple tasks are done and complete with an error.
+// If still pending they will just get removed from the queue.
 func (q *fifo) ErrorAtOnce(_ context.Context, ids []string, err error) error {
 	if errors.Is(err, ErrCancel) {
 		return q.finished(ids, model.StatusKilled, err)
@@ -131,6 +129,7 @@ func (q *fifo) ErrorAtOnce(_ context.Context, ids []string, err error) error {
 	return q.finished(ids, model.StatusFailure, err)
 }
 
+// locks the queue itself!
 func (q *fifo) finished(ids []string, exitStatus model.StatusValue, err error) error {
 	q.Lock()
 	defer q.Unlock()
@@ -354,6 +353,7 @@ func (q *fifo) depsInQueue(task *model.Task) bool {
 	return false
 }
 
+// expects the q to be currently owned e.g. locked by caller!
 func (q *fifo) updateDepStatusInQueue(taskID string, status model.StatusValue) {
 	for element := q.pending.Front(); element != nil; element = element.Next() {
 		pending, _ := element.Value.(*model.Task)
@@ -382,7 +382,7 @@ func (q *fifo) updateDepStatusInQueue(taskID string, status model.StatusValue) {
 	}
 }
 
-// removeFromPendingAndWaiting expects the q to be currently owned e.g. locked by caller
+// expects the q to be currently owned e.g. locked by caller!
 func (q *fifo) removeFromPendingAndWaiting(taskID string) error {
 	log.Debug().Msgf("queue: trying to remove %s", taskID)
 
@@ -406,6 +406,6 @@ func (q *fifo) removeFromPendingAndWaiting(taskID string) error {
 		}
 	}
 
-	// my bad could not be found
+	// well it could not be found
 	return ErrNotFound
 }
diff --git a/server/queue/fifo_test.go b/server/queue/fifo_test.go
index 6270a033762..18168a53c43 100644
--- a/server/queue/fifo_test.go
+++ b/server/queue/fifo_test.go
@@ -430,7 +430,7 @@ func TestFifoCancel(t *testing.T) {
 
 	time.Sleep(processTimeInterval * 2)
 	info = q.Info(ctx)
-	assert.Len(t, info.Pending, 2, "canceled are rescheduled")
+	assert.Len(t, info.Pending, 0, "canceled are rescheduled")
 	assert.Len(t, info.Running, 0, "canceled are rescheduled")
 	assert.Len(t, info.WaitingOnDeps, 0, "canceled are rescheduled")
 }
diff --git a/server/queue/persistent.go b/server/queue/persistent.go
index 453a67e54c0..2178389e75b 100644
--- a/server/queue/persistent.go
+++ b/server/queue/persistent.go
@@ -80,7 +80,8 @@ func (q *persistentQueue) Error(c context.Context, id string, err error) error {
 	return q.store.TaskDelete(id)
 }
 
-// ErrorAtOnce signals multiple tasks are done with an error.
+// ErrorAtOnce signals multiple tasks are done and complete with an error.
+// If still pending they will just get removed from the queue.
 func (q *persistentQueue) ErrorAtOnce(c context.Context, ids []string, err error) error {
 	if err := q.Queue.ErrorAtOnce(c, ids, err); err != nil {
 		return err
diff --git a/server/queue/queue.go b/server/queue/queue.go
index 8ea7ad543c3..d924eddcf3a 100644
--- a/server/queue/queue.go
+++ b/server/queue/queue.go
@@ -36,6 +36,9 @@ var (
 
 	// ErrTaskExpired indicates a running task exceeded its lease/deadline and was resubmitted.
 	ErrTaskExpired = errors.New("queue: task expired")
+
+	// ErrWorkerKicked worker of an agent got kicked.
+	ErrWorkerKicked = errors.New("worker was kicked")
 )
 
 // InfoT provides runtime information.
@@ -94,7 +97,7 @@ type Queue interface {
 	Error(c context.Context, id string, err error) error
 
 	// ErrorAtOnce signals multiple tasks are done and complete with an error.
-	// if still pending they will just get removed from the queue.
+	// If still pending they will just get removed from the queue.
 	ErrorAtOnce(c context.Context, ids []string, err error) error
 
 	// Wait waits until the task is complete.

From e1c876ff9c8427eb7699435c3c085063f34f03e7 Mon Sep 17 00:00:00 2001
From: 6543 <6543@obermui.de>
Date: Mon, 26 Jan 2026 19:05:07 +0100
Subject: [PATCH 07/13] fifo 98% coverage

---
 server/queue/fifo.go      |  26 +-
 server/queue/fifo_test.go | 846 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 862 insertions(+), 10 deletions(-)

diff --git a/server/queue/fifo.go b/server/queue/fifo.go
index ea31a9ff6a0..69eb3c97cc3 100644
--- a/server/queue/fifo.go
+++ b/server/queue/fifo.go
@@ -135,15 +135,21 @@ func (q *fifo) finished(ids []string, exitStatus model.StatusValue, err error) e
 	defer q.Unlock()
 
 	var errs []error
+	// we first process the tasks itself
 	for _, id := range ids {
-		taskEntry, ok := q.running[id]
-		if ok {
+		if taskEntry, ok := q.running[id]; ok {
 			taskEntry.error = err
 			close(taskEntry.done)
 			delete(q.running, id)
 		} else {
 			errs = append(errs, q.removeFromPendingAndWaiting(id))
 		}
+	}
+
+	// next we aim for there dependencies
+	// we do this because in our ids list there could be tasks and its dependencies
+	// so not to mess things up
+	for _, id := range ids {
 		q.updateDepStatusInQueue(id, exitStatus)
 	}
 
@@ -192,11 +198,11 @@ func (q *fifo) Info(_ context.Context) InfoT {
 	stats.Stats.Running = len(q.running)
 
 	for element := q.pending.Front(); element != nil; element = element.Next() {
-		task, _ := element.Value.(*model.Task)
+		task := element.Value.(*model.Task)
 		stats.Pending = append(stats.Pending, task)
 	}
 	for element := q.waitingOnDeps.Front(); element != nil; element = element.Next() {
-		task, _ := element.Value.(*model.Task)
+		task := element.Value.(*model.Task)
 		stats.WaitingOnDeps = append(stats.WaitingOnDeps, task)
 	}
 	for _, entry := range q.running {
@@ -282,7 +288,7 @@ func (q *fifo) filterWaiting() {
 	q.waitingOnDeps = list.New()
 	var filtered []*list.Element
 	for element := q.pending.Front(); element != nil; element = element.Next() {
-		task, _ := element.Value.(*model.Task)
+		task := element.Value.(*model.Task)
 		if q.depsInQueue(task) {
 			log.Debug().Msgf("queue: waiting due to unmet dependencies %v", task.ID)
 			q.waitingOnDeps.PushBack(task)
@@ -303,7 +309,7 @@ func (q *fifo) assignToWorker() (*list.Element, *worker) {
 
 	for element := q.pending.Front(); element != nil; element = next {
 		next = element.Next()
-		task, _ := element.Value.(*model.Task)
+		task := element.Value.(*model.Task)
 		log.Debug().Msgf("queue: trying to assign task: %v with deps %v", task.ID, task.Dependencies)
 
 		for worker := range q.workers {
@@ -356,7 +362,7 @@ func (q *fifo) depsInQueue(task *model.Task) bool {
 // expects the q to be currently owned e.g. locked by caller!
 func (q *fifo) updateDepStatusInQueue(taskID string, status model.StatusValue) {
 	for element := q.pending.Front(); element != nil; element = element.Next() {
-		pending, _ := element.Value.(*model.Task)
+		pending := element.Value.(*model.Task)
 		for _, dep := range pending.Dependencies {
 			if taskID == dep {
 				pending.DepStatus[dep] = status
@@ -373,7 +379,7 @@ func (q *fifo) updateDepStatusInQueue(taskID string, status model.StatusValue) {
 	}
 
 	for element := q.waitingOnDeps.Front(); element != nil; element = element.Next() {
-		waiting, _ := element.Value.(*model.Task)
+		waiting := element.Value.(*model.Task)
 		for _, dep := range waiting.Dependencies {
 			if taskID == dep {
 				waiting.DepStatus[dep] = status
@@ -388,7 +394,7 @@ func (q *fifo) removeFromPendingAndWaiting(taskID string) error {
 
 	// we assume pending first
 	for element := q.pending.Front(); element != nil; element = element.Next() {
-		task, _ := element.Value.(*model.Task)
+		task := element.Value.(*model.Task)
 		if task.ID == taskID {
 			log.Debug().Msgf("queue: %s is removed from pending", taskID)
 			_ = q.pending.Remove(element)
@@ -398,7 +404,7 @@ func (q *fifo) removeFromPendingAndWaiting(taskID string) error {
 
 	// well looks like it's waiting
 	for element := q.waitingOnDeps.Front(); element != nil; element = element.Next() {
-		task, _ := element.Value.(*model.Task)
+		task := element.Value.(*model.Task)
 		if task.ID == taskID {
 			log.Debug().Msgf("queue: %s is removed from waitingOnDeps", taskID)
 			_ = q.waitingOnDeps.Remove(element)
diff --git a/server/queue/fifo_test.go b/server/queue/fifo_test.go
index 18168a53c43..71903a990c5 100644
--- a/server/queue/fifo_test.go
+++ b/server/queue/fifo_test.go
@@ -697,3 +697,849 @@ func TestFifoWithScoring(t *testing.T) {
 		assert.Contains(t, expectedAgents, agentID, "Task %s should be assigned to one of the expected agents", taskID)
 	}
 }
+
+func TestFifoErrorAtOnce(t *testing.T) {
+	ctx, cancel := context.WithCancelCause(t.Context())
+	t.Cleanup(func() { cancel(nil) })
+
+	task1 := genDummyTask()
+	task2 := &model.Task{
+		ID:           "2",
+		Dependencies: []string{"1"},
+		DepStatus:    make(map[string]model.StatusValue),
+	}
+	task3 := &model.Task{
+		ID: "3",
+	}
+
+	q, _ := NewMemoryQueue(ctx).(*fifo)
+	assert.NotNil(t, q)
+
+	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3}))
+
+	waitForProcess()
+	got1, err := q.Poll(ctx, 1, filterFnTrue)
+	assert.NoError(t, err)
+
+	got2, err := q.Poll(ctx, 2, filterFnTrue)
+	assert.NoError(t, err)
+
+	// Test ErrorAtOnce with running tasks
+	assert.NoError(t, q.ErrorAtOnce(ctx, []string{got1.ID, got2.ID}, fmt.Errorf("batch error")))
+
+	waitForProcess()
+	info := q.Info(ctx)
+	assert.Len(t, info.Running, 0, "expect tasks removed from running queue")
+	assert.Len(t, info.Pending, 1, "expect remaining task in pending")
+}
+
+func TestFifoErrorAtOnceWithPendingTasks(t *testing.T) {
+	ctx, cancel := context.WithCancelCause(t.Context())
+	t.Cleanup(func() { cancel(nil) })
+
+	task1 := genDummyTask()
+	task2 := &model.Task{
+		ID: "2",
+	}
+	task3 := &model.Task{
+		ID:           "3",
+		Dependencies: []string{"1"},
+		DepStatus:    make(map[string]model.StatusValue),
+	}
+
+	q, _ := NewMemoryQueue(ctx).(*fifo)
+	assert.NotNil(t, q)
+
+	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3}))
+
+	waitForProcess()
+
+	// ErrorAtOnce on pending tasks (task3 should be waiting on deps)
+	assert.NoError(t, q.ErrorAtOnce(ctx, []string{"2", "3"}, fmt.Errorf("pending error")))
+
+	waitForProcess()
+	info := q.Info(ctx)
+	assert.Len(t, info.Pending, 1, "only task1 should remain")
+	assert.Len(t, info.WaitingOnDeps, 0, "task3 should be removed from waiting")
+}
+
+func TestFifoErrorAtOnceWithCancelError(t *testing.T) {
+	ctx, cancel := context.WithCancelCause(t.Context())
+	t.Cleanup(func() { cancel(nil) })
+
+	task1 := genDummyTask()
+	task2 := &model.Task{
+		ID: "2",
+	}
+
+	q, _ := NewMemoryQueue(ctx).(*fifo)
+	assert.NotNil(t, q)
+
+	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2}))
+
+	waitForProcess()
+	got1, err := q.Poll(ctx, 1, filterFnTrue)
+	assert.NoError(t, err)
+
+	// Test ErrorAtOnce with ErrCancel - should result in StatusKilled
+	assert.NoError(t, q.ErrorAtOnce(ctx, []string{got1.ID}, ErrCancel))
+
+	waitForProcess()
+	info := q.Info(ctx)
+	assert.Len(t, info.Running, 0, "task should be removed from running")
+}
+
+func TestFifoExtend(t *testing.T) {
+	ctx, cancel := context.WithCancelCause(t.Context())
+	t.Cleanup(func() { cancel(nil) })
+
+	q, _ := NewMemoryQueue(ctx).(*fifo)
+	assert.NotNil(t, q)
+
+	dummyTask := genDummyTask()
+	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
+
+	waitForProcess()
+	got, err := q.Poll(ctx, 1, filterFnTrue)
+	assert.NoError(t, err)
+
+	// Test successful extend
+	assert.NoError(t, q.Extend(ctx, 1, got.ID))
+
+	// Test extend with wrong agent ID
+	err = q.Extend(ctx, 999, got.ID)
+	assert.ErrorIs(t, err, ErrAgentMissMatch)
+
+	// Clean up
+	assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
+
+	// Test extend with non-existent task
+	err = q.Extend(ctx, 1, "non-existent")
+	assert.ErrorIs(t, err, ErrNotFound)
+}
+
+func TestFifoExtendPreventsExpiration(t *testing.T) {
+	ctx, cancel := context.WithCancelCause(t.Context())
+	t.Cleanup(func() { cancel(nil) })
+
+	q, _ := NewMemoryQueue(ctx).(*fifo)
+	assert.NotNil(t, q)
+
+	// Set very short extension
+	q.extension = 50 * time.Millisecond
+
+	dummyTask := genDummyTask()
+	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
+
+	waitForProcess()
+	got, err := q.Poll(ctx, 1, filterFnTrue)
+	assert.NoError(t, err)
+
+	// Extend the deadline multiple times
+	for i := 0; i < 3; i++ {
+		time.Sleep(30 * time.Millisecond)
+		assert.NoError(t, q.Extend(ctx, 1, got.ID))
+	}
+
+	// Task should still be running
+	info := q.Info(ctx)
+	assert.Len(t, info.Running, 1, "task should still be running after extensions")
+	assert.Len(t, info.Pending, 0, "task should not be resubmitted")
+}
+
+func TestFifoKickAgentWorkers(t *testing.T) {
+	ctx, cancel := context.WithCancelCause(t.Context())
+	t.Cleanup(func() { cancel(nil) })
+
+	q, _ := NewMemoryQueue(ctx).(*fifo)
+	assert.NotNil(t, q)
+
+	dummyTask := genDummyTask()
+	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
+
+	// Start multiple workers for different agents
+	pollResults := make(chan error, 3)
+
+	for agentID := int64(1); agentID <= 3; agentID++ {
+		go func(id int64) {
+			_, err := q.Poll(ctx, id, filterFnTrue)
+			pollResults <- err
+		}(agentID)
+	}
+
+	// Give workers time to register
+	time.Sleep(50 * time.Millisecond)
+
+	// Kick workers for agent 2
+	q.KickAgentWorkers(2)
+
+	// Check that agent 2's worker was kicked
+	select {
+	case err := <-pollResults:
+		assert.Error(t, err)
+		// Check the cause of the context cancellation
+		if errors.Is(err, context.Canceled) {
+			// If ctx wasn't the one canceled, we need to check if it's our kicked error
+			// The error should either be ErrWorkerKicked or wrapped in context
+			assert.True(t, errors.Is(err, context.Canceled), "expected context.Canceled")
+		}
+	case <-time.After(time.Second):
+		t.Fatal("expected worker to be kicked")
+	}
+
+	// Other workers should still be waiting or get work
+	info := q.Info(ctx)
+	assert.GreaterOrEqual(t, info.Stats.Workers, 2, "other workers should still be registered")
+}
+
+func TestFifoKickAgentWorkersMultiple(t *testing.T) {
+	ctx, cancel := context.WithCancelCause(t.Context())
+	t.Cleanup(func() { cancel(nil) })
+
+	q, _ := NewMemoryQueue(ctx).(*fifo)
+	assert.NotNil(t, q)
+
+	// Start multiple workers for the same agent
+	pollResults := make(chan error, 5)
+
+	for i := 0; i < 5; i++ {
+		go func() {
+			_, err := q.Poll(ctx, 42, filterFnTrue)
+			pollResults <- err
+		}()
+	}
+
+	// Give workers time to register
+	time.Sleep(50 * time.Millisecond)
+
+	info := q.Info(ctx)
+	initialWorkers := info.Stats.Workers
+	assert.Equal(t, 5, initialWorkers, "all workers should be registered")
+
+	// Kick all workers for agent 42
+	q.KickAgentWorkers(42)
+
+	// All workers should be kicked
+	kickedCount := 0
+	for i := 0; i < 5; i++ {
+		select {
+		case err := <-pollResults:
+			assert.Error(t, err)
+			// All kicked workers will have their context canceled
+			if errors.Is(err, context.Canceled) {
+				kickedCount++
+			}
+		case <-time.After(time.Second):
+			t.Fatal("expected all workers to be kicked")
+		}
+	}
+
+	assert.Equal(t, 5, kickedCount, "all 5 workers should have been kicked")
+
+	time.Sleep(50 * time.Millisecond)
+	info = q.Info(ctx)
+	assert.Equal(t, 0, info.Stats.Workers, "all workers should be removed")
+}
+
+func TestFifoWaitContextCanceled(t *testing.T) {
+	ctx, cancel := context.WithCancelCause(t.Context())
+	t.Cleanup(func() { cancel(nil) })
+
+	q, _ := NewMemoryQueue(ctx).(*fifo)
+	assert.NotNil(t, q)
+
+	dummyTask := genDummyTask()
+	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
+
+	waitForProcess()
+	got, err := q.Poll(ctx, 1, filterFnTrue)
+	assert.NoError(t, err)
+
+	// Create a context that we can cancel
+	waitCtx, waitCancel := context.WithCancel(ctx)
+
+	errCh := make(chan error, 1)
+	go func() {
+		errCh <- q.Wait(waitCtx, got.ID)
+	}()
+
+	// Cancel the wait context
+	time.Sleep(50 * time.Millisecond)
+	waitCancel()
+
+	select {
+	case err := <-errCh:
+		assert.NoError(t, err, "Wait should return nil when context is canceled before task completes")
+	case <-time.After(time.Second):
+		t.Fatal("Wait should return when context is canceled")
+	}
+}
+
+func TestFifoWaitNonExistentTask(t *testing.T) {
+	ctx, cancel := context.WithCancelCause(t.Context())
+	t.Cleanup(func() { cancel(nil) })
+
+	q, _ := NewMemoryQueue(ctx).(*fifo)
+	assert.NotNil(t, q)
+
+	// Wait on a task that doesn't exist
+	err := q.Wait(ctx, "non-existent-task")
+	assert.NoError(t, err, "Wait should return nil for non-existent task")
+}
+
+func TestFifoUpdateDepStatusInQueue(t *testing.T) {
+	ctx, cancel := context.WithCancelCause(t.Context())
+	t.Cleanup(func() { cancel(nil) })
+
+	task1 := genDummyTask()
+	task2 := &model.Task{
+		ID:           "2",
+		Dependencies: []string{"1"},
+		DepStatus:    make(map[string]model.StatusValue),
+	}
+	task3 := &model.Task{
+		ID:           "3",
+		Dependencies: []string{"1"},
+		DepStatus:    make(map[string]model.StatusValue),
+	}
+	task4 := &model.Task{
+		ID:           "4",
+		Dependencies: []string{"2"},
+		DepStatus:    make(map[string]model.StatusValue),
+	}
+
+	q, _ := NewMemoryQueue(ctx).(*fifo)
+	assert.NotNil(t, q)
+
+	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3, task4}))
+
+	waitForProcess()
+	got, err := q.Poll(ctx, 1, filterFnTrue)
+	assert.NoError(t, err)
+	assert.Equal(t, task1, got)
+
+	// Complete task1 with success
+	assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
+
+	waitForProcess()
+
+	// Poll task2 to check its DepStatus was updated
+	got2, err := q.Poll(ctx, 2, filterFnTrue)
+	assert.NoError(t, err)
+	assert.Equal(t, model.StatusSuccess, got2.DepStatus["1"], "task2 should have task1's status updated")
+
+	// Poll task3 to check its DepStatus was updated
+	got3, err := q.Poll(ctx, 3, filterFnTrue)
+	assert.NoError(t, err)
+	assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"], "task3 should have task1's status updated")
+}
+
+func TestFifoRemoveFromPendingAndWaitingNotFound(t *testing.T) {
+	ctx, cancel := context.WithCancelCause(t.Context())
+	t.Cleanup(func() { cancel(nil) })
+
+	task1 := genDummyTask()
+
+	q, _ := NewMemoryQueue(ctx).(*fifo)
+	assert.NotNil(t, q)
+
+	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1}))
+
+	waitForProcess()
+	got, err := q.Poll(ctx, 1, filterFnTrue)
+	assert.NoError(t, err)
+
+	// Try to error a task that's running (it will be removed from running, not pending/waiting)
+	assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("test error")))
+
+	// Try to error a completely non-existent task
+	err = q.Error(ctx, "totally-fake-id", fmt.Errorf("test error"))
+	assert.Error(t, err, "should return error for non-existent task")
+}
+
+func TestFifoRemoveFromWaitingOnDeps(t *testing.T) {
+	ctx, cancel := context.WithCancelCause(t.Context())
+	t.Cleanup(func() { cancel(nil) })
+
+	task1 := genDummyTask()
+	task2 := &model.Task{
+		ID:           "2",
+		Dependencies: []string{"1"},
+		DepStatus:    make(map[string]model.StatusValue),
+	}
+	task3 := &model.Task{
+		ID:           "3",
+		Dependencies: []string{"1"},
+		DepStatus:    make(map[string]model.StatusValue),
+	}
+
+	q, _ := NewMemoryQueue(ctx).(*fifo)
+	assert.NotNil(t, q)
+
+	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3}))
+
+	waitForProcess()
+	info := q.Info(ctx)
+	assert.Equal(t, 2, info.Stats.WaitingOnDeps, "task2 and task3 should be waiting")
+
+	// Error task2 while it's in waitingOnDeps
+	assert.NoError(t, q.Error(ctx, "2", fmt.Errorf("cancel task")))
+
+	waitForProcess()
+	info = q.Info(ctx)
+	assert.Equal(t, 1, info.Stats.WaitingOnDeps, "only task3 should be waiting now")
+}
+
+func TestFifoPollContextCanceled(t *testing.T) {
+	ctx, cancel := context.WithCancelCause(t.Context())
+
+	q := NewMemoryQueue(ctx)
+
+	pollCtx, pollCancel := context.WithCancel(ctx)
+
+	errCh := make(chan error, 1)
+	go func() {
+		_, err := q.Poll(pollCtx, 1, filterFnTrue)
+		errCh <- err
+	}()
+
+	// Give Poll time to register the worker
+	time.Sleep(50 * time.Millisecond)
+
+	// Cancel the poll context
+	pollCancel()
+
+	select {
+	case err := <-errCh:
+		assert.Error(t, err)
+		assert.ErrorIs(t, err, context.Canceled)
+	case <-time.After(time.Second):
+		t.Fatal("Poll should return when context is canceled")
+	}
+
+	cancel(nil)
+}
+
+func TestFifoMultipleDepStatusUpdates(t *testing.T) {
+	ctx, cancel := context.WithCancelCause(t.Context())
+	t.Cleanup(func() { cancel(nil) })
+
+	task1 := genDummyTask()
+	task2 := &model.Task{
+		ID: "2",
+	}
+	task3 := &model.Task{
+		ID:           "3",
+		Dependencies: []string{"1", "2"},
+		DepStatus:    make(map[string]model.StatusValue),
+	}
+
+	q, _ := NewMemoryQueue(ctx).(*fifo)
+	assert.NotNil(t, q)
+
+	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3}))
+
+	waitForProcess()
+
+	// Get both independent tasks
+	got1, _ := q.Poll(ctx, 1, filterFnTrue)
+	got2, _ := q.Poll(ctx, 2, filterFnTrue)
+
+	// Complete one with success
+	assert.NoError(t, q.Done(ctx, got1.ID, model.StatusSuccess))
+
+	// Complete other with failure
+	assert.NoError(t, q.Error(ctx, got2.ID, fmt.Errorf("failed")))
+
+	waitForProcess()
+
+	// Get task3 and check both dependencies are updated
+	got3, err := q.Poll(ctx, 3, filterFnTrue)
+	assert.NoError(t, err)
+
+	assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"])
+	assert.Equal(t, model.StatusFailure, got3.DepStatus["2"])
+	assert.False(t, got3.ShouldRun(), "task3 should not run since one dependency failed")
+}
+
+func TestFifoErrorAtOnceCoverage(t *testing.T) {
+	ctx, cancel := context.WithCancelCause(t.Context())
+	t.Cleanup(func() { cancel(nil) })
+
+	q, _ := NewMemoryQueue(ctx).(*fifo)
+	assert.NotNil(t, q)
+
+	task1 := genDummyTask()
+	task2 := &model.Task{
+		ID: "2",
+	}
+
+	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2}))
+
+	waitForProcess()
+	got, err := q.Poll(ctx, 1, filterFnTrue)
+	assert.NoError(t, err)
+
+	// Test ErrorAtOnce with one running task and one non-existent task
+	// This should trigger the error case in finished() where removeFromPendingAndWaiting returns ErrNotFound
+	err = q.ErrorAtOnce(ctx, []string{got.ID, "non-existent-id"}, fmt.Errorf("batch error"))
+	assert.Error(t, err, "should return error when one of the tasks doesn't exist")
+	assert.ErrorIs(t, err, ErrNotFound, "error should contain ErrNotFound")
+
+	waitForProcess()
+	info := q.Info(ctx)
+	assert.Len(t, info.Running, 0, "running task should be removed")
+}
+
+func TestFifoErrorAtOnceMultipleNonExistent(t *testing.T) {
+	ctx, cancel := context.WithCancelCause(t.Context())
+	t.Cleanup(func() { cancel(nil) })
+
+	q, _ := NewMemoryQueue(ctx).(*fifo)
+	assert.NotNil(t, q)
+
+	task1 := genDummyTask()
+	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1}))
+
+	waitForProcess()
+	got, err := q.Poll(ctx, 1, filterFnTrue)
+	assert.NoError(t, err)
+
+	// Test ErrorAtOnce with multiple non-existent tasks
+	// This tests that errors.Join works correctly with multiple errors
+	err = q.ErrorAtOnce(ctx, []string{got.ID, "fake-1", "fake-2", "fake-3"}, fmt.Errorf("multiple errors"))
+	assert.Error(t, err, "should return joined errors")
+
+	// The error should contain multiple ErrNotFound instances
+	errStr := err.Error()
+	assert.Contains(t, errStr, "not found", "error should mention not found")
+}
+
+func TestFifoErrorAtOnceWithErrCancel(t *testing.T) {
+	ctx, cancel := context.WithCancelCause(t.Context())
+	t.Cleanup(func() { cancel(nil) })
+
+	q, _ := NewMemoryQueue(ctx).(*fifo)
+	assert.NotNil(t, q)
+
+	task1 := genDummyTask()
+	task2 := &model.Task{
+		ID:           "2",
+		Dependencies: []string{"1"},
+		DepStatus:    make(map[string]model.StatusValue),
+	}
+
+	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2}))
+
+	waitForProcess()
+	got, err := q.Poll(ctx, 1, filterFnTrue)
+	assert.NoError(t, err)
+
+	// Test the ErrCancel specific path which sets StatusKilled
+	assert.NoError(t, q.ErrorAtOnce(ctx, []string{got.ID}, ErrCancel))
+
+	waitForProcess()
+
+	// Get the dependent task and verify its dependency status is StatusKilled
+	got2, err := q.Poll(ctx, 2, filterFnTrue)
+	assert.NoError(t, err)
+	assert.Equal(t, model.StatusKilled, got2.DepStatus["1"], "dependency should be marked as killed")
+}
+
+func TestFifoExtendWithWrongAgent(t *testing.T) {
+	ctx, cancel := context.WithCancelCause(t.Context())
+	t.Cleanup(func() { cancel(nil) })
+
+	q, _ := NewMemoryQueue(ctx).(*fifo)
+	assert.NotNil(t, q)
+
+	dummyTask := genDummyTask()
+	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
+
+	waitForProcess()
+	got, err := q.Poll(ctx, 5, filterFnTrue) // Agent 5 polls the task
+	assert.NoError(t, err)
+	assert.Equal(t, int64(5), got.AgentID, "task should be assigned to agent 5")
+
+	// Test extend with correct agent - should succeed
+	assert.NoError(t, q.Extend(ctx, 5, got.ID))
+
+	// Test extend with wrong agent - should return ErrAgentMissMatch
+	err = q.Extend(ctx, 999, got.ID)
+	assert.ErrorIs(t, err, ErrAgentMissMatch, "should return ErrAgentMissMatch when wrong agent tries to extend")
+
+	// Test extend with another wrong agent
+	err = q.Extend(ctx, 1, got.ID)
+	assert.ErrorIs(t, err, ErrAgentMissMatch, "should return ErrAgentMissMatch for any wrong agent")
+
+	// Verify the correct agent can still extend
+	assert.NoError(t, q.Extend(ctx, 5, got.ID), "correct agent should still be able to extend")
+
+	// Clean up
+	assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
+}
+
+func TestFifoFinishedWithMultipleErrors(t *testing.T) {
+	ctx, cancel := context.WithCancelCause(t.Context())
+	t.Cleanup(func() { cancel(nil) })
+
+	q, _ := NewMemoryQueue(ctx).(*fifo)
+	assert.NotNil(t, q)
+
+	task1 := genDummyTask()
+	task2 := &model.Task{
+		ID:           "2",
+		Dependencies: []string{"1"},
+		DepStatus:    make(map[string]model.StatusValue),
+	}
+
+	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2}))
+
+	waitForProcess()
+
+	// Poll task1 so it's in running
+	got, err := q.Poll(ctx, 1, filterFnTrue)
+	assert.NoError(t, err)
+	assert.Equal(t, "1", got.ID)
+
+	// Now call ErrorAtOnce with:
+	// - got.ID (task1) which is in running - should succeed
+	// - "2" which is in waitingOnDeps - should succeed
+	// - "fake-1" which doesn't exist - should return ErrNotFound
+	// - "fake-2" which doesn't exist - should return ErrNotFound
+	err = q.ErrorAtOnce(ctx, []string{got.ID, "2", "fake-1", "fake-2"}, fmt.Errorf("test error"))
+
+	assert.Error(t, err, "should return error for non-existent tasks")
+
+	// Verify that the error contains multiple ErrNotFound (from errors.Join)
+	assert.ErrorIs(t, err, ErrNotFound, "error should contain ErrNotFound")
+
+	// Check that both real tasks were removed
+	waitForProcess()
+	info := q.Info(ctx)
+	assert.Len(t, info.Running, 0, "task1 should be removed from running")
+	assert.Len(t, info.WaitingOnDeps, 0, "task2 should be removed from waiting")
+	assert.Len(t, info.Pending, 0, "no tasks should be pending")
+}
+
+func TestFifoUpdateDepStatusInQueuePending(t *testing.T) {
+	ctx, cancel := context.WithCancelCause(t.Context())
+	t.Cleanup(func() { cancel(nil) })
+
+	task1 := genDummyTask()
+	task2 := &model.Task{
+		ID:           "2",
+		Dependencies: []string{"1"},
+		DepStatus:    make(map[string]model.StatusValue),
+	}
+	task3 := &model.Task{
+		ID:           "3",
+		Dependencies: []string{"1"},
+		DepStatus:    make(map[string]model.StatusValue),
+	}
+
+	q, _ := NewMemoryQueue(ctx).(*fifo)
+	assert.NotNil(t, q)
+
+	// Push all tasks - task2 and task3 will be in pending initially (before filterWaiting moves them)
+	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3}))
+
+	waitForProcess()
+
+	// At this point, task2 and task3 should be in waitingOnDeps
+	// but we want to test when they're in pending
+
+	// Get task1
+	got, err := q.Poll(ctx, 1, filterFnTrue)
+	assert.NoError(t, err)
+	assert.Equal(t, task1, got)
+
+	// Before completing task1, let's verify task2 and task3 are waiting
+	info := q.Info(ctx)
+	assert.Equal(t, 2, info.Stats.WaitingOnDeps, "task2 and task3 should be waiting")
+
+	// Now complete task1 - this should trigger updateDepStatusInQueue
+	// which will update DepStatus in the waitingOnDeps queue
+	assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
+
+	waitForProcess()
+
+	// Now get task2 and verify its DepStatus was updated
+	got2, err := q.Poll(ctx, 2, filterFnTrue)
+	assert.NoError(t, err)
+	assert.Equal(t, model.StatusSuccess, got2.DepStatus["1"],
+		"task2's DepStatus should be updated in pending queue")
+
+	// Get task3 and verify its DepStatus was updated
+	got3, err := q.Poll(ctx, 3, filterFnTrue)
+	assert.NoError(t, err)
+	assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"],
+		"task3's DepStatus should be updated in pending queue")
+}
+
+func TestFifoUpdateDepStatusInQueueRunning(t *testing.T) {
+	ctx, cancel := context.WithCancelCause(t.Context())
+	t.Cleanup(func() { cancel(nil) })
+
+	task1 := genDummyTask()
+	task2 := &model.Task{
+		ID: "2",
+	}
+	task3 := &model.Task{
+		ID:           "3",
+		Dependencies: []string{"1", "2"},
+		DepStatus:    make(map[string]model.StatusValue),
+	}
+	task4 := &model.Task{
+		ID:           "4",
+		Dependencies: []string{"1"},
+		DepStatus:    make(map[string]model.StatusValue),
+	}
+
+	q, _ := NewMemoryQueue(ctx).(*fifo)
+	assert.NotNil(t, q)
+
+	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3, task4}))
+
+	waitForProcess()
+
+	// Get task1 and task2 (both independent)
+	got1, err := q.Poll(ctx, 1, filterFnTrue)
+	assert.NoError(t, err)
+
+	got2, err := q.Poll(ctx, 2, filterFnTrue)
+	assert.NoError(t, err)
+
+	waitForProcess()
+
+	// Now complete task1 first
+	assert.NoError(t, q.Done(ctx, got1.ID, model.StatusSuccess))
+
+	waitForProcess()
+
+	// Now task3 and task4 are waiting on their dependencies
+	// task3 still needs task2, task4 should be ready now
+
+	// Get task4 which should now be available since task1 is done
+	got4, err := q.Poll(ctx, 4, filterFnTrue)
+	assert.NoError(t, err)
+	assert.Equal(t, model.StatusSuccess, got4.DepStatus["1"],
+		"task4's DepStatus should show task1 succeeded")
+
+	// Now complete task2
+	assert.NoError(t, q.Done(ctx, got2.ID, model.StatusSuccess))
+
+	waitForProcess()
+
+	// Now get task3 - both its dependencies are satisfied
+	got3, err := q.Poll(ctx, 3, filterFnTrue)
+	assert.NoError(t, err)
+	assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"],
+		"task3's DepStatus should show task1 succeeded")
+	assert.Equal(t, model.StatusSuccess, got3.DepStatus["2"],
+		"task3's DepStatus should show task2 succeeded")
+	assert.True(t, got3.ShouldRun(), "task3 should run since both deps succeeded")
+
+	// Clean up remaining tasks
+	assert.NoError(t, q.Done(ctx, got3.ID, model.StatusSuccess))
+	assert.NoError(t, q.Done(ctx, got4.ID, model.StatusSuccess))
+}
+
+func TestFifoUpdateDepStatusInQueueWaiting(t *testing.T) {
+	ctx, cancel := context.WithCancelCause(t.Context())
+	t.Cleanup(func() { cancel(nil) })
+
+	task1 := genDummyTask()
+	task2 := &model.Task{
+		ID:           "2",
+		Dependencies: []string{"1"},
+		DepStatus:    make(map[string]model.StatusValue),
+	}
+	task3 := &model.Task{
+		ID:           "3",
+		Dependencies: []string{"1"},
+		DepStatus:    make(map[string]model.StatusValue),
+	}
+	task4 := &model.Task{
+		ID:           "4",
+		Dependencies: []string{"1"},
+		DepStatus:    make(map[string]model.StatusValue),
+	}
+
+	q, _ := NewMemoryQueue(ctx).(*fifo)
+	assert.NotNil(t, q)
+
+	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3, task4}))
+
+	waitForProcess()
+
+	// Verify tasks are waiting on deps
+	info := q.Info(ctx)
+	assert.Equal(t, 3, info.Stats.WaitingOnDeps, "tasks 2, 3, 4 should be waiting")
+
+	// Get and complete task1
+	got, err := q.Poll(ctx, 1, filterFnTrue)
+	assert.NoError(t, err)
+	assert.Equal(t, task1, got)
+
+	// Complete task1 with failure - this should update DepStatus in waitingOnDeps
+	assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("task1 failed")))
+
+	waitForProcess()
+
+	// All waiting tasks should have their DepStatus updated
+	// Get each one and verify
+	for i := 2; i <= 4; i++ {
+		gotTask, err := q.Poll(ctx, int64(i), filterFnTrue)
+		assert.NoError(t, err)
+		assert.Equal(t, model.StatusFailure, gotTask.DepStatus["1"],
+			"task %d's DepStatus should be updated in waitingOnDeps", i)
+		assert.False(t, gotTask.ShouldRun(), "task %d should not run due to failed dependency", i)
+	}
+}
+
+func TestFifoUpdateDepStatusWithSkipped(t *testing.T) {
+	ctx, cancel := context.WithCancelCause(t.Context())
+	t.Cleanup(func() { cancel(nil) })
+
+	task1 := genDummyTask()
+	task2 := &model.Task{
+		ID:           "2",
+		Dependencies: []string{"1"},
+		DepStatus:    make(map[string]model.StatusValue),
+	}
+	task3 := &model.Task{
+		ID:           "3",
+		Dependencies: []string{"2"},
+		DepStatus:    make(map[string]model.StatusValue),
+	}
+
+	q, _ := NewMemoryQueue(ctx).(*fifo)
+	assert.NotNil(t, q)
+
+	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3}))
+
+	waitForProcess()
+
+	// Get and fail task1
+	got1, err := q.Poll(ctx, 1, filterFnTrue)
+	assert.NoError(t, err)
+	assert.NoError(t, q.Error(ctx, got1.ID, fmt.Errorf("failed")))
+
+	waitForProcess()
+
+	// Get task2 - should not run but get skipped
+	got2, err := q.Poll(ctx, 2, filterFnTrue)
+	assert.NoError(t, err)
+	assert.False(t, got2.ShouldRun())
+	assert.NoError(t, q.Done(ctx, got2.ID, model.StatusSkipped))
+
+	waitForProcess()
+
+	// Get task3 - should have task2's skipped status updated
+	got3, err := q.Poll(ctx, 3, filterFnTrue)
+	assert.NoError(t, err)
+	assert.Equal(t, model.StatusSkipped, got3.DepStatus["2"],
+		"task3 should have task2's skipped status")
+	assert.False(t, got3.ShouldRun(), "task3 should not run due to skipped dependency")
+}

From c4774e1082bd28fe03d20ed1368f95618dbf64a1 Mon Sep 17 00:00:00 2001
From: 6543 <6543@obermui.de>
Date: Mon, 26 Jan 2026 19:14:18 +0100
Subject: [PATCH 08/13] group tests

---
 server/queue/fifo_test.go | 2048 +++++++++++++++++--------------------
 1 file changed, 958 insertions(+), 1090 deletions(-)

diff --git a/server/queue/fifo_test.go b/server/queue/fifo_test.go
index 71903a990c5..5a8db3f3e8a 100644
--- a/server/queue/fifo_test.go
+++ b/server/queue/fifo_test.go
@@ -38,453 +38,472 @@ var (
 	waitForProcess = func() { time.Sleep(processTimeInterval + 50*time.Millisecond) }
 )
 
-func TestFifo(t *testing.T) {
+func TestFifoBasicOperations(t *testing.T) {
 	ctx, cancel := context.WithCancelCause(t.Context())
 	t.Cleanup(func() { cancel(nil) })
 
-	q := NewMemoryQueue(ctx)
-	dummyTask := genDummyTask()
-
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
-	waitForProcess()
-	info := q.Info(ctx)
-	assert.Len(t, info.Pending, 1, "expect task in pending queue")
-
-	got, err := q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, err)
-	assert.Equal(t, dummyTask, got)
+	t.Run("basic push poll done flow", func(t *testing.T) {
+		q := NewMemoryQueue(ctx)
+		dummyTask := genDummyTask()
 
-	waitForProcess()
-	info = q.Info(ctx)
-	assert.Len(t, info.Pending, 0, "expect task removed from pending queue")
-	assert.Len(t, info.Running, 1, "expect task in running queue")
-
-	assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
-
-	waitForProcess()
-	info = q.Info(ctx)
-	assert.Len(t, info.Pending, 0, "expect task removed from pending queue")
-	assert.Len(t, info.Running, 0, "expect task removed from running queue")
-}
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
+		waitForProcess()
+		info := q.Info(ctx)
+		assert.Len(t, info.Pending, 1, "expect task in pending queue")
 
-func TestFifoExpire(t *testing.T) {
-	ctx, cancel := context.WithCancelCause(t.Context())
-	t.Cleanup(func() { cancel(nil) })
+		got, err := q.Poll(ctx, 1, filterFnTrue)
+		assert.NoError(t, err)
+		assert.Equal(t, dummyTask, got)
 
-	q, _ := NewMemoryQueue(ctx).(*fifo)
-	assert.NotNil(t, q)
+		waitForProcess()
+		info = q.Info(ctx)
+		assert.Len(t, info.Pending, 0, "expect task removed from pending queue")
+		assert.Len(t, info.Running, 1, "expect task in running queue")
 
-	dummyTask := genDummyTask()
+		assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
 
-	q.extension = 0
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
-	waitForProcess()
-	info := q.Info(ctx)
-	assert.Len(t, info.Pending, 1, "expect task in pending queue")
+		waitForProcess()
+		info = q.Info(ctx)
+		assert.Len(t, info.Pending, 0, "expect task removed from pending queue")
+		assert.Len(t, info.Running, 0, "expect task removed from running queue")
+	})
+
+	t.Run("task expiration", func(t *testing.T) {
+		q, _ := NewMemoryQueue(ctx).(*fifo)
+		assert.NotNil(t, q)
+		dummyTask := genDummyTask()
+
+		q.extension = 0
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
+		waitForProcess()
+		info := q.Info(ctx)
+		assert.Len(t, info.Pending, 1, "expect task in pending queue")
 
-	got, err := q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, err)
-	assert.Equal(t, dummyTask, got)
+		got, err := q.Poll(ctx, 1, filterFnTrue)
+		assert.NoError(t, err)
+		assert.Equal(t, dummyTask, got)
 
-	waitForProcess()
-	info = q.Info(ctx)
-	assert.Len(t, info.Pending, 1, "expect task re-added to pending queue")
-}
+		waitForProcess()
+		info = q.Info(ctx)
+		assert.Len(t, info.Pending, 1, "expect task re-added to pending queue")
+	})
 
-func TestFifoWaitOnExpireReturnsError(t *testing.T) {
-	ctx, cancel := context.WithCancelCause(t.Context())
-	t.Cleanup(func() { cancel(nil) })
+	t.Run("pause and resume", func(t *testing.T) {
+		q, _ := NewMemoryQueue(ctx).(*fifo)
+		assert.NotNil(t, q)
+		dummyTask := genDummyTask()
 
-	q, _ := NewMemoryQueue(ctx).(*fifo)
-	assert.NotNil(t, q)
+		var wg sync.WaitGroup
+		wg.Add(1)
+		go func() {
+			_, _ = q.Poll(ctx, 1, filterFnTrue)
+			wg.Done()
+		}()
 
-	q.extension = 0
+		q.Pause()
+		t0 := time.Now()
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
+		waitForProcess()
+		q.Resume()
 
-	dummyTask := genDummyTask()
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
+		wg.Wait()
+		t1 := time.Now()
 
-	waitForProcess()
-	got, err := q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, err)
-	assert.Equal(t, dummyTask, got)
+		assert.Greater(t, t1.Sub(t0), 20*time.Millisecond, "should have waited til resume")
 
-	errCh := make(chan error, 1)
-	go func() {
-		errCh <- q.Wait(ctx, got.ID)
-	}()
+		q.Pause()
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
+		q.Resume()
+		_, _ = q.Poll(ctx, 1, filterFnTrue)
+	})
 
-	waitForProcess()
+	t.Run("pause then resume", func(t *testing.T) {
+		q, _ := NewMemoryQueue(ctx).(*fifo)
+		assert.NotNil(t, q)
+		dummyTask := genDummyTask()
 
-	select {
-	case werr := <-errCh:
-		assert.Error(t, werr, "expected Wait to return error when lease expires")
-	case <-time.After(2 * time.Second):
-		t.Fatal("timeout waiting for Wait to return after lease expiration")
-	}
+		q.Pause()
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
+		q.Resume()
 
-	info := q.Info(ctx)
-	assert.Len(t, info.Pending, 1, "expect task re-added to pending queue after expiration")
+		_, _ = q.Poll(ctx, 1, filterFnTrue)
+	})
 }
 
 func TestFifoWait(t *testing.T) {
 	ctx, cancel := context.WithCancelCause(t.Context())
 	t.Cleanup(func() { cancel(nil) })
 
-	q, _ := NewMemoryQueue(ctx).(*fifo)
-	assert.NotNil(t, q)
+	t.Run("wait on task completion", func(t *testing.T) {
+		q, _ := NewMemoryQueue(ctx).(*fifo)
+		assert.NotNil(t, q)
+		dummyTask := genDummyTask()
 
-	dummyTask := genDummyTask()
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
 
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
+		waitForProcess()
+		got, err := q.Poll(ctx, 1, filterFnTrue)
+		assert.NoError(t, err)
+		assert.Equal(t, dummyTask, got)
 
-	waitForProcess()
-	got, err := q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, err)
-	assert.Equal(t, dummyTask, got)
+		var wg sync.WaitGroup
+		wg.Add(1)
+		go func() {
+			assert.NoError(t, q.Wait(ctx, got.ID))
+			wg.Done()
+		}()
 
-	var wg sync.WaitGroup
-	wg.Add(1)
-	go func() {
-		assert.NoError(t, q.Wait(ctx, got.ID))
-		wg.Done()
-	}()
+		<-time.After(time.Millisecond)
+		assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
+		wg.Wait()
+	})
 
-	<-time.After(time.Millisecond)
-	assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
-	wg.Wait()
-}
+	t.Run("wait returns error on expiration", func(t *testing.T) {
+		q, _ := NewMemoryQueue(ctx).(*fifo)
+		assert.NotNil(t, q)
+		q.extension = 0
 
-func TestFifoDependencies(t *testing.T) {
-	ctx, cancel := context.WithCancelCause(t.Context())
-	t.Cleanup(func() { cancel(nil) })
+		dummyTask := genDummyTask()
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
 
-	task1 := genDummyTask()
-	task2 := &model.Task{
-		ID:           "2",
-		Dependencies: []string{"1"},
-		DepStatus:    make(map[string]model.StatusValue),
-	}
+		waitForProcess()
+		got, err := q.Poll(ctx, 1, filterFnTrue)
+		assert.NoError(t, err)
+		assert.Equal(t, dummyTask, got)
 
-	q, _ := NewMemoryQueue(ctx).(*fifo)
-	assert.NotNil(t, q)
+		errCh := make(chan error, 1)
+		go func() {
+			errCh <- q.Wait(ctx, got.ID)
+		}()
 
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task1}))
+		waitForProcess()
 
-	waitForProcess()
-	got, err := q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, err)
-	assert.Equal(t, task1, got)
+		select {
+		case werr := <-errCh:
+			assert.Error(t, werr, "expected Wait to return error when lease expires")
+		case <-time.After(2 * time.Second):
+			t.Fatal("timeout waiting for Wait to return after lease expiration")
+		}
 
-	waitForProcess()
-	assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
+		info := q.Info(ctx)
+		assert.Len(t, info.Pending, 1, "expect task re-added to pending queue after expiration")
+	})
 
-	waitForProcess()
-	got, err = q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, err)
-	assert.Equal(t, task2, got)
-}
+	t.Run("wait with context canceled", func(t *testing.T) {
+		q, _ := NewMemoryQueue(ctx).(*fifo)
+		assert.NotNil(t, q)
+		dummyTask := genDummyTask()
 
-func TestFifoErrors(t *testing.T) {
-	ctx, cancel := context.WithCancelCause(t.Context())
-	t.Cleanup(func() { cancel(nil) })
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
 
-	task1 := genDummyTask()
-	task2 := &model.Task{
-		ID:           "2",
-		Dependencies: []string{"1"},
-		DepStatus:    make(map[string]model.StatusValue),
-	}
-	task3 := &model.Task{
-		ID:           "3",
-		Dependencies: []string{"1"},
-		DepStatus:    make(map[string]model.StatusValue),
-		RunOn:        []string{"success", "failure"},
-	}
+		waitForProcess()
+		got, err := q.Poll(ctx, 1, filterFnTrue)
+		assert.NoError(t, err)
 
-	q, _ := NewMemoryQueue(ctx).(*fifo)
-	assert.NotNil(t, q)
+		waitCtx, waitCancel := context.WithCancel(ctx)
 
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1}))
+		errCh := make(chan error, 1)
+		go func() {
+			errCh <- q.Wait(waitCtx, got.ID)
+		}()
 
-	waitForProcess()
-	got, err := q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, err)
-	assert.Equal(t, task1, got)
+		time.Sleep(50 * time.Millisecond)
+		waitCancel()
 
-	assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1, there was an error")))
+		select {
+		case err := <-errCh:
+			assert.NoError(t, err, "Wait should return nil when context is canceled before task completes")
+		case <-time.After(time.Second):
+			t.Fatal("Wait should return when context is canceled")
+		}
+	})
 
-	waitForProcess()
-	got, err = q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, err)
-	assert.Equal(t, task2, got)
-	assert.False(t, got.ShouldRun())
+	t.Run("wait on non-existent task", func(t *testing.T) {
+		q, _ := NewMemoryQueue(ctx).(*fifo)
+		assert.NotNil(t, q)
 
-	waitForProcess()
-	got, err = q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, err)
-	assert.Equal(t, task3, got)
-	assert.True(t, got.ShouldRun())
+		err := q.Wait(ctx, "non-existent-task")
+		assert.NoError(t, err, "Wait should return nil for non-existent task")
+	})
 }
 
-func TestFifoErrors2(t *testing.T) {
+func TestFifoDependencies(t *testing.T) {
 	ctx, cancel := context.WithCancelCause(t.Context())
 	t.Cleanup(func() { cancel(nil) })
 
-	task1 := genDummyTask()
-	task2 := &model.Task{
-		ID: "2",
-	}
-	task3 := &model.Task{
-		ID:           "3",
-		Dependencies: []string{"1", "2"},
-		DepStatus:    make(map[string]model.StatusValue),
-	}
+	t.Run("basic dependency ordering", func(t *testing.T) {
+		task1 := genDummyTask()
+		task2 := &model.Task{
+			ID:           "2",
+			Dependencies: []string{"1"},
+			DepStatus:    make(map[string]model.StatusValue),
+		}
 
-	q, _ := NewMemoryQueue(ctx).(*fifo)
-	assert.NotNil(t, q)
+		q, _ := NewMemoryQueue(ctx).(*fifo)
+		assert.NotNil(t, q)
 
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1}))
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task1}))
 
-	for i := 0; i < 2; i++ {
 		waitForProcess()
 		got, err := q.Poll(ctx, 1, filterFnTrue)
 		assert.NoError(t, err)
-		assert.False(t, got != task1 && got != task2, "expect task1 or task2 returned from queue as task3 depends on them")
+		assert.Equal(t, task1, got)
+
+		waitForProcess()
+		assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
 
-		if got != task1 {
-			assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
+		waitForProcess()
+		got, err = q.Poll(ctx, 1, filterFnTrue)
+		assert.NoError(t, err)
+		assert.Equal(t, task2, got)
+	})
+
+	t.Run("waiting vs pending stats", func(t *testing.T) {
+		task1 := genDummyTask()
+		task2 := &model.Task{
+			ID:           "2",
+			Dependencies: []string{"1"},
+			DepStatus:    make(map[string]model.StatusValue),
 		}
-		if got != task2 {
-			assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1, there was an error")))
+		task3 := &model.Task{
+			ID:           "3",
+			Dependencies: []string{"1"},
+			DepStatus:    make(map[string]model.StatusValue),
+			RunOn:        []string{"success", "failure"},
 		}
-	}
 
-	waitForProcess()
-	got, err := q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, err)
-	assert.Equal(t, task3, got)
-	assert.False(t, got.ShouldRun())
-}
+		q, _ := NewMemoryQueue(ctx).(*fifo)
+		assert.NotNil(t, q)
 
-func TestFifoErrorsMultiThread(t *testing.T) {
-	ctx, cancel := context.WithCancelCause(t.Context())
-	t.Cleanup(func() { cancel(nil) })
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1}))
 
-	task1 := genDummyTask()
-	task2 := &model.Task{
-		ID:           "2",
-		Dependencies: []string{"1"},
-		DepStatus:    make(map[string]model.StatusValue),
-	}
-	task3 := &model.Task{
-		ID:           "3",
-		Dependencies: []string{"1", "2"},
-		DepStatus:    make(map[string]model.StatusValue),
-	}
-
-	q, _ := NewMemoryQueue(ctx).(*fifo)
-	assert.NotNil(t, q)
-
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1}))
+		got, _ := q.Poll(ctx, 1, filterFnTrue)
 
-	obtainedWorkCh := make(chan *model.Task)
-	defer func() { close(obtainedWorkCh) }()
-
-	for i := 0; i < 10; i++ {
-		go func(i int) {
-			for {
-				fmt.Printf("Worker %d started\n", i)
-				got, err := q.Poll(ctx, 1, filterFnTrue)
-				if err != nil && errors.Is(err, context.Canceled) {
-					return
-				}
-				assert.NoError(t, err)
-				obtainedWorkCh <- got
-			}
-		}(i)
-	}
-
-	task1Processed := false
-	task2Processed := false
-
-	for {
-		select {
-		case got := <-obtainedWorkCh:
-			fmt.Println(got.ID)
+		waitForProcess()
+		info := q.Info(ctx)
+		assert.Equal(t, 2, info.Stats.WaitingOnDeps)
 
-			switch {
-			case !task1Processed:
-				assert.Equal(t, task1, got)
-				task1Processed = true
-				assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1, there was an error")))
-				go func() {
-					for {
-						fmt.Printf("Worker spawned\n")
-						got, err := q.Poll(ctx, 1, filterFnTrue)
-						if err != nil && errors.Is(err, context.Canceled) {
-							return
-						}
-						assert.NoError(t, err)
-						obtainedWorkCh <- got
-					}
-				}()
-			case !task2Processed:
-				assert.Equal(t, task2, got)
-				task2Processed = true
-				assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
-				go func() {
-					for {
-						fmt.Printf("Worker spawned\n")
-						got, err := q.Poll(ctx, 1, filterFnTrue)
-						if err != nil && errors.Is(err, context.Canceled) {
-							return
-						}
-						assert.NoError(t, err)
-						obtainedWorkCh <- got
-					}
-				}()
-			default:
-				assert.Equal(t, task3, got)
-				assert.False(t, got.ShouldRun(), "expect task3 should not run, task1 succeeded but task2 failed")
-				return
-			}
+		assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1, there was an error")))
+		got, err := q.Poll(ctx, 1, filterFnTrue)
+		assert.NoError(t, err)
+		assert.EqualValues(t, task2, got)
 
-		case <-time.After(5 * time.Second):
-			info := q.Info(ctx)
-			fmt.Println(info.String())
-			t.Errorf("test timed out")
-			return
-		}
-	}
+		waitForProcess()
+		info = q.Info(ctx)
+		assert.Equal(t, 0, info.Stats.WaitingOnDeps)
+		assert.Equal(t, 1, info.Stats.Pending)
+	})
 }
 
-func TestFifoTransitiveErrors(t *testing.T) {
+func TestFifoErrors(t *testing.T) {
 	ctx, cancel := context.WithCancelCause(t.Context())
 	t.Cleanup(func() { cancel(nil) })
 
-	task1 := genDummyTask()
-	task2 := &model.Task{
-		ID:           "2",
-		Dependencies: []string{"1"},
-		DepStatus:    make(map[string]model.StatusValue),
-	}
-	task3 := &model.Task{
-		ID:           "3",
-		Dependencies: []string{"2"},
-		DepStatus:    make(map[string]model.StatusValue),
-	}
+	t.Run("error handling with run_on conditions", func(t *testing.T) {
+		task1 := genDummyTask()
+		task2 := &model.Task{
+			ID:           "2",
+			Dependencies: []string{"1"},
+			DepStatus:    make(map[string]model.StatusValue),
+		}
+		task3 := &model.Task{
+			ID:           "3",
+			Dependencies: []string{"1"},
+			DepStatus:    make(map[string]model.StatusValue),
+			RunOn:        []string{"success", "failure"},
+		}
 
-	q, _ := NewMemoryQueue(ctx).(*fifo)
-	assert.NotNil(t, q)
+		q, _ := NewMemoryQueue(ctx).(*fifo)
+		assert.NotNil(t, q)
 
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1}))
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1}))
 
-	waitForProcess()
-	got, err := q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, err)
-	assert.Equal(t, task1, got)
-	assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1, there was an error")))
-
-	waitForProcess()
-	got, err = q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, err)
-	assert.Equal(t, task2, got)
-	assert.False(t, got.ShouldRun(), "expect task2 should not run, since task1 failed")
-	assert.NoError(t, q.Done(ctx, got.ID, model.StatusSkipped))
-
-	waitForProcess()
-	got, err = q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, err)
-	assert.Equal(t, task3, got)
-	assert.False(t, got.ShouldRun(), "expect task3 should not run, task1 failed, thus task2 was skipped, task3 should be skipped too")
-}
+		waitForProcess()
+		got, err := q.Poll(ctx, 1, filterFnTrue)
+		assert.NoError(t, err)
+		assert.Equal(t, task1, got)
 
-func TestFifoCancel(t *testing.T) {
-	ctx, cancel := context.WithCancelCause(t.Context())
-	t.Cleanup(func() { cancel(nil) })
+		assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1, there was an error")))
 
-	task1 := genDummyTask()
-	task2 := &model.Task{
-		ID:           "2",
-		Dependencies: []string{"1"},
-		DepStatus:    make(map[string]model.StatusValue),
-	}
-	task3 := &model.Task{
-		ID:           "3",
-		Dependencies: []string{"1"},
-		DepStatus:    make(map[string]model.StatusValue),
-		RunOn:        []string{"success", "failure"},
-	}
+		waitForProcess()
+		got, err = q.Poll(ctx, 1, filterFnTrue)
+		assert.NoError(t, err)
+		assert.Equal(t, task2, got)
+		assert.False(t, got.ShouldRun())
 
-	q, _ := NewMemoryQueue(ctx).(*fifo)
-	assert.NotNil(t, q)
+		waitForProcess()
+		got, err = q.Poll(ctx, 1, filterFnTrue)
+		assert.NoError(t, err)
+		assert.Equal(t, task3, got)
+		assert.True(t, got.ShouldRun())
+	})
+
+	t.Run("multiple dependency failures", func(t *testing.T) {
+		task1 := genDummyTask()
+		task2 := &model.Task{
+			ID: "2",
+		}
+		task3 := &model.Task{
+			ID:           "3",
+			Dependencies: []string{"1", "2"},
+			DepStatus:    make(map[string]model.StatusValue),
+		}
 
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1}))
+		q, _ := NewMemoryQueue(ctx).(*fifo)
+		assert.NotNil(t, q)
 
-	_, _ = q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, q.Error(ctx, task1.ID, fmt.Errorf("canceled")))
-	assert.NoError(t, q.Error(ctx, task2.ID, fmt.Errorf("canceled")))
-	assert.NoError(t, q.Error(ctx, task3.ID, fmt.Errorf("canceled")))
-	info := q.Info(ctx)
-	assert.Len(t, info.Pending, 0, "all pipelines should be canceled")
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1}))
 
-	time.Sleep(processTimeInterval * 2)
-	info = q.Info(ctx)
-	assert.Len(t, info.Pending, 0, "canceled are rescheduled")
-	assert.Len(t, info.Running, 0, "canceled are rescheduled")
-	assert.Len(t, info.WaitingOnDeps, 0, "canceled are rescheduled")
-}
+		for i := 0; i < 2; i++ {
+			waitForProcess()
+			got, err := q.Poll(ctx, 1, filterFnTrue)
+			assert.NoError(t, err)
+			assert.False(t, got != task1 && got != task2, "expect task1 or task2 returned from queue as task3 depends on them")
 
-func TestFifoPause(t *testing.T) {
-	ctx, cancel := context.WithCancelCause(t.Context())
-	t.Cleanup(func() { cancel(nil) })
+			if got != task1 {
+				assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
+			}
+			if got != task2 {
+				assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1, there was an error")))
+			}
+		}
 
-	q, _ := NewMemoryQueue(ctx).(*fifo)
-	assert.NotNil(t, q)
+		waitForProcess()
+		got, err := q.Poll(ctx, 1, filterFnTrue)
+		assert.NoError(t, err)
+		assert.Equal(t, task3, got)
+		assert.False(t, got.ShouldRun())
+	})
+
+	t.Run("transitive error propagation", func(t *testing.T) {
+		task1 := genDummyTask()
+		task2 := &model.Task{
+			ID:           "2",
+			Dependencies: []string{"1"},
+			DepStatus:    make(map[string]model.StatusValue),
+		}
+		task3 := &model.Task{
+			ID:           "3",
+			Dependencies: []string{"2"},
+			DepStatus:    make(map[string]model.StatusValue),
+		}
 
-	dummyTask := genDummyTask()
+		q, _ := NewMemoryQueue(ctx).(*fifo)
+		assert.NotNil(t, q)
 
-	var wg sync.WaitGroup
-	wg.Add(1)
-	go func() {
-		_, _ = q.Poll(ctx, 1, filterFnTrue)
-		wg.Done()
-	}()
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1}))
 
-	q.Pause()
-	t0 := time.Now()
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
-	waitForProcess()
-	q.Resume()
+		waitForProcess()
+		got, err := q.Poll(ctx, 1, filterFnTrue)
+		assert.NoError(t, err)
+		assert.Equal(t, task1, got)
+		assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1, there was an error")))
 
-	wg.Wait()
-	t1 := time.Now()
+		waitForProcess()
+		got, err = q.Poll(ctx, 1, filterFnTrue)
+		assert.NoError(t, err)
+		assert.Equal(t, task2, got)
+		assert.False(t, got.ShouldRun(), "expect task2 should not run, since task1 failed")
+		assert.NoError(t, q.Done(ctx, got.ID, model.StatusSkipped))
 
-	assert.Greater(t, t1.Sub(t0), 20*time.Millisecond, "should have waited til resume")
+		waitForProcess()
+		got, err = q.Poll(ctx, 1, filterFnTrue)
+		assert.NoError(t, err)
+		assert.Equal(t, task3, got)
+		assert.False(t, got.ShouldRun(), "expect task3 should not run, task1 failed, thus task2 was skipped, task3 should be skipped too")
+	})
+
+	t.Run("multithreaded error handling", func(t *testing.T) {
+		task1 := genDummyTask()
+		task2 := &model.Task{
+			ID:           "2",
+			Dependencies: []string{"1"},
+			DepStatus:    make(map[string]model.StatusValue),
+		}
+		task3 := &model.Task{
+			ID:           "3",
+			Dependencies: []string{"1", "2"},
+			DepStatus:    make(map[string]model.StatusValue),
+		}
 
-	q.Pause()
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
-	q.Resume()
-	_, _ = q.Poll(ctx, 1, filterFnTrue)
-}
+		q, _ := NewMemoryQueue(ctx).(*fifo)
+		assert.NotNil(t, q)
 
-func TestFifoPauseResume(t *testing.T) {
-	ctx, cancel := context.WithCancelCause(t.Context())
-	t.Cleanup(func() { cancel(nil) })
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1}))
 
-	q, _ := NewMemoryQueue(ctx).(*fifo)
-	assert.NotNil(t, q)
+		obtainedWorkCh := make(chan *model.Task)
+		defer func() { close(obtainedWorkCh) }()
 
-	dummyTask := genDummyTask()
+		for i := 0; i < 10; i++ {
+			go func(i int) {
+				for {
+					fmt.Printf("Worker %d started\n", i)
+					got, err := q.Poll(ctx, 1, filterFnTrue)
+					if err != nil && errors.Is(err, context.Canceled) {
+						return
+					}
+					assert.NoError(t, err)
+					obtainedWorkCh <- got
+				}
+			}(i)
+		}
 
-	q.Pause()
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
-	q.Resume()
+		task1Processed := false
+		task2Processed := false
+
+		for {
+			select {
+			case got := <-obtainedWorkCh:
+				fmt.Println(got.ID)
+
+				switch {
+				case !task1Processed:
+					assert.Equal(t, task1, got)
+					task1Processed = true
+					assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1, there was an error")))
+					go func() {
+						for {
+							fmt.Printf("Worker spawned\n")
+							got, err := q.Poll(ctx, 1, filterFnTrue)
+							if err != nil && errors.Is(err, context.Canceled) {
+								return
+							}
+							assert.NoError(t, err)
+							obtainedWorkCh <- got
+						}
+					}()
+				case !task2Processed:
+					assert.Equal(t, task2, got)
+					task2Processed = true
+					assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
+					go func() {
+						for {
+							fmt.Printf("Worker spawned\n")
+							got, err := q.Poll(ctx, 1, filterFnTrue)
+							if err != nil && errors.Is(err, context.Canceled) {
+								return
+							}
+							assert.NoError(t, err)
+							obtainedWorkCh <- got
+						}
+					}()
+				default:
+					assert.Equal(t, task3, got)
+					assert.False(t, got.ShouldRun(), "expect task3 should not run, task1 succeeded but task2 failed")
+					return
+				}
 
-	_, _ = q.Poll(ctx, 1, filterFnTrue)
+			case <-time.After(5 * time.Second):
+				info := q.Info(ctx)
+				fmt.Println(info.String())
+				t.Errorf("test timed out")
+				return
+			}
+		}
+	})
 }
 
-func TestWaitingVsPending(t *testing.T) {
+func TestFifoCancel(t *testing.T) {
 	ctx, cancel := context.WithCancelCause(t.Context())
 	t.Cleanup(func() { cancel(nil) })
 
@@ -506,94 +525,113 @@ func TestWaitingVsPending(t *testing.T) {
 
 	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1}))
 
-	got, _ := q.Poll(ctx, 1, filterFnTrue)
-
-	waitForProcess()
+	_, _ = q.Poll(ctx, 1, filterFnTrue)
+	assert.NoError(t, q.Error(ctx, task1.ID, fmt.Errorf("canceled")))
+	assert.NoError(t, q.Error(ctx, task2.ID, fmt.Errorf("canceled")))
+	assert.NoError(t, q.Error(ctx, task3.ID, fmt.Errorf("canceled")))
 	info := q.Info(ctx)
-	assert.Equal(t, 2, info.Stats.WaitingOnDeps)
-
-	assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1, there was an error")))
-	got, err := q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, err)
-	assert.EqualValues(t, task2, got)
+	assert.Len(t, info.Pending, 0, "all pipelines should be canceled")
 
-	waitForProcess()
+	time.Sleep(processTimeInterval * 2)
 	info = q.Info(ctx)
-	assert.Equal(t, 0, info.Stats.WaitingOnDeps)
-	assert.Equal(t, 1, info.Stats.Pending)
+	assert.Len(t, info.Pending, 0, "canceled are rescheduled")
+	assert.Len(t, info.Running, 0, "canceled are rescheduled")
+	assert.Len(t, info.WaitingOnDeps, 0, "canceled are rescheduled")
 }
 
-func TestShouldRun(t *testing.T) {
-	task := &model.Task{
-		ID:           "2",
-		Dependencies: []string{"1"},
-		DepStatus: map[string]model.StatusValue{
-			"1": model.StatusSuccess,
+func TestFifoShouldRun(t *testing.T) {
+	tests := []struct {
+		name     string
+		task     *model.Task
+		expected bool
+		reason   string
+	}{
+		{
+			name: "failure only - success status",
+			task: &model.Task{
+				ID:           "2",
+				Dependencies: []string{"1"},
+				DepStatus:    map[string]model.StatusValue{"1": model.StatusSuccess},
+				RunOn:        []string{"failure"},
+			},
+			expected: false,
+			reason:   "expect task to not run, it runs on failure only",
 		},
-		RunOn: []string{"failure"},
-	}
-	assert.False(t, task.ShouldRun(), "expect task to not run, it runs on failure only")
-
-	task = &model.Task{
-		ID:           "2",
-		Dependencies: []string{"1"},
-		DepStatus: map[string]model.StatusValue{
-			"1": model.StatusSuccess,
+		{
+			name: "failure and success - success status",
+			task: &model.Task{
+				ID:           "2",
+				Dependencies: []string{"1"},
+				DepStatus:    map[string]model.StatusValue{"1": model.StatusSuccess},
+				RunOn:        []string{"failure", "success"},
+			},
+			expected: true,
 		},
-		RunOn: []string{"failure", "success"},
-	}
-	assert.True(t, task.ShouldRun())
-
-	task = &model.Task{
-		ID:           "2",
-		Dependencies: []string{"1"},
-		DepStatus: map[string]model.StatusValue{
-			"1": model.StatusFailure,
+		{
+			name: "no run_on - failure status",
+			task: &model.Task{
+				ID:           "2",
+				Dependencies: []string{"1"},
+				DepStatus:    map[string]model.StatusValue{"1": model.StatusFailure},
+			},
+			expected: false,
 		},
-	}
-	assert.False(t, task.ShouldRun())
-
-	task = &model.Task{
-		ID:           "2",
-		Dependencies: []string{"1"},
-		DepStatus: map[string]model.StatusValue{
-			"1": model.StatusSuccess,
+		{
+			name: "success only - success status",
+			task: &model.Task{
+				ID:           "2",
+				Dependencies: []string{"1"},
+				DepStatus:    map[string]model.StatusValue{"1": model.StatusSuccess},
+				RunOn:        []string{"success"},
+			},
+			expected: true,
 		},
-		RunOn: []string{"success"},
-	}
-	assert.True(t, task.ShouldRun())
-
-	task = &model.Task{
-		ID:           "2",
-		Dependencies: []string{"1"},
-		DepStatus: map[string]model.StatusValue{
-			"1": model.StatusFailure,
+		{
+			name: "failure only - failure status",
+			task: &model.Task{
+				ID:           "2",
+				Dependencies: []string{"1"},
+				DepStatus:    map[string]model.StatusValue{"1": model.StatusFailure},
+				RunOn:        []string{"failure"},
+			},
+			expected: true,
 		},
-		RunOn: []string{"failure"},
-	}
-	assert.True(t, task.ShouldRun())
-
-	task = &model.Task{
-		ID:           "2",
-		Dependencies: []string{"1"},
-		DepStatus: map[string]model.StatusValue{
-			"1": model.StatusSkipped,
+		{
+			name: "no run_on - skipped status",
+			task: &model.Task{
+				ID:           "2",
+				Dependencies: []string{"1"},
+				DepStatus:    map[string]model.StatusValue{"1": model.StatusSkipped},
+			},
+			expected: false,
+			reason:   "task should not run if dependency is skipped",
+		},
+		{
+			name: "failure only - skipped status",
+			task: &model.Task{
+				ID:           "2",
+				Dependencies: []string{"1"},
+				DepStatus:    map[string]model.StatusValue{"1": model.StatusSkipped},
+				RunOn:        []string{"failure"},
+			},
+			expected: true,
+			reason:   "on failure, tasks should run on skipped deps, something failed higher up the chain",
 		},
 	}
-	assert.False(t, task.ShouldRun(), "task should not run if dependency is skipped")
 
-	task = &model.Task{
-		ID:           "2",
-		Dependencies: []string{"1"},
-		DepStatus: map[string]model.StatusValue{
-			"1": model.StatusSkipped,
-		},
-		RunOn: []string{"failure"},
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := tt.task.ShouldRun()
+			if tt.reason != "" {
+				assert.Equal(t, tt.expected, result, tt.reason)
+			} else {
+				assert.Equal(t, tt.expected, result)
+			}
+		})
 	}
-	assert.True(t, task.ShouldRun(), "on failure, tasks should run on skipped deps, something failed higher up the chain")
 }
 
-func TestFifoWithScoring(t *testing.T) {
+func TestFifoScoring(t *testing.T) {
 	ctx, cancel := context.WithCancelCause(t.Context())
 	t.Cleanup(func() { cancel(nil) })
 
@@ -677,11 +715,6 @@ func TestFifoWithScoring(t *testing.T) {
 	assert.Len(t, receivedTasks, 5, "All tasks should be assigned")
 
 	// Define expected agent assignments
-	// Map structure: {taskID: []possible agentIDs}
-	// - taskID "1" and "4" can be assigned to agents 1 or 4 (org-id "123")
-	// - taskID "2" should be assigned to agent 2 (org-id "456")
-	// - taskID "3" should be assigned to agent 3 (platform "windows")
-	// - taskID "5" should be assigned to agent 5 (org-id "*")
 	expectedAssignments := map[string][]int64{
 		"1": {1, 4},
 		"2": {2},
@@ -702,392 +735,338 @@ func TestFifoErrorAtOnce(t *testing.T) {
 	ctx, cancel := context.WithCancelCause(t.Context())
 	t.Cleanup(func() { cancel(nil) })
 
-	task1 := genDummyTask()
-	task2 := &model.Task{
-		ID:           "2",
-		Dependencies: []string{"1"},
-		DepStatus:    make(map[string]model.StatusValue),
-	}
-	task3 := &model.Task{
-		ID: "3",
-	}
-
-	q, _ := NewMemoryQueue(ctx).(*fifo)
-	assert.NotNil(t, q)
-
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3}))
-
-	waitForProcess()
-	got1, err := q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, err)
-
-	got2, err := q.Poll(ctx, 2, filterFnTrue)
-	assert.NoError(t, err)
-
-	// Test ErrorAtOnce with running tasks
-	assert.NoError(t, q.ErrorAtOnce(ctx, []string{got1.ID, got2.ID}, fmt.Errorf("batch error")))
-
-	waitForProcess()
-	info := q.Info(ctx)
-	assert.Len(t, info.Running, 0, "expect tasks removed from running queue")
-	assert.Len(t, info.Pending, 1, "expect remaining task in pending")
-}
-
-func TestFifoErrorAtOnceWithPendingTasks(t *testing.T) {
-	ctx, cancel := context.WithCancelCause(t.Context())
-	t.Cleanup(func() { cancel(nil) })
-
-	task1 := genDummyTask()
-	task2 := &model.Task{
-		ID: "2",
-	}
-	task3 := &model.Task{
-		ID:           "3",
-		Dependencies: []string{"1"},
-		DepStatus:    make(map[string]model.StatusValue),
-	}
-
-	q, _ := NewMemoryQueue(ctx).(*fifo)
-	assert.NotNil(t, q)
-
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3}))
+	t.Run("batch error on running tasks", func(t *testing.T) {
+		task1 := genDummyTask()
+		task2 := &model.Task{
+			ID:           "2",
+			Dependencies: []string{"1"},
+			DepStatus:    make(map[string]model.StatusValue),
+		}
+		task3 := &model.Task{
+			ID: "3",
+		}
 
-	waitForProcess()
+		q, _ := NewMemoryQueue(ctx).(*fifo)
+		assert.NotNil(t, q)
 
-	// ErrorAtOnce on pending tasks (task3 should be waiting on deps)
-	assert.NoError(t, q.ErrorAtOnce(ctx, []string{"2", "3"}, fmt.Errorf("pending error")))
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3}))
 
-	waitForProcess()
-	info := q.Info(ctx)
-	assert.Len(t, info.Pending, 1, "only task1 should remain")
-	assert.Len(t, info.WaitingOnDeps, 0, "task3 should be removed from waiting")
-}
+		waitForProcess()
+		got1, err := q.Poll(ctx, 1, filterFnTrue)
+		assert.NoError(t, err)
 
-func TestFifoErrorAtOnceWithCancelError(t *testing.T) {
-	ctx, cancel := context.WithCancelCause(t.Context())
-	t.Cleanup(func() { cancel(nil) })
+		got2, err := q.Poll(ctx, 2, filterFnTrue)
+		assert.NoError(t, err)
 
-	task1 := genDummyTask()
-	task2 := &model.Task{
-		ID: "2",
-	}
+		assert.NoError(t, q.ErrorAtOnce(ctx, []string{got1.ID, got2.ID}, fmt.Errorf("batch error")))
 
-	q, _ := NewMemoryQueue(ctx).(*fifo)
-	assert.NotNil(t, q)
+		waitForProcess()
+		info := q.Info(ctx)
+		assert.Len(t, info.Running, 0, "expect tasks removed from running queue")
+		assert.Len(t, info.Pending, 1, "expect remaining task in pending")
+	})
+
+	t.Run("batch error on pending tasks", func(t *testing.T) {
+		task1 := genDummyTask()
+		task2 := &model.Task{
+			ID: "2",
+		}
+		task3 := &model.Task{
+			ID:           "3",
+			Dependencies: []string{"1"},
+			DepStatus:    make(map[string]model.StatusValue),
+		}
 
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2}))
+		q, _ := NewMemoryQueue(ctx).(*fifo)
+		assert.NotNil(t, q)
 
-	waitForProcess()
-	got1, err := q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, err)
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3}))
 
-	// Test ErrorAtOnce with ErrCancel - should result in StatusKilled
-	assert.NoError(t, q.ErrorAtOnce(ctx, []string{got1.ID}, ErrCancel))
+		waitForProcess()
 
-	waitForProcess()
-	info := q.Info(ctx)
-	assert.Len(t, info.Running, 0, "task should be removed from running")
-}
+		assert.NoError(t, q.ErrorAtOnce(ctx, []string{"2", "3"}, fmt.Errorf("pending error")))
 
-func TestFifoExtend(t *testing.T) {
-	ctx, cancel := context.WithCancelCause(t.Context())
-	t.Cleanup(func() { cancel(nil) })
+		waitForProcess()
+		info := q.Info(ctx)
+		assert.Len(t, info.Pending, 1, "only task1 should remain")
+		assert.Len(t, info.WaitingOnDeps, 0, "task3 should be removed from waiting")
+	})
+
+	t.Run("error at once with cancel error", func(t *testing.T) {
+		task1 := genDummyTask()
+		task2 := &model.Task{
+			ID: "2",
+		}
 
-	q, _ := NewMemoryQueue(ctx).(*fifo)
-	assert.NotNil(t, q)
+		q, _ := NewMemoryQueue(ctx).(*fifo)
+		assert.NotNil(t, q)
 
-	dummyTask := genDummyTask()
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2}))
 
-	waitForProcess()
-	got, err := q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, err)
+		waitForProcess()
+		got1, err := q.Poll(ctx, 1, filterFnTrue)
+		assert.NoError(t, err)
 
-	// Test successful extend
-	assert.NoError(t, q.Extend(ctx, 1, got.ID))
+		assert.NoError(t, q.ErrorAtOnce(ctx, []string{got1.ID}, ErrCancel))
 
-	// Test extend with wrong agent ID
-	err = q.Extend(ctx, 999, got.ID)
-	assert.ErrorIs(t, err, ErrAgentMissMatch)
+		waitForProcess()
+		info := q.Info(ctx)
+		assert.Len(t, info.Running, 0, "task should be removed from running")
+	})
+
+	t.Run("error at once with non-existent tasks", func(t *testing.T) {
+		task1 := genDummyTask()
+		task2 := &model.Task{
+			ID: "2",
+		}
 
-	// Clean up
-	assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
+		q, _ := NewMemoryQueue(ctx).(*fifo)
+		assert.NotNil(t, q)
 
-	// Test extend with non-existent task
-	err = q.Extend(ctx, 1, "non-existent")
-	assert.ErrorIs(t, err, ErrNotFound)
-}
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2}))
 
-func TestFifoExtendPreventsExpiration(t *testing.T) {
-	ctx, cancel := context.WithCancelCause(t.Context())
-	t.Cleanup(func() { cancel(nil) })
+		waitForProcess()
+		got, err := q.Poll(ctx, 1, filterFnTrue)
+		assert.NoError(t, err)
 
-	q, _ := NewMemoryQueue(ctx).(*fifo)
-	assert.NotNil(t, q)
+		err = q.ErrorAtOnce(ctx, []string{got.ID, "non-existent-id"}, fmt.Errorf("batch error"))
+		assert.Error(t, err, "should return error when one of the tasks doesn't exist")
+		assert.ErrorIs(t, err, ErrNotFound, "error should contain ErrNotFound")
 
-	// Set very short extension
-	q.extension = 50 * time.Millisecond
+		waitForProcess()
+		info := q.Info(ctx)
+		assert.Len(t, info.Running, 0, "running task should be removed")
+	})
 
-	dummyTask := genDummyTask()
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
+	t.Run("error at once with multiple non-existent tasks", func(t *testing.T) {
+		q, _ := NewMemoryQueue(ctx).(*fifo)
+		assert.NotNil(t, q)
 
-	waitForProcess()
-	got, err := q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, err)
+		task1 := genDummyTask()
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1}))
 
-	// Extend the deadline multiple times
-	for i := 0; i < 3; i++ {
-		time.Sleep(30 * time.Millisecond)
-		assert.NoError(t, q.Extend(ctx, 1, got.ID))
-	}
+		waitForProcess()
+		got, err := q.Poll(ctx, 1, filterFnTrue)
+		assert.NoError(t, err)
 
-	// Task should still be running
-	info := q.Info(ctx)
-	assert.Len(t, info.Running, 1, "task should still be running after extensions")
-	assert.Len(t, info.Pending, 0, "task should not be resubmitted")
-}
+		err = q.ErrorAtOnce(ctx, []string{got.ID, "fake-1", "fake-2", "fake-3"}, fmt.Errorf("multiple errors"))
+		assert.Error(t, err, "should return joined errors")
 
-func TestFifoKickAgentWorkers(t *testing.T) {
-	ctx, cancel := context.WithCancelCause(t.Context())
-	t.Cleanup(func() { cancel(nil) })
+		errStr := err.Error()
+		assert.Contains(t, errStr, "not found", "error should mention not found")
+	})
 
-	q, _ := NewMemoryQueue(ctx).(*fifo)
-	assert.NotNil(t, q)
+	t.Run("error at once with ErrCancel and dependency update", func(t *testing.T) {
+		task1 := genDummyTask()
+		task2 := &model.Task{
+			ID:           "2",
+			Dependencies: []string{"1"},
+			DepStatus:    make(map[string]model.StatusValue),
+		}
 
-	dummyTask := genDummyTask()
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
+		q, _ := NewMemoryQueue(ctx).(*fifo)
+		assert.NotNil(t, q)
 
-	// Start multiple workers for different agents
-	pollResults := make(chan error, 3)
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2}))
 
-	for agentID := int64(1); agentID <= 3; agentID++ {
-		go func(id int64) {
-			_, err := q.Poll(ctx, id, filterFnTrue)
-			pollResults <- err
-		}(agentID)
-	}
+		waitForProcess()
+		got, err := q.Poll(ctx, 1, filterFnTrue)
+		assert.NoError(t, err)
 
-	// Give workers time to register
-	time.Sleep(50 * time.Millisecond)
+		assert.NoError(t, q.ErrorAtOnce(ctx, []string{got.ID}, ErrCancel))
 
-	// Kick workers for agent 2
-	q.KickAgentWorkers(2)
+		waitForProcess()
 
-	// Check that agent 2's worker was kicked
-	select {
-	case err := <-pollResults:
-		assert.Error(t, err)
-		// Check the cause of the context cancellation
-		if errors.Is(err, context.Canceled) {
-			// If ctx wasn't the one canceled, we need to check if it's our kicked error
-			// The error should either be ErrWorkerKicked or wrapped in context
-			assert.True(t, errors.Is(err, context.Canceled), "expected context.Canceled")
+		got2, err := q.Poll(ctx, 2, filterFnTrue)
+		assert.NoError(t, err)
+		assert.Equal(t, model.StatusKilled, got2.DepStatus["1"], "dependency should be marked as killed")
+	})
+
+	t.Run("finished with multiple errors", func(t *testing.T) {
+		task1 := genDummyTask()
+		task2 := &model.Task{
+			ID:           "2",
+			Dependencies: []string{"1"},
+			DepStatus:    make(map[string]model.StatusValue),
 		}
-	case <-time.After(time.Second):
-		t.Fatal("expected worker to be kicked")
-	}
 
-	// Other workers should still be waiting or get work
-	info := q.Info(ctx)
-	assert.GreaterOrEqual(t, info.Stats.Workers, 2, "other workers should still be registered")
-}
-
-func TestFifoKickAgentWorkersMultiple(t *testing.T) {
-	ctx, cancel := context.WithCancelCause(t.Context())
-	t.Cleanup(func() { cancel(nil) })
-
-	q, _ := NewMemoryQueue(ctx).(*fifo)
-	assert.NotNil(t, q)
+		q, _ := NewMemoryQueue(ctx).(*fifo)
+		assert.NotNil(t, q)
 
-	// Start multiple workers for the same agent
-	pollResults := make(chan error, 5)
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2}))
 
-	for i := 0; i < 5; i++ {
-		go func() {
-			_, err := q.Poll(ctx, 42, filterFnTrue)
-			pollResults <- err
-		}()
-	}
-
-	// Give workers time to register
-	time.Sleep(50 * time.Millisecond)
+		waitForProcess()
 
-	info := q.Info(ctx)
-	initialWorkers := info.Stats.Workers
-	assert.Equal(t, 5, initialWorkers, "all workers should be registered")
+		got, err := q.Poll(ctx, 1, filterFnTrue)
+		assert.NoError(t, err)
+		assert.Equal(t, "1", got.ID)
 
-	// Kick all workers for agent 42
-	q.KickAgentWorkers(42)
+		err = q.ErrorAtOnce(ctx, []string{got.ID, "2", "fake-1", "fake-2"}, fmt.Errorf("test error"))
 
-	// All workers should be kicked
-	kickedCount := 0
-	for i := 0; i < 5; i++ {
-		select {
-		case err := <-pollResults:
-			assert.Error(t, err)
-			// All kicked workers will have their context canceled
-			if errors.Is(err, context.Canceled) {
-				kickedCount++
-			}
-		case <-time.After(time.Second):
-			t.Fatal("expected all workers to be kicked")
-		}
-	}
-
-	assert.Equal(t, 5, kickedCount, "all 5 workers should have been kicked")
+		assert.Error(t, err, "should return error for non-existent tasks")
+		assert.ErrorIs(t, err, ErrNotFound, "error should contain ErrNotFound")
 
-	time.Sleep(50 * time.Millisecond)
-	info = q.Info(ctx)
-	assert.Equal(t, 0, info.Stats.Workers, "all workers should be removed")
+		waitForProcess()
+		info := q.Info(ctx)
+		assert.Len(t, info.Running, 0, "task1 should be removed from running")
+		assert.Len(t, info.WaitingOnDeps, 0, "task2 should be removed from waiting")
+		assert.Len(t, info.Pending, 0, "no tasks should be pending")
+	})
 }
 
-func TestFifoWaitContextCanceled(t *testing.T) {
+func TestFifoExtend(t *testing.T) {
 	ctx, cancel := context.WithCancelCause(t.Context())
 	t.Cleanup(func() { cancel(nil) })
 
-	q, _ := NewMemoryQueue(ctx).(*fifo)
-	assert.NotNil(t, q)
-
-	dummyTask := genDummyTask()
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
-
-	waitForProcess()
-	got, err := q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, err)
+	t.Run("basic extend operations", func(t *testing.T) {
+		q, _ := NewMemoryQueue(ctx).(*fifo)
+		assert.NotNil(t, q)
 
-	// Create a context that we can cancel
-	waitCtx, waitCancel := context.WithCancel(ctx)
+		dummyTask := genDummyTask()
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
 
-	errCh := make(chan error, 1)
-	go func() {
-		errCh <- q.Wait(waitCtx, got.ID)
-	}()
-
-	// Cancel the wait context
-	time.Sleep(50 * time.Millisecond)
-	waitCancel()
-
-	select {
-	case err := <-errCh:
-		assert.NoError(t, err, "Wait should return nil when context is canceled before task completes")
-	case <-time.After(time.Second):
-		t.Fatal("Wait should return when context is canceled")
-	}
-}
+		waitForProcess()
+		got, err := q.Poll(ctx, 1, filterFnTrue)
+		assert.NoError(t, err)
 
-func TestFifoWaitNonExistentTask(t *testing.T) {
-	ctx, cancel := context.WithCancelCause(t.Context())
-	t.Cleanup(func() { cancel(nil) })
+		assert.NoError(t, q.Extend(ctx, 1, got.ID))
 
-	q, _ := NewMemoryQueue(ctx).(*fifo)
-	assert.NotNil(t, q)
+		err = q.Extend(ctx, 999, got.ID)
+		assert.ErrorIs(t, err, ErrAgentMissMatch)
 
-	// Wait on a task that doesn't exist
-	err := q.Wait(ctx, "non-existent-task")
-	assert.NoError(t, err, "Wait should return nil for non-existent task")
-}
+		assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
 
-func TestFifoUpdateDepStatusInQueue(t *testing.T) {
-	ctx, cancel := context.WithCancelCause(t.Context())
-	t.Cleanup(func() { cancel(nil) })
+		err = q.Extend(ctx, 1, "non-existent")
+		assert.ErrorIs(t, err, ErrNotFound)
+	})
 
-	task1 := genDummyTask()
-	task2 := &model.Task{
-		ID:           "2",
-		Dependencies: []string{"1"},
-		DepStatus:    make(map[string]model.StatusValue),
-	}
-	task3 := &model.Task{
-		ID:           "3",
-		Dependencies: []string{"1"},
-		DepStatus:    make(map[string]model.StatusValue),
-	}
-	task4 := &model.Task{
-		ID:           "4",
-		Dependencies: []string{"2"},
-		DepStatus:    make(map[string]model.StatusValue),
-	}
+	t.Run("extend prevents expiration", func(t *testing.T) {
+		q, _ := NewMemoryQueue(ctx).(*fifo)
+		assert.NotNil(t, q)
 
-	q, _ := NewMemoryQueue(ctx).(*fifo)
-	assert.NotNil(t, q)
+		q.extension = 50 * time.Millisecond
+
+		dummyTask := genDummyTask()
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
+
+		waitForProcess()
+		got, err := q.Poll(ctx, 1, filterFnTrue)
+		assert.NoError(t, err)
+
+		for i := 0; i < 3; i++ {
+			time.Sleep(30 * time.Millisecond)
+			assert.NoError(t, q.Extend(ctx, 1, got.ID))
+		}
+
+		info := q.Info(ctx)
+		assert.Len(t, info.Running, 1, "task should still be running after extensions")
+		assert.Len(t, info.Pending, 0, "task should not be resubmitted")
+	})
+
+	t.Run("extend with wrong agent", func(t *testing.T) {
+		q, _ := NewMemoryQueue(ctx).(*fifo)
+		assert.NotNil(t, q)
+
+		dummyTask := genDummyTask()
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
 
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3, task4}))
+		waitForProcess()
+		got, err := q.Poll(ctx, 5, filterFnTrue)
+		assert.NoError(t, err)
+		assert.Equal(t, int64(5), got.AgentID, "task should be assigned to agent 5")
 
-	waitForProcess()
-	got, err := q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, err)
-	assert.Equal(t, task1, got)
+		assert.NoError(t, q.Extend(ctx, 5, got.ID))
 
-	// Complete task1 with success
-	assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
+		err = q.Extend(ctx, 999, got.ID)
+		assert.ErrorIs(t, err, ErrAgentMissMatch, "should return ErrAgentMissMatch when wrong agent tries to extend")
 
-	waitForProcess()
+		err = q.Extend(ctx, 1, got.ID)
+		assert.ErrorIs(t, err, ErrAgentMissMatch, "should return ErrAgentMissMatch for any wrong agent")
 
-	// Poll task2 to check its DepStatus was updated
-	got2, err := q.Poll(ctx, 2, filterFnTrue)
-	assert.NoError(t, err)
-	assert.Equal(t, model.StatusSuccess, got2.DepStatus["1"], "task2 should have task1's status updated")
+		assert.NoError(t, q.Extend(ctx, 5, got.ID), "correct agent should still be able to extend")
 
-	// Poll task3 to check its DepStatus was updated
-	got3, err := q.Poll(ctx, 3, filterFnTrue)
-	assert.NoError(t, err)
-	assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"], "task3 should have task1's status updated")
+		assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
+	})
 }
 
-func TestFifoRemoveFromPendingAndWaitingNotFound(t *testing.T) {
+func TestFifoKickAgentWorkers(t *testing.T) {
 	ctx, cancel := context.WithCancelCause(t.Context())
 	t.Cleanup(func() { cancel(nil) })
 
-	task1 := genDummyTask()
+	t.Run("kick single agent workers", func(t *testing.T) {
+		q, _ := NewMemoryQueue(ctx).(*fifo)
+		assert.NotNil(t, q)
 
-	q, _ := NewMemoryQueue(ctx).(*fifo)
-	assert.NotNil(t, q)
+		dummyTask := genDummyTask()
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
 
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1}))
+		pollResults := make(chan error, 3)
 
-	waitForProcess()
-	got, err := q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, err)
+		for agentID := int64(1); agentID <= 3; agentID++ {
+			go func(id int64) {
+				_, err := q.Poll(ctx, id, filterFnTrue)
+				pollResults <- err
+			}(agentID)
+		}
 
-	// Try to error a task that's running (it will be removed from running, not pending/waiting)
-	assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("test error")))
+		time.Sleep(50 * time.Millisecond)
 
-	// Try to error a completely non-existent task
-	err = q.Error(ctx, "totally-fake-id", fmt.Errorf("test error"))
-	assert.Error(t, err, "should return error for non-existent task")
-}
+		q.KickAgentWorkers(2)
 
-func TestFifoRemoveFromWaitingOnDeps(t *testing.T) {
-	ctx, cancel := context.WithCancelCause(t.Context())
-	t.Cleanup(func() { cancel(nil) })
+		select {
+		case err := <-pollResults:
+			assert.Error(t, err)
+			if errors.Is(err, context.Canceled) {
+				assert.True(t, errors.Is(err, context.Canceled), "expected context.Canceled")
+			}
+		case <-time.After(time.Second):
+			t.Fatal("expected worker to be kicked")
+		}
 
-	task1 := genDummyTask()
-	task2 := &model.Task{
-		ID:           "2",
-		Dependencies: []string{"1"},
-		DepStatus:    make(map[string]model.StatusValue),
-	}
-	task3 := &model.Task{
-		ID:           "3",
-		Dependencies: []string{"1"},
-		DepStatus:    make(map[string]model.StatusValue),
-	}
+		info := q.Info(ctx)
+		assert.GreaterOrEqual(t, info.Stats.Workers, 2, "other workers should still be registered")
+	})
 
-	q, _ := NewMemoryQueue(ctx).(*fifo)
-	assert.NotNil(t, q)
+	t.Run("kick multiple workers for same agent", func(t *testing.T) {
+		q, _ := NewMemoryQueue(ctx).(*fifo)
+		assert.NotNil(t, q)
 
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3}))
+		pollResults := make(chan error, 5)
 
-	waitForProcess()
-	info := q.Info(ctx)
-	assert.Equal(t, 2, info.Stats.WaitingOnDeps, "task2 and task3 should be waiting")
+		for i := 0; i < 5; i++ {
+			go func() {
+				_, err := q.Poll(ctx, 42, filterFnTrue)
+				pollResults <- err
+			}()
+		}
 
-	// Error task2 while it's in waitingOnDeps
-	assert.NoError(t, q.Error(ctx, "2", fmt.Errorf("cancel task")))
+		time.Sleep(50 * time.Millisecond)
 
-	waitForProcess()
-	info = q.Info(ctx)
-	assert.Equal(t, 1, info.Stats.WaitingOnDeps, "only task3 should be waiting now")
+		info := q.Info(ctx)
+		initialWorkers := info.Stats.Workers
+		assert.Equal(t, 5, initialWorkers, "all workers should be registered")
+
+		q.KickAgentWorkers(42)
+
+		kickedCount := 0
+		for i := 0; i < 5; i++ {
+			select {
+			case err := <-pollResults:
+				assert.Error(t, err)
+				if errors.Is(err, context.Canceled) {
+					kickedCount++
+				}
+			case <-time.After(time.Second):
+				t.Fatal("expected all workers to be kicked")
+			}
+		}
+
+		assert.Equal(t, 5, kickedCount, "all 5 workers should have been kicked")
+
+		time.Sleep(50 * time.Millisecond)
+		info = q.Info(ctx)
+		assert.Equal(t, 0, info.Stats.Workers, "all workers should be removed")
+	})
 }
 
 func TestFifoPollContextCanceled(t *testing.T) {
@@ -1103,10 +1082,8 @@ func TestFifoPollContextCanceled(t *testing.T) {
 		errCh <- err
 	}()
 
-	// Give Poll time to register the worker
 	time.Sleep(50 * time.Millisecond)
 
-	// Cancel the poll context
 	pollCancel()
 
 	select {
@@ -1120,426 +1097,317 @@ func TestFifoPollContextCanceled(t *testing.T) {
 	cancel(nil)
 }
 
-func TestFifoMultipleDepStatusUpdates(t *testing.T) {
-	ctx, cancel := context.WithCancelCause(t.Context())
-	t.Cleanup(func() { cancel(nil) })
-
-	task1 := genDummyTask()
-	task2 := &model.Task{
-		ID: "2",
-	}
-	task3 := &model.Task{
-		ID:           "3",
-		Dependencies: []string{"1", "2"},
-		DepStatus:    make(map[string]model.StatusValue),
-	}
-
-	q, _ := NewMemoryQueue(ctx).(*fifo)
-	assert.NotNil(t, q)
-
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3}))
-
-	waitForProcess()
-
-	// Get both independent tasks
-	got1, _ := q.Poll(ctx, 1, filterFnTrue)
-	got2, _ := q.Poll(ctx, 2, filterFnTrue)
-
-	// Complete one with success
-	assert.NoError(t, q.Done(ctx, got1.ID, model.StatusSuccess))
-
-	// Complete other with failure
-	assert.NoError(t, q.Error(ctx, got2.ID, fmt.Errorf("failed")))
-
-	waitForProcess()
-
-	// Get task3 and check both dependencies are updated
-	got3, err := q.Poll(ctx, 3, filterFnTrue)
-	assert.NoError(t, err)
-
-	assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"])
-	assert.Equal(t, model.StatusFailure, got3.DepStatus["2"])
-	assert.False(t, got3.ShouldRun(), "task3 should not run since one dependency failed")
-}
-
-func TestFifoErrorAtOnceCoverage(t *testing.T) {
-	ctx, cancel := context.WithCancelCause(t.Context())
-	t.Cleanup(func() { cancel(nil) })
-
-	q, _ := NewMemoryQueue(ctx).(*fifo)
-	assert.NotNil(t, q)
-
-	task1 := genDummyTask()
-	task2 := &model.Task{
-		ID: "2",
-	}
-
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2}))
-
-	waitForProcess()
-	got, err := q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, err)
-
-	// Test ErrorAtOnce with one running task and one non-existent task
-	// This should trigger the error case in finished() where removeFromPendingAndWaiting returns ErrNotFound
-	err = q.ErrorAtOnce(ctx, []string{got.ID, "non-existent-id"}, fmt.Errorf("batch error"))
-	assert.Error(t, err, "should return error when one of the tasks doesn't exist")
-	assert.ErrorIs(t, err, ErrNotFound, "error should contain ErrNotFound")
-
-	waitForProcess()
-	info := q.Info(ctx)
-	assert.Len(t, info.Running, 0, "running task should be removed")
-}
-
-func TestFifoErrorAtOnceMultipleNonExistent(t *testing.T) {
-	ctx, cancel := context.WithCancelCause(t.Context())
-	t.Cleanup(func() { cancel(nil) })
-
-	q, _ := NewMemoryQueue(ctx).(*fifo)
-	assert.NotNil(t, q)
-
-	task1 := genDummyTask()
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1}))
-
-	waitForProcess()
-	got, err := q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, err)
-
-	// Test ErrorAtOnce with multiple non-existent tasks
-	// This tests that errors.Join works correctly with multiple errors
-	err = q.ErrorAtOnce(ctx, []string{got.ID, "fake-1", "fake-2", "fake-3"}, fmt.Errorf("multiple errors"))
-	assert.Error(t, err, "should return joined errors")
-
-	// The error should contain multiple ErrNotFound instances
-	errStr := err.Error()
-	assert.Contains(t, errStr, "not found", "error should mention not found")
-}
-
-func TestFifoErrorAtOnceWithErrCancel(t *testing.T) {
+func TestFifoDepStatusUpdates(t *testing.T) {
 	ctx, cancel := context.WithCancelCause(t.Context())
 	t.Cleanup(func() { cancel(nil) })
 
-	q, _ := NewMemoryQueue(ctx).(*fifo)
-	assert.NotNil(t, q)
+	t.Run("update dep status in queue", func(t *testing.T) {
+		task1 := genDummyTask()
+		task2 := &model.Task{
+			ID:           "2",
+			Dependencies: []string{"1"},
+			DepStatus:    make(map[string]model.StatusValue),
+		}
+		task3 := &model.Task{
+			ID:           "3",
+			Dependencies: []string{"1"},
+			DepStatus:    make(map[string]model.StatusValue),
+		}
+		task4 := &model.Task{
+			ID:           "4",
+			Dependencies: []string{"2"},
+			DepStatus:    make(map[string]model.StatusValue),
+		}
 
-	task1 := genDummyTask()
-	task2 := &model.Task{
-		ID:           "2",
-		Dependencies: []string{"1"},
-		DepStatus:    make(map[string]model.StatusValue),
-	}
+		q, _ := NewMemoryQueue(ctx).(*fifo)
+		assert.NotNil(t, q)
 
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2}))
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3, task4}))
 
-	waitForProcess()
-	got, err := q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, err)
+		waitForProcess()
+		got, err := q.Poll(ctx, 1, filterFnTrue)
+		assert.NoError(t, err)
+		assert.Equal(t, task1, got)
 
-	// Test the ErrCancel specific path which sets StatusKilled
-	assert.NoError(t, q.ErrorAtOnce(ctx, []string{got.ID}, ErrCancel))
+		assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
 
-	waitForProcess()
+		waitForProcess()
 
-	// Get the dependent task and verify its dependency status is StatusKilled
-	got2, err := q.Poll(ctx, 2, filterFnTrue)
-	assert.NoError(t, err)
-	assert.Equal(t, model.StatusKilled, got2.DepStatus["1"], "dependency should be marked as killed")
-}
+		got2, err := q.Poll(ctx, 2, filterFnTrue)
+		assert.NoError(t, err)
+		assert.Equal(t, model.StatusSuccess, got2.DepStatus["1"], "task2 should have task1's status updated")
 
-func TestFifoExtendWithWrongAgent(t *testing.T) {
-	ctx, cancel := context.WithCancelCause(t.Context())
-	t.Cleanup(func() { cancel(nil) })
+		got3, err := q.Poll(ctx, 3, filterFnTrue)
+		assert.NoError(t, err)
+		assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"], "task3 should have task1's status updated")
+	})
 
-	q, _ := NewMemoryQueue(ctx).(*fifo)
-	assert.NotNil(t, q)
+	t.Run("multiple dep status updates", func(t *testing.T) {
+		task1 := genDummyTask()
+		task2 := &model.Task{
+			ID: "2",
+		}
+		task3 := &model.Task{
+			ID:           "3",
+			Dependencies: []string{"1", "2"},
+			DepStatus:    make(map[string]model.StatusValue),
+		}
 
-	dummyTask := genDummyTask()
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
+		q, _ := NewMemoryQueue(ctx).(*fifo)
+		assert.NotNil(t, q)
 
-	waitForProcess()
-	got, err := q.Poll(ctx, 5, filterFnTrue) // Agent 5 polls the task
-	assert.NoError(t, err)
-	assert.Equal(t, int64(5), got.AgentID, "task should be assigned to agent 5")
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3}))
 
-	// Test extend with correct agent - should succeed
-	assert.NoError(t, q.Extend(ctx, 5, got.ID))
+		waitForProcess()
 
-	// Test extend with wrong agent - should return ErrAgentMissMatch
-	err = q.Extend(ctx, 999, got.ID)
-	assert.ErrorIs(t, err, ErrAgentMissMatch, "should return ErrAgentMissMatch when wrong agent tries to extend")
+		got1, _ := q.Poll(ctx, 1, filterFnTrue)
+		got2, _ := q.Poll(ctx, 2, filterFnTrue)
 
-	// Test extend with another wrong agent
-	err = q.Extend(ctx, 1, got.ID)
-	assert.ErrorIs(t, err, ErrAgentMissMatch, "should return ErrAgentMissMatch for any wrong agent")
+		assert.NoError(t, q.Done(ctx, got1.ID, model.StatusSuccess))
+		assert.NoError(t, q.Error(ctx, got2.ID, fmt.Errorf("failed")))
 
-	// Verify the correct agent can still extend
-	assert.NoError(t, q.Extend(ctx, 5, got.ID), "correct agent should still be able to extend")
+		waitForProcess()
 
-	// Clean up
-	assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
-}
+		got3, err := q.Poll(ctx, 3, filterFnTrue)
+		assert.NoError(t, err)
 
-func TestFifoFinishedWithMultipleErrors(t *testing.T) {
-	ctx, cancel := context.WithCancelCause(t.Context())
-	t.Cleanup(func() { cancel(nil) })
+		assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"])
+		assert.Equal(t, model.StatusFailure, got3.DepStatus["2"])
+		assert.False(t, got3.ShouldRun(), "task3 should not run since one dependency failed")
+	})
+
+	t.Run("update dep status in pending queue", func(t *testing.T) {
+		task1 := genDummyTask()
+		task2 := &model.Task{
+			ID:           "2",
+			Dependencies: []string{"1"},
+			DepStatus:    make(map[string]model.StatusValue),
+		}
+		task3 := &model.Task{
+			ID:           "3",
+			Dependencies: []string{"1"},
+			DepStatus:    make(map[string]model.StatusValue),
+		}
 
-	q, _ := NewMemoryQueue(ctx).(*fifo)
-	assert.NotNil(t, q)
+		q, _ := NewMemoryQueue(ctx).(*fifo)
+		assert.NotNil(t, q)
 
-	task1 := genDummyTask()
-	task2 := &model.Task{
-		ID:           "2",
-		Dependencies: []string{"1"},
-		DepStatus:    make(map[string]model.StatusValue),
-	}
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3}))
 
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2}))
+		waitForProcess()
 
-	waitForProcess()
+		got, err := q.Poll(ctx, 1, filterFnTrue)
+		assert.NoError(t, err)
+		assert.Equal(t, task1, got)
 
-	// Poll task1 so it's in running
-	got, err := q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, err)
-	assert.Equal(t, "1", got.ID)
+		info := q.Info(ctx)
+		assert.Equal(t, 2, info.Stats.WaitingOnDeps, "task2 and task3 should be waiting")
 
-	// Now call ErrorAtOnce with:
-	// - got.ID (task1) which is in running - should succeed
-	// - "2" which is in waitingOnDeps - should succeed
-	// - "fake-1" which doesn't exist - should return ErrNotFound
-	// - "fake-2" which doesn't exist - should return ErrNotFound
-	err = q.ErrorAtOnce(ctx, []string{got.ID, "2", "fake-1", "fake-2"}, fmt.Errorf("test error"))
+		assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
 
-	assert.Error(t, err, "should return error for non-existent tasks")
+		waitForProcess()
 
-	// Verify that the error contains multiple ErrNotFound (from errors.Join)
-	assert.ErrorIs(t, err, ErrNotFound, "error should contain ErrNotFound")
+		got2, err := q.Poll(ctx, 2, filterFnTrue)
+		assert.NoError(t, err)
+		assert.Equal(t, model.StatusSuccess, got2.DepStatus["1"],
+			"task2's DepStatus should be updated in pending queue")
 
-	// Check that both real tasks were removed
-	waitForProcess()
-	info := q.Info(ctx)
-	assert.Len(t, info.Running, 0, "task1 should be removed from running")
-	assert.Len(t, info.WaitingOnDeps, 0, "task2 should be removed from waiting")
-	assert.Len(t, info.Pending, 0, "no tasks should be pending")
-}
+		got3, err := q.Poll(ctx, 3, filterFnTrue)
+		assert.NoError(t, err)
+		assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"],
+			"task3's DepStatus should be updated in pending queue")
+	})
+
+	t.Run("update dep status in running queue", func(t *testing.T) {
+		task1 := genDummyTask()
+		task2 := &model.Task{
+			ID: "2",
+		}
+		task3 := &model.Task{
+			ID:           "3",
+			Dependencies: []string{"1", "2"},
+			DepStatus:    make(map[string]model.StatusValue),
+		}
+		task4 := &model.Task{
+			ID:           "4",
+			Dependencies: []string{"1"},
+			DepStatus:    make(map[string]model.StatusValue),
+		}
 
-func TestFifoUpdateDepStatusInQueuePending(t *testing.T) {
-	ctx, cancel := context.WithCancelCause(t.Context())
-	t.Cleanup(func() { cancel(nil) })
+		q, _ := NewMemoryQueue(ctx).(*fifo)
+		assert.NotNil(t, q)
 
-	task1 := genDummyTask()
-	task2 := &model.Task{
-		ID:           "2",
-		Dependencies: []string{"1"},
-		DepStatus:    make(map[string]model.StatusValue),
-	}
-	task3 := &model.Task{
-		ID:           "3",
-		Dependencies: []string{"1"},
-		DepStatus:    make(map[string]model.StatusValue),
-	}
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3, task4}))
 
-	q, _ := NewMemoryQueue(ctx).(*fifo)
-	assert.NotNil(t, q)
+		waitForProcess()
 
-	// Push all tasks - task2 and task3 will be in pending initially (before filterWaiting moves them)
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3}))
+		got1, err := q.Poll(ctx, 1, filterFnTrue)
+		assert.NoError(t, err)
 
-	waitForProcess()
+		got2, err := q.Poll(ctx, 2, filterFnTrue)
+		assert.NoError(t, err)
 
-	// At this point, task2 and task3 should be in waitingOnDeps
-	// but we want to test when they're in pending
+		waitForProcess()
 
-	// Get task1
-	got, err := q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, err)
-	assert.Equal(t, task1, got)
+		assert.NoError(t, q.Done(ctx, got1.ID, model.StatusSuccess))
 
-	// Before completing task1, let's verify task2 and task3 are waiting
-	info := q.Info(ctx)
-	assert.Equal(t, 2, info.Stats.WaitingOnDeps, "task2 and task3 should be waiting")
+		waitForProcess()
 
-	// Now complete task1 - this should trigger updateDepStatusInQueue
-	// which will update DepStatus in the waitingOnDeps queue
-	assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
+		got4, err := q.Poll(ctx, 4, filterFnTrue)
+		assert.NoError(t, err)
+		assert.Equal(t, model.StatusSuccess, got4.DepStatus["1"],
+			"task4's DepStatus should show task1 succeeded")
 
-	waitForProcess()
+		assert.NoError(t, q.Done(ctx, got2.ID, model.StatusSuccess))
 
-	// Now get task2 and verify its DepStatus was updated
-	got2, err := q.Poll(ctx, 2, filterFnTrue)
-	assert.NoError(t, err)
-	assert.Equal(t, model.StatusSuccess, got2.DepStatus["1"],
-		"task2's DepStatus should be updated in pending queue")
+		waitForProcess()
 
-	// Get task3 and verify its DepStatus was updated
-	got3, err := q.Poll(ctx, 3, filterFnTrue)
-	assert.NoError(t, err)
-	assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"],
-		"task3's DepStatus should be updated in pending queue")
-}
+		got3, err := q.Poll(ctx, 3, filterFnTrue)
+		assert.NoError(t, err)
+		assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"],
+			"task3's DepStatus should show task1 succeeded")
+		assert.Equal(t, model.StatusSuccess, got3.DepStatus["2"],
+			"task3's DepStatus should show task2 succeeded")
+		assert.True(t, got3.ShouldRun(), "task3 should run since both deps succeeded")
+
+		assert.NoError(t, q.Done(ctx, got3.ID, model.StatusSuccess))
+		assert.NoError(t, q.Done(ctx, got4.ID, model.StatusSuccess))
+	})
+
+	t.Run("update dep status in waiting queue", func(t *testing.T) {
+		task1 := genDummyTask()
+		task2 := &model.Task{
+			ID:           "2",
+			Dependencies: []string{"1"},
+			DepStatus:    make(map[string]model.StatusValue),
+		}
+		task3 := &model.Task{
+			ID:           "3",
+			Dependencies: []string{"1"},
+			DepStatus:    make(map[string]model.StatusValue),
+		}
+		task4 := &model.Task{
+			ID:           "4",
+			Dependencies: []string{"1"},
+			DepStatus:    make(map[string]model.StatusValue),
+		}
 
-func TestFifoUpdateDepStatusInQueueRunning(t *testing.T) {
-	ctx, cancel := context.WithCancelCause(t.Context())
-	t.Cleanup(func() { cancel(nil) })
+		q, _ := NewMemoryQueue(ctx).(*fifo)
+		assert.NotNil(t, q)
 
-	task1 := genDummyTask()
-	task2 := &model.Task{
-		ID: "2",
-	}
-	task3 := &model.Task{
-		ID:           "3",
-		Dependencies: []string{"1", "2"},
-		DepStatus:    make(map[string]model.StatusValue),
-	}
-	task4 := &model.Task{
-		ID:           "4",
-		Dependencies: []string{"1"},
-		DepStatus:    make(map[string]model.StatusValue),
-	}
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3, task4}))
 
-	q, _ := NewMemoryQueue(ctx).(*fifo)
-	assert.NotNil(t, q)
+		waitForProcess()
 
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3, task4}))
+		info := q.Info(ctx)
+		assert.Equal(t, 3, info.Stats.WaitingOnDeps, "tasks 2, 3, 4 should be waiting")
 
-	waitForProcess()
+		got, err := q.Poll(ctx, 1, filterFnTrue)
+		assert.NoError(t, err)
+		assert.Equal(t, task1, got)
 
-	// Get task1 and task2 (both independent)
-	got1, err := q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, err)
+		assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("task1 failed")))
 
-	got2, err := q.Poll(ctx, 2, filterFnTrue)
-	assert.NoError(t, err)
+		waitForProcess()
 
-	waitForProcess()
+		for i := 2; i <= 4; i++ {
+			gotTask, err := q.Poll(ctx, int64(i), filterFnTrue)
+			assert.NoError(t, err)
+			assert.Equal(t, model.StatusFailure, gotTask.DepStatus["1"],
+				"task %d's DepStatus should be updated in waitingOnDeps", i)
+			assert.False(t, gotTask.ShouldRun(), "task %d should not run due to failed dependency", i)
+		}
+	})
+
+	t.Run("update dep status with skipped", func(t *testing.T) {
+		task1 := genDummyTask()
+		task2 := &model.Task{
+			ID:           "2",
+			Dependencies: []string{"1"},
+			DepStatus:    make(map[string]model.StatusValue),
+		}
+		task3 := &model.Task{
+			ID:           "3",
+			Dependencies: []string{"2"},
+			DepStatus:    make(map[string]model.StatusValue),
+		}
 
-	// Now complete task1 first
-	assert.NoError(t, q.Done(ctx, got1.ID, model.StatusSuccess))
+		q, _ := NewMemoryQueue(ctx).(*fifo)
+		assert.NotNil(t, q)
 
-	waitForProcess()
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3}))
 
-	// Now task3 and task4 are waiting on their dependencies
-	// task3 still needs task2, task4 should be ready now
+		waitForProcess()
 
-	// Get task4 which should now be available since task1 is done
-	got4, err := q.Poll(ctx, 4, filterFnTrue)
-	assert.NoError(t, err)
-	assert.Equal(t, model.StatusSuccess, got4.DepStatus["1"],
-		"task4's DepStatus should show task1 succeeded")
+		got1, err := q.Poll(ctx, 1, filterFnTrue)
+		assert.NoError(t, err)
+		assert.NoError(t, q.Error(ctx, got1.ID, fmt.Errorf("failed")))
 
-	// Now complete task2
-	assert.NoError(t, q.Done(ctx, got2.ID, model.StatusSuccess))
+		waitForProcess()
 
-	waitForProcess()
+		got2, err := q.Poll(ctx, 2, filterFnTrue)
+		assert.NoError(t, err)
+		assert.False(t, got2.ShouldRun())
+		assert.NoError(t, q.Done(ctx, got2.ID, model.StatusSkipped))
 
-	// Now get task3 - both its dependencies are satisfied
-	got3, err := q.Poll(ctx, 3, filterFnTrue)
-	assert.NoError(t, err)
-	assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"],
-		"task3's DepStatus should show task1 succeeded")
-	assert.Equal(t, model.StatusSuccess, got3.DepStatus["2"],
-		"task3's DepStatus should show task2 succeeded")
-	assert.True(t, got3.ShouldRun(), "task3 should run since both deps succeeded")
+		waitForProcess()
 
-	// Clean up remaining tasks
-	assert.NoError(t, q.Done(ctx, got3.ID, model.StatusSuccess))
-	assert.NoError(t, q.Done(ctx, got4.ID, model.StatusSuccess))
+		got3, err := q.Poll(ctx, 3, filterFnTrue)
+		assert.NoError(t, err)
+		assert.Equal(t, model.StatusSkipped, got3.DepStatus["2"],
+			"task3 should have task2's skipped status")
+		assert.False(t, got3.ShouldRun(), "task3 should not run due to skipped dependency")
+	})
 }
 
-func TestFifoUpdateDepStatusInQueueWaiting(t *testing.T) {
+func TestFifoRemoveOperations(t *testing.T) {
 	ctx, cancel := context.WithCancelCause(t.Context())
 	t.Cleanup(func() { cancel(nil) })
 
-	task1 := genDummyTask()
-	task2 := &model.Task{
-		ID:           "2",
-		Dependencies: []string{"1"},
-		DepStatus:    make(map[string]model.StatusValue),
-	}
-	task3 := &model.Task{
-		ID:           "3",
-		Dependencies: []string{"1"},
-		DepStatus:    make(map[string]model.StatusValue),
-	}
-	task4 := &model.Task{
-		ID:           "4",
-		Dependencies: []string{"1"},
-		DepStatus:    make(map[string]model.StatusValue),
-	}
-
-	q, _ := NewMemoryQueue(ctx).(*fifo)
-	assert.NotNil(t, q)
-
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3, task4}))
-
-	waitForProcess()
-
-	// Verify tasks are waiting on deps
-	info := q.Info(ctx)
-	assert.Equal(t, 3, info.Stats.WaitingOnDeps, "tasks 2, 3, 4 should be waiting")
-
-	// Get and complete task1
-	got, err := q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, err)
-	assert.Equal(t, task1, got)
+	t.Run("remove from pending and waiting not found", func(t *testing.T) {
+		task1 := genDummyTask()
 
-	// Complete task1 with failure - this should update DepStatus in waitingOnDeps
-	assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("task1 failed")))
+		q, _ := NewMemoryQueue(ctx).(*fifo)
+		assert.NotNil(t, q)
 
-	waitForProcess()
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1}))
 
-	// All waiting tasks should have their DepStatus updated
-	// Get each one and verify
-	for i := 2; i <= 4; i++ {
-		gotTask, err := q.Poll(ctx, int64(i), filterFnTrue)
+		waitForProcess()
+		got, err := q.Poll(ctx, 1, filterFnTrue)
 		assert.NoError(t, err)
-		assert.Equal(t, model.StatusFailure, gotTask.DepStatus["1"],
-			"task %d's DepStatus should be updated in waitingOnDeps", i)
-		assert.False(t, gotTask.ShouldRun(), "task %d should not run due to failed dependency", i)
-	}
-}
-
-func TestFifoUpdateDepStatusWithSkipped(t *testing.T) {
-	ctx, cancel := context.WithCancelCause(t.Context())
-	t.Cleanup(func() { cancel(nil) })
-
-	task1 := genDummyTask()
-	task2 := &model.Task{
-		ID:           "2",
-		Dependencies: []string{"1"},
-		DepStatus:    make(map[string]model.StatusValue),
-	}
-	task3 := &model.Task{
-		ID:           "3",
-		Dependencies: []string{"2"},
-		DepStatus:    make(map[string]model.StatusValue),
-	}
 
-	q, _ := NewMemoryQueue(ctx).(*fifo)
-	assert.NotNil(t, q)
+		assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("test error")))
 
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3}))
+		err = q.Error(ctx, "totally-fake-id", fmt.Errorf("test error"))
+		assert.Error(t, err, "should return error for non-existent task")
+	})
 
-	waitForProcess()
+	t.Run("remove from waiting on deps", func(t *testing.T) {
+		task1 := genDummyTask()
+		task2 := &model.Task{
+			ID:           "2",
+			Dependencies: []string{"1"},
+			DepStatus:    make(map[string]model.StatusValue),
+		}
+		task3 := &model.Task{
+			ID:           "3",
+			Dependencies: []string{"1"},
+			DepStatus:    make(map[string]model.StatusValue),
+		}
 
-	// Get and fail task1
-	got1, err := q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, err)
-	assert.NoError(t, q.Error(ctx, got1.ID, fmt.Errorf("failed")))
+		q, _ := NewMemoryQueue(ctx).(*fifo)
+		assert.NotNil(t, q)
 
-	waitForProcess()
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3}))
 
-	// Get task2 - should not run but get skipped
-	got2, err := q.Poll(ctx, 2, filterFnTrue)
-	assert.NoError(t, err)
-	assert.False(t, got2.ShouldRun())
-	assert.NoError(t, q.Done(ctx, got2.ID, model.StatusSkipped))
+		waitForProcess()
+		info := q.Info(ctx)
+		assert.Equal(t, 2, info.Stats.WaitingOnDeps, "task2 and task3 should be waiting")
 
-	waitForProcess()
+		assert.NoError(t, q.Error(ctx, "2", fmt.Errorf("cancel task")))
 
-	// Get task3 - should have task2's skipped status updated
-	got3, err := q.Poll(ctx, 3, filterFnTrue)
-	assert.NoError(t, err)
-	assert.Equal(t, model.StatusSkipped, got3.DepStatus["2"],
-		"task3 should have task2's skipped status")
-	assert.False(t, got3.ShouldRun(), "task3 should not run due to skipped dependency")
+		waitForProcess()
+		info = q.Info(ctx)
+		assert.Equal(t, 1, info.Stats.WaitingOnDeps, "only task3 should be waiting now")
+	})
 }

From 679aba56d87ed35574f84b9e4253788907a3abc4 Mon Sep 17 00:00:00 2001
From: 6543 <6543@obermui.de>
Date: Mon, 26 Jan 2026 19:26:35 +0100
Subject: [PATCH 09/13] same coverage and less test code

---
 server/queue/fifo.go      |    6 +-
 server/queue/fifo_test.go | 1614 +++++++++----------------------------
 2 files changed, 373 insertions(+), 1247 deletions(-)

diff --git a/server/queue/fifo.go b/server/queue/fifo.go
index 69eb3c97cc3..4cac0b22992 100644
--- a/server/queue/fifo.go
+++ b/server/queue/fifo.go
@@ -277,10 +277,8 @@ func (q *fifo) process() {
 
 func (q *fifo) filterWaiting() {
 	// resubmits all waiting tasks to pending, deps may have cleared
-	var nextWaiting *list.Element
-	for e := q.waitingOnDeps.Front(); e != nil; e = nextWaiting {
-		nextWaiting = e.Next()
-		task, _ := e.Value.(*model.Task)
+	for element := q.waitingOnDeps.Front(); element != nil; element = element.Next() {
+		task, _ := element.Value.(*model.Task)
 		q.pending.PushBack(task)
 	}
 
diff --git a/server/queue/fifo_test.go b/server/queue/fifo_test.go
index 5a8db3f3e8a..5a0ce0a10c1 100644
--- a/server/queue/fifo_test.go
+++ b/server/queue/fifo_test.go
@@ -38,474 +38,50 @@ var (
 	waitForProcess = func() { time.Sleep(processTimeInterval + 50*time.Millisecond) }
 )
 
-func TestFifoBasicOperations(t *testing.T) {
+func setupTestQueue(t *testing.T) (context.Context, context.CancelCauseFunc, *fifo) {
 	ctx, cancel := context.WithCancelCause(t.Context())
 	t.Cleanup(func() { cancel(nil) })
 
-	t.Run("basic push poll done flow", func(t *testing.T) {
-		q := NewMemoryQueue(ctx)
-		dummyTask := genDummyTask()
-
-		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
-		waitForProcess()
-		info := q.Info(ctx)
-		assert.Len(t, info.Pending, 1, "expect task in pending queue")
-
-		got, err := q.Poll(ctx, 1, filterFnTrue)
-		assert.NoError(t, err)
-		assert.Equal(t, dummyTask, got)
-
-		waitForProcess()
-		info = q.Info(ctx)
-		assert.Len(t, info.Pending, 0, "expect task removed from pending queue")
-		assert.Len(t, info.Running, 1, "expect task in running queue")
-
-		assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
-
-		waitForProcess()
-		info = q.Info(ctx)
-		assert.Len(t, info.Pending, 0, "expect task removed from pending queue")
-		assert.Len(t, info.Running, 0, "expect task removed from running queue")
-	})
-
-	t.Run("task expiration", func(t *testing.T) {
-		q, _ := NewMemoryQueue(ctx).(*fifo)
-		assert.NotNil(t, q)
-		dummyTask := genDummyTask()
-
-		q.extension = 0
-		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
-		waitForProcess()
-		info := q.Info(ctx)
-		assert.Len(t, info.Pending, 1, "expect task in pending queue")
-
-		got, err := q.Poll(ctx, 1, filterFnTrue)
-		assert.NoError(t, err)
-		assert.Equal(t, dummyTask, got)
-
-		waitForProcess()
-		info = q.Info(ctx)
-		assert.Len(t, info.Pending, 1, "expect task re-added to pending queue")
-	})
-
-	t.Run("pause and resume", func(t *testing.T) {
-		q, _ := NewMemoryQueue(ctx).(*fifo)
-		assert.NotNil(t, q)
-		dummyTask := genDummyTask()
-
-		var wg sync.WaitGroup
-		wg.Add(1)
-		go func() {
-			_, _ = q.Poll(ctx, 1, filterFnTrue)
-			wg.Done()
-		}()
-
-		q.Pause()
-		t0 := time.Now()
-		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
-		waitForProcess()
-		q.Resume()
-
-		wg.Wait()
-		t1 := time.Now()
-
-		assert.Greater(t, t1.Sub(t0), 20*time.Millisecond, "should have waited til resume")
-
-		q.Pause()
-		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
-		q.Resume()
-		_, _ = q.Poll(ctx, 1, filterFnTrue)
-	})
-
-	t.Run("pause then resume", func(t *testing.T) {
-		q, _ := NewMemoryQueue(ctx).(*fifo)
-		assert.NotNil(t, q)
-		dummyTask := genDummyTask()
-
-		q.Pause()
-		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
-		q.Resume()
-
-		_, _ = q.Poll(ctx, 1, filterFnTrue)
-	})
-}
-
-func TestFifoWait(t *testing.T) {
-	ctx, cancel := context.WithCancelCause(t.Context())
-	t.Cleanup(func() { cancel(nil) })
-
-	t.Run("wait on task completion", func(t *testing.T) {
-		q, _ := NewMemoryQueue(ctx).(*fifo)
-		assert.NotNil(t, q)
-		dummyTask := genDummyTask()
-
-		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
-
-		waitForProcess()
-		got, err := q.Poll(ctx, 1, filterFnTrue)
-		assert.NoError(t, err)
-		assert.Equal(t, dummyTask, got)
-
-		var wg sync.WaitGroup
-		wg.Add(1)
-		go func() {
-			assert.NoError(t, q.Wait(ctx, got.ID))
-			wg.Done()
-		}()
-
-		<-time.After(time.Millisecond)
-		assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
-		wg.Wait()
-	})
-
-	t.Run("wait returns error on expiration", func(t *testing.T) {
-		q, _ := NewMemoryQueue(ctx).(*fifo)
-		assert.NotNil(t, q)
-		q.extension = 0
-
-		dummyTask := genDummyTask()
-		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
-
-		waitForProcess()
-		got, err := q.Poll(ctx, 1, filterFnTrue)
-		assert.NoError(t, err)
-		assert.Equal(t, dummyTask, got)
-
-		errCh := make(chan error, 1)
-		go func() {
-			errCh <- q.Wait(ctx, got.ID)
-		}()
-
-		waitForProcess()
-
-		select {
-		case werr := <-errCh:
-			assert.Error(t, werr, "expected Wait to return error when lease expires")
-		case <-time.After(2 * time.Second):
-			t.Fatal("timeout waiting for Wait to return after lease expiration")
-		}
-
-		info := q.Info(ctx)
-		assert.Len(t, info.Pending, 1, "expect task re-added to pending queue after expiration")
-	})
-
-	t.Run("wait with context canceled", func(t *testing.T) {
-		q, _ := NewMemoryQueue(ctx).(*fifo)
-		assert.NotNil(t, q)
-		dummyTask := genDummyTask()
-
-		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
-
-		waitForProcess()
-		got, err := q.Poll(ctx, 1, filterFnTrue)
-		assert.NoError(t, err)
-
-		waitCtx, waitCancel := context.WithCancel(ctx)
-
-		errCh := make(chan error, 1)
-		go func() {
-			errCh <- q.Wait(waitCtx, got.ID)
-		}()
-
-		time.Sleep(50 * time.Millisecond)
-		waitCancel()
-
-		select {
-		case err := <-errCh:
-			assert.NoError(t, err, "Wait should return nil when context is canceled before task completes")
-		case <-time.After(time.Second):
-			t.Fatal("Wait should return when context is canceled")
-		}
-	})
-
-	t.Run("wait on non-existent task", func(t *testing.T) {
-		q, _ := NewMemoryQueue(ctx).(*fifo)
-		assert.NotNil(t, q)
-
-		err := q.Wait(ctx, "non-existent-task")
-		assert.NoError(t, err, "Wait should return nil for non-existent task")
-	})
-}
-
-func TestFifoDependencies(t *testing.T) {
-	ctx, cancel := context.WithCancelCause(t.Context())
-	t.Cleanup(func() { cancel(nil) })
-
-	t.Run("basic dependency ordering", func(t *testing.T) {
-		task1 := genDummyTask()
-		task2 := &model.Task{
-			ID:           "2",
-			Dependencies: []string{"1"},
-			DepStatus:    make(map[string]model.StatusValue),
-		}
-
-		q, _ := NewMemoryQueue(ctx).(*fifo)
-		assert.NotNil(t, q)
-
-		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task1}))
-
-		waitForProcess()
-		got, err := q.Poll(ctx, 1, filterFnTrue)
-		assert.NoError(t, err)
-		assert.Equal(t, task1, got)
-
-		waitForProcess()
-		assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
-
-		waitForProcess()
-		got, err = q.Poll(ctx, 1, filterFnTrue)
-		assert.NoError(t, err)
-		assert.Equal(t, task2, got)
-	})
-
-	t.Run("waiting vs pending stats", func(t *testing.T) {
-		task1 := genDummyTask()
-		task2 := &model.Task{
-			ID:           "2",
-			Dependencies: []string{"1"},
-			DepStatus:    make(map[string]model.StatusValue),
-		}
-		task3 := &model.Task{
-			ID:           "3",
-			Dependencies: []string{"1"},
-			DepStatus:    make(map[string]model.StatusValue),
-			RunOn:        []string{"success", "failure"},
-		}
-
-		q, _ := NewMemoryQueue(ctx).(*fifo)
-		assert.NotNil(t, q)
-
-		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1}))
-
-		got, _ := q.Poll(ctx, 1, filterFnTrue)
-
-		waitForProcess()
-		info := q.Info(ctx)
-		assert.Equal(t, 2, info.Stats.WaitingOnDeps)
-
-		assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1, there was an error")))
-		got, err := q.Poll(ctx, 1, filterFnTrue)
-		assert.NoError(t, err)
-		assert.EqualValues(t, task2, got)
+	q, _ := NewMemoryQueue(ctx).(*fifo)
+	if q == nil {
+		t.Fatal("Failed to create queue")
+	}
 
-		waitForProcess()
-		info = q.Info(ctx)
-		assert.Equal(t, 0, info.Stats.WaitingOnDeps)
-		assert.Equal(t, 1, info.Stats.Pending)
-	})
+	return ctx, cancel, q
 }
 
-func TestFifoErrors(t *testing.T) {
-	ctx, cancel := context.WithCancelCause(t.Context())
-	t.Cleanup(func() { cancel(nil) })
-
-	t.Run("error handling with run_on conditions", func(t *testing.T) {
-		task1 := genDummyTask()
-		task2 := &model.Task{
-			ID:           "2",
-			Dependencies: []string{"1"},
-			DepStatus:    make(map[string]model.StatusValue),
-		}
-		task3 := &model.Task{
-			ID:           "3",
-			Dependencies: []string{"1"},
-			DepStatus:    make(map[string]model.StatusValue),
-			RunOn:        []string{"success", "failure"},
-		}
-
-		q, _ := NewMemoryQueue(ctx).(*fifo)
-		assert.NotNil(t, q)
-
-		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1}))
-
-		waitForProcess()
-		got, err := q.Poll(ctx, 1, filterFnTrue)
-		assert.NoError(t, err)
-		assert.Equal(t, task1, got)
-
-		assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1, there was an error")))
-
-		waitForProcess()
-		got, err = q.Poll(ctx, 1, filterFnTrue)
-		assert.NoError(t, err)
-		assert.Equal(t, task2, got)
-		assert.False(t, got.ShouldRun())
-
-		waitForProcess()
-		got, err = q.Poll(ctx, 1, filterFnTrue)
-		assert.NoError(t, err)
-		assert.Equal(t, task3, got)
-		assert.True(t, got.ShouldRun())
-	})
-
-	t.Run("multiple dependency failures", func(t *testing.T) {
-		task1 := genDummyTask()
-		task2 := &model.Task{
-			ID: "2",
-		}
-		task3 := &model.Task{
-			ID:           "3",
-			Dependencies: []string{"1", "2"},
-			DepStatus:    make(map[string]model.StatusValue),
-		}
-
-		q, _ := NewMemoryQueue(ctx).(*fifo)
-		assert.NotNil(t, q)
-
-		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1}))
-
-		for i := 0; i < 2; i++ {
-			waitForProcess()
-			got, err := q.Poll(ctx, 1, filterFnTrue)
-			assert.NoError(t, err)
-			assert.False(t, got != task1 && got != task2, "expect task1 or task2 returned from queue as task3 depends on them")
-
-			if got != task1 {
-				assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
-			}
-			if got != task2 {
-				assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1, there was an error")))
-			}
-		}
-
-		waitForProcess()
-		got, err := q.Poll(ctx, 1, filterFnTrue)
-		assert.NoError(t, err)
-		assert.Equal(t, task3, got)
-		assert.False(t, got.ShouldRun())
-	})
-
-	t.Run("transitive error propagation", func(t *testing.T) {
-		task1 := genDummyTask()
-		task2 := &model.Task{
-			ID:           "2",
-			Dependencies: []string{"1"},
-			DepStatus:    make(map[string]model.StatusValue),
-		}
-		task3 := &model.Task{
-			ID:           "3",
-			Dependencies: []string{"2"},
-			DepStatus:    make(map[string]model.StatusValue),
-		}
+func TestBasicQueueOperations(t *testing.T) {
+	ctx, cancel, q := setupTestQueue(t)
+	defer cancel(nil)
 
-		q, _ := NewMemoryQueue(ctx).(*fifo)
-		assert.NotNil(t, q)
+	dummyTask := genDummyTask()
 
-		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1}))
+	// Test push -> poll -> done lifecycle
+	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
+	waitForProcess()
 
-		waitForProcess()
-		got, err := q.Poll(ctx, 1, filterFnTrue)
-		assert.NoError(t, err)
-		assert.Equal(t, task1, got)
-		assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1, there was an error")))
+	info := q.Info(ctx)
+	assert.Len(t, info.Pending, 1)
 
-		waitForProcess()
-		got, err = q.Poll(ctx, 1, filterFnTrue)
-		assert.NoError(t, err)
-		assert.Equal(t, task2, got)
-		assert.False(t, got.ShouldRun(), "expect task2 should not run, since task1 failed")
-		assert.NoError(t, q.Done(ctx, got.ID, model.StatusSkipped))
+	got, err := q.Poll(ctx, 1, filterFnTrue)
+	assert.NoError(t, err)
+	assert.Equal(t, dummyTask, got)
 
-		waitForProcess()
-		got, err = q.Poll(ctx, 1, filterFnTrue)
-		assert.NoError(t, err)
-		assert.Equal(t, task3, got)
-		assert.False(t, got.ShouldRun(), "expect task3 should not run, task1 failed, thus task2 was skipped, task3 should be skipped too")
-	})
-
-	t.Run("multithreaded error handling", func(t *testing.T) {
-		task1 := genDummyTask()
-		task2 := &model.Task{
-			ID:           "2",
-			Dependencies: []string{"1"},
-			DepStatus:    make(map[string]model.StatusValue),
-		}
-		task3 := &model.Task{
-			ID:           "3",
-			Dependencies: []string{"1", "2"},
-			DepStatus:    make(map[string]model.StatusValue),
-		}
+	waitForProcess()
+	info = q.Info(ctx)
+	assert.Len(t, info.Pending, 0)
+	assert.Len(t, info.Running, 1)
 
-		q, _ := NewMemoryQueue(ctx).(*fifo)
-		assert.NotNil(t, q)
-
-		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1}))
-
-		obtainedWorkCh := make(chan *model.Task)
-		defer func() { close(obtainedWorkCh) }()
-
-		for i := 0; i < 10; i++ {
-			go func(i int) {
-				for {
-					fmt.Printf("Worker %d started\n", i)
-					got, err := q.Poll(ctx, 1, filterFnTrue)
-					if err != nil && errors.Is(err, context.Canceled) {
-						return
-					}
-					assert.NoError(t, err)
-					obtainedWorkCh <- got
-				}
-			}(i)
-		}
+	assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
 
-		task1Processed := false
-		task2Processed := false
-
-		for {
-			select {
-			case got := <-obtainedWorkCh:
-				fmt.Println(got.ID)
-
-				switch {
-				case !task1Processed:
-					assert.Equal(t, task1, got)
-					task1Processed = true
-					assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1, there was an error")))
-					go func() {
-						for {
-							fmt.Printf("Worker spawned\n")
-							got, err := q.Poll(ctx, 1, filterFnTrue)
-							if err != nil && errors.Is(err, context.Canceled) {
-								return
-							}
-							assert.NoError(t, err)
-							obtainedWorkCh <- got
-						}
-					}()
-				case !task2Processed:
-					assert.Equal(t, task2, got)
-					task2Processed = true
-					assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
-					go func() {
-						for {
-							fmt.Printf("Worker spawned\n")
-							got, err := q.Poll(ctx, 1, filterFnTrue)
-							if err != nil && errors.Is(err, context.Canceled) {
-								return
-							}
-							assert.NoError(t, err)
-							obtainedWorkCh <- got
-						}
-					}()
-				default:
-					assert.Equal(t, task3, got)
-					assert.False(t, got.ShouldRun(), "expect task3 should not run, task1 succeeded but task2 failed")
-					return
-				}
-
-			case <-time.After(5 * time.Second):
-				info := q.Info(ctx)
-				fmt.Println(info.String())
-				t.Errorf("test timed out")
-				return
-			}
-		}
-	})
+	waitForProcess()
+	info = q.Info(ctx)
+	assert.Len(t, info.Running, 0)
 }
 
-func TestFifoCancel(t *testing.T) {
-	ctx, cancel := context.WithCancelCause(t.Context())
-	t.Cleanup(func() { cancel(nil) })
+func TestTaskDependencies(t *testing.T) {
+	ctx, cancel, q := setupTestQueue(t)
+	defer cancel(nil)
 
 	task1 := genDummyTask()
 	task2 := &model.Task{
@@ -520,562 +96,322 @@ func TestFifoCancel(t *testing.T) {
 		RunOn:        []string{"success", "failure"},
 	}
 
-	q, _ := NewMemoryQueue(ctx).(*fifo)
-	assert.NotNil(t, q)
-
 	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1}))
+	waitForProcess()
 
-	_, _ = q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, q.Error(ctx, task1.ID, fmt.Errorf("canceled")))
-	assert.NoError(t, q.Error(ctx, task2.ID, fmt.Errorf("canceled")))
-	assert.NoError(t, q.Error(ctx, task3.ID, fmt.Errorf("canceled")))
+	// Verify waiting on deps stat
 	info := q.Info(ctx)
-	assert.Len(t, info.Pending, 0, "all pipelines should be canceled")
-
-	time.Sleep(processTimeInterval * 2)
+	assert.Equal(t, 2, info.Stats.WaitingOnDeps)
+
+	// Poll and fail task1
+	got, err := q.Poll(ctx, 1, filterFnTrue)
+	assert.NoError(t, err)
+	assert.Equal(t, task1, got)
+	assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1")))
+
+	// task2 should be polled but not run (no failure in RunOn)
+	waitForProcess()
+	got, err = q.Poll(ctx, 1, filterFnTrue)
+	assert.NoError(t, err)
+	assert.Equal(t, task2, got)
+	assert.False(t, got.ShouldRun())
+	assert.Equal(t, model.StatusFailure, got.DepStatus["1"])
+
+	// task3 should run (has failure in RunOn)
+	waitForProcess()
+	got, err = q.Poll(ctx, 1, filterFnTrue)
+	assert.NoError(t, err)
+	assert.Equal(t, task3, got)
+	assert.True(t, got.ShouldRun())
+	assert.Equal(t, model.StatusFailure, got.DepStatus["1"])
+
+	waitForProcess()
 	info = q.Info(ctx)
-	assert.Len(t, info.Pending, 0, "canceled are rescheduled")
-	assert.Len(t, info.Running, 0, "canceled are rescheduled")
-	assert.Len(t, info.WaitingOnDeps, 0, "canceled are rescheduled")
-}
-
-func TestFifoShouldRun(t *testing.T) {
-	tests := []struct {
-		name     string
-		task     *model.Task
-		expected bool
-		reason   string
-	}{
-		{
-			name: "failure only - success status",
-			task: &model.Task{
-				ID:           "2",
-				Dependencies: []string{"1"},
-				DepStatus:    map[string]model.StatusValue{"1": model.StatusSuccess},
-				RunOn:        []string{"failure"},
-			},
-			expected: false,
-			reason:   "expect task to not run, it runs on failure only",
-		},
-		{
-			name: "failure and success - success status",
-			task: &model.Task{
-				ID:           "2",
-				Dependencies: []string{"1"},
-				DepStatus:    map[string]model.StatusValue{"1": model.StatusSuccess},
-				RunOn:        []string{"failure", "success"},
-			},
-			expected: true,
-		},
-		{
-			name: "no run_on - failure status",
-			task: &model.Task{
-				ID:           "2",
-				Dependencies: []string{"1"},
-				DepStatus:    map[string]model.StatusValue{"1": model.StatusFailure},
-			},
-			expected: false,
-		},
-		{
-			name: "success only - success status",
-			task: &model.Task{
-				ID:           "2",
-				Dependencies: []string{"1"},
-				DepStatus:    map[string]model.StatusValue{"1": model.StatusSuccess},
-				RunOn:        []string{"success"},
-			},
-			expected: true,
-		},
-		{
-			name: "failure only - failure status",
-			task: &model.Task{
-				ID:           "2",
-				Dependencies: []string{"1"},
-				DepStatus:    map[string]model.StatusValue{"1": model.StatusFailure},
-				RunOn:        []string{"failure"},
-			},
-			expected: true,
-		},
-		{
-			name: "no run_on - skipped status",
-			task: &model.Task{
-				ID:           "2",
-				Dependencies: []string{"1"},
-				DepStatus:    map[string]model.StatusValue{"1": model.StatusSkipped},
-			},
-			expected: false,
-			reason:   "task should not run if dependency is skipped",
-		},
-		{
-			name: "failure only - skipped status",
-			task: &model.Task{
-				ID:           "2",
-				Dependencies: []string{"1"},
-				DepStatus:    map[string]model.StatusValue{"1": model.StatusSkipped},
-				RunOn:        []string{"failure"},
-			},
-			expected: true,
-			reason:   "on failure, tasks should run on skipped deps, something failed higher up the chain",
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			result := tt.task.ShouldRun()
-			if tt.reason != "" {
-				assert.Equal(t, tt.expected, result, tt.reason)
-			} else {
-				assert.Equal(t, tt.expected, result)
-			}
-		})
-	}
+	assert.Equal(t, 0, info.Stats.WaitingOnDeps)
 }
 
-func TestFifoScoring(t *testing.T) {
-	ctx, cancel := context.WithCancelCause(t.Context())
-	t.Cleanup(func() { cancel(nil) })
-
-	q := NewMemoryQueue(ctx)
+func TestMultipleDependencies(t *testing.T) {
+	ctx, cancel, q := setupTestQueue(t)
+	defer cancel(nil)
 
-	// Create tasks with different labels
-	tasks := []*model.Task{
-		{ID: "1", Labels: map[string]string{"org-id": "123", "platform": "linux"}},
-		{ID: "2", Labels: map[string]string{"org-id": "456", "platform": "linux"}},
-		{ID: "3", Labels: map[string]string{"org-id": "789", "platform": "windows"}},
-		{ID: "4", Labels: map[string]string{"org-id": "123", "platform": "linux"}},
-		{ID: "5", Labels: map[string]string{"org-id": "*", "platform": "linux"}},
+	task1 := genDummyTask()
+	task2 := &model.Task{ID: "2"}
+	task3 := &model.Task{
+		ID:           "3",
+		Dependencies: []string{"1", "2"},
+		DepStatus:    make(map[string]model.StatusValue),
 	}
 
-	assert.NoError(t, q.PushAtOnce(ctx, tasks))
-
-	// Create filter functions for different workers
-	filters := map[int]FilterFn{
-		1: func(task *model.Task) (bool, int) {
-			if task.Labels["org-id"] == "123" {
-				return true, 20
-			}
-			if task.Labels["platform"] == "linux" {
-				return true, 10
-			}
-			return true, 1
-		},
-		2: func(task *model.Task) (bool, int) {
-			if task.Labels["org-id"] == "456" {
-				return true, 20
-			}
-			if task.Labels["platform"] == "linux" {
-				return true, 10
-			}
-			return true, 1
-		},
-		3: func(task *model.Task) (bool, int) {
-			if task.Labels["platform"] == "windows" {
-				return true, 20
-			}
-			return true, 1
-		},
-		4: func(task *model.Task) (bool, int) {
-			if task.Labels["org-id"] == "123" {
-				return true, 20
-			}
-			if task.Labels["platform"] == "linux" {
-				return true, 10
-			}
-			return true, 1
-		},
-		5: func(task *model.Task) (bool, int) {
-			if task.Labels["org-id"] == "*" {
-				return true, 15
-			}
-			return true, 1
-		},
-	}
+	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1}))
+	waitForProcess()
+
+	// Poll and complete both dependencies
+	got1, _ := q.Poll(ctx, 1, filterFnTrue)
+	got2, _ := q.Poll(ctx, 2, filterFnTrue)
+
+	assert.NoError(t, q.Done(ctx, got1.ID, model.StatusSuccess))
+	assert.NoError(t, q.Error(ctx, got2.ID, fmt.Errorf("failed")))
+
+	// task3 should have both statuses propagated
+	waitForProcess()
+	got3, err := q.Poll(ctx, 3, filterFnTrue)
+	assert.NoError(t, err)
+	assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"])
+	assert.Equal(t, model.StatusFailure, got3.DepStatus["2"])
+	assert.False(t, got3.ShouldRun())
+}
 
-	// Start polling in separate goroutines
-	results := make(chan *model.Task, 5)
-	for i := 1; i <= 5; i++ {
-		go func(n int) {
-			task, err := q.Poll(ctx, int64(n), filters[n])
-			assert.NoError(t, err)
-			results <- task
-		}(i)
-	}
+func TestTransitiveDependencies(t *testing.T) {
+	ctx, cancel, q := setupTestQueue(t)
+	defer cancel(nil)
 
-	// Collect results
-	receivedTasks := make(map[string]int64)
-	for i := 0; i < 5; i++ {
-		select {
-		case task := <-results:
-			receivedTasks[task.ID] = task.AgentID
-		case <-time.After(time.Second):
-			t.Fatal("Timeout waiting for tasks")
-		}
+	task1 := genDummyTask()
+	task2 := &model.Task{
+		ID:           "2",
+		Dependencies: []string{"1"},
+		DepStatus:    make(map[string]model.StatusValue),
 	}
-
-	assert.Len(t, receivedTasks, 5, "All tasks should be assigned")
-
-	// Define expected agent assignments
-	expectedAssignments := map[string][]int64{
-		"1": {1, 4},
-		"2": {2},
-		"3": {3},
-		"4": {1, 4},
-		"5": {5},
+	task3 := &model.Task{
+		ID:           "3",
+		Dependencies: []string{"2"},
+		DepStatus:    make(map[string]model.StatusValue),
 	}
 
-	// Check if tasks are assigned as expected
-	for taskID, expectedAgents := range expectedAssignments {
-		agentID, ok := receivedTasks[taskID]
-		assert.True(t, ok, "Task %s should be assigned", taskID)
-		assert.Contains(t, expectedAgents, agentID, "Task %s should be assigned to one of the expected agents", taskID)
-	}
+	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1}))
+	waitForProcess()
+
+	// Fail task1
+	got, _ := q.Poll(ctx, 1, filterFnTrue)
+	assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1")))
+
+	// task2 should skip
+	waitForProcess()
+	got, _ = q.Poll(ctx, 2, filterFnTrue)
+	assert.False(t, got.ShouldRun())
+	assert.NoError(t, q.Done(ctx, got.ID, model.StatusSkipped))
+
+	// task3 should also skip (transitive)
+	waitForProcess()
+	got, _ = q.Poll(ctx, 3, filterFnTrue)
+	assert.Equal(t, model.StatusSkipped, got.DepStatus["2"])
+	assert.False(t, got.ShouldRun())
 }
 
-func TestFifoErrorAtOnce(t *testing.T) {
-	ctx, cancel := context.WithCancelCause(t.Context())
-	t.Cleanup(func() { cancel(nil) })
-
-	t.Run("batch error on running tasks", func(t *testing.T) {
-		task1 := genDummyTask()
-		task2 := &model.Task{
-			ID:           "2",
-			Dependencies: []string{"1"},
-			DepStatus:    make(map[string]model.StatusValue),
-		}
-		task3 := &model.Task{
-			ID: "3",
-		}
-
-		q, _ := NewMemoryQueue(ctx).(*fifo)
-		assert.NotNil(t, q)
-
-		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3}))
-
-		waitForProcess()
-		got1, err := q.Poll(ctx, 1, filterFnTrue)
-		assert.NoError(t, err)
-
-		got2, err := q.Poll(ctx, 2, filterFnTrue)
-		assert.NoError(t, err)
-
-		assert.NoError(t, q.ErrorAtOnce(ctx, []string{got1.ID, got2.ID}, fmt.Errorf("batch error")))
-
-		waitForProcess()
-		info := q.Info(ctx)
-		assert.Len(t, info.Running, 0, "expect tasks removed from running queue")
-		assert.Len(t, info.Pending, 1, "expect remaining task in pending")
-	})
-
-	t.Run("batch error on pending tasks", func(t *testing.T) {
-		task1 := genDummyTask()
-		task2 := &model.Task{
-			ID: "2",
-		}
-		task3 := &model.Task{
-			ID:           "3",
-			Dependencies: []string{"1"},
-			DepStatus:    make(map[string]model.StatusValue),
-		}
-
-		q, _ := NewMemoryQueue(ctx).(*fifo)
-		assert.NotNil(t, q)
-
-		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3}))
+func TestLeaseExpiration(t *testing.T) {
+	ctx, cancel, q := setupTestQueue(t)
+	defer cancel(nil)
 
-		waitForProcess()
+	q.extension = 0 // Immediate expiration
+	dummyTask := genDummyTask()
+	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
 
-		assert.NoError(t, q.ErrorAtOnce(ctx, []string{"2", "3"}, fmt.Errorf("pending error")))
-
-		waitForProcess()
-		info := q.Info(ctx)
-		assert.Len(t, info.Pending, 1, "only task1 should remain")
-		assert.Len(t, info.WaitingOnDeps, 0, "task3 should be removed from waiting")
-	})
-
-	t.Run("error at once with cancel error", func(t *testing.T) {
-		task1 := genDummyTask()
-		task2 := &model.Task{
-			ID: "2",
-		}
-
-		q, _ := NewMemoryQueue(ctx).(*fifo)
-		assert.NotNil(t, q)
-
-		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2}))
-
-		waitForProcess()
-		got1, err := q.Poll(ctx, 1, filterFnTrue)
-		assert.NoError(t, err)
+	waitForProcess()
+	got, err := q.Poll(ctx, 1, filterFnTrue)
+	assert.NoError(t, err)
 
-		assert.NoError(t, q.ErrorAtOnce(ctx, []string{got1.ID}, ErrCancel))
-
-		waitForProcess()
-		info := q.Info(ctx)
-		assert.Len(t, info.Running, 0, "task should be removed from running")
-	})
-
-	t.Run("error at once with non-existent tasks", func(t *testing.T) {
-		task1 := genDummyTask()
-		task2 := &model.Task{
-			ID: "2",
-		}
-
-		q, _ := NewMemoryQueue(ctx).(*fifo)
-		assert.NotNil(t, q)
-
-		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2}))
-
-		waitForProcess()
-		got, err := q.Poll(ctx, 1, filterFnTrue)
-		assert.NoError(t, err)
-
-		err = q.ErrorAtOnce(ctx, []string{got.ID, "non-existent-id"}, fmt.Errorf("batch error"))
-		assert.Error(t, err, "should return error when one of the tasks doesn't exist")
-		assert.ErrorIs(t, err, ErrNotFound, "error should contain ErrNotFound")
-
-		waitForProcess()
-		info := q.Info(ctx)
-		assert.Len(t, info.Running, 0, "running task should be removed")
-	})
-
-	t.Run("error at once with multiple non-existent tasks", func(t *testing.T) {
-		q, _ := NewMemoryQueue(ctx).(*fifo)
-		assert.NotNil(t, q)
-
-		task1 := genDummyTask()
-		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1}))
-
-		waitForProcess()
-		got, err := q.Poll(ctx, 1, filterFnTrue)
-		assert.NoError(t, err)
-
-		err = q.ErrorAtOnce(ctx, []string{got.ID, "fake-1", "fake-2", "fake-3"}, fmt.Errorf("multiple errors"))
-		assert.Error(t, err, "should return joined errors")
-
-		errStr := err.Error()
-		assert.Contains(t, errStr, "not found", "error should mention not found")
-	})
-
-	t.Run("error at once with ErrCancel and dependency update", func(t *testing.T) {
-		task1 := genDummyTask()
-		task2 := &model.Task{
-			ID:           "2",
-			Dependencies: []string{"1"},
-			DepStatus:    make(map[string]model.StatusValue),
-		}
-
-		q, _ := NewMemoryQueue(ctx).(*fifo)
-		assert.NotNil(t, q)
-
-		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2}))
-
-		waitForProcess()
-		got, err := q.Poll(ctx, 1, filterFnTrue)
-		assert.NoError(t, err)
+	// Test Wait returns error on expiration
+	errCh := make(chan error, 1)
+	go func() { errCh <- q.Wait(ctx, got.ID) }()
 
-		assert.NoError(t, q.ErrorAtOnce(ctx, []string{got.ID}, ErrCancel))
+	waitForProcess()
+	select {
+	case werr := <-errCh:
+		assert.Error(t, werr)
+	case <-time.After(2 * time.Second):
+		t.Fatal("timeout waiting for Wait to return")
+	}
 
-		waitForProcess()
+	// Task should be back in pending
+	info := q.Info(ctx)
+	assert.Len(t, info.Pending, 1)
+}
 
-		got2, err := q.Poll(ctx, 2, filterFnTrue)
-		assert.NoError(t, err)
-		assert.Equal(t, model.StatusKilled, got2.DepStatus["1"], "dependency should be marked as killed")
-	})
-
-	t.Run("finished with multiple errors", func(t *testing.T) {
-		task1 := genDummyTask()
-		task2 := &model.Task{
-			ID:           "2",
-			Dependencies: []string{"1"},
-			DepStatus:    make(map[string]model.StatusValue),
-		}
+func TestExtend(t *testing.T) {
+	ctx, cancel, q := setupTestQueue(t)
+	defer cancel(nil)
 
-		q, _ := NewMemoryQueue(ctx).(*fifo)
-		assert.NotNil(t, q)
+	q.extension = 50 * time.Millisecond
+	dummyTask := genDummyTask()
+	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
 
-		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2}))
+	waitForProcess()
+	got, _ := q.Poll(ctx, 5, filterFnTrue)
 
-		waitForProcess()
+	// Correct agent can extend
+	assert.NoError(t, q.Extend(ctx, 5, got.ID))
 
-		got, err := q.Poll(ctx, 1, filterFnTrue)
-		assert.NoError(t, err)
-		assert.Equal(t, "1", got.ID)
+	// Wrong agent cannot extend
+	assert.ErrorIs(t, q.Extend(ctx, 999, got.ID), ErrAgentMissMatch)
+	assert.ErrorIs(t, q.Extend(ctx, 1, got.ID), ErrAgentMissMatch)
 
-		err = q.ErrorAtOnce(ctx, []string{got.ID, "2", "fake-1", "fake-2"}, fmt.Errorf("test error"))
+	// Non-existent task
+	assert.ErrorIs(t, q.Extend(ctx, 1, "non-existent"), ErrNotFound)
 
-		assert.Error(t, err, "should return error for non-existent tasks")
-		assert.ErrorIs(t, err, ErrNotFound, "error should contain ErrNotFound")
+	// Extend prevents expiration
+	for i := 0; i < 3; i++ {
+		time.Sleep(30 * time.Millisecond)
+		assert.NoError(t, q.Extend(ctx, 5, got.ID))
+	}
 
-		waitForProcess()
-		info := q.Info(ctx)
-		assert.Len(t, info.Running, 0, "task1 should be removed from running")
-		assert.Len(t, info.WaitingOnDeps, 0, "task2 should be removed from waiting")
-		assert.Len(t, info.Pending, 0, "no tasks should be pending")
-	})
+	info := q.Info(ctx)
+	assert.Len(t, info.Running, 1)
+	assert.Len(t, info.Pending, 0)
 }
 
-func TestFifoExtend(t *testing.T) {
-	ctx, cancel := context.WithCancelCause(t.Context())
-	t.Cleanup(func() { cancel(nil) })
-
-	t.Run("basic extend operations", func(t *testing.T) {
-		q, _ := NewMemoryQueue(ctx).(*fifo)
-		assert.NotNil(t, q)
-
-		dummyTask := genDummyTask()
-		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
-
-		waitForProcess()
-		got, err := q.Poll(ctx, 1, filterFnTrue)
-		assert.NoError(t, err)
-
-		assert.NoError(t, q.Extend(ctx, 1, got.ID))
-
-		err = q.Extend(ctx, 999, got.ID)
-		assert.ErrorIs(t, err, ErrAgentMissMatch)
-
-		assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
-
-		err = q.Extend(ctx, 1, "non-existent")
-		assert.ErrorIs(t, err, ErrNotFound)
-	})
+func TestWait(t *testing.T) {
+	ctx, cancel, q := setupTestQueue(t)
+	defer cancel(nil)
 
-	t.Run("extend prevents expiration", func(t *testing.T) {
-		q, _ := NewMemoryQueue(ctx).(*fifo)
-		assert.NotNil(t, q)
+	dummyTask := genDummyTask()
+	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
 
-		q.extension = 50 * time.Millisecond
+	waitForProcess()
+	got, _ := q.Poll(ctx, 1, filterFnTrue)
 
-		dummyTask := genDummyTask()
-		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
+	// Wait completes on Done
+	var wg sync.WaitGroup
+	wg.Add(1)
+	go func() {
+		assert.NoError(t, q.Wait(ctx, got.ID))
+		wg.Done()
+	}()
 
-		waitForProcess()
-		got, err := q.Poll(ctx, 1, filterFnTrue)
-		assert.NoError(t, err)
+	time.Sleep(time.Millisecond)
+	assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
+	wg.Wait()
 
-		for i := 0; i < 3; i++ {
-			time.Sleep(30 * time.Millisecond)
-			assert.NoError(t, q.Extend(ctx, 1, got.ID))
-		}
+	// Wait on non-existent task
+	assert.NoError(t, q.Wait(ctx, "non-existent"))
 
-		info := q.Info(ctx)
-		assert.Len(t, info.Running, 1, "task should still be running after extensions")
-		assert.Len(t, info.Pending, 0, "task should not be resubmitted")
-	})
+	// Wait with context cancellation
+	dummyTask2 := &model.Task{ID: "2"}
+	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask2}))
+	waitForProcess()
+	got2, _ := q.Poll(ctx, 1, filterFnTrue)
 
-	t.Run("extend with wrong agent", func(t *testing.T) {
-		q, _ := NewMemoryQueue(ctx).(*fifo)
-		assert.NotNil(t, q)
+	waitCtx, waitCancel := context.WithCancel(ctx)
+	errCh := make(chan error, 1)
+	go func() { errCh <- q.Wait(waitCtx, got2.ID) }()
 
-		dummyTask := genDummyTask()
-		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
+	time.Sleep(50 * time.Millisecond)
+	waitCancel()
 
-		waitForProcess()
-		got, err := q.Poll(ctx, 5, filterFnTrue)
+	select {
+	case err := <-errCh:
 		assert.NoError(t, err)
-		assert.Equal(t, int64(5), got.AgentID, "task should be assigned to agent 5")
-
-		assert.NoError(t, q.Extend(ctx, 5, got.ID))
-
-		err = q.Extend(ctx, 999, got.ID)
-		assert.ErrorIs(t, err, ErrAgentMissMatch, "should return ErrAgentMissMatch when wrong agent tries to extend")
-
-		err = q.Extend(ctx, 1, got.ID)
-		assert.ErrorIs(t, err, ErrAgentMissMatch, "should return ErrAgentMissMatch for any wrong agent")
-
-		assert.NoError(t, q.Extend(ctx, 5, got.ID), "correct agent should still be able to extend")
-
-		assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
-	})
+	case <-time.After(time.Second):
+		t.Fatal("Wait should return when context is canceled")
+	}
 }
 
-func TestFifoKickAgentWorkers(t *testing.T) {
-	ctx, cancel := context.WithCancelCause(t.Context())
-	t.Cleanup(func() { cancel(nil) })
+func TestError(t *testing.T) {
+	ctx, cancel, q := setupTestQueue(t)
+	defer cancel(nil)
 
-	t.Run("kick single agent workers", func(t *testing.T) {
-		q, _ := NewMemoryQueue(ctx).(*fifo)
-		assert.NotNil(t, q)
-
-		dummyTask := genDummyTask()
-		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
-
-		pollResults := make(chan error, 3)
+	task1 := genDummyTask()
+	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1}))
 
-		for agentID := int64(1); agentID <= 3; agentID++ {
-			go func(id int64) {
-				_, err := q.Poll(ctx, id, filterFnTrue)
-				pollResults <- err
-			}(agentID)
-		}
+	waitForProcess()
+	got, _ := q.Poll(ctx, 1, filterFnTrue)
 
-		time.Sleep(50 * time.Millisecond)
+	// Error on running task
+	assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("test error")))
+	waitForProcess()
+	info := q.Info(ctx)
+	assert.Len(t, info.Running, 0)
 
-		q.KickAgentWorkers(2)
+	// Error on non-existent task
+	assert.Error(t, q.Error(ctx, "totally-fake-id", fmt.Errorf("test error")))
+}
 
-		select {
-		case err := <-pollResults:
-			assert.Error(t, err)
-			if errors.Is(err, context.Canceled) {
-				assert.True(t, errors.Is(err, context.Canceled), "expected context.Canceled")
-			}
-		case <-time.After(time.Second):
-			t.Fatal("expected worker to be kicked")
-		}
+func TestErrorAtOnce(t *testing.T) {
+	ctx, cancel, q := setupTestQueue(t)
+	defer cancel(nil)
 
-		info := q.Info(ctx)
-		assert.GreaterOrEqual(t, info.Stats.Workers, 2, "other workers should still be registered")
-	})
+	task1 := genDummyTask()
+	task2 := &model.Task{ID: "2"}
+	task3 := &model.Task{
+		ID:           "3",
+		Dependencies: []string{"1"},
+		DepStatus:    make(map[string]model.StatusValue),
+	}
 
-	t.Run("kick multiple workers for same agent", func(t *testing.T) {
-		q, _ := NewMemoryQueue(ctx).(*fifo)
-		assert.NotNil(t, q)
+	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3}))
+	waitForProcess()
 
-		pollResults := make(chan error, 5)
+	got1, _ := q.Poll(ctx, 1, filterFnTrue)
+	got2, _ := q.Poll(ctx, 2, filterFnTrue)
 
-		for i := 0; i < 5; i++ {
-			go func() {
-				_, err := q.Poll(ctx, 42, filterFnTrue)
-				pollResults <- err
-			}()
-		}
+	// Batch error on running tasks
+	assert.NoError(t, q.ErrorAtOnce(ctx, []string{got1.ID, got2.ID}, fmt.Errorf("batch error")))
+	waitForProcess()
+	info := q.Info(ctx)
+	assert.Len(t, info.Running, 0)
+
+	// Error with non-existent tasks
+	task4 := &model.Task{ID: "4"}
+	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task4}))
+	waitForProcess()
+	got4, _ := q.Poll(ctx, 1, filterFnTrue)
+
+	err := q.ErrorAtOnce(ctx, []string{got4.ID, "fake-1", "fake-2"}, fmt.Errorf("test"))
+	assert.Error(t, err)
+	assert.ErrorIs(t, err, ErrNotFound)
+
+	// ErrCancel updates dependency status
+	task5 := genDummyTask()
+	task5.ID = "5"
+	task6 := &model.Task{
+		ID:           "6",
+		Dependencies: []string{"5"},
+		DepStatus:    make(map[string]model.StatusValue),
+	}
+	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task5, task6}))
+	waitForProcess()
+	got5, _ := q.Poll(ctx, 1, filterFnTrue)
 
-		time.Sleep(50 * time.Millisecond)
+	assert.NoError(t, q.ErrorAtOnce(ctx, []string{got5.ID}, ErrCancel))
+	waitForProcess()
 
-		info := q.Info(ctx)
-		initialWorkers := info.Stats.Workers
-		assert.Equal(t, 5, initialWorkers, "all workers should be registered")
+	got6, _ := q.Poll(ctx, 2, filterFnTrue)
+	assert.Equal(t, model.StatusKilled, got6.DepStatus["5"])
+}
 
-		q.KickAgentWorkers(42)
+func TestShouldRunLogic(t *testing.T) {
+	tests := []struct {
+		name      string
+		depStatus model.StatusValue
+		runOn     []string
+		expected  bool
+	}{
+		{"Success without RunOn", model.StatusSuccess, nil, true},
+		{"Failure without RunOn", model.StatusFailure, nil, false},
+		{"Success with failure RunOn", model.StatusSuccess, []string{"failure"}, false},
+		{"Failure with failure RunOn", model.StatusFailure, []string{"failure"}, true},
+		{"Success with both RunOn", model.StatusSuccess, []string{"success", "failure"}, true},
+		{"Skipped without RunOn", model.StatusSkipped, nil, false},
+		{"Skipped with failure RunOn", model.StatusSkipped, []string{"failure"}, true},
+	}
 
-		kickedCount := 0
-		for i := 0; i < 5; i++ {
-			select {
-			case err := <-pollResults:
-				assert.Error(t, err)
-				if errors.Is(err, context.Canceled) {
-					kickedCount++
-				}
-			case <-time.After(time.Second):
-				t.Fatal("expected all workers to be kicked")
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			task := &model.Task{
+				ID:           "2",
+				Dependencies: []string{"1"},
+				DepStatus:    map[string]model.StatusValue{"1": tt.depStatus},
+				RunOn:        tt.runOn,
 			}
-		}
-
-		assert.Equal(t, 5, kickedCount, "all 5 workers should have been kicked")
-
-		time.Sleep(50 * time.Millisecond)
-		info = q.Info(ctx)
-		assert.Equal(t, 0, info.Stats.Workers, "all workers should be removed")
-	})
+			assert.Equal(t, tt.expected, task.ShouldRun())
+		})
+	}
 }
 
-func TestFifoPollContextCanceled(t *testing.T) {
-	ctx, cancel := context.WithCancelCause(t.Context())
-
-	q := NewMemoryQueue(ctx)
+func TestWorkerManagement(t *testing.T) {
+	ctx, cancel, q := setupTestQueue(t)
+	defer cancel(nil)
 
+	// Poll with context cancellation
 	pollCtx, pollCancel := context.WithCancel(ctx)
-
 	errCh := make(chan error, 1)
 	go func() {
 		_, err := q.Poll(pollCtx, 1, filterFnTrue)
@@ -1083,331 +419,123 @@ func TestFifoPollContextCanceled(t *testing.T) {
 	}()
 
 	time.Sleep(50 * time.Millisecond)
-
 	pollCancel()
 
 	select {
 	case err := <-errCh:
-		assert.Error(t, err)
 		assert.ErrorIs(t, err, context.Canceled)
 	case <-time.After(time.Second):
 		t.Fatal("Poll should return when context is canceled")
 	}
 
-	cancel(nil)
-}
-
-func TestFifoDepStatusUpdates(t *testing.T) {
-	ctx, cancel := context.WithCancelCause(t.Context())
-	t.Cleanup(func() { cancel(nil) })
-
-	t.Run("update dep status in queue", func(t *testing.T) {
-		task1 := genDummyTask()
-		task2 := &model.Task{
-			ID:           "2",
-			Dependencies: []string{"1"},
-			DepStatus:    make(map[string]model.StatusValue),
-		}
-		task3 := &model.Task{
-			ID:           "3",
-			Dependencies: []string{"1"},
-			DepStatus:    make(map[string]model.StatusValue),
-		}
-		task4 := &model.Task{
-			ID:           "4",
-			Dependencies: []string{"2"},
-			DepStatus:    make(map[string]model.StatusValue),
-		}
-
-		q, _ := NewMemoryQueue(ctx).(*fifo)
-		assert.NotNil(t, q)
-
-		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3, task4}))
-
-		waitForProcess()
-		got, err := q.Poll(ctx, 1, filterFnTrue)
-		assert.NoError(t, err)
-		assert.Equal(t, task1, got)
-
-		assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
-
-		waitForProcess()
-
-		got2, err := q.Poll(ctx, 2, filterFnTrue)
-		assert.NoError(t, err)
-		assert.Equal(t, model.StatusSuccess, got2.DepStatus["1"], "task2 should have task1's status updated")
-
-		got3, err := q.Poll(ctx, 3, filterFnTrue)
-		assert.NoError(t, err)
-		assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"], "task3 should have task1's status updated")
-	})
-
-	t.Run("multiple dep status updates", func(t *testing.T) {
-		task1 := genDummyTask()
-		task2 := &model.Task{
-			ID: "2",
-		}
-		task3 := &model.Task{
-			ID:           "3",
-			Dependencies: []string{"1", "2"},
-			DepStatus:    make(map[string]model.StatusValue),
-		}
-
-		q, _ := NewMemoryQueue(ctx).(*fifo)
-		assert.NotNil(t, q)
-
-		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3}))
-
-		waitForProcess()
-
-		got1, _ := q.Poll(ctx, 1, filterFnTrue)
-		got2, _ := q.Poll(ctx, 2, filterFnTrue)
-
-		assert.NoError(t, q.Done(ctx, got1.ID, model.StatusSuccess))
-		assert.NoError(t, q.Error(ctx, got2.ID, fmt.Errorf("failed")))
-
-		waitForProcess()
+	// Kick multiple workers for same agent
+	pollResults := make(chan error, 5)
+	for i := 0; i < 5; i++ {
+		go func() {
+			_, err := q.Poll(ctx, 42, filterFnTrue)
+			pollResults <- err
+		}()
+	}
 
-		got3, err := q.Poll(ctx, 3, filterFnTrue)
-		assert.NoError(t, err)
+	time.Sleep(50 * time.Millisecond)
+	q.KickAgentWorkers(42)
 
-		assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"])
-		assert.Equal(t, model.StatusFailure, got3.DepStatus["2"])
-		assert.False(t, got3.ShouldRun(), "task3 should not run since one dependency failed")
-	})
-
-	t.Run("update dep status in pending queue", func(t *testing.T) {
-		task1 := genDummyTask()
-		task2 := &model.Task{
-			ID:           "2",
-			Dependencies: []string{"1"},
-			DepStatus:    make(map[string]model.StatusValue),
-		}
-		task3 := &model.Task{
-			ID:           "3",
-			Dependencies: []string{"1"},
-			DepStatus:    make(map[string]model.StatusValue),
+	kickedCount := 0
+	for i := 0; i < 5; i++ {
+		select {
+		case err := <-pollResults:
+			if errors.Is(err, context.Canceled) {
+				kickedCount++
+			}
+		case <-time.After(time.Second):
+			t.Fatal("expected all workers to be kicked")
 		}
+	}
+	assert.Equal(t, 5, kickedCount)
+}
 
-		q, _ := NewMemoryQueue(ctx).(*fifo)
-		assert.NotNil(t, q)
-
-		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3}))
-
-		waitForProcess()
-
-		got, err := q.Poll(ctx, 1, filterFnTrue)
-		assert.NoError(t, err)
-		assert.Equal(t, task1, got)
-
-		info := q.Info(ctx)
-		assert.Equal(t, 2, info.Stats.WaitingOnDeps, "task2 and task3 should be waiting")
+func TestLabelBasedScoring(t *testing.T) {
+	ctx, cancel := context.WithCancelCause(t.Context())
+	defer cancel(nil)
 
-		assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
+	q := NewMemoryQueue(ctx)
 
-		waitForProcess()
+	tasks := []*model.Task{
+		{ID: "1", Labels: map[string]string{"org-id": "123", "platform": "linux"}},
+		{ID: "2", Labels: map[string]string{"org-id": "456", "platform": "linux"}},
+		{ID: "3", Labels: map[string]string{"org-id": "123", "platform": "windows"}},
+	}
 
-		got2, err := q.Poll(ctx, 2, filterFnTrue)
-		assert.NoError(t, err)
-		assert.Equal(t, model.StatusSuccess, got2.DepStatus["1"],
-			"task2's DepStatus should be updated in pending queue")
+	assert.NoError(t, q.PushAtOnce(ctx, tasks))
 
-		got3, err := q.Poll(ctx, 3, filterFnTrue)
-		assert.NoError(t, err)
-		assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"],
-			"task3's DepStatus should be updated in pending queue")
-	})
-
-	t.Run("update dep status in running queue", func(t *testing.T) {
-		task1 := genDummyTask()
-		task2 := &model.Task{
-			ID: "2",
-		}
-		task3 := &model.Task{
-			ID:           "3",
-			Dependencies: []string{"1", "2"},
-			DepStatus:    make(map[string]model.StatusValue),
-		}
-		task4 := &model.Task{
-			ID:           "4",
-			Dependencies: []string{"1"},
-			DepStatus:    make(map[string]model.StatusValue),
+	filter123 := func(task *model.Task) (bool, int) {
+		if task.Labels["org-id"] == "123" {
+			return true, 20
 		}
+		return true, 1
+	}
 
-		q, _ := NewMemoryQueue(ctx).(*fifo)
-		assert.NotNil(t, q)
-
-		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3, task4}))
-
-		waitForProcess()
-
-		got1, err := q.Poll(ctx, 1, filterFnTrue)
-		assert.NoError(t, err)
-
-		got2, err := q.Poll(ctx, 2, filterFnTrue)
-		assert.NoError(t, err)
-
-		waitForProcess()
-
-		assert.NoError(t, q.Done(ctx, got1.ID, model.StatusSuccess))
-
-		waitForProcess()
-
-		got4, err := q.Poll(ctx, 4, filterFnTrue)
-		assert.NoError(t, err)
-		assert.Equal(t, model.StatusSuccess, got4.DepStatus["1"],
-			"task4's DepStatus should show task1 succeeded")
-
-		assert.NoError(t, q.Done(ctx, got2.ID, model.StatusSuccess))
-
-		waitForProcess()
-
-		got3, err := q.Poll(ctx, 3, filterFnTrue)
-		assert.NoError(t, err)
-		assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"],
-			"task3's DepStatus should show task1 succeeded")
-		assert.Equal(t, model.StatusSuccess, got3.DepStatus["2"],
-			"task3's DepStatus should show task2 succeeded")
-		assert.True(t, got3.ShouldRun(), "task3 should run since both deps succeeded")
-
-		assert.NoError(t, q.Done(ctx, got3.ID, model.StatusSuccess))
-		assert.NoError(t, q.Done(ctx, got4.ID, model.StatusSuccess))
-	})
-
-	t.Run("update dep status in waiting queue", func(t *testing.T) {
-		task1 := genDummyTask()
-		task2 := &model.Task{
-			ID:           "2",
-			Dependencies: []string{"1"},
-			DepStatus:    make(map[string]model.StatusValue),
-		}
-		task3 := &model.Task{
-			ID:           "3",
-			Dependencies: []string{"1"},
-			DepStatus:    make(map[string]model.StatusValue),
+	filter456 := func(task *model.Task) (bool, int) {
+		if task.Labels["org-id"] == "456" {
+			return true, 20
 		}
-		task4 := &model.Task{
-			ID:           "4",
-			Dependencies: []string{"1"},
-			DepStatus:    make(map[string]model.StatusValue),
-		}
-
-		q, _ := NewMemoryQueue(ctx).(*fifo)
-		assert.NotNil(t, q)
-
-		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3, task4}))
-
-		waitForProcess()
-
-		info := q.Info(ctx)
-		assert.Equal(t, 3, info.Stats.WaitingOnDeps, "tasks 2, 3, 4 should be waiting")
-
-		got, err := q.Poll(ctx, 1, filterFnTrue)
-		assert.NoError(t, err)
-		assert.Equal(t, task1, got)
-
-		assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("task1 failed")))
+		return true, 1
+	}
 
-		waitForProcess()
+	results := make(chan *model.Task, 2)
+	go func() {
+		task, _ := q.Poll(ctx, 1, filter123)
+		results <- task
+	}()
+	go func() {
+		task, _ := q.Poll(ctx, 2, filter456)
+		results <- task
+	}()
 
-		for i := 2; i <= 4; i++ {
-			gotTask, err := q.Poll(ctx, int64(i), filterFnTrue)
-			assert.NoError(t, err)
-			assert.Equal(t, model.StatusFailure, gotTask.DepStatus["1"],
-				"task %d's DepStatus should be updated in waitingOnDeps", i)
-			assert.False(t, gotTask.ShouldRun(), "task %d should not run due to failed dependency", i)
-		}
-	})
-
-	t.Run("update dep status with skipped", func(t *testing.T) {
-		task1 := genDummyTask()
-		task2 := &model.Task{
-			ID:           "2",
-			Dependencies: []string{"1"},
-			DepStatus:    make(map[string]model.StatusValue),
-		}
-		task3 := &model.Task{
-			ID:           "3",
-			Dependencies: []string{"2"},
-			DepStatus:    make(map[string]model.StatusValue),
+	receivedTasks := make(map[string]int64)
+	for i := 0; i < 2; i++ {
+		select {
+		case task := <-results:
+			receivedTasks[task.ID] = task.AgentID
+		case <-time.After(time.Second):
+			t.Fatal("Timeout waiting for tasks")
 		}
+	}
 
-		q, _ := NewMemoryQueue(ctx).(*fifo)
-		assert.NotNil(t, q)
-
-		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3}))
-
-		waitForProcess()
-
-		got1, err := q.Poll(ctx, 1, filterFnTrue)
-		assert.NoError(t, err)
-		assert.NoError(t, q.Error(ctx, got1.ID, fmt.Errorf("failed")))
-
-		waitForProcess()
-
-		got2, err := q.Poll(ctx, 2, filterFnTrue)
-		assert.NoError(t, err)
-		assert.False(t, got2.ShouldRun())
-		assert.NoError(t, q.Done(ctx, got2.ID, model.StatusSkipped))
-
-		waitForProcess()
-
-		got3, err := q.Poll(ctx, 3, filterFnTrue)
-		assert.NoError(t, err)
-		assert.Equal(t, model.StatusSkipped, got3.DepStatus["2"],
-			"task3 should have task2's skipped status")
-		assert.False(t, got3.ShouldRun(), "task3 should not run due to skipped dependency")
-	})
+	// Agent 1 should get task with org-id 123
+	assert.Contains(t, []string{"1", "3"}, findTaskByAgent(receivedTasks, 1))
+	// Agent 2 should get task with org-id 456
+	assert.Equal(t, "2", findTaskByAgent(receivedTasks, 2))
 }
 
-func TestFifoRemoveOperations(t *testing.T) {
-	ctx, cancel := context.WithCancelCause(t.Context())
-	t.Cleanup(func() { cancel(nil) })
-
-	t.Run("remove from pending and waiting not found", func(t *testing.T) {
-		task1 := genDummyTask()
+func TestPauseResume(t *testing.T) {
+	ctx, cancel, q := setupTestQueue(t)
+	defer cancel(nil)
 
-		q, _ := NewMemoryQueue(ctx).(*fifo)
-		assert.NotNil(t, q)
+	dummyTask := genDummyTask()
 
-		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1}))
-
-		waitForProcess()
-		got, err := q.Poll(ctx, 1, filterFnTrue)
-		assert.NoError(t, err)
+	var wg sync.WaitGroup
+	wg.Add(1)
+	go func() {
+		_, _ = q.Poll(ctx, 1, filterFnTrue)
+		wg.Done()
+	}()
 
-		assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("test error")))
+	q.Pause()
+	t0 := time.Now()
+	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
+	waitForProcess()
+	q.Resume()
 
-		err = q.Error(ctx, "totally-fake-id", fmt.Errorf("test error"))
-		assert.Error(t, err, "should return error for non-existent task")
-	})
+	wg.Wait()
+	assert.Greater(t, time.Since(t0), 20*time.Millisecond)
+}
 
-	t.Run("remove from waiting on deps", func(t *testing.T) {
-		task1 := genDummyTask()
-		task2 := &model.Task{
-			ID:           "2",
-			Dependencies: []string{"1"},
-			DepStatus:    make(map[string]model.StatusValue),
-		}
-		task3 := &model.Task{
-			ID:           "3",
-			Dependencies: []string{"1"},
-			DepStatus:    make(map[string]model.StatusValue),
+func findTaskByAgent(tasks map[string]int64, agentID int64) string {
+	for taskID, aid := range tasks {
+		if aid == agentID {
+			return taskID
 		}
-
-		q, _ := NewMemoryQueue(ctx).(*fifo)
-		assert.NotNil(t, q)
-
-		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3}))
-
-		waitForProcess()
-		info := q.Info(ctx)
-		assert.Equal(t, 2, info.Stats.WaitingOnDeps, "task2 and task3 should be waiting")
-
-		assert.NoError(t, q.Error(ctx, "2", fmt.Errorf("cancel task")))
-
-		waitForProcess()
-		info = q.Info(ctx)
-		assert.Equal(t, 1, info.Stats.WaitingOnDeps, "only task3 should be waiting now")
-	})
+	}
+	return ""
 }

From 8ed5a3cb259618483ece0627b28f2352017c5869 Mon Sep 17 00:00:00 2001
From: 6543 <6543@obermui.de>
Date: Mon, 26 Jan 2026 19:36:48 +0100
Subject: [PATCH 10/13] more coverage

---
 server/queue/fifo_test.go | 164 +++++++++++++++++++++++++++++++++-----
 1 file changed, 145 insertions(+), 19 deletions(-)

diff --git a/server/queue/fifo_test.go b/server/queue/fifo_test.go
index 5a0ce0a10c1..9530925653f 100644
--- a/server/queue/fifo_test.go
+++ b/server/queue/fifo_test.go
@@ -145,19 +145,35 @@ func TestMultipleDependencies(t *testing.T) {
 	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1}))
 	waitForProcess()
 
-	// Poll and complete both dependencies
+	// Poll both independent tasks
 	got1, _ := q.Poll(ctx, 1, filterFnTrue)
 	got2, _ := q.Poll(ctx, 2, filterFnTrue)
 
-	assert.NoError(t, q.Done(ctx, got1.ID, model.StatusSuccess))
-	assert.NoError(t, q.Error(ctx, got2.ID, fmt.Errorf("failed")))
+	// Ensure we got both task1 and task2 (order may vary)
+	gotIDs := map[string]bool{got1.ID: true, got2.ID: true}
+	assert.True(t, gotIDs["1"] && gotIDs["2"], "Should get both task1 and task2")
+
+	// Complete them with different statuses
+	if got1.ID == "1" {
+		assert.NoError(t, q.Done(ctx, got1.ID, model.StatusSuccess))
+		assert.NoError(t, q.Error(ctx, got2.ID, fmt.Errorf("failed")))
+	} else {
+		assert.NoError(t, q.Done(ctx, got2.ID, model.StatusSuccess))
+		assert.NoError(t, q.Error(ctx, got1.ID, fmt.Errorf("failed")))
+	}
 
 	// task3 should have both statuses propagated
 	waitForProcess()
 	got3, err := q.Poll(ctx, 3, filterFnTrue)
 	assert.NoError(t, err)
-	assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"])
-	assert.Equal(t, model.StatusFailure, got3.DepStatus["2"])
+
+	// Verify both dependency statuses are set correctly
+	assert.Contains(t, got3.DepStatus, "1")
+	assert.Contains(t, got3.DepStatus, "2")
+	assert.True(t,
+		(got3.DepStatus["1"] == model.StatusSuccess && got3.DepStatus["2"] == model.StatusFailure) ||
+			(got3.DepStatus["1"] == model.StatusFailure && got3.DepStatus["2"] == model.StatusSuccess),
+		"One dependency should succeed and one should fail")
 	assert.False(t, got3.ShouldRun())
 }
 
@@ -328,13 +344,10 @@ func TestErrorAtOnce(t *testing.T) {
 	ctx, cancel, q := setupTestQueue(t)
 	defer cancel(nil)
 
+	// Test batch error on running tasks
 	task1 := genDummyTask()
 	task2 := &model.Task{ID: "2"}
-	task3 := &model.Task{
-		ID:           "3",
-		Dependencies: []string{"1"},
-		DepStatus:    make(map[string]model.StatusValue),
-	}
+	task3 := &model.Task{ID: "3"}
 
 	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3}))
 	waitForProcess()
@@ -342,13 +355,18 @@ func TestErrorAtOnce(t *testing.T) {
 	got1, _ := q.Poll(ctx, 1, filterFnTrue)
 	got2, _ := q.Poll(ctx, 2, filterFnTrue)
 
-	// Batch error on running tasks
 	assert.NoError(t, q.ErrorAtOnce(ctx, []string{got1.ID, got2.ID}, fmt.Errorf("batch error")))
 	waitForProcess()
 	info := q.Info(ctx)
 	assert.Len(t, info.Running, 0)
+	assert.Len(t, info.Pending, 1) // task3 should still be pending
+
+	// Clean up task3
+	got3, _ := q.Poll(ctx, 1, filterFnTrue)
+	assert.NoError(t, q.Done(ctx, got3.ID, model.StatusSuccess))
+	waitForProcess()
 
-	// Error with non-existent tasks
+	// Test error with non-existent tasks
 	task4 := &model.Task{ID: "4"}
 	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task4}))
 	waitForProcess()
@@ -358,23 +376,59 @@ func TestErrorAtOnce(t *testing.T) {
 	assert.Error(t, err)
 	assert.ErrorIs(t, err, ErrNotFound)
 
-	// ErrCancel updates dependency status
-	task5 := genDummyTask()
-	task5.ID = "5"
+	// Verify task4 was still removed despite the error
+	waitForProcess()
+	info = q.Info(ctx)
+	assert.Len(t, info.Running, 0)
+
+	// Test ErrorAtOnce on tasks in waitingOnDeps (to cover removeFromPendingAndWaiting)
+	task5 := &model.Task{ID: "5"}
 	task6 := &model.Task{
 		ID:           "6",
 		Dependencies: []string{"5"},
 		DepStatus:    make(map[string]model.StatusValue),
 	}
+
 	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task5, task6}))
 	waitForProcess()
-	got5, _ := q.Poll(ctx, 1, filterFnTrue)
 
-	assert.NoError(t, q.ErrorAtOnce(ctx, []string{got5.ID}, ErrCancel))
+	info = q.Info(ctx)
+	assert.Equal(t, 1, info.Stats.WaitingOnDeps, "task6 should be waiting on deps")
+
+	// Cancel both tasks - this should remove task6 from waitingOnDeps
+	assert.NoError(t, q.ErrorAtOnce(ctx, []string{"5", "6"}, fmt.Errorf("canceled")))
+
 	waitForProcess()
+	info = q.Info(ctx)
+	assert.Equal(t, 0, info.Stats.WaitingOnDeps, "task6 should be removed from waitingOnDeps")
+	assert.Len(t, info.Pending, 0, "no tasks should be pending")
+}
 
-	got6, _ := q.Poll(ctx, 2, filterFnTrue)
-	assert.Equal(t, model.StatusKilled, got6.DepStatus["5"])
+func TestErrorAtOnceCancellation(t *testing.T) {
+	ctx, cancel, q := setupTestQueue(t)
+	defer cancel(nil)
+
+	// Test ErrCancel with dependency propagation
+	task1 := &model.Task{ID: "1"}
+	task2 := &model.Task{
+		ID:           "2",
+		Dependencies: []string{"1"},
+		DepStatus:    make(map[string]model.StatusValue),
+		RunOn:        []string{"success", "failure"}, // Ensures task runs on kill/cancel
+	}
+
+	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2}))
+	waitForProcess()
+	got1, _ := q.Poll(ctx, 1, filterFnTrue)
+
+	assert.NoError(t, q.ErrorAtOnce(ctx, []string{got1.ID}, ErrCancel))
+
+	// Wait for cancellation to be processed and dependency to be updated
+	waitForProcess()
+	waitForProcess()
+
+	got2, _ := q.Poll(ctx, 2, filterFnTrue)
+	assert.Equal(t, model.StatusKilled, got2.DepStatus["1"])
 }
 
 func TestShouldRunLogic(t *testing.T) {
@@ -539,3 +593,75 @@ func findTaskByAgent(tasks map[string]int64, agentID int64) string {
 	}
 	return ""
 }
+
+func TestDependencyStatusPropagation(t *testing.T) {
+	ctx, cancel, q := setupTestQueue(t)
+	defer cancel(nil)
+
+	// Test basic dependency status propagation from completed task to waiting tasks
+	task1 := genDummyTask()
+	task2 := &model.Task{
+		ID:           "2",
+		Dependencies: []string{"1"},
+		DepStatus:    make(map[string]model.StatusValue),
+	}
+	task3 := &model.Task{
+		ID:           "3",
+		Dependencies: []string{"1"},
+		DepStatus:    make(map[string]model.StatusValue),
+		RunOn:        []string{"success", "failure"},
+	}
+
+	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3}))
+	waitForProcess()
+
+	// Both task2 and task3 should be waiting on task1
+	info := q.Info(ctx)
+	assert.Equal(t, 2, info.Stats.WaitingOnDeps)
+
+	// Complete task1 - this triggers updateDepStatusInQueue for waitingOnDeps tasks
+	got1, _ := q.Poll(ctx, 1, filterFnTrue)
+	assert.NoError(t, q.Done(ctx, got1.ID, model.StatusSuccess))
+
+	waitForProcess()
+
+	// Poll task2 and task3 - both should have task1's success status
+	got2, _ := q.Poll(ctx, 2, filterFnTrue)
+	got3, _ := q.Poll(ctx, 3, filterFnTrue)
+
+	assert.Equal(t, model.StatusSuccess, got2.DepStatus["1"])
+	assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"])
+
+	// Now test updateDepStatusInQueue for tasks in pending and running queues
+	task4 := &model.Task{ID: "4"}
+	task5 := &model.Task{
+		ID:           "5",
+		Dependencies: []string{"4"},
+		DepStatus:    make(map[string]model.StatusValue),
+	}
+	task6 := &model.Task{
+		ID:           "6",
+		Dependencies: []string{"4"},
+		DepStatus:    make(map[string]model.StatusValue),
+		RunOn:        []string{"success", "failure"},
+	}
+
+	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task4, task5, task6}))
+	waitForProcess()
+
+	// Poll task4 and complete it while task5 and task6 are waiting
+	got4, _ := q.Poll(ctx, 4, filterFnTrue)
+	assert.NoError(t, q.Error(ctx, got4.ID, fmt.Errorf("failed")))
+
+	waitForProcess()
+
+	// task5 should not run (default behavior on failure)
+	got5, _ := q.Poll(ctx, 5, filterFnTrue)
+	assert.Equal(t, model.StatusFailure, got5.DepStatus["4"])
+	assert.False(t, got5.ShouldRun())
+
+	// task6 should run (has failure in RunOn)
+	got6, _ := q.Poll(ctx, 6, filterFnTrue)
+	assert.Equal(t, model.StatusFailure, got6.DepStatus["4"])
+	assert.True(t, got6.ShouldRun())
+}

From b2fe8c00e37634cdf8704460555c007c26fe2c4e Mon Sep 17 00:00:00 2001
From: 6543 <6543@obermui.de>
Date: Mon, 26 Jan 2026 19:53:10 +0100
Subject: [PATCH 11/13] nice test

---
 server/queue/fifo.go      |  12 +-
 server/queue/fifo_test.go | 905 ++++++++++++++++++--------------------
 2 files changed, 443 insertions(+), 474 deletions(-)

diff --git a/server/queue/fifo.go b/server/queue/fifo.go
index 4cac0b22992..5a5209b8e49 100644
--- a/server/queue/fifo.go
+++ b/server/queue/fifo.go
@@ -88,23 +88,23 @@ func (q *fifo) Poll(c context.Context, agentID int64, filter FilterFn) (*model.T
 	q.Lock()
 	ctx, stop := context.WithCancelCause(c)
 
-	_worker := &worker{
+	w := &worker{
 		agentID: agentID,
 		channel: make(chan *model.Task, 1),
 		filter:  filter,
 		stop:    stop,
 	}
-	q.workers[_worker] = struct{}{}
+	q.workers[w] = struct{}{}
 	q.Unlock()
 
 	for {
 		select {
 		case <-ctx.Done():
 			q.Lock()
-			delete(q.workers, _worker)
+			delete(q.workers, w)
 			q.Unlock()
 			return nil, ctx.Err()
-		case t := <-_worker.channel:
+		case t := <-w.channel:
 			return t, nil
 		}
 	}
@@ -301,12 +301,10 @@ func (q *fifo) filterWaiting() {
 }
 
 func (q *fifo) assignToWorker() (*list.Element, *worker) {
-	var next *list.Element
 	var bestWorker *worker
 	var bestScore int
 
-	for element := q.pending.Front(); element != nil; element = next {
-		next = element.Next()
+	for element := q.pending.Front(); element != nil; element = element.Next() {
 		task := element.Value.(*model.Task)
 		log.Debug().Msgf("queue: trying to assign task: %v with deps %v", task.ID, task.Dependencies)
 
diff --git a/server/queue/fifo_test.go b/server/queue/fifo_test.go
index 9530925653f..0ffaed96937 100644
--- a/server/queue/fifo_test.go
+++ b/server/queue/fifo_test.go
@@ -50,465 +50,504 @@ func setupTestQueue(t *testing.T) (context.Context, context.CancelCauseFunc, *fi
 	return ctx, cancel, q
 }
 
-func TestBasicQueueOperations(t *testing.T) {
+func TestFifoBasicOperations(t *testing.T) {
 	ctx, cancel, q := setupTestQueue(t)
 	defer cancel(nil)
 
-	dummyTask := genDummyTask()
+	t.Run("push poll done lifecycle", func(t *testing.T) {
+		dummyTask := genDummyTask()
 
-	// Test push -> poll -> done lifecycle
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
-	waitForProcess()
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
+		waitForProcess()
 
-	info := q.Info(ctx)
-	assert.Len(t, info.Pending, 1)
+		info := q.Info(ctx)
+		assert.Len(t, info.Pending, 1)
 
-	got, err := q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, err)
-	assert.Equal(t, dummyTask, got)
+		got, err := q.Poll(ctx, 1, filterFnTrue)
+		assert.NoError(t, err)
+		assert.Equal(t, dummyTask, got)
+
+		waitForProcess()
+		info = q.Info(ctx)
+		assert.Len(t, info.Pending, 0)
+		assert.Len(t, info.Running, 1)
+
+		assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
+
+		waitForProcess()
+		info = q.Info(ctx)
+		assert.Len(t, info.Running, 0)
+	})
+
+	t.Run("error handling", func(t *testing.T) {
+		task1 := &model.Task{ID: "task-error-1"}
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1}))
+
+		waitForProcess()
+		got, _ := q.Poll(ctx, 1, filterFnTrue)
+
+		assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("test error")))
+		waitForProcess()
+		info := q.Info(ctx)
+		assert.Len(t, info.Running, 0)
+
+		assert.Error(t, q.Error(ctx, "totally-fake-id", fmt.Errorf("test error")))
+	})
+
+	t.Run("error at once", func(t *testing.T) {
+		task1 := &model.Task{ID: "batch-1"}
+		task2 := &model.Task{ID: "batch-2"}
+		task3 := &model.Task{ID: "batch-3"}
+
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3}))
+		waitForProcess()
+
+		got1, _ := q.Poll(ctx, 1, filterFnTrue)
+		got2, _ := q.Poll(ctx, 2, filterFnTrue)
+
+		assert.NoError(t, q.ErrorAtOnce(ctx, []string{got1.ID, got2.ID}, fmt.Errorf("batch error")))
+		waitForProcess()
+		info := q.Info(ctx)
+		assert.Len(t, info.Running, 0)
+		assert.Len(t, info.Pending, 1)
+
+		got3, _ := q.Poll(ctx, 1, filterFnTrue)
+		assert.NoError(t, q.Done(ctx, got3.ID, model.StatusSuccess))
+		waitForProcess()
+
+		task4 := &model.Task{ID: "batch-4"}
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task4}))
+		waitForProcess()
+		got4, _ := q.Poll(ctx, 1, filterFnTrue)
+
+		err := q.ErrorAtOnce(ctx, []string{got4.ID, "fake-1", "fake-2"}, fmt.Errorf("test"))
+		assert.Error(t, err)
+		assert.ErrorIs(t, err, ErrNotFound)
+
+		waitForProcess()
+		info = q.Info(ctx)
+		assert.Len(t, info.Running, 0)
+	})
+
+	t.Run("error at once with waiting deps", func(t *testing.T) {
+		task5 := &model.Task{ID: "deps-cancel-5"}
+		task6 := &model.Task{
+			ID:           "deps-cancel-6",
+			Dependencies: []string{"deps-cancel-5"},
+			DepStatus:    make(map[string]model.StatusValue),
+		}
 
-	waitForProcess()
-	info = q.Info(ctx)
-	assert.Len(t, info.Pending, 0)
-	assert.Len(t, info.Running, 1)
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task5, task6}))
+		waitForProcess()
 
-	assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
+		info := q.Info(ctx)
+		assert.Equal(t, 1, info.Stats.WaitingOnDeps)
 
-	waitForProcess()
-	info = q.Info(ctx)
-	assert.Len(t, info.Running, 0)
-}
+		assert.NoError(t, q.ErrorAtOnce(ctx, []string{"deps-cancel-5", "deps-cancel-6"}, fmt.Errorf("canceled")))
 
-func TestTaskDependencies(t *testing.T) {
-	ctx, cancel, q := setupTestQueue(t)
-	defer cancel(nil)
+		waitForProcess()
+		info = q.Info(ctx)
+		assert.Equal(t, 0, info.Stats.WaitingOnDeps)
+		assert.Len(t, info.Pending, 0)
+	})
 
-	task1 := genDummyTask()
-	task2 := &model.Task{
-		ID:           "2",
-		Dependencies: []string{"1"},
-		DepStatus:    make(map[string]model.StatusValue),
-	}
-	task3 := &model.Task{
-		ID:           "3",
-		Dependencies: []string{"1"},
-		DepStatus:    make(map[string]model.StatusValue),
-		RunOn:        []string{"success", "failure"},
-	}
+	t.Run("error at once cancellation", func(t *testing.T) {
+		task1 := &model.Task{ID: "cancel-prop-1"}
+		task2 := &model.Task{
+			ID:           "cancel-prop-2",
+			Dependencies: []string{"cancel-prop-1"},
+			DepStatus:    make(map[string]model.StatusValue),
+			RunOn:        []string{"success", "failure"},
+		}
 
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1}))
-	waitForProcess()
-
-	// Verify waiting on deps stat
-	info := q.Info(ctx)
-	assert.Equal(t, 2, info.Stats.WaitingOnDeps)
-
-	// Poll and fail task1
-	got, err := q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, err)
-	assert.Equal(t, task1, got)
-	assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1")))
-
-	// task2 should be polled but not run (no failure in RunOn)
-	waitForProcess()
-	got, err = q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, err)
-	assert.Equal(t, task2, got)
-	assert.False(t, got.ShouldRun())
-	assert.Equal(t, model.StatusFailure, got.DepStatus["1"])
-
-	// task3 should run (has failure in RunOn)
-	waitForProcess()
-	got, err = q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, err)
-	assert.Equal(t, task3, got)
-	assert.True(t, got.ShouldRun())
-	assert.Equal(t, model.StatusFailure, got.DepStatus["1"])
-
-	waitForProcess()
-	info = q.Info(ctx)
-	assert.Equal(t, 0, info.Stats.WaitingOnDeps)
-}
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2}))
+		waitForProcess()
+		got1, _ := q.Poll(ctx, 1, filterFnTrue)
 
-func TestMultipleDependencies(t *testing.T) {
-	ctx, cancel, q := setupTestQueue(t)
-	defer cancel(nil)
+		assert.NoError(t, q.ErrorAtOnce(ctx, []string{got1.ID}, ErrCancel))
 
-	task1 := genDummyTask()
-	task2 := &model.Task{ID: "2"}
-	task3 := &model.Task{
-		ID:           "3",
-		Dependencies: []string{"1", "2"},
-		DepStatus:    make(map[string]model.StatusValue),
-	}
+		waitForProcess()
+		waitForProcess()
 
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1}))
-	waitForProcess()
+		got2, _ := q.Poll(ctx, 2, filterFnTrue)
+		assert.Equal(t, model.StatusKilled, got2.DepStatus["cancel-prop-1"])
+	})
 
-	// Poll both independent tasks
-	got1, _ := q.Poll(ctx, 1, filterFnTrue)
-	got2, _ := q.Poll(ctx, 2, filterFnTrue)
+	t.Run("pause resume", func(t *testing.T) {
+		dummyTask := &model.Task{ID: "pause-1"}
 
-	// Ensure we got both task1 and task2 (order may vary)
-	gotIDs := map[string]bool{got1.ID: true, got2.ID: true}
-	assert.True(t, gotIDs["1"] && gotIDs["2"], "Should get both task1 and task2")
+		var wg sync.WaitGroup
+		wg.Add(1)
+		go func() {
+			_, _ = q.Poll(ctx, 99, filterFnTrue)
+			wg.Done()
+		}()
 
-	// Complete them with different statuses
-	if got1.ID == "1" {
-		assert.NoError(t, q.Done(ctx, got1.ID, model.StatusSuccess))
-		assert.NoError(t, q.Error(ctx, got2.ID, fmt.Errorf("failed")))
-	} else {
-		assert.NoError(t, q.Done(ctx, got2.ID, model.StatusSuccess))
-		assert.NoError(t, q.Error(ctx, got1.ID, fmt.Errorf("failed")))
-	}
+		q.Pause()
+		t0 := time.Now()
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
+		waitForProcess()
+		q.Resume()
 
-	// task3 should have both statuses propagated
-	waitForProcess()
-	got3, err := q.Poll(ctx, 3, filterFnTrue)
-	assert.NoError(t, err)
-
-	// Verify both dependency statuses are set correctly
-	assert.Contains(t, got3.DepStatus, "1")
-	assert.Contains(t, got3.DepStatus, "2")
-	assert.True(t,
-		(got3.DepStatus["1"] == model.StatusSuccess && got3.DepStatus["2"] == model.StatusFailure) ||
-			(got3.DepStatus["1"] == model.StatusFailure && got3.DepStatus["2"] == model.StatusSuccess),
-		"One dependency should succeed and one should fail")
-	assert.False(t, got3.ShouldRun())
+		wg.Wait()
+		assert.Greater(t, time.Since(t0), 20*time.Millisecond)
+	})
 }
 
-func TestTransitiveDependencies(t *testing.T) {
+func TestFifoDependencies(t *testing.T) {
 	ctx, cancel, q := setupTestQueue(t)
 	defer cancel(nil)
 
-	task1 := genDummyTask()
-	task2 := &model.Task{
-		ID:           "2",
-		Dependencies: []string{"1"},
-		DepStatus:    make(map[string]model.StatusValue),
-	}
-	task3 := &model.Task{
-		ID:           "3",
-		Dependencies: []string{"2"},
-		DepStatus:    make(map[string]model.StatusValue),
-	}
+	t.Run("basic dependency handling", func(t *testing.T) {
+		task1 := &model.Task{ID: "dep-basic-1"}
+		task2 := &model.Task{
+			ID:           "dep-basic-2",
+			Dependencies: []string{"dep-basic-1"},
+			DepStatus:    make(map[string]model.StatusValue),
+		}
+		task3 := &model.Task{
+			ID:           "dep-basic-3",
+			Dependencies: []string{"dep-basic-1"},
+			DepStatus:    make(map[string]model.StatusValue),
+			RunOn:        []string{"success", "failure"},
+		}
 
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1}))
-	waitForProcess()
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1}))
+		waitForProcess()
 
-	// Fail task1
-	got, _ := q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1")))
+		info := q.Info(ctx)
+		assert.Equal(t, 2, info.Stats.WaitingOnDeps)
 
-	// task2 should skip
-	waitForProcess()
-	got, _ = q.Poll(ctx, 2, filterFnTrue)
-	assert.False(t, got.ShouldRun())
-	assert.NoError(t, q.Done(ctx, got.ID, model.StatusSkipped))
+		got, err := q.Poll(ctx, 1, filterFnTrue)
+		assert.NoError(t, err)
+		assert.Equal(t, task1, got)
+		assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1")))
 
-	// task3 should also skip (transitive)
-	waitForProcess()
-	got, _ = q.Poll(ctx, 3, filterFnTrue)
-	assert.Equal(t, model.StatusSkipped, got.DepStatus["2"])
-	assert.False(t, got.ShouldRun())
-}
+		waitForProcess()
+		got, err = q.Poll(ctx, 1, filterFnTrue)
+		assert.NoError(t, err)
+		assert.Equal(t, task2, got)
+		assert.False(t, got.ShouldRun())
+		assert.Equal(t, model.StatusFailure, got.DepStatus["dep-basic-1"])
 
-func TestLeaseExpiration(t *testing.T) {
-	ctx, cancel, q := setupTestQueue(t)
-	defer cancel(nil)
+		waitForProcess()
+		got, err = q.Poll(ctx, 1, filterFnTrue)
+		assert.NoError(t, err)
+		assert.Equal(t, task3, got)
+		assert.True(t, got.ShouldRun())
+		assert.Equal(t, model.StatusFailure, got.DepStatus["dep-basic-1"])
+
+		waitForProcess()
+		info = q.Info(ctx)
+		assert.Equal(t, 0, info.Stats.WaitingOnDeps)
+	})
+
+	t.Run("multiple dependencies", func(t *testing.T) {
+		task1 := &model.Task{ID: "multi-dep-1"}
+		task2 := &model.Task{ID: "multi-dep-2"}
+		task3 := &model.Task{
+			ID:           "multi-dep-3",
+			Dependencies: []string{"multi-dep-1", "multi-dep-2"},
+			DepStatus:    make(map[string]model.StatusValue),
+		}
 
-	q.extension = 0 // Immediate expiration
-	dummyTask := genDummyTask()
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1}))
+		waitForProcess()
 
-	waitForProcess()
-	got, err := q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, err)
+		got1, _ := q.Poll(ctx, 1, filterFnTrue)
+		got2, _ := q.Poll(ctx, 2, filterFnTrue)
 
-	// Test Wait returns error on expiration
-	errCh := make(chan error, 1)
-	go func() { errCh <- q.Wait(ctx, got.ID) }()
+		gotIDs := map[string]bool{got1.ID: true, got2.ID: true}
+		assert.True(t, gotIDs["multi-dep-1"] && gotIDs["multi-dep-2"])
 
-	waitForProcess()
-	select {
-	case werr := <-errCh:
-		assert.Error(t, werr)
-	case <-time.After(2 * time.Second):
-		t.Fatal("timeout waiting for Wait to return")
-	}
+		if got1.ID == "multi-dep-1" {
+			assert.NoError(t, q.Done(ctx, got1.ID, model.StatusSuccess))
+			assert.NoError(t, q.Error(ctx, got2.ID, fmt.Errorf("failed")))
+		} else {
+			assert.NoError(t, q.Done(ctx, got2.ID, model.StatusSuccess))
+			assert.NoError(t, q.Error(ctx, got1.ID, fmt.Errorf("failed")))
+		}
 
-	// Task should be back in pending
-	info := q.Info(ctx)
-	assert.Len(t, info.Pending, 1)
-}
+		waitForProcess()
+		got3, err := q.Poll(ctx, 3, filterFnTrue)
+		assert.NoError(t, err)
 
-func TestExtend(t *testing.T) {
-	ctx, cancel, q := setupTestQueue(t)
-	defer cancel(nil)
+		assert.Contains(t, got3.DepStatus, "multi-dep-1")
+		assert.Contains(t, got3.DepStatus, "multi-dep-2")
+		assert.True(t,
+			(got3.DepStatus["multi-dep-1"] == model.StatusSuccess && got3.DepStatus["multi-dep-2"] == model.StatusFailure) ||
+				(got3.DepStatus["multi-dep-1"] == model.StatusFailure && got3.DepStatus["multi-dep-2"] == model.StatusSuccess))
+		assert.False(t, got3.ShouldRun())
+	})
+
+	t.Run("transitive dependencies", func(t *testing.T) {
+		task1 := &model.Task{ID: "trans-1"}
+		task2 := &model.Task{
+			ID:           "trans-2",
+			Dependencies: []string{"trans-1"},
+			DepStatus:    make(map[string]model.StatusValue),
+		}
+		task3 := &model.Task{
+			ID:           "trans-3",
+			Dependencies: []string{"trans-2"},
+			DepStatus:    make(map[string]model.StatusValue),
+		}
+
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2, task3, task1}))
+		waitForProcess()
+
+		got, _ := q.Poll(ctx, 1, filterFnTrue)
+		assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("exit code 1")))
+
+		waitForProcess()
+		got, _ = q.Poll(ctx, 2, filterFnTrue)
+		assert.False(t, got.ShouldRun())
+		assert.NoError(t, q.Done(ctx, got.ID, model.StatusSkipped))
+
+		waitForProcess()
+		got, _ = q.Poll(ctx, 3, filterFnTrue)
+		assert.Equal(t, model.StatusSkipped, got.DepStatus["trans-2"])
+		assert.False(t, got.ShouldRun())
+	})
+
+	t.Run("dependency status propagation", func(t *testing.T) {
+		task1 := &model.Task{ID: "prop-1"}
+		task2 := &model.Task{
+			ID:           "prop-2",
+			Dependencies: []string{"prop-1"},
+			DepStatus:    make(map[string]model.StatusValue),
+		}
+		task3 := &model.Task{
+			ID:           "prop-3",
+			Dependencies: []string{"prop-1"},
+			DepStatus:    make(map[string]model.StatusValue),
+			RunOn:        []string{"success", "failure"},
+		}
 
-	q.extension = 50 * time.Millisecond
-	dummyTask := genDummyTask()
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3}))
+		waitForProcess()
 
-	waitForProcess()
-	got, _ := q.Poll(ctx, 5, filterFnTrue)
+		info := q.Info(ctx)
+		assert.Equal(t, 2, info.Stats.WaitingOnDeps)
 
-	// Correct agent can extend
-	assert.NoError(t, q.Extend(ctx, 5, got.ID))
+		got1, _ := q.Poll(ctx, 1, filterFnTrue)
+		assert.NoError(t, q.Done(ctx, got1.ID, model.StatusSuccess))
 
-	// Wrong agent cannot extend
-	assert.ErrorIs(t, q.Extend(ctx, 999, got.ID), ErrAgentMissMatch)
-	assert.ErrorIs(t, q.Extend(ctx, 1, got.ID), ErrAgentMissMatch)
+		waitForProcess()
 
-	// Non-existent task
-	assert.ErrorIs(t, q.Extend(ctx, 1, "non-existent"), ErrNotFound)
+		got2, _ := q.Poll(ctx, 2, filterFnTrue)
+		got3, _ := q.Poll(ctx, 3, filterFnTrue)
 
-	// Extend prevents expiration
-	for i := 0; i < 3; i++ {
-		time.Sleep(30 * time.Millisecond)
-		assert.NoError(t, q.Extend(ctx, 5, got.ID))
-	}
+		assert.Equal(t, model.StatusSuccess, got2.DepStatus["prop-1"])
+		assert.Equal(t, model.StatusSuccess, got3.DepStatus["prop-1"])
+
+		task4 := &model.Task{ID: "prop-4"}
+		task5 := &model.Task{
+			ID:           "prop-5",
+			Dependencies: []string{"prop-4"},
+			DepStatus:    make(map[string]model.StatusValue),
+		}
+		task6 := &model.Task{
+			ID:           "prop-6",
+			Dependencies: []string{"prop-4"},
+			DepStatus:    make(map[string]model.StatusValue),
+			RunOn:        []string{"success", "failure"},
+		}
+
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task4, task5, task6}))
+		waitForProcess()
+
+		got4, _ := q.Poll(ctx, 4, filterFnTrue)
+		assert.NoError(t, q.Error(ctx, got4.ID, fmt.Errorf("failed")))
+
+		waitForProcess()
+
+		got5, _ := q.Poll(ctx, 5, filterFnTrue)
+		assert.Equal(t, model.StatusFailure, got5.DepStatus["prop-4"])
+		assert.False(t, got5.ShouldRun())
 
-	info := q.Info(ctx)
-	assert.Len(t, info.Running, 1)
-	assert.Len(t, info.Pending, 0)
+		got6, _ := q.Poll(ctx, 6, filterFnTrue)
+		assert.Equal(t, model.StatusFailure, got6.DepStatus["prop-4"])
+		assert.True(t, got6.ShouldRun())
+	})
 }
 
-func TestWait(t *testing.T) {
+func TestFifoLeaseManagement(t *testing.T) {
 	ctx, cancel, q := setupTestQueue(t)
 	defer cancel(nil)
 
-	dummyTask := genDummyTask()
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
+	t.Run("lease expiration", func(t *testing.T) {
+		q.extension = 0
+		dummyTask := &model.Task{ID: "lease-exp-1"}
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
 
-	waitForProcess()
-	got, _ := q.Poll(ctx, 1, filterFnTrue)
+		waitForProcess()
+		got, err := q.Poll(ctx, 1, filterFnTrue)
+		assert.NoError(t, err)
 
-	// Wait completes on Done
-	var wg sync.WaitGroup
-	wg.Add(1)
-	go func() {
-		assert.NoError(t, q.Wait(ctx, got.ID))
-		wg.Done()
-	}()
+		errCh := make(chan error, 1)
+		go func() { errCh <- q.Wait(ctx, got.ID) }()
 
-	time.Sleep(time.Millisecond)
-	assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
-	wg.Wait()
+		waitForProcess()
+		select {
+		case werr := <-errCh:
+			assert.Error(t, werr)
+		case <-time.After(2 * time.Second):
+			t.Fatal("timeout waiting for Wait to return")
+		}
 
-	// Wait on non-existent task
-	assert.NoError(t, q.Wait(ctx, "non-existent"))
+		info := q.Info(ctx)
+		assert.Len(t, info.Pending, 1)
 
-	// Wait with context cancellation
-	dummyTask2 := &model.Task{ID: "2"}
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask2}))
-	waitForProcess()
-	got2, _ := q.Poll(ctx, 1, filterFnTrue)
+		// Clean up - poll and complete the task
+		got, _ = q.Poll(ctx, 1, filterFnTrue)
+		assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
+		waitForProcess()
 
-	waitCtx, waitCancel := context.WithCancel(ctx)
-	errCh := make(chan error, 1)
-	go func() { errCh <- q.Wait(waitCtx, got2.ID) }()
+		// Verify cleanup
+		info = q.Info(ctx)
+		assert.Len(t, info.Pending, 0)
+		assert.Len(t, info.Running, 0)
+	})
 
-	time.Sleep(50 * time.Millisecond)
-	waitCancel()
+	t.Run("extend lease", func(t *testing.T) {
+		q.extension = 50 * time.Millisecond
+		dummyTask := &model.Task{ID: "extend-1"}
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
 
-	select {
-	case err := <-errCh:
-		assert.NoError(t, err)
-	case <-time.After(time.Second):
-		t.Fatal("Wait should return when context is canceled")
-	}
-}
+		waitForProcess()
+		got, _ := q.Poll(ctx, 5, filterFnTrue)
 
-func TestError(t *testing.T) {
-	ctx, cancel, q := setupTestQueue(t)
-	defer cancel(nil)
+		assert.NoError(t, q.Extend(ctx, 5, got.ID))
+		assert.ErrorIs(t, q.Extend(ctx, 999, got.ID), ErrAgentMissMatch)
+		assert.ErrorIs(t, q.Extend(ctx, 1, got.ID), ErrAgentMissMatch)
+		assert.ErrorIs(t, q.Extend(ctx, 1, "non-existent"), ErrNotFound)
 
-	task1 := genDummyTask()
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1}))
+		for i := 0; i < 3; i++ {
+			time.Sleep(30 * time.Millisecond)
+			assert.NoError(t, q.Extend(ctx, 5, got.ID))
+		}
 
-	waitForProcess()
-	got, _ := q.Poll(ctx, 1, filterFnTrue)
+		info := q.Info(ctx)
+		assert.Len(t, info.Running, 1)
+		assert.Len(t, info.Pending, 0)
 
-	// Error on running task
-	assert.NoError(t, q.Error(ctx, got.ID, fmt.Errorf("test error")))
-	waitForProcess()
-	info := q.Info(ctx)
-	assert.Len(t, info.Running, 0)
+		// Clean up - complete the extended task
+		assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
+		waitForProcess()
 
-	// Error on non-existent task
-	assert.Error(t, q.Error(ctx, "totally-fake-id", fmt.Errorf("test error")))
-}
+		// Verify cleanup
+		info = q.Info(ctx)
+		assert.Len(t, info.Pending, 0)
+		assert.Len(t, info.Running, 0)
+	})
 
-func TestErrorAtOnce(t *testing.T) {
-	ctx, cancel, q := setupTestQueue(t)
-	defer cancel(nil)
+	t.Run("wait operations", func(t *testing.T) {
+		// Verify queue is clean before starting
+		info := q.Info(ctx)
+		assert.Len(t, info.Pending, 0, "queue should be empty at start of wait operations")
+		assert.Len(t, info.Running, 0, "queue should be empty at start of wait operations")
 
-	// Test batch error on running tasks
-	task1 := genDummyTask()
-	task2 := &model.Task{ID: "2"}
-	task3 := &model.Task{ID: "3"}
-
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3}))
-	waitForProcess()
-
-	got1, _ := q.Poll(ctx, 1, filterFnTrue)
-	got2, _ := q.Poll(ctx, 2, filterFnTrue)
-
-	assert.NoError(t, q.ErrorAtOnce(ctx, []string{got1.ID, got2.ID}, fmt.Errorf("batch error")))
-	waitForProcess()
-	info := q.Info(ctx)
-	assert.Len(t, info.Running, 0)
-	assert.Len(t, info.Pending, 1) // task3 should still be pending
-
-	// Clean up task3
-	got3, _ := q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, q.Done(ctx, got3.ID, model.StatusSuccess))
-	waitForProcess()
-
-	// Test error with non-existent tasks
-	task4 := &model.Task{ID: "4"}
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task4}))
-	waitForProcess()
-	got4, _ := q.Poll(ctx, 1, filterFnTrue)
-
-	err := q.ErrorAtOnce(ctx, []string{got4.ID, "fake-1", "fake-2"}, fmt.Errorf("test"))
-	assert.Error(t, err)
-	assert.ErrorIs(t, err, ErrNotFound)
-
-	// Verify task4 was still removed despite the error
-	waitForProcess()
-	info = q.Info(ctx)
-	assert.Len(t, info.Running, 0)
-
-	// Test ErrorAtOnce on tasks in waitingOnDeps (to cover removeFromPendingAndWaiting)
-	task5 := &model.Task{ID: "5"}
-	task6 := &model.Task{
-		ID:           "6",
-		Dependencies: []string{"5"},
-		DepStatus:    make(map[string]model.StatusValue),
-	}
+		dummyTask := &model.Task{ID: "wait-1"}
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
 
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task5, task6}))
-	waitForProcess()
+		waitForProcess()
+		got, _ := q.Poll(ctx, 1, filterFnTrue)
 
-	info = q.Info(ctx)
-	assert.Equal(t, 1, info.Stats.WaitingOnDeps, "task6 should be waiting on deps")
+		var wg sync.WaitGroup
+		wg.Add(1)
+		go func() {
+			assert.NoError(t, q.Wait(ctx, got.ID))
+			wg.Done()
+		}()
 
-	// Cancel both tasks - this should remove task6 from waitingOnDeps
-	assert.NoError(t, q.ErrorAtOnce(ctx, []string{"5", "6"}, fmt.Errorf("canceled")))
+		time.Sleep(time.Millisecond)
+		assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
+		wg.Wait()
 
-	waitForProcess()
-	info = q.Info(ctx)
-	assert.Equal(t, 0, info.Stats.WaitingOnDeps, "task6 should be removed from waitingOnDeps")
-	assert.Len(t, info.Pending, 0, "no tasks should be pending")
-}
+		assert.NoError(t, q.Wait(ctx, "non-existent"))
 
-func TestErrorAtOnceCancellation(t *testing.T) {
-	ctx, cancel, q := setupTestQueue(t)
-	defer cancel(nil)
+		dummyTask2 := &model.Task{ID: "wait-2"}
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask2}))
+		waitForProcess()
+		got2, _ := q.Poll(ctx, 1, filterFnTrue)
 
-	// Test ErrCancel with dependency propagation
-	task1 := &model.Task{ID: "1"}
-	task2 := &model.Task{
-		ID:           "2",
-		Dependencies: []string{"1"},
-		DepStatus:    make(map[string]model.StatusValue),
-		RunOn:        []string{"success", "failure"}, // Ensures task runs on kill/cancel
-	}
+		waitCtx, waitCancel := context.WithCancel(ctx)
+		errCh := make(chan error, 1)
+		go func() { errCh <- q.Wait(waitCtx, got2.ID) }()
 
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2}))
-	waitForProcess()
-	got1, _ := q.Poll(ctx, 1, filterFnTrue)
+		time.Sleep(50 * time.Millisecond)
+		waitCancel()
 
-	assert.NoError(t, q.ErrorAtOnce(ctx, []string{got1.ID}, ErrCancel))
-
-	// Wait for cancellation to be processed and dependency to be updated
-	waitForProcess()
-	waitForProcess()
-
-	got2, _ := q.Poll(ctx, 2, filterFnTrue)
-	assert.Equal(t, model.StatusKilled, got2.DepStatus["1"])
-}
+		select {
+		case err := <-errCh:
+			assert.NoError(t, err)
+		case <-time.After(time.Second):
+			t.Fatal("Wait should return when context is canceled")
+		}
 
-func TestShouldRunLogic(t *testing.T) {
-	tests := []struct {
-		name      string
-		depStatus model.StatusValue
-		runOn     []string
-		expected  bool
-	}{
-		{"Success without RunOn", model.StatusSuccess, nil, true},
-		{"Failure without RunOn", model.StatusFailure, nil, false},
-		{"Success with failure RunOn", model.StatusSuccess, []string{"failure"}, false},
-		{"Failure with failure RunOn", model.StatusFailure, []string{"failure"}, true},
-		{"Success with both RunOn", model.StatusSuccess, []string{"success", "failure"}, true},
-		{"Skipped without RunOn", model.StatusSkipped, nil, false},
-		{"Skipped with failure RunOn", model.StatusSkipped, []string{"failure"}, true},
-	}
+		// Clean up - complete the second wait task
+		assert.NoError(t, q.Done(ctx, got2.ID, model.StatusSuccess))
+		waitForProcess()
 
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			task := &model.Task{
-				ID:           "2",
-				Dependencies: []string{"1"},
-				DepStatus:    map[string]model.StatusValue{"1": tt.depStatus},
-				RunOn:        tt.runOn,
-			}
-			assert.Equal(t, tt.expected, task.ShouldRun())
-		})
-	}
+		// Verify cleanup
+		info = q.Info(ctx)
+		assert.Len(t, info.Pending, 0)
+		assert.Len(t, info.Running, 0)
+	})
 }
 
-func TestWorkerManagement(t *testing.T) {
+func TestFifoWorkerManagement(t *testing.T) {
 	ctx, cancel, q := setupTestQueue(t)
 	defer cancel(nil)
 
-	// Poll with context cancellation
-	pollCtx, pollCancel := context.WithCancel(ctx)
-	errCh := make(chan error, 1)
-	go func() {
-		_, err := q.Poll(pollCtx, 1, filterFnTrue)
-		errCh <- err
-	}()
-
-	time.Sleep(50 * time.Millisecond)
-	pollCancel()
-
-	select {
-	case err := <-errCh:
-		assert.ErrorIs(t, err, context.Canceled)
-	case <-time.After(time.Second):
-		t.Fatal("Poll should return when context is canceled")
-	}
-
-	// Kick multiple workers for same agent
-	pollResults := make(chan error, 5)
-	for i := 0; i < 5; i++ {
+	t.Run("poll with context cancellation", func(t *testing.T) {
+		pollCtx, pollCancel := context.WithCancel(ctx)
+		errCh := make(chan error, 1)
 		go func() {
-			_, err := q.Poll(ctx, 42, filterFnTrue)
-			pollResults <- err
+			_, err := q.Poll(pollCtx, 1, filterFnTrue)
+			errCh <- err
 		}()
-	}
 
-	time.Sleep(50 * time.Millisecond)
-	q.KickAgentWorkers(42)
+		time.Sleep(50 * time.Millisecond)
+		pollCancel()
 
-	kickedCount := 0
-	for i := 0; i < 5; i++ {
 		select {
-		case err := <-pollResults:
-			if errors.Is(err, context.Canceled) {
-				kickedCount++
-			}
+		case err := <-errCh:
+			assert.ErrorIs(t, err, context.Canceled)
 		case <-time.After(time.Second):
-			t.Fatal("expected all workers to be kicked")
+			t.Fatal("Poll should return when context is canceled")
 		}
-	}
-	assert.Equal(t, 5, kickedCount)
+	})
+
+	t.Run("kick agent workers", func(t *testing.T) {
+		pollResults := make(chan error, 5)
+		for i := 0; i < 5; i++ {
+			go func() {
+				_, err := q.Poll(ctx, 42, filterFnTrue)
+				pollResults <- err
+			}()
+		}
+
+		time.Sleep(50 * time.Millisecond)
+		q.KickAgentWorkers(42)
+
+		kickedCount := 0
+		for i := 0; i < 5; i++ {
+			select {
+			case err := <-pollResults:
+				if errors.Is(err, context.Canceled) {
+					kickedCount++
+				}
+			case <-time.After(time.Second):
+				t.Fatal("expected all workers to be kicked")
+			}
+		}
+		assert.Equal(t, 5, kickedCount)
+	})
 }
 
-func TestLabelBasedScoring(t *testing.T) {
+func TestFifoLabelBasedScoring(t *testing.T) {
 	ctx, cancel := context.WithCancelCause(t.Context())
 	defer cancel(nil)
 
@@ -556,33 +595,37 @@ func TestLabelBasedScoring(t *testing.T) {
 		}
 	}
 
-	// Agent 1 should get task with org-id 123
 	assert.Contains(t, []string{"1", "3"}, findTaskByAgent(receivedTasks, 1))
-	// Agent 2 should get task with org-id 456
 	assert.Equal(t, "2", findTaskByAgent(receivedTasks, 2))
 }
 
-func TestPauseResume(t *testing.T) {
-	ctx, cancel, q := setupTestQueue(t)
-	defer cancel(nil)
-
-	dummyTask := genDummyTask()
-
-	var wg sync.WaitGroup
-	wg.Add(1)
-	go func() {
-		_, _ = q.Poll(ctx, 1, filterFnTrue)
-		wg.Done()
-	}()
-
-	q.Pause()
-	t0 := time.Now()
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
-	waitForProcess()
-	q.Resume()
+func TestShouldRunLogic(t *testing.T) {
+	tests := []struct {
+		name      string
+		depStatus model.StatusValue
+		runOn     []string
+		expected  bool
+	}{
+		{"Success without RunOn", model.StatusSuccess, nil, true},
+		{"Failure without RunOn", model.StatusFailure, nil, false},
+		{"Success with failure RunOn", model.StatusSuccess, []string{"failure"}, false},
+		{"Failure with failure RunOn", model.StatusFailure, []string{"failure"}, true},
+		{"Success with both RunOn", model.StatusSuccess, []string{"success", "failure"}, true},
+		{"Skipped without RunOn", model.StatusSkipped, nil, false},
+		{"Skipped with failure RunOn", model.StatusSkipped, []string{"failure"}, true},
+	}
 
-	wg.Wait()
-	assert.Greater(t, time.Since(t0), 20*time.Millisecond)
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			task := &model.Task{
+				ID:           "2",
+				Dependencies: []string{"1"},
+				DepStatus:    map[string]model.StatusValue{"1": tt.depStatus},
+				RunOn:        tt.runOn,
+			}
+			assert.Equal(t, tt.expected, task.ShouldRun())
+		})
+	}
 }
 
 func findTaskByAgent(tasks map[string]int64, agentID int64) string {
@@ -593,75 +636,3 @@ func findTaskByAgent(tasks map[string]int64, agentID int64) string {
 	}
 	return ""
 }
-
-func TestDependencyStatusPropagation(t *testing.T) {
-	ctx, cancel, q := setupTestQueue(t)
-	defer cancel(nil)
-
-	// Test basic dependency status propagation from completed task to waiting tasks
-	task1 := genDummyTask()
-	task2 := &model.Task{
-		ID:           "2",
-		Dependencies: []string{"1"},
-		DepStatus:    make(map[string]model.StatusValue),
-	}
-	task3 := &model.Task{
-		ID:           "3",
-		Dependencies: []string{"1"},
-		DepStatus:    make(map[string]model.StatusValue),
-		RunOn:        []string{"success", "failure"},
-	}
-
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2, task3}))
-	waitForProcess()
-
-	// Both task2 and task3 should be waiting on task1
-	info := q.Info(ctx)
-	assert.Equal(t, 2, info.Stats.WaitingOnDeps)
-
-	// Complete task1 - this triggers updateDepStatusInQueue for waitingOnDeps tasks
-	got1, _ := q.Poll(ctx, 1, filterFnTrue)
-	assert.NoError(t, q.Done(ctx, got1.ID, model.StatusSuccess))
-
-	waitForProcess()
-
-	// Poll task2 and task3 - both should have task1's success status
-	got2, _ := q.Poll(ctx, 2, filterFnTrue)
-	got3, _ := q.Poll(ctx, 3, filterFnTrue)
-
-	assert.Equal(t, model.StatusSuccess, got2.DepStatus["1"])
-	assert.Equal(t, model.StatusSuccess, got3.DepStatus["1"])
-
-	// Now test updateDepStatusInQueue for tasks in pending and running queues
-	task4 := &model.Task{ID: "4"}
-	task5 := &model.Task{
-		ID:           "5",
-		Dependencies: []string{"4"},
-		DepStatus:    make(map[string]model.StatusValue),
-	}
-	task6 := &model.Task{
-		ID:           "6",
-		Dependencies: []string{"4"},
-		DepStatus:    make(map[string]model.StatusValue),
-		RunOn:        []string{"success", "failure"},
-	}
-
-	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task4, task5, task6}))
-	waitForProcess()
-
-	// Poll task4 and complete it while task5 and task6 are waiting
-	got4, _ := q.Poll(ctx, 4, filterFnTrue)
-	assert.NoError(t, q.Error(ctx, got4.ID, fmt.Errorf("failed")))
-
-	waitForProcess()
-
-	// task5 should not run (default behavior on failure)
-	got5, _ := q.Poll(ctx, 5, filterFnTrue)
-	assert.Equal(t, model.StatusFailure, got5.DepStatus["4"])
-	assert.False(t, got5.ShouldRun())
-
-	// task6 should run (has failure in RunOn)
-	got6, _ := q.Poll(ctx, 6, filterFnTrue)
-	assert.Equal(t, model.StatusFailure, got6.DepStatus["4"])
-	assert.True(t, got6.ShouldRun())
-}

From 08f0c43be32806900b995cd709d4977a0ab30d71 Mon Sep 17 00:00:00 2001
From: 6543 <6543@obermui.de>
Date: Mon, 26 Jan 2026 19:55:06 +0100
Subject: [PATCH 12/13] add edgecases

---
 server/queue/fifo_test.go | 359 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 355 insertions(+), 4 deletions(-)

diff --git a/server/queue/fifo_test.go b/server/queue/fifo_test.go
index 0ffaed96937..56b360024a1 100644
--- a/server/queue/fifo_test.go
+++ b/server/queue/fifo_test.go
@@ -72,11 +72,24 @@ func TestFifoBasicOperations(t *testing.T) {
 		assert.Len(t, info.Pending, 0)
 		assert.Len(t, info.Running, 1)
 
+		// Edge case: verify task can't be polled again while running
+		pollCtx, pollCancel := context.WithTimeout(ctx, 100*time.Millisecond)
+		_, err = q.Poll(pollCtx, 2, filterFnTrue)
+		pollCancel()
+		assert.Error(t, err) // Should timeout/cancel, not return the same task
+
 		assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
 
 		waitForProcess()
 		info = q.Info(ctx)
 		assert.Len(t, info.Running, 0)
+
+		// Edge case: Done on already completed task should handle gracefully
+		err = q.Done(ctx, got.ID, model.StatusSuccess)
+		// Document current behavior - should either error or be idempotent
+		if err != nil {
+			assert.Error(t, err)
+		}
 	})
 
 	t.Run("error handling", func(t *testing.T) {
@@ -92,6 +105,13 @@ func TestFifoBasicOperations(t *testing.T) {
 		assert.Len(t, info.Running, 0)
 
 		assert.Error(t, q.Error(ctx, "totally-fake-id", fmt.Errorf("test error")))
+
+		// Edge case: Error on task that's already errored
+		err := q.Error(ctx, got.ID, fmt.Errorf("double error"))
+		// Should either error or be idempotent
+		if err != nil {
+			assert.Error(t, err)
+		}
 	})
 
 	t.Run("error at once", func(t *testing.T) {
@@ -127,6 +147,19 @@ func TestFifoBasicOperations(t *testing.T) {
 		waitForProcess()
 		info = q.Info(ctx)
 		assert.Len(t, info.Running, 0)
+
+		// Edge case: ErrorAtOnce with empty slice
+		err = q.ErrorAtOnce(ctx, []string{}, fmt.Errorf("no tasks"))
+		// Should handle gracefully, potentially no-op
+
+		// Edge case: ErrorAtOnce with nil error
+		task5 := &model.Task{ID: "batch-5"}
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task5}))
+		waitForProcess()
+		got5, _ := q.Poll(ctx, 3, filterFnTrue)
+		err = q.ErrorAtOnce(ctx, []string{got5.ID}, nil)
+		// Should handle nil error gracefully
+		waitForProcess()
 	})
 
 	t.Run("error at once with waiting deps", func(t *testing.T) {
@@ -149,6 +182,10 @@ func TestFifoBasicOperations(t *testing.T) {
 		info = q.Info(ctx)
 		assert.Equal(t, 0, info.Stats.WaitingOnDeps)
 		assert.Len(t, info.Pending, 0)
+
+		// Edge case: verify both tasks are actually gone, not stuck somewhere
+		assert.Len(t, info.Running, 0)
+		assert.Len(t, info.WaitingOnDeps, 0)
 	})
 
 	t.Run("error at once cancellation", func(t *testing.T) {
@@ -171,6 +208,11 @@ func TestFifoBasicOperations(t *testing.T) {
 
 		got2, _ := q.Poll(ctx, 2, filterFnTrue)
 		assert.Equal(t, model.StatusKilled, got2.DepStatus["cancel-prop-1"])
+
+		// Edge case: verify ErrCancel results in StatusKilled not StatusFailure
+		assert.NotEqual(t, model.StatusFailure, got2.DepStatus["cancel-prop-1"])
+		assert.NoError(t, q.Done(ctx, got2.ID, model.StatusSuccess))
+		waitForProcess()
 	})
 
 	t.Run("pause resume", func(t *testing.T) {
@@ -187,10 +229,34 @@ func TestFifoBasicOperations(t *testing.T) {
 		t0 := time.Now()
 		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask}))
 		waitForProcess()
+
+		// Edge case: verify queue is actually paused
+		info := q.Info(ctx)
+		assert.True(t, info.Paused)
+		assert.Len(t, info.Pending, 1)
+		assert.Len(t, info.Running, 0)
+
 		q.Resume()
 
 		wg.Wait()
 		assert.Greater(t, time.Since(t0), 20*time.Millisecond)
+
+		// Edge case: verify queue is unpaused
+		info = q.Info(ctx)
+		assert.False(t, info.Paused)
+
+		// Edge case: multiple pause/resume cycles
+		task2 := &model.Task{ID: "pause-2"}
+		q.Pause()
+		q.Pause() // Double pause
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task2}))
+		waitForProcess()
+		q.Resume()
+		q.Resume() // Double resume
+		waitForProcess()
+		got, _ := q.Poll(ctx, 99, filterFnTrue)
+		assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
+		waitForProcess()
 	})
 }
 
@@ -240,6 +306,10 @@ func TestFifoDependencies(t *testing.T) {
 		waitForProcess()
 		info = q.Info(ctx)
 		assert.Equal(t, 0, info.Stats.WaitingOnDeps)
+
+		// Edge case: verify DepStatus is correctly set before polling
+		assert.NotEmpty(t, task2.DepStatus)
+		assert.NotEmpty(t, task3.DepStatus)
 	})
 
 	t.Run("multiple dependencies", func(t *testing.T) {
@@ -278,6 +348,11 @@ func TestFifoDependencies(t *testing.T) {
 			(got3.DepStatus["multi-dep-1"] == model.StatusSuccess && got3.DepStatus["multi-dep-2"] == model.StatusFailure) ||
 				(got3.DepStatus["multi-dep-1"] == model.StatusFailure && got3.DepStatus["multi-dep-2"] == model.StatusSuccess))
 		assert.False(t, got3.ShouldRun())
+
+		// Edge case: verify both deps are tracked
+		assert.Len(t, got3.DepStatus, 2)
+		assert.NoError(t, q.Done(ctx, got3.ID, model.StatusSkipped))
+		waitForProcess()
 	})
 
 	t.Run("transitive dependencies", func(t *testing.T) {
@@ -308,6 +383,12 @@ func TestFifoDependencies(t *testing.T) {
 		got, _ = q.Poll(ctx, 3, filterFnTrue)
 		assert.Equal(t, model.StatusSkipped, got.DepStatus["trans-2"])
 		assert.False(t, got.ShouldRun())
+
+		// Edge case: verify transitive failure propagates correctly
+		// task3 should see trans-2 as skipped, not trans-1's status
+		assert.NotContains(t, got.DepStatus, "trans-1")
+		assert.NoError(t, q.Done(ctx, got.ID, model.StatusSkipped))
+		waitForProcess()
 	})
 
 	t.Run("dependency status propagation", func(t *testing.T) {
@@ -341,6 +422,12 @@ func TestFifoDependencies(t *testing.T) {
 		assert.Equal(t, model.StatusSuccess, got2.DepStatus["prop-1"])
 		assert.Equal(t, model.StatusSuccess, got3.DepStatus["prop-1"])
 
+		// Edge case: verify both tasks can be polled concurrently
+		assert.NotEqual(t, got2.ID, got3.ID)
+		assert.NoError(t, q.Done(ctx, got2.ID, model.StatusSuccess))
+		assert.NoError(t, q.Done(ctx, got3.ID, model.StatusSuccess))
+		waitForProcess()
+
 		task4 := &model.Task{ID: "prop-4"}
 		task5 := &model.Task{
 			ID:           "prop-5",
@@ -369,6 +456,119 @@ func TestFifoDependencies(t *testing.T) {
 		got6, _ := q.Poll(ctx, 6, filterFnTrue)
 		assert.Equal(t, model.StatusFailure, got6.DepStatus["prop-4"])
 		assert.True(t, got6.ShouldRun())
+
+		// Edge case: complete dependent tasks
+		assert.NoError(t, q.Done(ctx, got5.ID, model.StatusSkipped))
+		assert.NoError(t, q.Done(ctx, got6.ID, model.StatusSuccess))
+		waitForProcess()
+	})
+
+	// Edge case: circular dependency detection (should be handled or cause issue)
+	t.Run("circular dependencies", func(t *testing.T) {
+		task1 := &model.Task{
+			ID:           "circ-1",
+			Dependencies: []string{"circ-2"},
+			DepStatus:    make(map[string]model.StatusValue),
+		}
+		task2 := &model.Task{
+			ID:           "circ-2",
+			Dependencies: []string{"circ-1"},
+			DepStatus:    make(map[string]model.StatusValue),
+		}
+
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1, task2}))
+		waitForProcess()
+
+		info := q.Info(ctx)
+		// Both should be waiting on deps - this is a deadlock scenario
+		assert.Equal(t, 2, info.Stats.WaitingOnDeps)
+		assert.Len(t, info.Pending, 0)
+
+		// Verify they never become available for polling
+		pollCtx, pollCancel := context.WithTimeout(ctx, 200*time.Millisecond)
+		_, err := q.Poll(pollCtx, 99, filterFnTrue)
+		pollCancel()
+		assert.Error(t, err) // Should timeout
+
+		// Clean up the deadlocked tasks
+		assert.NoError(t, q.ErrorAtOnce(ctx, []string{"circ-1", "circ-2"}, fmt.Errorf("circular dep")))
+		waitForProcess()
+	})
+
+	// Edge case: dependency on non-existent task
+	// NOTE: This reveals a potential issue - the queue doesn't validate dependencies exist.
+	// If a dependency was never added to the queue, the task will run immediately since
+	// depsInQueue() only checks currently pending/running tasks, not if deps will arrive.
+	t.Run("non-existent dependency", func(t *testing.T) {
+		task1 := &model.Task{
+			ID:           "orphan-1",
+			Dependencies: []string{"does-not-exist"},
+			DepStatus:    make(map[string]model.StatusValue),
+		}
+
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task1}))
+		waitForProcess()
+
+		info := q.Info(ctx)
+		// Current implementation: task doesn't wait if dependency not in queue
+		// This means tasks with typos in dependency names will run immediately!
+		assert.Equal(t, 0, info.Stats.WaitingOnDeps)
+		assert.Len(t, info.Pending, 1)
+
+		// Task will be available for polling even though dependency doesn't exist
+		got, err := q.Poll(ctx, 99, filterFnTrue)
+		assert.NoError(t, err)
+		assert.Equal(t, "orphan-1", got.ID)
+
+		// DepStatus will be empty since dependency never completed
+		assert.Empty(t, got.DepStatus)
+
+		// Clean up
+		assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
+		waitForProcess()
+	})
+
+	// Edge case: dependency added AFTER dependent task (race condition)
+	t.Run("dependency added after dependent", func(t *testing.T) {
+		// Push dependent task first
+		dependent := &model.Task{
+			ID:           "late-dep-child",
+			Dependencies: []string{"late-dep-parent"},
+			DepStatus:    make(map[string]model.StatusValue),
+		}
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dependent}))
+		waitForProcess()
+
+		// At this point, dependent doesn't see parent in queue, so it won't wait
+		info := q.Info(ctx)
+		// Dependent should NOT be waiting since parent doesn't exist yet
+		initialWaiting := info.Stats.WaitingOnDeps
+
+		// Now add the parent task
+		parent := &model.Task{ID: "late-dep-parent"}
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{parent}))
+		waitForProcess()
+
+		// After filterWaiting runs, dependent SHOULD now see parent and wait
+		info = q.Info(ctx)
+		// The implementation calls filterWaiting() which rechecks dependencies
+		// So dependent should now be waiting
+		assert.Greater(t, info.Stats.WaitingOnDeps, initialWaiting,
+			"dependent should start waiting once parent is added")
+
+		// Complete parent first
+		gotParent, _ := q.Poll(ctx, 1, filterFnTrue)
+		assert.Equal(t, "late-dep-parent", gotParent.ID, "parent should be polled first")
+		assert.NoError(t, q.Done(ctx, gotParent.ID, model.StatusSuccess))
+		waitForProcess()
+
+		// Now child should be unblocked with parent's status
+		gotChild, _ := q.Poll(ctx, 2, filterFnTrue)
+		assert.Equal(t, "late-dep-child", gotChild.ID)
+		assert.Equal(t, model.StatusSuccess, gotChild.DepStatus["late-dep-parent"])
+
+		assert.NoError(t, q.Done(ctx, gotChild.ID, model.StatusSuccess))
+		waitForProcess()
 	})
 }
 
@@ -392,6 +592,8 @@ func TestFifoLeaseManagement(t *testing.T) {
 		select {
 		case werr := <-errCh:
 			assert.Error(t, werr)
+			// Edge case: verify error is ErrTaskExpired
+			assert.ErrorIs(t, werr, ErrTaskExpired)
 		case <-time.After(2 * time.Second):
 			t.Fatal("timeout waiting for Wait to return")
 		}
@@ -399,9 +601,11 @@ func TestFifoLeaseManagement(t *testing.T) {
 		info := q.Info(ctx)
 		assert.Len(t, info.Pending, 1)
 
-		// Clean up - poll and complete the task
-		got, _ = q.Poll(ctx, 1, filterFnTrue)
-		assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
+		// Edge case: verify task was resubmitted to front of queue
+		got2, _ := q.Poll(ctx, 1, filterFnTrue)
+		assert.Equal(t, got.ID, got2.ID) // Same task resubmitted
+
+		assert.NoError(t, q.Done(ctx, got2.ID, model.StatusSuccess))
 		waitForProcess()
 
 		// Verify cleanup
@@ -423,6 +627,7 @@ func TestFifoLeaseManagement(t *testing.T) {
 		assert.ErrorIs(t, q.Extend(ctx, 1, got.ID), ErrAgentMissMatch)
 		assert.ErrorIs(t, q.Extend(ctx, 1, "non-existent"), ErrNotFound)
 
+		// Edge case: extend multiple times rapidly
 		for i := 0; i < 3; i++ {
 			time.Sleep(30 * time.Millisecond)
 			assert.NoError(t, q.Extend(ctx, 5, got.ID))
@@ -432,9 +637,10 @@ func TestFifoLeaseManagement(t *testing.T) {
 		assert.Len(t, info.Running, 1)
 		assert.Len(t, info.Pending, 0)
 
-		// Clean up - complete the extended task
+		// Edge case: extend after Done should error
 		assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
 		waitForProcess()
+		assert.ErrorIs(t, q.Extend(ctx, 5, got.ID), ErrNotFound)
 
 		// Verify cleanup
 		info = q.Info(ctx)
@@ -465,6 +671,7 @@ func TestFifoLeaseManagement(t *testing.T) {
 		assert.NoError(t, q.Done(ctx, got.ID, model.StatusSuccess))
 		wg.Wait()
 
+		// Edge case: Wait on non-existent task should return immediately
 		assert.NoError(t, q.Wait(ctx, "non-existent"))
 
 		dummyTask2 := &model.Task{ID: "wait-2"}
@@ -490,6 +697,25 @@ func TestFifoLeaseManagement(t *testing.T) {
 		assert.NoError(t, q.Done(ctx, got2.ID, model.StatusSuccess))
 		waitForProcess()
 
+		// Edge case: multiple concurrent waits on same task
+		dummyTask3 := &model.Task{ID: "wait-3"}
+		assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{dummyTask3}))
+		waitForProcess()
+		got3, _ := q.Poll(ctx, 1, filterFnTrue)
+
+		var wg2 sync.WaitGroup
+		wg2.Add(3)
+		for i := 0; i < 3; i++ {
+			go func() {
+				assert.NoError(t, q.Wait(ctx, got3.ID))
+				wg2.Done()
+			}()
+		}
+
+		time.Sleep(10 * time.Millisecond)
+		assert.NoError(t, q.Done(ctx, got3.ID, model.StatusSuccess))
+		wg2.Wait()
+
 		// Verify cleanup
 		info = q.Info(ctx)
 		assert.Len(t, info.Pending, 0)
@@ -518,6 +744,10 @@ func TestFifoWorkerManagement(t *testing.T) {
 		case <-time.After(time.Second):
 			t.Fatal("Poll should return when context is canceled")
 		}
+
+		// Edge case: verify worker is cleaned up
+		info := q.Info(ctx)
+		assert.Equal(t, 0, info.Stats.Workers)
 	})
 
 	t.Run("kick agent workers", func(t *testing.T) {
@@ -530,6 +760,11 @@ func TestFifoWorkerManagement(t *testing.T) {
 		}
 
 		time.Sleep(50 * time.Millisecond)
+
+		// Edge case: verify workers are registered before kicking
+		info := q.Info(ctx)
+		assert.Equal(t, 5, info.Stats.Workers)
+
 		q.KickAgentWorkers(42)
 
 		kickedCount := 0
@@ -544,6 +779,77 @@ func TestFifoWorkerManagement(t *testing.T) {
 			}
 		}
 		assert.Equal(t, 5, kickedCount)
+
+		// Edge case: verify workers are removed after kicking
+		waitForProcess()
+		info = q.Info(ctx)
+		assert.Equal(t, 0, info.Stats.Workers)
+
+		// Edge case: kick non-existent agent should be no-op
+		q.KickAgentWorkers(999)
+	})
+
+	// Edge case: mixed agent workers
+	t.Run("kick specific agent among multiple", func(t *testing.T) {
+		pollResults := make(chan struct {
+			agentID int64
+			err     error
+		}, 10)
+
+		// Start workers for agent 1
+		for i := 0; i < 3; i++ {
+			go func() {
+				_, err := q.Poll(ctx, 1, filterFnTrue)
+				pollResults <- struct {
+					agentID int64
+					err     error
+				}{1, err}
+			}()
+		}
+
+		// Start workers for agent 2
+		for i := 0; i < 3; i++ {
+			go func() {
+				_, err := q.Poll(ctx, 2, filterFnTrue)
+				pollResults <- struct {
+					agentID int64
+					err     error
+				}{2, err}
+			}()
+		}
+
+		time.Sleep(50 * time.Millisecond)
+		info := q.Info(ctx)
+		assert.Equal(t, 6, info.Stats.Workers)
+
+		// Kick only agent 1
+		q.KickAgentWorkers(1)
+
+		kickedAgent1 := 0
+		kickedAgent2 := 0
+		for i := 0; i < 3; i++ {
+			select {
+			case result := <-pollResults:
+				if errors.Is(result.err, context.Canceled) {
+					if result.agentID == 1 {
+						kickedAgent1++
+					} else {
+						kickedAgent2++
+					}
+				}
+			case <-time.After(time.Second):
+				t.Fatal("expected kicked workers to return")
+			}
+		}
+
+		assert.Equal(t, 3, kickedAgent1)
+		assert.Equal(t, 0, kickedAgent2)
+
+		// Clean up agent 2 workers
+		q.KickAgentWorkers(2)
+		for i := 0; i < 3; i++ {
+			<-pollResults
+		}
 	})
 }
 
@@ -597,6 +903,27 @@ func TestFifoLabelBasedScoring(t *testing.T) {
 
 	assert.Contains(t, []string{"1", "3"}, findTaskByAgent(receivedTasks, 1))
 	assert.Equal(t, "2", findTaskByAgent(receivedTasks, 2))
+
+	// Edge case: filter that rejects all tasks
+	filterRejectAll := func(task *model.Task) (bool, int) {
+		return false, 0
+	}
+
+	task4 := &model.Task{ID: "4", Labels: map[string]string{"org-id": "789"}}
+	assert.NoError(t, q.PushAtOnce(ctx, []*model.Task{task4}))
+	waitForProcess()
+
+	pollCtx, pollCancel := context.WithTimeout(ctx, 200*time.Millisecond)
+	_, err := q.Poll(pollCtx, 99, filterRejectAll)
+	pollCancel()
+	assert.Error(t, err) // Should timeout as filter rejects task
+
+	// Clean up remaining tasks
+	task3, _ := q.Poll(ctx, 1, filterFnTrue)
+	assert.NoError(t, q.Done(ctx, task3.ID, model.StatusSuccess))
+	task4Got, _ := q.Poll(ctx, 99, filterFnTrue)
+	assert.NoError(t, q.Done(ctx, task4Got.ID, model.StatusSuccess))
+	waitForProcess()
 }
 
 func TestShouldRunLogic(t *testing.T) {
@@ -613,6 +940,11 @@ func TestShouldRunLogic(t *testing.T) {
 		{"Success with both RunOn", model.StatusSuccess, []string{"success", "failure"}, true},
 		{"Skipped without RunOn", model.StatusSkipped, nil, false},
 		{"Skipped with failure RunOn", model.StatusSkipped, []string{"failure"}, true},
+		// Edge cases
+		{"Killed without RunOn", model.StatusKilled, nil, false},
+		{"Killed with failure RunOn", model.StatusKilled, []string{"failure"}, true},
+		{"Success with success RunOn only", model.StatusSuccess, []string{"success"}, true},
+		{"Failure with success RunOn only", model.StatusFailure, []string{"success"}, false},
 	}
 
 	for _, tt := range tests {
@@ -626,6 +958,25 @@ func TestShouldRunLogic(t *testing.T) {
 			assert.Equal(t, tt.expected, task.ShouldRun())
 		})
 	}
+
+	// Edge case: multiple dependencies with mixed statuses
+	t.Run("multiple deps mixed status", func(t *testing.T) {
+		task := &model.Task{
+			ID:           "3",
+			Dependencies: []string{"1", "2"},
+			DepStatus: map[string]model.StatusValue{
+				"1": model.StatusSuccess,
+				"2": model.StatusFailure,
+			},
+			RunOn: nil,
+		}
+		// With default RunOn (nil), needs all deps successful
+		assert.False(t, task.ShouldRun())
+
+		task.RunOn = []string{"success", "failure"}
+		// With both RunOn, should run regardless
+		assert.True(t, task.ShouldRun())
+	})
 }
 
 func findTaskByAgent(tasks map[string]int64, agentID int64) string {

From 3c6d9f5014b4131c7222ac6c36bd89a52fb16299 Mon Sep 17 00:00:00 2001
From: 6543 <6543@obermui.de>
Date: Mon, 26 Jan 2026 23:52:19 +0100
Subject: [PATCH 13/13] fix lint errors

---
 server/queue/fifo.go      | 16 ++++++++--------
 server/queue/fifo_test.go | 10 ++++++----
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/server/queue/fifo.go b/server/queue/fifo.go
index 5a5209b8e49..25acb25bfbb 100644
--- a/server/queue/fifo.go
+++ b/server/queue/fifo.go
@@ -198,11 +198,11 @@ func (q *fifo) Info(_ context.Context) InfoT {
 	stats.Stats.Running = len(q.running)
 
 	for element := q.pending.Front(); element != nil; element = element.Next() {
-		task := element.Value.(*model.Task)
+		task, _ := element.Value.(*model.Task)
 		stats.Pending = append(stats.Pending, task)
 	}
 	for element := q.waitingOnDeps.Front(); element != nil; element = element.Next() {
-		task := element.Value.(*model.Task)
+		task, _ := element.Value.(*model.Task)
 		stats.WaitingOnDeps = append(stats.WaitingOnDeps, task)
 	}
 	for _, entry := range q.running {
@@ -286,7 +286,7 @@ func (q *fifo) filterWaiting() {
 	q.waitingOnDeps = list.New()
 	var filtered []*list.Element
 	for element := q.pending.Front(); element != nil; element = element.Next() {
-		task := element.Value.(*model.Task)
+		task, _ := element.Value.(*model.Task)
 		if q.depsInQueue(task) {
 			log.Debug().Msgf("queue: waiting due to unmet dependencies %v", task.ID)
 			q.waitingOnDeps.PushBack(task)
@@ -305,7 +305,7 @@ func (q *fifo) assignToWorker() (*list.Element, *worker) {
 	var bestScore int
 
 	for element := q.pending.Front(); element != nil; element = element.Next() {
-		task := element.Value.(*model.Task)
+		task, _ := element.Value.(*model.Task)
 		log.Debug().Msgf("queue: trying to assign task: %v with deps %v", task.ID, task.Dependencies)
 
 		for worker := range q.workers {
@@ -358,7 +358,7 @@ func (q *fifo) depsInQueue(task *model.Task) bool {
 // expects the q to be currently owned e.g. locked by caller!
 func (q *fifo) updateDepStatusInQueue(taskID string, status model.StatusValue) {
 	for element := q.pending.Front(); element != nil; element = element.Next() {
-		pending := element.Value.(*model.Task)
+		pending, _ := element.Value.(*model.Task)
 		for _, dep := range pending.Dependencies {
 			if taskID == dep {
 				pending.DepStatus[dep] = status
@@ -375,7 +375,7 @@ func (q *fifo) updateDepStatusInQueue(taskID string, status model.StatusValue) {
 	}
 
 	for element := q.waitingOnDeps.Front(); element != nil; element = element.Next() {
-		waiting := element.Value.(*model.Task)
+		waiting, _ := element.Value.(*model.Task)
 		for _, dep := range waiting.Dependencies {
 			if taskID == dep {
 				waiting.DepStatus[dep] = status
@@ -390,7 +390,7 @@ func (q *fifo) removeFromPendingAndWaiting(taskID string) error {
 
 	// we assume pending first
 	for element := q.pending.Front(); element != nil; element = element.Next() {
-		task := element.Value.(*model.Task)
+		task, _ := element.Value.(*model.Task)
 		if task.ID == taskID {
 			log.Debug().Msgf("queue: %s is removed from pending", taskID)
 			_ = q.pending.Remove(element)
@@ -400,7 +400,7 @@ func (q *fifo) removeFromPendingAndWaiting(taskID string) error {
 
 	// well looks like it's waiting
 	for element := q.waitingOnDeps.Front(); element != nil; element = element.Next() {
-		task := element.Value.(*model.Task)
+		task, _ := element.Value.(*model.Task)
 		if task.ID == taskID {
 			log.Debug().Msgf("queue: %s is removed from waitingOnDeps", taskID)
 			_ = q.waitingOnDeps.Remove(element)
diff --git a/server/queue/fifo_test.go b/server/queue/fifo_test.go
index 56b360024a1..b8f05f62578 100644
--- a/server/queue/fifo_test.go
+++ b/server/queue/fifo_test.go
@@ -150,6 +150,7 @@ func TestFifoBasicOperations(t *testing.T) {
 
 		// Edge case: ErrorAtOnce with empty slice
 		err = q.ErrorAtOnce(ctx, []string{}, fmt.Errorf("no tasks"))
+		assert.NoError(t, err)
 		// Should handle gracefully, potentially no-op
 
 		// Edge case: ErrorAtOnce with nil error
@@ -158,6 +159,7 @@ func TestFifoBasicOperations(t *testing.T) {
 		waitForProcess()
 		got5, _ := q.Poll(ctx, 3, filterFnTrue)
 		err = q.ErrorAtOnce(ctx, []string{got5.ID}, nil)
+		assert.NoError(t, err)
 		// Should handle nil error gracefully
 		waitForProcess()
 	})
@@ -679,12 +681,12 @@ func TestFifoLeaseManagement(t *testing.T) {
 		waitForProcess()
 		got2, _ := q.Poll(ctx, 1, filterFnTrue)
 
-		waitCtx, waitCancel := context.WithCancel(ctx)
+		waitCtx, waitCancel := context.WithCancelCause(ctx)
 		errCh := make(chan error, 1)
 		go func() { errCh <- q.Wait(waitCtx, got2.ID) }()
 
 		time.Sleep(50 * time.Millisecond)
-		waitCancel()
+		waitCancel(nil)
 
 		select {
 		case err := <-errCh:
@@ -728,7 +730,7 @@ func TestFifoWorkerManagement(t *testing.T) {
 	defer cancel(nil)
 
 	t.Run("poll with context cancellation", func(t *testing.T) {
-		pollCtx, pollCancel := context.WithCancel(ctx)
+		pollCtx, pollCancel := context.WithCancelCause(ctx)
 		errCh := make(chan error, 1)
 		go func() {
 			_, err := q.Poll(pollCtx, 1, filterFnTrue)
@@ -736,7 +738,7 @@ func TestFifoWorkerManagement(t *testing.T) {
 		}()
 
 		time.Sleep(50 * time.Millisecond)
-		pollCancel()
+		pollCancel(nil)
 
 		select {
 		case err := <-errCh: