
Commit b39ccd7

Authored and committed by Josh Hawn
Use Service Placement Constraints in Enforcer
This patch updates the ConstraintEnforcer to use the placement constraints from the current service spec rather than the placement constraints from the current task spec, because the latter may be outdated in some cases (e.g., when the service was previously updated to modify placement constraints but the node to which the task was scheduled still satisfied the new constraints; if the node is later updated in a way which causes it to no longer satisfy the new constraints, the task should be removed even if it would still satisfy the original task constraints).

Docker-DCO-1.1-Signed-off-by: Josh Hawn <[email protected]> (github: jlhawn)
1 parent 88dcc0f commit b39ccd7
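
To make the intent of the change concrete, here is a minimal, self-contained Go sketch of the selection rule this patch introduces: prefer the service's current placement constraints and fall back to the task spec only when the task has no (live) service. The helper name effectivePlacement and the main function are illustrative only and are not part of the patch; the api types are the ones already used by the enforcer.

package main

import (
	"fmt"

	"github.com/docker/swarmkit/api"
)

// effectivePlacement is an illustrative helper (not part of this patch). It
// returns the placement that should be enforced for a task, preferring the
// service's current spec and falling back to the task spec for tasks whose
// service no longer exists.
func effectivePlacement(t *api.Task, services map[string]*api.Service) *api.Placement {
	if service := services[t.ServiceID]; service != nil {
		// The live service spec wins; the task spec may be stale if a
		// service update only changed the constraints.
		return service.Spec.Task.Placement
	}
	return t.Spec.Placement
}

func main() {
	// A task scheduled before the service gained a constraint records no
	// placement of its own.
	task := &api.Task{
		ID:        "task1",
		ServiceID: "service1",
		Spec:      api.TaskSpec{Placement: nil},
	}
	services := map[string]*api.Service{
		"service1": {
			Spec: api.ServiceSpec{
				Task: api.TaskSpec{
					Placement: &api.Placement{
						Constraints: []string{"node.labels.foo == bar"},
					},
				},
			},
		},
	}

	p := effectivePlacement(task, services)
	fmt.Println(p.Constraints) // [node.labels.foo == bar]
}

The enforcer's own version of this logic appears inline in rejectNoncompliantTasks in the diff below.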

File tree

2 files changed (+160, -4 lines)

manager/orchestrator/constraintenforcer/constraint_enforcer.go (+52, -4)
@@ -76,8 +76,22 @@ func (ce *ConstraintEnforcer) rejectNoncompliantTasks(node *api.Node) {
         err   error
     )
 
+    services := map[string]*api.Service{}
     ce.store.View(func(tx store.ReadTx) {
         tasks, err = store.FindTasks(tx, store.ByNodeID(node.ID))
+        if err != nil {
+            return
+        }
+
+        // Deduplicate service IDs using the services map. It's okay for the
+        // values to be nil for now, we will look them up from the store next.
+        for _, task := range tasks {
+            services[task.ServiceID] = nil
+        }
+
+        for serviceID := range services {
+            services[serviceID] = store.GetService(tx, serviceID)
+        }
     })
 
     if err != nil {
@@ -105,10 +119,44 @@ loop:
             continue
         }
 
-        // Ensure that the task still meets scheduling
-        // constraints.
-        if t.Spec.Placement != nil && len(t.Spec.Placement.Constraints) != 0 {
-            constraints, _ := constraint.Parse(t.Spec.Placement.Constraints)
+        // Ensure that the node still satisfies placement constraints.
+        // NOTE: If the task is associated with a service then we must use the
+        // constraints from the current service spec rather than the
+        // constraints from the task spec because they may be outdated. This
+        // will happen if the service was previously updated in a way which
+        // only changes the placement constraints and the node matched the
+        // placement constraints both before and after that update. In the case
+        // of such updates, the tasks are not considered "dirty" and are not
+        // restarted but it will mean that the task spec's placement
+        // constraints are outdated. Consider this example:
+        // - A service is created with no constraints and a task is scheduled
+        //   to a node.
+        // - The node is updated to add a label; this doesn't affect the task
+        //   on that node because it has no constraints.
+        // - The service is updated to add a node label constraint which
+        //   matches the label which was just added to the node. The updater
+        //   does not shut down the task because only the constraints have
+        //   changed and the node still matches the updated constraints.
+        // - The node is updated to remove the node label. The node no longer
+        //   satisfies the placement constraints of the service, so the task
+        //   should be shut down. However, the task's spec still has the
+        //   original and outdated constraints (that are still satisfied by
+        //   the node). If we used those original constraints then the task
+        //   would incorrectly not be removed. This is why the constraints
+        //   from the service spec should be used instead.
+        var placement *api.Placement
+        if service := services[t.ServiceID]; service != nil {
+            // This task is associated with a service, so we use the service's
+            // current placement constraints.
+            placement = service.Spec.Task.Placement
+        } else {
+            // This task is not associated with a service (or the service no
+            // longer exists), so we use the placement constraints from the
+            // original task spec.
+            placement = t.Spec.Placement
+        }
+        if placement != nil && len(placement.Constraints) > 0 {
+            constraints, _ := constraint.Parse(placement.Constraints)
             if !constraint.NodeMatches(constraints, node) {
                 removeTasks[t.ID] = t
                 continue
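
For reference, the check at the end of this hunk relies on the constraint helpers the enforcer already uses. The following is a small, self-contained sketch (not part of the patch) of how those helpers evaluate a constraint expression against a node's labels; it assumes the Parse and NodeMatches helpers live in swarmkit's manager/constraint package.

package main

import (
	"fmt"

	"github.com/docker/swarmkit/api"
	"github.com/docker/swarmkit/manager/constraint"
)

func main() {
	// A node carrying the label that the service constraint asks for.
	node := &api.Node{
		Spec: api.NodeSpec{
			Annotations: api.Annotations{
				Labels: map[string]string{"foo": "bar"},
			},
		},
	}

	// Same expression form as used by the test below.
	constraints, err := constraint.Parse([]string{"node.labels.foo == bar"})
	if err != nil {
		panic(err)
	}

	fmt.Println(constraint.NodeMatches(constraints, node)) // true

	// Once the label is removed the node stops matching, which is exactly
	// the condition under which the enforcer marks the task for removal.
	delete(node.Spec.Annotations.Labels, "foo")
	fmt.Println(constraint.NodeMatches(constraints, node)) // false
}

Removing the label flips NodeMatches from true to false, which is the situation the new test below constructs through the store.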

manager/orchestrator/constraintenforcer/constraint_enforcer_test.go (+108)
@@ -8,6 +8,7 @@ import (
     "github.com/docker/swarmkit/manager/state"
     "github.com/docker/swarmkit/manager/state/store"
     "github.com/stretchr/testify/assert"
+    "github.com/stretchr/testify/require"
 )
 
 func TestConstraintEnforcer(t *testing.T) {
@@ -168,3 +169,110 @@ func TestConstraintEnforcer(t *testing.T) {
     assert.Equal(t, "id4", shutdown3.ID)
     assert.Equal(t, api.TaskStateRejected, shutdown3.Status.State)
 }
+
+// TestOutdatedTaskPlacementConstraints tests the following scenario: If a
+// task is associated with a service then we must use the constraints from
+// the current service spec rather than the constraints from the task spec
+// because they may be outdated. This will happen if the service was
+// previously updated in a way which only changes the placement constraints
+// and the node matched the placement constraints both before and after that
+// update. In the case of such updates, the tasks are not considered "dirty"
+// and are not restarted but it will mean that the task spec's placement
+// constraints are outdated. Consider this example:
+// - A service is created with no constraints and a task is scheduled
+//   to a node.
+// - The node is updated to add a label; this doesn't affect the task
+//   on that node because it has no constraints.
+// - The service is updated to add a node label constraint which
+//   matches the label which was just added to the node. The updater
+//   does not shut down the task because only the constraints have
+//   changed and the node still matches the updated constraints.
+// This test initializes a new in-memory store with the expected state from
+// above, starts a new constraint enforcer, and then updates the node to
+// remove the node label. Since the node no longer satisfies the placement
+// constraints of the service spec, the task should be shut down despite the
+// fact that the task's own spec still has the original placement constraints.
+func TestOutdatedTaskPlacementConstraints(t *testing.T) {
+    node := &api.Node{
+        ID: "id0",
+        Spec: api.NodeSpec{
+            Annotations: api.Annotations{
+                Name: "node1",
+                Labels: map[string]string{
+                    "foo": "bar",
+                },
+            },
+            Availability: api.NodeAvailabilityActive,
+        },
+        Status: api.NodeStatus{
+            State: api.NodeStatus_READY,
+        },
+        Role: api.NodeRoleWorker,
+    }
+
+    service := &api.Service{
+        ID: "id1",
+        Spec: api.ServiceSpec{
+            Annotations: api.Annotations{
+                Name: "service1",
+            },
+            Task: api.TaskSpec{
+                Placement: &api.Placement{
+                    Constraints: []string{
+                        "node.labels.foo == bar",
+                    },
+                },
+            },
+        },
+    }
+
+    task := &api.Task{
+        ID: "id2",
+        Spec: api.TaskSpec{
+            Placement: nil, // Note: No placement constraints.
+        },
+        ServiceID: service.ID,
+        NodeID:    node.ID,
+        Status: api.TaskStatus{
+            State: api.TaskStateRunning,
+        },
+        DesiredState: api.TaskStateRunning,
+    }
+
+    s := store.NewMemoryStore(nil)
+    require.NotNil(t, s)
+    defer s.Close()
+
+    require.NoError(t, s.Update(func(tx store.Tx) error {
+        // Prepopulate node, service, and task.
+        for _, err := range []error{
+            store.CreateNode(tx, node),
+            store.CreateService(tx, service),
+            store.CreateTask(tx, task),
+        } {
+            if err != nil {
+                return err
+            }
+        }
+        return nil
+    }))
+
+    watch, cancel := state.Watch(s.WatchQueue(), api.EventUpdateTask{})
+    defer cancel()
+
+    constraintEnforcer := New(s)
+    defer constraintEnforcer.Stop()
+
+    go constraintEnforcer.Run()
+
+    // Update the node to remove the node label.
+    require.NoError(t, s.Update(func(tx store.Tx) error {
+        node = store.GetNode(tx, node.ID)
+        delete(node.Spec.Annotations.Labels, "foo")
+        return store.UpdateNode(tx, node)
+    }))
+
+    // The task should be rejected immediately.
+    task = testutils.WatchTaskUpdate(t, watch)
+    assert.Equal(t, api.TaskStateRejected, task.Status.State)
+}
