Skip to content

Commit

Permalink
engine: adding poll function during progressTask
Browse files Browse the repository at this point in the history
Prior to this commit, we only tracked state we explicitly tried to
change when the task was starting. We did not respond to the event
stream or any other source of information from Docker. This means
that when we are waiting for certain dependency conditions
("SUCCESS", "COMPLETE", or "HEALTHY") the task progression logic
does not update the agent-internal model of container state.

Since we rely on that state for determining when conditions are
met, tasks would get stuck in infinite startup loops. This change
adds a call to engine.checkTaskState(), which explicity updates
any changed container state. We only call this function if we know
that we are waiting on the aforementioned subset of dependency
conditions.

Co-authored-by: Utsa Bhattacharjya <[email protected]>
  • Loading branch information
petderek and ubhattacharjya committed Feb 22, 2019
1 parent 6dcd3e3 commit 4c57056
Show file tree
Hide file tree
Showing 6 changed files with 120 additions and 89 deletions.
26 changes: 13 additions & 13 deletions agent/engine/dependencygraph/graph.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ func dependenciesCanBeResolved(target *apicontainer.Container, by []*apicontaine
nameMap[cont.Name] = cont
}

if err := verifyContainerOrderingStatusResolvable(target, nameMap, containerOrderingDependenciesCanResolve); err != nil {
if _, err := verifyContainerOrderingStatusResolvable(target, nameMap, containerOrderingDependenciesCanResolve); err != nil {
return false
}
return verifyStatusResolvable(target, nameMap, target.SteadyStateDependencies, onSteadyStateCanResolve)
Expand All @@ -122,9 +122,9 @@ func DependenciesAreResolved(target *apicontainer.Container,
by []*apicontainer.Container,
id string,
manager credentials.Manager,
resources []taskresource.TaskResource) error {
resources []taskresource.TaskResource) (*apicontainer.DependsOn, error) {
if !executionCredentialsResolved(target, id, manager) {
return CredentialsNotResolvedErr
return nil, CredentialsNotResolvedErr
}

nameMap := make(map[string]*apicontainer.Container)
Expand All @@ -141,18 +141,18 @@ func DependenciesAreResolved(target *apicontainer.Container,
resourcesMap[resource.GetName()] = resource
}

if err := verifyContainerOrderingStatusResolvable(target, nameMap, containerOrderingDependenciesIsResolved); err != nil {
return err
if blocked, err := verifyContainerOrderingStatusResolvable(target, nameMap, containerOrderingDependenciesIsResolved); err != nil {
return blocked, err
}

if !verifyStatusResolvable(target, nameMap, target.SteadyStateDependencies, onSteadyStateIsResolved) {
return DependentContainerNotResolvedErr
return nil, DependentContainerNotResolvedErr
}
if err := verifyTransitionDependenciesResolved(target, nameMap, resourcesMap); err != nil {
return err
return nil, err
}

return nil
return nil, nil
}

func linksToContainerNames(links []string) []string {
Expand Down Expand Up @@ -205,25 +205,25 @@ func verifyStatusResolvable(target *apicontainer.Container, existingContainers m
// (map from name to container). The `resolves` function passed should return true if the named container is resolved.

func verifyContainerOrderingStatusResolvable(target *apicontainer.Container, existingContainers map[string]*apicontainer.Container,
resolves func(*apicontainer.Container, *apicontainer.Container, string) bool) error {
resolves func(*apicontainer.Container, *apicontainer.Container, string) bool) (*apicontainer.DependsOn, error) {

targetGoal := target.GetDesiredStatus()
if targetGoal != target.GetSteadyStateStatus() && targetGoal != apicontainerstatus.ContainerCreated {
// A container can always stop, die, or reach whatever other state it
// wants regardless of what dependencies it has
return nil
return nil, nil
}

for _, dependency := range target.DependsOn {
dependencyContainer, ok := existingContainers[dependency.Container]
if !ok {
return fmt.Errorf("dependency graph: container ordering dependency [%v] for target [%v] does not exist.", dependencyContainer, target)
return nil, fmt.Errorf("dependency graph: container ordering dependency [%v] for target [%v] does not exist.", dependencyContainer, target)
}
if !resolves(target, dependencyContainer, dependency.Condition) {
return fmt.Errorf("dependency graph: failed to resolve the container ordering dependency [%v] for target [%v]", dependencyContainer, target)
return &dependency, fmt.Errorf("dependency graph: failed to resolve the container ordering dependency [%v] for target [%v]", dependencyContainer, target)
}
}
return nil
return nil, nil
}

func verifyTransitionDependenciesResolved(target *apicontainer.Container,
Expand Down
36 changes: 20 additions & 16 deletions agent/engine/dependencygraph/graph_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ func TestDependenciesAreResolvedWhenSteadyStateIsRunning(t *testing.T) {
},
},
}
err := DependenciesAreResolved(task.Containers[0], task.Containers, "", nil, nil)
_, err := DependenciesAreResolved(task.Containers[0], task.Containers, "", nil, nil)
assert.NoError(t, err, "One container should resolve trivially")

// Webserver stack
Expand All @@ -138,28 +138,28 @@ func TestDependenciesAreResolvedWhenSteadyStateIsRunning(t *testing.T) {
},
}

err = DependenciesAreResolved(php, task.Containers, "", nil, nil)
_, err = DependenciesAreResolved(php, task.Containers, "", nil, nil)
assert.Error(t, err, "Shouldn't be resolved; db isn't running")

err = DependenciesAreResolved(db, task.Containers, "", nil, nil)
_, err = DependenciesAreResolved(db, task.Containers, "", nil, nil)
assert.Error(t, err, "Shouldn't be resolved; dbdatavolume isn't created")

err = DependenciesAreResolved(dbdata, task.Containers, "", nil, nil)
_, err = DependenciesAreResolved(dbdata, task.Containers, "", nil, nil)
assert.NoError(t, err, "data volume with no deps should resolve")

dbdata.SetKnownStatus(apicontainerstatus.ContainerCreated)
err = DependenciesAreResolved(php, task.Containers, "", nil, nil)
_, err = DependenciesAreResolved(php, task.Containers, "", nil, nil)
assert.Error(t, err, "Php shouldn't run, db is not created")

db.SetKnownStatus(apicontainerstatus.ContainerCreated)
err = DependenciesAreResolved(php, task.Containers, "", nil, nil)
_, err = DependenciesAreResolved(php, task.Containers, "", nil, nil)
assert.Error(t, err, "Php shouldn't run, db is not running")

err = DependenciesAreResolved(db, task.Containers, "", nil, nil)
_, err = DependenciesAreResolved(db, task.Containers, "", nil, nil)
assert.NoError(t, err, "db should be resolved, dbdata volume is Created")
db.SetKnownStatus(apicontainerstatus.ContainerRunning)

err = DependenciesAreResolved(php, task.Containers, "", nil, nil)
_, err = DependenciesAreResolved(php, task.Containers, "", nil, nil)
assert.NoError(t, err, "Php should resolve")
}

Expand All @@ -175,16 +175,20 @@ func TestRunDependencies(t *testing.T) {
SteadyStateDependencies: []string{"a"},
}
task := &apitask.Task{Containers: []*apicontainer.Container{c1, c2}}
_, err := DependenciesAreResolved(c2, task.Containers, "", nil, nil)
assert.Error(t, err, "Dependencies should not be resolved")

assert.Error(t, DependenciesAreResolved(c2, task.Containers, "", nil, nil), "Dependencies should not be resolved")
task.Containers[1].SetDesiredStatus(apicontainerstatus.ContainerRunning)
assert.Error(t, DependenciesAreResolved(c2, task.Containers, "", nil, nil), "Dependencies should not be resolved")
_, err = DependenciesAreResolved(c2, task.Containers, "", nil, nil)
assert.Error(t, err, "Dependencies should not be resolved")

task.Containers[0].SetKnownStatus(apicontainerstatus.ContainerRunning)
assert.NoError(t, DependenciesAreResolved(c2, task.Containers, "", nil, nil), "Dependencies should be resolved")
_, err = DependenciesAreResolved(c2, task.Containers, "", nil, nil)
assert.NoError(t, err, "Dependencies should be resolved")

task.Containers[1].SetDesiredStatus(apicontainerstatus.ContainerCreated)
assert.NoError(t, DependenciesAreResolved(c1, task.Containers, "", nil, nil), "Dependencies should be resolved")
_, err = DependenciesAreResolved(c1, task.Containers, "", nil, nil)
assert.NoError(t, err, "Dependencies should be resolved")
}

func TestRunDependenciesWhenSteadyStateIsResourcesProvisionedForOneContainer(t *testing.T) {
Expand All @@ -210,11 +214,11 @@ func TestRunDependenciesWhenSteadyStateIsResourcesProvisionedForOneContainer(t *
continue
}
container.SteadyStateDependencies = []string{"pause"}
err := DependenciesAreResolved(container, task.Containers, "", nil, nil)
_, err := DependenciesAreResolved(container, task.Containers, "", nil, nil)
assert.Error(t, err, "Shouldn't be resolved; pause isn't running")
}

err := DependenciesAreResolved(pause, task.Containers, "", nil, nil)
_, err := DependenciesAreResolved(pause, task.Containers, "", nil, nil)
assert.NoError(t, err, "Pause container's dependencies should be resolved")

// Transition pause container to RUNNING
Expand All @@ -228,13 +232,13 @@ func TestRunDependenciesWhenSteadyStateIsResourcesProvisionedForOneContainer(t *
}
// Assert that dependencies remain unresolved until the pause container reaches
// RESOURCES_PROVISIONED
err = DependenciesAreResolved(container, task.Containers, "", nil, nil)
_, err = DependenciesAreResolved(container, task.Containers, "", nil, nil)
assert.Error(t, err, "Shouldn't be resolved; pause isn't running")
}
pause.KnownStatusUnsafe = apicontainerstatus.ContainerResourcesProvisioned
// Dependecies should be resolved now that the 'pause' container has
// transitioned into RESOURCES_PROVISIONED
err = DependenciesAreResolved(php, task.Containers, "", nil, nil)
_, err = DependenciesAreResolved(php, task.Containers, "", nil, nil)
assert.NoError(t, err, "Php should resolve")
}

Expand Down
64 changes: 32 additions & 32 deletions agent/engine/docker_image_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,8 @@ func TestRecordContainerReferenceInspectError(t *testing.T) {
client := mock_dockerapi.NewMockDockerClient(ctrl)

imageManager := &dockerImageManager{
client: client,
state: dockerstate.NewTaskEngineState(),
client: client,
state: dockerstate.NewTaskEngineState(),
minimumAgeBeforeDeletion: config.DefaultImageDeletionAge,
numImagesToDelete: config.DefaultNumImagesToDeletePerCycle,
imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval,
Expand Down Expand Up @@ -179,8 +179,8 @@ func TestRecordContainerReferenceWithNoImageName(t *testing.T) {
client := mock_dockerapi.NewMockDockerClient(ctrl)

imageManager := &dockerImageManager{
client: client,
state: dockerstate.NewTaskEngineState(),
client: client,
state: dockerstate.NewTaskEngineState(),
minimumAgeBeforeDeletion: config.DefaultImageDeletionAge,
numImagesToDelete: config.DefaultNumImagesToDeletePerCycle,
imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval,
Expand Down Expand Up @@ -406,8 +406,8 @@ func TestRemoveContainerReferenceFromImageStateWithNoReference(t *testing.T) {
client := mock_dockerapi.NewMockDockerClient(ctrl)

imageManager := &dockerImageManager{
client: client,
state: dockerstate.NewTaskEngineState(),
client: client,
state: dockerstate.NewTaskEngineState(),
minimumAgeBeforeDeletion: config.DefaultImageDeletionAge,
numImagesToDelete: config.DefaultNumImagesToDeletePerCycle,
imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval,
Expand Down Expand Up @@ -442,8 +442,8 @@ func TestGetCandidateImagesForDeletionImageNoImageState(t *testing.T) {
client := mock_dockerapi.NewMockDockerClient(ctrl)

imageManager := &dockerImageManager{
client: client,
state: dockerstate.NewTaskEngineState(),
client: client,
state: dockerstate.NewTaskEngineState(),
minimumAgeBeforeDeletion: config.DefaultImageDeletionAge,
numImagesToDelete: config.DefaultNumImagesToDeletePerCycle,
imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval,
Expand All @@ -462,8 +462,8 @@ func TestGetCandidateImagesForDeletionImageJustPulled(t *testing.T) {
client := mock_dockerapi.NewMockDockerClient(ctrl)

imageManager := &dockerImageManager{
client: client,
state: dockerstate.NewTaskEngineState(),
client: client,
state: dockerstate.NewTaskEngineState(),
minimumAgeBeforeDeletion: config.DefaultImageDeletionAge,
numImagesToDelete: config.DefaultNumImagesToDeletePerCycle,
imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval,
Expand All @@ -487,8 +487,8 @@ func TestGetCandidateImagesForDeletionImageHasContainerReference(t *testing.T) {
client := mock_dockerapi.NewMockDockerClient(ctrl)

imageManager := &dockerImageManager{
client: client,
state: dockerstate.NewTaskEngineState(),
client: client,
state: dockerstate.NewTaskEngineState(),
minimumAgeBeforeDeletion: config.DefaultImageDeletionAge,
numImagesToDelete: config.DefaultNumImagesToDeletePerCycle,
imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval,
Expand Down Expand Up @@ -528,8 +528,8 @@ func TestGetCandidateImagesForDeletionImageHasMoreContainerReferences(t *testing
client := mock_dockerapi.NewMockDockerClient(ctrl)

imageManager := &dockerImageManager{
client: client,
state: dockerstate.NewTaskEngineState(),
client: client,
state: dockerstate.NewTaskEngineState(),
minimumAgeBeforeDeletion: config.DefaultImageDeletionAge,
numImagesToDelete: config.DefaultNumImagesToDeletePerCycle,
imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval,
Expand Down Expand Up @@ -606,8 +606,8 @@ func TestImageCleanupExclusionListWithSingleName(t *testing.T) {
PulledAt: time.Now().AddDate(0, -2, 0),
}
imageManager := &dockerImageManager{
client: client,
state: dockerstate.NewTaskEngineState(),
client: client,
state: dockerstate.NewTaskEngineState(),
minimumAgeBeforeDeletion: config.DefaultImageDeletionAge,
numImagesToDelete: config.DefaultNumImagesToDeletePerCycle,
imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval,
Expand Down Expand Up @@ -665,8 +665,8 @@ func TestImageCleanupExclusionListWithMultipleNames(t *testing.T) {
PulledAt: time.Now().AddDate(0, -2, 0),
}
imageManager := &dockerImageManager{
client: client,
state: dockerstate.NewTaskEngineState(),
client: client,
state: dockerstate.NewTaskEngineState(),
minimumAgeBeforeDeletion: config.DefaultImageDeletionAge,
numImagesToDelete: config.DefaultNumImagesToDeletePerCycle,
imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval,
Expand Down Expand Up @@ -731,8 +731,8 @@ func TestGetLeastRecentlyUsedImagesLessThanFive(t *testing.T) {
client := mock_dockerapi.NewMockDockerClient(ctrl)

imageManager := &dockerImageManager{
client: client,
state: dockerstate.NewTaskEngineState(),
client: client,
state: dockerstate.NewTaskEngineState(),
minimumAgeBeforeDeletion: config.DefaultImageDeletionAge,
numImagesToDelete: config.DefaultNumImagesToDeletePerCycle,
imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval,
Expand Down Expand Up @@ -765,8 +765,8 @@ func TestRemoveAlreadyExistingImageNameWithDifferentID(t *testing.T) {
client := mock_dockerapi.NewMockDockerClient(ctrl)

imageManager := &dockerImageManager{
client: client,
state: dockerstate.NewTaskEngineState(),
client: client,
state: dockerstate.NewTaskEngineState(),
minimumAgeBeforeDeletion: config.DefaultImageDeletionAge,
numImagesToDelete: config.DefaultNumImagesToDeletePerCycle,
imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval,
Expand Down Expand Up @@ -816,8 +816,8 @@ func TestImageCleanupHappyPath(t *testing.T) {
client := mock_dockerapi.NewMockDockerClient(ctrl)

imageManager := &dockerImageManager{
client: client,
state: dockerstate.NewTaskEngineState(),
client: client,
state: dockerstate.NewTaskEngineState(),
minimumAgeBeforeDeletion: 1 * time.Millisecond,
numImagesToDelete: config.DefaultNumImagesToDeletePerCycle,
imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval,
Expand Down Expand Up @@ -873,8 +873,8 @@ func TestImageCleanupCannotRemoveImage(t *testing.T) {
client := mock_dockerapi.NewMockDockerClient(ctrl)

imageManager := &dockerImageManager{
client: client,
state: dockerstate.NewTaskEngineState(),
client: client,
state: dockerstate.NewTaskEngineState(),
minimumAgeBeforeDeletion: config.DefaultImageDeletionAge,
numImagesToDelete: config.DefaultNumImagesToDeletePerCycle,
imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval,
Expand Down Expand Up @@ -931,8 +931,8 @@ func TestImageCleanupRemoveImageById(t *testing.T) {
client := mock_dockerapi.NewMockDockerClient(ctrl)

imageManager := &dockerImageManager{
client: client,
state: dockerstate.NewTaskEngineState(),
client: client,
state: dockerstate.NewTaskEngineState(),
minimumAgeBeforeDeletion: config.DefaultImageDeletionAge,
numImagesToDelete: config.DefaultNumImagesToDeletePerCycle,
imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval,
Expand Down Expand Up @@ -984,8 +984,8 @@ func TestNonECSImageAndContainersCleanupRemoveImage(t *testing.T) {
defer ctrl.Finish()
client := mock_dockerapi.NewMockDockerClient(ctrl)
imageManager := &dockerImageManager{
client: client,
state: dockerstate.NewTaskEngineState(),
client: client,
state: dockerstate.NewTaskEngineState(),
minimumAgeBeforeDeletion: config.DefaultImageDeletionAge,
numImagesToDelete: config.DefaultNumImagesToDeletePerCycle,
imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval,
Expand Down Expand Up @@ -1220,8 +1220,8 @@ func TestConcurrentRemoveUnusedImages(t *testing.T) {
client := mock_dockerapi.NewMockDockerClient(ctrl)

imageManager := &dockerImageManager{
client: client,
state: dockerstate.NewTaskEngineState(),
client: client,
state: dockerstate.NewTaskEngineState(),
minimumAgeBeforeDeletion: config.DefaultImageDeletionAge,
numImagesToDelete: config.DefaultNumImagesToDeletePerCycle,
imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval,
Expand Down
Loading

0 comments on commit 4c57056

Please sign in to comment.