diff --git a/CHANGELOG.md b/CHANGELOG.md index 909b3261390..945aa395fd6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## UNRELEASED +* Bug - Fix a race condition where a container can be created twice when agent restart. [#939](https://github.com/aws/amazon-ecs-agent/pull/939) + ## 1.14.4 * Enhancement - Batch container state change events. [#867](https://github.com/aws/amazon-ecs-agent/pull/867) * Enhancement - Improve the error message when reserved memory is larger than the available memory. [#897](https://github.com/aws/amazon-ecs-agent/pull/897) diff --git a/agent/engine/docker_container_engine.go b/agent/engine/docker_container_engine.go index 0b87254ef3f..54107e385a6 100644 --- a/agent/engine/docker_container_engine.go +++ b/agent/engine/docker_container_engine.go @@ -468,10 +468,21 @@ func (dg *dockerGoClient) startContainer(ctx context.Context, id string) DockerC return metadata } +// dockerStateToState converts the container status from docker to status recognized by the agent +// Ref: https://github.com/fsouza/go-dockerclient/blob/fd53184a1439b6d7b82ca54c1cd9adac9a5278f2/container.go#L133 func dockerStateToState(state docker.State) api.ContainerStatus { if state.Running { return api.ContainerRunning } + + if state.Dead { + return api.ContainerStopped + } + + if state.StartedAt.IsZero() && state.Error == "" { + return api.ContainerCreated + } + return api.ContainerStopped } diff --git a/agent/engine/docker_task_engine.go b/agent/engine/docker_task_engine.go index b7275fa3e38..97b0853c465 100644 --- a/agent/engine/docker_task_engine.go +++ b/agent/engine/docker_task_engine.go @@ -236,6 +236,7 @@ func (engine *DockerTaskEngine) synchronizeState() { log.Warn("Could not find matching container for expected", "name", cont.DockerName) } else { cont.DockerID = describedCont.ID + cont.Container.SetKnownStatus(dockerStateToState(describedCont.State)) // update mappings that need dockerid engine.state.AddContainer(cont, task) engine.imageManager.RecordContainerReference(cont.Container) @@ -551,15 +552,24 @@ func (engine *DockerTaskEngine) createContainer(task *api.Task, container *api.C client = client.WithVersion(dockerclient.DockerVersion(*container.DockerConfig.Version)) } - // Resolve HostConfig - // we have to do this in create, not start, because docker no longer handles - // merging create config with start hostconfig the same; e.g. memory limits - // get lost + dockerContainerName := "" containerMap, ok := engine.state.ContainerMapByArn(task.Arn) if !ok { containerMap = make(map[string]*api.DockerContainer) + } else { + // looking for container that has docker name but not created + for _, v := range containerMap { + if v.Container.Name == container.Name { + dockerContainerName = v.DockerName + break + } + } } + // Resolve HostConfig + // we have to do this in create, not start, because docker no longer handles + // merging create config with start hostconfig the same; e.g. memory limits + // get lost hostConfig, hcerr := task.DockerHostConfig(container, containerMap) if hcerr != nil { return DockerContainerMetadata{Error: api.NamedError(hcerr)} @@ -579,28 +589,30 @@ func (engine *DockerTaskEngine) createContainer(task *api.Task, container *api.C config.Labels[labelPrefix+"task-definition-version"] = task.Version config.Labels[labelPrefix+"cluster"] = engine.cfg.Cluster - name := "" - for i := 0; i < len(container.Name); i++ { - c := container.Name[i] - if !((c <= '9' && c >= '0') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c == '-')) { - continue + if dockerContainerName == "" { + name := "" + for i := 0; i < len(container.Name); i++ { + c := container.Name[i] + if !((c <= '9' && c >= '0') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c == '-')) { + continue + } + name += string(c) } - name += string(c) - } - containerName := "ecs-" + task.Family + "-" + task.Version + "-" + name + "-" + utils.RandHex() + dockerContainerName = "ecs-" + task.Family + "-" + task.Version + "-" + name + "-" + utils.RandHex() - // Pre-add the container in case we stop before the next, more useful, - // AddContainer call. This ensures we have a way to get the container if - // we die before 'createContainer' returns because we can inspect by - // name - engine.state.AddContainer(&api.DockerContainer{DockerName: containerName, Container: container}, task) - seelog.Infof("Created container name mapping for task %s - %s -> %s", task, container, containerName) - engine.saver.ForceSave() + // Pre-add the container in case we stop before the next, more useful, + // AddContainer call. This ensures we have a way to get the container if + // we die before 'createContainer' returns because we can inspect by + // name + engine.state.AddContainer(&api.DockerContainer{DockerName: dockerContainerName, Container: container}, task) + seelog.Infof("Created container name mapping for task %s - %s -> %s", task, container, dockerContainerName) + engine.saver.ForceSave() + } - metadata := client.CreateContainer(config, hostConfig, containerName, createContainerTimeout) + metadata := client.CreateContainer(config, hostConfig, dockerContainerName, createContainerTimeout) if metadata.DockerID != "" { - engine.state.AddContainer(&api.DockerContainer{DockerID: metadata.DockerID, DockerName: containerName, Container: container}, task) + engine.state.AddContainer(&api.DockerContainer{DockerID: metadata.DockerID, DockerName: dockerContainerName, Container: container}, task) } seelog.Infof("Created docker container for task %s: %s -> %s", task, container, metadata.DockerID) return metadata diff --git a/agent/engine/docker_task_engine_test.go b/agent/engine/docker_task_engine_test.go index ec05b892487..22471441c89 100644 --- a/agent/engine/docker_task_engine_test.go +++ b/agent/engine/docker_task_engine_test.go @@ -1203,3 +1203,29 @@ func TestTaskWithCircularDependency(t *testing.T) { _, ok = taskEngine.(*DockerTaskEngine).managedTasks[task.Arn] assert.False(t, ok, "Task should not be added to task manager for processing") } + +// TestCreateContainerOnAgentRestart tests when agent restarts it should use the +// docker container name restored from agent state file to create the container +func TestCreateContainerOnAgentRestart(t *testing.T) { + ctrl, client, _, privateTaskEngine, _, _ := mocks(t, &config.Config{}) + saver := mock_statemanager.NewMockStateManager(ctrl) + defer ctrl.Finish() + + taskEngine, _ := privateTaskEngine.(*DockerTaskEngine) + taskEngine.SetSaver(saver) + state := taskEngine.State() + + sleepTask := testdata.LoadTask("sleep5") + sleepContainer, _ := sleepTask.ContainerByName("sleep5") + // Store the generated container name to state + state.AddContainer(&api.DockerContainer{DockerName: "docker_container_name", Container: sleepContainer}, sleepTask) + + gomock.InOrder( + client.EXPECT().CreateContainer(gomock.Any(), gomock.Any(), "docker_container_name", gomock.Any()), + ) + + metadata := taskEngine.createContainer(sleepTask, sleepContainer) + if metadata.Error != nil { + t.Error("Unexpected error", metadata.Error) + } +} diff --git a/agent/engine/dockerstate/docker_task_engine_state.go b/agent/engine/dockerstate/docker_task_engine_state.go index 66440c950b5..2dbef7013a8 100644 --- a/agent/engine/dockerstate/docker_task_engine_state.go +++ b/agent/engine/dockerstate/docker_task_engine_state.go @@ -250,18 +250,25 @@ func (state *DockerTaskEngineState) AddContainer(container *api.DockerContainer, } if container.DockerID != "" { + // Update the container id to the state + state.idToContainer[container.DockerID] = container state.idToTask[container.DockerID] = task.Arn + + // Remove the previously added name mapping + delete(state.idToContainer, container.DockerName) + delete(state.idToTask, container.DockerName) + } else if container.DockerName != "" { + // Update the container name mapping to the state when the ID isn't available + state.idToContainer[container.DockerName] = container + state.idToTask[container.DockerName] = task.Arn } + existingMap, exists := state.taskToID[task.Arn] if !exists { existingMap = make(map[string]*api.DockerContainer, len(task.Containers)) state.taskToID[task.Arn] = existingMap } existingMap[container.Container.Name] = container - - if container.DockerID != "" { - state.idToContainer[container.DockerID] = container - } } // AddImageState adds an image.ImageState to be stored diff --git a/agent/engine/dockerstate/dockerstate_test.go b/agent/engine/dockerstate/dockerstate_test.go index 83dc63b9f0b..9c403b2756b 100644 --- a/agent/engine/dockerstate/dockerstate_test.go +++ b/agent/engine/dockerstate/dockerstate_test.go @@ -258,3 +258,44 @@ func TestRemoveNonExistingImageState(t *testing.T) { t.Error("Error removing incorrect image state") } } + +// TestAddContainer tests first add container with docker name and +// then add the container with dockerID +func TestAddContainer(t *testing.T) { + state := NewTaskEngineState() + + task := &api.Task{ + Arn: "taskArn", + } + container := &api.DockerContainer{ + DockerName: "ecs-test-container-1", + Container: &api.Container{ + Name: "test", + }, + } + state.AddTask(task) + state.AddContainer(container, task) + containerMap, ok := state.ContainerMapByArn(task.Arn) + assert.True(t, ok) + assert.Len(t, containerMap, 1) + + assert.Len(t, state.GetAllContainerIDs(), 1) + + _, ok = state.ContainerByID(container.DockerName) + assert.True(t, ok, "container with DockerName should be added to the state") + + container = &api.DockerContainer{ + DockerName: "ecs-test-container-1", + DockerID: "dockerid", + Container: &api.Container{ + Name: "test", + }, + } + state.AddContainer(container, task) + assert.Len(t, containerMap, 1) + assert.Len(t, state.GetAllContainerIDs(), 1) + _, ok = state.ContainerByID(container.DockerID) + assert.True(t, ok, "container with DockerName should be added to the state") + _, ok = state.ContainerByID(container.DockerName) + assert.False(t, ok, "container with DockerName should be added to the state") +} diff --git a/agent/engine/engine_integ_test.go b/agent/engine/engine_integ_test.go index c166abd7769..0fee8aae683 100644 --- a/agent/engine/engine_integ_test.go +++ b/agent/engine/engine_integ_test.go @@ -33,7 +33,9 @@ import ( "github.com/aws/amazon-ecs-agent/agent/eventstream" "github.com/aws/amazon-ecs-agent/agent/statechange" "github.com/aws/amazon-ecs-agent/agent/statemanager" + docker "github.com/fsouza/go-dockerclient" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "golang.org/x/net/context" ) @@ -115,6 +117,56 @@ func discardEvents(from interface{}) func() { } } +// TestDockerStateToContainerState tests convert the container status from +// docker inspect to the status defined in agent +func TestDockerStateToContainerState(t *testing.T) { + taskEngine, done, _ := setupWithDefaultConfig(t) + defer done() + + testTask := createTestTask("test_task") + container := testTask.Containers[0] + + client, err := docker.NewClientFromEnv() + require.NoError(t, err, "Creating go docker client failed") + + containerMetadata := taskEngine.(*DockerTaskEngine).pullContainer(testTask, container) + assert.NoError(t, containerMetadata.Error) + + containerMetadata = taskEngine.(*DockerTaskEngine).createContainer(testTask, container) + assert.NoError(t, containerMetadata.Error) + state, _ := client.InspectContainer(containerMetadata.DockerID) + assert.Equal(t, api.ContainerCreated, dockerStateToState(state.State)) + + containerMetadata = taskEngine.(*DockerTaskEngine).startContainer(testTask, container) + assert.NoError(t, containerMetadata.Error) + state, _ = client.InspectContainer(containerMetadata.DockerID) + assert.Equal(t, api.ContainerRunning, dockerStateToState(state.State)) + + containerMetadata = taskEngine.(*DockerTaskEngine).stopContainer(testTask, container) + assert.NoError(t, containerMetadata.Error) + state, _ = client.InspectContainer(containerMetadata.DockerID) + assert.Equal(t, api.ContainerStopped, dockerStateToState(state.State)) + + // clean up the container + err = taskEngine.(*DockerTaskEngine).removeContainer(testTask, container) + assert.NoError(t, err, "remove the created container failed") + + // Start the container failed + testTask = createTestTask("test_task2") + testTask.Containers[0].EntryPoint = &[]string{"non-existed"} + container = testTask.Containers[0] + containerMetadata = taskEngine.(*DockerTaskEngine).createContainer(testTask, container) + assert.NoError(t, containerMetadata.Error) + containerMetadata = taskEngine.(*DockerTaskEngine).startContainer(testTask, container) + assert.Error(t, containerMetadata.Error) + state, _ = client.InspectContainer(containerMetadata.DockerID) + assert.Equal(t, api.ContainerStopped, dockerStateToState(state.State)) + + // clean up the container + err = taskEngine.(*DockerTaskEngine).removeContainer(testTask, container) + assert.NoError(t, err, "remove the created container failed") +} + func TestHostVolumeMount(t *testing.T) { taskEngine, done, _ := setupWithDefaultConfig(t) defer done()