From 0a62385c4c468151ee185980bf56b1e6b742200e Mon Sep 17 00:00:00 2001 From: richardpen Date: Wed, 23 Aug 2017 20:18:42 +0000 Subject: [PATCH] engine: Make the agent reuse the generated container name on restart Save the generated docker container name and when agent restart restore the agent state, when agent try to create the container it will reuse the name so that no duplicate container will be created. --- CHANGELOG.md | 3 ++ agent/engine/docker_container_engine.go | 11 ++++ agent/engine/docker_task_engine.go | 54 +++++++++++-------- agent/engine/docker_task_engine_test.go | 26 +++++++++ .../dockerstate/docker_task_engine_state.go | 15 ++++-- agent/engine/dockerstate/dockerstate_test.go | 41 ++++++++++++++ agent/engine/engine_integ_test.go | 52 ++++++++++++++++++ 7 files changed, 177 insertions(+), 25 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 909b3261390..945aa395fd6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## UNRELEASED +* Bug - Fix a race condition where a container can be created twice when agent restart. [#939](https://github.com/aws/amazon-ecs-agent/pull/939) + ## 1.14.4 * Enhancement - Batch container state change events. [#867](https://github.com/aws/amazon-ecs-agent/pull/867) * Enhancement - Improve the error message when reserved memory is larger than the available memory. [#897](https://github.com/aws/amazon-ecs-agent/pull/897) diff --git a/agent/engine/docker_container_engine.go b/agent/engine/docker_container_engine.go index 0b87254ef3f..54107e385a6 100644 --- a/agent/engine/docker_container_engine.go +++ b/agent/engine/docker_container_engine.go @@ -468,10 +468,21 @@ func (dg *dockerGoClient) startContainer(ctx context.Context, id string) DockerC return metadata } +// dockerStateToState converts the container status from docker to status recognized by the agent +// Ref: https://github.com/fsouza/go-dockerclient/blob/fd53184a1439b6d7b82ca54c1cd9adac9a5278f2/container.go#L133 func dockerStateToState(state docker.State) api.ContainerStatus { if state.Running { return api.ContainerRunning } + + if state.Dead { + return api.ContainerStopped + } + + if state.StartedAt.IsZero() && state.Error == "" { + return api.ContainerCreated + } + return api.ContainerStopped } diff --git a/agent/engine/docker_task_engine.go b/agent/engine/docker_task_engine.go index b7275fa3e38..97b0853c465 100644 --- a/agent/engine/docker_task_engine.go +++ b/agent/engine/docker_task_engine.go @@ -236,6 +236,7 @@ func (engine *DockerTaskEngine) synchronizeState() { log.Warn("Could not find matching container for expected", "name", cont.DockerName) } else { cont.DockerID = describedCont.ID + cont.Container.SetKnownStatus(dockerStateToState(describedCont.State)) // update mappings that need dockerid engine.state.AddContainer(cont, task) engine.imageManager.RecordContainerReference(cont.Container) @@ -551,15 +552,24 @@ func (engine *DockerTaskEngine) createContainer(task *api.Task, container *api.C client = client.WithVersion(dockerclient.DockerVersion(*container.DockerConfig.Version)) } - // Resolve HostConfig - // we have to do this in create, not start, because docker no longer handles - // merging create config with start hostconfig the same; e.g. memory limits - // get lost + dockerContainerName := "" containerMap, ok := engine.state.ContainerMapByArn(task.Arn) if !ok { containerMap = make(map[string]*api.DockerContainer) + } else { + // looking for container that has docker name but not created + for _, v := range containerMap { + if v.Container.Name == container.Name { + dockerContainerName = v.DockerName + break + } + } } + // Resolve HostConfig + // we have to do this in create, not start, because docker no longer handles + // merging create config with start hostconfig the same; e.g. memory limits + // get lost hostConfig, hcerr := task.DockerHostConfig(container, containerMap) if hcerr != nil { return DockerContainerMetadata{Error: api.NamedError(hcerr)} @@ -579,28 +589,30 @@ func (engine *DockerTaskEngine) createContainer(task *api.Task, container *api.C config.Labels[labelPrefix+"task-definition-version"] = task.Version config.Labels[labelPrefix+"cluster"] = engine.cfg.Cluster - name := "" - for i := 0; i < len(container.Name); i++ { - c := container.Name[i] - if !((c <= '9' && c >= '0') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c == '-')) { - continue + if dockerContainerName == "" { + name := "" + for i := 0; i < len(container.Name); i++ { + c := container.Name[i] + if !((c <= '9' && c >= '0') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c == '-')) { + continue + } + name += string(c) } - name += string(c) - } - containerName := "ecs-" + task.Family + "-" + task.Version + "-" + name + "-" + utils.RandHex() + dockerContainerName = "ecs-" + task.Family + "-" + task.Version + "-" + name + "-" + utils.RandHex() - // Pre-add the container in case we stop before the next, more useful, - // AddContainer call. This ensures we have a way to get the container if - // we die before 'createContainer' returns because we can inspect by - // name - engine.state.AddContainer(&api.DockerContainer{DockerName: containerName, Container: container}, task) - seelog.Infof("Created container name mapping for task %s - %s -> %s", task, container, containerName) - engine.saver.ForceSave() + // Pre-add the container in case we stop before the next, more useful, + // AddContainer call. This ensures we have a way to get the container if + // we die before 'createContainer' returns because we can inspect by + // name + engine.state.AddContainer(&api.DockerContainer{DockerName: dockerContainerName, Container: container}, task) + seelog.Infof("Created container name mapping for task %s - %s -> %s", task, container, dockerContainerName) + engine.saver.ForceSave() + } - metadata := client.CreateContainer(config, hostConfig, containerName, createContainerTimeout) + metadata := client.CreateContainer(config, hostConfig, dockerContainerName, createContainerTimeout) if metadata.DockerID != "" { - engine.state.AddContainer(&api.DockerContainer{DockerID: metadata.DockerID, DockerName: containerName, Container: container}, task) + engine.state.AddContainer(&api.DockerContainer{DockerID: metadata.DockerID, DockerName: dockerContainerName, Container: container}, task) } seelog.Infof("Created docker container for task %s: %s -> %s", task, container, metadata.DockerID) return metadata diff --git a/agent/engine/docker_task_engine_test.go b/agent/engine/docker_task_engine_test.go index ec05b892487..22471441c89 100644 --- a/agent/engine/docker_task_engine_test.go +++ b/agent/engine/docker_task_engine_test.go @@ -1203,3 +1203,29 @@ func TestTaskWithCircularDependency(t *testing.T) { _, ok = taskEngine.(*DockerTaskEngine).managedTasks[task.Arn] assert.False(t, ok, "Task should not be added to task manager for processing") } + +// TestCreateContainerOnAgentRestart tests when agent restarts it should use the +// docker container name restored from agent state file to create the container +func TestCreateContainerOnAgentRestart(t *testing.T) { + ctrl, client, _, privateTaskEngine, _, _ := mocks(t, &config.Config{}) + saver := mock_statemanager.NewMockStateManager(ctrl) + defer ctrl.Finish() + + taskEngine, _ := privateTaskEngine.(*DockerTaskEngine) + taskEngine.SetSaver(saver) + state := taskEngine.State() + + sleepTask := testdata.LoadTask("sleep5") + sleepContainer, _ := sleepTask.ContainerByName("sleep5") + // Store the generated container name to state + state.AddContainer(&api.DockerContainer{DockerName: "docker_container_name", Container: sleepContainer}, sleepTask) + + gomock.InOrder( + client.EXPECT().CreateContainer(gomock.Any(), gomock.Any(), "docker_container_name", gomock.Any()), + ) + + metadata := taskEngine.createContainer(sleepTask, sleepContainer) + if metadata.Error != nil { + t.Error("Unexpected error", metadata.Error) + } +} diff --git a/agent/engine/dockerstate/docker_task_engine_state.go b/agent/engine/dockerstate/docker_task_engine_state.go index 66440c950b5..2dbef7013a8 100644 --- a/agent/engine/dockerstate/docker_task_engine_state.go +++ b/agent/engine/dockerstate/docker_task_engine_state.go @@ -250,18 +250,25 @@ func (state *DockerTaskEngineState) AddContainer(container *api.DockerContainer, } if container.DockerID != "" { + // Update the container id to the state + state.idToContainer[container.DockerID] = container state.idToTask[container.DockerID] = task.Arn + + // Remove the previously added name mapping + delete(state.idToContainer, container.DockerName) + delete(state.idToTask, container.DockerName) + } else if container.DockerName != "" { + // Update the container name mapping to the state when the ID isn't available + state.idToContainer[container.DockerName] = container + state.idToTask[container.DockerName] = task.Arn } + existingMap, exists := state.taskToID[task.Arn] if !exists { existingMap = make(map[string]*api.DockerContainer, len(task.Containers)) state.taskToID[task.Arn] = existingMap } existingMap[container.Container.Name] = container - - if container.DockerID != "" { - state.idToContainer[container.DockerID] = container - } } // AddImageState adds an image.ImageState to be stored diff --git a/agent/engine/dockerstate/dockerstate_test.go b/agent/engine/dockerstate/dockerstate_test.go index 83dc63b9f0b..9c403b2756b 100644 --- a/agent/engine/dockerstate/dockerstate_test.go +++ b/agent/engine/dockerstate/dockerstate_test.go @@ -258,3 +258,44 @@ func TestRemoveNonExistingImageState(t *testing.T) { t.Error("Error removing incorrect image state") } } + +// TestAddContainer tests first add container with docker name and +// then add the container with dockerID +func TestAddContainer(t *testing.T) { + state := NewTaskEngineState() + + task := &api.Task{ + Arn: "taskArn", + } + container := &api.DockerContainer{ + DockerName: "ecs-test-container-1", + Container: &api.Container{ + Name: "test", + }, + } + state.AddTask(task) + state.AddContainer(container, task) + containerMap, ok := state.ContainerMapByArn(task.Arn) + assert.True(t, ok) + assert.Len(t, containerMap, 1) + + assert.Len(t, state.GetAllContainerIDs(), 1) + + _, ok = state.ContainerByID(container.DockerName) + assert.True(t, ok, "container with DockerName should be added to the state") + + container = &api.DockerContainer{ + DockerName: "ecs-test-container-1", + DockerID: "dockerid", + Container: &api.Container{ + Name: "test", + }, + } + state.AddContainer(container, task) + assert.Len(t, containerMap, 1) + assert.Len(t, state.GetAllContainerIDs(), 1) + _, ok = state.ContainerByID(container.DockerID) + assert.True(t, ok, "container with DockerName should be added to the state") + _, ok = state.ContainerByID(container.DockerName) + assert.False(t, ok, "container with DockerName should be added to the state") +} diff --git a/agent/engine/engine_integ_test.go b/agent/engine/engine_integ_test.go index c166abd7769..0fee8aae683 100644 --- a/agent/engine/engine_integ_test.go +++ b/agent/engine/engine_integ_test.go @@ -33,7 +33,9 @@ import ( "github.com/aws/amazon-ecs-agent/agent/eventstream" "github.com/aws/amazon-ecs-agent/agent/statechange" "github.com/aws/amazon-ecs-agent/agent/statemanager" + docker "github.com/fsouza/go-dockerclient" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "golang.org/x/net/context" ) @@ -115,6 +117,56 @@ func discardEvents(from interface{}) func() { } } +// TestDockerStateToContainerState tests convert the container status from +// docker inspect to the status defined in agent +func TestDockerStateToContainerState(t *testing.T) { + taskEngine, done, _ := setupWithDefaultConfig(t) + defer done() + + testTask := createTestTask("test_task") + container := testTask.Containers[0] + + client, err := docker.NewClientFromEnv() + require.NoError(t, err, "Creating go docker client failed") + + containerMetadata := taskEngine.(*DockerTaskEngine).pullContainer(testTask, container) + assert.NoError(t, containerMetadata.Error) + + containerMetadata = taskEngine.(*DockerTaskEngine).createContainer(testTask, container) + assert.NoError(t, containerMetadata.Error) + state, _ := client.InspectContainer(containerMetadata.DockerID) + assert.Equal(t, api.ContainerCreated, dockerStateToState(state.State)) + + containerMetadata = taskEngine.(*DockerTaskEngine).startContainer(testTask, container) + assert.NoError(t, containerMetadata.Error) + state, _ = client.InspectContainer(containerMetadata.DockerID) + assert.Equal(t, api.ContainerRunning, dockerStateToState(state.State)) + + containerMetadata = taskEngine.(*DockerTaskEngine).stopContainer(testTask, container) + assert.NoError(t, containerMetadata.Error) + state, _ = client.InspectContainer(containerMetadata.DockerID) + assert.Equal(t, api.ContainerStopped, dockerStateToState(state.State)) + + // clean up the container + err = taskEngine.(*DockerTaskEngine).removeContainer(testTask, container) + assert.NoError(t, err, "remove the created container failed") + + // Start the container failed + testTask = createTestTask("test_task2") + testTask.Containers[0].EntryPoint = &[]string{"non-existed"} + container = testTask.Containers[0] + containerMetadata = taskEngine.(*DockerTaskEngine).createContainer(testTask, container) + assert.NoError(t, containerMetadata.Error) + containerMetadata = taskEngine.(*DockerTaskEngine).startContainer(testTask, container) + assert.Error(t, containerMetadata.Error) + state, _ = client.InspectContainer(containerMetadata.DockerID) + assert.Equal(t, api.ContainerStopped, dockerStateToState(state.State)) + + // clean up the container + err = taskEngine.(*DockerTaskEngine).removeContainer(testTask, container) + assert.NoError(t, err, "remove the created container failed") +} + func TestHostVolumeMount(t *testing.T) { taskEngine, done, _ := setupWithDefaultConfig(t) defer done()