From 49c36c7855d14155469a7796b35318da34743cf8 Mon Sep 17 00:00:00 2001 From: richardpen Date: Wed, 23 Aug 2017 20:18:42 +0000 Subject: [PATCH] engine: Make the agent reuse the generated container name on restart Save the generated docker container name and when agent restart restore the agent state, when agent try to create the container it will reuse the name so that no duplicate container will be created. --- CHANGELOG.md | 1 + agent/engine/docker_container_engine.go | 11 +++ agent/engine/docker_task_engine.go | 54 ++++++++----- agent/engine/docker_task_engine_test.go | 26 +++++++ .../dockerstate/docker_task_engine_state.go | 15 +++- agent/engine/dockerstate/dockerstate_test.go | 41 ++++++++++ agent/engine/engine_integ_test.go | 77 +++++++++++++++++++ 7 files changed, 200 insertions(+), 25 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 120f6322734..e0d41f06d8e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## UNRELEASED * Feature - Support for provisioning Tasks with ENIs * Bug - Fixed a memory leak issue when submitting the task state change [#967](https://github.com/aws/amazon-ecs-agent/pull/967) +* Bug - Fix a race condition where a container can be created twice when agent restarts. [#939](https://github.com/aws/amazon-ecs-agent/pull/939) ## 1.14.4 * Enhancement - Batch container state change events. [#867](https://github.com/aws/amazon-ecs-agent/pull/867) diff --git a/agent/engine/docker_container_engine.go b/agent/engine/docker_container_engine.go index 88c63a6b072..c9c3d281e9b 100644 --- a/agent/engine/docker_container_engine.go +++ b/agent/engine/docker_container_engine.go @@ -475,10 +475,21 @@ func (dg *dockerGoClient) startContainer(ctx context.Context, id string) DockerC return metadata } +// dockerStateToState converts the container status from docker to status recognized by the agent +// Ref: https://github.com/fsouza/go-dockerclient/blob/fd53184a1439b6d7b82ca54c1cd9adac9a5278f2/container.go#L133 func dockerStateToState(state docker.State) api.ContainerStatus { if state.Running { return api.ContainerRunning } + + if state.Dead { + return api.ContainerStopped + } + + if state.StartedAt.IsZero() && state.Error == "" { + return api.ContainerCreated + } + return api.ContainerStopped } diff --git a/agent/engine/docker_task_engine.go b/agent/engine/docker_task_engine.go index 337c9f2c565..eb5c0bfe306 100644 --- a/agent/engine/docker_task_engine.go +++ b/agent/engine/docker_task_engine.go @@ -252,6 +252,7 @@ func (engine *DockerTaskEngine) synchronizeState() { log.Warn("Could not find matching container for expected", "name", cont.DockerName) } else { cont.DockerID = describedCont.ID + cont.Container.SetKnownStatus(dockerStateToState(describedCont.State)) // update mappings that need dockerid engine.state.AddContainer(cont, task) engine.imageManager.RecordContainerReference(cont.Container) @@ -577,15 +578,24 @@ func (engine *DockerTaskEngine) createContainer(task *api.Task, container *api.C client = client.WithVersion(dockerclient.DockerVersion(*container.DockerConfig.Version)) } - // Resolve HostConfig - // we have to do this in create, not start, because docker no longer handles - // merging create config with start hostconfig the same; e.g. memory limits - // get lost + dockerContainerName := "" containerMap, ok := engine.state.ContainerMapByArn(task.Arn) if !ok { containerMap = make(map[string]*api.DockerContainer) + } else { + // looking for container that has docker name but not created + for _, v := range containerMap { + if v.Container.Name == container.Name { + dockerContainerName = v.DockerName + break + } + } } + // Resolve HostConfig + // we have to do this in create, not start, because docker no longer handles + // merging create config with start hostconfig the same; e.g. memory limits + // get lost hostConfig, hcerr := task.DockerHostConfig(container, containerMap) if hcerr != nil { return DockerContainerMetadata{Error: api.NamedError(hcerr)} @@ -605,28 +615,30 @@ func (engine *DockerTaskEngine) createContainer(task *api.Task, container *api.C config.Labels[labelPrefix+"task-definition-version"] = task.Version config.Labels[labelPrefix+"cluster"] = engine.cfg.Cluster - name := "" - for i := 0; i < len(container.Name); i++ { - c := container.Name[i] - if !((c <= '9' && c >= '0') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c == '-')) { - continue + if dockerContainerName == "" { + name := "" + for i := 0; i < len(container.Name); i++ { + c := container.Name[i] + if !((c <= '9' && c >= '0') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c == '-')) { + continue + } + name += string(c) } - name += string(c) - } - containerName := "ecs-" + task.Family + "-" + task.Version + "-" + name + "-" + utils.RandHex() + dockerContainerName = "ecs-" + task.Family + "-" + task.Version + "-" + name + "-" + utils.RandHex() - // Pre-add the container in case we stop before the next, more useful, - // AddContainer call. This ensures we have a way to get the container if - // we die before 'createContainer' returns because we can inspect by - // name - engine.state.AddContainer(&api.DockerContainer{DockerName: containerName, Container: container}, task) - seelog.Infof("Created container name mapping for task %s - %s -> %s", task, container, containerName) - engine.saver.ForceSave() + // Pre-add the container in case we stop before the next, more useful, + // AddContainer call. This ensures we have a way to get the container if + // we die before 'createContainer' returns because we can inspect by + // name + engine.state.AddContainer(&api.DockerContainer{DockerName: dockerContainerName, Container: container}, task) + seelog.Infof("Created container name mapping for task %s - %s -> %s", task.Arn, container, dockerContainerName) + engine.saver.ForceSave() + } - metadata := client.CreateContainer(config, hostConfig, containerName, createContainerTimeout) + metadata := client.CreateContainer(config, hostConfig, dockerContainerName, createContainerTimeout) if metadata.DockerID != "" { - engine.state.AddContainer(&api.DockerContainer{DockerID: metadata.DockerID, DockerName: containerName, Container: container}, task) + engine.state.AddContainer(&api.DockerContainer{DockerID: metadata.DockerID, DockerName: dockerContainerName, Container: container}, task) } seelog.Infof("Created docker container for task %s: %s -> %s", task, container, metadata.DockerID) return metadata diff --git a/agent/engine/docker_task_engine_test.go b/agent/engine/docker_task_engine_test.go index 6332ffe8292..4a3bd100020 100644 --- a/agent/engine/docker_task_engine_test.go +++ b/agent/engine/docker_task_engine_test.go @@ -1487,3 +1487,29 @@ func TestTaskWithCircularDependency(t *testing.T) { _, ok = taskEngine.(*DockerTaskEngine).managedTasks[task.Arn] assert.False(t, ok, "Task should not be added to task manager for processing") } + +// TestCreateContainerOnAgentRestart tests when agent restarts it should use the +// docker container name restored from agent state file to create the container +func TestCreateContainerOnAgentRestart(t *testing.T) { + ctrl, client, _, privateTaskEngine, _, _ := mocks(t, &config.Config{}) + saver := mock_statemanager.NewMockStateManager(ctrl) + defer ctrl.Finish() + + taskEngine, _ := privateTaskEngine.(*DockerTaskEngine) + taskEngine.SetSaver(saver) + state := taskEngine.State() + + sleepTask := testdata.LoadTask("sleep5") + sleepContainer, _ := sleepTask.ContainerByName("sleep5") + // Store the generated container name to state + state.AddContainer(&api.DockerContainer{DockerName: "docker_container_name", Container: sleepContainer}, sleepTask) + + gomock.InOrder( + client.EXPECT().CreateContainer(gomock.Any(), gomock.Any(), "docker_container_name", gomock.Any()), + ) + + metadata := taskEngine.createContainer(sleepTask, sleepContainer) + if metadata.Error != nil { + t.Error("Unexpected error", metadata.Error) + } +} diff --git a/agent/engine/dockerstate/docker_task_engine_state.go b/agent/engine/dockerstate/docker_task_engine_state.go index 4056a675d93..69e40950426 100644 --- a/agent/engine/dockerstate/docker_task_engine_state.go +++ b/agent/engine/dockerstate/docker_task_engine_state.go @@ -319,18 +319,25 @@ func (state *DockerTaskEngineState) AddContainer(container *api.DockerContainer, } if container.DockerID != "" { + // Update the container id to the state + state.idToContainer[container.DockerID] = container state.idToTask[container.DockerID] = task.Arn + + // Remove the previously added name mapping + delete(state.idToContainer, container.DockerName) + delete(state.idToTask, container.DockerName) + } else if container.DockerName != "" { + // Update the container name mapping to the state when the ID isn't available + state.idToContainer[container.DockerName] = container + state.idToTask[container.DockerName] = task.Arn } + existingMap, exists := state.taskToID[task.Arn] if !exists { existingMap = make(map[string]*api.DockerContainer, len(task.Containers)) state.taskToID[task.Arn] = existingMap } existingMap[container.Container.Name] = container - - if container.DockerID != "" { - state.idToContainer[container.DockerID] = container - } } // AddImageState adds an image.ImageState to be stored diff --git a/agent/engine/dockerstate/dockerstate_test.go b/agent/engine/dockerstate/dockerstate_test.go index 8804c97b2df..38ed13c2d71 100644 --- a/agent/engine/dockerstate/dockerstate_test.go +++ b/agent/engine/dockerstate/dockerstate_test.go @@ -287,3 +287,44 @@ func TestRemoveNonExistingImageState(t *testing.T) { t.Error("Error removing incorrect image state") } } + +// TestAddContainer tests first add container with docker name and +// then add the container with dockerID +func TestAddContainerNameAndID(t *testing.T) { + state := NewTaskEngineState() + + task := &api.Task{ + Arn: "taskArn", + } + container := &api.DockerContainer{ + DockerName: "ecs-test-container-1", + Container: &api.Container{ + Name: "test", + }, + } + state.AddTask(task) + state.AddContainer(container, task) + containerMap, ok := state.ContainerMapByArn(task.Arn) + assert.True(t, ok) + assert.Len(t, containerMap, 1) + + assert.Len(t, state.GetAllContainerIDs(), 1) + + _, ok = state.ContainerByID(container.DockerName) + assert.True(t, ok, "container with DockerName should be added to the state") + + container = &api.DockerContainer{ + DockerName: "ecs-test-container-1", + DockerID: "dockerid", + Container: &api.Container{ + Name: "test", + }, + } + state.AddContainer(container, task) + assert.Len(t, containerMap, 1) + assert.Len(t, state.GetAllContainerIDs(), 1) + _, ok = state.ContainerByID(container.DockerID) + assert.True(t, ok, "container with DockerName should be added to the state") + _, ok = state.ContainerByID(container.DockerName) + assert.False(t, ok, "container with DockerName should be added to the state") +} diff --git a/agent/engine/engine_integ_test.go b/agent/engine/engine_integ_test.go index cf364867a2a..d2d5650d85c 100644 --- a/agent/engine/engine_integ_test.go +++ b/agent/engine/engine_integ_test.go @@ -30,7 +30,9 @@ import ( "github.com/aws/amazon-ecs-agent/agent/engine/dockerstate" "github.com/aws/amazon-ecs-agent/agent/eventstream" "github.com/aws/amazon-ecs-agent/agent/statemanager" + docker "github.com/fsouza/go-dockerclient" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "golang.org/x/net/context" ) @@ -87,6 +89,81 @@ func setup(cfg *config.Config, t *testing.T) (TaskEngine, func(), credentials.Ma }, credentialsManager } +func discardEvents(from interface{}) func() { + done := make(chan bool) + + go func() { + for { + ndx, _, _ := reflect.Select([]reflect.SelectCase{ + { + Dir: reflect.SelectRecv, + Chan: reflect.ValueOf(from), + }, + { + Dir: reflect.SelectRecv, + Chan: reflect.ValueOf(done), + }, + }) + if ndx == 1 { + break + } + } + }() + return func() { + done <- true + } +} + +// TestDockerStateToContainerState tests convert the container status from +// docker inspect to the status defined in agent +func TestDockerStateToContainerState(t *testing.T) { + taskEngine, done, _ := setupWithDefaultConfig(t) + defer done() + + testTask := createTestTask("test_task") + container := testTask.Containers[0] + + client, err := docker.NewClientFromEnv() + require.NoError(t, err, "Creating go docker client failed") + + containerMetadata := taskEngine.(*DockerTaskEngine).pullContainer(testTask, container) + assert.NoError(t, containerMetadata.Error) + + containerMetadata = taskEngine.(*DockerTaskEngine).createContainer(testTask, container) + assert.NoError(t, containerMetadata.Error) + state, _ := client.InspectContainer(containerMetadata.DockerID) + assert.Equal(t, api.ContainerCreated, dockerStateToState(state.State)) + + containerMetadata = taskEngine.(*DockerTaskEngine).startContainer(testTask, container) + assert.NoError(t, containerMetadata.Error) + state, _ = client.InspectContainer(containerMetadata.DockerID) + assert.Equal(t, api.ContainerRunning, dockerStateToState(state.State)) + + containerMetadata = taskEngine.(*DockerTaskEngine).stopContainer(testTask, container) + assert.NoError(t, containerMetadata.Error) + state, _ = client.InspectContainer(containerMetadata.DockerID) + assert.Equal(t, api.ContainerStopped, dockerStateToState(state.State)) + + // clean up the container + err = taskEngine.(*DockerTaskEngine).removeContainer(testTask, container) + assert.NoError(t, err, "remove the created container failed") + + // Start the container failed + testTask = createTestTask("test_task2") + testTask.Containers[0].EntryPoint = &[]string{"non-existed"} + container = testTask.Containers[0] + containerMetadata = taskEngine.(*DockerTaskEngine).createContainer(testTask, container) + assert.NoError(t, containerMetadata.Error) + containerMetadata = taskEngine.(*DockerTaskEngine).startContainer(testTask, container) + assert.Error(t, containerMetadata.Error) + state, _ = client.InspectContainer(containerMetadata.DockerID) + assert.Equal(t, api.ContainerStopped, dockerStateToState(state.State)) + + // clean up the container + err = taskEngine.(*DockerTaskEngine).removeContainer(testTask, container) + assert.NoError(t, err, "remove the created container failed") +} + func TestHostVolumeMount(t *testing.T) { taskEngine, done, _ := setupWithDefaultConfig(t) defer done()