Skip to content

Commit

Permalink
engine: Make the agent reuse the generated container name on restart
Browse files Browse the repository at this point in the history
    Save the generated docker container name and when agent restart
    restore the agent state, when agent try to create the container it
    will reuse the name so that no duplicate container will be created.
  • Loading branch information
richardpen committed Sep 1, 2017
1 parent 789a768 commit b18e674
Show file tree
Hide file tree
Showing 7 changed files with 177 additions and 25 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Changelog

## UNRELEASED
* Bug - Fix a race condition where a container can be created twice when agent restart. [#939](https://github.com/aws/amazon-ecs-agent/pull/939)

## 1.14.4
* Enhancement - Batch container state change events. [#867](https://github.com/aws/amazon-ecs-agent/pull/867)
* Enhancement - Improve the error message when reserved memory is larger than the available memory. [#897](https://github.com/aws/amazon-ecs-agent/pull/897)
Expand Down
11 changes: 11 additions & 0 deletions agent/engine/docker_container_engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -468,10 +468,21 @@ func (dg *dockerGoClient) startContainer(ctx context.Context, id string) DockerC
return metadata
}

// dockerStateToState converts the container status from docker to status recognized by the agent
// Ref: https://github.com/fsouza/go-dockerclient/blob/fd53184a1439b6d7b82ca54c1cd9adac9a5278f2/container.go#L133
func dockerStateToState(state docker.State) api.ContainerStatus {
if state.Running {
return api.ContainerRunning
}

if state.Dead {
return api.ContainerStopped
}

if state.StartedAt.IsZero() && state.Error == "" {
return api.ContainerCreated
}

return api.ContainerStopped
}

Expand Down
54 changes: 33 additions & 21 deletions agent/engine/docker_task_engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,7 @@ func (engine *DockerTaskEngine) synchronizeState() {
log.Warn("Could not find matching container for expected", "name", cont.DockerName)
} else {
cont.DockerID = describedCont.ID
cont.Container.SetKnownStatus(dockerStateToState(describedCont.State))
// update mappings that need dockerid
engine.state.AddContainer(cont, task)
engine.imageManager.RecordContainerReference(cont.Container)
Expand Down Expand Up @@ -551,15 +552,24 @@ func (engine *DockerTaskEngine) createContainer(task *api.Task, container *api.C
client = client.WithVersion(dockerclient.DockerVersion(*container.DockerConfig.Version))
}

// Resolve HostConfig
// we have to do this in create, not start, because docker no longer handles
// merging create config with start hostconfig the same; e.g. memory limits
// get lost
dockerContainerName := ""
containerMap, ok := engine.state.ContainerMapByArn(task.Arn)
if !ok {
containerMap = make(map[string]*api.DockerContainer)
} else {
// looking for container that has docker name but not created
for _, v := range containerMap {
if v.Container.Name == container.Name {
dockerContainerName = v.DockerName
break
}
}
}

// Resolve HostConfig
// we have to do this in create, not start, because docker no longer handles
// merging create config with start hostconfig the same; e.g. memory limits
// get lost
hostConfig, hcerr := task.DockerHostConfig(container, containerMap)
if hcerr != nil {
return DockerContainerMetadata{Error: api.NamedError(hcerr)}
Expand All @@ -579,28 +589,30 @@ func (engine *DockerTaskEngine) createContainer(task *api.Task, container *api.C
config.Labels[labelPrefix+"task-definition-version"] = task.Version
config.Labels[labelPrefix+"cluster"] = engine.cfg.Cluster

name := ""
for i := 0; i < len(container.Name); i++ {
c := container.Name[i]
if !((c <= '9' && c >= '0') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c == '-')) {
continue
if dockerContainerName == "" {
name := ""
for i := 0; i < len(container.Name); i++ {
c := container.Name[i]
if !((c <= '9' && c >= '0') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c == '-')) {
continue
}
name += string(c)
}
name += string(c)
}

containerName := "ecs-" + task.Family + "-" + task.Version + "-" + name + "-" + utils.RandHex()
dockerContainerName = "ecs-" + task.Family + "-" + task.Version + "-" + name + "-" + utils.RandHex()

// Pre-add the container in case we stop before the next, more useful,
// AddContainer call. This ensures we have a way to get the container if
// we die before 'createContainer' returns because we can inspect by
// name
engine.state.AddContainer(&api.DockerContainer{DockerName: containerName, Container: container}, task)
seelog.Infof("Created container name mapping for task %s - %s -> %s", task, container, containerName)
engine.saver.ForceSave()
// Pre-add the container in case we stop before the next, more useful,
// AddContainer call. This ensures we have a way to get the container if
// we die before 'createContainer' returns because we can inspect by
// name
engine.state.AddContainer(&api.DockerContainer{DockerName: dockerContainerName, Container: container}, task)
seelog.Infof("Created container name mapping for task %s - %s -> %s", task, container, dockerContainerName)
engine.saver.ForceSave()
}

metadata := client.CreateContainer(config, hostConfig, containerName, createContainerTimeout)
metadata := client.CreateContainer(config, hostConfig, dockerContainerName, createContainerTimeout)
if metadata.DockerID != "" {
engine.state.AddContainer(&api.DockerContainer{DockerID: metadata.DockerID, DockerName: containerName, Container: container}, task)
engine.state.AddContainer(&api.DockerContainer{DockerID: metadata.DockerID, DockerName: dockerContainerName, Container: container}, task)
}
seelog.Infof("Created docker container for task %s: %s -> %s", task, container, metadata.DockerID)
return metadata
Expand Down
26 changes: 26 additions & 0 deletions agent/engine/docker_task_engine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1203,3 +1203,29 @@ func TestTaskWithCircularDependency(t *testing.T) {
_, ok = taskEngine.(*DockerTaskEngine).managedTasks[task.Arn]
assert.False(t, ok, "Task should not be added to task manager for processing")
}

// TestCreateContainerOnAgentRestart tests when agent restarts it should use the
// docker container name restored from agent state file to create the container
func TestCreateContainerOnAgentRestart(t *testing.T) {
ctrl, client, _, privateTaskEngine, _, _ := mocks(t, &config.Config{})
saver := mock_statemanager.NewMockStateManager(ctrl)
defer ctrl.Finish()

taskEngine, _ := privateTaskEngine.(*DockerTaskEngine)
taskEngine.SetSaver(saver)
state := taskEngine.State()

sleepTask := testdata.LoadTask("sleep5")
sleepContainer, _ := sleepTask.ContainerByName("sleep5")
// Store the generated container name to state
state.AddContainer(&api.DockerContainer{DockerName: "docker_container_name", Container: sleepContainer}, sleepTask)

gomock.InOrder(
client.EXPECT().CreateContainer(gomock.Any(), gomock.Any(), "docker_container_name", gomock.Any()),
)

metadata := taskEngine.createContainer(sleepTask, sleepContainer)
if metadata.Error != nil {
t.Error("Unexpected error", metadata.Error)
}
}
15 changes: 11 additions & 4 deletions agent/engine/dockerstate/docker_task_engine_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -250,18 +250,25 @@ func (state *DockerTaskEngineState) AddContainer(container *api.DockerContainer,
}

if container.DockerID != "" {
// Update the container id to the state
state.idToContainer[container.DockerID] = container
state.idToTask[container.DockerID] = task.Arn

// Remove the previously added name mapping
delete(state.idToContainer, container.DockerName)
delete(state.idToTask, container.DockerName)
} else if container.DockerName != "" {
// Update the container name mapping to the state when the ID isn't available
state.idToContainer[container.DockerName] = container
state.idToTask[container.DockerName] = task.Arn
}

existingMap, exists := state.taskToID[task.Arn]
if !exists {
existingMap = make(map[string]*api.DockerContainer, len(task.Containers))
state.taskToID[task.Arn] = existingMap
}
existingMap[container.Container.Name] = container

if container.DockerID != "" {
state.idToContainer[container.DockerID] = container
}
}

// AddImageState adds an image.ImageState to be stored
Expand Down
41 changes: 41 additions & 0 deletions agent/engine/dockerstate/dockerstate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -258,3 +258,44 @@ func TestRemoveNonExistingImageState(t *testing.T) {
t.Error("Error removing incorrect image state")
}
}

// TestAddContainer tests first add container with docker name and
// then add the container with dockerID
func TestAddContainerNameAndID(t *testing.T) {
state := NewTaskEngineState()

task := &api.Task{
Arn: "taskArn",
}
container := &api.DockerContainer{
DockerName: "ecs-test-container-1",
Container: &api.Container{
Name: "test",
},
}
state.AddTask(task)
state.AddContainer(container, task)
containerMap, ok := state.ContainerMapByArn(task.Arn)
assert.True(t, ok)
assert.Len(t, containerMap, 1)

assert.Len(t, state.GetAllContainerIDs(), 1)

_, ok = state.ContainerByID(container.DockerName)
assert.True(t, ok, "container with DockerName should be added to the state")

container = &api.DockerContainer{
DockerName: "ecs-test-container-1",
DockerID: "dockerid",
Container: &api.Container{
Name: "test",
},
}
state.AddContainer(container, task)
assert.Len(t, containerMap, 1)
assert.Len(t, state.GetAllContainerIDs(), 1)
_, ok = state.ContainerByID(container.DockerID)
assert.True(t, ok, "container with DockerName should be added to the state")
_, ok = state.ContainerByID(container.DockerName)
assert.False(t, ok, "container with DockerName should be added to the state")
}
52 changes: 52 additions & 0 deletions agent/engine/engine_integ_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ import (
"github.com/aws/amazon-ecs-agent/agent/eventstream"
"github.com/aws/amazon-ecs-agent/agent/statechange"
"github.com/aws/amazon-ecs-agent/agent/statemanager"
docker "github.com/fsouza/go-dockerclient"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"golang.org/x/net/context"
)

Expand Down Expand Up @@ -115,6 +117,56 @@ func discardEvents(from interface{}) func() {
}
}

// TestDockerStateToContainerState tests convert the container status from
// docker inspect to the status defined in agent
func TestDockerStateToContainerState(t *testing.T) {
taskEngine, done, _ := setupWithDefaultConfig(t)
defer done()

testTask := createTestTask("test_task")
container := testTask.Containers[0]

client, err := docker.NewClientFromEnv()
require.NoError(t, err, "Creating go docker client failed")

containerMetadata := taskEngine.(*DockerTaskEngine).pullContainer(testTask, container)
assert.NoError(t, containerMetadata.Error)

containerMetadata = taskEngine.(*DockerTaskEngine).createContainer(testTask, container)
assert.NoError(t, containerMetadata.Error)
state, _ := client.InspectContainer(containerMetadata.DockerID)
assert.Equal(t, api.ContainerCreated, dockerStateToState(state.State))

containerMetadata = taskEngine.(*DockerTaskEngine).startContainer(testTask, container)
assert.NoError(t, containerMetadata.Error)
state, _ = client.InspectContainer(containerMetadata.DockerID)
assert.Equal(t, api.ContainerRunning, dockerStateToState(state.State))

containerMetadata = taskEngine.(*DockerTaskEngine).stopContainer(testTask, container)
assert.NoError(t, containerMetadata.Error)
state, _ = client.InspectContainer(containerMetadata.DockerID)
assert.Equal(t, api.ContainerStopped, dockerStateToState(state.State))

// clean up the container
err = taskEngine.(*DockerTaskEngine).removeContainer(testTask, container)
assert.NoError(t, err, "remove the created container failed")

// Start the container failed
testTask = createTestTask("test_task2")
testTask.Containers[0].EntryPoint = &[]string{"non-existed"}
container = testTask.Containers[0]
containerMetadata = taskEngine.(*DockerTaskEngine).createContainer(testTask, container)
assert.NoError(t, containerMetadata.Error)
containerMetadata = taskEngine.(*DockerTaskEngine).startContainer(testTask, container)
assert.Error(t, containerMetadata.Error)
state, _ = client.InspectContainer(containerMetadata.DockerID)
assert.Equal(t, api.ContainerStopped, dockerStateToState(state.State))

// clean up the container
err = taskEngine.(*DockerTaskEngine).removeContainer(testTask, container)
assert.NoError(t, err, "remove the created container failed")
}

func TestHostVolumeMount(t *testing.T) {
taskEngine, done, _ := setupWithDefaultConfig(t)
defer done()
Expand Down

0 comments on commit b18e674

Please sign in to comment.