Skip to content

Commit ab0e1cb

Browse files
committed
Centralize DockerID getting, make it more resilient
1 parent 5f2a38e commit ab0e1cb

File tree

2 files changed

+46
-59
lines changed

2 files changed

+46
-59
lines changed

agent/dockerclient/dockerapi/docker_client.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -687,7 +687,7 @@ func (dg *dockerGoClient) stopContainer(ctx context.Context, dockerID string, ti
687687
err = client.ContainerStop(ctx, dockerID, &timeout)
688688
metadata := dg.containerMetadata(ctx, dockerID)
689689
if err != nil {
690-
seelog.Errorf("DockerGoClient: error stopping container %s: %v", dockerID, err)
690+
seelog.Errorf("DockerGoClient: error stopping container ID=%s: %v", dockerID, err)
691691
if metadata.Error == nil {
692692
if strings.Contains(err.Error(), "No such container") {
693693
err = NoSuchContainerError{dockerID}

agent/engine/docker_task_engine.go

+45-58
Original file line numberDiff line numberDiff line change
@@ -487,17 +487,12 @@ func (engine *DockerTaskEngine) synchronizeContainerStatus(container *apicontain
487487
// their state to the managed task's container channel.
488488
func (engine *DockerTaskEngine) checkTaskState(task *apitask.Task) {
489489
defer metrics.MetricsEngineGlobal.RecordTaskEngineMetric("CHECK_TASK_STATE")()
490-
taskContainers, ok := engine.state.ContainerMapByArn(task.Arn)
491-
if !ok {
492-
seelog.Warnf("Task engine [%s]: could not check task state; no task in state", task.Arn)
493-
return
494-
}
495490
for _, container := range task.Containers {
496-
dockerContainer, ok := taskContainers[container.Name]
497-
if !ok {
491+
dockerID, err := engine.getDockerID(task, container)
492+
if err != nil {
498493
continue
499494
}
500-
status, metadata := engine.client.DescribeContainer(engine.ctx, dockerContainer.DockerID)
495+
status, metadata := engine.client.DescribeContainer(engine.ctx, dockerID)
501496
engine.tasksLock.RLock()
502497
managedTask, ok := engine.managedTasks[task.Arn]
503498
engine.tasksLock.RUnlock()
@@ -1136,25 +1131,17 @@ func (engine *DockerTaskEngine) startContainer(task *apitask.Task, container *ap
11361131
client = client.WithVersion(dockerclient.DockerVersion(*container.DockerConfig.Version))
11371132
}
11381133

1139-
containerMap, ok := engine.state.ContainerMapByArn(task.Arn)
1140-
if !ok {
1134+
dockerID, err := engine.getDockerID(task, container)
1135+
if err != nil {
11411136
return dockerapi.DockerContainerMetadata{
11421137
Error: dockerapi.CannotStartContainerError{
1143-
FromError: errors.Errorf("Container belongs to unrecognized task %s", task.Arn),
1138+
FromError: err,
11441139
},
11451140
}
11461141
}
11471142

1148-
dockerContainer, ok := containerMap[container.Name]
1149-
if !ok {
1150-
return dockerapi.DockerContainerMetadata{
1151-
Error: dockerapi.CannotStartContainerError{
1152-
FromError: errors.Errorf("Container not recorded as created"),
1153-
},
1154-
}
1155-
}
11561143
startContainerBegin := time.Now()
1157-
dockerContainerMD := client.StartContainer(engine.ctx, dockerContainer.DockerID, engine.cfg.ContainerStartTimeout)
1144+
dockerContainerMD := client.StartContainer(engine.ctx, dockerID, engine.cfg.ContainerStartTimeout)
11581145

11591146
// Get metadata through container inspection and available task information then write this to the metadata file
11601147
// Performs this in the background to avoid delaying container start
@@ -1164,7 +1151,7 @@ func (engine *DockerTaskEngine) startContainer(task *apitask.Task, container *ap
11641151
engine.cfg.ContainerMetadataEnabled.Enabled() &&
11651152
!container.IsInternal() {
11661153
go func() {
1167-
err := engine.metadataManager.Update(engine.ctx, dockerContainer.DockerID, task, container.Name)
1154+
err := engine.metadataManager.Update(engine.ctx, dockerID, task, container.Name)
11681155
if err != nil {
11691156
seelog.Warnf("Task engine [%s]: failed to update metadata file for container %s: %v",
11701157
task.Arn, container.Name, err)
@@ -1217,7 +1204,7 @@ func (engine *DockerTaskEngine) startContainer(task *apitask.Task, container *ap
12171204
func (engine *DockerTaskEngine) provisionContainerResources(task *apitask.Task, container *apicontainer.Container) dockerapi.DockerContainerMetadata {
12181205
seelog.Infof("Task engine [%s]: setting up container resources for container [%s]",
12191206
task.Arn, container.Name)
1220-
containerInspectOutput, err := engine.inspectContainerByName(task.Arn, container.Name)
1207+
containerInspectOutput, err := engine.inspectContainer(task, container)
12211208
if err != nil {
12221209
return dockerapi.DockerContainerMetadata{
12231210
Error: ContainerNetworkingError{
@@ -1268,7 +1255,7 @@ func (engine *DockerTaskEngine) cleanupPauseContainerNetwork(task *apitask.Task,
12681255
seelog.Infof("Task engine [%s]: waiting %s before cleaning up pause container.", task.Arn, delay)
12691256
engine.handleDelay(delay)
12701257
}
1271-
containerInspectOutput, err := engine.inspectContainerByName(task.Arn, container.Name)
1258+
containerInspectOutput, err := engine.inspectContainer(task, container)
12721259
if err != nil {
12731260
return errors.Wrap(err, "engine: cannot cleanup task network namespace due to error inspecting pause container")
12741261
}
@@ -1312,43 +1299,27 @@ func (engine *DockerTaskEngine) buildCNIConfigFromTaskContainer(
13121299
return cniConfig, nil
13131300
}
13141301

1315-
func (engine *DockerTaskEngine) inspectContainerByName(taskArn, containerName string) (*types.ContainerJSON, error) {
1316-
containers, ok := engine.state.ContainerMapByArn(taskArn)
1317-
if !ok {
1318-
return nil, errors.New("engine: failed to find the pause container, no containers in the task")
1319-
}
1320-
1321-
pauseContainer, ok := containers[containerName]
1322-
if !ok {
1323-
return nil, errors.New("engine: failed to find the pause container")
1302+
func (engine *DockerTaskEngine) inspectContainer(task *apitask.Task, container *apicontainer.Container) (*types.ContainerJSON, error) {
1303+
dockerID, err := engine.getDockerID(task, container)
1304+
if err != nil {
1305+
return nil, err
13241306
}
1325-
containerInspectOutput, err := engine.client.InspectContainer(
1326-
engine.ctx,
1327-
pauseContainer.DockerName,
1328-
dockerclient.InspectContainerTimeout,
1329-
)
13301307

1331-
return containerInspectOutput, err
1308+
return engine.client.InspectContainer(engine.ctx, dockerID, dockerclient.InspectContainerTimeout)
13321309
}
13331310

13341311
func (engine *DockerTaskEngine) stopContainer(task *apitask.Task, container *apicontainer.Container) dockerapi.DockerContainerMetadata {
13351312
seelog.Infof("Task engine [%s]: stopping container [%s]", task.Arn, container.Name)
1336-
containerMap, ok := engine.state.ContainerMapByArn(task.Arn)
1337-
if !ok {
1313+
1314+
dockerID, err := engine.getDockerID(task, container)
1315+
if err != nil {
13381316
return dockerapi.DockerContainerMetadata{
13391317
Error: dockerapi.CannotStopContainerError{
1340-
FromError: errors.Errorf("Container belongs to unrecognized task %s", task.Arn),
1318+
FromError: err,
13411319
},
13421320
}
13431321
}
13441322

1345-
dockerContainer, ok := containerMap[container.Name]
1346-
if !ok {
1347-
return dockerapi.DockerContainerMetadata{
1348-
Error: dockerapi.CannotStopContainerError{FromError: errors.Errorf("Container not recorded as created")},
1349-
}
1350-
}
1351-
13521323
// Cleanup the pause container network namespace before stop the container
13531324
if container.Type == apicontainer.ContainerCNIPause {
13541325
err := engine.cleanupPauseContainerNetwork(task, container)
@@ -1364,23 +1335,18 @@ func (engine *DockerTaskEngine) stopContainer(task *apitask.Task, container *api
13641335
apiTimeoutStopContainer = engine.cfg.DockerStopTimeout
13651336
}
13661337

1367-
return engine.client.StopContainer(engine.ctx, dockerContainer.DockerID, apiTimeoutStopContainer)
1338+
return engine.client.StopContainer(engine.ctx, dockerID, apiTimeoutStopContainer)
13681339
}
13691340

13701341
func (engine *DockerTaskEngine) removeContainer(task *apitask.Task, container *apicontainer.Container) error {
13711342
seelog.Infof("Task engine [%s]: removing container: %s", task.Arn, container.Name)
1372-
containerMap, ok := engine.state.ContainerMapByArn(task.Arn)
13731343

1374-
if !ok {
1375-
return errors.New("No such task: " + task.Arn)
1376-
}
1377-
1378-
dockerContainer, ok := containerMap[container.Name]
1379-
if !ok {
1380-
return errors.New("No container named '" + container.Name + "' created in " + task.Arn)
1344+
dockerID, err := engine.getDockerID(task, container)
1345+
if err != nil {
1346+
return err
13811347
}
13821348

1383-
return engine.client.RemoveContainer(engine.ctx, dockerContainer.DockerName, dockerclient.RemoveContainerTimeout)
1349+
return engine.client.RemoveContainer(engine.ctx, dockerID, dockerclient.RemoveContainerTimeout)
13841350
}
13851351

13861352
// updateTaskUnsafe determines if a new transition needs to be applied to the
@@ -1497,3 +1463,24 @@ func getContainerHostIP(networkSettings *types.NetworkSettings) (string, bool) {
14971463
}
14981464
return "", false
14991465
}
1466+
1467+
func (engine *DockerTaskEngine) getDockerID(task *apitask.Task, container *apicontainer.Container) (string, error) {
1468+
runtimeID := container.GetRuntimeID()
1469+
if runtimeID != "" {
1470+
return runtimeID, nil
1471+
}
1472+
containerMap, ok := engine.state.ContainerMapByArn(task.Arn)
1473+
if !ok {
1474+
return "", errors.Errorf("container name=%s belongs to unrecognized task taskArn=%s", container.Name, task.Arn)
1475+
}
1476+
1477+
dockerContainer, ok := containerMap[container.Name]
1478+
if !ok {
1479+
return "", errors.Errorf("container name=%s not recognized by agent", container.Name)
1480+
}
1481+
1482+
if dockerContainer.DockerID == "" {
1483+
return dockerContainer.DockerName, nil
1484+
}
1485+
return dockerContainer.DockerID, nil
1486+
}

0 commit comments

Comments
 (0)