From c7090f8fd75689fba77f398b9fc1ed07f9640161 Mon Sep 17 00:00:00 2001 From: haikuoliu Date: Thu, 12 Apr 2018 18:15:49 -0700 Subject: [PATCH] engine: introduce a new env var to distinct pull image behavior --- CHANGELOG.md | 1 + README.md | 1 + agent/config/config.go | 22 ++- agent/config/config_test.go | 52 ++++++ agent/config/parse.go | 16 ++ agent/config/types.go | 10 +- .../dockerapi/docker_client_test.go | 2 +- agent/engine/common_test.go | 2 +- agent/engine/docker_image_manager.go | 21 ++- .../engine/docker_image_manager_integ_test.go | 82 ++++----- agent/engine/docker_image_manager_test.go | 22 ++- agent/engine/docker_task_engine.go | 78 ++++++-- agent/engine/docker_task_engine_test.go | 154 ++++++++++++++-- agent/engine/dockerstate/json_test.go | 5 +- agent/engine/engine_integ_test.go | 17 ++ agent/engine/engine_windows_integ_test.go | 8 +- agent/engine/image/types.go | 73 +++++--- agent/engine/mocks/engine_mocks.go | 5 +- agent/engine/task_manager.go | 24 ++- agent/engine/task_manager_test.go | 168 ++++++++++-------- agent/statemanager/state_manager.go | 11 +- 21 files changed, 573 insertions(+), 201 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e25661711e3..dc2eb5964f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # Changelog ## 1.17.4-dev +* Feature - Configurable agent pull behavior [#1348](https://github.com/aws/amazon-ecs-agent/pull/1348) * Bug - Fixed a bug where Docker Version() API never returns by adding a timeout [#1363](https://github.com/aws/amazon-ecs-agent/pull/1363) ## 1.17.3 diff --git a/README.md b/README.md index cb44a7129aa..c8429bdb488 100644 --- a/README.md +++ b/README.md @@ -169,6 +169,7 @@ additional details on each available environment variable. | `ECS_IMAGE_CLEANUP_INTERVAL` | 30m | The time interval between automated image cleanup cycles. If set to less than 10 minutes, the value is ignored. | 30m | 30m | | `ECS_IMAGE_MINIMUM_CLEANUP_AGE` | 30m | The minimum time interval between when an image is pulled and when it can be considered for automated image cleanup. | 1h | 1h | | `ECS_NUM_IMAGES_DELETE_PER_CYCLE` | 5 | The maximum number of images to delete in a single automated image cleanup cycle. If set to less than 1, the value is ignored. | 5 | 5 | +| `ECS_IMAGE_PULL_BEHAVIOR` | <default | always | once | prefer-cached > | The behavior used to customize the pull image process. If `default` is specified, the image will be pulled remotely, if the pull fails then the cached image will be used. If `always` is specified, the image will be pulled remotely, if the pull fails then the task will fail. If `once` is specified, the image will be pulled remotely if it has not been pulled before, otherwise the cached image will be used. If `prefer-cached` is specified, the image will be pulled remotely if there is no cached image, otherwise the cached image will be used. | default | default | | `ECS_INSTANCE_ATTRIBUTES` | `{"stack": "prod"}` | These attributes take effect only during initial registration. After the agent has joined an ECS cluster, use the PutAttributes API action to add additional attributes. For more information, see [Amazon ECS Container Agent Configuration](http://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs-agent-config.html) in the Amazon ECS Developer Guide.| `{}` | `{}` | | `ECS_ENABLE_TASK_ENI` | `false` | Whether to enable task networking for task to be launched with its own network interface | `false` | Not applicable | | `ECS_CNI_PLUGINS_PATH` | `/ecs/cni` | The path where the cni binary file is located | `/amazon-ecs-cni-plugins` | Not applicable | diff --git a/agent/config/config.go b/agent/config/config.go index cf55e565cf3..40bb2f92df4 100644 --- a/agent/config/config.go +++ b/agent/config/config.go @@ -65,7 +65,7 @@ const ( // image cleanup. DefaultNumImagesToDeletePerCycle = 5 - //DefaultImageDeletionAge specifies the default value for minimum amount of elapsed time after an image + // DefaultImageDeletionAge specifies the default value for minimum amount of elapsed time after an image // has been pulled before it can be deleted. DefaultImageDeletionAge = 1 * time.Hour @@ -102,6 +102,25 @@ const ( DefaultTaskMetadataBurstRate = 60 ) +const ( + // ImagePullDefaultBehavior specifies the behavior that if an image pull API call fails, + // agent tries to start from the Docker image cache anyway, assuming that the image has not changed. + ImagePullDefaultBehavior ImagePullBehaviorType = iota + + // ImagePullAlwaysBehavior specifies the behavior that if an image pull API call fails, + // the task fails instead of using cached image. + ImagePullAlwaysBehavior + + // ImagePullOnceBehavior specifies the behavior that agent will only attempt to pull + // the same image once, once an image is pulled, local image cache will be used + // for all the containers. + ImagePullOnceBehavior + + // ImagePullPreferCachedBehavior specifies the behavior that agent will only attempt to pull + // the image if there is no cached image. + ImagePullPreferCachedBehavior +) + var ( // DefaultPauseContainerImageName is the name of the pause container image. The linker's // load flags are used to populate this value from the Makefile @@ -386,6 +405,7 @@ func environmentConfig() (Config, error) { MinimumImageDeletionAge: parseEnvVariableDuration("ECS_IMAGE_MINIMUM_CLEANUP_AGE"), ImageCleanupInterval: parseEnvVariableDuration("ECS_IMAGE_CLEANUP_INTERVAL"), NumImagesToDeletePerCycle: parseNumImagesToDeletePerCycle(), + ImagePullBehavior: parseImagePullBehavior(), InstanceAttributes: instanceAttributes, CNIPluginsPath: os.Getenv("ECS_CNI_PLUGINS_PATH"), AWSVPCBlockInstanceMetdata: utils.ParseBool(os.Getenv("ECS_AWSVPC_BLOCK_IMDS"), false), diff --git a/agent/config/config_test.go b/agent/config/config_test.go index 555186360f3..76d48aec800 100644 --- a/agent/config/config_test.go +++ b/agent/config/config_test.go @@ -81,6 +81,7 @@ func TestEnvironmentConfig(t *testing.T) { defer setTestEnv("ECS_IMAGE_CLEANUP_INTERVAL", "2h")() defer setTestEnv("ECS_IMAGE_MINIMUM_CLEANUP_AGE", "30m")() defer setTestEnv("ECS_NUM_IMAGES_DELETE_PER_CYCLE", "2")() + defer setTestEnv("ECS_IMAGE_PULL_BEHAVIOR", "always")() defer setTestEnv("ECS_INSTANCE_ATTRIBUTES", "{\"my_attribute\": \"testing\"}")() defer setTestEnv("ECS_ENABLE_TASK_ENI", "true")() defer setTestEnv("ECS_TASK_METADATA_RPS_LIMIT", "1000,1100")() @@ -112,6 +113,7 @@ func TestEnvironmentConfig(t *testing.T) { assert.Equal(t, (30 * time.Minute), conf.MinimumImageDeletionAge) assert.Equal(t, (2 * time.Hour), conf.ImageCleanupInterval) assert.Equal(t, 2, conf.NumImagesToDeletePerCycle) + assert.Equal(t, ImagePullAlwaysBehavior, conf.ImagePullBehavior) assert.Equal(t, "testing", conf.InstanceAttributes["my_attribute"]) assert.Equal(t, (90 * time.Second), conf.TaskCleanupWaitDuration) serializedAdditionalLocalRoutesJSON, err := json.Marshal(conf.AWSVPCAdditionalLocalRoutes) @@ -369,6 +371,56 @@ func TestImageCleanupMinimumNumImagesToDeletePerCycle(t *testing.T) { assert.Equal(t, cfg.NumImagesToDeletePerCycle, DefaultNumImagesToDeletePerCycle, "Wrong value for NumImagesToDeletePerCycle") } +func TestInvalidImagePullBehavior(t *testing.T) { + defer setTestRegion()() + defer setTestEnv("ECS_IMAGE_PULL_BEHAVIOR", "invalid")() + cfg, err := NewConfig(ec2.NewBlackholeEC2MetadataClient()) + assert.NoError(t, err) + assert.Equal(t, cfg.ImagePullBehavior, ImagePullDefaultBehavior, "Wrong value for ImagePullBehavior") +} + +func TestParseImagePullBehavior(t *testing.T) { + testcases := []struct { + name string + envVarVal string + expectedImagePullBehavior ImagePullBehaviorType + }{ + { + name: "default agent behavior", + envVarVal: "default", + expectedImagePullBehavior: ImagePullDefaultBehavior, + }, + { + name: "always agent behavior", + envVarVal: "always", + expectedImagePullBehavior: ImagePullAlwaysBehavior, + }, + { + name: "once agent behavior", + envVarVal: "once", + expectedImagePullBehavior: ImagePullOnceBehavior, + }, + { + name: "prefer-cached agent behavior", + envVarVal: "prefer-cached", + expectedImagePullBehavior: ImagePullPreferCachedBehavior, + }, + { + name: "invalid agent behavior", + envVarVal: "invalid", + expectedImagePullBehavior: ImagePullDefaultBehavior, + }, + } + + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + defer setTestRegion()() + defer setTestEnv("ECS_IMAGE_PULL_BEHAVIOR", tc.envVarVal)() + assert.Equal(t, parseImagePullBehavior(), tc.expectedImagePullBehavior, "Wrong value for ImagePullBehavior") + }) + } +} + func TestTaskResourceLimitsOverride(t *testing.T) { defer setTestRegion()() defer setTestEnv("ECS_ENABLE_TASK_CPU_MEM_LIMIT", "false")() diff --git a/agent/config/parse.go b/agent/config/parse.go index 63069546737..b8ffa18191d 100644 --- a/agent/config/parse.go +++ b/agent/config/parse.go @@ -117,6 +117,22 @@ func parseNumImagesToDeletePerCycle() int { return numImagesToDeletePerCycle } +func parseImagePullBehavior() ImagePullBehaviorType { + ImagePullBehaviorString := os.Getenv("ECS_IMAGE_PULL_BEHAVIOR") + switch ImagePullBehaviorString { + case "always": + return ImagePullAlwaysBehavior + case "once": + return ImagePullOnceBehavior + case "prefer-cached": + return ImagePullPreferCachedBehavior + default: + // Use the default image pull behavior when ECS_IMAGE_PULL_BEHAVIOR is + // "default" or not valid + return ImagePullDefaultBehavior + } +} + func parseInstanceAttributes(errs []error) (map[string]string, []error) { var instanceAttributes map[string]string instanceAttributesEnv := os.Getenv("ECS_INSTANCE_ATTRIBUTES") diff --git a/agent/config/types.go b/agent/config/types.go index 70af9f8e977..5d790b5b694 100644 --- a/agent/config/types.go +++ b/agent/config/types.go @@ -1,4 +1,4 @@ -// Copyright 2014-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// Copyright 2014-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"). You may // not use this file except in compliance with the License. A copy of the @@ -20,6 +20,10 @@ import ( cnitypes "github.com/containernetworking/cni/pkg/types" ) +// ImagePullBehaviorType is an enum variable type corresponding to different agent pull +// behaviors including default, always, never and once. +type ImagePullBehaviorType int8 + type Config struct { // DEPRECATED // ClusterArn is the Name or full ARN of a Cluster to register into. It has @@ -149,6 +153,10 @@ type Config struct { // when Agent performs cleanup NumImagesToDeletePerCycle int + // ImagePullBehavior specifies the agent's behavior for pulling image and loading + // local Docker image cache + ImagePullBehavior ImagePullBehaviorType + // InstanceAttributes contains key/value pairs representing // attributes to be associated with this instance within the // ECS service and used to influence behavior such as launch diff --git a/agent/dockerclient/dockerapi/docker_client_test.go b/agent/dockerclient/dockerapi/docker_client_test.go index 21e3e0edbf3..d0d71d53a6f 100644 --- a/agent/dockerclient/dockerapi/docker_client_test.go +++ b/agent/dockerclient/dockerapi/docker_client_test.go @@ -441,7 +441,7 @@ func TestStartContainerTimeout(t *testing.T) { wait.Add(1) mockDocker.EXPECT().StartContainerWithContext("id", nil, gomock.Any()).Do(func(x, y, z interface{}) { wait.Wait() // wait until timeout happens - }) + }).MaxTimes(1) mockDocker.EXPECT().InspectContainerWithContext("id", gomock.Any()).Return(nil, errors.New("test error")).AnyTimes() ctx, cancel := context.WithCancel(context.TODO()) defer cancel() diff --git a/agent/engine/common_test.go b/agent/engine/common_test.go index cdd1d2dcb12..f56e9a8eaf7 100644 --- a/agent/engine/common_test.go +++ b/agent/engine/common_test.go @@ -136,7 +136,7 @@ func validateContainerRunWorkflow(t *testing.T, imageManager.EXPECT().AddAllImageStates(gomock.Any()).AnyTimes() client.EXPECT().PullImage(container.Image, nil).Return(dockerapi.DockerContainerMetadata{}) imageManager.EXPECT().RecordContainerReference(container).Return(nil) - imageManager.EXPECT().GetImageStateFromImageName(gomock.Any()).Return(nil) + imageManager.EXPECT().GetImageStateFromImageName(gomock.Any()).Return(nil, false) client.EXPECT().APIVersion().Return(defaultDockerClientAPIVersion, nil) dockerConfig, err := task.DockerConfig(container, defaultDockerClientAPIVersion) if err != nil { diff --git a/agent/engine/docker_image_manager.go b/agent/engine/docker_image_manager.go index e3a49df2093..97238d7142a 100644 --- a/agent/engine/docker_image_manager.go +++ b/agent/engine/docker_image_manager.go @@ -14,13 +14,12 @@ package engine import ( + "context" "fmt" "sort" "sync" "time" - "context" - "github.com/aws/amazon-ecs-agent/agent/api" "github.com/aws/amazon-ecs-agent/agent/config" "github.com/aws/amazon-ecs-agent/agent/dockerclient" @@ -41,7 +40,7 @@ type ImageManager interface { RecordContainerReference(container *api.Container) error RemoveContainerReferenceFromImageState(container *api.Container) error AddAllImageStates(imageStates []*image.ImageState) - GetImageStateFromImageName(containerImageName string) *image.ImageState + GetImageStateFromImageName(containerImageName string) (*image.ImageState, bool) StartImageCleanupProcess(ctx context.Context) SetSaver(stateManager statemanager.Saver) } @@ -59,6 +58,7 @@ type dockerImageManager struct { minimumAgeBeforeDeletion time.Duration numImagesToDelete int imageCleanupTimeInterval time.Duration + imagePullBehavior config.ImagePullBehaviorType } // ImageStatesForDeletion is used for implementing the sort interface @@ -72,6 +72,7 @@ func NewImageManager(cfg *config.Config, client dockerapi.DockerClient, state do minimumAgeBeforeDeletion: cfg.MinimumImageDeletionAge, numImagesToDelete: cfg.NumImagesToDeletePerCycle, imageCleanupTimeInterval: cfg.ImageCleanupInterval, + imagePullBehavior: cfg.ImagePullBehavior, } } @@ -155,6 +156,8 @@ func (imageManager *dockerImageManager) addContainerReferenceToNewImageState(con Image: sourceImage, PulledAt: time.Now(), LastUsedAt: time.Now(), + // PullSucceeded should be set to true when the pull image succeeds. + PullSucceeded: false, } sourceImageState.UpdateImageState(container) imageManager.addImageState(sourceImageState) @@ -267,6 +270,12 @@ func (imageManager *dockerImageManager) removeExistingImageNameOfDifferentID(con } func (imageManager *dockerImageManager) StartImageCleanupProcess(ctx context.Context) { + // If the image pull behavior is prefer cached, don't clean up the image, + // because the cached image is needed. + if imageManager.imagePullBehavior == config.ImagePullPreferCachedBehavior { + seelog.Info("Pull behavior is set to always use cache. Disabling cleanup") + return + } // passing the cleanup interval as argument which would help during testing imageManager.performPeriodicImageCleanup(ctx, imageManager.imageCleanupTimeInterval) } @@ -369,15 +378,15 @@ func (imageManager *dockerImageManager) deleteImage(ctx context.Context, imageID } } -func (imageManager *dockerImageManager) GetImageStateFromImageName(containerImageName string) *image.ImageState { +func (imageManager *dockerImageManager) GetImageStateFromImageName(containerImageName string) (*image.ImageState, bool) { imageManager.updateLock.Lock() defer imageManager.updateLock.Unlock() for _, imageState := range imageManager.getAllImageStates() { for _, imageName := range imageState.Image.Names { if imageName == containerImageName { - return imageState + return imageState, true } } } - return nil + return nil, false } diff --git a/agent/engine/docker_image_manager_integ_test.go b/agent/engine/docker_image_manager_integ_test.go index 2fcfbbc0828..937c397e03d 100644 --- a/agent/engine/docker_image_manager_integ_test.go +++ b/agent/engine/docker_image_manager_integ_test.go @@ -81,24 +81,17 @@ func TestIntegImageCleanupHappyCase(t *testing.T) { t.Fatal(err) } - imageState1 := imageManager.GetImageStateFromImageName(test1Image1Name) - if imageState1 == nil { - t.Fatalf("Could not find image state for %s", test1Image1Name) - } else { - t.Logf("Found image state for %s", test1Image1Name) - } - imageState2 := imageManager.GetImageStateFromImageName(test1Image2Name) - if imageState2 == nil { - t.Fatalf("Could not find image state for %s", test1Image2Name) - } else { - t.Logf("Found image state for %s", test1Image2Name) - } - imageState3 := imageManager.GetImageStateFromImageName(test1Image3Name) - if imageState3 == nil { - t.Fatalf("Could not find image state for %s", test1Image3Name) - } else { - t.Logf("Found image state for %s", test1Image3Name) - } + imageState1, ok := imageManager.GetImageStateFromImageName(test1Image1Name) + require.True(t, ok, "Could not find image state for %s", test1Image1Name) + t.Logf("Found image state for %s", test1Image1Name) + + imageState2, ok := imageManager.GetImageStateFromImageName(test1Image2Name) + require.True(t, ok, "Could not find image state for %s", test1Image2Name) + t.Logf("Found image state for %s", test1Image2Name) + + imageState3, ok := imageManager.GetImageStateFromImageName(test1Image3Name) + require.True(t, ok, "Could not find image state for %s", test1Image3Name) + t.Logf("Found image state for %s", test1Image3Name) imageState1ImageID := imageState1.Image.ImageID imageState2ImageID := imageState2.Image.ImageID @@ -196,24 +189,17 @@ func TestIntegImageCleanupThreshold(t *testing.T) { t.Fatal(err) } - imageState1 := imageManager.GetImageStateFromImageName(test2Image1Name) - if imageState1 == nil { - t.Fatalf("Could not find image state for %s", test2Image1Name) - } else { - t.Logf("Found image state for %s", test2Image1Name) - } - imageState2 := imageManager.GetImageStateFromImageName(test2Image2Name) - if imageState2 == nil { - t.Fatalf("Could not find image state for %s", test2Image2Name) - } else { - t.Logf("Found image state for %s", test2Image2Name) - } - imageState3 := imageManager.GetImageStateFromImageName(test2Image3Name) - if imageState3 == nil { - t.Fatalf("Could not find image state for %s", test2Image3Name) - } else { - t.Logf("Found image state for %s", test2Image3Name) - } + imageState1, ok := imageManager.GetImageStateFromImageName(test2Image1Name) + require.True(t, ok, "Could not find image state for %s", test2Image1Name) + t.Logf("Found image state for %s", test2Image1Name) + + imageState2, ok := imageManager.GetImageStateFromImageName(test2Image2Name) + require.True(t, ok, "Could not find image state for %s", test2Image2Name) + t.Logf("Found image state for %s", test2Image2Name) + + imageState3, ok := imageManager.GetImageStateFromImageName(test2Image3Name) + require.True(t, ok, "Could not find image state for %s", test2Image3Name) + t.Logf("Found image state for %s", test2Image3Name) imageState1ImageID := imageState1.Image.ImageID imageState2ImageID := imageState2.Image.ImageID @@ -333,8 +319,8 @@ func TestImageWithSameNameAndDifferentID(t *testing.T) { require.NoError(t, err, "task1") // Verify image state is updated correctly - imageState1 := imageManager.GetImageStateFromImageName(identicalImageName) - require.NotNil(t, imageState1, "Could not find image state for %s", identicalImageName) + imageState1, ok := imageManager.GetImageStateFromImageName(identicalImageName) + require.True(t, ok, "Could not find image state for %s", identicalImageName) t.Logf("Found image state for %s", identicalImageName) imageID1 := imageState1.Image.ImageID @@ -348,8 +334,8 @@ func TestImageWithSameNameAndDifferentID(t *testing.T) { require.NoError(t, err, "task2") // Verify image state is updated correctly - imageState2 := imageManager.GetImageStateFromImageName(identicalImageName) - require.NotNil(t, imageState2, "Could not find image state for %s", identicalImageName) + imageState2, ok := imageManager.GetImageStateFromImageName(identicalImageName) + require.True(t, ok, "Could not find image state for %s", identicalImageName) t.Logf("Found image state for %s", identicalImageName) imageID2 := imageState2.Image.ImageID require.NotEqual(t, imageID2, imageID1, "The image id in task 2 should be different from image in task 1") @@ -364,8 +350,8 @@ func TestImageWithSameNameAndDifferentID(t *testing.T) { require.NoError(t, err, "task3") // Verify image state is updated correctly - imageState3 := imageManager.GetImageStateFromImageName(identicalImageName) - require.NotNil(t, imageState3, "Could not find image state for %s", identicalImageName) + imageState3, ok := imageManager.GetImageStateFromImageName(identicalImageName) + require.True(t, ok, "Could not find image state for %s", identicalImageName) t.Logf("Found image state for %s", identicalImageName) imageID3 := imageState3.Image.ImageID require.NotEqual(t, imageID3, imageID1, "The image id in task3 should be different from image in task1") @@ -463,8 +449,8 @@ func TestImageWithSameIDAndDifferentNames(t *testing.T) { err = verifyTaskIsRunning(stateChangeEvents, task1) require.NoError(t, err) - imageState1 := imageManager.GetImageStateFromImageName(task1.Containers[0].Image) - require.NotNil(t, imageState1, "Could not find image state for %s", task1.Containers[0].Image) + imageState1, ok := imageManager.GetImageStateFromImageName(task1.Containers[0].Image) + require.True(t, ok, "Could not find image state for %s", task1.Containers[0].Image) t.Logf("Found image state for %s", task1.Containers[0].Image) imageID1 := imageState1.Image.ImageID @@ -481,8 +467,8 @@ func TestImageWithSameIDAndDifferentNames(t *testing.T) { err = verifyTaskIsRunning(stateChangeEvents, task2) require.NoError(t, err) - imageState2 := imageManager.GetImageStateFromImageName(task2.Containers[0].Image) - require.NotNil(t, imageState2, "Could not find image state for %s", task2.Containers[0].Image) + imageState2, ok := imageManager.GetImageStateFromImageName(task2.Containers[0].Image) + require.True(t, ok, "Could not find image state for %s", task2.Containers[0].Image) t.Logf("Found image state for %s", task2.Containers[0].Image) imageID2 := imageState2.Image.ImageID require.Equal(t, imageID2, imageID1, "The image id in task2 should be same as in task1") @@ -500,8 +486,8 @@ func TestImageWithSameIDAndDifferentNames(t *testing.T) { err = verifyTaskIsRunning(stateChangeEvents, task3) assert.NoError(t, err) - imageState3 := imageManager.GetImageStateFromImageName(task3.Containers[0].Image) - require.NotNil(t, imageState3, "Could not find image state for %s", task3.Containers[0].Image) + imageState3, ok := imageManager.GetImageStateFromImageName(task3.Containers[0].Image) + require.True(t, ok, "Could not find image state for %s", task3.Containers[0].Image) t.Logf("Found image state for %s", task3.Containers[0].Image) imageID3 := imageState3.Image.ImageID require.Equal(t, imageID3, imageID1, "The image id in task3 should be the same as in task1") diff --git a/agent/engine/docker_image_manager_test.go b/agent/engine/docker_image_manager_test.go index 830a849310c..6373f78acc0 100644 --- a/agent/engine/docker_image_manager_test.go +++ b/agent/engine/docker_image_manager_test.go @@ -998,8 +998,8 @@ func TestGetImageStateFromImageName(t *testing.T) { if err != nil { t.Error("Error in adding container to an existing image state") } - imageState := imageManager.GetImageStateFromImageName(container.Image) - if imageState == nil { + _, ok := imageManager.GetImageStateFromImageName(container.Image) + if !ok { t.Error("Error retrieving image state by image name") } } @@ -1022,8 +1022,8 @@ func TestGetImageStateFromImageNameNoImageState(t *testing.T) { if err != nil { t.Error("Error in adding container to an existing image state") } - imageState := imageManager.GetImageStateFromImageName("noSuchImage") - if imageState != nil { + _, ok := imageManager.GetImageStateFromImageName("noSuchImage") + if ok { t.Error("Incorrect image state retrieved by image name") } } @@ -1095,3 +1095,17 @@ func TestConcurrentRemoveUnusedImages(t *testing.T) { waitGroup.Wait() require.Equal(t, 0, len(imageManager.imageStates)) } + +func TestImageCleanupProcessNotStart(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + client := mock_dockerapi.NewMockDockerClient(ctrl) + + cfg := defaultTestConfig() + cfg.ImagePullBehavior = config.ImagePullPreferCachedBehavior + imageManager := NewImageManager(cfg, client, dockerstate.NewTaskEngineState()) + ctx, cancel := context.WithCancel(context.TODO()) + defer cancel() + imageManager.StartImageCleanupProcess(ctx) + // Nothing should happen. +} diff --git a/agent/engine/docker_task_engine.go b/agent/engine/docker_task_engine.go index df6cc9f5983..548df86d17c 100644 --- a/agent/engine/docker_task_engine.go +++ b/agent/engine/docker_task_engine.go @@ -636,18 +636,59 @@ func (engine *DockerTaskEngine) pullContainer(task *api.Task, container *api.Con } } - // Record the pullStoppedAt timestamp - defer func() { - timestamp := engine.time().Now() - task.SetPullStoppedAt(timestamp) - }() - - if engine.enableConcurrentPull { - seelog.Infof("Task engine [%s]: pulling container %s concurrently", task.Arn, container.Name) - return engine.concurrentPull(task, container) - } - seelog.Infof("Task engine [%s]: pulling container %s serially", task.Arn, container.Name) - return engine.serialPull(task, container) + if engine.imagePullRequired(engine.cfg.ImagePullBehavior, container, task.Arn) { + // Record the pullStoppedAt timestamp + defer func() { + timestamp := engine.time().Now() + task.SetPullStoppedAt(timestamp) + }() + + if engine.enableConcurrentPull { + seelog.Infof("Task engine [%s]: pulling container %s concurrently", task.Arn, container.Name) + return engine.concurrentPull(task, container) + } + seelog.Infof("Task engine [%s]: pulling container %s serially", task.Arn, container.Name) + return engine.serialPull(task, container) + } + + // No pull image is required, just update container reference and use cached image. + engine.updateContainerReference(false, container, task.Arn) + // Return the metadata without any error + return dockerapi.DockerContainerMetadata{Error: nil} +} + +// imagePullRequired returns true if pulling image is required, or return false if local image cache +// should be used, by inspecting the agent pull behavior variable defined in config. The caller has +// to make sure the container passed in is not an internal container. +func (engine *DockerTaskEngine) imagePullRequired(imagePullBehavior config.ImagePullBehaviorType, + container *api.Container, + taskArn string) bool { + switch imagePullBehavior { + case config.ImagePullOnceBehavior: + // If this image has been pulled successfully before, don't pull the image, + // otherwise pull the image as usual, regardless whether the image exists or not + // (the image can be prepopulated with the AMI and never be pulled). + imageState, ok := engine.imageManager.GetImageStateFromImageName(container.Image) + if ok && imageState.GetPullSucceeded() { + seelog.Infof("Task engine [%s]: image %s has been pulled once, not pulling it again", + taskArn, container.Image) + return false + } + return true + case config.ImagePullPreferCachedBehavior: + // If the behavior is prefer cached, don't pull if we found cached image + // by inspecting the image. + _, err := engine.client.InspectImage(container.Image) + if err != nil { + return true + } + seelog.Infof("Task engine [%s]: found cached image %s, use it directly for container %s", + taskArn, container.Image, container.Name) + return false + default: + // Need to pull the image for always and default agent pull behavior + return true + } } func (engine *DockerTaskEngine) concurrentPull(task *api.Task, container *api.Container) dockerapi.DockerContainerMetadata { @@ -734,16 +775,23 @@ func (engine *DockerTaskEngine) pullAndUpdateContainerReference(task *api.Task, if container.IsInternal() { return metadata } + pullSucceeded := metadata.Error == nil + engine.updateContainerReference(pullSucceeded, container, task.Arn) + return metadata +} +func (engine *DockerTaskEngine) updateContainerReference(pullSucceeded bool, container *api.Container, taskArn string) { err := engine.imageManager.RecordContainerReference(container) if err != nil { seelog.Errorf("Task engine [%s]: Unable to add container reference to image state: %v", - task.Arn, err) + taskArn, err) + } + imageState, ok := engine.imageManager.GetImageStateFromImageName(container.Image) + if ok && pullSucceeded { + imageState.SetPullSucceeded(true) } - imageState := engine.imageManager.GetImageStateFromImageName(container.Image) engine.state.AddImageState(imageState) engine.saver.Save() - return metadata } func (engine *DockerTaskEngine) createContainer(task *api.Task, container *api.Container) dockerapi.DockerContainerMetadata { diff --git a/agent/engine/docker_task_engine_test.go b/agent/engine/docker_task_engine_test.go index 586e290a07d..af4606ac753 100644 --- a/agent/engine/docker_task_engine_test.go +++ b/agent/engine/docker_task_engine_test.go @@ -328,7 +328,7 @@ func TestTaskWithSteadyStateResourcesProvisioned(t *testing.T) { imageManager.EXPECT().AddAllImageStates(gomock.Any()).AnyTimes() client.EXPECT().PullImage(sleepContainer.Image, nil).Return(dockerapi.DockerContainerMetadata{}) imageManager.EXPECT().RecordContainerReference(sleepContainer).Return(nil) - imageManager.EXPECT().GetImageStateFromImageName(sleepContainer.Image).Return(nil) + imageManager.EXPECT().GetImageStateFromImageName(sleepContainer.Image).Return(nil, false) gomock.InOrder( // Ensure that the pause container is created first @@ -523,7 +523,7 @@ func TestStartTimeoutThenStart(t *testing.T) { client.EXPECT().PullImage(container.Image, nil).Return(dockerapi.DockerContainerMetadata{}) imageManager.EXPECT().RecordContainerReference(container) - imageManager.EXPECT().GetImageStateFromImageName(gomock.Any()).Return(nil) + imageManager.EXPECT().GetImageStateFromImageName(gomock.Any()).Return(nil, false) client.EXPECT().CreateContainer(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Do( func(ctx interface{}, x, y, z, timeout interface{}) { go func() { eventStream <- createDockerEvent(api.ContainerCreated) }() @@ -784,7 +784,7 @@ func TestTaskTransitionWhenStopContainerTimesout(t *testing.T) { imageManager.EXPECT().AddAllImageStates(gomock.Any()).AnyTimes() client.EXPECT().PullImage(container.Image, nil).Return(dockerapi.DockerContainerMetadata{}) imageManager.EXPECT().RecordContainerReference(container) - imageManager.EXPECT().GetImageStateFromImageName(gomock.Any()).Return(nil) + imageManager.EXPECT().GetImageStateFromImageName(gomock.Any()).Return(nil, false) client.EXPECT().APIVersion().Return(defaultDockerClientAPIVersion, nil) client.EXPECT().CreateContainer(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Do( @@ -877,7 +877,7 @@ func TestTaskTransitionWhenStopContainerReturnsUnretriableError(t *testing.T) { imageManager.EXPECT().AddAllImageStates(gomock.Any()).AnyTimes(), client.EXPECT().PullImage(container.Image, nil).Return(dockerapi.DockerContainerMetadata{}), imageManager.EXPECT().RecordContainerReference(container), - imageManager.EXPECT().GetImageStateFromImageName(gomock.Any()).Return(nil), + imageManager.EXPECT().GetImageStateFromImageName(gomock.Any()).Return(nil, false), client.EXPECT().APIVersion().Return(defaultDockerClientAPIVersion, nil), // Simulate successful create container client.EXPECT().CreateContainer(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Do( @@ -952,7 +952,7 @@ func TestTaskTransitionWhenStopContainerReturnsTransientErrorBeforeSucceeding(t imageManager.EXPECT().AddAllImageStates(gomock.Any()).AnyTimes(), client.EXPECT().PullImage(container.Image, nil).Return(dockerapi.DockerContainerMetadata{}), imageManager.EXPECT().RecordContainerReference(container), - imageManager.EXPECT().GetImageStateFromImageName(gomock.Any()).Return(nil), + imageManager.EXPECT().GetImageStateFromImageName(gomock.Any()).Return(nil, false), // Simulate successful create container client.EXPECT().APIVersion().Return(defaultDockerClientAPIVersion, nil), client.EXPECT().CreateContainer(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return( @@ -1091,7 +1091,7 @@ func TestPauseContainerHappyPath(t *testing.T) { imageManager.EXPECT().AddAllImageStates(gomock.Any()).AnyTimes() dockerClient.EXPECT().PullImage(gomock.Any(), nil).Return(dockerapi.DockerContainerMetadata{}) imageManager.EXPECT().RecordContainerReference(gomock.Any()).Return(nil) - imageManager.EXPECT().GetImageStateFromImageName(gomock.Any()).Return(nil) + imageManager.EXPECT().GetImageStateFromImageName(gomock.Any()).Return(nil, false) dockerClient.EXPECT().APIVersion().Return(defaultDockerClientAPIVersion, nil) dockerClient.EXPECT().CreateContainer(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(dockerapi.DockerContainerMetadata{DockerID: containerID}) @@ -1375,12 +1375,146 @@ func TestPullNormalImage(t *testing.T) { client.EXPECT().PullImage(imageName, nil) imageManager.EXPECT().RecordContainerReference(container) - imageManager.EXPECT().GetImageStateFromImageName(imageName).Return(imageState) + imageManager.EXPECT().GetImageStateFromImageName(imageName).Return(imageState, true) saver.EXPECT().Save() metadata := taskEngine.pullContainer(task, container) assert.Equal(t, dockerapi.DockerContainerMetadata{}, metadata, "expected empty metadata") } +func TestPullImageWithImagePullOnceBehavior(t *testing.T) { + testcases := []struct { + name string + pullSucceeded bool + }{ + { + name: "PullSucceeded is true", + pullSucceeded: true, + }, + { + name: "PullSucceeded is false", + pullSucceeded: false, + }, + } + + for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + ctx, cancel := context.WithCancel(context.TODO()) + defer cancel() + ctrl, client, _, privateTaskEngine, _, imageManager, _ := mocks(t, ctx, &config.Config{ImagePullBehavior: config.ImagePullOnceBehavior}) + defer ctrl.Finish() + taskEngine, _ := privateTaskEngine.(*DockerTaskEngine) + saver := mock_statemanager.NewMockStateManager(ctrl) + taskEngine.SetSaver(saver) + taskEngine._time = nil + imageName := "image" + container := &api.Container{ + Type: api.ContainerNormal, + Image: imageName, + } + task := &api.Task{ + Containers: []*api.Container{container}, + } + imageState := &image.ImageState{ + Image: &image.Image{ImageID: "id"}, + PullSucceeded: tc.pullSucceeded, + } + if !tc.pullSucceeded { + client.EXPECT().PullImage(imageName, nil) + } + imageManager.EXPECT().RecordContainerReference(container) + imageManager.EXPECT().GetImageStateFromImageName(imageName).Return(imageState, true).Times(2) + saver.EXPECT().Save() + metadata := taskEngine.pullContainer(task, container) + assert.Equal(t, dockerapi.DockerContainerMetadata{}, metadata, "expected empty metadata") + }) + } +} + +func TestPullImageWithImagePullPreferCachedBehaviorWithCachedImage(t *testing.T) { + ctx, cancel := context.WithCancel(context.TODO()) + defer cancel() + ctrl, client, _, privateTaskEngine, _, imageManager, _ := mocks(t, ctx, &config.Config{ImagePullBehavior: config.ImagePullPreferCachedBehavior}) + defer ctrl.Finish() + taskEngine, _ := privateTaskEngine.(*DockerTaskEngine) + saver := mock_statemanager.NewMockStateManager(ctrl) + taskEngine.SetSaver(saver) + taskEngine._time = nil + imageName := "image" + container := &api.Container{ + Type: api.ContainerNormal, + Image: imageName, + } + task := &api.Task{ + Containers: []*api.Container{container}, + } + imageState := &image.ImageState{ + Image: &image.Image{ImageID: "id"}, + } + client.EXPECT().InspectImage(imageName).Return(nil, nil) + imageManager.EXPECT().RecordContainerReference(container) + imageManager.EXPECT().GetImageStateFromImageName(imageName).Return(imageState, true) + saver.EXPECT().Save() + metadata := taskEngine.pullContainer(task, container) + assert.Equal(t, dockerapi.DockerContainerMetadata{}, metadata, "expected empty metadata") +} + +func TestPullImageWithImagePullPreferCachedBehaviorWithoutCachedImage(t *testing.T) { + ctx, cancel := context.WithCancel(context.TODO()) + defer cancel() + ctrl, client, _, privateTaskEngine, _, imageManager, _ := mocks(t, ctx, &config.Config{ImagePullBehavior: config.ImagePullPreferCachedBehavior}) + defer ctrl.Finish() + taskEngine, _ := privateTaskEngine.(*DockerTaskEngine) + saver := mock_statemanager.NewMockStateManager(ctrl) + taskEngine.SetSaver(saver) + taskEngine._time = nil + imageName := "image" + container := &api.Container{ + Type: api.ContainerNormal, + Image: imageName, + } + task := &api.Task{ + Containers: []*api.Container{container}, + } + imageState := &image.ImageState{ + Image: &image.Image{ImageID: "id"}, + } + client.EXPECT().InspectImage(imageName).Return(nil, errors.New("error")) + client.EXPECT().PullImage(imageName, nil) + imageManager.EXPECT().RecordContainerReference(container) + imageManager.EXPECT().GetImageStateFromImageName(imageName).Return(imageState, true) + saver.EXPECT().Save() + metadata := taskEngine.pullContainer(task, container) + assert.Equal(t, dockerapi.DockerContainerMetadata{}, metadata, "expected empty metadata") +} + +func TestUpdateContainerReference(t *testing.T) { + ctx, cancel := context.WithCancel(context.TODO()) + defer cancel() + ctrl, _, _, privateTaskEngine, _, imageManager, _ := mocks(t, ctx, &config.Config{}) + defer ctrl.Finish() + taskEngine, _ := privateTaskEngine.(*DockerTaskEngine) + saver := mock_statemanager.NewMockStateManager(ctrl) + taskEngine.SetSaver(saver) + taskEngine._time = nil + imageName := "image" + container := &api.Container{ + Type: api.ContainerNormal, + Image: imageName, + } + task := &api.Task{ + Containers: []*api.Container{container}, + } + imageState := &image.ImageState{ + Image: &image.Image{ImageID: "id"}, + } + + imageManager.EXPECT().RecordContainerReference(container) + imageManager.EXPECT().GetImageStateFromImageName(imageName).Return(imageState, true) + saver.EXPECT().Save() + taskEngine.updateContainerReference(true, container, task.Arn) + assert.True(t, imageState.PullSucceeded, "PullSucceeded set to false") +} + // TestMetadataFileUpdatedAgentRestart checks whether metadataManager.Update(...) is // invoked in the path DockerTaskEngine.Init() -> .synchronizeState() -> .updateMetadataFile(...) // for the following case: @@ -1613,7 +1747,7 @@ func TestPullStartedStoppedAtWasSetCorrectly(t *testing.T) { client.EXPECT().PullImage(gomock.Any(), gomock.Any()).Times(3) imageManager.EXPECT().RecordContainerReference(gomock.Any()).Times(3) - imageManager.EXPECT().GetImageStateFromImageName(gomock.Any()).Return(nil).Times(3) + imageManager.EXPECT().GetImageStateFromImageName(gomock.Any()).Return(nil, false).Times(3) gomock.InOrder( // three container pull start timestamp @@ -1666,7 +1800,7 @@ func TestPullStoppedAtWasSetCorrectlyWhenPullFail(t *testing.T) { dockerapi.DockerContainerMetadata{Error: dockerapi.CannotPullContainerError{fmt.Errorf("error")}}), ) imageManager.EXPECT().RecordContainerReference(gomock.Any()).Times(3) - imageManager.EXPECT().GetImageStateFromImageName(gomock.Any()).Return(nil).Times(3) + imageManager.EXPECT().GetImageStateFromImageName(gomock.Any()).Return(nil, false).Times(3) gomock.InOrder( // three container pull start timestamp mockTime.EXPECT().Now().Return(startTime1), @@ -1911,7 +2045,7 @@ func TestContainerProgressParallize(t *testing.T) { client.EXPECT().APIVersion().Return(defaultDockerClientAPIVersion, nil).AnyTimes() imageManager.EXPECT().AddAllImageStates(gomock.Any()).AnyTimes() imageManager.EXPECT().RecordContainerReference(gomock.Any()).Return(nil).AnyTimes() - imageManager.EXPECT().GetImageStateFromImageName(gomock.Any()).Return(nil).AnyTimes() + imageManager.EXPECT().GetImageStateFromImageName(gomock.Any()).Return(nil, false).AnyTimes() client.EXPECT().ContainerEvents(gomock.Any()).Return(eventStream, nil) client.EXPECT().PullImage(fastPullImage, gomock.Any()) client.EXPECT().PullImage(slowPullImage, gomock.Any()).Do( diff --git a/agent/engine/dockerstate/json_test.go b/agent/engine/dockerstate/json_test.go index a66a072021b..7b34d01277c 100644 --- a/agent/engine/dockerstate/json_test.go +++ b/agent/engine/dockerstate/json_test.go @@ -1,4 +1,4 @@ -// Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// Copyright 2017-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"). You may // not use this file except in compliance with the License. A copy of the @@ -242,7 +242,8 @@ const ( "Size": 150947595 }, "PulledAt": "2017-11-01T15:26:56.610709526Z", - "LastUsedAt": "2017-11-01T15:26:56.610709991Z" + "LastUsedAt": "2017-11-01T15:26:56.610709991Z", + "PullSucceeded": true } ], "ENIAttachments": [ diff --git a/agent/engine/engine_integ_test.go b/agent/engine/engine_integ_test.go index b9eef946861..e2460809f62 100644 --- a/agent/engine/engine_integ_test.go +++ b/agent/engine/engine_integ_test.go @@ -317,6 +317,23 @@ func TestStartStopWithCredentials(t *testing.T) { assert.False(t, ok, "Credentials not removed from credentials manager for stopped task") } +func TestTaskStopWhenPullImageFail(t *testing.T) { + cfg := defaultTestConfigIntegTest() + cfg.ImagePullBehavior = config.ImagePullAlwaysBehavior + taskEngine, done, _ := setup(cfg, nil, t) + defer done() + + testTask := createTestTask("testTaskStopWhenPullImageFail") + // Assign an invalid image to the task, and verify the task fails + // when the pull image behavior is "always". + testTask.Containers = []*api.Container{createTestContainerWithImageAndName("invalidImage", "invalidName")} + + go taskEngine.AddTask(testTask) + + verifyContainerStoppedStateChange(t, taskEngine) + verifyTaskStoppedStateChange(t, taskEngine) +} + func TestContainerHealthCheck(t *testing.T) { taskEngine, done, _ := setupWithDefaultConfig(t) defer done() diff --git a/agent/engine/engine_windows_integ_test.go b/agent/engine/engine_windows_integ_test.go index af74ef7cf43..1552005f35d 100644 --- a/agent/engine/engine_windows_integ_test.go +++ b/agent/engine/engine_windows_integ_test.go @@ -45,9 +45,13 @@ var endpoint = utils.DefaultIfBlank(os.Getenv(DockerEndpointEnvVariable), docker func isDockerRunning() bool { return true } func createTestContainer() *api.Container { + return createTestContainerWithImageAndName("microsoft/windowsservercore:latest", "windows") +} + +func createTestContainerWithImageAndName(image string, name string) *api.Container { return &api.Container{ - Name: "windows", - Image: "microsoft/windowsservercore:latest", + Name: name, + Image: image, Essential: true, DesiredStatusUnsafe: api.ContainerRunning, CPU: 512, diff --git a/agent/engine/image/types.go b/agent/engine/image/types.go index 246c70a5a23..56fd34646dd 100644 --- a/agent/engine/image/types.go +++ b/agent/engine/image/types.go @@ -1,4 +1,4 @@ -// Copyright 2014-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// Copyright 2014-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"). You may // not use this file except in compliance with the License. A copy of the @@ -37,23 +37,30 @@ func (image *Image) String() string { // ImageState represents a docker image // and its state information such as containers associated with it type ImageState struct { - Image *Image + // Image is the image corresponding to this ImageState. + Image *Image + // Containers are the containers that use this image. Containers []*api.Container `json:"-"` - PulledAt time.Time + // PulledAt is the time when this image was pulled. + PulledAt time.Time + // LastUsedAt is the time when this image was used last time. LastUsedAt time.Time - updateLock sync.RWMutex + // PullSucceeded defines whether this image has been pulled successfully before, + // this should be set to true when one of the pull image call succeeds. + PullSucceeded bool + lock sync.RWMutex } func (imageState *ImageState) UpdateContainerReference(container *api.Container) { - imageState.updateLock.Lock() - defer imageState.updateLock.Unlock() + imageState.lock.Lock() + defer imageState.lock.Unlock() seelog.Infof("Updating container reference %v in Image State - %v", container.Name, imageState.Image.ImageID) imageState.Containers = append(imageState.Containers, container) } func (imageState *ImageState) AddImageName(imageName string) { - imageState.updateLock.Lock() - defer imageState.updateLock.Unlock() + imageState.lock.Lock() + defer imageState.lock.Unlock() if !imageState.HasImageName(imageName) { seelog.Infof("Adding image name- %v to Image state- %v", imageName, imageState.Image.ImageID) imageState.Image.Names = append(imageState.Image.Names, imageName) @@ -61,8 +68,8 @@ func (imageState *ImageState) AddImageName(imageName string) { } func (imageState *ImageState) GetImageNamesCount() int { - imageState.updateLock.RLock() - defer imageState.updateLock.RUnlock() + imageState.lock.RLock() + defer imageState.lock.RUnlock() return len(imageState.Image.Names) } @@ -76,8 +83,8 @@ func (imageState *ImageState) UpdateImageState(container *api.Container) { } func (imageState *ImageState) RemoveImageName(containerImageName string) { - imageState.updateLock.Lock() - defer imageState.updateLock.Unlock() + imageState.lock.Lock() + defer imageState.lock.Unlock() for i, imageName := range imageState.Image.Names { if imageName == containerImageName { imageState.Image.Names = append(imageState.Image.Names[:i], imageState.Image.Names[i+1:]...) @@ -96,8 +103,8 @@ func (imageState *ImageState) HasImageName(containerImageName string) bool { func (imageState *ImageState) RemoveContainerReference(container *api.Container) error { // Get the image state write lock for updating container reference - imageState.updateLock.Lock() - defer imageState.updateLock.Unlock() + imageState.lock.Lock() + defer imageState.lock.Unlock() for i := range imageState.Containers { if imageState.Containers[i].Name == container.Name { // Container reference found; hence remove it @@ -111,18 +118,36 @@ func (imageState *ImageState) RemoveContainerReference(container *api.Container) return fmt.Errorf("Container reference is not found in the image state container: %s", container.String()) } +// SetPullSucceeded sets the PullSucceeded of the imageState +func (imageState *ImageState) SetPullSucceeded(pullSucceeded bool) { + imageState.lock.Lock() + defer imageState.lock.Unlock() + + imageState.PullSucceeded = pullSucceeded +} + +// GetPullSucceeded safely returns the PullSucceeded of the imageState +func (imageState *ImageState) GetPullSucceeded() bool { + imageState.lock.RLock() + defer imageState.lock.RUnlock() + + return imageState.PullSucceeded +} + func (imageState *ImageState) MarshalJSON() ([]byte, error) { - imageState.updateLock.Lock() - defer imageState.updateLock.Unlock() + imageState.lock.Lock() + defer imageState.lock.Unlock() return json.Marshal(&struct { - Image *Image - PulledAt time.Time - LastUsedAt time.Time + Image *Image + PulledAt time.Time + LastUsedAt time.Time + PullSucceeded bool }{ - Image: imageState.Image, - PulledAt: imageState.PulledAt, - LastUsedAt: imageState.LastUsedAt, + Image: imageState.Image, + PulledAt: imageState.PulledAt, + LastUsedAt: imageState.LastUsedAt, + PullSucceeded: imageState.PullSucceeded, }) } @@ -131,6 +156,6 @@ func (imageState *ImageState) String() string { if imageState.Image != nil { image = imageState.Image.String() } - return fmt.Sprintf("Image: [%s] referenced by %d containers; PulledAt: %s; LastUsedAt: %s", - image, len(imageState.Containers), imageState.PulledAt.String(), imageState.LastUsedAt.String()) + return fmt.Sprintf("Image: [%s] referenced by %d containers; PulledAt: %s; LastUsedAt: %s; PullSucceeded: %t", + image, len(imageState.Containers), imageState.PulledAt.String(), imageState.LastUsedAt.String(), imageState.PullSucceeded) } diff --git a/agent/engine/mocks/engine_mocks.go b/agent/engine/mocks/engine_mocks.go index 06bb0e3ec8d..3d39c7aa3f1 100644 --- a/agent/engine/mocks/engine_mocks.go +++ b/agent/engine/mocks/engine_mocks.go @@ -215,10 +215,11 @@ func (mr *MockImageManagerMockRecorder) AddAllImageStates(arg0 interface{}) *gom } // GetImageStateFromImageName mocks base method -func (m *MockImageManager) GetImageStateFromImageName(arg0 string) *image.ImageState { +func (m *MockImageManager) GetImageStateFromImageName(arg0 string) (*image.ImageState, bool) { ret := m.ctrl.Call(m, "GetImageStateFromImageName", arg0) ret0, _ := ret[0].(*image.ImageState) - return ret0 + ret1, _ := ret[1].(bool) + return ret0, ret1 } // GetImageStateFromImageName indicates an expected call of GetImageStateFromImageName diff --git a/agent/engine/task_manager.go b/agent/engine/task_manager.go index 719ddbbe39e..c4bc48ab047 100644 --- a/agent/engine/task_manager.go +++ b/agent/engine/task_manager.go @@ -543,10 +543,26 @@ func (mtask *managedTask) handleEventError(containerChange dockerContainerChange // event.Status is the desired container transition from container's known status // (* -> event.Status) case api.ContainerPulled: - // Container's desired transition was to 'PULLED'. A failure to pull might - // not be fatal if e.g. the image already exists. - seelog.Errorf("Managed task [%s]: Error while pulling container %s, will try to run anyway: %v", - mtask.Arn, container.Name, event.Error) + // If the agent pull behavior is always or once, we receive the error because + // the image pull fails, the task should fail. If we don't fail task here, + // then the cached image will probably be used for creating container, and we + // don't want to use cached image for both cases. + if mtask.cfg.ImagePullBehavior == config.ImagePullAlwaysBehavior || + mtask.cfg.ImagePullBehavior == config.ImagePullOnceBehavior { + seelog.Errorf("Managed task [%s]: Error while pulling image %s for container %s , moving task to STOPPED: %v", + mtask.Arn, container.Image, container.Name, event.Error) + // The task should be stopped regardless of whether this container is + // essential or non-essential. + mtask.SetDesiredStatus(api.TaskStopped) + return false + } + // If the agent pull behavior is prefer_cached, we receive the error because + // the image pull fails and there is no cached image in local, we don't make + // the task fail here, will let create container handle it instead. + // If the agent pull behavior is default, use local image cache directly, + // assuming it exists. + seelog.Errorf("Managed task [%s]: Error while pulling container %s and image %s, will try to run anyway: %v", + mtask.Arn, container.Name, container.Image, event.Error) // proceed anyway return true case api.ContainerStopped: diff --git a/agent/engine/task_manager_test.go b/agent/engine/task_manager_test.go index 61ca2246e62..d36a9d52220 100644 --- a/agent/engine/task_manager_test.go +++ b/agent/engine/task_manager_test.go @@ -15,6 +15,7 @@ package engine import ( + "context" "errors" "fmt" "sync" @@ -43,53 +44,53 @@ import ( docker "github.com/fsouza/go-dockerclient" "github.com/stretchr/testify/assert" - "context" - "github.com/golang/mock/gomock" ) func TestHandleEventError(t *testing.T) { testCases := []struct { - Name string - EventStatus api.ContainerStatus - CurrentKnownStatus api.ContainerStatus - Error apierrors.NamedError - ExpectedKnownStatusSet bool - ExpectedKnownStatus api.ContainerStatus - ExpectedDesiredStatusStopped bool - ExpectedOK bool + Name string + EventStatus api.ContainerStatus + CurrentContainerKnownStatus api.ContainerStatus + ImagePullBehavior config.ImagePullBehaviorType + Error apierrors.NamedError + ExpectedContainerKnownStatusSet bool + ExpectedContainerKnownStatus api.ContainerStatus + ExpectedContainerDesiredStatusStopped bool + ExpectedTaskDesiredStatusStopped bool + ExpectedOK bool }{ { - Name: "Stop timed out", - EventStatus: api.ContainerStopped, - CurrentKnownStatus: api.ContainerRunning, - Error: &dockerapi.DockerTimeoutError{}, - ExpectedKnownStatusSet: true, - ExpectedKnownStatus: api.ContainerRunning, - ExpectedOK: false, + Name: "Stop timed out", + EventStatus: api.ContainerStopped, + CurrentContainerKnownStatus: api.ContainerRunning, + Error: &dockerapi.DockerTimeoutError{}, + ExpectedContainerKnownStatusSet: true, + ExpectedContainerKnownStatus: api.ContainerRunning, + ExpectedOK: false, }, { - Name: "Retriable error with stop", - EventStatus: api.ContainerStopped, - CurrentKnownStatus: api.ContainerRunning, + Name: "Retriable error with stop", + EventStatus: api.ContainerStopped, + CurrentContainerKnownStatus: api.ContainerRunning, Error: &dockerapi.CannotStopContainerError{ FromError: errors.New(""), }, - ExpectedKnownStatusSet: true, - ExpectedKnownStatus: api.ContainerRunning, - ExpectedOK: false, + ExpectedContainerKnownStatusSet: true, + ExpectedContainerKnownStatus: api.ContainerRunning, + ExpectedOK: false, }, { - Name: "Unretriable error with Stop", - EventStatus: api.ContainerStopped, - CurrentKnownStatus: api.ContainerRunning, + Name: "Unretriable error with Stop", + EventStatus: api.ContainerStopped, + CurrentContainerKnownStatus: api.ContainerRunning, Error: &dockerapi.CannotStopContainerError{ FromError: &docker.ContainerNotRunning{}, }, - ExpectedKnownStatusSet: true, - ExpectedKnownStatus: api.ContainerStopped, - ExpectedDesiredStatusStopped: true, - ExpectedOK: true, + ExpectedContainerKnownStatusSet: true, + ExpectedContainerKnownStatus: api.ContainerStopped, + ExpectedContainerDesiredStatusStopped: true, + ExpectedOK: true, }, { Name: "Pull failed", @@ -98,65 +99,76 @@ func TestHandleEventError(t *testing.T) { ExpectedOK: true, }, { - Name: "Container vanished betweeen pull and running", - EventStatus: api.ContainerRunning, - CurrentKnownStatus: api.ContainerPulled, - Error: &ContainerVanishedError{}, - ExpectedKnownStatusSet: true, - ExpectedKnownStatus: api.ContainerPulled, - ExpectedDesiredStatusStopped: true, - ExpectedOK: false, + Name: "Container vanished betweeen pull and running", + EventStatus: api.ContainerRunning, + CurrentContainerKnownStatus: api.ContainerPulled, + Error: &ContainerVanishedError{}, + ExpectedContainerKnownStatusSet: true, + ExpectedContainerKnownStatus: api.ContainerPulled, + ExpectedContainerDesiredStatusStopped: true, + ExpectedOK: false, }, { - Name: "Inspect failed during start", - EventStatus: api.ContainerRunning, - CurrentKnownStatus: api.ContainerCreated, + Name: "Inspect failed during start", + EventStatus: api.ContainerRunning, + CurrentContainerKnownStatus: api.ContainerCreated, Error: &dockerapi.CannotInspectContainerError{ FromError: errors.New("error"), }, - ExpectedKnownStatusSet: true, - ExpectedKnownStatus: api.ContainerCreated, - ExpectedDesiredStatusStopped: true, - ExpectedOK: false, + ExpectedContainerKnownStatusSet: true, + ExpectedContainerKnownStatus: api.ContainerCreated, + ExpectedContainerDesiredStatusStopped: true, + ExpectedOK: false, }, { - Name: "Start timed out", - EventStatus: api.ContainerRunning, - CurrentKnownStatus: api.ContainerCreated, - Error: &dockerapi.DockerTimeoutError{}, - ExpectedKnownStatusSet: true, - ExpectedKnownStatus: api.ContainerCreated, - ExpectedDesiredStatusStopped: true, - ExpectedOK: false, + Name: "Start timed out", + EventStatus: api.ContainerRunning, + CurrentContainerKnownStatus: api.ContainerCreated, + Error: &dockerapi.DockerTimeoutError{}, + ExpectedContainerKnownStatusSet: true, + ExpectedContainerKnownStatus: api.ContainerCreated, + ExpectedContainerDesiredStatusStopped: true, + ExpectedOK: false, }, { - Name: "Inspect failed during create", - EventStatus: api.ContainerCreated, - CurrentKnownStatus: api.ContainerPulled, + Name: "Inspect failed during create", + EventStatus: api.ContainerCreated, + CurrentContainerKnownStatus: api.ContainerPulled, Error: &dockerapi.CannotInspectContainerError{ FromError: errors.New("error"), }, - ExpectedKnownStatusSet: true, - ExpectedKnownStatus: api.ContainerPulled, - ExpectedDesiredStatusStopped: true, - ExpectedOK: false, + ExpectedContainerKnownStatusSet: true, + ExpectedContainerKnownStatus: api.ContainerPulled, + ExpectedContainerDesiredStatusStopped: true, + ExpectedOK: false, }, { - Name: "Create timed out", - EventStatus: api.ContainerCreated, - CurrentKnownStatus: api.ContainerPulled, - Error: &dockerapi.DockerTimeoutError{}, - ExpectedKnownStatusSet: true, - ExpectedKnownStatus: api.ContainerPulled, - ExpectedDesiredStatusStopped: true, - ExpectedOK: false, + Name: "Create timed out", + EventStatus: api.ContainerCreated, + CurrentContainerKnownStatus: api.ContainerPulled, + Error: &dockerapi.DockerTimeoutError{}, + ExpectedContainerKnownStatusSet: true, + ExpectedContainerKnownStatus: api.ContainerPulled, + ExpectedContainerDesiredStatusStopped: true, + ExpectedOK: false, + }, + { + Name: "Pull image fails and task fails", + EventStatus: api.ContainerPulled, + Error: &dockerapi.CannotPullContainerError{ + FromError: errors.New("error"), + }, + ImagePullBehavior: config.ImagePullAlwaysBehavior, + ExpectedContainerKnownStatusSet: false, + ExpectedTaskDesiredStatusStopped: true, + ExpectedOK: false, }, } for _, tc := range testCases { t.Run(tc.Name, func(t *testing.T) { container := &api.Container{ - KnownStatusUnsafe: tc.CurrentKnownStatus, + KnownStatusUnsafe: tc.CurrentContainerKnownStatus, } containerChange := dockerContainerChange{ container: container, @@ -172,18 +184,24 @@ func TestHandleEventError(t *testing.T) { Arn: "task1", }, engine: &DockerTaskEngine{}, + cfg: &config.Config{ImagePullBehavior: tc.ImagePullBehavior}, } - ok := mtask.handleEventError(containerChange, tc.CurrentKnownStatus) + ok := mtask.handleEventError(containerChange, tc.CurrentContainerKnownStatus) assert.Equal(t, tc.ExpectedOK, ok, "to proceed") - if tc.ExpectedKnownStatusSet { + if tc.ExpectedContainerKnownStatusSet { containerKnownStatus := containerChange.container.GetKnownStatus() - assert.Equal(t, tc.ExpectedKnownStatus, containerKnownStatus, - "expected known status %s != %s", tc.ExpectedKnownStatus.String(), containerKnownStatus.String()) + assert.Equal(t, tc.ExpectedContainerKnownStatus, containerKnownStatus, + "expected container known status %s != %s", tc.ExpectedContainerKnownStatus.String(), containerKnownStatus.String()) } - if tc.ExpectedDesiredStatusStopped { + if tc.ExpectedContainerDesiredStatusStopped { containerDesiredStatus := containerChange.container.GetDesiredStatus() assert.Equal(t, api.ContainerStopped, containerDesiredStatus, - "desired status %s != %s", api.ContainerStopped.String(), containerDesiredStatus.String()) + "desired container status %s != %s", api.ContainerStopped.String(), containerDesiredStatus.String()) + } + if tc.ExpectedTaskDesiredStatusStopped { + taskDesiredStatus := mtask.GetDesiredStatus() + assert.Equal(t, api.TaskStopped, taskDesiredStatus, + "desired task status %s != %s", api.TaskStopped.String(), taskDesiredStatus.String()) } assert.Equal(t, tc.Error.ErrorName(), containerChange.container.ApplyingError.ErrorName()) }) diff --git a/agent/statemanager/state_manager.go b/agent/statemanager/state_manager.go index 7ee452397ea..8de6d563df8 100644 --- a/agent/statemanager/state_manager.go +++ b/agent/statemanager/state_manager.go @@ -51,18 +51,19 @@ const ( // e) Deprecate 'SteadyStateDependencies' in favor of 'TransitionDependencySet' // 7) // a) Add 'MetadataUpdated' field to 'api.Container' - // b) Add 'DomainNameServers' and 'DomainNameSearchList' in `api.ENI` + // b) Add 'DomainNameServers' and 'DomainNameSearchList' in 'api.ENI' // 8) - // a) Add 'UseExecutionRole' in `api.ECRAuthData` - // b) Add `executionCredentialsID` in `api.Task` + // a) Add 'UseExecutionRole' in 'api.ECRAuthData' + // b) Add 'executionCredentialsID' in 'api.Task' // c) Add 'LogsAuthStrategy' field to 'api.Container' // d) Added task cgroup related fields ('CPU', 'Memory', 'MemoryCPULimitsEnabled') to 'api.Task' // 9) Add 'ipToTask' map to state file // 10) Add 'healthCheckType' field in 'api.Container' // 11) // a) Add 'PrivateDNSName' field to 'api.ENI' - // b)Remove `AppliedStatus` field form 'api.Container' - ECSDataVersion = 11 + // b)Remove 'AppliedStatus' field from 'api.Container' + // 12) Add 'PullSucceeded' field to 'ImageState' + ECSDataVersion = 12 // ecsDataFile specifies the filename in the ECS_DATADIR ecsDataFile = "ecs_agent_data.json"