Skip to content

Commit 961b2ef

Browse files
committed
engine: introduce a new env var to distinct pull image behavior
1 parent 6f3cf0f commit 961b2ef

21 files changed

+580
-210
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# Changelog
22

33
## 1.17.4-dev
4+
* Feature - Configurable container image pull behavior [#1348](https://github.com/aws/amazon-ecs-agent/pull/1348)
45
* Bug - Fixed a bug where Docker Version() API never returns by adding a timeout [#1363](https://github.com/aws/amazon-ecs-agent/pull/1363)
56
* Bug - Fixed a bug where tasks could get stuck waiting for execution of CNI plugin [#1358](https://github.com/aws/amazon-ecs-agent/pull/1358)
67

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ additional details on each available environment variable.
169169
| `ECS_IMAGE_CLEANUP_INTERVAL` | 30m | The time interval between automated image cleanup cycles. If set to less than 10 minutes, the value is ignored. | 30m | 30m |
170170
| `ECS_IMAGE_MINIMUM_CLEANUP_AGE` | 30m | The minimum time interval between when an image is pulled and when it can be considered for automated image cleanup. | 1h | 1h |
171171
| `ECS_NUM_IMAGES_DELETE_PER_CYCLE` | 5 | The maximum number of images to delete in a single automated image cleanup cycle. If set to less than 1, the value is ignored. | 5 | 5 |
172+
| `ECS_IMAGE_PULL_BEHAVIOR` | <default | always | once | prefer-cached > | The behavior used to customize the pull image process. If `default` is specified, the image will be pulled remotely, if the pull fails then the cached image in the instance will be used. If `always` is specified, the image will be pulled remotely, if the pull fails then the task will fail. If `once` is specified, the image will be pulled remotely if it has not been pulled before or if the image was removed by image cleanup, otherwise the cached image in the instance will be used. If `prefer-cached` is specified, the image will be pulled remotely if there is no cached image, otherwise the cached image in the instance will be used. | default | default |
172173
| `ECS_INSTANCE_ATTRIBUTES` | `{"stack": "prod"}` | These attributes take effect only during initial registration. After the agent has joined an ECS cluster, use the PutAttributes API action to add additional attributes. For more information, see [Amazon ECS Container Agent Configuration](http://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs-agent-config.html) in the Amazon ECS Developer Guide.| `{}` | `{}` |
173174
| `ECS_ENABLE_TASK_ENI` | `false` | Whether to enable task networking for task to be launched with its own network interface | `false` | Not applicable |
174175
| `ECS_CNI_PLUGINS_PATH` | `/ecs/cni` | The path where the cni binary file is located | `/amazon-ecs-cni-plugins` | Not applicable |

agent/config/config.go

+21-1
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ const (
6565
// image cleanup.
6666
DefaultNumImagesToDeletePerCycle = 5
6767

68-
//DefaultImageDeletionAge specifies the default value for minimum amount of elapsed time after an image
68+
// DefaultImageDeletionAge specifies the default value for minimum amount of elapsed time after an image
6969
// has been pulled before it can be deleted.
7070
DefaultImageDeletionAge = 1 * time.Hour
7171

@@ -102,6 +102,25 @@ const (
102102
DefaultTaskMetadataBurstRate = 60
103103
)
104104

105+
const (
106+
// ImagePullDefaultBehavior specifies the behavior that if an image pull API call fails,
107+
// agent tries to start from the Docker image cache anyway, assuming that the image has not changed.
108+
ImagePullDefaultBehavior ImagePullBehaviorType = iota
109+
110+
// ImagePullAlwaysBehavior specifies the behavior that if an image pull API call fails,
111+
// the task fails instead of using cached image.
112+
ImagePullAlwaysBehavior
113+
114+
// ImagePullOnceBehavior specifies the behavior that agent will only attempt to pull
115+
// the same image once, once an image is pulled, local image cache will be used
116+
// for all the containers.
117+
ImagePullOnceBehavior
118+
119+
// ImagePullPreferCachedBehavior specifies the behavior that agent will only attempt to pull
120+
// the image if there is no cached image.
121+
ImagePullPreferCachedBehavior
122+
)
123+
105124
var (
106125
// DefaultPauseContainerImageName is the name of the pause container image. The linker's
107126
// load flags are used to populate this value from the Makefile
@@ -386,6 +405,7 @@ func environmentConfig() (Config, error) {
386405
MinimumImageDeletionAge: parseEnvVariableDuration("ECS_IMAGE_MINIMUM_CLEANUP_AGE"),
387406
ImageCleanupInterval: parseEnvVariableDuration("ECS_IMAGE_CLEANUP_INTERVAL"),
388407
NumImagesToDeletePerCycle: parseNumImagesToDeletePerCycle(),
408+
ImagePullBehavior: parseImagePullBehavior(),
389409
InstanceAttributes: instanceAttributes,
390410
CNIPluginsPath: os.Getenv("ECS_CNI_PLUGINS_PATH"),
391411
AWSVPCBlockInstanceMetdata: utils.ParseBool(os.Getenv("ECS_AWSVPC_BLOCK_IMDS"), false),

agent/config/config_test.go

+52
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ func TestEnvironmentConfig(t *testing.T) {
8181
defer setTestEnv("ECS_IMAGE_CLEANUP_INTERVAL", "2h")()
8282
defer setTestEnv("ECS_IMAGE_MINIMUM_CLEANUP_AGE", "30m")()
8383
defer setTestEnv("ECS_NUM_IMAGES_DELETE_PER_CYCLE", "2")()
84+
defer setTestEnv("ECS_IMAGE_PULL_BEHAVIOR", "always")()
8485
defer setTestEnv("ECS_INSTANCE_ATTRIBUTES", "{\"my_attribute\": \"testing\"}")()
8586
defer setTestEnv("ECS_ENABLE_TASK_ENI", "true")()
8687
defer setTestEnv("ECS_TASK_METADATA_RPS_LIMIT", "1000,1100")()
@@ -112,6 +113,7 @@ func TestEnvironmentConfig(t *testing.T) {
112113
assert.Equal(t, (30 * time.Minute), conf.MinimumImageDeletionAge)
113114
assert.Equal(t, (2 * time.Hour), conf.ImageCleanupInterval)
114115
assert.Equal(t, 2, conf.NumImagesToDeletePerCycle)
116+
assert.Equal(t, ImagePullAlwaysBehavior, conf.ImagePullBehavior)
115117
assert.Equal(t, "testing", conf.InstanceAttributes["my_attribute"])
116118
assert.Equal(t, (90 * time.Second), conf.TaskCleanupWaitDuration)
117119
serializedAdditionalLocalRoutesJSON, err := json.Marshal(conf.AWSVPCAdditionalLocalRoutes)
@@ -369,6 +371,56 @@ func TestImageCleanupMinimumNumImagesToDeletePerCycle(t *testing.T) {
369371
assert.Equal(t, cfg.NumImagesToDeletePerCycle, DefaultNumImagesToDeletePerCycle, "Wrong value for NumImagesToDeletePerCycle")
370372
}
371373

374+
func TestInvalidImagePullBehavior(t *testing.T) {
375+
defer setTestRegion()()
376+
defer setTestEnv("ECS_IMAGE_PULL_BEHAVIOR", "invalid")()
377+
cfg, err := NewConfig(ec2.NewBlackholeEC2MetadataClient())
378+
assert.NoError(t, err)
379+
assert.Equal(t, cfg.ImagePullBehavior, ImagePullDefaultBehavior, "Wrong value for ImagePullBehavior")
380+
}
381+
382+
func TestParseImagePullBehavior(t *testing.T) {
383+
testcases := []struct {
384+
name string
385+
envVarVal string
386+
expectedImagePullBehavior ImagePullBehaviorType
387+
}{
388+
{
389+
name: "default agent behavior",
390+
envVarVal: "default",
391+
expectedImagePullBehavior: ImagePullDefaultBehavior,
392+
},
393+
{
394+
name: "always agent behavior",
395+
envVarVal: "always",
396+
expectedImagePullBehavior: ImagePullAlwaysBehavior,
397+
},
398+
{
399+
name: "once agent behavior",
400+
envVarVal: "once",
401+
expectedImagePullBehavior: ImagePullOnceBehavior,
402+
},
403+
{
404+
name: "prefer-cached agent behavior",
405+
envVarVal: "prefer-cached",
406+
expectedImagePullBehavior: ImagePullPreferCachedBehavior,
407+
},
408+
{
409+
name: "invalid agent behavior",
410+
envVarVal: "invalid",
411+
expectedImagePullBehavior: ImagePullDefaultBehavior,
412+
},
413+
}
414+
415+
for _, tc := range testcases {
416+
t.Run(tc.name, func(t *testing.T) {
417+
defer setTestRegion()()
418+
defer setTestEnv("ECS_IMAGE_PULL_BEHAVIOR", tc.envVarVal)()
419+
assert.Equal(t, parseImagePullBehavior(), tc.expectedImagePullBehavior, "Wrong value for ImagePullBehavior")
420+
})
421+
}
422+
}
423+
372424
func TestTaskResourceLimitsOverride(t *testing.T) {
373425
defer setTestRegion()()
374426
defer setTestEnv("ECS_ENABLE_TASK_CPU_MEM_LIMIT", "false")()

agent/config/parse.go

+16
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,22 @@ func parseNumImagesToDeletePerCycle() int {
117117
return numImagesToDeletePerCycle
118118
}
119119

120+
func parseImagePullBehavior() ImagePullBehaviorType {
121+
ImagePullBehaviorString := os.Getenv("ECS_IMAGE_PULL_BEHAVIOR")
122+
switch ImagePullBehaviorString {
123+
case "always":
124+
return ImagePullAlwaysBehavior
125+
case "once":
126+
return ImagePullOnceBehavior
127+
case "prefer-cached":
128+
return ImagePullPreferCachedBehavior
129+
default:
130+
// Use the default image pull behavior when ECS_IMAGE_PULL_BEHAVIOR is
131+
// "default" or not valid
132+
return ImagePullDefaultBehavior
133+
}
134+
}
135+
120136
func parseInstanceAttributes(errs []error) (map[string]string, []error) {
121137
var instanceAttributes map[string]string
122138
instanceAttributesEnv := os.Getenv("ECS_INSTANCE_ATTRIBUTES")

agent/config/types.go

+9-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2014-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
1+
// Copyright 2014-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License"). You may
44
// not use this file except in compliance with the License. A copy of the
@@ -20,6 +20,10 @@ import (
2020
cnitypes "github.com/containernetworking/cni/pkg/types"
2121
)
2222

23+
// ImagePullBehaviorType is an enum variable type corresponding to different agent pull
24+
// behaviors including default, always, never and once.
25+
type ImagePullBehaviorType int8
26+
2327
type Config struct {
2428
// DEPRECATED
2529
// ClusterArn is the Name or full ARN of a Cluster to register into. It has
@@ -149,6 +153,10 @@ type Config struct {
149153
// when Agent performs cleanup
150154
NumImagesToDeletePerCycle int
151155

156+
// ImagePullBehavior specifies the agent's behavior for pulling image and loading
157+
// local Docker image cache
158+
ImagePullBehavior ImagePullBehaviorType
159+
152160
// InstanceAttributes contains key/value pairs representing
153161
// attributes to be associated with this instance within the
154162
// ECS service and used to influence behavior such as launch

agent/dockerclient/dockerapi/docker_client_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -441,7 +441,7 @@ func TestStartContainerTimeout(t *testing.T) {
441441
wait.Add(1)
442442
mockDocker.EXPECT().StartContainerWithContext("id", nil, gomock.Any()).Do(func(x, y, z interface{}) {
443443
wait.Wait() // wait until timeout happens
444-
})
444+
}).MaxTimes(1)
445445
mockDocker.EXPECT().InspectContainerWithContext("id", gomock.Any()).Return(nil, errors.New("test error")).AnyTimes()
446446
ctx, cancel := context.WithCancel(context.TODO())
447447
defer cancel()

agent/engine/common_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ func validateContainerRunWorkflow(t *testing.T,
136136
imageManager.EXPECT().AddAllImageStates(gomock.Any()).AnyTimes()
137137
client.EXPECT().PullImage(container.Image, nil).Return(dockerapi.DockerContainerMetadata{})
138138
imageManager.EXPECT().RecordContainerReference(container).Return(nil)
139-
imageManager.EXPECT().GetImageStateFromImageName(gomock.Any()).Return(nil)
139+
imageManager.EXPECT().GetImageStateFromImageName(gomock.Any()).Return(nil, false)
140140
client.EXPECT().APIVersion().Return(defaultDockerClientAPIVersion, nil)
141141
dockerConfig, err := task.DockerConfig(container, defaultDockerClientAPIVersion)
142142
if err != nil {

agent/engine/docker_image_manager.go

+13-6
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,12 @@
1414
package engine
1515

1616
import (
17+
"context"
1718
"fmt"
1819
"sort"
1920
"sync"
2021
"time"
2122

22-
"context"
23-
2423
"github.com/aws/amazon-ecs-agent/agent/api"
2524
"github.com/aws/amazon-ecs-agent/agent/config"
2625
"github.com/aws/amazon-ecs-agent/agent/dockerclient"
@@ -41,7 +40,7 @@ type ImageManager interface {
4140
RecordContainerReference(container *api.Container) error
4241
RemoveContainerReferenceFromImageState(container *api.Container) error
4342
AddAllImageStates(imageStates []*image.ImageState)
44-
GetImageStateFromImageName(containerImageName string) *image.ImageState
43+
GetImageStateFromImageName(containerImageName string) (*image.ImageState, bool)
4544
StartImageCleanupProcess(ctx context.Context)
4645
SetSaver(stateManager statemanager.Saver)
4746
}
@@ -59,6 +58,7 @@ type dockerImageManager struct {
5958
minimumAgeBeforeDeletion time.Duration
6059
numImagesToDelete int
6160
imageCleanupTimeInterval time.Duration
61+
imagePullBehavior config.ImagePullBehaviorType
6262
}
6363

6464
// ImageStatesForDeletion is used for implementing the sort interface
@@ -72,6 +72,7 @@ func NewImageManager(cfg *config.Config, client dockerapi.DockerClient, state do
7272
minimumAgeBeforeDeletion: cfg.MinimumImageDeletionAge,
7373
numImagesToDelete: cfg.NumImagesToDeletePerCycle,
7474
imageCleanupTimeInterval: cfg.ImageCleanupInterval,
75+
imagePullBehavior: cfg.ImagePullBehavior,
7576
}
7677
}
7778

@@ -267,6 +268,12 @@ func (imageManager *dockerImageManager) removeExistingImageNameOfDifferentID(con
267268
}
268269

269270
func (imageManager *dockerImageManager) StartImageCleanupProcess(ctx context.Context) {
271+
// If the image pull behavior is prefer cached, don't clean up the image,
272+
// because the cached image is needed.
273+
if imageManager.imagePullBehavior == config.ImagePullPreferCachedBehavior {
274+
seelog.Info("Pull behavior is set to always use cache. Disabling cleanup")
275+
return
276+
}
270277
// passing the cleanup interval as argument which would help during testing
271278
imageManager.performPeriodicImageCleanup(ctx, imageManager.imageCleanupTimeInterval)
272279
}
@@ -369,15 +376,15 @@ func (imageManager *dockerImageManager) deleteImage(ctx context.Context, imageID
369376
}
370377
}
371378

372-
func (imageManager *dockerImageManager) GetImageStateFromImageName(containerImageName string) *image.ImageState {
379+
func (imageManager *dockerImageManager) GetImageStateFromImageName(containerImageName string) (*image.ImageState, bool) {
373380
imageManager.updateLock.Lock()
374381
defer imageManager.updateLock.Unlock()
375382
for _, imageState := range imageManager.getAllImageStates() {
376383
for _, imageName := range imageState.Image.Names {
377384
if imageName == containerImageName {
378-
return imageState
385+
return imageState, true
379386
}
380387
}
381388
}
382-
return nil
389+
return nil, false
383390
}

0 commit comments

Comments
 (0)