Skip to content

Commit

Permalink
engine: introduce a new env var to distinct pull image behavior
Browse files Browse the repository at this point in the history
  • Loading branch information
haikuoliu committed Apr 26, 2018
1 parent fbfc6c1 commit 7e52650
Show file tree
Hide file tree
Showing 18 changed files with 475 additions and 109 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Changelog

## 1.17.4-dev
* Feature - Configurable agent pull behavior [#1348](https://github.com/aws/amazon-ecs-agent/pull/1348)

## 1.17.3
* Enhancement - Distinct startContainerTimeouts for windows/linux, introduce a new environment variable `ECS_CONTAINER_START_TIMEOUT` to make it configurable [#1321](https://github.com/aws/amazon-ecs-agent/pull/1321)
* Enhancement - Add support for containers to inhereit ENI private DNS hostnames for `awsvpc` tasks [#1278](https://github.com/aws/amazon-ecs-agent/pull/1278)
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ additional details on each available environment variable.
| `ECS_IMAGE_CLEANUP_INTERVAL` | 30m | The time interval between automated image cleanup cycles. If set to less than 10 minutes, the value is ignored. | 30m | 30m |
| `ECS_IMAGE_MINIMUM_CLEANUP_AGE` | 30m | The minimum time interval between when an image is pulled and when it can be considered for automated image cleanup. | 1h | 1h |
| `ECS_NUM_IMAGES_DELETE_PER_CYCLE` | 5 | The maximum number of images to delete in a single automated image cleanup cycle. If set to less than 1, the value is ignored. | 5 | 5 |
| `ECS_IMAGE_PULL_BEHAVIOR` | <default | always | never | once | cache > | The behavior used to customize the pull image process. If `default` is specified, the image will be pulled remotely, if the pull fails then the cached image will be used. If `always` is specified, the image will be pulled remotely, if the pull fails then the task will fail. If `never` is specified, the cached image will be used. If `once` is specified, the image will be pulled remotely if it has not been pulled before, otherwise the cached image will be used. If `cache` is specified, the image will be pulled remotely if there is no cached image, otherwise the cached image will be used. | default | default |
| `ECS_INSTANCE_ATTRIBUTES` | `{"stack": "prod"}` | These attributes take effect only during initial registration. After the agent has joined an ECS cluster, use the PutAttributes API action to add additional attributes. For more information, see [Amazon ECS Container Agent Configuration](http://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs-agent-config.html) in the Amazon ECS Developer Guide.| `{}` | `{}` |
| `ECS_ENABLE_TASK_ENI` | `false` | Whether to enable task networking for task to be launched with its own network interface | `false` | Not applicable |
| `ECS_CNI_PLUGINS_PATH` | `/ecs/cni` | The path where the cni binary file is located | `/amazon-ecs-cni-plugins` | Not applicable |
Expand Down
24 changes: 23 additions & 1 deletion agent/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ const (
// image cleanup.
DefaultNumImagesToDeletePerCycle = 5

//DefaultImageDeletionAge specifies the default value for minimum amount of elapsed time after an image
// DefaultImageDeletionAge specifies the default value for minimum amount of elapsed time after an image
// has been pulled before it can be deleted.
DefaultImageDeletionAge = 1 * time.Hour

Expand Down Expand Up @@ -102,6 +102,27 @@ const (
DefaultTaskMetadataBurstRate = 60
)

const (
// ImagePullDefaultBehavior specifies the behavior that if an image pull API call fails,
// agent tries to start from the Docker image cache anyway, assuming that the image has not changed.
ImagePullDefaultBehavior ImagePullBehaviorType = iota

// ImagePullAlwaysBehavior specifies the behavior that if an image pull API call fails,
// the task fails instead of using cached image.
ImagePullAlwaysBehavior

// ImagePullNeverBehavior specifies the behavior that agent will never attempt to pull
// the image, use cached image anyway.
ImagePullNeverBehavior

// ImagePullOnceBehavior specifies the behavior that agent will only attempt to pull
// the same image once, once an image is pulled, local image cache will be used
// for all the containers.
ImagePullOnceBehavior

ImagePullPreferCachedBehavior
)

var (
// DefaultPauseContainerImageName is the name of the pause container image. The linker's
// load flags are used to populate this value from the Makefile
Expand Down Expand Up @@ -386,6 +407,7 @@ func environmentConfig() (Config, error) {
MinimumImageDeletionAge: parseEnvVariableDuration("ECS_IMAGE_MINIMUM_CLEANUP_AGE"),
ImageCleanupInterval: parseEnvVariableDuration("ECS_IMAGE_CLEANUP_INTERVAL"),
NumImagesToDeletePerCycle: parseNumImagesToDeletePerCycle(),
ImagePullBehavior: parseImagePullBehavior(),
InstanceAttributes: instanceAttributes,
CNIPluginsPath: os.Getenv("ECS_CNI_PLUGINS_PATH"),
AWSVPCBlockInstanceMetdata: utils.ParseBool(os.Getenv("ECS_AWSVPC_BLOCK_IMDS"), false),
Expand Down
52 changes: 52 additions & 0 deletions agent/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ func TestEnvironmentConfig(t *testing.T) {
defer setTestEnv("ECS_IMAGE_CLEANUP_INTERVAL", "2h")()
defer setTestEnv("ECS_IMAGE_MINIMUM_CLEANUP_AGE", "30m")()
defer setTestEnv("ECS_NUM_IMAGES_DELETE_PER_CYCLE", "2")()
defer setTestEnv("ECS_IMAGE_PULL_BEHAVIOR", "always")()
defer setTestEnv("ECS_INSTANCE_ATTRIBUTES", "{\"my_attribute\": \"testing\"}")()
defer setTestEnv("ECS_ENABLE_TASK_ENI", "true")()
defer setTestEnv("ECS_TASK_METADATA_RPS_LIMIT", "1000,1100")()
Expand Down Expand Up @@ -112,6 +113,7 @@ func TestEnvironmentConfig(t *testing.T) {
assert.Equal(t, (30 * time.Minute), conf.MinimumImageDeletionAge)
assert.Equal(t, (2 * time.Hour), conf.ImageCleanupInterval)
assert.Equal(t, 2, conf.NumImagesToDeletePerCycle)
assert.Equal(t, ImagePullAlwaysBehavior, conf.ImagePullBehavior)
assert.Equal(t, "testing", conf.InstanceAttributes["my_attribute"])
assert.Equal(t, (90 * time.Second), conf.TaskCleanupWaitDuration)
serializedAdditionalLocalRoutesJSON, err := json.Marshal(conf.AWSVPCAdditionalLocalRoutes)
Expand Down Expand Up @@ -369,6 +371,56 @@ func TestImageCleanupMinimumNumImagesToDeletePerCycle(t *testing.T) {
assert.Equal(t, cfg.NumImagesToDeletePerCycle, DefaultNumImagesToDeletePerCycle, "Wrong value for NumImagesToDeletePerCycle")
}

func TestInvalidImagePullBehavior(t *testing.T) {
defer setTestRegion()()
defer setTestEnv("ECS_IMAGE_PULL_BEHAVIOR", "invalid")()
cfg, err := NewConfig(ec2.NewBlackholeEC2MetadataClient())
assert.NoError(t, err)
assert.Equal(t, cfg.ImagePullBehavior, ImagePullDefaultBehavior, "Wrong value for ImagePullBehavior")
}

func TestParseImagePullBehavior(t *testing.T) {
testcases := []struct {
name string
envVarVal string
expectedImagePullBehavior ImagePullBehaviorType
}{
{
name: "default agent behavior",
envVarVal: "default",
expectedImagePullBehavior: ImagePullDefaultBehavior,
},
{
name: "always agent behavior",
envVarVal: "always",
expectedImagePullBehavior: ImagePullAlwaysBehavior,
},
{
name: "never agent behavior",
envVarVal: "never",
expectedImagePullBehavior: ImagePullNeverBehavior,
},
{
name: "once agent behavior",
envVarVal: "once",
expectedImagePullBehavior: ImagePullOnceBehavior,
},
{
name: "invalid agent behavior",
envVarVal: "invalid",
expectedImagePullBehavior: ImagePullDefaultBehavior,
},
}

for _, tc := range testcases {
t.Run(tc.name, func(t *testing.T) {
defer setTestRegion()()
defer setTestEnv("ECS_IMAGE_PULL_BEHAVIOR", tc.envVarVal)()
assert.Equal(t, parseImagePullBehavior(), tc.expectedImagePullBehavior, "Wrong value for ImagePullBehavior")
})
}
}

func TestTaskResourceLimitsOverride(t *testing.T) {
defer setTestRegion()()
defer setTestEnv("ECS_ENABLE_TASK_CPU_MEM_LIMIT", "false")()
Expand Down
18 changes: 18 additions & 0 deletions agent/config/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,24 @@ func parseNumImagesToDeletePerCycle() int {
return numImagesToDeletePerCycle
}

func parseImagePullBehavior() ImagePullBehaviorType {
ImagePullBehaviorString := os.Getenv("ECS_IMAGE_PULL_BEHAVIOR")
switch ImagePullBehaviorString {
case "always":
return ImagePullAlwaysBehavior
case "never":
return ImagePullNeverBehavior
case "once":
return ImagePullOnceBehavior
case "prefer_cache":
return ImagePullPreferCachedBehavior
default:
// Use the default image pull behavior when ECS_IMAGE_PULL_BEHAVIOR is
// "default" or not valid
return ImagePullDefaultBehavior
}
}

func parseInstanceAttributes(errs []error) (map[string]string, []error) {
var instanceAttributes map[string]string
instanceAttributesEnv := os.Getenv("ECS_INSTANCE_ATTRIBUTES")
Expand Down
10 changes: 9 additions & 1 deletion agent/config/types.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2014-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// Copyright 2014-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"). You may
// not use this file except in compliance with the License. A copy of the
Expand All @@ -20,6 +20,10 @@ import (
cnitypes "github.com/containernetworking/cni/pkg/types"
)

// ImagePullBehaviorType is an enum variable type corresponding to different agent pull
// behaviors including default, always, never and once.
type ImagePullBehaviorType int8

type Config struct {
// DEPRECATED
// ClusterArn is the Name or full ARN of a Cluster to register into. It has
Expand Down Expand Up @@ -149,6 +153,10 @@ type Config struct {
// when Agent performs cleanup
NumImagesToDeletePerCycle int

// ImagePullBehavior specifies the agent's behavior for pulling image and loading
// local Docker image cache
ImagePullBehavior ImagePullBehaviorType

// InstanceAttributes contains key/value pairs representing
// attributes to be associated with this instance within the
// ECS service and used to influence behavior such as launch
Expand Down
2 changes: 1 addition & 1 deletion agent/dockerclient/dockerapi/docker_client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -441,7 +441,7 @@ func TestStartContainerTimeout(t *testing.T) {
wait.Add(1)
mockDocker.EXPECT().StartContainerWithContext("id", nil, gomock.Any()).Do(func(x, y, z interface{}) {
wait.Wait() // wait until timeout happens
})
}).MaxTimes(1)
mockDocker.EXPECT().InspectContainerWithContext("id", gomock.Any()).Return(nil, errors.New("test error")).AnyTimes()
ctx, cancel := context.WithCancel(context.TODO())
defer cancel()
Expand Down
16 changes: 12 additions & 4 deletions agent/engine/docker_image_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,12 @@
package engine

import (
"context"
"fmt"
"sort"
"sync"
"time"

"context"

"github.com/aws/amazon-ecs-agent/agent/api"
"github.com/aws/amazon-ecs-agent/agent/config"
"github.com/aws/amazon-ecs-agent/agent/dockerclient/dockerapi"
Expand Down Expand Up @@ -58,6 +57,7 @@ type dockerImageManager struct {
minimumAgeBeforeDeletion time.Duration
numImagesToDelete int
imageCleanupTimeInterval time.Duration
imagePullBehavior config.ImagePullBehaviorType
}

// ImageStatesForDeletion is used for implementing the sort interface
Expand All @@ -71,6 +71,7 @@ func NewImageManager(cfg *config.Config, client dockerapi.DockerClient, state do
minimumAgeBeforeDeletion: cfg.MinimumImageDeletionAge,
numImagesToDelete: cfg.NumImagesToDeletePerCycle,
imageCleanupTimeInterval: cfg.ImageCleanupInterval,
imagePullBehavior: cfg.ImagePullBehavior,
}
}

Expand Down Expand Up @@ -154,6 +155,9 @@ func (imageManager *dockerImageManager) addContainerReferenceToNewImageState(con
Image: sourceImage,
PulledAt: time.Now(),
LastUsedAt: time.Now(),
// The PullSucceeded filed is false by default,
// one has to explicitly set it to be true when the pull image succeeds.
PullSucceeded: false,
}
sourceImageState.UpdateImageState(container)
imageManager.addImageState(sourceImageState)
Expand Down Expand Up @@ -266,8 +270,12 @@ func (imageManager *dockerImageManager) removeExistingImageNameOfDifferentID(con
}

func (imageManager *dockerImageManager) StartImageCleanupProcess(ctx context.Context) {
// passing the cleanup interval as argument which would help during testing
imageManager.performPeriodicImageCleanup(ctx, imageManager.imageCleanupTimeInterval)
// If the image pull behavior is prefer cached, don't clean up the image,
// because the cached image is needed.
if imageManager.imagePullBehavior != config.ImagePullPreferCachedBehavior {
// passing the cleanup interval as argument which would help during testing
imageManager.performPeriodicImageCleanup(ctx, imageManager.imageCleanupTimeInterval)
}
}

func (imageManager *dockerImageManager) performPeriodicImageCleanup(ctx context.Context, imageCleanupInterval time.Duration) {
Expand Down
14 changes: 14 additions & 0 deletions agent/engine/docker_image_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1092,3 +1092,17 @@ func TestConcurrentRemoveUnusedImages(t *testing.T) {
waitGroup.Wait()
require.Equal(t, 0, len(imageManager.imageStates))
}

func TestImageCleanupProcessNotStart(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
client := mock_dockerapi.NewMockDockerClient(ctrl)

cfg := defaultTestConfig()
cfg.ImagePullBehavior = config.ImagePullPreferCachedBehavior
imageManager := NewImageManager(cfg, client, dockerstate.NewTaskEngineState())
ctx, cancel := context.WithCancel(context.TODO())
defer cancel()
imageManager.StartImageCleanupProcess(ctx)
// Nothing should happen.
}
57 changes: 55 additions & 2 deletions agent/engine/docker_task_engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -641,6 +641,14 @@ func (engine *DockerTaskEngine) pullContainer(task *api.Task, container *api.Con
if emptyvolume.LocalImage {
return engine.client.ImportLocalEmptyVolumeImage()
}
default:
if engine.shouldUseLocalImage(engine.cfg.ImagePullBehavior, container, task.Arn) {
// It's not an internal container, so it's safe to update container reference.
engine.updateContainerReference(false, container, task.Arn)
// Return the metadata without any error even if there is no image cache,
// the image not found error will be handled by creating container.
return dockerapi.DockerContainerMetadata{Error: nil}
}
}

// Record the pullStoppedAt timestamp
Expand All @@ -657,6 +665,44 @@ func (engine *DockerTaskEngine) pullContainer(task *api.Task, container *api.Con
return engine.serialPull(task, container)
}

// shouldUseLocalImage returns true if local image cache should be used, or return false to continue
// pulling image, by inspecting the agent pull behavior variable defined in config. The caller has
// to make sure the container passed in is not an internal container.
func (engine *DockerTaskEngine) shouldUseLocalImage(ImagePullBehavior config.ImagePullBehaviorType,
container *api.Container,
taskArn string) bool {
switch ImagePullBehavior {
// If the agent pull behavior is never, we don't try to pull the image,
// try to use local image cache instead.
case config.ImagePullNeverBehavior:
seelog.Infof("Task engine [%s]: use cached image directly for container %s",
taskArn, container.Name)
return true
case config.ImagePullOnceBehavior:
// If this image has been pulled successfully before, don't pull the image,
// otherwise pull the image as usual, regardless whether the image exists or not
// (the image can be prepopulated with the AMI and never be pulled).
imageState := engine.imageManager.GetImageStateFromImageName(container.Image)
if imageState != nil && imageState.GetPullSucceeded() {
seelog.Infof("Task engine [%s]: image %s has been pulled once, not pulling it again",
taskArn, container.Image)
return true
}
return false
case config.ImagePullPreferCachedBehavior:
_, err := engine.client.InspectImage(container.Image)
if err != nil {
return false
}
seelog.Infof("Task engine [%s]: use cached image directly for container %s",
taskArn, container.Name)
return true
default:
// Need to pull the image for always and default agent pull behavior
return false
}
}

func (engine *DockerTaskEngine) concurrentPull(task *api.Task, container *api.Container) dockerapi.DockerContainerMetadata {
seelog.Debugf("Task engine [%s]: attempting to obtain ImagePullDeleteLock to pull image - %s",
task.Arn, container.Image)
Expand Down Expand Up @@ -741,16 +787,23 @@ func (engine *DockerTaskEngine) pullAndUpdateContainerReference(task *api.Task,
if container.IsInternal() {
return metadata
}
pullSucceeded := metadata.Error == nil
engine.updateContainerReference(pullSucceeded, container, task.Arn)
return metadata
}

func (engine *DockerTaskEngine) updateContainerReference(pullSucceeded bool, container *api.Container, taskArn string) {
err := engine.imageManager.RecordContainerReference(container)
if err != nil {
seelog.Errorf("Task engine [%s]: Unable to add container reference to image state: %v",
task.Arn, err)
taskArn, err)
}
imageState := engine.imageManager.GetImageStateFromImageName(container.Image)
if imageState != nil && pullSucceeded {
imageState.SetPullSucceeded(true)
}
engine.state.AddImageState(imageState)
engine.saver.Save()
return metadata
}

func (engine *DockerTaskEngine) createContainer(task *api.Task, container *api.Container) dockerapi.DockerContainerMetadata {
Expand Down
Loading

0 comments on commit 7e52650

Please sign in to comment.