Skip to content

Commit

Permalink
engine: introduce a new env var to distinct pull image behavior
Browse files Browse the repository at this point in the history
  • Loading branch information
haikuoliu committed May 2, 2018
1 parent e69ad51 commit c7090f8
Show file tree
Hide file tree
Showing 21 changed files with 573 additions and 201 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Changelog

## 1.17.4-dev
* Feature - Configurable agent pull behavior [#1348](https://github.com/aws/amazon-ecs-agent/pull/1348)
* Bug - Fixed a bug where Docker Version() API never returns by adding a timeout [#1363](https://github.com/aws/amazon-ecs-agent/pull/1363)

## 1.17.3
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ additional details on each available environment variable.
| `ECS_IMAGE_CLEANUP_INTERVAL` | 30m | The time interval between automated image cleanup cycles. If set to less than 10 minutes, the value is ignored. | 30m | 30m |
| `ECS_IMAGE_MINIMUM_CLEANUP_AGE` | 30m | The minimum time interval between when an image is pulled and when it can be considered for automated image cleanup. | 1h | 1h |
| `ECS_NUM_IMAGES_DELETE_PER_CYCLE` | 5 | The maximum number of images to delete in a single automated image cleanup cycle. If set to less than 1, the value is ignored. | 5 | 5 |
| `ECS_IMAGE_PULL_BEHAVIOR` | <default | always | once | prefer-cached > | The behavior used to customize the pull image process. If `default` is specified, the image will be pulled remotely, if the pull fails then the cached image will be used. If `always` is specified, the image will be pulled remotely, if the pull fails then the task will fail. If `once` is specified, the image will be pulled remotely if it has not been pulled before, otherwise the cached image will be used. If `prefer-cached` is specified, the image will be pulled remotely if there is no cached image, otherwise the cached image will be used. | default | default |
| `ECS_INSTANCE_ATTRIBUTES` | `{"stack": "prod"}` | These attributes take effect only during initial registration. After the agent has joined an ECS cluster, use the PutAttributes API action to add additional attributes. For more information, see [Amazon ECS Container Agent Configuration](http://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs-agent-config.html) in the Amazon ECS Developer Guide.| `{}` | `{}` |
| `ECS_ENABLE_TASK_ENI` | `false` | Whether to enable task networking for task to be launched with its own network interface | `false` | Not applicable |
| `ECS_CNI_PLUGINS_PATH` | `/ecs/cni` | The path where the cni binary file is located | `/amazon-ecs-cni-plugins` | Not applicable |
Expand Down
22 changes: 21 additions & 1 deletion agent/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ const (
// image cleanup.
DefaultNumImagesToDeletePerCycle = 5

//DefaultImageDeletionAge specifies the default value for minimum amount of elapsed time after an image
// DefaultImageDeletionAge specifies the default value for minimum amount of elapsed time after an image
// has been pulled before it can be deleted.
DefaultImageDeletionAge = 1 * time.Hour

Expand Down Expand Up @@ -102,6 +102,25 @@ const (
DefaultTaskMetadataBurstRate = 60
)

const (
// ImagePullDefaultBehavior specifies the behavior that if an image pull API call fails,
// agent tries to start from the Docker image cache anyway, assuming that the image has not changed.
ImagePullDefaultBehavior ImagePullBehaviorType = iota

// ImagePullAlwaysBehavior specifies the behavior that if an image pull API call fails,
// the task fails instead of using cached image.
ImagePullAlwaysBehavior

// ImagePullOnceBehavior specifies the behavior that agent will only attempt to pull
// the same image once, once an image is pulled, local image cache will be used
// for all the containers.
ImagePullOnceBehavior

// ImagePullPreferCachedBehavior specifies the behavior that agent will only attempt to pull
// the image if there is no cached image.
ImagePullPreferCachedBehavior
)

var (
// DefaultPauseContainerImageName is the name of the pause container image. The linker's
// load flags are used to populate this value from the Makefile
Expand Down Expand Up @@ -386,6 +405,7 @@ func environmentConfig() (Config, error) {
MinimumImageDeletionAge: parseEnvVariableDuration("ECS_IMAGE_MINIMUM_CLEANUP_AGE"),
ImageCleanupInterval: parseEnvVariableDuration("ECS_IMAGE_CLEANUP_INTERVAL"),
NumImagesToDeletePerCycle: parseNumImagesToDeletePerCycle(),
ImagePullBehavior: parseImagePullBehavior(),
InstanceAttributes: instanceAttributes,
CNIPluginsPath: os.Getenv("ECS_CNI_PLUGINS_PATH"),
AWSVPCBlockInstanceMetdata: utils.ParseBool(os.Getenv("ECS_AWSVPC_BLOCK_IMDS"), false),
Expand Down
52 changes: 52 additions & 0 deletions agent/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ func TestEnvironmentConfig(t *testing.T) {
defer setTestEnv("ECS_IMAGE_CLEANUP_INTERVAL", "2h")()
defer setTestEnv("ECS_IMAGE_MINIMUM_CLEANUP_AGE", "30m")()
defer setTestEnv("ECS_NUM_IMAGES_DELETE_PER_CYCLE", "2")()
defer setTestEnv("ECS_IMAGE_PULL_BEHAVIOR", "always")()
defer setTestEnv("ECS_INSTANCE_ATTRIBUTES", "{\"my_attribute\": \"testing\"}")()
defer setTestEnv("ECS_ENABLE_TASK_ENI", "true")()
defer setTestEnv("ECS_TASK_METADATA_RPS_LIMIT", "1000,1100")()
Expand Down Expand Up @@ -112,6 +113,7 @@ func TestEnvironmentConfig(t *testing.T) {
assert.Equal(t, (30 * time.Minute), conf.MinimumImageDeletionAge)
assert.Equal(t, (2 * time.Hour), conf.ImageCleanupInterval)
assert.Equal(t, 2, conf.NumImagesToDeletePerCycle)
assert.Equal(t, ImagePullAlwaysBehavior, conf.ImagePullBehavior)
assert.Equal(t, "testing", conf.InstanceAttributes["my_attribute"])
assert.Equal(t, (90 * time.Second), conf.TaskCleanupWaitDuration)
serializedAdditionalLocalRoutesJSON, err := json.Marshal(conf.AWSVPCAdditionalLocalRoutes)
Expand Down Expand Up @@ -369,6 +371,56 @@ func TestImageCleanupMinimumNumImagesToDeletePerCycle(t *testing.T) {
assert.Equal(t, cfg.NumImagesToDeletePerCycle, DefaultNumImagesToDeletePerCycle, "Wrong value for NumImagesToDeletePerCycle")
}

func TestInvalidImagePullBehavior(t *testing.T) {
defer setTestRegion()()
defer setTestEnv("ECS_IMAGE_PULL_BEHAVIOR", "invalid")()
cfg, err := NewConfig(ec2.NewBlackholeEC2MetadataClient())
assert.NoError(t, err)
assert.Equal(t, cfg.ImagePullBehavior, ImagePullDefaultBehavior, "Wrong value for ImagePullBehavior")
}

func TestParseImagePullBehavior(t *testing.T) {
testcases := []struct {
name string
envVarVal string
expectedImagePullBehavior ImagePullBehaviorType
}{
{
name: "default agent behavior",
envVarVal: "default",
expectedImagePullBehavior: ImagePullDefaultBehavior,
},
{
name: "always agent behavior",
envVarVal: "always",
expectedImagePullBehavior: ImagePullAlwaysBehavior,
},
{
name: "once agent behavior",
envVarVal: "once",
expectedImagePullBehavior: ImagePullOnceBehavior,
},
{
name: "prefer-cached agent behavior",
envVarVal: "prefer-cached",
expectedImagePullBehavior: ImagePullPreferCachedBehavior,
},
{
name: "invalid agent behavior",
envVarVal: "invalid",
expectedImagePullBehavior: ImagePullDefaultBehavior,
},
}

for _, tc := range testcases {
t.Run(tc.name, func(t *testing.T) {
defer setTestRegion()()
defer setTestEnv("ECS_IMAGE_PULL_BEHAVIOR", tc.envVarVal)()
assert.Equal(t, parseImagePullBehavior(), tc.expectedImagePullBehavior, "Wrong value for ImagePullBehavior")
})
}
}

func TestTaskResourceLimitsOverride(t *testing.T) {
defer setTestRegion()()
defer setTestEnv("ECS_ENABLE_TASK_CPU_MEM_LIMIT", "false")()
Expand Down
16 changes: 16 additions & 0 deletions agent/config/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,22 @@ func parseNumImagesToDeletePerCycle() int {
return numImagesToDeletePerCycle
}

func parseImagePullBehavior() ImagePullBehaviorType {
ImagePullBehaviorString := os.Getenv("ECS_IMAGE_PULL_BEHAVIOR")
switch ImagePullBehaviorString {
case "always":
return ImagePullAlwaysBehavior
case "once":
return ImagePullOnceBehavior
case "prefer-cached":
return ImagePullPreferCachedBehavior
default:
// Use the default image pull behavior when ECS_IMAGE_PULL_BEHAVIOR is
// "default" or not valid
return ImagePullDefaultBehavior
}
}

func parseInstanceAttributes(errs []error) (map[string]string, []error) {
var instanceAttributes map[string]string
instanceAttributesEnv := os.Getenv("ECS_INSTANCE_ATTRIBUTES")
Expand Down
10 changes: 9 additions & 1 deletion agent/config/types.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2014-2017 Amazon.com, Inc. or its affiliates. All Rights Reserved.
// Copyright 2014-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"). You may
// not use this file except in compliance with the License. A copy of the
Expand All @@ -20,6 +20,10 @@ import (
cnitypes "github.com/containernetworking/cni/pkg/types"
)

// ImagePullBehaviorType is an enum variable type corresponding to different agent pull
// behaviors including default, always, never and once.
type ImagePullBehaviorType int8

type Config struct {
// DEPRECATED
// ClusterArn is the Name or full ARN of a Cluster to register into. It has
Expand Down Expand Up @@ -149,6 +153,10 @@ type Config struct {
// when Agent performs cleanup
NumImagesToDeletePerCycle int

// ImagePullBehavior specifies the agent's behavior for pulling image and loading
// local Docker image cache
ImagePullBehavior ImagePullBehaviorType

// InstanceAttributes contains key/value pairs representing
// attributes to be associated with this instance within the
// ECS service and used to influence behavior such as launch
Expand Down
2 changes: 1 addition & 1 deletion agent/dockerclient/dockerapi/docker_client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -441,7 +441,7 @@ func TestStartContainerTimeout(t *testing.T) {
wait.Add(1)
mockDocker.EXPECT().StartContainerWithContext("id", nil, gomock.Any()).Do(func(x, y, z interface{}) {
wait.Wait() // wait until timeout happens
})
}).MaxTimes(1)
mockDocker.EXPECT().InspectContainerWithContext("id", gomock.Any()).Return(nil, errors.New("test error")).AnyTimes()
ctx, cancel := context.WithCancel(context.TODO())
defer cancel()
Expand Down
2 changes: 1 addition & 1 deletion agent/engine/common_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ func validateContainerRunWorkflow(t *testing.T,
imageManager.EXPECT().AddAllImageStates(gomock.Any()).AnyTimes()
client.EXPECT().PullImage(container.Image, nil).Return(dockerapi.DockerContainerMetadata{})
imageManager.EXPECT().RecordContainerReference(container).Return(nil)
imageManager.EXPECT().GetImageStateFromImageName(gomock.Any()).Return(nil)
imageManager.EXPECT().GetImageStateFromImageName(gomock.Any()).Return(nil, false)
client.EXPECT().APIVersion().Return(defaultDockerClientAPIVersion, nil)
dockerConfig, err := task.DockerConfig(container, defaultDockerClientAPIVersion)
if err != nil {
Expand Down
21 changes: 15 additions & 6 deletions agent/engine/docker_image_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,12 @@
package engine

import (
"context"
"fmt"
"sort"
"sync"
"time"

"context"

"github.com/aws/amazon-ecs-agent/agent/api"
"github.com/aws/amazon-ecs-agent/agent/config"
"github.com/aws/amazon-ecs-agent/agent/dockerclient"
Expand All @@ -41,7 +40,7 @@ type ImageManager interface {
RecordContainerReference(container *api.Container) error
RemoveContainerReferenceFromImageState(container *api.Container) error
AddAllImageStates(imageStates []*image.ImageState)
GetImageStateFromImageName(containerImageName string) *image.ImageState
GetImageStateFromImageName(containerImageName string) (*image.ImageState, bool)
StartImageCleanupProcess(ctx context.Context)
SetSaver(stateManager statemanager.Saver)
}
Expand All @@ -59,6 +58,7 @@ type dockerImageManager struct {
minimumAgeBeforeDeletion time.Duration
numImagesToDelete int
imageCleanupTimeInterval time.Duration
imagePullBehavior config.ImagePullBehaviorType
}

// ImageStatesForDeletion is used for implementing the sort interface
Expand All @@ -72,6 +72,7 @@ func NewImageManager(cfg *config.Config, client dockerapi.DockerClient, state do
minimumAgeBeforeDeletion: cfg.MinimumImageDeletionAge,
numImagesToDelete: cfg.NumImagesToDeletePerCycle,
imageCleanupTimeInterval: cfg.ImageCleanupInterval,
imagePullBehavior: cfg.ImagePullBehavior,
}
}

Expand Down Expand Up @@ -155,6 +156,8 @@ func (imageManager *dockerImageManager) addContainerReferenceToNewImageState(con
Image: sourceImage,
PulledAt: time.Now(),
LastUsedAt: time.Now(),
// PullSucceeded should be set to true when the pull image succeeds.
PullSucceeded: false,
}
sourceImageState.UpdateImageState(container)
imageManager.addImageState(sourceImageState)
Expand Down Expand Up @@ -267,6 +270,12 @@ func (imageManager *dockerImageManager) removeExistingImageNameOfDifferentID(con
}

func (imageManager *dockerImageManager) StartImageCleanupProcess(ctx context.Context) {
// If the image pull behavior is prefer cached, don't clean up the image,
// because the cached image is needed.
if imageManager.imagePullBehavior == config.ImagePullPreferCachedBehavior {
seelog.Info("Pull behavior is set to always use cache. Disabling cleanup")
return
}
// passing the cleanup interval as argument which would help during testing
imageManager.performPeriodicImageCleanup(ctx, imageManager.imageCleanupTimeInterval)
}
Expand Down Expand Up @@ -369,15 +378,15 @@ func (imageManager *dockerImageManager) deleteImage(ctx context.Context, imageID
}
}

func (imageManager *dockerImageManager) GetImageStateFromImageName(containerImageName string) *image.ImageState {
func (imageManager *dockerImageManager) GetImageStateFromImageName(containerImageName string) (*image.ImageState, bool) {
imageManager.updateLock.Lock()
defer imageManager.updateLock.Unlock()
for _, imageState := range imageManager.getAllImageStates() {
for _, imageName := range imageState.Image.Names {
if imageName == containerImageName {
return imageState
return imageState, true
}
}
}
return nil
return nil, false
}
Loading

0 comments on commit c7090f8

Please sign in to comment.