From 602e48ef44aea0a559ccbb91166f3849839d1099 Mon Sep 17 00:00:00 2001 From: Yajie Chu Date: Fri, 30 Nov 2018 13:31:31 -0800 Subject: [PATCH 1/6] remove non ecs containers and images --- agent/config/config.go | 104 ++++----- agent/config/config_test.go | 15 +- agent/config/config_unix.go | 67 +++--- agent/config/config_windows.go | 43 ++-- agent/config/parse.go | 9 + agent/config/types.go | 7 + agent/dockerclient/dockerapi/docker_client.go | 44 ++++ .../dockerapi/docker_client_test.go | 50 +++++ agent/dockerclient/dockerapi/errors.go | 13 ++ .../dockerapi/mocks/dockerapi_mocks.go | 12 ++ agent/dockerclient/dockerapi/types.go | 11 + agent/dockerclient/timeout.go | 2 + agent/engine/docker_image_manager.go | 197 +++++++++++++++--- agent/engine/docker_image_manager_test.go | 83 +++++++- 14 files changed, 522 insertions(+), 135 deletions(-) diff --git a/agent/config/config.go b/agent/config/config.go index 8b24fc1be89..f716360fbb9 100644 --- a/agent/config/config.go +++ b/agent/config/config.go @@ -75,6 +75,10 @@ const ( // image cleanup. DefaultNumImagesToDeletePerCycle = 5 + // DefaultNumImagesToDeletePerCycle specifies the default number of nonecs containers to delete when agent performs + // nonecs containers cleanup. + DefaultNumNonECSContainersToDeletePerCycle = 5 + // DefaultImageDeletionAge specifies the default value for minimum amount of elapsed time after an image // has been pulled before it can be deleted. DefaultImageDeletionAge = 1 * time.Hour @@ -473,55 +477,57 @@ func environmentConfig() (Config, error) { err = apierrors.NewMultiError(errs...) } return Config{ - Cluster: os.Getenv("ECS_CLUSTER"), - APIEndpoint: os.Getenv("ECS_BACKEND_HOST"), - AWSRegion: os.Getenv("AWS_DEFAULT_REGION"), - DockerEndpoint: os.Getenv("DOCKER_HOST"), - ReservedPorts: parseReservedPorts("ECS_RESERVED_PORTS"), - ReservedPortsUDP: parseReservedPorts("ECS_RESERVED_PORTS_UDP"), - DataDir: dataDir, - Checkpoint: parseCheckpoint(dataDir), - EngineAuthType: os.Getenv("ECS_ENGINE_AUTH_TYPE"), - EngineAuthData: NewSensitiveRawMessage([]byte(os.Getenv("ECS_ENGINE_AUTH_DATA"))), - UpdatesEnabled: utils.ParseBool(os.Getenv("ECS_UPDATES_ENABLED"), false), - UpdateDownloadDir: os.Getenv("ECS_UPDATE_DOWNLOAD_DIR"), - DisableMetrics: utils.ParseBool(os.Getenv("ECS_DISABLE_METRICS"), false), - PollMetrics: utils.ParseBool(os.Getenv("ECS_POLL_METRICS"), false), - PollingMetricsWaitDuration: parseEnvVariableDuration("ECS_POLLING_METRICS_WAIT_DURATION"), - ReservedMemory: parseEnvVariableUint16("ECS_RESERVED_MEMORY"), - AvailableLoggingDrivers: parseAvailableLoggingDrivers(), - PrivilegedDisabled: utils.ParseBool(os.Getenv("ECS_DISABLE_PRIVILEGED"), false), - SELinuxCapable: utils.ParseBool(os.Getenv("ECS_SELINUX_CAPABLE"), false), - AppArmorCapable: utils.ParseBool(os.Getenv("ECS_APPARMOR_CAPABLE"), false), - TaskCleanupWaitDuration: parseEnvVariableDuration("ECS_ENGINE_TASK_CLEANUP_WAIT_DURATION"), - TaskENIEnabled: utils.ParseBool(os.Getenv("ECS_ENABLE_TASK_ENI"), false), - TaskIAMRoleEnabled: utils.ParseBool(os.Getenv("ECS_ENABLE_TASK_IAM_ROLE"), false), - TaskCPUMemLimit: parseTaskCPUMemLimitEnabled(), - DockerStopTimeout: parseDockerStopTimeout(), - ContainerStartTimeout: parseContainerStartTimeout(), - ImagePullInactivityTimeout: parseImagePullInactivityTimeout(), - CredentialsAuditLogFile: os.Getenv("ECS_AUDIT_LOGFILE"), - CredentialsAuditLogDisabled: utils.ParseBool(os.Getenv("ECS_AUDIT_LOGFILE_DISABLED"), false), - TaskIAMRoleEnabledForNetworkHost: utils.ParseBool(os.Getenv("ECS_ENABLE_TASK_IAM_ROLE_NETWORK_HOST"), false), - ImageCleanupDisabled: utils.ParseBool(os.Getenv("ECS_DISABLE_IMAGE_CLEANUP"), false), - MinimumImageDeletionAge: parseEnvVariableDuration("ECS_IMAGE_MINIMUM_CLEANUP_AGE"), - ImageCleanupInterval: parseEnvVariableDuration("ECS_IMAGE_CLEANUP_INTERVAL"), - NumImagesToDeletePerCycle: parseNumImagesToDeletePerCycle(), - ImagePullBehavior: parseImagePullBehavior(), - ImageCleanupExclusionList: parseImageCleanupExclusionList("ECS_IMAGE_CLEANUP_EXCLUDE"), - InstanceAttributes: instanceAttributes, - CNIPluginsPath: os.Getenv("ECS_CNI_PLUGINS_PATH"), - AWSVPCBlockInstanceMetdata: utils.ParseBool(os.Getenv("ECS_AWSVPC_BLOCK_IMDS"), false), - AWSVPCAdditionalLocalRoutes: additionalLocalRoutes, - ContainerMetadataEnabled: utils.ParseBool(os.Getenv("ECS_ENABLE_CONTAINER_METADATA"), false), - DataDirOnHost: os.Getenv("ECS_HOST_DATA_DIR"), - OverrideAWSLogsExecutionRole: utils.ParseBool(os.Getenv("ECS_ENABLE_AWSLOGS_EXECUTIONROLE_OVERRIDE"), false), - CgroupPath: os.Getenv("ECS_CGROUP_PATH"), - TaskMetadataSteadyStateRate: steadyStateRate, - TaskMetadataBurstRate: burstRate, - SharedVolumeMatchFullConfig: utils.ParseBool(os.Getenv("ECS_SHARED_VOLUME_MATCH_FULL_CONFIG"), false), - ContainerInstanceTags: containerInstanceTags, - ContainerInstancePropagateTagsFrom: parseContainerInstancePropagateTagsFrom(), + Cluster: os.Getenv("ECS_CLUSTER"), + APIEndpoint: os.Getenv("ECS_BACKEND_HOST"), + AWSRegion: os.Getenv("AWS_DEFAULT_REGION"), + DockerEndpoint: os.Getenv("DOCKER_HOST"), + ReservedPorts: parseReservedPorts("ECS_RESERVED_PORTS"), + ReservedPortsUDP: parseReservedPorts("ECS_RESERVED_PORTS_UDP"), + DataDir: dataDir, + Checkpoint: parseCheckpoint(dataDir), + EngineAuthType: os.Getenv("ECS_ENGINE_AUTH_TYPE"), + EngineAuthData: NewSensitiveRawMessage([]byte(os.Getenv("ECS_ENGINE_AUTH_DATA"))), + UpdatesEnabled: utils.ParseBool(os.Getenv("ECS_UPDATES_ENABLED"), false), + UpdateDownloadDir: os.Getenv("ECS_UPDATE_DOWNLOAD_DIR"), + DisableMetrics: utils.ParseBool(os.Getenv("ECS_DISABLE_METRICS"), false), + ReservedMemory: parseEnvVariableUint16("ECS_RESERVED_MEMORY"), + AvailableLoggingDrivers: parseAvailableLoggingDrivers(), + PrivilegedDisabled: utils.ParseBool(os.Getenv("ECS_DISABLE_PRIVILEGED"), false), + SELinuxCapable: utils.ParseBool(os.Getenv("ECS_SELINUX_CAPABLE"), false), + AppArmorCapable: utils.ParseBool(os.Getenv("ECS_APPARMOR_CAPABLE"), false), + TaskCleanupWaitDuration: parseEnvVariableDuration("ECS_ENGINE_TASK_CLEANUP_WAIT_DURATION"), + TaskENIEnabled: utils.ParseBool(os.Getenv("ECS_ENABLE_TASK_ENI"), false), + TaskIAMRoleEnabled: utils.ParseBool(os.Getenv("ECS_ENABLE_TASK_IAM_ROLE"), false), + DeleteNonECSImagesEnabled: utils.ParseBool(os.Getenv("ECS_ENABLE_UNTRACKED_IMAGE_CLEANUP"), false), + TaskCPUMemLimit: parseTaskCPUMemLimitEnabled(), + DockerStopTimeout: parseDockerStopTimeout(), + ContainerStartTimeout: parseContainerStartTimeout(), + ImagePullInactivityTimeout: parseImagePullInactivityTimeout(), + CredentialsAuditLogFile: os.Getenv("ECS_AUDIT_LOGFILE"), + CredentialsAuditLogDisabled: utils.ParseBool(os.Getenv("ECS_AUDIT_LOGFILE_DISABLED"), false), + TaskIAMRoleEnabledForNetworkHost: utils.ParseBool(os.Getenv("ECS_ENABLE_TASK_IAM_ROLE_NETWORK_HOST"), false), + ImageCleanupDisabled: utils.ParseBool(os.Getenv("ECS_DISABLE_IMAGE_CLEANUP"), false), + MinimumImageDeletionAge: parseEnvVariableDuration("ECS_IMAGE_MINIMUM_CLEANUP_AGE"), + ImageCleanupInterval: parseEnvVariableDuration("ECS_IMAGE_CLEANUP_INTERVAL"), + NumImagesToDeletePerCycle: parseNumImagesToDeletePerCycle(), + NumNonECSContainersToDeletePerCycle: parseNumNonECSContainersToDeletePerCycle(), + ImagePullBehavior: parseImagePullBehavior(), + ImageCleanupExclusionList: parseImageCleanupExclusionList("ECS_NONECS_IMAGE_CLEANUP_EXCLUDE"), + InstanceAttributes: instanceAttributes, + CNIPluginsPath: os.Getenv("ECS_CNI_PLUGINS_PATH"), + AWSVPCBlockInstanceMetdata: utils.ParseBool(os.Getenv("ECS_AWSVPC_BLOCK_IMDS"), false), + AWSVPCAdditionalLocalRoutes: additionalLocalRoutes, + ContainerMetadataEnabled: utils.ParseBool(os.Getenv("ECS_ENABLE_CONTAINER_METADATA"), false), + DataDirOnHost: os.Getenv("ECS_HOST_DATA_DIR"), + OverrideAWSLogsExecutionRole: utils.ParseBool(os.Getenv("ECS_ENABLE_AWSLOGS_EXECUTIONROLE_OVERRIDE"), false), + CgroupPath: os.Getenv("ECS_CGROUP_PATH"), + TaskMetadataSteadyStateRate: steadyStateRate, + TaskMetadataBurstRate: burstRate, + SharedVolumeMatchFullConfig: utils.ParseBool(os.Getenv("ECS_SHARED_VOLUME_MATCH_FULL_CONFIG"), false), + ContainerInstanceTags: containerInstanceTags, + ContainerInstancePropagateTagsFrom: parseContainerInstancePropagateTagsFrom(), + PollMetrics: utils.ParseBool(os.Getenv("ECS_POLL_METRICS"), false), + PollingMetricsWaitDuration: parseEnvVariableDuration("ECS_POLLING_METRICS_WAIT_DURATION"), }, err } diff --git a/agent/config/config_test.go b/agent/config/config_test.go index 1b69d0a2098..99ba17cc50c 100644 --- a/agent/config/config_test.go +++ b/agent/config/config_test.go @@ -100,6 +100,7 @@ func TestEnvironmentConfig(t *testing.T) { defer setTestEnv("ECS_DISABLE_PRIVILEGED", "true")() defer setTestEnv("ECS_ENGINE_TASK_CLEANUP_WAIT_DURATION", "90s")() defer setTestEnv("ECS_ENABLE_TASK_IAM_ROLE", "true")() + defer setTestEnv("ECS_ENABLE_UNTRACKED_IMAGE_CLEANUP", "true")() defer setTestEnv("ECS_ENABLE_TASK_IAM_ROLE_NETWORK_HOST", "true")() defer setTestEnv("ECS_DISABLE_IMAGE_CLEANUP", "true")() defer setTestEnv("ECS_IMAGE_CLEANUP_INTERVAL", "2h")() @@ -134,12 +135,12 @@ func TestEnvironmentConfig(t *testing.T) { assert.True(t, conf.SELinuxCapable, "Wrong value for SELinuxCapable") assert.True(t, conf.AppArmorCapable, "Wrong value for AppArmorCapable") assert.True(t, conf.TaskIAMRoleEnabled, "Wrong value for TaskIAMRoleEnabled") + assert.True(t, conf.DeleteNonECSImagesEnabled, "Wrong value for DeleteNonECSImagesEnabled") assert.True(t, conf.TaskIAMRoleEnabledForNetworkHost, "Wrong value for TaskIAMRoleEnabledForNetworkHost") assert.True(t, conf.ImageCleanupDisabled, "Wrong value for ImageCleanupDisabled") assert.True(t, conf.PollMetrics, "Wrong value for PollMetrics") expectedDurationPollingMetricsWaitDuration, _ := time.ParseDuration("10s") assert.Equal(t, expectedDurationPollingMetricsWaitDuration, conf.PollingMetricsWaitDuration) - assert.True(t, conf.TaskENIEnabled, "Wrong value for TaskNetwork") assert.Equal(t, (30 * time.Minute), conf.MinimumImageDeletionAge) assert.Equal(t, (2 * time.Hour), conf.ImageCleanupInterval) @@ -373,8 +374,8 @@ func TestInvalidFormatParseEnvVariableDuration(t *testing.T) { func TestValidForImagesCleanupExclusion(t *testing.T) { defer setTestRegion()() - defer setTestEnv("ECS_IMAGE_CLEANUP_EXCLUDE", "amazonlinux:2,amazonlinux:3")() - imagesNotDelete := parseImageCleanupExclusionList("ECS_IMAGE_CLEANUP_EXCLUDE") + defer setTestEnv("ECS_NONECS_IMAGE_CLEANUP_EXCLUDE", "amazonlinux:2,amazonlinux:3")() + imagesNotDelete := parseImageCleanupExclusionList("ECS_NONECS_IMAGE_CLEANUP_EXCLUDE") assert.Equal(t, []string{"amazonlinux:2", "amazonlinux:3"}, imagesNotDelete, "unexpected imageCleanupExclusionList") } @@ -430,6 +431,14 @@ func TestTaskIAMRoleEnabled(t *testing.T) { assert.True(t, cfg.TaskIAMRoleEnabled, "Wrong value for TaskIAMRoleEnabled") } +func TestDeleteNonECSImagesEnabled(t *testing.T) { + defer setTestRegion()() + defer setTestEnv("ECS_ENABLE_UNTRACKED_IMAGE_CLEANUP", "true")() + cfg, err := NewConfig(ec2.NewBlackholeEC2MetadataClient()) + assert.NoError(t, err) + assert.True(t, cfg.DeleteNonECSImagesEnabled, "Wrong value for DeleteNonECSImagesEnabled") +} + func TestTaskIAMRoleForHostNetworkEnabled(t *testing.T) { defer setTestRegion()() defer setTestEnv("ECS_ENABLE_TASK_IAM_ROLE_NETWORK_HOST", "true")() diff --git a/agent/config/config_unix.go b/agent/config/config_unix.go index 3a7e4369b1b..37983a08f8b 100644 --- a/agent/config/config_unix.go +++ b/agent/config/config_unix.go @@ -45,39 +45,40 @@ const ( // DefaultConfig returns the default configuration for Linux func DefaultConfig() Config { return Config{ - DockerEndpoint: "unix:///var/run/docker.sock", - ReservedPorts: []uint16{SSHPort, DockerReservedPort, DockerReservedSSLPort, AgentIntrospectionPort, AgentCredentialsPort}, - ReservedPortsUDP: []uint16{}, - DataDir: "/data/", - DataDirOnHost: "/var/lib/ecs", - DisableMetrics: false, - ReservedMemory: 0, - AvailableLoggingDrivers: []dockerclient.LoggingDriver{dockerclient.JSONFileDriver, dockerclient.NoneDriver}, - TaskCleanupWaitDuration: DefaultTaskCleanupWaitDuration, - DockerStopTimeout: defaultDockerStopTimeout, - ContainerStartTimeout: defaultContainerStartTimeout, - CredentialsAuditLogFile: defaultCredentialsAuditLogFile, - CredentialsAuditLogDisabled: false, - ImageCleanupDisabled: false, - MinimumImageDeletionAge: DefaultImageDeletionAge, - ImageCleanupInterval: DefaultImageCleanupTimeInterval, - NumImagesToDeletePerCycle: DefaultNumImagesToDeletePerCycle, - CNIPluginsPath: defaultCNIPluginsPath, - PauseContainerTarballPath: pauseContainerTarballPath, - PauseContainerImageName: DefaultPauseContainerImageName, - PauseContainerTag: DefaultPauseContainerTag, - PollMetrics: false, - PollingMetricsWaitDuration: DefaultPollingMetricsWaitDuration, - AWSVPCBlockInstanceMetdata: false, - ContainerMetadataEnabled: false, - TaskCPUMemLimit: DefaultEnabled, - CgroupPath: defaultCgroupPath, - TaskMetadataSteadyStateRate: DefaultTaskMetadataSteadyStateRate, - TaskMetadataBurstRate: DefaultTaskMetadataBurstRate, - SharedVolumeMatchFullConfig: false, // only requiring shared volumes to match on name, which is default docker behavior - ImagePullInactivityTimeout: defaultImagePullInactivityTimeout, - ContainerInstancePropagateTagsFrom: ContainerInstancePropagateTagsFromNoneType, - PrometheusMetricsEnabled: false, + DockerEndpoint: "unix:///var/run/docker.sock", + ReservedPorts: []uint16{SSHPort, DockerReservedPort, DockerReservedSSLPort, AgentIntrospectionPort, AgentCredentialsPort}, + ReservedPortsUDP: []uint16{}, + DataDir: "/data/", + DataDirOnHost: "/var/lib/ecs", + DisableMetrics: false, + ReservedMemory: 0, + AvailableLoggingDrivers: []dockerclient.LoggingDriver{dockerclient.JSONFileDriver, dockerclient.NoneDriver}, + TaskCleanupWaitDuration: DefaultTaskCleanupWaitDuration, + DockerStopTimeout: defaultDockerStopTimeout, + ContainerStartTimeout: defaultContainerStartTimeout, + CredentialsAuditLogFile: defaultCredentialsAuditLogFile, + CredentialsAuditLogDisabled: false, + ImageCleanupDisabled: false, + MinimumImageDeletionAge: DefaultImageDeletionAge, + ImageCleanupInterval: DefaultImageCleanupTimeInterval, + ImagePullInactivityTimeout: defaultImagePullInactivityTimeout, + NumImagesToDeletePerCycle: DefaultNumImagesToDeletePerCycle, + NumNonECSContainersToDeletePerCycle: DefaultNumNonECSContainersToDeletePerCycle, + CNIPluginsPath: defaultCNIPluginsPath, + PauseContainerTarballPath: pauseContainerTarballPath, + PauseContainerImageName: DefaultPauseContainerImageName, + PauseContainerTag: DefaultPauseContainerTag, + AWSVPCBlockInstanceMetdata: false, + ContainerMetadataEnabled: false, + TaskCPUMemLimit: DefaultEnabled, + CgroupPath: defaultCgroupPath, + TaskMetadataSteadyStateRate: DefaultTaskMetadataSteadyStateRate, + TaskMetadataBurstRate: DefaultTaskMetadataBurstRate, + SharedVolumeMatchFullConfig: false, // only requiring shared volumes to match on name, which is default docker behavior + ContainerInstancePropagateTagsFrom: ContainerInstancePropagateTagsFromNoneType, + PrometheusMetricsEnabled: false, + PollMetrics: false, + PollingMetricsWaitDuration: DefaultPollingMetricsWaitDuration, } } diff --git a/agent/config/config_windows.go b/agent/config/config_windows.go index 9418ae523d0..7f4371b6858 100644 --- a/agent/config/config_windows.go +++ b/agent/config/config_windows.go @@ -79,27 +79,28 @@ func DefaultConfig() Config { DataDir: dataDir, // DataDirOnHost is identical to DataDir for Windows because we do not // run as a container - DataDirOnHost: dataDir, - ReservedMemory: 0, - AvailableLoggingDrivers: []dockerclient.LoggingDriver{dockerclient.JSONFileDriver, dockerclient.NoneDriver, dockerclient.AWSLogsDriver}, - TaskCleanupWaitDuration: DefaultTaskCleanupWaitDuration, - PollMetrics: false, - PollingMetricsWaitDuration: DefaultPollingMetricsWaitDuration, - DockerStopTimeout: defaultDockerStopTimeout, - ContainerStartTimeout: defaultContainerStartTimeout, - ImagePullInactivityTimeout: defaultImagePullInactivityTimeout, - CredentialsAuditLogFile: filepath.Join(ecsRoot, defaultCredentialsAuditLogFile), - CredentialsAuditLogDisabled: false, - ImageCleanupDisabled: false, - MinimumImageDeletionAge: DefaultImageDeletionAge, - ImageCleanupInterval: DefaultImageCleanupTimeInterval, - NumImagesToDeletePerCycle: DefaultNumImagesToDeletePerCycle, - ContainerMetadataEnabled: false, - TaskCPUMemLimit: ExplicitlyDisabled, - PlatformVariables: platformVariables, - TaskMetadataSteadyStateRate: DefaultTaskMetadataSteadyStateRate, - TaskMetadataBurstRate: DefaultTaskMetadataBurstRate, - SharedVolumeMatchFullConfig: false, //only requiring shared volumes to match on name, which is default docker behavior + DataDirOnHost: dataDir, + ReservedMemory: 0, + AvailableLoggingDrivers: []dockerclient.LoggingDriver{dockerclient.JSONFileDriver, dockerclient.NoneDriver, dockerclient.AWSLogsDriver}, + TaskCleanupWaitDuration: DefaultTaskCleanupWaitDuration, + DockerStopTimeout: defaultDockerStopTimeout, + ContainerStartTimeout: defaultContainerStartTimeout, + ImagePullInactivityTimeout: defaultImagePullInactivityTimeout, + CredentialsAuditLogFile: filepath.Join(ecsRoot, defaultCredentialsAuditLogFile), + CredentialsAuditLogDisabled: false, + ImageCleanupDisabled: false, + MinimumImageDeletionAge: DefaultImageDeletionAge, + ImageCleanupInterval: DefaultImageCleanupTimeInterval, + NumImagesToDeletePerCycle: DefaultNumImagesToDeletePerCycle, + NumNonECSContainersToDeletePerCycle: DefaultNumNonECSContainersToDeletePerCycle, + ContainerMetadataEnabled: false, + TaskCPUMemLimit: ExplicitlyDisabled, + PlatformVariables: platformVariables, + TaskMetadataSteadyStateRate: DefaultTaskMetadataSteadyStateRate, + TaskMetadataBurstRate: DefaultTaskMetadataBurstRate, + SharedVolumeMatchFullConfig: false, //only requiring shared volumes to match on name, which is default docker behavior + PollMetrics: false, + PollingMetricsWaitDuration: DefaultPollingMetricsWaitDuration, } } diff --git a/agent/config/parse.go b/agent/config/parse.go index 1ca63290cc7..7b116299a6a 100644 --- a/agent/config/parse.go +++ b/agent/config/parse.go @@ -130,6 +130,15 @@ func parseNumImagesToDeletePerCycle() int { return numImagesToDeletePerCycle } +func parseNumNonECSContainersToDeletePerCycle() int { + numNonEcsContainersToDeletePerCycleEnvVal := os.Getenv("NONECS_NUM_CONTAINERS_DELETE_PER_CYCLE") + numNonEcsContainersToDeletePerCycle, err := strconv.Atoi(numNonEcsContainersToDeletePerCycleEnvVal) + if numNonEcsContainersToDeletePerCycleEnvVal != "" && err != nil { + seelog.Warnf("Invalid format for \"NONECS_NUM_CONTAINERS_DELETE_PER_CYCLE\", expected an integer. err %v", err) + } + return numNonEcsContainersToDeletePerCycle +} + func parseImagePullBehavior() ImagePullBehaviorType { ImagePullBehaviorString := os.Getenv("ECS_IMAGE_PULL_BEHAVIOR") switch ImagePullBehaviorString { diff --git a/agent/config/types.go b/agent/config/types.go index cf297f853e1..689e83a39c6 100644 --- a/agent/config/types.go +++ b/agent/config/types.go @@ -135,6 +135,9 @@ type Config struct { // tasks with IAM Roles. TaskIAMRoleEnabled bool + // DeleteNonECSImagesEnabled specifies if the Agent can delete the cached, unused non-ecs images. + DeleteNonECSImagesEnabled bool + // TaskCPUMemLimit specifies if Agent can launch a task with a hierarchical cgroup TaskCPUMemLimit Conditional @@ -168,6 +171,10 @@ type Config struct { // when Agent performs cleanup NumImagesToDeletePerCycle int + // NumNonECSContainersToDeletePerCycle specifies the num of NonECS containers to delete every time + // when Agent performs cleanup + NumNonECSContainersToDeletePerCycle int + // ImagePullBehavior specifies the agent's behavior for pulling image and loading // local Docker image cache ImagePullBehavior ImagePullBehaviorType diff --git a/agent/dockerclient/dockerapi/docker_client.go b/agent/dockerclient/dockerapi/docker_client.go index 06a4c5449e9..941e836e426 100644 --- a/agent/dockerclient/dockerapi/docker_client.go +++ b/agent/dockerclient/dockerapi/docker_client.go @@ -164,6 +164,9 @@ type DockerClient interface { // should be provided for the request. ListContainers(context.Context, bool, time.Duration) ListContainersResponse + // ListImages returns the set of the images known to the Docker daemon + ListImages(context.Context, time.Duration) ListImagesResponse + // CreateVolume creates a docker volume. A timeout value should be provided for the request CreateVolume(context.Context, string, string, map[string]string, map[string]string, time.Duration) SDKVolumeResponse @@ -198,6 +201,7 @@ type DockerClient interface { // RemoveImage removes the metadata associated with an image and may remove the underlying layer data. A timeout // value and a context should be provided for the request. RemoveImage(context.Context, string, time.Duration) error + // LoadImage loads an image from an input stream. A timeout value and a context should be provided for the request. LoadImage(context.Context, io.Reader, time.Duration) error } @@ -999,6 +1003,46 @@ func (dg *dockerGoClient) listContainers(ctx context.Context, all bool) ListCont return ListContainersResponse{DockerIDs: containerIDs, Error: nil} } +func (dg *dockerGoClient) ListImages(ctx context.Context, timeout time.Duration) ListImagesResponse { + ctx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + + response := make(chan ListImagesResponse, 1) + go func() { response <- dg.listImages(ctx) }() + select { + case resp := <-response: + return resp + case <-ctx.Done(): + err := ctx.Err() + if err == context.DeadlineExceeded { + return ListImagesResponse{Error: &DockerTimeoutError{timeout, "listing"}} + } + return ListImagesResponse{Error: &CannotListImagesError{err}} + } +} + +func (dg *dockerGoClient) listImages(ctx context.Context) ListImagesResponse { + client, err := dg.dockerClient() + if err != nil { + return ListImagesResponse{Error: err} + } + images, err := client.ListImages(docker.ListImagesOptions{ + All: false, + }) + if err != nil { + return ListImagesResponse{Error: err} + } + var imagesRepoTag []string + imageIDs := make([]string, len(images)) + for i, image := range images { + imageIDs[i] = image.ID + for _, imageRepoTag := range image.RepoTags { + imagesRepoTag = append(imagesRepoTag, imageRepoTag) + } + } + return ListImagesResponse{ImageIDs: imageIDs, RepoTags: imagesRepoTag, Error: nil} +} + func (dg *dockerGoClient) SupportedVersions() []dockerclient.DockerVersion { return dg.sdkClientFactory.FindSupportedAPIVersions() } diff --git a/agent/dockerclient/dockerapi/docker_client_test.go b/agent/dockerclient/dockerapi/docker_client_test.go index a03e72952d6..18a0de0b09b 100644 --- a/agent/dockerclient/dockerapi/docker_client_test.go +++ b/agent/dockerclient/dockerapi/docker_client_test.go @@ -59,6 +59,11 @@ import ( // TODO Remove mock go-dockerclient calls and fields after migration is complete. const xContainerShortTimeout = 1 * time.Millisecond +// xImgesShortTimeout is a short duration intended to be used by the +// docker client APIs that test if the underlying context gets canceled +// upon the expiration of the timeout duration. +const xImageShortTimeout = 1 * time.Millisecond + func defaultTestConfig() *config.Config { cfg, _ := config.NewConfig(ec2.NewBlackholeEC2MetadataClient()) return cfg @@ -807,6 +812,51 @@ func TestListContainersTimeout(t *testing.T) { wait.Done() } +func TestListImages(t *testing.T) { + mockDocker, client, _, _, _, done := dockerClientSetup(t) + defer done() + + images := []docker.APIImages{{ID: "id"}} + mockDocker.EXPECT().ListImages(gomock.Any()).Return(images, nil) + ctx, cancel := context.WithCancel(context.TODO()) + defer cancel() + response := client.ListImages(ctx, dockerclient.ListImagesTimeout) + if response.Error != nil { + t.Error("Did not expect error") + } + + imageIDs := response.ImageIDs + if len(imageIDs) != 1 { + t.Error("Unexpected number of images in list", len(imageIDs)) + } + + if imageIDs[0] != "id" { + t.Error("Unexpected image id in the list:", imageIDs[0]) + } +} + +func TestListImagesTimeout(t *testing.T) { + mockDocker, client, _, _, _, done := dockerClientSetup(t) + defer done() + + wait := &sync.WaitGroup{} + wait.Add(1) + mockDocker.EXPECT().ListImages(gomock.Any()).Do(func(x interface{}) { + wait.Wait() + // Don't return, verify timeout happens + }).MaxTimes(1).Return(nil, errors.New("test error")) + ctx, cancel := context.WithCancel(context.TODO()) + defer cancel() + response := client.ListImages(ctx, xImageShortTimeout) + if response.Error == nil { + t.Error("Expected error for pull timeout") + } + if response.Error.(apierrors.NamedError).ErrorName() != "DockerTimeoutError" { + t.Error("Wrong error type") + } + wait.Done() +} + // Test for constructor fail when Docker SDK Client Ping() fails func TestPingSdkFailError(t *testing.T) { ctrl := gomock.NewController(t) diff --git a/agent/dockerclient/dockerapi/errors.go b/agent/dockerclient/dockerapi/errors.go index e8792d99436..88552d8b68b 100644 --- a/agent/dockerclient/dockerapi/errors.go +++ b/agent/dockerclient/dockerapi/errors.go @@ -260,6 +260,19 @@ func (err CannotListContainersError) ErrorName() string { return "CannotListContainersError" } +type CannotListImagesError struct { + FromError error +} + +func (err CannotListImagesError) Error() string { + return err.FromError.Error() +} + +// ErrorName returns name of the CannotListImagesError +func (err CannotListImagesError) ErrorName() string { + return "CannotListImagesError" +} + // CannotCreateVolumeError indicates any error when trying to create a volume type CannotCreateVolumeError struct { fromError error diff --git a/agent/dockerclient/dockerapi/mocks/dockerapi_mocks.go b/agent/dockerclient/dockerapi/mocks/dockerapi_mocks.go index e2401ec035a..74020dafff8 100644 --- a/agent/dockerclient/dockerapi/mocks/dockerapi_mocks.go +++ b/agent/dockerclient/dockerapi/mocks/dockerapi_mocks.go @@ -181,6 +181,18 @@ func (mr *MockDockerClientMockRecorder) ListContainers(arg0, arg1, arg2 interfac return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ListContainers", reflect.TypeOf((*MockDockerClient)(nil).ListContainers), arg0, arg1, arg2) } +// ListImages mocks base method +func (m *MockDockerClient) ListImages(arg0 context.Context, arg1 time.Duration) dockerapi.ListImagesResponse { + ret := m.ctrl.Call(m, "ListImages", arg0, arg1) + ret0, _ := ret[0].(dockerapi.ListImagesResponse) + return ret0 +} + +// ListImages indicates an expected call of ListImages +func (mr *MockDockerClientMockRecorder) ListImages(arg0, arg1 interface{}) *gomock.Call { + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ListImages", reflect.TypeOf((*MockDockerClient)(nil).ListImages), arg0, arg1) +} + // ListPlugins mocks base method func (m *MockDockerClient) ListPlugins(arg0 context.Context, arg1 time.Duration, arg2 filters.Args) dockerapi.ListPluginsResponse { ret := m.ctrl.Call(m, "ListPlugins", arg0, arg1, arg2) diff --git a/agent/dockerclient/dockerapi/types.go b/agent/dockerclient/dockerapi/types.go index 5053d971454..fbe950d677b 100644 --- a/agent/dockerclient/dockerapi/types.go +++ b/agent/dockerclient/dockerapi/types.go @@ -82,6 +82,17 @@ type ListContainersResponse struct { Error error } +// ListImagesResponse encapsulates the response from the docker client for the +// ListImages call. +type ListImagesResponse struct { + // ImagesIDs is the list of Images IDs from the ListImages call + ImageIDs []string + // RepoTags is the list of Images names from the ListImages call + RepoTags []string + // Error contains any error returned when listing images + Error error +} + // VolumeResponse wrapper for CreateVolume and InspectVolume // TODO Remove type when migration is complete type VolumeResponse struct { diff --git a/agent/dockerclient/timeout.go b/agent/dockerclient/timeout.go index cb8152b41d5..c5c2497a3b6 100644 --- a/agent/dockerclient/timeout.go +++ b/agent/dockerclient/timeout.go @@ -19,6 +19,8 @@ import "time" const ( // ListContainersTimeout is the timeout for the ListContainers API. ListContainersTimeout = 10 * time.Minute + // ListImagesTimeout is the timeout for the ListImages API + ListImagesTimeout = 10 * time.Minute // LoadImageTimeout is the timeout for the LoadImage API. It's set // to much lower value than pullImageTimeout as it involves loading // image from either a file or STDIN diff --git a/agent/engine/docker_image_manager.go b/agent/engine/docker_image_manager.go index b8877d3cdaa..c5cd060c735 100644 --- a/agent/engine/docker_image_manager.go +++ b/agent/engine/docker_image_manager.go @@ -48,18 +48,21 @@ type ImageManager interface { // dockerImageManager accounts all the images and their states in the instance. // It also has the cleanup policy configuration. type dockerImageManager struct { - imageStates []*image.ImageState - client dockerapi.DockerClient - updateLock sync.RWMutex - imageCleanupTicker *time.Ticker - state dockerstate.TaskEngineState - saver statemanager.Saver - imageStatesConsideredForDeletion map[string]*image.ImageState - minimumAgeBeforeDeletion time.Duration - numImagesToDelete int - imageCleanupTimeInterval time.Duration - imagePullBehavior config.ImagePullBehaviorType - imageCleanupExclusionList []string + imageStates []*image.ImageState + client dockerapi.DockerClient + updateLock sync.RWMutex + imageCleanupTicker *time.Ticker + state dockerstate.TaskEngineState + saver statemanager.Saver + imageStatesConsideredForDeletion map[string]*image.ImageState + minimumAgeBeforeDeletion time.Duration + numImagesToDelete int + imageCleanupTimeInterval time.Duration + imagePullBehavior config.ImagePullBehaviorType + imageCleanupExclusionList []string + deleteNonECSImagesEnabled bool + nonECSContainerCleanupWaitDuration time.Duration + numNonECSContainersToDelete int } // ImageStatesForDeletion is used for implementing the sort interface @@ -68,13 +71,16 @@ type ImageStatesForDeletion []*image.ImageState // NewImageManager returns a new ImageManager func NewImageManager(cfg *config.Config, client dockerapi.DockerClient, state dockerstate.TaskEngineState) ImageManager { return &dockerImageManager{ - client: client, - state: state, - minimumAgeBeforeDeletion: cfg.MinimumImageDeletionAge, - numImagesToDelete: cfg.NumImagesToDeletePerCycle, - imageCleanupTimeInterval: cfg.ImageCleanupInterval, - imagePullBehavior: cfg.ImagePullBehavior, - imageCleanupExclusionList: cfg.ImageCleanupExclusionList, + client: client, + state: state, + minimumAgeBeforeDeletion: cfg.MinimumImageDeletionAge, + numImagesToDelete: cfg.NumImagesToDeletePerCycle, + imageCleanupTimeInterval: cfg.ImageCleanupInterval, + imagePullBehavior: cfg.ImagePullBehavior, + imageCleanupExclusionList: cfg.ImageCleanupExclusionList, + deleteNonECSImagesEnabled: cfg.DeleteNonECSImagesEnabled, + nonECSContainerCleanupWaitDuration: cfg.TaskCleanupWaitDuration, + numNonECSContainersToDelete: cfg.NumNonECSContainersToDeletePerCycle, } } @@ -303,14 +309,154 @@ func (imageManager *dockerImageManager) removeUnusedImages(ctx context.Context) imageManager.updateLock.Lock() defer imageManager.updateLock.Unlock() + var numECSImagesDeleted int imageManager.imageStatesConsideredForDeletion = imageManager.imagesConsiderForDeletion(imageManager.getAllImageStates()) for i := 0; i < imageManager.numImagesToDelete; i++ { err := imageManager.removeLeastRecentlyUsedImage(ctx) + numECSImagesDeleted = i if err != nil { seelog.Infof("End of eligible images for deletion: %v; Still have %d image states being managed", err, len(imageManager.getAllImageStates())) break } } + if imageManager.deleteNonECSImagesEnabled { + // remove nonecs containers + imageManager.removeNonECSContainers(ctx) + // remove nonecs images + var nonECSImagesNumToDelete = imageManager.numImagesToDelete - numECSImagesDeleted + imageManager.removeNonECSImages(ctx, nonECSImagesNumToDelete) + } +} + +func (imageManager *dockerImageManager) removeNonECSContainers(ctx context.Context) { + nonECSContainersIDs, err := imageManager.nonECSContainersIDs(ctx) + if err != nil { + seelog.Errorf("Error getting non-ECS container IDs: %v", err) + } + var nonECSContainerRemoveAvailableIDs []string + for _, id := range nonECSContainersIDs { + response, _ := imageManager.client.InspectContainer(ctx, id, dockerclient.InspectContainerTimeout) + if response.State.Status == "exited" && time.Now().Sub(response.State.FinishedAt) > imageManager.nonECSContainerCleanupWaitDuration { + nonECSContainerRemoveAvailableIDs = append(nonECSContainerRemoveAvailableIDs, id) + } + } + var numNonECSContainerDeleted = 0 + for _, id := range nonECSContainerRemoveAvailableIDs { + if numNonECSContainerDeleted == imageManager.numNonECSContainersToDelete { + break + } + seelog.Infof("Removing non-ECS container id: %s", id) + err := imageManager.client.RemoveContainer(ctx, id, dockerclient.RemoveContainerTimeout) + if err == nil { + seelog.Infof("Image removed: %s", id) + numNonECSContainerDeleted++ + } else { + seelog.Errorf("Error removing Image %s - %v", id, err) + continue + } + } +} + +func (imageManager *dockerImageManager) nonECSContainersIDs(ctx context.Context) ([]string, error) { + var allContainersIDs []string + listContainersResponse := imageManager.client.ListContainers(ctx, true, dockerclient.ListContainersTimeout) + if listContainersResponse.Error != nil { + fmt.Errorf("error listing containers: %v", listContainersResponse.Error) + } + for _, dockerID := range listContainersResponse.DockerIDs { + allContainersIDs = append(allContainersIDs, dockerID) + } + ECSContainersIDs := imageManager.state.GetAllContainerIDs() + nonECSContainersIDs := exclude(allContainersIDs, ECSContainersIDs) + return nonECSContainersIDs, nil +} + +type ImageWithSize struct { + ImageName string + Size int64 +} + +func (imageManager *dockerImageManager) removeNonECSImages(ctx context.Context, nonECSImagesNumToDelete int) { + if nonECSImagesNumToDelete == 0 { + return + } + var nonECSImageNames = imageManager.nonECSImagesNames(ctx) + var nonECSImageNamesRemoveEligible []string + for _, nonECSImage := range nonECSImageNames { + if !isInExclusionList(nonECSImage, imageManager.imageCleanupExclusionList) { + nonECSImageNamesRemoveEligible = append(nonECSImageNamesRemoveEligible, nonECSImage) + } + } + + var imageWithSizeList []ImageWithSize + for _, imageName := range nonECSImageNamesRemoveEligible { + resp, _ := imageManager.client.InspectImage(imageName) + imageWithSizeList = append(imageWithSizeList, ImageWithSize{imageName, resp.Size}) + } + // we want to sort images with size ascending + sort.Slice(imageWithSizeList, func(i, j int) bool { + return imageWithSizeList[i].Size < imageWithSizeList[j].Size + }) + + // we will remove the remaining nonECSImages in each performPeriodicImageCleanup call() + var numImagesAlreadyDelete = 0 + for _, kv := range imageWithSizeList { + if numImagesAlreadyDelete == nonECSImagesNumToDelete { + break + } + seelog.Infof("Removing non-ECS Image: %s", kv.ImageName) + err := imageManager.client.RemoveImage(ctx, kv.ImageName, dockerapi.RemoveImageTimeout) + if err != nil { + seelog.Errorf("Error removing Image %s - %v", kv.ImageName, err) + continue + } else { + seelog.Infof("Image removed: %s", kv.ImageName) + numImagesAlreadyDelete++ + } + } +} + +func (imageManager *dockerImageManager) nonECSImagesNames(ctx context.Context) []string { + response := imageManager.client.ListImages(ctx, dockerclient.ListImagesTimeout) + var allImagesNames []string + for _, name := range response.RepoTags { + allImagesNames = append(allImagesNames, name) + } + var ecsImageNames []string + for _, imageState := range imageManager.getAllImageStates() { + for _, imageName := range imageState.Image.Names { + ecsImageNames = append(ecsImageNames, imageName) + } + } + + var nonECSImageNames = exclude(allImagesNames, ecsImageNames) + return nonECSImageNames +} + +func isInExclusionList(imageName string, imageExclusionList []string) bool { + for _, exclusionName := range imageExclusionList { + if imageName == exclusionName { + return true + } + } + return false +} + +func exclude(allList []string, exclusionList []string) []string { + var ret []string + var allMap = make(map[string]bool) + for _, a := range allList { + allMap[a] = true + } + for _, b := range exclusionList { + allMap[b] = false + } + for k := range allMap { + if allMap[k] == true { + ret = append(ret, k) + } + } + return ret } func (imageManager *dockerImageManager) imagesConsiderForDeletion(allImageStates []*image.ImageState) map[string]*image.ImageState { @@ -328,14 +474,11 @@ func (imageManager *dockerImageManager) imagesConsiderForDeletion(allImageStates } func (imageManager *dockerImageManager) isExcludedFromCleanup(imageState *image.ImageState) bool { - var encountered = make(map[string]bool) - for _, imageNotDelete := range imageManager.imageCleanupExclusionList { - encountered[imageNotDelete] = true - } - - for _, name := range imageState.Image.Names { - if encountered[name] { - return true + for _, ecsName := range imageState.Image.Names { + for _, exclusionName := range imageManager.imageCleanupExclusionList { + if ecsName == exclusionName { + return true + } } } return false diff --git a/agent/engine/docker_image_manager_test.go b/agent/engine/docker_image_manager_test.go index b0170ea3bb8..8ca9d4f0ce7 100644 --- a/agent/engine/docker_image_manager_test.go +++ b/agent/engine/docker_image_manager_test.go @@ -26,6 +26,7 @@ import ( apicontainer "github.com/aws/amazon-ecs-agent/agent/api/container" "github.com/aws/amazon-ecs-agent/agent/config" "github.com/aws/amazon-ecs-agent/agent/dockerclient" + "github.com/aws/amazon-ecs-agent/agent/dockerclient/dockerapi" "github.com/aws/amazon-ecs-agent/agent/dockerclient/dockerapi/mocks" "github.com/aws/amazon-ecs-agent/agent/ec2" "github.com/aws/amazon-ecs-agent/agent/engine/dockerstate" @@ -61,6 +62,10 @@ func TestImagePullRemoveDeadlock(t *testing.T) { sleepContainerImageInspected := &types.ImageInspect{ ID: "sha256:qwerty", } + listImagesResponse := dockerapi.ListImagesResponse{ + ImageIDs: []string{"sha256:qwerty"}, + } + client.EXPECT().ListImages(gomock.Any(), dockerclient.ListImagesTimeout).Return(listImagesResponse).AnyTimes() // Cause a fake delay when recording container reference so that the // race condition between ImagePullLock and updateLock gets exercised @@ -611,9 +616,12 @@ func TestImageCleanupExclusionListWithSingleName(t *testing.T) { "sha256:qwerty2": ImageStateB, }, } + listImagesResponse := dockerapi.ListImagesResponse{ + RepoTags: []string{"b"}, + } + client.EXPECT().ListImages(gomock.Any(), dockerclient.ListImagesTimeout).Return(listImagesResponse).AnyTimes() var testImageStates = []*image.ImageState{ImageStateA, ImageStateB, ImageStateC} var testResult = imageManager.imagesConsiderForDeletion(testImageStates) - assert.Equal(t, 1, len(testResult), "Expected 1 image state to be returned for deletion") if !reflect.DeepEqual(imageManager.imageStatesConsideredForDeletion, testResult) { t.Error("Incorrect image return from getCandidateImagesForDeletionHelper function") @@ -667,9 +675,12 @@ func TestImageCleanupExclusionListWithMultipleNames(t *testing.T) { "sha256:qwerty4": ImageStateD, }, } + listImagesResponse := dockerapi.ListImagesResponse{ + RepoTags: []string{"x", "y", "z"}, + } + client.EXPECT().ListImages(gomock.Any(), dockerclient.ListImagesTimeout).Return(listImagesResponse).AnyTimes() var testImageStates = []*image.ImageState{ImageStateA, ImageStateB, ImageStateC, ImageStateD} var testResult = imageManager.imagesConsiderForDeletion(testImageStates) - assert.Equal(t, 1, len(testResult), "Expected 1 image state to be returned for deletion") if !reflect.DeepEqual(imageManager.imageStatesConsideredForDeletion, testResult) { t.Error("Incorrect image return from getCandidateImagesForDeletionHelper function") @@ -820,6 +831,11 @@ func TestImageCleanupHappyPath(t *testing.T) { imageInspected := &types.ImageInspect{ ID: "sha256:qwerty", } + listImagesResponse := dockerapi.ListImagesResponse{ + ImageIDs: []string{"sha256:qwerty"}, + } + + client.EXPECT().ListImages(gomock.Any(), dockerclient.ListImagesTimeout).Return(listImagesResponse).AnyTimes() client.EXPECT().InspectImage(container.Image).Return(imageInspected, nil).AnyTimes() err := imageManager.RecordContainerReference(container) if err != nil { @@ -876,6 +892,11 @@ func TestImageCleanupCannotRemoveImage(t *testing.T) { imageInspected := &types.ImageInspect{ ID: "sha256:qwerty", } + listImagesResponse := dockerapi.ListImagesResponse{ + ImageIDs: []string{"sha256:qwerty"}, + } + + client.EXPECT().ListImages(gomock.Any(), dockerclient.ListImagesTimeout).Return(listImagesResponse).AnyTimes() client.EXPECT().InspectImage(container.Image).Return(imageInspected, nil).AnyTimes() err := imageManager.RecordContainerReference(container) if err != nil { @@ -929,6 +950,10 @@ func TestImageCleanupRemoveImageById(t *testing.T) { imageInspected := &types.ImageInspect{ ID: "sha256:qwerty", } + listImagesResponse := dockerapi.ListImagesResponse{ + ImageIDs: []string{"sha256:qwerty"}, + } + client.EXPECT().ListImages(gomock.Any(), dockerclient.ListImagesTimeout).Return(listImagesResponse).AnyTimes() client.EXPECT().InspectImage(container.Image).Return(imageInspected, nil).AnyTimes() err := imageManager.RecordContainerReference(container) if err != nil { @@ -954,6 +979,56 @@ func TestImageCleanupRemoveImageById(t *testing.T) { } } +func TestNonECSImageAndContainersCleanupRemoveImage(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + client := mock_dockerapi.NewMockDockerClient(ctrl) + imageManager := &dockerImageManager{ + client: client, + state: dockerstate.NewTaskEngineState(), + minimumAgeBeforeDeletion: config.DefaultImageDeletionAge, + numImagesToDelete: config.DefaultNumImagesToDeletePerCycle, + imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval, + deleteNonECSImagesEnabled: true, + } + imageManager.SetSaver(statemanager.NewNoopStateManager()) + + listContainersResponse := dockerapi.ListContainersResponse{ + DockerIDs: []string{"1"}, + } + inspectContainerResponse := &docker.Container{ + ID: "1", + State: docker.State{ + Status: "exited", + FinishedAt: time.Now().AddDate(0, -2, 0), + }, + } + listImagesResponse := dockerapi.ListImagesResponse{ + ImageIDs: []string{"sha256:qwerty1"}, + } + + client.EXPECT().ListContainers(gomock.Any(), gomock.Any(), dockerclient.ListImagesTimeout).Return(listContainersResponse).AnyTimes() + client.EXPECT().InspectContainer(gomock.Any(), gomock.Any(), dockerclient.InspectContainerTimeout).Return(inspectContainerResponse, nil).AnyTimes() + client.EXPECT().RemoveContainer(gomock.Any(), gomock.Any(), dockerclient.RemoveContainerTimeout).Return(nil).AnyTimes() + client.EXPECT().ListImages(gomock.Any(), dockerclient.ListImagesTimeout).Return(listImagesResponse).AnyTimes() + client.EXPECT().RemoveImage(gomock.Any(), listImagesResponse.ImageIDs[0], dockerclient.RemoveImageTimeout).Return(nil).AnyTimes() + + ctx, cancel := context.WithCancel(context.TODO()) + defer cancel() + + imageManager.removeUnusedImages(ctx) + + if len(listContainersResponse.DockerIDs) != 1 { + t.Error("Error removing containers ids") + } + if len(inspectContainerResponse.ID) != 1 { + t.Error("Error inspecting containers ids") + } + if len(imageManager.imageStates) != 0 { + t.Error("Error removing image state after the image is removed") + } +} + func TestDeleteImage(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() @@ -1160,6 +1235,10 @@ func TestConcurrentRemoveUnusedImages(t *testing.T) { imageInspected := &types.ImageInspect{ ID: "sha256:qwerty", } + listImagesResponse := dockerapi.ListImagesResponse{ + ImageIDs: []string{"sha256:qwerty"}, + } + client.EXPECT().ListImages(gomock.Any(), dockerclient.ListImagesTimeout).Return(listImagesResponse).AnyTimes() client.EXPECT().InspectImage(container.Image).Return(imageInspected, nil).AnyTimes() err := imageManager.RecordContainerReference(container) if err != nil { From 40e98497ddcd2a73675f1cc5c6c42f41e857d79f Mon Sep 17 00:00:00 2001 From: Adnan Khan Date: Thu, 20 Dec 2018 14:51:42 -0800 Subject: [PATCH 2/6] dockerclient, engine: image removal adds docker sdk client changes --- agent/dockerclient/dockerapi/docker_client.go | 4 ++-- .../dockerclient/dockerapi/docker_client_test.go | 6 +++--- agent/dockerclient/sdkclient/interface.go | 3 ++- .../sdkclient/mocks/sdkclient_mocks.go | 15 ++++++++++++++- agent/engine/docker_image_manager.go | 5 ++++- agent/engine/docker_image_manager_test.go | 14 +++++++++----- 6 files changed, 34 insertions(+), 13 deletions(-) diff --git a/agent/dockerclient/dockerapi/docker_client.go b/agent/dockerclient/dockerapi/docker_client.go index 941e836e426..d1a5d70780f 100644 --- a/agent/dockerclient/dockerapi/docker_client.go +++ b/agent/dockerclient/dockerapi/docker_client.go @@ -1022,11 +1022,11 @@ func (dg *dockerGoClient) ListImages(ctx context.Context, timeout time.Duration) } func (dg *dockerGoClient) listImages(ctx context.Context) ListImagesResponse { - client, err := dg.dockerClient() + client, err := dg.sdkDockerClient() if err != nil { return ListImagesResponse{Error: err} } - images, err := client.ListImages(docker.ListImagesOptions{ + images, err := client.ImageList(ctx, types.ImageListOptions{ All: false, }) if err != nil { diff --git a/agent/dockerclient/dockerapi/docker_client_test.go b/agent/dockerclient/dockerapi/docker_client_test.go index 18a0de0b09b..3bf2cf86aaa 100644 --- a/agent/dockerclient/dockerapi/docker_client_test.go +++ b/agent/dockerclient/dockerapi/docker_client_test.go @@ -816,8 +816,8 @@ func TestListImages(t *testing.T) { mockDocker, client, _, _, _, done := dockerClientSetup(t) defer done() - images := []docker.APIImages{{ID: "id"}} - mockDocker.EXPECT().ListImages(gomock.Any()).Return(images, nil) + images := []types.ImageSummary{{ID: "id"}} + mockDocker.EXPECT().ImageList(gomock.Any(), gomock.Any()).Return(images, nil) ctx, cancel := context.WithCancel(context.TODO()) defer cancel() response := client.ListImages(ctx, dockerclient.ListImagesTimeout) @@ -841,7 +841,7 @@ func TestListImagesTimeout(t *testing.T) { wait := &sync.WaitGroup{} wait.Add(1) - mockDocker.EXPECT().ListImages(gomock.Any()).Do(func(x interface{}) { + mockDocker.EXPECT().ImageList(gomock.Any(), gomock.Any()).Do(func(x, y interface{}) { wait.Wait() // Don't return, verify timeout happens }).MaxTimes(1).Return(nil, errors.New("test error")) diff --git a/agent/dockerclient/sdkclient/interface.go b/agent/dockerclient/sdkclient/interface.go index 7c77b901005..b1e9ba1380b 100644 --- a/agent/dockerclient/sdkclient/interface.go +++ b/agent/dockerclient/sdkclient/interface.go @@ -22,9 +22,9 @@ import ( "github.com/docker/docker/api/types" "github.com/docker/docker/api/types/container" - "github.com/docker/docker/api/types/network" "github.com/docker/docker/api/types/events" "github.com/docker/docker/api/types/filters" + "github.com/docker/docker/api/types/network" "github.com/docker/docker/api/types/volume" ) @@ -45,6 +45,7 @@ type Client interface { options types.ImageImportOptions) (io.ReadCloser, error) ImageInspectWithRaw(ctx context.Context, imageID string) (types.ImageInspect, []byte, error) ImageLoad(ctx context.Context, input io.Reader, quiet bool) (types.ImageLoadResponse, error) + ImageList(ctx context.Context, options types.ImageListOptions) ([]types.ImageSummary, error) ImagePull(ctx context.Context, refStr string, options types.ImagePullOptions) (io.ReadCloser, error) ImageRemove(ctx context.Context, imageID string, options types.ImageRemoveOptions) ([]types.ImageDeleteResponseItem, error) diff --git a/agent/dockerclient/sdkclient/mocks/sdkclient_mocks.go b/agent/dockerclient/sdkclient/mocks/sdkclient_mocks.go index 2bbe5106177..d63f7f2067f 100644 --- a/agent/dockerclient/sdkclient/mocks/sdkclient_mocks.go +++ b/agent/dockerclient/sdkclient/mocks/sdkclient_mocks.go @@ -1,4 +1,4 @@ -// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// Copyright 2015-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"). You may // not use this file except in compliance with the License. A copy of the @@ -195,6 +195,19 @@ func (mr *MockClientMockRecorder) ImageInspectWithRaw(arg0, arg1 interface{}) *g return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ImageInspectWithRaw", reflect.TypeOf((*MockClient)(nil).ImageInspectWithRaw), arg0, arg1) } +// ImageList mocks base method +func (m *MockClient) ImageList(arg0 context.Context, arg1 types.ImageListOptions) ([]types.ImageSummary, error) { + ret := m.ctrl.Call(m, "ImageList", arg0, arg1) + ret0, _ := ret[0].([]types.ImageSummary) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// ImageList indicates an expected call of ImageList +func (mr *MockClientMockRecorder) ImageList(arg0, arg1 interface{}) *gomock.Call { + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ImageList", reflect.TypeOf((*MockClient)(nil).ImageList), arg0, arg1) +} + // ImageLoad mocks base method func (m *MockClient) ImageLoad(arg0 context.Context, arg1 io.Reader, arg2 bool) (types.ImageLoadResponse, error) { ret := m.ctrl.Call(m, "ImageLoad", arg0, arg1, arg2) diff --git a/agent/engine/docker_image_manager.go b/agent/engine/docker_image_manager.go index c5cd060c735..915ea7fdf34 100644 --- a/agent/engine/docker_image_manager.go +++ b/agent/engine/docker_image_manager.go @@ -336,7 +336,10 @@ func (imageManager *dockerImageManager) removeNonECSContainers(ctx context.Conte var nonECSContainerRemoveAvailableIDs []string for _, id := range nonECSContainersIDs { response, _ := imageManager.client.InspectContainer(ctx, id, dockerclient.InspectContainerTimeout) - if response.State.Status == "exited" && time.Now().Sub(response.State.FinishedAt) > imageManager.nonECSContainerCleanupWaitDuration { + + finishedTime, _ := time.Parse(time.Now().String(), response.State.FinishedAt) + + if response.State.Status == "exited" && time.Now().Sub(finishedTime) > imageManager.nonECSContainerCleanupWaitDuration { nonECSContainerRemoveAvailableIDs = append(nonECSContainerRemoveAvailableIDs, id) } } diff --git a/agent/engine/docker_image_manager_test.go b/agent/engine/docker_image_manager_test.go index 8ca9d4f0ce7..b559d4ce4df 100644 --- a/agent/engine/docker_image_manager_test.go +++ b/agent/engine/docker_image_manager_test.go @@ -996,13 +996,17 @@ func TestNonECSImageAndContainersCleanupRemoveImage(t *testing.T) { listContainersResponse := dockerapi.ListContainersResponse{ DockerIDs: []string{"1"}, } - inspectContainerResponse := &docker.Container{ - ID: "1", - State: docker.State{ - Status: "exited", - FinishedAt: time.Now().AddDate(0, -2, 0), + + inspectContainerResponse := &types.ContainerJSON{ + ContainerJSONBase: &types.ContainerJSONBase{ + ID: "1", + State: &types.ContainerState{ + Status: "exited", + FinishedAt: time.Now().AddDate(0, -2, 0).String(), + }, }, } + listImagesResponse := dockerapi.ListImagesResponse{ ImageIDs: []string{"sha256:qwerty1"}, } From e9c4be7e0a8f690c210ec4ae21536bebafeabe60 Mon Sep 17 00:00:00 2001 From: Adnan Khan Date: Fri, 21 Dec 2018 12:02:04 -0800 Subject: [PATCH 3/6] dockerclient: set mock expectations count --- agent/dockerclient/dockerapi/docker_client_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agent/dockerclient/dockerapi/docker_client_test.go b/agent/dockerclient/dockerapi/docker_client_test.go index 3bf2cf86aaa..a095f458a18 100644 --- a/agent/dockerclient/dockerapi/docker_client_test.go +++ b/agent/dockerclient/dockerapi/docker_client_test.go @@ -736,7 +736,7 @@ func TestContainerEventsStreamError(t *testing.T) { eventsChan := make(chan events.Message, dockerEventBufferSize) errChan := make(chan error) - mockDockerSDK.EXPECT().Events(gomock.Any(), gomock.Any()).Return(eventsChan, errChan).Times(2) + mockDockerSDK.EXPECT().Events(gomock.Any(), gomock.Any()).Return(eventsChan, errChan).MinTimes(1) dockerEvents, err := client.ContainerEvents(context.TODO()) require.NoError(t, err, "Could not get container events") From e9f23ab9b31da7112f94c9811d3ee92e1870f7e9 Mon Sep 17 00:00:00 2001 From: Adnan Khan Date: Wed, 26 Dec 2018 11:38:52 -0800 Subject: [PATCH 4/6] agent: naming changes for exclude non ecs images --- agent/config/config.go | 2 +- agent/config/config_test.go | 4 ++-- agent/dockerclient/dockerapi/docker_client.go | 4 +--- agent/engine/docker_image_manager.go | 8 ++++---- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/agent/config/config.go b/agent/config/config.go index f716360fbb9..a9fd4fde7ce 100644 --- a/agent/config/config.go +++ b/agent/config/config.go @@ -512,7 +512,7 @@ func environmentConfig() (Config, error) { NumImagesToDeletePerCycle: parseNumImagesToDeletePerCycle(), NumNonECSContainersToDeletePerCycle: parseNumNonECSContainersToDeletePerCycle(), ImagePullBehavior: parseImagePullBehavior(), - ImageCleanupExclusionList: parseImageCleanupExclusionList("ECS_NONECS_IMAGE_CLEANUP_EXCLUDE"), + ImageCleanupExclusionList: parseImageCleanupExclusionList("ECS_EXCLUDE_UNTRACKED_IMAGE"), InstanceAttributes: instanceAttributes, CNIPluginsPath: os.Getenv("ECS_CNI_PLUGINS_PATH"), AWSVPCBlockInstanceMetdata: utils.ParseBool(os.Getenv("ECS_AWSVPC_BLOCK_IMDS"), false), diff --git a/agent/config/config_test.go b/agent/config/config_test.go index 99ba17cc50c..a5138cbaf8e 100644 --- a/agent/config/config_test.go +++ b/agent/config/config_test.go @@ -374,8 +374,8 @@ func TestInvalidFormatParseEnvVariableDuration(t *testing.T) { func TestValidForImagesCleanupExclusion(t *testing.T) { defer setTestRegion()() - defer setTestEnv("ECS_NONECS_IMAGE_CLEANUP_EXCLUDE", "amazonlinux:2,amazonlinux:3")() - imagesNotDelete := parseImageCleanupExclusionList("ECS_NONECS_IMAGE_CLEANUP_EXCLUDE") + defer setTestEnv("ECS_EXCLUDE_UNTRACKED_IMAGE", "amazonlinux:2,amazonlinux:3")() + imagesNotDelete := parseImageCleanupExclusionList("ECS_EXCLUDE_UNTRACKED_IMAGE") assert.Equal(t, []string{"amazonlinux:2", "amazonlinux:3"}, imagesNotDelete, "unexpected imageCleanupExclusionList") } diff --git a/agent/dockerclient/dockerapi/docker_client.go b/agent/dockerclient/dockerapi/docker_client.go index d1a5d70780f..91583a0fef3 100644 --- a/agent/dockerclient/dockerapi/docker_client.go +++ b/agent/dockerclient/dockerapi/docker_client.go @@ -1026,9 +1026,7 @@ func (dg *dockerGoClient) listImages(ctx context.Context) ListImagesResponse { if err != nil { return ListImagesResponse{Error: err} } - images, err := client.ImageList(ctx, types.ImageListOptions{ - All: false, - }) + images, err := client.ImageList(ctx, types.ImageListOptions{}) if err != nil { return ListImagesResponse{Error: err} } diff --git a/agent/engine/docker_image_manager.go b/agent/engine/docker_image_manager.go index 915ea7fdf34..b16b2e1584f 100644 --- a/agent/engine/docker_image_manager.go +++ b/agent/engine/docker_image_manager.go @@ -329,7 +329,7 @@ func (imageManager *dockerImageManager) removeUnusedImages(ctx context.Context) } func (imageManager *dockerImageManager) removeNonECSContainers(ctx context.Context) { - nonECSContainersIDs, err := imageManager.nonECSContainersIDs(ctx) + nonECSContainersIDs, err := imageManager.getNonECSContainerIDs(ctx) if err != nil { seelog.Errorf("Error getting non-ECS container IDs: %v", err) } @@ -360,7 +360,7 @@ func (imageManager *dockerImageManager) removeNonECSContainers(ctx context.Conte } } -func (imageManager *dockerImageManager) nonECSContainersIDs(ctx context.Context) ([]string, error) { +func (imageManager *dockerImageManager) getNonECSContainerIDs(ctx context.Context) ([]string, error) { var allContainersIDs []string listContainersResponse := imageManager.client.ListContainers(ctx, true, dockerclient.ListContainersTimeout) if listContainersResponse.Error != nil { @@ -383,7 +383,7 @@ func (imageManager *dockerImageManager) removeNonECSImages(ctx context.Context, if nonECSImagesNumToDelete == 0 { return } - var nonECSImageNames = imageManager.nonECSImagesNames(ctx) + var nonECSImageNames = imageManager.getNonECSImageNames(ctx) var nonECSImageNamesRemoveEligible []string for _, nonECSImage := range nonECSImageNames { if !isInExclusionList(nonECSImage, imageManager.imageCleanupExclusionList) { @@ -419,7 +419,7 @@ func (imageManager *dockerImageManager) removeNonECSImages(ctx context.Context, } } -func (imageManager *dockerImageManager) nonECSImagesNames(ctx context.Context) []string { +func (imageManager *dockerImageManager) getNonECSImageNames(ctx context.Context) []string { response := imageManager.client.ListImages(ctx, dockerclient.ListImagesTimeout) var allImagesNames []string for _, name := range response.RepoTags { From ad95b99dc7619ad5ff24ae3562fb88afd1156cd3 Mon Sep 17 00:00:00 2001 From: Adnan Khan Date: Wed, 26 Dec 2018 17:10:08 -0800 Subject: [PATCH 5/6] dockerapi, engine: use assert and add inspect* error logging --- .../dockerapi/docker_client_test.go | 23 ++++++------------- agent/engine/docker_image_manager.go | 18 +++++++++++---- 2 files changed, 20 insertions(+), 21 deletions(-) diff --git a/agent/dockerclient/dockerapi/docker_client_test.go b/agent/dockerclient/dockerapi/docker_client_test.go index a095f458a18..f94bb490be2 100644 --- a/agent/dockerclient/dockerapi/docker_client_test.go +++ b/agent/dockerclient/dockerapi/docker_client_test.go @@ -821,18 +821,11 @@ func TestListImages(t *testing.T) { ctx, cancel := context.WithCancel(context.TODO()) defer cancel() response := client.ListImages(ctx, dockerclient.ListImagesTimeout) - if response.Error != nil { - t.Error("Did not expect error") - } + assert.NoError(t, response.Error, "Did not expect error") imageIDs := response.ImageIDs - if len(imageIDs) != 1 { - t.Error("Unexpected number of images in list", len(imageIDs)) - } - - if imageIDs[0] != "id" { - t.Error("Unexpected image id in the list:", imageIDs[0]) - } + assert.EqualValues(t, len(imageIDs), 1, "Unexpected number of images in list") + assert.EqualValues(t, imageIDs[0], "id", "Unexpected id in list of images") } func TestListImagesTimeout(t *testing.T) { @@ -847,13 +840,11 @@ func TestListImagesTimeout(t *testing.T) { }).MaxTimes(1).Return(nil, errors.New("test error")) ctx, cancel := context.WithCancel(context.TODO()) defer cancel() + response := client.ListImages(ctx, xImageShortTimeout) - if response.Error == nil { - t.Error("Expected error for pull timeout") - } - if response.Error.(apierrors.NamedError).ErrorName() != "DockerTimeoutError" { - t.Error("Wrong error type") - } + assert.Error(t, response.Error, "Expected error for pull timeout") + assert.Equal(t, response.Error.(apierrors.NamedError).ErrorName(), "DockerTimeoutError", "Wrong error type") + wait.Done() } diff --git a/agent/engine/docker_image_manager.go b/agent/engine/docker_image_manager.go index b16b2e1584f..87d8c5fdd83 100644 --- a/agent/engine/docker_image_manager.go +++ b/agent/engine/docker_image_manager.go @@ -335,7 +335,11 @@ func (imageManager *dockerImageManager) removeNonECSContainers(ctx context.Conte } var nonECSContainerRemoveAvailableIDs []string for _, id := range nonECSContainersIDs { - response, _ := imageManager.client.InspectContainer(ctx, id, dockerclient.InspectContainerTimeout) + response, icErr := imageManager.client.InspectContainer(ctx, id, dockerclient.InspectContainerTimeout) + if icErr != nil { + seelog.Errorf("Error inspecting non-ECS container id: %s - %v", id, icErr) + continue + } finishedTime, _ := time.Parse(time.Now().String(), response.State.FinishedAt) @@ -393,7 +397,11 @@ func (imageManager *dockerImageManager) removeNonECSImages(ctx context.Context, var imageWithSizeList []ImageWithSize for _, imageName := range nonECSImageNamesRemoveEligible { - resp, _ := imageManager.client.InspectImage(imageName) + resp, iiErr := imageManager.client.InspectImage(imageName) + if iiErr != nil { + seelog.Errorf("Error inspecting non-ECS image name: %s - %v", imageName, iiErr) + continue + } imageWithSizeList = append(imageWithSizeList, ImageWithSize{imageName, resp.Size}) } // we want to sort images with size ascending @@ -402,9 +410,9 @@ func (imageManager *dockerImageManager) removeNonECSImages(ctx context.Context, }) // we will remove the remaining nonECSImages in each performPeriodicImageCleanup call() - var numImagesAlreadyDelete = 0 + var numImagesAlreadyDeleted = 0 for _, kv := range imageWithSizeList { - if numImagesAlreadyDelete == nonECSImagesNumToDelete { + if numImagesAlreadyDeleted == nonECSImagesNumToDelete { break } seelog.Infof("Removing non-ECS Image: %s", kv.ImageName) @@ -414,7 +422,7 @@ func (imageManager *dockerImageManager) removeNonECSImages(ctx context.Context, continue } else { seelog.Infof("Image removed: %s", kv.ImageName) - numImagesAlreadyDelete++ + numImagesAlreadyDeleted++ } } } From 4cd48041bfc07c96eb52a4f46627703e98bbd395 Mon Sep 17 00:00:00 2001 From: Adnan Khan Date: Fri, 28 Dec 2018 10:42:25 -0800 Subject: [PATCH 6/6] config: exclude known cached images --- agent/config/config.go | 4 ++++ agent/config/config_test.go | 3 ++- agent/config/parse.go | 4 ++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/agent/config/config.go b/agent/config/config.go index a9fd4fde7ce..8fe202a1c61 100644 --- a/agent/config/config.go +++ b/agent/config/config.go @@ -126,6 +126,10 @@ const ( // DefaultTaskMetadataBurstRate is set to handle 60 burst requests at once DefaultTaskMetadataBurstRate = 60 + + //Known cached image names + CachedImageNamePauseContainer = "amazon/amazon-ecs-pause:0.1.0" + CachedImageNameAgentContainer = "amazon/amazon-ecs-agent:latest" ) const ( diff --git a/agent/config/config_test.go b/agent/config/config_test.go index a5138cbaf8e..d64e46ef2d4 100644 --- a/agent/config/config_test.go +++ b/agent/config/config_test.go @@ -376,7 +376,8 @@ func TestValidForImagesCleanupExclusion(t *testing.T) { defer setTestRegion()() defer setTestEnv("ECS_EXCLUDE_UNTRACKED_IMAGE", "amazonlinux:2,amazonlinux:3")() imagesNotDelete := parseImageCleanupExclusionList("ECS_EXCLUDE_UNTRACKED_IMAGE") - assert.Equal(t, []string{"amazonlinux:2", "amazonlinux:3"}, imagesNotDelete, "unexpected imageCleanupExclusionList") + expectedImages := []string{"amazonlinux:2", "amazonlinux:3", CachedImageNameAgentContainer, CachedImageNamePauseContainer} + assert.Equal(t, expectedImages, imagesNotDelete, "unexpected imageCleanupExclusionList") } func TestValidFormatParseEnvVariableDuration(t *testing.T) { diff --git a/agent/config/parse.go b/agent/config/parse.go index 7b116299a6a..0aa202d54f6 100644 --- a/agent/config/parse.go +++ b/agent/config/parse.go @@ -298,6 +298,10 @@ func parseImageCleanupExclusionList(envVar string) []string { } else { imageCleanupExclusionList = strings.Split(imageEnv, ",") } + + // append known cached internal images to imageCleanupExclusionLis + imageCleanupExclusionList = append(imageCleanupExclusionList, CachedImageNameAgentContainer, CachedImageNamePauseContainer) + for _, image := range imageCleanupExclusionList { seelog.Infof("Image excluded from cleanup: %s", image) }