From 38ac49eb2c7e962e68b7a7b2bbbb3319fd39acb9 Mon Sep 17 00:00:00 2001 From: fenxiong Date: Thu, 21 May 2020 17:24:53 -0700 Subject: [PATCH] Added inf support config and neuron runtime override. Added an agent config InferentiaSupportEnabled populated by ECS_ENABLE_INF_SUPPORT env. For a container that has AWS_NEURON_VISIBLE_DEVICES specified, if InferentiaSupportEnabled is on, the agent will override its runtime to the neuron docker runtime needed for using the inferentia devices. This enables us to only use the neuron runtime for container that needs the inf device, and only do so when such runtime is installed on the AMI (which is indicated by the ECS_ENABLE_INF_SUPPORT config that we will add together with installing the neuron runtime). --- agent/api/container/container.go | 11 +++ agent/api/container/container_test.go | 7 ++ agent/api/task/task.go | 35 ++++++-- agent/api/task/task_linux_test.go | 6 +- agent/api/task/task_test.go | 118 +++++++++++++++++--------- agent/api/task/task_windows_test.go | 16 ++-- agent/config/config.go | 1 + agent/config/config_test.go | 8 ++ agent/config/types.go | 3 + agent/engine/docker_task_engine.go | 2 +- 10 files changed, 151 insertions(+), 56 deletions(-) diff --git a/agent/api/container/container.go b/agent/api/container/container.go index eda464063b5..1c3a1855e84 100644 --- a/agent/api/container/container.go +++ b/agent/api/container/container.go @@ -77,6 +77,9 @@ const ( // TargetLogDriver is to show secret target being "LOG_DRIVER", the default will be "CONTAINER" SecretTargetLogDriver = "LOG_DRIVER" + + // neuronVisibleDevicesEnvVar is the env which indicates that the container wants to use inferentia devices. + neuronVisibleDevicesEnvVar = "AWS_NEURON_VISIBLE_DEVICES" ) // DockerConfig represents additional metadata about a container to run. It's @@ -1120,3 +1123,11 @@ func (c *Container) GetEnvironmentFiles() []EnvironmentFile { return c.EnvironmentFiles } + +func (c *Container) RequireNeuronRuntime() bool { + c.lock.RLock() + defer c.lock.RUnlock() + + _, ok := c.Environment[neuronVisibleDevicesEnvVar] + return ok +} diff --git a/agent/api/container/container_test.go b/agent/api/container/container_test.go index e98374cf685..fb0e37cbba8 100644 --- a/agent/api/container/container_test.go +++ b/agent/api/container/container_test.go @@ -750,3 +750,10 @@ func TestMergeEnvironmentVariablesFromEnvfiles(t *testing.T) { }) } } + +func TestRequireNeuronRuntime(t *testing.T) { + c := &Container{ + Environment: map[string]string{neuronVisibleDevicesEnvVar: "all"}, + } + assert.True(t, c.RequireNeuronRuntime()) +} diff --git a/agent/api/task/task.go b/agent/api/task/task.go index e72bd4c3705..a4258818419 100644 --- a/agent/api/task/task.go +++ b/agent/api/task/task.go @@ -77,6 +77,9 @@ const ( NvidiaVisibleDevicesEnvVar = "NVIDIA_VISIBLE_DEVICES" GPUAssociationType = "gpu" + // neuronRuntime is the name of the neuron docker runtime. + neuronRuntime = "neuron" + ContainerOrderingCreateCondition = "CREATE" ContainerOrderingStartCondition = "START" @@ -1430,8 +1433,8 @@ func (task *Task) dockerExposedPorts(container *apicontainer.Container) nat.Port } // DockerHostConfig construct the configuration recognized by docker -func (task *Task) DockerHostConfig(container *apicontainer.Container, dockerContainerMap map[string]*apicontainer.DockerContainer, apiVersion dockerclient.DockerVersion) (*dockercontainer.HostConfig, *apierrors.HostConfigError) { - return task.dockerHostConfig(container, dockerContainerMap, apiVersion) +func (task *Task) DockerHostConfig(container *apicontainer.Container, dockerContainerMap map[string]*apicontainer.DockerContainer, apiVersion dockerclient.DockerVersion, cfg *config.Config) (*dockercontainer.HostConfig, *apierrors.HostConfigError) { + return task.dockerHostConfig(container, dockerContainerMap, apiVersion, cfg) } // ApplyExecutionRoleLogsAuth will check whether the task has execution role @@ -1459,7 +1462,7 @@ func (task *Task) ApplyExecutionRoleLogsAuth(hostConfig *dockercontainer.HostCon return nil } -func (task *Task) dockerHostConfig(container *apicontainer.Container, dockerContainerMap map[string]*apicontainer.DockerContainer, apiVersion dockerclient.DockerVersion) (*dockercontainer.HostConfig, *apierrors.HostConfigError) { +func (task *Task) dockerHostConfig(container *apicontainer.Container, dockerContainerMap map[string]*apicontainer.DockerContainer, apiVersion dockerclient.DockerVersion, cfg *config.Config) (*dockercontainer.HostConfig, *apierrors.HostConfigError) { dockerLinkArr, err := task.dockerLinks(container, dockerContainerMap) if err != nil { return nil, &apierrors.HostConfigError{Msg: err.Error()} @@ -1488,12 +1491,8 @@ func (task *Task) dockerHostConfig(container *apicontainer.Container, dockerCont Resources: resources, } - if task.isGPUEnabled() && task.shouldRequireNvidiaRuntime(container) { - if task.NvidiaRuntime == "" { - return nil, &apierrors.HostConfigError{Msg: "Runtime is not set for GPU containers"} - } - seelog.Debugf("Setting runtime as %s for container %s", task.NvidiaRuntime, container.Name) - hostConfig.Runtime = task.NvidiaRuntime + if err := task.overrideContainerRuntime(container, hostConfig, cfg); err != nil { + return nil, err } if container.DockerConfig.HostConfig != nil { @@ -1537,6 +1536,24 @@ func (task *Task) dockerHostConfig(container *apicontainer.Container, dockerCont return hostConfig, nil } +// overrideContainerRuntime overrides the runtime for the container in host config if needed. +func (task *Task) overrideContainerRuntime(container *apicontainer.Container, hostCfg *dockercontainer.HostConfig, + cfg *config.Config) *apierrors.HostConfigError { + if task.isGPUEnabled() && task.shouldRequireNvidiaRuntime(container) { + if task.NvidiaRuntime == "" { + return &apierrors.HostConfigError{Msg: "Runtime is not set for GPU containers"} + } + seelog.Debugf("Setting runtime as %s for container %s", task.NvidiaRuntime, container.Name) + hostCfg.Runtime = task.NvidiaRuntime + } + + if cfg.InferentiaSupportEnabled && container.RequireNeuronRuntime() { + seelog.Debugf("Setting runtime as %s for container %s", neuronRuntime, container.Name) + hostCfg.Runtime = neuronRuntime + } + return nil +} + // Requires an *apicontainer.Container and returns the Resources for the HostConfig struct func (task *Task) getDockerResources(container *apicontainer.Container) dockercontainer.Resources { // Convert MB to B and set Memory diff --git a/agent/api/task/task_linux_test.go b/agent/api/task/task_linux_test.go index 331224e2f99..e675f53e585 100644 --- a/agent/api/task/task_linux_test.go +++ b/agent/api/task/task_linux_test.go @@ -422,7 +422,8 @@ func TestPlatformHostConfigOverrideErrorPath(t *testing.T) { }, } - dockerHostConfig, err := task.DockerHostConfig(task.Containers[0], dockerMap(task), defaultDockerClientAPIVersion) + dockerHostConfig, err := task.DockerHostConfig(task.Containers[0], dockerMap(task), defaultDockerClientAPIVersion, + &config.Config{}) assert.Error(t, err) assert.Empty(t, dockerHostConfig) } @@ -464,7 +465,8 @@ func TestDockerHostConfigRawConfigMerging(t *testing.T) { }, } - hostConfig, configErr := testTask.DockerHostConfig(testTask.Containers[0], dockerMap(testTask), minDockerClientAPIVersion) + hostConfig, configErr := testTask.DockerHostConfig(testTask.Containers[0], dockerMap(testTask), + minDockerClientAPIVersion, &config.Config{}) assert.Nil(t, configErr) expected := dockercontainer.HostConfig{ diff --git a/agent/api/task/task_test.go b/agent/api/task/task_test.go index 0183d0b3228..e77f41f3f51 100644 --- a/agent/api/task/task_test.go +++ b/agent/api/task/task_test.go @@ -94,7 +94,8 @@ func TestDockerHostConfigCPUShareZero(t *testing.T) { }, } - hostconfig, err := testTask.DockerHostConfig(testTask.Containers[0], dockerMap(testTask), defaultDockerClientAPIVersion) + hostconfig, err := testTask.DockerHostConfig(testTask.Containers[0], dockerMap(testTask), defaultDockerClientAPIVersion, + &config.Config{}) if err != nil { t.Error(err) } @@ -118,7 +119,8 @@ func TestDockerHostConfigCPUShareMinimum(t *testing.T) { }, } - hostconfig, err := testTask.DockerHostConfig(testTask.Containers[0], dockerMap(testTask), defaultDockerClientAPIVersion) + hostconfig, err := testTask.DockerHostConfig(testTask.Containers[0], dockerMap(testTask), defaultDockerClientAPIVersion, + &config.Config{}) if err != nil { t.Error(err) } @@ -143,7 +145,8 @@ func TestDockerHostConfigCPUShareUnchanged(t *testing.T) { }, } - hostconfig, err := testTask.DockerHostConfig(testTask.Containers[0], dockerMap(testTask), defaultDockerClientAPIVersion) + hostconfig, err := testTask.DockerHostConfig(testTask.Containers[0], dockerMap(testTask), defaultDockerClientAPIVersion, + &config.Config{}) if err != nil { t.Error(err) } @@ -168,7 +171,8 @@ func TestDockerHostConfigPortBinding(t *testing.T) { }, } - config, err := testTask.DockerHostConfig(testTask.Containers[0], dockerMap(testTask), defaultDockerClientAPIVersion) + config, err := testTask.DockerHostConfig(testTask.Containers[0], dockerMap(testTask), defaultDockerClientAPIVersion, + &config.Config{}) assert.Nil(t, err) bindings, ok := config.PortBindings["10/tcp"] @@ -195,7 +199,8 @@ func TestDockerHostConfigVolumesFrom(t *testing.T) { }, } - config, err := testTask.DockerHostConfig(testTask.Containers[1], dockerMap(testTask), defaultDockerClientAPIVersion) + config, err := testTask.DockerHostConfig(testTask.Containers[1], dockerMap(testTask), defaultDockerClientAPIVersion, + &config.Config{}) assert.Nil(t, err) if !reflect.DeepEqual(config.VolumesFrom, []string{"dockername-c1"}) { @@ -240,7 +245,8 @@ func TestDockerHostConfigRawConfig(t *testing.T) { }, } - config, configErr := testTask.DockerHostConfig(testTask.Containers[0], dockerMap(testTask), defaultDockerClientAPIVersion) + config, configErr := testTask.DockerHostConfig(testTask.Containers[0], dockerMap(testTask), defaultDockerClientAPIVersion, + &config.Config{}) assert.Nil(t, configErr) expectedOutput := rawHostConfigInput @@ -274,20 +280,23 @@ func TestDockerHostConfigPauseContainer(t *testing.T) { pauseContainer := testTask.Containers[1] // Verify that the network mode is set to "container:" // for a non pause container - config, err := testTask.DockerHostConfig(customContainer, dockerMap(testTask), defaultDockerClientAPIVersion) + cfg, err := testTask.DockerHostConfig(customContainer, dockerMap(testTask), defaultDockerClientAPIVersion, + &config.Config{}) assert.Nil(t, err) - assert.Equal(t, "container:"+dockerIDPrefix+NetworkPauseContainerName, string(config.NetworkMode)) + assert.Equal(t, "container:"+dockerIDPrefix+NetworkPauseContainerName, string(cfg.NetworkMode)) // Verify that the network mode is not set to "none" for the // empty volume container - config, err = testTask.DockerHostConfig(testTask.Containers[1], dockerMap(testTask), defaultDockerClientAPIVersion) + cfg, err = testTask.DockerHostConfig(testTask.Containers[1], dockerMap(testTask), defaultDockerClientAPIVersion, + &config.Config{}) assert.Nil(t, err) - assert.Equal(t, networkModeNone, string(config.NetworkMode)) + assert.Equal(t, networkModeNone, string(cfg.NetworkMode)) // Verify that the network mode is set to "none" for the pause container - config, err = testTask.DockerHostConfig(pauseContainer, dockerMap(testTask), defaultDockerClientAPIVersion) + cfg, err = testTask.DockerHostConfig(pauseContainer, dockerMap(testTask), defaultDockerClientAPIVersion, + &config.Config{}) assert.Nil(t, err) - assert.Equal(t, networkModeNone, string(config.NetworkMode)) + assert.Equal(t, networkModeNone, string(cfg.NetworkMode)) // Verify that overridden DNS settings are set for the pause container // and not set for non pause containers @@ -296,25 +305,28 @@ func TestDockerHostConfigPauseContainer(t *testing.T) { // DNS overrides are only applied to the pause container. Verify that the non-pause // container contains no overrides - config, err = testTask.DockerHostConfig(customContainer, dockerMap(testTask), defaultDockerClientAPIVersion) + cfg, err = testTask.DockerHostConfig(customContainer, dockerMap(testTask), defaultDockerClientAPIVersion, + &config.Config{}) assert.Nil(t, err) - assert.Equal(t, 0, len(config.DNS)) - assert.Equal(t, 0, len(config.DNSSearch)) + assert.Equal(t, 0, len(cfg.DNS)) + assert.Equal(t, 0, len(cfg.DNSSearch)) // Verify DNS settings are overridden for the pause container - config, err = testTask.DockerHostConfig(pauseContainer, dockerMap(testTask), defaultDockerClientAPIVersion) + cfg, err = testTask.DockerHostConfig(pauseContainer, dockerMap(testTask), defaultDockerClientAPIVersion, + &config.Config{}) assert.Nil(t, err) - assert.Equal(t, []string{"169.254.169.253"}, config.DNS) - assert.Equal(t, []string{"us-west-2.compute.internal"}, config.DNSSearch) + assert.Equal(t, []string{"169.254.169.253"}, cfg.DNS) + assert.Equal(t, []string{"us-west-2.compute.internal"}, cfg.DNSSearch) // Verify eni ExtraHosts added to HostConfig for pause container ipaddr := &apieni.ENIIPV4Address{Primary: true, Address: "10.0.1.1"} testTask.ENIs[0].IPV4Addresses = []*apieni.ENIIPV4Address{ipaddr} testTask.ENIs[0].PrivateDNSName = "eni.ip.region.compute.internal" - config, err = testTask.DockerHostConfig(pauseContainer, dockerMap(testTask), defaultDockerClientAPIVersion) + cfg, err = testTask.DockerHostConfig(pauseContainer, dockerMap(testTask), defaultDockerClientAPIVersion, + &config.Config{}) assert.Nil(t, err) - assert.Equal(t, []string{"eni.ip.region.compute.internal:10.0.1.1"}, config.ExtraHosts) + assert.Equal(t, []string{"eni.ip.region.compute.internal:10.0.1.1"}, cfg.ExtraHosts) // Verify eni Hostname is added to DockerConfig for pause container dockerconfig, dockerConfigErr := testTask.DockerConfig(pauseContainer, defaultDockerClientAPIVersion) @@ -338,7 +350,8 @@ func TestBadDockerHostConfigRawConfig(t *testing.T) { }, }, } - _, err := testTask.DockerHostConfig(testTask.Containers[0], dockerMap(&testTask), defaultDockerClientAPIVersion) + _, err := testTask.DockerHostConfig(testTask.Containers[0], dockerMap(&testTask), defaultDockerClientAPIVersion, + &config.Config{}) assert.Error(t, err) } } @@ -980,7 +993,8 @@ func TestNamespaceProvisionDependencyAndHostConfig(t *testing.T) { docMaps := dockerMap(task) for _, container := range task.Containers { //configure HostConfig for each container - dockHostCfg, err := task.DockerHostConfig(container, docMaps, defaultDockerClientAPIVersion) + dockHostCfg, err := task.DockerHostConfig(container, docMaps, defaultDockerClientAPIVersion, + &config.Config{}) assert.Nil(t, err) assert.Equal(t, task.IPCMode, string(dockHostCfg.IpcMode)) assert.Equal(t, task.PIDMode, string(dockHostCfg.PidMode)) @@ -1012,7 +1026,8 @@ func TestNamespaceProvisionDependencyAndHostConfig(t *testing.T) { for _, container := range task.Containers { //configure HostConfig for each container - dockHostCfg, err := task.DockerHostConfig(container, docMaps, defaultDockerClientAPIVersion) + dockHostCfg, err := task.DockerHostConfig(container, docMaps, defaultDockerClientAPIVersion, + &config.Config{}) assert.Nil(t, err) if namespacePause == container { // Expected behavior for IPCMode="task" is "shareable" @@ -1733,13 +1748,14 @@ func TestApplyExecutionRoleLogsAuthSet(t *testing.T) { credentialsManager.EXPECT().GetTaskCredentials(credentialsIDInTask).Return(taskCredentials, true) task.initializeCredentialsEndpoint(credentialsManager) - config, err := task.DockerHostConfig(task.Containers[0], dockerMap(task), defaultDockerClientAPIVersion) + cfg, err := task.DockerHostConfig(task.Containers[0], dockerMap(task), defaultDockerClientAPIVersion, + &config.Config{}) assert.Nil(t, err) - err = task.ApplyExecutionRoleLogsAuth(config, credentialsManager) + err = task.ApplyExecutionRoleLogsAuth(cfg, credentialsManager) assert.Nil(t, err) - endpoint, ok := config.LogConfig.Config["awslogs-credentials-endpoint"] + endpoint, ok := cfg.LogConfig.Config["awslogs-credentials-endpoint"] assert.True(t, ok) assert.Equal(t, expectedEndpoint, endpoint) } @@ -1782,13 +1798,14 @@ func TestApplyExecutionRoleLogsAuthNoConfigInHostConfig(t *testing.T) { credentialsManager.EXPECT().GetTaskCredentials(credentialsIDInTask).Return(taskCredentials, true) task.initializeCredentialsEndpoint(credentialsManager) - config, err := task.DockerHostConfig(task.Containers[0], dockerMap(task), defaultDockerClientAPIVersion) + cfg, err := task.DockerHostConfig(task.Containers[0], dockerMap(task), defaultDockerClientAPIVersion, + &config.Config{}) assert.Nil(t, err) - err = task.ApplyExecutionRoleLogsAuth(config, credentialsManager) + err = task.ApplyExecutionRoleLogsAuth(cfg, credentialsManager) assert.Nil(t, err) - endpoint, ok := config.LogConfig.Config["awslogs-credentials-endpoint"] + endpoint, ok := cfg.LogConfig.Config["awslogs-credentials-endpoint"] assert.True(t, ok) assert.Equal(t, expectedEndpoint, endpoint) } @@ -1824,10 +1841,11 @@ func TestApplyExecutionRoleLogsAuthFailEmptyCredentialsID(t *testing.T) { task.initializeCredentialsEndpoint(credentialsManager) - config, err := task.DockerHostConfig(task.Containers[0], dockerMap(task), defaultDockerClientAPIVersion) + cfg, err := task.DockerHostConfig(task.Containers[0], dockerMap(task), defaultDockerClientAPIVersion, + &config.Config{}) assert.Nil(t, err) - err = task.ApplyExecutionRoleLogsAuth(config, credentialsManager) + err = task.ApplyExecutionRoleLogsAuth(cfg, credentialsManager) assert.Error(t, err) } @@ -1866,10 +1884,10 @@ func TestApplyExecutionRoleLogsAuthFailNoCredentialsForTask(t *testing.T) { credentialsManager.EXPECT().GetTaskCredentials(credentialsIDInTask).Return(credentials.TaskIAMRoleCredentials{}, false) task.initializeCredentialsEndpoint(credentialsManager) - config, err := task.DockerHostConfig(task.Containers[0], dockerMap(task), defaultDockerClientAPIVersion) + cfg, err := task.DockerHostConfig(task.Containers[0], dockerMap(task), defaultDockerClientAPIVersion, &config.Config{}) assert.Error(t, err) - err = task.ApplyExecutionRoleLogsAuth(config, credentialsManager) + err = task.ApplyExecutionRoleLogsAuth(cfg, credentialsManager) assert.Error(t, err) } @@ -1884,12 +1902,14 @@ func TestSetMinimumMemoryLimit(t *testing.T) { }, } - hostconfig, err := testTask.DockerHostConfig(testTask.Containers[0], dockerMap(testTask), defaultDockerClientAPIVersion) + hostconfig, err := testTask.DockerHostConfig(testTask.Containers[0], dockerMap(testTask), defaultDockerClientAPIVersion, + &config.Config{}) assert.Nil(t, err) assert.Equal(t, int64(apicontainer.DockerContainerMinimumMemoryInBytes), hostconfig.Memory) - hostconfig, err = testTask.DockerHostConfig(testTask.Containers[0], dockerMap(testTask), dockerclient.Version_1_18) + hostconfig, err = testTask.DockerHostConfig(testTask.Containers[0], dockerMap(testTask), dockerclient.Version_1_18, + &config.Config{}) assert.Nil(t, err) assert.Equal(t, int64(apicontainer.DockerContainerMinimumMemoryInBytes), hostconfig.Memory) @@ -2946,7 +2966,8 @@ func TestDockerHostConfigNvidiaRuntime(t *testing.T) { cfg := &config.Config{GPUSupportEnabled: true, NvidiaRuntime: config.DefaultNvidiaRuntime} testTask.addGPUResource(cfg) - dockerHostConfig, _ := testTask.DockerHostConfig(testTask.Containers[0], dockerMap(testTask), defaultDockerClientAPIVersion) + dockerHostConfig, _ := testTask.DockerHostConfig(testTask.Containers[0], dockerMap(testTask), defaultDockerClientAPIVersion, + &config.Config{}) assert.Equal(t, testTask.NvidiaRuntime, dockerHostConfig.Runtime) } @@ -2963,7 +2984,8 @@ func TestDockerHostConfigRuntimeWithoutGPU(t *testing.T) { cfg := &config.Config{GPUSupportEnabled: true} testTask.addGPUResource(cfg) - dockerHostConfig, _ := testTask.DockerHostConfig(testTask.Containers[0], dockerMap(testTask), defaultDockerClientAPIVersion) + dockerHostConfig, _ := testTask.DockerHostConfig(testTask.Containers[0], dockerMap(testTask), defaultDockerClientAPIVersion, + &config.Config{}) assert.Equal(t, "", dockerHostConfig.Runtime) } @@ -2994,10 +3016,30 @@ func TestDockerHostConfigNoNvidiaRuntime(t *testing.T) { cfg := &config.Config{GPUSupportEnabled: true} testTask.addGPUResource(cfg) - _, err := testTask.DockerHostConfig(testTask.Containers[0], dockerMap(testTask), defaultDockerClientAPIVersion) + _, err := testTask.DockerHostConfig(testTask.Containers[0], dockerMap(testTask), defaultDockerClientAPIVersion, + &config.Config{}) assert.Error(t, err) } +func TestDockerHostConfigNeuronRuntime(t *testing.T) { + testTask := &Task{ + Arn: "test", + Containers: []*apicontainer.Container{ + { + Name: "myName1", + Image: "image:tag", + Environment: map[string]string{ + "AWS_NEURON_VISIBLE_DEVICES": "all", + }, + }, + }, + } + + dockerHostConfig, _ := testTask.DockerHostConfig(testTask.Containers[0], dockerMap(testTask), defaultDockerClientAPIVersion, + &config.Config{InferentiaSupportEnabled: true}) + assert.Equal(t, neuronRuntime, dockerHostConfig.Runtime) +} + func TestAssociationsByTypeAndContainer(t *testing.T) { associationType := "elastic-inference" container1 := &apicontainer.Container{ diff --git a/agent/api/task/task_windows_test.go b/agent/api/task/task_windows_test.go index 612cbb40542..65ab10ceded 100644 --- a/agent/api/task/task_windows_test.go +++ b/agent/api/task/task_windows_test.go @@ -187,7 +187,8 @@ func TestDockerHostConfigRawConfigMerging(t *testing.T) { }, } - hostConfig, configErr := testTask.DockerHostConfig(testTask.Containers[0], dockerMap(testTask), minDockerClientAPIVersion) + hostConfig, configErr := testTask.DockerHostConfig(testTask.Containers[0], dockerMap(testTask), + minDockerClientAPIVersion, &config.Config{}) assert.Nil(t, configErr) expected := dockercontainer.HostConfig{ @@ -258,7 +259,8 @@ func TestCPUPercentBasedOnUnboundedEnabled(t *testing.T) { }, } - hostconfig, err := testTask.DockerHostConfig(testTask.Containers[0], dockerMap(testTask), minDockerClientAPIVersion) + hostconfig, err := testTask.DockerHostConfig(testTask.Containers[0], dockerMap(testTask), + minDockerClientAPIVersion, &config.Config{}) assert.Nil(t, err) assert.Empty(t, hostconfig.CPUShares) assert.Equal(t, tc.cpuPercent, hostconfig.CPUPercent) @@ -297,17 +299,19 @@ func TestWindowsMemoryReservationOption(t *testing.T) { } // With MemoryUnbounded set to false, MemoryReservation is not overridden - config, configErr := testTask.DockerHostConfig(testTask.Containers[0], dockerMap(testTask), defaultDockerClientAPIVersion) + cfg, configErr := testTask.DockerHostConfig(testTask.Containers[0], dockerMap(testTask), + defaultDockerClientAPIVersion, &config.Config{}) assert.Nil(t, configErr) - assert.EqualValues(t, nonZeroMemoryReservationValue, config.MemoryReservation) + assert.EqualValues(t, nonZeroMemoryReservationValue, cfg.MemoryReservation) // With MemoryUnbounded set to true, tasks with no memory hard limit will have their memory reservation set to zero testTask.PlatformFields.MemoryUnbounded = true - config, configErr = testTask.DockerHostConfig(testTask.Containers[0], dockerMap(testTask), defaultDockerClientAPIVersion) + cfg, configErr = testTask.DockerHostConfig(testTask.Containers[0], dockerMap(testTask), + defaultDockerClientAPIVersion, &config.Config{}) assert.Nil(t, configErr) - assert.EqualValues(t, expectedMemoryReservationValue, config.MemoryReservation) + assert.EqualValues(t, expectedMemoryReservationValue, cfg.MemoryReservation) } func TestGetCanonicalPath(t *testing.T) { diff --git a/agent/config/config.go b/agent/config/config.go index 07b92b694b5..f53079a7e5c 100644 --- a/agent/config/config.go +++ b/agent/config/config.go @@ -553,6 +553,7 @@ func environmentConfig() (Config, error) { PollingMetricsWaitDuration: parseEnvVariableDuration("ECS_POLLING_METRICS_WAIT_DURATION"), DisableDockerHealthCheck: utils.ParseBool(os.Getenv("ECS_DISABLE_DOCKER_HEALTH_CHECK"), false), GPUSupportEnabled: utils.ParseBool(os.Getenv("ECS_ENABLE_GPU_SUPPORT"), false), + InferentiaSupportEnabled: utils.ParseBool(os.Getenv("ECS_ENABLE_INF_SUPPORT"), false), NvidiaRuntime: os.Getenv("ECS_NVIDIA_RUNTIME"), TaskMetadataAZDisabled: utils.ParseBool(os.Getenv("ECS_DISABLE_TASK_METADATA_AZ"), false), CgroupCPUPeriod: parseCgroupCPUPeriod(), diff --git a/agent/config/config_test.go b/agent/config/config_test.go index 9917742383e..b171bfb6b69 100644 --- a/agent/config/config_test.go +++ b/agent/config/config_test.go @@ -765,6 +765,14 @@ func TestGPUSupportEnabled(t *testing.T) { assert.True(t, cfg.GPUSupportEnabled, "Wrong value for GPUSupportEnabled") } +func TestInferentiaSupportEnabled(t *testing.T) { + defer setTestRegion()() + defer setTestEnv("ECS_ENABLE_INF_SUPPORT", "true")() + cfg, err := NewConfig(ec2.NewBlackholeEC2MetadataClient()) + assert.NoError(t, err) + assert.True(t, cfg.InferentiaSupportEnabled, "Wrong value for InferentiaSupportEnabled") +} + func TestTaskMetadataAZDisabled(t *testing.T) { defer setTestRegion()() defer setTestEnv("ECS_DISABLE_TASK_METADATA_AZ", "true")() diff --git a/agent/config/types.go b/agent/config/types.go index 1e02ca3b2a5..a13461aeda2 100644 --- a/agent/config/types.go +++ b/agent/config/types.go @@ -282,6 +282,9 @@ type Config struct { // GPUSupportEnabled specifies if the Agent is capable of launching GPU tasks GPUSupportEnabled bool + // InferentiaSupportEnabled specifies whether the built-in support for inferentia task is enabled. + InferentiaSupportEnabled bool + // ImageCleanupExclusionList is the list of image names customers want to keep for their own use and delete automatically ImageCleanupExclusionList []string diff --git a/agent/engine/docker_task_engine.go b/agent/engine/docker_task_engine.go index f8c7d5085d1..863f6df0141 100644 --- a/agent/engine/docker_task_engine.go +++ b/agent/engine/docker_task_engine.go @@ -912,7 +912,7 @@ func (engine *DockerTaskEngine) createContainer(task *apitask.Task, container *a if versionErr != nil { return dockerapi.DockerContainerMetadata{Error: CannotGetDockerClientVersionError{versionErr}} } - hostConfig, hcerr := task.DockerHostConfig(container, containerMap, dockerClientVersion) + hostConfig, hcerr := task.DockerHostConfig(container, containerMap, dockerClientVersion, engine.cfg) if hcerr != nil { return dockerapi.DockerContainerMetadata{Error: apierrors.NamedError(hcerr)} }