Skip to content

Commit bc323cb

Browse files
committed
changelog, readme, os specific task structs
1 parent ded74dc commit bc323cb

8 files changed

+185
-95
lines changed

CHANGELOG.md

+4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Changelog
22

3+
## Unreleased
4+
* Bug - Workaround for an issue where CPU percent was set to 1 when CPU was not
5+
set or set to zero(unbounded) in Windows [#1227](https://github.com/aws/amazon-ecs-agent/pull/1227)
6+
37
## 1.17.0
48
* Feature - Support a HTTP endpoint for `awsvpc` tasks to query metadata
59
* Feature - Support Docker health check

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,7 @@ additional details on each available environment variable.
177177
| `ECS_HOST_DATA_DIR` | `/var/lib/ecs` | The source directory on the host from which ECS_DATADIR is mounted. We use this to determine the source mount path for container metadata files in the case the ECS Agent is running as a container. We do not use this value in Windows because the ECS Agent is not running as container in Windows. | `/var/lib/ecs` | `Not used` |
178178
| `ECS_ENABLE_TASK_CPU_MEM_LIMIT` | `true` | Whether to enable task-level cpu and memory limits | `true` | `false` |
179179
| `ECS_CGROUP_PATH` | `/sys/fs/cgroup` | The root cgroup path that is expected by the ECS agent. This is the path that accessible from the agent mount. | `/sys/fs/cgroup` | Not applicable |
180+
| `ECS_ENABLE_CPU_UNBOUNDED_WINDOWS_HACK` | `true` | When `true`, ECS will allow CPU unbounded(CPU=`0`) tasks to run along with CPU bounded tasks in Windows. | Not applicable | `false` |
180181

181182
### Persistence
182183

agent/api/task.go

-86
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ import (
1919
"path/filepath"
2020
"strconv"
2121
"strings"
22-
"sync"
2322
"time"
2423

2524
"github.com/aws/amazon-ecs-agent/agent/acs/model/ecsacs"
@@ -64,91 +63,6 @@ const (
6463
// TaskOverrides are the overrides applied to a task
6564
type TaskOverrides struct{}
6665

67-
// Task is the internal representation of a task in the ECS agent
68-
type Task struct {
69-
// Arn is the unique identifer for the task
70-
Arn string
71-
// Overrides are the overrides applied to a task
72-
Overrides TaskOverrides `json:"-"`
73-
// Family is the name of the task definition family
74-
Family string
75-
// Version is the version of the task definition
76-
Version string
77-
// Containers are the containers for the task
78-
Containers []*Container
79-
// Volumes are the volumes for the task
80-
Volumes []TaskVolume `json:"volumes"`
81-
// CPU is a task-level limit for compute resources. A value of 1 means that
82-
// the task may access 100% of 1 vCPU on the instance
83-
CPU float64 `json:"Cpu,omitempty"`
84-
// Memory is a task-level limit for memory resources in bytes
85-
Memory int64 `json:"Memory,omitempty"`
86-
// DesiredStatusUnsafe represents the state where the task should go. Generally,
87-
// the desired status is informed by the ECS backend as a result of either
88-
// API calls made to ECS or decisions made by the ECS service scheduler.
89-
// The DesiredStatusUnsafe is almost always either TaskRunning or TaskStopped.
90-
// NOTE: Do not access DesiredStatusUnsafe directly. Instead, use `UpdateStatus`,
91-
// `UpdateDesiredStatus`, `SetDesiredStatus`, and `SetDesiredStatus`.
92-
// TODO DesiredStatusUnsafe should probably be private with appropriately written
93-
// setter/getter. When this is done, we need to ensure that the UnmarshalJSON
94-
// is handled properly so that the state storage continues to work.
95-
DesiredStatusUnsafe TaskStatus `json:"DesiredStatus"`
96-
97-
// KnownStatusUnsafe represents the state where the task is. This is generally
98-
// the minimum of equivalent status types for the containers in the task;
99-
// if one container is at ContainerRunning and another is at ContainerPulled,
100-
// the task KnownStatusUnsafe would be TaskPulled.
101-
// NOTE: Do not access KnownStatusUnsafe directly. Instead, use `UpdateStatus`,
102-
// and `GetKnownStatus`.
103-
// TODO KnownStatusUnsafe should probably be private with appropriately written
104-
// setter/getter. When this is done, we need to ensure that the UnmarshalJSON
105-
// is handled properly so that the state storage continues to work.
106-
KnownStatusUnsafe TaskStatus `json:"KnownStatus"`
107-
// KnownStatusTimeUnsafe captures the time when the KnownStatusUnsafe was last updated.
108-
// NOTE: Do not access KnownStatusTime directly, instead use `GetKnownStatusTime`.
109-
KnownStatusTimeUnsafe time.Time `json:"KnownTime"`
110-
111-
// PullStartedAtUnsafe is the timestamp when the task start pulling the first container,
112-
// it won't be set if the pull never happens
113-
PullStartedAtUnsafe time.Time `json:"PullStartedAt"`
114-
// PullStoppedAtUnsafe is the timestamp when the task finished pulling the last container,
115-
// it won't be set if the pull never happens
116-
PullStoppedAtUnsafe time.Time `json:"PullStoppedAt"`
117-
// ExecutionStoppedAtUnsafe is the timestamp when the task desired status moved to stopped,
118-
// which is when the any of the essential containers stopped
119-
ExecutionStoppedAtUnsafe time.Time `json:"ExecutionStoppedAt"`
120-
121-
// SentStatusUnsafe represents the last KnownStatusUnsafe that was sent to the ECS SubmitTaskStateChange API.
122-
// TODO(samuelkarp) SentStatusUnsafe needs a lock and setters/getters.
123-
// TODO SentStatusUnsafe should probably be private with appropriately written
124-
// setter/getter. When this is done, we need to ensure that the UnmarshalJSON
125-
// is handled properly so that the state storage continues to work.
126-
SentStatusUnsafe TaskStatus `json:"SentStatus"`
127-
128-
StartSequenceNumber int64
129-
StopSequenceNumber int64
130-
131-
// ExecutionCredentialsID is the ID of credentials that are used by agent to
132-
// perform some action at the task level, such as pulling image from ECR
133-
ExecutionCredentialsID string `json:"executionCredentialsID"`
134-
135-
// credentialsID is used to set the CredentialsId field for the
136-
// IAMRoleCredentials object associated with the task. This id can be
137-
// used to look up the credentials for task in the credentials manager
138-
credentialsID string
139-
140-
// ENI is the elastic network interface specified by this task
141-
ENI *ENI
142-
143-
// MemoryCPULimitsEnabled to determine if task supports CPU, memory limits
144-
MemoryCPULimitsEnabled bool `json:"MemoryCPULimitsEnabled,omitempty"`
145-
146-
// lock is for protecting all fields in the task struct
147-
lock sync.RWMutex
148-
149-
cpuUnboundedWindowsEnabled bool
150-
}
151-
15266
// PostUnmarshalTask is run after a task has been unmarshalled, but before it has been
15367
// run. It is possible it will be subsequently called after that and should be
15468
// able to handle such an occurrence appropriately (e.g. behave idempotently).

agent/api/task_unix.go

+84
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ package api
1717

1818
import (
1919
"path/filepath"
20+
"sync"
2021
"time"
2122

2223
"github.com/aws/amazon-ecs-agent/agent/config"
@@ -41,6 +42,89 @@ const (
4142
bytesPerMegabyte = 1024 * 1024
4243
)
4344

45+
// Task is the internal representation of a task in the ECS agent
46+
type Task struct {
47+
// Arn is the unique identifer for the task
48+
Arn string
49+
// Overrides are the overrides applied to a task
50+
Overrides TaskOverrides `json:"-"`
51+
// Family is the name of the task definition family
52+
Family string
53+
// Version is the version of the task definition
54+
Version string
55+
// Containers are the containers for the task
56+
Containers []*Container
57+
// Volumes are the volumes for the task
58+
Volumes []TaskVolume `json:"volumes"`
59+
// CPU is a task-level limit for compute resources. A value of 1 means that
60+
// the task may access 100% of 1 vCPU on the instance
61+
CPU float64 `json:"Cpu,omitempty"`
62+
// Memory is a task-level limit for memory resources in bytes
63+
Memory int64 `json:"Memory,omitempty"`
64+
// DesiredStatusUnsafe represents the state where the task should go. Generally,
65+
// the desired status is informed by the ECS backend as a result of either
66+
// API calls made to ECS or decisions made by the ECS service scheduler.
67+
// The DesiredStatusUnsafe is almost always either TaskRunning or TaskStopped.
68+
// NOTE: Do not access DesiredStatusUnsafe directly. Instead, use `UpdateStatus`,
69+
// `UpdateDesiredStatus`, `SetDesiredStatus`, and `SetDesiredStatus`.
70+
// TODO DesiredStatusUnsafe should probably be private with appropriately written
71+
// setter/getter. When this is done, we need to ensure that the UnmarshalJSON
72+
// is handled properly so that the state storage continues to work.
73+
DesiredStatusUnsafe TaskStatus `json:"DesiredStatus"`
74+
75+
// KnownStatusUnsafe represents the state where the task is. This is generally
76+
// the minimum of equivalent status types for the containers in the task;
77+
// if one container is at ContainerRunning and another is at ContainerPulled,
78+
// the task KnownStatusUnsafe would be TaskPulled.
79+
// NOTE: Do not access KnownStatusUnsafe directly. Instead, use `UpdateStatus`,
80+
// and `GetKnownStatus`.
81+
// TODO KnownStatusUnsafe should probably be private with appropriately written
82+
// setter/getter. When this is done, we need to ensure that the UnmarshalJSON
83+
// is handled properly so that the state storage continues to work.
84+
KnownStatusUnsafe TaskStatus `json:"KnownStatus"`
85+
// KnownStatusTimeUnsafe captures the time when the KnownStatusUnsafe was last updated.
86+
// NOTE: Do not access KnownStatusTime directly, instead use `GetKnownStatusTime`.
87+
KnownStatusTimeUnsafe time.Time `json:"KnownTime"`
88+
89+
// PullStartedAtUnsafe is the timestamp when the task start pulling the first container,
90+
// it won't be set if the pull never happens
91+
PullStartedAtUnsafe time.Time `json:"PullStartedAt"`
92+
// PullStoppedAtUnsafe is the timestamp when the task finished pulling the last container,
93+
// it won't be set if the pull never happens
94+
PullStoppedAtUnsafe time.Time `json:"PullStoppedAt"`
95+
// ExecutionStoppedAtUnsafe is the timestamp when the task desired status moved to stopped,
96+
// which is when the any of the essential containers stopped
97+
ExecutionStoppedAtUnsafe time.Time `json:"ExecutionStoppedAt"`
98+
99+
// SentStatusUnsafe represents the last KnownStatusUnsafe that was sent to the ECS SubmitTaskStateChange API.
100+
// TODO(samuelkarp) SentStatusUnsafe needs a lock and setters/getters.
101+
// TODO SentStatusUnsafe should probably be private with appropriately written
102+
// setter/getter. When this is done, we need to ensure that the UnmarshalJSON
103+
// is handled properly so that the state storage continues to work.
104+
SentStatusUnsafe TaskStatus `json:"SentStatus"`
105+
106+
StartSequenceNumber int64
107+
StopSequenceNumber int64
108+
109+
// ExecutionCredentialsID is the ID of credentials that are used by agent to
110+
// perform some action at the task level, such as pulling image from ECR
111+
ExecutionCredentialsID string `json:"executionCredentialsID"`
112+
113+
// credentialsID is used to set the CredentialsId field for the
114+
// IAMRoleCredentials object associated with the task. This id can be
115+
// used to look up the credentials for task in the credentials manager
116+
credentialsID string
117+
118+
// ENI is the elastic network interface specified by this task
119+
ENI *ENI
120+
121+
// MemoryCPULimitsEnabled to determine if task supports CPU, memory limits
122+
MemoryCPULimitsEnabled bool `json:"MemoryCPULimitsEnabled,omitempty"`
123+
124+
// lock is for protecting all fields in the task struct
125+
lock sync.RWMutex
126+
}
127+
44128
func (task *Task) adjustForPlatform(cfg *config.Config) {
45129
task.lock.Lock()
46130
defer task.lock.Unlock()

agent/api/task_windows.go

+93-6
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"path/filepath"
2020
"runtime"
2121
"strings"
22+
"sync"
2223

2324
"github.com/aws/amazon-ecs-agent/agent/config"
2425
"github.com/cihub/seelog"
@@ -36,10 +37,98 @@ const (
3637

3738
var cpuShareScaleFactor = runtime.NumCPU() * cpuSharesPerCore
3839

40+
// Task is the internal representation of a task in the ECS agent
41+
// TODO: there are some fields which are not related to Windows. Remove them.
42+
type Task struct {
43+
// Arn is the unique identifer for the task
44+
Arn string
45+
// Overrides are the overrides applied to a task
46+
Overrides TaskOverrides `json:"-"`
47+
// Family is the name of the task definition family
48+
Family string
49+
// Version is the version of the task definition
50+
Version string
51+
// Containers are the containers for the task
52+
Containers []*Container
53+
// Volumes are the volumes for the task
54+
Volumes []TaskVolume `json:"volumes"`
55+
// CPU is a task-level limit for compute resources. A value of 1 means that
56+
// the task may access 100% of 1 vCPU on the instance
57+
CPU float64 `json:"Cpu,omitempty"`
58+
// Memory is a task-level limit for memory resources in bytes
59+
Memory int64 `json:"Memory,omitempty"`
60+
// DesiredStatusUnsafe represents the state where the task should go. Generally,
61+
// the desired status is informed by the ECS backend as a result of either
62+
// API calls made to ECS or decisions made by the ECS service scheduler.
63+
// The DesiredStatusUnsafe is almost always either TaskRunning or TaskStopped.
64+
// NOTE: Do not access DesiredStatusUnsafe directly. Instead, use `UpdateStatus`,
65+
// `UpdateDesiredStatus`, `SetDesiredStatus`, and `SetDesiredStatus`.
66+
// TODO DesiredStatusUnsafe should probably be private with appropriately written
67+
// setter/getter. When this is done, we need to ensure that the UnmarshalJSON
68+
// is handled properly so that the state storage continues to work.
69+
DesiredStatusUnsafe TaskStatus `json:"DesiredStatus"`
70+
71+
// KnownStatusUnsafe represents the state where the task is. This is generally
72+
// the minimum of equivalent status types for the containers in the task;
73+
// if one container is at ContainerRunning and another is at ContainerPulled,
74+
// the task KnownStatusUnsafe would be TaskPulled.
75+
// NOTE: Do not access KnownStatusUnsafe directly. Instead, use `UpdateStatus`,
76+
// and `GetKnownStatus`.
77+
// TODO KnownStatusUnsafe should probably be private with appropriately written
78+
// setter/getter. When this is done, we need to ensure that the UnmarshalJSON
79+
// is handled properly so that the state storage continues to work.
80+
KnownStatusUnsafe TaskStatus `json:"KnownStatus"`
81+
// KnownStatusTimeUnsafe captures the time when the KnownStatusUnsafe was last updated.
82+
// NOTE: Do not access KnownStatusTime directly, instead use `GetKnownStatusTime`.
83+
KnownStatusTimeUnsafe time.Time `json:"KnownTime"`
84+
85+
// PullStartedAtUnsafe is the timestamp when the task start pulling the first container,
86+
// it won't be set if the pull never happens
87+
PullStartedAtUnsafe time.Time `json:"PullStartedAt"`
88+
// PullStoppedAtUnsafe is the timestamp when the task finished pulling the last container,
89+
// it won't be set if the pull never happens
90+
PullStoppedAtUnsafe time.Time `json:"PullStoppedAt"`
91+
// ExecutionStoppedAtUnsafe is the timestamp when the task desired status moved to stopped,
92+
// which is when the any of the essential containers stopped
93+
ExecutionStoppedAtUnsafe time.Time `json:"ExecutionStoppedAt"`
94+
95+
// SentStatusUnsafe represents the last KnownStatusUnsafe that was sent to the ECS SubmitTaskStateChange API.
96+
// TODO(samuelkarp) SentStatusUnsafe needs a lock and setters/getters.
97+
// TODO SentStatusUnsafe should probably be private with appropriately written
98+
// setter/getter. When this is done, we need to ensure that the UnmarshalJSON
99+
// is handled properly so that the state storage continues to work.
100+
SentStatusUnsafe TaskStatus `json:"SentStatus"`
101+
102+
StartSequenceNumber int64
103+
StopSequenceNumber int64
104+
105+
// ExecutionCredentialsID is the ID of credentials that are used by agent to
106+
// perform some action at the task level, such as pulling image from ECR
107+
ExecutionCredentialsID string `json:"executionCredentialsID"`
108+
109+
// credentialsID is used to set the CredentialsId field for the
110+
// IAMRoleCredentials object associated with the task. This id can be
111+
// used to look up the credentials for task in the credentials manager
112+
credentialsID string
113+
114+
// ENI is the elastic network interface specified by this task
115+
ENI *ENI
116+
117+
// MemoryCPULimitsEnabled to determine if task supports CPU, memory limits
118+
MemoryCPULimitsEnabled bool `json:"MemoryCPULimitsEnabled,omitempty"`
119+
120+
// cpuUnboundedEnabled determines whether a mix of unbounded and bounded CPU tasks
121+
// are allowed to run in the instance
122+
cpuUnboundedEnabled bool
123+
124+
// lock is for protecting all fields in the task struct
125+
lock sync.RWMutex
126+
}
127+
39128
// adjustForPlatform makes Windows-specific changes to the task after unmarshal
40129
func (task *Task) adjustForPlatform(cfg *config.Config) {
41130
task.downcaseAllVolumePaths()
42-
task.cpuUnboundedWindowsEnabled = cfg.CPUUnboundedWindowsEnabled
131+
task.cpuUnboundedEnabled = cfg.CPUUnboundedWindowsEnabled
43132
}
44133

45134
// downcaseAllVolumePaths forces all volume paths (host path and container path)
@@ -70,14 +159,12 @@ func (task *Task) platformHostConfigOverride(hostConfig *docker.HostConfig) erro
70159
task.overrideDefaultMemorySwappiness(hostConfig)
71160
// Convert the CPUShares to CPUPercent
72161
hostConfig.CPUPercent = hostConfig.CPUShares * percentageFactor / int64(cpuShareScaleFactor)
73-
if hostConfig.CPUPercent == 0 {
162+
if hostConfig.CPUPercent == 0 && hostConfig.CPUShares != 0 {
74163
// if CPU percent is too low, we set it to the minimum(linux and some windows tasks).
75164
// if the CPU is explicitly set to zero or not set at all, and CPU unbounded
76165
// tasks are allowed for windows, let CPU percent be zero.
77166
// this is a workaround to allow CPU unbounded tasks(https://github.com/aws/amazon-ecs-agent/issues/1127)
78-
if hostConfig.CPUShares != 0 {
79-
hostConfig.CPUPercent = minimumCPUPercent
80-
}
167+
hostConfig.CPUPercent = minimumCPUPercent
81168
}
82169
hostConfig.CPUShares = 0
83170
return nil
@@ -98,7 +185,7 @@ func (task *Task) overrideDefaultMemorySwappiness(hostConfig *docker.HostConfig)
98185
// want. Instead, we convert 0 to 2 to be closer to expected behavior. The
99186
// reason for 2 over 1 is that 1 is an invalid value (Linux's choice, not Docker's).
100187
func (task *Task) dockerCPUShares(containerCPU uint) int64 {
101-
if containerCPU <= 1 && !task.cpuUnboundedWindowsEnabled {
188+
if containerCPU <= 1 && !task.cpuUnboundedEnabled {
102189
seelog.Debugf(
103190
"Converting CPU shares to allowed minimum of 2 for task arn: [%s] and cpu shares: %d",
104191
task.Arn, containerCPU)

agent/api/task_windows_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -288,7 +288,7 @@ func TestCPUPercentBasedOnUnboundedEnabled(t *testing.T) {
288288
CPU: uint(tc.cpu),
289289
},
290290
},
291-
cpuUnboundedWindowsEnabled: tc.cpuUnboundedEnabled,
291+
cpuUnboundedEnabled: tc.cpuUnboundedEnabled,
292292
}
293293

294294
hostconfig, err := testTask.DockerHostConfig(testTask.Containers[0], dockerMap(testTask), minDockerClientAPIVersion)

agent/config/config_windows.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ func (cfg *Config) platformOverrides() {
9696
// ensure TaskResourceLimit is disabled
9797
cfg.TaskCPUMemLimit = ExplicitlyDisabled
9898

99-
cfg.CPUUnboundedWindowsEnabled = utils.ParseBool(os.Getenv("ECS_ENABLE_CPU_UNBOUNDED_WINDOWS_WORKAROUND"), false)
99+
cfg.CPUUnboundedWindowsEnabled = utils.ParseBool(os.Getenv("ECS_ENABLE_CPU_UNBOUNDED_WINDOWS_HACK"), false)
100100
}
101101

102102
// platformString returns platform-specific config data that can be serialized

agent/config/config_windows_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ func TestTaskResourceLimitPlatformOverrideDisabled(t *testing.T) {
9191

9292
func TestCPUUnboundedWindowsEnabledSet(t *testing.T) {
9393
defer setTestRegion()()
94-
defer setTestEnv("ECS_ENABLE_CPU_UNBOUNDED_WINDOWS_WORKAROUND", "true")()
94+
defer setTestEnv("ECS_ENABLE_CPU_UNBOUNDED_WINDOWS_HACK", "true")()
9595
cfg, err := NewConfig(ec2.NewBlackholeEC2MetadataClient())
9696
cfg.platformOverrides()
9797
assert.NoError(t, err)

0 commit comments

Comments
 (0)