Skip to content

Commit

Permalink
Expedited image manifest digest reporting (#4177)
Browse files Browse the repository at this point in the history
  • Loading branch information
amogh09 authored May 22, 2024
1 parent 58464c6 commit c763e4d
Show file tree
Hide file tree
Showing 43 changed files with 3,015 additions and 226 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ additional details on each available environment variable.
| `ECS_APPARMOR_CAPABLE` | `true` | Whether AppArmor is available on the container instance. | `false` | `false` |
| `ECS_ENGINE_TASK_CLEANUP_WAIT_DURATION` | 10m | Default time to wait to delete containers for a stopped task (see also `ECS_ENGINE_TASK_CLEANUP_WAIT_DURATION_JITTER`). If set to less than 1 second, the value is ignored. | 3h | 3h |
| `ECS_ENGINE_TASK_CLEANUP_WAIT_DURATION_JITTER` | 1h | Jitter value for the task engine cleanup wait duration. When specified, the actual cleanup wait duration time for each task will be the duration specified in `ECS_ENGINE_TASK_CLEANUP_WAIT_DURATION` plus a random duration between 0 and the jitter duration. | blank | blank |
| `ECS_MANIFEST_PULL_TIMEOUT` | 10m | Timeout before giving up on fetching image manifest for a container image. | 1m | 1m |
| `ECS_CONTAINER_STOP_TIMEOUT` | 10m | Instance scoped configuration for time to wait for the container to exit normally before being forcibly killed. | 30s | 30s |
| `ECS_CONTAINER_START_TIMEOUT` | 10m | Timeout before giving up on starting a container. | 3m | 8m |
| `ECS_CONTAINER_CREATE_TIMEOUT` | 10m | Timeout before giving up on creating a container. Minimum value is 1m. If user sets a value below minimum it will be set to min. | 4m | 4m |
Expand Down
15 changes: 15 additions & 0 deletions agent/api/container/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"time"

resourcestatus "github.com/aws/amazon-ecs-agent/agent/taskresource/status"
referenceutil "github.com/aws/amazon-ecs-agent/agent/utils/reference"
apicontainerstatus "github.com/aws/amazon-ecs-agent/ecs-agent/api/container/status"
apierrors "github.com/aws/amazon-ecs-agent/ecs-agent/api/errors"
"github.com/aws/amazon-ecs-agent/ecs-agent/credentials"
Expand Down Expand Up @@ -1521,3 +1522,17 @@ func (c *Container) GetImageName() string {
containerImage := strings.Split(c.Image, ":")[0]
return containerImage
}

// Checks if the container has a resolved image manifest digest.
// Always returns false for internal containers as those are out-of-scope of digest resolution.
func (c *Container) DigestResolved() bool {
return !c.IsInternal() && c.GetImageDigest() != ""
}

// Checks if the container's image requires manifest digest resolution.
// Manifest digest resolution is required if the container's image reference does not
// have a digest.
// Always returns false for internal containers as those are out-of-scope of digest resolution.
func (c *Container) DigestResolutionRequired() bool {
return !c.IsInternal() && referenceutil.GetDigestFromImageRef(c.Image) == ""
}
25 changes: 25 additions & 0 deletions agent/api/container/container_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1342,3 +1342,28 @@ func getContainer(hostConfig string, credentialSpecs []string) *Container {
c.CredentialSpecs = credentialSpecs
return c
}

func TestDigestResolved(t *testing.T) {
t.Run("never resolved for internal container", func(t *testing.T) {
assert.False(t, (&Container{Type: ContainerServiceConnectRelay}).DigestResolved())
})
t.Run("digest resolved if it is populated", func(t *testing.T) {
assert.True(t, (&Container{ImageDigest: "digest"}).DigestResolved())
})
t.Run("digest not resolved if it is not populated", func(t *testing.T) {
assert.False(t, (&Container{}).DigestResolved())
})
}

func TestDigestResolutionRequired(t *testing.T) {
t.Run("never required for internal containers", func(t *testing.T) {
assert.False(t, (&Container{Type: ContainerServiceConnectRelay}).DigestResolutionRequired())
})
t.Run("required if not found in image reference", func(t *testing.T) {
assert.True(t, (&Container{Image: "alpine"}).DigestResolutionRequired())
})
t.Run("not required if found in image reference", func(t *testing.T) {
image := "ubuntu@sha256:ed6d2c43c8fbcd3eaa44c9dab6d94cb346234476230dc1681227aa72d07181ee"
assert.False(t, (&Container{Image: image}).DigestResolutionRequired())
})
}
77 changes: 51 additions & 26 deletions agent/api/statechange.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ func NewTaskStateChangeEvent(task *apitask.Task, reason string) (TaskStateChange
return event, ErrShouldNotSendEvent{task.Arn}
}
taskKnownStatus := task.GetKnownStatus()
if !taskKnownStatus.BackendRecognized() {
if taskKnownStatus != apitaskstatus.TaskManifestPulled && !taskKnownStatus.BackendRecognized() {
return event, errors.Errorf(
"create task state change event api: status not recognized by ECS: %v",
taskKnownStatus)
Expand All @@ -140,6 +140,14 @@ func NewTaskStateChangeEvent(task *apitask.Task, reason string) (TaskStateChange
"create task state change event api: status [%s] already sent",
taskKnownStatus.String())
}
if taskKnownStatus == apitaskstatus.TaskManifestPulled && !task.HasAContainerWithResolvedDigest() {
return event, ErrShouldNotSendEvent{
fmt.Sprintf(
"create task state change event api: status %s not eligible for backend reporting as"+
" no digests were resolved",
apitaskstatus.TaskManifestPulled.String()),
}
}

event = TaskStateChange{
TaskARN: task.Arn,
Expand All @@ -161,11 +169,21 @@ func NewContainerStateChangeEvent(task *apitask.Task, cont *apicontainer.Contain
return event, err
}
contKnownStatus := cont.GetKnownStatus()
if !contKnownStatus.ShouldReportToBackend(cont.GetSteadyStateStatus()) {
if contKnownStatus != apicontainerstatus.ContainerManifestPulled &&
!contKnownStatus.ShouldReportToBackend(cont.GetSteadyStateStatus()) {
return event, ErrShouldNotSendEvent{fmt.Sprintf(
"create container state change event api: status not recognized by ECS: %v",
contKnownStatus)}
}
if contKnownStatus == apicontainerstatus.ContainerManifestPulled && !cont.DigestResolved() {
// Transition to MANIFEST_PULLED state is sent to the backend only to report a resolved
// image manifest digest. No need to generate an event if the digest was not resolved
// which could happen due to various reasons.
return event, ErrShouldNotSendEvent{fmt.Sprintf(
"create container state change event api:"+
" no need to send %s event as no resolved digests were found",
apicontainerstatus.ContainerManifestPulled.String())}
}
if cont.GetSentStatus() >= contKnownStatus {
return event, ErrShouldNotSendEvent{fmt.Sprintf(
"create container state change event api: status [%s] already sent for container %s, task %s",
Expand Down Expand Up @@ -196,7 +214,7 @@ func newUncheckedContainerStateChangeEvent(task *apitask.Task, cont *apicontaine
TaskArn: task.Arn,
ContainerName: cont.Name,
RuntimeID: cont.GetRuntimeID(),
Status: contKnownStatus.BackendStatus(cont.GetSteadyStateStatus()),
Status: containerStatusChangeStatus(contKnownStatus, cont.GetSteadyStateStatus()),
ExitCode: cont.GetKnownExitCode(),
PortBindings: portBindings,
ImageDigest: cont.GetImageDigest(),
Expand All @@ -206,6 +224,27 @@ func newUncheckedContainerStateChangeEvent(task *apitask.Task, cont *apicontaine
return event, nil
}

// Maps container known status to a suitable status for ContainerStateChange.
//
// Returns ContainerRunning if known status matches steady state status,
// returns knownStatus if it is ContainerManifestPulled or ContainerStopped,
// returns ContainerStatusNone for all other cases.
func containerStatusChangeStatus(
knownStatus apicontainerstatus.ContainerStatus,
steadyStateStatus apicontainerstatus.ContainerStatus,
) apicontainerstatus.ContainerStatus {
switch knownStatus {
case steadyStateStatus:
return apicontainerstatus.ContainerRunning
case apicontainerstatus.ContainerManifestPulled:
return apicontainerstatus.ContainerManifestPulled
case apicontainerstatus.ContainerStopped:
return apicontainerstatus.ContainerStopped
default:
return apicontainerstatus.ContainerStatusNone
}
}

// NewManagedAgentChangeEvent creates a new managedAgent change event to convey managed agent state changes
// returns error if the state change doesn't need to be sent to the ECS backend.
func NewManagedAgentChangeEvent(task *apitask.Task, cont *apicontainer.Container, managedAgentName string, reason string) (ManagedAgentStateChange, error) {
Expand Down Expand Up @@ -322,24 +361,6 @@ func (change *TaskStateChange) SetTaskTimestamps() {
}
}

// ShouldBeReported checks if the statechange should be reported to backend
func (change *TaskStateChange) ShouldBeReported() bool {
// Events that should be reported:
// 1. Normal task state change: RUNNING/STOPPED
// 2. Container state change, with task status in CREATED/RUNNING/STOPPED
// The task timestamp will be sent in both of the event type
// TODO Move the Attachment statechange check into this method
if change.Status == apitaskstatus.TaskRunning || change.Status == apitaskstatus.TaskStopped {
return true
}

if len(change.Containers) != 0 {
return true
}

return false
}

func (change *TaskStateChange) ToFields() logger.Fields {
fields := logger.Fields{
"eventType": "TaskStateChange",
Expand Down Expand Up @@ -494,19 +515,23 @@ func buildContainerStateChangePayload(change ContainerStateChange) (*ecsmodel.Co
statechange.ImageDigest = aws.String(change.ImageDigest)
}

stat := change.Status.String()
if stat != apicontainerstatus.ContainerStopped.String() && stat != apicontainerstatus.ContainerRunning.String() {
// TODO: This check already exists in NewContainerStateChangeEvent and shouldn't be repeated here; remove after verifying
stat := change.Status
if stat != apicontainerstatus.ContainerManifestPulled &&
stat != apicontainerstatus.ContainerStopped &&
stat != apicontainerstatus.ContainerRunning {
logger.Warn("Not submitting unsupported upstream container state", logger.Fields{
field.Status: stat,
field.ContainerName: change.ContainerName,
field.TaskARN: change.TaskArn,
})
return nil, nil
}
if stat == "DEAD" {
stat = apicontainerstatus.ContainerStopped.String()
// TODO: This check is probably redundant as String() method never returns "DEAD"; remove after verifying
if stat.String() == "DEAD" {
stat = apicontainerstatus.ContainerStopped
}
statechange.Status = aws.String(stat)
statechange.Status = aws.String(stat.BackendStatusString())

if change.ExitCode != nil {
exitCode := int64(aws.IntValue(change.ExitCode))
Expand Down
Loading

0 comments on commit c763e4d

Please sign in to comment.