Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Container Restart Policy feature #4264

Merged
merged 12 commits into from
Jul 29, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 62 additions & 6 deletions agent/api/container/container.go
Original file line number Diff line number Diff line change
@@ -24,12 +24,12 @@ import (

resourcestatus "github.com/aws/amazon-ecs-agent/agent/taskresource/status"
referenceutil "github.com/aws/amazon-ecs-agent/agent/utils/reference"
"github.com/aws/amazon-ecs-agent/ecs-agent/api/container/restart"
apicontainerstatus "github.com/aws/amazon-ecs-agent/ecs-agent/api/container/status"
apierrors "github.com/aws/amazon-ecs-agent/ecs-agent/api/errors"
"github.com/aws/amazon-ecs-agent/ecs-agent/credentials"
"github.com/aws/amazon-ecs-agent/ecs-agent/logger"
"github.com/aws/amazon-ecs-agent/ecs-agent/logger/field"

"github.com/aws/aws-sdk-go/aws"
"github.com/cihub/seelog"
"github.com/docker/docker/api/types"
@@ -323,9 +323,16 @@ type Container struct {
// pause container
ContainerTornDownUnsafe bool `json:"containerTornDown"`

createdAt time.Time
startedAt time.Time
finishedAt time.Time
createdAt time.Time
// StartedAtUnsafe specifies the started at time of the container.
// It is exposed outside this container package so that it is marshalled/unmarshalled in JSON body while
// saving state.
// NOTE: Do not access StartedAtUnsafe directly. Instead, use `GetStartedAt` and `SetStartedAt`.
StartedAtUnsafe time.Time `json:"startedAt,omitempty"`
// setStartedAtOnce is used to set the value of the container's started at time only the first time SetStartedAt is
// invoked.
setStartedAtOnce sync.Once
finishedAt time.Time

labels map[string]string

@@ -335,13 +342,31 @@ type Container struct {
ContainerPortSet map[int]struct{}
// ContainerPortRangeMap is a map of containerPortRange to its associated hostPortRange
ContainerPortRangeMap map[string]string

// RestartPolicy is an object representing the restart policy of the container
RestartPolicy *restart.RestartPolicy `json:"restartPolicy,omitempty"`
// RestartTracker tracks this container's restart policy metadata, such
// as restart count and last restart time. This is only initialized if the container
// has a restart policy defined and enabled.
RestartTracker *restart.RestartTracker `json:"restartTracker,omitempty"`
// RestartAggregationDataForStatsUnsafe specifies the restart aggregation data used for stats of the container.
// It is exposed outside this container package so that it is marshalled/unmarshalled in JSON body while
// saving state.
// NOTE: Do not access RestartAggregationDataForStatsUnsafe directly. Instead, use
// `GetRestartAggregationDataForStats` and `SetRestartAggregationDataForStats`.
RestartAggregationDataForStatsUnsafe ContainerRestartAggregationDataForStats `json:"RestartAggregationDataForStats,omitempty"`
}

type DependsOn struct {
ContainerName string `json:"containerName"`
Condition string `json:"condition"`
}

type ContainerRestartAggregationDataForStats struct {
LastRestartDetectedAt time.Time `json:"LastRestartDetectedAt,omitempty"`
LastStatBeforeLastRestart types.StatsJSON `json:"LastStatBeforeLastRestart,omitempty"`
}

// DockerContainer is a mapping between containers-as-docker-knows-them and
// containers-as-we-know-them.
// This is primarily used in DockerState, but lives here such that tasks and
@@ -628,6 +653,16 @@ func (c *Container) IsEssential() bool {
return c.Essential
}

// RestartPolicyEnabled returns whether the restart policy is defined and enabled
func (c *Container) RestartPolicyEnabled() bool {
c.lock.RLock()
defer c.lock.RUnlock()
if c.RestartPolicy == nil {
return false
}
return c.RestartPolicy.Enabled
}

// AWSLogAuthExecutionRole returns true if the auth is by execution role
func (c *Container) AWSLogAuthExecutionRole() bool {
return c.LogsAuthStrategy == awslogsAuthExecutionRole
@@ -653,7 +688,11 @@ func (c *Container) SetStartedAt(startedAt time.Time) {
c.lock.Lock()
defer c.lock.Unlock()

c.startedAt = startedAt
c.setStartedAtOnce.Do(func() {
if c.StartedAtUnsafe.IsZero() {
c.StartedAtUnsafe = startedAt
}
})
}

// SetFinishedAt sets the timestamp for container's stopped time
@@ -681,7 +720,7 @@ func (c *Container) GetStartedAt() time.Time {
c.lock.RLock()
defer c.lock.RUnlock()

return c.startedAt
return c.StartedAtUnsafe
}

// GetFinishedAt sets the timestamp for container's stopped time
@@ -1536,3 +1575,20 @@ func (c *Container) DigestResolved() bool {
func (c *Container) DigestResolutionRequired() bool {
return !c.IsInternal() && referenceutil.GetDigestFromImageRef(c.Image) == ""
}

// GetRestartAggregationDataForStats gets the restart aggregation data for stats of a container.
func (c *Container) GetRestartAggregationDataForStats() ContainerRestartAggregationDataForStats {
c.lock.RLock()
defer c.lock.RUnlock()

return c.RestartAggregationDataForStatsUnsafe
}

// SetRestartAggregationDataForStats sets the restart aggregation data for stats of a container.
func (c *Container) SetRestartAggregationDataForStats(
restartAggregationDataForStats ContainerRestartAggregationDataForStats) {
c.lock.Lock()
defer c.lock.Unlock()

c.RestartAggregationDataForStatsUnsafe = restartAggregationDataForStats
}
115 changes: 115 additions & 0 deletions agent/api/container/container_test.go
Original file line number Diff line number Diff line change
@@ -23,11 +23,14 @@ import (
"time"

resourcestatus "github.com/aws/amazon-ecs-agent/agent/taskresource/status"
"github.com/aws/amazon-ecs-agent/ecs-agent/api/container/restart"
apicontainerstatus "github.com/aws/amazon-ecs-agent/ecs-agent/api/container/status"
"github.com/docker/docker/api/types"

"github.com/aws/amazon-ecs-agent/agent/utils"
dockercontainer "github.com/docker/docker/api/types/container"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

type configPair struct {
@@ -1334,6 +1337,118 @@ func TestGetCredentialSpec(t *testing.T) {
}
}

func TestRestartPolicy(t *testing.T) {
testCases := []struct {
name string
container *Container
restartCount int
expectedEnabled bool
}{
{
name: "nil restart policy",
container: &Container{
RestartPolicy: nil,
},
restartCount: 0,
expectedEnabled: false,
},
{
name: "not enabled restart policy",
container: &Container{
RestartPolicy: &restart.RestartPolicy{},
},
restartCount: 0,
expectedEnabled: false,
},
{
name: "explicitly not enabled restart policy",
container: &Container{
RestartPolicy: &restart.RestartPolicy{
Enabled: false,
},
},
restartCount: 0,
expectedEnabled: false,
},
{
name: "enabled restart policy",
container: &Container{
RestartPolicy: &restart.RestartPolicy{
Enabled: true,
},
},
restartCount: 0,
expectedEnabled: true,
},
{
name: "enabled restart policy, record 5 restarts",
container: &Container{
RestartPolicy: &restart.RestartPolicy{
Enabled: true,
},
},
restartCount: 5,
expectedEnabled: true,
},
}

for _, tc := range testCases {
require.Equal(t, tc.expectedEnabled, tc.container.RestartPolicyEnabled())
if tc.container.RestartPolicyEnabled() {
tc.container.RestartTracker = restart.NewRestartTracker(*tc.container.RestartPolicy)
for i := 0; i < tc.restartCount; i++ {
tc.container.RestartTracker.RecordRestart()
}
require.Equal(t, tc.restartCount, tc.container.RestartTracker.GetRestartCount())
}
}
}

func TestGetAndSetStartedAt(t *testing.T) {
testTime := time.Date(1969, 12, 31, 23, 59, 59, 0, time.UTC)
c := &Container{}

// Test getting started at time when it has never been set is the zero value of time.
require.True(t, c.GetStartedAt().IsZero())

// Test setting started at time sets the started at time.
c.SetStartedAt(testTime)
require.Equal(t, testTime, c.GetStartedAt())

// Test setting started at time after it has already been set does not change the originally set started at time.
c.SetStartedAt(time.Now())
require.Equal(t, testTime, c.GetStartedAt())
}

func TestGetAndSetRestartAggregationDataForStats(t *testing.T) {
testTime := time.Date(1969, 12, 31, 23, 59, 59, 0, time.UTC)
testStatsJSON := types.StatsJSON{
Stats: types.Stats{
CPUStats: types.CPUStats{
CPUUsage: types.CPUUsage{
TotalUsage: 100,
},
},
MemoryStats: types.MemoryStats{
MaxUsage: 200,
},
},
}
testRestartAggregationDataForStats := ContainerRestartAggregationDataForStats{
LastRestartDetectedAt: testTime,
LastStatBeforeLastRestart: testStatsJSON,
}
c := &Container{}

// Test getting restart aggregation data for stats when it has never been set is the zero value of the
// ContainerRestartAggregationDataForStats struct.
require.Equal(t, ContainerRestartAggregationDataForStats{}, c.GetRestartAggregationDataForStats())

// Test setting restart aggregation data for stats sets the restart aggregation data for stats.
c.SetRestartAggregationDataForStats(testRestartAggregationDataForStats)
require.Equal(t, testRestartAggregationDataForStats, c.GetRestartAggregationDataForStats())
}

func getContainer(hostConfig string, credentialSpecs []string) *Container {
c := &Container{
Name: "c",
13 changes: 13 additions & 0 deletions agent/api/task/task.go
Original file line number Diff line number Diff line change
@@ -41,6 +41,7 @@ import (
taskresourcevolume "github.com/aws/amazon-ecs-agent/agent/taskresource/volume"
"github.com/aws/amazon-ecs-agent/agent/utils"
"github.com/aws/amazon-ecs-agent/ecs-agent/acs/model/ecsacs"
"github.com/aws/amazon-ecs-agent/ecs-agent/api/container/restart"
apicontainerstatus "github.com/aws/amazon-ecs-agent/ecs-agent/api/container/status"
"github.com/aws/amazon-ecs-agent/ecs-agent/api/ecs/model/ecs"
apierrors "github.com/aws/amazon-ecs-agent/ecs-agent/api/errors"
@@ -466,6 +467,8 @@ func (task *Task) PostUnmarshalTask(cfg *config.Config,
}
}

task.initRestartTrackers()

for _, opt := range options {
if err := opt(task); err != nil {
logger.Error("Could not apply task option", logger.Fields{
@@ -523,6 +526,16 @@ func (task *Task) initNetworkMode(acsTaskNetworkMode *string) {
}
}

// initRestartTrackers initializes the restart policy tracker for each container
// that has a restart policy configured and enabled.
func (task *Task) initRestartTrackers() {
for _, c := range task.Containers {
if c.RestartPolicyEnabled() {
c.RestartTracker = restart.NewRestartTracker(*c.RestartPolicy)
}
}
}

func (task *Task) initServiceConnectResources() error {
// TODO [SC]: ServiceConnectConfig will come from ACS. Adding this here for dev/testing purposes only Remove when
// ACS model is integrated
38 changes: 38 additions & 0 deletions agent/api/task/task_test.go
Original file line number Diff line number Diff line change
@@ -49,6 +49,7 @@ import (
"github.com/aws/amazon-ecs-agent/agent/utils"
"github.com/aws/amazon-ecs-agent/ecs-agent/acs/model/ecsacs"
apiresource "github.com/aws/amazon-ecs-agent/ecs-agent/api/attachment/resource"
"github.com/aws/amazon-ecs-agent/ecs-agent/api/container/restart"
apicontainerstatus "github.com/aws/amazon-ecs-agent/ecs-agent/api/container/status"
"github.com/aws/amazon-ecs-agent/ecs-agent/api/ecs/model/ecs"
apitaskstatus "github.com/aws/amazon-ecs-agent/ecs-agent/api/task/status"
@@ -3908,6 +3909,43 @@ func TestPostUnmarshalTaskEnvfiles(t *testing.T) {
task.Containers[0].TransitionDependenciesMap[apicontainerstatus.ContainerCreated].ResourceDependencies[0])
}

func TestPostUnmarshalTaskContainerRestartPolicy(t *testing.T) {
container := &apicontainer.Container{
Name: "containerName",
Image: "image:tag",
RestartPolicy: &restart.RestartPolicy{
Enabled: true,
},
TransitionDependenciesMap: make(map[apicontainerstatus.ContainerStatus]apicontainer.TransitionDependencySet),
}

task := &Task{
Arn: testTaskARN,
ResourcesMapUnsafe: make(map[string][]taskresource.TaskResource),
Containers: []*apicontainer.Container{container},
}

ctrl := gomock.NewController(t)
defer ctrl.Finish()

cfg := &config.Config{}
credentialsManager := mock_credentials.NewMockManager(ctrl)
resFields := &taskresource.ResourceFields{
ResourceFieldsCommon: &taskresource.ResourceFieldsCommon{
CredentialsManager: credentialsManager,
},
}

err := task.PostUnmarshalTask(cfg, credentialsManager, resFields, nil, nil)
assert.NoError(t, err)

for _, c := range task.Containers {
assert.True(t, c.RestartPolicyEnabled())
assert.NotNil(t, c.RestartTracker)
assert.Equal(t, 0, c.RestartTracker.GetRestartCount())
}
}

func TestInitializeAndGetEnvfilesResource(t *testing.T) {
envfile1 := apicontainer.EnvironmentFile{
Value: "s3://bucket/envfile1",
2 changes: 1 addition & 1 deletion agent/app/agent.go
Original file line number Diff line number Diff line change
@@ -918,7 +918,7 @@ func (agent *ecsAgent) startAsyncRoutines(
telemetryMessages := make(chan ecstcs.TelemetryMessage, telemetryChannelDefaultBufferSize)
healthMessages := make(chan ecstcs.HealthMessage, telemetryChannelDefaultBufferSize)

statsEngine := stats.NewDockerStatsEngine(agent.cfg, agent.dockerClient, containerChangeEventStream, telemetryMessages, healthMessages)
statsEngine := stats.NewDockerStatsEngine(agent.cfg, agent.dockerClient, containerChangeEventStream, telemetryMessages, healthMessages, agent.dataClient)

// Start serving the endpoint to fetch IAM Role credentials and other task metadata
if agent.cfg.TaskMetadataAZDisabled {
4 changes: 4 additions & 0 deletions agent/app/agent_capability.go
Original file line number Diff line number Diff line change
@@ -81,6 +81,7 @@ const (
capabilityServiceConnect = "service-connect-v1"
capabilityGpuDriverVersion = "gpu-driver-version"
capabilityEBSTaskAttach = "storage.ebs-task-volume-attach"
capabilityContainerRestartPolicy = "container-restart-policy"

// network capabilities, going forward, please append "network." prefix to any new networking capability we introduce
networkCapabilityPrefix = "network."
@@ -110,6 +111,8 @@ var (
capabilityEnvFilesS3,
// support container port range in container definition - port mapping field
capabilityContainerPortRange,
// support container restart policy
capabilityContainerRestartPolicy,
}
// use empty struct as value type to simulate set
capabilityExecInvalidSsmVersions = map[string]struct{}{}
@@ -194,6 +197,7 @@ var (
// ecs.capability.external
// ecs.capability.service-connect-v1
// ecs.capability.network.container-port-range
// ecs.capability.container-restart-policy
func (agent *ecsAgent) capabilities() ([]*ecs.Attribute, error) {
var capabilities []*ecs.Attribute

2 changes: 2 additions & 0 deletions agent/app/agent_capability_test.go
Original file line number Diff line number Diff line change
@@ -143,6 +143,7 @@ func TestCapabilities(t *testing.T) {
attributePrefix + capabilityExec,
attributePrefix + capabilityServiceConnect,
attributePrefix + capabilityContainerPortRange,
attributePrefix + capabilityContainerRestartPolicy,
}

var expectedCapabilities []*ecs.Attribute
@@ -1314,6 +1315,7 @@ func TestCapabilitiesNoServiceConnect(t *testing.T) {
attributePrefix + taskENIBlockInstanceMetadataAttributeSuffix,
attributePrefix + capabilityExec,
attributePrefix + capabilityContainerPortRange,
attributePrefix + capabilityContainerRestartPolicy,
}

var expectedCapabilities []*ecs.Attribute
Loading