Skip to content

Commit

Permalink
Structured logging for docker image manager (aws#3696)
Browse files Browse the repository at this point in the history
* Structured logging for docker image manager

* Add some reused fields to logger/field/constants.go

* Add missing 'imageTag' field from err message
  • Loading branch information
sparrc authored and Richa Gangwar committed May 18, 2023
1 parent 97c31b9 commit f9706fc
Show file tree
Hide file tree
Showing 7 changed files with 170 additions and 93 deletions.
16 changes: 16 additions & 0 deletions agent/api/container/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ import (
apierrors "github.com/aws/amazon-ecs-agent/agent/api/errors"
resourcestatus "github.com/aws/amazon-ecs-agent/agent/taskresource/status"
"github.com/aws/amazon-ecs-agent/ecs-agent/credentials"
"github.com/aws/amazon-ecs-agent/ecs-agent/logger"
"github.com/aws/amazon-ecs-agent/ecs-agent/logger/field"

"github.com/aws/aws-sdk-go/aws"
"github.com/cihub/seelog"
Expand Down Expand Up @@ -509,6 +511,20 @@ func (c *Container) String() string {
return ret
}

func (c *Container) Fields() logger.Fields {
exitCode := "nil"
if c.GetKnownExitCode() != nil {
exitCode = strconv.Itoa(*c.GetKnownExitCode())
}
return logger.Fields{
field.ContainerName: c.Name,
field.ContainerImage: c.Image,
"containerKnownStatus": c.GetKnownStatus().String(),
"containerDesiredStatus": c.GetDesiredStatus().String(),
field.ContainerExitCode: exitCode,
}
}

// GetSteadyStateStatus returns the steady state status for the container. If
// Container.steadyState is not initialized, the default steady state status
// defined by `defaultContainerSteadyStateStatus` is returned. In awsvpc, the 'pause'
Expand Down
95 changes: 58 additions & 37 deletions agent/engine/docker_image_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ import (
"github.com/aws/amazon-ecs-agent/agent/dockerclient/dockerapi"
"github.com/aws/amazon-ecs-agent/agent/engine/dockerstate"
"github.com/aws/amazon-ecs-agent/agent/engine/image"
"github.com/cihub/seelog"
)

const (
Expand Down Expand Up @@ -152,7 +151,9 @@ func (imageManager *dockerImageManager) RecordContainerReference(container *apic
// Inspect image for obtaining Container's Image ID
imageInspected, err := imageManager.client.InspectImage(container.Image)
if err != nil {
seelog.Errorf("Error inspecting image %v: %v", container.Image, err)
fields := container.Fields()
fields[field.Error] = err
logger.Error("Error inspecting image", fields)
return err
}
container.ImageID = imageInspected.ID
Expand All @@ -177,7 +178,7 @@ func (imageManager *dockerImageManager) fetchRepoDigest(imageInspected *types.Im
resultRepoDigest = repoDigestSplitList[1]
return resultRepoDigest
} else {
seelog.Warnf("ImageRepoDigest doesn't have the right format: %v", imageRepoDigest)
logger.Warn(fmt.Sprintf("ImageRepoDigest doesn't have the right format: %v", imageRepoDigest), container.Fields())
return ""
}
}
Expand Down Expand Up @@ -272,7 +273,7 @@ func (imageManager *dockerImageManager) removeImageState(imageStateToBeRemoved *
for i, imageState := range imageManager.imageStates {
if imageState.Image.ImageID == imageStateToBeRemoved.Image.ImageID {
// Image State found; hence remove it
seelog.Infof("Removing Image State: [%s] from Image Manager", imageState.String())
logger.Debug("Image Manager: removing image state", imageState.Fields())
imageManager.imageStates = append(imageManager.imageStates[:i], imageManager.imageStates[i+1:]...)
imageManager.removeImageStateData(imageState.Image.ImageID)
return
Expand All @@ -282,14 +283,14 @@ func (imageManager *dockerImageManager) removeImageState(imageStateToBeRemoved *

func (imageManager *dockerImageManager) getCandidateImagesForDeletion() []*image.ImageState {
if len(imageManager.imageStatesConsideredForDeletion) < 1 {
seelog.Debugf("Image Manager: Empty state!")
logger.Debug("Image Manager: Empty state!")
// no image states present in image manager
return nil
}
var imagesForDeletion []*image.ImageState
for _, imageState := range imageManager.imageStatesConsideredForDeletion {
if imageManager.isImageOldEnough(imageState) && imageState.HasNoAssociatedContainers() {
seelog.Infof("Candidate image for deletion: [%s]", imageState.String())
logger.Debug("Candidate image for deletion", imageState.Fields())
imagesForDeletion = append(imagesForDeletion, imageState)
}
}
Expand Down Expand Up @@ -346,7 +347,7 @@ func (imageManager *dockerImageManager) StartImageCleanupProcess(ctx context.Con
// If the image pull behavior is prefer cached, don't clean up the image,
// because the cached image is needed.
if imageManager.imagePullBehavior == config.ImagePullPreferCachedBehavior {
seelog.Info("Pull behavior is set to always use cache. Disabling cleanup")
logger.Info("Pull behavior is set to always use cache. Disabling cleanup")
return
}
// passing the cleanup interval as argument which would help during testing
Expand All @@ -367,10 +368,10 @@ func (imageManager *dockerImageManager) performPeriodicImageCleanup(ctx context.
}

func (imageManager *dockerImageManager) removeUnusedImages(ctx context.Context) {
seelog.Debug("Attempting to obtain ImagePullDeleteLock for removing images")
logger.Debug("Attempting to obtain ImagePullDeleteLock for removing images")
ImagePullDeleteLock.Lock()
seelog.Debug("Obtained ImagePullDeleteLock for removing images")
defer seelog.Debug("Released ImagePullDeleteLock after removing images")
logger.Debug("Obtained ImagePullDeleteLock for removing images")
defer logger.Debug("Released ImagePullDeleteLock after removing images")
defer ImagePullDeleteLock.Unlock()

imageManager.updateLock.Lock()
Expand All @@ -383,7 +384,9 @@ func (imageManager *dockerImageManager) removeUnusedImages(ctx context.Context)
err := imageManager.removeLeastRecentlyUsedImage(ctx)
numECSImagesDeleted = i
if err != nil {
seelog.Infof("End of eligible images for deletion: %v; Still have %d image states being managed", err, len(imageManager.getAllImageStates()))
logger.Info("End of eligible images for deletion", logger.Fields{
"managedImagesRemaining": len(imageManager.getAllImageStates()),
})
break
}
}
Expand All @@ -399,20 +402,20 @@ func (imageManager *dockerImageManager) removeUnusedImages(ctx context.Context)
func (imageManager *dockerImageManager) removeNonECSContainers(ctx context.Context) {
nonECSContainersIDs, err := imageManager.getNonECSContainerIDs(ctx)
if err != nil {
seelog.Errorf("Error getting non-ECS container IDs: %v", err)
logger.Error(fmt.Sprintf("Error getting non-ECS container IDs: %v", err))
}
var nonECSContainerRemoveAvailableIDs []string
for _, id := range nonECSContainersIDs {
response, icErr := imageManager.client.InspectContainer(ctx, id, dockerclient.InspectContainerTimeout)
if icErr != nil {
seelog.Errorf("Error inspecting non-ECS container id: %s - %v", id, icErr)
logger.Error(fmt.Sprintf("Error inspecting non-ECS container id: %s - %v", id, icErr))
continue
}

seelog.Debugf("Inspecting Non-ECS Container ID [%s] for removal, Finished [%s] Status [%s]", id, response.State.FinishedAt, response.State.Status)
logger.Debug(fmt.Sprintf("Inspecting Non-ECS Container ID [%s] for removal, Finished [%s] Status [%s]", id, response.State.FinishedAt, response.State.Status))
finishedTime, err := time.Parse(time.RFC3339Nano, response.State.FinishedAt)
if err != nil {
seelog.Errorf("Error parsing time string for container. id: %s, time: %s err: %s", id, response.State.FinishedAt, err)
logger.Error(fmt.Sprintf("Error parsing time string for container. id: %s, time: %s err: %s", id, response.State.FinishedAt, err))
continue
}

Expand All @@ -428,13 +431,17 @@ func (imageManager *dockerImageManager) removeNonECSContainers(ctx context.Conte
if numNonECSContainerDeleted == imageManager.numNonECSContainersToDelete {
break
}
seelog.Debugf("Removing non-ECS Container ID %s", id)
logger.Debug("Removing non-ECS container", logger.Fields{
"runtimeID": id,
})
err := imageManager.client.RemoveContainer(ctx, id, dockerclient.RemoveContainerTimeout)
if err == nil {
seelog.Infof("Removed Container ID: %s", id)
logger.Info("Removed non-ECS container", logger.Fields{
"runtimeID": id,
})
numNonECSContainerDeleted++
} else {
seelog.Errorf("Error Removing Container ID %s - %s", id, err)
logger.Error(fmt.Sprintf("Error Removing Container ID %s - %s", id, err))
continue
}
}
Expand Down Expand Up @@ -480,24 +487,32 @@ func (imageManager *dockerImageManager) removeNonECSImages(ctx context.Context,
if !imageManager.nonECSImageOldEnough(image) {
continue
}
fields := logger.Fields{
field.ImageID: image.ImageID,
field.ImageSizeBytes: image.Size,
"repoTags": image.RepoTags,
}
if len(image.RepoTags) > 1 {
seelog.Debugf("Non-ECS image has more than one tag Image: %s (Tags: %s)", image.ImageID, image.RepoTags)
logger.Debug("Non-ECS image has more than one tag", fields)
for _, tag := range image.RepoTags {
err := imageManager.client.RemoveImage(ctx, tag, dockerclient.RemoveImageTimeout)
if err != nil {
seelog.Errorf("Error removing RepoTag (ImageID: %s, Tag: %s) %v", image.ImageID, tag, err)
logger.Error("Error removing non-ECS RepoTag", fields, logger.Fields{
field.Error: err,
"imageTag": tag,
})
} else {
seelog.Infof("Image Tag Removed: %s (ImageID: %s)", tag, image.ImageID)
logger.Info("Non-ECS image tag removed", fields, logger.Fields{"imageTag": tag})
numImagesAlreadyDeleted++
}
}
} else {
seelog.Debugf("Removing non-ECS Image: %s (Tags: %s)", image.ImageID, image.RepoTags)
logger.Debug("Removing non-ECS image", fields)
err := imageManager.client.RemoveImage(ctx, image.ImageID, dockerclient.RemoveImageTimeout)
if err != nil {
seelog.Errorf("Error removing Image %s (Tags: %s) - %v", image.ImageID, image.RepoTags, err)
logger.Error("Error removing non-ECS image", fields, logger.Fields{field.Error: err})
} else {
seelog.Infof("Image removed: %s (Tags: %s)", image.ImageID, image.RepoTags)
logger.Info("Non-ECS image removed", fields)
numImagesAlreadyDeleted++
}
}
Expand All @@ -512,13 +527,14 @@ func (imageManager *dockerImageManager) getNonECSImages(ctx context.Context) []I
for _, imageID := range r.ImageIDs {
resp, err := imageManager.client.InspectImage(imageID)
if err != nil {
seelog.Errorf("Error inspecting non-ECS image: (ImageID: %s), %s", imageID, err)
logger.Error("Error inspecting non-ECS image", logger.Fields{field.ImageID: imageID, field.Error: err})
continue
}
createTime := time.Time{}
createTime, err = time.Parse(time.RFC3339, resp.Created)
if err != nil {
seelog.Warnf("Error parse the inspected non-ECS image created time (ImageID: %s), %v", imageID, err)
logger.Warn("Error parsing the inspected non-ECS image created time.",
logger.Fields{field.ImageID: imageID, field.Error: err})
}
allImages = append(allImages,
ImageWithSizeID{
Expand Down Expand Up @@ -588,13 +604,13 @@ func exclude(allList []string, exclusionList []string) []string {

func (imageManager *dockerImageManager) imagesConsiderForDeletion(allImageStates []*image.ImageState) map[string]*image.ImageState {
var imagesConsiderForDeletionMap = make(map[string]*image.ImageState)
seelog.Info("Begin building map of eligible unused images for deletion")
logger.Debug("Begin building map of eligible unused images for deletion")
for _, imageState := range allImageStates {
if imageManager.isExcludedFromCleanup(imageState) {
//imageState that we want to keep
seelog.Debugf("Image excluded from deletion: [%s]", imageState.String())
logger.Debug("Image excluded from deletion", imageState.Fields())
} else {
seelog.Debugf("Image going to be considered for deletion: [%s]", imageState.String())
logger.Debug("Image going to be considered for deletion", imageState.Fields())
imagesConsiderForDeletionMap[imageState.Image.ImageID] = imageState
}
}
Expand All @@ -617,17 +633,18 @@ func (imageManager *dockerImageManager) removeLeastRecentlyUsedImage(ctx context
if leastRecentlyUsedImage == nil {
return fmt.Errorf("No more eligible images for deletion")
}
logger.Info("Image ready for deletion", leastRecentlyUsedImage.Fields())
imageManager.removeImage(ctx, leastRecentlyUsedImage)
return nil
}

func (imageManager *dockerImageManager) getUnusedImageForDeletion() *image.ImageState {
candidateImageStatesForDeletion := imageManager.getCandidateImagesForDeletion()
if len(candidateImageStatesForDeletion) < 1 {
seelog.Infof("No eligible images for deletion for this cleanup cycle")
logger.Debug("No eligible images for deletion for this cleanup cycle")
return nil
}
seelog.Infof("Found %d eligible images for deletion", len(candidateImageStatesForDeletion))
logger.Debug(fmt.Sprintf("Found %d eligible images for deletion", len(candidateImageStatesForDeletion)))
return imageManager.getLeastRecentlyUsedImage(candidateImageStatesForDeletion)
}

Expand All @@ -648,24 +665,28 @@ func (imageManager *dockerImageManager) removeImage(ctx context.Context, leastRe

func (imageManager *dockerImageManager) deleteImage(ctx context.Context, imageID string, imageState *image.ImageState) {
if imageID == "" {
seelog.Errorf("Image ID to be deleted is null")
var fields logger.Fields
if imageState != nil {
fields = imageState.Fields()
}
logger.Error("Image ID to be deleted is null", fields)
return
}
seelog.Infof("Removing Image: %s", imageID)
logger.Debug(fmt.Sprintf("Removing Image: %s", imageID), imageState.Fields())
err := imageManager.client.RemoveImage(ctx, imageID, dockerclient.RemoveImageTimeout)
if err != nil {
if strings.Contains(strings.ToLower(err.Error()), imageNotFoundForDeletionError) {
seelog.Errorf("Image already removed from the instance: %v", err)
logger.Error(fmt.Sprintf("Image already removed from the instance: %v", imageID), imageState.Fields())
} else {
seelog.Errorf("Error removing Image %v - %v", imageID, err)
logger.Error(fmt.Sprintf("Error removing Image %v", imageID), imageState.Fields(), logger.Fields{field.Error: err})
delete(imageManager.imageStatesConsideredForDeletion, imageState.Image.ImageID)
return
}
}
seelog.Infof("Image removed: %v", imageID)
logger.Info(fmt.Sprintf("Image removed: %v", imageID), imageState.Fields())
imageState.RemoveImageName(imageID)
if len(imageState.Image.Names) == 0 {
seelog.Infof("Cleaning up all tracking information for image %s as it has zero references", imageID)
logger.Info(fmt.Sprintf("Cleaning up all tracking information for image %s as it has zero references", imageID), imageState.Fields())
delete(imageManager.imageStatesConsideredForDeletion, imageState.Image.ImageID)
imageManager.removeImageState(imageState)
imageManager.state.RemoveImageState(imageState)
Expand Down
5 changes: 2 additions & 3 deletions agent/engine/docker_task_engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -2090,9 +2090,8 @@ func (engine *DockerTaskEngine) stopDockerContainer(dockerID, containerName stri
}

func (engine *DockerTaskEngine) removeContainer(task *apitask.Task, container *apicontainer.Container) error {
logger.Info("Removing container", logger.Fields{
field.TaskID: task.GetID(),
field.Container: container.Name,
logger.Info("Removing container", container.Fields(), logger.Fields{
field.TaskID: task.GetID(),
})
dockerID, err := engine.getDockerID(task, container)
if err != nil {
Expand Down
24 changes: 23 additions & 1 deletion agent/engine/image/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,5 +188,27 @@ func (imageState *ImageState) String() string {
image = imageState.Image.String()
}
return fmt.Sprintf("Image: [%s] referenced by %d containers; PulledAt: %s; LastUsedAt: %s; PullSucceeded: %t",
image, len(imageState.Containers), imageState.PulledAt.String(), imageState.LastUsedAt.String(), imageState.PullSucceeded)
image, len(imageState.Containers), imageState.PulledAt.UTC().Format(time.RFC3339),
imageState.LastUsedAt.UTC().Format(time.RFC3339), imageState.PullSucceeded)
}

func (imageState *ImageState) Fields() logger.Fields {
imageID := ""
names := []string{}
size := int64(-1)
if imageState.Image != nil {
imageID = imageState.Image.ImageID
names = imageState.Image.Names
size = imageState.Image.Size
}

return logger.Fields{
field.ImageID: imageID,
field.ImageNames: names,
field.ImageSizeBytes: size,
"referencedBy": len(imageState.Containers),
field.ImagePulledAt: imageState.PulledAt.UTC().Format(time.RFC3339),
field.ImageLastUsedAt: imageState.LastUsedAt.UTC().Format(time.RFC3339),
field.ImagePullSucceeded: imageState.PullSucceeded,
}
}
5 changes: 3 additions & 2 deletions agent/stats/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"github.com/aws/amazon-ecs-agent/agent/dockerclient/dockerapi"
"github.com/aws/amazon-ecs-agent/agent/stats/resolver"
"github.com/aws/amazon-ecs-agent/agent/utils/retry"
"github.com/aws/amazon-ecs-agent/ecs-agent/logger"
"github.com/cihub/seelog"
)

Expand Down Expand Up @@ -74,7 +75,7 @@ func (container *StatsContainer) collect() {
err := container.processStatsStream()
select {
case <-container.ctx.Done():
seelog.Infof("Container [%s]: Stopping stats collection", dockerID)
logger.Info("Stopping container stats collection", logger.Fields{"runtimeID": dockerID})
return
default:
if err != nil {
Expand All @@ -97,7 +98,7 @@ func (container *StatsContainer) collect() {
container.StopStatsCollection()
return
} else if terminal {
seelog.Infof("Container [%s]: container is terminal, stopping stats collection", dockerID)
logger.Info("Container is terminal, stopping stats collection", logger.Fields{"runtimeID": dockerID})
container.StopStatsCollection()
return
}
Expand Down
Loading

0 comments on commit f9706fc

Please sign in to comment.