From 3eb2ca8edf3a5f6b9022f0bc55c77803775ec67e Mon Sep 17 00:00:00 2001 From: Vinothkumar Siddharth Date: Fri, 17 Mar 2017 16:01:50 -0700 Subject: [PATCH] Fix concurrent writes during image cleanup This patch inspects the cleanup and resolves the inherent concurrent map write issue reported. A unit test has been added to increase confidence in the fix. Fixes #707 Signed-off-by: Vinothkumar Siddharth --- agent/engine/docker_image_manager.go | 8 +-- agent/engine/docker_image_manager_test.go | 70 +++++++++++++++++++++++ 2 files changed, 74 insertions(+), 4 deletions(-) diff --git a/agent/engine/docker_image_manager.go b/agent/engine/docker_image_manager.go index 1e572fe25fd..718d5d8ab87 100644 --- a/agent/engine/docker_image_manager.go +++ b/agent/engine/docker_image_manager.go @@ -193,8 +193,6 @@ func (imageManager *dockerImageManager) getImageState(containerImageID string) ( // removeImageState removes the imageState from the list of imageState objects in ImageManager func (imageManager *dockerImageManager) removeImageState(imageStateToBeRemoved *image.ImageState) { - imageManager.updateLock.Lock() - defer imageManager.updateLock.Unlock() for i, imageState := range imageManager.imageStates { if imageState.Image.ImageID == imageStateToBeRemoved.Image.ImageID { // Image State found; hence remove it @@ -277,6 +275,9 @@ func (imageManager *dockerImageManager) performPeriodicImageCleanup(ctx context. 
} func (imageManager *dockerImageManager) removeUnusedImages() { + seelog.Infof("Begin building map of eligible unused images for deletion") + imageManager.updateLock.Lock() + defer imageManager.updateLock.Unlock() imageManager.imageStatesConsideredForDeletion = make(map[string]*image.ImageState) for _, imageState := range imageManager.getAllImageStates() { imageManager.imageStatesConsideredForDeletion[imageState.Image.ImageID] = imageState @@ -305,8 +306,6 @@ func (imageManager *dockerImageManager) removeLeastRecentlyUsedImage() error { } func (imageManager *dockerImageManager) getUnusedImageForDeletion() *image.ImageState { - imageManager.updateLock.RLock() - defer imageManager.updateLock.RUnlock() candidateImageStatesForDeletion := imageManager.getCandidateImagesForDeletion() if len(candidateImageStatesForDeletion) < 1 { seelog.Infof("No eligible images for deletion for this cleanup cycle") @@ -350,6 +349,7 @@ func (imageManager *dockerImageManager) deleteImage(imageID string, imageState * seelog.Infof("Image removed: %v", imageID) imageState.RemoveImageName(imageID) if len(imageState.Image.Names) == 0 { + seelog.Infof("Cleaning up all tracking information for image %s as it has zero references", imageID) delete(imageManager.imageStatesConsideredForDeletion, imageState.Image.ImageID) imageManager.removeImageState(imageState) imageManager.state.RemoveImageState(imageState) diff --git a/agent/engine/docker_image_manager_test.go b/agent/engine/docker_image_manager_test.go index f05789b7b1c..3500fe9845c 100644 --- a/agent/engine/docker_image_manager_test.go +++ b/agent/engine/docker_image_manager_test.go @@ -17,6 +17,7 @@ package engine import ( "errors" "reflect" + "sync" "testing" "time" @@ -25,8 +26,11 @@ import ( "github.com/aws/amazon-ecs-agent/agent/engine/dockerstate" "github.com/aws/amazon-ecs-agent/agent/engine/image" "github.com/aws/amazon-ecs-agent/agent/statemanager" + docker "github.com/fsouza/go-dockerclient" "github.com/golang/mock/gomock" + 
"github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "golang.org/x/net/context" ) @@ -951,3 +955,69 @@ func TestGetImageStateFromImageNameNoImageState(t *testing.T) { t.Error("Incorrect image state retrieved by image name") } } + +// TestConcurrentRemoveUnusedImages checks for concurrent map writes +// in the imageManager +func TestConcurrentRemoveUnusedImages(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + client := NewMockDockerClient(ctrl) + + imageManager := &dockerImageManager{ + client: client, + state: dockerstate.NewTaskEngineState(), + minimumAgeBeforeDeletion: config.DefaultImageDeletionAge, + numImagesToDelete: config.DefaultNumImagesToDeletePerCycle, + imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval, + } + + imageManager.SetSaver(statemanager.NewNoopStateManager()) + container := &api.Container{ + Name: "testContainer", + Image: "testContainerImage", + } + sourceImage := &image.Image{ + ImageID: "sha256:qwerty", + } + sourceImage.Names = append(sourceImage.Names, container.Image) + imageInspected := &docker.Image{ + ID: "sha256:qwerty", + } + client.EXPECT().InspectImage(container.Image).Return(imageInspected, nil).AnyTimes() + err := imageManager.RecordContainerReference(container) + if err != nil { + t.Error("Error in adding container to an existing image state") + } + require.Equal(t, 1, len(imageManager.imageStates)) + + // Remove container reference from image state to trigger cleanup + err = imageManager.RemoveContainerReferenceFromImageState(container) + assert.NoError(t, err) + + imageState, _ := imageManager.getImageState(imageInspected.ID) + imageState.PulledAt = time.Now().AddDate(0, -2, 0) + imageState.LastUsedAt = time.Now().AddDate(0, -2, 0) + + client.EXPECT().RemoveImage(container.Image, removeImageTimeout).Return(nil) + require.Equal(t, 1, len(imageManager.imageStates)) + + // We create 1000 goroutines and then perform a channel close + // to simulate the concurrent map 
write problem + numRoutines := 1000 + var waitGroup sync.WaitGroup + waitGroup.Add(numRoutines) + + ok := make(chan bool) + + for i := 0; i < numRoutines; i++ { + go func() { + <-ok + imageManager.removeUnusedImages() + waitGroup.Done() + }() + } + + close(ok) + waitGroup.Wait() + require.Equal(t, 0, len(imageManager.imageStates)) +}