Skip to content

Commit

Permalink
Fix concurrent writes during image cleanup
Browse files Browse the repository at this point in the history
This patch inspects the cleanup path and resolves the reported concurrent map
write issue. A unit test has been added to increase confidence in the fix.

Fixes #707

Signed-off-by: Vinothkumar Siddharth <[email protected]>
  • Loading branch information
vsiddharth committed May 23, 2017
1 parent 737db6b commit b019dfb
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 4 deletions.
8 changes: 4 additions & 4 deletions agent/engine/docker_image_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,8 +193,6 @@ func (imageManager *dockerImageManager) getImageState(containerImageID string) (

// removeImageState removes the imageState from the list of imageState objects in ImageManager
func (imageManager *dockerImageManager) removeImageState(imageStateToBeRemoved *image.ImageState) {
imageManager.updateLock.Lock()
defer imageManager.updateLock.Unlock()
for i, imageState := range imageManager.imageStates {
if imageState.Image.ImageID == imageStateToBeRemoved.Image.ImageID {
// Image State found; hence remove it
Expand Down Expand Up @@ -277,6 +275,9 @@ func (imageManager *dockerImageManager) performPeriodicImageCleanup(ctx context.
}

func (imageManager *dockerImageManager) removeUnusedImages() {
seelog.Infof("Begin building map of eligible unused images for deletion")
imageManager.updateLock.Lock()
defer imageManager.updateLock.Unlock()
imageManager.imageStatesConsideredForDeletion = make(map[string]*image.ImageState)
for _, imageState := range imageManager.getAllImageStates() {
imageManager.imageStatesConsideredForDeletion[imageState.Image.ImageID] = imageState
Expand Down Expand Up @@ -305,8 +306,6 @@ func (imageManager *dockerImageManager) removeLeastRecentlyUsedImage() error {
}

func (imageManager *dockerImageManager) getUnusedImageForDeletion() *image.ImageState {
imageManager.updateLock.RLock()
defer imageManager.updateLock.RUnlock()
candidateImageStatesForDeletion := imageManager.getCandidateImagesForDeletion()
if len(candidateImageStatesForDeletion) < 1 {
seelog.Infof("No eligible images for deletion for this cleanup cycle")
Expand Down Expand Up @@ -350,6 +349,7 @@ func (imageManager *dockerImageManager) deleteImage(imageID string, imageState *
seelog.Infof("Image removed: %v", imageID)
imageState.RemoveImageName(imageID)
if len(imageState.Image.Names) == 0 {
seelog.Infof("Cleaning up all tracking information for image %s as it has zero references", imageID)
delete(imageManager.imageStatesConsideredForDeletion, imageState.Image.ImageID)
imageManager.removeImageState(imageState)
imageManager.state.RemoveImageState(imageState)
Expand Down
70 changes: 70 additions & 0 deletions agent/engine/docker_image_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package engine
import (
"errors"
"reflect"
"sync"
"testing"
"time"

Expand All @@ -25,8 +26,11 @@ import (
"github.com/aws/amazon-ecs-agent/agent/engine/dockerstate"
"github.com/aws/amazon-ecs-agent/agent/engine/image"
"github.com/aws/amazon-ecs-agent/agent/statemanager"

docker "github.com/fsouza/go-dockerclient"
"github.com/golang/mock/gomock"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"golang.org/x/net/context"
)

Expand Down Expand Up @@ -951,3 +955,69 @@ func TestGetImageStateFromImageNameNoImageState(t *testing.T) {
t.Error("Incorrect image state retrieved by image name")
}
}

// TestConcurrentRemoveUnusedImages verifies that many simultaneous calls to
// removeUnusedImages do not trigger a "concurrent map writes" runtime fault
// in the imageManager. Run with -race for maximum signal.
func TestConcurrentRemoveUnusedImages(t *testing.T) {
	ctrl := gomock.NewController(t)
	defer ctrl.Finish()
	client := NewMockDockerClient(ctrl)

	imageManager := &dockerImageManager{
		client:                   client,
		state:                    dockerstate.NewTaskEngineState(),
		minimumAgeBeforeDeletion: config.DefaultImageDeletionAge,
		numImagesToDelete:        config.DefaultNumImagesToDeletePerCycle,
		imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval,
	}

	imageManager.SetSaver(statemanager.NewNoopStateManager())
	container := &api.Container{
		Name:  "testContainer",
		Image: "testContainerImage",
	}
	sourceImage := &image.Image{
		ImageID: "sha256:qwerty",
	}
	sourceImage.Names = append(sourceImage.Names, container.Image)
	imageInspected := &docker.Image{
		ID: "sha256:qwerty",
	}
	client.EXPECT().InspectImage(container.Image).Return(imageInspected, nil).AnyTimes()

	// Use require so the test halts immediately on setup failure instead of
	// continuing with inconsistent state (t.Error does not stop execution).
	err := imageManager.RecordContainerReference(container)
	require.NoError(t, err, "adding container reference to image state")
	require.Equal(t, 1, len(imageManager.imageStates))

	// Drop the container reference so the image becomes unused and therefore
	// eligible for cleanup.
	err = imageManager.RemoveContainerReferenceFromImageState(container)
	require.NoError(t, err)

	// Age the image past the minimum deletion age so the cleanup pass will
	// actually select it.
	imageState, _ := imageManager.getImageState(imageInspected.ID)
	imageState.PulledAt = time.Now().AddDate(0, -2, 0)
	imageState.LastUsedAt = time.Now().AddDate(0, -2, 0)

	// Exactly one goroutine is expected to win the race and remove the image.
	client.EXPECT().RemoveImage(container.Image, removeImageTimeout).Return(nil)
	require.Equal(t, 1, len(imageManager.imageStates))

	// Release all goroutines at once by closing a gate channel to maximize
	// the chance of exposing a concurrent map write.
	const numRoutines = 1000
	var wg sync.WaitGroup
	wg.Add(numRoutines)

	gate := make(chan struct{})
	for i := 0; i < numRoutines; i++ {
		go func() {
			<-gate
			imageManager.removeUnusedImages()
			wg.Done()
		}()
	}

	close(gate)
	wg.Wait()
	require.Equal(t, 0, len(imageManager.imageStates))
}

0 comments on commit b019dfb

Please sign in to comment.