Skip to content

Fix concurrent writes during image cleanup #743

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions agent/engine/docker_image_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,8 +193,6 @@ func (imageManager *dockerImageManager) getImageState(containerImageID string) (

// removeImageState removes the imageState from the list of imageState objects in ImageManager
func (imageManager *dockerImageManager) removeImageState(imageStateToBeRemoved *image.ImageState) {
imageManager.updateLock.Lock()
defer imageManager.updateLock.Unlock()
for i, imageState := range imageManager.imageStates {
if imageState.Image.ImageID == imageStateToBeRemoved.Image.ImageID {
// Image State found; hence remove it
Expand Down Expand Up @@ -277,6 +275,9 @@ func (imageManager *dockerImageManager) performPeriodicImageCleanup(ctx context.
}

func (imageManager *dockerImageManager) removeUnusedImages() {
seelog.Infof("Begin building map of eligible unused images for deletion")
imageManager.updateLock.Lock()
defer imageManager.updateLock.Unlock()
imageManager.imageStatesConsideredForDeletion = make(map[string]*image.ImageState)
for _, imageState := range imageManager.getAllImageStates() {
imageManager.imageStatesConsideredForDeletion[imageState.Image.ImageID] = imageState
Expand Down Expand Up @@ -305,8 +306,6 @@ func (imageManager *dockerImageManager) removeLeastRecentlyUsedImage() error {
}

func (imageManager *dockerImageManager) getUnusedImageForDeletion() *image.ImageState {
imageManager.updateLock.RLock()
defer imageManager.updateLock.RUnlock()
candidateImageStatesForDeletion := imageManager.getCandidateImagesForDeletion()
if len(candidateImageStatesForDeletion) < 1 {
seelog.Infof("No eligible images for deletion for this cleanup cycle")
Expand Down Expand Up @@ -350,6 +349,7 @@ func (imageManager *dockerImageManager) deleteImage(imageID string, imageState *
seelog.Infof("Image removed: %v", imageID)
imageState.RemoveImageName(imageID)
if len(imageState.Image.Names) == 0 {
seelog.Infof("Cleaning up all tracking information for image %s as it has zero references", imageID)
delete(imageManager.imageStatesConsideredForDeletion, imageState.Image.ImageID)
imageManager.removeImageState(imageState)
imageManager.state.RemoveImageState(imageState)
Expand Down
70 changes: 70 additions & 0 deletions agent/engine/docker_image_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package engine
import (
"errors"
"reflect"
"sync"
"testing"
"time"

Expand All @@ -25,8 +26,11 @@ import (
"github.com/aws/amazon-ecs-agent/agent/engine/dockerstate"
"github.com/aws/amazon-ecs-agent/agent/engine/image"
"github.com/aws/amazon-ecs-agent/agent/statemanager"

docker "github.com/fsouza/go-dockerclient"
"github.com/golang/mock/gomock"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"golang.org/x/net/context"
)

Expand Down Expand Up @@ -951,3 +955,69 @@ func TestGetImageStateFromImageNameNoImageState(t *testing.T) {
t.Error("Incorrect image state retrieved by image name")
}
}

// TestConcurrentRemoveUnusedImages checks for concurrent map writes
// in the imageManager
func TestConcurrentRemoveUnusedImages(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
client := NewMockDockerClient(ctrl)

imageManager := &dockerImageManager{
client: client,
state: dockerstate.NewTaskEngineState(),
minimumAgeBeforeDeletion: config.DefaultImageDeletionAge,
numImagesToDelete: config.DefaultNumImagesToDeletePerCycle,
imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval,
}

imageManager.SetSaver(statemanager.NewNoopStateManager())
container := &api.Container{
Name: "testContainer",
Image: "testContainerImage",
}
sourceImage := &image.Image{
ImageID: "sha256:qwerty",
}
sourceImage.Names = append(sourceImage.Names, container.Image)
imageInspected := &docker.Image{
ID: "sha256:qwerty",
}
client.EXPECT().InspectImage(container.Image).Return(imageInspected, nil).AnyTimes()
err := imageManager.RecordContainerReference(container)
if err != nil {
t.Error("Error in adding container to an existing image state")
}
require.Equal(t, 1, len(imageManager.imageStates))

// Remove container reference from image state to trigger cleanup
err = imageManager.RemoveContainerReferenceFromImageState(container)
assert.NoError(t, err)

imageState, _ := imageManager.getImageState(imageInspected.ID)
imageState.PulledAt = time.Now().AddDate(0, -2, 0)
imageState.LastUsedAt = time.Now().AddDate(0, -2, 0)

client.EXPECT().RemoveImage(container.Image, removeImageTimeout).Return(nil)
require.Equal(t, 1, len(imageManager.imageStates))

// We create 1000 goroutines and then perform a channel close
// to simulate the concurrent map write problem
numRoutines := 1000
var waitGroup sync.WaitGroup
waitGroup.Add(numRoutines)

ok := make(chan bool)

for i := 0; i < numRoutines; i++ {
go func() {
<-ok
imageManager.removeUnusedImages()
waitGroup.Done()
}()
}

close(ok)
waitGroup.Wait()
require.Equal(t, 0, len(imageManager.imageStates))
}