Ensure task status is reported before cleanup #705

Merged · 9 commits · Feb 14, 2017
8 changes: 8 additions & 0 deletions agent/engine/docker_image_manager_integ_test.go
@@ -112,6 +112,7 @@ func TestIntegImageCleanupHappyCase(t *testing.T) {

// Verify Task is stopped.
verifyTaskIsStopped(taskEvents, testTask)
testTask.SetSentStatus(api.TaskStopped)

// Allow Task cleanup to occur
time.Sleep(5 * time.Second)
@@ -230,6 +231,7 @@ func TestIntegImageCleanupThreshold(t *testing.T) {

// Verify Task is stopped
verifyTaskIsStopped(taskEvents, testTask)
testTask.SetSentStatus(api.TaskStopped)

// Allow Task cleanup to occur
time.Sleep(2 * time.Second)
@@ -378,6 +380,9 @@ func TestImageWithSameNameAndDifferentID(t *testing.T) {

// Verify Task is stopped
verifyTaskIsStopped(taskEvents, task1, task2, task3)
task1.SetSentStatus(api.TaskStopped)
task2.SetSentStatus(api.TaskStopped)
task3.SetSentStatus(api.TaskStopped)

// Allow Task cleanup to occur
time.Sleep(2 * time.Second)
@@ -501,6 +506,9 @@ func TestImageWithSameIDAndDifferentNames(t *testing.T) {

// Verify Task is stopped
verifyTaskIsStopped(taskEvents, task1, task2, task3)
task1.SetSentStatus(api.TaskStopped)
task2.SetSentStatus(api.TaskStopped)
task3.SetSentStatus(api.TaskStopped)

// Allow Task cleanup to occur
time.Sleep(2 * time.Second)
2 changes: 2 additions & 0 deletions agent/engine/docker_task_engine_test.go
@@ -187,6 +187,7 @@ func TestBatchContainerHappyPath(t *testing.T) {
assert.Equal(t, *cont.ExitCode, 0, "Exit code should be present")
}
assert.Equal(t, (<-taskEvents).Status, api.TaskStopped, "Task is not in STOPPED state")
sleepTask.SetSentStatus(api.TaskStopped)

// Extra events should not block forever; duplicate acs and docker events are possible
go func() { eventStream <- createDockerEvent(api.ContainerStopped) }()
@@ -317,6 +318,7 @@ func TestRemoveEvents(t *testing.T) {
}).Return(nil)

taskEngine.AddTask(sleepTaskStop)
sleepTask.SetSentStatus(api.TaskStopped)
imageManager.EXPECT().RemoveContainerReferenceFromImageState(gomock.Any())
// trigger cleanup
cleanup <- time.Now()
8 changes: 7 additions & 1 deletion agent/engine/engine_integ_test.go
@@ -41,6 +41,11 @@ const (
credentialsIDIntegTest = "credsid"
)

func init() {
// Set this very low for integ tests only
_stoppedSentWaitInterval = 1 * time.Second
}

func createTestTask(arn string) *api.Task {
return &api.Task{
Arn: arn,
@@ -183,8 +188,9 @@ func TestSweepContainer(t *testing.T) {
defer discardEvents(taskEvents)()

// Should be stopped, let's verify it's still listed...
_, ok := taskEngine.(*DockerTaskEngine).State().TaskByArn("testSweepContainer")
task, ok := taskEngine.(*DockerTaskEngine).State().TaskByArn("testSweepContainer")
assert.True(t, ok, "Expected task to be present still, but wasn't")
task.SetSentStatus(api.TaskStopped) // cleanupTask waits for TaskStopped to be sent before cleaning
time.Sleep(1 * time.Minute)
for i := 0; i < 60; i++ {
_, ok = taskEngine.(*DockerTaskEngine).State().TaskByArn("testSweepContainer")
24 changes: 24 additions & 0 deletions agent/engine/task_manager.go
@@ -26,6 +26,8 @@ import (

const (
steadyStateTaskVerifyInterval = 10 * time.Minute
stoppedSentWaitInterval = 30 * time.Second
maxStoppedWaitTimes = 72 * time.Hour / stoppedSentWaitInterval
)

type acsTaskUpdate struct {
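For reference, the two constants added above bound how long cleanup can be deferred: 72 hours of polling at 30-second intervals is 8,640 iterations of the wait loop. A minimal standalone check of that arithmetic (not part of the agent code; identifiers copied from the diff above):

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	// Mirror the constants added to task_manager.go above.
	const stoppedSentWaitInterval = 30 * time.Second
	const maxStoppedWaitTimes = 72 * time.Hour / stoppedSentWaitInterval

	// 72h / 30s = 8640 polls; this is the worst-case number of iterations
	// the reviewers discuss further down in this diff.
	fmt.Println(int(maxStoppedWaitTimes)) // prints 8640
}
```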
@@ -474,6 +476,9 @@ func (mtask *managedTask) time() ttime.Time {
return mtask._time
}

var _stoppedSentWaitInterval = stoppedSentWaitInterval
var _maxStoppedWaitTimes = int(maxStoppedWaitTimes)

func (mtask *managedTask) cleanupTask(taskStoppedDuration time.Duration) {
cleanupTimeDuration := mtask.GetKnownStatusTime().Add(taskStoppedDuration).Sub(ttime.Now())
// There is a potential deadlock here if cleanupTime is negative. Ignore the computed
@@ -489,8 +494,27 @@ func (mtask *managedTask) cleanupTask(taskStoppedDuration time.Duration) {
cleanupTimeBool <- true
close(cleanupTimeBool)
}()
// wait for the cleanup time to elapse, signalled by cleanupTimeBool
for !mtask.waitEvent(cleanupTimeBool) {
}
stoppedSentBool := make(chan bool)
go func() {
@aaithal (Contributor), Feb 10, 2017:
Can this be broken out into a named method?

Author (Contributor):
Done

Contributor:
For the newly written goroutine, should we start enforcing the rule of always passing a Context to it, so that it supports simple cancellation?

Author (Contributor):
There's no context wired into most of our codebase, as most of it was written before context existed. We could add that to the managedTask, but that's outside the scope of my change here.

I don't think that always passing context is a hard rule we should enforce. Whether or not we should use context depends on what the code is doing.

Contributor:
Without a context, there is no way to stop this potentially long-running new goroutine (which could run for up to 72 hours at worst). If there is another kind of state mismatch between the agent and the backend, where the backend thinks this instance can launch new tasks while the agent is holding these long-running cleanup goroutines, is it possible the agent could run out of memory?

Contributor:
I meant: if the backend service keeps starting new tasks, and these tasks get stuck in the cleanup state for 72 hours, will the agent eventually run out of memory?

Author (Contributor):
This is the desired behavior. A successful submission of task state will result in this goroutine exiting. Unsuccessful submissions will delay cleanup until success or the 72-hour timeout, whichever is sooner. There is no use case for stopping the goroutine other than this.

for i := 0; i < _maxStoppedWaitTimes; i++ {
// ensure that we block until api.TaskStopped is actually sent
sentStatus := mtask.GetSentStatus()
if sentStatus >= api.TaskStopped {
stoppedSentBool <- true
close(stoppedSentBool)
return
}
seelog.Warnf("Blocking cleanup for task %v until the task has been reported stopped. SentStatus: %v (%d/%d)", mtask, sentStatus, i, _maxStoppedWaitTimes)
mtask._time.Sleep(_stoppedSentWaitInterval)
}
}()
// wait for api.TaskStopped to be sent
for !mtask.waitEvent(stoppedSentBool) {
}

log.Info("Cleaning up task's containers and data", "task", mtask.Task)

// For the duration of this, simply discard any task events; this ensures the
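Following up on the review thread above: the author's "Done" refers to factoring the anonymous goroutine out into a named method. The sketch below is a hypothetical version of that extraction, reusing only identifiers that already appear in this diff; the method name waitForStopReported and its signature are illustrative, not necessarily what was merged.

```go
// Hypothetical extraction (would live in task_manager.go): the body mirrors
// the goroutine shown in the diff above, unchanged in behavior.
func (mtask *managedTask) waitForStopReported(stoppedSentBool chan<- bool) {
	for i := 0; i < _maxStoppedWaitTimes; i++ {
		sentStatus := mtask.GetSentStatus()
		// Unblock cleanup once api.TaskStopped has actually been submitted.
		if sentStatus >= api.TaskStopped {
			stoppedSentBool <- true
			close(stoppedSentBool)
			return
		}
		seelog.Warnf("Blocking cleanup for task %v until the task has been reported stopped. SentStatus: %v (%d/%d)",
			mtask, sentStatus, i, _maxStoppedWaitTimes)
		mtask._time.Sleep(_stoppedSentWaitInterval)
	}
}

// Caller side in cleanupTask, replacing the inline `go func() { ... }()`:
//   stoppedSentBool := make(chan bool)
//   go mtask.waitForStopReported(stoppedSentBool)
//   for !mtask.waitEvent(stoppedSentBool) {
//   }
```

If cancellation were ever needed, the reviewer's Context suggestion would amount to adding a context.Context parameter and checking ctx.Done() between sleeps; as discussed above, the author deliberately left that out, since a successful state submission is the only intended exit.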