Make the container transitions in progressContainers independent of each other #1306
@@ -153,7 +153,9 @@ type Container struct {
 	// AppliedStatus is the status that has been "applied" (e.g., we've called Pull,
 	// Create, Start, or Stop) but we don't yet know that the application was successful.
-	AppliedStatus ContainerStatus
+	// No need to save it in the state file: the agent will synchronize the container status
+	// on restart, and some operations, e.g. pull, have to be recalled again.
+	AppliedStatus ContainerStatus `json:"-"`
 	// ApplyingError is an error that occurred trying to transition the container
 	// to its desired state. It is propagated to the backend in the form
 	// 'Name: ErrorString' as the 'reason' field.
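The `json:"-"` tag is what keeps AppliedStatus out of the agent's persisted state, since the state file is written by JSON-marshaling these structs. Below is a minimal standalone sketch of that behavior, using a simplified stand-in struct rather than the agent's real Container type:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// containerSketch is a simplified stand-in for the agent's Container struct,
// keeping only enough fields to illustrate the `json:"-"` behavior.
type containerSketch struct {
	Name          string `json:"Name"`
	KnownStatus   int    `json:"KnownStatus"`
	AppliedStatus int    `json:"-"` // excluded from the marshaled state file
}

func main() {
	c := containerSketch{Name: "web", KnownStatus: 3, AppliedStatus: 2}
	out, err := json.Marshal(c)
	if err != nil {
		panic(err)
	}
	// AppliedStatus does not appear in the output, so it is never persisted
	// and has to be re-established after an agent restart.
	fmt.Println(string(out)) // {"Name":"web","KnownStatus":3}
}
```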
@@ -241,12 +243,14 @@ func (c *Container) GetKnownStatus() ContainerStatus {
 	return c.KnownStatusUnsafe
 }
 
-// SetKnownStatus sets the known status of the container
+// SetKnownStatus sets the known status of the container and updates the
+// container's applied status
 func (c *Container) SetKnownStatus(status ContainerStatus) {
 	c.lock.Lock()
 	defer c.lock.Unlock()
 
 	c.KnownStatusUnsafe = status
+	c.updateAppliedStatusUnsafe(status)
 }
 
 // GetDesiredStatus gets the desired status of the container
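The updateAppliedStatusUnsafe call added here relies on ContainerStatus being an ordered enum, so a plain `<=` comparison can tell whether the known status has caught up with (or passed) the in-flight transition. A toy sketch of that comparison; the constant names and values below are illustrative, not the agent's exact definitions:

```go
package main

import "fmt"

// ContainerStatus is modeled as an ordered enum, as in the agent, though the
// names and ordering here are only illustrative.
type ContainerStatus int

const (
	ContainerStatusNone ContainerStatus = iota
	ContainerPulled
	ContainerCreated
	ContainerRunning
	ContainerStopped
)

func main() {
	applied := ContainerCreated // we called Create and are waiting for the result
	known := ContainerRunning   // a docker event says the container is already running

	// Because the statuses are ordered, applied <= known means the applied
	// transition has finished (or been overtaken), so the guard can be cleared.
	if applied <= known {
		applied = ContainerStatusNone
	}
	fmt.Println(applied == ContainerStatusNone) // true
}
```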
@@ -548,3 +552,38 @@ func (c *Container) GetHealthStatus() HealthStatus {
 
 	return copyHealth
 }
+
+// updateAppliedStatusUnsafe updates the container's transitioning status
+func (c *Container) updateAppliedStatusUnsafe(knownStatus ContainerStatus) {
+	if c.AppliedStatus == ContainerStatusNone {
+		return
+	}
+
+	// Check if the container transition has already finished
+	if c.AppliedStatus <= knownStatus {
+		c.AppliedStatus = ContainerStatusNone
+	}
+}
+
+// SetAppliedStatus sets the applied status of the container and returns whether
+// it succeeded; false means the container is already in a transition
+func (c *Container) SetAppliedStatus(status ContainerStatus) bool {
+	c.lock.Lock()
+	defer c.lock.Unlock()
+
+	if c.AppliedStatus != ContainerStatusNone {
+		// return false to indicate the set operation failed
+		return false
+	}
+
+	c.AppliedStatus = status
+	return true
+}
+
+// GetAppliedStatus returns the transitioning status of the container
+func (c *Container) GetAppliedStatus() ContainerStatus {
+	c.lock.RLock()
+	defer c.lock.RUnlock()
+
+	return c.AppliedStatus
+}

Review thread on the `return false` branch of SetAppliedStatus:

Reviewer: It's not clear to me why returning false here indicates the set operation failed. Do you mean the …

Author: No, "failed" means the container is already in a transition (i.e. its applied status isn't None). This ensures that the agent won't call the same API (pull/create/start/stop) twice for the same container.
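Taken together, SetAppliedStatus and SetKnownStatus act as a simple in-flight-transition guard: the first caller to record an applied status wins, later callers get false, and the guard clears once the known status catches up. A self-contained sketch of that pattern with heavily simplified types (not the agent's actual Container):

```go
package main

import (
	"fmt"
	"sync"
)

type ContainerStatus int

const (
	ContainerStatusNone ContainerStatus = iota
	ContainerPulled
	ContainerCreated
)

// guardedContainer is a stripped-down illustration of the applied/known status
// bookkeeping; the real Container type carries many more fields.
type guardedContainer struct {
	lock          sync.RWMutex
	knownStatus   ContainerStatus
	appliedStatus ContainerStatus
}

// SetAppliedStatus records that a transition is in flight; it returns false if
// another transition is already in flight.
func (c *guardedContainer) SetAppliedStatus(status ContainerStatus) bool {
	c.lock.Lock()
	defer c.lock.Unlock()
	if c.appliedStatus != ContainerStatusNone {
		return false
	}
	c.appliedStatus = status
	return true
}

// SetKnownStatus records the observed status and clears the applied status once
// the known status has caught up with it.
func (c *guardedContainer) SetKnownStatus(status ContainerStatus) {
	c.lock.Lock()
	defer c.lock.Unlock()
	c.knownStatus = status
	if c.appliedStatus != ContainerStatusNone && c.appliedStatus <= status {
		c.appliedStatus = ContainerStatusNone
	}
}

func main() {
	c := &guardedContainer{}
	fmt.Println(c.SetAppliedStatus(ContainerPulled))  // true: pull is now in flight
	fmt.Println(c.SetAppliedStatus(ContainerPulled))  // false: duplicate attempt is rejected
	c.SetKnownStatus(ContainerPulled)                 // pull finished; guard is cleared
	fmt.Println(c.SetAppliedStatus(ContainerCreated)) // true: next transition may start
}
```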
@@ -618,8 +618,7 @@ func (mtask *managedTask) progressContainers() {
 	// We've kicked off one or more transitions, wait for them to
 	// complete, but keep reading events as we do. in fact, we have to for
 	// transitions to complete
-	mtask.waitForContainerTransitions(transitions, transitionChange, transitionChangeContainer)
-	seelog.Debugf("Managed task [%s]: done transitioning all containers", mtask.Arn)
+	mtask.waitForContainerTransition(transitions, transitionChange, transitionChangeContainer)
 
 	// update the task status
 	changed := mtask.UpdateStatus()
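The effect of swapping waitForContainerTransitions for waitForContainerTransition is that progressContainers no longer blocks until every started transition finishes; it handles one completion and loops again. A toy, agent-independent illustration of why handling completions one at a time keeps a slow container (say, a long image pull) from holding up the others; the names and durations are made up:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	// Hypothetical per-container transition durations: a slow image pull next
	// to two fast operations.
	durations := map[string]time.Duration{
		"slow-pull": 300 * time.Millisecond,
		"create-a":  10 * time.Millisecond,
		"create-b":  20 * time.Millisecond,
	}

	done := make(chan string)
	for name, d := range durations {
		go func(name string, d time.Duration) {
			time.Sleep(d) // stands in for a Docker API call
			done <- name
		}(name, d)
	}

	// Like the new waitForContainerTransition, handle one completion at a time:
	// the fast containers can make progress on the next iteration without
	// waiting for the slow one to finish.
	for i := 0; i < len(durations); i++ {
		fmt.Println("transition finished:", <-done)
	}
}
```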
@@ -663,16 +662,30 @@ func (mtask *managedTask) startContainerTransitions(transitionFunc containerTran
 			reasons = append(reasons, transition.reason)
 			continue
 		}
+
+		// If the container is already in a transition, skip it
+		if transition.actionRequired && !cont.SetAppliedStatus(transition.nextState) {
+			// At least one container is able to be moved forwards, so we're not deadlocked
+			anyCanTransition = true
+			continue
+		}
+
 		// At least one container is able to be moved forwards, so we're not deadlocked
 		anyCanTransition = true
 
 		if !transition.actionRequired {
-			mtask.handleContainerChange(dockerContainerChange{
-				container: cont,
-				event: DockerContainerChangeEvent{
-					Status: transition.nextState,
-				},
-			})
+			// This updates the container status without calling any docker API. Send it
+			// from a goroutine so that it won't block here, since waitForContainerTransition
+			// is only called after this function returns. All events sent to
+			// mtask.dockerMessages are handled by handleContainerChange.
+			go func(cont *api.Container, status api.ContainerStatus) {
+				mtask.dockerMessages <- dockerContainerChange{
+					container: cont,
+					event: DockerContainerChangeEvent{
+						Status: status,
+					},
+				}
+			}(cont, transition.nextState)
 			continue
 		}
 		transitions[cont.Name] = transition.nextState
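The reason the status change is sent from a goroutine rather than handled inline: mtask.dockerMessages is drained by the task's event loop, which only gets back to reading after startContainerTransitions returns, so a synchronous send here would block until something reads the channel. A minimal sketch of that situation with stand-in types, not the agent's real ones:

```go
package main

import "fmt"

type statusEvent struct {
	container string
	status    string
}

func main() {
	// messages stands in for mtask.dockerMessages: unbuffered, and only read
	// after the transition-starting function has returned.
	messages := make(chan statusEvent)

	startTransitions := func() {
		// A synchronous `messages <- ...` here would block forever, because
		// nobody is receiving yet. Sending from a goroutine lets this function
		// return so the caller can go on to drain the channel.
		go func() {
			messages <- statusEvent{container: "app", status: "CREATED"}
		}()
	}

	startTransitions()
	evt := <-messages // the caller's event loop picks the message up later
	fmt.Printf("%s -> %s\n", evt.container, evt.status)
}
```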
@@ -763,40 +776,26 @@ func (mtask *managedTask) onContainersUnableToTransitionState() {
 		mtask.emitTaskEvent(mtask.Task, taskUnableToTransitionToStoppedReason)
 		// TODO we should probably panic here
 	} else {
-		seelog.Criticalf("Managed task [%s]: voving task to stopped due to bad state", mtask.Arn)
+		seelog.Criticalf("Managed task [%s]: moving task to stopped due to bad state", mtask.Arn)
 		mtask.handleDesiredStatusChange(api.TaskStopped, 0)
 	}
 }
 
-func (mtask *managedTask) waitForContainerTransitions(transitions map[string]api.ContainerStatus,
-	transitionChange <-chan struct{},
+func (mtask *managedTask) waitForContainerTransition(transitions map[string]api.ContainerStatus,
+	transition <-chan struct{},
 	transitionChangeContainer <-chan string) {
 
-	for len(transitions) > 0 {
-		if mtask.waitEvent(transitionChange) {
-			changedContainer := <-transitionChangeContainer
-			seelog.Debugf("Managed task [%s]: transition for container[%s] finished",
-				mtask.Arn, changedContainer)
-			delete(transitions, changedContainer)
-			seelog.Debugf("Managed task [%s]: still waiting for: %v", mtask.Arn, transitions)
-		}
-		if mtask.GetDesiredStatus().Terminal() || mtask.GetKnownStatus().Terminal() {
-			allWaitingOnPulled := true
-			for _, desired := range transitions {
-				if desired != api.ContainerPulled {
-					allWaitingOnPulled = false
-				}
-			}
-			if allWaitingOnPulled {
-				// We don't actually care to wait for 'pull' transitions to finish if
-				// we're just heading to stopped since those resources aren't
-				// inherently linked to this task anyways for e.g. gc and so on.
-				seelog.Debugf("Managed task [%s]: all containers are waiting for pulled transition; exiting early: %v",
-					mtask.Arn, transitions)
-				break
-			}
-		}
-	}
+	// There could be multiple transitions, but we only need to wait for one of them
+	// to ensure that at least one container can be processed in the next call to
+	// progressContainers. This is done by waiting for one transition/acs/docker message.
+	if !mtask.waitEvent(transition) {
+		seelog.Debugf("Managed task [%s]: received non-transition events", mtask.Arn)
+		return
+	}
+	transitionedContainer := <-transitionChangeContainer
+	seelog.Debugf("Managed task [%s]: transition for container[%s] finished",
+		mtask.Arn, transitionedContainer)
+	delete(transitions, transitionedContainer)
+	seelog.Debugf("Managed task [%s]: still waiting for: %v", mtask.Arn, transitions)
 }
 
 func (mtask *managedTask) time() ttime.Time {

Review thread on the removed allWaitingOnPulled early-exit:

Reviewer: How is this condition handled in the new change?

Author: Previously the pull could block the whole set of transitions, which is why we wanted to break out of the wait when all remaining containers were only waiting on a pull. The new change makes the wait non-blocking, so this case is handled by default.
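The new waitForContainerTransition leans on waitEvent returning whether the awaited channel fired before some other event was handled. The agent's waitEvent does more than this (it also services ACS and docker messages while waiting), but a rough sketch of the select-over-channels shape implied by the comment, with hypothetical channel names:

```go
package main

import "fmt"

// waitEvent-style helper: block until either the channel we care about fires
// (return true) or some other event arrives first (return false). This is a
// guess at the shape of the agent's helper, not its real implementation.
func waitEvent(interesting <-chan struct{}, other <-chan string) bool {
	select {
	case <-interesting:
		return true
	case msg := <-other:
		fmt.Println("handled non-transition event:", msg)
		return false
	}
}

func main() {
	transitionChange := make(chan struct{}, 1)
	otherMessages := make(chan string, 1)

	transitionChange <- struct{}{}
	if waitEvent(transitionChange, otherMessages) {
		// In the real code this is where the finished container's name is read
		// from transitionChangeContainer and removed from the pending set.
		fmt.Println("a transition completed; loop back into progressContainers")
	}
}
```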
Review thread on the `c.AppliedStatus <= knownStatus` check in updateAppliedStatusUnsafe:

Reviewer: Would the condition `c.AppliedStatus > knownStatus` ever be true here? We set the knownStatus only when a transition is complete, right?

Author: If the agent received duplicate docker events for some reason, this could happen, as the event can carry a past status of the container.
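Following up on that exchange: a stale or duplicate docker event carries a known status lower than the applied one, and in that case updateAppliedStatusUnsafe deliberately leaves the in-flight status alone. A tiny standalone sketch of the comparison, mirroring the shape of the function above rather than reusing the agent's actual code:

```go
package main

import "fmt"

type ContainerStatus int

const (
	ContainerStatusNone ContainerStatus = iota
	ContainerPulled
	ContainerCreated
	ContainerRunning
)

// updateAppliedStatus mirrors the shape of updateAppliedStatusUnsafe: only a
// known status that has caught up with the in-flight transition clears it.
func updateAppliedStatus(applied, known ContainerStatus) ContainerStatus {
	if applied == ContainerStatusNone {
		return applied
	}
	if applied <= known {
		return ContainerStatusNone
	}
	return applied
}

func main() {
	// A Start call is in flight (applied = Running) when a duplicate, stale
	// docker event replays the older Created status.
	applied := ContainerRunning
	fmt.Println(updateAppliedStatus(applied, ContainerCreated)) // 3 (Running): guard stays set
	fmt.Println(updateAppliedStatus(applied, ContainerRunning)) // 0 (None): transition finished
}
```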