Skip to content

Commit

Permalink
Fix: ttlSecondsAfterFinished not taking effect. (#15)
Browse the repository at this point in the history
Signed-off-by: Syulin7 <[email protected]>
  • Loading branch information
Syulin7 authored Mar 9, 2023
1 parent 9c056b4 commit b81dfd7
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 9 deletions.
4 changes: 3 additions & 1 deletion pkg/controller.v1/tensorflow/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -536,7 +536,9 @@ func (tc *TFController) satisfiedExpectations(tfjob *tfv1.TFJob) bool {
satisfied = satisfied || tc.Expectations.SatisfiedExpectations(expectationServicesKey)
}

- if util.CheckJobCompletedV1(tfjob.Status.Conditions) && tfjob.DeletionTimestamp == nil && tfjob.Annotations[TFCleanPodStatusLabel] == TFCleanStatusDone {
+ if util.CheckJobCompletedV1(tfjob.Status.Conditions) && tfjob.DeletionTimestamp == nil &&
+ tfjob.Annotations[TFCleanPodStatusLabel] == TFCleanStatusDone &&
+ tfjob.Spec.TTLSecondsAfterFinished == nil {
satisfied = false
}

Expand Down
23 changes: 15 additions & 8 deletions pkg/controller.v1/tensorflow/job.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,21 +225,28 @@ func (tc *TFController) cleanupTFJob(tfJob *tfv1.TFJob) error {
return nil
}
duration := time.Second * time.Duration(*ttl)
- if currentTime.After(tfJob.Status.CompletionTime.Add(duration)) {
+ if tfJob.Status.CompletionTime == nil {
+ return fmt.Errorf("job completion time is nil, cannot cleanup")
+ }
+ finishTime := tfJob.Status.CompletionTime
+ expireTime := finishTime.Add(duration)
+ if currentTime.After(expireTime) {
err := tc.deleteTFJobHandler(tfJob)
if err != nil {
tflogger.LoggerForJob(tfJob).Warnf("Cleanup TFJob error: %v.", err)
return err
}
return nil
+ } else {
+ remaining := expireTime.Sub(currentTime)
+ key, err := KeyFunc(tfJob)
+ if err != nil {
+ tflogger.LoggerForJob(tfJob).Warnf("Couldn't get key for tfjob object: %v", err)
+ return err
+ }
+ tc.WorkQueue.AddAfter(key, remaining)
+ return nil
}
- key, err := KeyFunc(tfJob)
- if err != nil {
- tflogger.LoggerForJob(tfJob).Warnf("Couldn't get key for tfjob object: %v", err)
- return err
- }
- tc.WorkQueue.AddRateLimited(key)
- return nil
}

// deleteTFJob deletes the given TFJob.
Expand Down

0 comments on commit b81dfd7

Please sign in to comment.