@@ -21,7 +21,6 @@ import (
2121 "fmt"
2222 "reflect"
2323 "strings"
24- "time"
2524
2625 databricksv1alpha1 "github.com/microsoft/azure-databricks-operator/api/v1alpha1"
2726 "github.com/xinsnake/databricks-sdk-golang/azure"
@@ -106,36 +105,42 @@ func (r *RunReconciler) refresh(instance *databricksv1alpha1.Run) error {
106105 return r .Update (context .Background (), instance )
107106}
108107
109- func (r * RunReconciler ) delete (instance * databricksv1alpha1.Run ) error {
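108+ // delete cancels a run that is still in progress, or deletes it once it has finished. It returns true when the run is gone (or there was nothing to delete); false with a nil error means a cancel was requested and the call is safe to retry; false with an error means the operation failed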
109+ func (r * RunReconciler ) delete (instance * databricksv1alpha1.Run ) (bool , error ) {
110110 r .Log .Info (fmt .Sprintf ("Deleting run %s" , instance .GetName ()))
111111
112112 if instance .Status == nil {
113- return nil
113+ return true , nil
114114 }
115115
116116 runID := instance .Status .Metadata .RunID
117117
118118 // Check if the run exists before trying to delete it
119- if _ , err := r .getRun (runID ); err != nil {
119+ run , err := r .getRun (runID )
120+ if err != nil {
120121 if strings .Contains (err .Error (), "does not exist" ) {
121- return nil
122+ return true , nil
122123 }
123- return err
124+ return false , err
124125 }
125126
126- // We will not check for error when cancelling a job,
127- // if it fails just let it be
128- execution := NewExecution ( "runs" , "cancel" )
129- err := r . APIClient . Jobs (). RunsCancel ( runID )
130- execution . Finish ( err )
131-
132- // It takes time for DataBricks to cancel a run
133- time . Sleep ( 15 * time . Second )
134-
135- execution = NewExecution ("runs" , "delete" )
127+ if run . State . ResultState == nil {
128+ // We will not check for error when cancelling a job,
129+ // if it fails just let it be
130+ execution := NewExecution ( "runs" , "cancel" )
131+ err := r . APIClient . Jobs (). RunsCancel ( runID )
132+ execution . Finish ( err )
133+ return false , nil // no error, but indicate not completed to trigger a requeue to delete once cancelled
134+ }
135+ // run has a result state, i.e. it has reached a terminated state and can be deleted
136+ execution := NewExecution ("runs" , "delete" )
136137 err = r .APIClient .Jobs ().RunsDelete (runID )
137138 execution .Finish (err )
138- return err
139+
140+ if err != nil {
141+ return false , err
142+ }
143+ return true , nil
139144}
140145
141146func (r * RunReconciler ) runUsingRunNow (instance * databricksv1alpha1.Run ) (* dbmodels.Run , bool , error ) {
0 commit comments