-
Notifications
You must be signed in to change notification settings - Fork 213
pkg/cvo: Report current tasks #152
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,6 +4,7 @@ import ( | |
| "bytes" | ||
| "encoding/json" | ||
| "fmt" | ||
| "strings" | ||
|
|
||
| "github.com/golang/glog" | ||
|
|
||
|
|
@@ -259,7 +260,11 @@ func (optr *Operator) syncStatus(original, config *configv1.ClusterVersion, stat | |
| case len(validationErrs) > 0: | ||
| message = fmt.Sprintf("Reconciling %s: the cluster version is invalid", version) | ||
| case status.Fraction > 0: | ||
| message = fmt.Sprintf("Working towards %s: %.0f%% complete", version, status.Fraction*100) | ||
| tasks := make([]string, 0, len(status.Current)) | ||
| for _, task := range status.Current { | ||
| tasks = append(tasks, task.KindName()) | ||
| } | ||
| message = fmt.Sprintf("Working towards %s: %.0f%% complete (%s)", version, status.Fraction*100, strings.Join(tasks, ", ")) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The only thing that should ever show up in this message is 1-2 operator names, that's it.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't want to keep showing operator names either. The only time we should have anything other than a % is when a specific operator has taken longer than a certain amount of time (has completed a sync period).
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
So what to show when you're, say, blocked on a RoleBinding? Collapse to the namespace?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The issue here is that the installer doesn't know about sync periods, and wants to know what was blocking the CVO when the installer decided to give up on waiting. If the CVO starts right in on another sync cycle (which it does, right?), it seems very unlikely that the last message received by the installer would include blockers, unless we include them in every message.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
This or the namespace is sufficient to find the next level of debugging. We have enough operators that indicating which one the CVO is stuck on is pretty useful. In cases where we aren't currently waiting on a CO, knowing the namespace is good enough for most purposes. |
||
| case status.Step == "RetrievePayload": | ||
| if len(reason) == 0 { | ||
| reason = "DownloadingUpdate" | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -60,6 +60,7 @@ type SyncWorkerStatus struct { | |
| Step string | ||
| Failure error | ||
|
|
||
| Current []*payload.Task | ||
| Fraction float32 | ||
|
|
||
| Completed int | ||
|
|
@@ -480,17 +481,21 @@ func (w *SyncWorker) apply(ctx context.Context, payloadUpdate *payload.Update, w | |
|
|
||
| glog.V(4).Infof("Running sync for %s", task) | ||
| glog.V(5).Infof("Manifest: %s", string(task.Manifest.Raw)) | ||
| cr.BeginTask(task) | ||
|
|
||
| ov, ok := getOverrideForManifest(work.Overrides, task.Manifest) | ||
| if ok && ov.Unmanaged { | ||
| cr.FinishTask(task, false) | ||
| glog.V(4).Infof("Skipping %s as unmanaged", task) | ||
| continue | ||
| } | ||
|
|
||
| if err := task.Run(ctx, version, w.builder, work.State); err != nil { | ||
| cr.FinishTask(task, false) | ||
| return err | ||
| } | ||
| cr.Inc() | ||
|
|
||
| cr.FinishTask(task, true) | ||
| glog.V(4).Infof("Done syncing for %s", task) | ||
| } | ||
| return nil | ||
|
|
@@ -530,10 +535,34 @@ type consistentReporter struct { | |
| reporter StatusReporter | ||
| } | ||
|
|
||
| func (r *consistentReporter) Inc() { | ||
| // BeginTask appends the given task to status.Current. | ||
| func (r *consistentReporter) BeginTask(task *payload.Task) { | ||
| r.lock.Lock() | ||
| defer r.lock.Unlock() | ||
| r.status.Current = append(r.status.Current, task) | ||
| } | ||
|
|
||
| // FinishTask atomically removes the given task from status.Current | ||
| // and, if success is true, increments done. | ||
| func (r *consistentReporter) FinishTask(task *payload.Task, success bool) { | ||
| r.lock.Lock() | ||
| defer r.lock.Unlock() | ||
| r.done++ | ||
|
|
||
| if success { | ||
| r.done++ | ||
| } | ||
|
|
||
| for i, tsk := range r.status.Current { | ||
| if tsk == task { | ||
| copy(r.status.Current[i:], r.status.Current[i+1:]) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is not great :(
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
This is the recommended way to delete from a slice of pointers while preserving ordering, per this. Did you have an alternative you prefer? |
||
| r.status.Current[len(r.status.Current)-1] = nil | ||
| r.status.Current = r.status.Current[:len(r.status.Current)-1] | ||
| if len(r.status.Current) == 0 { | ||
| r.status.Current = nil | ||
| } | ||
| return | ||
| } | ||
| } | ||
| } | ||
|
|
||
| func (r *consistentReporter) Update() { | ||
|
|
@@ -543,6 +572,7 @@ func (r *consistentReporter) Update() { | |
| metricPayload.WithLabelValues(r.version, "applied").Set(float64(r.done)) | ||
| copied := r.status | ||
| copied.Step = "ApplyResources" | ||
| copied.Current = append([]*payload.Task(nil), r.status.Current...) | ||
| copied.Fraction = float32(r.done) / float32(r.total) | ||
| r.reporter.Report(copied) | ||
| } | ||
|
|
@@ -552,6 +582,7 @@ func (r *consistentReporter) Error(err error) { | |
| defer r.lock.Unlock() | ||
| copied := r.status | ||
| copied.Step = "ApplyResources" | ||
| copied.Current = append([]*payload.Task(nil), r.status.Current...) | ||
| copied.Fraction = float32(r.done) / float32(r.total) | ||
| copied.Failure = err | ||
| r.reporter.Report(copied) | ||
|
|
@@ -572,6 +603,7 @@ func (r *consistentReporter) Complete() { | |
| copied.Completed = r.completed + 1 | ||
| copied.Initial = false | ||
| copied.Reconciling = true | ||
| copied.Current = nil | ||
| copied.Fraction = 1 | ||
| r.reporter.Report(copied) | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is going to be too large, and I don't want to show resource names here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you think of a way to attach as structured data, so the consumer can make the verbosity/rendering calls?