Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions api/v1alpha1/imagebuild_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,11 @@ type ImageBuildStatus struct {
// or workspace build).
// +optional
ExpiresAt *metav1.Time `json:"expiresAt,omitempty"`

// PreviousPhase is the phase the build was in before transitioning to Expired.
// Used to determine whether an expired build originally succeeded or failed.
// +optional
PreviousPhase string `json:"previousPhase,omitempty"`
}

// +kubebuilder:object:root=true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,11 @@ spec:
description: PipelineRunName is the name of the active PipelineRun
for this build
type: string
previousPhase:
description: |-
PreviousPhase is the phase the build was in before transitioning to Expired.
Used to determine whether an expired build originally succeeded or failed.
type: string
pushTaskRunName:
description: PushTaskRunName is the name of the TaskRun for pushing
artifacts to registry
Expand Down
34 changes: 8 additions & 26 deletions internal/controller/imagebuild/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ import (
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
"sigs.k8s.io/controller-runtime/pkg/manager"
)

var ibTracer = otel.Tracer("imagebuild-controller")
Expand Down Expand Up @@ -552,7 +551,8 @@ func (r *ImageBuildReconciler) checkExpiry(
return ctrl.Result{RequeueAfter: remaining}, false, nil
}

log.Info("Build expired, transitioning to Expired phase", "ttl", ttl, "anchor", anchor)
log.Info("Build expired, transitioning to Expired phase", "ttl", ttl, "anchor", anchor,
"previousPhase", imageBuild.Status.Phase)
r.emitEventf(imageBuild, corev1.EventTypeNormal, eventReasonBuildExpired,
"Build expired after %s, cleaning up resources", ttl)

Expand Down Expand Up @@ -2171,8 +2171,8 @@ func (r *ImageBuildReconciler) deleteSecret(

// SetupWithManager sets up the controller with the Manager.
func (r *ImageBuildReconciler) SetupWithManager(mgr ctrl.Manager) error {
if err := mgr.Add(r.seedActiveBuildsGauge(mgr)); err != nil {
return fmt.Errorf("failed to register ActiveBuilds seeder: %w", err)
if err := mgr.Add(r.seedMetricsFromCRs(mgr)); err != nil {
return fmt.Errorf("failed to register metrics seeder: %w", err)
}

builder := ctrl.NewControllerManagedBy(mgr).
Expand All @@ -2188,28 +2188,6 @@ func (r *ImageBuildReconciler) SetupWithManager(mgr ctrl.Manager) error {
return builder.Complete(r)
}

func (r *ImageBuildReconciler) seedActiveBuildsGauge(mgr ctrl.Manager) manager.RunnableFunc {
return func(ctx context.Context) error {
if !mgr.GetCache().WaitForCacheSync(ctx) {
return fmt.Errorf("cache sync failed")
}
var builds automotivev1alpha1.ImageBuildList
if err := mgr.GetClient().List(ctx, &builds); err != nil {
r.Log.Error(err, "Failed to seed ActiveBuilds gauge")
return err
}
var active float64
for i := range builds.Items {
if builds.Items[i].Status.Phase == phaseBuilding {
active++
}
}
ActiveBuilds.Set(active)
r.Log.Info("Seeded ActiveBuilds gauge", "active", active)
return nil
}
}

func isTaskRunCompleted(taskRun *tektonv1.TaskRun) bool {
return taskRun.Status.CompletionTime != nil
}
Expand Down Expand Up @@ -2622,6 +2600,10 @@ func (r *ImageBuildReconciler) updateStatus(
oldPhase := fresh.Status.Phase
oldMessage := fresh.Status.Message

if phase == automotivev1alpha1.ImageBuildPhaseExpired {
fresh.Status.PreviousPhase = fresh.Status.Phase
}

fresh.Status.Phase = phase
fresh.Status.Message = message

Expand Down
77 changes: 75 additions & 2 deletions internal/controller/imagebuild/metrics.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
package imagebuild

import (
"context"
"fmt"

automotivev1alpha1 "github.com/centos-automotive-suite/automotive-dev-operator/api/v1alpha1"
"github.com/prometheus/client_golang/prometheus"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/metrics"
)

Expand Down Expand Up @@ -98,9 +104,76 @@ func adjustActiveBuildsGauge(oldPhase, newPhase string) {
if oldPhase == newPhase {
return
}
if newPhase == "Building" {
if newPhase == phaseBuilding {
ActiveBuilds.Inc()
} else if oldPhase == "Building" {
} else if oldPhase == phaseBuilding {
ActiveBuilds.Dec()
}
}

func buildMetricStatus(b *automotivev1alpha1.ImageBuild) string {
phase := b.Status.Phase
if phase == automotivev1alpha1.ImageBuildPhaseExpired {
if b.Status.PreviousPhase != "" {
phase = b.Status.PreviousPhase
} else {
return buildStatusSuccess
}
}
if phase == automotivev1alpha1.ImageBuildPhaseCompleted {
return buildStatusSuccess
}
return buildStatusFailure
}

func seedMetrics(builds []automotivev1alpha1.ImageBuild) {
var active float64

for i := range builds {
b := &builds[i]

if b.Status.Phase == phaseBuilding {
active++
}

if !automotivev1alpha1.IsTerminalBuildPhase(b.Status.Phase) {
continue
}

status := buildMetricStatus(b)
mode := b.Spec.GetMode()
distro := b.Spec.GetDistro()
target := b.Spec.GetTarget()
format := b.Spec.GetExportFormat()
arch := b.Spec.Architecture

BuildTotal.WithLabelValues(mode, distro, target, format, arch, status).Add(1)

if b.Status.FlashTaskRunName != "" {
FlashTotal.WithLabelValues(target, status).Add(1)
}
Comment on lines +152 to +154

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

FlashTotal seeding misses pipeline-based flash builds.

b.Status.FlashTaskRunName != "" is only set by the standalone flash path (createFlashTaskRunhandleFlashingState). For builds where flash runs inline in the build pipeline (recordPipelineFlashMetrics / checkBuildProgress), FlashTaskRunName is never populated. After an operator restart, those builds' flash counts won't be pre-seeded into FlashTotal.

A targeted fix for pipeline flash (where flash completing implies build success):

🛠️ Proposed fix
-		if b.Status.FlashTaskRunName != "" {
-			FlashTotal.WithLabelValues(target, status).Add(1)
-		}
+		// Standalone flash (separate TaskRun) — FlashTaskRunName is set directly.
+		// Pipeline flash (inline task) — infer from spec: if flash was enabled and
+		// the build succeeded, the pipeline flash step must have completed.
+		flashRan := b.Status.FlashTaskRunName != "" ||
+			(b.Spec.IsFlashEnabled() && status == buildStatusSuccess)
+		if flashRan {
+			FlashTotal.WithLabelValues(target, status).Add(1)
+		}
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
if b.Status.FlashTaskRunName != "" {
FlashTotal.WithLabelValues(target, status).Add(1)
}
// Standalone flash (separate TaskRun) — FlashTaskRunName is set directly.
// Pipeline flash (inline task) — infer from spec: if flash was enabled and
// the build succeeded, the pipeline flash step must have completed.
flashRan := b.Status.FlashTaskRunName != "" ||
(b.Spec.IsFlashEnabled() && status == buildStatusSuccess)
if flashRan {
FlashTotal.WithLabelValues(target, status).Add(1)
}
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@internal/controller/imagebuild/metrics.go` around lines 152 - 154, The
current seeding only increments FlashTotal when b.Status.FlashTaskRunName != ""
(standalone flash); update the seeding logic to also detect pipeline-based
flashes and increment FlashTotal for them. Concretely, extend the conditional
that checks b.Status.FlashTaskRunName to also call a helper like
isPipelineFlashCompleted(b) (or reuse the same completion-detection logic from
recordPipelineFlashMetrics / checkBuildProgress) so that builds which completed
flashing inline in the pipeline are counted during startup; ensure the helper
mirrors the exact completion criteria used by
recordPipelineFlashMetrics/checkBuildProgress to avoid double-counting.

}

ActiveBuilds.Set(active)
}

// seedMetricsFromCRs pre-loads counters from existing ImageBuild CRs so that
// metrics survive operator pod restarts. Histograms are not seeded to avoid
// inflating observation counts on each restart.
func (r *ImageBuildReconciler) seedMetricsFromCRs(mgr ctrl.Manager) manager.RunnableFunc {
return func(ctx context.Context) error {
if !mgr.GetCache().WaitForCacheSync(ctx) {
return fmt.Errorf("cache sync failed")
}
var builds automotivev1alpha1.ImageBuildList
if err := mgr.GetClient().List(ctx, &builds); err != nil {
r.Log.Error(err, "Failed to seed metrics from CRs")
return err
}

seedMetrics(builds.Items)

r.Log.Info("Seeded metrics from CRs", "totalCRs", len(builds.Items))
return nil
}
}
156 changes: 156 additions & 0 deletions internal/controller/imagebuild/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -285,3 +285,159 @@ func TestRecordBuildMetrics_NoTimingResult(t *testing.T) {
t.Error("should not record phase metrics when build-timing result is absent")
}
}

func TestBuildMetricStatus(t *testing.T) {
tests := []struct {
name string
phase string
previousPhase string
want string
}{
{"completed", "Completed", "", buildStatusSuccess},
{"failed", "Failed", "", buildStatusFailure},
{"cancelled", "Cancelled", "", buildStatusFailure},
{"expired with previous completed", "Expired", "Completed", buildStatusSuccess},
{"expired with previous failed", "Expired", "Failed", buildStatusFailure},
{"expired without previous phase (legacy)", "Expired", "", buildStatusSuccess},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
b := &automotivev1alpha1.ImageBuild{
Status: automotivev1alpha1.ImageBuildStatus{
Phase: tt.phase,
PreviousPhase: tt.previousPhase,
},
}
if got := buildMetricStatus(b); got != tt.want {
t.Errorf("buildMetricStatus(%q, prev=%q) = %q, want %q", tt.phase, tt.previousPhase, got, tt.want)
}
})
}
}

func TestSeedMetricsFromCRs(t *testing.T) {
// Use unique label values to avoid interference from other tests
buildLabels := []string{"bootc", "fedora", "seed-target", "raw", "arm64", "success"}
failLabels := []string{"image", "fedora", "seed-target", "qcow2", "amd64", "failure"}
flashLabels := []string{"seed-target", "success"}

beforeBuild := counterValue(BuildTotal, buildLabels...)
beforeFail := counterValue(BuildTotal, failLabels...)
beforeFlash := counterValue(FlashTotal, flashLabels...)

start := metav1.NewTime(time.Now().Add(-5 * time.Minute))
end := metav1.Now()

builds := []automotivev1alpha1.ImageBuild{
{
Spec: automotivev1alpha1.ImageBuildSpec{
Architecture: "arm64",
AIB: &automotivev1alpha1.AIBSpec{Distro: "fedora", Target: "seed-target", Mode: "bootc"},
Export: &automotivev1alpha1.ExportSpec{Format: "raw"},
},
Status: automotivev1alpha1.ImageBuildStatus{
Phase: "Completed",
StartTime: &start,
CompletionTime: &end,
FlashTaskRunName: "flash-run-1",
},
},
{
Spec: automotivev1alpha1.ImageBuildSpec{
Architecture: "arm64",
AIB: &automotivev1alpha1.AIBSpec{Distro: "fedora", Target: "seed-target", Mode: "bootc"},
Export: &automotivev1alpha1.ExportSpec{Format: "raw"},
},
Status: automotivev1alpha1.ImageBuildStatus{
Phase: "Completed",
StartTime: &start,
CompletionTime: &end,
},
},
// Expired build with PreviousPhase=Completed → counts as success
{
Spec: automotivev1alpha1.ImageBuildSpec{
Architecture: "arm64",
AIB: &automotivev1alpha1.AIBSpec{Distro: "fedora", Target: "seed-target", Mode: "bootc"},
Export: &automotivev1alpha1.ExportSpec{Format: "raw"},
},
Status: automotivev1alpha1.ImageBuildStatus{
Phase: "Expired",
PreviousPhase: "Completed",
StartTime: &start,
CompletionTime: &end,
},
},
{
Spec: automotivev1alpha1.ImageBuildSpec{
Architecture: "amd64",
AIB: &automotivev1alpha1.AIBSpec{Distro: "fedora", Target: "seed-target", Mode: "image"},
Export: &automotivev1alpha1.ExportSpec{Format: "qcow2"},
},
Status: automotivev1alpha1.ImageBuildStatus{
Phase: "Failed",
},
},
// Expired build with PreviousPhase=Failed → counts as failure
{
Spec: automotivev1alpha1.ImageBuildSpec{
Architecture: "amd64",
AIB: &automotivev1alpha1.AIBSpec{Distro: "fedora", Target: "seed-target", Mode: "image"},
Export: &automotivev1alpha1.ExportSpec{Format: "qcow2"},
},
Status: automotivev1alpha1.ImageBuildStatus{
Phase: "Expired",
PreviousPhase: "Failed",
},
},
// In-progress build — should only count as active, not seed counters
{
Spec: automotivev1alpha1.ImageBuildSpec{
Architecture: "amd64",
AIB: &automotivev1alpha1.AIBSpec{Distro: "fedora", Target: "seed-target", Mode: "image"},
},
Status: automotivev1alpha1.ImageBuildStatus{
Phase: "Building",
},
},
}

seedMetrics(builds)

afterBuild := counterValue(BuildTotal, buildLabels...)
if afterBuild-beforeBuild != 3 {
t.Errorf("BuildTotal(success) delta = %v, want 3 (2 completed + 1 expired-from-completed)", afterBuild-beforeBuild)
}

afterFail := counterValue(BuildTotal, failLabels...)
if afterFail-beforeFail != 2 {
t.Errorf("BuildTotal(failure) delta = %v, want 2 (1 failed + 1 expired-from-failed)", afterFail-beforeFail)
}

afterFlash := counterValue(FlashTotal, flashLabels...)
if afterFlash-beforeFlash != 1 {
t.Errorf("FlashTotal delta = %v, want 1", afterFlash-beforeFlash)
}
}

func TestSeedMetricsFromCRs_ActiveBuilds(t *testing.T) {
ActiveBuilds.Set(0)

builds := []automotivev1alpha1.ImageBuild{
{
Status: automotivev1alpha1.ImageBuildStatus{Phase: "Building"},
},
{
Status: automotivev1alpha1.ImageBuildStatus{Phase: "Building"},
},
{
Status: automotivev1alpha1.ImageBuildStatus{Phase: "Completed"},
},
}

seedMetrics(builds)

if v := gaugeValue(ActiveBuilds); v != 2 {
t.Errorf("ActiveBuilds = %v, want 2", v)
}
}
Loading