Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 86 additions & 3 deletions controllers/updateservice_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,16 @@ import (
"fmt"
"reflect"
"regexp"
"strings"
"time"

"github.com/go-logr/logr"
conditionsv1 "github.com/openshift/custom-resource-status/conditions/v1"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
policyv1 "k8s.io/api/policy/v1"
apiErrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
ctrl "sigs.k8s.io/controller-runtime"
Expand Down Expand Up @@ -156,7 +159,6 @@ func (r *UpdateServiceReconciler) Reconcile(ctx context.Context, req ctrl.Reques
r.ensureEnvConfig,
r.ensureTrustedClusterCA,
r.ensureAdditionalTrustedCA,
r.ensureDeployment,
r.ensureGraphBuilderService,
r.ensurePolicyEngineService,
r.ensurePodDisruptionBudget,
Expand All @@ -168,6 +170,18 @@ func (r *UpdateServiceReconciler) Reconcile(ctx context.Context, req ctrl.Reques
}
}

imageSHA, err := r.ensureGraphDataSHA(ctx, reqLogger, instanceCopy)
if err != nil {
reqLogger.Error(err, "ensuring GraphData image checksum annotation")
// setting the imageSHA to an empty string to make sure we're not passing any garbage information as annotation
imageSHA = ""
}

err = r.ensureDeployment(ctx, reqLogger, instanceCopy, resources, imageSHA)
if err != nil {
return ctrl.Result{}, err
}

// handle status. Ensure functions should set conditions on the passed-in
// instance as appropriate but not save. If an ensure function returns an
// error, it should also set the ReconcileCompleted condition to false with an
Expand All @@ -186,7 +200,7 @@ func (r *UpdateServiceReconciler) Reconcile(ctx context.Context, req ctrl.Reques
reqLogger.Error(err, "Failed to update Status")
}

return ctrl.Result{}, err
return ctrl.Result{RequeueAfter: time.Duration(5 * time.Minute)}, err
}

// handleErr logs the error and sets an appropriate Condition on the status.
Expand Down Expand Up @@ -350,8 +364,71 @@ func (r *UpdateServiceReconciler) ensureTrustedClusterCA(ctx context.Context, re
return err
}

func (r *UpdateServiceReconciler) ensureGraphDataSHA(ctx context.Context, reqLogger logr.Logger, instance *cv1.UpdateService) (string, error) {

if strings.Contains(instance.Spec.GraphDataImage, "@sha256") {
return "", nil
}

pod := &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: "graph-data-tag-digest",
Namespace: instance.Namespace,
Annotations: map[string]string{
"updateservice.operator.openshift.io/last-refresh": time.Now().UTC().Format(time.RFC822),
},
},
Spec: corev1.PodSpec{
RestartPolicy: corev1.RestartPolicyNever,
Containers: []corev1.Container{
{
Name: NameInitContainerGraphData,
Image: instance.Spec.GraphDataImage,
ImagePullPolicy: corev1.PullAlways,
Command: []string{"/bin/sh", "-c", "--"},
Args: []string{"sleep 300;"},
},
},
},
}

if err := controllerutil.SetControllerReference(instance, pod, r.Scheme); err != nil {
return "", err
}

// Check if this pod already exists
found := &corev1.Pod{}
err := r.Client.Get(ctx, types.NamespacedName{Name: pod.Name, Namespace: pod.Namespace}, found)

if err != nil && apiErrors.IsNotFound(err) {
reqLogger.Info("Creating Pod", "Namespace", pod.Namespace, "Name", pod.Name)
err := r.Client.Create(ctx, pod)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably need a Delete in the Succeeded case. And also maybe a delete in the "we used to be a by-tag reference, but now the graph-data pullspec is a by-digest reference, so we don't need to bother launching Pods anymore" case?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added a delete on success condition

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a chance of racing with someone else (is it possible that we run multiple replicas?) creating the pod? It is possible that we Get, see nothing, something else creates the pod, we create and fail with conflict... I'm fine if we say that that cannot happen realistically.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the way we're avoiding that here is.

  1. start a pod with time.wait for 5 min.
  2. once the wait finishes, the pod goes into success condition
  3. we reconcile every 5 minutes. and when we see that the pod is completed, we'll just delete it.
  4. deleting will kick off the reconcile again to create the pod.

because, we're creating a pod, it can only have 1 pod running at a time with the given name.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

because, we're creating a pod, it can only have 1 pod running at a time with the given name

Petr was pointing out that other actors (e.g. a runaway second cincinnati-operator replica) might also create a pod with the same name. And actually, because ensureGraphDataSHA is checking a single UpdateService, it seems like an operator managing several by-tag UpdateService would probably struggle like:

  1. Reconciling UpdateService A, create a graph-data-tag-digest pod.
  2. Reconciling UpdateService B, pod is still running, return "", nil.
  3. Reconciling UpdateService A, pod is still running, return "", nil.
  4. Reconciling UpdateService B, pod is Succeeded, return the image ID. But this is bad, because that image ID was for the by-tag pullspec from UpdateService A, not the pullspec from B.

Instead, you'll want to somehow uniquify by pullspec-under test. Perhaps SetControllerReference allows you to clearly associate a pod with the appropriate UpdateService. And then Kube's built-in garbage collection will handle deleting any pods whose UpdateService is deleted.

if err != nil {
handleErr(reqLogger, &instance.Status, "CreateGraphDataPodFailed", err)
}
return "", err
} else if err != nil {
handleErr(reqLogger, &instance.Status, "GetGraphDataPodFailed", err)
return "", err
} else if found.Status.Phase == corev1.PodSucceeded {
err := r.Client.Delete(ctx, pod)
if err != nil {
handleErr(reqLogger, &instance.Status, "DeleteGraphDataPodFailed", err)
}
return "", err
}

if len(found.Status.ContainerStatuses) > 0 {
return found.Status.ContainerStatuses[0].ImageID, nil
} else {
handleErr(reqLogger, &instance.Status, "GraphDataPodStatusEmpty", fmt.Errorf("Graph-Data pod returned empty container status"))
}

return "", nil
}

func (r *UpdateServiceReconciler) ensureDeployment(ctx context.Context, reqLogger logr.Logger, instance *cv1.UpdateService,
resources *kubeResources) error {
resources *kubeResources, imageSHA string) error {

deployment := resources.deployment
if err := controllerutil.SetControllerReference(instance, deployment, r.Scheme); err != nil {
Expand Down Expand Up @@ -395,6 +472,11 @@ func (r *UpdateServiceReconciler) ensureDeployment(ctx context.Context, reqLogge
updated.Spec.Template.ObjectMeta.Annotations[key] = value
}

if len(imageSHA) > 0 {
reqLogger.Info("Setting SHA annotation")
updated.Spec.Template.ObjectMeta.Annotations["updateservice.operator.openshift.io/graph-data-image"] = imageSHA
}

updated.Spec.Template.Spec.Volumes = deployment.Spec.Template.Spec.Volumes
containers := updated.Spec.Template.Spec.Containers
for i := range containers {
Expand Down Expand Up @@ -749,6 +831,7 @@ func (r *UpdateServiceReconciler) SetupWithManager(mgr ctrl.Manager) error {
Owns(&corev1.Service{}).
Owns(&policyv1.PodDisruptionBudget{}).
Owns(&routev1.Route{}).
Owns(&corev1.Pod{}).
Watches(
&source.Kind{Type: &apicfgv1.Image{}},
handler.EnqueueRequestsFromMapFunc(mapped.Map),
Expand Down
2 changes: 1 addition & 1 deletion controllers/updateservice_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,7 @@ func TestEnsureDeployment(t *testing.T) {

resources, err := newKubeResources(updateservice, testOperandImage, ps, cm, nil)

err = r.ensureDeployment(context.TODO(), log, updateservice, resources)
err = r.ensureDeployment(context.TODO(), log, updateservice, resources, "")
if err != nil {
t.Fatal(err)
}
Expand Down