-
Notifications
You must be signed in to change notification settings - Fork 481
Bug 2015793: Run OLM's collect profiles job on the management cluster #583
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1322,14 +1322,6 @@ func (r *HostedControlPlaneReconciler) reconcilePKI(ctx context.Context, hcp *hy | |
| return fmt.Errorf("failed to reconcile packageserver cert: %w", err) | ||
| } | ||
|
|
||
| // OLM Profile collector Cert | ||
| profileCollectorCert := manifests.OLMProfileCollectorCertSecret(hcp.Namespace) | ||
| if _, err := r.CreateOrUpdate(ctx, r, profileCollectorCert, func() error { | ||
| return pki.ReconcileOLMProfileCollectorCertSecret(profileCollectorCert, rootCASecret, p.OwnerRef) | ||
| }); err != nil { | ||
| return fmt.Errorf("failed to reconcile olm profile collector cert: %w", err) | ||
| } | ||
|
|
||
| // OLM Catalog Operator Serving Cert | ||
| catalogOperatorServingCert := manifests.OLMCatalogOperatorServingCertSecret(hcp.Namespace) | ||
| if _, err := r.CreateOrUpdate(ctx, r, catalogOperatorServingCert, func() error { | ||
|
|
@@ -2191,6 +2183,46 @@ func (r *HostedControlPlaneReconciler) reconcileOperatorLifecycleManager(ctx con | |
| return fmt.Errorf("failed to reconcile packageserver worker endpoints: %w", err) | ||
| } | ||
|
|
||
| // Collect Profiles | ||
| collectProfilesConfigMap := manifests.CollectProfilesConfigMap(hcp.Namespace) | ||
| olm.ReconcileCollectProfilesConfigMap(collectProfilesConfigMap, p.OwnerRef, p.OLMImage, hcp.Namespace) | ||
| if err := r.Create(ctx, collectProfilesConfigMap); err != nil && !apierrors.IsAlreadyExists(err) { | ||
| return fmt.Errorf("failed to reconcile collect profiles cronjob: %w", err) | ||
| } | ||
|
|
||
| collectProfilesCronJob := manifests.CollectProfilesCronJob(hcp.Namespace) | ||
| if _, err := r.CreateOrUpdate(ctx, r, collectProfilesCronJob, func() error { | ||
| return olm.ReconcileCollectProfilesCronJob(collectProfilesCronJob, p.OwnerRef, p.OLMImage, hcp.Namespace) | ||
| }); err != nil { | ||
| return fmt.Errorf("failed to reconcile collect profiles cronjob: %w", err) | ||
| } | ||
|
|
||
| collectProfilesRole := manifests.CollectProfilesRole(hcp.Namespace) | ||
| if _, err := r.CreateOrUpdate(ctx, r, collectProfilesRole, func() error { | ||
| return olm.ReconcileCollectProfilesRole(collectProfilesRole, p.OwnerRef, p.OLMImage, hcp.Namespace) | ||
| }); err != nil { | ||
| return fmt.Errorf("failed to reconcile collect profiles cronjob: %w", err) | ||
| } | ||
|
|
||
| collectProfilesRoleBinding := manifests.CollectProfilesRoleBinding(hcp.Namespace) | ||
| if _, err := r.CreateOrUpdate(ctx, r, collectProfilesRoleBinding, func() error { | ||
| return olm.ReconcileCollectProfilesRoleBinding(collectProfilesRoleBinding, p.OwnerRef, p.OLMImage, hcp.Namespace) | ||
| }); err != nil { | ||
| return fmt.Errorf("failed to reconcile collect profiles cronjob: %w", err) | ||
| } | ||
|
|
||
| collectProfilesSecret := manifests.CollectProfilesSecret(hcp.Namespace) | ||
| olm.ReconcileCollectProfilesSecret(collectProfilesSecret, p.OwnerRef, p.OLMImage, hcp.Namespace) | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Helpful note: The pprof secret shouldn't be modified - it allows the cronjob to frequently change the pprof secret. |
||
| if err := r.Create(ctx, collectProfilesSecret); err != nil && !apierrors.IsAlreadyExists(err) { | ||
| return fmt.Errorf("failed to reconcile collect profiles cronjob: %w", err) | ||
| } | ||
|
|
||
| collectProfilesServiceAccount := manifests.CollectProfilesServiceAccount(hcp.Namespace) | ||
| if _, err := r.CreateOrUpdate(ctx, r, collectProfilesServiceAccount, func() error { | ||
| return olm.ReconcileCollectProfilesServiceAccount(collectProfilesServiceAccount, p.OwnerRef, p.OLMImage, hcp.Namespace) | ||
| }); err != nil { | ||
| return fmt.Errorf("failed to reconcile collect profiles cronjob: %w", err) | ||
| } | ||
| return nil | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,7 +21,7 @@ spec: | |
| secretName: catalog-operator-serving-cert | ||
| - name: profile-collector | ||
| secret: | ||
| secretName: olm-profile-collector | ||
| secretName: pprof-cert | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unfortunately this is hardcoded in OLM |
||
| - name: kubeconfig | ||
| secret: | ||
| secretName: service-network-admin-kubeconfig | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| apiVersion: v1 | ||
| kind: ConfigMap | ||
| metadata: | ||
| name: olm-collect-profiles | ||
| data: | ||
| pprof-config.yaml: | | ||
| disabled: False |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,44 @@ | ||
| apiVersion: batch/v1beta1 | ||
| kind: CronJob | ||
| metadata: | ||
| name: olm-collect-profiles | ||
| spec: | ||
| schedule: "*/15 * * * *" | ||
| jobTemplate: | ||
| spec: | ||
| template: | ||
| spec: | ||
| serviceAccountName: olm-collect-profiles | ||
| priorityClassName: openshift-user-critical | ||
| containers: | ||
| - name: collect-profiles | ||
| image: OLM_OPERATOR_IMAGE | ||
| imagePullPolicy: IfNotPresent | ||
| command: | ||
| - bin/collect-profiles | ||
| args: | ||
| - -n | ||
| - OLM_NAMESPACE | ||
| - --config-mount-path | ||
| - /etc/config | ||
| - --cert-mount-path | ||
| - /var/run/secrets/serving-cert | ||
| - olm-operator-heap-:https://olm-operator-metrics:8443/debug/pprof/heap | ||
| - catalog-operator-heap-:https://catalog-operator-metrics:8443/debug/pprof/heap | ||
| volumeMounts: | ||
| - mountPath: /etc/config | ||
| name: config-volume | ||
| - mountPath: /var/run/secrets/serving-cert | ||
| name: secret-volume | ||
| resources: | ||
| requests: | ||
| cpu: 10m | ||
| memory: 80Mi | ||
| volumes: | ||
| - name: config-volume | ||
| configMap: | ||
| name: olm-collect-profiles | ||
| - name: secret-volume | ||
| secret: | ||
| secretName: pprof-cert | ||
| restartPolicy: Never |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| apiVersion: rbac.authorization.k8s.io/v1 | ||
| kind: Role | ||
| metadata: | ||
| name: olm-collect-profiles | ||
| rules: | ||
| - apiGroups: [""] | ||
| resources: ["configmaps"] | ||
| verbs: ["get", "list", "create", "delete"] | ||
| - apiGroups: [""] | ||
| resources: ["secrets"] | ||
| verbs: ["get", "update"] |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| apiVersion: rbac.authorization.k8s.io/v1 | ||
| kind: RoleBinding | ||
| metadata: | ||
| name: olm-collect-profiles | ||
| subjects: | ||
| - kind: ServiceAccount | ||
| name: olm-collect-profiles | ||
| roleRef: | ||
| kind: Role | ||
| name: olm-collect-profiles | ||
| apiGroup: rbac.authorization.k8s.io |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,8 @@ | ||
| apiVersion: v1 | ||
| kind: Secret | ||
| metadata: | ||
| name: pprof-cert | ||
| type: kubernetes.io/tls | ||
| data: | ||
| tls.crt: "" | ||
| tls.key: "" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,4 @@ | ||
| apiVersion: v1 | ||
| kind: ServiceAccount | ||
| metadata: | ||
| name: olm-collect-profiles |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,84 @@ | ||
| package olm | ||
|
|
||
| import ( | ||
| "github.com/openshift/hypershift/control-plane-operator/controllers/hostedcontrolplane/config" | ||
| batchv1beta1 "k8s.io/api/batch/v1beta1" | ||
| corev1 "k8s.io/api/core/v1" | ||
| rbacv1 "k8s.io/api/rbac/v1" | ||
| ) | ||
|
|
||
| var ( | ||
| olmCollectProfilesConfigMap = MustConfigMap("assets/olm-collect-profiles.configmap.yaml") | ||
| olmCollectProfilesCronJob = MustCronJob("assets/olm-collect-profiles.cronjob.yaml") | ||
| olmCollectProfilesRole = MustRole("assets/olm-collect-profiles.role.yaml") | ||
| olmCollectProfilesRoleBinding = MustRoleBinding("assets/olm-collect-profiles.rolebinding.yaml") | ||
| olmCollectProfilesSecret = MustSecret("assets/olm-collect-profiles.secret.yaml") | ||
| olmCollectProfilesServiceAccount = MustServiceAccount("assets/olm-collect-profiles.serviceaccount.yaml") | ||
| ) | ||
|
|
||
| func ReconcileCollectProfilesCronJob(cronJob *batchv1beta1.CronJob, ownerRef config.OwnerRef, olmImage, namespace string) error { | ||
| return reconcileProfilingCronJob(cronJob, ownerRef, olmImage, namespace, olmCollectProfilesCronJob) | ||
| } | ||
|
|
||
| func reconcileProfilingCronJob(cronJob *batchv1beta1.CronJob, ownerRef config.OwnerRef, olmImage, namespace string, sourceCronJob *batchv1beta1.CronJob) error { | ||
| ownerRef.ApplyTo(cronJob) | ||
| cronJob.Spec = sourceCronJob.DeepCopy().Spec | ||
| cronJob.Spec.JobTemplate.Spec.Template.Spec.Containers[0].Image = olmImage | ||
| for i, arg := range cronJob.Spec.JobTemplate.Spec.Template.Spec.Containers[0].Args { | ||
| if arg == "OLM_NAMESPACE" { | ||
| cronJob.Spec.JobTemplate.Spec.Template.Spec.Containers[0].Args[i] = namespace | ||
| } | ||
| } | ||
| return nil | ||
| } | ||
|
|
||
| func ReconcileCollectProfilesConfigMap(configMap *corev1.ConfigMap, ownerRef config.OwnerRef, olmImage, namespace string) error { | ||
| return reconcileProfilingConfigMap(configMap, ownerRef, olmImage, namespace, olmCollectProfilesConfigMap) | ||
| } | ||
|
|
||
| func reconcileProfilingConfigMap(configMap *corev1.ConfigMap, ownerRef config.OwnerRef, olmImage, namespace string, sourceConfigMap *corev1.ConfigMap) error { | ||
| ownerRef.ApplyTo(configMap) | ||
| configMap.Data = sourceConfigMap.DeepCopy().Data | ||
| return nil | ||
| } | ||
|
|
||
| func ReconcileCollectProfilesRole(role *rbacv1.Role, ownerRef config.OwnerRef, olmImage, namespace string) error { | ||
| return reconcileProfilingRole(role, ownerRef, olmImage, namespace, olmCollectProfilesRole) | ||
| } | ||
|
|
||
| func reconcileProfilingRole(role *rbacv1.Role, ownerRef config.OwnerRef, olmImage, namespace string, sourceRole *rbacv1.Role) error { | ||
| ownerRef.ApplyTo(role) | ||
| role.Rules = sourceRole.DeepCopy().Rules | ||
| return nil | ||
| } | ||
|
|
||
| func ReconcileCollectProfilesRoleBinding(roleBinding *rbacv1.RoleBinding, ownerRef config.OwnerRef, olmImage, namespace string) error { | ||
| return reconcileProfilingRoleBinding(roleBinding, ownerRef, olmImage, namespace, olmCollectProfilesRoleBinding) | ||
| } | ||
|
|
||
| func reconcileProfilingRoleBinding(roleBinding *rbacv1.RoleBinding, ownerRef config.OwnerRef, olmImage, namespace string, sourceRoleBinding *rbacv1.RoleBinding) error { | ||
| ownerRef.ApplyTo(roleBinding) | ||
| roleBinding.RoleRef = sourceRoleBinding.DeepCopy().RoleRef | ||
| roleBinding.Subjects = sourceRoleBinding.DeepCopy().Subjects | ||
| return nil | ||
| } | ||
|
|
||
| func ReconcileCollectProfilesSecret(secret *corev1.Secret, ownerRef config.OwnerRef, olmImage, namespace string) error { | ||
| return reconcileProfilingSecret(secret, ownerRef, olmImage, namespace, olmCollectProfilesSecret) | ||
| } | ||
|
|
||
| func reconcileProfilingSecret(secret *corev1.Secret, ownerRef config.OwnerRef, olmImage, namespace string, sourceSecret *corev1.Secret) error { | ||
| ownerRef.ApplyTo(secret) | ||
| secret.Type = sourceSecret.DeepCopy().Type | ||
| secret.Data = sourceSecret.DeepCopy().Data | ||
| return nil | ||
| } | ||
|
|
||
| func ReconcileCollectProfilesServiceAccount(serviceAccount *corev1.ServiceAccount, ownerRef config.OwnerRef, olmImage, namespace string) error { | ||
| return reconcileProfilingServiceAccount(serviceAccount, ownerRef, olmImage, namespace, olmCollectProfilesServiceAccount) | ||
| } | ||
|
|
||
| func reconcileProfilingServiceAccount(serviceAccount *corev1.ServiceAccount, ownerRef config.OwnerRef, olmImage, namespace string, sourceSecret *corev1.ServiceAccount) error { | ||
| ownerRef.ApplyTo(serviceAccount) | ||
| return nil | ||
| } |
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Helpful note: The pprof config shouldn't be modified - it allows users to disable the cronjob and the generated configmaps.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we need to decide which user can modify the configmap. If it's the owner of the guest cluster (aka customer), then we should probably create this in the guest cluster and mirror its content back to the control plane. If it's meant to be configured by the cluster provider (aka management), then there likely should be a way to modify it via the HostedCluster API. I wouldn't expect anyone to make changes directly to configmaps in the control plane's namespace.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I believe the owner of the control plane should be responsible for modifying this configMap. It acts as an emergency escape hatch for disabling the cronjob, which runs on the management cluster.
If we want to configure this via the hostedCluster API, we can: