Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,16 @@ var (
// TODO: These manifests should eventually be removed from the CVO payload by annotating
// them with the proper cluster profile in the OLM repository.
manifestsToOmit = []string{
"0000_50_olm_00-pprof-config.yaml",
"0000_50_olm_00-pprof-rbac.yaml",
"0000_50_olm_00-pprof-secret.yaml",
"0000_50_olm_01-olm-operator.serviceaccount.yaml",
"0000_50_olm_02-services.yaml",
"0000_50_olm_06-psm-operator.deployment.yaml",
"0000_50_olm_06-psm-operator.deployment.ibm-cloud-managed.yaml",
"0000_50_olm_07-olm-operator.deployment.ibm-cloud-managed.yaml",
"0000_50_olm_07-olm-operator.deployment.yaml",
"0000_50_olm_07-collect-profiles.cronjob.yaml",
"0000_50_olm_08-catalog-operator.deployment.ibm-cloud-managed.yaml",
"0000_50_olm_08-catalog-operator.deployment.yaml",
"0000_50_olm_15-packageserver.clusterserviceversion.yaml",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1322,14 +1322,6 @@ func (r *HostedControlPlaneReconciler) reconcilePKI(ctx context.Context, hcp *hy
return fmt.Errorf("failed to reconcile packageserver cert: %w", err)
}

// OLM Profile collector Cert
profileCollectorCert := manifests.OLMProfileCollectorCertSecret(hcp.Namespace)
if _, err := r.CreateOrUpdate(ctx, r, profileCollectorCert, func() error {
return pki.ReconcileOLMProfileCollectorCertSecret(profileCollectorCert, rootCASecret, p.OwnerRef)
}); err != nil {
return fmt.Errorf("failed to reconcile olm profile collector cert: %w", err)
}

// OLM Catalog Operator Serving Cert
catalogOperatorServingCert := manifests.OLMCatalogOperatorServingCertSecret(hcp.Namespace)
if _, err := r.CreateOrUpdate(ctx, r, catalogOperatorServingCert, func() error {
Expand Down Expand Up @@ -2191,6 +2183,46 @@ func (r *HostedControlPlaneReconciler) reconcileOperatorLifecycleManager(ctx con
return fmt.Errorf("failed to reconcile packageserver worker endpoints: %w", err)
}

// Collect Profiles
collectProfilesConfigMap := manifests.CollectProfilesConfigMap(hcp.Namespace)
olm.ReconcileCollectProfilesConfigMap(collectProfilesConfigMap, p.OwnerRef, p.OLMImage, hcp.Namespace)
if err := r.Create(ctx, collectProfilesConfigMap); err != nil && !apierrors.IsAlreadyExists(err) {
Copy link
Copy Markdown
Contributor Author

@awgreene awgreene Oct 21, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Helpful note: The pprof config shouldn't be modified - it allows users to disable the cronjob and the generated configmaps.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we need to decide which user can modify the configmap. If it's the owner of the guest cluster (aka customer), then we should probably create this in the guest cluster and mirror its content back to the control plane. If it's meant to be configured by the cluster provider (aka management), then there likely should be a way to modify it via the HostedCluster API. I wouldn't expect anyone to make changes directly to configmaps in the control plane's namespace.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe the owner of the control plane should be responsible for modifying this configMap. It acts as an emergency escape hatch for disabling the cronjob, which runs on the management cluster.

If we want to configure this via the hostedCluster API, we can:

  • Modify the hosted control plane operator to create/update this configMap.
  • Introduce a knob on the hostedCluster API to set the value in the configMap.

return fmt.Errorf("failed to reconcile collect profiles cronjob: %w", err)
}

collectProfilesCronJob := manifests.CollectProfilesCronJob(hcp.Namespace)
if _, err := r.CreateOrUpdate(ctx, r, collectProfilesCronJob, func() error {
return olm.ReconcileCollectProfilesCronJob(collectProfilesCronJob, p.OwnerRef, p.OLMImage, hcp.Namespace)
}); err != nil {
return fmt.Errorf("failed to reconcile collect profiles cronjob: %w", err)
}

collectProfilesRole := manifests.CollectProfilesRole(hcp.Namespace)
if _, err := r.CreateOrUpdate(ctx, r, collectProfilesRole, func() error {
return olm.ReconcileCollectProfilesRole(collectProfilesRole, p.OwnerRef, p.OLMImage, hcp.Namespace)
}); err != nil {
return fmt.Errorf("failed to reconcile collect profiles cronjob: %w", err)
}

collectProfilesRoleBinding := manifests.CollectProfilesRoleBinding(hcp.Namespace)
if _, err := r.CreateOrUpdate(ctx, r, collectProfilesRoleBinding, func() error {
return olm.ReconcileCollectProfilesRoleBinding(collectProfilesRoleBinding, p.OwnerRef, p.OLMImage, hcp.Namespace)
}); err != nil {
return fmt.Errorf("failed to reconcile collect profiles cronjob: %w", err)
}

collectProfilesSecret := manifests.CollectProfilesSecret(hcp.Namespace)
olm.ReconcileCollectProfilesSecret(collectProfilesSecret, p.OwnerRef, p.OLMImage, hcp.Namespace)
Copy link
Copy Markdown
Contributor Author

@awgreene awgreene Oct 21, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Helpful note: The pprof secret shouldn't be modified - it allows the cronjob to frequently change the pprof secret.

if err := r.Create(ctx, collectProfilesSecret); err != nil && !apierrors.IsAlreadyExists(err) {
return fmt.Errorf("failed to reconcile collect profiles cronjob: %w", err)
}

collectProfilesServiceAccount := manifests.CollectProfilesServiceAccount(hcp.Namespace)
if _, err := r.CreateOrUpdate(ctx, r, collectProfilesServiceAccount, func() error {
return olm.ReconcileCollectProfilesServiceAccount(collectProfilesServiceAccount, p.OwnerRef, p.OLMImage, hcp.Namespace)
}); err != nil {
return fmt.Errorf("failed to reconcile collect profiles cronjob: %w", err)
}
return nil
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -276,3 +276,58 @@ func OLMAlertRulesWorkerManifest(ns string) *corev1.ConfigMap {
},
}
}

// Collect Profiles
func CollectProfilesConfigMap(ns string) *corev1.ConfigMap {
return &corev1.ConfigMap{
ObjectMeta: metav1.ObjectMeta{
Name: "olm-collect-profiles",
Namespace: ns,
},
}
}

func CollectProfilesCronJob(ns string) *batchv1beta1.CronJob {
return &batchv1beta1.CronJob{
ObjectMeta: metav1.ObjectMeta{
Name: "olm-collect-profiles",
Namespace: ns,
},
}
}

func CollectProfilesRole(ns string) *rbacv1.Role {
return &rbacv1.Role{
ObjectMeta: metav1.ObjectMeta{
Name: "olm-collect-profiles",
Namespace: ns,
},
}
}

func CollectProfilesRoleBinding(ns string) *rbacv1.RoleBinding {
return &rbacv1.RoleBinding{
ObjectMeta: metav1.ObjectMeta{
Name: "olm-collect-profiles",
Namespace: ns,
},
}
}

func CollectProfilesSecret(ns string) *corev1.Secret {
return &corev1.Secret{
ObjectMeta: metav1.ObjectMeta{
Name: "pprof-cert",
Namespace: ns,
},
}
}

func CollectProfilesServiceAccount(ns string) *corev1.ServiceAccount {
return &corev1.ServiceAccount{
ObjectMeta: metav1.ObjectMeta{
Name: "olm-collect-profiles",
Namespace: ns,
},
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,24 @@ func MustService(fileName string) *corev1.Service {
return svc
}

func MustServiceAccount(fileName string) *corev1.ServiceAccount {
serviceAccount := &corev1.ServiceAccount{}
deserializeResource(fileName, serviceAccount)
return serviceAccount
}

func MustSecret(fileName string) *corev1.Secret {
secret := &corev1.Secret{}
deserializeResource(fileName, secret)
return secret
}

func MustConfigMap(fileName string) *corev1.ConfigMap {
configMap := &corev1.ConfigMap{}
deserializeResource(fileName, configMap)
return configMap
}

func MustDeployment(fileName string) *appsv1.Deployment {
deployment := &appsv1.Deployment{}
deserializeResource(fileName, deployment)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ spec:
secretName: catalog-operator-serving-cert
- name: profile-collector
secret:
secretName: olm-profile-collector
secretName: pprof-cert
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately this is hardcoded in OLM

- name: kubeconfig
secret:
secretName: service-network-admin-kubeconfig
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: olm-collect-profiles
data:
pprof-config.yaml: |
disabled: False
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
apiVersion: batch/v1beta1
kind: CronJob
metadata:
name: olm-collect-profiles
spec:
schedule: "*/15 * * * *"
jobTemplate:
spec:
template:
spec:
serviceAccountName: olm-collect-profiles
priorityClassName: openshift-user-critical
containers:
- name: collect-profiles
image: OLM_OPERATOR_IMAGE
imagePullPolicy: IfNotPresent
command:
- bin/collect-profiles
args:
- -n
- OLM_NAMESPACE
- --config-mount-path
- /etc/config
- --cert-mount-path
- /var/run/secrets/serving-cert
- olm-operator-heap-:https://olm-operator-metrics:8443/debug/pprof/heap
- catalog-operator-heap-:https://catalog-operator-metrics:8443/debug/pprof/heap
volumeMounts:
- mountPath: /etc/config
name: config-volume
- mountPath: /var/run/secrets/serving-cert
name: secret-volume
resources:
requests:
cpu: 10m
memory: 80Mi
volumes:
- name: config-volume
configMap:
name: olm-collect-profiles
- name: secret-volume
secret:
secretName: pprof-cert
restartPolicy: Never
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: olm-collect-profiles
rules:
- apiGroups: [""]
resources: ["configmaps"]
verbs: ["get", "list", "create", "delete"]
- apiGroups: [""]
resources: ["secrets"]
verbs: ["get", "update"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: olm-collect-profiles
subjects:
- kind: ServiceAccount
name: olm-collect-profiles
roleRef:
kind: Role
name: olm-collect-profiles
apiGroup: rbac.authorization.k8s.io
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
apiVersion: v1
kind: Secret
metadata:
name: pprof-cert
type: kubernetes.io/tls
data:
tls.crt: ""
tls.key: ""
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
apiVersion: v1
kind: ServiceAccount
metadata:
name: olm-collect-profiles
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ spec:
secretName: olm-operator-serving-cert
- name: client-ca
secret:
secretName: olm-profile-collector
secretName: pprof-cert
- name: kubeconfig
secret:
secretName: service-network-admin-kubeconfig
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
package olm

import (
"github.com/openshift/hypershift/control-plane-operator/controllers/hostedcontrolplane/config"
batchv1beta1 "k8s.io/api/batch/v1beta1"
corev1 "k8s.io/api/core/v1"
rbacv1 "k8s.io/api/rbac/v1"
)

var (
olmCollectProfilesConfigMap = MustConfigMap("assets/olm-collect-profiles.configmap.yaml")
olmCollectProfilesCronJob = MustCronJob("assets/olm-collect-profiles.cronjob.yaml")
olmCollectProfilesRole = MustRole("assets/olm-collect-profiles.role.yaml")
olmCollectProfilesRoleBinding = MustRoleBinding("assets/olm-collect-profiles.rolebinding.yaml")
olmCollectProfilesSecret = MustSecret("assets/olm-collect-profiles.secret.yaml")
olmCollectProfilesServiceAccount = MustServiceAccount("assets/olm-collect-profiles.serviceaccount.yaml")
)

func ReconcileCollectProfilesCronJob(cronJob *batchv1beta1.CronJob, ownerRef config.OwnerRef, olmImage, namespace string) error {
return reconcileProfilingCronJob(cronJob, ownerRef, olmImage, namespace, olmCollectProfilesCronJob)
}

func reconcileProfilingCronJob(cronJob *batchv1beta1.CronJob, ownerRef config.OwnerRef, olmImage, namespace string, sourceCronJob *batchv1beta1.CronJob) error {
ownerRef.ApplyTo(cronJob)
cronJob.Spec = sourceCronJob.DeepCopy().Spec
cronJob.Spec.JobTemplate.Spec.Template.Spec.Containers[0].Image = olmImage
for i, arg := range cronJob.Spec.JobTemplate.Spec.Template.Spec.Containers[0].Args {
if arg == "OLM_NAMESPACE" {
cronJob.Spec.JobTemplate.Spec.Template.Spec.Containers[0].Args[i] = namespace
}
}
return nil
}

func ReconcileCollectProfilesConfigMap(configMap *corev1.ConfigMap, ownerRef config.OwnerRef, olmImage, namespace string) error {
return reconcileProfilingConfigMap(configMap, ownerRef, olmImage, namespace, olmCollectProfilesConfigMap)
}

func reconcileProfilingConfigMap(configMap *corev1.ConfigMap, ownerRef config.OwnerRef, olmImage, namespace string, sourceConfigMap *corev1.ConfigMap) error {
ownerRef.ApplyTo(configMap)
configMap.Data = sourceConfigMap.DeepCopy().Data
return nil
}

func ReconcileCollectProfilesRole(role *rbacv1.Role, ownerRef config.OwnerRef, olmImage, namespace string) error {
return reconcileProfilingRole(role, ownerRef, olmImage, namespace, olmCollectProfilesRole)
}

func reconcileProfilingRole(role *rbacv1.Role, ownerRef config.OwnerRef, olmImage, namespace string, sourceRole *rbacv1.Role) error {
ownerRef.ApplyTo(role)
role.Rules = sourceRole.DeepCopy().Rules
return nil
}

func ReconcileCollectProfilesRoleBinding(roleBinding *rbacv1.RoleBinding, ownerRef config.OwnerRef, olmImage, namespace string) error {
return reconcileProfilingRoleBinding(roleBinding, ownerRef, olmImage, namespace, olmCollectProfilesRoleBinding)
}

func reconcileProfilingRoleBinding(roleBinding *rbacv1.RoleBinding, ownerRef config.OwnerRef, olmImage, namespace string, sourceRoleBinding *rbacv1.RoleBinding) error {
ownerRef.ApplyTo(roleBinding)
roleBinding.RoleRef = sourceRoleBinding.DeepCopy().RoleRef
roleBinding.Subjects = sourceRoleBinding.DeepCopy().Subjects
return nil
}

func ReconcileCollectProfilesSecret(secret *corev1.Secret, ownerRef config.OwnerRef, olmImage, namespace string) error {
return reconcileProfilingSecret(secret, ownerRef, olmImage, namespace, olmCollectProfilesSecret)
}

func reconcileProfilingSecret(secret *corev1.Secret, ownerRef config.OwnerRef, olmImage, namespace string, sourceSecret *corev1.Secret) error {
ownerRef.ApplyTo(secret)
secret.Type = sourceSecret.DeepCopy().Type
secret.Data = sourceSecret.DeepCopy().Data
return nil
}

func ReconcileCollectProfilesServiceAccount(serviceAccount *corev1.ServiceAccount, ownerRef config.OwnerRef, olmImage, namespace string) error {
return reconcileProfilingServiceAccount(serviceAccount, ownerRef, olmImage, namespace, olmCollectProfilesServiceAccount)
}

func reconcileProfilingServiceAccount(serviceAccount *corev1.ServiceAccount, ownerRef config.OwnerRef, olmImage, namespace string, sourceSecret *corev1.ServiceAccount) error {
ownerRef.ApplyTo(serviceAccount)
return nil
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,6 @@ func ReconcileOLMPackageServerCertSecret(secret, ca *corev1.Secret, ownerRef con
return reconcileSignedCertWithAddresses(secret, ca, ownerRef, "packageserver", []string{"openshift"}, X509SignerUsage, X509UsageClientServerAuth, dnsNames, nil)
}

func ReconcileOLMProfileCollectorCertSecret(secret, ca *corev1.Secret, ownerRef config.OwnerRef) error {
return reconcileSignedCert(secret, ca, ownerRef, "olm-pprof", []string{"openshift"}, X509SignerUsage, X509UsageClientAuth)
}

func ReconcileOLMCatalogOperatorServingCertSecret(secret, ca *corev1.Secret, ownerRef config.OwnerRef) error {
dnsNames := []string{
"catalog-operator-metrics",
Expand Down