diff --git a/README.md b/README.md index 719e69107..7be4f85f5 100644 --- a/README.md +++ b/README.md @@ -105,5 +105,6 @@ Using the [operator-sdk](https://github.com/operator-framework/operator-sdk), ru oc apply -f deploy/crds/operator.openshift.io_mustgathers_crd.yaml oc new-project must-gather-operator export DEFAULT_MUST_GATHER_IMAGE='quay.io/openshift/origin-must-gather:latest' +export ACM_HCP_MUST_GATHER_IMAGE='registry.redhat.io/multicluster-engine/must-gather-rhel9:v2.8' OPERATOR_NAME=must-gather-operator operator-sdk run --verbose --local --namespace '' ``` diff --git a/api/v1alpha1/mustgather_types.go b/api/v1alpha1/mustgather_types.go index 7b99fa641..03d435588 100644 --- a/api/v1alpha1/mustgather_types.go +++ b/api/v1alpha1/mustgather_types.go @@ -25,6 +25,9 @@ import ( // NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. // MustGatherSpec defines the desired state of MustGather +// +kubebuilder:validation:XValidation:rule="self.mustGatherImage != 'acm_hcp' || (has(self.hostedClusterOptions.hostedClusterNamespace) && has(self.hostedClusterOptions.hostedClusterName))",message="hostedClusterNamespace and hostedClusterName are required when mustGatherImage is 'acm_hcp'" +// +kubebuilder:validation:XValidation:rule="self.mustGatherImage != 'acm_hcp' || !self.audit",message="audit must be false when mustGatherImage is 'acm_hcp'" +// +kubebuilder:validation:XValidation:rule="self.mustGatherImage == 'acm_hcp' || (!has(self.hostedClusterOptions.hostedClusterNamespace) && !has(self.hostedClusterOptions.hostedClusterName))",message="hosted cluster fields are only valid when mustGatherImage is 'acm_hcp'" type MustGatherSpec struct { // the service account to use to run the must gather job pod, defaults to default // +kubebuilder:validation:Optional @@ -62,6 +65,30 @@ type MustGatherSpec struct { // the tar archive on the cluster. // +optional Storage *Storage `json:"storage,omitempty"` + // The image to use for the must-gather pods. + // This can currently only be one of two values: + // 'default' for most cases including OCP and Managed OpenShift, which will use quay.io/openshift/must-gather + // 'acm_hcp' for HCP must gathers, which will use registry.redhat.io/multicluster-engine/must-gather-rhel9:v2.8 + // +kubebuilder:validation:Optional + // +kubebuilder:validation:Enum={"default","acm_hcp"} + // +kubebuilder:default:=default + MustGatherImage string `json:"mustGatherImage,omitempty"` + + // The options for running must-gather on a hosted cluster. + // +kubebuilder:validation:Optional + HostedClusterOptions *HostedClusterOptions `json:"hostedClusterOptions,omitempty"` +} + +type HostedClusterOptions struct { + // The namespace of the hosted cluster to collect the must-gather for. + // This field is required if mustGatherImage='acm_hcp'. + // +kubebuilder:validation:Required + HostedClusterNamespace string `json:"hostedClusterNamespace"` + + // The name of the hosted cluster to collect the must-gather for. + // This field is required if mustGatherImage='acm_hcp'. + // +kubebuilder:validation:Required + HostedClusterName string `json:"hostedClusterName"` } // SFTPSpec defines the desired state of SFTPSpec diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 2c67362d9..618393257 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -9,6 +9,21 @@ import ( runtime "k8s.io/apimachinery/pkg/runtime" ) +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *HostedClusterOptions) DeepCopyInto(out *HostedClusterOptions) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HostedClusterOptions. +func (in *HostedClusterOptions) DeepCopy() *HostedClusterOptions { + if in == nil { + return nil + } + out := new(HostedClusterOptions) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *MustGather) DeepCopyInto(out *MustGather) { *out = *in @@ -101,6 +116,11 @@ func (in *MustGatherSpec) DeepCopyInto(out *MustGatherSpec) { *out = new(Storage) **out = **in } + if in.HostedClusterOptions != nil { + in, out := &in.HostedClusterOptions, &out.HostedClusterOptions + *out = new(HostedClusterOptions) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MustGatherSpec. diff --git a/bundle/manifests/tech-preview/operator.openshift.io_mustgathers.yaml b/bundle/manifests/tech-preview/operator.openshift.io_mustgathers.yaml index 6301bc909..de64937ae 100644 --- a/bundle/manifests/tech-preview/operator.openshift.io_mustgathers.yaml +++ b/bundle/manifests/tech-preview/operator.openshift.io_mustgathers.yaml @@ -190,6 +190,27 @@ spec: is SFTP, and forbidden otherwise rule: 'has(self.type) && self.type == ''SFTP'' ? has(self.sftp) : !has(self.sftp)' + mustGatherImage: + default: default + description: |- + The image to use for the must-gather pods. + This can currently only be one of two values: + 'default' for most cases including OCP and Managed OpenShift, which will use quay.io/openshift/must-gather + 'acm_hcp' for HCP must gathers, which will use registry.redhat.io/multicluster-engine/must-gather-rhel9:v2.8 + enum: + - default + - acm_hcp + type: string + hostedClusterName: + description: |- + The name of the hosted cluster to collect the must-gather for. + This field is required if mustGatherImage='acm_hcp'. + type: string + hostedClusterNamespace: + description: |- + The namespace of the hosted cluster to collect the must-gather for. + This field is required if mustGatherImage='acm_hcp'. + type: string type: object status: description: MustGatherStatus defines the observed state of MustGather diff --git a/bundle/manifests/tech-preview/support-log-gather-operator.clusterserviceversion.yaml b/bundle/manifests/tech-preview/support-log-gather-operator.clusterserviceversion.yaml index fac0f7cb6..fd4fa22bf 100644 --- a/bundle/manifests/tech-preview/support-log-gather-operator.clusterserviceversion.yaml +++ b/bundle/manifests/tech-preview/support-log-gather-operator.clusterserviceversion.yaml @@ -208,6 +208,8 @@ spec: value: must-gather-operator - name: DEFAULT_MUST_GATHER_IMAGE value: quay.io/openshift/origin-must-gather:latest + - name: ACM_HCP_MUST_GATHER_IMAGE + value: registry.redhat.io/multicluster-engine/must-gather-rhel9:v2.8 - name: OPERATOR_IMAGE value: quay.io/openshift/origin-support-log-gather-operator:latest - name: OPERATOR_NAMESPACE diff --git a/controllers/mustgather/mustgather_controller.go b/controllers/mustgather/mustgather_controller.go index 71bbaa5cc..351569aeb 100644 --- a/controllers/mustgather/mustgather_controller.go +++ b/controllers/mustgather/mustgather_controller.go @@ -95,6 +95,12 @@ func (r *MustGatherReconciler) Reconcile(ctx context.Context, request reconcile. return reconcile.Result{}, err } + valid, err := r.IsValid(instance) + if !valid { + log.Error(err, "MustGather resource invalid", "instance", instance) + return r.ManageError(ctx, instance, err) + } + // Check if the MustGather instance is marked to be deleted, which is // indicated by the deletion timestamp being set. isMustGatherMarkedToBeDeleted := instance.GetDeletionTimestamp() != nil @@ -290,6 +296,27 @@ func (r *MustGatherReconciler) getJobFromInstance(ctx context.Context, instance return getJobTemplate(operatorImage, *instance), nil } +func (r *MustGatherReconciler) IsValid(instance *mustgatherv1alpha1.MustGather) (bool, error) { + if instance.Spec.MustGatherImage == acmHcpMustGatherImage { + // If MustGatherImage is set to acm_hcp, hosted cluster namespace and name must be set as well, as they are + // required arguments for the acm_hcp must gather image. + hostedClusterOptions := instance.Spec.HostedClusterOptions + if hostedClusterOptions == nil { + return false, goerror.New("hostedClusterOptions must be set when using the acm_hcp must gather image") + } + + if hostedClusterOptions.HostedClusterNamespace == "" || hostedClusterOptions.HostedClusterName == "" { + return false, goerror.New("hostedClusterNamespace and hostedClusterName must be set when using the acm_hcp must gather image") + } + + // If MustGatherImage is set to acm_hcp, we cannot collect audit logs. + if instance.Spec.Audit != nil && *instance.Spec.Audit { + return false, goerror.New("cannot collect audit logs when using the acm_hcp must gather image") + } + } + return true, nil +} + // contains is a helper function for finalizer func contains(list []string, s string) bool { for _, v := range list { diff --git a/controllers/mustgather/mustgather_controller_test.go b/controllers/mustgather/mustgather_controller_test.go index a53f5e6b1..d0911e273 100644 --- a/controllers/mustgather/mustgather_controller_test.go +++ b/controllers/mustgather/mustgather_controller_test.go @@ -1149,6 +1149,93 @@ func TestMustGatherControllerWithUploadTarget(t *testing.T) { } } +func TestIsValid(t *testing.T) { + // A mock Reconciler is needed to call IsValid + r := &MustGatherReconciler{} + + testCases := []struct { + name string + mustGather *mustgatherv1alpha1.MustGather + expectedValid bool + expectedError error + }{ + { + name: "Default image should be valid without hosted cluster info", + mustGather: &mustgatherv1alpha1.MustGather{ + Spec: mustgatherv1alpha1.MustGatherSpec{ + MustGatherImage: "default", + }, + }, + expectedValid: true, + expectedError: nil, + }, + { + name: "ACM HCP image should be valid with all required info", + mustGather: &mustgatherv1alpha1.MustGather{ + Spec: mustgatherv1alpha1.MustGatherSpec{ + MustGatherImage: "acm_hcp", + HostedClusterOptions: &mustgatherv1alpha1.HostedClusterOptions{ + HostedClusterNamespace: "test-ns", + HostedClusterName: "test-cluster", + }, + }, + }, + expectedValid: true, + expectedError: nil, + }, + { + name: "ACM HCP image should be invalid without hosted cluster name", + mustGather: &mustgatherv1alpha1.MustGather{ + Spec: mustgatherv1alpha1.MustGatherSpec{ + MustGatherImage: "acm_hcp", + HostedClusterOptions: &mustgatherv1alpha1.HostedClusterOptions{ + HostedClusterNamespace: "test-ns", + }, + }, + }, + expectedValid: false, + expectedError: errors.New("hostedClusterNamespace and hostedClusterName must be set when using the acm_hcp must gather image"), + }, + { + name: "ACM HCP image should be invalid without hosted cluster namespace", + mustGather: &mustgatherv1alpha1.MustGather{ + Spec: mustgatherv1alpha1.MustGatherSpec{ + MustGatherImage: "acm_hcp", + HostedClusterOptions: &mustgatherv1alpha1.HostedClusterOptions{ + HostedClusterName: "test-cluster", + }, + }, + }, + expectedValid: false, + expectedError: errors.New("hostedClusterNamespace and hostedClusterName must be set when using the acm_hcp must gather image"), + }, + { + name: "ACM HCP image should be invalid without any hosted cluster info", + mustGather: &mustgatherv1alpha1.MustGather{ + Spec: mustgatherv1alpha1.MustGatherSpec{ + MustGatherImage: "acm_hcp", + }, + }, + expectedValid: false, + expectedError: errors.New("hostedClusterOptions must be set when using the acm_hcp must gather image"), + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + valid, err := r.IsValid(tc.mustGather) + + if valid != tc.expectedValid { + t.Errorf("Expected valid to be %v, but got %v", tc.expectedValid, valid) + } + + if (err != nil && tc.expectedError == nil) || (err == nil && tc.expectedError != nil) || (err != nil && tc.expectedError != nil && err.Error() != tc.expectedError.Error()) { + t.Errorf("Expected error '%v', but got '%v'", tc.expectedError, err) + } + }) + } +} + func createMustGatherObject() *mustgatherv1alpha1.MustGather { return &mustgatherv1alpha1.MustGather{ TypeMeta: metav1.TypeMeta{ diff --git a/controllers/mustgather/template.go b/controllers/mustgather/template.go index f0f2d1c48..73cbdc39f 100644 --- a/controllers/mustgather/template.go +++ b/controllers/mustgather/template.go @@ -26,7 +26,7 @@ const ( gatherCommandBinaryAudit = "gather_audit_logs" gatherCommandBinaryNoAudit = "gather" - gatherCommand = "timeout %v bash -x -c -- '/usr/bin/%v' 2>&1 | tee /must-gather/must-gather.log\n\nstatus=$?\nif [[ $status -eq 124 || $status -eq 137 ]]; then\n echo \"Gather timed out.\"\n exit 0\nfi | tee -a /must-gather/must-gather.log" + gatherCommand = "timeout %v bash -x -c -- '/usr/bin/%v %v' 2>&1 | tee /must-gather/must-gather.log\n\nstatus=$?\nif [[ $status -eq 124 || $status -eq 137 ]]; then\n echo \"Gather timed out.\"\n exit 0\nfi | tee -a /must-gather/must-gather.log" gatherContainerName = "gather" backoffLimit = 3 @@ -47,8 +47,12 @@ const ( sshDir = "/tmp/must-gather-operator/.ssh" knownHostsFile = "/tmp/must-gather-operator/.ssh/known_hosts" - // Environment variable specifying the must-gather image + // Environment variables specifying the must-gather images defaultMustGatherImageEnv = "DEFAULT_MUST_GATHER_IMAGE" + acmHcpMustGatherImageEnv = "ACM_HCP_MUST_GATHER_IMAGE" + + defaultMustGatherImage = "default" + acmHcpMustGatherImage = "acm_hcp" ) func getJobTemplate(operatorImage string, mustGather v1alpha1.MustGather) *batchv1.Job { @@ -94,7 +98,12 @@ func getJobTemplate(operatorImage string, mustGather v1alpha1.MustGather) *batch job.Spec.Template.Spec.Containers = append( job.Spec.Template.Spec.Containers, - getGatherContainer(audit, timeout, mustGather.Spec.Storage), + getGatherContainer( + audit, + timeout, + mustGather.Spec.Storage, + mustGather.Spec.MustGatherImage, + mustGather.Spec.HostedClusterOptions), ) // Add the upload container only if the upload target is specified @@ -183,7 +192,7 @@ func initializeJobTemplate(name string, namespace string, serviceAccountRef stri } } -func getGatherContainer(audit bool, timeout time.Duration, storage *v1alpha1.Storage) corev1.Container { +func getGatherContainer(audit bool, timeout time.Duration, storage *v1alpha1.Storage, mustGatherImage string, hostedClusterOptions *v1alpha1.HostedClusterOptions) corev1.Container { var commandBinary string if audit { commandBinary = gatherCommandBinaryAudit @@ -200,13 +209,20 @@ func getGatherContainer(audit bool, timeout time.Duration, storage *v1alpha1.Sto volumeMount.SubPath = storage.PersistentVolume.SubPath } + commandArgs := "" + containerImage := strings.TrimSpace(os.Getenv(defaultMustGatherImageEnv)) + if mustGatherImage == acmHcpMustGatherImage { + containerImage = strings.TrimSpace(os.Getenv(acmHcpMustGatherImageEnv)) + commandArgs = fmt.Sprintf("--hosted-cluster-namespace=%s --hosted-cluster-name=%s", hostedClusterOptions.HostedClusterNamespace, hostedClusterOptions.HostedClusterName) + } + return corev1.Container{ Command: []string{ "/bin/bash", "-c", - fmt.Sprintf(gatherCommand, math.Ceil(timeout.Seconds()), commandBinary), + fmt.Sprintf(gatherCommand, math.Ceil(timeout.Seconds()), commandBinary, commandArgs), }, - Image: strings.TrimSpace(os.Getenv(defaultMustGatherImageEnv)), + Image: containerImage, Name: gatherContainerName, VolumeMounts: []corev1.VolumeMount{ volumeMount, diff --git a/controllers/mustgather/template_test.go b/controllers/mustgather/template_test.go index 867d53ede..ab05af541 100644 --- a/controllers/mustgather/template_test.go +++ b/controllers/mustgather/template_test.go @@ -76,27 +76,49 @@ func Test_initializeJobTemplate(t *testing.T) { } func Test_getGatherContainer(t *testing.T) { + + testDefaultMustGatherImage := "quay.io/foo/bar/must-gather:latest" + testAcmHcpMustGatherImage := "quay.io/acm/hcp/must-gather:latest" + tests := []struct { - name string - audit bool - timeout time.Duration - mustGatherImage string - storage *mustgatherv1alpha1.Storage + name string + audit bool + timeout time.Duration + mustGatherImage string + storage *mustgatherv1alpha1.Storage + hostedClusterOptions mustgatherv1alpha1.HostedClusterOptions + expectedImage string + expectedArgs string }{ { name: "no audit", timeout: 5 * time.Second, - mustGatherImage: "quay.io/foo/bar/must-gather:latest", + mustGatherImage: defaultMustGatherImageEnv, + expectedImage: testDefaultMustGatherImage, }, { name: "audit", audit: true, timeout: 0 * time.Second, - mustGatherImage: "quay.io/foo/bar/must-gather:latest", + mustGatherImage: defaultMustGatherImageEnv, + expectedImage: testDefaultMustGatherImage, + }, + { + name: "HCP default", + timeout: 5 * time.Second, + mustGatherImage: acmHcpMustGatherImage, + hostedClusterOptions: mustgatherv1alpha1.HostedClusterOptions{ + HostedClusterName: "foo", + HostedClusterNamespace: "bar", + }, + expectedImage: testAcmHcpMustGatherImage, + expectedArgs: "--hosted-cluster-namespace=bar --hosted-cluster-name=foo", }, { - name: "with PVC", - timeout: 5 * time.Second, + name: "with PVC", + timeout: 5 * time.Second, + mustGatherImage: defaultMustGatherImageEnv, + expectedImage: testDefaultMustGatherImage, storage: &mustgatherv1alpha1.Storage{ Type: mustgatherv1alpha1.StorageTypePersistentVolume, PersistentVolume: mustgatherv1alpha1.PersistentVolumeConfig{ @@ -110,15 +132,16 @@ func Test_getGatherContainer(t *testing.T) { { name: "robust timeout", timeout: 6*time.Hour + 5*time.Minute + 3*time.Second, // 6h5m3s - mustGatherImage: "quay.io/foo/bar/must-gather:latest", + mustGatherImage: defaultMustGatherImageEnv, + expectedImage: testDefaultMustGatherImage, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - t.Setenv(defaultMustGatherImageEnv, tt.mustGatherImage) - expectedImage := tt.mustGatherImage + t.Setenv(defaultMustGatherImageEnv, testDefaultMustGatherImage) + t.Setenv(acmHcpMustGatherImageEnv, testAcmHcpMustGatherImage) - container := getGatherContainer(tt.audit, tt.timeout, tt.storage) + container := getGatherContainer(tt.audit, tt.timeout, tt.storage, tt.mustGatherImage, &tt.hostedClusterOptions) containerCommand := container.Command[2] if tt.audit && !strings.Contains(containerCommand, gatherCommandBinaryAudit) { @@ -132,8 +155,15 @@ func Test_getGatherContainer(t *testing.T) { t.Fatalf("the duration was not properly added to the container command, got %v but wanted %v", strings.Split(containerCommand, " ")[1], timeoutInSeconds) } - if container.Image != expectedImage { - t.Fatalf("expected container image %v but got %v", expectedImage, container.Image) + if tt.expectedArgs != "" { + if !strings.Contains(containerCommand, tt.expectedArgs) { + t.Fatalf("command did not contain expected arguments: %v", tt.expectedArgs) + + } + } + + if container.Image != tt.expectedImage { + t.Fatalf("expected container image %v but got %v", tt.expectedImage, container.Image) } if tt.storage != nil { diff --git a/deploy/99_must-gather-operator.Deployment.yaml b/deploy/99_must-gather-operator.Deployment.yaml index 9355b21ac..f75573e28 100644 --- a/deploy/99_must-gather-operator.Deployment.yaml +++ b/deploy/99_must-gather-operator.Deployment.yaml @@ -42,6 +42,8 @@ spec: value: must-gather-operator - name: DEFAULT_MUST_GATHER_IMAGE value: quay.io/openshift/origin-must-gather:latest + - name: ACM_HCP_MUST_GATHER_IMAGE + value: registry.redhat.io/multicluster-engine/must-gather-rhel9:v2.8 - name: OPERATOR_IMAGE value: quay.io/openshift/origin-must-gather-operator:latest - name: OPERATOR_NAMESPACE diff --git a/deploy/crds/operator.openshift.io_mustgathers.yaml b/deploy/crds/operator.openshift.io_mustgathers.yaml index 6301bc909..d2ee92b65 100644 --- a/deploy/crds/operator.openshift.io_mustgathers.yaml +++ b/deploy/crds/operator.openshift.io_mustgathers.yaml @@ -45,6 +45,34 @@ spec: A flag to specify if audit logs must be collected See documentation for further information. type: boolean + hostedClusterOptions: + description: The options for running must-gather on a hosted cluster. + properties: + hostedClusterName: + description: |- + The name of the hosted cluster to collect the must-gather for. + This field is required if mustGatherImage='acm_hcp'. + type: string + hostedClusterNamespace: + description: |- + The namespace of the hosted cluster to collect the must-gather for. + This field is required if mustGatherImage='acm_hcp'. + type: string + required: + - hostedClusterName + - hostedClusterNamespace + type: object + mustGatherImage: + default: default + description: |- + The image to use for the must-gather pods. + This can currently only be one of two values: + 'default' for most cases including OCP and Managed OpenShift, which will use quay.io/openshift/must-gather + 'acm_hcp' for HCP must gathers, which will use registry.redhat.io/multicluster-engine/must-gather-rhel9:v2.8 + enum: + - default + - acm_hcp + type: string mustGatherTimeout: description: |- A time limit for gather command to complete a floating point number with a suffix: @@ -191,6 +219,17 @@ spec: rule: 'has(self.type) && self.type == ''SFTP'' ? has(self.sftp) : !has(self.sftp)' type: object + x-kubernetes-validations: + - message: hostedClusterNamespace and hostedClusterName are required when + mustGatherImage is 'acm_hcp' + rule: self.mustGatherImage != 'acm_hcp' || (has(self.hostedClusterOptions.hostedClusterNamespace) + && has(self.hostedClusterOptions.hostedClusterName)) + - message: audit must be false when mustGatherImage is 'acm_hcp' + rule: self.mustGatherImage != 'acm_hcp' || !self.audit + - message: hosted cluster fields are only valid when mustGatherImage is + 'acm_hcp' + rule: self.mustGatherImage == 'acm_hcp' || (!has(self.hostedClusterOptions.hostedClusterNamespace) + && !has(self.hostedClusterOptions.hostedClusterName)) status: description: MustGatherStatus defines the observed state of MustGather properties: