From db8c0cb75ea384eb73126aaa27888f756a84ce0d Mon Sep 17 00:00:00 2001 From: Dhiraj Bokde Date: Thu, 1 Feb 2024 01:21:59 -0800 Subject: [PATCH] feat: Add ModelRegistry component (#775) (#776) Squashed commit due to buildability since ComponentInterface has changed. Other patches squashed as well to avoid possible double squashing on merging. modelregistry: regenerate autogenerated files Run `make generate manifests` after all the changes Signed-off-by: Yauheni Kaliuta feat: Add ModelRegistry component (#775) (#776) * feat: Add ModelRegistry component (#775) * fix: Fix modelregistry odh overlays path * fix: fix dsc_create_test tests err nil check * fix: refactor ModelRegistry.ReconcileComponent for new parameters * chore: added modelregistry to README.md * fix: add missing rbac rules for deploymentconfigs and daemonsets * chore: code lint cleanup * fix: added check for nil DevFlags in model-registry component * fix: add nil check for dscispec.DevFlags in model-registry ReconcileComponent * fix: remove RBAC rules for daemonsets and deploymentconfigs * fix(chore): fix lint errors in dsc_deletion_test.go (cherry picked from commit 112d3f14cbe54361d936582dcb44fbd71a69f862) Signed-off-by: Yauheni Kaliuta modelregistry: partial: chore: removes SetImageParamsMap from ComponentInterface (#897) Partial application of already applied commit d10a7643c13db0a5dffdb1700f7dfa8e657907ba Author: Bartosz Majsak Date: Thu Mar 7 15:43:37 2024 +0100 chore: removes SetImageParamsMap from ComponentInterface (#897) As it's not used by any component, acting as a simple pass-return loop. This makes the API contract a bit cleaner. Signed-off-by: Yauheni Kaliuta modelregistry: partial: chore: remove the need of passing rest config (#895) Partial application of already applied commit ca7fa9868489fe8e1ec5f67e64773b2e84062cf6 Author: Bartosz Majsak Date: Fri Mar 8 17:40:54 2024 +0100 chore: remove the need of passing rest config (#895) * chore: fixes ComponentInterface docs By removing reference to non-existing func. This function has been in use outside of this component. * fix: removes rest config As we are already using client.Client interface we do not have to instantiate other typed clients to e.g. list resources using their own funcs. Generic client.Client is sufficient for these needs. Additionally this change adds ctx propogation for these calls. Signed-off-by: Yauheni Kaliuta modelregistry: partial: feat(logger): for both controller level and component level (#837) Partial application of already applied commit d8a83a28d2e3d312040886f9769c19c34fe12522 Author: Wen Zhou Date: Mon Apr 1 22:06:16 2024 +0200 feat(logger): for both controller level and component level (#837) * feat(logger): for both controller level and component level Signed-off-by: Wen Zhou * update(logger): use logr instead of uber's zap Signed-off-by: Wen Zhou * update: do not log error only print Signed-off-by: Wen Zhou * update: use zap.Options for both and tune levels Signed-off-by: Wen Zhou * update: move setting into common function Signed-off-by: Wen Zhou Signed-off-by: Zhou, Wen --------- Signed-off-by: Wen Zhou Signed-off-by: Zhou, Wen Signed-off-by: Yauheni Kaliuta update(modelregistry): rename image name (#877) Signed-off-by: Wen Zhou (cherry picked from commit b4e4d6f590f5ffae2282125e1f1b7753fd4cb962) modelregistry: partial: chore: cleanup duplicated functions packages and add more for godoc (#981) Partial application of already applied commit 96c85f2ab0324334f207fcd800e8998086ee9f8f Author: Wen Zhou Date: Tue Apr 23 14:05:24 2024 +0200 chore: cleanup duplicated functions packages and add more for godoc (#981) * chore: cleanup duplicated functions/package and add godoc - move GetPlatform() from deploy package to cluster package - move const ManagedRhods SelfManagedRhods OpenDataHub from deploy to cluster package - move WaitForDeploymentAvailable() monitoring package to cluster package - remove monitoring package - move UpdatePodSecurityRolebinding() from common package to cluster package - deprecate GetDomain from common package, to only use GetDomain from cluster package. - remove gvk package, move its GVK to cluster package - move DeleteExistingSubscription() from deploy package to upgrade package - do not export getSubscription() Signed-off-by: Wen Zhou * update: remove gvk into one file but under cluster package Signed-off-by: Wen Zhou * update: rename variable, removing GVK from it Signed-off-by: Wen Zhou * update: move gvk into a sub package under cluster Signed-off-by: Wen Zhou --------- Signed-off-by: Wen Zhou Signed-off-by: Yauheni Kaliuta feat(mr): create namespace for Model Registry (#930) * feat(mr): create namespace for smm Signed-off-by: Wen Zhou * fix: rebase Signed-off-by: Zhou, Wen * update: code review comments Signed-off-by: Wen Zhou * fix(doc): wrong comments Signed-off-by: Wen Zhou * update: remove label to keep namespace even opreator is uninstalled Signed-off-by: Wen Zhou --------- Signed-off-by: Wen Zhou Signed-off-by: Zhou, Wen (cherry picked from commit 1188ce1576078a5fc16872fca78c4c004aa0c721) feat(mr): add model registry odh extras manifests, fixes RHOAIENG-5112 (#953) (cherry picked from commit 7c3e81b29345ed3d34e2f5d3a3b5750a5aa90202) modelregistry: partial: chore: Open up util functions for context propagation (#1033) Partial application of already applied commit 105adaec65c94f60ebc32c27ff15a51c0e1a752c Author: Aslak Knutsen Date: Tue Jun 4 15:16:21 2024 +0200 chore: Open up util functions for context propagation (#1033) context should be determined by the caller and propagated down the call chain. Signed-off-by: Yauheni Kaliuta modelregistry: partial: chore: remove duplicated platform call in each component (#1055) Partial application of already applied commit 1b04761fb609ed1693956257f541f1e92e4aa2f9 Author: Wen Zhou Date: Fri Jun 14 14:47:33 2024 +0200 chore: remove duplicated platform call in each component (#1055) - get in DSC and pass into compoment Signed-off-by: Wen Zhou Signed-off-by: Yauheni Kaliuta modelregistry: update api docs run `make api-docs` add +groupName=datasciencecluster.opendatahub.io On backporting of 1b86e4214436 ("Update readme.md (#890)") Signed-off-by: Yauheni Kaliuta modelregistry: partial: chore(lint): enable contextcheck and containedctx (#1070) Partial application of already applied: commit 06e21a490df651c36474c90b9314fa7230fcb851 Author: Luca Burgazzoli Date: Tue Jun 25 17:15:13 2024 +0200 chore(lint): enable contextcheck and containedctx (#1070) * chore(lint): enable contextcheck Signed-off-by: Luca Burgazzoli * chore(lint): enable containedctx Signed-off-by: Luca Burgazzoli * Fix PR review findings * Fix rebase --------- Signed-off-by: Luca Burgazzoli Signed-off-by: Yauheni Kaliuta refactor: dashboard with new manifests structure (#1065) Partial application of already applied: commit 438f4c26cb4e6c31831aed9a338a86f50a9cc2a9 Author: Wen Zhou Date: Tue Jul 2 16:56:25 2024 +0200 refactor: dashboard with new manifests structure (#1065) * refactor: dashboard with new manifests structure - change type of platform, skip convert to string - add more support for ApplyParam() to not only take ENV but also anything from ExtraParamMaps * update: simplify override function * update: add value for Unknown platform --------- Signed-off-by: Wen Zhou Signed-off-by: Yauheni Kaliuta feat: add managed model registry prometheus config handling logic, part of RHOAIENG-4273 (#1150) (cherry picked from commit 72fc80f489b78879701349233ad45d30825a823e) Adjusted Kueue and TrainingOperator rules Signed-off-by: Yauheni Kaliuta feat: add default cert for model registry, fixes RHOAIENG-9909 (#1165) Conflicts: ApplyParams arguments due to missing: d84cd337ff1d ("update: remove unnecessary param from ApplyParams() (#1180)") * feat: add default cert for model registry, fixes RHOAIENG-9909 * fix: fixed lint errors * fix: add servicemesh feature check for MR, add MR enable check in e2e default cert test * fix: changed MR servicemesh status check to look for Managed state * fix: ignore missing model-registry default cert if already removed (cherry picked from commit 4c411a63bd67d0aec5a66a736a70db0b474230c3) feat: add servicemeshmember for model registry namespace, fixes RHOAIENG-11831 (#1202) * feat: add servicemeshmember for model registry namespace, fixes RHOAIENG-11831 * fix: ignore error if MR smm already exists * code cleanup for readability Co-authored-by: Bartosz Majsak * Avoid shadowing package name in variable Co-authored-by: Bartosz Majsak * chore: rename createServicemeshMember to enrollToServiceMesh, add log messages --------- Co-authored-by: Bartosz Majsak (cherry picked from commit 8f3d01395e615432f7e5bf898401e48c39a6e446) feat: add managed model registry prometheus job, metrics, and alering rules, fixes RHOAIENG-4273 (cherry picked from commit f811d673ba5a1b16e2e6800d25e72925d4f9af87) modelregistry: partial: fix: add check for all platform offering on components Partial application of ab7e04e1bf44 ("fix: add check for all platform offering on components (#1205)") ModelRegistry related. Signed-off-by: Yauheni Kaliuta feat: added shared registries namespace property in model registry dsc component, fixes RHOAIENG-12335 (#1221) * feat: added shared registries namespace property in model registry dsc component, fixes RHOAIENG-12335 Signed-off-by: Dhiraj Bokde * fix: added registriesNamespace property in references to model registry component in docs, samples, etc. Signed-off-by: Dhiraj Bokde * fix: updated api-overview.md Signed-off-by: Dhiraj Bokde * fix: add missing field in setupDSCInstance() Signed-off-by: Dhiraj Bokde * fix: added debug log for failing e2e test * debug: added log messages in test dsc creation Signed-off-by: Dhiraj Bokde * debug: make registriesNamespace omitempty so apiserver will set the default value, clean testDSCCreation with more log messages Signed-off-by: Dhiraj Bokde * update: modelregistry with namespace - add namespace for rhoai case - remove omitemptry on namespace: if user does not set it, it will get default value to use. since its type is string Signed-off-by: Wen Zhou * fix: test need to update for modelreg namespace Signed-off-by: Wen Zhou * update: cleanup debug info in test cases Signed-off-by: Wen Zhou * fix: make registriesNamespace immutability check dynamic in cel validation rule * fix: refactor validation rule, set registriesNamespace to optional, add patch func in modelregistry to set default value * fix: add defaulting webhook including envtest, remove patch from modelregistry component * fix: add namespace name pattern for registriesNamespace, allow empty string to replace with default value * fix: use unique webhook names * fix: don't change existing validating webhook name to avoid upgrade issues * feat: add model registry registriesNamespace in DSC status * update: - move ModelRegistryStatus from ModelReg into status - update validation rule for namespace: - when update is to removed/unmanaged modelreg: regardless previous state, can change name and set to removed - upgrade 2.13 to 2.14 when no NS set in 2.13 - when update is from remove/unmanaged: e.g change from removed to managed and change name at the same tie - keep same namespace, regardless state - reduce print on passed allowed webhook request - set to omitemptry so required is not add in the CSV - fix review comments: - move update status after reconcile is successful - set to use const for caching on ModeReg namespace Signed-off-by: Wen Zhou --------- Signed-off-by: Dhiraj Bokde Signed-off-by: Wen Zhou Co-authored-by: Wen Zhou (cherry picked from commit dd3df217be481caea5de1a02360a44bf5721c175) fix: only replace/set default model reg namespace by webhook if component is enabled - when modelreg is not set in DSC - when modelreg is set to be Removed these two cases should not block operation, for backwards compataible Signed-off-by: Wen Zhou (cherry picked from commit 03f6c0ba316acbab7d42fab5c1ae98644b1c8101) Update autogenerated files run `make generate manifests api-docs` Signed-off-by: Yauheni Kaliuta modelregistry: replace odh-model-registries with rhoai-model-registries Fix names of default registries namespace treewide. Signed-off-by: Yauheni Kaliuta tests: e2e: disable modelregistry It is not GA yet, disable for now to avoid possible problems. Signed-off-by: Yauheni Kaliuta get_all_manifests.sh: modelregistry: use rhoai-2.14 branch Use versioned branch/tag like other components. Signed-off-by: Yauheni Kaliuta Revert "get_all_manifests.sh: modelregistry: use rhoai-2.14 branch" There is no yet rhoai-2.14 branch for modelregistry. This reverts commit 4c7d83aa368db62d58943518872c0fad105d1ba4. Signed-off-by: Yauheni Kaliuta --- PROJECT | 1 + README.md | 5 + .../v1/datasciencecluster_types.go | 15 ++ .../v1/zz_generated.deepcopy.go | 23 ++ ...er.opendatahub.io_datascienceclusters.yaml | 67 ++++++ .../rhods-operator.clusterserviceversion.yaml | 50 ++++ components/component.go | 2 + components/modelregistry/modelregistry.go | 222 ++++++++++++++++++ .../resources/servicemesh-member.tmpl.yaml | 9 + .../modelregistry/zz_generated.deepcopy.go | 39 +++ ...er.opendatahub.io_datascienceclusters.yaml | 70 ++++++ .../prometheus/apps/prometheus-configs.yaml | 114 +++++++++ config/rbac/role.yaml | 26 ++ ...asciencecluster_v1_datasciencecluster.yaml | 5 +- config/webhook/manifests.yaml | 26 ++ .../datasciencecluster_controller.go | 12 + .../datasciencecluster/kubebuilder_rbac.go | 4 + controllers/status/status.go | 5 + controllers/webhook/webhook.go | 56 ++++- controllers/webhook/webhook_suite_test.go | 16 +- docs/DESIGN.md | 3 + docs/api-overview.md | 43 ++++ docs/upgrade-testing.md | 5 +- get_all_manifests.sh | 3 +- main.go | 13 +- pkg/upgrade/upgrade.go | 4 + tests/e2e/creation_test.go | 145 ++++++++++-- tests/e2e/helper_test.go | 7 +- 28 files changed, 960 insertions(+), 30 deletions(-) create mode 100644 components/modelregistry/modelregistry.go create mode 100644 components/modelregistry/resources/servicemesh-member.tmpl.yaml create mode 100644 components/modelregistry/zz_generated.deepcopy.go diff --git a/PROJECT b/PROJECT index 4bfc6c97792..4782b9fcf91 100644 --- a/PROJECT +++ b/PROJECT @@ -34,6 +34,7 @@ resources: path: github.com/opendatahub-io/opendatahub-operator/v2/apis/datasciencecluster/v1 version: v1 webhooks: + defaulting: true validation: true webhookVersion: v1 version: "3" diff --git a/README.md b/README.md index f66c5c3f7a3..89c7a7b3144 100644 --- a/README.md +++ b/README.md @@ -325,12 +325,17 @@ spec: managementState: Managed modelmeshserving: managementState: Managed + modelregistry: + managementState: Managed + registriesNamespace: "rhoai-model-registries" ray: managementState: Managed trainingoperator: managementState: Managed workbenches: managementState: Managed + modelregistry: + managementState: Managed ``` 2. Enable only Dashboard and Workbenches diff --git a/apis/datasciencecluster/v1/datasciencecluster_types.go b/apis/datasciencecluster/v1/datasciencecluster_types.go index b24633ad56b..d64753e07af 100644 --- a/apis/datasciencecluster/v1/datasciencecluster_types.go +++ b/apis/datasciencecluster/v1/datasciencecluster_types.go @@ -31,10 +31,12 @@ import ( "github.com/opendatahub-io/opendatahub-operator/v2/components/kserve" "github.com/opendatahub-io/opendatahub-operator/v2/components/kueue" "github.com/opendatahub-io/opendatahub-operator/v2/components/modelmeshserving" + "github.com/opendatahub-io/opendatahub-operator/v2/components/modelregistry" "github.com/opendatahub-io/opendatahub-operator/v2/components/ray" "github.com/opendatahub-io/opendatahub-operator/v2/components/trainingoperator" "github.com/opendatahub-io/opendatahub-operator/v2/components/trustyai" "github.com/opendatahub-io/opendatahub-operator/v2/components/workbenches" + "github.com/opendatahub-io/opendatahub-operator/v2/controllers/status" "github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster" ) @@ -80,6 +82,15 @@ type Components struct { //Training Operator component configuration. TrainingOperator trainingoperator.TrainingOperator `json:"trainingoperator,omitempty"` + + // ModelRegistry component configuration. + ModelRegistry modelregistry.ModelRegistry `json:"modelregistry,omitempty"` +} + +// ComponentsStatus defines the custom status of DataScienceCluster components. +type ComponentsStatus struct { + // ModelRegistry component status + ModelRegistry *status.ModelRegistryStatus `json:"modelregistry,omitempty"` } // DataScienceClusterStatus defines the observed state of DataScienceCluster. @@ -101,6 +112,10 @@ type DataScienceClusterStatus struct { // List of components with status if installed or not InstalledComponents map[string]bool `json:"installedComponents,omitempty"` + // Expose component's specific status + // +optional + Components ComponentsStatus `json:"components,omitempty"` + // Version and release type Release cluster.Release `json:"release,omitempty"` } diff --git a/apis/datasciencecluster/v1/zz_generated.deepcopy.go b/apis/datasciencecluster/v1/zz_generated.deepcopy.go index a9f4ff44cf1..035c38f876c 100644 --- a/apis/datasciencecluster/v1/zz_generated.deepcopy.go +++ b/apis/datasciencecluster/v1/zz_generated.deepcopy.go @@ -21,6 +21,7 @@ limitations under the License. package v1 import ( + "github.com/opendatahub-io/opendatahub-operator/v2/controllers/status" conditionsv1 "github.com/openshift/custom-resource-status/conditions/v1" corev1 "k8s.io/api/core/v1" runtime "k8s.io/apimachinery/pkg/runtime" @@ -39,6 +40,7 @@ func (in *Components) DeepCopyInto(out *Components) { in.Ray.DeepCopyInto(&out.Ray) in.TrustyAI.DeepCopyInto(&out.TrustyAI) in.TrainingOperator.DeepCopyInto(&out.TrainingOperator) + in.ModelRegistry.DeepCopyInto(&out.ModelRegistry) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Components. @@ -51,6 +53,26 @@ func (in *Components) DeepCopy() *Components { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ComponentsStatus) DeepCopyInto(out *ComponentsStatus) { + *out = *in + if in.ModelRegistry != nil { + in, out := &in.ModelRegistry, &out.ModelRegistry + *out = new(status.ModelRegistryStatus) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ComponentsStatus. +func (in *ComponentsStatus) DeepCopy() *ComponentsStatus { + if in == nil { + return nil + } + out := new(ComponentsStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *DataScienceCluster) DeepCopyInto(out *DataScienceCluster) { *out = *in @@ -148,6 +170,7 @@ func (in *DataScienceClusterStatus) DeepCopyInto(out *DataScienceClusterStatus) (*out)[key] = val } } + in.Components.DeepCopyInto(&out.Components) in.Release.DeepCopyInto(&out.Release) } diff --git a/bundle/manifests/datasciencecluster.opendatahub.io_datascienceclusters.yaml b/bundle/manifests/datasciencecluster.opendatahub.io_datascienceclusters.yaml index 6972bf35135..1332c2fb646 100644 --- a/bundle/manifests/datasciencecluster.opendatahub.io_datascienceclusters.yaml +++ b/bundle/manifests/datasciencecluster.opendatahub.io_datascienceclusters.yaml @@ -385,6 +385,63 @@ spec: pattern: ^(Managed|Unmanaged|Force|Removed)$ type: string type: object + modelregistry: + description: ModelRegistry component configuration. + properties: + devFlags: + description: Add developer fields + properties: + manifests: + description: List of custom manifests for the given component + items: + properties: + contextDir: + default: "" + description: contextDir is the relative path to + the folder containing manifests in a repository + type: string + sourcePath: + default: "" + description: 'sourcePath is the subpath within contextDir + where kustomize builds start. Examples include + any sub-folder or path: `base`, `overlays/dev`, + `default`, `odh` etc' + type: string + uri: + default: "" + description: uri is the URI point to a git repo + with tag/branch. e.g https://github.com/org/repo/tarball/ + type: string + type: object + type: array + type: object + managementState: + description: "Set to one of the following values: \n - \"Managed\" + : the operator is actively managing the component and trying + to keep it active. It will only upgrade the component if + it is safe to do so \n - \"Removed\" : the operator is actively + managing the component and will not install it, or if it + is installed, the operator will try to remove it" + enum: + - Managed + - Removed + pattern: ^(Managed|Unmanaged|Force|Removed)$ + type: string + registriesNamespace: + default: rhoai-model-registries + description: Namespace for model registries to be installed, + configurable only once when model registry is enabled, defaults + to "rhoai-model-registries" + maxLength: 63 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?)?$ + type: string + type: object + x-kubernetes-validations: + - message: RegistriesNamespace is immutable when model registry + is Managed + rule: (self.managementState != 'Managed') || (oldSelf.registriesNamespace + == '') || (oldSelf.managementState != 'Managed')|| (self.registriesNamespace + == oldSelf.registriesNamespace) ray: description: Ray component configuration. properties: @@ -566,6 +623,16 @@ spec: status: description: DataScienceClusterStatus defines the observed state of DataScienceCluster. properties: + components: + description: Expose component's specific status + properties: + modelregistry: + description: ModelRegistry component status + properties: + registriesNamespace: + type: string + type: object + type: object conditions: description: Conditions describes the state of the DataScienceCluster resource. diff --git a/bundle/manifests/rhods-operator.clusterserviceversion.yaml b/bundle/manifests/rhods-operator.clusterserviceversion.yaml index a759df5c774..4eadd900113 100644 --- a/bundle/manifests/rhods-operator.clusterserviceversion.yaml +++ b/bundle/manifests/rhods-operator.clusterserviceversion.yaml @@ -46,6 +46,10 @@ metadata: "modelmeshserving": { "managementState": "Managed" }, + "modelregistry": { + "managementState": "Managed" + "registriesNamespace": "rhoai-model-registries" + }, "ray": { "managementState": "Managed" }, @@ -1032,6 +1036,32 @@ spec: - update - use - watch + - apiGroups: + - modelregistry.opendatahub.io + resources: + - modelregistries + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - modelregistry.opendatahub.io + resources: + - modelregistries/finalizers + verbs: + - update + - apiGroups: + - modelregistry.opendatahub.io + resources: + - modelregistries/status + verbs: + - get + - patch + - update - apiGroups: - monitoring.coreos.com resources: @@ -1793,6 +1823,26 @@ spec: name: Red Hat version: 2.14.0 webhookdefinitions: + - admissionReviewVersions: + - v1 + containerPort: 443 + deploymentName: opendatahub-operator-controller-manager + failurePolicy: Fail + generateName: mutate.operator.opendatahub.io + rules: + - apiGroups: + - datasciencecluster.opendatahub.io + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - datascienceclusters + sideEffects: None + targetPort: 9443 + type: MutatingAdmissionWebhook + webhookPath: /mutate-opendatahub-io-v1 - admissionReviewVersions: - v1 containerPort: 443 diff --git a/components/component.go b/components/component.go index 0730db60559..cd6d44c0972 100644 --- a/components/component.go +++ b/components/component.go @@ -132,6 +132,8 @@ func (c *Component) UpdatePrometheusConfig(_ client.Client, logger logr.Logger, TrustyAIARules string `yaml:"trustyai-alerting.rules"` KserveRRules string `yaml:"kserve-recording.rules"` KserveARules string `yaml:"kserve-alerting.rules"` + ModelRegistryRRules string `yaml:"model-registry-operator-recording.rules"` + ModelRegistryARules string `yaml:"model-registry-operator-alerting.rules"` } `yaml:"data"` } var configMap ConfigMap diff --git a/components/modelregistry/modelregistry.go b/components/modelregistry/modelregistry.go new file mode 100644 index 00000000000..23e4320fe47 --- /dev/null +++ b/components/modelregistry/modelregistry.go @@ -0,0 +1,222 @@ +// Package modelregistry provides utility functions to config ModelRegistry, an ML Model metadata repository service +// +groupName=datasciencecluster.opendatahub.io +package modelregistry + +import ( + "context" + "errors" + "fmt" + "path/filepath" + "strings" + "text/template" + + "github.com/go-logr/logr" + operatorv1 "github.com/openshift/api/operator/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + + dsciv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/dscinitialization/v1" + infrav1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/infrastructure/v1" + "github.com/opendatahub-io/opendatahub-operator/v2/components" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/conversion" + "github.com/opendatahub-io/opendatahub-operator/v2/pkg/deploy" + + _ "embed" +) + +const DefaultModelRegistryCert = "default-modelregistry-cert" + +var ( + ComponentName = "model-registry-operator" + DefaultModelRegistriesNamespace = "rhoai-model-registries" + Path = deploy.DefaultManifestPath + "/" + ComponentName + "/overlays/odh" + // we should not apply this label to the namespace, as it triggered namspace deletion during operator uninstall + // modelRegistryLabels = cluster.WithLabels( + // labels.ODH.OwnedNamespace, "true", + // ). +) + +// Verifies that ModelRegistry implements ComponentInterface. +var _ components.ComponentInterface = (*ModelRegistry)(nil) + +// ModelRegistry struct holds the configuration for the ModelRegistry component. +// The property `registriesNamespace` is immutable when `managementState` is `Managed` + +// +kubebuilder:object:generate=true +// +kubebuilder:validation:XValidation:rule="(self.managementState != 'Managed') || (oldSelf.registriesNamespace == '') || (oldSelf.managementState != 'Managed')|| (self.registriesNamespace == oldSelf.registriesNamespace)",message="RegistriesNamespace is immutable when model registry is Managed" +//nolint:lll + +type ModelRegistry struct { + components.Component `json:""` + + // Namespace for model registries to be installed, configurable only once when model registry is enabled, defaults to "rhoai-model-registries" + // +kubebuilder:default="rhoai-model-registries" + // +kubebuilder:validation:Pattern="^([a-z0-9]([-a-z0-9]*[a-z0-9])?)?$" + // +kubebuilder:validation:MaxLength=63 + RegistriesNamespace string `json:"registriesNamespace,omitempty"` +} + +func (m *ModelRegistry) OverrideManifests(ctx context.Context, _ cluster.Platform) error { + // If devflags are set, update default manifests path + if len(m.DevFlags.Manifests) != 0 { + manifestConfig := m.DevFlags.Manifests[0] + if err := deploy.DownloadManifests(ctx, ComponentName, manifestConfig); err != nil { + return err + } + // If overlay is defined, update paths + defaultKustomizePath := "overlays/odh" + if manifestConfig.SourcePath != "" { + defaultKustomizePath = manifestConfig.SourcePath + } + Path = filepath.Join(deploy.DefaultManifestPath, ComponentName, defaultKustomizePath) + } + + return nil +} + +func (m *ModelRegistry) GetComponentName() string { + return ComponentName +} + +func (m *ModelRegistry) ReconcileComponent(ctx context.Context, cli client.Client, logger logr.Logger, + owner metav1.Object, dscispec *dsciv1.DSCInitializationSpec, platform cluster.Platform, _ bool) error { + l := m.ConfigComponentLogger(logger, ComponentName, dscispec) + var imageParamMap = map[string]string{ + "IMAGES_MODELREGISTRY_OPERATOR": "RELATED_IMAGE_ODH_MODEL_REGISTRY_OPERATOR_IMAGE", + "IMAGES_GRPC_SERVICE": "RELATED_IMAGE_ODH_MLMD_GRPC_SERVER_IMAGE", + "IMAGES_REST_SERVICE": "RELATED_IMAGE_ODH_MODEL_REGISTRY_IMAGE", + } + enabled := m.GetManagementState() == operatorv1.Managed + monitoringEnabled := dscispec.Monitoring.ManagementState == operatorv1.Managed + + if enabled { + // return error if ServiceMesh is not enabled, as it's a required feature + if dscispec.ServiceMesh == nil || dscispec.ServiceMesh.ManagementState != operatorv1.Managed { + return errors.New("ServiceMesh needs to be set to 'Managed' in DSCI CR, it is required by Model Registry") + } + + if err := m.createDependencies(ctx, cli, dscispec); err != nil { + return err + } + + if m.DevFlags != nil { + // Download manifests and update paths + if err := m.OverrideManifests(ctx, platform); err != nil { + return err + } + } + + // Update image parameters only when we do not have customized manifests set + if (dscispec.DevFlags == nil || dscispec.DevFlags.ManifestsUri == "") && (m.DevFlags == nil || len(m.DevFlags.Manifests) == 0) { + extraParamsMap := map[string]string{ + "DEFAULT_CERT": DefaultModelRegistryCert, + } + if err := deploy.ApplyParams(Path, imageParamMap, extraParamsMap); err != nil { + return fmt.Errorf("failed to update image from %s : %w", Path, err) + } + } + + // Create model registries namespace + // We do not delete this namespace even when ModelRegistry is Removed or when operator is uninstalled. + ns, err := cluster.CreateNamespace(ctx, cli, m.RegistriesNamespace) + if err != nil { + return err + } + l.Info("created model registry namespace", "namespace", m.RegistriesNamespace) + // create servicemeshmember here, for now until post MVP solution + err = enrollToServiceMesh(ctx, cli, dscispec, ns) + if err != nil { + return err + } + l.Info("created model registry servicemesh member", "namespace", m.RegistriesNamespace) + } else { + err := m.removeDependencies(ctx, cli, dscispec) + if err != nil { + return err + } + } + + // Deploy ModelRegistry Operator + if err := deploy.DeployManifestsFromPath(ctx, cli, owner, Path, dscispec.ApplicationsNamespace, m.GetComponentName(), enabled); err != nil { + return err + } + l.Info("apply manifests done") + + // Create additional model registry resources, componentEnabled=true because these extras are never deleted! + if err := deploy.DeployManifestsFromPath(ctx, cli, owner, Path+"/extras", dscispec.ApplicationsNamespace, m.GetComponentName(), true); err != nil { + return err + } + l.Info("apply extra manifests done") + + if enabled { + if err := cluster.WaitForDeploymentAvailable(ctx, cli, m.GetComponentName(), dscispec.ApplicationsNamespace, 10, 1); err != nil { + return fmt.Errorf("deployment for %s is not ready to server: %w", ComponentName, err) + } + } + + // CloudService Monitoring handling + if platform == cluster.ManagedRhods { + if err := m.UpdatePrometheusConfig(cli, l, enabled && monitoringEnabled, ComponentName); err != nil { + return err + } + if err := deploy.DeployManifestsFromPath(ctx, cli, owner, + filepath.Join(deploy.DefaultManifestPath, "monitoring", "prometheus", "apps"), + dscispec.Monitoring.Namespace, + "prometheus", true); err != nil { + return err + } + l.Info("updating SRE monitoring done") + } + return nil +} + +func (m *ModelRegistry) createDependencies(ctx context.Context, cli client.Client, dscispec *dsciv1.DSCInitializationSpec) error { + // create DefaultModelRegistryCert + if err := cluster.PropagateDefaultIngressCertificate(ctx, cli, DefaultModelRegistryCert, dscispec.ServiceMesh.ControlPlane.Namespace); err != nil { + return err + } + return nil +} + +func (m *ModelRegistry) removeDependencies(ctx context.Context, cli client.Client, dscispec *dsciv1.DSCInitializationSpec) error { + // delete DefaultModelRegistryCert + certSecret := corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: DefaultModelRegistryCert, + Namespace: dscispec.ServiceMesh.ControlPlane.Namespace, + }, + } + // ignore error if the secret has already been removed + if err := cli.Delete(ctx, &certSecret); client.IgnoreNotFound(err) != nil { + return err + } + return nil +} + +//go:embed resources/servicemesh-member.tmpl.yaml +var smmTemplate string + +func enrollToServiceMesh(ctx context.Context, cli client.Client, dscispec *dsciv1.DSCInitializationSpec, namespace *corev1.Namespace) error { + tmpl, err := template.New("servicemeshmember").Parse(smmTemplate) + if err != nil { + return fmt.Errorf("error parsing servicemeshmember template: %w", err) + } + builder := strings.Builder{} + controlPlaneData := struct { + Namespace string + ControlPlane *infrav1.ControlPlaneSpec + }{Namespace: namespace.Name, ControlPlane: &dscispec.ServiceMesh.ControlPlane} + + if err = tmpl.Execute(&builder, controlPlaneData); err != nil { + return fmt.Errorf("error executing servicemeshmember template: %w", err) + } + + unstrObj, err := conversion.StrToUnstructured(builder.String()) + if err != nil || len(unstrObj) != 1 { + return fmt.Errorf("error converting servicemeshmember template: %w", err) + } + + return client.IgnoreAlreadyExists(cli.Create(ctx, unstrObj[0])) +} diff --git a/components/modelregistry/resources/servicemesh-member.tmpl.yaml b/components/modelregistry/resources/servicemesh-member.tmpl.yaml new file mode 100644 index 00000000000..8665f2ba54f --- /dev/null +++ b/components/modelregistry/resources/servicemesh-member.tmpl.yaml @@ -0,0 +1,9 @@ +apiVersion: maistra.io/v1 +kind: ServiceMeshMember +metadata: + name: default + namespace: {{.Namespace}} +spec: + controlPlaneRef: + namespace: {{ .ControlPlane.Namespace }} + name: {{ .ControlPlane.Name }} diff --git a/components/modelregistry/zz_generated.deepcopy.go b/components/modelregistry/zz_generated.deepcopy.go new file mode 100644 index 00000000000..86c4a17e14c --- /dev/null +++ b/components/modelregistry/zz_generated.deepcopy.go @@ -0,0 +1,39 @@ +//go:build !ignore_autogenerated + +/* +Copyright 2023. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by controller-gen. DO NOT EDIT. + +package modelregistry + +import () + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ModelRegistry) DeepCopyInto(out *ModelRegistry) { + *out = *in + in.Component.DeepCopyInto(&out.Component) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ModelRegistry. +func (in *ModelRegistry) DeepCopy() *ModelRegistry { + if in == nil { + return nil + } + out := new(ModelRegistry) + in.DeepCopyInto(out) + return out +} diff --git a/config/crd/bases/datasciencecluster.opendatahub.io_datascienceclusters.yaml b/config/crd/bases/datasciencecluster.opendatahub.io_datascienceclusters.yaml index 4590dcfe9d1..ba6a6770e85 100644 --- a/config/crd/bases/datasciencecluster.opendatahub.io_datascienceclusters.yaml +++ b/config/crd/bases/datasciencecluster.opendatahub.io_datascienceclusters.yaml @@ -398,6 +398,66 @@ spec: pattern: ^(Managed|Unmanaged|Force|Removed)$ type: string type: object + modelregistry: + description: ModelRegistry component configuration. + properties: + devFlags: + description: Add developer fields + properties: + manifests: + description: List of custom manifests for the given component + items: + properties: + contextDir: + default: manifests + description: contextDir is the relative path to + the folder containing manifests in a repository, + default value "manifests" + type: string + sourcePath: + default: "" + description: 'sourcePath is the subpath within contextDir + where kustomize builds start. Examples include + any sub-folder or path: `base`, `overlays/dev`, + `default`, `odh` etc.' + type: string + uri: + default: "" + description: uri is the URI point to a git repo + with tag/branch. e.g. https://github.com/org/repo/tarball/ + type: string + type: object + type: array + type: object + managementState: + description: |- + Set to one of the following values: + + - "Managed" : the operator is actively managing the component and trying to keep it active. + It will only upgrade the component if it is safe to do so + + - "Removed" : the operator is actively managing the component and will not install it, + or if it is installed, the operator will try to remove it + enum: + - Managed + - Removed + pattern: ^(Managed|Unmanaged|Force|Removed)$ + type: string + registriesNamespace: + default: rhoai-model-registries + description: Namespace for model registries to be installed, + configurable only once when model registry is enabled, defaults + to "rhoai-model-registries" + maxLength: 63 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?)?$ + type: string + type: object + x-kubernetes-validations: + - message: RegistriesNamespace is immutable when model registry + is Managed + rule: (self.managementState != 'Managed') || (oldSelf.registriesNamespace + == '') || (oldSelf.managementState != 'Managed')|| (self.registriesNamespace + == oldSelf.registriesNamespace) ray: description: Ray component configuration. properties: @@ -587,6 +647,16 @@ spec: status: description: DataScienceClusterStatus defines the observed state of DataScienceCluster. properties: + components: + description: Expose component's specific status + properties: + modelregistry: + description: ModelRegistry component status + properties: + registriesNamespace: + type: string + type: object + type: object conditions: description: Conditions describes the state of the DataScienceCluster resource. diff --git a/config/monitoring/prometheus/apps/prometheus-configs.yaml b/config/monitoring/prometheus/apps/prometheus-configs.yaml index b19ec0394ba..933421afca7 100644 --- a/config/monitoring/prometheus/apps/prometheus-configs.yaml +++ b/config/monitoring/prometheus/apps/prometheus-configs.yaml @@ -372,6 +372,31 @@ data: target_label: __address__ replacement: ${1}:8080 + - job_name: 'Model Registry Operator' + honor_labels: true + metrics_path: /metrics + scheme: https + tls_config: + insecure_skip_verify: true + params: + module: [http_2xx] + authorization: + credentials_file: /run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: endpoints + namespaces: + names: + - + relabel_configs: + - source_labels: [__meta_kubernetes_service_name] + regex: ^(model-registry-operator-controller-manager-metrics-service)$ + target_label: kubernetes_name + action: keep + - source_labels: [__address__] + regex: (.+):(\d+) + target_label: __address__ + replacement: ${1}:8443 + - job_name: 'RHOAI Metrics' honor_labels: true scheme: http @@ -1577,3 +1602,92 @@ data: labels: severity: warning instance: trustyai-service-operator-controller-manager + + model-registry-operator-recording.rules: | + groups: + - name: SLOs - Model Registry Operator + rules: + - expr: | + absent(up{job=~'Model Registry Operator'}) * 0 or vector(1) + labels: + instance: model-registry-operator + record: probe_success + - expr: | + 1 - min(avg_over_time(probe_success{instance="model-registry-operator"}[1d])) + labels: + instance: model-registry-operator + record: probe_success:burnrate1d + - expr: | + 1 - min(avg_over_time(probe_success{instance="model-registry-operator"}[1h])) + labels: + instance: model-registry-operator + record: probe_success:burnrate1h + - expr: | + 1 - min(avg_over_time(probe_success{instance="model-registry-operator"}[2h])) + labels: + instance: model-registry-operator + record: probe_success:burnrate2h + - expr: | + 1 - min(avg_over_time(probe_success{instance="model-registry-operator"}[30m])) + labels: + instance: model-registry-operator + record: probe_success:burnrate30m + - expr: | + 1 - min(avg_over_time(probe_success{instance="model-registry-operator"}[3d])) + labels: + instance: model-registry-operator + record: probe_success:burnrate3d + - expr: | + 1 - min(avg_over_time(probe_success{instance="model-registry-operator"}[5m])) + labels: + instance: model-registry-operator + record: probe_success:burnrate5m + - expr: | + 1 - min(avg_over_time(probe_success{instance="model-registry-operator"}[6h])) + labels: + instance: model-registry-operator + record: probe_success:burnrate6h + + model-registry-operator-alerting.rules: | + groups: + - name: SLOs-probe_success_model_controller + rules: + - alert: Model Registry Operator Probe Success Burn Rate + annotations: + message: 'High error budget burn for {{ $labels.instance }} (current value: {{ $value }}).' + triage: "https://gitlab.cee.redhat.com/service/managed-tenants-sops/-/blob/main/RHODS/Model-Serving/rhods-model-registry-operator-probe-success-burn-rate.md" + summary: Model Registry Operator Probe Success Burn Rate + expr: | + sum(probe_success:burnrate5m{instance=~"model-registry-operator"}) by (instance) > (14.40 * (1-0.98000)) + and + sum(probe_success:burnrate1h{instance=~"model-registry-operator"}) by (instance) > (14.40 * (1-0.98000)) + for: 2m + labels: + severity: critical + namespace: redhat-ods-applications + - alert: Model Registry Operator Probe Success Burn Rate + annotations: + message: 'High error budget burn for {{ $labels.instance }} (current value: {{ $value }}).' + triage: "https://gitlab.cee.redhat.com/service/managed-tenants-sops/-/blob/main/RHODS/Model-Serving/rhods-model-registry-operator-probe-success-burn-rate.md" + summary: Model Registry Operator Probe Success Burn Rate + expr: | + sum(probe_success:burnrate30m{instance=~"model-registry-operator"}) by (instance) > (6.00 * (1-0.98000)) + and + sum(probe_success:burnrate6h{instance=~"model-registry-operator"}) by (instance) > (6.00 * (1-0.98000)) + for: 15m + labels: + severity: critical + namespace: redhat-ods-applications + - alert: Model Registry Operator Probe Success Burn Rate + annotations: + message: 'High error budget burn for {{ $labels.instance }} (current value: {{ $value }}).' + triage: "https://gitlab.cee.redhat.com/service/managed-tenants-sops/-/blob/main/RHODS/Model-Serving/rhods-model-registry-operator-probe-success-burn-rate.md" + summary: Model Registry Operator Probe Success Burn Rate + expr: | + sum(probe_success:burnrate2h{instance=~"model-registry-operator"}) by (instance) > (3.00 * (1-0.98000)) + and + sum(probe_success:burnrate1d{instance=~"model-registry-operator"}) by (instance) > (3.00 * (1-0.98000)) + for: 1h + labels: + severity: warning + namespace: redhat-ods-applications diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 8ddcba22058..8327a66c0ed 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -504,6 +504,32 @@ rules: - update - use - watch +- apiGroups: + - modelregistry.opendatahub.io + resources: + - modelregistries + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - modelregistry.opendatahub.io + resources: + - modelregistries/finalizers + verbs: + - update +- apiGroups: + - modelregistry.opendatahub.io + resources: + - modelregistries/status + verbs: + - get + - patch + - update - apiGroups: - monitoring.coreos.com resources: diff --git a/config/samples/datasciencecluster_v1_datasciencecluster.yaml b/config/samples/datasciencecluster_v1_datasciencecluster.yaml index 4ffd8b417e8..922469140e2 100644 --- a/config/samples/datasciencecluster_v1_datasciencecluster.yaml +++ b/config/samples/datasciencecluster_v1_datasciencecluster.yaml @@ -39,4 +39,7 @@ spec: workbenches: managementState: "Managed" trustyai: - managementState: "Removed" + managementState: "Removed" + modelregistry: + managementState: "Managed" + registriesNamespace: "rhoai-model-registries" diff --git a/config/webhook/manifests.yaml b/config/webhook/manifests.yaml index 3dcfb31f3e3..9c1174d5173 100644 --- a/config/webhook/manifests.yaml +++ b/config/webhook/manifests.yaml @@ -1,5 +1,31 @@ --- apiVersion: admissionregistration.k8s.io/v1 +kind: MutatingWebhookConfiguration +metadata: + name: mutating-webhook-configuration +webhooks: +- admissionReviewVersions: + - v1 + clientConfig: + service: + name: webhook-service + namespace: system + path: /mutate-opendatahub-io-v1 + failurePolicy: Fail + name: mutate.operator.opendatahub.io + rules: + - apiGroups: + - datasciencecluster.opendatahub.io + apiVersions: + - v1 + operations: + - CREATE + - UPDATE + resources: + - datascienceclusters + sideEffects: None +--- +apiVersion: admissionregistration.k8s.io/v1 kind: ValidatingWebhookConfiguration metadata: name: validating-webhook-configuration diff --git a/controllers/datasciencecluster/datasciencecluster_controller.go b/controllers/datasciencecluster/datasciencecluster_controller.go index 1189e4b7d2f..cd1cec7ee8d 100644 --- a/controllers/datasciencecluster/datasciencecluster_controller.go +++ b/controllers/datasciencecluster/datasciencecluster_controller.go @@ -53,6 +53,7 @@ import ( dsciv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/dscinitialization/v1" "github.com/opendatahub-io/opendatahub-operator/v2/components" "github.com/opendatahub-io/opendatahub-operator/v2/components/datasciencepipelines" + "github.com/opendatahub-io/opendatahub-operator/v2/components/modelregistry" "github.com/opendatahub-io/opendatahub-operator/v2/controllers/status" "github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster" annotations "github.com/opendatahub-io/opendatahub-operator/v2/pkg/metadata/annotations" @@ -311,6 +312,8 @@ func (r *DataScienceClusterReconciler) reconcileSubComponent(ctx context.Context } err = component.ReconcileComponent(ctx, r.Client, r.Log, instance, r.DataScienceCluster.DSCISpec, platform, installedComponentValue) + // TODO: replace this hack with a full refactor of component status in the future + if err != nil { // reconciliation failed: log errors, raise event and update status accordingly instance = r.reportError(err, instance, "failed to reconcile "+componentName+" on DataScienceCluster") @@ -339,6 +342,15 @@ func (r *DataScienceClusterReconciler) reconcileSubComponent(ctx context.Context default: status.RemoveComponentCondition(&saved.Status.Conditions, componentName) } + + // TODO: replace this hack with a full refactor of component status in the future + if mr, isMR := component.(*modelregistry.ModelRegistry); isMR { + if enabled { + saved.Status.Components.ModelRegistry = &status.ModelRegistryStatus{RegistriesNamespace: mr.RegistriesNamespace} + } else { + saved.Status.Components.ModelRegistry = nil + } + } }) if err != nil { instance = r.reportError(err, instance, "failed to update DataScienceCluster status after reconciling "+componentName) diff --git a/controllers/datasciencecluster/kubebuilder_rbac.go b/controllers/datasciencecluster/kubebuilder_rbac.go index 7aa9fb3c6b5..36ead7c0ca3 100644 --- a/controllers/datasciencecluster/kubebuilder_rbac.go +++ b/controllers/datasciencecluster/kubebuilder_rbac.go @@ -122,6 +122,10 @@ package datasciencecluster // +kubebuilder:rbac:groups="monitoring.coreos.com",resources=probes,verbs=get;create;patch;delete;deletecollection // +kubebuilder:rbac:groups="monitoring.coreos.com",resources=prometheusrules,verbs=get;create;patch;delete;deletecollection +// +kubebuilder:rbac:groups=modelregistry.opendatahub.io,resources=modelregistries,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=modelregistry.opendatahub.io,resources=modelregistries/status,verbs=get;update;patch +// +kubebuilder:rbac:groups=modelregistry.opendatahub.io,resources=modelregistries/finalizers,verbs=update + // +kubebuilder:rbac:groups="monitoring.coreos.com",resources=prometheuses/finalizers,verbs=get;create;patch;delete;deletecollection // +kubebuilder:rbac:groups="monitoring.coreos.com",resources=prometheuses/status,verbs=get;create;patch;delete;deletecollection diff --git a/controllers/status/status.go b/controllers/status/status.go index f831676ed2e..a09787a1aba 100644 --- a/controllers/status/status.go +++ b/controllers/status/status.go @@ -211,3 +211,8 @@ func SetComponentCondition(conditions *[]conditionsv1.Condition, component strin func RemoveComponentCondition(conditions *[]conditionsv1.Condition, component string) { conditionsv1.RemoveStatusCondition(conditions, conditionsv1.ConditionType(component+ReadySuffix)) } + +// ModelRegistryStatus struct holds the status for the ModelRegistry component. +type ModelRegistryStatus struct { + RegistriesNamespace string `json:"registriesNamespace,omitempty"` +} diff --git a/controllers/webhook/webhook.go b/controllers/webhook/webhook.go index 0ac19c4229b..87f463c770b 100644 --- a/controllers/webhook/webhook.go +++ b/controllers/webhook/webhook.go @@ -21,6 +21,7 @@ import ( "fmt" "net/http" + operatorv1 "github.com/openshift/api/operator/v1" admissionv1 "k8s.io/api/admission/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime/schema" @@ -29,6 +30,8 @@ import ( "sigs.k8s.io/controller-runtime/pkg/webhook" "sigs.k8s.io/controller-runtime/pkg/webhook/admission" + dscv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/datasciencecluster/v1" + "github.com/opendatahub-io/opendatahub-operator/v2/components/modelregistry" "github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster/gvk" ) @@ -37,12 +40,12 @@ var log = ctrl.Log.WithName("rhoai-controller-webhook") //+kubebuilder:webhook:path=/validate-opendatahub-io-v1,mutating=false,failurePolicy=fail,sideEffects=None,groups=datasciencecluster.opendatahub.io;dscinitialization.opendatahub.io,resources=datascienceclusters;dscinitializations,verbs=create;delete,versions=v1,name=operator.opendatahub.io,admissionReviewVersions=v1 //nolint:lll -type OpenDataHubWebhook struct { +type OpenDataHubValidatingWebhook struct { Client client.Client Decoder *admission.Decoder } -func (w *OpenDataHubWebhook) SetupWithManager(mgr ctrl.Manager) { +func (w *OpenDataHubValidatingWebhook) SetupWithManager(mgr ctrl.Manager) { hookServer := mgr.GetWebhookServer() odhWebhook := &webhook.Admission{ Handler: w, @@ -74,7 +77,7 @@ func denyCountGtZero(ctx context.Context, cli client.Client, gvk schema.GroupVer return admission.Allowed("") } -func (w *OpenDataHubWebhook) checkDupCreation(ctx context.Context, req admission.Request) admission.Response { +func (w *OpenDataHubValidatingWebhook) checkDupCreation(ctx context.Context, req admission.Request) admission.Response { switch req.Kind.Kind { case "DataScienceCluster", "DSCInitialization": default: @@ -93,7 +96,7 @@ func (w *OpenDataHubWebhook) checkDupCreation(ctx context.Context, req admission fmt.Sprintf("Only one instance of %s object is allowed", req.Kind.Kind)) } -func (w *OpenDataHubWebhook) checkDeletion(ctx context.Context, req admission.Request) admission.Response { +func (w *OpenDataHubValidatingWebhook) checkDeletion(ctx context.Context, req admission.Request) admission.Response { if req.Kind.Kind == "DataScienceCluster" { return admission.Allowed("") } @@ -103,7 +106,7 @@ func (w *OpenDataHubWebhook) checkDeletion(ctx context.Context, req admission.Re fmt.Sprintln("Cannot delete DSCI object when DSC object still exists")) } -func (w *OpenDataHubWebhook) Handle(ctx context.Context, req admission.Request) admission.Response { +func (w *OpenDataHubValidatingWebhook) Handle(ctx context.Context, req admission.Request) admission.Response { var resp admission.Response resp.Allowed = true // initialize Allowed to be true in case Operation falls into "default" case @@ -121,3 +124,46 @@ func (w *OpenDataHubWebhook) Handle(ctx context.Context, req admission.Request) return admission.Allowed(fmt.Sprintf("Operation %s on %s allowed", req.Operation, req.Kind.Kind)) } + +//+kubebuilder:webhook:path=/mutate-opendatahub-io-v1,mutating=true,failurePolicy=fail,sideEffects=None,groups=datasciencecluster.opendatahub.io,resources=datascienceclusters,verbs=create;update,versions=v1,name=mutate.operator.opendatahub.io,admissionReviewVersions=v1 +//nolint:lll + +// OpenDataHubMutatingWebhook is a mutating webhook +// It currently only sets defaults for modelregiestry in datascienceclusters. +type OpenDataHubMutatingWebhook struct { + Client client.Client + Decoder *admission.Decoder +} + +func (w *OpenDataHubMutatingWebhook) SetupWithManager(mgr ctrl.Manager) { + hookServer := mgr.GetWebhookServer() + odhWebhook := &webhook.Admission{ + Handler: w, + } + hookServer.Register("/mutate-opendatahub-io-v1", odhWebhook) +} + +func (w *OpenDataHubMutatingWebhook) Handle(ctx context.Context, req admission.Request) admission.Response { + var resp admission.Response + resp.Allowed = true // initialize Allowed to be true in case Operation falls into "default" case + + dsc := &dscv1.DataScienceCluster{} + err := w.Decoder.Decode(req, dsc) + if err != nil { + return admission.Denied(fmt.Sprintf("failed to decode request body: %s", err)) + } + resp = w.setDSCDefaults(ctx, dsc) + return resp +} + +func (w *OpenDataHubMutatingWebhook) setDSCDefaults(_ context.Context, dsc *dscv1.DataScienceCluster) admission.Response { + // set default registriesNamespace if empty + if len(dsc.Spec.Components.ModelRegistry.RegistriesNamespace) == 0 && dsc.Spec.Components.ModelRegistry.ManagementState == operatorv1.Managed { + return admission.Patched("Property registriesNamespace set to default value", webhook.JSONPatchOp{ + Operation: "replace", + Path: "spec.components.modelregistry.registriesNamespace", + Value: modelregistry.DefaultModelRegistriesNamespace, + }) + } + return admission.Allowed("") +} diff --git a/controllers/webhook/webhook_suite_test.go b/controllers/webhook/webhook_suite_test.go index 87fbeb4b82e..7c58571ae81 100644 --- a/controllers/webhook/webhook_suite_test.go +++ b/controllers/webhook/webhook_suite_test.go @@ -47,6 +47,7 @@ import ( "github.com/opendatahub-io/opendatahub-operator/v2/components/datasciencepipelines" "github.com/opendatahub-io/opendatahub-operator/v2/components/kserve" "github.com/opendatahub-io/opendatahub-operator/v2/components/modelmeshserving" + "github.com/opendatahub-io/opendatahub-operator/v2/components/modelregistry" "github.com/opendatahub-io/opendatahub-operator/v2/components/ray" "github.com/opendatahub-io/opendatahub-operator/v2/components/trustyai" "github.com/opendatahub-io/opendatahub-operator/v2/components/workbenches" @@ -130,7 +131,12 @@ var _ = BeforeSuite(func() { }) Expect(err).NotTo(HaveOccurred()) - (&webhook.OpenDataHubWebhook{ + (&webhook.OpenDataHubValidatingWebhook{ + Client: mgr.GetClient(), + Decoder: admission.NewDecoder(mgr.GetScheme()), + }).SetupWithManager(mgr) + + (&webhook.OpenDataHubMutatingWebhook{ Client: mgr.GetClient(), Decoder: admission.NewDecoder(mgr.GetScheme()), }).SetupWithManager(mgr) @@ -200,6 +206,14 @@ var _ = Describe("DSC/DSCI webhook", func() { Expect(k8sClient.Delete(ctx, dscInstance)).Should(Succeed()) Expect(k8sClient.Delete(ctx, dsciInstance)).Should(Succeed()) }) + + It("Should set defaults in DSC instance", func(ctx context.Context) { + dscInstance := newDSC(nameBase+"-dsc-1", namespace) + Expect(k8sClient.Create(ctx, dscInstance)).Should(Succeed()) + Expect(dscInstance.Spec.Components.ModelRegistry.RegistriesNamespace). + Should(Equal(modelregistry.DefaultModelRegistriesNamespace)) + Expect(clearInstance(ctx, dscInstance)).Should(Succeed()) + }) }) func clearInstance(ctx context.Context, instance client.Object) error { diff --git a/docs/DESIGN.md b/docs/DESIGN.md index 87d2ef964a5..c5558009a73 100644 --- a/docs/DESIGN.md +++ b/docs/DESIGN.md @@ -62,6 +62,9 @@ To deploy ODH components seamlessly, ODH operator will watch two CRDs: name: knative-serving modelmeshserving: managementState: Managed + modelregistry: + managementState: Managed + registriesNamespace: "rhoai-model-registries" ray: managementState: Managed kueue: diff --git a/docs/api-overview.md b/docs/api-overview.md index 6edf0a162e5..18d957750fb 100644 --- a/docs/api-overview.md +++ b/docs/api-overview.md @@ -49,6 +49,7 @@ _Appears in:_ - [Kserve](#kserve) - [Kueue](#kueue) - [ModelMeshServing](#modelmeshserving) +- [ModelRegistry](#modelregistry) - [Ray](#ray) - [TrainingOperator](#trainingoperator) - [TrustyAI](#trustyai) @@ -230,6 +231,30 @@ _Appears in:_ +## datasciencecluster.opendatahub.io/modelregistry + +Package modelregistry provides utility functions to config ModelRegistry, an ML Model metadata repository service + + + +#### ModelRegistry + + + + + + + +_Appears in:_ +- [Components](#components) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `Component` _[Component](#component)_ | | | | +| `registriesNamespace` _string_ | Namespace for model registries to be installed, configurable only once when model registry is enabled, defaults to "rhoai-model-registries" | rhoai-model-registries | MaxLength: 63
Pattern: `^([a-z0-9]([-a-z0-9]*[a-z0-9])?)?$`
| + + + ## datasciencecluster.opendatahub.io/ray Package ray provides utility functions to config Ray as part of the stack @@ -361,6 +386,23 @@ _Appears in:_ | `ray` _[Ray](#ray)_ | Ray component configuration. | | | | `trustyai` _[TrustyAI](#trustyai)_ | TrustyAI component configuration. | | | | `trainingoperator` _[TrainingOperator](#trainingoperator)_ | Training Operator component configuration. | | | +| `modelregistry` _[ModelRegistry](#modelregistry)_ | ModelRegistry component configuration. | | | + + +#### ComponentsStatus + + + +ComponentsStatus defines the custom status of DataScienceCluster components. + + + +_Appears in:_ +- [DataScienceClusterStatus](#datascienceclusterstatus) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `modelregistry` _[ModelRegistryStatus](#modelregistrystatus)_ | ModelRegistry component status | | | #### ControlPlaneSpec @@ -436,6 +478,7 @@ _Appears in:_ | `relatedObjects` _[ObjectReference](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.25/#objectreference-v1-core) array_ | RelatedObjects is a list of objects created and maintained by this operator.
Object references will be added to this list after they have been created AND found in the cluster. | | | | `errorMessage` _string_ | | | | | `installedComponents` _object (keys:string, values:boolean)_ | List of components with status if installed or not | | | +| `components` _[ComponentsStatus](#componentsstatus)_ | Expose component's specific status | | | | `release` _[Release](#release)_ | Version and release type | | | diff --git a/docs/upgrade-testing.md b/docs/upgrade-testing.md index 8aa6d9f0a1a..9227e48f81a 100644 --- a/docs/upgrade-testing.md +++ b/docs/upgrade-testing.md @@ -136,6 +136,9 @@ spec: name: knative-serving modelmeshserving: managementState: Managed + modelregistry: + managementState: Managed + registriesNamespace: "rhoai-model-registries" ray: managementState: Managed kueue: @@ -147,4 +150,4 @@ spec: trustyai: managementState: Removed EOF -``` \ No newline at end of file +``` diff --git a/get_all_manifests.sh b/get_all_manifests.sh index f2c709c49b7..44bbb8ff319 100755 --- a/get_all_manifests.sh +++ b/get_all_manifests.sh @@ -5,7 +5,7 @@ GITHUB_URL="https://github.com/" # update to use different git repo for legacy manifests MANIFEST_ORG="red-hat-data-services" -# component: notebook, dsp, kserve, dashbaord, cf/ray/kueue/trainingoperator, trustyai, modelmesh. +# component: notebook, dsp, kserve, dashbaord, cf/ray/kueue/trainingoperator, trustyai, modelmesh, modelregistry # in the format of "repo-org:repo-name:ref-name:source-folder:target-folder". declare -A COMPONENT_MANIFESTS=( ["codeflare"]="red-hat-data-services:codeflare-operator:rhoai-2.14:config:codeflare" @@ -21,6 +21,7 @@ declare -A COMPONENT_MANIFESTS=( ["kserve"]="red-hat-data-services:kserve:rhoai-2.14:config:kserve" ["odh-dashboard"]="red-hat-data-services:odh-dashboard:rhoai-2.14:manifests:dashboard" ["trainingoperator"]="red-hat-data-services:training-operator:rhoai-2.14:manifests:trainingoperator" + ["modelregistry"]="red-hat-data-services:model-registry-operator:main:config:model-registry-operator" ) # Allow overwriting repo using flags component=repo diff --git a/main.go b/main.go index 1e327332571..88ae6b08e5d 100644 --- a/main.go +++ b/main.go @@ -57,6 +57,7 @@ import ( dscv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/datasciencecluster/v1" dsciv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/dscinitialization/v1" featurev1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/features/v1" + "github.com/opendatahub-io/opendatahub-operator/v2/components/modelregistry" "github.com/opendatahub-io/opendatahub-operator/v2/controllers/certconfigmapgenerator" dscctrl "github.com/opendatahub-io/opendatahub-operator/v2/controllers/datasciencecluster" dscictrl "github.com/opendatahub-io/opendatahub-operator/v2/controllers/dscinitialization" @@ -207,7 +208,12 @@ func main() { //nolint:funlen,maintidx os.Exit(1) } - (&webhook.OpenDataHubWebhook{ + (&webhook.OpenDataHubValidatingWebhook{ + Client: mgr.GetClient(), + Decoder: admission.NewDecoder(mgr.GetScheme()), + }).SetupWithManager(mgr) + + (&webhook.OpenDataHubMutatingWebhook{ Client: mgr.GetClient(), Decoder: admission.NewDecoder(mgr.GetScheme()), }).SetupWithManager(mgr) @@ -345,13 +351,12 @@ func createDeploymentCacheConfig(platform cluster.Platform) map[string]cache.Con case cluster.ManagedRhods: // no need workbench NS, only SFS no Deployment namespaceConfigs["redhat-ods-monitoring"] = cache.Config{} namespaceConfigs["redhat-ods-applications"] = cache.Config{} - //TODO: if ModelReg has a RHOAI NS case cluster.SelfManagedRhods: namespaceConfigs["redhat-ods-applications"] = cache.Config{} - //TODO: if ModelReg has a RHOAI NS default: namespaceConfigs["opendatahub"] = cache.Config{} - namespaceConfigs["odh-model-registries"] = cache.Config{} } + // for modelregistry namespace + namespaceConfigs[modelregistry.DefaultModelRegistriesNamespace] = cache.Config{} return namespaceConfigs } diff --git a/pkg/upgrade/upgrade.go b/pkg/upgrade/upgrade.go index 449353fd6c2..3da1933bb6b 100644 --- a/pkg/upgrade/upgrade.go +++ b/pkg/upgrade/upgrade.go @@ -36,6 +36,7 @@ import ( "github.com/opendatahub-io/opendatahub-operator/v2/components/kserve" "github.com/opendatahub-io/opendatahub-operator/v2/components/kueue" "github.com/opendatahub-io/opendatahub-operator/v2/components/modelmeshserving" + "github.com/opendatahub-io/opendatahub-operator/v2/components/modelregistry" "github.com/opendatahub-io/opendatahub-operator/v2/components/ray" "github.com/opendatahub-io/opendatahub-operator/v2/components/trainingoperator" "github.com/opendatahub-io/opendatahub-operator/v2/components/trustyai" @@ -97,6 +98,9 @@ func CreateDefaultDSC(ctx context.Context, cli client.Client) error { TrustyAI: trustyai.TrustyAI{ Component: components.Component{ManagementState: operatorv1.Removed}, }, + ModelRegistry: modelregistry.ModelRegistry{ + Component: components.Component{ManagementState: operatorv1.Removed}, + }, }, }, } diff --git a/tests/e2e/creation_test.go b/tests/e2e/creation_test.go index f89d0bc5ec8..8fbfc83c63d 100644 --- a/tests/e2e/creation_test.go +++ b/tests/e2e/creation_test.go @@ -20,11 +20,13 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/util/retry" + "sigs.k8s.io/controller-runtime/pkg/client" dscv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/datasciencecluster/v1" dsciv1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/dscinitialization/v1" infrav1 "github.com/opendatahub-io/opendatahub-operator/v2/apis/infrastructure/v1" "github.com/opendatahub-io/opendatahub-operator/v2/components" + "github.com/opendatahub-io/opendatahub-operator/v2/components/modelregistry" "github.com/opendatahub-io/opendatahub-operator/v2/pkg/cluster" "github.com/opendatahub-io/opendatahub-operator/v2/pkg/feature/serverless" "github.com/opendatahub-io/opendatahub-operator/v2/pkg/metadata/labels" @@ -58,7 +60,7 @@ func creationTestSuite(t *testing.T) { // DSC t.Run("Creation of DataScienceCluster instance", func(t *testing.T) { - err = testCtx.testDSCCreation() + err = testCtx.testDSCCreation(t) require.NoError(t, err, "error creating DataScienceCluster instance") }) t.Run("Creation of more than one of DataScienceCluster instance", func(t *testing.T) { @@ -78,7 +80,6 @@ func creationTestSuite(t *testing.T) { err = testCtx.validateDSCReady() require.NoError(t, err, "DataScienceCluster instance is not Ready") }) - // Kserve t.Run("Validate Knative resoruce", func(t *testing.T) { err = testCtx.validateDSC() @@ -90,15 +91,19 @@ func creationTestSuite(t *testing.T) { require.NoError(t, err, "error getting default cert secrets for Kserve") }) - // TODO: enable when ModelReg is added - // t.Run("Validate default model registry cert available", func(t *testing.T) { - // err = testCtx.testDefaultModelRegistryCertAvailable() - // require.NoError(t, err, "error getting default cert secret for ModelRegistry") - // }) - // t.Run("Validate model registry servicemeshmember available", func(t *testing.T) { - // err = testCtx.testMRServiceMeshMember() - // require.NoError(t, err, "error getting servicemeshmember for Model Registry") - // }) + // ModelReg + t.Run("Validate model registry cert config", func(t *testing.T) { + err = testCtx.validateModelRegistryConfig() + require.NoError(t, err, "error validating ModelRegistry config") + }) + t.Run("Validate default model registry cert available", func(t *testing.T) { + err = testCtx.testDefaultModelRegistryCertAvailable() + require.NoError(t, err, "error getting default cert secret for ModelRegistry") + }) + t.Run("Validate model registry servicemeshmember available", func(t *testing.T) { + err = testCtx.testMRServiceMeshMember() + require.NoError(t, err, "error getting servicemeshmember for Model Registry") + }) // reconcile t.Run("Validate Controller reconcile", func(t *testing.T) { @@ -150,12 +155,11 @@ func (tc *testContext) testDSCICreation() error { return nil } -func (tc *testContext) testDSCCreation() error { +func (tc *testContext) testDSCCreation(t *testing.T) error { + t.Helper() // Create DataScienceCluster resource if not already created - dscLookupKey := types.NamespacedName{Name: tc.testDsc.Name} - createdDSC := &dscv1.DataScienceCluster{} - existingDSCList := &dscv1.DataScienceClusterList{} + existingDSCList := &dscv1.DataScienceClusterList{} err := tc.customClient.List(tc.ctx, existingDSCList) if err == nil { if len(existingDSCList.Items) > 0 { @@ -164,6 +168,9 @@ func (tc *testContext) testDSCCreation() error { return nil } } + + dscLookupKey := types.NamespacedName{Name: tc.testDsc.Name} + createdDSC := &dscv1.DataScienceCluster{} err = tc.customClient.Get(tc.ctx, dscLookupKey, createdDSC) if err != nil { if k8serr.IsNotFound(err) { @@ -282,7 +289,7 @@ func (tc *testContext) testAllComponentCreation(t *testing.T) error { //nolint:f t.Run("Validate "+name, func(t *testing.T) { t.Parallel() err = tc.testComponentCreation(c) - require.NoError(t, err, "error validating component %v when "+c.GetManagementState()) + require.NoError(t, err, "error validating component %s when %v", name, c.GetManagementState()) }) } return nil @@ -436,6 +443,64 @@ func (tc *testContext) testDefaultCertsAvailable() error { return nil } +func (tc *testContext) testDefaultModelRegistryCertAvailable() error { + // return if MR is not set to Managed + if tc.testDsc.Spec.Components.ModelRegistry.ManagementState != operatorv1.Managed { + return nil + } + + // Get expected cert secrets + defaultIngressCtrl, err := cluster.FindAvailableIngressController(tc.ctx, tc.customClient) + if err != nil { + return fmt.Errorf("failed to get ingress controller: %w", err) + } + + defaultIngressCertName := cluster.GetDefaultIngressCertSecretName(defaultIngressCtrl) + + defaultIngressSecret, err := cluster.GetSecret(tc.ctx, tc.customClient, "openshift-ingress", defaultIngressCertName) + if err != nil { + return err + } + + // Verify secret from Control Plane namespace matches the default MR cert secret + defaultMRSecretName := modelregistry.DefaultModelRegistryCert + defaultMRSecret, err := cluster.GetSecret(tc.ctx, tc.customClient, tc.testDSCI.Spec.ServiceMesh.ControlPlane.Namespace, + defaultMRSecretName) + if err != nil { + return err + } + + if defaultMRSecret.Type != defaultIngressSecret.Type { + return fmt.Errorf("wrong type of MR cert secret is created for %v. Expected %v, Got %v", defaultMRSecretName, defaultIngressSecret.Type, defaultMRSecret.Type) + } + + if string(defaultIngressSecret.Data["tls.crt"]) != string(defaultMRSecret.Data["tls.crt"]) { + return fmt.Errorf("default MR cert secret not expected. Epected %v, Got %v", defaultIngressSecret.Data["tls.crt"], defaultMRSecret.Data["tls.crt"]) + } + + if string(defaultIngressSecret.Data["tls.key"]) != string(defaultMRSecret.Data["tls.key"]) { + return fmt.Errorf("default MR cert secret not expected. Epected %v, Got %v", defaultIngressSecret.Data["tls.crt"], defaultMRSecret.Data["tls.crt"]) + } + return nil +} + +func (tc *testContext) testMRServiceMeshMember() error { + if tc.testDsc.Spec.Components.ModelRegistry.ManagementState != operatorv1.Managed { + return nil + } + + // Get unstructured ServiceMeshMember + smm := unstructured.Unstructured{} + smm.SetAPIVersion("maistra.io/v1") + smm.SetKind("ServiceMeshMember") + err := tc.customClient.Get(tc.ctx, + client.ObjectKey{Namespace: modelregistry.DefaultModelRegistriesNamespace, Name: "default"}, &smm) + if err != nil { + return fmt.Errorf("failed to get servicemesh member: %w", err) + } + return nil +} + func (tc *testContext) testUpdateComponentReconcile() error { // Test Updating Dashboard Replicas appDeployments, err := tc.kubeClient.AppsV1().Deployments(tc.applicationsNamespace).List(tc.ctx, metav1.ListOptions{ @@ -542,3 +607,51 @@ func (tc *testContext) testUpdateDSCComponentEnabled() error { dashboardDeploymentName, tc.applicationsNamespace) } + +const testNs = "test-model-registries" + +func (tc *testContext) validateModelRegistryConfig() error { + // check immutable property registriesNamespace + if tc.testDsc.Spec.Components.ModelRegistry.ManagementState != operatorv1.Managed { + // allowed to set registriesNamespace to non-default + err := patchRegistriesNamespace(tc, testNs, testNs, false) + if err != nil { + return err + } + // allowed to set registriesNamespace back to default value + err = patchRegistriesNamespace(tc, modelregistry.DefaultModelRegistriesNamespace, + modelregistry.DefaultModelRegistriesNamespace, false) + if err != nil { + return err + } + } else { + // not allowed to change registriesNamespace + err := patchRegistriesNamespace(tc, testNs, modelregistry.DefaultModelRegistriesNamespace, true) + if err != nil { + return err + } + } + return nil +} + +func patchRegistriesNamespace(tc *testContext, namespace string, expected string, expectErr bool) error { + patchStr := fmt.Sprintf("{\"spec\":{\"components\":{\"modelregistry\":{\"registriesNamespace\":\"%s\"}}}}", namespace) + err := tc.customClient.Patch(tc.ctx, tc.testDsc, client.RawPatch(types.MergePatchType, []byte(patchStr))) + if err != nil { + if !expectErr { + return fmt.Errorf("unexpected error when setting registriesNamespace in DSC %s to %s: %w", + tc.testDsc.Name, namespace, err) + } + } else { + if expectErr { + return fmt.Errorf("unexpected success when setting registriesNamespace in DSC %s to %s", + tc.testDsc.Name, namespace) + } + } + // compare expected against returned registriesNamespace + if tc.testDsc.Spec.Components.ModelRegistry.RegistriesNamespace != expected { + return fmt.Errorf("expected registriesNamespace %s, got %s", + expected, tc.testDsc.Spec.Components.ModelRegistry.RegistriesNamespace) + } + return nil +} diff --git a/tests/e2e/helper_test.go b/tests/e2e/helper_test.go index 5273e12e0cc..e122ce36921 100644 --- a/tests/e2e/helper_test.go +++ b/tests/e2e/helper_test.go @@ -31,6 +31,7 @@ import ( "github.com/opendatahub-io/opendatahub-operator/v2/components/kserve" "github.com/opendatahub-io/opendatahub-operator/v2/components/kueue" "github.com/opendatahub-io/opendatahub-operator/v2/components/modelmeshserving" + "github.com/opendatahub-io/opendatahub-operator/v2/components/modelregistry" "github.com/opendatahub-io/opendatahub-operator/v2/components/ray" "github.com/opendatahub-io/opendatahub-operator/v2/components/trainingoperator" "github.com/opendatahub-io/opendatahub-operator/v2/components/trustyai" @@ -72,7 +73,6 @@ func (tc *testContext) waitForOperatorDeployment(name string, replicas int32) er } } } - log.Printf("Error in %s deployment", name) return false, nil }) @@ -168,6 +168,11 @@ func setupDSCInstance(name string) *dscv1.DataScienceCluster { ManagementState: operatorv1.Removed, }, }, + ModelRegistry: modelregistry.ModelRegistry{ + Component: components.Component{ + ManagementState: operatorv1.Removed, + }, + }, }, }, }