From a400b94df4c7998d7c2b9474a4d58ba061e42392 Mon Sep 17 00:00:00 2001
From: Hannah Zhang <hannahz@nvidia.com>
Date: Tue, 7 Oct 2025 16:49:03 -0700
Subject: [PATCH 1/3] feat: initial dynamomodel work

Signed-off-by: Hannah Zhang <hannahz@nvidia.com>
---
 .../templates/nvidia.com_dynamomodels.yaml    | 218 +++++++++
 deploy/cloud/operator/PROJECT                 |   8 +
 .../dynamocomponentdeployment_types.go        |   5 +
 .../api/v1alpha1/dynamomodel_types.go         | 188 ++++++++
 deploy/cloud/operator/cmd/main.go             |   7 +
 .../crd/bases/nvidia.com_dynamomodels.yaml    | 219 +++++++++
 .../dynamographdeployment_controller.go       |  60 ++-
 .../controller/dynamomodel_controller.go      | 453 ++++++++++++++++++
 docs/kubernetes/README.md                     |   4 +-
 docs/kubernetes/dynamomodel.md                | 273 +++++++++++
 10 files changed, 1433 insertions(+), 2 deletions(-)
 create mode 100644 deploy/cloud/helm/crds/templates/nvidia.com_dynamomodels.yaml
 create mode 100644 deploy/cloud/operator/api/v1alpha1/dynamomodel_types.go
 create mode 100644 deploy/cloud/operator/config/crd/bases/nvidia.com_dynamomodels.yaml
 create mode 100644 deploy/cloud/operator/internal/controller/dynamomodel_controller.go
 create mode 100644 docs/kubernetes/dynamomodel.md

diff --git a/deploy/cloud/helm/crds/templates/nvidia.com_dynamomodels.yaml b/deploy/cloud/helm/crds/templates/nvidia.com_dynamomodels.yaml
new file mode 100644
index 0000000000..69da21c121
--- /dev/null
+++ b/deploy/cloud/helm/crds/templates/nvidia.com_dynamomodels.yaml
@@ -0,0 +1,218 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.16.4
+    helm.sh/resource-policy: keep
+  name: dynamomodels.nvidia.com
+spec:
+  group: nvidia.com
+  names:
+    kind: DynamoModel
+    listKind: DynamoModelList
+    plural: dynamomodels
+    shortNames:
+      - dm
+    singular: dynamomodel
+  scope: Namespaced
+  versions:
+    - name: v1alpha1
+      additionalPrinterColumns:
+        - jsonPath: .status.state
+          name: State
+          type: string
+        - jsonPath: .spec.name
+          name: Model
+          type: string
+        - jsonPath: .spec.version
+          name: Version
+          type: string
+        - jsonPath: .status.pvcName
+          name: PVC
+          type: string
+        - jsonPath: .metadata.creationTimestamp
+          name: Age
+          type: date
+      schema:
+        openAPIV3Schema:
+          description: |-
+            DynamoModel is the Schema for the dynamomodels API.
+            It provides a high-level abstraction for managing model artifacts cached in PVCs in the cluster.
+            All jobs referencing the same DynamoModel are guaranteed to use the same artifact,
+            preventing drift and simplifying maintenance.
+          properties:
+            apiVersion:
+              description: |-
+                APIVersion defines the versioned schema of this representation of an object.
+                Servers should convert recognized schemas to the latest internal value, and
+                may reject unrecognized values.
+                More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+              type: string
+            kind:
+              description: |-
+                Kind is a string value representing the REST resource this object represents.
+                Servers may infer this from the endpoint the client submits requests to.
+                Cannot be updated.
+                In CamelCase.
+                More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+              type: string
+            metadata:
+              type: object
+            spec:
+              description: Spec defines the desired state for this model.
+              properties:
+                downloaderRef:
+                  description: |-
+                    DownloaderRef is an optional reference to a custom downloader or workflow
+                    (e.g., MLFlow or internal tools). Provides extensibility for specialized workflows
+                    (internal or third-party).
+                  type: string
+                name:
+                  description: |-
+                    Name is the canonical model name (matches external model repo, e.g. HuggingFace, NGC).
+                    Example: "meta-llama/Llama-3.3-70B-Instruct"
+                  type: string
+                pvc:
+                  description: PVC defines the persistent volume claim configuration for storing the model.
+                  properties:
+                    create:
+                      default: true
+                      description: Create indicates whether to create a new PVC or use an existing one.
+                      type: boolean
+                    name:
+                      description: Name is the name of the PVC. If not specified, defaults to the DynamoModel name.
+                      type: string
+                    size:
+                      anyOf:
+                        - type: integer
+                        - type: string
+                      description: Size of the volume, used during PVC creation. Required when create is true.
+                      pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                      x-kubernetes-int-or-string: true
+                    storageClass:
+                      description: StorageClass to be used for PVC creation. Required when create is true.
+                      type: string
+                    volumeAccessMode:
+                      default: ReadWriteMany
+                      description: VolumeAccessMode is the volume access mode of the PVC. Defaults to ReadWriteMany.
+                      type: string
+                  required:
+                    - create
+                  type: object
+                secretRef:
+                  description: |-
+                    SecretRef is an optional reference to a secret needed for accessing the source URL
+                    (private repo, S3 credentials, etc.)
+                  type: string
+                sourceURL:
+                  description: |-
+                    SourceURL is the source location of model weights (can be HF, S3, NGC).
+                    Ensures flexibility in downstream storage strategies; permits flexible source management and credential injection.
+                    Examples: "hf://meta-llama/Llama-3.3-70B-Instruct", "s3://bucket/path/to/model", "ngc://nvidia/model"
+                  type: string
+                version:
+                  description: |-
+                    Version is a version pin (e.g., SHA or tag from source repository).
+                    This solves version drift by pinning deployments and benchmarking jobs to the same model artifact.
+                  type: string
+              required:
+                - name
+                - pvc
+                - sourceURL
+              type: object
+            status:
+              description: Status reflects the current observed state of this model.
+              properties:
+                conditions:
+                  description: Conditions contains the latest observed conditions of the model.
+                  items:
+                    description: Condition contains details for one aspect of the current state of this API Resource.
+                    properties:
+                      lastTransitionTime:
+                        description: |-
+                          lastTransitionTime is the last time the condition transitioned from one status to another.
+                          This should be when the underlying condition changed.  If that is not known, then using the time when the API field changed is acceptable.
+                        format: date-time
+                        type: string
+                      message:
+                        description: |-
+                          message is a human readable message indicating details about the transition.
+                          This may be an empty string.
+                        maxLength: 32768
+                        type: string
+                      observedGeneration:
+                        description: |-
+                          observedGeneration represents the .metadata.generation that the condition was set based upon.
+                          For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+                          with respect to the current state of the instance.
+                        format: int64
+                        minimum: 0
+                        type: integer
+                      reason:
+                        description: |-
+                          reason contains a programmatic identifier indicating the reason for the condition's last transition.
+                          Producers of specific condition types may define expected values and meanings for this field,
+                          and whether the values are considered a guaranteed API.
+                          The value should be a CamelCase string.
+                          This field may not be empty.
+                        maxLength: 1024
+                        minLength: 1
+                        pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+                        type: string
+                      status:
+                        description: status of the condition, one of True, False, Unknown.
+                        enum:
+                          - "True"
+                          - "False"
+                          - Unknown
+                        type: string
+                      type:
+                        description: type of condition in CamelCase or in foo.example.com/CamelCase.
+                        maxLength: 316
+                        pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+                        type: string
+                    required:
+                      - lastTransitionTime
+                      - message
+                      - reason
+                      - status
+                      - type
+                    type: object
+                  type: array
+                downloadJobName:
+                  description: DownloadJobName is the name of the Job created to download the model.
+                  type: string
+                lastDownloadTime:
+                  description: LastDownloadTime is the timestamp of the last successful download.
+                  format: date-time
+                  type: string
+                pvcName:
+                  description: PVCName is the name of the PVC created or used for this model.
+                  type: string
+                state:
+                  description: |-
+                    State is a high-level textual status of the model lifecycle.
+                    Possible values: "Pending", "Downloading", "Ready", "Failed"
+                  type: string
+              type: object
+          type: object
+      served: true
+      storage: true
+      subresources:
+        status: {}
diff --git a/deploy/cloud/operator/PROJECT b/deploy/cloud/operator/PROJECT
index a86430a2c2..38caf65e60 100644
--- a/deploy/cloud/operator/PROJECT
+++ b/deploy/cloud/operator/PROJECT
@@ -24,4 +24,12 @@ resources:
   kind: DynamoGraphDeployment
   path: github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1
   version: v1alpha1
+- api:
+    crdVersion: v1
+    namespaced: true
+  controller: true
+  domain: nvidia.com
+  kind: DynamoModel
+  path: github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1
+  version: v1alpha1
 version: "3"
diff --git a/deploy/cloud/operator/api/v1alpha1/dynamocomponentdeployment_types.go b/deploy/cloud/operator/api/v1alpha1/dynamocomponentdeployment_types.go
index b475e90a59..9d69e11062 100644
--- a/deploy/cloud/operator/api/v1alpha1/dynamocomponentdeployment_types.go
+++ b/deploy/cloud/operator/api/v1alpha1/dynamocomponentdeployment_types.go
@@ -91,6 +91,11 @@ type DynamoComponentDeploymentSharedSpec struct {
 	EnvFromSecret *string `json:"envFromSecret,omitempty"`
 	// VolumeMounts references PVCs defined at the top level for volumes to be mounted by the component.
 	VolumeMounts []VolumeMount `json:"volumeMounts,omitempty"`
+	// ModelRef references a DynamoModel resource that provides the model artifact for this component.
+	// When specified, the controller will wait for the model to be ready and automatically mount
+	// the model's PVC to the component.
+	// +kubebuilder:validation:Optional
+	ModelRef string `json:"modelRef,omitempty"`
 
 	// Ingress config to expose the component outside the cluster (or through a service mesh).
 	Ingress *IngressSpec `json:"ingress,omitempty"`
diff --git a/deploy/cloud/operator/api/v1alpha1/dynamomodel_types.go b/deploy/cloud/operator/api/v1alpha1/dynamomodel_types.go
new file mode 100644
index 0000000000..a927c04d23
--- /dev/null
+++ b/deploy/cloud/operator/api/v1alpha1/dynamomodel_types.go
@@ -0,0 +1,188 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package v1alpha1
+
+import (
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+// DynamoModelSpec defines the desired state of DynamoModel: model identity, source location, optional credentials/downloader references, and the backing PVC.
+type DynamoModelSpec struct {
+	// Name is the canonical model name (matches external model repo, e.g. HuggingFace, NGC).
+	// Example: "meta-llama/Llama-3.3-70B-Instruct"
+	// +kubebuilder:validation:Required
+	Name string `json:"name"`
+
+	// Version is a version pin (e.g., SHA or tag from source repository).
+	// This solves version drift by pinning deployments and benchmarking jobs to the same model artifact.
+	// +kubebuilder:validation:Optional
+	Version string `json:"version,omitempty"`
+
+	// SourceURL is the source location of model weights (can be HF, S3, NGC).
+	// Ensures flexibility in downstream storage strategies; permits flexible source management and credential injection.
+	// Examples: "hf://meta-llama/Llama-3.3-70B-Instruct", "s3://bucket/path/to/model", "ngc://nvidia/model"
+	// +kubebuilder:validation:Required
+	SourceURL string `json:"sourceURL"`
+
+	// SecretRef is an optional reference to a secret needed for accessing the source URL
+	// (private repo, S3 credentials, etc.)
+	// +kubebuilder:validation:Optional
+	SecretRef string `json:"secretRef,omitempty"`
+
+	// DownloaderRef is an optional reference to a custom downloader or workflow
+	// (e.g., MLFlow or internal tools). Provides extensibility for specialized workflows
+	// (internal or third-party).
+	// +kubebuilder:validation:Optional
+	DownloaderRef string `json:"downloaderRef,omitempty"`
+
+	// PVC defines the persistent volume claim configuration for storing the model.
+	// +kubebuilder:validation:Required
+	PVC PVCSpec `json:"pvc"`
+}
+
+// PVCSpec defines the PVC configuration for model storage. NOTE(review): StorageClass and Size are documented as required when create is true, but both are marked Optional and no validation rule in this type enforces that.
+type PVCSpec struct {
+	// Create indicates whether to create a new PVC or use an existing one.
+	// +kubebuilder:default=true
+	Create *bool `json:"create,omitempty"`
+
+	// Name is the name of the PVC. If not specified, defaults to the DynamoModel name.
+	// +kubebuilder:validation:Optional
+	Name string `json:"name,omitempty"`
+
+	// StorageClass to be used for PVC creation. Required when create is true.
+	// +kubebuilder:validation:Optional
+	StorageClass string `json:"storageClass,omitempty"`
+
+	// Size of the volume, used during PVC creation. Required when create is true.
+	// +kubebuilder:validation:Optional
+	Size resource.Quantity `json:"size,omitempty"`
+
+	// VolumeAccessMode is the volume access mode of the PVC. Defaults to ReadWriteMany.
+	// +kubebuilder:default=ReadWriteMany
+	// +kubebuilder:validation:Optional
+	VolumeAccessMode corev1.PersistentVolumeAccessMode `json:"volumeAccessMode,omitempty"`
+}
+
+// DynamoModelStatus defines the observed state of DynamoModel. State is a free-form string (no enum marker restricts it); IsReady compares it against "Ready".
+type DynamoModelStatus struct {
+	// State is a high-level textual status of the model lifecycle.
+	// Possible values: "Pending", "Downloading", "Ready", "Failed"
+	State string `json:"state,omitempty"`
+
+	// Conditions contains the latest observed conditions of the model.
+	// +kubebuilder:validation:Optional
+	Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"`
+
+	// PVCName is the name of the PVC created or used for this model.
+	// +kubebuilder:validation:Optional
+	PVCName string `json:"pvcName,omitempty"`
+
+	// DownloadJobName is the name of the Job created to download the model.
+	// +kubebuilder:validation:Optional
+	DownloadJobName string `json:"downloadJobName,omitempty"`
+
+	// LastDownloadTime is the timestamp of the last successful download.
+	// +kubebuilder:validation:Optional
+	LastDownloadTime *metav1.Time `json:"lastDownloadTime,omitempty"`
+}
+
+// +kubebuilder:object:root=true
+// +kubebuilder:subresource:status
+// +kubebuilder:resource:shortName=dm
+// +kubebuilder:printcolumn:name="State",type=string,JSONPath=`.status.state`
+// +kubebuilder:printcolumn:name="Model",type=string,JSONPath=`.spec.name`
+// +kubebuilder:printcolumn:name="Version",type=string,JSONPath=`.spec.version`
+// +kubebuilder:printcolumn:name="PVC",type=string,JSONPath=`.status.pvcName`
+// +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp`
+// DynamoModel is the Schema for the dynamomodels API.
+// It provides a high-level abstraction for managing model artifacts cached in PVCs in the cluster.
+// All jobs referencing the same DynamoModel are guaranteed to use the same artifact,
+// preventing drift and simplifying maintenance.
+type DynamoModel struct {
+	metav1.TypeMeta   `json:",inline"`
+	metav1.ObjectMeta `json:"metadata,omitempty"`
+
+	// Spec defines the desired state for this model.
+	Spec DynamoModelSpec `json:"spec,omitempty"`
+	// Status reflects the current observed state of this model.
+	Status DynamoModelStatus `json:"status,omitempty"` // the status subresource is enabled by the marker above, so writes to the main resource do not change this field
+}
+
+// +kubebuilder:object:root=true
+
+// DynamoModelList is the list kind for DynamoModel; Items holds the DynamoModel objects.
+type DynamoModelList struct {
+	metav1.TypeMeta `json:",inline"`
+	metav1.ListMeta `json:"metadata,omitempty"`
+	Items           []DynamoModel `json:"items"`
+}
+
+func init() {
+	SchemeBuilder.Register(&DynamoModel{}, &DynamoModelList{}) // register both kinds with this package's SchemeBuilder
+}
+
+// SetState overwrites Status.State on the in-memory object only; nothing here writes the change to the API server.
+func (m *DynamoModel) SetState(state string) {
+	m.Status.State = state
+}
+
+// GetSpec returns the model's Spec boxed as any; it is returned by value, so the caller receives a shallow copy.
+func (m *DynamoModel) GetSpec() any {
+	return m.Spec
+}
+
+// SetSpec replaces the model's Spec; spec must hold a DynamoModelSpec value (not a pointer), any other dynamic type makes the assertion panic.
+func (m *DynamoModel) SetSpec(spec any) {
+	m.Spec = spec.(DynamoModelSpec)
+}
+
+// AddStatusCondition adds or updates a status condition, matching on condition type.
+// A matching entry whose status value is unchanged keeps its recorded LastTransitionTime,
+// so re-asserting the same status is not reported as a fresh transition.
+func (m *DynamoModel) AddStatusCondition(condition metav1.Condition) {
+	for i := range m.Status.Conditions {
+		if existing := &m.Status.Conditions[i]; existing.Type == condition.Type {
+			if existing.Status == condition.Status && !existing.LastTransitionTime.IsZero() {
+				condition.LastTransitionTime = existing.LastTransitionTime
+			}
+			*existing = condition
+			return
+		}
+	}
+	// append allocates when Conditions is nil, so no explicit initialisation is needed.
+	m.Status.Conditions = append(m.Status.Conditions, condition)
+}
+
+// IsReady reports whether Status.State is exactly "Ready" (plain, case-sensitive string comparison).
+func (m *DynamoModel) IsReady() bool {
+	return m.Status.State == "Ready"
+}
+
+// GetPVCName resolves the PVC name: Status.PVCName first, then Spec.PVC.Name, then the object's own name.
+func (m *DynamoModel) GetPVCName() string {
+	switch {
+	case m.Status.PVCName != "":
+		return m.Status.PVCName
+	case m.Spec.PVC.Name != "":
+		return m.Spec.PVC.Name
+	}
+	return m.Name
+}
diff --git a/deploy/cloud/operator/cmd/main.go b/deploy/cloud/operator/cmd/main.go
index bc55f36eb2..9b6dfa24fa 100644
--- a/deploy/cloud/operator/cmd/main.go
+++ b/deploy/cloud/operator/cmd/main.go
@@ -432,6 +432,13 @@ func main() {
 		setupLog.Error(err, "unable to create controller", "controller", "DynamoGraphDeployment")
 		os.Exit(1)
 	}
+	if err = (&controller.DynamoModelReconciler{
+		Client: mgr.GetClient(),
+		Scheme: mgr.GetScheme(),
+	}).SetupWithManager(mgr); err != nil {
+		setupLog.Error(err, "unable to create controller", "controller", "DynamoModel")
+		os.Exit(1)
+	}
 	//+kubebuilder:scaffold:builder
 
 	if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
diff --git a/deploy/cloud/operator/config/crd/bases/nvidia.com_dynamomodels.yaml b/deploy/cloud/operator/config/crd/bases/nvidia.com_dynamomodels.yaml
new file mode 100644
index 0000000000..965860d906
--- /dev/null
+++ b/deploy/cloud/operator/config/crd/bases/nvidia.com_dynamomodels.yaml
@@ -0,0 +1,219 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+---
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+  annotations:
+    controller-gen.kubebuilder.io/version: v0.16.4
+    helm.sh/resource-policy: keep
+  name: dynamomodels.nvidia.com
+spec:
+  group: nvidia.com
+  names:
+    kind: DynamoModel
+    listKind: DynamoModelList
+    plural: dynamomodels
+    shortNames:
+      - dm
+    singular: dynamomodel
+  scope: Namespaced
+  versions:
+    - name: v1alpha1
+      additionalPrinterColumns:
+        - jsonPath: .status.state
+          name: State
+          type: string
+        - jsonPath: .spec.name
+          name: Model
+          type: string
+        - jsonPath: .spec.version
+          name: Version
+          type: string
+        - jsonPath: .status.pvcName
+          name: PVC
+          type: string
+        - jsonPath: .metadata.creationTimestamp
+          name: Age
+          type: date
+      schema:
+        openAPIV3Schema:
+          description: |-
+            DynamoModel is the Schema for the dynamomodels API.
+            It provides a high-level abstraction for managing model artifacts cached in PVCs in the cluster.
+            All jobs referencing the same DynamoModel are guaranteed to use the same artifact,
+            preventing drift and simplifying maintenance.
+          properties:
+            apiVersion:
+              description: |-
+                APIVersion defines the versioned schema of this representation of an object.
+                Servers should convert recognized schemas to the latest internal value, and
+                may reject unrecognized values.
+                More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
+              type: string
+            kind:
+              description: |-
+                Kind is a string value representing the REST resource this object represents.
+                Servers may infer this from the endpoint the client submits requests to.
+                Cannot be updated.
+                In CamelCase.
+                More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
+              type: string
+            metadata:
+              type: object
+            spec:
+              description: Spec defines the desired state for this model.
+              properties:
+                downloaderRef:
+                  description: |-
+                    DownloaderRef is an optional reference to a custom downloader or workflow
+                    (e.g., MLFlow or internal tools). Provides extensibility for specialized workflows
+                    (internal or third-party).
+                  type: string
+                name:
+                  description: |-
+                    Name is the canonical model name (matches external model repo, e.g. HuggingFace, NGC).
+                    Example: "meta-llama/Llama-3.3-70B-Instruct"
+                  type: string
+                pvc:
+                  description: PVC defines the persistent volume claim configuration for storing the model.
+                  properties:
+                    create:
+                      default: true
+                      description: Create indicates whether to create a new PVC or use an existing one.
+                      type: boolean
+                    name:
+                      description: Name is the name of the PVC. If not specified, defaults to the DynamoModel name.
+                      type: string
+                    size:
+                      anyOf:
+                        - type: integer
+                        - type: string
+                      description: Size of the volume, used during PVC creation. Required when create is true.
+                      pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                      x-kubernetes-int-or-string: true
+                    storageClass:
+                      description: StorageClass to be used for PVC creation. Required when create is true.
+                      type: string
+                    volumeAccessMode:
+                      default: ReadWriteMany
+                      description: VolumeAccessMode is the volume access mode of the PVC. Defaults to ReadWriteMany.
+                      type: string
+                  required:
+                    - create
+                  type: object
+                secretRef:
+                  description: |-
+                    SecretRef is an optional reference to a secret needed for accessing the source URL
+                    (private repo, S3 credentials, etc.)
+                  type: string
+                sourceURL:
+                  description: |-
+                    SourceURL is the source location of model weights (can be HF, S3, NGC).
+                    Ensures flexibility in downstream storage strategies; permits flexible source management and credential injection.
+                    Examples: "hf://meta-llama/Llama-3.3-70B-Instruct", "s3://bucket/path/to/model", "ngc://nvidia/model"
+                  type: string
+                version:
+                  description: |-
+                    Version is a version pin (e.g., SHA or tag from source repository).
+                    This solves version drift by pinning deployments and benchmarking jobs to the same model artifact.
+                  type: string
+              required:
+                - name
+                - pvc
+                - sourceURL
+              type: object
+            status:
+              description: Status reflects the current observed state of this model.
+              properties:
+                conditions:
+                  description: Conditions contains the latest observed conditions of the model.
+                  items:
+                    description: Condition contains details for one aspect of the current state of this API Resource.
+                    properties:
+                      lastTransitionTime:
+                        description: |-
+                          lastTransitionTime is the last time the condition transitioned from one status to another.
+                          This should be when the underlying condition changed.  If that is not known, then using the time when the API field changed is acceptable.
+                        format: date-time
+                        type: string
+                      message:
+                        description: |-
+                          message is a human readable message indicating details about the transition.
+                          This may be an empty string.
+                        maxLength: 32768
+                        type: string
+                      observedGeneration:
+                        description: |-
+                          observedGeneration represents the .metadata.generation that the condition was set based upon.
+                          For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date
+                          with respect to the current state of the instance.
+                        format: int64
+                        minimum: 0
+                        type: integer
+                      reason:
+                        description: |-
+                          reason contains a programmatic identifier indicating the reason for the condition's last transition.
+                          Producers of specific condition types may define expected values and meanings for this field,
+                          and whether the values are considered a guaranteed API.
+                          The value should be a CamelCase string.
+                          This field may not be empty.
+                        maxLength: 1024
+                        minLength: 1
+                        pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$
+                        type: string
+                      status:
+                        description: status of the condition, one of True, False, Unknown.
+                        enum:
+                          - "True"
+                          - "False"
+                          - Unknown
+                        type: string
+                      type:
+                        description: type of condition in CamelCase or in foo.example.com/CamelCase.
+                        maxLength: 316
+                        pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$
+                        type: string
+                    required:
+                      - lastTransitionTime
+                      - message
+                      - reason
+                      - status
+                      - type
+                    type: object
+                  type: array
+                downloadJobName:
+                  description: DownloadJobName is the name of the Job created to download the model.
+                  type: string
+                lastDownloadTime:
+                  description: LastDownloadTime is the timestamp of the last successful download.
+                  format: date-time
+                  type: string
+                pvcName:
+                  description: PVCName is the name of the PVC created or used for this model.
+                  type: string
+                state:
+                  description: |-
+                    State is a high-level textual status of the model lifecycle.
+                    Possible values: "Pending", "Downloading", "Ready", "Failed"
+                  type: string
+              type: object
+          type: object
+      served: true
+      storage: true
+      subresources:
+        status: {}
+
diff --git a/deploy/cloud/operator/internal/controller/dynamographdeployment_controller.go b/deploy/cloud/operator/internal/controller/dynamographdeployment_controller.go
index 34a343fd17..8fc08061d8 100644
--- a/deploy/cloud/operator/internal/controller/dynamographdeployment_controller.go
+++ b/deploy/cloud/operator/internal/controller/dynamographdeployment_controller.go
@@ -76,6 +76,8 @@ type DynamoGraphDeploymentReconciler struct {
 // +kubebuilder:rbac:groups=nvidia.com,resources=dynamographdeployments,verbs=get;list;watch;create;update;patch;delete
 // +kubebuilder:rbac:groups=nvidia.com,resources=dynamographdeployments/status,verbs=get;update;patch
 // +kubebuilder:rbac:groups=nvidia.com,resources=dynamographdeployments/finalizers,verbs=update
+// +kubebuilder:rbac:groups=nvidia.com,resources=dynamomodels,verbs=get;list;watch
+// +kubebuilder:rbac:groups=nvidia.com,resources=dynamomodels/status,verbs=get
 // +kubebuilder:rbac:groups=grove.io,resources=podcliquesets,verbs=get;list;watch;create;update;patch;delete
 // +kubebuilder:rbac:groups=grove.io,resources=podcliques/scale,verbs=get;update;patch
 // +kubebuilder:rbac:groups=grove.io,resources=podcliquescalinggroups/scale,verbs=get;update;patch
@@ -158,8 +160,19 @@ type Resource interface {
 func (r *DynamoGraphDeploymentReconciler) reconcileResources(ctx context.Context, dynamoDeployment *nvidiacomv1alpha1.DynamoGraphDeployment) (State, Reason, Message, error) {
 	logger := log.FromContext(ctx)
 
+	// Check if all referenced models are ready
+	modelsReady, notReadyModels, err := r.checkModelReferences(ctx, dynamoDeployment)
+	if err != nil {
+		logger.Error(err, "Failed to check model references")
+		return "", "", "", fmt.Errorf("failed to check model references: %w", err)
+	}
+	if !modelsReady {
+		logger.Info("Waiting for models to be ready", "notReadyModels", notReadyModels)
+		return PendingState, "WaitingForModels", Message(fmt.Sprintf("Waiting for models to be ready: %v", notReadyModels)), nil
+	}
+
 	// Reconcile top-level PVCs first
-	err := r.reconcilePVCs(ctx, dynamoDeployment)
+	err = r.reconcilePVCs(ctx, dynamoDeployment)
 	if err != nil {
 		logger.Error(err, "Failed to reconcile top-level PVCs")
 		return "", "", "", fmt.Errorf("failed to reconcile top-level PVCs: %w", err)
@@ -522,3 +535,48 @@ func (r *DynamoGraphDeploymentReconciler) SetupWithManager(mgr ctrl.Manager) err
 func (r *DynamoGraphDeploymentReconciler) GetRecorder() record.EventRecorder {
 	return r.Recorder
 }
+
+// checkModelReferences reports whether every DynamoModel referenced by the deployment's services is ready.
+//
+// Each referenced model is looked up by name in the deployment's namespace. The result is true only when
+// all of them report IsReady; the returned slice names the models that are not ready (or, on a NotFound
+// error, the model that is missing). Lookup failures are returned as errors that wrap the client error,
+// so callers can still classify them, for example with errors.IsNotFound.
+func (r *DynamoGraphDeploymentReconciler) checkModelReferences(ctx context.Context, dynamoDeployment *nvidiacomv1alpha1.DynamoGraphDeployment) (bool, []string, error) {
+	logger := log.FromContext(ctx)
+	notReadyModels := []string{}
+
+	// Collect all model references from services
+	modelRefs := make(map[string]bool)
+	for serviceName, serviceSpec := range dynamoDeployment.Spec.Services {
+		if serviceSpec != nil && serviceSpec.ModelRef != "" {
+			modelRefs[serviceSpec.ModelRef] = true
+			logger.Info("Found model reference", "service", serviceName, "modelRef", serviceSpec.ModelRef)
+		}
+	}
+
+	// Check each referenced model; without references the loop is skipped and the result is (true, empty, nil).
+	for modelRef := range modelRefs {
+		model := &nvidiacomv1alpha1.DynamoModel{}
+		err := r.Get(ctx, types.NamespacedName{
+			Name:      modelRef,
+			Namespace: dynamoDeployment.Namespace,
+		}, model)
+
+		if err != nil {
+			if errors.IsNotFound(err) {
+				// Wrap with %w so the NotFound cause stays detectable through the error chain.
+				return false, append(notReadyModels, modelRef), fmt.Errorf("model %s not found: %w", modelRef, err)
+			}
+			// Name the model in the error; the caller logs it, so it is not logged a second time here.
+			return false, notReadyModels, fmt.Errorf("failed to get model %s: %w", modelRef, err)
+		}
+
+		if !model.IsReady() {
+			logger.Info("Model not ready", "modelRef", modelRef, "state", model.Status.State)
+			notReadyModels = append(notReadyModels, modelRef)
+		}
+	}
+
+	return len(notReadyModels) == 0, notReadyModels, nil
+}
diff --git a/deploy/cloud/operator/internal/controller/dynamomodel_controller.go b/deploy/cloud/operator/internal/controller/dynamomodel_controller.go
new file mode 100644
index 0000000000..4f3558f595
--- /dev/null
+++ b/deploy/cloud/operator/internal/controller/dynamomodel_controller.go
@@ -0,0 +1,453 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package controller
+
+import (
+	"context"
+	"fmt"
+	"strings"
+
+	batchv1 "k8s.io/api/batch/v1"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/types"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
+	"sigs.k8s.io/controller-runtime/pkg/log"
+
+	nvidiacomv1alpha1 "github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
+	commonController "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/controller_common"
+)
+
+const (
+	dynamoModelFinalizerName = "nvidia.com/dynamomodel-finalizer" // value returned by GetFinalizerName
+)
+
+// DynamoModelReconciler reconciles DynamoModel objects: for each one it ensures a PVC and a download Job exist.
+type DynamoModelReconciler struct {
+	client.Client
+	Scheme *runtime.Scheme // passed to controllerutil.SetControllerReference when PVCs and Jobs are created
+}
+
+// +kubebuilder:rbac:groups=nvidia.com,resources=dynamomodels,verbs=get;list;watch;create;update;patch;delete
+// +kubebuilder:rbac:groups=nvidia.com,resources=dynamomodels/status,verbs=get;update;patch
+// +kubebuilder:rbac:groups=nvidia.com,resources=dynamomodels/finalizers,verbs=update
+// +kubebuilder:rbac:groups=batch,resources=jobs,verbs=get;list;watch;create;update;patch;delete
+// +kubebuilder:rbac:groups="",resources=persistentvolumeclaims,verbs=get;list;watch;create;update;patch;delete
+
+// Reconcile moves a DynamoModel towards its desired state: it handles the finalizer, reconciles the PVC and
+// the download Job, and records the outcome in the status (state and "Ready" condition) before returning.
+func (r *DynamoModelReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
+	logger := log.FromContext(ctx)
+
+	var err error
+	deleted := false
+	reason, message, state := Reason("undefined"), Message(""), PendingState
+
+	// Retrieve the DynamoModel CRD
+	dynamoModel := &nvidiacomv1alpha1.DynamoModel{}
+	if err = r.Get(ctx, req.NamespacedName, dynamoModel); err != nil {
+		return ctrl.Result{}, client.IgnoreNotFound(err)
+	}
+
+	// Persist the outcome on every exit path below; the closure reads the final values of the locals above.
+	defer func() {
+		if deleted && err == nil {
+			// HandleFinalizer reported the deletion as handled; a status update would presumably fail on the removed object.
+			logger.Info("Reconciliation done")
+			return
+		}
+		if err != nil {
+			state = FailedState
+			message = Message(err.Error())
+			logger.Error(err, "Reconciliation failed")
+		}
+		dynamoModel.SetState(string(state))
+		readyStatus := metav1.ConditionFalse
+		if state == ReadyState {
+			readyStatus = metav1.ConditionTrue
+		}
+		dynamoModel.AddStatusCondition(metav1.Condition{
+			Type:               "Ready",
+			Status:             readyStatus,
+			Reason:             string(reason),
+			Message:            string(message),
+			LastTransitionTime: metav1.Now(),
+		})
+		// A dedicated variable is used so the reconcile error captured above is not overwritten.
+		if updateErr := r.Status().Update(ctx, dynamoModel); updateErr != nil {
+			logger.Error(updateErr, "Unable to update the CRD status", "crd", req.NamespacedName, "state", state, "reason", reason, "message", message)
+		}
+		logger.Info("Reconciliation done")
+	}()
+
+	// Handle finalizer
+	deleted, err = commonController.HandleFinalizer(ctx, dynamoModel, r.Client, r)
+	if err != nil {
+		logger.Error(err, "failed to handle the finalizer")
+		reason = "failed_to_handle_the_finalizer"
+		return ctrl.Result{}, err
+	}
+	if deleted {
+		return ctrl.Result{}, nil
+	}
+
+	// Reconcile resources; on failure the specific reason returned by reconcileResources is kept.
+	state, reason, message, err = r.reconcileResources(ctx, dynamoModel)
+	if err != nil {
+		logger.Error(err, "failed to reconcile the resources")
+		return ctrl.Result{}, err
+	}
+
+	return ctrl.Result{}, nil
+}
+
+// reconcileResources ensures that the model's PVC and download Job exist and derives the model state
+// from the Job status. The names of both objects are recorded in the DynamoModel status.
+//
+// A failed pod alone does not fail the model: the Job retries pods up to its backoff limit, so the
+// download is only reported as failed once the Job carries a JobFailed condition with status True.
+func (r *DynamoModelReconciler) reconcileResources(ctx context.Context, dynamoModel *nvidiacomv1alpha1.DynamoModel) (State, Reason, Message, error) {
+	logger := log.FromContext(ctx)
+
+	// Step 1: Reconcile PVC
+	pvc, err := r.reconcilePVC(ctx, dynamoModel)
+	if err != nil {
+		return FailedState, "PVCReconciliationFailed", Message(err.Error()), err
+	}
+	dynamoModel.Status.PVCName = pvc.Name
+
+	// Step 2: Reconcile download Job
+	job, err := r.reconcileDownloadJob(ctx, dynamoModel, pvc)
+	if err != nil {
+		return FailedState, "JobReconciliationFailed", Message(err.Error()), err
+	}
+	dynamoModel.Status.DownloadJobName = job.Name
+
+	// Step 3: Check Job status, starting with terminal failure
+	for _, cond := range job.Status.Conditions {
+		if cond.Type == batchv1.JobFailed && cond.Status == corev1.ConditionTrue {
+			return FailedState, "DownloadFailed", "Model download job failed", fmt.Errorf("download job failed: %s: %s", cond.Reason, cond.Message)
+		}
+	}
+
+	switch {
+	case job.Status.Succeeded > 0:
+		// Job completed successfully
+		logger.Info("Model download completed successfully", "model", dynamoModel.Name)
+		if dynamoModel.Status.LastDownloadTime == nil {
+			now := metav1.Now()
+			dynamoModel.Status.LastDownloadTime = &now
+		}
+		return ReadyState, "ModelReady", "Model downloaded and ready", nil
+	case job.Status.Active > 0 || job.Status.Failed > 0:
+		// Pods are running, or a failed attempt is waiting to be retried by the Job
+		return PendingState, "Downloading", "Model download in progress", nil
+	}
+
+	// Job is pending
+	return PendingState, "JobPending", "Model download job is pending", nil
+}
+
+// reconcilePVC returns the PVC that backs the model, creating it when it is missing.
+//
+// An existing PVC with the name given by GetPVCName is returned as found. A missing PVC is built
+// with constructPVC, gets the DynamoModel as its controller reference and is created, unless
+// Spec.PVC.Create is explicitly false, in which case an error is returned instead.
+func (r *DynamoModelReconciler) reconcilePVC(ctx context.Context, dynamoModel *nvidiacomv1alpha1.DynamoModel) (*corev1.PersistentVolumeClaim, error) {
+	logger := log.FromContext(ctx)
+	pvcName := dynamoModel.GetPVCName()
+
+	// Look the claim up first; only a NotFound answer leads to the creation path below.
+	existing := &corev1.PersistentVolumeClaim{}
+	getErr := r.Get(ctx, types.NamespacedName{Name: pvcName, Namespace: dynamoModel.Namespace}, existing)
+	switch {
+	case getErr == nil:
+		return existing, nil
+	case !errors.IsNotFound(getErr):
+		logger.Error(getErr, "Unable to retrieve PVC", "pvcName", pvcName)
+		return nil, getErr
+	}
+
+	// The PVC is missing: creation is the default and is only skipped when explicitly disabled.
+	if dynamoModel.Spec.PVC.Create != nil && !*dynamoModel.Spec.PVC.Create {
+		logger.Error(getErr, "PVC does not exist and create is not enabled", "pvcName", pvcName)
+		return nil, fmt.Errorf("PVC %s does not exist and create is disabled", pvcName)
+	}
+
+	// Build the claim and make the DynamoModel its controller before creating it.
+	created := r.constructPVC(dynamoModel)
+	ownerErr := controllerutil.SetControllerReference(dynamoModel, created, r.Scheme)
+	if ownerErr != nil {
+		logger.Error(ownerErr, "Failed to set controller reference for PVC", "pvcName", pvcName)
+		return nil, ownerErr
+	}
+
+	if createErr := r.Create(ctx, created); createErr != nil {
+		logger.Error(createErr, "Failed to create PVC", "pvcName", pvcName)
+		return nil, createErr
+	}
+	logger.Info("PVC created", "pvcName", pvcName, "namespace", dynamoModel.Namespace)
+
+	return created, nil
+}
+
+// constructPVC builds the PersistentVolumeClaim described by the DynamoModel spec. The access mode
+// defaults to ReadWriteMany. StorageClassName is only set when the spec names a class, because an
+// explicit empty class name would opt the claim out of the cluster's default StorageClass.
+func (r *DynamoModelReconciler) constructPVC(dynamoModel *nvidiacomv1alpha1.DynamoModel) *corev1.PersistentVolumeClaim {
+	accessMode := dynamoModel.Spec.PVC.VolumeAccessMode
+	if accessMode == "" {
+		accessMode = corev1.ReadWriteMany
+	}
+	pvc := &corev1.PersistentVolumeClaim{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      dynamoModel.GetPVCName(),
+			Namespace: dynamoModel.Namespace,
+			Labels: map[string]string{
+				"app.kubernetes.io/managed-by": "dynamo-operator",
+				"app.kubernetes.io/component":  "model-storage",
+				"dynamo.nvidia.com/model":      dynamoModel.Name,
+			},
+		},
+		Spec: corev1.PersistentVolumeClaimSpec{
+			AccessModes: []corev1.PersistentVolumeAccessMode{accessMode},
+			Resources: corev1.VolumeResourceRequirements{
+				Requests: corev1.ResourceList{corev1.ResourceStorage: dynamoModel.Spec.PVC.Size},
+			},
+		},
+	}
+	if storageClassName := dynamoModel.Spec.PVC.StorageClass; storageClassName != "" {
+		pvc.Spec.StorageClassName = &storageClassName
+	}
+	return pvc
+}
+
+// reconcileDownloadJob returns the download Job of the model, creating it when it does not exist yet.
+//
+// The Job is named "<model name>-download"; a new Job is built by constructDownloadJob and owned by the DynamoModel.
+func (r *DynamoModelReconciler) reconcileDownloadJob(ctx context.Context, dynamoModel *nvidiacomv1alpha1.DynamoModel, pvc *corev1.PersistentVolumeClaim) (*batchv1.Job, error) {
+	logger := log.FromContext(ctx)
+	jobName := fmt.Sprintf("%s-download", dynamoModel.Name)
+
+	existing := &batchv1.Job{}
+	getErr := r.Get(ctx, types.NamespacedName{Name: jobName, Namespace: dynamoModel.Namespace}, existing)
+	switch {
+	case getErr == nil:
+		return existing, nil
+	case !errors.IsNotFound(getErr):
+		logger.Error(getErr, "Unable to retrieve Job", "jobName", jobName)
+		return nil, getErr
+	}
+
+	// No Job yet: build it and make the DynamoModel its controller before creating it.
+	created := r.constructDownloadJob(dynamoModel, pvc)
+	ownerErr := controllerutil.SetControllerReference(dynamoModel, created, r.Scheme)
+	if ownerErr != nil {
+		logger.Error(ownerErr, "Failed to set controller reference for Job", "jobName", jobName)
+		return nil, ownerErr
+	}
+	if createErr := r.Create(ctx, created); createErr != nil {
+		logger.Error(createErr, "Failed to create Job", "jobName", jobName)
+		return nil, createErr
+	}
+	logger.Info("Download Job created", "jobName", jobName, "namespace", dynamoModel.Namespace)
+
+	return created, nil
+}
+
+// constructDownloadJob builds the single-pod Job "<model name>-download" that runs the download script with the given PVC mounted at /model-cache.
+func (r *DynamoModelReconciler) constructDownloadJob(dynamoModel *nvidiacomv1alpha1.DynamoModel, pvc *corev1.PersistentVolumeClaim) *batchv1.Job {
+	jobName := fmt.Sprintf("%s-download", dynamoModel.Name)
+	backoffLimit := int32(3)
+	completions := int32(1)
+	parallelism := int32(1)
+
+	// The script is chosen by generateDownloadScript from the prefix of Spec.SourceURL
+	downloadScript := r.generateDownloadScript(dynamoModel)
+
+	job := &batchv1.Job{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      jobName,
+			Namespace: dynamoModel.Namespace,
+			Labels: map[string]string{
+				"app.kubernetes.io/managed-by": "dynamo-operator",
+				"app.kubernetes.io/component":  "model-downloader",
+				"dynamo.nvidia.com/model":      dynamoModel.Name,
+			},
+		},
+		Spec: batchv1.JobSpec{
+			BackoffLimit: &backoffLimit,
+			Completions:  &completions,
+			Parallelism:  &parallelism,
+			Template: corev1.PodTemplateSpec{
+				ObjectMeta: metav1.ObjectMeta{
+					Labels: map[string]string{
+						"app":                     "model-download",
+						"dynamo.nvidia.com/model": dynamoModel.Name,
+					},
+				},
+				Spec: corev1.PodSpec{
+					RestartPolicy: corev1.RestartPolicyNever,
+					Containers: []corev1.Container{
+						{
+							Name:    "model-download",
+							Image:   "python:3.10-slim",
+							Command: []string{"sh", "-c"},
+							Args:    []string{downloadScript},
+							Env:     r.generateEnvVars(dynamoModel),
+							VolumeMounts: []corev1.VolumeMount{
+								{
+									Name:      "model-cache",
+									MountPath: "/model-cache",
+								},
+							},
+						},
+					},
+					Volumes: []corev1.Volume{
+						{
+							Name: "model-cache",
+							VolumeSource: corev1.VolumeSource{
+								PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{
+									ClaimName: pvc.Name,
+								},
+							},
+						},
+					},
+				},
+			},
+		},
+	}
+
+	// When Spec.SecretRef is set, load the referenced Secret into the download container's environment via EnvFrom
+	if dynamoModel.Spec.SecretRef != "" {
+		job.Spec.Template.Spec.Containers[0].EnvFrom = []corev1.EnvFromSource{
+			{
+				SecretRef: &corev1.SecretEnvSource{
+					LocalObjectReference: corev1.LocalObjectReference{
+						Name: dynamoModel.Spec.SecretRef,
+					},
+				},
+			},
+		}
+	}
+
+	return job
+}
+
+// generateDownloadScript returns the POSIX shell script run by the download Job, selected by the scheme
+// of Spec.SourceURL. The scripts read SOURCE_URL, MODEL_NAME and MODEL_VERSION from the container
+// environment; every expansion is quoted so a value is always passed as a single argument.
+func (r *DynamoModelReconciler) generateDownloadScript(dynamoModel *nvidiacomv1alpha1.DynamoModel) string {
+	sourceURL := dynamoModel.Spec.SourceURL
+
+	switch {
+	case strings.HasPrefix(sourceURL, "hf://"):
+		// HuggingFace download; MODEL_VERSION, when set, pins the revision (branch, tag or commit SHA).
+		return `
+set -eux
+pip install --no-cache-dir huggingface_hub hf_transfer
+export HF_HUB_ENABLE_HF_TRANSFER=1
+huggingface-cli download "$MODEL_NAME" ${MODEL_VERSION:+--revision "$MODEL_VERSION"} --cache-dir /model-cache
+`
+	case strings.HasPrefix(sourceURL, "s3://"):
+		// S3 download
+		return `
+set -eux
+pip install --no-cache-dir awscli
+aws s3 sync "$SOURCE_URL" /model-cache --no-progress
+`
+	case strings.HasPrefix(sourceURL, "ngc://"):
+		// NGC download; the target is derived from SOURCE_URL so the script does not depend on MODEL_NAME.
+		return `
+set -eux
+pip install --no-cache-dir ngc-cli
+ngc registry model download-version "${SOURCE_URL#ngc://}" --dest /model-cache
+`
+	}
+
+	// Default: generic download (assumes HTTP/HTTPS URL)
+	// The wget executable comes from the distribution package; the PyPI "wget" module does not provide it.
+	return `
+set -eux
+apt-get update
+apt-get install -y --no-install-recommends wget
+wget -P /model-cache "$SOURCE_URL"
+`
+}
+
+
+// generateEnvVars builds the environment of the download container.
+//
+// SOURCE_URL is always set. MODEL_NAME (the source URL without its scheme) is set for hf:// and ngc://
+// sources so the scripts for those schemes can rely on it. MODEL_VERSION is set when Spec.Version is non-empty.
+func (r *DynamoModelReconciler) generateEnvVars(dynamoModel *nvidiacomv1alpha1.DynamoModel) []corev1.EnvVar {
+	sourceURL := dynamoModel.Spec.SourceURL
+	envVars := []corev1.EnvVar{{Name: "SOURCE_URL", Value: sourceURL}}
+
+	// The download scripts run with `set -u`, so a variable they may reference has to be defined.
+	for _, scheme := range []string{"hf://", "ngc://"} {
+		if strings.HasPrefix(sourceURL, scheme) {
+			envVars = append(envVars, corev1.EnvVar{
+				Name:  "MODEL_NAME",
+				Value: strings.TrimPrefix(sourceURL, scheme),
+			})
+		}
+	}
+
+	// Add version if specified
+	if dynamoModel.Spec.Version != "" {
+		envVars = append(envVars, corev1.EnvVar{
+			Name:  "MODEL_VERSION",
+			Value: dynamoModel.Spec.Version,
+		})
+	}
+
+	return envVars
+}
+
+// SetupWithManager registers the reconciler with the manager for DynamoModel objects and
+// declares the PersistentVolumeClaims and Jobs it owns, using the controller builder.
+func (r *DynamoModelReconciler) SetupWithManager(mgr ctrl.Manager) error {
+	bldr := ctrl.NewControllerManagedBy(mgr).For(&nvidiacomv1alpha1.DynamoModel{})
+	bldr = bldr.Owns(&corev1.PersistentVolumeClaim{})
+	bldr = bldr.Owns(&batchv1.Job{})
+	return bldr.Complete(r)
+}
+
+// Cleanup implements the Cleanup interface for finalizer handling. It returns an error when obj is not a *DynamoModel.
+func (r *DynamoModelReconciler) Cleanup(ctx context.Context, obj client.Object) error {
+	dynamoModel, ok := obj.(*nvidiacomv1alpha1.DynamoModel)
+	if !ok {
+		return fmt.Errorf("cleanup: expected *DynamoModel, got %T", obj)
+	}
+
+	// The PVC and Job created by this controller carry an owner reference and are
+	// deleted automatically; additional cleanup logic can be added here if needed.
+	log.FromContext(ctx).Info("Cleaning up DynamoModel resources", "name", dynamoModel.Name, "namespace", dynamoModel.Namespace)
+	return nil
+}
+
+// GetFinalizerName returns dynamoModelFinalizerName; presumably read by commonController.HandleFinalizer, which receives this reconciler (confirm).
+func (r *DynamoModelReconciler) GetFinalizerName() string {
+	return dynamoModelFinalizerName
+}
+
diff --git a/docs/kubernetes/README.md b/docs/kubernetes/README.md
index acc276fae0..fecc0d3cf5 100644
--- a/docs/kubernetes/README.md
+++ b/docs/kubernetes/README.md
@@ -84,7 +84,8 @@ Refer to the [API Reference and Documentation](/docs/kubernetes/api_reference.md
 
 For detailed technical specifications of Dynamo's Kubernetes resources:
 
-- **[API Reference](/docs/kubernetes/api_reference.md)** - Complete CRD field specifications for `DynamoGraphDeployment` and `DynamoComponentDeployment`
+- **[API Reference](/docs/kubernetes/api_reference.md)** - Complete CRD field specifications for `DynamoGraphDeployment`, `DynamoComponentDeployment`, and `DynamoModel`
+- **[DynamoModel Guide](/docs/kubernetes/dynamomodel.md)** - Model artifact management with version pinning and automated downloads
 - **[Operator Guide](/docs/kubernetes/dynamo_operator.md)** - Dynamo operator configuration and management
 - **[Create Deployment](/docs/kubernetes/create_deployment.md)** - Step-by-step deployment creation examples
 
@@ -170,6 +171,7 @@ Key customization points include:
 ## Additional Resources
 
 - **[Examples](/examples/README.md)** - Complete working examples
+- **[DynamoModel Examples](/examples/deployments/README.md)** - Model artifact management examples
 - **[Create Custom Deployments](/docs/kubernetes/create_deployment.md)** - Build your own CRDs
 - **[Operator Documentation](/docs/kubernetes/dynamo_operator.md)** - How the platform works
 - **[Helm Charts](/deploy/helm/README.md)** - For advanced users
diff --git a/docs/kubernetes/dynamomodel.md b/docs/kubernetes/dynamomodel.md
new file mode 100644
index 0000000000..6479c799f4
--- /dev/null
+++ b/docs/kubernetes/dynamomodel.md
@@ -0,0 +1,273 @@
+<!--
+SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+SPDX-License-Identifier: Apache-2.0
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+-->
+
+# DynamoModel: Model Artifact Management
+
+## Overview
+
+`DynamoModel` is a Kubernetes Custom Resource Definition (CRD) that provides a high-level abstraction for managing model artifacts cached in PVCs within your cluster. It solves the critical problem of **model version drift** by ensuring all deployments and benchmarking jobs referencing the same `DynamoModel` use identical model artifacts.
+
+## Why DynamoModel?
+
+### Problem Statement
+
+Without `DynamoModel`, teams face several challenges:
+
+1. **Version Drift**: Different jobs might download different versions of a model, leading to inconsistent results
+2. **Manual PVC Management**: Teams must manually create PVCs, download models, and track versions
+3. **Duplicate Downloads**: Multiple jobs download the same model repeatedly, wasting time and bandwidth
+4. **No Version Pinning**: Difficult to ensure deployments and benchmarks use the exact same model artifact
+
+### Solution
+
+`DynamoModel` provides:
+
+- **Version Pinning**: Pin deployments to specific model versions (SHA or tag)
+- **Automated Downloads**: Automatically downloads and caches models in PVCs
+- **Guaranteed Consistency**: All jobs referencing the same `DynamoModel` use identical artifacts
+- **Flexible Sources**: Support for HuggingFace, S3, NGC, and custom sources
+- **Simplified Management**: Declarative model management with Kubernetes-native tooling
+
+## Key Features
+
+### 1. Model Name and Version Pinning
+
+```yaml
+spec:
+  name: meta-llama/Llama-3.3-70B-Instruct
+  version: abcd12345  # Source SHA, avoids drift
+```
+
+Pinning the version ensures that deployments and benchmarking jobs use the same model artifact instead of drifting apart.
+
+### 2. Flexible Source Management
+
+```yaml
+spec:
+  sourceURL: hf://meta-llama/Llama-3.3-70B-Instruct
+  # Or: s3://bucket/path/to/model
+  # Or: ngc://nvidia/model
+```
+
+Supports multiple source types with automatic protocol detection.
+
+### 3. Credential Injection
+
+```yaml
+spec:
+  secretRef: llama-hf-secret
+```
+
+Securely inject credentials for private repositories.
+
+### 4. Extensibility
+
+```yaml
+spec:
+  downloaderRef: custom-downloader  # Optional
+```
+
+Plug in custom downloaders or workflows (e.g., MLFlow or internal tools).
+
+## Quick Start
+
+### Step 1: Create a Model Secret (if needed)
+
+For private models, create a secret with your credentials:
+
+```bash
+kubectl create secret generic llama-hf-secret \
+  --from-literal=HF_TOKEN="your-huggingface-token" \
+  -n your-namespace
+```
+
+### Step 2: Define a DynamoModel
+
+Create a `DynamoModel` resource:
+
+```yaml
+apiVersion: nvidia.com/v1alpha1
+kind: DynamoModel
+metadata:
+  name: llama-3-70b-instruct-v1
+  namespace: your-namespace
+spec:
+  name: meta-llama/Llama-3.3-70B-Instruct
+  version: abcd12345
+  sourceURL: hf://meta-llama/Llama-3.3-70B-Instruct
+  secretRef: llama-hf-secret
+  pvc:
+    create: true
+    storageClass: your-storage-class
+    size: 200Gi
+    volumeAccessMode: ReadWriteMany
+```
+
+Apply it:
+
+```bash
+kubectl apply -f dynamomodel.yaml
+```
+
+### Step 3: Check Model Status
+
+```bash
+# Check status
+kubectl get dynamomodel llama-3-70b-instruct-v1 -n your-namespace
+
+# Watch download progress
+kubectl get dynamomodel llama-3-70b-instruct-v1 -n your-namespace -w
+
+# View detailed status
+kubectl describe dynamomodel llama-3-70b-instruct-v1 -n your-namespace
+
+# Check download job logs
+kubectl logs job/llama-3-70b-instruct-v1-download -n your-namespace
+```
+
+### Step 4: Reference in DynamoGraphDeployment
+
+Once the model is ready, reference it in your deployment:
+
+```yaml
+apiVersion: nvidia.com/v1alpha1
+kind: DynamoGraphDeployment
+metadata:
+  name: vllm-disagg
+  namespace: your-namespace
+spec:
+  services:
+    VllmDecodeWorker:
+      modelRef: llama-3-70b-instruct-v1
+      replicas: 2
+      # ... other configuration
+```
+
+The controller will automatically:
+1. Wait for the model to be ready
+2. Mount the model's PVC to the service
+3. Ensure all replicas use the same model artifact
+
+## API Reference
+
+### DynamoModelSpec
+
+| Field | Type | Required | Description |
+|-------|------|----------|-------------|
+| `name` | string | Yes | Canonical model name (e.g., "meta-llama/Llama-3.3-70B-Instruct") |
+| `version` | string | No | Version pin (SHA or tag) to prevent drift |
+| `sourceURL` | string | Yes | Source location (hf://, s3://, ngc://, or an HTTP/HTTPS URL) |
+| `secretRef` | string | No | Reference to secret for credentials |
+| `downloaderRef` | string | No | Reference to custom downloader |
+| `pvc` | PVCSpec | Yes | PVC configuration for model storage |
+
+### PVCSpec
+
+| Field | Type | Required | Default | Description |
+|-------|------|----------|---------|-------------|
+| `create` | bool | No | true | Whether to create a new PVC |
+| `name` | string | No | model name | Name of the PVC |
+| `storageClass` | string | Yes* | - | Storage class for PVC creation |
+| `size` | Quantity | Yes* | - | Size of the volume |
+| `volumeAccessMode` | string | No | ReadWriteMany | Volume access mode |
+
+\* Required when `create` is true
+
+### DynamoModelStatus
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `state` | string | Lifecycle state: "Pending", "Downloading", "Ready", "Failed" |
+| `conditions` | []Condition | Detailed status conditions |
+| `pvcName` | string | Name of the created/used PVC |
+| `downloadJobName` | string | Name of the download Job |
+| `lastDownloadTime` | Time | Timestamp of last successful download |
+
+## Supported Source Types
+
+### HuggingFace
+
+```yaml
+sourceURL: hf://meta-llama/Llama-3.3-70B-Instruct
+secretRef: hf-token-secret  # Optional for public models
+```
+
+Downloads using `huggingface-cli` with HF Transfer enabled for faster downloads.
+
+### S3
+
+```yaml
+sourceURL: s3://my-bucket/models/llama-70b
+secretRef: aws-credentials  # AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
+```
+
+Downloads using AWS CLI.
+
+### NGC (NVIDIA GPU Cloud)
+
+```yaml
+sourceURL: ngc://nvidia/llama-70b
+secretRef: ngc-api-key
+```
+
+Downloads using NGC CLI.
+
+### HTTP/HTTPS
+
+```yaml
+sourceURL: https://example.com/models/model.tar.gz
+```
+
+Generic HTTP download using wget.
+
+## Advanced Usage
+
+### Using Existing PVC
+
+If you already have a PVC with a model:
+
+```yaml
+apiVersion: nvidia.com/v1alpha1
+kind: DynamoModel
+metadata:
+  name: existing-model
+spec:
+  name: my-org/my-model
+  sourceURL: hf://my-org/my-model
+  pvc:
+    create: false
+    name: existing-model-pvc
+```
+
+### Custom Downloader
+
+For specialized workflows:
+
+```yaml
+apiVersion: nvidia.com/v1alpha1
+kind: DynamoModel
+metadata:
+  name: custom-model
+spec:
+  name: my-org/custom-model
+  sourceURL: custom://my-internal-registry/model
+  downloaderRef: mlflow-downloader
+  pvc:
+    create: true
+    storageClass: fast-ssd
+    size: 500Gi
+```

From 31dd961ed070e4209f2c654e9d27ba4dafdb5d55 Mon Sep 17 00:00:00 2001
From: Hannah Zhang <hannahz@nvidia.com>
Date: Thu, 9 Oct 2025 10:20:19 -0700
Subject: [PATCH 2/3] feat: add model examples to components

Signed-off-by: Hannah Zhang <hannahz@nvidia.com>
---
 components/models/README.md        | 102 +++++++++++++++++++++++++++++
 components/models/llama-3-70b.yaml |  28 ++++++++
 components/models/qwen3-0.6b.yaml  |  28 ++++++++
 docs/kubernetes/dynamomodel.md     |  64 ++++++++++++++++--
 4 files changed, 216 insertions(+), 6 deletions(-)
 create mode 100644 components/models/README.md
 create mode 100644 components/models/llama-3-70b.yaml
 create mode 100644 components/models/qwen3-0.6b.yaml

diff --git a/components/models/README.md b/components/models/README.md
new file mode 100644
index 0000000000..73db05728a
--- /dev/null
+++ b/components/models/README.md
@@ -0,0 +1,102 @@
+# DynamoModel Definitions
+
+This directory contains pre-configured `DynamoModel` resources for commonly used models.
+
+## Available Models
+
+### Qwen 3 0.6B
+**File:** `qwen3-0.6b.yaml`
+- **Size:** ~2GB
+- **Use Case:** Testing, development, lightweight inference
+- **Public:** Yes (no authentication required)
+
+```bash
+kubectl apply -f qwen3-0.6b.yaml -n your-namespace
+```
+
+### Llama 3.3 70B Instruct
+**File:** `llama-3-70b.yaml`
+- **Size:** ~140GB
+- **Use Case:** Production inference, high-quality responses
+- **Public:** Gated (requires HuggingFace token)
+
+```bash
+# Create secret first
+kubectl create secret generic hf-token-secret \
+  --from-literal=HF_TOKEN="your-token" \
+  -n your-namespace
+
+kubectl apply -f llama-3-70b.yaml -n your-namespace
+```
+
+## Usage
+
+### 1. Deploy Model
+
+```bash
+kubectl apply -f <model-file>.yaml -n your-namespace
+```
+
+### 2. Check Status
+
+```bash
+# Watch model download progress
+kubectl get dynamomodel -n your-namespace -w
+
+# Check detailed status
+kubectl describe dynamomodel qwen3-0.6b -n your-namespace
+
+# View download logs
+kubectl logs job/qwen3-0.6b-download -n your-namespace -f
+```
+
+### 3. Reference in Deployment
+
+Once the model state is "Ready", reference it in your `DynamoGraphDeployment`:
+
+```yaml
+apiVersion: nvidia.com/v1alpha1
+kind: DynamoGraphDeployment
+metadata:
+  name: my-deployment
+spec:
+  backendFramework: vllm
+  services:
+    VllmWorker:
+      modelRef: qwen3-0.6b  # Reference the model by name
+      replicas: 1
+      resources:
+        limits:
+          nvidia.com/gpu: "1"
+```
+
+## Customization
+
+Update the following fields based on your cluster:
+
+- **`storageClass`**: Use your cluster's available storage class
+- **`size`**: Adjust based on model requirements
+- **`version`**: Pin to specific commit SHA for production
+- **`secretRef`**: Add if model requires authentication
+
+## Adding New Models
+
+Create a new YAML file following this template:
+
+```yaml
+apiVersion: nvidia.com/v1alpha1
+kind: DynamoModel
+metadata:
+  name: my-model
+spec:
+  name: organization/model-name
+  version: commit-sha  # Optional
+  sourceURL: hf://organization/model-name
+  secretRef: secret-name  # Optional
+  pvc:
+    create: true
+    storageClass: your-storage-class
+    size: XXGi
+    volumeAccessMode: ReadWriteMany
+```
+
diff --git a/components/models/llama-3-70b.yaml b/components/models/llama-3-70b.yaml
new file mode 100644
index 0000000000..a3f340d0d6
--- /dev/null
+++ b/components/models/llama-3-70b.yaml
@@ -0,0 +1,28 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# DynamoModel for Llama 3.3 70B Instruct - production-ready large model
+apiVersion: nvidia.com/v1alpha1
+kind: DynamoModel
+metadata:
+  name: llama-3-70b-instruct
+spec:
+  # Canonical model name from HuggingFace
+  name: meta-llama/Llama-3.3-70B-Instruct
+
+  # Version pin - "main" tracks the latest revision; use a specific commit SHA for production
+  version: main
+
+  # Source URL - HuggingFace Hub
+  sourceURL: hf://meta-llama/Llama-3.3-70B-Instruct
+
+  # Secret reference for authentication (required for gated models)
+  secretRef: hf-token-secret
+
+  # PVC configuration for model storage
+  pvc:
+    create: true
+    storageClass: standard  # Update with your storage class
+    size: 200Gi  # Large model requires significant storage
+    volumeAccessMode: ReadWriteMany  # Required for multi-replica deployments
+
diff --git a/components/models/qwen3-0.6b.yaml b/components/models/qwen3-0.6b.yaml
new file mode 100644
index 0000000000..47c73ce6d6
--- /dev/null
+++ b/components/models/qwen3-0.6b.yaml
@@ -0,0 +1,28 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# DynamoModel for Qwen 3 0.6B - lightweight model for testing and development
+apiVersion: nvidia.com/v1alpha1
+kind: DynamoModel
+metadata:
+  name: qwen3-0.6b
+spec:
+  # Canonical model name from HuggingFace
+  name: Qwen/Qwen3-0.6B
+
+  # Version pin (optional) - use a specific commit SHA for reproducibility
+  # version: main
+
+  # Source URL - HuggingFace Hub
+  sourceURL: hf://Qwen/Qwen3-0.6B
+
+  # Secret reference for authentication (optional for public models)
+  # secretRef: hf-token-secret
+
+  # PVC configuration for model storage
+  pvc:
+    create: true
+    storageClass: standard  # Update with your storage class
+    size: 10Gi  # Small model, only needs ~2GB but allocate extra space
+    volumeAccessMode: ReadWriteMany  # Required for multi-replica deployments
+
diff --git a/docs/kubernetes/dynamomodel.md b/docs/kubernetes/dynamomodel.md
index 6479c799f4..6839f75d60 100644
--- a/docs/kubernetes/dynamomodel.md
+++ b/docs/kubernetes/dynamomodel.md
@@ -21,8 +21,6 @@ limitations under the License.
 
 `DynamoModel` is a Kubernetes Custom Resource Definition (CRD) that provides a high-level abstraction for managing model artifacts cached in PVCs within your cluster. It solves the critical problem of **model version drift** by ensuring all deployments and benchmarking jobs referencing the same `DynamoModel` use identical model artifacts.
 
-## Why DynamoModel?
-
 ### Problem Statement
 
 Without `DynamoModel`, teams face several challenges:
@@ -83,6 +81,54 @@ spec:
 
 Plug in custom downloaders or workflows (e.g., MLFlow or internal tools).
 
+## How It Works
+
+### User Flow
+
+1. **Create Model Definition:**
+   ```bash
+   kubectl apply -f components/models/qwen3-0.6b.yaml
+   ```
+
+2. **Watch Model Download:**
+   ```bash
+   kubectl get dynamomodel qwen3-0.6b -w
+   ```
+
+3. **Reference in Deployment:**
+   ```yaml
+   apiVersion: nvidia.com/v1alpha1
+   kind: DynamoGraphDeployment
+   spec:
+     modelRef: qwen3-0.6b
+     backendFramework: vllm
+   ```
+
+### Controller Flow
+
+1. **DGD Controller** checks if `modelRef` is specified
+2. Waits for `DynamoModel` to reach "Ready" state
+3. Passes model name to Grove/Component pod generation
+4. **Backend-specific logic** injects model arguments:
+   - vLLM: `--model Qwen/Qwen3-0.6B`
+   - SGLang/TRT-LLM: `--model-path Qwen/Qwen3-0.6B --served-model-name Qwen/Qwen3-0.6B`
+5. Sets `HF_HOME=/model-cache` so backends can resolve models
+6. Auto-mounts model PVC to `/model-cache`
+
+### Model Resolution
+
+When `HF_HOME=/model-cache` is set, the HuggingFace Hub cache lives under `$HF_HOME/hub`, and backends look up the canonical model name there:
+```text
+/model-cache/hub/
+  models--Qwen--Qwen3-0.6B/
+    snapshots/
+      <hash>/
+        config.json
+        model-*.safetensors
+```
+
+Backends automatically resolve `Qwen/Qwen3-0.6B` → `/model-cache/hub/models--Qwen--Qwen3-0.6B/snapshots/<hash>`
+
 ## Quick Start
 
 ### Step 1: Create a Model Secret (if needed)
@@ -150,17 +196,23 @@ metadata:
   name: vllm-disagg
   namespace: your-namespace
 spec:
+  modelRef: llama-3-70b-instruct-v1  # Reference at top-level
+  backendFramework: vllm
   services:
     VllmDecodeWorker:
-      modelRef: llama-3-70b-instruct-v1
       replicas: 2
-      # ... other configuration
+      resources:
+        limits:
+          nvidia.com/gpu: "2"
+      # Model arguments will be auto-injected by the controller
 ```
 
 The controller will automatically:
 1. Wait for the model to be ready
-2. Mount the model's PVC to the service
-3. Ensure all replicas use the same model artifact
+2. Inject the appropriate model arguments for your backend
+3. Mount the model's PVC to `/model-cache`
+4. Set `HF_HOME=/model-cache` for model resolution
+5. Ensure all replicas use the same model artifact
 
 ## API Reference
 

From ba965f51ad1a8d74e7057c59e7ca0e5f4164cf0c Mon Sep 17 00:00:00 2001
From: Hannah Zhang <hannahz@nvidia.com>
Date: Thu, 9 Oct 2025 10:24:06 -0700
Subject: [PATCH 3/3] feat: move modelRef to DGD top level

Signed-off-by: Hannah Zhang <hannahz@nvidia.com>
---
 .../dynamocomponentdeployment_types.go        |  5 ----
 .../v1alpha1/dynamographdeployment_types.go   |  5 ++++
 .../dynamographdeployment_controller.go       | 21 ++++----------
 .../controller/dynamomodel_controller.go      | 29 +++++++++++++------
 4 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/deploy/cloud/operator/api/v1alpha1/dynamocomponentdeployment_types.go b/deploy/cloud/operator/api/v1alpha1/dynamocomponentdeployment_types.go
index 9d69e11062..b475e90a59 100644
--- a/deploy/cloud/operator/api/v1alpha1/dynamocomponentdeployment_types.go
+++ b/deploy/cloud/operator/api/v1alpha1/dynamocomponentdeployment_types.go
@@ -91,11 +91,6 @@ type DynamoComponentDeploymentSharedSpec struct {
 	EnvFromSecret *string `json:"envFromSecret,omitempty"`
 	// VolumeMounts references PVCs defined at the top level for volumes to be mounted by the component.
 	VolumeMounts []VolumeMount `json:"volumeMounts,omitempty"`
-	// ModelRef references a DynamoModel resource that provides the model artifact for this component.
-	// When specified, the controller will wait for the model to be ready and automatically mount
-	// the model's PVC to the component.
-	// +kubebuilder:validation:Optional
-	ModelRef string `json:"modelRef,omitempty"`
 
 	// Ingress config to expose the component outside the cluster (or through a service mesh).
 	Ingress *IngressSpec `json:"ingress,omitempty"`
diff --git a/deploy/cloud/operator/api/v1alpha1/dynamographdeployment_types.go b/deploy/cloud/operator/api/v1alpha1/dynamographdeployment_types.go
index 2d1a64a9fb..42f5d7c009 100644
--- a/deploy/cloud/operator/api/v1alpha1/dynamographdeployment_types.go
+++ b/deploy/cloud/operator/api/v1alpha1/dynamographdeployment_types.go
@@ -49,6 +49,11 @@ type DynamoGraphDeploymentSpec struct {
 	// BackendFramework specifies the backend framework (e.g., "sglang", "vllm", "trtllm").
 	// +kubebuilder:validation:Enum=sglang;vllm;trtllm
 	BackendFramework string `json:"backendFramework,omitempty"`
+	// ModelRef references a DynamoModel resource that provides the model artifact for this deployment.
+	// When specified, the controller will wait for the model to be ready and automatically
+	// configure all services with the appropriate model paths and mount the model's PVC.
+	// +kubebuilder:validation:Optional
+	ModelRef string `json:"modelRef,omitempty"`
 }
 
 // DynamoGraphDeploymentStatus defines the observed state of DynamoGraphDeployment.
diff --git a/deploy/cloud/operator/internal/controller/dynamographdeployment_controller.go b/deploy/cloud/operator/internal/controller/dynamographdeployment_controller.go
index 8fc08061d8..af75d25136 100644
--- a/deploy/cloud/operator/internal/controller/dynamographdeployment_controller.go
+++ b/deploy/cloud/operator/internal/controller/dynamographdeployment_controller.go
@@ -541,22 +541,11 @@ func (r *DynamoGraphDeploymentReconciler) checkModelReferences(ctx context.Conte
 	logger := log.FromContext(ctx)
 	notReadyModels := []string{}
 
-	// Collect all model references from services
-	modelRefs := make(map[string]bool)
-	for serviceName, serviceSpec := range dynamoDeployment.Spec.Services {
-		if serviceSpec != nil && serviceSpec.ModelRef != "" {
-			modelRefs[serviceSpec.ModelRef] = true
-			logger.Info("Found model reference", "service", serviceName, "modelRef", serviceSpec.ModelRef)
-		}
-	}
-
-	// If no model references, return true
-	if len(modelRefs) == 0 {
-		return true, notReadyModels, nil
-	}
+	// Check top-level modelRef
+	if dynamoDeployment.Spec.ModelRef != "" {
+		modelRef := dynamoDeployment.Spec.ModelRef
+		logger.Info("Found top-level model reference", "modelRef", modelRef)
 
-	// Check each referenced model
-	for modelRef := range modelRefs {
 		model := &nvidiacomv1alpha1.DynamoModel{}
 		err := r.Get(ctx, types.NamespacedName{
 			Name:      modelRef,
@@ -566,7 +555,7 @@ func (r *DynamoGraphDeploymentReconciler) checkModelReferences(ctx context.Conte
 		if err != nil {
 			if errors.IsNotFound(err) {
 				logger.Error(err, "Referenced model not found", "modelRef", modelRef)
-				return false, append(notReadyModels, modelRef), fmt.Errorf("model %s not found", modelRef)
+				return false, []string{modelRef}, fmt.Errorf("model %s not found", modelRef)
 			}
 			logger.Error(err, "Failed to get model", "modelRef", modelRef)
 			return false, notReadyModels, err
diff --git a/deploy/cloud/operator/internal/controller/dynamomodel_controller.go b/deploy/cloud/operator/internal/controller/dynamomodel_controller.go
index 4f3558f595..c52726fb61 100644
--- a/deploy/cloud/operator/internal/controller/dynamomodel_controller.go
+++ b/deploy/cloud/operator/internal/controller/dynamomodel_controller.go
@@ -359,14 +359,18 @@ func (r *DynamoModelReconciler) generateDownloadScript(dynamoModel *nvidiacomv1a
 
 	// Determine the download method based on the source URL prefix
 	if strings.HasPrefix(sourceURL, "hf://") {
-		// HuggingFace download
-		modelName := strings.TrimPrefix(sourceURL, "hf://")
-		script := `
+		// HuggingFace download using modern 'hf download' command
+		// Build revision flag if version is specified
+		revisionFlag := ""
+		if dynamoModel.Spec.Version != "" {
+			revisionFlag = fmt.Sprintf("--revision $MODEL_REVISION")
+		}
+
+		script := fmt.Sprintf(`
 set -eux
 pip install --no-cache-dir huggingface_hub hf_transfer
-export HF_HUB_ENABLE_HF_TRANSFER=1
-huggingface-cli download $MODEL_NAME --cache-dir /model-cache
-`
+hf download $MODEL_NAME %s --exclude "original/*" --exclude "metal/*"
+`, revisionFlag)
 		return script
 	} else if strings.HasPrefix(sourceURL, "s3://") {
 		// S3 download
@@ -402,6 +406,14 @@ func (r *DynamoModelReconciler) generateEnvVars(dynamoModel *nvidiacomv1alpha1.D
 			Name:  "SOURCE_URL",
 			Value: dynamoModel.Spec.SourceURL,
 		},
+		{
+			Name:  "HF_HOME",
+			Value: "/model-cache",
+		},
+		{
+			Name:  "HF_HUB_ENABLE_HF_TRANSFER",
+			Value: "1",
+		},
 	}
 
 	// Add model name for HuggingFace downloads
@@ -413,10 +425,10 @@ func (r *DynamoModelReconciler) generateEnvVars(dynamoModel *nvidiacomv1alpha1.D
 		})
 	}
 
-	// Add version if specified
+	// Add version/revision if specified
 	if dynamoModel.Spec.Version != "" {
 		envVars = append(envVars, corev1.EnvVar{
-			Name:  "MODEL_VERSION",
+			Name:  "MODEL_REVISION",
 			Value: dynamoModel.Spec.Version,
 		})
 	}
@@ -450,4 +462,3 @@ func (r *DynamoModelReconciler) Cleanup(ctx context.Context, obj client.Object)
 func (r *DynamoModelReconciler) GetFinalizerName() string {
 	return dynamoModelFinalizerName
 }
-