Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 11 additions & 16 deletions benchmarks/profiler/utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,11 +145,6 @@ def remove_valued_arguments(args: list[str], key: str) -> list[str]:
return args


def join_arguments(args: list[str]) -> list[str]:
# Use shlex.join to properly quote arguments that contain spaces or special characters
return [shlex.join(args)]


def append_argument(args: list[str], to_append) -> list[str]:
idx = find_arg_index(args)
if isinstance(to_append, list):
Expand Down Expand Up @@ -469,7 +464,7 @@ def convert_config(
if "--no-enable-prefix-caching" not in args:
args = append_argument(args, "--no-enable-prefix-caching")

worker_service.extraPodSpec.mainContainer.args = join_arguments(args)
worker_service.extraPodSpec.mainContainer.args = args

elif target == "decode":
# Get service names by inferring from subComponentType first
Expand Down Expand Up @@ -500,7 +495,7 @@ def convert_config(
if "--no-enable-prefix-caching" in args:
args.remove("--no-enable-prefix-caching")

worker_service.extraPodSpec.mainContainer.args = join_arguments(args)
worker_service.extraPodSpec.mainContainer.args = args

# set num workers to 1
# Use the inferred decode service name
Expand Down Expand Up @@ -537,7 +532,7 @@ def set_config_tp_size(
except ValueError:
args = append_argument(args, ["--tensor-parallel-size", str(tp_size)])

worker_service.extraPodSpec.mainContainer.args = join_arguments(args)
worker_service.extraPodSpec.mainContainer.args = args

return cfg.model_dump()

Expand Down Expand Up @@ -695,7 +690,7 @@ def convert_config(
if "--disable-radix-cache" not in args:
args = append_argument(args, "--disable-radix-cache")

worker_service.extraPodSpec.mainContainer.args = join_arguments(args)
worker_service.extraPodSpec.mainContainer.args = args

elif target == "decode":
# Get service names by inferring from subComponentType first
Expand Down Expand Up @@ -739,7 +734,7 @@ def convert_config(
args, ["--load-balance-method", "round_robin"]
)

worker_service.extraPodSpec.mainContainer.args = join_arguments(args)
worker_service.extraPodSpec.mainContainer.args = args

# set num workers to 1
# Use the inferred decode service name
Expand Down Expand Up @@ -772,7 +767,7 @@ def set_config_tp_size(
# Set --tp argument
args = set_argument_value(args, "--tp", str(tp_size))

worker_service.extraPodSpec.mainContainer.args = join_arguments(args)
worker_service.extraPodSpec.mainContainer.args = args
return cfg.model_dump()

@classmethod
Expand Down Expand Up @@ -807,7 +802,7 @@ def set_config_tep_size(
if "--enable-dp-attention" in args:
args.remove("--enable-dp-attention")

worker_service.extraPodSpec.mainContainer.args = join_arguments(args)
worker_service.extraPodSpec.mainContainer.args = args
return cfg.model_dump()

@classmethod
Expand Down Expand Up @@ -842,7 +837,7 @@ def set_config_dep_size(
# 4. Set --ep-size=dep_size (expert parallelism size)
args = set_argument_value(args, "--ep-size", str(dep_size))

worker_service.extraPodSpec.mainContainer.args = join_arguments(args)
worker_service.extraPodSpec.mainContainer.args = args
return cfg.model_dump()

@classmethod
Expand Down Expand Up @@ -989,7 +984,7 @@ def convert_config(
override_str = json.dumps(override_dict)
args = append_argument(args, ["--override-engine-args", override_str])

worker_service.extraPodSpec.mainContainer.args = join_arguments(args)
worker_service.extraPodSpec.mainContainer.args = args

elif target == "decode":
# Get service names by inferring from subComponentType first
Expand Down Expand Up @@ -1037,7 +1032,7 @@ def convert_config(
override_str = json.dumps(override_dict)
args = append_argument(args, ["--override-engine-args", override_str])

worker_service.extraPodSpec.mainContainer.args = join_arguments(args)
worker_service.extraPodSpec.mainContainer.args = args

# Set num workers to 1
# Use the inferred decode service name
Expand Down Expand Up @@ -1082,7 +1077,7 @@ def set_config_tp_size(
override_str = json.dumps(override_dict)
args = append_argument(args, ["--override-engine-args", override_str])

worker_service.extraPodSpec.mainContainer.args = join_arguments(args)
worker_service.extraPodSpec.mainContainer.args = args

return cfg.model_dump()

Expand Down
20 changes: 20 additions & 0 deletions deploy/utils/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,26 @@ This includes:

After setting up Dynamo Cloud, use this script to prepare your namespace with the additional resources needed for benchmarking and profiling workflows:

The setup script creates a `dynamo-pvc` with `ReadWriteMany` (RWX). If your cluster's default `storageClassName` does not support RWX, set `storageClassName` in `deploy/utils/manifests/pvc.yaml` to an RWX-capable class before running the script.

Example (add under `spec` in `deploy/utils/manifests/pvc.yaml`):
```yaml
...
spec:
accessModes:
- ReadWriteMany
storageClassName: <your-rwx-storageclass>
...
```

> [!TIP]
> **Check your cluster's storage classes**
>
> - List storage classes and provisioners:
> ```bash
> kubectl get sc -o wide
> ```

```bash
export NAMESPACE=your-dynamo-namespace
export HF_TOKEN=<HF_TOKEN> # Optional: for HuggingFace model access
Expand Down
8 changes: 8 additions & 0 deletions docs/benchmarks/pre_deployment_profiling.md
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,14 @@ If you see `ErrImagePull` or `ImagePullBackOff` errors with 401 unauthorized mes

3. The service account should show `imagePullSecrets` containing `nvcr-imagepullsecret`.

If it doesn't, create the secret:

```bash
export NGC_API_KEY=<your-ngc-api-key-here>
kubectl create secret docker-registry nvcr-imagepullsecret --docker-server=nvcr.io --docker-username='$oauthtoken' --docker-password=$NGC_API_KEY

```


## Running the Profiling Script with AI Configurator

Expand Down
14 changes: 11 additions & 3 deletions docs/kubernetes/installation_guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -144,16 +144,24 @@ kubectl create secret docker-registry docker-imagepullsecret \
--docker-password=${DOCKER_PASSWORD} \
--namespace=${NAMESPACE}

cd deploy/cloud/helm

# 4. Install CRDs
helm upgrade --install dynamo-crds ./crds/ --namespace default

# 5. Install Platform
helm dep build ./platform/

# To install cluster-wide instead, set NS_RESTRICT_FLAGS="" (empty) or omit that line entirely.

NS_RESTRICT_FLAGS="--set dynamo-operator.namespaceRestriction.enabled=true"
helm install dynamo-platform ./platform/ \
--namespace ${NAMESPACE} \
--namespace "${NAMESPACE}" \
--set "dynamo-operator.controllerManager.manager.image.repository=${DOCKER_SERVER}/dynamo-operator" \
--set "dynamo-operator.controllerManager.manager.image.tag=${IMAGE_TAG}" \
--set "dynamo-operator.imagePullSecrets[0].name=docker-imagepullsecret"
--set "dynamo-operator.imagePullSecrets[0].name=docker-imagepullsecret" \
${NS_RESTRICT_FLAGS}

```

→ [Verify Installation](#verify-installation)
Expand All @@ -166,7 +174,7 @@ kubectl get crd | grep dynamo

# Check operator and platform pods
kubectl get pods -n ${NAMESPACE}
# Expected: dynamo-operator-* and etcd-* pods Running
# Expected: dynamo-operator-* and etcd-* and nats-* pods Running
```

## Next Steps
Expand Down
5 changes: 3 additions & 2 deletions docs/kubernetes/sla_planner_quickstart.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ flowchart TD
Before deploying the SLA planner, ensure:
- **Dynamo platform installed** (see [Installation Guide](/docs/kubernetes/installation_guide.md))
- **[kube-prometheus-stack](/docs/kubernetes/metrics.md) installed and running.** By default, the prometheus server is not deployed in the `monitoring` namespace. If it is deployed to a different namespace, set `dynamo-operator.dynamo.metrics.prometheusEndpoint="http://prometheus-kube-prometheus-prometheus.<namespace>.svc.cluster.local:9090"`.
- **Benchmarking resources setup** (see [Kubernetes utilities for Dynamo Benchmarking and Profiling](../../deploy/utils/README.md)) The script will create a `dynamo-pvc` with `ReadWriteMany` access, if your cluster's default storageClassName does not allow `ReadWriteMany`, you need to specify a different storageClassName in `pvc.yaml`.
- **Benchmarking resources setup** (see [Kubernetes utilities for Dynamo Benchmarking and Profiling](../../deploy/utils/README.md)). The script will create a `dynamo-pvc` with `ReadWriteMany` access. If your cluster's default storageClassName does not support `ReadWriteMany`, you need to specify a different storageClassName in `deploy/utils/manifests/pvc.yaml` that does support `ReadWriteMany`.


## Pre-Deployment Profiling

Expand Down Expand Up @@ -260,4 +261,4 @@ This is because the `subComponentType` field has only been added in newer versio
---

> [!TIP]
> **Need Help?** If you encounter issues, check the [troubleshooting section](#troubleshooting) or refer to the detailed guides linked in [Next Steps](#next-steps).
> **Need Help?** If you encounter issues, check the [troubleshooting section](#troubleshooting) or refer to the detailed guides linked in [Next Steps](#next-steps).
Loading