diff --git a/benchmarks/profiler/utils/config.py b/benchmarks/profiler/utils/config.py index a84865b479..087e5c035a 100644 --- a/benchmarks/profiler/utils/config.py +++ b/benchmarks/profiler/utils/config.py @@ -145,11 +145,6 @@ def remove_valued_arguments(args: list[str], key: str) -> list[str]: return args -def join_arguments(args: list[str]) -> list[str]: - # Use shlex.join to properly quote arguments that contain spaces or special characters - return [shlex.join(args)] - - def append_argument(args: list[str], to_append) -> list[str]: idx = find_arg_index(args) if isinstance(to_append, list): @@ -469,7 +464,7 @@ def convert_config( if "--no-enable-prefix-caching" not in args: args = append_argument(args, "--no-enable-prefix-caching") - worker_service.extraPodSpec.mainContainer.args = join_arguments(args) + worker_service.extraPodSpec.mainContainer.args = args elif target == "decode": # Get service names by inferring from subComponentType first @@ -500,7 +495,7 @@ def convert_config( if "--no-enable-prefix-caching" in args: args.remove("--no-enable-prefix-caching") - worker_service.extraPodSpec.mainContainer.args = join_arguments(args) + worker_service.extraPodSpec.mainContainer.args = args # set num workers to 1 # Use the inferred decode service name @@ -537,7 +532,7 @@ def set_config_tp_size( except ValueError: args = append_argument(args, ["--tensor-parallel-size", str(tp_size)]) - worker_service.extraPodSpec.mainContainer.args = join_arguments(args) + worker_service.extraPodSpec.mainContainer.args = args return cfg.model_dump() @@ -695,7 +690,7 @@ def convert_config( if "--disable-radix-cache" not in args: args = append_argument(args, "--disable-radix-cache") - worker_service.extraPodSpec.mainContainer.args = join_arguments(args) + worker_service.extraPodSpec.mainContainer.args = args elif target == "decode": # Get service names by inferring from subComponentType first @@ -739,7 +734,7 @@ def convert_config( args, ["--load-balance-method", "round_robin"] ) - 
worker_service.extraPodSpec.mainContainer.args = join_arguments(args) + worker_service.extraPodSpec.mainContainer.args = args # set num workers to 1 # Use the inferred decode service name @@ -772,7 +767,7 @@ def set_config_tp_size( # Set --tp argument args = set_argument_value(args, "--tp", str(tp_size)) - worker_service.extraPodSpec.mainContainer.args = join_arguments(args) + worker_service.extraPodSpec.mainContainer.args = args return cfg.model_dump() @classmethod @@ -807,7 +802,7 @@ def set_config_tep_size( if "--enable-dp-attention" in args: args.remove("--enable-dp-attention") - worker_service.extraPodSpec.mainContainer.args = join_arguments(args) + worker_service.extraPodSpec.mainContainer.args = args return cfg.model_dump() @classmethod @@ -842,7 +837,7 @@ def set_config_dep_size( # 4. Set --ep-size=dep_size (expert parallelism size) args = set_argument_value(args, "--ep-size", str(dep_size)) - worker_service.extraPodSpec.mainContainer.args = join_arguments(args) + worker_service.extraPodSpec.mainContainer.args = args return cfg.model_dump() @classmethod @@ -989,7 +984,7 @@ def convert_config( override_str = json.dumps(override_dict) args = append_argument(args, ["--override-engine-args", override_str]) - worker_service.extraPodSpec.mainContainer.args = join_arguments(args) + worker_service.extraPodSpec.mainContainer.args = args elif target == "decode": # Get service names by inferring from subComponentType first @@ -1037,7 +1032,7 @@ def convert_config( override_str = json.dumps(override_dict) args = append_argument(args, ["--override-engine-args", override_str]) - worker_service.extraPodSpec.mainContainer.args = join_arguments(args) + worker_service.extraPodSpec.mainContainer.args = args # Set num workers to 1 # Use the inferred decode service name @@ -1082,7 +1077,7 @@ def set_config_tp_size( override_str = json.dumps(override_dict) args = append_argument(args, ["--override-engine-args", override_str]) - worker_service.extraPodSpec.mainContainer.args = 
join_arguments(args) + worker_service.extraPodSpec.mainContainer.args = args return cfg.model_dump() diff --git a/deploy/utils/README.md index 26b832c694..e3e5a09e8e 100644 --- a/deploy/utils/README.md +++ b/deploy/utils/README.md @@ -34,6 +34,26 @@ This includes: After setting up Dynamo Cloud, use this script to prepare your namespace with the additional resources needed for benchmarking and profiling workflows: +The setup script creates a `dynamo-pvc` with `ReadWriteMany` (RWX). If your cluster's default `storageClassName` does not support RWX, set `storageClassName` in `deploy/utils/manifests/pvc.yaml` to an RWX-capable class before running the script. + +Example (add under `spec` in `deploy/utils/manifests/pvc.yaml`): +```yaml +... +spec: + accessModes: + - ReadWriteMany + storageClassName: +... +``` + +> [!TIP] +> **Check your cluster's storage classes** +> +> - List storage classes and provisioners: +> ```bash +> kubectl get sc -o wide +> ``` + ```bash export NAMESPACE=your-dynamo-namespace export HF_TOKEN= # Optional: for HuggingFace model access diff --git a/docs/benchmarks/pre_deployment_profiling.md index afc099b9d0..6160fbb30d 100644 --- a/docs/benchmarks/pre_deployment_profiling.md +++ b/docs/benchmarks/pre_deployment_profiling.md @@ -224,6 +224,14 @@ If you see `ErrImagePull` or `ImagePullBackOff` errors with 401 unauthorized mes 3. The service account should show `imagePullSecrets` containing `nvcr-imagepullsecret`.
+If it doesn't, create the secret + +```bash +export NGC_API_KEY= +kubectl create secret docker-registry nvcr-imagepullsecret --docker-server=nvcr.io --docker-username='$oauthtoken' --docker-password=$NGC_API_KEY + +``` + ## Running the Profiling Script with AI Configurator diff --git a/docs/kubernetes/installation_guide.md b/docs/kubernetes/installation_guide.md index 15521c8948..49158c6fab 100644 --- a/docs/kubernetes/installation_guide.md +++ b/docs/kubernetes/installation_guide.md @@ -144,16 +144,24 @@ kubectl create secret docker-registry docker-imagepullsecret \ --docker-password=${DOCKER_PASSWORD} \ --namespace=${NAMESPACE} +cd deploy/cloud/helm + # 4. Install CRDs helm upgrade --install dynamo-crds ./crds/ --namespace default # 5. Install Platform helm dep build ./platform/ + +# To install cluster-wide instead, set NS_RESTRICT_FLAGS="" (empty) or omit that line entirely. + +NS_RESTRICT_FLAGS="--set dynamo-operator.namespaceRestriction.enabled=true" helm install dynamo-platform ./platform/ \ - --namespace ${NAMESPACE} \ + --namespace "${NAMESPACE}" \ --set "dynamo-operator.controllerManager.manager.image.repository=${DOCKER_SERVER}/dynamo-operator" \ --set "dynamo-operator.controllerManager.manager.image.tag=${IMAGE_TAG}" \ - --set "dynamo-operator.imagePullSecrets[0].name=docker-imagepullsecret" + --set "dynamo-operator.imagePullSecrets[0].name=docker-imagepullsecret" \ + ${NS_RESTRICT_FLAGS} + ``` → [Verify Installation](#verify-installation) @@ -166,7 +174,7 @@ kubectl get crd | grep dynamo # Check operator and platform pods kubectl get pods -n ${NAMESPACE} -# Expected: dynamo-operator-* and etcd-* pods Running +# Expected: dynamo-operator-* and etcd-* and nats-* pods Running ``` ## Next Steps diff --git a/docs/kubernetes/sla_planner_quickstart.md b/docs/kubernetes/sla_planner_quickstart.md index 9e80771f77..6eccd17be5 100644 --- a/docs/kubernetes/sla_planner_quickstart.md +++ b/docs/kubernetes/sla_planner_quickstart.md @@ -39,7 +39,8 @@ flowchart TD 
 Before deploying the SLA planner, ensure: - **Dynamo platform installed** (see [Installation Guide](/docs/kubernetes/installation_guide.md)) - **[kube-prometheus-stack](/docs/kubernetes/metrics.md) installed and running.** By default, the prometheus server is not deployed in the `monitoring` namespace. If it is deployed to a different namespace, set `dynamo-operator.dynamo.metrics.prometheusEndpoint="http://prometheus-kube-prometheus-prometheus..svc.cluster.local:9090"`. -- **Benchmarking resources setup** (see [Kubernetes utilities for Dynamo Benchmarking and Profiling](../../deploy/utils/README.md)) The script will create a `dynamo-pvc` with `ReadWriteMany` access, if your cluster's default storageClassName does not allow `ReadWriteMany`, you need to specify a different storageClassName in `pvc.yaml`. +- **Benchmarking resources setup** (see [Kubernetes utilities for Dynamo Benchmarking and Profiling](../../deploy/utils/README.md)) The script will create a `dynamo-pvc` with `ReadWriteMany` access. If your cluster's default storageClassName does not allow `ReadWriteMany`, you need to specify a different storageClassName in `deploy/utils/manifests/pvc.yaml` that supports `ReadWriteMany`. + ## Pre-Deployment Profiling @@ -260,4 +261,4 @@ This is because the `subComponentType` field has only been added in newer versio --- > [!TIP] -> **Need Help?** If you encounter issues, check the [troubleshooting section](#troubleshooting) or refer to the detailed guides linked in [Next Steps](#next-steps). \ No newline at end of file +> **Need Help?** If you encounter issues, check the [troubleshooting section](#troubleshooting) or refer to the detailed guides linked in [Next Steps](#next-steps).