Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 11 additions & 16 deletions benchmarks/profiler/utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,11 +145,6 @@ def remove_valued_arguments(args: list[str], key: str) -> list[str]:
return args


def join_arguments(args: list[str]) -> list[str]:
# Use shlex.join to properly quote arguments that contain spaces or special characters
return [shlex.join(args)]


def append_argument(args: list[str], to_append) -> list[str]:
idx = find_arg_index(args)
if isinstance(to_append, list):
Expand Down Expand Up @@ -469,7 +464,7 @@ def convert_config(
if "--no-enable-prefix-caching" not in args:
args = append_argument(args, "--no-enable-prefix-caching")

worker_service.extraPodSpec.mainContainer.args = join_arguments(args)
worker_service.extraPodSpec.mainContainer.args = args

elif target == "decode":
# Get service names by inferring from subComponentType first
Expand Down Expand Up @@ -500,7 +495,7 @@ def convert_config(
if "--no-enable-prefix-caching" in args:
args.remove("--no-enable-prefix-caching")

worker_service.extraPodSpec.mainContainer.args = join_arguments(args)
worker_service.extraPodSpec.mainContainer.args = args

# set num workers to 1
# Use the inferred decode service name
Expand Down Expand Up @@ -537,7 +532,7 @@ def set_config_tp_size(
except ValueError:
args = append_argument(args, ["--tensor-parallel-size", str(tp_size)])

worker_service.extraPodSpec.mainContainer.args = join_arguments(args)
worker_service.extraPodSpec.mainContainer.args = args

return cfg.model_dump()

Expand Down Expand Up @@ -695,7 +690,7 @@ def convert_config(
if "--disable-radix-cache" not in args:
args = append_argument(args, "--disable-radix-cache")

worker_service.extraPodSpec.mainContainer.args = join_arguments(args)
worker_service.extraPodSpec.mainContainer.args = args

elif target == "decode":
# Get service names by inferring from subComponentType first
Expand Down Expand Up @@ -739,7 +734,7 @@ def convert_config(
args, ["--load-balance-method", "round_robin"]
)

worker_service.extraPodSpec.mainContainer.args = join_arguments(args)
worker_service.extraPodSpec.mainContainer.args = args

# set num workers to 1
# Use the inferred decode service name
Expand Down Expand Up @@ -772,7 +767,7 @@ def set_config_tp_size(
# Set --tp argument
args = set_argument_value(args, "--tp", str(tp_size))

worker_service.extraPodSpec.mainContainer.args = join_arguments(args)
worker_service.extraPodSpec.mainContainer.args = args
return cfg.model_dump()

@classmethod
Expand Down Expand Up @@ -807,7 +802,7 @@ def set_config_tep_size(
if "--enable-dp-attention" in args:
args.remove("--enable-dp-attention")

worker_service.extraPodSpec.mainContainer.args = join_arguments(args)
worker_service.extraPodSpec.mainContainer.args = args
return cfg.model_dump()

@classmethod
Expand Down Expand Up @@ -842,7 +837,7 @@ def set_config_dep_size(
# 4. Set --ep-size=dep_size (expert parallelism size)
args = set_argument_value(args, "--ep-size", str(dep_size))

worker_service.extraPodSpec.mainContainer.args = join_arguments(args)
worker_service.extraPodSpec.mainContainer.args = args
return cfg.model_dump()

@classmethod
Expand Down Expand Up @@ -989,7 +984,7 @@ def convert_config(
override_str = json.dumps(override_dict)
args = append_argument(args, ["--override-engine-args", override_str])

worker_service.extraPodSpec.mainContainer.args = join_arguments(args)
worker_service.extraPodSpec.mainContainer.args = args

elif target == "decode":
# Get service names by inferring from subComponentType first
Expand Down Expand Up @@ -1037,7 +1032,7 @@ def convert_config(
override_str = json.dumps(override_dict)
args = append_argument(args, ["--override-engine-args", override_str])

worker_service.extraPodSpec.mainContainer.args = join_arguments(args)
worker_service.extraPodSpec.mainContainer.args = args

# Set num workers to 1
# Use the inferred decode service name
Expand Down Expand Up @@ -1082,7 +1077,7 @@ def set_config_tp_size(
override_str = json.dumps(override_dict)
args = append_argument(args, ["--override-engine-args", override_str])

worker_service.extraPodSpec.mainContainer.args = join_arguments(args)
worker_service.extraPodSpec.mainContainer.args = args

return cfg.model_dump()

Expand Down
20 changes: 20 additions & 0 deletions deploy/utils/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,26 @@ This includes:

After setting up Dynamo Cloud, use this script to prepare your namespace with the additional resources needed for benchmarking and profiling workflows:

The setup script creates a `dynamo-pvc` with `ReadWriteMany` (RWX). If your cluster's default `storageClassName` does not support RWX, set `storageClassName` in `deploy/utils/manifests/pvc.yaml` to an RWX-capable class before running the script.

Example (add under `spec` in `deploy/utils/manifests/pvc.yaml`):
```yaml
...
spec:
accessModes:
- ReadWriteMany
storageClassName: <your-rwx-storageclass>
...
```

> [!TIP]
> **Check your cluster's storage classes**
>
> - List storage classes and provisioners:
> ```bash
> kubectl get sc -o wide
> ```

```bash
export NAMESPACE=your-dynamo-namespace
export HF_TOKEN=<HF_TOKEN> # Optional: for HuggingFace model access
Expand Down
8 changes: 8 additions & 0 deletions docs/benchmarks/pre_deployment_profiling.md
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,14 @@ If you see `ErrImagePull` or `ImagePullBackOff` errors with 401 unauthorized mes

3. The service account should show `imagePullSecrets` containing `nvcr-imagepullsecret`.

If it doesn't, create the secret:

```bash
export NGC_API_KEY=<your-ngc-api-key-here>
kubectl create secret docker-registry nvcr-imagepullsecret --docker-server=nvcr.io --docker-username='$oauthtoken' --docker-password=$NGC_API_KEY

```


## Running the Profiling Script with AI Configurator

Expand Down
14 changes: 11 additions & 3 deletions docs/kubernetes/installation_guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -144,16 +144,24 @@ kubectl create secret docker-registry docker-imagepullsecret \
--docker-password=${DOCKER_PASSWORD} \
--namespace=${NAMESPACE}

cd deploy/cloud/helm

# 4. Install CRDs
helm upgrade --install dynamo-crds ./crds/ --namespace default

# 5. Install Platform
helm dep build ./platform/

# To install cluster-wide instead, set NS_RESTRICT_FLAGS="" (empty) or omit that line entirely.

NS_RESTRICT_FLAGS="--set dynamo-operator.namespaceRestriction.enabled=true"
helm install dynamo-platform ./platform/ \
--namespace ${NAMESPACE} \
--namespace "${NAMESPACE}" \
--set "dynamo-operator.controllerManager.manager.image.repository=${DOCKER_SERVER}/dynamo-operator" \
--set "dynamo-operator.controllerManager.manager.image.tag=${IMAGE_TAG}" \
--set "dynamo-operator.imagePullSecrets[0].name=docker-imagepullsecret"
--set "dynamo-operator.imagePullSecrets[0].name=docker-imagepullsecret" \
${NS_RESTRICT_FLAGS}

```

→ [Verify Installation](#verify-installation)
Expand All @@ -166,7 +174,7 @@ kubectl get crd | grep dynamo

# Check operator and platform pods
kubectl get pods -n ${NAMESPACE}
# Expected: dynamo-operator-* and etcd-* pods Running
# Expected: dynamo-operator-* and etcd-* and nats-* pods Running
```

## Next Steps
Expand Down
5 changes: 3 additions & 2 deletions docs/kubernetes/sla_planner_quickstart.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ flowchart TD
Before deploying the SLA planner, ensure:
- **Dynamo platform installed** (see [Installation Guide](/docs/kubernetes/installation_guide.md))
- **[kube-prometheus-stack](/docs/kubernetes/metrics.md) installed and running.** By default, the prometheus server is not deployed in the `monitoring` namespace. If it is deployed to a different namespace, set `dynamo-operator.dynamo.metrics.prometheusEndpoint="http://prometheus-kube-prometheus-prometheus.<namespace>.svc.cluster.local:9090"`.
- **Benchmarking resources setup** (see [Kubernetes utilities for Dynamo Benchmarking and Profiling](../../deploy/utils/README.md)) The script will create a `dynamo-pvc` with `ReadWriteMany` access, if your cluster's default storageClassName does not allow `ReadWriteMany`, you need to specify a different storageClassName in `pvc.yaml`.
- **Benchmarking resources setup** (see [Kubernetes utilities for Dynamo Benchmarking and Profiling](../../deploy/utils/README.md)). The script will create a `dynamo-pvc` with `ReadWriteMany` access. If your cluster's default storageClassName does not support `ReadWriteMany`, you need to specify a different storageClassName in `deploy/utils/manifests/pvc.yaml` that does support `ReadWriteMany`.


## Pre-Deployment Profiling

Expand Down Expand Up @@ -260,4 +261,4 @@ This is because the `subComponentType` field has only been added in newer versio
---

> [!TIP]
> **Need Help?** If you encounter issues, check the [troubleshooting section](#troubleshooting) or refer to the detailed guides linked in [Next Steps](#next-steps).
> **Need Help?** If you encounter issues, check the [troubleshooting section](#troubleshooting) or refer to the detailed guides linked in [Next Steps](#next-steps).
Loading