ai-dynamo · arunraman · Sep 15, 2025 · Sep 16, 2025 · Sep 16, 2025 · Sep 16, 2025
@@ -36,6 +36,7 @@ Platform-specific deployment guides for production environments:
 
 - **[Amazon EKS](deployments/EKS/)** - Deploy Dynamo on Amazon Elastic Kubernetes Service
 - **[Azure AKS](deployments/AKS/)** - Deploy Dynamo on Azure Kubernetes Service
+- **[LLM Router](deployments/LLM%20Router/)** - Intelligent LLM request routing with NVIDIA Dynamo integration
 - **[Router Standalone](deployments/router_standalone/)** - Standalone router deployment patterns
 - **Amazon ECS** - _Coming soon_
 - **Google GKE** - _Coming soon_

diff --git a/examples/deployments/LLM Router/README.md b/examples/deployments/LLM Router/README.md
diff --git a/examples/deployments/LLM Router/agg.yaml b/examples/deployments/LLM Router/agg.yaml
@@ -0,0 +1,26 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: nvidia.com/v1alpha1
+kind: DynamoGraphDeployment  # vLLM graph with a single worker service (VllmDecodeWorker)
+metadata:
+  name: vllm-agg-${MODEL_SUFFIX}  # ${...} placeholders are presumably substituted (e.g. envsubst) before apply; confirm in README
+spec:
+  services:
+    VllmDecodeWorker:
+      envFromSecret: hf-token-secret  # Secret name; presumably supplies the Hugging Face token, verify
+      dynamoNamespace: vllm-agg  # same dynamoNamespace value as the Frontend service in frontend.yaml
+      componentType: worker
+      replicas: 1
+      resources:
+        limits:
+          gpu: "1"  # quoted so the value stays a string
+      extraPodSpec:
+        mainContainer:
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:${DYNAMO_VERSION}
+          workingDir: /workspace/components/backends/vllm
+          command:  # /bin/sh -c executes the single string given in args
+            - /bin/sh
+            - -c
+          args:
+            - "python3 -m dynamo.vllm --model ${MODEL_NAME}"  # quoted (as in disagg.yaml) so a substituted value containing ': ' or ' #' cannot break the scalar
diff --git a/examples/deployments/LLM Router/disagg.yaml b/examples/deployments/LLM Router/disagg.yaml
@@ -0,0 +1,43 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: nvidia.com/v1alpha1
+kind: DynamoGraphDeployment  # vLLM graph with two worker services: VllmDecodeWorker and VllmPrefillWorker
+metadata:
+  name: vllm-disagg-${MODEL_SUFFIX}  # ${...} placeholders are presumably substituted (e.g. envsubst) before apply; confirm in README
+spec:
+  services:
+    VllmDecodeWorker:
+      dynamoNamespace: vllm-agg  # NOTE(review): "vllm-agg" although this is the disagg graph; matches frontend.yaml, looks intentional, confirm
+      envFromSecret: hf-token-secret  # Secret name; presumably supplies the Hugging Face token, verify
+      componentType: worker
+      replicas: 1
+      resources:
+        limits:
+          gpu: "1"  # quoted so the value stays a string
+      extraPodSpec:
+        mainContainer:
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:${DYNAMO_VERSION}
+          workingDir: /workspace/components/backends/vllm
+          command:  # /bin/sh -c executes the single string given in args
+            - /bin/sh
+            - -c
+          args:
+            - "python3 -m dynamo.vllm --model ${MODEL_NAME}"
+    VllmPrefillWorker:
+      dynamoNamespace: vllm-agg  # same value as VllmDecodeWorker above and the Frontend in frontend.yaml
+      envFromSecret: hf-token-secret
+      componentType: worker
+      replicas: 1
+      resources:
+        limits:
+          gpu: "1"
+      extraPodSpec:
+        mainContainer:
+          image: nvcr.io/nvidia/ai-dynamo/vllm-runtime:${DYNAMO_VERSION}
+          workingDir: /workspace/components/backends/vllm
+          command:
+            - /bin/sh
+            - -c
+          args:
+            - "python3 -m dynamo.vllm --model ${MODEL_NAME} --is-prefill-worker"  # only difference from the decode worker command is --is-prefill-worker
diff --git a/examples/deployments/LLM Router/frontend.yaml b/examples/deployments/LLM Router/frontend.yaml
@@ -0,0 +1,16 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: nvidia.com/v1alpha1
+kind: DynamoGraphDeployment  # graph containing only the Frontend service
+metadata:
+  name: vllm-frontend
+spec:
+  services:
+    Frontend:
+      componentType: frontend
+      replicas: 1
+      dynamoNamespace: vllm-agg  # same dynamoNamespace value the workers in agg.yaml and disagg.yaml use
+      extraPodSpec:
+        mainContainer:
+          image: "nvcr.io/nvidia/ai-dynamo/vllm-runtime:${DYNAMO_VERSION}"  # ${DYNAMO_VERSION} is a placeholder; presumably substituted before apply
diff --git a/examples/deployments/LLM Router/llm-router-values-override.yaml b/examples/deployments/LLM Router/llm-router-values-override.yaml
@@ -0,0 +1,116 @@
+##
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+##
+
+# LLM Router Helm Values for NVIDIA Dynamo Cloud Platform Integration
+# Based on official sample: https://github.com/NVIDIA-AI-Blueprints/llm-router/blob/main/deploy/helm/llm-router/values.override.yaml.sample
+# Uses official External ConfigMap strategy for custom configuration
+
+# Global configuration (following official sample structure)
+# NOTE: Update imageRegistry and imagePullSecrets before deployment (see README Step 6)
+global:
+  imageRegistry: 'YOUR_REGISTRY_HERE/'  # placeholder: replace with your container registry (the placeholder ends with a slash)
+  imagePullSecrets:
+    - name: nvcr-secret  # change to the pull secret that holds credentials for that registry
+  storageClass: 'standard'
+
+# Router Controller Configuration
+routerController:  # Helm values for the router controller component
+  enabled: true
+  replicas: 1
+  image:
+    repository: llm-router-controller  # Will be prefixed with global.imageRegistry
+    tag: latest  # NOTE(review): a mutable tag with pullPolicy IfNotPresent can leave a stale cached image on a node; consider pinning
+    pullPolicy: IfNotPresent
+
+  service:
+    type: ClusterIP
+    port: 8084
+
+  # Dynamo-specific environment variables
+  env:
+    - name: LOG_LEVEL
+      value: "INFO"
+    - name: ENABLE_METRICS
+      value: "true"  # quoted: container env values are strings
+    - name: DYNAMO_API_BASE
+      value: "http://vllm-frontend-frontend.dynamo-kubernetes.svc.cluster.local:8000"  # NOTE(review): assumes a Service vllm-frontend-frontend in namespace dynamo-kubernetes; confirm
+    - name: DYNAMO_API_KEY
+      valueFrom:
+        secretKeyRef:
+          name: dynamo-api-secret  # Secret that must provide the key below
+          key: DYNAMO_API_KEY
+
+  # STRATEGY 1: External ConfigMap (Official Support)
+  # Uses the official Helm chart's external ConfigMap feature
+  config:
+    existingConfigMap: "router-config-dynamo"  # Points to our router configuration
+
+# Router Server Configuration
+routerServer:  # Helm values for the router server component
+  enabled: true
+  replicas: 1  # Single replica for simpler deployment
+  image:
+    repository: llm-router-server
+    tag: latest  # NOTE(review): mutable tag with pullPolicy IfNotPresent; consider pinning
+    pullPolicy: IfNotPresent
+  env:  # all three Hugging Face cache locations point under /tmp/huggingface
+    - name: HF_HOME
+      value: "/tmp/huggingface"
+    - name: TRANSFORMERS_CACHE
+      value: "/tmp/huggingface/transformers"
+    - name: HF_HUB_CACHE
+      value: "/tmp/huggingface/hub"
+  resources:  # requests equal limits for both GPU and memory
+    limits:
+      nvidia.com/gpu: 1
+      memory: "8Gi"
+    requests:
+      nvidia.com/gpu: 1
+      memory: "8Gi"
+  # Model repository configuration
+  modelRepository:
+    path: "/model_repository/routers"  # located under the mountPath configured below
+  volumes:
+    modelRepository:
+      enabled: true
+      mountPath: "/model_repository"
+      storage:
+        persistentVolumeClaim:
+          enabled: true
+          existingClaim: "router-models-pvc"  # references an existing PVC by name; presumably created before install, verify in README
+  service:
+    type: ClusterIP
+  shm_size: "8G"  # presumably the shared-memory size for the server pod; confirm against the chart
+
+# Ingress Configuration (disabled: the services are reached from inside the cluster only)
+ingress:
+  enabled: false  # the settings below take effect only once this is true
+  className: 'nginx'
+  hosts:
+    - host: 'llm-router.local'
+      paths:
+        - service: app
+          pathType: ImplementationSpecific
+          path: '/app(/|$)(.*)'  # second capture group is what rewrite-target /$2 refers to
+        - service: router-controller
+          pathType: ImplementationSpecific
+          path: '/router-controller(/|$)(.*)'
+  annotations:
+    nginx.ingress.kubernetes.io/proxy-body-size: '10m'
+    nginx.ingress.kubernetes.io/proxy-read-timeout: '300'
+    nginx.ingress.kubernetes.io/proxy-send-timeout: '300'
+    nginx.ingress.kubernetes.io/ssl-redirect: 'false'  # annotation values are strings, hence quoted
+    nginx.ingress.kubernetes.io/rewrite-target: '/$2'
+
+# Demo app (enabled here; set enabled to false to skip the demo web interface)
+app:
+  enabled: true  # deploys the demo web interface
+  replicas: 1  # one replica keeps the deployment simple
+  service:
+    type: 'ClusterIP'
+  image:
+    repository: 'llm-router-app'
+    pullPolicy: 'IfNotPresent'
+    tag: 'latest'
diff --git a/examples/deployments/LLM Router/router-config-dynamo.yaml b/examples/deployments/LLM Router/router-config-dynamo.yaml
@@ -0,0 +1,122 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# LLM Router Configuration for NVIDIA Dynamo Integration
+# This configuration routes requests to the official NVIDIA Dynamo Cloud Platform
+# deployment using the proper service endpoints
+#
+# Based on: https://docs.nvidia.com/dynamo/latest/guides/dynamo_deploy/dynamo_cloud.html
+# API Key pattern follows: https://github.com/NVIDIA-AI-Blueprints/llm-router/blob/main/deploy/helm/llm-router/templates/router-controller-configmap.yaml
+#
+# IMPORTANT: This config references the 3 models for intelligent routing:
+# - meta-llama/Llama-3.1-8B-Instruct   (Fast model for simple tasks)
+# - meta-llama/Llama-3.1-70B-Instruct  (Powerful model for complex tasks)
+# - mistralai/Mixtral-8x22B-Instruct-v0.1 (Creative model for conversational tasks)
+#
+# To add more models:
+# 1. Deploy the model using the pattern in Step 2 of README.md
+# 2. Add router entries below following the same format
+#
+# NOTE: Environment variables are resolved at runtime:
+# - ${DYNAMO_API_BASE}: Points to the Dynamo service endpoint
+# - ${DYNAMO_API_KEY}: API key for authenticating with Dynamo services
+#
+# These variables are populated from:
+# - Environment variable: DYNAMO_API_BASE (set under routerController.env in llm-router-values-override.yaml)
+# - Secret: DYNAMO_API_KEY (created during deployment setup)
+
+policies:  # list of routing policies; this file defines task_router and complexity_router
+  - name: "task_router"
+    url: http://llm-router-router-server.llm-router.svc.cluster.local:8000/v2/models/task_router_ensemble/infer  # NOTE(review): assumes the router server Service lives in namespace llm-router; confirm
+    llms:  # one entry per label; presumably the label names must match the classifier output exactly, verify
+      - name: Brainstorming
+        api_base: ${DYNAMO_API_BASE}
+        api_key: ${DYNAMO_API_KEY}
+        model: meta-llama/Llama-3.1-70B-Instruct
+      - name: Chatbot
+        api_base: ${DYNAMO_API_BASE}
+        api_key: ${DYNAMO_API_KEY}
+        model: mistralai/Mixtral-8x22B-Instruct-v0.1
+      - name: Classification
+        api_base: ${DYNAMO_API_BASE}
+        api_key: ${DYNAMO_API_KEY}
+        model: meta-llama/Llama-3.1-8B-Instruct
+      - name: Closed QA
+        api_base: ${DYNAMO_API_BASE}
+        api_key: ${DYNAMO_API_KEY}
+        model: meta-llama/Llama-3.1-70B-Instruct
+      - name: Code Generation
+        api_base: ${DYNAMO_API_BASE}
+        api_key: ${DYNAMO_API_KEY}
+        model: meta-llama/Llama-3.1-70B-Instruct
+      - name: Extraction
+        api_base: ${DYNAMO_API_BASE}
+        api_key: ${DYNAMO_API_KEY}
+        model: meta-llama/Llama-3.1-8B-Instruct
+      - name: Open QA
+        api_base: ${DYNAMO_API_BASE}
+        api_key: ${DYNAMO_API_KEY}
+        model: meta-llama/Llama-3.1-70B-Instruct
+      - name: Other
+        api_base: ${DYNAMO_API_BASE}
+        api_key: ${DYNAMO_API_KEY}
+        model: mistralai/Mixtral-8x22B-Instruct-v0.1
+      - name: Rewrite
+        api_base: ${DYNAMO_API_BASE}
+        api_key: ${DYNAMO_API_KEY}
+        model: meta-llama/Llama-3.1-8B-Instruct
+      - name: Summarization
+        api_base: ${DYNAMO_API_BASE}
+        api_key: ${DYNAMO_API_KEY}
+        model: meta-llama/Llama-3.1-70B-Instruct
+      - name: Text Generation
+        api_base: ${DYNAMO_API_BASE}
+        api_key: ${DYNAMO_API_KEY}
+        model: mistralai/Mixtral-8x22B-Instruct-v0.1
+      - name: Unknown
+        api_base: ${DYNAMO_API_BASE}
+        api_key: ${DYNAMO_API_KEY}
+        model: meta-llama/Llama-3.1-8B-Instruct
+  - name: "complexity_router"
+    url: http://llm-router-router-server.llm-router.svc.cluster.local:8000/v2/models/complexity_router_ensemble/infer  # same host as task_router, different model path
+    llms:  # every entry uses the same api_base and api_key; only the model differs per label
+      - name: Creativity
+        api_base: ${DYNAMO_API_BASE}
+        api_key: ${DYNAMO_API_KEY}
+        model: meta-llama/Llama-3.1-70B-Instruct
+      - name: Reasoning
+        api_base: ${DYNAMO_API_BASE}
+        api_key: ${DYNAMO_API_KEY}
+        model: meta-llama/Llama-3.1-70B-Instruct
+      - name: Contextual-Knowledge
+        api_base: ${DYNAMO_API_BASE}
+        api_key: ${DYNAMO_API_KEY}
+        model: meta-llama/Llama-3.1-8B-Instruct
+      - name: Few-Shot
+        api_base: ${DYNAMO_API_BASE}
+        api_key: ${DYNAMO_API_KEY}
+        model: meta-llama/Llama-3.1-70B-Instruct
+      - name: Domain-Knowledge
+        api_base: ${DYNAMO_API_BASE}
+        api_key: ${DYNAMO_API_KEY}
+        model: mistralai/Mixtral-8x22B-Instruct-v0.1
+      - name: No-Label-Reason
+        api_base: ${DYNAMO_API_BASE}
+        api_key: ${DYNAMO_API_KEY}
+        model: meta-llama/Llama-3.1-8B-Instruct
+      - name: Constraint
+        api_base: ${DYNAMO_API_BASE}
+        api_key: ${DYNAMO_API_KEY}
+        model: meta-llama/Llama-3.1-8B-Instruct