From 7448483a2f77841229332aa8aae9e4dba7043c4f Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Mon, 6 Oct 2025 23:31:13 -0700 Subject: [PATCH] feat: use generic image and use single node --- recipes/gpt-oss-120b/trtllm/agg/deploy.yaml | 10 +++++----- recipes/gpt-oss-120b/trtllm/agg/perf.yaml | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/recipes/gpt-oss-120b/trtllm/agg/deploy.yaml b/recipes/gpt-oss-120b/trtllm/agg/deploy.yaml index 6f725af31088..16be6ffae0a5 100644 --- a/recipes/gpt-oss-120b/trtllm/agg/deploy.yaml +++ b/recipes/gpt-oss-120b/trtllm/agg/deploy.yaml @@ -30,12 +30,12 @@ spec: command: - /bin/sh - -c - image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.5.1-rc0.pre3 + image: my-registry/trtllm-runtime:my-tag pvc: create: false mountPoint: /model-store name: model-cache - replicas: 18 + replicas: 1 TrtllmWorker: componentType: main dynamoNamespace: gpt-oss-agg @@ -69,7 +69,7 @@ spec: command: - /bin/sh - -c - image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.5.1-rc0.pre3 + image: my-registry/trtllm-runtime:my-tag env: - name: TRTLLM_ENABLE_PDL value: "1" @@ -80,7 +80,7 @@ spec: - name: ENGINE_ARGS value: "/opt/dynamo/configs/config.yaml" - name: MODEL_PATH - value: "/model-store/models--openai--gpt-oss-120b/snapshots/b5c939de8f754692c1647ca79fbf85e8c1e70f8a" + value: "/model-store/hub/models--openai--gpt-oss-120b/snapshots/b5c939de8f754692c1647ca79fbf85e8c1e70f8a" volumeMounts: - mountPath: /opt/dynamo/configs name: llm-config @@ -94,7 +94,7 @@ spec: create: false mountPoint: /model-store name: model-cache - replicas: 18 + replicas: 1 resources: limits: gpu: "4" diff --git a/recipes/gpt-oss-120b/trtllm/agg/perf.yaml b/recipes/gpt-oss-120b/trtllm/agg/perf.yaml index 02843db6851f..eed5d69addbf 100644 --- a/recipes/gpt-oss-120b/trtllm/agg/perf.yaml +++ b/recipes/gpt-oss-120b/trtllm/agg/perf.yaml @@ -114,7 +114,7 @@ spec: - name: CONCURRENCY_PER_GPU value: "900" - name: DEPLOYMENT_GPU_COUNT - value: "72" + value: "4" - name: ISL value: "128" - name: OSL