diff --git a/charts/llm-d-modelservice/Chart.yaml b/charts/llm-d-modelservice/Chart.yaml index a31e0463..be1a0ee1 100644 --- a/charts/llm-d-modelservice/Chart.yaml +++ b/charts/llm-d-modelservice/Chart.yaml @@ -13,7 +13,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: "v0.4.3" +version: "v0.4.4" # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. diff --git a/examples/output-cpu.yaml b/examples/output-cpu.yaml index 7e75444e..6f20b079 100644 --- a/examples/output-cpu.yaml +++ b/examples/output-cpu.yaml @@ -6,7 +6,7 @@ kind: ServiceAccount metadata: name: cpu-sim-llm-d-modelservice labels: - helm.sh/chart: llm-d-modelservice-v0.4.3 + helm.sh/chart: llm-d-modelservice-v0.4.4 app.kubernetes.io/version: "v0.3.0" app.kubernetes.io/managed-by: Helm --- @@ -16,7 +16,7 @@ kind: Deployment metadata: name: cpu-sim-llm-d-modelservice-decode labels: - helm.sh/chart: llm-d-modelservice-v0.4.3 + helm.sh/chart: llm-d-modelservice-v0.4.4 app.kubernetes.io/version: "v0.3.0" app.kubernetes.io/managed-by: Helm spec: @@ -105,7 +105,7 @@ kind: Deployment metadata: name: cpu-sim-llm-d-modelservice-prefill labels: - helm.sh/chart: llm-d-modelservice-v0.4.3 + helm.sh/chart: llm-d-modelservice-v0.4.4 app.kubernetes.io/version: "v0.3.0" app.kubernetes.io/managed-by: Helm spec: diff --git a/examples/output-dra.yaml b/examples/output-dra.yaml index 4f929319..eaa7c932 100644 --- a/examples/output-dra.yaml +++ b/examples/output-dra.yaml @@ -6,7 +6,7 @@ kind: ServiceAccount metadata: name: dra-llm-d-modelservice labels: - helm.sh/chart: llm-d-modelservice-v0.4.3 + helm.sh/chart: llm-d-modelservice-v0.4.4 app.kubernetes.io/version: "v0.3.0" app.kubernetes.io/managed-by: Helm --- @@ -16,7 +16,7 @@ kind: Deployment metadata: name: dra-llm-d-modelservice-decode labels: - helm.sh/chart: llm-d-modelservice-v0.4.3 + helm.sh/chart: llm-d-modelservice-v0.4.4 app.kubernetes.io/version: "v0.3.0" app.kubernetes.io/managed-by: Helm spec: @@ -115,7 +115,7 @@ kind: ResourceClaimTemplate metadata: name: intel-gaudi-claim-template-decode labels: - helm.sh/chart: llm-d-modelservice-v0.4.3 + helm.sh/chart: llm-d-modelservice-v0.4.4 app.kubernetes.io/version: "v0.3.0" app.kubernetes.io/managed-by: Helm llm-d.ai/role: decode diff --git a/examples/output-gaudi.yaml b/examples/output-gaudi.yaml index 928327e4..4581a8c0 100644 --- a/examples/output-gaudi.yaml +++ b/examples/output-gaudi.yaml @@ -6,7 +6,7 @@ kind: ServiceAccount metadata: name: gaudi-llm-d-modelservice labels: - helm.sh/chart: llm-d-modelservice-v0.4.3 + helm.sh/chart: llm-d-modelservice-v0.4.4 app.kubernetes.io/version: "v0.3.0" app.kubernetes.io/managed-by: Helm --- @@ -16,7 +16,7 @@ kind: Deployment metadata: name: gaudi-llm-d-modelservice-decode labels: - helm.sh/chart: llm-d-modelservice-v0.4.3 + helm.sh/chart: llm-d-modelservice-v0.4.4 app.kubernetes.io/version: "v0.3.0" app.kubernetes.io/managed-by: Helm spec: diff --git a/examples/output-heterogeneous-pd.yaml b/examples/output-heterogeneous-pd.yaml index c1dbf8b8..e6bab5e7 100644 --- a/examples/output-heterogeneous-pd.yaml +++ b/examples/output-heterogeneous-pd.yaml @@ -6,7 +6,7 @@ kind: ServiceAccount metadata: name: heterogeneous-pd-llm-d-modelservice labels: - helm.sh/chart: llm-d-modelservice-v0.4.3 + helm.sh/chart: llm-d-modelservice-v0.4.4 app.kubernetes.io/version: "v0.3.0" app.kubernetes.io/managed-by: Helm --- @@ -16,7 +16,7 @@ kind: Deployment metadata: name: heterogeneous-pd-llm-d-modelservice-decode labels: - helm.sh/chart: llm-d-modelservice-v0.4.3 + helm.sh/chart: llm-d-modelservice-v0.4.4 app.kubernetes.io/version: "v0.3.0" app.kubernetes.io/managed-by: Helm spec: @@ -93,7 +93,7 @@ spec: fieldRef: fieldPath: status.podIP - name: VLLM_NIXL_SIDE_CHANNEL_PORT - value: "5557" + value: "5600" - name: VLLM_LOGGING_LEVEL value: DEBUG - name: DP_SIZE @@ -109,7 +109,7 @@ spec: ports: - containerPort: 8200 protocol: TCP - - containerPort: 5557 + - containerPort: 5600 protocol: TCP resources: limits: @@ -131,7 +131,7 @@ kind: Deployment metadata: name: heterogeneous-pd-llm-d-modelservice-prefill labels: - helm.sh/chart: llm-d-modelservice-v0.4.3 + helm.sh/chart: llm-d-modelservice-v0.4.4 app.kubernetes.io/version: "v0.3.0" app.kubernetes.io/managed-by: Helm spec: @@ -182,7 +182,7 @@ spec: - name: UCX_TLS value: tcp - name: VLLM_NIXL_SIDE_CHANNEL_PORT - value: "5557" + value: "5600" - name: VLLM_NIXL_SIDE_CHANNEL_HOST valueFrom: fieldRef: @@ -202,7 +202,7 @@ spec: ports: - containerPort: 8000 protocol: TCP - - containerPort: 5557 + - containerPort: 5600 protocol: TCP resources: limits: @@ -224,7 +224,7 @@ kind: ResourceClaimTemplate metadata: name: nvidia-claim-template-decode labels: - helm.sh/chart: llm-d-modelservice-v0.4.3 + helm.sh/chart: llm-d-modelservice-v0.4.4 app.kubernetes.io/version: "v0.3.0" app.kubernetes.io/managed-by: Helm llm-d.ai/role: decode diff --git a/examples/output-pd-mnnvl.yaml b/examples/output-pd-mnnvl.yaml index fdb3c0d4..3b40f3db 100644 --- a/examples/output-pd-mnnvl.yaml +++ b/examples/output-pd-mnnvl.yaml @@ -6,7 +6,7 @@ kind: ServiceAccount metadata: name: pd-mnnvl-llm-d-modelservice labels: - helm.sh/chart: llm-d-modelservice-v0.4.3 + helm.sh/chart: llm-d-modelservice-v0.4.4 app.kubernetes.io/version: "v0.3.0" app.kubernetes.io/managed-by: Helm --- @@ -16,7 +16,7 @@ kind: Deployment metadata: name: pd-mnnvl-llm-d-modelservice-decode labels: - helm.sh/chart: llm-d-modelservice-v0.4.3 + helm.sh/chart: llm-d-modelservice-v0.4.4 app.kubernetes.io/version: "v0.3.0" app.kubernetes.io/managed-by: Helm spec: @@ -91,7 +91,7 @@ spec: fieldRef: fieldPath: status.podIP - name: VLLM_NIXL_SIDE_CHANNEL_PORT - value: "5557" + value: "5600" - name: VLLM_LOGGING_LEVEL value: DEBUG - name: DP_SIZE @@ -107,7 +107,7 @@ spec: ports: - containerPort: 8200 protocol: TCP - - containerPort: 5557 + - containerPort: 5600 protocol: TCP resources: limits: @@ -131,7 +131,7 @@ kind: Deployment metadata: name: pd-mnnvl-llm-d-modelservice-prefill labels: - helm.sh/chart: llm-d-modelservice-v0.4.3 + helm.sh/chart: llm-d-modelservice-v0.4.4 app.kubernetes.io/version: "v0.3.0" app.kubernetes.io/managed-by: Helm spec: @@ -184,7 +184,7 @@ spec: - name: UCX_TLS value: cuda_ipc,cuda_copy,tcp - name: VLLM_NIXL_SIDE_CHANNEL_PORT - value: "5557" + value: "5600" - name: VLLM_NIXL_SIDE_CHANNEL_HOST valueFrom: fieldRef: @@ -204,7 +204,7 @@ spec: ports: - containerPort: 8000 protocol: TCP - - containerPort: 5557 + - containerPort: 5600 protocol: TCP resources: limits: diff --git a/examples/output-pd.yaml b/examples/output-pd.yaml index d9127aa8..fc206c98 100644 --- a/examples/output-pd.yaml +++ b/examples/output-pd.yaml @@ -6,7 +6,7 @@ kind: ServiceAccount metadata: name: pd-llm-d-modelservice labels: - helm.sh/chart: llm-d-modelservice-v0.4.3 + helm.sh/chart: llm-d-modelservice-v0.4.4 app.kubernetes.io/version: "v0.3.0" app.kubernetes.io/managed-by: Helm --- @@ -16,7 +16,7 @@ kind: Deployment metadata: name: pd-llm-d-modelservice-decode labels: - helm.sh/chart: llm-d-modelservice-v0.4.3 + helm.sh/chart: llm-d-modelservice-v0.4.4 app.kubernetes.io/version: "v0.3.0" app.kubernetes.io/managed-by: Helm spec: @@ -89,7 +89,7 @@ spec: fieldRef: fieldPath: status.podIP - name: VLLM_NIXL_SIDE_CHANNEL_PORT - value: "5557" + value: "5600" - name: VLLM_LOGGING_LEVEL value: DEBUG - name: DP_SIZE @@ -105,7 +105,7 @@ spec: ports: - containerPort: 8200 protocol: TCP - - containerPort: 5557 + - containerPort: 5600 protocol: TCP resources: limits: @@ -127,7 +127,7 @@ kind: Deployment metadata: name: pd-llm-d-modelservice-prefill labels: - helm.sh/chart: llm-d-modelservice-v0.4.3 + helm.sh/chart: llm-d-modelservice-v0.4.4 app.kubernetes.io/version: "v0.3.0" app.kubernetes.io/managed-by: Helm spec: @@ -178,7 +178,7 @@ spec: - name: UCX_TLS value: cuda_ipc,cuda_copy,tcp - name: VLLM_NIXL_SIDE_CHANNEL_PORT - value: "5557" + value: "5600" - name: VLLM_NIXL_SIDE_CHANNEL_HOST valueFrom: fieldRef: @@ -198,7 +198,7 @@ spec: ports: - containerPort: 8000 protocol: TCP - - containerPort: 5557 + - containerPort: 5600 protocol: TCP resources: limits: diff --git a/examples/output-pvc-hf.yaml b/examples/output-pvc-hf.yaml index f340b528..fefcb0a6 100644 --- a/examples/output-pvc-hf.yaml +++ b/examples/output-pvc-hf.yaml @@ -6,7 +6,7 @@ kind: ServiceAccount metadata: name: pvc-hf-llm-d-modelservice labels: - helm.sh/chart: llm-d-modelservice-v0.4.3 + helm.sh/chart: llm-d-modelservice-v0.4.4 app.kubernetes.io/version: "v0.3.0" app.kubernetes.io/managed-by: Helm --- @@ -16,7 +16,7 @@ kind: Deployment metadata: name: pvc-hf-llm-d-modelservice-decode labels: - helm.sh/chart: llm-d-modelservice-v0.4.3 + helm.sh/chart: llm-d-modelservice-v0.4.4 app.kubernetes.io/version: "v0.3.0" app.kubernetes.io/managed-by: Helm spec: @@ -89,7 +89,7 @@ spec: fieldRef: fieldPath: status.podIP - name: VLLM_NIXL_SIDE_CHANNEL_PORT - value: "5557" + value: "5600" - name: VLLM_LOGGING_LEVEL value: DEBUG - name: DP_SIZE @@ -105,7 +105,7 @@ spec: ports: - containerPort: 8200 protocol: TCP - - containerPort: 5557 + - containerPort: 5600 protocol: TCP resources: limits: @@ -127,7 +127,7 @@ kind: Deployment metadata: name: pvc-hf-llm-d-modelservice-prefill labels: - helm.sh/chart: llm-d-modelservice-v0.4.3 + helm.sh/chart: llm-d-modelservice-v0.4.4 app.kubernetes.io/version: "v0.3.0" app.kubernetes.io/managed-by: Helm spec: @@ -178,7 +178,7 @@ spec: - name: UCX_TLS value: cuda_ipc,cuda_copy,tcp - name: VLLM_NIXL_SIDE_CHANNEL_PORT - value: "5557" + value: "5600" - name: VLLM_NIXL_SIDE_CHANNEL_HOST valueFrom: fieldRef: @@ -198,7 +198,7 @@ spec: ports: - containerPort: 8000 protocol: TCP - - containerPort: 5557 + - containerPort: 5600 protocol: TCP resources: limits: diff --git a/examples/output-pvc.yaml b/examples/output-pvc.yaml index 49e65dd5..3192feab 100644 --- a/examples/output-pvc.yaml +++ b/examples/output-pvc.yaml @@ -6,7 +6,7 @@ kind: ServiceAccount metadata: name: pvc-llm-d-modelservice labels: - helm.sh/chart: llm-d-modelservice-v0.4.3 + helm.sh/chart: llm-d-modelservice-v0.4.4 app.kubernetes.io/version: "v0.3.0" app.kubernetes.io/managed-by: Helm --- @@ -16,7 +16,7 @@ kind: Deployment metadata: name: pvc-llm-d-modelservice-decode labels: - helm.sh/chart: llm-d-modelservice-v0.4.3 + helm.sh/chart: llm-d-modelservice-v0.4.4 app.kubernetes.io/version: "v0.3.0" app.kubernetes.io/managed-by: Helm spec: @@ -89,7 +89,7 @@ spec: fieldRef: fieldPath: status.podIP - name: VLLM_NIXL_SIDE_CHANNEL_PORT - value: "5557" + value: "5600" - name: VLLM_LOGGING_LEVEL value: DEBUG - name: DP_SIZE @@ -103,7 +103,7 @@ spec: ports: - containerPort: 8200 protocol: TCP - - containerPort: 5557 + - containerPort: 5600 protocol: TCP resources: limits: @@ -126,7 +126,7 @@ kind: Deployment metadata: name: pvc-llm-d-modelservice-prefill labels: - helm.sh/chart: llm-d-modelservice-v0.4.3 + helm.sh/chart: llm-d-modelservice-v0.4.4 app.kubernetes.io/version: "v0.3.0" app.kubernetes.io/managed-by: Helm spec: @@ -177,7 +177,7 @@ spec: - name: UCX_TLS value: cuda_ipc,cuda_copy,tcp - name: VLLM_NIXL_SIDE_CHANNEL_PORT - value: "5557" + value: "5600" - name: VLLM_NIXL_SIDE_CHANNEL_HOST valueFrom: fieldRef: @@ -195,7 +195,7 @@ spec: ports: - containerPort: 8000 protocol: TCP - - containerPort: 5557 + - containerPort: 5600 protocol: TCP resources: limits: diff --git a/examples/output-requester.yaml b/examples/output-requester.yaml index c7bf1387..0ef5ef8f 100644 --- a/examples/output-requester.yaml +++ b/examples/output-requester.yaml @@ -6,7 +6,7 @@ kind: ServiceAccount metadata: name: requester-llm-d-modelservice labels: - helm.sh/chart: llm-d-modelservice-v0.4.3 + helm.sh/chart: llm-d-modelservice-v0.4.4 app.kubernetes.io/version: "v0.3.0" app.kubernetes.io/managed-by: Helm --- @@ -76,7 +76,7 @@ spec: fieldRef: fieldPath: status.podIP - name: VLLM_NIXL_SIDE_CHANNEL_PORT - value: "5557" + value: "5600" - name: VLLM_LOGGING_LEVEL value: DEBUG - name: DP_SIZE @@ -92,7 +92,7 @@ spec: ports: - containerPort: 8200 protocol: TCP - - containerPort: 5557 + - containerPort: 5600 protocol: TCP resources: limits: @@ -142,7 +142,7 @@ kind: Deployment metadata: name: requester-llm-d-modelservice-prefill labels: - helm.sh/chart: llm-d-modelservice-v0.4.3 + helm.sh/chart: llm-d-modelservice-v0.4.4 app.kubernetes.io/version: "v0.3.0" app.kubernetes.io/managed-by: Helm spec: @@ -193,7 +193,7 @@ spec: - name: UCX_TLS value: cuda_ipc,cuda_copy,tcp - name: VLLM_NIXL_SIDE_CHANNEL_PORT - value: "5557" + value: "5600" - name: VLLM_NIXL_SIDE_CHANNEL_HOST valueFrom: fieldRef: @@ -213,7 +213,7 @@ spec: ports: - containerPort: 8000 protocol: TCP - - containerPort: 5557 + - containerPort: 5600 protocol: TCP resources: limits: diff --git a/examples/output-xpu-pd.yaml b/examples/output-xpu-pd.yaml index b8e05f89..3735241a 100644 --- a/examples/output-xpu-pd.yaml +++ b/examples/output-xpu-pd.yaml @@ -6,7 +6,7 @@ kind: ServiceAccount metadata: name: xpu-pd-llm-d-modelservice labels: - helm.sh/chart: llm-d-modelservice-v0.4.3 + helm.sh/chart: llm-d-modelservice-v0.4.4 app.kubernetes.io/version: "v0.3.0" app.kubernetes.io/managed-by: Helm --- @@ -16,7 +16,7 @@ kind: Deployment metadata: name: xpu-pd-llm-d-modelservice-decode labels: - helm.sh/chart: llm-d-modelservice-v0.4.3 + helm.sh/chart: llm-d-modelservice-v0.4.4 app.kubernetes.io/version: "v0.3.0" app.kubernetes.io/managed-by: Helm spec: @@ -103,7 +103,7 @@ spec: fieldRef: fieldPath: status.podIP - name: VLLM_NIXL_SIDE_CHANNEL_PORT - value: "5557" + value: "5600" - name: VLLM_LOGGING_LEVEL value: DEBUG - name: TORCH_LLM_ALLREDUCE @@ -136,7 +136,7 @@ spec: ports: - containerPort: 8200 protocol: TCP - - containerPort: 5557 + - containerPort: 5600 protocol: TCP resources: limits: @@ -158,7 +158,7 @@ kind: Deployment metadata: name: xpu-pd-llm-d-modelservice-prefill labels: - helm.sh/chart: llm-d-modelservice-v0.4.3 + helm.sh/chart: llm-d-modelservice-v0.4.4 app.kubernetes.io/version: "v0.3.0" app.kubernetes.io/managed-by: Helm spec: @@ -222,7 +222,7 @@ spec: - name: ZE_ENABLE_PCI_ID_DEVICE_ORDER value: "1" - name: VLLM_NIXL_SIDE_CHANNEL_PORT - value: "5557" + value: "5600" - name: VLLM_NIXL_SIDE_CHANNEL_HOST valueFrom: fieldRef: @@ -259,7 +259,7 @@ spec: ports: - containerPort: 8000 protocol: TCP - - containerPort: 5557 + - containerPort: 5600 protocol: TCP resources: limits: diff --git a/examples/output-xpu.yaml b/examples/output-xpu.yaml index a1a9c899..5df3542b 100644 --- a/examples/output-xpu.yaml +++ b/examples/output-xpu.yaml @@ -6,7 +6,7 @@ kind: ServiceAccount metadata: name: xpu-llm-d-modelservice labels: - helm.sh/chart: llm-d-modelservice-v0.4.3 + helm.sh/chart: llm-d-modelservice-v0.4.4 app.kubernetes.io/version: "v0.3.0" app.kubernetes.io/managed-by: Helm --- @@ -16,7 +16,7 @@ kind: Deployment metadata: name: xpu-llm-d-modelservice-decode labels: - helm.sh/chart: llm-d-modelservice-v0.4.3 + helm.sh/chart: llm-d-modelservice-v0.4.4 app.kubernetes.io/version: "v0.3.0" app.kubernetes.io/managed-by: Helm spec: diff --git a/examples/values-heterogeneous-pd.yaml b/examples/values-heterogeneous-pd.yaml index 71c5f291..54a975d9 100644 --- a/examples/values-heterogeneous-pd.yaml +++ b/examples/values-heterogeneous-pd.yaml @@ -63,13 +63,13 @@ decode: fieldRef: fieldPath: status.podIP - name: VLLM_NIXL_SIDE_CHANNEL_PORT - value: "5557" + value: "5600" - name: VLLM_LOGGING_LEVEL value: DEBUG ports: - containerPort: 8200 # from routing.proxy.targetPort protocol: TCP - - containerPort: 5557 # NIXL side channel + - containerPort: 5600 # NIXL side channel protocol: TCP resources: limits: @@ -112,7 +112,7 @@ prefill: - name: UCX_TLS value: "tcp" - name: VLLM_NIXL_SIDE_CHANNEL_PORT - value: "5557" + value: "5600" - name: VLLM_NIXL_SIDE_CHANNEL_HOST valueFrom: fieldRef: @@ -122,7 +122,7 @@ prefill: ports: - containerPort: 8000 # from routing.servicePort protocol: TCP - - containerPort: 5557 # NIXL side channel + - containerPort: 5600 # NIXL side channel protocol: TCP resources: limits: diff --git a/examples/values-pd-mnnvl.yaml b/examples/values-pd-mnnvl.yaml index 6b2efcd1..6c58c74a 100644 --- a/examples/values-pd-mnnvl.yaml +++ b/examples/values-pd-mnnvl.yaml @@ -45,13 +45,13 @@ decode: fieldRef: fieldPath: status.podIP - name: VLLM_NIXL_SIDE_CHANNEL_PORT - value: "5557" + value: "5600" - name: VLLM_LOGGING_LEVEL value: DEBUG ports: - containerPort: 8200 # from routing.proxy.targetPort protocol: TCP - - containerPort: 5557 # NIXL side channel + - containerPort: 5600 # NIXL side channel protocol: TCP resources: limits: @@ -83,7 +83,7 @@ prefill: - name: UCX_TLS value: "cuda_ipc,cuda_copy,tcp" - name: VLLM_NIXL_SIDE_CHANNEL_PORT - value: "5557" + value: "5600" - name: VLLM_NIXL_SIDE_CHANNEL_HOST valueFrom: fieldRef: @@ -93,7 +93,7 @@ prefill: ports: - containerPort: 8000 # from routing.servicePort protocol: TCP - - containerPort: 5557 # NIXL side channel + - containerPort: 5600 # NIXL side channel protocol: TCP resources: limits: diff --git a/examples/values-pd.yaml b/examples/values-pd.yaml index addf0cf6..5ef2780b 100644 --- a/examples/values-pd.yaml +++ b/examples/values-pd.yaml @@ -45,13 +45,13 @@ decode: fieldRef: fieldPath: status.podIP - name: VLLM_NIXL_SIDE_CHANNEL_PORT - value: "5557" + value: "5600" - name: VLLM_LOGGING_LEVEL value: DEBUG ports: - containerPort: 8200 # from routing.proxy.targetPort protocol: TCP - - containerPort: 5557 # NIXL side channel + - containerPort: 5600 # NIXL side channel protocol: TCP resources: limits: @@ -80,7 +80,7 @@ prefill: - name: UCX_TLS value: "cuda_ipc,cuda_copy,tcp" - name: VLLM_NIXL_SIDE_CHANNEL_PORT - value: "5557" + value: "5600" - name: VLLM_NIXL_SIDE_CHANNEL_HOST valueFrom: fieldRef: @@ -90,7 +90,7 @@ prefill: ports: - containerPort: 8000 # from routing.servicePort protocol: TCP - - containerPort: 5557 # NIXL side channel + - containerPort: 5600 # NIXL side channel protocol: TCP resources: limits: diff --git a/examples/values-requester.yaml b/examples/values-requester.yaml index 6db7a5d7..ffe331b7 100644 --- a/examples/values-requester.yaml +++ b/examples/values-requester.yaml @@ -64,13 +64,13 @@ decode: fieldRef: fieldPath: status.podIP - name: VLLM_NIXL_SIDE_CHANNEL_PORT - value: "5557" + value: "5600" - name: VLLM_LOGGING_LEVEL value: DEBUG ports: - containerPort: 8200 # from routing.proxy.targetPort protocol: TCP - - containerPort: 5557 # NIXL side channel + - containerPort: 5600 # NIXL side channel protocol: TCP resources: limits: @@ -101,7 +101,7 @@ prefill: - name: UCX_TLS value: "cuda_ipc,cuda_copy,tcp" - name: VLLM_NIXL_SIDE_CHANNEL_PORT - value: "5557" + value: "5600" - name: VLLM_NIXL_SIDE_CHANNEL_HOST valueFrom: fieldRef: @@ -111,7 +111,7 @@ prefill: ports: - containerPort: 8000 # from routing.servicePort protocol: TCP - - containerPort: 5557 # NIXL side channel + - containerPort: 5600 # NIXL side channel protocol: TCP resources: limits: diff --git a/examples/values-xpu-pd.yaml b/examples/values-xpu-pd.yaml index 2d6ef9a0..e1ee42bf 100644 --- a/examples/values-xpu-pd.yaml +++ b/examples/values-xpu-pd.yaml @@ -56,7 +56,7 @@ decode: fieldRef: fieldPath: status.podIP - name: VLLM_NIXL_SIDE_CHANNEL_PORT - value: "5557" + value: "5600" - name: VLLM_LOGGING_LEVEL value: DEBUG # Intel XPU specific environment variables @@ -73,7 +73,7 @@ decode: ports: - containerPort: 8200 protocol: TCP - - containerPort: 5557 # NIXL side channel + - containerPort: 5600 # NIXL side channel protocol: TCP resources: limits: @@ -118,7 +118,7 @@ prefill: - name: ZE_ENABLE_PCI_ID_DEVICE_ORDER value: "1" - name: VLLM_NIXL_SIDE_CHANNEL_PORT - value: "5557" + value: "5600" - name: VLLM_NIXL_SIDE_CHANNEL_HOST valueFrom: fieldRef: @@ -139,7 +139,7 @@ prefill: ports: - containerPort: 8000 protocol: TCP - - containerPort: 5557 + - containerPort: 5600 protocol: TCP resources: limits: