Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion charts/llm-d-modelservice/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: "v0.4.3"
version: "v0.4.4"
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
Expand Down
6 changes: 3 additions & 3 deletions examples/output-cpu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ kind: ServiceAccount
metadata:
name: cpu-sim-llm-d-modelservice
labels:
helm.sh/chart: llm-d-modelservice-v0.4.3
helm.sh/chart: llm-d-modelservice-v0.4.4
app.kubernetes.io/version: "v0.3.0"
app.kubernetes.io/managed-by: Helm
---
Expand All @@ -16,7 +16,7 @@ kind: Deployment
metadata:
name: cpu-sim-llm-d-modelservice-decode
labels:
helm.sh/chart: llm-d-modelservice-v0.4.3
helm.sh/chart: llm-d-modelservice-v0.4.4
app.kubernetes.io/version: "v0.3.0"
app.kubernetes.io/managed-by: Helm
spec:
Expand Down Expand Up @@ -105,7 +105,7 @@ kind: Deployment
metadata:
name: cpu-sim-llm-d-modelservice-prefill
labels:
helm.sh/chart: llm-d-modelservice-v0.4.3
helm.sh/chart: llm-d-modelservice-v0.4.4
app.kubernetes.io/version: "v0.3.0"
app.kubernetes.io/managed-by: Helm
spec:
Expand Down
6 changes: 3 additions & 3 deletions examples/output-dra.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ kind: ServiceAccount
metadata:
name: dra-llm-d-modelservice
labels:
helm.sh/chart: llm-d-modelservice-v0.4.3
helm.sh/chart: llm-d-modelservice-v0.4.4
app.kubernetes.io/version: "v0.3.0"
app.kubernetes.io/managed-by: Helm
---
Expand All @@ -16,7 +16,7 @@ kind: Deployment
metadata:
name: dra-llm-d-modelservice-decode
labels:
helm.sh/chart: llm-d-modelservice-v0.4.3
helm.sh/chart: llm-d-modelservice-v0.4.4
app.kubernetes.io/version: "v0.3.0"
app.kubernetes.io/managed-by: Helm
spec:
Expand Down Expand Up @@ -115,7 +115,7 @@ kind: ResourceClaimTemplate
metadata:
name: intel-gaudi-claim-template-decode
labels:
helm.sh/chart: llm-d-modelservice-v0.4.3
helm.sh/chart: llm-d-modelservice-v0.4.4
app.kubernetes.io/version: "v0.3.0"
app.kubernetes.io/managed-by: Helm
llm-d.ai/role: decode
Expand Down
4 changes: 2 additions & 2 deletions examples/output-gaudi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ kind: ServiceAccount
metadata:
name: gaudi-llm-d-modelservice
labels:
helm.sh/chart: llm-d-modelservice-v0.4.3
helm.sh/chart: llm-d-modelservice-v0.4.4
app.kubernetes.io/version: "v0.3.0"
app.kubernetes.io/managed-by: Helm
---
Expand All @@ -16,7 +16,7 @@ kind: Deployment
metadata:
name: gaudi-llm-d-modelservice-decode
labels:
helm.sh/chart: llm-d-modelservice-v0.4.3
helm.sh/chart: llm-d-modelservice-v0.4.4
app.kubernetes.io/version: "v0.3.0"
app.kubernetes.io/managed-by: Helm
spec:
Expand Down
16 changes: 8 additions & 8 deletions examples/output-heterogeneous-pd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ kind: ServiceAccount
metadata:
name: heterogeneous-pd-llm-d-modelservice
labels:
helm.sh/chart: llm-d-modelservice-v0.4.3
helm.sh/chart: llm-d-modelservice-v0.4.4
app.kubernetes.io/version: "v0.3.0"
app.kubernetes.io/managed-by: Helm
---
Expand All @@ -16,7 +16,7 @@ kind: Deployment
metadata:
name: heterogeneous-pd-llm-d-modelservice-decode
labels:
helm.sh/chart: llm-d-modelservice-v0.4.3
helm.sh/chart: llm-d-modelservice-v0.4.4
app.kubernetes.io/version: "v0.3.0"
app.kubernetes.io/managed-by: Helm
spec:
Expand Down Expand Up @@ -93,7 +93,7 @@ spec:
fieldRef:
fieldPath: status.podIP
- name: VLLM_NIXL_SIDE_CHANNEL_PORT
value: "5557"
value: "5600"
- name: VLLM_LOGGING_LEVEL
value: DEBUG
- name: DP_SIZE
Expand All @@ -109,7 +109,7 @@ spec:
ports:
- containerPort: 8200
protocol: TCP
- containerPort: 5557
- containerPort: 5600
protocol: TCP
resources:
limits:
Expand All @@ -131,7 +131,7 @@ kind: Deployment
metadata:
name: heterogeneous-pd-llm-d-modelservice-prefill
labels:
helm.sh/chart: llm-d-modelservice-v0.4.3
helm.sh/chart: llm-d-modelservice-v0.4.4
app.kubernetes.io/version: "v0.3.0"
app.kubernetes.io/managed-by: Helm
spec:
Expand Down Expand Up @@ -182,7 +182,7 @@ spec:
- name: UCX_TLS
value: tcp
- name: VLLM_NIXL_SIDE_CHANNEL_PORT
value: "5557"
value: "5600"
- name: VLLM_NIXL_SIDE_CHANNEL_HOST
valueFrom:
fieldRef:
Expand All @@ -202,7 +202,7 @@ spec:
ports:
- containerPort: 8000
protocol: TCP
- containerPort: 5557
- containerPort: 5600
protocol: TCP
resources:
limits:
Expand All @@ -224,7 +224,7 @@ kind: ResourceClaimTemplate
metadata:
name: nvidia-claim-template-decode
labels:
helm.sh/chart: llm-d-modelservice-v0.4.3
helm.sh/chart: llm-d-modelservice-v0.4.4
app.kubernetes.io/version: "v0.3.0"
app.kubernetes.io/managed-by: Helm
llm-d.ai/role: decode
Expand Down
14 changes: 7 additions & 7 deletions examples/output-pd-mnnvl.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ kind: ServiceAccount
metadata:
name: pd-mnnvl-llm-d-modelservice
labels:
helm.sh/chart: llm-d-modelservice-v0.4.3
helm.sh/chart: llm-d-modelservice-v0.4.4
app.kubernetes.io/version: "v0.3.0"
app.kubernetes.io/managed-by: Helm
---
Expand All @@ -16,7 +16,7 @@ kind: Deployment
metadata:
name: pd-mnnvl-llm-d-modelservice-decode
labels:
helm.sh/chart: llm-d-modelservice-v0.4.3
helm.sh/chart: llm-d-modelservice-v0.4.4
app.kubernetes.io/version: "v0.3.0"
app.kubernetes.io/managed-by: Helm
spec:
Expand Down Expand Up @@ -91,7 +91,7 @@ spec:
fieldRef:
fieldPath: status.podIP
- name: VLLM_NIXL_SIDE_CHANNEL_PORT
value: "5557"
value: "5600"
- name: VLLM_LOGGING_LEVEL
value: DEBUG
- name: DP_SIZE
Expand All @@ -107,7 +107,7 @@ spec:
ports:
- containerPort: 8200
protocol: TCP
- containerPort: 5557
- containerPort: 5600
protocol: TCP
resources:
limits:
Expand All @@ -131,7 +131,7 @@ kind: Deployment
metadata:
name: pd-mnnvl-llm-d-modelservice-prefill
labels:
helm.sh/chart: llm-d-modelservice-v0.4.3
helm.sh/chart: llm-d-modelservice-v0.4.4
app.kubernetes.io/version: "v0.3.0"
app.kubernetes.io/managed-by: Helm
spec:
Expand Down Expand Up @@ -184,7 +184,7 @@ spec:
- name: UCX_TLS
value: cuda_ipc,cuda_copy,tcp
- name: VLLM_NIXL_SIDE_CHANNEL_PORT
value: "5557"
value: "5600"
- name: VLLM_NIXL_SIDE_CHANNEL_HOST
valueFrom:
fieldRef:
Expand All @@ -204,7 +204,7 @@ spec:
ports:
- containerPort: 8000
protocol: TCP
- containerPort: 5557
- containerPort: 5600
protocol: TCP
resources:
limits:
Expand Down
14 changes: 7 additions & 7 deletions examples/output-pd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ kind: ServiceAccount
metadata:
name: pd-llm-d-modelservice
labels:
helm.sh/chart: llm-d-modelservice-v0.4.3
helm.sh/chart: llm-d-modelservice-v0.4.4
app.kubernetes.io/version: "v0.3.0"
app.kubernetes.io/managed-by: Helm
---
Expand All @@ -16,7 +16,7 @@ kind: Deployment
metadata:
name: pd-llm-d-modelservice-decode
labels:
helm.sh/chart: llm-d-modelservice-v0.4.3
helm.sh/chart: llm-d-modelservice-v0.4.4
app.kubernetes.io/version: "v0.3.0"
app.kubernetes.io/managed-by: Helm
spec:
Expand Down Expand Up @@ -89,7 +89,7 @@ spec:
fieldRef:
fieldPath: status.podIP
- name: VLLM_NIXL_SIDE_CHANNEL_PORT
value: "5557"
value: "5600"
- name: VLLM_LOGGING_LEVEL
value: DEBUG
- name: DP_SIZE
Expand All @@ -105,7 +105,7 @@ spec:
ports:
- containerPort: 8200
protocol: TCP
- containerPort: 5557
- containerPort: 5600
protocol: TCP
resources:
limits:
Expand All @@ -127,7 +127,7 @@ kind: Deployment
metadata:
name: pd-llm-d-modelservice-prefill
labels:
helm.sh/chart: llm-d-modelservice-v0.4.3
helm.sh/chart: llm-d-modelservice-v0.4.4
app.kubernetes.io/version: "v0.3.0"
app.kubernetes.io/managed-by: Helm
spec:
Expand Down Expand Up @@ -178,7 +178,7 @@ spec:
- name: UCX_TLS
value: cuda_ipc,cuda_copy,tcp
- name: VLLM_NIXL_SIDE_CHANNEL_PORT
value: "5557"
value: "5600"
- name: VLLM_NIXL_SIDE_CHANNEL_HOST
valueFrom:
fieldRef:
Expand All @@ -198,7 +198,7 @@ spec:
ports:
- containerPort: 8000
protocol: TCP
- containerPort: 5557
- containerPort: 5600
protocol: TCP
resources:
limits:
Expand Down
14 changes: 7 additions & 7 deletions examples/output-pvc-hf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ kind: ServiceAccount
metadata:
name: pvc-hf-llm-d-modelservice
labels:
helm.sh/chart: llm-d-modelservice-v0.4.3
helm.sh/chart: llm-d-modelservice-v0.4.4
app.kubernetes.io/version: "v0.3.0"
app.kubernetes.io/managed-by: Helm
---
Expand All @@ -16,7 +16,7 @@ kind: Deployment
metadata:
name: pvc-hf-llm-d-modelservice-decode
labels:
helm.sh/chart: llm-d-modelservice-v0.4.3
helm.sh/chart: llm-d-modelservice-v0.4.4
app.kubernetes.io/version: "v0.3.0"
app.kubernetes.io/managed-by: Helm
spec:
Expand Down Expand Up @@ -89,7 +89,7 @@ spec:
fieldRef:
fieldPath: status.podIP
- name: VLLM_NIXL_SIDE_CHANNEL_PORT
value: "5557"
value: "5600"
- name: VLLM_LOGGING_LEVEL
value: DEBUG
- name: DP_SIZE
Expand All @@ -105,7 +105,7 @@ spec:
ports:
- containerPort: 8200
protocol: TCP
- containerPort: 5557
- containerPort: 5600
protocol: TCP
resources:
limits:
Expand All @@ -127,7 +127,7 @@ kind: Deployment
metadata:
name: pvc-hf-llm-d-modelservice-prefill
labels:
helm.sh/chart: llm-d-modelservice-v0.4.3
helm.sh/chart: llm-d-modelservice-v0.4.4
app.kubernetes.io/version: "v0.3.0"
app.kubernetes.io/managed-by: Helm
spec:
Expand Down Expand Up @@ -178,7 +178,7 @@ spec:
- name: UCX_TLS
value: cuda_ipc,cuda_copy,tcp
- name: VLLM_NIXL_SIDE_CHANNEL_PORT
value: "5557"
value: "5600"
- name: VLLM_NIXL_SIDE_CHANNEL_HOST
valueFrom:
fieldRef:
Expand All @@ -198,7 +198,7 @@ spec:
ports:
- containerPort: 8000
protocol: TCP
- containerPort: 5557
- containerPort: 5600
protocol: TCP
resources:
limits:
Expand Down
Loading
Loading