diff --git a/charts/llmisvc-resources/templates/config-llm-scheduler.yaml b/charts/llmisvc-resources/templates/config-llm-scheduler.yaml index 0173be08791..c1d930c00ee 100644 --- a/charts/llmisvc-resources/templates/config-llm-scheduler.yaml +++ b/charts/llmisvc-resources/templates/config-llm-scheduler.yaml @@ -64,6 +64,8 @@ spec: - --modelServerMetricsHttpsInsecureSkipVerify - --certPath - "/etc/ssl/certs" + - --kvCacheUsagePercentageMetric + - "vllm:kv_cache_usage_perc" resources: requests: cpu: 256m diff --git a/config/llmisvcconfig/config-llm-scheduler.yaml b/config/llmisvcconfig/config-llm-scheduler.yaml index 1f5965d2e24..2eabb6e8250 100644 --- a/config/llmisvcconfig/config-llm-scheduler.yaml +++ b/config/llmisvcconfig/config-llm-scheduler.yaml @@ -64,6 +64,8 @@ spec: - --modelServerMetricsHttpsInsecureSkipVerify - --certPath - "/etc/ssl/certs" + - --kvCacheUsagePercentageMetric + - "vllm:kv_cache_usage_perc" resources: requests: cpu: 256m