diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index 03aadebdbc..bae864abb5 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -25,7 +25,7 @@ var ( Name: "pd_decision_total", Help: metrics.HelpMsgWithStability("Total number of P/D disaggregation decisions made", compbasemetrics.ALPHA), }, - []string{"decision_type"}, // "decode-only" or "prefill-decode" + []string{"model_name", "decision_type"}, // "decode-only" or "prefill-decode" ) ) @@ -36,7 +36,13 @@ func GetCollectors() []prometheus.Collector { } } -// RecordPDDecision records the type of P/D disaggregation decision made. -func RecordPDDecision(decisionType string) { - SchedulerPDDecisionCount.WithLabelValues(decisionType).Inc() +// RecordPDDecision increments the counter for a specific P/D routing decision. +// The decisionType must be one of the DecisionType* constants (e.g., DecisionTypeDecodeOnly). +// The model parameter should be the target model name (e.g., from request.TargetModel); +// if empty, the caller should pass a placeholder like "unknown" to avoid empty labels. +func RecordPDDecision(modelName, decisionType string) { + if modelName == "" { + modelName = "unknown" + } + SchedulerPDDecisionCount.WithLabelValues(modelName, decisionType).Inc() } diff --git a/pkg/metrics/metrics_test.go b/pkg/metrics/metrics_test.go index 98b3bc3b12..66208efd6a 100644 --- a/pkg/metrics/metrics_test.go +++ b/pkg/metrics/metrics_test.go @@ -8,15 +8,20 @@ import ( ) func TestSchedulerPDDecisionCount(t *testing.T) { - RecordPDDecision(DecisionTypePrefillDecode) - RecordPDDecision(DecisionTypeDecodeOnly) - RecordPDDecision(DecisionTypePrefillDecode) - if err := testutil.CollectAndCompare(SchedulerPDDecisionCount, strings.NewReader(` + model := "test-model" + + RecordPDDecision(model, DecisionTypePrefillDecode) + RecordPDDecision(model, DecisionTypeDecodeOnly) + RecordPDDecision(model, DecisionTypePrefillDecode) + expected := ` # HELP llm_d_inference_scheduler_pd_decision_total [ALPHA] Total number of P/D disaggregation decisions made # TYPE llm_d_inference_scheduler_pd_decision_total counter - llm_d_inference_scheduler_pd_decision_total{decision_type="decode-only"} 1 - llm_d_inference_scheduler_pd_decision_total{decision_type="prefill-decode"} 2 - `), "decision_type"); err != nil { + llm_d_inference_scheduler_pd_decision_total{decision_type="decode-only",model_name="test-model"} 1 + llm_d_inference_scheduler_pd_decision_total{decision_type="prefill-decode",model_name="test-model"} 2 + ` + + if err := testutil.CollectAndCompare(SchedulerPDDecisionCount, strings.NewReader(expected), + "llm_d_inference_scheduler_pd_decision_total"); err != nil { t.Errorf("RecordPDDecision() failed: %v", err) } } diff --git a/pkg/plugins/profile/pd_profile_handler.go b/pkg/plugins/profile/pd_profile_handler.go index d470acb5e0..8dff33e437 100644 --- a/pkg/plugins/profile/pd_profile_handler.go +++ b/pkg/plugins/profile/pd_profile_handler.go @@ -163,12 +163,12 @@ func (h *PdProfileHandler) Pick(ctx context.Context, cycleState *types.CycleStat if (1.0-hitPercentagePrefix)*float64(len(userInput)) < float64(h.pdThreshold) { log.FromContext(ctx).Info("Non-cached suffix is smaller than threshold, using decode profile only", "hitPercentage", hitPercentagePrefix) - metrics.RecordPDDecision(metrics.DecisionTypeDecodeOnly) + metrics.RecordPDDecision(request.TargetModel, metrics.DecisionTypeDecodeOnly) return map[string]*framework.SchedulerProfile{} // do not run prefill } } - metrics.RecordPDDecision(metrics.DecisionTypePrefillDecode) + metrics.RecordPDDecision(request.TargetModel, metrics.DecisionTypePrefillDecode) // run the prefill profile return map[string]*framework.SchedulerProfile{ h.prefillProfile: profiles[h.prefillProfile],