Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 10 additions & 4 deletions pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ var (
Name: "pd_decision_total",
Help: metrics.HelpMsgWithStability("Total number of P/D disaggregation decisions made", compbasemetrics.ALPHA),
},
[]string{"decision_type"}, // "decode-only" or "prefill-decode"
[]string{"model_name", "decision_type"}, // "decode-only" or "prefill-decode"
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

)
)

Expand All @@ -36,7 +36,13 @@ func GetCollectors() []prometheus.Collector {
}
}

// RecordPDDecision records the type of P/D disaggregation decision made.
func RecordPDDecision(decisionType string) {
SchedulerPDDecisionCount.WithLabelValues(decisionType).Inc()
// RecordPDDecision increments the counter for a specific P/D routing decision.
// The decisionType must be one of the DecisionType* constants (e.g., DecisionTypeDecodeOnly).
// The model parameter should be the target model name (e.g., from request.TargetModel);
// if empty, the caller should pass a placeholder like "unknown" to avoid empty labels.
func RecordPDDecision(modelName, decisionType string) {
if modelName == "" {
modelName = "unknown"
}
SchedulerPDDecisionCount.WithLabelValues(modelName, decisionType).Inc()
}
19 changes: 12 additions & 7 deletions pkg/metrics/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,20 @@ import (
)

func TestSchedulerPDDecisionCount(t *testing.T) {
RecordPDDecision(DecisionTypePrefillDecode)
RecordPDDecision(DecisionTypeDecodeOnly)
RecordPDDecision(DecisionTypePrefillDecode)
if err := testutil.CollectAndCompare(SchedulerPDDecisionCount, strings.NewReader(`
model := "test-model"

RecordPDDecision(model, DecisionTypePrefillDecode)
RecordPDDecision(model, DecisionTypeDecodeOnly)
RecordPDDecision(model, DecisionTypePrefillDecode)
expected := `
# HELP llm_d_inference_scheduler_pd_decision_total [ALPHA] Total number of P/D disaggregation decisions made
# TYPE llm_d_inference_scheduler_pd_decision_total counter
llm_d_inference_scheduler_pd_decision_total{decision_type="decode-only"} 1
llm_d_inference_scheduler_pd_decision_total{decision_type="prefill-decode"} 2
`), "decision_type"); err != nil {
llm_d_inference_scheduler_pd_decision_total{decision_type="decode-only",model_name="test-model"} 1
llm_d_inference_scheduler_pd_decision_total{decision_type="prefill-decode",model_name="test-model"} 2
`

if err := testutil.CollectAndCompare(SchedulerPDDecisionCount, strings.NewReader(expected),
"llm_d_inference_scheduler_pd_decision_total"); err != nil {
t.Errorf("RecordPDDecision() failed: %v", err)
}
}
4 changes: 2 additions & 2 deletions pkg/plugins/profile/pd_profile_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,12 +163,12 @@ func (h *PdProfileHandler) Pick(ctx context.Context, cycleState *types.CycleStat

if (1.0-hitPercentagePrefix)*float64(len(userInput)) < float64(h.pdThreshold) {
log.FromContext(ctx).Info("Non-cached suffix is smaller than threshold, using decode profile only", "hitPercentage", hitPercentagePrefix)
metrics.RecordPDDecision(metrics.DecisionTypeDecodeOnly)
metrics.RecordPDDecision(request.TargetModel, metrics.DecisionTypeDecodeOnly)
return map[string]*framework.SchedulerProfile{} // do not run prefill
}
}

metrics.RecordPDDecision(metrics.DecisionTypePrefillDecode)
metrics.RecordPDDecision(request.TargetModel, metrics.DecisionTypePrefillDecode)
// run the prefill profile
return map[string]*framework.SchedulerProfile{
h.prefillProfile: profiles[h.prefillProfile],
Expand Down