Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion python/sglang/srt/managers/scheduler_metrics_mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ def log_decode_stats(

# Others
self.calculate_utilization()
self.metrics_collector.log_stats(self.stats)
self.metrics_collector.log_stats(self.stats, is_decode_stats=True)
self._emit_kv_metrics()
self._publish_kv_events()

Expand Down
11 changes: 7 additions & 4 deletions python/sglang/srt/metrics/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -529,7 +529,7 @@ def observe_per_stage_req_latency(self, stage: str, latency: float) -> None:
def observe_queue_time(self, latency: float) -> None:
self._log_histogram(self.queue_time, latency)

def log_stats(self, stats: SchedulerStats) -> None:
def log_stats(self, stats: SchedulerStats, is_decode_stats: bool = False) -> None:
self._log_gauge(self.num_running_reqs, stats.num_running_reqs)
self._log_gauge(self.num_used_tokens, stats.num_used_tokens)
self._log_gauge(self.token_usage, stats.token_usage)
Expand All @@ -543,11 +543,14 @@ def log_stats(self, stats: SchedulerStats) -> None:
self._log_gauge(
self.num_running_reqs_offline_batch, stats.num_running_reqs_offline_batch
)
self._log_gauge(self.cache_hit_rate, stats.cache_hit_rate)

if not is_decode_stats:
self._log_gauge(self.cache_hit_rate, stats.cache_hit_rate)

# Speculative decoding
self._log_gauge(self.spec_accept_length, stats.spec_accept_length)
self._log_gauge(self.spec_accept_rate, stats.spec_accept_rate)
if is_decode_stats:
self._log_gauge(self.spec_accept_length, stats.spec_accept_length)
self._log_gauge(self.spec_accept_rate, stats.spec_accept_rate)
Comment on lines +547 to +553
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Since logging cache_hit_rate and logging the speculative decoding metrics are mutually exclusive, depending on is_decode_stats, you can use a single if/else block. This improves readability and makes the mutual exclusivity of the two branches explicit.

Suggested change
if not is_decode_stats:
self._log_gauge(self.cache_hit_rate, stats.cache_hit_rate)
# Speculative decoding
self._log_gauge(self.spec_accept_length, stats.spec_accept_length)
self._log_gauge(self.spec_accept_rate, stats.spec_accept_rate)
if is_decode_stats:
self._log_gauge(self.spec_accept_length, stats.spec_accept_length)
self._log_gauge(self.spec_accept_rate, stats.spec_accept_rate)
if not is_decode_stats:
self._log_gauge(self.cache_hit_rate, stats.cache_hit_rate)
else:
# Speculative decoding
self._log_gauge(self.spec_accept_length, stats.spec_accept_length)
self._log_gauge(self.spec_accept_rate, stats.spec_accept_rate)


# PD disaggregation
self._log_gauge(
Expand Down