diff --git a/nemo_rl/models/generation/vllm/vllm_worker.py b/nemo_rl/models/generation/vllm/vllm_worker.py index 55fc58f774..2237a9efde 100644 --- a/nemo_rl/models/generation/vllm/vllm_worker.py +++ b/nemo_rl/models/generation/vllm/vllm_worker.py @@ -539,7 +539,14 @@ def _get_raw_spec_counters(self) -> dict[str, float | list[float]]: """ metrics: dict[str, float | list[float]] = {} if self.llm is not None: - for metric in self.llm.get_metrics(): + if hasattr(self.llm, "get_metrics"): + vllm_prom_metrics = self.llm.get_metrics() + else: + # The AsyncLLM API does not implement get_metrics so we need to call the prometheus API ourselves + from vllm.v1.metrics.reader import get_metrics_snapshot + + vllm_prom_metrics = get_metrics_snapshot() + for metric in vllm_prom_metrics: if hasattr(metric, "values"): metrics[metric.name] = metric.values elif hasattr(metric, "value"): diff --git a/tests/functional/grpo_non_colocated.sh b/tests/functional/grpo_non_colocated.sh index c5fc5eb5dc..8c65aedda2 100755 --- a/tests/functional/grpo_non_colocated.sh +++ b/tests/functional/grpo_non_colocated.sh @@ -27,6 +27,7 @@ uv run coverage run -a --data-file=$PROJECT_ROOT/tests/.coverage --source=$PROJE policy.train_micro_batch_size=1 \ policy.generation.colocated.enabled=false \ policy.generation.colocated.resources.gpus_per_node=1 \ + policy.generation.vllm_cfg.async_engine=true \ cluster.gpus_per_node=2 \ grpo.max_num_steps=2 \ logger.tensorboard_enabled=true \