Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@ python3 -m verl.trainer.main_ppo \
actor_rollout_ref.rollout.enable_chunked_prefill=False \
actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=1 \
actor_rollout_ref.ref.fsdp_config.param_offload=True \
actor_rollout_ref.ref.profiler.enable=True \
actor_rollout_ref.ref.profiler.ranks=$PROFILE_RANKS \
actor_rollout_ref.ref.profiler.all_ranks=$PROFILE_RANKS_ALL \
actor_rollout_ref.ref.profiler.tool_config.npu.discrete=$DISCRETE \
actor_rollout_ref.ref.profiler.tool_config.npu.contents=$CONTENTS \
actor_rollout_ref.ref.profiler.tool_config.npu.level=$LEVEL \
actor_rollout_ref.ref.profiler.tool_config.npu.analysis=$ANALYSIS \
algorithm.use_kl_in_reward=False \
trainer.critic_warmup=0 \
trainer.logger=console \
Expand Down
6 changes: 6 additions & 0 deletions examples/grpo_trainer/run_qwen2_5_7b_grpo_e2e_prof_npu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@ python3 -m verl.trainer.main_ppo \
actor_rollout_ref.rollout.enable_chunked_prefill=False \
actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=1 \
actor_rollout_ref.ref.fsdp_config.param_offload=True \
actor_rollout_ref.ref.profiler.enable=True \
actor_rollout_ref.ref.profiler.all_ranks=$PROFILE_RANKS_ALL \
actor_rollout_ref.ref.profiler.tool_config.npu.discrete=$DISCRETE \
actor_rollout_ref.ref.profiler.tool_config.npu.contents=$CONTENTS \
actor_rollout_ref.ref.profiler.tool_config.npu.level=$LEVEL \
actor_rollout_ref.ref.profiler.tool_config.npu.analysis=$ANALYSIS \
algorithm.use_kl_in_reward=False \
trainer.critic_warmup=0 \
trainer.logger=console \
Expand Down
3 changes: 2 additions & 1 deletion tests/utils/test_nvtx_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,9 @@ def test_annotate_decorator(self):
mock_self = MagicMock()
mock_self.profiler = self.profiler
mock_self.profiler.this_step = True
decorator = mock_self.profiler.annotate(message="test")

@NsightSystemsProfiler.annotate(message="test")
@decorator
def test_func(self, *args, **kwargs):
return "result"

Expand Down
15 changes: 10 additions & 5 deletions tests/utils/test_special_mstx_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,9 @@ def test_annotate_decorator_applied_correctly(self):
mock_start_patch.return_value = mock_mark_range

with patch("verl.utils.profiler.mstx_profile.get_npu_profiler") as mock_get_profiler:
decorator = mock_worker.profiler.annotate(message="test")

@NPUProfiler.annotate(message="test")
@decorator
def test_func(self, *args, **kwargs):
return "result"

Expand All @@ -171,8 +172,9 @@ def test_annotate_when_profiler_disabled(self):
patch("verl.utils.profiler.mstx_profile.mark_end_range") as mock_end_patch,
patch("verl.utils.profiler.mstx_profile.get_npu_profiler") as mock_get_profiler,
):
decorator = mock_worker.profiler.annotate(message="test")

@NPUProfiler.annotate(message="test")
@decorator
def test_func(self, *args, **kwargs):
return "result"

Expand All @@ -193,8 +195,9 @@ def test_annotate_when_this_step_disabled(self):
patch("verl.utils.profiler.mstx_profile.mark_end_range") as mock_end_patch,
patch("verl.utils.profiler.mstx_profile.get_npu_profiler") as mock_get_profiler,
):
decorator = mock_worker.profiler.annotate(message="test")

@NPUProfiler.annotate(message="test")
@decorator
def test_func(self, *args, **kwargs):
return "result"

Expand All @@ -221,8 +224,9 @@ def test_annotate_discrete_mode_enabled(self):
):
mock_start_patch.return_value = mock_mark_range
mock_get_profiler.return_value = mock_profile_npu
decorator = mock_worker.profiler.annotate(message="test", role="test_role")

@NPUProfiler.annotate(message="test", role="test_role")
@decorator
def test_func(self, *args, **kwargs):
return "result"

Expand Down Expand Up @@ -253,8 +257,9 @@ def test_annotate_with_default_message(self):
patch("verl.utils.profiler.mstx_profile.mark_end_range") as mock_end_patch,
):
mock_start_patch.return_value = mock_mark_range
decorator = mock_worker.profiler.annotate()

@NPUProfiler.annotate()
@decorator
def test_func(self, *args, **kwargs):
return "result"

Expand Down
25 changes: 12 additions & 13 deletions verl/utils/profiler/mstx_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,8 +214,7 @@ def stop(self):
self.profile_npu.stop()
NPUProfiler._define_count -= 1

@staticmethod
def annotate(message: Optional[str] = None, role: Optional[str] = None, **kwargs) -> Callable:
def annotate(self, message: Optional[str] = None, role: Optional[str] = None, **kwargs_outer) -> Callable:
"""Decorate a Worker member function to profile the current rank in the current training step.

Requires the target function to be a member function of a Worker,
Expand All @@ -230,32 +229,32 @@ def annotate(message: Optional[str] = None, role: Optional[str] = None, **kwargs

def decorator(func):
@functools.wraps(func)
def wrapper(self, *args, **kwargs):
if not self.profiler.enable:
return func(self, *args, **kwargs)
def wrapper(*args, **kwargs_inner):
if not self.enable:
return func(*args, **kwargs_inner)

profile_name = message or func.__name__
discrete_mode = self.profiler.discrete
profile_enable = self.profiler.this_step and self.profiler.enable
discrete_mode = self.discrete
profile_enable = self.this_step and self.enable

if not profile_enable:
return func(self, *args, **kwargs)
return func(*args, **kwargs_inner)

if profile_enable:
if not discrete_mode:
mark_range = mark_start_range(message=profile_name)
else:
profile_npu = get_npu_profiler(
contents=self.profiler.profile_contents,
profile_level=self.profiler.profile_level,
profile_save_path=self.profiler.profile_save_path,
analysis=self.profiler.analysis,
contents=self.profile_contents,
profile_level=self.profile_level,
profile_save_path=self.profile_save_path,
analysis=self.analysis,
role=role,
)
profile_npu.start()
mark_range = mark_start_range(message=profile_name)

result = func(self, *args, **kwargs)
result = func(*args, **kwargs_inner)

if profile_enable:
if not discrete_mode:
Expand Down
20 changes: 10 additions & 10 deletions verl/utils/profiler/nvtx_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,13 +149,13 @@ def stop(self):
if not self.discrete:
torch.cuda.profiler.stop()

@staticmethod
def annotate(
self,
message: Optional[str] = None,
color: Optional[str] = None,
domain: Optional[str] = None,
category: Optional[str] = None,
**kwargs,
**kwargs_outer,
) -> Callable:
"""Decorate a Worker member function to profile the current rank in the current training step.

Expand All @@ -175,22 +175,22 @@ def annotate(

def decorator(func):
@functools.wraps(func)
def wrapper(self, *args, **kwargs):
if not self.profiler.enable:
return func(self, *args, **kwargs)
def wrapper(*args, **kwargs_inner):
if not self.enable:
return func(*args, **kwargs_inner)

profile_name = message or func.__name__

if self.profiler.this_step:
if self.profiler.discrete:
if self.this_step:
if self.discrete:
torch.cuda.profiler.start()
mark_range = mark_start_range(message=profile_name, color=color, domain=domain, category=category)

result = func(self, *args, **kwargs)
result = func(*args, **kwargs_inner)

if self.profiler.this_step:
if self.this_step:
mark_end_range(mark_range)
if self.profiler.discrete:
if self.discrete:
torch.cuda.profiler.stop()

return result
Expand Down
26 changes: 23 additions & 3 deletions verl/utils/profiler/profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import functools
import os
from typing import Callable, Optional

Expand Down Expand Up @@ -226,16 +227,35 @@ def start(self, **kwargs):
def stop(self):
# Delegate to the backend implementation's ``stop`` and return its result.
# If ``self._impl`` has no ``stop`` attribute, a no-op lambda is called instead, so this returns None.
return getattr(self._impl, "stop", lambda: None)()

@classmethod
def annotate(
    cls,
    message: Optional[str] = None,
    color: Optional[str] = None,
    domain: Optional[str] = None,
    category: Optional[str] = None,
    **kwargs_outer,
) -> Callable:
    """Decorate a member function so it is profiled by the instance's backend profiler.

    The backend is resolved lazily at call time: the wrapper reads
    ``self_instance.profiler._impl`` and, if that object exposes an
    ``annotate`` method, uses it to build the actual profiling decorator.

    Args:
        message: Label forwarded to the backend ``annotate``.
        color: Forwarded to the backend ``annotate``.
        domain: Forwarded to the backend ``annotate``.
        category: Forwarded to the backend ``annotate``.
        **kwargs_outer: Extra keyword arguments forwarded to the backend ``annotate``.

    Returns:
        A decorator. The decorated function is called exactly once per
        invocation; exceptions it raises propagate to the caller unchanged.
    """

    def decorator(func):
        @functools.wraps(func)
        def wrapper(self_instance, *args, **kwargs_inner):
            profiler = getattr(self_instance, "profiler", None)
            if not profiler:
                return func(self_instance, *args, **kwargs_inner)

            impl = profiler._impl
            target = func
            if hasattr(impl, "annotate"):
                # Only *building* the profiled callable is best-effort. The call
                # itself must stay outside the try block: otherwise an exception
                # raised by ``func`` would be swallowed and ``func`` executed a
                # second time, duplicating its side effects.
                try:
                    actual_decorator = impl.annotate(
                        message=message, color=color, domain=domain, category=category, **kwargs_outer
                    )
                    target = actual_decorator(func)
                except Exception:
                    # Backend could not produce a decorator (e.g. unsupported
                    # keyword arguments); fall back to the unprofiled function.
                    target = func

            return target(self_instance, *args, **kwargs_inner)

        return wrapper

    return decorator

Expand Down