Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions .github/workflows/e2e_ascend.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# - `special_sanity`: a suite of quick sanity tests
# - `special_standalone`: a set of test that are designed to run in dedicated environments

# Accelerators for tests
# Accelerators for tests
# - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
# - For test scripts with `on_cpu.py` name suffix would be tested on CPU resources in linux environment.

Expand Down Expand Up @@ -67,7 +67,7 @@ jobs:
if: github.repository_owner == 'volcengine'
name: verl Ascend test (self-host)
runs-on: [self-hosted, npu-0]
timeout-minutes: 45 # Increase this timeout value as needed
timeout-minutes: 60 # Increase this timeout value as needed
container:
image: quay.io/ascend/verl:verl-8.3.rc1-910b-ubuntu22.04-py3.11-latest
volumes:
Expand All @@ -88,7 +88,7 @@ jobs:
--network host
--privileged
--shm-size 16g
env:
env:
HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
Expand Down Expand Up @@ -140,6 +140,11 @@ jobs:
ray stop --force
bash tests/special_npu/run_qwen2_5_05b_dapo.sh
rm -rf $HOME/ckpts
- name: Running gsm8k e2e qwen3 MoE training tests with DAPO MindSpeed on ASCEND NPU
run: |
ray stop --force
export PYTHONPATH=$PYTHONPATH:/Megatron-LM
USE_DIST_CKPT=True USE_DUMMY_MODEL=True DUMMY_MODEL_CONFIG_PATH=tests/special_e2e/ppo_trainer/expert_parallel/qwen3moe_minimal.json DUMMY_MODEL_PATH=$HOME/dist_ckpt/qwen3_30b_dapo_mindspeed bash tests/special_npu/run_qwen3_30b_dapo_mindspeed.sh
- name: Running gsm8k e2e training tests with GRPO MindSpeed on ASCEND NPU
run: |
ray stop --force
Expand Down
3 changes: 3 additions & 0 deletions recipe/dapo/run_dapo_qwen3_moe_30b_megatron_npu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ offload=True

max_num_batched_tokens=$((max_prompt_length + max_response_length))

# vllm
gen_tp=4

# Megatron backen
train_tp=4
train_ep=2
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"num_hidden_layers": 2,
"max_window_layers": 2
}
139 changes: 139 additions & 0 deletions tests/special_npu/run_qwen3_30b_dapo_mindspeed.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
#!/usr/bin/env bash
set -xeuo pipefail

export VLLM_ASCEND_ENABLE_NZ=0

MODEL_ID=${MODEL_ID:-Qwen/Qwen3-30B-A3B-Instruct-2507}
MODEL_PATH=${MODEL_PATH:-${HOME}/models/${MODEL_ID}}
USE_DIST_CKPT=${USE_DIST_CKPT:-False}
DIST_CKPT_PATH=${DIST_CKPT_PATH:-${HOME}/dist_ckpt/qwen3_30b_dapo_mindspeed}

# use dummy model
if [[ "$USE_DUMMY_MODEL" == "True" ]]; then
DUMMY_MODEL_PATH=${DUMMY_MODEL_PATH:-${HOME}/models_dummy/${MODEL_ID}}
if [ -z "${DUMMY_MODEL_CONFIG_PATH}" ]; then
echo "[ERROR] DUMMY_MODEL_CONFIG_PATH not set"
exit 1
fi

# make sure the path is empty
if [[ -d $DUMMY_MODEL_PATH && $DUMMY_MODEL_PATH != "/" ]]; then
rm -rf $DUMMY_MODEL_PATH
fi

# init model
python scripts/init_random_model.py \
--hf_model_path "${MODEL_PATH}" \
--new_config_path "${DUMMY_MODEL_CONFIG_PATH}" \
--output_path "${DUMMY_MODEL_PATH}"

# replace model path
MODEL_PATH=$DUMMY_MODEL_PATH
fi

# convert to megatron
if [[ "$USE_DIST_CKPT" == "True" ]]; then

if [[ "$USE_DUMMY_MODEL" == "True" ]]; then
DIST_CKPT_PATH=${HOME}/dist_ckpt/qwen3_30b_dapo_mindspeed_dummy

if [[ -d $DIST_CKPT_PATH && $DIST_CKPT_PATH != "/" ]];then
rm -rf $DIST_CKPT_PATH
fi
fi

torchrun --nproc_per_node 2 --nnodes 1 scripts/converter_hf_to_mcore.py \
--hf_model_path "${MODEL_PATH}" \
--output_path "${DIST_CKPT_PATH}"
fi

exp_name='Qwen3-30B-A3B-DAPO-MindSpeed'

max_prompt_length=512
max_response_length=1024

train_prompt_bsz=16

actor_ppo_max_token_len=$(((max_prompt_length + max_response_length)))
infer_ppo_max_token_len=$(((max_prompt_length + max_response_length)))

python3 -m recipe.dapo.main_dapo \
--config-name="dapo_megatron_trainer" \
data.train_files=${HOME}/data/gsm8k/train.parquet \
data.val_files=${HOME}/data/gsm8k/test.parquet \
data.train_batch_size=${train_prompt_bsz} \
data.max_prompt_length=${max_prompt_length} \
data.max_response_length=${max_response_length} \
data.filter_overlong_prompts=True \
data.shuffle=False \
data.truncation='left' \
data.gen_batch_size=${train_prompt_bsz} \
algorithm.adv_estimator=grpo \
algorithm.use_kl_in_reward=False \
algorithm.filter_groups.enable=False \
algorithm.filter_groups.max_num_gen_batches=10 \
algorithm.filter_groups.metric=acc \
actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=16 \
actor_rollout_ref.rollout.enable_chunked_prefill=False \
actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
actor_rollout_ref.rollout.name=vllm \
actor_rollout_ref.rollout.gpu_memory_utilization=0.6 \
actor_rollout_ref.rollout.n=8 \
actor_rollout_ref.rollout.temperature=1.0 \
actor_rollout_ref.rollout.top_p=1.0 \
actor_rollout_ref.rollout.top_k=-1 \
actor_rollout_ref.rollout.enforce_eager=True \
actor_rollout_ref.rollout.free_cache_engine=True \
actor_rollout_ref.actor.ppo_max_token_len_per_gpu=${actor_ppo_max_token_len} \
actor_rollout_ref.ref.log_prob_max_token_len_per_gpu=${infer_ppo_max_token_len} \
actor_rollout_ref.rollout.log_prob_max_token_len_per_gpu=${infer_ppo_max_token_len} \
actor_rollout_ref.actor.strategy=megatron \
actor_rollout_ref.actor.kl_loss_coef=0.0 \
actor_rollout_ref.actor.clip_ratio_low=0.2 \
actor_rollout_ref.actor.clip_ratio_high=0.28 \
actor_rollout_ref.actor.clip_ratio_c=10.0 \
actor_rollout_ref.actor.ppo_epochs=1 \
actor_rollout_ref.actor.use_dynamic_bsz=True \
actor_rollout_ref.model.path="${MODEL_PATH}" \
actor_rollout_ref.actor.optim.lr=1e-6 \
actor_rollout_ref.actor.ppo_mini_batch_size=16 \
actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=16 \
actor_rollout_ref.rollout.log_prob_use_dynamic_bsz=True \
actor_rollout_ref.ref.log_prob_use_dynamic_bsz=True \
actor_rollout_ref.actor.megatron.pipeline_model_parallel_size=2 \
actor_rollout_ref.actor.megatron.tensor_model_parallel_size=2 \
actor_rollout_ref.actor.megatron.expert_model_parallel_size=2 \
actor_rollout_ref.actor.megatron.use_dist_checkpointing=${USE_DIST_CKPT} \
actor_rollout_ref.actor.megatron.dist_checkpointing_path=${DIST_CKPT_PATH} \
actor_rollout_ref.actor.use_kl_loss=False \
actor_rollout_ref.actor.loss_agg_mode="token-mean" \
actor_rollout_ref.ref.strategy=megatron \
actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \
actor_rollout_ref.ref.megatron.pipeline_model_parallel_size=2 \
actor_rollout_ref.ref.megatron.tensor_model_parallel_size=2 \
actor_rollout_ref.ref.megatron.expert_model_parallel_size=2 \
actor_rollout_ref.ref.megatron.use_dist_checkpointing=${USE_DIST_CKPT} \
actor_rollout_ref.ref.megatron.dist_checkpointing_path=${DIST_CKPT_PATH} \
reward_model.reward_manager=dapo \
algorithm.kl_ctrl.kl_coef=0.0 \
trainer.logger=['console'] \
trainer.project_name='verl_gsm8k_example' \
trainer.experiment_name='qwen3_30b_a3b_cut_gsm8k_mindspeed' \
trainer.n_gpus_per_node=16 \
trainer.nnodes=1 \
trainer.save_freq=-1 \
trainer.test_freq=-1 \
trainer.total_epochs=1 \
trainer.total_training_steps=2 \
trainer.device=npu \
actor_rollout_ref.actor.use_torch_compile=False \
actor_rollout_ref.ref.use_torch_compile=False \
+actor_rollout_ref.actor.megatron.override_transformer_config.use_flash_attn=True $@

# clean up
if [[ "$USE_DUMMY_MODEL" == "True" ]]; then
rm -rf $DUMMY_MODEL_PATH
if [[ "$USE_DIST_CKPT" == "True" ]]; then
rm -rf $DIST_CKPT_PATH
fi
fi