-
Notifications
You must be signed in to change notification settings - Fork 349
Description
# Launch single-node, 8-GPU GRPO LoRA fine-tuning (DeepSpeed ZeRO-3 offload).
# NOTE(review): the original paste lost the line continuations — without a
# trailing '\' on each line, every "--flag" line runs as a separate command
# and torchrun receives only its first argument. Restored here. The original
# also had a stray '\' before the final echo, which would have merged
# 'echo "Training completed…"' into torchrun's argv.
# All variable expansions are now quoted so paths with spaces survive.
torchrun --nproc_per_node="8" \
  --nnodes="1" \
  --node_rank="0" \
  --master_addr="127.0.0.1" \
  --master_port="12349" \
  src/open_r1/grpo_jsonl.py \
  --use_vllm False \
  --output_dir "${REPO_HOME}/checkpoints/rl/${EXP_NAME}" \
  --resume_from_checkpoint True \
  --model_name_or_path "${model_path}" \
  --data_file_paths "${data_paths}" \
  --image_folders "${image_folders}" \
  --is_reward_customized_from_vlm_module "${is_reward_customized_from_vlm_module}" \
  --task_type "${TASK_TYPE}" \
  --per_device_train_batch_size 1 \
  --gradient_accumulation_steps 2 \
  --gradient_checkpointing true \
  --logging_steps 2 \
  --num_train_epochs 2 \
  --bf16 \
  --attn_implementation flash_attention_2 \
  --run_name "${EXP_NAME}" \
  --data_seed 42 \
  --save_steps 20 \
  --num_generations 4 \
  --max_completion_length 384 \
  --reward_funcs format_count relative_error squares \
  --beta 0.04 \
  --report_to wandb \
  --dataset-name this_is_not_used \
  --deepspeed "${REPO_HOME}/src/open-r1-multimodal/local_scripts/zero3_offload.json" \
  --learning_rate 1e-5 \
  --use_peft true \
  --lora_r 64 \
  --lora_alpha 128 \
  --lora_dropout 0.05 \
  --lora_task_type CAUSAL_LM \
  --freeze_vision_modules true \
  --lora_resume_checkpoint "${lora_resume_checkpoint}"

echo "Training completed for ${EXP_NAME}"
I want to know why this configuration — LoRA fine-tuning of Qwen2.5-VL-7B-Instruct on 8× RTX 3090 GPUs — still runs out of GPU memory (OOM). Has anyone hit the same issue? Thanks for your help!