# NOTE(review): this patch was reflowed from a whitespace-collapsed copy into
# one record per line (57 removed lines, matching the hunk header below).
# The four-space indent on continuation lines is an assumption - confirm it
# against blob 221bcbaa16 before applying; removed lines must match exactly.
diff --git a/run_repro.sh b/run_repro.sh
deleted file mode 100755
index 221bcbaa16..0000000000
--- a/run_repro.sh
+++ /dev/null
@@ -1,57 +0,0 @@
-#!/bin/bash
-python mason.py \
-    --cluster ai2/jupiter-cirrascale-2 --image nathanl/open_instruct_auto \
-    --pure_docker_mode \
-    --workspace ai2/tulu-thinker \
-    --priority high \
-    --preemptible \
-    --num_nodes 6 \
-    --max_retries 0 \
-    --env VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
-    --budget ai2/oe-adapt \
-    --gpus 8 -- source configs/beaker_configs/ray_node_setup.sh \&\& source configs/beaker_configs/code_api_setup.sh \&\& python open_instruct/grpo_fast.py \
-    --exp_name repro_1106rl_int_all_qwen_integration_mix_11607 \
-    --beta 0.0 \
-    --add_bos False \
-    --num_samples_per_prompt_rollout 16 \
-    --num_unique_prompts_rollout 128 \
-    --num_mini_batches 1 \
-    --gather_whole_model False \
-    --num_epochs 1 \
-    --learning_rate 5e-7 \
-    --per_device_train_batch_size 1 \
-    --output_dir /output \
-    --kl_estimator kl3 \
-    --dataset_mixer_list "saurabh5/rlvr_acecoder 56878 hamishivi/rlvr_orz_math_57k_collected 56878 hamishivi/tulu_3_rewritten_400k_string_f1_only_v2 56878 allenai/IF_multi_constraints_upto5 56878" \
-    --dataset_mixer_list_splits train \
-    --dataset_mixer_eval_list \
-    hamishivi/tulu_3_rewritten_100k 32 \
-    --dataset_mixer_eval_list_splits train \
-    --max_token_length 10240 \
-    --max_prompt_token_length 2048 \
-    --response_length 8192 \
-    --pack_length 16384 \
-    --model_name_or_path Qwen/Qwen2.5-7B \
-    --model_revision main \
-    --tokenizer_name_or_path hamishivi/Qwen-2.5-7b-tokenizer \
-    --chat_template_name tulu_thinker_r1_style \
-    --stop_strings "" \
-    --non_stop_penalty False \
-    --temperature 1.0 \
-    --ground_truths_key ground_truth \
-    --sft_messages_key messages \
-    --total_episodes 10000000 \
-    --deepspeed_stage 2 \
-    --num_learners_per_node 8 8 \
-    --vllm_num_engines 16 \
-    --vllm_tensor_parallel_size 1 \
-    --lr_scheduler_type constant \
-    --apply_verifiable_reward true \
-    --seed 1 \
-    --num_evals 5 \
-    --save_freq 100 \
-    --try_launch_beaker_eval_jobs_on_weka True \
-    --gradient_checkpointing \
-    --with_tracking \
-    --oe_eval_max_length 32768 \
-    --oe_eval_tasks minerva_math::hamish_zs_reasoning,bbh:cot::hamish_zs_reasoning,gsm8k::hamish_zs_reasoning,minerva_math_500::hamish_zs_reasoning,zebralogic::hamish_zs_reasoning,aime::hamish_zs_reasoning,agi_eval_english:0shot_cot::hamish_zs_reasoning,gpqa:0shot_cot::hamish_zs_reasoning,codex_humanevalplus:0-shot-chat::tulu-thinker,ifeval::tulu,popqa::tulu,mmlu:mc::tulu,bbh:cot-v1::tulu