diff --git a/examples/README.md b/examples/README.md
index 57816bb1376..a78a68a93b1 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -19,12 +19,12 @@ export DOWNLOAD_SOURCE=aistudio
 
 ### Notes on Using Paddle-Format Weights
 
-When using **Paddle**-format weights, manually add the following parameters to the config file (e.g. `sft_full.json`, `sft_lora.json`) to avoid conflicts with the **HuggingFace** format:
+When using **Paddle**-format weights, manually add the following parameters to the config file (e.g. `sft_full.yaml`, `sft_lora.yaml`) to avoid conflicts with the **HuggingFace** format:
 
-```json
-"model_name_or_path": "your_model_name",
-"convert_from_hf": false,
-"save_to_hf": false,
+```yaml
+model_name_or_path: your_model_name_or_path
+convert_from_hf: false
+save_to_hf: false
 ```
 
@@ -55,19 +55,19 @@ tar -xvf alpaca_demo.gz
 
 Single-GPU
 
 ```bash
-python -u run_finetune.py ./config/sft_full.json
+python -u run_finetune.py ./config/sft_full.yaml
 ```
 
 Multi-GPU
 
 ```bash
-python -u -m paddle.distributed.launch --devices "0,1,2,3,4,5,6,7" run_finetune.py ./config/sft_full.json
+python -u -m paddle.distributed.launch --devices "0,1,2,3,4,5,6,7" run_finetune.py ./config/sft_full.yaml
 ```
 
 ### 1.3 LoRA SFT
 
 Reference command for launching LoRA SFT
 
 ```bash
-python -u run_finetune.py ./config/sft_lora.json
+python -u run_finetune.py ./config/sft_lora.yaml
 ```
@@ -109,19 +109,19 @@ tar -zxvf ultrafeedback_binarized.tar.gz
 
 Single-GPU
 
 ```bash
-python -u ./alignment/dpo/run_dpo.py ./config/dpo_full.json
+python -u ./alignment/dpo/run_dpo.py ./config/dpo_full.yaml
 ```
 
 Multi-GPU
 
 ```bash
-python -u -m paddle.distributed.launch --devices "0,1,2,3,4,5,6,7" ./alignment/dpo/run_dpo.py ./config/dpo_full.json
+python -u -m paddle.distributed.launch --devices "0,1,2,3,4,5,6,7" ./alignment/dpo/run_dpo.py ./config/dpo_full.yaml
 ```
 
 ### 2.3 LoRA DPO
 
 Reference command for launching LoRA DPO
 
 ```bash
-python -u ./alignment/dpo/run_dpo.py ./config/dpo_lora.json
+python -u ./alignment/dpo/run_dpo.py ./config/dpo_lora.yaml
 ```
diff --git a/examples/alignment/dpo/run_dpo.py b/examples/alignment/dpo/run_dpo.py
index fad15f186db..40e66380f57
--- a/examples/alignment/dpo/run_dpo.py
+++ b/examples/alignment/dpo/run_dpo.py
@@ -76,6 +76,8 @@ def main():
     parser = PdArgumentParser((DPOModelArgument, DPODataArgument, DPOTrainingArguments, DPOConfig))
     if len(sys.argv) >= 2 and sys.argv[1].endswith(".json"):
         model_args, data_args, training_args, dpo_config = parser.parse_json_file_and_cmd_lines()
+    elif len(sys.argv) >= 2 and sys.argv[1].endswith(".yaml"):
+        model_args, data_args, training_args, dpo_config = parser.parse_yaml_file_and_cmd_lines()
     else:
         model_args, data_args, training_args, dpo_config = parser.parse_args_into_dataclasses()
diff --git a/examples/config/dpo_full.json b/examples/config/dpo_full.json
deleted file mode 100644
index 90a6c33b19e..00000000000
--- a/examples/config/dpo_full.json
+++ /dev/null
@@ -1,37 +0,0 @@
-{
-    "model_name_or_path": "Qwen/Qwen3-0.6B-Base",
-    "train_dataset_path": "./data/dpo/train.jsonl",
-    "train_dataset_prob": "1.0",
-    "train_dataset_type": "erniekit",
-    "eval_dataset_path": "./data/dpo/dev.jsonl",
-    "eval_dataset_prob": "1.0",
-    "eval_dataset_type": "erniekit",
-    "packing": false,
-    "mix_strategy": "concat",
-    "output_dir": "./checkpoints/qwen3_paddle_dpo_ckpts",
-    "max_seq_len": 8192,
-    "per_device_train_batch_size": 1,
-    "gradient_accumulation_steps": 8,
-    "per_device_eval_batch_size": 1,
-    "num_train_epochs": 1,
-    "learning_rate": 1e-06,
-    "warmup_steps": 10,
-    "logging_steps": 1,
-    "max_steps": -1,
-    "evaluation_strategy": "steps",
-    "save_strategy": "steps",
-    "eval_steps": 100,
-    "save_steps": 100,
-    "bf16": true,
-    "fp16_opt_level": "O2",
-    "do_train": true,
-    "do_eval": true,
-    "disable_tqdm": true,
-    "recompute": true,
-    "save_total_limit": 1,
-    "tensor_parallel_degree": 1,
-    "pipeline_parallel_degree": 1,
-    "sharding": "stage2",
-    "unified_checkpoint": true,
-    "attn_impl": "flashmask"
- }
\ No newline at end of file
diff --git a/examples/config/dpo_full.yaml b/examples/config/dpo_full.yaml
new file mode 100644
index 00000000000..3669198a62f
--- /dev/null
+++ b/examples/config/dpo_full.yaml
@@ -0,0 +1,49 @@
+### data
+train_dataset_type: erniekit
+eval_dataset_type: erniekit
+train_dataset_path: ./data/dpo/train.jsonl
+train_dataset_prob: "1.0"
+eval_dataset_path: ./data/dpo/dev.jsonl
+eval_dataset_prob: "1.0"
+max_seq_len: 8192
+num_samples_each_epoch: 6000000
+packing: false
+mix_strategy: concat
+
+### model
+model_name_or_path: Qwen/Qwen3-0.6B-Base
+attn_impl: flashmask
+
+### finetuning
+# base
+seed: 23
+do_train: true
+do_eval: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 1
+num_train_epochs: 1
+max_steps: -1
+eval_steps: 100
+evaluation_strategy: steps
+save_steps: 100
+save_total_limit: 1
+save_strategy: steps
+logging_steps: 1
+gradient_accumulation_steps: 4
+logging_dir: ./vdl_log
+output_dir: ./checkpoints/qwen3_hf_0p6b_dpo_ckpts
+disable_tqdm: true
+eval_accumulation_steps: 16
+
+# train
+warmup_steps: 20
+learning_rate: 1.0e-6
+
+# performance
+tensor_parallel_degree: 1
+pipeline_parallel_degree: 1
+sharding: stage2
+recompute: true
+bf16: true
+fp16_opt_level: O2
+unified_checkpoint: true
diff --git a/examples/config/dpo_lora.json b/examples/config/dpo_lora.json
deleted file mode 100644
index 6fef187054f..00000000000
--- a/examples/config/dpo_lora.json
+++ /dev/null
@@ -1,39 +0,0 @@
-{
-    "model_name_or_path": "Qwen/Qwen3-0.6B-Base",
-    "train_dataset_path": "./data/dpo/train.jsonl",
-    "train_dataset_prob": "1.0",
-    "train_dataset_type": "erniekit",
-    "eval_dataset_path": "./data/dpo/dev.jsonl",
-    "eval_dataset_prob": "1.0",
-    "eval_dataset_type": "erniekit",
-    "packing": false,
-    "mix_strategy": "concat",
-    "output_dir": "./checkpoints/qwen3_paddle_dpo_lora_ckpts",
-    "max_seq_len": 8192,
-    "per_device_train_batch_size": 1,
-    "gradient_accumulation_steps": 8,
-    "per_device_eval_batch_size": 1,
-    "num_train_epochs": 1,
-    "learning_rate": 1e-05,
-    "warmup_steps": 10,
-    "logging_steps": 1,
-    "max_steps": -1,
-    "evaluation_strategy": "steps",
-    "save_strategy": "steps",
-    "eval_steps": 100,
-    "save_steps": 100,
-    "bf16": true,
-    "fp16_opt_level": "O2",
-    "do_train": true,
-    "do_eval": true,
-    "disable_tqdm": true,
-    "recompute": true,
-    "save_total_limit": 1,
-    "tensor_parallel_degree": 1,
-    "pipeline_parallel_degree": 1,
-    "sharding": "stage2",
-    "unified_checkpoint": true,
-    "lora": true,
-    "lora_rank": 64,
-    "attn_impl": "flashmask"
- }
\ No newline at end of file
diff --git a/examples/config/dpo_lora.yaml b/examples/config/dpo_lora.yaml
new file mode 100644
index 00000000000..127e6af6b33
--- /dev/null
+++ b/examples/config/dpo_lora.yaml
@@ -0,0 +1,51 @@
+### data
+train_dataset_type: erniekit
+eval_dataset_type: erniekit
+train_dataset_path: ./data/dpo/train.jsonl
+train_dataset_prob: "1.0"
+eval_dataset_path: ./data/dpo/dev.jsonl
+eval_dataset_prob: "1.0"
+max_seq_len: 8192
+num_samples_each_epoch: 6000000
+packing: false
+mix_strategy: concat
+
+### model
+model_name_or_path: Qwen/Qwen3-0.6B-Base
+attn_impl: flashmask
+lora: true
+lora_rank: 8
+
+### finetuning
+# base
+seed: 23
+do_train: true
+do_eval: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 1
+num_train_epochs: 1
+max_steps: -1
+eval_steps: 100
+evaluation_strategy: steps
+save_steps: 100
+save_total_limit: 1
+save_strategy: steps
+logging_steps: 1
+gradient_accumulation_steps: 4
+logging_dir: ./vdl_log
+output_dir: ./checkpoints/qwen3_hf_0p6b_dpo_lora_ckpts
+disable_tqdm: true
+eval_accumulation_steps: 16
+
+# train
+warmup_steps: 20
+learning_rate: 1.0e-5
+
+# performance
+tensor_parallel_degree: 1
+pipeline_parallel_degree: 1
+sharding: stage2
+recompute: true
+bf16: true
+fp16_opt_level: O2
+unified_checkpoint: true
diff --git a/examples/config/sft_full.json b/examples/config/sft_full.json
deleted file mode 100644
index b172f8b6e91..00000000000
--- a/examples/config/sft_full.json
+++ /dev/null
@@ -1,38 +0,0 @@
-{
-    "model_name_or_path": "Qwen/Qwen3-0.6B-Base",
-    "train_dataset_path": "./data/sft/train.json",
-    "train_dataset_prob": "1.0",
-    "train_dataset_type": "erniekit",
-    "eval_dataset_path": "./data/sft/dev.json",
-    "eval_dataset_prob": "1.0",
-    "eval_dataset_type": "erniekit",
-    "packing": false,
-    "mix_strategy": "concat",
-    "output_dir": "./checkpoints/qwen3_paddle_sft_ckpts",
-    "max_seq_len": 8192,
-    "per_device_train_batch_size": 1,
-    "gradient_accumulation_steps": 4,
-    "per_device_eval_batch_size": 1,
-    "eval_accumulation_steps":16,
-    "num_train_epochs": 1,
-    "learning_rate": 3e-05,
-    "warmup_steps": 10,
-    "logging_steps": 1,
-    "max_steps": -1,
-    "evaluation_strategy": "steps",
-    "save_strategy": "steps",
-    "eval_steps": 100,
-    "save_steps": 100,
-    "bf16": true,
-    "fp16_opt_level": "O2",
-    "do_train": true,
-    "do_eval": true,
-    "disable_tqdm": true,
-    "recompute": true,
-    "save_total_limit": 1,
-    "tensor_parallel_degree": 1,
-    "pipeline_parallel_degree": 1,
-    "sharding": "stage2",
-    "unified_checkpoint": true,
-    "attn_impl": "flashmask"
- }
\ No newline at end of file
diff --git a/examples/config/sft_full.yaml b/examples/config/sft_full.yaml
new file mode 100644
index 00000000000..c4ad3965a1e
--- /dev/null
+++ b/examples/config/sft_full.yaml
@@ -0,0 +1,49 @@
+### data
+train_dataset_type: erniekit
+eval_dataset_type: erniekit
+train_dataset_path: ./data/sft/train.json
+train_dataset_prob: "1.0"
+eval_dataset_path: ./data/sft/dev.json
+eval_dataset_prob: "1.0"
+max_seq_len: 8192
+num_samples_each_epoch: 6000000
+packing: false
+mix_strategy: concat
+
+### model
+model_name_or_path: Qwen/Qwen3-0.6B-Base
+attn_impl: flashmask
+
+### finetuning
+# base
+seed: 23
+do_train: true
+do_eval: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 1
+num_train_epochs: 1
+max_steps: -1
+eval_steps: 100
+evaluation_strategy: steps
+save_steps: 100
+save_total_limit: 1
+save_strategy: steps
+logging_steps: 1
+gradient_accumulation_steps: 4
+logging_dir: ./vdl_log
+output_dir: ./checkpoints/qwen3_hf_0p6b_sft_ckpts
+disable_tqdm: true
+eval_accumulation_steps: 16
+
+# train
+warmup_steps: 20
+learning_rate: 1.0e-5
+
+# performance
+tensor_parallel_degree: 1
+pipeline_parallel_degree: 1
+sharding: stage2
+recompute: true
+bf16: true
+fp16_opt_level: O2
+unified_checkpoint: true
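A quick note on the throughput math implied by the configs above: the effective global batch size is `per_device_train_batch_size × gradient_accumulation_steps × data-parallel degree`. The sketch below is not part of the change; it assumes PyYAML is installed, that the config is read from `./config/sft_full.yaml` as in the README launch commands, and that the README's 8-GPU launch is used, in which case all eight ranks are data-parallel because `tensor_parallel_degree` and `pipeline_parallel_degree` are both 1.

```python
import yaml

# Illustrative values; adjust the path and device count to your checkout/launch.
CONFIG_PATH = "./config/sft_full.yaml"
NUM_DEVICES = 8  # matches --devices "0,1,2,3,4,5,6,7" in the README launch command

with open(CONFIG_PATH, "r", encoding="utf-8") as f:
    cfg = yaml.safe_load(f)

# With tensor_parallel_degree == pipeline_parallel_degree == 1, every rank is a
# data-parallel worker; stage2 sharding only partitions optimizer state and
# gradients across that same group, so it does not change the count.
dp_degree = NUM_DEVICES // (cfg["tensor_parallel_degree"] * cfg["pipeline_parallel_degree"])
global_batch_size = cfg["per_device_train_batch_size"] * cfg["gradient_accumulation_steps"] * dp_degree
print(f"effective global batch size: {global_batch_size}")  # 1 * 4 * 8 = 32
```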
diff --git a/examples/config/sft_lora.json b/examples/config/sft_lora.json
deleted file mode 100644
index d97d2d4ea3e..00000000000
--- a/examples/config/sft_lora.json
+++ /dev/null
@@ -1,39 +0,0 @@
-{
-    "model_name_or_path": "Qwen/Qwen3-0.6B-Base",
-    "train_dataset_path": "./data/sft/train.json",
-    "train_dataset_prob": "1.0",
-    "train_dataset_type": "erniekit",
-    "eval_dataset_path": "./data/sft/dev.json",
-    "eval_dataset_prob": "1.0",
-    "eval_dataset_type": "erniekit",
-    "packing": false,
-    "mix_strategy": "concat",
-    "output_dir": "./checkpoints/qwen3_paddle_lora_ckpts",
-    "max_seq_len": 8192,
-    "per_device_train_batch_size": 1,
-    "gradient_accumulation_steps": 4,
-    "per_device_eval_batch_size": 1,
-    "eval_accumulation_steps":16,
-    "num_train_epochs": 1,
-    "learning_rate": 3e-04,
-    "warmup_steps": 10,
-    "logging_steps": 1,
-    "max_steps": -1,
-    "evaluation_strategy": "steps",
-    "save_strategy": "steps",
-    "eval_steps": 100,
-    "save_steps": 100,
-    "bf16": true,
-    "fp16_opt_level": "O2",
-    "do_train": true,
-    "do_eval": true,
-    "disable_tqdm": true,
-    "recompute": true,
-    "save_total_limit": 1,
-    "tensor_parallel_degree": 1,
-    "pipeline_parallel_degree": 1,
-    "sharding": "stage2",
-    "unified_checkpoint": true,
-    "lora": true,
-    "attn_impl": "flashmask"
- }
\ No newline at end of file
diff --git a/examples/config/sft_lora.yaml b/examples/config/sft_lora.yaml
new file mode 100644
index 00000000000..8c10c6371d8
--- /dev/null
+++ b/examples/config/sft_lora.yaml
@@ -0,0 +1,51 @@
+### data
+train_dataset_type: erniekit
+eval_dataset_type: erniekit
+train_dataset_path: ./data/sft/train.json
+train_dataset_prob: "1.0"
+eval_dataset_path: ./data/sft/dev.json
+eval_dataset_prob: "1.0"
+max_seq_len: 8192
+num_samples_each_epoch: 6000000
+packing: false
+mix_strategy: concat
+
+### model
+model_name_or_path: Qwen/Qwen3-0.6B-Base
+attn_impl: flashmask
+lora: true
+lora_rank: 8
+
+### finetuning
+# base
+seed: 23
+do_train: true
+do_eval: true
+per_device_eval_batch_size: 1
+per_device_train_batch_size: 1
+num_train_epochs: 1
+max_steps: -1
+eval_steps: 100
+evaluation_strategy: steps
+save_steps: 100
+save_total_limit: 1
+save_strategy: steps
+logging_steps: 1
+gradient_accumulation_steps: 4
+logging_dir: ./vdl_log
+output_dir: ./checkpoints/qwen3_hf_0p6b_lora_ckpts
+disable_tqdm: true
+eval_accumulation_steps: 16
+
+# train
+warmup_steps: 20
+learning_rate: 1.0e-4
+
+# performance
+tensor_parallel_degree: 1
+pipeline_parallel_degree: 1
+sharding: stage2
+recompute: true
+bf16: true
+fp16_opt_level: O2
+unified_checkpoint: true
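As a companion to the Paddle-weights note in the README hunk above, the extra keys can also be merged into one of the new YAML configs programmatically instead of by hand. This is a minimal sketch, not part of the change itself; it assumes PyYAML is installed, and the file paths and the Paddle model name are placeholders.

```python
import yaml

SRC = "./config/sft_full.yaml"            # one of the configs added in this change
DST = "./config/sft_full_paddle.yaml"     # hypothetical output path
PADDLE_MODEL = "your_model_name_or_path"  # placeholder, as in the README snippet

with open(SRC, "r", encoding="utf-8") as f:
    cfg = yaml.safe_load(f)

# Settings from the README's Paddle-weights note: point at a Paddle-format
# checkpoint and disable HuggingFace-format conversion on load and save.
cfg["model_name_or_path"] = PADDLE_MODEL
cfg["convert_from_hf"] = False
cfg["save_to_hf"] = False

with open(DST, "w", encoding="utf-8") as f:
    yaml.safe_dump(cfg, f, sort_keys=False)
print(f"wrote {DST}")
```

Note that `yaml.safe_dump` drops the `###` section comments, so hand-editing keeps the file closer to the committed layout.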