[AutoConfig]add benchmark scripts (PaddlePaddle#7897)
* add auto_tuner

* fix

* update log_file

* update json

* close eval/predict

* fix run_mode

* update

* fix

* Revert "fix"

This reverts commit e526c86.

* Revert "update"

This reverts commit 9cbd773.

* update prepare

* Revert "Revert "update""

This reverts commit 811b6a4.

* Revert "Revert "fix""

This reverts commit 32cc005.

* update finetune prepare

* update

* add

* update sft/lora steps

* update json

* update

* add benchmark

* update years

* update a100
Liujie0926 authored Jan 26, 2024
1 parent 95c0dd4 commit 879a853
Showing 14 changed files with 799 additions and 0 deletions.
33 changes: 33 additions & 0 deletions tests/test_tipc/auto_tuner/autoconfig/check.sh
@@ -0,0 +1,33 @@
#!/usr/bin/env bash

# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

autoconfig_json_file=$(basename "$1") # e.g. autoconfig/llama7b_pretrain.json
model_name=${autoconfig_json_file%.*}
auto_log_file=./autoconfig/${model_name}_auto_tuner.log

if [ -f "$auto_log_file" ] && grep -q "Launch best cfg:" "$auto_log_file"; then
    echo "autotuner found the best configuration"
    if [ -d "./autoconfig/best_cfg" ]; then
        echo "autotuner ran the best configuration"
        exit 0
    else
        echo "autotuner did not run the best configuration"
        exit -1
    fi
else
    echo "autotuner run failed: check that the log file exists and contains the expected text!"
    exit -1
fi
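
A minimal usage sketch for the check script above, assuming it is run from tests/test_tipc/auto_tuner (the directory that contains ./autoconfig); the config name comes from the example comment in the script itself:

cd tests/test_tipc/auto_tuner
bash ./autoconfig/check.sh autoconfig/llama7b_pretrain.json
# Exits 0 only if ./autoconfig/llama7b_pretrain_auto_tuner.log contains
# "Launch best cfg:" and the ./autoconfig/best_cfg directory exists;
# otherwise it exits with a non-zero status.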
80 changes: 80 additions & 0 deletions tests/test_tipc/auto_tuner/autoconfig/llama7b_lora.json
@@ -0,0 +1,80 @@
{
"dp_degree": "auto",
"invalid_strategy": [
"stage3_mp*"
],
"max_search_time": 900,
"max_time_per_task": 300,
"metric_cfg": {
"OptimizationDirection": "Maximize",
"name": "interval_samples_per_second"
},
"micro_batch_size": "auto",
"mode": "LoRA",
"model_cfg": {
"global_batch_size": 8,
"hidden_size": 4096,
"num_attention_heads": 32,
"num_layers": 28,
"vocab_size": 65024
},
"mp_degree": [
1
],
"need_baseline": true,
"pp_degree": [
1
],
"run_cmd": {
"gradient_accumulation_steps": [
"./autoconfig/llama7b_lora_params.json",
"gradient_accumulation_steps"
],
"micro_batch_size": [
"./autoconfig/llama7b_lora_params.json",
"per_device_train_batch_size"
],
"mp_degree": [
"./autoconfig/llama7b_lora_params.json",
"tensor_parallel_degree"
],
"pp_degree": [
"./autoconfig/llama7b_lora_params.json",
"pipeline_parallel_degree"
],
"run_best_stage": {
"autotuner_benchmark": [
"./autoconfig/llama7b_lora_params.json",
"autotuner_benchmark",
0
]
},
"search_stage": {
"autotuner_benchmark": [
"./autoconfig/llama7b_lora_params.json",
"autotuner_benchmark",
1
]
},
"sharding_degree": [
"./autoconfig/llama7b_lora_params.json",
"sharding_parallel_degree"
],
"sharding_stage": [
"./autoconfig/llama7b_lora_params.json",
"sharding",
"stage"
],
"use_recompute": [
"./autoconfig/llama7b_lora_params.json",
"recompute"
]
},
"schedule_prior": [
"mp4"
],
"sharding_degree": "auto",
"sharding_stage": "auto",
"task_limit": 2000,
"use_recompute": "auto"
}
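
In the run_cmd block above, each entry maps one tuner dimension to a [params file, key] pair (with an optional fixed value, as in the search_stage and run_best_stage overrides), so each candidate configuration is materialized by rewriting ./autoconfig/llama7b_lora_params.json before a trial is launched. Below is a minimal illustration of that rewrite, assuming jq is available; the tuner performs the substitution internally, so this only sketches the effect for a hypothetical candidate:

# Hypothetical candidate: tensor_parallel_degree=2, per_device_train_batch_size=2,
# plus the search-stage override autotuner_benchmark=1.
jq '.tensor_parallel_degree = 2 | .per_device_train_batch_size = 2 | .autotuner_benchmark = 1' \
    ./autoconfig/llama7b_lora_params.json > candidate_params.json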
38 changes: 38 additions & 0 deletions tests/test_tipc/auto_tuner/autoconfig/llama7b_lora_params.json
@@ -0,0 +1,38 @@
{
"model_name_or_path": "facebook/llama-7b",
"dataset_name_or_path": "./data",
"output_dir": "./checkpoints/llama_lora_ckpts",
"per_device_train_batch_size": 1,
"gradient_accumulation_steps": 1,
"per_device_eval_batch_size": 8,
"eval_accumulation_steps": 16,
"num_train_epochs": 1,
"max_steps": 100,
"learning_rate": 0.0003,
"warmup_steps": 30,
"logging_steps": 1,
"evaluation_strategy": "no",
"save_strategy": "steps",
"src_length": 1024,
"max_length": 2048,
"bf16": true,
"fp16_opt_level": "O2",
"do_train": true,
"do_eval": false,
"disable_tqdm": true,
"load_best_model_at_end": false,
"eval_with_do_generation": false,
"metric_for_best_model": "accuracy",
"recompute": true,
"save_total_limit": 1,
"tensor_parallel_degree": 1,
"pipeline_parallel_degree": 1,
"lora": true,
"zero_padding": false,
"use_flash_attention": true,
"sharding_parallel_degree": 8,
"sharding": "stage3",
"recompute_granularity": "full_attn",
"autotuner_benchmark": 1,
"benchmark": 1
}
87 changes: 87 additions & 0 deletions tests/test_tipc/auto_tuner/autoconfig/llama7b_pretrain.json
@@ -0,0 +1,87 @@
{
"dp_degree": "auto",
"max_search_time": 900,
"max_time_per_task": 400,
"metric_cfg": {
"OptimizationDirection": "Maximize",
"name": "interval_samples_per_second"
},
"micro_batch_size": "auto",
"model_cfg": {
"global_batch_size": 8,
"hidden_size": 5120,
"num_attention_heads": 40,
"num_layers": 40,
"vocab_size": 32000
},
"mp_degree": "auto",
"pp_degree": "auto",
"run_cmd": {
"gradient_accumulation_steps": [
"./autoconfig/llama7b_pretrain_params.json",
"gradient_accumulation_steps"
],
"micro_batch_size": [
"./autoconfig/llama7b_pretrain_params.json",
"per_device_train_batch_size"
],
"mp_degree": [
"./autoconfig/llama7b_pretrain_params.json",
"tensor_parallel_degree"
],
"pp_degree": [
"./autoconfig/llama7b_pretrain_params.json",
"pipeline_parallel_degree"
],
"run_best_stage": {
"continue_training": [
"./autoconfig/llama7b_pretrain_params.json",
"continue_training",
0
],
"autotuner_benchmark": [
"./autoconfig/llama7b_pretrain_params.json",
"autotuner_benchmark",
0
]
},
"search_stage": {
"continue_training": [
"./autoconfig/llama7b_pretrain_params.json",
"continue_training",
0
],
"autotuner_benchmark": [
"./autoconfig/llama7b_pretrain_params.json",
"autotuner_benchmark",
1
]
},
"sharding_degree": [
"./autoconfig/llama7b_pretrain_params.json",
"sharding_parallel_degree"
],
"sharding_stage": [
"./autoconfig/llama7b_pretrain_params.json",
"sharding",
"stage"
],
"use_recompute": [
"./autoconfig/llama7b_pretrain_params.json",
"recompute"
],
"recompute_granularity": [
"./autoconfig/llama7b_pretrain_params.json",
"recompute_granularity"
]
},
"sharding_degree": "auto",
"sharding_stage": "auto",
"task_limit": 2000,
"use_recompute": "auto",
"recompute_granularity": "auto",
"invalid_strategy": ["stage3_mp*"],
"schedule_prior": ["mp4"],
"need_baseline": true,
"mode": "Pretrain"
}
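
A sketch of how this search configuration would typically be launched, assuming the paddle.distributed.launch module in this Paddle release accepts an --auto_tuner_json flag and that run_pretrain.py is the PaddleNLP entry point that consumes the params file; both names are assumptions and do not appear in this commit:

# Assumed launch command; --devices, --auto_tuner_json and the script path are
# not specified anywhere in this diff.
python -m paddle.distributed.launch \
    --devices "0,1,2,3,4,5,6,7" \
    --auto_tuner_json ./autoconfig/llama7b_pretrain.json \
    run_pretrain.py ./autoconfig/llama7b_pretrain_params.json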
42 changes: 42 additions & 0 deletions tests/test_tipc/auto_tuner/autoconfig/llama7b_pretrain_params.json
@@ -0,0 +1,42 @@
{
"model_name_or_path": "facebook/llama-7b",
"tokenizer_name_or_path": "facebook/llama-7b",
"input_dir": "./data",
"output_dir": "./checkpoints/llama_pretrain_ckpts",
"per_device_train_batch_size": 1,
"gradient_accumulation_steps": 8,
"per_device_eval_batch_size": 2,
"tensor_parallel_degree": 8,
"pipeline_parallel_degree": 1,
"sharding": "stage3",
"virtual_pp_degree": 1,
"sequence_parallel": 0,
"use_flash_attention": true,
"use_fused_rms_norm": true,
"use_fused_rope": true,
"max_seq_length": 4096,
"learning_rate": 3e-05,
"min_learning_rate": 3e-06,
"warmup_steps": 30,
"logging_steps": 1,
"max_steps": 100,
"save_steps": 5000,
"eval_steps": 1000,
"weight_decay": 0.01,
"bf16": true,
"fp16_opt_level": "O2",
"warmup_ratio": 0.01,
"max_grad_norm": 1.0,
"dataloader_num_workers": 1,
"continue_training": 0,
"do_train": true,
"do_eval": false,
"do_predict": false,
"disable_tqdm": true,
"recompute": true,
"distributed_dataloader": 1,
"recompute_granularity": "full",
"save_total_limit": 2,
"sharding_parallel_degree": 1,
"autotuner_benchmark": 1
}
78 changes: 78 additions & 0 deletions tests/test_tipc/auto_tuner/autoconfig/llama7b_sft.json
@@ -0,0 +1,78 @@
{
"dp_degree": "auto",
"invalid_strategy": [
"stage3_mp*"
],
"max_search_time": 900,
"max_time_per_task": 300,
"metric_cfg": {
"OptimizationDirection": "Maximize",
"name": "interval_samples_per_second"
},
"micro_batch_size": "auto",
"mode": "SFT",
"model_cfg": {
"global_batch_size": 8,
"hidden_size": 4096,
"num_attention_heads": 32,
"num_layers": 28,
"vocab_size": 65024
},
"mp_degree": "auto",
"need_baseline": true,
"pp_degree": [
1
],
"run_cmd": {
"gradient_accumulation_steps": [
"./autoconfig/llama7b_sft_params.json",
"gradient_accumulation_steps"
],
"micro_batch_size": [
"./autoconfig/llama7b_sft_params.json",
"per_device_train_batch_size"
],
"mp_degree": [
"./autoconfig/llama7b_sft_params.json",
"tensor_parallel_degree"
],
"pp_degree": [
"./autoconfig/llama7b_sft_params.json",
"pipeline_parallel_degree"
],
"run_best_stage": {
"autotuner_benchmark": [
"./autoconfig/llama7b_sft_params.json",
"autotuner_benchmark",
0
]
},
"search_stage": {
"autotuner_benchmark": [
"./autoconfig/llama7b_sft_params.json",
"autotuner_benchmark",
1
]
},
"sharding_degree": [
"./autoconfig/llama7b_sft_params.json",
"sharding_parallel_degree"
],
"sharding_stage": [
"./autoconfig/llama7b_sft_params.json",
"sharding",
"stage"
],
"use_recompute": [
"./autoconfig/llama7b_sft_params.json",
"recompute"
]
},
"schedule_prior": [
"mp4"
],
"sharding_degree": "auto",
"sharding_stage": "auto",
"task_limit": 2000,
"use_recompute": "auto"
}
