Merge remote-tracking branch 'upstream/develop' into add_master_grad
heavyrain-lzy committed Dec 13, 2023
2 parents 4cb6cb4 + a73a7bf commit 877da15
Showing 33 changed files with 366 additions and 201 deletions.
14 changes: 10 additions & 4 deletions .github/codecov.yml
@@ -1,8 +1,14 @@
codecov:
notify:
require_ci_to_pass: yes

coverage:
status:
project:
default:
informational: true
default:
target: 75% # overall project Coverage < 75% CI will fail
informational: true
patch:
default:
informational: true
default:
target: 90% # lines adjusted Coverage < 90% CI will fail
informational: true
1 change: 1 addition & 0 deletions llm/data.py
@@ -208,6 +208,7 @@ def convert_rounds_example_common(example, tokenizer, data_args, is_test=True, i
input_ids = rounds_inputs.pop("input_ids")
# shift input_ids and labels
input_ids, labels = input_ids[:-1], labels[1:]

seq_length = len(input_ids)
features = {"input_ids": input_ids, "labels": labels}
if intokens:
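The shift shown in this hunk is the standard causal-LM alignment: each input position is trained to predict the token one step ahead, so the inputs drop their last token and the labels drop their first. A minimal illustration with hypothetical token ids, not part of the commit:

# Hypothetical example of the shift above: inputs lose the last token,
# labels lose the first, so input position i predicts label position i.
input_ids = [101, 7592, 2088, 102]
labels = [101, 7592, 2088, 102]
input_ids, labels = input_ids[:-1], labels[1:]
assert input_ids == [101, 7592, 2088]
assert labels == [7592, 2088, 102]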
2 changes: 1 addition & 1 deletion llm/gpt-3/finetune_generation.py
@@ -100,7 +100,7 @@ def main():
training_args.tgt_length = data_args.tgt_length
paddle.set_device(training_args.device)

set_seed(args=training_args)
set_seed(seed=training_args.seed)

# Log on each process the small summary:
logger.warning(
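This change (repeated below in llm/llama/benchmark.py and paddlenlp/trainer/trainer.py) switches set_seed from receiving the whole TrainingArguments object to receiving the bare integer seed. As a rough sketch of what such a helper typically does, assuming the usual trio of RNGs; the real paddlenlp implementation may seed additional state (for example per-worker offsets in distributed runs):

# Hedged sketch of a seed helper with the new calling convention; illustrative only.
import random

import numpy as np
import paddle


def set_seed(seed: int) -> None:
    random.seed(seed)      # Python's built-in RNG
    np.random.seed(seed)   # NumPy RNG
    paddle.seed(seed)      # Paddle's global generator (CPU and GPU)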
28 changes: 15 additions & 13 deletions llm/llama/auto_parallel/run_auto.sh
@@ -12,20 +12,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# just for debug
# just for debug auto_parallel

set -x
unset CUDA_VISIBLE_DEVICES

export FLAGS_call_stack_level=2

task_name="llama_auto_dp2mp2pp2"
rm -rf output/$task_name/
task_name="llama_auto_dp2sharding2mp2pp2_vpp2"
# rm -rf output/$task_name/ # ckpt is saved in 'output/''
rm -rf "output/$task_name""_log"

export SOT_LOG_LEVEL=4
export PARALLEL_CROSS_ENTROPY=true
export FLAGS_call_stack_level=2
export PYTHONPATH=../../../:$PYTHONPATH

python -u -m paddle.distributed.launch \
--gpus "0,1,2,3,4,5,6,7" \
--log_dir "output/$task_name""_log" \
@@ -38,17 +36,19 @@ python -u -m paddle.distributed.launch \
--split 949,50,1 \
--max_seq_length 2048 \
--per_device_train_batch_size 1 \
--per_device_eval_batch_size 4 \
--gradient_accumulation_steps 4 \
--per_device_eval_batch_size 8 \
--gradient_accumulation_steps 8 \
--use_flash_attention 0 \
--use_fused_rms_norm 0 \
--fp16 0 \
--fp16_opt_level "O2" \
--scale_loss 1024 \
--pipeline_parallel_degree 2 \
--tensor_parallel_degree 2 \
--sharding_parallel_degree 1 \
--sharding "stage1" \
--pipeline_parallel_degree 2 \
--virtual_pp_degree 2 \
--pipeline_schedule_mode "VPP" \
--sharding_parallel_degree 2 \
--sharding "stage2" \
--learning_rate 0.0001 \
--min_learning_rate 0.00001 \
--max_steps 10 \
@@ -58,14 +58,16 @@ python -u -m paddle.distributed.launch \
--max_grad_norm 1.0 \
--logging_steps 1 \
--dataloader_num_workers 1 \
--sharding "" \
--eval_steps 1000 \
--report_to "visualdl" \
--disable_tqdm true \
--continue_training 0 \
--recompute 1 \
--recompute_granularity full \
--do_train \
--do_eval \
--device "gpu" \
--data_impl "mmap" \
--parallel_mode "auto"

# --resume_from_checkpoint "output/llama_auto_serial/checkpoint-2" \
41 changes: 41 additions & 0 deletions llm/llama/auto_parallel/run_pretrain_auto.py
@@ -32,8 +32,10 @@
PdArgumentParser,
Trainer,
TrainingArguments,
get_last_checkpoint,
speed_metrics,
)
from paddlenlp.trainer.trainer_utils import PREFIX_CHECKPOINT_DIR
from paddlenlp.transformers import (
AutoTokenizer,
CosineAnnealingWithWarmupDecay,
@@ -98,6 +100,9 @@ class PreTrainingArguments(TrainingArguments):
"help": "Enable fused_linear_param_grad pass, which should replace add_n_op with add_op for gradients accumulation."
},
)
pipeline_schedule_mode: str = field(
    default="1F1B", metadata={"help": "The pipeline schedule mode, support FThenB, 1F1B, VPP and Eager-1F1B."}
)

def __post_init__(self):
super().__post_init__()
@@ -441,6 +446,21 @@ def main():
+ f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16 or training_args.bf16}"
)

# Detecting last checkpoint.
last_checkpoint = None
if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir:
    last_checkpoint = get_last_checkpoint(training_args.output_dir)
    # if last_checkpoint is None and len(
    #     os.listdir(training_args.output_dir)) > 1:
    #     raise ValueError(
    #         f"Output directory ({training_args.output_dir}) already exists and is not empty. "
    #         "Use --overwrite_output_dir to overcome.")
    if last_checkpoint is not None and training_args.resume_from_checkpoint is None:
        logger.info(
            f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change "
            "the `--output_dir` or add `--overwrite_output_dir` to train from scratch."
        )

config_class, model_class = MODEL_CLASSES[model_args.model_type]

tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name_or_path)
@@ -553,6 +573,17 @@ def loss_func(loss, outputs):
print_config(training_args)

engine = auto.Engine(model, loss_func, optimizer, strategy=training_args.strategy)

checkpoint = None
if training_args.resume_from_checkpoint is not None:
    checkpoint = training_args.resume_from_checkpoint
elif last_checkpoint is not None:
    checkpoint = last_checkpoint

if checkpoint:
    logger.info(f"Starting training from resume_from_checkpoint : {checkpoint}")
    engine.load(os.path.join(checkpoint, "auto"))

engine.prepare(
[
paddle.static.InputSpec(
@@ -638,6 +669,16 @@ def loss_func(loss, outputs):
start_time_last_logged = time.time()
tr_loss = float(0)

if training_args.save_steps > 0 and global_step % training_args.save_steps == 0:
    paddle.device.cuda.synchronize()
    checkpoint_folder = f"{PREFIX_CHECKPOINT_DIR}-{global_step}"
    run_dir = training_args.output_dir
    output_dir = os.path.join(run_dir, checkpoint_folder)
    os.makedirs(output_dir, exist_ok=True)
    logger.info(f"Saving model checkpoint to {output_dir}")
    prefix_path = os.path.join(output_dir, "auto")
    engine.save(prefix_path, training=True)

if global_step >= training_args.max_steps:
break

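Taken together, the hunks above wire checkpoint handling into the auto-parallel pretraining script: find the newest checkpoint folder in output_dir, engine.load() its "auto" prefix before training unless --resume_from_checkpoint overrides it, and engine.save() every save_steps steps into a PREFIX_CHECKPOINT_DIR-<step> folder. As a rough sketch of what a get_last_checkpoint-style lookup does, assuming the "checkpoint-<step>" naming used here (the commit imports the real helper from paddlenlp.trainer, which may differ in details):

# Hypothetical re-implementation for illustration only.
import os
import re
from typing import Optional

_CHECKPOINT_RE = re.compile(r"^checkpoint-(\d+)$")


def get_last_checkpoint(output_dir: str) -> Optional[str]:
    """Return the checkpoint-<step> subfolder with the largest step, if any."""
    best_step, best_path = -1, None
    for name in os.listdir(output_dir):
        match = _CHECKPOINT_RE.match(name)
        path = os.path.join(output_dir, name)
        if match and os.path.isdir(path) and int(match.group(1)) > best_step:
            best_step, best_path = int(match.group(1)), path
    return best_path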
2 changes: 1 addition & 1 deletion llm/llama/benchmark.py
@@ -175,7 +175,7 @@ def main():

paddle.set_device(training_args.device)

set_seed(args=training_args)
set_seed(seed=training_args.seed)

# Log on each process the small summary:
logger.warning(
4 changes: 3 additions & 1 deletion llm/predictor.py
@@ -43,6 +43,7 @@
AutoConfig,
AutoModelForCausalLM,
AutoTokenizer,
ChatGLMv2Tokenizer,
LlamaTokenizer,
PretrainedModel,
PretrainedTokenizer,
@@ -197,7 +198,8 @@ def _preprocess(self, source):
return_tensors=self.return_tensors,
padding=True,
# when use chat_template, it should not add special tokens
add_special_tokens=self.config.chat_template is None,
# chatglm2 prefix-tokens can not be tokenized into ids
add_special_tokens=self.tokenizer.chat_template is None or isinstance(self.tokenizer, ChatGLMv2Tokenizer),
)
return tokenized_source

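This hunk, and the matching one in llm/utils.py below, keeps special tokens enabled for ChatGLMv2 even when a chat template is set, because its prefix tokens cannot be recovered by tokenizing plain text. The condition used at both call sites reduces to a small predicate; a hedged restatement (the real code checks isinstance against the imported ChatGLMv2Tokenizer class rather than the class name):

# Restates the add_special_tokens condition from the two hunks; illustrative only.
def should_add_special_tokens(tokenizer) -> bool:
    # Special tokens are normally skipped when a chat template formats the
    # prompt, except for ChatGLMv2, whose prefix tokens are not reachable
    # through plain text tokenization.
    is_chatglm2 = type(tokenizer).__name__ == "ChatGLMv2Tokenizer"
    return tokenizer.chat_template is None or is_chatglm2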
4 changes: 2 additions & 2 deletions llm/utils.py
@@ -29,7 +29,7 @@
from paddlenlp.datasets import InTokensIterableDataset
from paddlenlp.trainer import Trainer, TrainerCallback
from paddlenlp.trainer.trainer_utils import IterableDatasetShard, has_length
from paddlenlp.transformers import LlamaForCausalLMPipe
from paddlenlp.transformers import ChatGLMv2Tokenizer, LlamaForCausalLMPipe
from paddlenlp.transformers.tokenizer_utils import PretrainedTokenizer
from paddlenlp.utils.log import logger

@@ -409,7 +409,7 @@ def dybatch_preprocess(
padding=True,
max_length=src_length,
# if use chat_template, it will not add special_tokens
add_special_tokens=tokenizer.chat_template is None,
add_special_tokens=tokenizer.chat_template is None or isinstance(tokenizer, ChatGLMv2Tokenizer),
)
input_ids.append(tokens["input_ids"][0])
position_ids.append(tokens["position_ids"][0])
@@ -24,7 +24,7 @@ limitations under the License. */
#ifdef PADDLE_ON_INFERENCE
#include "paddle/extension.h"
#include "paddle_inference_api.h"
#include "paddle/phi/api/ext/exception.h"
#include "paddle/common/exception.h"
#else
#include "paddle/extension.h"
#endif
2 changes: 1 addition & 1 deletion paddlenlp/ops/fast_transformer/src/fusion_decoder_op.h
@@ -22,7 +22,7 @@ limitations under the License. */
#ifdef PADDLE_ON_INFERENCE
#include "paddle/extension.h"
#include "paddle_inference_api.h"
#include "paddle/phi/api/ext/exception.h"
#include "paddle/common/exception.h"
#else
#include "paddle/extension.h"
#endif
2 changes: 1 addition & 1 deletion paddlenlp/ops/fast_transformer/src/fusion_decoding_op.h
@@ -26,7 +26,7 @@ limitations under the License. */
#ifdef PADDLE_ON_INFERENCE
#include "paddle/extension.h"
#include "paddle_inference_api.h"
#include "paddle/phi/api/ext/exception.h"
#include "paddle/common/exception.h"
#else
#include "paddle/extension.h"
#endif
2 changes: 1 addition & 1 deletion paddlenlp/ops/fast_transformer/src/fusion_encoder_op.h
@@ -22,7 +22,7 @@ limitations under the License. */
#ifdef PADDLE_ON_INFERENCE
#include "paddle/extension.h"
#include "paddle_inference_api.h"
#include "paddle/phi/api/ext/exception.h"
#include "paddle/common/exception.h"
#else
#include "paddle/extension.h"
#endif
@@ -26,7 +26,7 @@ limitations under the License. */
#ifdef PADDLE_ON_INFERENCE
#include "paddle/extension.h"
#include "paddle_inference_api.h"
#include "paddle/phi/api/ext/exception.h"
#include "paddle/common/exception.h"
#else
#include "paddle/extension.h"
#endif
2 changes: 1 addition & 1 deletion paddlenlp/ops/fast_transformer/src/fusion_gpt_op.h
@@ -24,7 +24,7 @@
#ifdef PADDLE_ON_INFERENCE
#include "paddle/extension.h"
#include "paddle_inference_api.h"
#include "paddle/phi/api/ext/exception.h"
#include "paddle/common/exception.h"
#else
#include "paddle/extension.h"
#endif
2 changes: 1 addition & 1 deletion paddlenlp/ops/fast_transformer/src/fusion_gptj_op.h
@@ -24,7 +24,7 @@ limitations under the License. */
#ifdef PADDLE_ON_INFERENCE
#include "paddle/extension.h"
#include "paddle_inference_api.h"
#include "paddle/phi/api/ext/exception.h"
#include "paddle/common/exception.h"
#else
#include "paddle/extension.h"
#endif
@@ -24,7 +24,7 @@ limitations under the License. */
#ifdef PADDLE_ON_INFERENCE
#include "paddle/extension.h"
#include "paddle_inference_api.h"
#include "paddle/phi/api/ext/exception.h"
#include "paddle/common/exception.h"
#else
#include "paddle/extension.h"
#endif
2 changes: 1 addition & 1 deletion paddlenlp/ops/fast_transformer/src/fusion_miro_op.h
@@ -26,7 +26,7 @@ limitations under the License. */
#ifdef PADDLE_ON_INFERENCE
#include "paddle/extension.h"
#include "paddle_inference_api.h"
#include "paddle/phi/api/ext/exception.h"
#include "paddle/common/exception.h"
#else
#include "paddle/extension.h"
#endif
2 changes: 1 addition & 1 deletion paddlenlp/ops/fast_transformer/src/fusion_opt_op.h
@@ -24,7 +24,7 @@
#ifdef PADDLE_ON_INFERENCE
#include "paddle/extension.h"
#include "paddle_inference_api.h"
#include "paddle/phi/api/ext/exception.h"
#include "paddle/common/exception.h"
#else
#include "paddle/extension.h"
#endif
@@ -24,7 +24,7 @@ limitations under the License. */
#ifdef PADDLE_ON_INFERENCE
#include "paddle/extension.h"
#include "paddle_inference_api.h"
#include "paddle/phi/api/ext/exception.h"
#include "paddle/common/exception.h"
#else
#include "paddle/extension.h"
#endif
2 changes: 1 addition & 1 deletion paddlenlp/ops/fast_transformer/src/fusion_t5_decoding_op.h
@@ -25,7 +25,7 @@ limitations under the License. */
#ifdef PADDLE_ON_INFERENCE
#include "paddle/extension.h"
#include "paddle_inference_api.h"
#include "paddle/phi/api/ext/exception.h"
#include "paddle/common/exception.h"
#else
#include "paddle/extension.h"
#endif
@@ -26,7 +26,7 @@ limitations under the License. */
#ifdef PADDLE_ON_INFERENCE
#include "paddle/extension.h"
#include "paddle_inference_api.h"
#include "paddle/phi/api/ext/exception.h"
#include "paddle/common/exception.h"
#else
#include "paddle/extension.h"
#endif
@@ -29,7 +29,7 @@
#ifdef PADDLE_ON_INFERENCE
#include "paddle/extension.h"
#include "paddle_inference_api.h"
#include "paddle/phi/api/ext/exception.h"
#include "paddle/common/exception.h"
#else
#include "paddle/extension.h"
#endif
2 changes: 1 addition & 1 deletion paddlenlp/trainer/trainer.py
@@ -260,7 +260,7 @@ def __init__(
self._memory_tracker.start()

# Seed must be set before instantiating the model when using model
set_seed(args=self.args)
set_seed(seed=self.args.seed)

if model is None:
raise RuntimeError("`Trainer` requires either a `model` or `model_init` argument")