diff --git a/README.md b/README.md index ad96afee8c..1dd050ee9c 100644 --- a/README.md +++ b/README.md @@ -31,38 +31,45 @@ Check out [this blog post about BERT pre-training](https://huggingface.co/blog/p If you are not familiar with HPUs and would like to know more about them, we recommend you take a look at [our conceptual guide](https://huggingface.co/docs/optimum/habana/concept_guides/hpu). -## Install -To install the latest stable release of this package: +## Install the library and get example scripts -```bash -pip install --upgrade-strategy eager optimum[habana] -``` +### Option 1: Use the latest stable release -The `--upgrade-strategy eager` option is needed to ensure `optimum-habana` is upgraded to the latest stable release. - -> To use DeepSpeed on HPUs, you also need to run the following command: +To install the latest stable release of this package >```bash ->pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.14.0 +>pip install --upgrade-strategy eager optimum[habana] >``` -Optimum Habana is a fast-moving project, and you may want to install it from source: +The `--upgrade-strategy eager` option is needed to ensure `optimum-habana` is upgraded to the latest stable release. + +To use the example associated with the latest stable release, run: +> ``` +> git clone https://github.com/huggingface/optimum-habana +> cd optimum-habana && git checkout v1.10.2 +> ``` +> with `v1.10.2` the version number of this release. 
+ +### Option 2: Use the latest main branch under development + +Optimum Habana is a fast-moving project, and you may want to install it from source and get the latest scripts : ```bash pip install git+https://github.com/huggingface/optimum-habana.git +git clone https://github.com/huggingface/optimum-habana ``` -Last but not least, don't forget to install the requirements for every example: +## Install dependencies -```bash -cd <example-folder> -pip install -r requirements.txt -``` +To use DeepSpeed on HPUs, you also need to run the following command: +>```bash +>pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.14.0 +>``` -> To use the example associated with the latest stable release, run: -> ``` -> git checkout v1.10.1 -> ``` -> with `v1.10.1` the version number of this release. +To install the requirements for every example: +>```bash +>cd <example-folder> +>pip install -r requirements.txt +>``` ## How to use it? diff --git a/examples/language-modeling/README.md b/examples/language-modeling/README.md index 1d885313e1..bd5b9a24b8 100644 --- a/examples/language-modeling/README.md +++ b/examples/language-modeling/README.md @@ -417,7 +417,7 @@ LOWER_LIST=ops_bf16.txt python3 run_lora_clm.py \ --low_cpu_mem_usage True \ --adam_epsilon 1e-08 \ --do_eval \ - --validation_split_percentage 10 + --validation_split_percentage 5 ``` - Multi-card finetuning of Llama1-7B: @@ -518,7 +518,7 @@ LOWER_LIST=ops_bf16.txt python3 ../gaudi_spawn.py \ --adam_epsilon 1e-08 \ --do_eval \ --low_cpu_mem_usage True \ - --validation_split_percentage 10 + --validation_split_percentage 6 ``` - Multi-card finetuning of Llama2-70B with DeepSpeed ZeRO-3 optimization and LoRA: @@ -593,6 +593,7 @@ DEEPSPEED_HPU_ZERO3_SYNC_MARK_STEP_REQUIRED=1 LOWER_LIST=ops_bf16.txt python3 .. 
--max_seq_length 256 \ --adam_epsilon 1e-08 \ --do_eval \ + --validation_split_percentage 5 \ --deepspeed ds_falcon_180b_z3.json ``` ## Streaming diff --git a/examples/summarization/run_summarization.py b/examples/summarization/run_summarization.py index e36b5c2d18..9040a4b1f0 100644 --- a/examples/summarization/run_summarization.py +++ b/examples/summarization/run_summarization.py @@ -399,11 +399,11 @@ def main(): logger.info(f"Training/evaluation parameters {training_args}") if data_args.source_prefix is None and model_args.model_name_or_path in [ - "t5-small", - "t5-base", - "t5-large", - "t5-3b", - "t5-11b", + "google-t5/t5-small", + "google-t5/t5-base", + "google-t5/t5-large", + "google-t5/t5-3b", + "google-t5/t5-11b", ]: logger.warning( "You're running a t5 model but didn't provide a source prefix, which is the expected, e.g. with " diff --git a/examples/translation/run_translation.py b/examples/translation/run_translation.py index 459969e388..4c0ec070b9 100644 --- a/examples/translation/run_translation.py +++ b/examples/translation/run_translation.py @@ -351,11 +351,11 @@ def main(): logger.info(f"Training/evaluation parameters {training_args}") if data_args.source_prefix is None and model_args.model_name_or_path in [ - "t5-small", - "t5-base", - "t5-large", - "t5-3b", - "t5-11b", + "google-t5/t5-small", + "google-t5/t5-base", + "google-t5/t5-large", + "google-t5/t5-3b", + "google-t5/t5-11b", ]: logger.warning( "You're running a t5 model but didn't provide a source prefix, which is expected, e.g. 
with " diff --git a/tests/example_diff/run_clm.txt b/tests/example_diff/run_clm.txt index 2c4a933adf..47ab917083 100644 --- a/tests/example_diff/run_clm.txt +++ b/tests/example_diff/run_clm.txt @@ -67,11 +67,15 @@ --- > > streaming: bool = field(default=False, metadata={"help": "Enable streaming mode."}) -250c267 +228a246,248 +> save_last_ckpt: bool = field( +> default=True, metadata={"help": "Whether to save checkpoint at the end of the training."} +> ) +250c270 < parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments)) --- > parser = HfArgumentParser((ModelArguments, DataTrainingArguments, GaudiTrainingArguments)) -288a306,312 +288a309,315 > gaudi_config = GaudiConfig.from_pretrained( > training_args.gaudi_config_name, > cache_dir=model_args.cache_dir, @@ -79,26 +83,26 @@ > use_auth_token=True if model_args.use_auth_token else None, > ) > -289a314 +289a317 > mixed_precision = training_args.bf16 or gaudi_config.use_torch_autocast -291,292c316,318 +291,292c319,321 < f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, " < + f"distributed training: {training_args.parallel_mode.value == 'distributed'}, 16-bits training: {training_args.fp16}" --- > f"Process rank: {training_args.local_rank}, device: {training_args.device}, " > + f"distributed training: {training_args.parallel_mode.value == 'distributed'}, " > + f"mixed-precision training: {mixed_precision}" -403a430 +403a433 > "use_cache": False if training_args.gradient_checkpointing else model_args.use_cache, -499a527 +499a530 > -597c625 +597c628 < trainer = Trainer( --- > trainer = GaudiTrainer( -598a627 +598a630 > gaudi_config=gaudi_config, -605,608c634,635 +605,608c637,638 < compute_metrics=compute_metrics if training_args.do_eval and not is_torch_tpu_available() else None, < preprocess_logits_for_metrics=preprocess_logits_for_metrics < if training_args.do_eval and not is_torch_tpu_available() @@ -106,7 +110,12 @@ --- > 
compute_metrics=compute_metrics if training_args.do_eval else None, > preprocess_logits_for_metrics=preprocess_logits_for_metrics if training_args.do_eval else None, -623,626c650,656 +619c649,650 +< trainer.save_model() # Saves the tokenizer too for easy upload +--- +> if data_args.save_last_ckpt: +> trainer.save_model() # Saves the tokenizer too for easy upload +623,626c654,660 < max_train_samples = ( < data_args.max_train_samples if data_args.max_train_samples is not None else len(train_dataset) < ) @@ -119,9 +128,9 @@ > data_args.max_train_samples if data_args.max_train_samples is not None else len(train_dataset) > ) > metrics["train_samples"] = min(max_train_samples, len(train_dataset)) -635d664 +635d668 < -638,639c667,672 +638,639c671,676 < max_eval_samples = data_args.max_eval_samples if data_args.max_eval_samples is not None else len(eval_dataset) < metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset)) --- @@ -131,7 +140,7 @@ > ) > metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset)) > -662,666d694 +662,666d698 < < < def _mp_fn(index):