tests/deepspeed/test_deepspeed.py — 102 changes: 51 additions & 51 deletions
@@ -752,60 +752,60 @@ def test_load_best_model(self, stage, dtype):
         # must use this setting to get the reload path exercised
         ds_config_dict["zero_optimization"]["stage3_gather_16bit_weights_on_model_save"] = True
 
-        tokenizer = T5Tokenizer.from_pretrained(T5_TINY)
-        model = T5ForConditionalGeneration.from_pretrained(T5_TINY)
-
-        def _add_eos_to_examples(example):
-            example["input_text"] = f"question: {example['question']} context: {example['context']}"
-            example["target_text"] = example["answers"]["text"][0] if len(example["answers"]["text"]) > 0 else ""
-            return example
-
-        def _convert_to_features(example_batch):
-            input_encodings = tokenizer.batch_encode_plus(
-                example_batch["input_text"], pad_to_max_length=True, max_length=512, truncation=True
-            )
-            target_encodings = tokenizer.batch_encode_plus(
-                example_batch["target_text"], pad_to_max_length=True, max_length=16, truncation=True
-            )
-
-            encodings = {
-                "input_ids": input_encodings["input_ids"],
-                "attention_mask": input_encodings["attention_mask"],
-                "labels": target_encodings["input_ids"],
-            }
-
-            return encodings
-
-        def get_dataset():
-            data_file = str(self.tests_dir / "fixtures/tests_samples/SQUAD/sample.json")
-            data_files = dict(train=data_file, validation=data_file)
-            raw_datasets = datasets.load_dataset("json", data_files=data_files, field="data")
-            train_dataset = raw_datasets["train"].map(_add_eos_to_examples).map(_convert_to_features, batched=True)
-            valid_dataset = deepcopy(train_dataset)
-            return train_dataset, valid_dataset
-
-        train_dataset, eval_dataset = get_dataset()
-
-        args_dict = {
-            "per_gpu_train_batch_size": 1,
-            "per_gpu_eval_batch_size": 1,
-            "gradient_accumulation_steps": 1,
-            "learning_rate": 1e-4,
-            "num_train_epochs": 1,
-            "do_train": True,
-            "do_eval": True,
-            "optim": "adafactor",
-            "evaluation_strategy": "steps",
-            "eval_steps": 1,
-            "save_strategy": "steps",
-            "save_steps": 1,
-            "load_best_model_at_end": True,
-            "max_steps": 1,
-            "deepspeed": ds_config_dict,
-        }
-
         with mockenv_context(**self.dist_env_1_gpu):
 
+            tokenizer = T5Tokenizer.from_pretrained(T5_TINY)
+            model = T5ForConditionalGeneration.from_pretrained(T5_TINY)
+
+            def _add_eos_to_examples(example):
+                example["input_text"] = f"question: {example['question']} context: {example['context']}"
+                example["target_text"] = example["answers"]["text"][0] if len(example["answers"]["text"]) > 0 else ""
+                return example
+
+            def _convert_to_features(example_batch):
+                input_encodings = tokenizer.batch_encode_plus(
+                    example_batch["input_text"], pad_to_max_length=True, max_length=512, truncation=True
+                )
+                target_encodings = tokenizer.batch_encode_plus(
+                    example_batch["target_text"], pad_to_max_length=True, max_length=16, truncation=True
+                )
+
+                encodings = {
+                    "input_ids": input_encodings["input_ids"],
+                    "attention_mask": input_encodings["attention_mask"],
+                    "labels": target_encodings["input_ids"],
+                }
+
+                return encodings
+
+            def get_dataset():
+                data_file = str(self.tests_dir / "fixtures/tests_samples/SQUAD/sample.json")
+                data_files = dict(train=data_file, validation=data_file)
+                raw_datasets = datasets.load_dataset("json", data_files=data_files, field="data")
+                train_dataset = raw_datasets["train"].map(_add_eos_to_examples).map(_convert_to_features, batched=True)
+                valid_dataset = deepcopy(train_dataset)
+                return train_dataset, valid_dataset
+
+            train_dataset, eval_dataset = get_dataset()
+
+            args_dict = {
+                "per_gpu_train_batch_size": 1,
+                "per_gpu_eval_batch_size": 1,
+                "gradient_accumulation_steps": 1,
+                "learning_rate": 1e-4,
+                "num_train_epochs": 1,
+                "do_train": True,
+                "do_eval": True,
+                "optim": "adafactor",
+                "evaluation_strategy": "steps",
+                "eval_steps": 1,
+                "save_strategy": "steps",
+                "save_steps": 1,
+                "load_best_model_at_end": True,
+                "max_steps": 1,
+                "deepspeed": ds_config_dict,
+            }
+
             training_args = TrainingArguments(output_dir, **args_dict)
 
             trainer = Trainer(
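Note on the change: the tokenizer, model, dataset helpers, and args_dict previously ran before entering mockenv_context, so TrainingArguments (which inspects the distributed environment when it is constructed) did not see the mocked single-GPU env vars; the diff moves the whole setup inside the block. For readers unfamiliar with the helper, here is a minimal sketch of an environment-patching context manager in the same spirit. The real mockenv_context lives in transformers.testing_utils and may be implemented differently, and the dist_env_1_gpu contents shown are an illustrative assumption, not copied from the repo.

import os
from contextlib import contextmanager
from unittest import mock

@contextmanager
def mockenv_context(**env_vars):
    # Overlay the given variables onto os.environ for the duration of the
    # block; mock.patch.dict restores the original environment on exit.
    with mock.patch.dict(os.environ, {k: str(v) for k, v in env_vars.items()}):
        yield

# Hypothetical single-GPU distributed env, for illustration only:
dist_env_1_gpu = dict(MASTER_ADDR="localhost", MASTER_PORT="10999", RANK="0", LOCAL_RANK="0", WORLD_SIZE="1")

with mockenv_context(**dist_env_1_gpu):
    # Anything built here (e.g., TrainingArguments) sees WORLD_SIZE=1 etc.
    assert os.environ["WORLD_SIZE"] == "1"

Since the env vars only exist while the context manager is active, constructing TrainingArguments and the DeepSpeed-enabled Trainer outside it is the likely reason this test needed the reshuffle.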