diff --git a/paddlenlp/trainer/trainer.py b/paddlenlp/trainer/trainer.py index dfc47354c493..b42e596e97e4 100644 --- a/paddlenlp/trainer/trainer.py +++ b/paddlenlp/trainer/trainer.py @@ -1771,16 +1771,8 @@ def _wrap_model(self, model, training=True): in_sep_parallel_mode = self.args.sep_parallel_degree > 1 # Multi-gpu training - if ( - self.args.world_size > 1 - and not self.args.use_hybrid_parallel - or not ( - in_pipeline_parallel_mode - or in_sharding_parallel_mode - or in_tensor_parallel_mode - or in_sep_parallel_mode - ) - ): + if self.args.world_size > 1 and (not self.args.use_hybrid_parallel): + # MOE use DDP to broadcaset parameters. model = paddle.DataParallel(model) # Distributed training (should be after fp16 initialization) diff --git a/paddlenlp/trainer/training_args.py b/paddlenlp/trainer/training_args.py index 423d77d6f510..b31e55d7b4f0 100644 --- a/paddlenlp/trainer/training_args.py +++ b/paddlenlp/trainer/training_args.py @@ -1406,7 +1406,7 @@ def is_segment_parallel_supported(): if world_size > 1: if not paddle.distributed.parallel.parallel_helper._is_parallel_ctx_initialized(): if self.unified_checkpoint: - self.use_hybrid_parallel = True + # DP use hybrid group strategy = fleet.DistributedStrategy() fleet.init(is_collective=True, strategy=strategy) else: diff --git a/paddlenlp/utils/safetensors.py b/paddlenlp/utils/safetensors.py index 422a7d09961c..c273d0d973c2 100644 --- a/paddlenlp/utils/safetensors.py +++ b/paddlenlp/utils/safetensors.py @@ -157,16 +157,16 @@ def __getitem__(self, index): out_start, out_stop, out_step = copy.deepcopy((self.start, self.stop, self.step)) for i, (start, stop, step, slice_) in enumerate(zip(self.start, self.stop, self.step, index)): - out_start[i] = slice_.start or 0 - out_step[i] = slice_.step or 1 - out_stop[i] = slice_.stop or stop - start + out_start[i] = slice_.start if slice_.start is not None else 0 + out_step[i] = slice_.step if slice_.step is not None else 1 + out_stop[i] = slice_.stop if slice_.stop is not None else stop - start out_stop[i] = min(stop, out_stop[i]) target_shape = [] - for x, y, z in zip(out_start, out_stop, out_step): + for x, y, z, sli in zip(out_start, out_stop, out_step, index): assert z == 1, "only support step = 1" - if y - x > 1: - target_shape.append(int(y - x)) + if y - x > 1 or sli.step is None: + target_shape.append(max(int(y - x), 0)) if len(target_shape) == 0: if self.shape == [1]: diff --git a/pyproject.toml b/pyproject.toml index 715323d09e37..858508037fce 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ exclude = ['.flake8'] [tool.pytest.ini_options] minversion = "6.0" -addopts = "-ra -q --ignore model_zoo/gpt-3/" +addopts = "-ra -q --dist loadgroup" pythonpath = ["."] testpaths = [ "tests/data", @@ -28,7 +28,7 @@ testpaths = [ "tests/prompt", # "tests/taskflow", TODO (paddle 2.5.1 breaks this test suite, debug later) "tests/utils", - "model_zoo", + # "model_zoo", ] python_files = [ "test.py", diff --git a/tests/trainer/test_lora_unified_checkpoint.py b/tests/trainer/test_lora_unified_checkpoint.py index 98d5516d2388..0abfc257d4f7 100644 --- a/tests/trainer/test_lora_unified_checkpoint.py +++ b/tests/trainer/test_lora_unified_checkpoint.py @@ -149,7 +149,7 @@ def __test__(cls): def setUp(self): """ - 1. update runfrist and rerun to run defined different config + 1. update runfirst and rerun to run defined different config 2. update need_allclose to True if you want to check the result 3. 
update rtol to the relative value you want to check """ @@ -169,7 +169,7 @@ def setUp(self): self.run_lora_file = "llm/finetune_generation.py" - def runfrist(self, train_args): + def runfirst(self, train_args): self.run_n1c8(self.run_lora_file, **train_args) def rerun(self, train_args): @@ -181,7 +181,7 @@ def testTP4PP2(self): remove_ckpt(lora_arguments["output_dir"]) train_args = self.configs["TP4PP2"] - self.runfrist(train_args) + self.runfirst(train_args) self.rerun(train_args) if self.need_allclose: @@ -196,7 +196,7 @@ def testTP2Sharding4(self): remove_ckpt(lora_arguments["output_dir"]) train_args = self.configs["TP2Sharding4"] - self.runfrist(train_args) + self.runfirst(train_args) self.rerun(train_args) if self.need_allclose: @@ -213,7 +213,7 @@ def testTP8(self): remove_ckpt(lora_arguments["output_dir"]) train_args = self.configs["TP8"] - self.runfrist(train_args) + self.runfirst(train_args) self.rerun(train_args) if self.need_allclose: @@ -227,7 +227,7 @@ def testTP4DP2(self): remove_ckpt(lora_arguments["output_dir"]) train_args = self.configs["TP4DP2"] - self.runfrist(train_args) + self.runfirst(train_args) self.rerun(train_args) if self.need_allclose: @@ -242,7 +242,7 @@ def testTP4Sharding2(self): remove_ckpt(lora_arguments["output_dir"]) train_args = self.configs["TP4Sharding2"] - self.runfrist(train_args) + self.runfirst(train_args) self.rerun(train_args) if self.need_allclose: @@ -257,7 +257,7 @@ def testTP2PP4(self): remove_ckpt(lora_arguments["output_dir"]) train_args = self.configs["TP2PP4"] - self.runfrist(train_args) + self.runfirst(train_args) self.rerun(train_args) if self.need_allclose: @@ -272,7 +272,7 @@ def testPP8(self): remove_ckpt(lora_arguments["output_dir"]) train_args = self.configs["PP8"] - self.runfrist(train_args) + self.runfirst(train_args) self.rerun(train_args) if self.need_allclose: @@ -287,7 +287,7 @@ def testPP4DP2(self): remove_ckpt(lora_arguments["output_dir"]) train_args = self.configs["PP4DP2"] - self.runfrist(train_args) + self.runfirst(train_args) self.rerun(train_args) if self.need_allclose: @@ -302,7 +302,7 @@ def testPP4Sharding2(self): remove_ckpt(lora_arguments["output_dir"]) train_args = self.configs["PP4Sharding2"] - self.runfrist(train_args) + self.runfirst(train_args) self.rerun(train_args) if self.need_allclose: @@ -317,7 +317,7 @@ def testSharding8S1(self): remove_ckpt(lora_arguments["output_dir"]) train_args = self.configs["Sharding8S1"] - self.runfrist(train_args) + self.runfirst(train_args) self.rerun(train_args) if self.need_allclose: @@ -332,7 +332,7 @@ def testSharding8S2(self): remove_ckpt(lora_arguments["output_dir"]) train_args = self.configs["Sharding8S2"] - self.runfrist(train_args) + self.runfirst(train_args) self.rerun(train_args) if self.need_allclose: @@ -347,7 +347,7 @@ def testSharding4S1DP2(self): remove_ckpt(lora_arguments["output_dir"]) train_args = self.configs["Sharding4S1DP2"] - self.runfrist(train_args) + self.runfirst(train_args) self.rerun(train_args) if self.need_allclose: @@ -362,7 +362,7 @@ def testSharding4S2DP2(self): remove_ckpt(lora_arguments["output_dir"]) train_args = self.configs["Sharding4S2DP2"] - self.runfrist(train_args) + self.runfirst(train_args) self.rerun(train_args) if self.need_allclose: @@ -377,7 +377,7 @@ def testSharding2S1DP4(self): remove_ckpt(lora_arguments["output_dir"]) train_args = self.configs["Sharding2S1DP4"] - self.runfrist(train_args) + self.runfirst(train_args) self.rerun(train_args) if self.need_allclose: @@ -392,7 +392,7 @@ def testSharding2S2DP4(self): 
remove_ckpt(lora_arguments["output_dir"]) train_args = self.configs["Sharding2S2DP4"] - self.runfrist(train_args) + self.runfirst(train_args) self.rerun(train_args) if self.need_allclose: @@ -407,7 +407,7 @@ def testDP8(self): remove_ckpt(lora_arguments["output_dir"]) train_args = self.configs["DP8"] - self.runfrist(train_args) + self.runfirst(train_args) self.rerun(train_args) if self.need_allclose: @@ -416,19 +416,21 @@ def testDP8(self): np.testing.assert_allclose(res[0], res[1], self.rtol) +@pytest.mark.skipif(True, reason="Skip for None CE") class TestUnifiedCheckpointOnN2C4(TestUnifiedCheckpointBase): def setUp(self): super().setUp() self.need_allclose = True self.rtol = 1e-7 - def runfrist(self, train_args): + def runfirst(self, train_args): self.run_n2c4(self.run_lora_file, **train_args) def rerun(self, train_args): self.run_n2c4(self.run_lora_file, **train_args) +@pytest.mark.skipif(True, reason="Skip for None CE") class TestUnifiedCheckpointOnN1C8CheckpointCompatible(TestUnifiedCheckpointBase): def setUp(self): super().setUp() @@ -436,7 +438,7 @@ def setUp(self): self.need_allclose = True self.rtol = 1e-7 - def runfrist(self, train_args): + def runfirst(self, train_args): train_args["unified_checkpoint"] = 0 self.run_n1c8(self.run_lora_file, **train_args) @@ -445,6 +447,7 @@ def rerun(self, train_args): self.run_n1c8(self.run_lora_file, **train_args) +@pytest.mark.skipif(True, reason="Skip for None CE") class TestPaddleCheckpointOnN1C8Reset(TestUnifiedCheckpointBase): def setUp(self): super().setUp() @@ -452,7 +455,7 @@ def setUp(self): self.need_allclose = True self.rtol = 1e-7 - def runfrist(self, train_args): + def runfirst(self, train_args): train_args["unified_checkpoint"] = 0 self.run_n1c8(self.run_lora_file, **train_args) @@ -469,7 +472,7 @@ def setUp(self): self.need_allclose = True self.rtol = 1e-7 - def runfrist(self, train_args): + def runfirst(self, train_args): train_args["unified_checkpoint"] = 0 self.run_n2c4(self.run_lora_file, **train_args) diff --git a/tests/trainer/test_unified_checkpoint.py b/tests/trainer/test_unified_checkpoint.py index f8cc0ed7bfac..5ce99b36ff19 100644 --- a/tests/trainer/test_unified_checkpoint.py +++ b/tests/trainer/test_unified_checkpoint.py @@ -175,7 +175,7 @@ def __test__(cls): def setUp(self): """ - 1. update runfrist and rerun to run defined diffrent config + 1. update runfirst and rerun to run defined diffrent config 2. update need_allclose to True if you want to check the result 3. 
update rtol to the relative value you want to check """ @@ -194,7 +194,7 @@ def setUp(self): self.run_pretrain_file = "llm/llama/run_pretrain.py" - def runfrist(self, train_args): + def runfirst(self, train_args): self.run_n1c8(self.run_pretrain_file, **train_args) def rerun(self, train_args): @@ -206,7 +206,7 @@ def testTP4PP2(self): remove_ckpt(pretrain_arguments["output_dir"]) train_args = self.configs["TP4PP2"] - self.runfrist(train_args) + self.runfirst(train_args) self.rerun(train_args) if self.need_allclose: @@ -221,7 +221,7 @@ def testTP2Sharding4(self): remove_ckpt(pretrain_arguments["output_dir"]) train_args = self.configs["TP2Sharding4"] - self.runfrist(train_args) + self.runfirst(train_args) self.rerun(train_args) if self.need_allclose: @@ -238,7 +238,7 @@ def testTP8(self): remove_ckpt(pretrain_arguments["output_dir"]) train_args = self.configs["TP8"] - self.runfrist(train_args) + self.runfirst(train_args) self.rerun(train_args) if self.need_allclose: @@ -252,7 +252,7 @@ def testTP4DP2(self): remove_ckpt(pretrain_arguments["output_dir"]) train_args = self.configs["TP4DP2"] - self.runfrist(train_args) + self.runfirst(train_args) self.rerun(train_args) if self.need_allclose: @@ -267,7 +267,7 @@ def testTP4Sharding2(self): remove_ckpt(pretrain_arguments["output_dir"]) train_args = self.configs["TP4Sharding2"] - self.runfrist(train_args) + self.runfirst(train_args) self.rerun(train_args) if self.need_allclose: @@ -282,7 +282,7 @@ def testTP2PP4(self): remove_ckpt(pretrain_arguments["output_dir"]) train_args = self.configs["TP2PP4"] - self.runfrist(train_args) + self.runfirst(train_args) self.rerun(train_args) if self.need_allclose: @@ -297,7 +297,7 @@ def testPP8(self): remove_ckpt(pretrain_arguments["output_dir"]) train_args = self.configs["PP8"] - self.runfrist(train_args) + self.runfirst(train_args) self.rerun(train_args) if self.need_allclose: @@ -312,7 +312,7 @@ def testPP4DP2(self): remove_ckpt(pretrain_arguments["output_dir"]) train_args = self.configs["PP4DP2"] - self.runfrist(train_args) + self.runfirst(train_args) self.rerun(train_args) if self.need_allclose: @@ -327,7 +327,7 @@ def testPP4Sharding2(self): remove_ckpt(pretrain_arguments["output_dir"]) train_args = self.configs["PP4Sharding2"] - self.runfrist(train_args) + self.runfirst(train_args) self.rerun(train_args) if self.need_allclose: @@ -342,7 +342,7 @@ def testSharding8S1(self): remove_ckpt(pretrain_arguments["output_dir"]) train_args = self.configs["Sharding8S1"] - self.runfrist(train_args) + self.runfirst(train_args) self.rerun(train_args) if self.need_allclose: @@ -357,7 +357,7 @@ def testSharding8S2(self): remove_ckpt(pretrain_arguments["output_dir"]) train_args = self.configs["Sharding8S2"] - self.runfrist(train_args) + self.runfirst(train_args) self.rerun(train_args) if self.need_allclose: @@ -372,7 +372,7 @@ def testSharding4S1DP2(self): remove_ckpt(pretrain_arguments["output_dir"]) train_args = self.configs["Sharding4S1DP2"] - self.runfrist(train_args) + self.runfirst(train_args) self.rerun(train_args) if self.need_allclose: @@ -387,7 +387,7 @@ def testSharding4S2DP2(self): remove_ckpt(pretrain_arguments["output_dir"]) train_args = self.configs["Sharding4S2DP2"] - self.runfrist(train_args) + self.runfirst(train_args) self.rerun(train_args) if self.need_allclose: @@ -402,7 +402,7 @@ def testSharding2S1DP4(self): remove_ckpt(pretrain_arguments["output_dir"]) train_args = self.configs["Sharding2S1DP4"] - self.runfrist(train_args) + self.runfirst(train_args) self.rerun(train_args) if self.need_allclose: @@ 
-417,7 +417,7 @@ def testSharding2S2DP4(self): remove_ckpt(pretrain_arguments["output_dir"]) train_args = self.configs["Sharding2S2DP4"] - self.runfrist(train_args) + self.runfirst(train_args) self.rerun(train_args) if self.need_allclose: @@ -432,7 +432,7 @@ def testDP8(self): remove_ckpt(pretrain_arguments["output_dir"]) train_args = self.configs["DP8"] - self.runfrist(train_args) + self.runfirst(train_args) self.rerun(train_args) if self.need_allclose: @@ -441,13 +441,14 @@ def testDP8(self): np.testing.assert_allclose(res[0], res[1], self.rtol) +@pytest.mark.skipif(True, reason="Skip for None CE") class TestUnifiedCheckpointOnN2C4(TestUnifiedCheckpointBase): def setUp(self): super().setUp() self.need_allclose = True self.rtol = 1e-7 - def runfrist(self, train_args): + def runfirst(self, train_args): self.run_n2c4(self.run_pretrain_file, **train_args) def rerun(self, train_args): @@ -463,7 +464,7 @@ def setUp(self): self.rtol = 1e-4 self.k = MAX_CONVERT_CONFIGS # max: 16, min: 1 - def runfrist(self, train_args): + def runfirst(self, train_args): self.run_n1c8(self.run_pretrain_file, **train_args) def rerun(self, train_args): @@ -485,7 +486,7 @@ def setUp(self): self.rtol = 1e-4 self.k = MAX_CONVERT_CONFIGS # max: 16, min: 1 - def runfrist(self, train_args): + def runfirst(self, train_args): self.run_n2c4(self.run_pretrain_file, **train_args) def rerun(self, train_args): @@ -507,7 +508,7 @@ def setUp(self): self.rtol = 1e-4 self.k = MAX_CONVERT_CONFIGS # max: 16, min: 1 - def runfrist(self, train_args): + def runfirst(self, train_args): self.run_n1c8(self.run_pretrain_file, **train_args) move_checkpoint_N1C8_to_N2C4() @@ -529,7 +530,7 @@ def setUp(self): self.rtol = 1e-4 self.k = MAX_CONVERT_CONFIGS # max: 16, min: 1 - def runfrist(self, train_args): + def runfirst(self, train_args): self.run_n2c4(self.run_pretrain_file, **train_args) move_checkpoint_N2C4_to_N1C8() @@ -557,7 +558,7 @@ def setUp(self): self.need_allclose = True self.rtol = 1e-7 - def runfrist(self, train_args): + def runfirst(self, train_args): self.run_n1c8(self.run_pretrain_file, **train_args) def rerun(self, train_args): @@ -576,7 +577,7 @@ def setUp(self): self.need_allclose = False - def runfrist(self, train_args): + def runfirst(self, train_args): train_args["fp16_opt_level"] = "O1" self.run_n1c8(self.run_pretrain_file, **train_args) @@ -585,6 +586,7 @@ def rerun(self, train_args): self.run_n1c8(self.run_pretrain_file, **train_args) +@pytest.mark.skipif(True, reason="Skip for None CE") class TestUnifiedCheckpointOnN1C8MasterWeightCompatibleO2ToO1(TestUnifiedCheckpointBase): def setUp(self): super().setUp() @@ -596,7 +598,7 @@ def setUp(self): self.need_allclose = False - def runfrist(self, train_args): + def runfirst(self, train_args): train_args["fp16_opt_level"] = "O2" self.run_n1c8(self.run_pretrain_file, **train_args) @@ -605,6 +607,7 @@ def rerun(self, train_args): self.run_n1c8(self.run_pretrain_file, **train_args) +@pytest.mark.skipif(True, reason="Skip for None CE") class TestUnifiedCheckpointOnN1C8CheckpointCompatible(TestUnifiedCheckpointBase): def setUp(self): super().setUp() @@ -612,7 +615,7 @@ def setUp(self): self.need_allclose = True self.rtol = 1e-7 - def runfrist(self, train_args): + def runfirst(self, train_args): train_args["unified_checkpoint"] = 0 self.run_n1c8(self.run_pretrain_file, **train_args) @@ -621,6 +624,7 @@ def rerun(self, train_args): self.run_n1c8(self.run_pretrain_file, **train_args) +@pytest.mark.skipif(True, reason="Skip for None CE") class 
TestPaddleCheckpointOnN1C8Reset(TestUnifiedCheckpointBase): def setUp(self): super().setUp() @@ -628,7 +632,7 @@ def setUp(self): self.need_allclose = True self.rtol = 1e-7 - def runfrist(self, train_args): + def runfirst(self, train_args): train_args["unified_checkpoint"] = 0 self.run_n1c8(self.run_pretrain_file, **train_args) @@ -637,6 +641,7 @@ def rerun(self, train_args): self.run_n1c8(self.run_pretrain_file, **train_args) +@pytest.mark.skipif(True, reason="Skip for None CE") class TestPaddleCheckpointOnN1C2Reset(TestMultipleGpus): def setUp(self): self.configs = get_pretrain_arguments(pretrain_arguments) @@ -653,7 +658,7 @@ def setUp(self): self.run_pretrain_file = "llm/llama/run_pretrain.py" - def runfrist(self, train_args): + def runfirst(self, train_args): train_args["unified_checkpoint"] = 0 self.run_n1c2(self.run_pretrain_file, **train_args) @@ -669,7 +674,7 @@ def testTP2(self): train_args = self.configs["TP2"] - self.runfrist(train_args) + self.runfirst(train_args) self.rerun(train_args) if self.need_allclose: @@ -678,6 +683,7 @@ def testTP2(self): np.testing.assert_allclose(res[0], res[1], self.rtol) +@pytest.mark.skipif(True, reason="Skip for None CE") class TestUnifiedCheckpointOnN1C2Reset(TestMultipleGpus): def setUp(self): self.configs = get_pretrain_arguments(pretrain_arguments) @@ -714,7 +720,7 @@ def setUp(self): "training_args.bin", ] - def runfrist(self, train_args): + def runfirst(self, train_args): train_args["unified_checkpoint"] = 1 self.run_n1c2(self.run_pretrain_file, **train_args) @@ -730,7 +736,7 @@ def testTP2(self): train_args = self.configs["TP2"] - self.runfrist(train_args) + self.runfirst(train_args) self.rerun(train_args) if self.need_allclose: @@ -748,7 +754,7 @@ def testFileLists(self): base_ckpt_path = os.path.join(pretrain_arguments["output_dir"], "checkpoint-%d" % save_steps) train_args = self.configs["TP2"] - self.runfrist(train_args) + self.runfirst(train_args) assert sorted(self.filelists) == sorted(os.listdir(base_ckpt_path)) self.rerun(train_args) @@ -761,7 +767,7 @@ def testFileLists(self): remove_logs() remove_ckpt(pretrain_arguments["output_dir"]) train_args["unified_checkpoint_config"] = "skip_save_model_weight" - self.runfrist(train_args) + self.runfirst(train_args) unsave_filelists = [ "master_weights-00001-of-00002.safetensors", "master_weights-00002-of-00002.safetensors", @@ -788,7 +794,7 @@ def setUp(self): self.need_allclose = True self.rtol = 1e-7 - def runfrist(self, train_args): + def runfirst(self, train_args): self.run_n1c8(self.run_pretrain_file, **train_args) def rerun(self, train_args): @@ -809,7 +815,7 @@ def setUp(self): self.need_allclose = True self.rtol = 1e-7 - def runfrist(self, train_args): + def runfirst(self, train_args): self.run_n2c4(self.run_pretrain_file, **train_args) def rerun(self, train_args): @@ -828,7 +834,7 @@ def setUp(self): self.need_allclose = False - def runfrist(self, train_args): + def runfirst(self, train_args): train_args["fp16_opt_level"] = "O1" self.run_n2c4(self.run_pretrain_file, **train_args) @@ -849,7 +855,7 @@ def setUp(self): self.need_allclose = False - def runfrist(self, train_args): + def runfirst(self, train_args): train_args["fp16_opt_level"] = "O2" self.run_n2c4(self.run_pretrain_file, **train_args) @@ -866,7 +872,7 @@ def setUp(self): self.need_allclose = True self.rtol = 1e-7 - def runfrist(self, train_args): + def runfirst(self, train_args): train_args["unified_checkpoint"] = 0 self.run_n2c4(self.run_pretrain_file, **train_args) @@ -886,7 +892,7 @@ def setUp(self): 
self.need_allclose = True self.rtol = 1e-7 - def runfrist(self, train_args): + def runfirst(self, train_args): self.run_n2c4(self.run_pretrain_file, **train_args) def rerun(self, train_args): @@ -909,7 +915,7 @@ def setUp(self): self.rtol = 1e-4 self.k = MAX_CONVERT_CONFIGS # max: 16, min: 1 - def runfrist(self, train_args): + def runfirst(self, train_args): self.run_n1c8(self.run_pretrain_file, **train_args) move_checkpoint_N1C8_to_N2C4() @@ -937,7 +943,7 @@ def setUp(self): self.rtol = 1e-4 self.k = MAX_CONVERT_CONFIGS # max: 16, min: 1 - def runfrist(self, train_args): + def runfirst(self, train_args): train_args["fp16_opt_level"] = "O1" self.run_n1c8(self.run_pretrain_file, **train_args) move_checkpoint_N1C8_to_N2C4() @@ -967,7 +973,7 @@ def setUp(self): self.rtol = 1e-4 self.k = MAX_CONVERT_CONFIGS # max: 16, min: 1 - def runfrist(self, train_args): + def runfirst(self, train_args): train_args["fp16_opt_level"] = "O2" self.run_n1c8(self.run_pretrain_file, **train_args) move_checkpoint_N1C8_to_N2C4() @@ -995,7 +1001,7 @@ def setUp(self): self.rtol = 1e-4 self.k = MAX_CONVERT_CONFIGS # max: 16, min: 1 - def runfrist(self, train_args): + def runfirst(self, train_args): self.run_n1c8(self.run_pretrain_file, **train_args) move_checkpoint_N1C8_to_N2C4() @@ -1023,7 +1029,7 @@ def setUp(self): self.rtol = 1e-4 self.k = MAX_CONVERT_CONFIGS # max: 16, min: 1 - def runfrist(self, train_args): + def runfirst(self, train_args): self.run_n2c4(self.run_pretrain_file, **train_args) move_checkpoint_N2C4_to_N1C8() @@ -1051,7 +1057,7 @@ def setUp(self): self.rtol = 1e-4 self.k = MAX_CONVERT_CONFIGS # max: 16, min: 1 - def runfrist(self, train_args): + def runfirst(self, train_args): train_args["fp16_opt_level"] = "O1" self.run_n2c4(self.run_pretrain_file, **train_args) move_checkpoint_N2C4_to_N1C8() @@ -1081,7 +1087,7 @@ def setUp(self): self.rtol = 1e-4 self.k = MAX_CONVERT_CONFIGS # max: 16, min: 1 - def runfrist(self, train_args): + def runfirst(self, train_args): train_args["fp16_opt_level"] = "O2" self.run_n2c4(self.run_pretrain_file, **train_args) move_checkpoint_N2C4_to_N1C8() @@ -1109,7 +1115,7 @@ def setUp(self): self.rtol = 1e-4 self.k = MAX_CONVERT_CONFIGS # max: 16, min: 1 - def runfrist(self, train_args): + def runfirst(self, train_args): self.run_n2c4(self.run_pretrain_file, **train_args) move_checkpoint_N2C4_to_N1C8() @@ -1123,6 +1129,7 @@ def rerun(self, train_args): np.testing.assert_allclose(res[0], res[-1], rtol=self.rtol) +@pytest.mark.skipif(True, reason="Skip for None CE") class TestUnifiedCheckpointOnN1C8EnableAll(TestUnifiedCheckpointBase): def setUp(self): super().setUp() @@ -1133,7 +1140,7 @@ def setUp(self): self.need_allclose = True self.rtol = 1e-7 - def runfrist(self, train_args): + def runfirst(self, train_args): self.run_n1c8(self.run_pretrain_file, **train_args) def rerun(self, train_args): @@ -1153,7 +1160,7 @@ def setUp(self): self.need_allclose = False self.rtol = 1e-7 - def runfrist(self, train_args): + def runfirst(self, train_args): self.run_n1c8(self.run_pretrain_file, log_dir="log_uc", **train_args) def rerun(self, train_args): @@ -1172,7 +1179,7 @@ def setUp(self): self.need_allclose = False self.rtol = 1e-7 - def runfrist(self, train_args): + def runfirst(self, train_args): self.run_n1c8(self.run_pretrain_file, log_dir="log_pd", **train_args) def rerun(self, train_args): diff --git a/tests/transformers/test_safetensors.py b/tests/transformers/test_safetensors.py index 3c143e26a0b5..85b291e42349 100644 --- a/tests/transformers/test_safetensors.py +++ 
b/tests/transformers/test_safetensors.py @@ -28,7 +28,14 @@ class FastSafetensors(unittest.TestCase): def setUp(self): super().setUp() self.weigth_map = {} - tensors = [([10, 10], "float32"), ([8], "float16"), ([5, 5, 5], "int32")] + tensors = [ + ([10, 1, 10], "float32"), + ([1, 1, 10], "float32"), + ([1, 1, 1, 10], "float32"), + ([10, 10], "float32"), + ([8], "float16"), + ([5, 5, 5], "int32"), + ] count = 0 for shape, dtype in tensors: self.weigth_map[f"weight_{count}"] = (np.random.random(shape) * 100).astype(dtype) @@ -53,5 +60,10 @@ def test_safe_open(self): with fast_safe_open(path, framework="np") as f: for key in f.keys(): safe_slice = f.get_slice(key) + # np.testing.assert_equal(self.weigth_map[key][2:1, ...], safe_slice[2:1, ...]) + np.testing.assert_equal(self.weigth_map[key][0, ...], safe_slice[0, ...]) + np.testing.assert_equal(self.weigth_map[key][0:1, ...], safe_slice[0:1, ...]) + np.testing.assert_equal(self.weigth_map[key][..., 2:], safe_slice[..., 2:]) + np.testing.assert_equal(self.weigth_map[key][..., 1], safe_slice[..., 1]) np.testing.assert_equal(self.weigth_map[key][:2, ...], safe_slice[:2, ...]) np.testing.assert_equal(self.weigth_map[key][..., :4], safe_slice[..., :4])
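
A minimal illustrative sketch (plain NumPy, not the PaddleNLP implementation) of the pitfall the safetensors.py hunk above fixes: resolving slice defaults with `or` treats an explicit 0 (for example `arr[0:0]`) as "not given", whereas an `is None` check preserves it. The helper name `normalize_slice` is hypothetical.

import numpy as np

def normalize_slice(slice_, dim_size):
    # Resolve a slice against a dimension of length `dim_size`.
    # A truthiness check such as `slice_.stop or dim_size` would map an
    # explicit stop of 0 to dim_size, turning an empty slice into a full one.
    start = slice_.start if slice_.start is not None else 0
    stop = slice_.stop if slice_.stop is not None else dim_size
    step = slice_.step if slice_.step is not None else 1
    return start, min(stop, dim_size), step

arr = np.arange(10)
for s in (slice(0, 0), slice(None, 4), slice(2, None)):
    start, stop, step = normalize_slice(s, arr.shape[0])
    # Clamp the extent at 0, as the patch does with max(int(y - x), 0).
    extent = max(stop - start, 0)
    assert extent == arr[s].shape[0]

Similarly, the pyproject.toml hunk swaps the old `--ignore model_zoo/gpt-3/` option for `--dist loadgroup`, a pytest-xdist scheduling mode. A hedged sketch (test names are hypothetical, assuming pytest-xdist is installed) of how it is typically paired with the `xdist_group` mark so related multi-GPU tests land on the same worker:

import pytest

@pytest.mark.xdist_group(name="gpu_suite")
def test_checkpoint_save():
    assert True

@pytest.mark.xdist_group(name="gpu_suite")
def test_checkpoint_reload():
    # Under `pytest -n 2 --dist loadgroup`, both tests in this group are
    # scheduled onto the same xdist worker instead of running concurrently.
    assert True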