-
Notifications
You must be signed in to change notification settings - Fork 2.4k
Description
{'loss': 0.0024, 'grad_norm': 0.15136083960533142, 'learning_rate': 8.939929328621908e-07, 'completion_length': 214.3078125, 'rewards/accuracy_reward': 0.2982421875, 'rewards/format_reward': 0.0, 'reward': 0.2982421875, 'reward_std': 0.1828827694291249, 'kl': 0.0598480224609375, 'epoch': 0.32} {'loss': 0.0025, 'grad_norm': 0.16363804042339325, 'learning_rate': 8.881036513545347e-07, 'completion_length': 211.0828125, 'rewards/accuracy_reward': 0.3224609375, 'rewards/format_reward': 0.0, 'reward': 0.3224609375, 'reward_std': 0.19847961873747408, 'kl': 0.061981201171875, 'epoch': 0.34} {'loss': 0.0025, 'grad_norm': 0.16145798563957214, 'learning_rate': 8.822143698468787e-07, 'completion_length': 210.954296875, 'rewards/accuracy_reward': 0.31396484375, 'rewards/format_reward': 0.0, 'reward': 0.31396484375, 'reward_std': 0.19121394583489745, 'kl': 0.0621246337890625, 'epoch': 0.35} {'loss': 0.0026, 'grad_norm': 0.12994669377803802, 'learning_rate': 8.763250883392225e-07, 'completion_length': 208.32666015625, 'rewards/accuracy_reward': 0.33154296875, 'rewards/format_reward': 0.0, 'reward': 0.33154296875, 'reward_std': 0.20113030483480543, 'kl': 0.0647735595703125, 'epoch': 0.37} {'loss': 0.0028, 'grad_norm': 0.14411582052707672, 'learning_rate': 8.704358068315665e-07, 'completion_length': 208.77900390625, 'rewards/accuracy_reward': 0.3294921875, 'rewards/format_reward': 0.0, 'reward': 0.3294921875, 'reward_std': 0.20098235972691328, 'kl': 0.06962890625, 'epoch': 0.39} 13%|█▎ | 226/1698 [7:25:11<48:30:08, 118.62s/it][rank1]: Traceback (most recent call last): [rank1]: File "/aml/open-r1/src/open_r1/grpo.py", line 141, in [rank1]: main(script_args, training_args, model_args) [rank1]: File "/aml/open-r1/src/open_r1/grpo.py", line 130, in main [rank1]: trainer.train() [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/transformers/trainer.py", line 2184, in train [rank1]: return inner_training_loop( [rank1]: ^^^^^^^^^^^^^^^^^^^^ [rank1]: File 
"/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/transformers/trainer.py", line 2544, in _inner_training_loop [rank1]: tr_loss_step = self.training_step(model, inputs, num_items_in_batch) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/transformers/trainer.py", line 3688, in training_step [rank1]: loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/trl/trainer/grpo_trainer.py", line 397, in compute_loss [rank1]: output_reward_func = reward_func(prompts=prompts, completions=completions, **reward_kwargs) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/aml/open-r1/src/open_r1/grpo.py", line 69, in accuracy_reward [rank1]: reward = float(verify(answer_parsed, gold_parsed)) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/math_verify/grader.py", line 447, in verify [rank1]: return any(compare_single_extraction_wrapper(g, t) for g, t in product(gold, target)) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/math_verify/grader.py", line 447, in [rank1]: return any(compare_single_extraction_wrapper(g, t) for g, t in product(gold, target)) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/math_verify/grader.py", line 438, in compare_single_extraction_wrapper [rank1]: return compare_single_extraction(g, t) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/math_verify/utils.py", line 50, in 
wrapper [rank1]: return func(*args, **kwargs) [rank1]: ^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/math_verify/grader.py", line 420, in compare_single_extraction [rank1]: return sympy_expr_eq(gold, target, precision) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/math_verify/grader.py", line 365, in sympy_expr_eq [rank1]: return sympy_compare_sets(gold, pred, precision) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/math_verify/grader.py", line 316, in sympy_compare_sets [rank1]: if a_set.symmetric_difference(b_set).is_empty: [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/sets/sets.py", line 259, in symmetric_difference [rank1]: return SymmetricDifference(self, other) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/sets/sets.py", line 2183, in __new__ [rank1]: return SymmetricDifference.reduce(a, b) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/sets/sets.py", line 2189, in reduce [rank1]: result = B._symmetric_difference(A) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/sets/sets.py", line 262, in _symmetric_difference [rank1]: return Union(Complement(self, other), Complement(other, self)) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/sets/sets.py", line 1721, in __new__ [rank1]: return Complement.reduce(a, b) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/sets/sets.py", line 1731, in reduce 
[rank1]: if B == S.UniversalSet or A.is_subset(B): [rank1]: ^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/sets/sets.py", line 413, in is_subset [rank1]: ret = self._eval_is_subset(other) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/sets/sets.py", line 2056, in _eval_is_subset [rank1]: return fuzzy_and(other._contains(e) for e in self.args) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/core/logic.py", line 142, in fuzzy_and [rank1]: for ai in args: [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/sets/sets.py", line 2056, in [rank1]: return fuzzy_and(other._contains(e) for e in self.args) [rank1]: ^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/sets/sets.py", line 2053, in _contains [rank1]: return Or([Eq(e, other, evaluate=True) for e in self.args]) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/core/operations.py", line 513, in new [rank1]: _args = frozenset(cls._new_args_filter(args)) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/logic/boolalg.py", line 741, in _new_args_filter [rank1]: c = x.canonical [rank1]: ^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/core/relational.py", line 333, in canonical [rank1]: args = tuple([i.canonical if isinstance(i, Relational) else i for i in self.args]) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/core/relational.py", line 333, in [rank1]: args = 
tuple([i.canonical if isinstance(i, Relational) else i for i in self.args]) [rank1]: ^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/core/relational.py", line 333, in canonical [rank1]: args = tuple([i.canonical if isinstance(i, Relational) else i for i in self.args]) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/core/relational.py", line 333, in [rank1]: args = tuple([i.canonical if isinstance(i, Relational) else i for i in self.args]) [rank1]: ^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/core/relational.py", line 335, in canonical [rank1]: r = self.func(*args) [rank1]: ^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/core/relational.py", line 852, in new [rank1]: return cls._eval_relation(lhs, rhs, **options) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/core/relational.py", line 859, in _eval_relation [rank1]: val = cls._eval_fuzzy_relation(lhs, rhs) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/core/relational.py", line 1186, in _eval_fuzzy_relation [rank1]: return is_lt(lhs, rhs) [rank1]: ^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/core/relational.py", line 1265, in is_lt [rank1]: return fuzzy_not(is_ge(lhs, rhs, assumptions)) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/core/relational.py", line 1380, in is_ge [rank1]: raise TypeError("Can only compare inequalities with Expr") [rank1]: TypeError: Can only compare inequalities with Expr W0201 10:55:55.568000 290975 
site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 291191 closing signal SIGTERM W0201 10:55:55.570000 290975 site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 291193 closing signal SIGTERM W0201 10:55:55.570000 290975 site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 291194 closing signal SIGTERM E0201 10:55:56.239000 290975 site-packages/torch/distributed/elastic/multiprocessing/api.py:869] failed (exitcode: 1) local_rank: 1 (pid: 291192) of binary: /home/root123/miniconda3/envs/openr1/bin/python Traceback (most recent call last): File "/home/root123/miniconda3/envs/openr1/bin/accelerate", line 8, in sys.exit(main()) ^^^^^^ File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/accelerate/commands/accelerate_cli.py", line 48, in main args.func(args) File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/accelerate/commands/launch.py", line 1157, in launch_command deepspeed_launcher(args) File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/accelerate/commands/launch.py", line 845, in deepspeed_launcher distrib_run.run(args) File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/torch/distributed/run.py", line 910, in run elastic_launch( File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/torch/distributed/launcher/api.py", line 138, in call return launch_agent(self._config, self._entrypoint, list(args)) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/torch/distributed/launcher/api.py", line 269, in launch_agent raise ChildFailedError( torch.distributed.elastic.multiprocessing.errors.ChildFailedError:
src/open_r1/grpo.py FAILED
Failures: <NO_OTHER_FAILURES>
Root Cause (first observed failure): [0]: time : 2025-02-01_10:55:55 host : a101.internal.chinacloudapp.cn rank : 1 (local_rank: 1) exitcode : 1 (pid: 291192) error_file: <N/A> traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html