Skip to content

GRPO issue #152

@hellangleZ

Description

@hellangleZ

{'loss': 0.0024, 'grad_norm': 0.15136083960533142, 'learning_rate': 8.939929328621908e-07, 'completion_length': 214.3078125, 'rewards/accuracy_reward': 0.2982421875, 'rewards/format_reward': 0.0, 'reward': 0.2982421875, 'reward_std': 0.1828827694291249, 'kl': 0.0598480224609375, 'epoch': 0.32} {'loss': 0.0025, 'grad_norm': 0.16363804042339325, 'learning_rate': 8.881036513545347e-07, 'completion_length': 211.0828125, 'rewards/accuracy_reward': 0.3224609375, 'rewards/format_reward': 0.0, 'reward': 0.3224609375, 'reward_std': 0.19847961873747408, 'kl': 0.061981201171875, 'epoch': 0.34} {'loss': 0.0025, 'grad_norm': 0.16145798563957214, 'learning_rate': 8.822143698468787e-07, 'completion_length': 210.954296875, 'rewards/accuracy_reward': 0.31396484375, 'rewards/format_reward': 0.0, 'reward': 0.31396484375, 'reward_std': 0.19121394583489745, 'kl': 0.0621246337890625, 'epoch': 0.35} {'loss': 0.0026, 'grad_norm': 0.12994669377803802, 'learning_rate': 8.763250883392225e-07, 'completion_length': 208.32666015625, 'rewards/accuracy_reward': 0.33154296875, 'rewards/format_reward': 0.0, 'reward': 0.33154296875, 'reward_std': 0.20113030483480543, 'kl': 0.0647735595703125, 'epoch': 0.37} {'loss': 0.0028, 'grad_norm': 0.14411582052707672, 'learning_rate': 8.704358068315665e-07, 'completion_length': 208.77900390625, 'rewards/accuracy_reward': 0.3294921875, 'rewards/format_reward': 0.0, 'reward': 0.3294921875, 'reward_std': 0.20098235972691328, 'kl': 0.06962890625, 'epoch': 0.39} 13%|█▎ | 226/1698 [7:25:11<48:30:08, 118.62s/it][rank1]: Traceback (most recent call last): [rank1]: File "/aml/open-r1/src/open_r1/grpo.py", line 141, in [rank1]: main(script_args, training_args, model_args) [rank1]: File "/aml/open-r1/src/open_r1/grpo.py", line 130, in main [rank1]: trainer.train() [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/transformers/trainer.py", line 2184, in train [rank1]: return inner_training_loop( [rank1]: ^^^^^^^^^^^^^^^^^^^^ [rank1]: File 
"/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/transformers/trainer.py", line 2544, in _inner_training_loop [rank1]: tr_loss_step = self.training_step(model, inputs, num_items_in_batch) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/transformers/trainer.py", line 3688, in training_step [rank1]: loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/trl/trainer/grpo_trainer.py", line 397, in compute_loss [rank1]: output_reward_func = reward_func(prompts=prompts, completions=completions, **reward_kwargs) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/aml/open-r1/src/open_r1/grpo.py", line 69, in accuracy_reward [rank1]: reward = float(verify(answer_parsed, gold_parsed)) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/math_verify/grader.py", line 447, in verify [rank1]: return any(compare_single_extraction_wrapper(g, t) for g, t in product(gold, target)) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/math_verify/grader.py", line 447, in [rank1]: return any(compare_single_extraction_wrapper(g, t) for g, t in product(gold, target)) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/math_verify/grader.py", line 438, in compare_single_extraction_wrapper [rank1]: return compare_single_extraction(g, t) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/math_verify/utils.py", line 50, in 
wrapper [rank1]: return func(args, **kwargs) [rank1]: ^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/math_verify/grader.py", line 420, in compare_single_extraction [rank1]: return sympy_expr_eq(gold, target, precision) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/math_verify/grader.py", line 365, in sympy_expr_eq [rank1]: return sympy_compare_sets(gold, pred, precision) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/math_verify/grader.py", line 316, in sympy_compare_sets [rank1]: if a_set.symmetric_difference(b_set).is_empty: [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/sets/sets.py", line 259, in symmetric_difference [rank1]: return SymmetricDifference(self, other) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/sets/sets.py", line 2183, in new [rank1]: return SymmetricDifference.reduce(a, b) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/sets/sets.py", line 2189, in reduce [rank1]: result = B._symmetric_difference(A) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/sets/sets.py", line 262, in _symmetric_difference [rank1]: return Union(Complement(self, other), Complement(other, self)) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/sets/sets.py", line 1721, in new [rank1]: return Complement.reduce(a, b) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/sets/sets.py", line 1731, in reduce 
[rank1]: if B == S.UniversalSet or A.is_subset(B): [rank1]: ^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/sets/sets.py", line 413, in is_subset [rank1]: ret = self._eval_is_subset(other) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/sets/sets.py", line 2056, in _eval_is_subset [rank1]: return fuzzy_and(other._contains(e) for e in self.args) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/core/logic.py", line 142, in fuzzy_and [rank1]: for ai in args: [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/sets/sets.py", line 2056, in [rank1]: return fuzzy_and(other._contains(e) for e in self.args) [rank1]: ^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/sets/sets.py", line 2053, in _contains [rank1]: return Or([Eq(e, other, evaluate=True) for e in self.args]) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/core/operations.py", line 513, in new [rank1]: _args = frozenset(cls._new_args_filter(args)) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/logic/boolalg.py", line 741, in _new_args_filter [rank1]: c = x.canonical [rank1]: ^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/core/relational.py", line 333, in canonical [rank1]: args = tuple([i.canonical if isinstance(i, Relational) else i for i in self.args]) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/core/relational.py", line 333, in [rank1]: args = 
tuple([i.canonical if isinstance(i, Relational) else i for i in self.args]) [rank1]: ^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/core/relational.py", line 333, in canonical [rank1]: args = tuple([i.canonical if isinstance(i, Relational) else i for i in self.args]) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/core/relational.py", line 333, in [rank1]: args = tuple([i.canonical if isinstance(i, Relational) else i for i in self.args]) [rank1]: ^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/core/relational.py", line 335, in canonical [rank1]: r = self.func(*args) [rank1]: ^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/core/relational.py", line 852, in new [rank1]: return cls._eval_relation(lhs, rhs, **options) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/core/relational.py", line 859, in _eval_relation [rank1]: val = cls._eval_fuzzy_relation(lhs, rhs) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/core/relational.py", line 1186, in _eval_fuzzy_relation [rank1]: return is_lt(lhs, rhs) [rank1]: ^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/core/relational.py", line 1265, in is_lt [rank1]: return fuzzy_not(is_ge(lhs, rhs, assumptions)) [rank1]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [rank1]: File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/sympy/core/relational.py", line 1380, in is_ge [rank1]: raise TypeError("Can only compare inequalities with Expr") [rank1]: TypeError: Can only compare inequalities with Expr W0201 10:55:55.568000 290975 
site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 291191 closing signal SIGTERM W0201 10:55:55.570000 290975 site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 291193 closing signal SIGTERM W0201 10:55:55.570000 290975 site-packages/torch/distributed/elastic/multiprocessing/api.py:897] Sending process 291194 closing signal SIGTERM E0201 10:55:56.239000 290975 site-packages/torch/distributed/elastic/multiprocessing/api.py:869] failed (exitcode: 1) local_rank: 1 (pid: 291192) of binary: /home/root123/miniconda3/envs/openr1/bin/python Traceback (most recent call last): File "/home/root123/miniconda3/envs/openr1/bin/accelerate", line 8, in sys.exit(main()) ^^^^^^ File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/accelerate/commands/accelerate_cli.py", line 48, in main args.func(args) File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/accelerate/commands/launch.py", line 1157, in launch_command deepspeed_launcher(args) File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/accelerate/commands/launch.py", line 845, in deepspeed_launcher distrib_run.run(args) File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/torch/distributed/run.py", line 910, in run elastic_launch( File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/torch/distributed/launcher/api.py", line 138, in call return launch_agent(self._config, self._entrypoint, list(args)) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/home/root123/miniconda3/envs/openr1/lib/python3.11/site-packages/torch/distributed/launcher/api.py", line 269, in launch_agent raise ChildFailedError( torch.distributed.elastic.multiprocessing.errors.ChildFailedError:
src/open_r1/grpo.py FAILED
Failures: <NO_OTHER_FAILURES>
Root Cause (first observed failure):
[0]:
  time      : 2025-02-01_10:55:55
  host      : a101.internal.chinacloudapp.cn
  rank      : 1 (local_rank: 1)
  exitcode  : 1 (pid: 291192)
  error_file: <N/A>
  traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions