huggingface · wzy6642 · Jan 31, 2025
diff --git a/src/open_r1/grpo.py b/src/open_r1/grpo.py
@@ -67,8 +67,11 @@ def accuracy_reward(completions, solution, **kwargs):
                 ],
                 extraction_mode="first_match",
             )
-            # Reward 1 if the content is the same as the ground truth, 0 otherwise
-            reward = float(verify(answer_parsed, gold_parsed))
+            try: 
+                # Reward 1 if the content is the same as the ground truth, 0 otherwise
+                reward = float(verify(answer_parsed, gold_parsed))
+            except Exception as e:
+                reward = 0
         else:
             # If the gold solution is not parseable, we reward 1 to skip this example
             reward = 1.0