open_instruct/ground_truth_utils.py

-Original file line number
+Diff line change
@@ Expand Up @@
         for judge_type in JUDGE_PROMPT_MAP.keys():
             instance = LMJudgeVerifier(judge_type, LMJudgeVerifierConfig.from_args(args))
             verifiers[instance.name.lower()] = instance
+        # if we have remap arg, remap!
+        if args.remap_verifier:
+            remap = args.remap_verifier.split("=")
+            assert len(remap) == 2, "Remap must be in the format old_name=new_name"
+            old_name, new_name = remap
+            # map so that the old name calls the new verifier
+            assert new_name.lower() in verifiers, f"{new_name} not found in verifiers during remapping"
+            verifiers[old_name.lower()] = verifiers[new_name.lower()]
         return verifiers
@@ Expand Down @@

open_instruct/grpo_fast.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -272,6 +272,8 @@ class Args: @@
         """whether to apply verifiable reward"""
         verification_reward: float = 10.0
         """the reward value for verifiable responses"""
+        remap_verifier: str = None
+        """Remap verifier like string_f1=general-quality_ref. Currently can only remap once."""
         # -- llm verifiers
         llm_judge_model: str = "azure/gpt-4o-mini-standard"
@@ Expand Down @@

open_instruct/grpo_vllm_thread_ray_gtrl.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -264,6 +264,8 @@ class Args: @@
         """whether to add the R1 style format reward"""
         r1_style_format_reward: float = 1.0
         """the reward value for R1 style format reward"""
+        remap_verifier: str = None
+        """Remap verifier like string_f1=general-quality_ref. Currently can only remap once."""
         # async setting
         async_mode: bool = True
@@ Expand Down @@

open_instruct/judge_utils.py

-Original file line number
+Diff line change
@@ Expand Up @@
             if cleaned_str.endswith("```"):
                 cleaned_str = cleaned_str[:-3]  # Remove trailing ```
+            # normalize CRLF to LF, then replace every newline with a literal \n escape
+            cleaned_str = cleaned_str.replace("\r\n", "\n").replace("\n", "\\n")
+            # double any backslash that is not followed by a valid JSON escape character (" \ / b f n r t u)
+            cleaned_str = re.sub(r'\\(?!["\\/bfnrtu])', r'\\\\', cleaned_str)
             cleaned_str = cleaned_str.strip()
-            data = json.loads(cleaned_str)
-            reasoning = data.get("REASONING", "")
-            return reasoning, float(data.get("SCORE", 0.0))
+            try:
+                data = json.loads(cleaned_str)
+                reasoning = data.get("REASONING", "")
+                score = float(data.get("SCORE", 0.0))
+            except json.JSONDecodeError:
+                # try just getting the score with some regex
+                score_match = re.search(r'"SCORE"\s*:\s*"?([0-9]+(?:\.[0-9]+)?)"?', cleaned_str)
+                if score_match:
+                    score = float(score_match.group(1))
+                    reasoning = cleaned_str
+                else:
+                    # bubble up the error
+                    raise ValueError()
+            return reasoning, score
         except (json.JSONDecodeError, TypeError, ValueError):
             logger.warning(f"Could not parse score from due to invalid json: {score_str}, defaulting to 0.0")
             return score_str, 0.0
@@ Expand Down @@

open_instruct/model_utils.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -261,8 +261,9 @@ async def apply_verifiable_reward( @@
                     tokenized_prediction=tok_prediction, prediction=prediction, label=gt, query=query
                 )
                 async_tasks.append(task)
+                # use reward_func.name to get the name of the verifier, rather than ds in case we have done remapping.
                 task_metadata.append(
-                    {"response_idx": i, "dataset": ds, "reward_weight": reward_func.weight, "reward_mult": reward_mult}
+                    {"response_idx": i, "dataset": reward_func.name, "reward_weight": reward_func.weight, "reward_mult": reward_mult}
                 )
         # Execute all tasks in parallel
@@ Expand Down @@

open_instruct/ppo_fast.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -273,6 +273,8 @@ class Args: @@
         """whether to apply verifiable reward"""
         verification_reward: float = 10.0
         """the reward value for verifiable responses"""
+        remap_verifier: str = None
+        """Remap verifier like string_f1=general-quality_ref. Currently can only remap once."""
         # -- llm verifiers reward
         llm_judge_model: str = "azure/gpt-4o-mini-standard"
@@ Expand Down @@

open_instruct/ppo_vllm_thread_ray_gtrl.py

-Original file line number
+Diff line change
@@ Expand Up / @@ -269,6 +269,8 @@ class Args: @@
         """whether to add the R1 style format reward"""
         r1_style_format_reward: float = 1.0
         """the reward value for R1 style format reward"""
+        remap_verifier: str = None
+        """Remap verifier like string_f1=general-quality_ref. Currently can only remap once."""
         # async setting
         async_mode: bool = True
@@ Expand Down @@

Add remap verifier #773

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged

hamishivi merged 6 commits into main from add-remap-verifier

Jul 16, 2025

-Original file line number
+Diff line change
@@ Expand Up @@
         for judge_type in JUDGE_PROMPT_MAP.keys():
             instance = LMJudgeVerifier(judge_type, LMJudgeVerifierConfig.from_args(args))
             verifiers[instance.name.lower()] = instance
+        # if we have remap arg, remap!
+        if args.remap_verifier:
+            remap = args.remap_verifier.split("=")
+            assert len(remap) == 2, "Remap must be in the format old_name=new_name"
+            old_name, new_name = remap
+            # map so that the old name calls the new verifier
+            assert new_name.lower() in verifiers, f"{new_name} not found in verifiers during remapping"
+            verifiers[old_name.lower()] = verifiers[new_name.lower()]
         return verifiers
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -272,6 +272,8 @@ class Args: @@
         """whether to apply verifiable reward"""
         verification_reward: float = 10.0
         """the reward value for verifiable responses"""
+        remap_verifier: str = None
+        """Remap verifier like string_f1=general-quality_ref. Currently can only remap once."""
         # -- llm verifiers
         llm_judge_model: str = "azure/gpt-4o-mini-standard"
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -264,6 +264,8 @@ class Args: @@
         """whether to add the R1 style format reward"""
         r1_style_format_reward: float = 1.0
         """the reward value for R1 style format reward"""
+        remap_verifier: str = None
+        """Remap verifier like string_f1=general-quality_ref. Currently can only remap once."""
         # async setting
         async_mode: bool = True
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up @@
             if cleaned_str.endswith("```"):
                 cleaned_str = cleaned_str[:-3]  # Remove trailing ```
+            # normalize CRLF to LF, then replace every newline with a literal \n escape
+            cleaned_str = cleaned_str.replace("\r\n", "\n").replace("\n", "\\n")
+            # double any backslash that is not followed by a valid JSON escape character (" \ / b f n r t u)
+            cleaned_str = re.sub(r'\\(?!["\\/bfnrtu])', r'\\\\', cleaned_str)
             cleaned_str = cleaned_str.strip()
-            data = json.loads(cleaned_str)
-            reasoning = data.get("REASONING", "")
-            return reasoning, float(data.get("SCORE", 0.0))
+            try:
+                data = json.loads(cleaned_str)
+                reasoning = data.get("REASONING", "")
+                score = float(data.get("SCORE", 0.0))
+            except json.JSONDecodeError:
+                # try just getting the score with some regex
+                score_match = re.search(r'"SCORE"\s*:\s*"?([0-9]+(?:\.[0-9]+)?)"?', cleaned_str)
+                if score_match:
+                    score = float(score_match.group(1))
+                    reasoning = cleaned_str
+                else:
+                    # bubble up the error
+                    raise ValueError()
+            return reasoning, score
         except (json.JSONDecodeError, TypeError, ValueError):
             logger.warning(f"Could not parse score from due to invalid json: {score_str}, defaulting to 0.0")
             return score_str, 0.0
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -261,8 +261,9 @@ async def apply_verifiable_reward( @@
                     tokenized_prediction=tok_prediction, prediction=prediction, label=gt, query=query
                 )
                 async_tasks.append(task)
+                # use reward_func.name to get the name of the verifier, rather than ds in case we have done remapping.
                 task_metadata.append(
-                    {"response_idx": i, "dataset": ds, "reward_weight": reward_func.weight, "reward_mult": reward_mult}
+                    {"response_idx": i, "dataset": reward_func.name, "reward_weight": reward_func.weight, "reward_mult": reward_mult}
                 )
         # Execute all tasks in parallel
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -273,6 +273,8 @@ class Args: @@
         """whether to apply verifiable reward"""
         verification_reward: float = 10.0
         """the reward value for verifiable responses"""
+        remap_verifier: str = None
+        """Remap verifier like string_f1=general-quality_ref. Currently can only remap once."""
         # -- llm verifiers reward
         llm_judge_model: str = "azure/gpt-4o-mini-standard"
@@ Expand Down @@

-Original file line number
+Diff line change
@@ Expand Up / @@ -269,6 +269,8 @@ class Args: @@
         """whether to add the R1 style format reward"""
         r1_style_format_reward: float = 1.0
         """the reward value for R1 style format reward"""
+        remap_verifier: str = None
+        """Remap verifier like string_f1=general-quality_ref. Currently can only remap once."""
         # async setting
         async_mode: bool = True
@@ Expand Down @@

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Add remap verifier #773

Uh oh!

Diff view

Diff view

There are no files selected for viewing