From dab9ba582ba1ece8db84c8b5d29dbbe4d926daf0 Mon Sep 17 00:00:00 2001 From: Stephen Ge Date: Mon, 15 Dec 2025 12:59:31 -0800 Subject: [PATCH 1/2] Fix prover.py compatibility with gpt-oss models MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add _make_assistant_message helper to properly handle reasoning_content from models like gpt-oss that output <|channel|> tags. The chat template expects these in a 'thinking' field rather than embedded in 'content'. This fix is backwards compatible - the 'thinking' field is only added when reasoning_content is present in the generation output. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 Signed-off-by: Stephen Ge --- nemo_skills/inference/prover.py | 35 ++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/nemo_skills/inference/prover.py b/nemo_skills/inference/prover.py index 848c7c70dd..bea5e306bc 100644 --- a/nemo_skills/inference/prover.py +++ b/nemo_skills/inference/prover.py @@ -195,6 +195,17 @@ def _transform_for_nemotron_refinement(self, proof_attempt: str, error_message: } ) + def _make_assistant_message(self, content: str, reasoning_content: str | None = None) -> dict: + """Create an assistant message dict, optionally with thinking/reasoning content. + + Some models (e.g., gpt-oss) output <|channel|> tags that need to be in a separate + 'thinking' field rather than in 'content' for the chat template to work correctly. + """ + message = {"role": "assistant", "content": content} + if reasoning_content: + message["thinking"] = reasoning_content + return message + async def _single_data_point_generate(self, data_point, data): formal_statement = ( (data_point["header"].strip() + "\n") @@ -243,8 +254,11 @@ async def _single_data_point_generate(self, data_point, data): ), ) + # Get reasoning_content if available (e.g., from gpt-oss models) + reasoning_content = generation.get("reasoning_content") + new_prompt_turn_list = deepcopy(prompt_turn_list) - new_prompt_turn_list += [{"role": "assistant", "content": generation["generation"]}] + new_prompt_turn_list.append(self._make_assistant_message(generation["generation"], reasoning_content)) prompt_turn_list_list.append( new_prompt_turn_list @@ -259,22 +273,15 @@ async def _single_data_point_generate(self, data_point, data): ): # check if successfully parse the code. We do not want to delete the turn if there is a parsing error. if self.cfg.delete_wrong_turns: prompt_turn_list = deepcopy(base_prompt_turn_list) + [ - { - "role": "assistant", - "content": f"```lean4\n{full_code.strip()}\n```", - } + self._make_assistant_message(f"```lean4\n{full_code.strip()}\n```") ] # only keep the latest turn else: - prompt_turn_list += [ - { - "role": "assistant", - "content": f"```lean4\n{full_code.strip()}\n```", - } - ] - full_prompt_turn_list += [{"role": "assistant", "content": generation["generation"]}] + prompt_turn_list.append(self._make_assistant_message(f"```lean4\n{full_code.strip()}\n```")) + full_prompt_turn_list.append(self._make_assistant_message(generation["generation"], reasoning_content)) else: - prompt_turn_list += [{"role": "assistant", "content": generation["generation"]}] - full_prompt_turn_list += [{"role": "assistant", "content": generation["generation"]}] + assistant_msg = self._make_assistant_message(generation["generation"], reasoning_content) + prompt_turn_list.append(assistant_msg) + full_prompt_turn_list.append(assistant_msg) if code == "None" or "**Error**" in full_code: if code == "None": From 3c25ee17af30ffe7387c42bee84d0126b2921dd6 Mon Sep 17 00:00:00 2001 From: Stephen Ge Date: Mon, 15 Dec 2025 13:52:28 -0800 Subject: [PATCH 2/2] Fix gpt-oss channel tag parsing in prover.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The gpt-oss model outputs <|channel|> tags embedded in the content field, which causes apply_chat_template to fail. The chat template expects analysis/thinking content in a separate 'thinking' field. Added _parse_gpt_oss_output() to extract: - Analysis content (between <|channel|>analysis<|message|> and <|end|>) -> goes to 'thinking' field - Final content (after <|channel|>final<|message|>) -> goes to 'content' field Updated _make_assistant_message() to automatically parse gpt-oss format when reasoning_content is not provided by the server. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 Signed-off-by: Stephen Ge --- nemo_skills/inference/prover.py | 45 +++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/nemo_skills/inference/prover.py b/nemo_skills/inference/prover.py index bea5e306bc..711b5fe9aa 100644 --- a/nemo_skills/inference/prover.py +++ b/nemo_skills/inference/prover.py @@ -195,12 +195,57 @@ def _transform_for_nemotron_refinement(self, proof_attempt: str, error_message: } ) + def _parse_gpt_oss_output(self, content: str) -> tuple[str, str | None]: + """Parse gpt-oss model output to extract thinking and final content. + + gpt-oss models output in the format: + <|channel|>analysis<|message|>...thinking...<|end|><|start|>assistant<|channel|>final<|message|>...final...<|return|> + + The chat template expects analysis content in 'thinking' field and final content in 'content' field. + + Returns: + tuple of (final_content, thinking_content or None) + """ + import re + + # Check if the content contains gpt-oss channel tags + if "<|channel|>" not in content: + return content, None + + thinking = None + final_content = content + + # Extract analysis/thinking content: between <|channel|>analysis<|message|> and <|end|> + analysis_pattern = r"<\|channel\|>analysis[^<]*<\|message\|>(.*?)<\|end\|>" + analysis_match = re.search(analysis_pattern, content, re.DOTALL) + if analysis_match: + thinking = analysis_match.group(1).strip() + + # Extract final content: after <|channel|>final<|message|> until <|return|> or end + final_pattern = r"<\|channel\|>final<\|message\|>(.*?)(?:<\|return\|>|$)" + final_match = re.search(final_pattern, content, re.DOTALL) + if final_match: + final_content = final_match.group(1).strip() + else: + # If no final channel found, try to strip all channel tags and use what remains + # This handles cases where the format might be slightly different + final_content = re.sub(r"<\|[^|]+\|>", "", content).strip() + + return final_content, thinking + def _make_assistant_message(self, content: str, reasoning_content: str | None = None) -> dict: """Create an assistant message dict, optionally with thinking/reasoning content. Some models (e.g., gpt-oss) output <|channel|> tags that need to be in a separate 'thinking' field rather than in 'content' for the chat template to work correctly. + + If reasoning_content is not provided, attempts to parse it from content if the content + contains gpt-oss channel tags. """ + # If reasoning_content not provided, try to parse from content + if reasoning_content is None: + content, reasoning_content = self._parse_gpt_oss_output(content) + message = {"role": "assistant", "content": content} if reasoning_content: message["thinking"] = reasoning_content