Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 96 additions & 0 deletions resources_servers/reasoning_gym/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# Reasoning Gym Resources Server

Integration of reasoning gym: https://github.com/open-thought/reasoning-gym

From Reasoning Gym's README: "It currently provides more than 100 tasks over many domains, including but not limited to algebra, arithmetic, computation, cognition, geometry, graph theory, logic, and many common games."

# Dataset prep

**Single task:**
```bash
python scripts/create_dataset.py \
--task knights_knaves \
--size 500 \
--seed 42 \
--output data/train_knights_knaves.jsonl
```

**Multiple tasks (composite):**
```bash
python scripts/create_dataset.py \
--tasks knights_knaves,syllogisms,leg_counting \
--size 1000 \
--output data/train_composite.jsonl
```

**All tasks in a category:**
```bash
python scripts/create_dataset.py \
--category logic \
--size 1000 \
--output data/train_logic.jsonl
```


**All tasks in all categories:**
```bash
python scripts/create_dataset.py \
--all-tasks \
--size 1000 \
--output data/train_all.jsonl
```


**With custom config:**
```bash
python scripts/create_dataset.py \
--task knights_knaves \
--size 500 \
--config '{"n_people": 3, "depth_constraint": 3}' \
--output data/train_hard.jsonl
```

# Rollout collection

## Start a vllm server

```bash
pip install -U "vllm>=0.12.0"

wget https://huggingface.co/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16/resolve/main/nano_v3_reasoning_parser.py

vllm serve nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16 \
--max-num-seqs 8 \
--tensor-parallel-size 1 \
--max-model-len 262144 \
--port 10240 \
--trust-remote-code \
--tool-call-parser qwen3_coder \
--reasoning-parser-plugin nano_v3_reasoning_parser.py \
--reasoning-parser nano_v3
```

## Create `env.yaml`


```yaml
policy_base_url: http://localhost:10240/v1
policy_api_key: EMPTY
policy_model_name: nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16
```

## Launch nemo gym servers

```bash
ng_run "+config_paths=[resources_servers/reasoning_gym/configs/reasoning_gym.yaml,responses_api_models/vllm_model/configs/vllm_model.yaml]"
```

## Collect rollouts

```bash
ng_collect_rollouts \
+agent_name=reasoning_gym_simple_agent \
+input_jsonl_fpath=resources_servers/reasoning_gym/data/example.jsonl \
+output_jsonl_fpath=results/reasoning_gym_rollouts.jsonl \
+limit=5
```
112 changes: 112 additions & 0 deletions resources_servers/reasoning_gym/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import re
from typing import Any, Optional

import reasoning_gym
from fastapi import FastAPI
from reasoning_gym.utils import extract_answer

from nemo_gym.base_resources_server import (
BaseResourcesServerConfig,
BaseVerifyRequest,
BaseVerifyResponse,
SimpleResourcesServer,
)


class ReasoningGymResourcesServerConfig(BaseResourcesServerConfig):
    # This server declares no settings of its own; everything comes from
    # BaseResourcesServerConfig.
    ...


class ReasoningGymVerifyRequest(BaseVerifyRequest):
    # Request payload for the verify endpoint. The three fields below are
    # forwarded together as the ``entry`` dict handed to the scoring function.

    # Prompt text of the task instance.
    question: str
    # Reference answer for the task instance; may be None.
    answer: Optional[str]
    # Task metadata. verify() reads the task name from the
    # "source_dataset" key and raises if it is missing or empty.
    metadata: dict[str, Any]


class ReasoningGymVerifyResponse(BaseVerifyResponse):
    # Response payload produced by the verify endpoint.

    # Task name taken from the request metadata ("source_dataset").
    task_name: str
    # Score computed for the model answer (0.0 when scoring raised).
    score: float
    # Answer string pulled out of the model response and passed to the scorer.
    extracted_answer: Optional[str]


class ReasoningGymResourcesServer(SimpleResourcesServer):
    """Resources server that scores model responses with reasoning gym scorers."""

    config: ReasoningGymResourcesServerConfig

    def setup_webserver(self) -> FastAPI:
        """Return the FastAPI app built by the parent class, unchanged."""
        app = super().setup_webserver()
        return app

    async def verify(self, body: ReasoningGymVerifyRequest) -> ReasoningGymVerifyResponse:
        """Score a model response using the reasoning gym scorer for its task.

        The task name is read from ``body.metadata["source_dataset"]`` and used
        to look up the scoring function via ``reasoning_gym.get_score_answer_fn``.

        Args:
            body: Verify request carrying the model response together with the
                task's question, reference answer and metadata.

        Returns:
            A response echoing the request fields plus ``reward``/``score``
            (identical values), ``task_name`` and ``extracted_answer``.

        Raises:
            ValueError: If the metadata has no (non-empty) ``source_dataset``.
        """
        model_answer = self._extract_answer_from_response(body.response)

        task_name = body.metadata.get("source_dataset")

        if not task_name:
            raise ValueError(f"No task name found in metadata: {body.metadata}")

        # Shape of the dataset entry handed to the scoring function.
        entry = {
            "question": body.question,
            "answer": body.answer,
            "metadata": body.metadata,
        }
        try:
            score_fn = reasoning_gym.get_score_answer_fn(task_name)
            score = float(score_fn(answer=model_answer, entry=entry))
        except Exception as e:
            # Deliberate best-effort: a scorer failure (unknown task, malformed
            # answer, ...) is reported and counted as a zero score rather than
            # failing the whole request.
            print(f"Error scoring answer for task {task_name}: {e}")
            score = 0.0

        return ReasoningGymVerifyResponse(
            **body.model_dump(),
            reward=score,
            task_name=task_name,
            score=score,
            extracted_answer=model_answer,
        )

    @staticmethod
    def _extract_boxed(text: str) -> Optional[str]:
        """Return the stripped content of the first non-empty ``\\boxed{...}``.

        Braces are matched by depth so nested groups such as
        ``\\boxed{\\frac{1}{2}}`` are returned whole instead of being cut at
        the first closing brace. Empty or unterminated boxes are skipped.
        Returns None when no usable box is found.
        """
        for opener in re.finditer(r"\\boxed\{", text):
            content_start = opener.end()
            depth = 1
            for idx in range(content_start, len(text)):
                char = text[idx]
                if char == "{":
                    depth += 1
                elif char == "}":
                    depth -= 1
                    if depth == 0:
                        content = text[content_start:idx]
                        if content:
                            return content.strip()
                        # Empty box: fall through to the next occurrence.
                        break
        return None

    def _extract_answer_from_response(self, response) -> str:
        """Extract the model's final answer string from a response object.

        All ``output_text`` parts of ``message`` output items are concatenated,
        then the answer is looked up in this order:

        1. ``<answer>...</answer>`` tags (reasoning gym default),
        2. ``\\boxed{...}``,
        3. the whole stripped text (possibly an empty string).
        """
        full_text = "".join(
            content_item.text
            for output_item in response.output
            if output_item.type == "message"
            for content_item in output_item.content
            if content_item.type == "output_text"
        )

        # Try <answer> tags first (reasoning gym default)
        extracted = extract_answer(full_text, tag_name="answer")
        if extracted is not None:
            return extracted

        # Fall back to \boxed{} when no <answer> tags are present.
        # This covers a mild instruction-following failure: a model prompted
        # to use <answer> answers with \boxed{} instead (observed with
        # deepseek-distill-qwen-1.5b). Consider disabling this fallback for
        # large models that reliably follow the <answer> instruction.
        boxed = self._extract_boxed(full_text)
        if boxed is not None:
            return boxed

        # Neither <answer> nor \boxed{} found: return the full text.
        return full_text.strip()


if __name__ == "__main__":
    # Executed as a script: hand control to the server's webserver entry point.
    ReasoningGymResourcesServer.run_webserver()
25 changes: 25 additions & 0 deletions resources_servers/reasoning_gym/configs/reasoning_gym.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
reasoning_gym:
resources_servers:
reasoning_gym:
entrypoint: app.py
domain: knowledge
verified: true
reasoning_gym_simple_agent:
responses_api_agents:
simple_agent:
entrypoint: app.py
resources_server:
type: resources_servers
name: reasoning_gym
model_server:
type: responses_api_models
name: policy_model
datasets:
- name: train
type: train
jsonl_fpath: resources_servers/reasoning_gym/data/train_knights_knaves.jsonl
gitlab_identifier:
dataset_name: knights_knaves_reasoning_gym
version: 1.0.0
artifact_fpath: train_knights_knaves.jsonl
license: Apache 2.0
7 changes: 7 additions & 0 deletions resources_servers/reasoning_gym/configs/resources_only.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
reasoning_gym:
resources_servers:
reasoning_gym:
entrypoint: app.py
domain: knowledge
verified: false

5 changes: 5 additions & 0 deletions resources_servers/reasoning_gym/data/example.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by sages and fools. Sages always tell the truth, and fools always lie. You meet 2 inhabitants: Zoey, and Riley. Zoey commented, \"Riley is a fool\". In Riley's words: \"Zoey is a sage or Riley is a sage\". So who is a sage and who is a fool? (Format your answer like: \"Zoey is a sage/fool, and Riley is a sage/fool\")"}]}, "question": "A very special island is inhabited only by sages and fools. Sages always tell the truth, and fools always lie. You meet 2 inhabitants: Zoey, and Riley. Zoey commented, \"Riley is a fool\". In Riley's words: \"Zoey is a sage or Riley is a sage\". So who is a sage and who is a fool? (Format your answer like: \"Zoey is a sage/fool, and Riley is a sage/fool\")", "answer": "Zoey is a fool, and Riley is a sage.", "metadata": {"source_dataset": "knights_knaves", "source_index": 0, "statements": [["lying", 1], ["or", ["telling-truth", 0], ["telling-truth", 1]]], "solution": [false, true], "names": ["Zoey", "Riley"], "knight_knave_terms": {"knight": "sage", "knave": "fool", "a_knight": "a sage", "a_knave": "a fool", "Knight": "Sage", "Knave": "Fool"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Riley, and Olivia. \"if Riley is a pioneer then Olivia is a laggard\" - Riley. Olivia stated, \"Olivia is a pioneer and Riley is a laggard\". So who is a pioneer and who is a laggard? (Format your answer like: \"Riley is a pioneer/laggard, and Olivia is a pioneer/laggard\")"}]}, "question": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Riley, and Olivia. \"if Riley is a pioneer then Olivia is a laggard\" - Riley. Olivia stated, \"Olivia is a pioneer and Riley is a laggard\". So who is a pioneer and who is a laggard? (Format your answer like: \"Riley is a pioneer/laggard, and Olivia is a pioneer/laggard\")", "answer": "Riley is a pioneer, and Olivia is a laggard.", "metadata": {"source_dataset": "knights_knaves", "source_index": 1, "statements": [["->", ["telling-truth", 0], ["lying", 1]], ["and", ["telling-truth", 1], ["lying", 0]]], "solution": [true, false], "names": ["Riley", "Olivia"], "knight_knave_terms": {"knight": "pioneer", "knave": "laggard", "a_knight": "a pioneer", "a_knave": "a laggard", "Knight": "Pioneer", "Knave": "Laggard"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by saints and sinners. Saints always tell the truth, and sinners always lie. You meet 2 inhabitants: Samuel, and Jacob. Samuel expressed that if Samuel is a saint then Jacob is a sinner. Jacob was heard saying, \"if Samuel is a saint then Samuel is a sinner\". So who is a saint and who is a sinner? (Format your answer like: \"Samuel is a saint/sinner, and Jacob is a saint/sinner\")"}]}, "question": "A very special island is inhabited only by saints and sinners. Saints always tell the truth, and sinners always lie. You meet 2 inhabitants: Samuel, and Jacob. Samuel expressed that if Samuel is a saint then Jacob is a sinner. Jacob was heard saying, \"if Samuel is a saint then Samuel is a sinner\". So who is a saint and who is a sinner? (Format your answer like: \"Samuel is a saint/sinner, and Jacob is a saint/sinner\")", "answer": "Samuel is a saint, and Jacob is a sinner.", "metadata": {"source_dataset": "knights_knaves", "source_index": 2, "statements": [["->", ["telling-truth", 0], ["lying", 1]], ["->", ["telling-truth", 0], ["lying", 0]]], "solution": [true, false], "names": ["Samuel", "Jacob"], "knight_knave_terms": {"knight": "saint", "knave": "sinner", "a_knight": "a saint", "a_knave": "a sinner", "Knight": "Saint", "Knave": "Sinner"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Olivia, and Lily. \"Lily is a laggard and Lily is a pioneer\" - Olivia. Lily noted, \"if Olivia is a pioneer then Lily is a pioneer\". So who is a pioneer and who is a laggard? (Format your answer like: \"Olivia is a pioneer/laggard, and Lily is a pioneer/laggard\")"}]}, "question": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Olivia, and Lily. \"Lily is a laggard and Lily is a pioneer\" - Olivia. Lily noted, \"if Olivia is a pioneer then Lily is a pioneer\". So who is a pioneer and who is a laggard? (Format your answer like: \"Olivia is a pioneer/laggard, and Lily is a pioneer/laggard\")", "answer": "Olivia is a laggard, and Lily is a pioneer.", "metadata": {"source_dataset": "knights_knaves", "source_index": 3, "statements": [["and", ["lying", 1], ["telling-truth", 1]], ["->", ["telling-truth", 0], ["telling-truth", 1]]], "solution": [false, true], "names": ["Olivia", "Lily"], "knight_knave_terms": {"knight": "pioneer", "knave": "laggard", "a_knight": "a pioneer", "a_knave": "a laggard", "Knight": "Pioneer", "Knave": "Laggard"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by altruists and egoists. Altruists always tell the truth, and egoists always lie. You meet 2 inhabitants: Mason, and Jack. Mason expressed that if Jack is an egoist then Mason is an altruist. In Jack's words: \"Mason is an egoist\". So who is an altruist and who is an egoist? (Format your answer like: \"Mason is a altruist/egoist, and Jack is a altruist/egoist\")"}]}, "question": "A very special island is inhabited only by altruists and egoists. Altruists always tell the truth, and egoists always lie. You meet 2 inhabitants: Mason, and Jack. Mason expressed that if Jack is an egoist then Mason is an altruist. In Jack's words: \"Mason is an egoist\". So who is an altruist and who is an egoist? (Format your answer like: \"Mason is a altruist/egoist, and Jack is a altruist/egoist\")", "answer": "Mason is an altruist, and Jack is an egoist.", "metadata": {"source_dataset": "knights_knaves", "source_index": 4, "statements": [["->", ["lying", 1], ["telling-truth", 0]], ["not", ["telling-truth", 0]]], "solution": [true, false], "names": ["Mason", "Jack"], "knight_knave_terms": {"knight": "altruist", "knave": "egoist", "a_knight": "an altruist", "a_knave": "an egoist", "Knight": "Altruist", "Knave": "Egoist"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
49 changes: 49 additions & 0 deletions resources_servers/reasoning_gym/data/example_metrics.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
{
"name": "example",
"type": "example",
"jsonl_fpath": "resources_servers/reasoning_gym/data/example.jsonl",
"num_repeats": 1,
"gitlab_identifier": null,
"license": null,
"Number of examples": 5,
"Number of tools": {
"Total # non-null values": 0,
"Average": 0.0,
"Min": 0.0,
"Max": 0.0,
"Median": 0.0,
"Standard deviation": 0.0
},
"Json-dumped number of words (proxy for token count)": {
"Total # non-null values": 5,
"Average": 76.2,
"Min": 72.0,
"Max": 81.0,
"Median": 76.0,
"Standard deviation": 3.42
},
"Number of turns": {
"Total # non-null values": 5,
"Average": 1.0,
"Min": 1.0,
"Max": 1.0,
"Median": 1.0,
"Standard deviation": 0.0
},
"Temperature": {
"Total # non-null values": 0,
"Average": 0.0,
"Min": 0.0,
"Max": 0.0,
"Median": 0.0,
"Standard deviation": 0.0
},
"question": {
"unique_count": 5,
"total_count": 5
},
"answer": {
"unique_count": 5,
"total_count": 5
}
}
Loading