NVIDIA-NeMo · bxyu-nvidia · Dec 15, 2025 · Oct 2, 2025 · Oct 2, 2025 · Oct 2, 2025
diff --git a/resources_servers/reasoning_gym/README.md b/resources_servers/reasoning_gym/README.md
@@ -0,0 +1,96 @@
+# Reasoning Gym Resources Server
+
+Integration of [Reasoning Gym](https://github.com/open-thought/reasoning-gym) as a NeMo Gym resources server.
+
+From the Reasoning Gym README: "It currently provides more than 100 tasks over many domains, including but not limited to algebra, arithmetic, computation, cognition, geometry, graph theory, logic, and many common games."
+
+# Dataset prep
+
+**Single task:**
+```bash
+python scripts/create_dataset.py \
+    --task knights_knaves \
+    --size 500 \
+    --seed 42 \
+    --output data/train_knights_knaves.jsonl
+```
+
+**Multiple tasks (composite):**
+```bash
+python scripts/create_dataset.py \
+    --tasks knights_knaves,syllogisms,leg_counting \
+    --size 1000 \
+    --output data/train_composite.jsonl
+```
+
+**All tasks in a category:**
+```bash
+python scripts/create_dataset.py \
+    --category logic \
+    --size 1000 \
+    --output data/train_logic.jsonl
+```
+
+
+**All tasks in all categories:**
+```bash
+python scripts/create_dataset.py \
+    --all-tasks \
+    --size 1000 \
+    --output data/train_all.jsonl
+```
+
+
+**With custom config:**
+```bash
+python scripts/create_dataset.py \
+    --task knights_knaves \
+    --size 500 \
+    --config '{"n_people": 3, "depth_constraint": 3}' \
+    --output data/train_hard.jsonl
+```
+
+# Rollout collection
+
+## Start a vLLM server
+
+```bash
+pip install -U "vllm>=0.12.0"
+
+wget https://huggingface.co/nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16/resolve/main/nano_v3_reasoning_parser.py
+
+vllm serve nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16 \
+  --max-num-seqs 8 \
+  --tensor-parallel-size 1 \
+  --max-model-len 262144 \
+  --port 10240 \
+  --trust-remote-code \
+  --tool-call-parser qwen3_coder \
+  --reasoning-parser-plugin nano_v3_reasoning_parser.py \
+  --reasoning-parser nano_v3
+```
+
+## Create env.yaml
+
+
+```yaml
+policy_base_url: http://localhost:10240/v1
+policy_api_key: EMPTY
+policy_model_name: nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16
+```
+
+## Launch NeMo Gym servers
+
+```bash
+ng_run "+config_paths=[resources_servers/reasoning_gym/configs/reasoning_gym.yaml,responses_api_models/vllm_model/configs/vllm_model.yaml]"
+```
+
+## Collect rollouts
+
+```bash
+ng_collect_rollouts \
+    +agent_name=reasoning_gym_simple_agent \
+    +input_jsonl_fpath=resources_servers/reasoning_gym/data/example.jsonl \
+    +output_jsonl_fpath=results/reasoning_gym_rollouts.jsonl \
+    +limit=5
+```
diff --git a/resources_servers/reasoning_gym/app.py b/resources_servers/reasoning_gym/app.py
@@ -0,0 +1,112 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+from typing import Any, Optional
+
+import reasoning_gym
+from fastapi import FastAPI
+from reasoning_gym.utils import extract_answer
+
+from nemo_gym.base_resources_server import (
+    BaseResourcesServerConfig,
+    BaseVerifyRequest,
+    BaseVerifyResponse,
+    SimpleResourcesServer,
+)
+
+
+class ReasoningGymResourcesServerConfig(BaseResourcesServerConfig):
+    pass
+
+
+class ReasoningGymVerifyRequest(BaseVerifyRequest):
+    """Verify request extended with the fields of one Reasoning Gym dataset entry."""
+
+    # Question text of the dataset entry; forwarded to the scorer as entry["question"].
+    question: str
+    # Reference answer of the dataset entry (may be null); forwarded as entry["answer"].
+    answer: Optional[str]
+    # Task metadata; its "source_dataset" key names the task whose scorer is used.
+    metadata: dict[str, Any]
+
+
+class ReasoningGymVerifyResponse(BaseVerifyResponse):
+    """Verify response reporting the score computed for one model answer."""
+
+    # Task name read from the request metadata's "source_dataset" key.
+    task_name: str
+    # Score returned by the task's scoring function (0.0 if scoring raised); also used as the reward.
+    score: float
+    # Answer text extracted from the model response and handed to the scorer.
+    extracted_answer: Optional[str]
+
+
+class ReasoningGymResourcesServer(SimpleResourcesServer):
+    config: ReasoningGymResourcesServerConfig
+
+    def setup_webserver(self) -> FastAPI:
+        app = super().setup_webserver()
+        return app
+
+    async def verify(self, body: ReasoningGymVerifyRequest) -> ReasoningGymVerifyResponse:
+        """Uses reasoning gym verifier"""
+        model_answer = self._extract_answer_from_response(body.response)
+
+        task_name = body.metadata.get("source_dataset")
+
+        if not task_name:
+            raise ValueError(f"No task name found in metadata: {body.metadata}")
+
+        entry = {
+            "question": body.question,
+            "answer": body.answer,
+            "metadata": body.metadata,
+        }
+        try:
+            score_fn = reasoning_gym.get_score_answer_fn(task_name)
+            score = float(score_fn(answer=model_answer, entry=entry))
+        except Exception as e:
+            print(f"Error scoring answer for task {task_name}: {e}")
+            score = 0.0
+
+        return ReasoningGymVerifyResponse(
+            **body.model_dump(),
+            reward=score,
+            task_name=task_name,
+            score=score,
+            extracted_answer=model_answer,
+        )
+
+    def _extract_answer_from_response(self, response) -> str:
+        assistant_responses = []
+        for output_item in response.output:
+            if output_item.type != "message":
+                continue
+            for content_item in output_item.content:
+                if content_item.type != "output_text":
+                    continue
+                assistant_responses.append(content_item.text)
+
+        full_text = "".join(assistant_responses)
+
+        # Try <answer> tags first (reasoning gym default)
+        extracted = extract_answer(full_text, tag_name="answer")
+        if extracted is not None:
+            return extracted
+
+        # Try \boxed{} if <answer> tags fail
+        # this could be a slight instruction following issue, if model is prompted to use <answer> but uses boxed instead
+        # found for deepseek-distill-qwen-1.5b it fails to use <answer> tags in favor of boxed, hence this fallback
+        # may advise commenting this out for large models who follow instructions to use <answer> well
+        boxed_match = re.search(r"\\boxed\{([^}]+)\}", full_text)
+        if boxed_match:
+            return boxed_match.group(1).strip()
+
+        # return full text if <answer> or \boxed{} fail
+        return full_text.strip() if full_text.strip() else ""
+
+
+# Invoke the server's web server entry point when this module is executed as a script.
+if __name__ == "__main__":
+    ReasoningGymResourcesServer.run_webserver()
diff --git a/resources_servers/reasoning_gym/configs/reasoning_gym.yaml b/resources_servers/reasoning_gym/configs/reasoning_gym.yaml
@@ -0,0 +1,25 @@
+reasoning_gym:
+  resources_servers:
+    reasoning_gym:
+      entrypoint: app.py
+      domain: knowledge
+      verified: true
+reasoning_gym_simple_agent:
+  responses_api_agents:
+    simple_agent:
+      entrypoint: app.py
+      resources_server:
+        type: resources_servers
+        name: reasoning_gym
+      model_server:
+        type: responses_api_models
+        name: policy_model
+      datasets:
+      - name: train
+        type: train
+        jsonl_fpath: resources_servers/reasoning_gym/data/train_knights_knaves.jsonl
+        gitlab_identifier:
+          dataset_name: knights_knaves_reasoning_gym
+          version: 1.0.0
+          artifact_fpath: train_knights_knaves.jsonl
+        license: Apache 2.0
diff --git a/resources_servers/reasoning_gym/configs/resources_only.yaml b/resources_servers/reasoning_gym/configs/resources_only.yaml
@@ -0,0 +1,7 @@
+reasoning_gym:
+  resources_servers:
+    reasoning_gym:
+      entrypoint: app.py
+      domain: knowledge
+      verified: false
+
diff --git a/resources_servers/reasoning_gym/data/example.jsonl b/resources_servers/reasoning_gym/data/example.jsonl
@@ -0,0 +1,5 @@
+{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by sages and fools. Sages always tell the truth, and fools always lie. You meet 2 inhabitants: Zoey, and Riley. Zoey commented, \"Riley is a fool\". In Riley's words: \"Zoey is a sage or Riley is a sage\". So who is a sage and who is a fool? (Format your answer like: \"Zoey is a sage/fool, and Riley is a sage/fool\")"}]}, "question": "A very special island is inhabited only by sages and fools. Sages always tell the truth, and fools always lie. You meet 2 inhabitants: Zoey, and Riley. Zoey commented, \"Riley is a fool\". In Riley's words: \"Zoey is a sage or Riley is a sage\". So who is a sage and who is a fool? (Format your answer like: \"Zoey is a sage/fool, and Riley is a sage/fool\")", "answer": "Zoey is a fool, and Riley is a sage.", "metadata": {"source_dataset": "knights_knaves", "source_index": 0, "statements": [["lying", 1], ["or", ["telling-truth", 0], ["telling-truth", 1]]], "solution": [false, true], "names": ["Zoey", "Riley"], "knight_knave_terms": {"knight": "sage", "knave": "fool", "a_knight": "a sage", "a_knave": "a fool", "Knight": "Sage", "Knave": "Fool"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
+{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Riley, and Olivia. \"if Riley is a pioneer then Olivia is a laggard\" - Riley. Olivia stated, \"Olivia is a pioneer and Riley is a laggard\". So who is a pioneer and who is a laggard? (Format your answer like: \"Riley is a pioneer/laggard, and Olivia is a pioneer/laggard\")"}]}, "question": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Riley, and Olivia. \"if Riley is a pioneer then Olivia is a laggard\" - Riley. Olivia stated, \"Olivia is a pioneer and Riley is a laggard\". So who is a pioneer and who is a laggard? (Format your answer like: \"Riley is a pioneer/laggard, and Olivia is a pioneer/laggard\")", "answer": "Riley is a pioneer, and Olivia is a laggard.", "metadata": {"source_dataset": "knights_knaves", "source_index": 1, "statements": [["->", ["telling-truth", 0], ["lying", 1]], ["and", ["telling-truth", 1], ["lying", 0]]], "solution": [true, false], "names": ["Riley", "Olivia"], "knight_knave_terms": {"knight": "pioneer", "knave": "laggard", "a_knight": "a pioneer", "a_knave": "a laggard", "Knight": "Pioneer", "Knave": "Laggard"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
+{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by saints and sinners. Saints always tell the truth, and sinners always lie. You meet 2 inhabitants: Samuel, and Jacob. Samuel expressed that if Samuel is a saint then Jacob is a sinner. Jacob was heard saying, \"if Samuel is a saint then Samuel is a sinner\". So who is a saint and who is a sinner? (Format your answer like: \"Samuel is a saint/sinner, and Jacob is a saint/sinner\")"}]}, "question": "A very special island is inhabited only by saints and sinners. Saints always tell the truth, and sinners always lie. You meet 2 inhabitants: Samuel, and Jacob. Samuel expressed that if Samuel is a saint then Jacob is a sinner. Jacob was heard saying, \"if Samuel is a saint then Samuel is a sinner\". So who is a saint and who is a sinner? (Format your answer like: \"Samuel is a saint/sinner, and Jacob is a saint/sinner\")", "answer": "Samuel is a saint, and Jacob is a sinner.", "metadata": {"source_dataset": "knights_knaves", "source_index": 2, "statements": [["->", ["telling-truth", 0], ["lying", 1]], ["->", ["telling-truth", 0], ["lying", 0]]], "solution": [true, false], "names": ["Samuel", "Jacob"], "knight_knave_terms": {"knight": "saint", "knave": "sinner", "a_knight": "a saint", "a_knave": "a sinner", "Knight": "Saint", "Knave": "Sinner"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
+{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Olivia, and Lily. \"Lily is a laggard and Lily is a pioneer\" - Olivia. Lily noted, \"if Olivia is a pioneer then Lily is a pioneer\". So who is a pioneer and who is a laggard? (Format your answer like: \"Olivia is a pioneer/laggard, and Lily is a pioneer/laggard\")"}]}, "question": "A very special island is inhabited only by pioneers and laggards. Pioneers always tell the truth, and laggards always lie. You meet 2 inhabitants: Olivia, and Lily. \"Lily is a laggard and Lily is a pioneer\" - Olivia. Lily noted, \"if Olivia is a pioneer then Lily is a pioneer\". So who is a pioneer and who is a laggard? (Format your answer like: \"Olivia is a pioneer/laggard, and Lily is a pioneer/laggard\")", "answer": "Olivia is a laggard, and Lily is a pioneer.", "metadata": {"source_dataset": "knights_knaves", "source_index": 3, "statements": [["and", ["lying", 1], ["telling-truth", 1]], ["->", ["telling-truth", 0], ["telling-truth", 1]]], "solution": [false, true], "names": ["Olivia", "Lily"], "knight_knave_terms": {"knight": "pioneer", "knave": "laggard", "a_knight": "a pioneer", "a_knave": "a laggard", "Knight": "Pioneer", "Knave": "Laggard"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
+{"responses_create_params": {"input": [{"role": "user", "content": "A very special island is inhabited only by altruists and egoists. Altruists always tell the truth, and egoists always lie. You meet 2 inhabitants: Mason, and Jack. Mason expressed that if Jack is an egoist then Mason is an altruist. In Jack's words: \"Mason is an egoist\". So who is an altruist and who is an egoist? (Format your answer like: \"Mason is a altruist/egoist, and Jack is a altruist/egoist\")"}]}, "question": "A very special island is inhabited only by altruists and egoists. Altruists always tell the truth, and egoists always lie. You meet 2 inhabitants: Mason, and Jack. Mason expressed that if Jack is an egoist then Mason is an altruist. In Jack's words: \"Mason is an egoist\". So who is an altruist and who is an egoist? (Format your answer like: \"Mason is a altruist/egoist, and Jack is a altruist/egoist\")", "answer": "Mason is an altruist, and Jack is an egoist.", "metadata": {"source_dataset": "knights_knaves", "source_index": 4, "statements": [["->", ["lying", 1], ["telling-truth", 0]], ["not", ["telling-truth", 0]]], "solution": [true, false], "names": ["Mason", "Jack"], "knight_knave_terms": {"knight": "altruist", "knave": "egoist", "a_knight": "an altruist", "a_knave": "an egoist", "Knight": "Altruist", "Knave": "Egoist"}, "difficulty": {"n_people": 2, "depth_constraint": 2, "width_constraint": 2}}}
diff --git a/resources_servers/reasoning_gym/data/example_metrics.json b/resources_servers/reasoning_gym/data/example_metrics.json
@@ -0,0 +1,49 @@
+{
+    "name": "example",
+    "type": "example",
+    "jsonl_fpath": "resources_servers/reasoning_gym/data/example.jsonl",
+    "num_repeats": 1,
+    "gitlab_identifier": null,
+    "license": null,
+    "Number of examples": 5,
+    "Number of tools": {
+        "Total # non-null values": 0,
+        "Average": 0.0,
+        "Min": 0.0,
+        "Max": 0.0,
+        "Median": 0.0,
+        "Standard deviation": 0.0
+    },
+    "Json-dumped number of words (proxy for token count)": {
+        "Total # non-null values": 5,
+        "Average": 76.2,
+        "Min": 72.0,
+        "Max": 81.0,
+        "Median": 76.0,
+        "Standard deviation": 3.42
+    },
+    "Number of turns": {
+        "Total # non-null values": 5,
+        "Average": 1.0,
+        "Min": 1.0,
+        "Max": 1.0,
+        "Median": 1.0,
+        "Standard deviation": 0.0
+    },
+    "Temperature": {
+        "Total # non-null values": 0,
+        "Average": 0.0,
+        "Min": 0.0,
+        "Max": 0.0,
+        "Median": 0.0,
+        "Standard deviation": 0.0
+    },
+    "question": {
+        "unique_count": 5,
+        "total_count": 5
+    },
+    "answer": {
+        "unique_count": 5,
+        "total_count": 5
+    }
+}