NVIDIA-NeMo · gwarmstrong · Dec 18, 2025
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -85,10 +85,7 @@ jobs:
         NVIDIA_API_KEY: ${{ secrets.NVIDIA_API_KEY }}
         HF_TOKEN: ${{ secrets.HF_TOKEN }}
       run: |
-        # Default shared runtime directory
-        sudo mkdir -p /nemo_run
-        sudo chmod 777 /nemo_run
-        docker run --rm --network=host -v /nemo_run:/nemo_run nemo-skills-sandbox-image &
+        docker run --rm --network=host nemo-skills-sandbox-image &
         sleep 10
         set -o pipefail # this will make sure next line returns non-0 exit code if tests fail
         ns prepare_data gsm8k math-500

diff --git a/docs/evaluation/code.md b/docs/evaluation/code.md
@@ -185,10 +185,10 @@ We currently support IOI24 and are working to support IOI25 for evaluation. The
 
 #### Data Preparation
 
-First, prepare the dataset by running the `ns prepare_data` command. The arguments below will generate `ioi24.jsonl` and `ioi24_metadata.json`.
+First, prepare the dataset by running the `ns prepare_data` command. The command below generates `test.jsonl` and `test_metadata.json`.
 
 ```
-ns prepare_data ioi
+ns prepare_data ioi24
 ```
 
 #### Running the Evaluation
@@ -209,24 +209,24 @@ ns eval \
     --server_gpus=8 \
     --benchmarks=ioi24:50 \
     --with_sandbox \
-    --split=ioi24 \
+    --split=test \
     --data_dir=<DATA_DIR> \
     --output_dir=<OUTPUT_DIR> \
-    --eval_subfolder=eval-results/ioi24/ \ # set the folder if you want to differentiate subsets.
-    --extra_eval_args="++eval_config.test_file=<PATH_TO_METADATA_TEST_DIR>/ioi24_metadata.json" \
+    --extra_eval_args="++eval_config.test_file=<PATH_TO_METADATA_TEST_FILE>" \
     ++inference.temperature=0.6 \
     ++inference.top_p=0.95 \
     ++inference.tokens_to_generate=65536
 ```
 
 ##### Verifying Results
 
-After all jobs are complete, you can check the results in `<OUTPUT_DIR>/eval-results/ioi24/ioi/metrics.json`. You can also take a look at `<OUTPUT_DIR>/eval-results/ioi24/ioi/summarized-results/main_*`. They should look something like this:
+After all jobs are complete, you can check the results in `<OUTPUT_DIR>/eval-results/ioi24/metrics.json`. You can also take a look at `<OUTPUT_DIR>/eval-results/ioi24/summarized-results/main_*`. They should look something like this:
 
 ```
------------------------------------- ioi24 -------------------------------------
-evaluation_mode | num_entries | avg_tokens | gen_seconds | correct | total_score
-pass@50          | 39          | 52225      | 99630       | 23.08%  | 500
+------------------------------------------------------ ioi24 ------------------------------------------------------
+evaluation_mode   | num_entries | avg_tokens | gen_seconds | correct       | total_score        | round_robin_score
+pass@1[avg-of-50] | 39          | 40387      | 7410        | 0.51% ± 1.04% | 303.47             | 261.01
+pass@50           | 39          | 40387      | 7410        | 2.56%         | 303.47             | 261.01
 ```
 
 ### livecodebench

diff --git a/nemo_skills/dataset/ioi/__init__.py → nemo_skills/dataset/ioi24/__init__.py b/nemo_skills/dataset/ioi/__init__.py → nemo_skills/dataset/ioi24/__init__.py
diff --git a/nemo_skills/dataset/ioi/prepare.py → nemo_skills/dataset/ioi24/prepare.py b/nemo_skills/dataset/ioi/prepare.py → nemo_skills/dataset/ioi24/prepare.py
@@ -27,7 +27,6 @@
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--split", type=str, default="test")
-    parser.add_argument("--suffix", type=str, default="24")
     args = parser.parse_args()
 
     data_dir = Path(__file__).absolute().parent
@@ -51,7 +50,7 @@
                 }
             )
 
-    with open(os.path.join(data_dir, f"ioi{args.suffix}.jsonl"), "w") as f:
+    with open(os.path.join(data_dir, f"{args.split}.jsonl"), "w") as f:
         f.write("\n".join(json.dumps(x) for x in entries))
 
     tests_dataset = load_dataset("open-r1/ioi-test-cases", name="2024", split="train")
@@ -83,5 +82,5 @@
             "grader_files": entry["grader_files"],
         }
 
-    with open(os.path.join(data_dir, f"ioi{args.suffix}_metadata.json"), "w") as f:
+    with open(os.path.join(data_dir, f"{args.split}_metadata.json"), "w") as f:
         json.dump(final_structure, f)
diff --git a/nemo_skills/dataset/ioi25/__init__.py b/nemo_skills/dataset/ioi25/__init__.py
@@ -0,0 +1,31 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+TODO: The data files required to run the IOI25 evaluation are not yet available; we are working on providing them.
+"""
+
+# settings that define how evaluation should be done by default (all can be changed from cmdline)
+GENERATION_ARGS = "++prompt_config=generic/default ++eval_type=ioi"
+DATASET_GROUP = "code"
+METRICS_TYPE = "ioi"
+
+# environment variables required by this benchmark
+SANDBOX_ENV_VARS = [
+    "UWSGI_PROCESSES=1024",
+    "UWSGI_CPU_AFFINITY=8",
+    "UWSGI_CHEAPER=1023",
+    "NUM_WORKERS=1",
+    "STATEFUL_SANDBOX=0",
+]