From 308dbe811b46498b59927fc9474d03bcdec7200c Mon Sep 17 00:00:00 2001
From: Arkadiusz Nowaczynski <anowaczynski@nvidia.com>
Date: Tue, 10 Mar 2026 16:20:54 +0100
Subject: [PATCH 1/3] skip output-rs*_submissions.jsonl files in critpt

Signed-off-by: Arkadiusz Nowaczynski <anowaczynski@nvidia.com>
---
 nemo_skills/pipeline/summarize_results.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/nemo_skills/pipeline/summarize_results.py b/nemo_skills/pipeline/summarize_results.py
index 46e89c833c..29bc17d7cd 100644
--- a/nemo_skills/pipeline/summarize_results.py
+++ b/nemo_skills/pipeline/summarize_results.py
@@ -293,7 +293,9 @@ def summarize_results(
             [
                 jsonl_file
                 for jsonl_file in glob.glob(f"{benchmark_path}/output-rs*.jsonl")
-                if Path(jsonl_file).name != "output.jsonl" and "_chunk_" not in Path(jsonl_file).name
+                if Path(jsonl_file).name != "output.jsonl"
+                and "_chunk_" not in Path(jsonl_file).name
+                and "_submissions" not in Path(jsonl_file).name  # introduced because of critpt
             ]
         )
         has_sampling = len(input_files) > 0

From a0c45e6fa659221cdb74e5aca06ffac8171b815a Mon Sep 17 00:00:00 2001
From: Arkadiusz Nowaczynski <anowaczynski@nvidia.com>
Date: Tue, 10 Mar 2026 17:20:20 +0100
Subject: [PATCH 2/3] replace _submissions filter with output-rs[0-9]*.jsonl glob

Signed-off-by: Arkadiusz Nowaczynski <anowaczynski@nvidia.com>
---
 nemo_skills/pipeline/summarize_results.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/nemo_skills/pipeline/summarize_results.py b/nemo_skills/pipeline/summarize_results.py
index 29bc17d7cd..534eb9eb35 100644
--- a/nemo_skills/pipeline/summarize_results.py
+++ b/nemo_skills/pipeline/summarize_results.py
@@ -292,19 +292,17 @@ def summarize_results(
         input_files = sorted(
             [
                 jsonl_file
-                for jsonl_file in glob.glob(f"{benchmark_path}/output-rs*.jsonl")
-                if Path(jsonl_file).name != "output.jsonl"
-                and "_chunk_" not in Path(jsonl_file).name
-                and "_submissions" not in Path(jsonl_file).name  # introduced because of critpt
+                for jsonl_file in glob.glob(f"{benchmark_path}/output-rs[0-9]*.jsonl")
+                if Path(jsonl_file).name != "output.jsonl" and "_chunk_" not in Path(jsonl_file).name
             ]
         )
         has_sampling = len(input_files) > 0
 
         if has_greedy and has_sampling:
             raise ValueError(
-                f"Both output.jsonl and output-rs*.jsonl found for benchmark {benchmark}. "
+                f"Both output.jsonl and output-rs[0-9]*.jsonl found for benchmark {benchmark}. "
                 "This indicates that the evaluation was done multiple times with different sampling parameters. "
-                "It's not clear how to process this! Please remove output.jsonl or output-rs*.jsonl files and rerun."
+                "It's not clear how to process this! Please remove output.jsonl or output-rs[0-9]*.jsonl files and rerun."
             )
 
         if not has_greedy and not has_sampling:

From bda56dbc1ca1372df4e93dd629e02712497ef13b Mon Sep 17 00:00:00 2001
From: Arkadiusz Nowaczynski <anowaczynski@nvidia.com>
Date: Tue, 10 Mar 2026 17:32:02 +0100
Subject: [PATCH 3/3] match sampling outputs with strict ^output-rs\d+\.jsonl$ regex

Signed-off-by: Arkadiusz Nowaczynski <anowaczynski@nvidia.com>
---
 nemo_skills/pipeline/summarize_results.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/nemo_skills/pipeline/summarize_results.py b/nemo_skills/pipeline/summarize_results.py
index 534eb9eb35..04611397eb 100644
--- a/nemo_skills/pipeline/summarize_results.py
+++ b/nemo_skills/pipeline/summarize_results.py
@@ -289,20 +289,21 @@ def summarize_results(
         metrics = {}
 
         has_greedy = Path(f"{benchmark_path}/output.jsonl").exists()
+        output_rs_pattern = re.compile(r"^output-rs\d+\.jsonl$")
         input_files = sorted(
             [
                 jsonl_file
-                for jsonl_file in glob.glob(f"{benchmark_path}/output-rs[0-9]*.jsonl")
-                if Path(jsonl_file).name != "output.jsonl" and "_chunk_" not in Path(jsonl_file).name
+                for jsonl_file in glob.glob(f"{benchmark_path}/output-rs*.jsonl")
+                if output_rs_pattern.match(Path(jsonl_file).name)
             ]
         )
         has_sampling = len(input_files) > 0
 
         if has_greedy and has_sampling:
             raise ValueError(
-                f"Both output.jsonl and output-rs[0-9]*.jsonl found for benchmark {benchmark}. "
+                f"Both output.jsonl and output-rs*.jsonl found for benchmark {benchmark}. "
                 "This indicates that the evaluation was done multiple times with different sampling parameters. "
-                "It's not clear how to process this! Please remove output.jsonl or output-rs[0-9]*.jsonl files and rerun."
+                "It's not clear how to process this! Please remove output.jsonl or output-rs*.jsonl files and rerun."
             )
 
         if not has_greedy and not has_sampling: