From 308dbe811b46498b59927fc9474d03bcdec7200c Mon Sep 17 00:00:00 2001 From: Arkadiusz Nowaczynski Date: Tue, 10 Mar 2026 16:20:54 +0100 Subject: [PATCH 1/3] skip output-rs*_submissions.jsonl files in critpt Signed-off-by: Arkadiusz Nowaczynski --- nemo_skills/pipeline/summarize_results.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nemo_skills/pipeline/summarize_results.py b/nemo_skills/pipeline/summarize_results.py index 46e89c833c..29bc17d7cd 100644 --- a/nemo_skills/pipeline/summarize_results.py +++ b/nemo_skills/pipeline/summarize_results.py @@ -293,7 +293,9 @@ def summarize_results( [ jsonl_file for jsonl_file in glob.glob(f"{benchmark_path}/output-rs*.jsonl") - if Path(jsonl_file).name != "output.jsonl" and "_chunk_" not in Path(jsonl_file).name + if Path(jsonl_file).name != "output.jsonl" + and "_chunk_" not in Path(jsonl_file).name + and "_submissions" not in Path(jsonl_file).name # introduced because of critpt ] ) has_sampling = len(input_files) > 0 From a0c45e6fa659221cdb74e5aca06ffac8171b815a Mon Sep 17 00:00:00 2001 From: Arkadiusz Nowaczynski Date: Tue, 10 Mar 2026 17:20:20 +0100 Subject: [PATCH 2/3] more robust: glob only output-rs[0-9]*.jsonl instead of filtering out _submissions files Signed-off-by: Arkadiusz Nowaczynski --- nemo_skills/pipeline/summarize_results.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/nemo_skills/pipeline/summarize_results.py b/nemo_skills/pipeline/summarize_results.py index 29bc17d7cd..534eb9eb35 100644 --- a/nemo_skills/pipeline/summarize_results.py +++ b/nemo_skills/pipeline/summarize_results.py @@ -292,19 +292,17 @@ def summarize_results( input_files = sorted( [ jsonl_file - for jsonl_file in glob.glob(f"{benchmark_path}/output-rs*.jsonl") - if Path(jsonl_file).name != "output.jsonl" - and "_chunk_" not in Path(jsonl_file).name - and "_submissions" not in Path(jsonl_file).name # introduced because of critpt + for jsonl_file in glob.glob(f"{benchmark_path}/output-rs[0-9]*.jsonl") + if Path(jsonl_file).name != "output.jsonl" and "_chunk_" 
not in Path(jsonl_file).name ] ) has_sampling = len(input_files) > 0 if has_greedy and has_sampling: raise ValueError( - f"Both output.jsonl and output-rs*.jsonl found for benchmark {benchmark}. " + f"Both output.jsonl and output-rs[0-9]*.jsonl found for benchmark {benchmark}. " "This indicates that the evaluation was done multiple times with different sampling parameters. " - "It's not clear how to process this! Please remove output.jsonl or output-rs*.jsonl files and rerun." + "It's not clear how to process this! Please remove output.jsonl or output-rs[0-9]*.jsonl files and rerun." ) if not has_greedy and not has_sampling: From bda56dbc1ca1372df4e93dd629e02712497ef13b Mon Sep 17 00:00:00 2001 From: Arkadiusz Nowaczynski Date: Tue, 10 Mar 2026 17:32:02 +0100 Subject: [PATCH 3/3] ultra robust: accept only file names matching ^output-rs\d+\.jsonl$ Signed-off-by: Arkadiusz Nowaczynski --- nemo_skills/pipeline/summarize_results.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/nemo_skills/pipeline/summarize_results.py b/nemo_skills/pipeline/summarize_results.py index 534eb9eb35..04611397eb 100644 --- a/nemo_skills/pipeline/summarize_results.py +++ b/nemo_skills/pipeline/summarize_results.py @@ -289,20 +289,21 @@ def summarize_results( metrics = {} has_greedy = Path(f"{benchmark_path}/output.jsonl").exists() + output_rs_pattern = re.compile(r"^output-rs\d+\.jsonl$") input_files = sorted( [ jsonl_file - for jsonl_file in glob.glob(f"{benchmark_path}/output-rs[0-9]*.jsonl") - if Path(jsonl_file).name != "output.jsonl" and "_chunk_" not in Path(jsonl_file).name + for jsonl_file in glob.glob(f"{benchmark_path}/output-rs*.jsonl") + if output_rs_pattern.match(Path(jsonl_file).name) ] ) has_sampling = len(input_files) > 0 if has_greedy and has_sampling: raise ValueError( - f"Both output.jsonl and output-rs[0-9]*.jsonl found for benchmark {benchmark}. " + f"Both output.jsonl and output-rs*.jsonl found for benchmark {benchmark}. 
" "This indicates that the evaluation was done multiple times with different sampling parameters. " - "It's not clear how to process this! Please remove output.jsonl or output-rs[0-9]*.jsonl files and rerun." + "It's not clear how to process this! Please remove output.jsonl or output-rs*.jsonl files and rerun." ) if not has_greedy and not has_sampling: