From 27e58bfd7379f6c147d7f6b7c435d7ef594cce3d Mon Sep 17 00:00:00 2001 From: wasiahmad Date: Thu, 5 Feb 2026 14:11:32 -0800 Subject: [PATCH 1/4] lcb generalized prompting support Signed-off-by: wasiahmad --- nemo_skills/dataset/livecodebench/__init__.py | 2 +- nemo_skills/dataset/livecodebench/prepare.py | 14 +++++--------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/nemo_skills/dataset/livecodebench/__init__.py b/nemo_skills/dataset/livecodebench/__init__.py index 6945c0c5e0..83d4d4a30f 100644 --- a/nemo_skills/dataset/livecodebench/__init__.py +++ b/nemo_skills/dataset/livecodebench/__init__.py @@ -16,4 +16,4 @@ DATASET_GROUP = "code" METRICS_TYPE = "livecodebench" EVAL_SPLIT = "test_v6_2408_2505" -GENERATION_ARGS = "++prompt_config=eval/livecodebench/python_codegen ++eval_type=livecodebench" +GENERATION_ARGS = "++prompt_config=eval/livecodebench/default_reasoning ++eval_type=livecodebench" diff --git a/nemo_skills/dataset/livecodebench/prepare.py b/nemo_skills/dataset/livecodebench/prepare.py index a3cb49d099..9258367618 100644 --- a/nemo_skills/dataset/livecodebench/prepare.py +++ b/nemo_skills/dataset/livecodebench/prepare.py @@ -23,7 +23,7 @@ class PromptConstants: - # reference: https://github.com/QwenLM/Qwen2.5-Coder/blob/main/qwencoder-eval/reasoning/livecode_bench_cot/lcb_runner_cq/prompts/code_generation.py#L31 + # reference: https://github.com/LiveCodeBench/LiveCodeBench/blob/main/lcb_runner/prompts/code_generation.py#L35C5-L38C1 FORMATTING_MESSAGE_WITH_STARTER_CODE = "You will use the following starter code to write the solution to the problem and enclose your code within delimiters." FORMATTING_WITHOUT_STARTER_CODE = "Read the inputs from stdin solve the problem and write the answer to stdout (do not directly test on the sample inputs). Enclose your code within delimiters as follows. Ensure that when the python program runs, it reads the inputs, runs the algorithm and writes output to STDOUT." @@ -68,16 +68,14 @@ def parse_month_range(start_date, end_date): def clean_data(dataset, keep_all_columns=False): def map_fn(data): - question = data["question_content"] + "\n\n" if data["starter_code"]: - question += f"{PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE}\n" - question += f"```python\n{data['starter_code']}\n```\n\n" + data["formatting_message"] = PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE + data["starter_code"] = f"```python\n{data['starter_code']}\n```\n\n" else: - question += f"{PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n\n" - question += "```python\n# YOUR CODE HERE\n```\n\n" + data["formatting_message"] = PromptConstants.FORMATTING_WITHOUT_STARTER_CODE + data["starter_code"] = "```python\n# YOUR CODE HERE\n```\n\n" data["task_id"] = data["question_id"] - data["question"] = question.replace(" ", "\t") return data remove_columns = [] @@ -86,10 +84,8 @@ def map_fn(data): "question_title", "contest_id", "metadata", - "question_content", "platform", "question_id", - "starter_code", "public_test_cases", "private_test_cases", ] From e6d077abc5fcb993ecba7d6194507f89c469e2df Mon Sep 17 00:00:00 2001 From: wasiahmad Date: Thu, 5 Feb 2026 14:12:38 -0800 Subject: [PATCH 2/4] lcb generalized prompting support Signed-off-by: wasiahmad --- docs/releases/openreasoning/training.md | 2 +- nemo_skills/dataset/livecodebench-cpp/__init__.py | 4 +++- nemo_skills/dataset/livecodebench-cpp/prepare.py | 14 +++++--------- nemo_skills/dataset/livecodebench-pro/__init__.py | 2 +- nemo_skills/dataset/livecodebench-pro/prepare.py | 7 +++++-- .../config/eval/livecodebench/cpp_codegen.yaml | 6 ------ .../eval/livecodebench/cpp_codegen_reasoning.yaml | 12 ------------ .../code.yaml => eval/livecodebench/default.yaml} | 6 +++--- .../eval/livecodebench/default_reasoning.yaml | 10 ++++++++++ .../config/eval/livecodebench/python_codegen.yaml | 6 ------ .../livecodebench/python_codegen_reasoning.yaml | 11 ----------- .../prompt/config/gpt-oss/livecodebench.yaml | 8 +++++++- 12 files changed, 35 insertions(+), 53 deletions(-) delete mode 100644 nemo_skills/prompt/config/eval/livecodebench/cpp_codegen.yaml delete mode 100644 nemo_skills/prompt/config/eval/livecodebench/cpp_codegen_reasoning.yaml rename nemo_skills/prompt/config/{gpt-oss/code.yaml => eval/livecodebench/default.yaml} (53%) create mode 100644 nemo_skills/prompt/config/eval/livecodebench/default_reasoning.yaml delete mode 100644 nemo_skills/prompt/config/eval/livecodebench/python_codegen.yaml delete mode 100644 nemo_skills/prompt/config/eval/livecodebench/python_codegen_reasoning.yaml diff --git a/docs/releases/openreasoning/training.md b/docs/releases/openreasoning/training.md index 3e4e62691a..c6206095c6 100644 --- a/docs/releases/openreasoning/training.md +++ b/docs/releases/openreasoning/training.md @@ -105,7 +105,7 @@ def apply_format(elem, prompt): dataset = load_dataset("nvidia/Nemotron-Post-Training-Dataset-v1", split="code") -prompt = get_prompt('eval/livecodebench/python_codegen_reasoning', tokenizer='Qwen/Qwen2.5-32B-Instruct', system_message="") +prompt = get_prompt('eval/livecodebench/default_reasoning', tokenizer='Qwen/Qwen2.5-32B-Instruct', system_message="") func = partial(apply_format, prompt=prompt) dataset = dataset.map(func, num_proc=20) dataset = dataset.remove_columns(['messages']) diff --git a/nemo_skills/dataset/livecodebench-cpp/__init__.py b/nemo_skills/dataset/livecodebench-cpp/__init__.py index 76d7c478de..f983a983b5 100644 --- a/nemo_skills/dataset/livecodebench-cpp/__init__.py +++ b/nemo_skills/dataset/livecodebench-cpp/__init__.py @@ -16,4 +16,6 @@ DATASET_GROUP = "code" METRICS_TYPE = "livecodebench" EVAL_SPLIT = "v6_2408_2505" -GENERATION_ARGS = "++prompt_config=eval/livecodebench/cpp_codegen ++eval_type=livecodebench ++eval_config.language=cpp" +GENERATION_ARGS = ( + "++prompt_config=eval/livecodebench/default_reasoning ++eval_type=livecodebench ++eval_config.language=cpp" +) diff --git a/nemo_skills/dataset/livecodebench-cpp/prepare.py b/nemo_skills/dataset/livecodebench-cpp/prepare.py index bb4becd4fc..4aca9481cd 100644 --- a/nemo_skills/dataset/livecodebench-cpp/prepare.py +++ b/nemo_skills/dataset/livecodebench-cpp/prepare.py @@ -20,7 +20,7 @@ class PromptConstants: - # reference: https://github.com/QwenLM/Qwen2.5-Coder/blob/main/qwencoder-eval/reasoning/livecode_bench_cot/lcb_runner_cq/prompts/code_generation.py#L31 + # reference: https://github.com/LiveCodeBench/LiveCodeBench/blob/main/lcb_runner/prompts/code_generation.py#L35C5-L38C1 FORMATTING_MESSAGE_WITH_STARTER_CODE = "You will use the following starter code to write the solution to the problem and enclose your code within delimiters." FORMATTING_WITHOUT_STARTER_CODE = "Read the inputs from stdin solve the problem and write the answer to stdout (do not directly test on the sample inputs). Enclose your code within delimiters as follows. Ensure that when the c++ program runs, it reads the inputs, runs the algorithm and writes output to STDOUT." @@ -45,16 +45,14 @@ def parse_data(split): def clean_data(dataset, keep_all_columns=False): def map_fn(data): - question = data["question_content"] + "\n\n" if data["starter_code"]: - question += f"{PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE}\n" - question += f"```cpp\n{data['starter_code']}\n```\n\n" + data["formatting_message"] = PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE + data["starter_code"] = f"```cpp\n{data['starter_code']}\n```\n\n" else: - question += f"{PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n\n" - question += "```cpp\n// YOUR CODE HERE\n```\n\n" + data["formatting_message"] = PromptConstants.FORMATTING_WITHOUT_STARTER_CODE + data["starter_code"] = "```cpp\n// YOUR CODE HERE\n```\n\n" data["task_id"] = data["question_id"] - data["question"] = question.replace(" ", "\t") return data remove_columns = [] @@ -63,10 +61,8 @@ def map_fn(data): "question_title", "contest_id", "metadata", - "question_content", "platform", "question_id", - "starter_code", "public_test_cases", "private_test_cases", ] diff --git a/nemo_skills/dataset/livecodebench-pro/__init__.py b/nemo_skills/dataset/livecodebench-pro/__init__.py index 7aa6e5f0ed..bed9a8eafd 100644 --- a/nemo_skills/dataset/livecodebench-pro/__init__.py +++ b/nemo_skills/dataset/livecodebench-pro/__init__.py @@ -16,4 +16,4 @@ DATASET_GROUP = "code" METRICS_TYPE = "livecodebench_pro" EVAL_SPLIT = "test_25q2" -GENERATION_ARGS = "++prompt_config=eval/livecodebench/cpp_codegen ++eval_type=livecodebench_pro" +GENERATION_ARGS = "++prompt_config=eval/livecodebench/default_reasoning ++eval_type=livecodebench_pro" diff --git a/nemo_skills/dataset/livecodebench-pro/prepare.py b/nemo_skills/dataset/livecodebench-pro/prepare.py index 52703532de..287ce652b6 100644 --- a/nemo_skills/dataset/livecodebench-pro/prepare.py +++ b/nemo_skills/dataset/livecodebench-pro/prepare.py @@ -28,6 +28,8 @@ ("25q3", "quater_2025_7_9", 144), ] +FORMATTING_MESSAGE = "Read the inputs from stdin solve the problem and write the answer to stdout (do not directly test on the sample inputs). Enclose your code within delimiters as follows. Ensure that when the c++ program runs, it reads the inputs, runs the algorithm and writes output to STDOUT." + def download_testcases(local_dir, token): """ @@ -61,9 +63,10 @@ def process_problem_splits(output_dir, token): with open(output_file, "w", encoding="utf-8") as f: for row in dataset: output_record = dict(row) - output_record["question"] = row["problem_statement"] + output_record["question_content"] = row["problem_statement"] + output_record["formatting_message"] = FORMATTING_MESSAGE + output_record["starter_code"] = "```cpp\n// YOUR CODE HERE\n```\n\n" output_record["subset_for_metrics"] = row["difficulty"] - f.write(json.dumps(output_record) + "\n") except Exception as e: diff --git a/nemo_skills/prompt/config/eval/livecodebench/cpp_codegen.yaml b/nemo_skills/prompt/config/eval/livecodebench/cpp_codegen.yaml deleted file mode 100644 index a99d1aca6d..0000000000 --- a/nemo_skills/prompt/config/eval/livecodebench/cpp_codegen.yaml +++ /dev/null @@ -1,6 +0,0 @@ -# default prompt for livecodebench c++ - -user: |- - Here is a problem for which you need to generate an executable code in c++ programming language. - - {question} diff --git a/nemo_skills/prompt/config/eval/livecodebench/cpp_codegen_reasoning.yaml b/nemo_skills/prompt/config/eval/livecodebench/cpp_codegen_reasoning.yaml deleted file mode 100644 index 1378a2d07f..0000000000 --- a/nemo_skills/prompt/config/eval/livecodebench/cpp_codegen_reasoning.yaml +++ /dev/null @@ -1,12 +0,0 @@ - -user: |- - You are a helpful and harmless assistant. You should think step-by-step before responding to the instruction below. - - Please use c++ programming language only. - - You must use ```cpp for just the final solution code block with the following format: - ```cpp - // Your code here - ``` - - {question} diff --git a/nemo_skills/prompt/config/gpt-oss/code.yaml b/nemo_skills/prompt/config/eval/livecodebench/default.yaml similarity index 53% rename from nemo_skills/prompt/config/gpt-oss/code.yaml rename to nemo_skills/prompt/config/eval/livecodebench/default.yaml index b2f1101751..1118652784 100644 --- a/nemo_skills/prompt/config/gpt-oss/code.yaml +++ b/nemo_skills/prompt/config/eval/livecodebench/default.yaml @@ -1,8 +1,8 @@ user: |- - ### Question - {question} + ### Question: + {question_content} - {formatting_message} + ### Format: {formatting_message} {starter_code} ### Answer: (use the provided format with backticks) diff --git a/nemo_skills/prompt/config/eval/livecodebench/default_reasoning.yaml b/nemo_skills/prompt/config/eval/livecodebench/default_reasoning.yaml new file mode 100644 index 0000000000..9daf50da6e --- /dev/null +++ b/nemo_skills/prompt/config/eval/livecodebench/default_reasoning.yaml @@ -0,0 +1,10 @@ +user: |- + You are a helpful and harmless assistant. You should think step-by-step before responding to the instruction below. + + ### Question: + {question_content} + + ### Format: {formatting_message} + {starter_code} + + ### Answer: (use the provided format with backticks) diff --git a/nemo_skills/prompt/config/eval/livecodebench/python_codegen.yaml b/nemo_skills/prompt/config/eval/livecodebench/python_codegen.yaml deleted file mode 100644 index 5c14e5eb6e..0000000000 --- a/nemo_skills/prompt/config/eval/livecodebench/python_codegen.yaml +++ /dev/null @@ -1,6 +0,0 @@ -# default prompt for livecodebench Python - -user: |- - Here is a problem for which you need to generate an executable code in python programming language. - - {question} diff --git a/nemo_skills/prompt/config/eval/livecodebench/python_codegen_reasoning.yaml b/nemo_skills/prompt/config/eval/livecodebench/python_codegen_reasoning.yaml deleted file mode 100644 index 44631da442..0000000000 --- a/nemo_skills/prompt/config/eval/livecodebench/python_codegen_reasoning.yaml +++ /dev/null @@ -1,11 +0,0 @@ -user: |- - You are a helpful and harmless assistant. You should think step-by-step before responding to the instruction below. - - Please use python programming language only. - - You must use ```python for just the final solution code block with the following format: - ```python - # Your code here - ``` - - {question} diff --git a/nemo_skills/prompt/config/gpt-oss/livecodebench.yaml b/nemo_skills/prompt/config/gpt-oss/livecodebench.yaml index 4bcc663278..b7f6afaf59 100644 --- a/nemo_skills/prompt/config/gpt-oss/livecodebench.yaml +++ b/nemo_skills/prompt/config/gpt-oss/livecodebench.yaml @@ -1,4 +1,10 @@ user: |- - {question} + ### Question: + {question_content} + + ### Format: {formatting_message} + {starter_code} Please avoid using `sys.stdin.buffer` to process input, and avoid using `threading`. + + ### Answer: (use the provided format with backticks) From 5ccb141b9aca0aa8cfee2e5766226ac8209fcec0 Mon Sep 17 00:00:00 2001 From: wasiahmad Date: Sun, 8 Feb 2026 00:29:16 -0800 Subject: [PATCH 3/4] removing extra \n from starter_code Signed-off-by: wasiahmad --- nemo_skills/dataset/livecodebench-pro/prepare.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo_skills/dataset/livecodebench-pro/prepare.py b/nemo_skills/dataset/livecodebench-pro/prepare.py index 287ce652b6..97c15de8b0 100644 --- a/nemo_skills/dataset/livecodebench-pro/prepare.py +++ b/nemo_skills/dataset/livecodebench-pro/prepare.py @@ -65,7 +65,7 @@ def process_problem_splits(output_dir, token): output_record = dict(row) output_record["question_content"] = row["problem_statement"] output_record["formatting_message"] = FORMATTING_MESSAGE - output_record["starter_code"] = "```cpp\n// YOUR CODE HERE\n```\n\n" + output_record["starter_code"] = "```cpp\n// YOUR CODE HERE\n```" output_record["subset_for_metrics"] = row["difficulty"] f.write(json.dumps(output_record) + "\n") From 781e3f30ee464ade5b68af6793e1f314d345dcfe Mon Sep 17 00:00:00 2001 From: wasiahmad Date: Sun, 8 Feb 2026 00:30:45 -0800 Subject: [PATCH 4/4] removing extra \n\n from starter_code Signed-off-by: wasiahmad --- nemo_skills/dataset/livecodebench-cpp/prepare.py | 4 ++-- nemo_skills/dataset/livecodebench/prepare.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/nemo_skills/dataset/livecodebench-cpp/prepare.py b/nemo_skills/dataset/livecodebench-cpp/prepare.py index 4aca9481cd..43e47f8e75 100644 --- a/nemo_skills/dataset/livecodebench-cpp/prepare.py +++ b/nemo_skills/dataset/livecodebench-cpp/prepare.py @@ -47,10 +47,10 @@ def clean_data(dataset, keep_all_columns=False): def map_fn(data): if data["starter_code"]: data["formatting_message"] = PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE - data["starter_code"] = f"```cpp\n{data['starter_code']}\n```\n\n" + data["starter_code"] = f"```cpp\n{data['starter_code']}\n```" else: data["formatting_message"] = PromptConstants.FORMATTING_WITHOUT_STARTER_CODE - data["starter_code"] = "```cpp\n// YOUR CODE HERE\n```\n\n" + data["starter_code"] = "```cpp\n// YOUR CODE HERE\n```" data["task_id"] = data["question_id"] return data diff --git a/nemo_skills/dataset/livecodebench/prepare.py b/nemo_skills/dataset/livecodebench/prepare.py index 9258367618..32da002c67 100644 --- a/nemo_skills/dataset/livecodebench/prepare.py +++ b/nemo_skills/dataset/livecodebench/prepare.py @@ -70,10 +70,10 @@ def clean_data(dataset, keep_all_columns=False): def map_fn(data): if data["starter_code"]: data["formatting_message"] = PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE - data["starter_code"] = f"```python\n{data['starter_code']}\n```\n\n" + data["starter_code"] = f"```python\n{data['starter_code']}\n```" else: data["formatting_message"] = PromptConstants.FORMATTING_WITHOUT_STARTER_CODE - data["starter_code"] = "```python\n# YOUR CODE HERE\n```\n\n" + data["starter_code"] = "```python\n# YOUR CODE HERE\n```" data["task_id"] = data["question_id"] return data