diff --git a/docs/releases/openreasoning/training.md b/docs/releases/openreasoning/training.md index 3e4e62691a..c6206095c6 100644 --- a/docs/releases/openreasoning/training.md +++ b/docs/releases/openreasoning/training.md @@ -105,7 +105,7 @@ def apply_format(elem, prompt): dataset = load_dataset("nvidia/Nemotron-Post-Training-Dataset-v1", split="code") -prompt = get_prompt('eval/livecodebench/python_codegen_reasoning', tokenizer='Qwen/Qwen2.5-32B-Instruct', system_message="") +prompt = get_prompt('eval/livecodebench/default_reasoning', tokenizer='Qwen/Qwen2.5-32B-Instruct', system_message="") func = partial(apply_format, prompt=prompt) dataset = dataset.map(func, num_proc=20) dataset = dataset.remove_columns(['messages']) diff --git a/nemo_skills/dataset/livecodebench-cpp/__init__.py b/nemo_skills/dataset/livecodebench-cpp/__init__.py index 76d7c478de..f983a983b5 100644 --- a/nemo_skills/dataset/livecodebench-cpp/__init__.py +++ b/nemo_skills/dataset/livecodebench-cpp/__init__.py @@ -16,4 +16,6 @@ DATASET_GROUP = "code" METRICS_TYPE = "livecodebench" EVAL_SPLIT = "v6_2408_2505" -GENERATION_ARGS = "++prompt_config=eval/livecodebench/cpp_codegen ++eval_type=livecodebench ++eval_config.language=cpp" +GENERATION_ARGS = ( + "++prompt_config=eval/livecodebench/default_reasoning ++eval_type=livecodebench ++eval_config.language=cpp" +) diff --git a/nemo_skills/dataset/livecodebench-cpp/prepare.py b/nemo_skills/dataset/livecodebench-cpp/prepare.py index bb4becd4fc..43e47f8e75 100644 --- a/nemo_skills/dataset/livecodebench-cpp/prepare.py +++ b/nemo_skills/dataset/livecodebench-cpp/prepare.py @@ -20,7 +20,7 @@ class PromptConstants: - # reference: https://github.com/QwenLM/Qwen2.5-Coder/blob/main/qwencoder-eval/reasoning/livecode_bench_cot/lcb_runner_cq/prompts/code_generation.py#L31 + # reference: https://github.com/LiveCodeBench/LiveCodeBench/blob/main/lcb_runner/prompts/code_generation.py#L35C5-L38C1 FORMATTING_MESSAGE_WITH_STARTER_CODE = "You will use the following starter code to write the solution to the problem and enclose your code within delimiters." FORMATTING_WITHOUT_STARTER_CODE = "Read the inputs from stdin solve the problem and write the answer to stdout (do not directly test on the sample inputs). Enclose your code within delimiters as follows. Ensure that when the c++ program runs, it reads the inputs, runs the algorithm and writes output to STDOUT." @@ -45,16 +45,14 @@ def parse_data(split): def clean_data(dataset, keep_all_columns=False): def map_fn(data): - question = data["question_content"] + "\n\n" if data["starter_code"]: - question += f"{PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE}\n" - question += f"```cpp\n{data['starter_code']}\n```\n\n" + data["formatting_message"] = PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE + data["starter_code"] = f"```cpp\n{data['starter_code']}\n```" else: - question += f"{PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n\n" - question += "```cpp\n// YOUR CODE HERE\n```\n\n" + data["formatting_message"] = PromptConstants.FORMATTING_WITHOUT_STARTER_CODE + data["starter_code"] = "```cpp\n// YOUR CODE HERE\n```" data["task_id"] = data["question_id"] - data["question"] = question.replace(" ", "\t") return data remove_columns = [] @@ -63,10 +61,8 @@ def map_fn(data): "question_title", "contest_id", "metadata", - "question_content", "platform", "question_id", - "starter_code", "public_test_cases", "private_test_cases", ] diff --git a/nemo_skills/dataset/livecodebench-pro/__init__.py b/nemo_skills/dataset/livecodebench-pro/__init__.py index 7aa6e5f0ed..bed9a8eafd 100644 --- a/nemo_skills/dataset/livecodebench-pro/__init__.py +++ b/nemo_skills/dataset/livecodebench-pro/__init__.py @@ -16,4 +16,4 @@ DATASET_GROUP = "code" METRICS_TYPE = "livecodebench_pro" EVAL_SPLIT = "test_25q2" -GENERATION_ARGS = "++prompt_config=eval/livecodebench/cpp_codegen ++eval_type=livecodebench_pro" +GENERATION_ARGS = "++prompt_config=eval/livecodebench/default_reasoning ++eval_type=livecodebench_pro" diff --git a/nemo_skills/dataset/livecodebench-pro/prepare.py b/nemo_skills/dataset/livecodebench-pro/prepare.py index 52703532de..97c15de8b0 100644 --- a/nemo_skills/dataset/livecodebench-pro/prepare.py +++ b/nemo_skills/dataset/livecodebench-pro/prepare.py @@ -28,6 +28,8 @@ ("25q3", "quater_2025_7_9", 144), ] +FORMATTING_MESSAGE = "Read the inputs from stdin solve the problem and write the answer to stdout (do not directly test on the sample inputs). Enclose your code within delimiters as follows. Ensure that when the c++ program runs, it reads the inputs, runs the algorithm and writes output to STDOUT." + def download_testcases(local_dir, token): """ @@ -61,9 +63,10 @@ def process_problem_splits(output_dir, token): with open(output_file, "w", encoding="utf-8") as f: for row in dataset: output_record = dict(row) - output_record["question"] = row["problem_statement"] + output_record["question_content"] = row["problem_statement"] + output_record["formatting_message"] = FORMATTING_MESSAGE + output_record["starter_code"] = "```cpp\n// YOUR CODE HERE\n```" output_record["subset_for_metrics"] = row["difficulty"] - f.write(json.dumps(output_record) + "\n") except Exception as e: diff --git a/nemo_skills/dataset/livecodebench/__init__.py b/nemo_skills/dataset/livecodebench/__init__.py index 6945c0c5e0..83d4d4a30f 100644 --- a/nemo_skills/dataset/livecodebench/__init__.py +++ b/nemo_skills/dataset/livecodebench/__init__.py @@ -16,4 +16,4 @@ DATASET_GROUP = "code" METRICS_TYPE = "livecodebench" EVAL_SPLIT = "test_v6_2408_2505" -GENERATION_ARGS = "++prompt_config=eval/livecodebench/python_codegen ++eval_type=livecodebench" +GENERATION_ARGS = "++prompt_config=eval/livecodebench/default_reasoning ++eval_type=livecodebench" diff --git a/nemo_skills/dataset/livecodebench/prepare.py b/nemo_skills/dataset/livecodebench/prepare.py index a3cb49d099..32da002c67 100644 --- a/nemo_skills/dataset/livecodebench/prepare.py +++ b/nemo_skills/dataset/livecodebench/prepare.py @@ -23,7 +23,7 @@ class PromptConstants: - # reference: https://github.com/QwenLM/Qwen2.5-Coder/blob/main/qwencoder-eval/reasoning/livecode_bench_cot/lcb_runner_cq/prompts/code_generation.py#L31 + # reference: https://github.com/LiveCodeBench/LiveCodeBench/blob/main/lcb_runner/prompts/code_generation.py#L35C5-L38C1 FORMATTING_MESSAGE_WITH_STARTER_CODE = "You will use the following starter code to write the solution to the problem and enclose your code within delimiters." FORMATTING_WITHOUT_STARTER_CODE = "Read the inputs from stdin solve the problem and write the answer to stdout (do not directly test on the sample inputs). Enclose your code within delimiters as follows. Ensure that when the python program runs, it reads the inputs, runs the algorithm and writes output to STDOUT." @@ -68,16 +68,14 @@ def parse_month_range(start_date, end_date): def clean_data(dataset, keep_all_columns=False): def map_fn(data): - question = data["question_content"] + "\n\n" if data["starter_code"]: - question += f"{PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE}\n" - question += f"```python\n{data['starter_code']}\n```\n\n" + data["formatting_message"] = PromptConstants.FORMATTING_MESSAGE_WITH_STARTER_CODE + data["starter_code"] = f"```python\n{data['starter_code']}\n```" else: - question += f"{PromptConstants.FORMATTING_WITHOUT_STARTER_CODE}\n\n" - question += "```python\n# YOUR CODE HERE\n```\n\n" + data["formatting_message"] = PromptConstants.FORMATTING_WITHOUT_STARTER_CODE + data["starter_code"] = "```python\n# YOUR CODE HERE\n```" data["task_id"] = data["question_id"] - data["question"] = question.replace(" ", "\t") return data remove_columns = [] @@ -86,10 +84,8 @@ def map_fn(data): "question_title", "contest_id", "metadata", - "question_content", "platform", "question_id", - "starter_code", "public_test_cases", "private_test_cases", ] diff --git a/nemo_skills/prompt/config/eval/livecodebench/cpp_codegen.yaml b/nemo_skills/prompt/config/eval/livecodebench/cpp_codegen.yaml deleted file mode 100644 index a99d1aca6d..0000000000 --- a/nemo_skills/prompt/config/eval/livecodebench/cpp_codegen.yaml +++ /dev/null @@ -1,6 +0,0 @@ -# default prompt for livecodebench c++ - -user: |- - Here is a problem for which you need to generate an executable code in c++ programming language. - - {question} diff --git a/nemo_skills/prompt/config/eval/livecodebench/cpp_codegen_reasoning.yaml b/nemo_skills/prompt/config/eval/livecodebench/cpp_codegen_reasoning.yaml deleted file mode 100644 index 1378a2d07f..0000000000 --- a/nemo_skills/prompt/config/eval/livecodebench/cpp_codegen_reasoning.yaml +++ /dev/null @@ -1,12 +0,0 @@ - -user: |- - You are a helpful and harmless assistant. You should think step-by-step before responding to the instruction below. - - Please use c++ programming language only. - - You must use ```cpp for just the final solution code block with the following format: - ```cpp - // Your code here - ``` - - {question} diff --git a/nemo_skills/prompt/config/gpt-oss/code.yaml b/nemo_skills/prompt/config/eval/livecodebench/default.yaml similarity index 53% rename from nemo_skills/prompt/config/gpt-oss/code.yaml rename to nemo_skills/prompt/config/eval/livecodebench/default.yaml index b2f1101751..1118652784 100644 --- a/nemo_skills/prompt/config/gpt-oss/code.yaml +++ b/nemo_skills/prompt/config/eval/livecodebench/default.yaml @@ -1,8 +1,8 @@ user: |- - ### Question - {question} + ### Question: + {question_content} - {formatting_message} + ### Format: {formatting_message} {starter_code} ### Answer: (use the provided format with backticks) diff --git a/nemo_skills/prompt/config/eval/livecodebench/default_reasoning.yaml b/nemo_skills/prompt/config/eval/livecodebench/default_reasoning.yaml new file mode 100644 index 0000000000..9daf50da6e --- /dev/null +++ b/nemo_skills/prompt/config/eval/livecodebench/default_reasoning.yaml @@ -0,0 +1,10 @@ +user: |- + You are a helpful and harmless assistant. You should think step-by-step before responding to the instruction below. + + ### Question: + {question_content} + + ### Format: {formatting_message} + {starter_code} + + ### Answer: (use the provided format with backticks) diff --git a/nemo_skills/prompt/config/eval/livecodebench/python_codegen.yaml b/nemo_skills/prompt/config/eval/livecodebench/python_codegen.yaml deleted file mode 100644 index 5c14e5eb6e..0000000000 --- a/nemo_skills/prompt/config/eval/livecodebench/python_codegen.yaml +++ /dev/null @@ -1,6 +0,0 @@ -# default prompt for livecodebench Python - -user: |- - Here is a problem for which you need to generate an executable code in python programming language. - - {question} diff --git a/nemo_skills/prompt/config/eval/livecodebench/python_codegen_reasoning.yaml b/nemo_skills/prompt/config/eval/livecodebench/python_codegen_reasoning.yaml deleted file mode 100644 index 44631da442..0000000000 --- a/nemo_skills/prompt/config/eval/livecodebench/python_codegen_reasoning.yaml +++ /dev/null @@ -1,11 +0,0 @@ -user: |- - You are a helpful and harmless assistant. You should think step-by-step before responding to the instruction below. - - Please use python programming language only. - - You must use ```python for just the final solution code block with the following format: - ```python - # Your code here - ``` - - {question} diff --git a/nemo_skills/prompt/config/gpt-oss/livecodebench.yaml b/nemo_skills/prompt/config/gpt-oss/livecodebench.yaml index 4bcc663278..b7f6afaf59 100644 --- a/nemo_skills/prompt/config/gpt-oss/livecodebench.yaml +++ b/nemo_skills/prompt/config/gpt-oss/livecodebench.yaml @@ -1,4 +1,10 @@ user: |- - {question} + ### Question: + {question_content} + + ### Format: {formatting_message} + {starter_code} Please avoid using `sys.stdin.buffer` to process input, and avoid using `threading`. + + ### Answer: (use the provided format with backticks)