-
Notifications
You must be signed in to change notification settings - Fork 248
chore: Add evaluation pipeline #1876
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
84c8c0e
ab8060c
8e5256a
81e0a4b
01883ec
74a02e5
7696d43
9b40152
b932ca4
d3251cc
c869b88
1228135
9732ecd
cf93c86
bfcf4d5
79d5cce
0fd27e6
1d5b07f
33c0536
5745dfa
67fa44d
4aff1d3
a74faba
4d8f1c8
34d14f6
ea42d5a
d07b929
1e226ae
653f35e
69f22ae
39e435f
5b6b7a3
d5aac3e
ddaaf99
c598a42
5c075ee
896ede1
c417081
75b9d61
3b78b5e
b2f092a
4308a35
d86bbfe
7e335c1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,69 @@ | ||
| # Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| import argparse | ||
| import os | ||
|
|
||
| from argument_parser import parse_cli_args | ||
|
|
||
|
|
||
def list_of_strings(arg):
    """Return the comma-delimited pieces of *arg* as a list of strings."""
    pieces = arg.split(",")
    return pieces
|
|
||
|
|
||
def normalize_arg_name(arg_name: str) -> str:
    """
    Map a command-line option name (e.g. '--model_family_name' or '-m')
    to its corresponding environment-variable name (e.g. 'MODEL_FAMILY_NAME').
    """
    # Strip the leading dashes, turn any remaining dashes into underscores,
    # and upper-case the result.
    return arg_name.lstrip("-").replace("-", "_").upper()
|
|
||
|
|
||
def build_cli_args_from_env_vars(parser: argparse.ArgumentParser) -> str:
    """
    Construct a CLI argument string for *parser* from environment variables.

    For every option the parser defines, the matching environment variable
    (see ``normalize_arg_name``) is looked up. Each variable that is set
    contributes the option flag — plus the raw environment value for
    value-taking options — to the returned space-separated string.
    """
    tokens = []

    # NOTE(review): argparse exposes no public accessor for its registered
    # actions, so the private _actions attribute is read here.
    for action in parser._actions:
        if not action.option_strings:
            continue  # positional argument: nothing to emit

        flag = action.option_strings[-1]
        value = os.getenv(normalize_arg_name(flag))
        if value is None:
            continue

        if isinstance(action, argparse._StoreTrueAction):
            # Boolean flags appear only when the env var is truthy.
            if value.lower() in ("true", "1", "yes", "on"):
                tokens.append(flag)
        elif action.type is list_of_strings:
            # Comma-separated list options are skipped when the value is empty.
            if value:
                tokens.extend((flag, value))
        else:
            tokens.extend((flag, value))

    return " ".join(tokens)
|
|
||
|
|
||
if __name__ == "__main__":
    # Translate environment variables into a CLI-argument string and print
    # it, so a wrapper (e.g. a shell script) can capture and forward it.
    args_parser = parse_cli_args()
    print(build_cli_args_from_env_vars(args_parser))
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,247 @@ | ||
| # Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved. | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
| import argparse | ||
|
|
||
|
|
||
def list_of_strings(arg):
    """Split a comma-separated string into a list of substrings."""
    return arg.split(",")


def to_dict(arg):
    """Split a comma-separated string of KEY=VALUE items into a dictionary.

    Each item is split on the *first* '=' only, so values may themselves
    contain '=' characters (e.g. ``A=1,B=x=y`` -> ``{"A": "1", "B": "x=y"}``).
    """
    return dict(item.split("=", 1) for item in arg.split(","))


# Maps the user-facing endpoint choice to the URL path it corresponds to.
ENDPOINT_TYPES = {"chat": "chat/completions/", "completions": "completions/"}


def parse_cli_args():
    """Build the argument parser for launching Megatron-Bridge Evaluation.

    Returns:
        argparse.ArgumentParser: a fully configured parser; callers invoke
        ``.parse_args()`` themselves.
    """
    parser = argparse.ArgumentParser(description="Launch Megatron-Bridge Evaluation")
    parser.add_argument(
        "--dryrun",
        action="store_true",
        help="Dry run the experiment.",
        default=False,
    )

    # Deployment args
    deployment_args = parser.add_argument_group("Deployment arguments")
    deployment_args.add_argument("--megatron_checkpoint", type=str, help="Megatron checkpoint to evaluate")
    deployment_args.add_argument(
        "--host",
        type=str,
        help="Server address to use for evaluation",
        default="0.0.0.0",
    )
    deployment_args.add_argument("--port", type=int, help="Server port to use for evaluation", default=8000)
    deployment_args.add_argument("--gpus_per_node", type=int, help="Number of GPUs per node", default=8)
    # Fixed copy-paste error: this is the GPU count, not the node count.
    deployment_args.add_argument("--num_gpus", type=int, help="Number of GPUs to use for evaluation", default=8)
    deployment_args.add_argument("--num_replicas", type=int, default=1, help="Num of replicas for Ray server")
    deployment_args.add_argument(
        "--tensor_model_parallel_size",
        type=int,
        help="Tensor model parallel size to use for evaluation",
        default=1,
    )
    deployment_args.add_argument(
        "--pipeline_model_parallel_size",
        type=int,
        help="Pipeline model parallel size to use for evaluation",
        default=1,
    )
    deployment_args.add_argument(
        "--context_model_parallel_size",
        type=int,
        help="Context model parallel size to use for evaluation",
        default=1,
    )

    # Evaluation args
    evaluation_args = parser.add_argument_group("Evaluation arguments")
    evaluation_args.add_argument(
        "--endpoint_type",
        type=str,
        default="completions",
        help="Whether to use completions or chat endpoint. Refer to the docs for details on tasks that are completions"
        "v/s chat.",
        choices=list(ENDPOINT_TYPES),
    )
    evaluation_args.add_argument(
        "--limit_samples",
        type=float,
        default=None,
        help="Limit evaluation to `limit` samples. Default: use all samples.",
    )
    evaluation_args.add_argument(
        "--parallelism",
        type=int,
        default=8,
        help="Number of parallel requests to send to server. Default: use default for the task.",
    )
    evaluation_args.add_argument(
        "--request_timeout",
        type=int,
        default=1000,
        help="Time in seconds for the eval client. Default: 1000s",
    )
    evaluation_args.add_argument(
        "--temperature",
        type=float,
        default=None,
        help="Sampling temperature for generation. Higher values = more random. Default: use task default.",
    )
    evaluation_args.add_argument(
        "--top_p",
        type=float,
        default=None,
        help="Top-p (nucleus) sampling threshold. Default: use task default.",
    )
    evaluation_args.add_argument(
        "--top_k",
        type=int,
        default=None,
        help="Top-k sampling threshold. Default: use task default.",
    )
    evaluation_args.add_argument(
        "--eval_task",
        type=str,
        default="mmlu",
        help="Evaluation benchmark to run. Refer to the docs for more details on the tasks/benchmarks.",
    )

    # Slurm args
    slurm_args = parser.add_argument_group("Slurm arguments")
    slurm_args.add_argument(
        "--custom_mounts", type=list_of_strings, help="Comma separated string of mounts", default=[], required=False
    )
    slurm_args.add_argument(
        "--custom_env_vars",
        type=to_dict,
        help="Comma separated string of environment variables",
        default={},
        required=False,
    )
    slurm_args.add_argument("--account", type=str, help="Cluster account to run test")
    slurm_args.add_argument("--partition", type=str, help="Cluster partition to run test")
    slurm_args.add_argument("--time_limit", type=str, default="04:00:00", help="Time limit of run")
    slurm_args.add_argument("--container_image", type=str, default="", help="Container image to run")

    # Logging args
    logging_args = parser.add_argument_group("Logging arguments")
    logging_args.add_argument(
        "--output_dir",
        type=str,
        help="Output directory to save the results",
        required=False,
    )
    logging_args.add_argument(
        "--experiment_name",
        type=str,
        help="Experiment name",
        required=False,
    )
    logging_args.add_argument(
        "--wandb_key",
        type=str,
        help="wandb key. Needed for wandb logger projection to server",
        required=False,
    )
    logging_args.add_argument(
        "--wandb_project_name",
        type=str,
        help="wandb project name",
        required=False,
    )
    logging_args.add_argument(
        "--wandb_entity_name",
        type=str,
        # Fixed copy-paste error: this option is the entity, not the project.
        help="wandb entity name",
        required=False,
    )
    logging_args.add_argument(
        "--wandb_experiment_name",
        type=str,
        help="wandb job name",
        required=False,
    )

    # Tokenizer args
    tokenizer_args = parser.add_argument_group("Tokenizer arguments")
    tokenizer_args.add_argument(
        "-hf",
        "--hf_token",
        type=str,
        help="HuggingFace token. Defaults to None. Required for accessing tokenizers and checkpoints.",
    )

    # DGXCloud
    dgxc_args = parser.add_argument_group("DGXCloud arguments")
    dgxc_args.add_argument(
        "--dgxc_cluster",
        type=str,
        help="DGXCloud cluster to use for experiment",
        required=False,
    )
    dgxc_args.add_argument(
        "--dgxc_base_url",
        type=str,
        help="DGXCloud base url",
        required=False,
    )
    dgxc_args.add_argument(
        "--dgxc_kube_apiserver_url",
        type=str,
        help="DGXCloud kube apiserver url",
        required=False,
    )
    dgxc_args.add_argument(
        "--dgxc_app_id",
        type=str,
        help="DGXCloud app id",
        required=False,
    )
    dgxc_args.add_argument(
        "--dgxc_app_secret",
        type=str,
        help="DGXCloud app secret",
        required=False,
    )
    dgxc_args.add_argument(
        "--dgxc_project_name",
        type=str,
        help="DGXCloud project name",
        required=False,
    )
    dgxc_args.add_argument(
        "--dgxc_pvc_claim_name",
        type=str,
        help="DGXCloud pvc claim name",
        required=False,
    )
    dgxc_args.add_argument(
        "--dgxc_pvc_mount_path",
        type=str,
        help="DGXCloud pvc mount path",
        required=False,
    )
    dgxc_args.add_argument(
        "--dgxc_namespace",
        type=str,
        help="DGXCloud namespace",
        required=False,
    )

    return parser
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,19 @@ | ||||||||||||||||||||||
#!/bin/bash
# Unset SLURM/PMI/PMIX env vars to prevent MPI initialization issues when this
# script runs inside a Slurm allocation.
# Shebang added (ShellCheck SC2148) and loop variable quoted against globbing.
for i in $(env | grep ^SLURM_ | cut -d"=" -f 1); do unset -v "$i"; done
for i in $(env | grep ^PMI_ | cut -d"=" -f 1); do unset -v "$i"; done
for i in $(env | grep ^PMIX_ | cut -d"=" -f 1); do unset -v "$i"; done
|
Comment on lines
+1
to
+4
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🛠️ Refactor suggestion | 🟠 Major Add a shebang line to specify the shell interpreter. The script lacks a shebang, which can cause portability issues and unexpected behavior. Static analysis (SC2148) correctly flags this. Proposed fix+#!/bin/bash
+
# Unset SLURM/PMI/PMIX env vars to prevent MPI initialization issues
for i in $(env | grep ^SLURM_ | cut -d"=" -f 1); do unset -v $i; done
for i in $(env | grep ^PMI_ | cut -d"=" -f 1); do unset -v $i; done
for i in $(env | grep ^PMIX_ | cut -d"=" -f 1); do unset -v $i; done📝 Committable suggestion
Suggested change
🧰 Tools🪛 Shellcheck (0.11.0)[error] 1-1: Tips depend on target shell and yours is unknown. Add a shebang or a 'shell' directive. (SC2148) 🤖 Prompt for AI Agents |
||||||||||||||||||||||
|
|
||||||||||||||||||||||
# Positional parameters: checkpoint path, Ray replica count, total GPU count.
# Optional parameters 4-6 override the model-parallel sizes (default: 1 each,
# matching the previous hard-coded values).
MEGATRON_CHECKPOINT=$1
NUM_REPLICAS=$2
NUM_GPUS=$3
TENSOR_MODEL_PARALLEL_SIZE=${4:-1}
PIPELINE_MODEL_PARALLEL_SIZE=${5:-1}
CONTEXT_PARALLEL_SIZE=${6:-1}
python \
    /opt/Export-Deploy/scripts/deploy/nlp/deploy_ray_inframework.py \
    --megatron_checkpoint "$MEGATRON_CHECKPOINT" \
    --model_id megatron_model \
    --host 0.0.0.0 \
    --port 8000 \
    --num_gpus "$NUM_GPUS" \
    --num_replicas "$NUM_REPLICAS" \
    --tensor_model_parallel_size "$TENSOR_MODEL_PARALLEL_SIZE" \
    --pipeline_model_parallel_size "$PIPELINE_MODEL_PARALLEL_SIZE" \
    --context_parallel_size "$CONTEXT_PARALLEL_SIZE"
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Copy-paste error in help text.
The help text for
--wandb_entity_namesays "wandb project name" but should describe the entity name.Proposed fix
logging_args.add_argument( "--wandb_entity_name", type=str, - help="wandb project name", + help="wandb entity name", required=False, )📝 Committable suggestion
🤖 Prompt for AI Agents