diff --git a/3rdparty/Gym-workspace/Gym b/3rdparty/Gym-workspace/Gym index c192ee407f..23cdeb3807 160000 --- a/3rdparty/Gym-workspace/Gym +++ b/3rdparty/Gym-workspace/Gym @@ -1 +1 @@ -Subproject commit c192ee407ff71046015d11da7c8960082bd62418 +Subproject commit 23cdeb38077d7b72a5fbae0927a2e1a74bfc15f7 diff --git a/3rdparty/Gym-workspace/is_nemo_gym_installed.py b/3rdparty/Gym-workspace/is_nemo_gym_installed.py deleted file mode 100644 index 1a7572b077..0000000000 --- a/3rdparty/Gym-workspace/is_nemo_gym_installed.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -try: - from nemo_gym import config_types # noqa: F401 - - INSTALLED = True -except Exception: - INSTALLED = False - -print(f"NEMO_GYM {INSTALLED=}") diff --git a/3rdparty/Gym-workspace/pyproject.toml b/3rdparty/Gym-workspace/pyproject.toml deleted file mode 100644 index dfda26adaf..0000000000 --- a/3rdparty/Gym-workspace/pyproject.toml +++ /dev/null @@ -1,10 +0,0 @@ -[build-system] -requires = ["setuptools>=61.0", "wheel"] -build-backend = "setuptools.build_meta" - -[project] -name = "nemo_gym" -dynamic = ["dependencies", "version"] -authors = [{ name = "NVIDIA", email = "nemo-toolkit@nvidia.com" }] -description = "Standalone packaging for the Gym sub-module." 
-requires-python = ">=3.10" diff --git a/3rdparty/Gym-workspace/setup.py b/3rdparty/Gym-workspace/setup.py deleted file mode 100644 index b6df0d66c0..0000000000 --- a/3rdparty/Gym-workspace/setup.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import sys -import tomllib -from pathlib import Path - -import setuptools - -final_packages = [] -final_package_dir = {} - -# If the submodule is present, expose `nemo_gym` package from the checkout -src_dir = Path("Gym") - - -CACHED_DEPENDENCIES = [ - "openai<=2.6.1", - "tqdm", - "pydantic", - "pydantic_core", - "devtools", - "fastapi", - "uvicorn", - "uvloop", - "hydra-core", - "omegaconf", - "gradio", - "mlflow", - "tdigest>=0.5.2.2", - "aiohttp", - "yappi", - "ray[default]", - "psutil", - "datasets", -] - -if src_dir.exists(): - pyproject_toml_path = src_dir / "pyproject.toml" - with pyproject_toml_path.open("rb") as f: - pyproject_toml = tomllib.load(f) - if not pyproject_toml_path.exists(): - raise FileNotFoundError( - f"[Gym][setup] {pyproject_toml_path} not found; skipping dependency consistency check." 
- ) - - packages = pyproject_toml["tool"]["setuptools"]["packages"]["find"]["include"] - - for package in packages: - final_packages.append(package) - final_package_dir[package] = src_dir / package - - actual_dependencies = pyproject_toml["project"]["dependencies"] - - ######################################## - # Compare cached dependencies with the submodule's pyproject - ######################################## - - missing_in_cached = set(actual_dependencies) - set(CACHED_DEPENDENCIES) - extra_in_cached = set(CACHED_DEPENDENCIES) - set(actual_dependencies) - - if missing_in_cached or extra_in_cached: - print( - "[Gym][setup] Dependency mismatch between Gym-workspace/Gym/pyproject.toml vs Gym-workspace/setup.py::CACHED_DEPENDENCIES.", - file=sys.stderr, - ) - if missing_in_cached: - print( - " - Present in Gym-workspace/Gym/pyproject.toml but missing from CACHED_DEPENDENCIES:", - file=sys.stderr, - ) - for dep in sorted(missing_in_cached): - print(f" * {dep}", file=sys.stderr) - if extra_in_cached: - print( - " - Present in CACHED_DEPENDENCIES but not in Gym-workspace/Gym/pyproject.toml:", - file=sys.stderr, - ) - for dep in sorted(extra_in_cached): - print(f" * {dep}", file=sys.stderr) - print( - " Please update CACHED_DEPENDENCIES or the submodule pyproject to keep them in sync.", - file=sys.stderr, - ) - sys.exit(1) - else: - print( - "[Gym][setup] Dependency sets are consistent with the submodule pyproject.", - file=sys.stderr, - ) - - -setuptools.setup( - name="nemo_gym", - version="0.0.0", - description="Standalone packaging for the Gym sub-module.", - author="NVIDIA", - author_email="nemo-toolkit@nvidia.com", - packages=final_packages, - package_dir=final_package_dir, - py_modules=["is_nemo_gym_installed"], - install_requires=CACHED_DEPENDENCIES, -) diff --git a/docs/design-docs/dependency-management.md b/docs/design-docs/dependency-management.md index 26151f7809..b2d3a21700 100644 --- a/docs/design-docs/dependency-management.md +++ 
b/docs/design-docs/dependency-management.md @@ -161,7 +161,7 @@ The rebuilt container will have all virtual environments pre-cached with your up ### Option 3: Classic Workflow - Mounting Modified Submodules -For situations where you're **only changing submodules** (like nemo-automodel, Penguin, Megatron-LM, or Megatron-Bridge) but **not changing Python package versions**, you can use a classic mounting approach. This workflow assumes that the non-submodule Python packages in your local checkout match what the container was built with. +For situations where you're **only changing submodules** (like nemo-automodel, NeMo Gym, Megatron-LM, or Megatron-Bridge) but **not changing Python package versions**, you can use a classic mounting approach. This workflow assumes that the non-submodule Python packages in your local checkout match what the container was built with. The container's NeMo RL code is located at `/opt/nemo-rl`. By mounting your local `3rdparty/` directory over the container's `/opt/nemo-rl/3rdparty/`, you can swap out submodules without rebuilding environments or containers. @@ -193,7 +193,7 @@ This mounts: > [!IMPORTANT] > This workflow is **only suitable when**: > - Python package versions in `pyproject.toml` and `uv.lock` haven't changed -> - You're only modifying code within submodules (nemo-automodel, Penguin, Megatron-LM, Megatron-Bridge) +> - You're only modifying code within submodules (nemo-automodel, NeMo Gym, Megatron-LM, Megatron-Bridge) > - The submodule commits/branches are compatible with the installed package versions If you've changed Python package versions or dependencies outside of submodules, use Option 1 (`NRL_FORCE_REBUILD_VENVS=true`) or Option 2 (rebuild the container) instead. 
diff --git a/examples/nemo_gym/grpo_dapo17k_bytedtsinghua_qwen3_4binstruct_nf.yaml b/examples/nemo_gym/grpo_dapo17k_bytedtsinghua_qwen3_4binstruct_nf.yaml deleted file mode 100644 index d6d550a12c..0000000000 --- a/examples/nemo_gym/grpo_dapo17k_bytedtsinghua_qwen3_4binstruct_nf.yaml +++ /dev/null @@ -1,278 +0,0 @@ -grpo: - max_num_epochs: 1 - num_prompts_per_step: 64 - num_generations_per_prompt: 16 - max_rollout_turns: 1 # for multi-turn rollouts. Math Environments just have 1 turn (answering the question) - max_num_steps: 1000000 - normalize_rewards: true - use_leave_one_out_baseline: true - val_period: 10 - val_at_start: true - overlong_filtering: false - max_val_samples: null # inferred from size of val dataset. for multi evals, repeat val ds via `num_repeats` in `ng_prepare_data`. - val_batch_size: null - seed: 42 - use_dynamic_sampling: false - dynamic_sampling_max_gen_batches: 10 - batch_multiplier: 1 - reward_shaping: - enabled: false - overlong_buffer_length: 128 - overlong_buffer_penalty: 1 - max_response_length: ${policy.max_total_sequence_length} - reward_scaling: - enabled: false - source_min: 0.0 - source_max: 1.0 - target_min: 0.0 - target_max: 1.0 - skip_reference_policy_logprobs_calculation: true - -loss_fn: - reference_policy_kl_penalty: 0 - reference_policy_kl_type: "k3" - kl_input_clamp_value: 20.0 - kl_output_clamp_value: 10.0 - ratio_clip_min: 0.2 - ratio_clip_max: 0.2 - ratio_clip_c: null - # (default off) loss formulation improvements (docs/guides/grpo.md#loss) - use_on_policy_kl_approximation: false - truncated_importance_sampling_ratio: null - use_importance_sampling_correction: false - token_level_loss: true - force_on_policy_ratio: false # Set to true to force ratio=1.0 (requires train_global_batch_size == num_prompts_per_step * num_generations_per_prompt) - -checkpointing: - enabled: true - checkpoint_dir: "results/grpo" - metric_name: "val:accuracy" - higher_is_better: true - keep_top_k: 3 - save_period: 1 - checkpoint_must_save_by: 
null - -policy: - model_name: "Qwen/Qwen3-4B-Instruct-2507" - tokenizer: - name: ${policy.model_name} ## specify if you'd like to use a tokenizer different from the model's default - chat_template_kwargs: null # can be used to pass kwargs to the chat template, e.g., enable_thinking=true - hf_config_overrides: {} - train_global_batch_size: ${mul:${grpo.num_prompts_per_step}, ${grpo.num_generations_per_prompt}} # Match the total rollouts per step - train_micro_batch_size: 1 - logprob_batch_size: 1 - generation_batch_size: 32 # Only used when generating using HF backend - max_total_sequence_length: 32768 - precision: "bfloat16" - logprob_chunk_size: 1024 - - dtensor_cfg: - _v2: false - enabled: true - cpu_offload: False - sequence_parallel: false - activation_checkpointing: true - tensor_parallel_size: 2 - context_parallel_size: 1 - custom_parallel_plan: null - clear_cache_every_n_steps: null - - megatron_cfg: - enabled: false - # We might want to consider setting this value higher (e.g. to 1) and raising the vllm generation max mem utilization - empty_unused_memory_level: 0 - activation_checkpointing: true - converter_type: "Qwen2ForCausalLM" # Apparently this is comptible with Qwen 3 dense models. 
- tensor_model_parallel_size: 1 - expert_tensor_parallel_size: 1 - expert_model_parallel_size: 1 - pipeline_model_parallel_size: 1 - num_layers_in_first_pipeline_stage: null - num_layers_in_last_pipeline_stage: null - context_parallel_size: 1 - pipeline_dtype: ${policy.precision} - sequence_parallel: false - freeze_moe_router: true - moe_router_dtype: "fp64" - moe_router_load_balancing_type: "none" # "seq_aux_loss" causes logprob error divergence for grpo - moe_router_bias_update_rate: 0.0 # by default, disable bias updates for grpo - #gives ~20% training perf speedup with sequence packing - apply_rope_fusion: True - defer_fp32_logits: true - moe_permute_fusion: false - bias_activation_fusion: True - moe_per_layer_logging: False - - optimizer: - optimizer: "adam" - lr: 5.0e-6 - min_lr: 5.0e-7 - weight_decay: 0.01 - bf16: true - fp16: false - params_dtype: "float32" - - #adam - adam_beta1: 0.9 - adam_beta2: 0.999 - adam_eps: 1e-8 - - #sgd - sgd_momentum: 0.9 - - #distributed optimizer - use_distributed_optimizer: true - use_precision_aware_optimizer: true - - # optimizer cpu offload - optimizer_cpu_offload: false - optimizer_offload_fraction: 0.0 - - clip_grad: ${policy.max_grad_norm} - - scheduler: - start_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay} - end_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay} - weight_decay_incr_style: "constant" - lr_decay_style: "constant" - lr_decay_iters: null - lr_warmup_iters: 13 - lr_warmup_init: 5.0e-7 - - distributed_data_parallel_config: - grad_reduce_in_fp32: false - overlap_grad_reduce: true - overlap_param_gather: true - use_custom_fsdp: false - data_parallel_sharding_strategy: "optim_grads_params" - - env_vars: null - - # See docs/design-docs/sequence-packing-and-dynamic-batching.md - # for more details on dynamic batching and sequence packing. 
- dynamic_batching: - enabled: False - train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}} - logprob_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.logprob_batch_size}} - sequence_length_round: 64 - - sequence_packing: - enabled: false - train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}} - logprob_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.logprob_batch_size}} - algorithm: "modified_first_fit_decreasing" - sequence_length_round: 64 - - # makes the training sequence length divisible by the tensor parallel size - # this is useful for sequence parallel training - make_sequence_length_divisible_by: ${policy.dtensor_cfg.tensor_parallel_size} - max_grad_norm: 1.0 - - optimizer: - name: "torch.optim.AdamW" - kwargs: - lr: 1.0e-6 - weight_decay: 0.01 - betas: [0.9, 0.999] - eps: 1e-8 - # when using Dtensor, we need to set foreach - # and fused to False - foreach: False - fused: False - - scheduler: - - name: "torch.optim.lr_scheduler.ConstantLR" - kwargs: - factor: 1.0 - total_iters: 10000000000 - - milestones: [] - - generation: - backend: "vllm" - max_new_tokens: ${policy.max_total_sequence_length} - temperature: 1.0 - top_p: 1.0 - top_k: null - stop_token_ids: null - stop_strings: null - vllm_cfg: - async_engine: true - precision: ${policy.precision} - tensor_parallel_size: 1 - pipeline_parallel_size: 1 - enable_expert_parallel: false - expert_parallel_size: 1 - gpu_memory_utilization: 0.8 - max_model_len: ${policy.max_total_sequence_length} - enforce_eager: false - use_deep_gemm: False - num_last_layers_in_bf16: 0 - num_first_layers_in_bf16: 0 - expose_http_server: true - skip_tokenizer_init: false - http_server_serving_chat_kwargs: - # This is the tool parser for Qwen 3 4B Instruct. This needs to be changed for other models. - enable_auto_tools: true - tool_parser: hermes - # Enable the appropriate reasoning parser here. 
Since this model is an instruct model, we comment it out. - # reasoning_parser: deepseek_r1 - vllm_kwargs: - compilation_config: - # when enforce_eager is False, set ++policy.generation.vllm_kwargs.compilation_config.use_inductor=False for better accuracy, - # with the flag, vllm will use the custom CUDA kernels instead of the Triton kernels generated by torch.compile - # for more details, see convergence issue https://github.com/NVIDIA-NeMo/RL/issues/998 - use_inductor: False - colocated: - # true: generation shares training GPUs - # false: uses dedicated generation resources - enabled: true - # only relevant when enabled is false - resources: - gpus_per_node: null # Decides num gpus to be dedicated to generation when there is one node in the cluster i.e cluster.num_nodes == 1 - num_nodes: null # Decides number of nodes to be dedicated to generation - -data: - train_jsonl_fpath: 3rdparty/Gym-workspace/Gym/data/bytedtsinghua_dapo17k/train.jsonl - validation_jsonl_fpath: 3rdparty/Gym-workspace/Gym/data/bytedtsinghua_dapo17k/validation.jsonl - shuffle: true - num_workers: 0 - -env: - should_use_nemo_gym: true - should_log_nemo_gym_responses: true # If you have low logging storage, set this to false - nemo_gym: # This is passed into NeMo-Gym as the initial_global_config_dict - config_paths: - - responses_api_models/vllm_model/configs/vllm_model_for_training.yaml # Required! 
And it must be *for_training - - resources_servers/library_judge_math/configs/library_judge_math.yaml - library_judge_math: - resources_servers: - library_judge_math: - judge_model_server: - name: policy_model - should_use_judge: false - -logger: - log_dir: "logs" # Base directory for all logs - num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal - wandb_enabled: true - tensorboard_enabled: false - mlflow_enabled: false # Disable MLflow logging - swanlab_enabled: false - monitor_gpus: true # If true, will monitor GPU usage and log to wandb and/or tensorboard - wandb: - project: "grpo-dev" - name: "grpo-dev-logger" - swanlab: - project: "grpo-dev" - name: "grpo-dev-logger" - tensorboard: {} - mlflow: - experiment_name: "grpo-dev" - run_name: "grpo-dev-logger" - gpu_monitoring: - collection_interval: 10 # How often to collect GPU usage metrics (in seconds) - flush_interval: 10 # How often to flush GPU usage metrics to the loggers (in seconds) - -cluster: - gpus_per_node: 8 - num_nodes: 8 diff --git a/examples/nemo_gym/grpo_qwen3_30ba3b_instruct.yaml b/examples/nemo_gym/grpo_qwen3_30ba3b_instruct.yaml new file mode 100644 index 0000000000..da7a392dae --- /dev/null +++ b/examples/nemo_gym/grpo_qwen3_30ba3b_instruct.yaml @@ -0,0 +1,155 @@ +defaults: "grpo_workplace_assistant_nemotron_nano_v2_9b.yaml" + +grpo: + max_num_epochs: 10 + # We observe MoE likes more data per optimization step. Here we increase the num prompts per step from the dense 64 to 256. + # We retain the 16 generations per prompt for now. Later on this may change for agentic tasks as the action/sample space grows. + # We take up to 16 steps off policy, which is 256 * 16 = 4096 + num_prompts_per_step: 4096 + num_generations_per_prompt: 16 + # Each "step" in NeMo RL is all 16 minibatch steps we want to take. So we val and save every step. + val_period: 1 + # The advantages are much larger and this becomes non-trivially expensive. 
+ calculate_advantages_on_gpu: true + +# We use GSPO rather than GRPO for MoE models +loss_fn: + reference_policy_kl_penalty: 0 + ratio_clip_min: 3e-4 + ratio_clip_max: 3e-4 + ratio_clip_c: null + use_on_policy_kl_approximation: false + # We observe importance sampling correction here to have very interesting effects on training dynamics. Usually it is fine to leave this off. + use_importance_sampling_correction: false + # sequence_level_importance_ratios turns GRPO -> GSPO + sequence_level_importance_ratios: true + # As of Mon Oct 13, token level loss as formulated in the GSPO paper is not yet supported in NeMo RL. + token_level_loss: false + +policy: + model_name: Qwen/Qwen3-30B-A3B-Instruct-2507 + logprob_chunk_size: null + + # This is one minibatch, which is 256 prompts per step * 16 rollouts per prompt. + train_global_batch_size: 4096 + + # max_total_sequence_length goes up to 131072, but we default to 32768 to make training more efficient for this instruct model which doesn't initially have such long output length. + max_total_sequence_length: 32768 + + # Don't use dtensor. optimizer and scheduler are dtensor only (mcore has its own version of these) + dtensor_cfg: + enabled: False + optimizer: null + scheduler: null + + # As of Thu Oct 02, 2025, we need sequence packing enabled to use context parallelism (CP) in mcore. + sequence_packing: + enabled: true + train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}} + logprob_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.logprob_batch_size}} + algorithm: "modified_first_fit_decreasing" + sequence_length_round: 64 + + generation: + vllm_cfg: + tensor_parallel_size: 4 + # This is a very low GPU mem utilization. We GPU OOM in two places: + # Refit after train, refit before validation. + gpu_memory_utilization: 0.7 + + http_server_serving_chat_kwargs: + # This is the tool parser for Qwen 3 30B A3B Instruct. This needs to be changed for other models. 
+ enable_auto_tools: true + tool_parser: hermes + # Enable the appropriate reasoning parser here. Since this model is an instruct model, we comment it out. + # reasoning_parser: deepseek_r1 + + # Needs to be set to whatever backend TP size. + make_sequence_length_divisible_by: ${policy.megatron_cfg.tensor_model_parallel_size} + + megatron_cfg: + enabled: true + empty_unused_memory_level: 1 + activation_checkpointing: true + converter_type: "LlamaForCausalLM" # This arg is deprecated, and we can set it to anything. + tensor_model_parallel_size: 4 + expert_tensor_parallel_size: 1 + # We set this to 8, the number of GPUs in one node + expert_model_parallel_size: 8 + pipeline_model_parallel_size: 1 + num_layers_in_first_pipeline_stage: null + num_layers_in_last_pipeline_stage: null + # The context parallel size times the tensor model parallel size should equal 8. + context_parallel_size: 2 + pipeline_dtype: ${policy.precision} + # Sequence parallel is required for expert parallel + sequence_parallel: true + # Apparently freezing the MoE router and using fp64 here stabilizes training + # This is possibly related to some refit issues. + freeze_moe_router: false + moe_router_dtype: fp32 + moe_router_load_balancing_type: none # "seq_aux_loss" causes logprob error divergence for grpo. As of Jan 06, 2025, global_aux_loss is also not supported properly in mcore. + moe_router_bias_update_rate: 0.0 # by default, disable bias updates for grpo + #gives ~20% training perf speedup with sequence packing + moe_permute_fusion: true + apply_rope_fusion: True + # gives ~25% training perf speedup with sequence packing and apply_rope_fusion + bias_activation_fusion: True + defer_fp32_logits: true + moe_per_layer_logging: true + + optimizer: + optimizer: "adam" + # As of Mon Oct 13, we default to 2e-6 here, but it's possible this value may increase/decrease depending on our subsequent observations. 
+ lr: 2.0e-6 + min_lr: ${policy.megatron_cfg.optimizer.lr} + weight_decay: 0.01 + bf16: true + fp16: false + params_dtype: "float32" + + #adam + adam_beta1: 0.9 + adam_beta2: 0.999 + adam_eps: 1e-8 + + #sgd + sgd_momentum: 0.9 + + #distributed optimizer + use_distributed_optimizer: true + use_precision_aware_optimizer: true + + clip_grad: ${policy.max_grad_norm} + + # optimizer cpu offload + optimizer_cpu_offload: false + optimizer_offload_fraction: 0.0 + + scheduler: + start_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay} + end_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay} + weight_decay_incr_style: "constant" + lr_decay_style: "constant" + lr_decay_iters: null + lr_warmup_iters: 0 + lr_warmup_init: ${policy.megatron_cfg.optimizer.lr} + + distributed_data_parallel_config: + grad_reduce_in_fp32: false + overlap_grad_reduce: true + overlap_param_gather: true + use_custom_fsdp: false + data_parallel_sharding_strategy: "optim_grads_params" + + env_vars: null + +checkpointing: + # This assumes a slurm job timeout of 4 hours. + # 1. It will usually take a 10-15 minutes to spin up the training job and for the timeout iterations to start. + # 2. The next step may also be a validation step which takes extra long. + # 1. For this config Qwen 3 30BA3B on math with 32k context length, the validation could take up to 10 mins. + # 3. The step time for this config on 32 nodes takes around 30 mins. + # 4. The checkpoint time for this model is around 10 mins. 
+ checkpoint_must_save_by: "00:03:30:00" + save_period: 1 diff --git a/examples/nemo_gym/grpo_workplace_assistant_nemotron_nano_v2_9b.yaml b/examples/nemo_gym/grpo_workplace_assistant_nemotron_nano_v2_9b.yaml index dea76e41cf..c28d958cdc 100644 --- a/examples/nemo_gym/grpo_workplace_assistant_nemotron_nano_v2_9b.yaml +++ b/examples/nemo_gym/grpo_workplace_assistant_nemotron_nano_v2_9b.yaml @@ -201,7 +201,7 @@ policy: kv_cache_dtype: "auto" expose_http_server: true skip_tokenizer_init: false - tool_parser_plugin: ??? + # tool_parser_plugin: ??? # This is set to the path for Nemotron Nano v2 http_server_serving_chat_kwargs: # Workplace assistant uses 26 tools, so we enable auto_tools. # For Nemotron Nano v2, we use the dedicated `nemotron_json` tool parser @@ -227,9 +227,8 @@ policy: data: # Using the prepared train and validation datasets (downloaded from HuggingFace and split 90/10) # Train: 1129 samples, Validation: 126 samples - train_jsonl_fpath: 3rdparty/Gym-workspace/Gym/resources_servers/workplace_assistant/data/train.jsonl - validation_jsonl_fpath: 3rdparty/Gym-workspace/Gym/resources_servers/workplace_assistant/data/validation.jsonl - agent_name: workplace_assistant_simple_agent + train_jsonl_fpath: 3rdparty/Gym-workspace/Gym/data/workplace_assistant/train.jsonl + validation_jsonl_fpath: 3rdparty/Gym-workspace/Gym/data/workplace_assistant/validation.jsonl shuffle: true num_workers: 0 @@ -237,13 +236,16 @@ env: should_use_nemo_gym: true should_log_nemo_gym_responses: true # If you have low logging storage, set this to false nemo_gym: # This is passed into NeMo-Gym as the initial_global_config_dict + is_trajectory_collection: false # Set this to true to enable trajectory collection (no training). You may also want to increase `policy.generation.vllm_cfg.gpu_memory_utilization` config_paths: - responses_api_models/vllm_model/configs/vllm_model_for_training.yaml # Required! 
And it must be *for_training - resources_servers/workplace_assistant/configs/workplace_assistant.yaml - workplace_assistant_simple_agent: - responses_api_agents: - simple_agent: - max_steps: 6 # Workplace assistant allows up to 6 tool-calling steps per task + # You can uncomment these during `ng_prepare_data` and here to train on multiple environments at once! + # - resources_servers/math_with_judge/configs/math_with_judge.yaml + # - resources_servers/code_gen/configs/code_gen.yaml + # - resources_servers/mcqa/configs/mcqa.yaml + # - resources_servers/instruction_following/configs/instruction_following.yaml + # - resources_servers/structured_outputs/configs/structured_outputs_json.yaml policy_model: responses_api_models: vllm_model: @@ -252,6 +254,10 @@ env: extra_body: chat_template_kwargs: enable_thinking: false + code_gen: + resources_servers: + code_gen: + num_processes: ${mul:64, ${cluster.num_nodes}} logger: log_dir: "logs/grpo-workplace-assistant-nemotron-nano-v2-9b" # Base directory for all logs diff --git a/examples/nemo_gym/launch_nemo_gym_multinode_training.sh b/examples/nemo_gym/launch_nemo_gym_multinode_training.sh index 37ede71772..74b51c1527 100755 --- a/examples/nemo_gym/launch_nemo_gym_multinode_training.sh +++ b/examples/nemo_gym/launch_nemo_gym_multinode_training.sh @@ -13,7 +13,7 @@ # limitations under the License. 
# ----- PARAMETERS ----- -# WANDB_API_KEY, EXP_NAME, NUM_ACTOR_NODES, REPO_LOCATION, CONTAINER_IMAGE_PATH, SLURM_ACCOUNT, SLURM_PARTITION +# WANDB_API_KEY, HF_TOKEN, EXP_NAME, NUM_ACTOR_NODES, NUM_SLURM_NODES (optional), REPO_LOCATION, CONTAINER_IMAGE_PATH, SLURM_ACCOUNT, SLURM_PARTITION # ray.sub needs to be launched from the NeMo-RL root directory cd $REPO_LOCATION @@ -23,6 +23,7 @@ read -r -d '' COMMAND < bool: return should_use_nemo_gym +def _should_log_nemo_gym_responses(master_config: MasterConfig) -> bool: + env_config = master_config.get("env") or dict() + should_log_nemo_gym_responses = bool( + env_config.get("should_log_nemo_gym_responses") + ) + + return should_log_nemo_gym_responses + + def refit_policy_generation( policy: ColocatablePolicyInterface, policy_generation: GenerationInterface, @@ -977,6 +990,30 @@ def refit_policy_generation( policy_generation.prepare_for_generation(tags=["kv_cache"]) +def _log_mixed_rewards_and_advantages_information( + logger: Logger, + total_steps: int, + metrics: dict[str, Any], + baseline: torch.Tensor, + advantages: torch.Tensor, +) -> None: + # The histograms that are logged are logged with a prefix "train/" to the name, since that is what the remaining metrics will be logged with. 
+ logger.log_histogram( + baseline.numpy(), total_steps + 1, "train/baseline_reward/histogram" + ) + metrics["baseline_reward/pct_0"] = 100 * (baseline == 0).float().mean().item() + metrics["baseline_reward/pct_1"] = 100 * (baseline == 1).float().mean().item() + metrics["baseline_reward/pct_mixed"] = ( + 100 - metrics["baseline_reward/pct_0"] - metrics["baseline_reward/pct_1"] + ) + + logger.log_histogram( + advantages.numpy(), total_steps + 1, "train/advantages/histogram" + ) + metrics["advantages/sum"] = advantages.float().sum().item() + metrics["advantages/mean"] = advantages.float().mean().item() + + # =============================================================================== # Training & Validation # =============================================================================== @@ -1004,6 +1041,7 @@ def grpo_train( fit_last_save_time=True, ) timeout.start_iterations() + memory_tracker = MemoryTracker() kv_scales_cache = None # Cache reused for computed kv scales @@ -1015,11 +1053,17 @@ def grpo_train( POLICY_GENERATION_STALE = True # tracks if generation needs a refit before running assert policy_generation is not None # for mypy type check + if master_config["grpo"].get("skip_reference_policy_logprobs_calculation"): + assert master_config["loss_fn"]["reference_policy_kl_penalty"] == 0 + print( + "Reference policy logprob calculation will be skipped since `grpo.skip_reference_policy_logprobs_calculation` is set to True and `loss_fn.reference_policy_kl_penalty` is 0." + ) + # Check if we need to sync KV cache scales # When fallback to policy as the policy_generation, we use getattr to check. 
sync_kv_scales = getattr(policy_generation, "requires_kv_scale_sync", False) - # common config/state itmes + # common config/state items current_step = grpo_save_state["current_step"] # current step within an epoch total_steps = grpo_save_state["total_steps"] # total steps across all epochs max_num_steps = master_config["grpo"][ @@ -1043,6 +1087,8 @@ def grpo_train( # TODO: Add validation with kv scales if needed if val_at_start and current_step == 0: print("\n🔍 Running initial validation...", flush=True) + memory_tracker.snapshot_start_of_stage("Initial validation", dir()) + if NEED_REFIT and POLICY_GENERATION_STALE: refit_policy_generation(policy, policy_generation, colocated_inference) POLICY_GENERATION_STALE = False @@ -1061,6 +1107,7 @@ def grpo_train( logger.log_metrics(validation_timings, current_step, prefix="timing/validation") while current_epoch < max_num_epochs and total_steps < max_num_steps: + memory_tracker.snapshot_start_of_stage("Preparing batch", dir()) print(f"\n{'=' * 25} Epoch {current_epoch + 1}/{max_num_epochs} {'=' * 25}") # batch cache is used for DAPO. We store prompts with non-zero standard deviation in this cache.
batch_cache: BatchedDataDict[DatumSpec] = None @@ -1069,6 +1116,10 @@ def grpo_train( # Run grpo/dapo training loop (single-turn) for batch in dataloader: + # A central place to store logging data that won't be deleted until the loop ends + metrics_logging_data = dict() + metrics = dict() + print( f"\n{'=' * 25} Step {current_step + 1}/{min(len(dataloader), max_num_steps)} {'=' * 25}", flush=True, @@ -1096,6 +1147,7 @@ def grpo_train( input_ids = batched_flat["token_ids"] # Generate responses - this updates the LLMMessageLogType in repeated_batch + memory_tracker.snapshot_start_of_stage("Generation", dir()) print( f"▶ Generating responses for batch of size {repeated_batch.size}...", flush=True, @@ -1169,6 +1221,14 @@ def grpo_train( input_ids = nemo_gym_rollout_result.input_ids repeated_batch = nemo_gym_rollout_result.final_batch rollout_metrics = nemo_gym_rollout_result.rollout_metrics + del nemo_gym_rollout_result + + # NeMo Gym responses can be very large and expensive to log. Here we have logic to opt-in to logging.
+ if not _should_log_nemo_gym_responses(master_config): + for key in list(rollout_metrics): + if "full_result" in key: + rollout_metrics.pop(key) + # Use async rollouts if vLLM async engine is enabled elif _should_use_async_rollouts(master_config): ( @@ -1213,6 +1273,12 @@ def grpo_train( else: vllm_logger_metrics = {} + metrics_logging_data["mean_gen_tokens_per_sample"] = ( + rollout_metrics["mean_gen_tokens_per_sample"] + ) + logger.log_metrics(rollout_metrics, total_steps + 1, prefix="train") + del rollout_metrics + repeated_batch = scale_rewards( repeated_batch, master_config["grpo"]["reward_scaling"] ) @@ -1223,20 +1289,37 @@ def grpo_train( ) # Calculate rewards & advantages + memory_tracker.snapshot_start_of_stage("Processing rewards", dir()) print("▶ Processing rewards...,", flush=True) with timer.time("reward_calculation"): # Extract rewards from final_batch rewards = repeated_batch["total_reward"] print("▶ Computing advantages...", flush=True) - baseline, std = calculate_baseline_and_std_per_prompt( - input_ids, - rewards, - torch.ones_like(rewards), - leave_one_out_baseline=master_config["grpo"][ - "use_leave_one_out_baseline" - ], - ) + if master_config["grpo"].get("calculate_advantages_on_gpu"): + print("Computing advantages on GPU!") + # Just fix the device id for now + device_id = 0 + baseline, std = calculate_baseline_and_std_per_prompt( + input_ids.cuda(device_id), + rewards.cuda(device_id), + torch.ones_like(rewards).cuda(device_id), + leave_one_out_baseline=master_config["grpo"][ + "use_leave_one_out_baseline" + ], + ) + baseline = baseline.cpu() + std = std.cpu() + else: + baseline, std = calculate_baseline_and_std_per_prompt( + input_ids, + rewards, + torch.ones_like(rewards), + leave_one_out_baseline=master_config["grpo"][ + "use_leave_one_out_baseline" + ], + ) + # Apply dynamic sampling to filter prompts with non-zero std (DAPO algorithm) repeated_batch, is_batch_complete, batch_cache, ds_metrics = ( dynamic_sampling( @@ -1273,6 +1356,18
@@ def grpo_train( std=std, ) + _log_mixed_rewards_and_advantages_information( + logger=logger, + total_steps=total_steps, + metrics=metrics, + baseline=baseline, + advantages=advantages, + ) + + del input_ids + del baseline + del std + with timer.time("data_processing"): use_overlong_filtering = master_config["grpo"]["overlong_filtering"] if use_overlong_filtering: @@ -1302,6 +1397,7 @@ def grpo_train( message["advantages"] = advantages[i].expand( message["token_ids"].shape ) + del advantages # Convert updated LLMMessageLogType to FlatMessagesType for training flat_messages, input_lengths = batched_message_log_to_flat_message( @@ -1324,24 +1420,47 @@ def grpo_train( } ) # this will be mini-batched inside the policy, so maintain the packed multimodal structure - train_data.update( - flat_messages.get_multimodal_dict(as_tensors=False) + # This is also used to populate part of the downstream logprob calculation data + extra_multimodal_data = flat_messages.get_multimodal_dict( + as_tensors=False ) + train_data.update(extra_multimodal_data) train_data.to("cpu") + metrics_logging_data["content"] = flat_messages["content"] + + memory_tracker.snapshot_start_of_stage("Computing logprobs", dir()) print("ā–¶ Preparing for logprob inference...", flush=True) with timer.time("logprob_inference_prep"): policy.prepare_for_lp_inference() print("ā–¶ Computing logprobs...", flush=True) with timer.time("policy_and_reference_logprobs"): - fprop_logprobs = policy.get_logprobs(train_data)["logprobs"] - reference_logprobs = policy.get_reference_policy_logprobs( - train_data - )["reference_logprobs"] - train_data["prev_logprobs"] = fprop_logprobs - train_data["reference_policy_logprobs"] = reference_logprobs + # Custom create this logprob_data so we avoid Ray comm overheads sending unused data to workers. 
+ logprob_data = BatchedDataDict[ClippedPGLossDataDict]( + { + "input_ids": train_data["input_ids"], + "input_lengths": train_data["input_lengths"], + **extra_multimodal_data, + } + ) + train_data["prev_logprobs"] = policy.get_logprobs(logprob_data)[ + "logprobs" + ] + + if not master_config["grpo"].get( + "skip_reference_policy_logprobs_calculation" + ): + train_data["reference_policy_logprobs"] = ( + policy.get_reference_policy_logprobs(logprob_data)[ + "reference_logprobs" + ] + ) + + del logprob_data + del extra_multimodal_data + memory_tracker.snapshot_start_of_stage("Policy train", dir()) print("ā–¶ Preparing for training...", flush=True) with timer.time("training_prep"): policy.prepare_for_training() # set model train and reload optim to GPU @@ -1371,6 +1490,7 @@ def grpo_train( # Run validation if it's a validation step if val_period > 0 and (total_steps + 1) % val_period == 0: + memory_tracker.snapshot_start_of_stage("Validation", dir()) if NEED_REFIT and POLICY_GENERATION_STALE: refit_policy_generation( policy, @@ -1402,13 +1522,16 @@ def grpo_train( # Get flat advantages and token mask for masked metrics computation flat_advantages = flat_messages["advantages"] flat_token_mask = flat_messages["token_loss_mask"] + del flat_messages # Filter advantages using token mask (only valid response tokens) response_advantages = torch.masked_select( flat_advantages, flat_token_mask.bool() ) + memory_tracker.snapshot_start_of_stage("Metrics", dir()) metrics = { + **metrics, "loss": train_results["loss"].numpy(), "grad_norm": train_results["grad_norm"].numpy(), "reward": rewards.numpy(), @@ -1456,10 +1579,11 @@ def grpo_train( "mean_prompt_length", }: metrics[k] = np.mean(v).item() - else: + elif isinstance(v, (np.ndarray, list)): metrics[k] = np.sum(v).item() + else: + print(f"Skipping aggregation for {k} ({type(v)})") - metrics.update(rollout_metrics) metrics["vllm_logger_metrics"] = vllm_logger_metrics total_valid_tokens += metrics["global_valid_toks"] @@ -1476,6 
+1600,7 @@ def grpo_train( # Check if timeout-based checkpointing is enabled in config. should_save_by_timeout = timeout.check_save() + memory_tracker.snapshot_start_of_stage("Checkpointing", dir()) if master_config["checkpointing"]["enabled"] and ( should_save_by_step or should_save_by_timeout ): @@ -1549,18 +1674,23 @@ def grpo_train( # Logging # Log training data - log_data = {"content": flat_messages["content"]} - log_data["rewards"] = rewards.tolist() - if master_config["grpo"]["use_dynamic_sampling"]: - log_data["filtered_rewards"] = rewards.tolist() - log_data["rewards"] = repeated_batch["total_reward"].tolist() - - log_data["generation_logprobs"] = train_data["generation_logprobs"].tolist() - log_data["prev_logprobs"] = train_data["prev_logprobs"].tolist() - log_data["input_lengths"] = input_lengths.tolist() - logger.log_batched_dict_as_jsonl( - log_data, f"train_data_step{total_steps + 1}.jsonl" - ) + memory_tracker.snapshot_start_of_stage("Logging", dir()) + if not _should_log_nemo_gym_responses(master_config): + log_data = {"content": metrics_logging_data["content"]} + log_data["rewards"] = rewards.tolist() + if master_config["grpo"]["use_dynamic_sampling"]: + log_data["filtered_rewards"] = rewards.tolist() + log_data["rewards"] = repeated_batch["total_reward"].tolist() + + log_data["generation_logprobs"] = train_data[ + "generation_logprobs" + ].tolist() + log_data["prev_logprobs"] = train_data["prev_logprobs"].tolist() + log_data["input_lengths"] = input_lengths.tolist() + logger.log_batched_dict_as_jsonl( + log_data, f"train_data_step{total_steps + 1}.jsonl" + ) + del log_data timing_metrics: dict[str, float] = timer.get_timing_metrics( reduction_op="sum" @@ -1617,7 +1747,7 @@ def grpo_train( else: print(f" • Avg Reward: {np.mean(rewards.numpy()):.4f}") print( - f" • Mean Generation Length: {rollout_metrics['mean_gen_tokens_per_sample']:.4f}", + f" • Mean Generation Length: {metrics_logging_data['mean_gen_tokens_per_sample']:.4f}", flush=True, ) @@ 
-1655,19 +1785,39 @@ def grpo_train( logger.log_metrics( performance_metrics, total_steps + 1, prefix="performance" ) - logger.log_metrics(timing_metrics, total_steps + 1, prefix="timing/train") + # step_finished=True here since this is the final log of our current step. + logger.log_metrics( + timing_metrics, + total_steps + 1, + prefix="timing/train", + step_finished=True, + ) # Reset the batch and set dynamic_sampling_num_gen_batches to 0 batch_cache = None dynamic_sampling_num_gen_batches = 0 + # Clear mem + memory_tracker.snapshot_start_of_stage("After CPU memory clear", dir()) + + # processing rewards + del repeated_batch + del rewards + del train_data + # logging + del metrics + if "val_metrics" in dir(): + del val_metrics + timer.reset() current_step += 1 total_steps += 1 if should_save_by_timeout: + memory_tracker.snapshot_start_of_stage("", dir()) print("Timeout has been reached, stopping training early", flush=True) return if total_steps >= max_num_steps: + memory_tracker.snapshot_start_of_stage("", dir()) print( "Max number of steps has been reached, stopping training early", flush=True, diff --git a/nemo_rl/algorithms/loss_functions.py b/nemo_rl/algorithms/loss_functions.py index 459181c899..21333d1f8d 100755 --- a/nemo_rl/algorithms/loss_functions.py +++ b/nemo_rl/algorithms/loss_functions.py @@ -168,7 +168,8 @@ def __call__( advantages = data["advantages"][:, 1:] prev_logprobs = data["prev_logprobs"][:, 1:] generation_logprobs = data["generation_logprobs"][:, 1:] - reference_policy_logprobs = data["reference_policy_logprobs"][:, 1:] + if self.reference_policy_kl_penalty != 0: + reference_policy_logprobs = data["reference_policy_logprobs"][:, 1:] seq_index = data.get("seq_index", None) mask = token_mask * sample_mask.unsqueeze(-1) diff --git a/nemo_rl/data/packing/algorithms.py b/nemo_rl/data/packing/algorithms.py index a0eab88f0f..08cd5bcce6 100644 --- a/nemo_rl/data/packing/algorithms.py +++ b/nemo_rl/data/packing/algorithms.py @@ -18,6 +18,7 @@ 
import math import random from abc import ABC, abstractmethod +from bisect import bisect from typing import Dict, List, Optional, Tuple, Type, Union @@ -611,6 +612,9 @@ def _pack_implementation(self, sequence_lengths: List[int]) -> List[List[int]]: # Phase-5: FFD on leftovers leftovers = remaining_items # renamed for clarity + + # Original O(n * m) implementation + """ ffd_bins: List[List[Tuple[int, int]]] = [] for idx, size in sorted(leftovers, key=lambda x: x[1], reverse=True): placed = False @@ -621,10 +625,31 @@ def _pack_implementation(self, sequence_lengths: List[int]) -> List[List[int]]: break if not placed: ffd_bins.append([(idx, size)]) + """ + + # New O(n * logn) implementation + ffd_bins: List[List[Tuple[int, int]]] = [[]] + ffd_bin_sizes: List[int] = [0] + for idx, size in sorted(leftovers, key=lambda x: x[1], reverse=True): + # We only need to check the first bin since we guarantee the order of ffd_bin_sizes to be sorted from smallest to largest. + if size <= (self.bin_capacity - ffd_bin_sizes[0]): + new_bin = ffd_bins.pop(0) + new_bin_size = ffd_bin_sizes.pop(0) + else: + new_bin = [] + new_bin_size = 0 + + new_bin.append((idx, size)) + new_bin_size += size + + new_idx = bisect(ffd_bin_sizes, new_bin_size) + ffd_bins.insert(new_idx, new_bin) + ffd_bin_sizes.insert(new_idx, new_bin_size) + bins.extend(ffd_bins) # Convert to list of index lists (discard sizes) - return [[idx for idx, _ in b] for b in bins] + return [[idx for idx, _ in b] for b in bins if b] def get_packer( diff --git a/nemo_rl/environments/nemo_gym.py b/nemo_rl/environments/nemo_gym.py index da47ff5184..5ec15c3cef 100644 --- a/nemo_rl/environments/nemo_gym.py +++ b/nemo_rl/environments/nemo_gym.py @@ -148,6 +148,10 @@ async def run_rollouts( def _postprocess_nemo_gym_to_nemo_rl_result( self, nemo_gym_result: dict, tokenizer: PreTrainedTokenizerBase ) -> dict: + assert isinstance(nemo_gym_result, dict), ( + f"Hit a non-successful response when querying NeMo Gym for rollouts: 
{nemo_gym_result}" + ) + nemo_rl_message_log = [] seen_token_ids: List[int] = [] for output_item_dict in nemo_gym_result["response"]["output"]: diff --git a/nemo_rl/utils/logger.py b/nemo_rl/utils/logger.py index f329dd70c7..f8e9ad0c6f 100644 --- a/nemo_rl/utils/logger.py +++ b/nemo_rl/utils/logger.py @@ -99,6 +99,7 @@ def log_metrics( step: int, prefix: Optional[str] = "", step_metric: Optional[str] = None, + step_finished: bool = False, ) -> None: """Log a dictionary of metrics.""" pass @@ -144,6 +145,7 @@ def log_metrics( step: int, prefix: Optional[str] = "", step_metric: Optional[str] = None, # ignored in TensorBoard + step_finished: bool = False, # ignored in TensorBoard ) -> None: """Log metrics to Tensorboard. @@ -199,6 +201,14 @@ class WandbLogger(LoggerInterface): def __init__(self, cfg: WandbConfig, log_dir: Optional[str] = None): self.run = wandb.init(**cfg, dir=log_dir) + + if os.environ.get("RAY_BACKEND_LOG_LEVEL", "").lower() == "debug": + print( + "Uploading raylet.out and raylet.err files to W&B since environment variable RAY_BACKEND_LOG_LEVEL=debug" + ) + wandb.save("/tmp/ray/session_latest/logs/raylet.out", policy="live") + wandb.save("/tmp/ray/session_latest/logs/raylet.err", policy="live") + self._log_code() self._log_diffs() print( @@ -332,6 +342,7 @@ def log_metrics( step: int, prefix: Optional[str] = "", step_metric: Optional[str] = None, + step_finished: bool = False, ) -> None: """Log metrics to wandb. @@ -352,6 +363,10 @@ def log_metrics( if step_metric and step_metric in metrics: # commit=False so the step does not get incremented self.run.log(metrics, commit=False) + elif step_finished: + # Commit param defaults to None. By default if step is set, then commit defaults to False + # Here, we have an explicit fork for commit in case W&B ever decides to change their default logic. 
+ self.run.log(metrics, step=step, commit=True) else: self.run.log(metrics, step=step) @@ -404,6 +419,7 @@ def log_metrics( step: int, prefix: Optional[str] = "", step_metric: Optional[str] = None, + step_finished: bool = False, ) -> None: """Log metrics to the associated Swanlab run. @@ -781,6 +797,7 @@ def log_metrics( step: int, prefix: Optional[str] = "", step_metric: Optional[str] = None, + step_finished: bool = False, ) -> None: """Log metrics to MLflow. @@ -906,6 +923,7 @@ def log_metrics( step: int, prefix: Optional[str] = "", step_metric: Optional[str] = None, + step_finished: bool = False, ) -> None: """Log metrics to all enabled backends. @@ -917,7 +935,7 @@ def log_metrics( of the provided step value (currently only needed for wandb) """ for logger in self.loggers: - logger.log_metrics(metrics, step, prefix, step_metric) + logger.log_metrics(metrics, step, prefix, step_metric, step_finished) def log_hyperparams(self, params: Mapping[str, Any]) -> None: """Log hyperparameters to all enabled backends. @@ -954,6 +972,24 @@ def log_batched_dict_as_jsonl( print(f"Logged data to {filepath}") + def log_string_list_as_jsonl(self, to_log: list[str], filename: str) -> None: + """Log a list of strings to a JSONL file. + + Args: + to_log: list of strings to log + filename: Filename to log to (within the log directory) + """ + # Create full path within log directory + filepath = os.path.join(self.base_log_dir, filename) + os.makedirs(os.path.dirname(filepath), exist_ok=True) + + # Write to JSONL file + with open(filepath, "a") as f: + for sample in to_log: + f.write(sample + "\n") + + print(f"Logged data to {filepath}") + def log_plot_per_worker_timeline_metrics( self, metrics: dict[int, list[Any]], diff --git a/nemo_rl/utils/memory_tracker.py b/nemo_rl/utils/memory_tracker.py new file mode 100644 index 0000000000..be55426205 --- /dev/null +++ b/nemo_rl/utils/memory_tracker.py @@ -0,0 +1,80 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +from typing import List, Optional + +from psutil import Process +from pydantic import BaseModel, Field +from ray.scripts.scripts import memory_summary + + +class MemoryTrackerDataPoint(BaseModel): + stage: str + memory_used_before_stage_gb: float + variables_before_stage: List[str] + + memory_used_after_stage_gb: Optional[float] = None + variables_after_stage: Optional[List[str]] = None + + @property + def mem_used_diff_gb(self) -> float: + return self.memory_used_after_stage_gb - self.memory_used_before_stage_gb + + @property + def new_variables(self) -> List[str]: + return [ + v + for v in self.variables_after_stage + if v not in self.variables_before_stage + ] + + def get_snapshot_str(self) -> str: + ray_memory_summary = memory_summary(stats_only=True, num_entries=5) + return f"""šŸ’­ Driver CPU memory tracker for {self.stage}: +- Mem usage before {self.memory_used_before_stage_gb:>7.2f} GB +- Mem usage after {self.memory_used_after_stage_gb:>7.2f} GB +- Mem usage diff (after - before) {self.mem_used_diff_gb:>+7.2f} GB +- New variables: {self.new_variables} + +āš”ļø Ray memory snapshot: +{ray_memory_summary}""" + + +class MemoryTracker(BaseModel): + data_points: List[MemoryTrackerDataPoint] = Field(default_factory=list) + + def model_post_init(self, context): + self._process = Process(os.getpid()) + return super().model_post_init(context) + + def snapshot_start_of_stage( + self, new_stage: str, 
all_current_variables: List[str] + ) -> None: + mem_info = self._process.memory_info() + current_mem_used_gb: float = mem_info.rss / (1024**3) + + if self.data_points: + last_data_point = self.data_points[-1] + last_data_point.memory_used_after_stage_gb = current_mem_used_gb + last_data_point.variables_after_stage = all_current_variables + + print(last_data_point.get_snapshot_str()) + + self.data_points.append( + MemoryTrackerDataPoint( + stage=new_stage, + memory_used_before_stage_gb=current_mem_used_gb, + variables_before_stage=all_current_variables, + ) + ) diff --git a/pyproject.toml b/pyproject.toml index 87198f1e92..29e683fdbe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -178,7 +178,7 @@ members = [ "3rdparty/Megatron-LM-workspace", "3rdparty/Automodel-workspace/Automodel", "3rdparty/Megatron-Bridge-workspace", - "3rdparty/Gym-workspace", + "3rdparty/Gym-workspace/Gym", # Research projects are also added here in order for them to share the global root level uv.lock. # If we don't do this, the research projects do not see the global uv.lock, and may mistakenly # install numpy>=2.0 because nemo-rl's core [dependencies] do not pin numpy, but when you inspect diff --git a/tests/unit/utils/test_logger.py b/tests/unit/utils/test_logger.py index d88137746a..52b380a213 100644 --- a/tests/unit/utils/test_logger.py +++ b/tests/unit/utils/test_logger.py @@ -1493,8 +1493,12 @@ def test_log_metrics(self, mock_tb_logger, mock_wandb_logger, temp_dir): logger.log_metrics(metrics, step) # Check that log_metrics was called on both loggers - mock_wandb_instance.log_metrics.assert_called_once_with(metrics, step, "", None) - mock_tb_instance.log_metrics.assert_called_once_with(metrics, step, "", None) + mock_wandb_instance.log_metrics.assert_called_once_with( + metrics, step, "", None, False + ) + mock_tb_instance.log_metrics.assert_called_once_with( + metrics, step, "", None, False + ) @patch("nemo_rl.utils.logger.WandbLogger") 
@patch("nemo_rl.utils.logger.TensorboardLogger") @@ -1603,10 +1607,10 @@ def test_log_metrics_with_prefix_and_step_metric( # Check that log_metrics was called on both loggers with correct parameters mock_wandb_instance.log_metrics.assert_called_once_with( - metrics, step, prefix, step_metric + metrics, step, prefix, step_metric, False ) mock_tb_instance.log_metrics.assert_called_once_with( - metrics, step, prefix, step_metric + metrics, step, prefix, step_metric, False ) @patch("nemo_rl.utils.logger.WandbLogger") @@ -1768,13 +1772,17 @@ def test_log_metrics_with_mlflow( logger.log_metrics(metrics, step) # Check that log_metrics was called on all loggers - mock_wandb_instance.log_metrics.assert_called_once_with(metrics, step, "", None) + mock_wandb_instance.log_metrics.assert_called_once_with( + metrics, step, "", None, False + ) mock_swanlab_instance.log_metrics.assert_called_once_with( - metrics, step, "", None + metrics, step, "", None, False + ) + mock_tb_instance.log_metrics.assert_called_once_with( + metrics, step, "", None, False ) - mock_tb_instance.log_metrics.assert_called_once_with(metrics, step, "", None) mock_mlflow_instance.log_metrics.assert_called_once_with( - metrics, step, "", None + metrics, step, "", None, False ) @patch("nemo_rl.utils.logger.WandbLogger") diff --git a/uv.lock b/uv.lock index 5818765dad..e6b1c3fe30 100644 --- a/uv.lock +++ b/uv.lock @@ -107,12 +107,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8d/3f/95338030883d8c8b91223b4e21744b04d11b161a3ef117295d8241f50ab4/accessible_pygments-0.0.5-py3-none-any.whl", hash = "sha256:88ae3211e68a1d0b011504b2ffc1691feafce124b845bd072ab6f9f66f34d4b7", size = 1395903, upload-time = "2024-05-10T11:23:08.421Z" }, ] -[[package]] -name = "accumulation-tree" -version = "0.6.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ac/dc/4ffda8a22b6af3f41bcec07ddfebe723218976eaa016cefbc904634a4e85/accumulation_tree-0.6.4.tar.gz", 
hash = "sha256:5f907667e4106b5ba140b6b871e1902eb2a93d429b92f8a9f7ddb2bee7704334", size = 12635, upload-time = "2024-09-26T21:50:40.627Z" } - [[package]] name = "aiobotocore" version = "2.24.3" @@ -1549,6 +1543,15 @@ dependencies = [ { name = "typing-extensions" }, ] +[[package]] +name = "execnet" +version = "2.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/bf/89/780e11f9588d9e7128a3f87788354c7946a9cbb1401ad38a48c4db9a4f07/execnet-2.1.2.tar.gz", hash = "sha256:63d83bfdd9a23e35b9c6a3261412324f964c2ec8dcd8d3c6916ee9373e0befcd", size = 166622, upload-time = "2025-11-12T09:56:37.75Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ab/84/02fc1827e8cdded4aa65baef11296a9bbe595c474f0d6d758af082d849fd/execnet-2.1.2-py3-none-any.whl", hash = "sha256:67fba928dd5a544b783f6056f449e5e3931a5c378b128bc18501f7ea79e296ec", size = 40708, upload-time = "2025-11-12T09:56:36.333Z" }, +] + [[package]] name = "executing" version = "2.2.1" @@ -2687,6 +2690,58 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b5/ba/c63c5786dfee4c3417094c4b00966e61e4a63efecee22cb7b4c0387dda83/librosa-0.11.0-py3-none-any.whl", hash = "sha256:0b6415c4fd68bff4c29288abe67c6d80b587e0e1e2cfb0aad23e4559504a7fa1", size = 260749, upload-time = "2025-03-11T15:09:52.982Z" }, ] +[[package]] +name = "librt" +version = "0.7.7" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b7/29/47f29026ca17f35cf299290292d5f8331f5077364974b7675a353179afa2/librt-0.7.7.tar.gz", hash = "sha256:81d957b069fed1890953c3b9c3895c7689960f233eea9a1d9607f71ce7f00b2c", size = 145910, upload-time = "2026-01-01T23:52:22.87Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/56/72/1cd9d752070011641e8aee046c851912d5f196ecd726fffa7aed2070f3e0/librt-0.7.7-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2a85a1fc4ed11ea0eb0a632459ce004a2d14afc085a50ae3463cd3dfe1ce43fc", size = 55687, 
upload-time = "2026-01-01T23:51:16.291Z" }, + { url = "https://files.pythonhosted.org/packages/50/aa/d5a1d4221c4fe7e76ae1459d24d6037783cb83c7645164c07d7daf1576ec/librt-0.7.7-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c87654e29a35938baead1c4559858f346f4a2a7588574a14d784f300ffba0efd", size = 57136, upload-time = "2026-01-01T23:51:17.363Z" }, + { url = "https://files.pythonhosted.org/packages/23/6f/0c86b5cb5e7ef63208c8cc22534df10ecc5278efc0d47fb8815577f3ca2f/librt-0.7.7-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:c9faaebb1c6212c20afd8043cd6ed9de0a47d77f91a6b5b48f4e46ed470703fe", size = 165320, upload-time = "2026-01-01T23:51:18.455Z" }, + { url = "https://files.pythonhosted.org/packages/16/37/df4652690c29f645ffe405b58285a4109e9fe855c5bb56e817e3e75840b3/librt-0.7.7-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1908c3e5a5ef86b23391448b47759298f87f997c3bd153a770828f58c2bb4630", size = 174216, upload-time = "2026-01-01T23:51:19.599Z" }, + { url = "https://files.pythonhosted.org/packages/9a/d6/d3afe071910a43133ec9c0f3e4ce99ee6df0d4e44e4bddf4b9e1c6ed41cc/librt-0.7.7-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dbc4900e95a98fc0729523be9d93a8fedebb026f32ed9ffc08acd82e3e181503", size = 189005, upload-time = "2026-01-01T23:51:21.052Z" }, + { url = "https://files.pythonhosted.org/packages/d5/18/74060a870fe2d9fd9f47824eba6717ce7ce03124a0d1e85498e0e7efc1b2/librt-0.7.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a7ea4e1fbd253e5c68ea0fe63d08577f9d288a73f17d82f652ebc61fa48d878d", size = 183961, upload-time = "2026-01-01T23:51:22.493Z" }, + { url = "https://files.pythonhosted.org/packages/7c/5e/918a86c66304af66a3c1d46d54df1b2d0b8894babc42a14fb6f25511497f/librt-0.7.7-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:ef7699b7a5a244b1119f85c5bbc13f152cd38240cbb2baa19b769433bae98e50", size = 177610, upload-time = 
"2026-01-01T23:51:23.874Z" }, + { url = "https://files.pythonhosted.org/packages/b2/d7/b5e58dc2d570f162e99201b8c0151acf40a03a39c32ab824dd4febf12736/librt-0.7.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:955c62571de0b181d9e9e0a0303c8bc90d47670a5eff54cf71bf5da61d1899cf", size = 199272, upload-time = "2026-01-01T23:51:25.341Z" }, + { url = "https://files.pythonhosted.org/packages/18/87/8202c9bd0968bdddc188ec3811985f47f58ed161b3749299f2c0dd0f63fb/librt-0.7.7-cp312-cp312-win32.whl", hash = "sha256:1bcd79be209313b270b0e1a51c67ae1af28adad0e0c7e84c3ad4b5cb57aaa75b", size = 43189, upload-time = "2026-01-01T23:51:26.799Z" }, + { url = "https://files.pythonhosted.org/packages/61/8d/80244b267b585e7aa79ffdac19f66c4861effc3a24598e77909ecdd0850e/librt-0.7.7-cp312-cp312-win_amd64.whl", hash = "sha256:4353ee891a1834567e0302d4bd5e60f531912179578c36f3d0430f8c5e16b456", size = 49462, upload-time = "2026-01-01T23:51:27.813Z" }, + { url = "https://files.pythonhosted.org/packages/2d/1f/75db802d6a4992d95e8a889682601af9b49d5a13bbfa246d414eede1b56c/librt-0.7.7-cp312-cp312-win_arm64.whl", hash = "sha256:a76f1d679beccccdf8c1958e732a1dfcd6e749f8821ee59d7bec009ac308c029", size = 42828, upload-time = "2026-01-01T23:51:28.804Z" }, + { url = "https://files.pythonhosted.org/packages/8d/5e/d979ccb0a81407ec47c14ea68fb217ff4315521730033e1dd9faa4f3e2c1/librt-0.7.7-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8f4a0b0a3c86ba9193a8e23bb18f100d647bf192390ae195d84dfa0a10fb6244", size = 55746, upload-time = "2026-01-01T23:51:29.828Z" }, + { url = "https://files.pythonhosted.org/packages/f5/2c/3b65861fb32f802c3783d6ac66fc5589564d07452a47a8cf9980d531cad3/librt-0.7.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5335890fea9f9e6c4fdf8683061b9ccdcbe47c6dc03ab8e9b68c10acf78be78d", size = 57174, upload-time = "2026-01-01T23:51:31.226Z" }, + { url = 
"https://files.pythonhosted.org/packages/50/df/030b50614b29e443607220097ebaf438531ea218c7a9a3e21ea862a919cd/librt-0.7.7-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:9b4346b1225be26def3ccc6c965751c74868f0578cbcba293c8ae9168483d811", size = 165834, upload-time = "2026-01-01T23:51:32.278Z" }, + { url = "https://files.pythonhosted.org/packages/5d/e1/bd8d1eacacb24be26a47f157719553bbd1b3fe812c30dddf121c0436fd0b/librt-0.7.7-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a10b8eebdaca6e9fdbaf88b5aefc0e324b763a5f40b1266532590d5afb268a4c", size = 174819, upload-time = "2026-01-01T23:51:33.461Z" }, + { url = "https://files.pythonhosted.org/packages/46/7d/91d6c3372acf54a019c1ad8da4c9ecf4fc27d039708880bf95f48dbe426a/librt-0.7.7-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:067be973d90d9e319e6eb4ee2a9b9307f0ecd648b8a9002fa237289a4a07a9e7", size = 189607, upload-time = "2026-01-01T23:51:34.604Z" }, + { url = "https://files.pythonhosted.org/packages/fa/ac/44604d6d3886f791fbd1c6ae12d5a782a8f4aca927484731979f5e92c200/librt-0.7.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:23d2299ed007812cccc1ecef018db7d922733382561230de1f3954db28433977", size = 184586, upload-time = "2026-01-01T23:51:35.845Z" }, + { url = "https://files.pythonhosted.org/packages/5c/26/d8a6e4c17117b7f9b83301319d9a9de862ae56b133efb4bad8b3aa0808c9/librt-0.7.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:6b6f8ea465524aa4c7420c7cc4ca7d46fe00981de8debc67b1cc2e9957bb5b9d", size = 178251, upload-time = "2026-01-01T23:51:37.018Z" }, + { url = "https://files.pythonhosted.org/packages/99/ab/98d857e254376f8e2f668e807daccc1f445e4b4fc2f6f9c1cc08866b0227/librt-0.7.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f8df32a99cc46eb0ee90afd9ada113ae2cafe7e8d673686cf03ec53e49635439", size = 199853, upload-time = "2026-01-01T23:51:38.195Z" }, + { url = 
"https://files.pythonhosted.org/packages/7c/55/4523210d6ae5134a5da959900be43ad8bab2e4206687b6620befddb5b5fd/librt-0.7.7-cp313-cp313-win32.whl", hash = "sha256:86f86b3b785487c7760247bcdac0b11aa8bf13245a13ed05206286135877564b", size = 43247, upload-time = "2026-01-01T23:51:39.629Z" }, + { url = "https://files.pythonhosted.org/packages/25/40/3ec0fed5e8e9297b1cf1a3836fb589d3de55f9930e3aba988d379e8ef67c/librt-0.7.7-cp313-cp313-win_amd64.whl", hash = "sha256:4862cb2c702b1f905c0503b72d9d4daf65a7fdf5a9e84560e563471e57a56949", size = 49419, upload-time = "2026-01-01T23:51:40.674Z" }, + { url = "https://files.pythonhosted.org/packages/1c/7a/aab5f0fb122822e2acbc776addf8b9abfb4944a9056c00c393e46e543177/librt-0.7.7-cp313-cp313-win_arm64.whl", hash = "sha256:0996c83b1cb43c00e8c87835a284f9057bc647abd42b5871e5f941d30010c832", size = 42828, upload-time = "2026-01-01T23:51:41.731Z" }, + { url = "https://files.pythonhosted.org/packages/69/9c/228a5c1224bd23809a635490a162e9cbdc68d99f0eeb4a696f07886b8206/librt-0.7.7-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:23daa1ab0512bafdd677eb1bfc9611d8ffbe2e328895671e64cb34166bc1b8c8", size = 55188, upload-time = "2026-01-01T23:51:43.14Z" }, + { url = "https://files.pythonhosted.org/packages/ba/c2/0e7c6067e2b32a156308205e5728f4ed6478c501947e9142f525afbc6bd2/librt-0.7.7-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:558a9e5a6f3cc1e20b3168fb1dc802d0d8fa40731f6e9932dcc52bbcfbd37111", size = 56895, upload-time = "2026-01-01T23:51:44.534Z" }, + { url = "https://files.pythonhosted.org/packages/0e/77/de50ff70c80855eb79d1d74035ef06f664dd073fb7fb9d9fb4429651b8eb/librt-0.7.7-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:2567cb48dc03e5b246927ab35cbb343376e24501260a9b5e30b8e255dca0d1d2", size = 163724, upload-time = "2026-01-01T23:51:45.571Z" }, + { url = 
"https://files.pythonhosted.org/packages/6e/19/f8e4bf537899bdef9e0bb9f0e4b18912c2d0f858ad02091b6019864c9a6d/librt-0.7.7-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6066c638cdf85ff92fc6f932d2d73c93a0e03492cdfa8778e6d58c489a3d7259", size = 172470, upload-time = "2026-01-01T23:51:46.823Z" }, + { url = "https://files.pythonhosted.org/packages/42/4c/dcc575b69d99076768e8dd6141d9aecd4234cba7f0e09217937f52edb6ed/librt-0.7.7-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a609849aca463074c17de9cda173c276eb8fee9e441053529e7b9e249dc8b8ee", size = 186806, upload-time = "2026-01-01T23:51:48.009Z" }, + { url = "https://files.pythonhosted.org/packages/fe/f8/4094a2b7816c88de81239a83ede6e87f1138477d7ee956c30f136009eb29/librt-0.7.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:add4e0a000858fe9bb39ed55f31085506a5c38363e6eb4a1e5943a10c2bfc3d1", size = 181809, upload-time = "2026-01-01T23:51:49.35Z" }, + { url = "https://files.pythonhosted.org/packages/1b/ac/821b7c0ab1b5a6cd9aee7ace8309c91545a2607185101827f79122219a7e/librt-0.7.7-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:a3bfe73a32bd0bdb9a87d586b05a23c0a1729205d79df66dee65bb2e40d671ba", size = 175597, upload-time = "2026-01-01T23:51:50.636Z" }, + { url = "https://files.pythonhosted.org/packages/71/f9/27f6bfbcc764805864c04211c6ed636fe1d58f57a7b68d1f4ae5ed74e0e0/librt-0.7.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:0ecce0544d3db91a40f8b57ae26928c02130a997b540f908cefd4d279d6c5848", size = 196506, upload-time = "2026-01-01T23:51:52.535Z" }, + { url = "https://files.pythonhosted.org/packages/46/ba/c9b9c6fc931dd7ea856c573174ccaf48714905b1a7499904db2552e3bbaf/librt-0.7.7-cp314-cp314-win32.whl", hash = "sha256:8f7a74cf3a80f0c3b0ec75b0c650b2f0a894a2cec57ef75f6f72c1e82cdac61d", size = 39747, upload-time = "2026-01-01T23:51:53.683Z" }, + { url = 
"https://files.pythonhosted.org/packages/c5/69/cd1269337c4cde3ee70176ee611ab0058aa42fc8ce5c9dce55f48facfcd8/librt-0.7.7-cp314-cp314-win_amd64.whl", hash = "sha256:3d1fe2e8df3268dd6734dba33ededae72ad5c3a859b9577bc00b715759c5aaab", size = 45971, upload-time = "2026-01-01T23:51:54.697Z" }, + { url = "https://files.pythonhosted.org/packages/79/fd/e0844794423f5583108c5991313c15e2b400995f44f6ec6871f8aaf8243c/librt-0.7.7-cp314-cp314-win_arm64.whl", hash = "sha256:2987cf827011907d3dfd109f1be0d61e173d68b1270107bb0e89f2fca7f2ed6b", size = 39075, upload-time = "2026-01-01T23:51:55.726Z" }, + { url = "https://files.pythonhosted.org/packages/42/02/211fd8f7c381e7b2a11d0fdfcd410f409e89967be2e705983f7c6342209a/librt-0.7.7-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:8e92c8de62b40bfce91d5e12c6e8b15434da268979b1af1a6589463549d491e6", size = 57368, upload-time = "2026-01-01T23:51:56.706Z" }, + { url = "https://files.pythonhosted.org/packages/4c/b6/aca257affae73ece26041ae76032153266d110453173f67d7603058e708c/librt-0.7.7-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f683dcd49e2494a7535e30f779aa1ad6e3732a019d80abe1309ea91ccd3230e3", size = 59238, upload-time = "2026-01-01T23:51:58.066Z" }, + { url = "https://files.pythonhosted.org/packages/96/47/7383a507d8e0c11c78ca34c9d36eab9000db5989d446a2f05dc40e76c64f/librt-0.7.7-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:9b15e5d17812d4d629ff576699954f74e2cc24a02a4fc401882dd94f81daba45", size = 183870, upload-time = "2026-01-01T23:51:59.204Z" }, + { url = "https://files.pythonhosted.org/packages/a4/b8/50f3d8eec8efdaf79443963624175c92cec0ba84827a66b7fcfa78598e51/librt-0.7.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c084841b879c4d9b9fa34e5d5263994f21aea7fd9c6add29194dbb41a6210536", size = 194608, upload-time = "2026-01-01T23:52:00.419Z" }, + { url = 
"https://files.pythonhosted.org/packages/23/d9/1b6520793aadb59d891e3b98ee057a75de7f737e4a8b4b37fdbecb10d60f/librt-0.7.7-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:10c8fb9966f84737115513fecbaf257f9553d067a7dd45a69c2c7e5339e6a8dc", size = 206776, upload-time = "2026-01-01T23:52:01.705Z" }, + { url = "https://files.pythonhosted.org/packages/ff/db/331edc3bba929d2756fa335bfcf736f36eff4efcb4f2600b545a35c2ae58/librt-0.7.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:9b5fb1ecb2c35362eab2dbd354fd1efa5a8440d3e73a68be11921042a0edc0ff", size = 203206, upload-time = "2026-01-01T23:52:03.315Z" }, + { url = "https://files.pythonhosted.org/packages/b2/e1/6af79ec77204e85f6f2294fc171a30a91bb0e35d78493532ed680f5d98be/librt-0.7.7-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:d1454899909d63cc9199a89fcc4f81bdd9004aef577d4ffc022e600c412d57f3", size = 196697, upload-time = "2026-01-01T23:52:04.857Z" }, + { url = "https://files.pythonhosted.org/packages/f3/46/de55ecce4b2796d6d243295c221082ca3a944dc2fb3a52dcc8660ce7727d/librt-0.7.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:7ef28f2e7a016b29792fe0a2dd04dec75725b32a1264e390c366103f834a9c3a", size = 217193, upload-time = "2026-01-01T23:52:06.159Z" }, + { url = "https://files.pythonhosted.org/packages/41/61/33063e271949787a2f8dd33c5260357e3d512a114fc82ca7890b65a76e2d/librt-0.7.7-cp314-cp314t-win32.whl", hash = "sha256:5e419e0db70991b6ba037b70c1d5bbe92b20ddf82f31ad01d77a347ed9781398", size = 40277, upload-time = "2026-01-01T23:52:07.625Z" }, + { url = "https://files.pythonhosted.org/packages/06/21/1abd972349f83a696ea73159ac964e63e2d14086fdd9bc7ca878c25fced4/librt-0.7.7-cp314-cp314t-win_amd64.whl", hash = "sha256:d6b7d93657332c817b8d674ef6bf1ab7796b4f7ce05e420fd45bd258a72ac804", size = 46765, upload-time = "2026-01-01T23:52:08.647Z" }, + { url = 
"https://files.pythonhosted.org/packages/51/0e/b756c7708143a63fca65a51ca07990fa647db2cc8fcd65177b9e96680255/librt-0.7.7-cp314-cp314t-win_arm64.whl", hash = "sha256:142c2cd91794b79fd0ce113bd658993b7ede0fe93057668c2f98a45ca00b7e91", size = 39724, upload-time = "2026-01-01T23:52:09.745Z" }, +] + [[package]] name = "liger-kernel" version = "0.6.2" @@ -3521,6 +3576,48 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/da/d9/f7f9379981e39b8c2511c9e0326d212accacb82f12fbfdc1aa2ce2a7b2b6/multiprocess-0.70.16-py39-none-any.whl", hash = "sha256:a0bafd3ae1b732eac64be2e72038231c1ba97724b60b09400d68f229fcc2fbf3", size = 133351, upload-time = "2024-01-28T18:52:31.981Z" }, ] +[[package]] +name = "mypy" +version = "1.19.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "librt", marker = "platform_python_implementation != 'PyPy'" }, + { name = "mypy-extensions" }, + { name = "pathspec" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f5/db/4efed9504bc01309ab9c2da7e352cc223569f05478012b5d9ece38fd44d2/mypy-1.19.1.tar.gz", hash = "sha256:19d88bb05303fe63f71dd2c6270daca27cb9401c4ca8255fe50d1d920e0eb9ba", size = 3582404, upload-time = "2025-12-15T05:03:48.42Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/06/8a/19bfae96f6615aa8a0604915512e0289b1fad33d5909bf7244f02935d33a/mypy-1.19.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a8174a03289288c1f6c46d55cef02379b478bfbc8e358e02047487cad44c6ca1", size = 13206053, upload-time = "2025-12-15T05:03:46.622Z" }, + { url = "https://files.pythonhosted.org/packages/a5/34/3e63879ab041602154ba2a9f99817bb0c85c4df19a23a1443c8986e4d565/mypy-1.19.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ffcebe56eb09ff0c0885e750036a095e23793ba6c2e894e7e63f6d89ad51f22e", size = 12219134, upload-time = "2025-12-15T05:03:24.367Z" }, + { url = 
"https://files.pythonhosted.org/packages/89/cc/2db6f0e95366b630364e09845672dbee0cbf0bbe753a204b29a944967cd9/mypy-1.19.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b64d987153888790bcdb03a6473d321820597ab8dd9243b27a92153c4fa50fd2", size = 12731616, upload-time = "2025-12-15T05:02:44.725Z" }, + { url = "https://files.pythonhosted.org/packages/00/be/dd56c1fd4807bc1eba1cf18b2a850d0de7bacb55e158755eb79f77c41f8e/mypy-1.19.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c35d298c2c4bba75feb2195655dfea8124d855dfd7343bf8b8c055421eaf0cf8", size = 13620847, upload-time = "2025-12-15T05:03:39.633Z" }, + { url = "https://files.pythonhosted.org/packages/6d/42/332951aae42b79329f743bf1da088cd75d8d4d9acc18fbcbd84f26c1af4e/mypy-1.19.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:34c81968774648ab5ac09c29a375fdede03ba253f8f8287847bd480782f73a6a", size = 13834976, upload-time = "2025-12-15T05:03:08.786Z" }, + { url = "https://files.pythonhosted.org/packages/6f/63/e7493e5f90e1e085c562bb06e2eb32cae27c5057b9653348d38b47daaecc/mypy-1.19.1-cp312-cp312-win_amd64.whl", hash = "sha256:b10e7c2cd7870ba4ad9b2d8a6102eb5ffc1f16ca35e3de6bfa390c1113029d13", size = 10118104, upload-time = "2025-12-15T05:03:10.834Z" }, + { url = "https://files.pythonhosted.org/packages/de/9f/a6abae693f7a0c697dbb435aac52e958dc8da44e92e08ba88d2e42326176/mypy-1.19.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e3157c7594ff2ef1634ee058aafc56a82db665c9438fd41b390f3bde1ab12250", size = 13201927, upload-time = "2025-12-15T05:02:29.138Z" }, + { url = "https://files.pythonhosted.org/packages/9a/a4/45c35ccf6e1c65afc23a069f50e2c66f46bd3798cbe0d680c12d12935caa/mypy-1.19.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bdb12f69bcc02700c2b47e070238f42cb87f18c0bc1fc4cdb4fb2bc5fd7a3b8b", size = 12206730, upload-time = "2025-12-15T05:03:01.325Z" }, + { url = 
"https://files.pythonhosted.org/packages/05/bb/cdcf89678e26b187650512620eec8368fded4cfd99cfcb431e4cdfd19dec/mypy-1.19.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f859fb09d9583a985be9a493d5cfc5515b56b08f7447759a0c5deaf68d80506e", size = 12724581, upload-time = "2025-12-15T05:03:20.087Z" }, + { url = "https://files.pythonhosted.org/packages/d1/32/dd260d52babf67bad8e6770f8e1102021877ce0edea106e72df5626bb0ec/mypy-1.19.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c9a6538e0415310aad77cb94004ca6482330fece18036b5f360b62c45814c4ef", size = 13616252, upload-time = "2025-12-15T05:02:49.036Z" }, + { url = "https://files.pythonhosted.org/packages/71/d0/5e60a9d2e3bd48432ae2b454b7ef2b62a960ab51292b1eda2a95edd78198/mypy-1.19.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:da4869fc5e7f62a88f3fe0b5c919d1d9f7ea3cef92d3689de2823fd27e40aa75", size = 13840848, upload-time = "2025-12-15T05:02:55.95Z" }, + { url = "https://files.pythonhosted.org/packages/98/76/d32051fa65ecf6cc8c6610956473abdc9b4c43301107476ac03559507843/mypy-1.19.1-cp313-cp313-win_amd64.whl", hash = "sha256:016f2246209095e8eda7538944daa1d60e1e8134d98983b9fc1e92c1fc0cb8dd", size = 10135510, upload-time = "2025-12-15T05:02:58.438Z" }, + { url = "https://files.pythonhosted.org/packages/de/eb/b83e75f4c820c4247a58580ef86fcd35165028f191e7e1ba57128c52782d/mypy-1.19.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:06e6170bd5836770e8104c8fdd58e5e725cfeb309f0a6c681a811f557e97eac1", size = 13199744, upload-time = "2025-12-15T05:03:30.823Z" }, + { url = "https://files.pythonhosted.org/packages/94/28/52785ab7bfa165f87fcbb61547a93f98bb20e7f82f90f165a1f69bce7b3d/mypy-1.19.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:804bd67b8054a85447c8954215a906d6eff9cabeabe493fb6334b24f4bfff718", size = 12215815, upload-time = "2025-12-15T05:02:42.323Z" }, + { url = 
"https://files.pythonhosted.org/packages/0a/c6/bdd60774a0dbfb05122e3e925f2e9e846c009e479dcec4821dad881f5b52/mypy-1.19.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:21761006a7f497cb0d4de3d8ef4ca70532256688b0523eee02baf9eec895e27b", size = 12740047, upload-time = "2025-12-15T05:03:33.168Z" }, + { url = "https://files.pythonhosted.org/packages/32/2a/66ba933fe6c76bd40d1fe916a83f04fed253152f451a877520b3c4a5e41e/mypy-1.19.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:28902ee51f12e0f19e1e16fbe2f8f06b6637f482c459dd393efddd0ec7f82045", size = 13601998, upload-time = "2025-12-15T05:03:13.056Z" }, + { url = "https://files.pythonhosted.org/packages/e3/da/5055c63e377c5c2418760411fd6a63ee2b96cf95397259038756c042574f/mypy-1.19.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:481daf36a4c443332e2ae9c137dfee878fcea781a2e3f895d54bd3002a900957", size = 13807476, upload-time = "2025-12-15T05:03:17.977Z" }, + { url = "https://files.pythonhosted.org/packages/cd/09/4ebd873390a063176f06b0dbf1f7783dd87bd120eae7727fa4ae4179b685/mypy-1.19.1-cp314-cp314-win_amd64.whl", hash = "sha256:8bb5c6f6d043655e055be9b542aa5f3bdd30e4f3589163e85f93f3640060509f", size = 10281872, upload-time = "2025-12-15T05:03:05.549Z" }, + { url = "https://files.pythonhosted.org/packages/8d/f4/4ce9a05ce5ded1de3ec1c1d96cf9f9504a04e54ce0ed55cfa38619a32b8d/mypy-1.19.1-py3-none-any.whl", hash = "sha256:f1235f5ea01b7db5468d53ece6aaddf1ad0b88d9e7462b86ef96fe04995d7247", size = 2471239, upload-time = "2025-12-15T05:03:07.248Z" }, +] + +[[package]] +name = "mypy-extensions" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = 
"2025-04-22T14:54:24.164Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, +] + [[package]] name = "myst-parser" version = "4.0.1" @@ -3698,7 +3795,7 @@ test = [ [[package]] name = "nemo-gym" -source = { editable = "3rdparty/Gym-workspace" } +source = { editable = "3rdparty/Gym-workspace/Gym" } dependencies = [ { name = "aiohttp" }, { name = "datasets" }, @@ -3709,38 +3806,89 @@ dependencies = [ { name = "mlflow" }, { name = "omegaconf" }, { name = "openai" }, + { name = "orjson" }, { name = "psutil" }, { name = "pydantic" }, { name = "pydantic-core" }, { name = "ray", extra = ["default"] }, - { name = "tdigest" }, { name = "tqdm" }, { name = "uvicorn" }, { name = "uvloop" }, { name = "yappi" }, ] +[package.optional-dependencies] +dev = [ + { name = "coverage" }, + { name = "mypy" }, + { name = "pre-commit" }, + { name = "pytest" }, + { name = "pytest-asyncio" }, + { name = "pytest-cov" }, + { name = "pytest-xdist" }, + { name = "requests-mock" }, + { name = "ruff" }, +] + +[package.dev-dependencies] +docs = [ + { name = "myst-parser" }, + { name = "nvidia-sphinx-theme" }, + { name = "sphinx" }, + { name = "sphinx-autobuild" }, + { name = "sphinx-autodoc2" }, + { name = "sphinx-copybutton" }, + { name = "sphinx-design" }, + { name = "sphinx-reredirects" }, + { name = "sphinxcontrib-mermaid" }, + { name = "swagger-plugin-for-sphinx" }, +] + [package.metadata] requires-dist = [ { name = "aiohttp" }, + { name = "coverage", extras = ["toml"], marker = "extra == 'dev'" }, { name = "datasets" }, { name = "devtools" }, { name = "fastapi" }, { name = "gradio" }, { name = "hydra-core" }, { name = "mlflow" }, + { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.8.0" }, { name = "omegaconf" }, { name = 
"openai", specifier = "<=2.6.1" }, + { name = "orjson" }, + { name = "pre-commit", marker = "extra == 'dev'", specifier = ">=3.6.0" }, { name = "psutil" }, { name = "pydantic" }, { name = "pydantic-core" }, + { name = "pytest", marker = "extra == 'dev'" }, + { name = "pytest-asyncio", marker = "extra == 'dev'" }, + { name = "pytest-cov", marker = "extra == 'dev'" }, + { name = "pytest-xdist", marker = "extra == 'dev'" }, { name = "ray", extras = ["default"] }, - { name = "tdigest", specifier = ">=0.5.2.2" }, + { name = "requests-mock", marker = "extra == 'dev'" }, + { name = "ruff", marker = "extra == 'dev'" }, { name = "tqdm" }, { name = "uvicorn" }, { name = "uvloop" }, { name = "yappi" }, ] +provides-extras = ["dev"] + +[package.metadata.requires-dev] +docs = [ + { name = "myst-parser", specifier = ">=4.0.1" }, + { name = "nvidia-sphinx-theme", specifier = ">=0.0.8" }, + { name = "sphinx", specifier = ">=8.2.3" }, + { name = "sphinx-autobuild", specifier = ">=2025.8.25" }, + { name = "sphinx-autodoc2", specifier = ">=0.5.0" }, + { name = "sphinx-copybutton", specifier = ">=0.5.2" }, + { name = "sphinx-design", specifier = ">=0.6.1" }, + { name = "sphinx-reredirects", specifier = ">=0.1.6" }, + { name = "sphinxcontrib-mermaid", specifier = ">=1.0.0" }, + { name = "swagger-plugin-for-sphinx", specifier = ">=6.0.0" }, +] [[package]] name = "nemo-rl" @@ -3883,7 +4031,7 @@ requires-dist = [ { name = "megatron-core", marker = "extra == 'mcore'", editable = "3rdparty/Megatron-LM-workspace" }, { name = "mlflow", specifier = ">=3.5.0,<3.6.0" }, { name = "nemo-automodel", marker = "extra == 'automodel'", editable = "3rdparty/Automodel-workspace/Automodel" }, - { name = "nemo-gym", marker = "extra == 'nemo-gym'", editable = "3rdparty/Gym-workspace" }, + { name = "nemo-gym", marker = "extra == 'nemo-gym'", editable = "3rdparty/Gym-workspace/Gym" }, { name = "ninja" }, { name = "num2words", specifier = ">=0.5.14" }, { name = "num2words", marker = "extra == 'vllm'", specifier 
= ">=0.5.14" }, @@ -5523,6 +5671,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fa/b6/3127540ecdf1464a00e5a01ee60a1b09175f6913f0644ac748494d9c4b21/pytest_timeout-2.4.0-py3-none-any.whl", hash = "sha256:c42667e5cdadb151aeb5b26d114aff6bdf5a907f176a007a30b940d3d865b5c2", size = 14382, upload-time = "2025-05-05T19:44:33.502Z" }, ] +[[package]] +name = "pytest-xdist" +version = "3.8.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "execnet" }, + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/78/b4/439b179d1ff526791eb921115fca8e44e596a13efeda518b9d845a619450/pytest_xdist-3.8.0.tar.gz", hash = "sha256:7e578125ec9bc6050861aa93f2d59f1d8d085595d6551c2c90b6f4fad8d3a9f1", size = 88069, upload-time = "2025-07-01T13:30:59.346Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ca/31/d4e37e9e550c2b92a9cbc2e4d0b7420a27224968580b5a447f420847c975/pytest_xdist-3.8.0-py3-none-any.whl", hash = "sha256:202ca578cfeb7370784a8c33d6d05bc6e13b4f25b5053c30a152269fd10f0b88", size = 46396, upload-time = "2025-07-01T13:30:56.632Z" }, +] + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -5571,12 +5732,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225, upload-time = "2025-03-25T02:24:58.468Z" }, ] -[[package]] -name = "pyudorandom" -version = "1.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/13/14/6fc20ea903eda547d6a255e995f8d4a09fdc3cf8bfacb6f85e6d669bc259/pyudorandom-1.0.0.tar.gz", hash = "sha256:f30a093a0170c15f9c7f87eb29f71f0f5fde995528b7c6dc4606d389e8c37755", size = 1599, upload-time = "2016-07-18T16:18:56.037Z" } - [[package]] name = "pywin32" version = "311" @@ -5824,6 +5979,18 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" }, ] +[[package]] +name = "requests-mock" +version = "1.12.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/92/32/587625f91f9a0a3d84688bf9cfc4b2480a7e8ec327cefd0ff2ac891fd2cf/requests-mock-1.12.1.tar.gz", hash = "sha256:e9e12e333b525156e82a3c852f22016b9158220d2f47454de9cae8a77d371401", size = 60901, upload-time = "2024-03-29T03:54:29.446Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/97/ec/889fbc557727da0c34a33850950310240f2040f3b1955175fdb2b36a8910/requests_mock-1.12.1-py2.py3-none-any.whl", hash = "sha256:b1e37054004cdd5e56c84454cc7df12b25f90f382159087f4b6915aaeef39563", size = 27695, upload-time = "2024-03-29T03:54:27.64Z" }, +] + [[package]] name = "rich" version = "13.9.4" @@ -6486,7 +6653,7 @@ wheels = [ [[package]] name = "sphinx-autobuild" -version = "2024.10.3" +version = "2025.8.25" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama" }, @@ -6496,9 +6663,9 @@ dependencies = [ { name = "watchfiles" }, { name = "websockets" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a5/2c/155e1de2c1ba96a72e5dba152c509a8b41e047ee5c2def9e9f0d812f8be7/sphinx_autobuild-2024.10.3.tar.gz", hash = "sha256:248150f8f333e825107b6d4b86113ab28fa51750e5f9ae63b59dc339be951fb1", size = 14023, upload-time = "2024-10-02T23:15:30.172Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/3c/a59a3a453d4133777f7ed2e83c80b7dc817d43c74b74298ca0af869662ad/sphinx_autobuild-2025.8.25.tar.gz", hash = "sha256:9cf5aab32853c8c31af572e4fecdc09c997e2b8be5a07daf2a389e270e85b213", size = 15200, upload-time = "2025-08-25T18:44:55.436Z" } 
wheels = [ - { url = "https://files.pythonhosted.org/packages/18/c0/eba125db38c84d3c74717008fd3cb5000b68cd7e2cbafd1349c6a38c3d3b/sphinx_autobuild-2024.10.3-py3-none-any.whl", hash = "sha256:158e16c36f9d633e613c9aaf81c19b0fc458ca78b112533b20dafcda430d60fa", size = 11908, upload-time = "2024-10-02T23:15:28.739Z" }, + { url = "https://files.pythonhosted.org/packages/d7/20/56411b52f917696995f5ad27d2ea7e9492c84a043c5b49a3a3173573cd93/sphinx_autobuild-2025.8.25-py3-none-any.whl", hash = "sha256:b750ac7d5a18603e4665294323fd20f6dcc0a984117026d1986704fa68f0379a", size = 12535, upload-time = "2025-08-25T18:44:54.164Z" }, ] [[package]] @@ -6538,6 +6705,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c6/43/65c0acbd8cc6f50195a3a1fc195c404988b15c67090e73c7a41a9f57d6bd/sphinx_design-0.6.1-py3-none-any.whl", hash = "sha256:b11f37db1a802a183d61b159d9a202314d4d2fe29c163437001324fe2f19549c", size = 2215338, upload-time = "2024-08-02T13:48:42.106Z" }, ] +[[package]] +name = "sphinx-reredirects" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "sphinx" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1b/8d/0e39fe2740d7d71417edf9a6424aa80ca2c27c17fc21282cdc39f90d5a40/sphinx_reredirects-1.1.0.tar.gz", hash = "sha256:fb9b195335ab14b43f8273287d0c7eeb637ba6c56c66581c11b47202f6718b29", size = 614624, upload-time = "2025-12-22T08:28:02.792Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/51/81/b5dd07067f3daac6d23687ec737b2d593740671ebcd145830c8f92d381c5/sphinx_reredirects-1.1.0-py3-none-any.whl", hash = "sha256:4b5692273c72cd2d4d917f4c6f87d5919e4d6114a752d4be033f7f5f6310efd9", size = 6351, upload-time = "2025-12-22T08:27:59.724Z" }, +] + [[package]] name = "sphinxcontrib-applehelp" version = "2.0.0" @@ -6765,20 +6944,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = 
"sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252, upload-time = "2022-10-06T17:21:44.262Z" }, ] -[[package]] -name = "tdigest" -version = "0.5.2.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "accumulation-tree" }, - { name = "pyudorandom" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/dd/34/7e2f78d1ed0af7d0039ab2cff45b6bf8512234b9f178bb21713084a1f2f0/tdigest-0.5.2.2.tar.gz", hash = "sha256:8deffc8bac024761786f43d9444e3b6c91008cd690323e051f068820a7364d0e", size = 6549, upload-time = "2019-05-07T18:57:40.771Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/32/72/f420480118cbdd18eb761b9936f0a927957130659a638449575b4a4f0aa7/tdigest-0.5.2.2-py2.py3-none-any.whl", hash = "sha256:e32ff6ab62e4defdb93b816c831080d94dfa1efb68a9fa1e7976c237fa9375cb", size = 9445, upload-time = "2019-05-07T18:57:37.493Z" }, - { url = "https://files.pythonhosted.org/packages/b4/94/fd3853b98f39d10206b08f2737d2ec2dc6f46a42dc7b7e05f4f0162d13ee/tdigest-0.5.2.2-py3-none-any.whl", hash = "sha256:dd25f8d6e6be002192bba9e4b8c16491d36c10b389f50637818603d1f67c6fb2", size = 9440, upload-time = "2019-05-07T18:57:38.942Z" }, -] - [[package]] name = "template-project" version = "0.1.0"