diff --git a/3rdparty/Gym-workspace/Gym b/3rdparty/Gym-workspace/Gym
index 39ee39e35b..c192ee407f 160000
--- a/3rdparty/Gym-workspace/Gym
+++ b/3rdparty/Gym-workspace/Gym
@@ -1 +1 @@
-Subproject commit 39ee39e35bfedef86ccf8d1ada9835ab8f62d267
+Subproject commit c192ee407ff71046015d11da7c8960082bd62418
diff --git a/3rdparty/Gym-workspace/setup.py b/3rdparty/Gym-workspace/setup.py
index ddb5c62284..b6df0d66c0 100644
--- a/3rdparty/Gym-workspace/setup.py
+++ b/3rdparty/Gym-workspace/setup.py
@@ -42,6 +42,7 @@
     "yappi",
     "ray[default]",
     "psutil",
+    "datasets",
 ]
 
 if src_dir.exists():
diff --git a/examples/nemo_gym/grpo_workplace_assistant_nemotron_nano_v2_9b.yaml b/examples/nemo_gym/grpo_workplace_assistant_nemotron_nano_v2_9b.yaml
new file mode 100644
index 0000000000..dea76e41cf
--- /dev/null
+++ b/examples/nemo_gym/grpo_workplace_assistant_nemotron_nano_v2_9b.yaml
@@ -0,0 +1,277 @@
+grpo:
+  max_num_epochs: 1
+  num_prompts_per_step: 64
+  num_generations_per_prompt: 16
+  max_rollout_turns: 1 # for multi-turn rollouts. Workplace assistant has 1 turn but can have up to 6 tool-calling steps
+  max_num_steps: 1000000
+  normalize_rewards: true
+  use_leave_one_out_baseline: true
+  val_period: 10
+  val_at_start: true
+  overlong_filtering: false
+  max_val_samples: null # inferred from size of val dataset. for multi evals, repeat val ds via `num_repeats` in `ng_prepare_data`.
+  val_batch_size: null
+  seed: 42
+  use_dynamic_sampling: false
+  dynamic_sampling_max_gen_batches: 10
+  batch_multiplier: 1
+  reward_shaping:
+    enabled: false
+    overlong_buffer_length: 128
+    overlong_buffer_penalty: 1
+    max_response_length: ${policy.max_total_sequence_length}
+  reward_scaling:
+    enabled: false
+    source_min: 0.0
+    source_max: 1.0
+    target_min: 0.0
+    target_max: 1.0
+  skip_reference_policy_logprobs_calculation: true
+
+loss_fn:
+  reference_policy_kl_penalty: 0
+  reference_policy_kl_type: "k3"
+  kl_input_clamp_value: 20.0
+  kl_output_clamp_value: 10.0
+  ratio_clip_min: 0.2
+  ratio_clip_max: 0.2
+  ratio_clip_c: null
+  # (default off) loss formulation improvements (docs/guides/grpo.md#loss)
+  use_on_policy_kl_approximation: false
+  truncated_importance_sampling_ratio: null
+  use_importance_sampling_correction: false
+  token_level_loss: true
+
+checkpointing:
+  enabled: true
+  metric_name: "val:accuracy"
+  higher_is_better: true
+  keep_top_k: 3
+  save_period: 6 # Save a checkpoint every 6 steps (aligned with val_period)
+  checkpoint_must_save_by: null
+
+policy:
+  model_name: "nvidia/NVIDIA-Nemotron-Nano-9B-v2"
+  tokenizer:
+    name: ${policy.model_name} ## specify if you'd like to use a tokenizer different from the model's default
+    chat_template_kwargs: null # can be used to pass kwargs to the chat template, e.g., enable_thinking=true
+  hf_config_overrides: {}
+  train_global_batch_size: ${mul:${grpo.num_prompts_per_step}, ${grpo.num_generations_per_prompt}} # Match the total rollouts per step
+  train_micro_batch_size: 1
+  logprob_batch_size: 1
+  generation_batch_size: 32 # Only used when generating with the HF backend
+  max_total_sequence_length: 8192
+  precision: "bfloat16"
+  logprob_chunk_size: null # Disabled to allow defer_fp32_logits: false
+
+  dtensor_cfg:
+    _v2: false
+    enabled: false
+    cpu_offload: False
+    sequence_parallel: false
+    activation_checkpointing: true
+    tensor_parallel_size: 2
+    context_parallel_size: 1
+    custom_parallel_plan: null
+    clear_cache_every_n_steps: null
+
+  megatron_cfg:
+    enabled: true
+    bias_activation_fusion: false
+    tensor_model_parallel_size: 2
+    # We might want to consider setting this value higher (e.g., to 1) and raising the vLLM generation max memory utilization
+    empty_unused_memory_level: 0
+    activation_checkpointing: true
+    # train_iters needs to be large enough to cover all training steps
+    train_iters: 100000
+    expert_tensor_parallel_size: 1
+    expert_model_parallel_size: 1
+    pipeline_model_parallel_size: 1
+    num_layers_in_first_pipeline_stage: null
+    num_layers_in_last_pipeline_stage: null
+    context_parallel_size: 1
+    pipeline_dtype: ${policy.precision}
+    sequence_parallel: false
+    freeze_moe_router: true
+    moe_router_dtype: "fp64"
+    moe_router_load_balancing_type: "none" # "seq_aux_loss" causes logprob error divergence for grpo
+    moe_router_bias_update_rate: 0.0 # by default, disable bias updates for grpo
+    # gives ~20% training perf speedup with sequence packing
+    apply_rope_fusion: True
+    defer_fp32_logits: false
+    moe_permute_fusion: false
+
+    optimizer:
+      optimizer: "adam"
+      lr: 5.0e-6
+      min_lr: 5.0e-7
+      weight_decay: 0.01
+      bf16: true
+      fp16: false
+      params_dtype: "float32"
+
+      # adam
+      adam_beta1: 0.9
+      adam_beta2: 0.999
+      adam_eps: 1e-8
+
+      # sgd
+      sgd_momentum: 0.9
+
+      # distributed optimizer
+      use_distributed_optimizer: true
+      use_precision_aware_optimizer: true
+
+      # optimizer cpu offload
+      optimizer_cpu_offload: false
+      optimizer_offload_fraction: 0.0
+
+      clip_grad: ${policy.max_grad_norm}
+
+    scheduler:
+      start_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay}
+      end_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay}
+      weight_decay_incr_style: "constant"
+      lr_decay_style: "constant"
+      lr_decay_iters: 100000 # Must be greater than lr_warmup_iters
+      lr_warmup_iters: 13
+      lr_warmup_init: 5.0e-7
+      override_opt_param_scheduler: true
+
+    distributed_data_parallel_config:
+      grad_reduce_in_fp32: false
+      overlap_grad_reduce: true
+      overlap_param_gather: true
+      use_custom_fsdp: false
+      data_parallel_sharding_strategy: "optim_grads_params"
+
+    env_vars: null
+
+  # See docs/design-docs/sequence-packing-and-dynamic-batching.md
+  # for more details on dynamic batching and sequence packing.
+  dynamic_batching:
+    enabled: False
+    train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}}
+    logprob_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.logprob_batch_size}}
+    sequence_length_round: 64
+
+  sequence_packing:
+    enabled: false
+    train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}}
+    logprob_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.logprob_batch_size}}
+    algorithm: "modified_first_fit_decreasing"
+    sequence_length_round: 64
+
+  # makes the training sequence length divisible by the tensor parallel size
+  # this is useful for sequence parallel training
+  make_sequence_length_divisible_by: 1
+  max_grad_norm: 1.0
+  offload_optimizer_for_logprob: false # Only useful for non-colocated generation since colocated generation will always offload optimizer to cuda before refit
+
+  optimizer: null
+
+  scheduler:
+    - name: "torch.optim.lr_scheduler.ConstantLR"
+      kwargs:
+        factor: 1.0
+        total_iters: 10000000000
+    - milestones: []
+
+  generation:
+    backend: "vllm"
+    max_new_tokens: ${policy.max_total_sequence_length}
+    temperature: 1.0
+    top_p: 1.0
+    top_k: null
+    stop_token_ids: null
+    stop_strings: null
+    vllm_cfg:
+      async_engine: true
+      precision: ${policy.precision}
+      tensor_parallel_size: 1
+      pipeline_parallel_size: 1
+      enable_expert_parallel: false
+      expert_parallel_size: 1
+      gpu_memory_utilization: 0.8
+      max_model_len: ${policy.max_total_sequence_length}
+      enforce_eager: false
+      use_deep_gemm: False
+      num_last_layers_in_bf16: 0
+      num_first_layers_in_bf16: 0
+      kv_cache_dtype: "auto"
+      expose_http_server: true
+      skip_tokenizer_init: false
+      tool_parser_plugin: ???
+      http_server_serving_chat_kwargs:
+        # Workplace assistant uses 26 tools, so we enable auto_tools.
+        # For Nemotron Nano v2, we use the dedicated `nemotron_json` tool parser.
+        enable_auto_tools: true
+        tool_parser: nemotron_json
+    vllm_kwargs:
+      compilation_config:
+        # when enforce_eager is False, set ++policy.generation.vllm_kwargs.compilation_config.use_inductor=False for better accuracy,
+        # with the flag, vLLM will use the custom CUDA kernels instead of the Triton kernels generated by torch.compile
+        # for more details, see convergence issue https://github.com/NVIDIA-NeMo/RL/issues/998
+        use_inductor: False
+      # We need the Mamba cache to be set to fp32 for Nemotron Nano v2
+      mamba_ssm_cache_dtype: "float32"
+    colocated:
+      # true: generation shares training GPUs
+      # false: uses dedicated generation resources
+      enabled: true
+      # only relevant when enabled is false
+      resources:
+        gpus_per_node: null # Decides the number of GPUs dedicated to generation when there is one node in the cluster, i.e., cluster.num_nodes == 1
+        num_nodes: null # Decides the number of nodes dedicated to generation
+
+data:
+  # Using the prepared train and validation datasets (downloaded from HuggingFace and split 90/10)
+  # Train: 1129 samples, Validation: 126 samples
+  train_jsonl_fpath: 3rdparty/Gym-workspace/Gym/resources_servers/workplace_assistant/data/train.jsonl
+  validation_jsonl_fpath: 3rdparty/Gym-workspace/Gym/resources_servers/workplace_assistant/data/validation.jsonl
+  agent_name: workplace_assistant_simple_agent
+  shuffle: true
+  num_workers: 0
+
+env:
+  should_use_nemo_gym: true
+  should_log_nemo_gym_responses: true # If you have low logging storage, set this to false
+  nemo_gym: # This is passed into NeMo-Gym as the initial_global_config_dict
+    config_paths:
+      - responses_api_models/vllm_model/configs/vllm_model_for_training.yaml # Required! And it must be *for_training
+      - resources_servers/workplace_assistant/configs/workplace_assistant.yaml
+    workplace_assistant_simple_agent:
+      responses_api_agents:
+        simple_agent:
+          max_steps: 6 # Workplace assistant allows up to 6 tool-calling steps per task
+    policy_model:
+      responses_api_models:
+        vllm_model:
+          # Disable reasoning!
+          uses_reasoning_parser: false
+          extra_body:
+            chat_template_kwargs:
+              enable_thinking: false
+
+logger:
+  log_dir: "logs/grpo-workplace-assistant-nemotron-nano-v2-9b" # Base directory for all logs
+  num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal
+  wandb_enabled: true
+  tensorboard_enabled: false
+  mlflow_enabled: false # Disable MLflow logging
+  swanlab_enabled: false
+  monitor_gpus: true # If true, will monitor GPU usage and log to wandb and/or tensorboard
+  wandb:
+    project: "grpo-workplace-assistant"
+    name: "nemotron-nano-v2-9b-workplace-assistant"
+  tensorboard: {}
+  mlflow:
+    experiment_name: "grpo-workplace-assistant"
+    run_name: "nemotron-nano-v2-9b-workplace-assistant"
+  gpu_monitoring:
+    collection_interval: 10 # How often to collect GPU usage metrics (in seconds)
+    flush_interval: 10 # How often to flush GPU usage metrics to the loggers (in seconds)
+
+cluster:
+  gpus_per_node: 8
+  num_nodes: 1 # Single node by default; set to 2+ for multi-node training
diff --git a/examples/nemo_gym/launch_nemo_gym_multinode_training.sh b/examples/nemo_gym/launch_nemo_gym_multinode_training.sh
new file mode 100755
index 0000000000..37ede71772
--- /dev/null
+++ b/examples/nemo_gym/launch_nemo_gym_multinode_training.sh
@@ -0,0 +1,49 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# ----- PARAMETERS -----
+# WANDB_API_KEY, EXP_NAME, NUM_ACTOR_NODES, REPO_LOCATION, CONTAINER_IMAGE_PATH, SLURM_ACCOUNT, SLURM_PARTITION
+
+# ray.sub needs to be launched from the NeMo-RL root directory
+cd $REPO_LOCATION
+
+# Construct the command
+read -r -d '' COMMAND < len(
+        template_prefix_token_ids
+    ), f"""Found possibly non-monotonically increasing trajectory!
+Template prefix token IDs (everything before the final assistant message): {template_prefix_token_ids}
+
+Template token IDs (everything that was sent to the model endpoint): {template_token_ids}
+
+Template prefix repr (detokenized): {repr(tokenizer.decode(template_prefix_token_ids))}
+
+Template repr (detokenized): {repr(tokenizer.decode(template_token_ids))}
+"""
+
     # We take everything starting with the EOS token ID.
     template_cut_start = -1
     for pos in reversed(range(len(template_prefix_token_ids))):
@@ -114,9 +127,16 @@ def _replace_prefix_tokens(
             break
 
     # This should never be the case, but
-    assert template_cut_start >= 0, (
-        "No EOS token ID found in the chat-templated messages!"
-    )
+    assert (
+        template_cut_start >= 0
+    ), f"""No EOS token ID found in the chat-templated messages!
+Template prefix token IDs (everything before the final assistant message): {template_prefix_token_ids}
+
+Template token IDs (everything that was sent to the model endpoint): {template_token_ids}
+
+Template prefix repr (detokenized): {repr(tokenizer.decode(template_prefix_token_ids))}
+
+Template repr (detokenized): {repr(tokenizer.decode(template_token_ids))}"""
 
     return (
         model_prefix_token_ids[:model_cut_end]
         + template_token_ids[template_cut_start:]
@@ -269,6 +289,7 @@ def _setup_vllm_openai_api_server(self, app: FastAPI) -> FastAPI:
         from fastapi import Request
         from fastapi.responses import JSONResponse, StreamingResponse
+        from vllm import __version__ as vllm_version
         from vllm.entrypoints.openai.api_server import (
             BaseModelPath,
             OpenAIServingChat,
@@ -283,8 +304,13 @@ def _setup_vllm_openai_api_server(self, app: FastAPI) -> FastAPI:
             TokenizeCompletionRequest,
             TokenizeResponse,
         )
+        from vllm.entrypoints.openai.tool_parsers import ToolParserManager
         from vllm.v1.engine.async_llm import logger as vllm_async_llm_logger
 
+        maybe_tool_parser_plugin = self.cfg["vllm_cfg"].get("tool_parser_plugin")
+        if maybe_tool_parser_plugin:
+            ToolParserManager.import_tool_parser(maybe_tool_parser_plugin)
+
         engine_client = self.llm
         model_config = self.llm_async_engine_args.create_model_config()
         base_model_paths = [
@@ -294,12 +320,15 @@ def _setup_vllm_openai_api_server(self, app: FastAPI) -> FastAPI:
             BaseModelPath(name=model_config.model, model_path=model_config.model),
         ]
 
-        openai_serving_models = OpenAIServingModels(
+        openai_serving_models_kwargs = dict(
             engine_client=engine_client,
-            model_config=model_config,
             base_model_paths=base_model_paths,
             lora_modules=None,
         )
+        # Remove this fork when https://github.com/NVIDIA-NeMo/RL/pull/1563 is merged to NeMo RL main, bumping to vLLM 0.11.2
+        if vllm_version < "0.11.1":
+            openai_serving_models_kwargs["model_config"] = model_config
+        openai_serving_models = OpenAIServingModels(**openai_serving_models_kwargs)
 
         class NeMoRLOpenAIChatRequestMixin:
             def model_post_init(self, context):
@@ -435,13 +464,17 @@ class NeMoRLOpenAIServingChat(NeMoRLOpenAIServingMixin, OpenAIServingChat):
         serving_chat_kwargs = serving_chat_default_kwargs | self.cfg["vllm_cfg"].get(
             "http_server_serving_chat_kwargs", dict()
         )
-        openai_serving_chat = NeMoRLOpenAIServingChat(
-            engine_client,
-            model_config,
-            openai_serving_models,
-            return_tokens_as_token_ids=True,
-            **serving_chat_kwargs,
+        serving_chat_kwargs.update(
+            dict(
+                engine_client=engine_client,
+                models=openai_serving_models,
+                return_tokens_as_token_ids=True,
+            )
         )
+        # Remove this fork when https://github.com/NVIDIA-NeMo/RL/pull/1563 is merged to NeMo RL main, bumping to vLLM 0.11.2
+        if vllm_version < "0.11.1":
+            serving_chat_kwargs["model_config"] = model_config
+        openai_serving_chat = NeMoRLOpenAIServingChat(**serving_chat_kwargs)
 
         generation_config = self.cfg
@@ -496,15 +529,20 @@ class NeMoRLOpenAIServingTokenization(
         ):
             pass
 
-        openai_serving_tokenization = NeMoRLOpenAIServingTokenization(
-            engine_client,
-            model_config,
-            openai_serving_models,
+        serving_tokenization_kwargs = dict(
             request_logger=serving_chat_kwargs["request_logger"],
             chat_template=serving_chat_kwargs["chat_template"],
             chat_template_content_format=serving_chat_kwargs[
                 "chat_template_content_format"
             ],
+            engine_client=serving_chat_kwargs["engine_client"],
+            models=serving_chat_kwargs["models"],
+        )
+        # Remove this fork when https://github.com/NVIDIA-NeMo/RL/pull/1563 is merged to NeMo RL main, bumping to vLLM 0.11.2
+        if vllm_version < "0.11.1":
+            serving_tokenization_kwargs["model_config"] = model_config
+        openai_serving_tokenization = NeMoRLOpenAIServingTokenization(
+            **serving_tokenization_kwargs
         )
 
         @app.post("/tokenize")
@@ -524,15 +562,21 @@ async def tokenize(request: NeMoRLTokenizeRequest, raw_request: Request):
         # Logging
         ########################################
         print(
-            "Adding a vLLM logging filter so that the logs aren't spammed with `Added request ...` messages. This is to help errors pop up better and filter out noise."
+            "Adding a vLLM logging filter so that the logs aren't spammed with unhelpful messages like `Added request ...`. This is to help errors pop up better and filter out noise."
        )
 
-        class NoAddedRequestFilter(LoggingFilter):
+        class CleanLoggingFilter(LoggingFilter):
             def filter(self, record: LogRecord) -> bool:
                 msg = record.getMessage()
-                return "Added request" not in msg
-        vllm_async_llm_logger.addFilter(NoAddedRequestFilter())
+                # vLLM does not accept `strict` tool definitions, and reporting that to the user is not useful either.
+                return (
+                    "Added request" not in msg
+                    and "The following fields were present in the request but ignored: {'strict'}"
+                    not in msg
+                )
+
+        vllm_async_llm_logger.addFilter(CleanLoggingFilter())
 
         return app
diff --git a/tests/unit/models/generation/test_vllm_generation.py b/tests/unit/models/generation/test_vllm_generation.py
index ac03e0506f..e39fef12d4 100644
--- a/tests/unit/models/generation/test_vllm_generation.py
+++ b/tests/unit/models/generation/test_vllm_generation.py
@@ -1164,6 +1164,7 @@ def test_vllm_http_server(cluster, tokenizer):
                 "function_call": None,
                 "tool_calls": [],
                 "reasoning_content": None,
+                "reasoning": None,
             },
             "logprobs": {
                 "content": [
@@ -1200,6 +1201,11 @@ def _standardize(d: dict) -> dict:
         # We don't want to implicate log prob accuracy in this test.
d["choices"][0]["logprobs"]["content"][0].pop("logprob") + # Remove this fork when https://github.com/NVIDIA-NeMo/RL/pull/1563 is merged to NeMo RL main bumping to vLLM 0.11.2 + message = d["choices"][0]["message"] + if "reasoning" in message: + message.pop("reasoning") + return d assert _standardize(expected_result) == _standardize(actual_result) @@ -1369,6 +1375,9 @@ def test_replace_prefix_tokens_missing_eos_in_template_prefix_raises(): class _T: eos_token_id = 2 + def decode(self, *args, **kwargs): + pass + tokenizer = _T() model_prefix_token_ids = [7, 2] template_prefix_token_ids = [9, 9, 9] # no EOS inside prefix diff --git a/uv.lock b/uv.lock index 7d12c3b39d..d333c863c4 100644 --- a/uv.lock +++ b/uv.lock @@ -3465,6 +3465,7 @@ name = "nemo-gym" source = { editable = "3rdparty/Gym-workspace" } dependencies = [ { name = "aiohttp" }, + { name = "datasets" }, { name = "devtools" }, { name = "fastapi" }, { name = "gradio" }, @@ -3486,6 +3487,7 @@ dependencies = [ [package.metadata] requires-dist = [ { name = "aiohttp" }, + { name = "datasets" }, { name = "devtools" }, { name = "fastapi" }, { name = "gradio" },