diff --git a/pyproject.toml b/pyproject.toml
index ec398ac9d0..9199527c20 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -17,7 +17,7 @@ dependencies = [
"rich>=14.1.0",
"safetensors>=0.6.2",
"tokenizers>=0.21.2",
- "transformers>=4.56.1,<5",
+ "transformers>=5.0.0,<=5.3.0",
"typer>=0.17.4",
# "wandb>=0.22.0",
"peft",
@@ -72,7 +72,6 @@ skyrl-train = [
"ninja",
"tensorboard",
"func_timeout",
- "transformers>=4.51.0",
"hydra-core==1.3.2",
"accelerate",
"torchdata",
@@ -217,6 +216,7 @@ override-dependencies = [
"causal-conv1d; sys_platform == 'never'",
"transformer-engine[pytorch]==2.10.0; sys_platform == 'linux'",
"megatron-core==0.16.1; sys_platform == 'linux'",
+ "transformers>=5.0.0,<=5.3.0; sys_platform == 'linux'",
"ml_dtypes>=0.5.0; sys_platform == 'linux'",
]
diff --git a/skyrl/backends/skyrl_train/distributed/fsdp_utils.py b/skyrl/backends/skyrl_train/distributed/fsdp_utils.py
index 449cf0266e..76dada1403 100644
--- a/skyrl/backends/skyrl_train/distributed/fsdp_utils.py
+++ b/skyrl/backends/skyrl_train/distributed/fsdp_utils.py
@@ -63,20 +63,13 @@ def init_fn(x: torch.nn.Module):
return x
-def get_init_weight_context_manager(use_meta_tensor=True, mesh: DeviceMesh = None):
- from accelerate import init_empty_weights
-
- def cpu_init_weights():
- return torch.device("cpu")
-
- if use_meta_tensor:
- if mesh is None:
- init_context = init_empty_weights if torch.distributed.get_rank() != 0 else cpu_init_weights
- else:
- init_context = init_empty_weights if mesh.get_coordinate()[-1] != 0 else cpu_init_weights
- else:
- init_context = cpu_init_weights
- return init_context
+def should_use_meta_init(use_meta_tensor=True, mesh: DeviceMesh = None) -> bool:
+ """Return True when this rank should create an empty model on meta device."""
+ if not use_meta_tensor:
+ return False
+ if mesh is None:
+ return torch.distributed.get_rank() != 0
+ return mesh.get_coordinate()[-1] != 0
def get_fsdp_wrap_policy(module, config=None, is_lora=False):
@@ -176,6 +169,14 @@ def offload_fsdp_model_to_cpu(model: FSDP, empty_cache: bool = True):
@torch.no_grad()
def offload_fsdp2_model_to_cpu(model, empty_cache: bool = True):
+ # Materialize any leftover meta buffers (e.g. non-persistent inv_freq from
+ # RotaryEmbedding created via from_config on meta device). We must NOT call
+ # model.to_empty() because that would wipe already-loaded FSDP parameters.
+ for module in model.modules():
+ for key in list(module._buffers.keys()):
+ buf = module._buffers[key]
+ if buf is not None and buf.device.type == "meta":
+ module._buffers[key] = torch.empty(buf.shape, dtype=buf.dtype, device="cpu")
model.to("cpu", non_blocking=True)
if empty_cache:
torch.cuda.empty_cache()
@@ -247,6 +248,27 @@ def get_fsdp_state_ctx(model, state_type, state_cfg, optim_cfg):
return nullcontext()
+def _sync_non_persistent_buffers(model: torch.nn.Module, loaded_sd: dict):
+ """Broadcast non-persistent buffers (e.g. inv_freq) from rank 0 to all ranks.
+
+ Non-persistent buffers are excluded from state_dict so they are never loaded
+ by the parameter broadcast loop. On non-rank-0 meta-init they remain on the
+ meta device with no data; rank 0 has the correctly computed values.
+ """
+ for module in model.modules():
+ non_persistent = getattr(module, "_non_persistent_buffers_set", set())
+ for key in sorted(non_persistent):
+ buf = module._buffers.get(key)
+ if buf is None:
+ continue
+ if dist.get_rank() == 0:
+ src = buf.detach().cuda()
+ else:
+ src = torch.empty(buf.shape, dtype=buf.dtype, device="cuda")
+ dist.broadcast(src, src=0)
+ module._buffers[key] = src.cpu()
+
+
# Fsdp2 load full state dict from `accelerate`
# Reference: https://github.com/huggingface/accelerate/blob/0af621bbecc0e43f5d43766a4945d3d2236bb8a9/src/accelerate/utils/fsdp_utils.py#L455
# NOTE (sumanthrh): The original code from `accelerate` assumes init on meta device - with cpu init only on rank 0, but the code is compatible with cpu init on all ranks.
@@ -324,6 +346,11 @@ def _cast_and_contiguous(tensor, to_contiguous, dtype):
# we set `assign=True` because our params can be on meta device
model.load_state_dict(sharded_sd, assign=True)
+ # Broadcast non-persistent buffers (e.g. inv_freq from RotaryEmbedding) that
+ # are excluded from state_dict. On non-rank-0 meta-init these are still on
+ # meta device with no data; rank 0 has the correctly computed values.
+ _sync_non_persistent_buffers(model, sharded_sd)
+
# If we don't offload FSDP2 Module to CPU and then back to GPU,
# it will occupy a large amount of reserved GPU memory,which can not be released using torch.cuda.empty_cache()
# even if we are using cpu_offload
diff --git a/skyrl/backends/skyrl_train/inference_engines/vllm/vllm_server.py b/skyrl/backends/skyrl_train/inference_engines/vllm/vllm_server.py
index d2310ef9ed..b51938a8ac 100644
--- a/skyrl/backends/skyrl_train/inference_engines/vllm/vllm_server.py
+++ b/skyrl/backends/skyrl_train/inference_engines/vllm/vllm_server.py
@@ -30,7 +30,6 @@ def __init__(self, args):
async def run_server(self, **uvicorn_kwargs) -> None:
sock_addr = (self.server_args.host or "", self.server_args.port)
sock = create_server_socket(sock_addr)
-
set_ulimit()
def signal_handler(*_) -> None:
@@ -39,7 +38,6 @@ def signal_handler(*_) -> None:
signal.signal(signal.SIGTERM, signal_handler)
- # TODO(tgriggs): Move this elsewhere, make configurable.
os.environ["VLLM_USE_V1"] = "1"
engine_args = AsyncEngineArgs.from_cli_args(self.server_args)
engine = AsyncLLMEngine.from_engine_args(
@@ -147,7 +145,10 @@ async def _destroy_weights_update_group(request: Request):
await shutdown_task
- sock.close()
+ try:
+ sock.close()
+ except (AttributeError, OSError):
+ pass
def run_server_uvloop(self, **uvicorn_kwargs) -> None:
uvloop.run(self.run_server(**uvicorn_kwargs))
diff --git a/skyrl/backends/skyrl_train/weight_sync/cuda_ipc_strategy.py b/skyrl/backends/skyrl_train/weight_sync/cuda_ipc_strategy.py
index c0c59da035..c828c2172d 100644
--- a/skyrl/backends/skyrl_train/weight_sync/cuda_ipc_strategy.py
+++ b/skyrl/backends/skyrl_train/weight_sync/cuda_ipc_strategy.py
@@ -260,7 +260,7 @@ async def _send_chunks_legacy(self, chunks: Iterable[WeightChunk]) -> None:
offset = 0
for name, tensor, shape in zip(chunk.names, chunk.tensors, chunk.shapes):
size = tensor.numel()
- packed_tensor[offset : offset + size].copy_(tensor.detach().view(-1))
+ packed_tensor[offset : offset + size].copy_(tensor.detach().reshape(-1))
offset += size
names.append(name)
dtypes.append(self._init_info.model_dtype_str)
diff --git a/skyrl/backends/skyrl_train/workers/fsdp/fsdp_worker.py b/skyrl/backends/skyrl_train/workers/fsdp/fsdp_worker.py
index a0a1990f5d..6d28358809 100644
--- a/skyrl/backends/skyrl_train/workers/fsdp/fsdp_worker.py
+++ b/skyrl/backends/skyrl_train/workers/fsdp/fsdp_worker.py
@@ -24,7 +24,7 @@
from skyrl.backends.skyrl_train.distributed.fsdp_strategy import FSDPStrategy
from skyrl.backends.skyrl_train.distributed.fsdp_utils import (
fsdp_version,
- get_init_weight_context_manager,
+ should_use_meta_init,
)
from skyrl.backends.skyrl_train.training_batch import (
TrainingInputBatch,
@@ -165,37 +165,34 @@ def init_model(self, model_path, num_training_steps: int = None):
self._is_lora = self.cfg.policy.model.lora.rank > 0
model_config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
- init_context = get_init_weight_context_manager(
+ use_meta = should_use_meta_init(
use_meta_tensor=not model_config.tie_word_embeddings, mesh=self.strategy.device_mesh
)
- with init_context():
-
- wrapped_model = HFModelWrapper(
- model_path,
- use_flash_attention_2=self.cfg.flash_attn,
- # NOTE (sumanthrh): Model initialization should always be in fp32
- # during training
- bf16=False,
- lora_rank=self.cfg.policy.model.lora.rank,
- lora_alpha=self.cfg.policy.model.lora.alpha,
- lora_dropout=self.cfg.policy.model.lora.dropout,
- lora_init_method=self.cfg.policy.model.lora.init_method,
- target_modules=self.cfg.policy.model.lora.target_modules,
- exclude_modules=self.cfg.policy.model.lora.exclude_modules,
- sequence_parallel_size=self.cfg.policy.sequence_parallel_size,
- use_sample_packing=self.cfg.use_sample_packing,
- use_torch_compile=self.cfg.policy.use_torch_compile,
- rope_scaling=get_rope_scaling_config(self.cfg),
- rope_theta=get_rope_theta_config(self.cfg),
- model_config_kwargs=self.cfg.policy.model_config_kwargs,
- )
- # in-place patch
- self._seq_parallel_monkey_patch(model=wrapped_model.model)
- if self.cfg.gradient_checkpointing:
- wrapped_model.gradient_checkpointing_enable(
- gradient_checkpointing_kwargs={"use_reentrant": self.cfg.gradient_checkpointing_use_reentrant}
- )
+ wrapped_model = HFModelWrapper(
+ model_path,
+ use_flash_attention_2=self.cfg.flash_attn,
+ bf16=False,
+ lora_rank=self.cfg.policy.model.lora.rank,
+ lora_alpha=self.cfg.policy.model.lora.alpha,
+ lora_dropout=self.cfg.policy.model.lora.dropout,
+ lora_init_method=self.cfg.policy.model.lora.init_method,
+ target_modules=self.cfg.policy.model.lora.target_modules,
+ exclude_modules=self.cfg.policy.model.lora.exclude_modules,
+ sequence_parallel_size=self.cfg.policy.sequence_parallel_size,
+ use_sample_packing=self.cfg.use_sample_packing,
+ use_torch_compile=self.cfg.policy.use_torch_compile,
+ rope_scaling=get_rope_scaling_config(self.cfg),
+ rope_theta=get_rope_theta_config(self.cfg),
+ model_config_kwargs=self.cfg.policy.model_config_kwargs,
+ meta_init=use_meta,
+ )
+ self._seq_parallel_monkey_patch(model=wrapped_model.model)
+
+ if self.cfg.gradient_checkpointing:
+ wrapped_model.gradient_checkpointing_enable(
+ gradient_checkpointing_kwargs={"use_reentrant": self.cfg.gradient_checkpointing_use_reentrant}
+ )
self.model, self.optimizer, self.scheduler = strategy.prepare(
(wrapped_model, None, None),
@@ -342,34 +339,33 @@ def init_model(self, model_path, num_training_steps: int = None):
self.strategy = strategy
model_config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
- init_context = get_init_weight_context_manager(
+ use_meta = should_use_meta_init(
use_meta_tensor=not model_config.tie_word_embeddings, mesh=self.strategy.device_mesh
)
- with init_context():
- critic = get_llm_for_sequence_regression(
- model_path,
- "critic",
- use_flash_attention_2=self.cfg.flash_attn,
- # NOTE (sumanthrh): Model initialization should always be in fp32
- # during training
- bf16=False,
- lora_rank=self.cfg.critic.model.lora.rank,
- lora_alpha=self.cfg.critic.model.lora.alpha,
- lora_dropout=self.cfg.critic.model.lora.dropout,
- target_modules=self.cfg.critic.model.lora.target_modules,
- exclude_modules=self.cfg.critic.model.lora.exclude_modules,
- value_head_prefix=self.cfg.algorithm.value_head_prefix,
- init_value_head=self.cfg.policy.model.path == self.cfg.critic.model.path,
- sequence_parallel_size=self.cfg.critic.sequence_parallel_size,
- use_sample_packing=self.cfg.use_sample_packing,
- model_config_kwargs=self.cfg.critic.model_config_kwargs,
- )
- self._seq_parallel_monkey_patch(model=critic, use_parent_class=True)
- if self.cfg.gradient_checkpointing:
- critic.gradient_checkpointing_enable(
- gradient_checkpointing_kwargs={"use_reentrant": self.cfg.gradient_checkpointing_use_reentrant}
- )
+ critic = get_llm_for_sequence_regression(
+ model_path,
+ "critic",
+ use_flash_attention_2=self.cfg.flash_attn,
+ bf16=False,
+ lora_rank=self.cfg.critic.model.lora.rank,
+ lora_alpha=self.cfg.critic.model.lora.alpha,
+ lora_dropout=self.cfg.critic.model.lora.dropout,
+ target_modules=self.cfg.critic.model.lora.target_modules,
+ exclude_modules=self.cfg.critic.model.lora.exclude_modules,
+ value_head_prefix=self.cfg.algorithm.value_head_prefix,
+ init_value_head=self.cfg.policy.model.path == self.cfg.critic.model.path,
+ sequence_parallel_size=self.cfg.critic.sequence_parallel_size,
+ use_sample_packing=self.cfg.use_sample_packing,
+ model_config_kwargs=self.cfg.critic.model_config_kwargs,
+ meta_init=use_meta,
+ )
+ self._seq_parallel_monkey_patch(model=critic, use_parent_class=True)
+
+ if self.cfg.gradient_checkpointing:
+ critic.gradient_checkpointing_enable(
+ gradient_checkpointing_kwargs={"use_reentrant": self.cfg.gradient_checkpointing_use_reentrant}
+ )
# prepare models/optimizers...
self.model, self.optimizer, self.scheduler = strategy.prepare(
@@ -412,22 +408,22 @@ def init_model(self, model_path):
self.strategy = strategy
model_config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
- init_context = get_init_weight_context_manager(
+ use_meta = should_use_meta_init(
use_meta_tensor=not model_config.tie_word_embeddings, mesh=self.strategy.device_mesh
)
- with init_context():
- wrapped_model = HFModelWrapper(
- model_path,
- use_flash_attention_2=self.cfg.flash_attn,
- bf16=self.cfg.bf16,
- sequence_parallel_size=self.cfg.ref.sequence_parallel_size,
- use_sample_packing=self.cfg.use_sample_packing,
- rope_scaling=get_rope_scaling_config(self.cfg),
- rope_theta=get_rope_theta_config(self.cfg),
- model_config_kwargs=self.cfg.ref.model_config_kwargs,
- )
- self._seq_parallel_monkey_patch(model=wrapped_model.model)
+ wrapped_model = HFModelWrapper(
+ model_path,
+ use_flash_attention_2=self.cfg.flash_attn,
+ bf16=self.cfg.bf16,
+ sequence_parallel_size=self.cfg.ref.sequence_parallel_size,
+ use_sample_packing=self.cfg.use_sample_packing,
+ rope_scaling=get_rope_scaling_config(self.cfg),
+ rope_theta=get_rope_theta_config(self.cfg),
+ model_config_kwargs=self.cfg.ref.model_config_kwargs,
+ meta_init=use_meta,
+ )
+ self._seq_parallel_monkey_patch(model=wrapped_model.model)
self.model = strategy.prepare(wrapped_model)
self.model.eval()
diff --git a/skyrl/backends/skyrl_train/workers/megatron/megatron_worker.py b/skyrl/backends/skyrl_train/workers/megatron/megatron_worker.py
index 1fb7235806..93589b46dc 100644
--- a/skyrl/backends/skyrl_train/workers/megatron/megatron_worker.py
+++ b/skyrl/backends/skyrl_train/workers/megatron/megatron_worker.py
@@ -288,6 +288,14 @@ def init_configs(
if hasattr(provider, "q_lora_rank") and hasattr(hf_config, "q_lora_rank"):
provider.q_lora_rank = hf_config.q_lora_rank
+ # Workaround for transformers v5 moving rope_theta into rope_parameters
+ # (previously it was a top-level config attribute). megatron-bridge's
+ # CONFIG_MAPPING reads config.rope_theta which no longer exists in v5,
+ # causing it to fall back to the default rotary_base of 10000.
+ rope_params = getattr(hf_config, "rope_parameters", None) or getattr(hf_config, "rope_scaling", None)
+ if isinstance(rope_params, dict) and "rope_theta" in rope_params:
+ provider.rotary_base = rope_params["rope_theta"]
+
provider.tensor_model_parallel_size = megatron_config.tensor_model_parallel_size
provider.pipeline_model_parallel_size = megatron_config.pipeline_model_parallel_size
provider.pipeline_dtype = torch.bfloat16 if bf16 else torch.float32
diff --git a/skyrl/backends/skyrl_train/workers/model_wrapper.py b/skyrl/backends/skyrl_train/workers/model_wrapper.py
index 3eb45f80a7..60b468bebe 100644
--- a/skyrl/backends/skyrl_train/workers/model_wrapper.py
+++ b/skyrl/backends/skyrl_train/workers/model_wrapper.py
@@ -78,6 +78,7 @@ def __init__(
rope_scaling: Dict[str, Any] = {},
rope_theta: float | None = None,
model_config_kwargs: dict = {},
+ meta_init: bool = False,
**kwargs,
) -> None:
super().__init__()
@@ -86,7 +87,6 @@ def __init__(
self.attn_implementation = "flash_attention_2" if use_flash_attention_2 else "sdpa"
self.use_sample_packing = use_sample_packing
self.is_vlm = False
- # packing samples using Flash Attention 2
if use_sample_packing:
assert (
self.attn_implementation == "flash_attention_2"
@@ -122,22 +122,26 @@ def __init__(
# NOTE: In future transformers releases (> 5.0.0), all multimodal models can use AutoModelForMultimodalLM.
model_class = AutoModelForImageTextToText
- rope_scaling_kwargs = {}
if rope_scaling:
- rope_scaling_kwargs["rope_scaling"] = rope_scaling
+ model_config.rope_scaling = rope_scaling
if rope_theta:
- rope_scaling_kwargs["rope_theta"] = rope_theta
-
- self.model = model_class.from_pretrained(
- pretrain_or_model,
- config=model_config,
- trust_remote_code=True,
- attn_implementation=self.attn_implementation,
- quantization_config=nf4_config,
- torch_dtype=torch.bfloat16 if bf16 else torch.float32,
- device_map=device_map,
- **rope_scaling_kwargs,
- )
+ model_config.rope_theta = rope_theta
+ model_config._attn_implementation = self.attn_implementation
+
+ if meta_init:
+ with torch.device("meta"):
+ self.model = model_class.from_config(model_config, trust_remote_code=True)
+ self.model.to(torch.bfloat16 if bf16 else torch.float32)
+ else:
+ self.model = model_class.from_pretrained(
+ pretrain_or_model,
+ config=model_config,
+ trust_remote_code=True,
+ attn_implementation=self.attn_implementation,
+ quantization_config=nf4_config,
+ torch_dtype=torch.bfloat16 if bf16 else torch.float32,
+ device_map=device_map,
+ )
# gpt oss
if Version(transformers.__version__) >= Version("4.56.2"):
@@ -303,6 +307,7 @@ def forward(
entropy_requires_grad=True,
pixel_values: Optional[TensorList] = None,
image_grid_thw: Optional[TensorList] = None,
+ mm_token_type_ids: Optional[torch.Tensor] = None,
) -> torch.Tensor:
"""Returns action log probs"""
if self.is_vlm:
@@ -352,12 +357,17 @@ def forward(
)
if self.is_vlm:
+ vlm_kwargs = dict(
+ pixel_values=pixel_values,
+ image_grid_thw=image_grid_thw,
+ )
+ if mm_token_type_ids is not None:
+ vlm_kwargs["mm_token_type_ids"] = mm_token_type_ids
output = self.model(
sequences_fwd,
attention_mask=attention_mask_fwd,
position_ids=None,
- pixel_values=pixel_values,
- image_grid_thw=image_grid_thw,
+ **vlm_kwargs,
)
# NOTE (sumanthrh): Once we have position_ids, we don't need attention mask with flash attention.
elif self.use_sample_packing and self.attn_implementation == "flash_attention_2":
@@ -480,6 +490,8 @@ def __init__(self, config: AutoConfig):
if self.sequence_parallel_size > 1:
logger.info("Critic model using sequence parallelism with size: ", self.sequence_parallel_size)
+ self.post_init()
+
def forward(
self,
input_ids: torch.LongTensor = None,
@@ -579,6 +591,7 @@ def get_llm_for_sequence_regression(
sequence_parallel_size=1,
use_sample_packing: bool = False,
model_config_kwargs: dict = {},
+ meta_init: bool = False,
**kwargs,
) -> nn.Module:
"""Get transformer with a sequence classification head on top (linear layer).
@@ -618,15 +631,22 @@ def get_llm_for_sequence_regression(
else:
nf4_config = None
- model = cls_class.from_pretrained(
- model_name_or_path,
- config=config,
- trust_remote_code=True,
- torch_dtype=torch.bfloat16 if bf16 else torch.float32,
- quantization_config=nf4_config,
- device_map=device_map,
- **kwargs,
- )
+ if meta_init:
+ from transformers.modeling_utils import no_init_weights
+
+ with no_init_weights(), torch.device("meta"):
+ model = cls_class(config)
+ model.to(dtype=torch.bfloat16 if bf16 else torch.float32)
+ else:
+ model = cls_class.from_pretrained(
+ model_name_or_path,
+ config=config,
+ trust_remote_code=True,
+ torch_dtype=torch.bfloat16 if bf16 else torch.float32,
+ quantization_config=nf4_config,
+ device_map=device_map,
+ **kwargs,
+ )
# LoRA
if lora_rank > 0:
diff --git a/skyrl/tx/models/configs.py b/skyrl/tx/models/configs.py
index 6d90f28e9a..f16f079f90 100644
--- a/skyrl/tx/models/configs.py
+++ b/skyrl/tx/models/configs.py
@@ -38,10 +38,19 @@ def __init__(
gradient_checkpointing: bool = False,
mhc_expansion_rate: int = 1,
):
- # `text_config` can come through as a raw dict from HF configs.
super().__init__(**(config if isinstance(config, dict) else config.__dict__))
- # Add LoRA-specific parameters
+ # In transformers v5, rope_parameters may not contain rope_theta
+ # even when it exists as a top-level config attribute (e.g. DeepSeek v3).
+ # Inject it so model code can always use config.rope_parameters["rope_theta"].
+ rope_params = getattr(self, "rope_parameters", None) or {}
+ if "rope_theta" not in rope_params:
+ rope_theta = getattr(self, "rope_theta", None)
+ if rope_theta is not None:
+ rope_params["rope_theta"] = rope_theta
+ if rope_params:
+ self.rope_parameters = rope_params
+
self.max_lora_adapters = max_lora_adapters
self.max_lora_rank = max_lora_rank
self.shard_attention_heads = shard_attention_heads
@@ -53,10 +62,13 @@ def get_config(self) -> PretrainedConfig:
"""Return `text_config` when present, otherwise return this config."""
return self.get_text_config() if hasattr(self, "text_config") else self
- def get_text_config(self) -> "ModelConfig":
+ def get_text_config(self, decoder=None, encoder=None) -> "ModelConfig":
"""Return a wrapped config built from `self.text_config`."""
+ text_cfg = super().get_text_config(decoder=decoder, encoder=encoder)
+ if text_cfg is self or isinstance(text_cfg, ModelConfig):
+ return text_cfg
return type(self)(
- self.text_config,
+ text_cfg,
max_lora_adapters=self.max_lora_adapters,
max_lora_rank=self.max_lora_rank,
shard_attention_heads=self.shard_attention_heads,
diff --git a/skyrl/tx/models/deepseekv3.py b/skyrl/tx/models/deepseekv3.py
index 8d16cbd807..6c975ffa97 100644
--- a/skyrl/tx/models/deepseekv3.py
+++ b/skyrl/tx/models/deepseekv3.py
@@ -119,7 +119,9 @@ def __init__(self, config: DeepseekV3Config, *, dtype: jnp.dtype, rngs: nnx.Rngs
rngs=rngs,
)
- self.rotary_emb, mscale = get_rope(self.qk_rope_head_dim, config.rope_theta, config.rope_scaling)
+ self.rotary_emb, mscale = get_rope(
+ self.qk_rope_head_dim, config.rope_parameters["rope_theta"], config.rope_parameters
+ )
self.scaling = self.qk_head_dim ** (-0.5) * mscale * mscale
def __call__(
diff --git a/skyrl/tx/models/llama3.py b/skyrl/tx/models/llama3.py
index 2e8a0c0680..cb714fc14c 100644
--- a/skyrl/tx/models/llama3.py
+++ b/skyrl/tx/models/llama3.py
@@ -98,8 +98,8 @@ def __call__(
v = self.v_proj(x, adapter_indices=adapter_indices).reshape(B, T, self.num_kv_heads, self.head_dim)
# Apply RoPE
- q = apply_rope(q, positions, self.head_dim, self.config.rope_theta)
- k = apply_rope(k, positions, self.head_dim, self.config.rope_theta)
+ q = apply_rope(q, positions, self.head_dim, self.config.rope_parameters["rope_theta"])
+ k = apply_rope(k, positions, self.head_dim, self.config.rope_parameters["rope_theta"])
# Handle KV cache
if kv_cache is not None:
diff --git a/skyrl/tx/models/qwen3.py b/skyrl/tx/models/qwen3.py
index 844809e6e2..bb24959dbe 100644
--- a/skyrl/tx/models/qwen3.py
+++ b/skyrl/tx/models/qwen3.py
@@ -81,8 +81,8 @@ def __call__(
v = v.reshape(B, T, self.num_kv_heads, self.head_dim)
# Apply RoPE
- q = apply_rope(q, positions, self.head_dim, self.config.rope_theta)
- k = apply_rope(k, positions, self.head_dim, self.config.rope_theta)
+ q = apply_rope(q, positions, self.head_dim, self.config.rope_parameters["rope_theta"])
+ k = apply_rope(k, positions, self.head_dim, self.config.rope_parameters["rope_theta"])
# Handle KV cache
if kv_cache is not None:
@@ -172,8 +172,10 @@ def __init__(self, config: Qwen3Config, *, dtype: jnp.dtype, rngs: nnx.Rngs) ->
def __call__(
self, hidden_states: jax.Array, router_logits: jax.Array, adapter_indices: jax.Array | None = None
) -> jax.Array:
- routing_weights, selected_experts = jax.lax.top_k(router_logits, k=self.config.num_experts_per_tok)
- routing_weights = nnx.softmax(routing_weights, axis=-1)
+ routing_weights = nnx.softmax(router_logits, axis=-1)
+ routing_weights, selected_experts = jax.lax.top_k(routing_weights, k=self.config.num_experts_per_tok)
+ if getattr(self.config, "norm_topk_prob", True):
+ routing_weights = routing_weights / routing_weights.sum(axis=-1, keepdims=True)
num_experts = self.config.num_experts
num_experts_per_tok = self.config.num_experts_per_tok
diff --git a/tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_remote_inference_client_chat_template.py b/tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_remote_inference_client_chat_template.py
index 5b4d172c7a..300fac372a 100644
--- a/tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_remote_inference_client_chat_template.py
+++ b/tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_remote_inference_client_chat_template.py
@@ -99,8 +99,6 @@ async def test_custom_chat_template(ray_init_fixture, use_custom_template: bool)
prompt_str = tokenizer.decode(prompt_token_ids)
if use_custom_template:
- # The custom template qwen3_acc_thinking.jinja2 will keep the thinking tokens.
        assert "<think>" in prompt_str and "</think>" in prompt_str
else:
- # Default template strips thinking tokens
        assert "<think>" not in prompt_str and "</think>" not in prompt_str
diff --git a/tests/backends/skyrl_train/gpu/gpu_ci/test_megatron_worker.py b/tests/backends/skyrl_train/gpu/gpu_ci/test_megatron_worker.py
index 76dac7191a..29e0ed5dcb 100644
--- a/tests/backends/skyrl_train/gpu/gpu_ci/test_megatron_worker.py
+++ b/tests/backends/skyrl_train/gpu/gpu_ci/test_megatron_worker.py
@@ -288,13 +288,14 @@ def run_hf_forward(batch, model_name):
position_ids = attention_mask.long().cumsum(-1) - 1
position_ids.masked_fill_(attention_mask == 0, 1)
- sequences_rolled = torch.roll(sequences_fwd, shifts=-1, dims=1).to("cuda")
-
- sequences_fwd, attention_mask, position_ids = (
+ sequences_rolled = torch.roll(sequences_fwd, shifts=-1, dims=1)
+ sequences_fwd, attention_mask, position_ids, sequences_rolled = (
sequences_fwd.to("cuda"),
attention_mask.to("cuda"),
position_ids.to("cuda"),
+ sequences_rolled.to("cuda"),
)
+
with torch.no_grad(), torch.autocast(dtype=torch.bfloat16, device_type="cuda"):
output = model(sequences_fwd, attention_mask=attention_mask, position_ids=position_ids)
log_probs = logprobs_from_logits(output["logits"], sequences_rolled)
diff --git a/tests/backends/skyrl_train/gpu/gpu_ci/test_training_step.py b/tests/backends/skyrl_train/gpu/gpu_ci/test_training_step.py
index c1db2cc70e..77bb868a2e 100644
--- a/tests/backends/skyrl_train/gpu/gpu_ci/test_training_step.py
+++ b/tests/backends/skyrl_train/gpu/gpu_ci/test_training_step.py
@@ -1,6 +1,6 @@
"""
Run with:
-uv run --isolated --extra dev -- pytest tests/backends/skyrl_train/gpu/gpu_ci/test_training_step.py
+uv run --isolated --extra dev --extra fsdp -- pytest tests/backends/skyrl_train/gpu/gpu_ci/test_training_step.py
"""
import pytest
@@ -13,7 +13,7 @@
make_dummy_training_batch,
)
-MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct"
+MODEL_NAME = "Qwen/Qwen3-0.6B"
MOE_MODEL_NAME = "Qwen/Qwen3-30B-A3B"
diff --git a/tests/backends/skyrl_train/gpu/gpu_ci/test_vlm_model_wrapper.py b/tests/backends/skyrl_train/gpu/gpu_ci/test_vlm_model_wrapper.py
index 75b5116a7a..e58e6d0466 100644
--- a/tests/backends/skyrl_train/gpu/gpu_ci/test_vlm_model_wrapper.py
+++ b/tests/backends/skyrl_train/gpu/gpu_ci/test_vlm_model_wrapper.py
@@ -78,6 +78,9 @@ def build_vlm_inputs(processor, prompt_text, response_text, image=None, device="
"num_actions": num_actions,
}
+ if "mm_token_type_ids" in full_inputs:
+ result["mm_token_type_ids"] = full_inputs["mm_token_type_ids"].to(device)
+
if image is not None:
result["pixel_values"] = TensorList([full_inputs["pixel_values"].to(device)])
result["image_grid_thw"] = TensorList([full_inputs["image_grid_thw"].to(device)])
@@ -104,6 +107,8 @@ def test_vlm_log_probs_match_manual(vlm_model, processor):
pv = inputs["pixel_values"]
igt = inputs["image_grid_thw"]
+ mm_token_type_ids = inputs.get("mm_token_type_ids")
+
# Wrapper path
with torch.no_grad():
wrapper_log_probs = vlm_model(
@@ -112,19 +117,23 @@ def test_vlm_log_probs_match_manual(vlm_model, processor):
attention_mask,
pixel_values=pv,
image_grid_thw=igt,
+ mm_token_type_ids=mm_token_type_ids,
)
# Manual path: run the raw model
pv_cat = torch.cat(pv.tensors, dim=0)
igt_cat = torch.cat(igt.tensors, dim=0)
+ manual_kwargs = dict(pixel_values=pv_cat, image_grid_thw=igt_cat)
+ if mm_token_type_ids is not None:
+ manual_kwargs["mm_token_type_ids"] = mm_token_type_ids
+
with torch.no_grad():
output = vlm_model.model(
input_ids,
attention_mask=attention_mask,
position_ids=None,
- pixel_values=pv_cat,
- image_grid_thw=igt_cat,
+ **manual_kwargs,
)
logits = output["logits"].float()
@@ -165,6 +174,7 @@ def test_vlm_semantic_color_recognition(vlm_model, processor):
inputs["attention_mask"],
pixel_values=inputs["pixel_values"],
image_grid_thw=inputs["image_grid_thw"],
+ mm_token_type_ids=inputs.get("mm_token_type_ids"),
)
log_p[resp_color] = action_lp.sum().item()
@@ -180,13 +190,16 @@ def _build_batched_vlm_inputs(processor, prompt, response, images, device="cuda"
"""Build batched model inputs from a list of images with shared prompt/response text."""
per_sample = [build_vlm_inputs(processor, prompt, response, image=img, device=device) for img in images]
num_actions = per_sample[0]["num_actions"]
- return {
+ result = {
"input_ids": torch.cat([inp["input_ids"] for inp in per_sample], dim=0),
"attention_mask": torch.cat([inp["attention_mask"] for inp in per_sample], dim=0),
"num_actions": num_actions,
"pixel_values": TensorList([inp["pixel_values"].tensors[0] for inp in per_sample]),
"image_grid_thw": TensorList([inp["image_grid_thw"].tensors[0] for inp in per_sample]),
}
+ if "mm_token_type_ids" in per_sample[0]:
+ result["mm_token_type_ids"] = torch.cat([inp["mm_token_type_ids"] for inp in per_sample], dim=0)
+ return result
def test_vlm_forward_batched_vision(vlm_model, processor):
@@ -212,6 +225,7 @@ def test_vlm_forward_batched_vision(vlm_model, processor):
fwd["attention_mask"],
pixel_values=fwd["pixel_values"],
image_grid_thw=fwd["image_grid_thw"],
+ mm_token_type_ids=fwd.get("mm_token_type_ids"),
)
# 2. Run batch in reversed order [blue, red]
@@ -224,6 +238,7 @@ def test_vlm_forward_batched_vision(vlm_model, processor):
rev["attention_mask"],
pixel_values=rev["pixel_values"],
image_grid_thw=rev["image_grid_thw"],
+ mm_token_type_ids=rev.get("mm_token_type_ids"),
)
# 3. Basic shape / sanity checks
diff --git a/tests/tx/layers/test_connectors.py b/tests/tx/layers/test_connectors.py
index 05ff58de94..401475b334 100644
--- a/tests/tx/layers/test_connectors.py
+++ b/tests/tx/layers/test_connectors.py
@@ -118,6 +118,7 @@ def test_deepseek_connector_identity_expansion_rate():
norm_topk_prob=True,
routed_scaling_factor=1.0,
tie_word_embeddings=False,
+ rope_theta=10000.0,
)
config_e1 = DeepseekV3Config(base_config, max_lora_adapters=4, max_lora_rank=8, shard_attention_heads=True)
config_e4 = DeepseekV3Config(base_config, max_lora_adapters=4, max_lora_rank=8, shard_attention_heads=True)
diff --git a/tests/tx/models/test_deepseekv3.py b/tests/tx/models/test_deepseekv3.py
index c6ceef0028..598f04849e 100644
--- a/tests/tx/models/test_deepseekv3.py
+++ b/tests/tx/models/test_deepseekv3.py
@@ -26,7 +26,7 @@ def test_deepseekv3(tp: int):
model_name = "yujiepan/deepseek-v3-tiny-random"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
hf_model = AutoModelForCausalLM.from_pretrained(
- model_name, attn_implementation="eager", use_safetensors=True, trust_remote_code=True
+ model_name, attn_implementation="eager", use_safetensors=True, torch_dtype=torch.float32
)
inputs = ["The capital of France is", "The most popular programming language is"]
@@ -40,7 +40,7 @@ def test_deepseekv3(tp: int):
with tempfile.TemporaryDirectory() as tmp:
hf_model.save_pretrained(tmp, safe_serialization=True)
- base_config = PretrainedConfig.from_pretrained(model_name, trust_remote_code=True)
+ base_config = PretrainedConfig.from_pretrained(model_name)
config = DeepseekV3Config(base_config, max_lora_adapters=32, max_lora_rank=32, shard_attention_heads=True)
# EP axis required for MoE expert sharding
mesh = jax.make_mesh((1, 1, tp), ("fsdp", "ep", "tp"), axis_types=(jax.sharding.AxisType.Auto,) * 3)
@@ -52,30 +52,41 @@ def test_deepseekv3(tp: int):
)
assert outputs.hidden_states is not None
- assert np.allclose(hf_outputs.hidden_states[0], outputs.hidden_states[0], rtol=1e-6)
- assert np.allclose(hf_outputs.hidden_states[1], outputs.hidden_states[1], rtol=1e-3, atol=1e-3)
- assert np.allclose(hf_outputs.hidden_states[-1], outputs.hidden_states[-1], rtol=3e-2, atol=6e-2)
+ assert np.allclose(hf_outputs.hidden_states[0].float(), outputs.hidden_states[0], rtol=1e-6)
+ assert np.allclose(hf_outputs.hidden_states[1].float(), outputs.hidden_states[1], rtol=1e-3, atol=1e-3)
+ assert np.allclose(hf_outputs.hidden_states[-1].float(), outputs.hidden_states[-1], rtol=3e-2, atol=6e-2)
def load_moe_base_weights(jax_moe_layer: DeepseekV3MoE, hf_moe_layer: HFDeepseekV3MoE) -> None:
"""Load base weights from HF MoE layer to JAX MoE layer."""
- jax_moe_layer.gate.weight[:] = hf_moe_layer.gate.weight.detach().numpy().T
- jax_moe_layer.gate.e_score_correction_bias[:] = hf_moe_layer.gate.e_score_correction_bias.detach().numpy()
-
- for i, expert in enumerate(hf_moe_layer.experts):
- jax_moe_layer.experts.gate_proj.weight[i, :, :] = expert.gate_proj.weight.detach().numpy().T
- jax_moe_layer.experts.up_proj.weight[i, :, :] = expert.up_proj.weight.detach().numpy().T
- jax_moe_layer.experts.down_proj.weight[i, :, :] = expert.down_proj.weight.detach().numpy().T
+ jax_moe_layer.gate.weight[:] = hf_moe_layer.gate.weight.detach().float().numpy().T
+ jax_moe_layer.gate.e_score_correction_bias[:] = hf_moe_layer.gate.e_score_correction_bias.detach().float().numpy()
+
+ gate_up = hf_moe_layer.experts.gate_up_proj.detach().float().numpy()
+ intermediate = gate_up.shape[1] // 2
+ jax_moe_layer.experts.gate_proj.weight[:] = gate_up[:, :intermediate, :].transpose(0, 2, 1)
+ jax_moe_layer.experts.up_proj.weight[:] = gate_up[:, intermediate:, :].transpose(0, 2, 1)
+ jax_moe_layer.experts.down_proj.weight[:] = (
+ hf_moe_layer.experts.down_proj.detach().float().numpy().transpose(0, 2, 1)
+ )
- jax_moe_layer.shared_experts.gate_proj.kernel[:] = hf_moe_layer.shared_experts.gate_proj.weight.detach().numpy().T
- jax_moe_layer.shared_experts.up_proj.kernel[:] = hf_moe_layer.shared_experts.up_proj.weight.detach().numpy().T
- jax_moe_layer.shared_experts.down_proj.kernel[:] = hf_moe_layer.shared_experts.down_proj.weight.detach().numpy().T
+ jax_moe_layer.shared_experts.gate_proj.kernel[:] = (
+ hf_moe_layer.shared_experts.gate_proj.weight.detach().float().numpy().T
+ )
+ jax_moe_layer.shared_experts.up_proj.kernel[:] = (
+ hf_moe_layer.shared_experts.up_proj.weight.detach().float().numpy().T
+ )
+ jax_moe_layer.shared_experts.down_proj.kernel[:] = (
+ hf_moe_layer.shared_experts.down_proj.weight.detach().float().numpy().T
+ )
@pytest.mark.parametrize("ep,tp", [(1, 1), (1, 2), (2, 1)])
def test_deepseekv3_moe_layer(ep: int, tp: int):
model_name = "yujiepan/deepseek-v3-tiny-random"
- hf_model = AutoModelForCausalLM.from_pretrained(model_name, attn_implementation="eager", use_safetensors=True)
+ hf_model = AutoModelForCausalLM.from_pretrained(
+ model_name, attn_implementation="eager", use_safetensors=True, torch_dtype=torch.float32
+ )
base_config = PretrainedConfig.from_pretrained(model_name)
config = DeepseekV3Config(base_config, max_lora_adapters=0, max_lora_rank=0, shard_attention_heads=True)
@@ -94,7 +105,7 @@ def test_deepseekv3_moe_layer(ep: int, tp: int):
jax_expert_output = moe_layer(x.numpy())
# Higher tolerance due to cross-platform BLAS differences
- assert np.allclose(hf_expert_output.detach().numpy(), jax_expert_output, rtol=6e-3, atol=6e-3)
+ assert np.allclose(hf_expert_output.detach().float().numpy(), jax_expert_output, rtol=6e-3, atol=6e-3)
def load_lora_weights(
@@ -122,7 +133,9 @@ def load_lora_weights(
def test_deepseekv3_moe_layer_lora(ep: int, tp: int):
"""Test MoE LoRA by merging adapter into base weights and comparing outputs."""
model_name = "yujiepan/deepseek-v3-tiny-random"
- hf_model = AutoModelForCausalLM.from_pretrained(model_name, attn_implementation="eager", use_safetensors=True)
+ hf_model = AutoModelForCausalLM.from_pretrained(
+ model_name, attn_implementation="eager", use_safetensors=True, torch_dtype=torch.float32
+ )
base_config = PretrainedConfig.from_pretrained(model_name)
config = DeepseekV3Config(base_config, max_lora_adapters=3, max_lora_rank=4, shard_attention_heads=True)
@@ -198,7 +211,7 @@ def test_deepseekv3_gradient_checkpointing():
that gradient checkpointing works correctly with heterogeneous layer types.
"""
model_name = "yujiepan/deepseek-v3-tiny-random"
- base_config = PretrainedConfig.from_pretrained(model_name, trust_remote_code=True)
+ base_config = PretrainedConfig.from_pretrained(model_name)
batch_size, seq_len = 2, 8
mesh = jax.make_mesh((1, 1, 1), ("fsdp", "ep", "tp"), axis_types=(jax.sharding.AxisType.Auto,) * 3)
diff --git a/tests/tx/models/test_models_common.py b/tests/tx/models/test_models_common.py
index a5462c23b5..1954ac9994 100644
--- a/tests/tx/models/test_models_common.py
+++ b/tests/tx/models/test_models_common.py
@@ -4,6 +4,7 @@
import jax.numpy as jnp
import numpy as np
import pytest
+import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from skyrl.tx.models.configs import Llama3Config, ModelConfig, Qwen3Config
@@ -110,7 +111,9 @@ def test_compute_logits(
batch = tokenizer(inputs, return_tensors="pt", padding=True)
# Load HF model, get logits, then delete to free memory
- hf_model = AutoModelForCausalLM.from_pretrained(model_name, attn_implementation="eager", use_safetensors=True)
+ hf_model = AutoModelForCausalLM.from_pretrained(
+ model_name, attn_implementation="eager", use_safetensors=True, torch_dtype=torch.float32
+ )
hf_outputs = hf_model(batch.input_ids, attention_mask=batch.attention_mask)
hf_logits = hf_outputs.logits.detach().numpy()
del hf_model, hf_outputs
diff --git a/tests/tx/models/test_qwen3.py b/tests/tx/models/test_qwen3.py
index ac4cb42dd0..7028fc2357 100644
--- a/tests/tx/models/test_qwen3.py
+++ b/tests/tx/models/test_qwen3.py
@@ -25,7 +25,7 @@ def test_qwen3(tp: int):
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-0.6B")
hf_model = AutoModelForCausalLM.from_pretrained(
- "Qwen/Qwen3-0.6B", attn_implementation="eager", use_safetensors=True
+ "Qwen/Qwen3-0.6B", attn_implementation="eager", use_safetensors=True, torch_dtype=torch.float32
)
inputs = ["The capital of France is", "The most popular programming language is"]
@@ -48,31 +48,37 @@ def test_qwen3(tp: int):
outputs = model(batch.input_ids.numpy(), attention_mask=batch.attention_mask.numpy(), output_hidden_states=True)
assert outputs.hidden_states is not None
- assert np.allclose(hf_outputs.hidden_states[0], outputs.hidden_states[0], rtol=1e-6)
- assert np.allclose(hf_outputs.hidden_states[1], outputs.hidden_states[1], rtol=1e-3, atol=1e-3)
- assert np.allclose(hf_outputs.hidden_states[-1], outputs.hidden_states[-1], rtol=1e-3, atol=1e-3)
+ assert np.allclose(hf_outputs.hidden_states[0].float(), outputs.hidden_states[0], rtol=1e-6)
+ assert np.allclose(hf_outputs.hidden_states[1].float(), outputs.hidden_states[1], rtol=1e-3, atol=1e-3)
+ assert np.allclose(hf_outputs.hidden_states[-1].float(), outputs.hidden_states[-1], rtol=1e-3, atol=1e-3)
def load_moe_base_weights(jax_moe_layer: Qwen3MoeSparseMoeBlock, hf_moe_layer: HFQwen3MoeSparseMoeBlock) -> None:
"""Load base weights from HF MoE layer to JAX MoE layer."""
- jax_moe_layer.gate.kernel[:] = hf_moe_layer.gate.weight.detach().numpy().T
- for i, expert in enumerate(hf_moe_layer.experts):
- jax_moe_layer.experts.gate_proj.weight[i, :, :] = expert.gate_proj.weight.detach().numpy().T
- jax_moe_layer.experts.up_proj.weight[i, :, :] = expert.up_proj.weight.detach().numpy().T
- jax_moe_layer.experts.down_proj.weight[i, :, :] = expert.down_proj.weight.detach().numpy().T
+ jax_moe_layer.gate.kernel[:] = hf_moe_layer.gate.weight.detach().float().numpy().T
+ gate_up = hf_moe_layer.experts.gate_up_proj.detach().float().numpy()
+ intermediate = gate_up.shape[1] // 2
+ jax_moe_layer.experts.gate_proj.weight[:] = gate_up[:, :intermediate, :].transpose(0, 2, 1)
+ jax_moe_layer.experts.up_proj.weight[:] = gate_up[:, intermediate:, :].transpose(0, 2, 1)
+ jax_moe_layer.experts.down_proj.weight[:] = (
+ hf_moe_layer.experts.down_proj.detach().float().numpy().transpose(0, 2, 1)
+ )
@pytest.mark.parametrize("ep,tp", [(1, 1), (1, 2), (2, 1)])
def test_qwen3_moe_layer(ep: int, tp: int):
model_name = "trl-internal-testing/tiny-Qwen3MoeForCausalLM"
- hf_model = AutoModelForCausalLM.from_pretrained(model_name, attn_implementation="eager", use_safetensors=True)
+ hf_model = AutoModelForCausalLM.from_pretrained(
+ model_name, attn_implementation="eager", use_safetensors=True, torch_dtype=torch.float32
+ )
base_config = PretrainedConfig.from_pretrained(model_name)
config = Qwen3Config(base_config, max_lora_adapters=0, max_lora_rank=0, shard_attention_heads=True)
hf_moe_layer = hf_model.model.layers[0].mlp
x = torch.randn(4, 2, config.hidden_size)
with torch.no_grad():
- hf_final_hidden_states, hf_router_logits = hf_moe_layer.forward(x)
+ hf_final_hidden_states = hf_moe_layer.forward(x)
+ hf_router_logits = torch.nn.functional.linear(x.view(-1, config.hidden_size), hf_moe_layer.gate.weight)
mesh = jax.make_mesh((1, ep, tp), ("fsdp", "ep", "tp"), axis_types=(jax.sharding.AxisType.Auto,) * 3)
with jax.set_mesh(mesh):
@@ -117,7 +123,9 @@ def share_hf_lora_A(hf_modules: list) -> None:
def test_qwen3_moe_layer_lora(ep: int, tp: int):
"""Test MoE LoRA by merging adapter into base weights and comparing outputs."""
model_name = "trl-internal-testing/tiny-Qwen3MoeForCausalLM"
- hf_model = AutoModelForCausalLM.from_pretrained(model_name, attn_implementation="eager", use_safetensors=True)
+ hf_model = AutoModelForCausalLM.from_pretrained(
+ model_name, attn_implementation="eager", use_safetensors=True, torch_dtype=torch.float32
+ )
base_config = PretrainedConfig.from_pretrained(model_name)
config = Qwen3Config(base_config, max_lora_adapters=3, max_lora_rank=4, shard_attention_heads=True)
@@ -200,7 +208,9 @@ def test_qwen3_lora():
lora_configs.append(lora_config)
hf_model = get_peft_model(
- AutoModelForCausalLM.from_pretrained(base_model_name, attn_implementation="eager", use_safetensors=True),
+ AutoModelForCausalLM.from_pretrained(
+ base_model_name, attn_implementation="eager", use_safetensors=True, torch_dtype=torch.float32
+ ),
lora_config,
)
hf_model.eval()
@@ -241,8 +251,8 @@ def test_qwen3_lora():
load_lora_weights(
model.model.embed_tokens,
adapter_idx=adapter_idx,
- lora_A_weights=hf_embed_tokens.lora_embedding_A["default"].detach().numpy().T,
- lora_B_weights=hf_embed_tokens.lora_embedding_B["default"].detach().numpy().T,
+ lora_A_weights=hf_embed_tokens.lora_embedding_A["default"].detach().float().numpy().T,
+ lora_B_weights=hf_embed_tokens.lora_embedding_B["default"].detach().float().numpy().T,
scaling=lora_config.lora_alpha / lora_config.r,
rank=lora_config.r,
)
@@ -262,9 +272,9 @@ def test_qwen3_lora():
load_lora_weights(
jax_proj,
adapter_idx=adapter_idx,
- lora_A_weights=hf_projs[0].lora_A["default"].weight.detach().numpy().T,
+ lora_A_weights=hf_projs[0].lora_A["default"].weight.detach().float().numpy().T,
lora_B_weights=FusedLoRALinear.fuse(
- *(p.lora_B["default"].weight.detach().numpy().T for p in hf_projs),
+ *(p.lora_B["default"].weight.detach().float().numpy().T for p in hf_projs),
group_sizes=group_sizes,
),
scaling=lora_config.lora_alpha / lora_config.r,
@@ -284,4 +294,4 @@ def test_qwen3_lora():
# Compare outputs with corresponding adapters
for idx in range(len(lora_adapters)):
- assert np.allclose(hf_outputs_list[idx].logits[0], logits[idx], rtol=1e-3, atol=1e-3)
+ assert np.allclose(hf_outputs_list[idx].logits[0].float(), logits[idx], rtol=1e-3, atol=1e-3)
diff --git a/tests/tx/models/test_qwen3_generate.py b/tests/tx/models/test_qwen3_generate.py
index 5a544e6202..a1f7bfe7e9 100644
--- a/tests/tx/models/test_qwen3_generate.py
+++ b/tests/tx/models/test_qwen3_generate.py
@@ -20,7 +20,9 @@ def test_qwen3_generate():
"""Test batched text generation with KV caching matches HuggingFace."""
model_name = "Qwen/Qwen3-0.6B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
- hf_model = AutoModelForCausalLM.from_pretrained(model_name, attn_implementation="eager", use_safetensors=True)
+ hf_model = AutoModelForCausalLM.from_pretrained(
+ model_name, attn_implementation="eager", use_safetensors=True, torch_dtype=torch.float32
+ )
inputs = ["My name is", "The capital of France is", "Test stopping", "Test stopping"]
max_new_tokens = [10, 20, 50, 2]
@@ -117,7 +119,7 @@ def test_qwen3_generate():
with torch.no_grad():
hf_logits = hf_model(tokens.input_ids).logits[0, :-1]
hf_logprobs = torch.nn.functional.log_softmax(hf_logits, dim=-1)
- expected = hf_logprobs[torch.arange(len(hf_logprobs)), tokens.input_ids[0, 1:]].numpy()
+ expected = hf_logprobs[torch.arange(len(hf_logprobs)), tokens.input_ids[0, 1:]].float().numpy()
assert np.allclose(result_with_prompt_logprobs.prompt_logprobs[i], expected, rtol=1e-3, atol=1e-3)
@@ -126,7 +128,9 @@ def test_qwen3_generate_speed():
"""Profile batched text generation with KV caching."""
model_name = "Qwen/Qwen3-0.6B"
tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="right")
- hf_model = AutoModelForCausalLM.from_pretrained(model_name, attn_implementation="eager", use_safetensors=True)
+ hf_model = AutoModelForCausalLM.from_pretrained(
+ model_name, attn_implementation="eager", use_safetensors=True, torch_dtype=torch.float32
+ )
base_config = PretrainedConfig.from_pretrained(model_name)
config = Qwen3Config(base_config, max_lora_adapters=32, max_lora_rank=32, shard_attention_heads=True)
diff --git a/uv.lock b/uv.lock
index 01ab6b5081..d08eba2ebb 100644
--- a/uv.lock
+++ b/uv.lock
@@ -313,6 +313,7 @@ overrides = [
{ name = "ml-dtypes", marker = "sys_platform == 'linux'", specifier = ">=0.5.0" },
{ name = "nvidia-resiliency-ext", marker = "sys_platform == 'never'" },
{ name = "transformer-engine", extras = ["pytorch"], marker = "sys_platform == 'linux'", specifier = "==2.10.0" },
+ { name = "transformers", marker = "sys_platform == 'linux'", specifier = ">=5.0.0,<=5.3.0" },
]
[[package]]
@@ -329,7 +330,8 @@ name = "accelerate"
version = "1.12.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "huggingface-hub" },
+ { name = "huggingface-hub", version = "0.36.2", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
+ { name = "huggingface-hub", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
{ name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'linux' and extra == 'extra-5-skyrl-fsdp') or (sys_platform == 'linux' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
{ name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' or extra == 'extra-5-skyrl-flashrl' or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra != 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu') or (extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu') or (extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron')" },
{ name = "packaging" },
@@ -1708,7 +1710,8 @@ dependencies = [
{ name = "filelock" },
{ name = "fsspec", extra = ["http"] },
{ name = "httpx" },
- { name = "huggingface-hub" },
+ { name = "huggingface-hub", version = "0.36.2", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
+ { name = "huggingface-hub", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
{ name = "multiprocess" },
{ name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'linux' and extra == 'extra-5-skyrl-fsdp') or (sys_platform == 'linux' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
{ name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' or extra == 'extra-5-skyrl-flashrl' or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra != 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu') or (extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu') or (extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron')" },
@@ -3265,21 +3268,170 @@ wheels = [
name = "huggingface-hub"
version = "0.36.2"
source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+ "python_full_version >= '3.14' and platform_machine != 'arm64' and sys_platform == 'darwin'",
+ "python_full_version == '3.13.*' and platform_machine != 'arm64' and sys_platform == 'darwin'",
+ "python_full_version == '3.12.*' and platform_machine != 'arm64' and sys_platform == 'darwin'",
+ "python_full_version >= '3.14' and platform_machine == 'arm64' and sys_platform == 'darwin'",
+ "python_full_version == '3.13.*' and platform_machine == 'arm64' and sys_platform == 'darwin'",
+ "python_full_version == '3.12.*' and platform_machine == 'arm64' and sys_platform == 'darwin'",
+ "python_full_version < '3.12' and platform_machine != 'arm64' and sys_platform == 'darwin'",
+ "python_full_version < '3.12' and platform_machine == 'arm64' and sys_platform == 'darwin'",
+ "python_full_version >= '3.14' and sys_platform != 'darwin' and sys_platform != 'linux'",
+ "python_full_version == '3.13.*' and sys_platform != 'darwin' and sys_platform != 'linux'",
+ "python_full_version == '3.12.*' and sys_platform != 'darwin' and sys_platform != 'linux'",
+ "python_full_version < '3.12' and sys_platform != 'darwin' and sys_platform != 'linux'",
+]
dependencies = [
- { name = "filelock" },
- { name = "fsspec" },
- { name = "hf-xet", marker = "platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
- { name = "packaging" },
- { name = "pyyaml" },
- { name = "requests" },
- { name = "tqdm" },
- { name = "typing-extensions" },
+ { name = "filelock", marker = "sys_platform != 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
+ { name = "fsspec", marker = "sys_platform != 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
+ { name = "hf-xet", marker = "(platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 
'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu') or (platform_machine == 'aarch64' and sys_platform != 'linux') or (platform_machine == 'amd64' and sys_platform != 'linux') or (platform_machine == 'arm64' and sys_platform != 'linux') or (platform_machine == 'x86_64' and sys_platform != 'linux') or (sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (sys_platform == 'linux' and extra == 'extra-5-skyrl-fsdp' and extra == 
'extra-5-skyrl-jax') or (sys_platform == 'linux' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (sys_platform == 'linux' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (sys_platform == 'linux' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (sys_platform == 'linux' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (sys_platform == 'linux' and extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (sys_platform == 'linux' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (sys_platform == 'linux' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (sys_platform == 'linux' and extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
+ { name = "packaging", marker = "sys_platform != 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
+ { name = "pyyaml", marker = "sys_platform != 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
+ { name = "requests", marker = "sys_platform != 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
+ { name = "tqdm", marker = "sys_platform != 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
+ { name = "typing-extensions", marker = "sys_platform != 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
]
sdist = { url = "https://files.pythonhosted.org/packages/7c/b7/8cb61d2eece5fb05a83271da168186721c450eb74e3c31f7ef3169fa475b/huggingface_hub-0.36.2.tar.gz", hash = "sha256:1934304d2fb224f8afa3b87007d58501acfda9215b334eed53072dd5e815ff7a", size = 649782, upload-time = "2026-02-06T09:24:13.098Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/a8/af/48ac8483240de756d2438c380746e7130d1c6f75802ef22f3c6d49982787/huggingface_hub-0.36.2-py3-none-any.whl", hash = "sha256:48f0c8eac16145dfce371e9d2d7772854a4f591bcb56c9cf548accf531d54270", size = 566395, upload-time = "2026-02-06T09:24:11.133Z" },
]
+[[package]]
+name = "huggingface-hub"
+version = "1.8.0"
+source = { registry = "https://pypi.org/simple" }
+resolution-markers = [
+ "(python_full_version >= '3.14' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu')",
+ "python_full_version >= '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.13.*' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.12.*' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.13.*' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.12.*' and platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.14.*' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.13.*' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.12.*' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "(python_full_version < '3.12' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu') or (python_full_version < '3.12' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu')",
+ "python_full_version < '3.12' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.13.*' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.12.*' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'",
+ "python_full_version < '3.12' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'",
+ "python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.13.*' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.12.*' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version < '3.12' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.13.*' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.12.*' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version < '3.12' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.13.*' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.12.*' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version < '3.12' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "(python_full_version >= '3.14' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')",
+ "python_full_version >= '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.13.*' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.12.*' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.13.*' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.12.*' and platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.14.*' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.13.*' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.12.*' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'",
+ "python_full_version < '3.12' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'",
+ "python_full_version < '3.12' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'",
+ "python_full_version < '3.12' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'",
+ "(python_full_version >= '3.14' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu')",
+ "python_full_version >= '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.13.*' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.12.*' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.13.*' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.12.*' and platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.14.*' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.13.*' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.12.*' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version < '3.12' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version < '3.12' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version < '3.12' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "(python_full_version >= '3.14' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu')",
+ "python_full_version >= '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.13.*' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.12.*' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.13.*' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.12.*' and platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.14.*' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.13.*' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.12.*' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version < '3.12' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version < '3.12' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version < '3.12' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "(python_full_version >= '3.14' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu')",
+ "python_full_version >= '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.13.*' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.12.*' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.13.*' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.12.*' and platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.14.*' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.13.*' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.12.*' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version < '3.12' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version < '3.12' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version < '3.12' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "(python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu') or (python_full_version >= '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu') or (python_full_version >= '3.14' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu')",
+ "(python_full_version == '3.13.*' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu') or (python_full_version == '3.13.*' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu')",
+ "(python_full_version == '3.12.*' and platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu') or (python_full_version == '3.12.*' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu')",
+ "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.14.*' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.13.*' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.12.*' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "(python_full_version < '3.12' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu') or (python_full_version < '3.12' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu')",
+ "python_full_version < '3.12' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.13.*' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.12.*' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'",
+ "python_full_version < '3.12' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'",
+ "python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.13.*' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.12.*' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version < '3.12' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.13.*' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.12.*' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version < '3.12' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.13.*' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.12.*' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+ "python_full_version < '3.12' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'",
+]
+dependencies = [
+ { name = "filelock", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
+ { name = "fsspec", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
+ { name = "hf-xet", marker = "(platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 
'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu') or (platform_machine == 'AMD64' and sys_platform == 'linux') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'amd64' and sys_platform == 'linux') or (platform_machine == 'arm64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (sys_platform != 'linux' and extra 
== 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
+ { name = "httpx", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
+ { name = "packaging", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
+ { name = "pyyaml", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
+ { name = "tqdm", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
+ { name = "typer", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
+ { name = "typing-extensions", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/8e/2a/a847fd02261cd051da218baf99f90ee7c7040c109a01833db4f838f25256/huggingface_hub-1.8.0.tar.gz", hash = "sha256:c5627b2fd521e00caf8eff4ac965ba988ea75167fad7ee72e17f9b7183ec63f3", size = 735839, upload-time = "2026-03-25T16:01:28.152Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/a9/ae/8a3a16ea4d202cb641b51d2681bdd3d482c1c592d7570b3fa264730829ce/huggingface_hub-1.8.0-py3-none-any.whl", hash = "sha256:d3eb5047bd4e33c987429de6020d4810d38a5bef95b3b40df9b17346b7f353f2", size = 625208, upload-time = "2026-03-25T16:01:26.603Z" },
+]
+
[[package]]
name = "humanize"
version = "4.15.0"
@@ -7865,7 +8017,7 @@ version = "3.2.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "ftfy", marker = "sys_platform == 'linux'" },
- { name = "huggingface-hub", marker = "sys_platform == 'linux'" },
+ { name = "huggingface-hub", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux'" },
{ name = "regex", marker = "sys_platform == 'linux'" },
{ name = "safetensors", marker = "sys_platform == 'linux'" },
{ name = "timm", marker = "sys_platform == 'linux'" },
@@ -8371,7 +8523,8 @@ version = "0.18.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "accelerate" },
- { name = "huggingface-hub" },
+ { name = "huggingface-hub", version = "0.36.2", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
+ { name = "huggingface-hub", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
{ name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'linux' and extra == 'extra-5-skyrl-fsdp') or (sys_platform == 'linux' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
{ name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' or extra == 'extra-5-skyrl-flashrl' or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra != 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu') or (extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu') or (extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron')" },
{ name = "packaging" },
@@ -8384,7 +8537,7 @@ dependencies = [
{ name = "torch", version = "2.10.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(platform_machine != 'arm64' and sys_platform == 'darwin') or (platform_machine == 'arm64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (platform_machine == 'arm64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (platform_machine == 'arm64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (platform_machine == 'arm64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (platform_machine == 'arm64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (platform_machine == 'arm64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (platform_machine == 'arm64' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (platform_machine == 'arm64' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (platform_machine == 'arm64' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (platform_machine == 'arm64' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (platform_machine == 'arm64' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (platform_machine == 'arm64' and extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (platform_machine == 'arm64' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (platform_machine == 'arm64' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (platform_machine == 'arm64' and extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu') or (sys_platform != 'darwin' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (sys_platform != 'darwin' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (sys_platform != 'darwin' and extra == 'extra-5-skyrl-flashrl' 
and extra == 'extra-5-skyrl-jax') or (sys_platform != 'darwin' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (sys_platform != 'darwin' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (sys_platform != 'darwin' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (sys_platform != 'darwin' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (sys_platform != 'darwin' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (sys_platform != 'darwin' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (sys_platform != 'darwin' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (sys_platform != 'darwin' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (sys_platform != 'darwin' and extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (sys_platform != 'darwin' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (sys_platform != 'darwin' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (sys_platform != 'darwin' and extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
{ name = "torch", version = "2.10.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "(sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu')" },
{ name = "tqdm" },
- { name = "transformers" },
+ { name = "transformers", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
]
sdist = { url = "https://files.pythonhosted.org/packages/d8/48/147b3ea999560b40a34fd78724c7777aa9d18409c2250bdcaf9c4f2db7fc/peft-0.18.1.tar.gz", hash = "sha256:2dd0d6bfce936d1850e48aaddbd250941c5c02fc8ef3237cd8fd5aac35e0bae2", size = 635030, upload-time = "2026-01-09T13:08:01.136Z" }
wheels = [
@@ -10523,7 +10676,7 @@ dependencies = [
{ name = "rich" },
{ name = "safetensors" },
{ name = "tokenizers" },
- { name = "transformers" },
+ { name = "transformers", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
{ name = "typer" },
]
@@ -10583,7 +10736,6 @@ flashrl = [
{ name = "torchvision", version = "0.22.0", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "(python_full_version < '3.14' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl') or (python_full_version >= '3.14' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (python_full_version >= '3.14' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (python_full_version >= '3.14' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (python_full_version >= '3.14' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (python_full_version >= '3.14' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (python_full_version >= '3.14' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (platform_machine != 'aarch64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (platform_machine != 'aarch64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (platform_machine != 'aarch64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (platform_machine != 'aarch64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (platform_machine != 'aarch64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (platform_machine != 'aarch64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (platform_machine != 'aarch64' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (platform_machine != 'aarch64' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (platform_machine != 'aarch64' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (platform_machine != 'aarch64' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') 
or (platform_machine != 'aarch64' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (platform_machine != 'aarch64' and extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (platform_machine != 'aarch64' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (platform_machine != 'aarch64' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (platform_machine != 'aarch64' and extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu') or (platform_python_implementation != 'CPython' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (platform_python_implementation != 'CPython' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (platform_python_implementation != 'CPython' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (platform_python_implementation != 'CPython' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (platform_python_implementation != 'CPython' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (platform_python_implementation != 'CPython' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (platform_python_implementation != 'CPython' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (platform_python_implementation != 'CPython' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (platform_python_implementation != 'CPython' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (platform_python_implementation != 'CPython' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (platform_python_implementation != 'CPython' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (platform_python_implementation != 'CPython' and extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (platform_python_implementation 
!= 'CPython' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (platform_python_implementation != 'CPython' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (platform_python_implementation != 'CPython' and extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu') or (extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra != 'extra-5-skyrl-flashrl' and extra == 
'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
{ name = "torchvision", version = "0.22.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "(python_full_version < '3.14' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (python_full_version < '3.14' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (python_full_version < '3.14' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (python_full_version < '3.14' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (python_full_version < '3.14' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (python_full_version < '3.14' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (python_full_version >= '3.14' and sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl') or (platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl') or (platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (sys_platform != 'linux' and 
extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu') or (extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra != 'extra-5-skyrl-flashrl' and extra == 
'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
{ name = "tqdm" },
- { name = "transformers" },
{ name = "uvicorn" },
{ name = "vllm-router", marker = "sys_platform == 'linux'" },
{ name = "wandb" },
@@ -10617,7 +10769,6 @@ fsdp = [
{ name = "torchdata" },
{ name = "torchvision", version = "0.25.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
{ name = "tqdm" },
- { name = "transformers" },
{ name = "uvicorn" },
{ name = "vllm", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
{ name = "vllm-router", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
@@ -10670,7 +10821,6 @@ megatron = [
{ name = "torchvision", version = "0.25.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux'" },
{ name = "tqdm" },
{ name = "transformer-engine", extra = ["pytorch"], marker = "(sys_platform == 'linux' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
- { name = "transformers" },
{ name = "uvicorn" },
{ name = "vllm", marker = "sys_platform == 'linux'" },
{ name = "vllm-router", marker = "sys_platform == 'linux'" },
@@ -10703,7 +10853,6 @@ miniswe = [
{ name = "tensordict" },
{ name = "torchdata" },
{ name = "tqdm" },
- { name = "transformers" },
{ name = "uvicorn" },
{ name = "vllm-router", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax')" },
{ name = "wandb" },
@@ -10733,7 +10882,6 @@ skyrl-train = [
{ name = "tensordict" },
{ name = "torchdata" },
{ name = "tqdm" },
- { name = "transformers" },
{ name = "uvicorn" },
{ name = "vllm-router", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
{ name = "wandb" },
@@ -10842,8 +10990,7 @@ requires-dist = [
{ name = "torchvision", marker = "sys_platform == 'linux' and extra == 'megatron'", index = "https://download.pytorch.org/whl/cu128" },
{ name = "tqdm", marker = "extra == 'skyrl-train'" },
{ name = "transformer-engine", extras = ["pytorch"], marker = "sys_platform == 'linux' and extra == 'megatron'", specifier = "==2.10.0" },
- { name = "transformers", specifier = ">=4.56.1,<5" },
- { name = "transformers", marker = "extra == 'skyrl-train'", specifier = ">=4.51.0" },
+ { name = "transformers", specifier = ">=5.0.0,<=5.3.0" },
{ name = "ty", marker = "extra == 'dev'" },
{ name = "typer", specifier = ">=0.17.4" },
{ name = "uvicorn", marker = "extra == 'skyrl-train'" },
@@ -11332,7 +11479,7 @@ name = "timm"
version = "1.0.25"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "huggingface-hub", marker = "sys_platform == 'linux'" },
+ { name = "huggingface-hub", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux'" },
{ name = "pyyaml", marker = "sys_platform == 'linux'" },
{ name = "safetensors", marker = "sys_platform == 'linux'" },
{ name = "torch", version = "2.10.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux'" },
@@ -11357,7 +11504,7 @@ dependencies = [
{ name = "pydantic" },
{ name = "rich" },
{ name = "sniffio" },
- { name = "transformers" },
+ { name = "transformers", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/e8/14/9abd320e01ec113dc383c407ea02261bfa97b9c43edbb299ce7ecaea3b61/tinker-0.13.1.tar.gz", hash = "sha256:d856cf99c37a46238a9d92cee719444ce657f5de9b45c4a8f233d3b6b1a482e1", size = 178940, upload-time = "2026-02-13T22:13:54.661Z" }
@@ -11370,7 +11517,8 @@ name = "tokenizers"
version = "0.22.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "huggingface-hub" },
+ { name = "huggingface-hub", version = "0.36.2", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
+ { name = "huggingface-hub", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
]
sdist = { url = "https://files.pythonhosted.org/packages/73/6f/f80cfef4a312e1fb34baf7d85c72d4411afde10978d4657f8cdd811d3ccc/tokenizers-0.22.2.tar.gz", hash = "sha256:473b83b915e547aa366d1eee11806deaf419e17be16310ac0a14077f1e28f917", size = 372115, upload-time = "2026-01-05T10:45:15.988Z" }
wheels = [
@@ -12043,24 +12191,23 @@ sdist = { url = "https://files.pythonhosted.org/packages/18/94/609a7772569d3acdb
[[package]]
name = "transformers"
-version = "4.57.6"
+version = "5.3.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
- { name = "filelock" },
- { name = "huggingface-hub" },
+ { name = "huggingface-hub", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
{ name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'linux' and extra == 'extra-5-skyrl-fsdp') or (sys_platform == 'linux' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
- { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' or extra == 'extra-5-skyrl-flashrl' or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra != 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu') or (extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu') or (extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron')" },
- { name = "packaging" },
- { name = "pyyaml" },
- { name = "regex" },
- { name = "requests" },
- { name = "safetensors" },
- { name = "tokenizers" },
- { name = "tqdm" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/c4/35/67252acc1b929dc88b6602e8c4a982e64f31e733b804c14bc24b47da35e6/transformers-4.57.6.tar.gz", hash = "sha256:55e44126ece9dc0a291521b7e5492b572e6ef2766338a610b9ab5afbb70689d3", size = 10134912, upload-time = "2026-01-16T10:38:39.284Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/03/b8/e484ef633af3887baeeb4b6ad12743363af7cce68ae51e938e00aaa0529d/transformers-4.57.6-py3-none-any.whl", hash = "sha256:4c9e9de11333ddfe5114bc872c9f370509198acf0b87a832a0ab9458e2bd0550", size = 11993498, upload-time = "2026-01-16T10:38:31.289Z" },
+ { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl') or (sys_platform == 'linux' and extra != 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu') or (sys_platform == 'linux' and extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax') or (sys_platform == 'linux' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu')" },
+ { name = "packaging", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
+ { name = "pyyaml", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
+ { name = "regex", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
+ { name = "safetensors", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
+ { name = "tokenizers", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
+ { name = "tqdm", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
+ { name = "typer", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/fc/1a/70e830d53ecc96ce69cfa8de38f163712d2b43ac52fbd743f39f56025c31/transformers-5.3.0.tar.gz", hash = "sha256:009555b364029da9e2946d41f1c5de9f15e6b1df46b189b7293f33a161b9c557", size = 8830831, upload-time = "2026-03-04T17:41:46.119Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/b8/88/ae8320064e32679a5429a2c9ebbc05c2bf32cefb6e076f9b07f6d685a9b4/transformers-5.3.0-py3-none-any.whl", hash = "sha256:50ac8c89c3c7033444fb3f9f53138096b997ebb70d4b5e50a2e810bf12d3d29a", size = 10661827, upload-time = "2026-03-04T17:41:42.722Z" },
]
[[package]]