diff --git a/pyproject.toml b/pyproject.toml index ec398ac9d0..9199527c20 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ dependencies = [ "rich>=14.1.0", "safetensors>=0.6.2", "tokenizers>=0.21.2", - "transformers>=4.56.1,<5", + "transformers>=5.0.0,<=5.3.0", "typer>=0.17.4", # "wandb>=0.22.0", "peft", @@ -72,7 +72,6 @@ skyrl-train = [ "ninja", "tensorboard", "func_timeout", - "transformers>=4.51.0", "hydra-core==1.3.2", "accelerate", "torchdata", @@ -217,6 +216,7 @@ override-dependencies = [ "causal-conv1d; sys_platform == 'never'", "transformer-engine[pytorch]==2.10.0; sys_platform == 'linux'", "megatron-core==0.16.1; sys_platform == 'linux'", + "transformers>=5.0.0,<=5.3.0; sys_platform == 'linux'", "ml_dtypes>=0.5.0; sys_platform == 'linux'", ] diff --git a/skyrl/backends/skyrl_train/distributed/fsdp_utils.py b/skyrl/backends/skyrl_train/distributed/fsdp_utils.py index 449cf0266e..76dada1403 100644 --- a/skyrl/backends/skyrl_train/distributed/fsdp_utils.py +++ b/skyrl/backends/skyrl_train/distributed/fsdp_utils.py @@ -63,20 +63,13 @@ def init_fn(x: torch.nn.Module): return x -def get_init_weight_context_manager(use_meta_tensor=True, mesh: DeviceMesh = None): - from accelerate import init_empty_weights - - def cpu_init_weights(): - return torch.device("cpu") - - if use_meta_tensor: - if mesh is None: - init_context = init_empty_weights if torch.distributed.get_rank() != 0 else cpu_init_weights - else: - init_context = init_empty_weights if mesh.get_coordinate()[-1] != 0 else cpu_init_weights - else: - init_context = cpu_init_weights - return init_context +def should_use_meta_init(use_meta_tensor=True, mesh: DeviceMesh = None) -> bool: + """Return True when this rank should create an empty model on meta device.""" + if not use_meta_tensor: + return False + if mesh is None: + return torch.distributed.get_rank() != 0 + return mesh.get_coordinate()[-1] != 0 def get_fsdp_wrap_policy(module, config=None, is_lora=False): @@ -176,6 +169,14 @@ def 
offload_fsdp_model_to_cpu(model: FSDP, empty_cache: bool = True): @torch.no_grad() def offload_fsdp2_model_to_cpu(model, empty_cache: bool = True): + # Materialize any leftover meta buffers (e.g. non-persistent inv_freq from + # RotaryEmbedding created via from_config on meta device). We must NOT call + # model.to_empty() because that would wipe already-loaded FSDP parameters. + for module in model.modules(): + for key in list(module._buffers.keys()): + buf = module._buffers[key] + if buf is not None and buf.device.type == "meta": + module._buffers[key] = torch.empty(buf.shape, dtype=buf.dtype, device="cpu") model.to("cpu", non_blocking=True) if empty_cache: torch.cuda.empty_cache() @@ -247,6 +248,27 @@ def get_fsdp_state_ctx(model, state_type, state_cfg, optim_cfg): return nullcontext() +def _sync_non_persistent_buffers(model: torch.nn.Module, loaded_sd: dict): + """Broadcast non-persistent buffers (e.g. inv_freq) from rank 0 to all ranks. + + Non-persistent buffers are excluded from state_dict so they are never loaded + by the parameter broadcast loop. On non-rank-0 meta-init they remain on the + meta device with no data; rank 0 has the correctly computed values. + """ + for module in model.modules(): + non_persistent = getattr(module, "_non_persistent_buffers_set", set()) + for key in sorted(non_persistent): + buf = module._buffers.get(key) + if buf is None: + continue + if dist.get_rank() == 0: + src = buf.detach().cuda() + else: + src = torch.empty(buf.shape, dtype=buf.dtype, device="cuda") + dist.broadcast(src, src=0) + module._buffers[key] = src.cpu() + + # Fsdp2 load full state dict from `accelerate` # Reference: https://github.com/huggingface/accelerate/blob/0af621bbecc0e43f5d43766a4945d3d2236bb8a9/src/accelerate/utils/fsdp_utils.py#L455 # NOTE (sumanthrh): The original code from `accelerate` assumes init on meta device - with cpu init only on rank 0, but the code is compatible with cpu init on all ranks. 
@@ -324,6 +346,11 @@ def _cast_and_contiguous(tensor, to_contiguous, dtype): # we set `assign=True` because our params can be on meta device model.load_state_dict(sharded_sd, assign=True) + # Broadcast non-persistent buffers (e.g. inv_freq from RotaryEmbedding) that + # are excluded from state_dict. On non-rank-0 meta-init these are still on + # meta device with no data; rank 0 has the correctly computed values. + _sync_non_persistent_buffers(model, sharded_sd) + # If we don't offload FSDP2 Module to CPU and then back to GPU, # it will occupy a large amount of reserved GPU memory,which can not be released using torch.cuda.empty_cache() # even if we are using cpu_offload diff --git a/skyrl/backends/skyrl_train/inference_engines/vllm/vllm_server.py b/skyrl/backends/skyrl_train/inference_engines/vllm/vllm_server.py index d2310ef9ed..b51938a8ac 100644 --- a/skyrl/backends/skyrl_train/inference_engines/vllm/vllm_server.py +++ b/skyrl/backends/skyrl_train/inference_engines/vllm/vllm_server.py @@ -30,7 +30,6 @@ def __init__(self, args): async def run_server(self, **uvicorn_kwargs) -> None: sock_addr = (self.server_args.host or "", self.server_args.port) sock = create_server_socket(sock_addr) - set_ulimit() def signal_handler(*_) -> None: @@ -39,7 +38,6 @@ def signal_handler(*_) -> None: signal.signal(signal.SIGTERM, signal_handler) - # TODO(tgriggs): Move this elsewhere, make configurable. 
os.environ["VLLM_USE_V1"] = "1" engine_args = AsyncEngineArgs.from_cli_args(self.server_args) engine = AsyncLLMEngine.from_engine_args( @@ -147,7 +145,10 @@ async def _destroy_weights_update_group(request: Request): await shutdown_task - sock.close() + try: + sock.close() + except (AttributeError, OSError): + pass def run_server_uvloop(self, **uvicorn_kwargs) -> None: uvloop.run(self.run_server(**uvicorn_kwargs)) diff --git a/skyrl/backends/skyrl_train/weight_sync/cuda_ipc_strategy.py b/skyrl/backends/skyrl_train/weight_sync/cuda_ipc_strategy.py index c0c59da035..c828c2172d 100644 --- a/skyrl/backends/skyrl_train/weight_sync/cuda_ipc_strategy.py +++ b/skyrl/backends/skyrl_train/weight_sync/cuda_ipc_strategy.py @@ -260,7 +260,7 @@ async def _send_chunks_legacy(self, chunks: Iterable[WeightChunk]) -> None: offset = 0 for name, tensor, shape in zip(chunk.names, chunk.tensors, chunk.shapes): size = tensor.numel() - packed_tensor[offset : offset + size].copy_(tensor.detach().view(-1)) + packed_tensor[offset : offset + size].copy_(tensor.detach().reshape(-1)) offset += size names.append(name) dtypes.append(self._init_info.model_dtype_str) diff --git a/skyrl/backends/skyrl_train/workers/fsdp/fsdp_worker.py b/skyrl/backends/skyrl_train/workers/fsdp/fsdp_worker.py index a0a1990f5d..6d28358809 100644 --- a/skyrl/backends/skyrl_train/workers/fsdp/fsdp_worker.py +++ b/skyrl/backends/skyrl_train/workers/fsdp/fsdp_worker.py @@ -24,7 +24,7 @@ from skyrl.backends.skyrl_train.distributed.fsdp_strategy import FSDPStrategy from skyrl.backends.skyrl_train.distributed.fsdp_utils import ( fsdp_version, - get_init_weight_context_manager, + should_use_meta_init, ) from skyrl.backends.skyrl_train.training_batch import ( TrainingInputBatch, @@ -165,37 +165,34 @@ def init_model(self, model_path, num_training_steps: int = None): self._is_lora = self.cfg.policy.model.lora.rank > 0 model_config = AutoConfig.from_pretrained(model_path, trust_remote_code=True) - init_context = 
get_init_weight_context_manager( + use_meta = should_use_meta_init( use_meta_tensor=not model_config.tie_word_embeddings, mesh=self.strategy.device_mesh ) - with init_context(): - - wrapped_model = HFModelWrapper( - model_path, - use_flash_attention_2=self.cfg.flash_attn, - # NOTE (sumanthrh): Model initialization should always be in fp32 - # during training - bf16=False, - lora_rank=self.cfg.policy.model.lora.rank, - lora_alpha=self.cfg.policy.model.lora.alpha, - lora_dropout=self.cfg.policy.model.lora.dropout, - lora_init_method=self.cfg.policy.model.lora.init_method, - target_modules=self.cfg.policy.model.lora.target_modules, - exclude_modules=self.cfg.policy.model.lora.exclude_modules, - sequence_parallel_size=self.cfg.policy.sequence_parallel_size, - use_sample_packing=self.cfg.use_sample_packing, - use_torch_compile=self.cfg.policy.use_torch_compile, - rope_scaling=get_rope_scaling_config(self.cfg), - rope_theta=get_rope_theta_config(self.cfg), - model_config_kwargs=self.cfg.policy.model_config_kwargs, - ) - # in-place patch - self._seq_parallel_monkey_patch(model=wrapped_model.model) - if self.cfg.gradient_checkpointing: - wrapped_model.gradient_checkpointing_enable( - gradient_checkpointing_kwargs={"use_reentrant": self.cfg.gradient_checkpointing_use_reentrant} - ) + wrapped_model = HFModelWrapper( + model_path, + use_flash_attention_2=self.cfg.flash_attn, + bf16=False, + lora_rank=self.cfg.policy.model.lora.rank, + lora_alpha=self.cfg.policy.model.lora.alpha, + lora_dropout=self.cfg.policy.model.lora.dropout, + lora_init_method=self.cfg.policy.model.lora.init_method, + target_modules=self.cfg.policy.model.lora.target_modules, + exclude_modules=self.cfg.policy.model.lora.exclude_modules, + sequence_parallel_size=self.cfg.policy.sequence_parallel_size, + use_sample_packing=self.cfg.use_sample_packing, + use_torch_compile=self.cfg.policy.use_torch_compile, + rope_scaling=get_rope_scaling_config(self.cfg), + rope_theta=get_rope_theta_config(self.cfg), + 
model_config_kwargs=self.cfg.policy.model_config_kwargs, + meta_init=use_meta, + ) + self._seq_parallel_monkey_patch(model=wrapped_model.model) + + if self.cfg.gradient_checkpointing: + wrapped_model.gradient_checkpointing_enable( + gradient_checkpointing_kwargs={"use_reentrant": self.cfg.gradient_checkpointing_use_reentrant} + ) self.model, self.optimizer, self.scheduler = strategy.prepare( (wrapped_model, None, None), @@ -342,34 +339,33 @@ def init_model(self, model_path, num_training_steps: int = None): self.strategy = strategy model_config = AutoConfig.from_pretrained(model_path, trust_remote_code=True) - init_context = get_init_weight_context_manager( + use_meta = should_use_meta_init( use_meta_tensor=not model_config.tie_word_embeddings, mesh=self.strategy.device_mesh ) - with init_context(): - critic = get_llm_for_sequence_regression( - model_path, - "critic", - use_flash_attention_2=self.cfg.flash_attn, - # NOTE (sumanthrh): Model initialization should always be in fp32 - # during training - bf16=False, - lora_rank=self.cfg.critic.model.lora.rank, - lora_alpha=self.cfg.critic.model.lora.alpha, - lora_dropout=self.cfg.critic.model.lora.dropout, - target_modules=self.cfg.critic.model.lora.target_modules, - exclude_modules=self.cfg.critic.model.lora.exclude_modules, - value_head_prefix=self.cfg.algorithm.value_head_prefix, - init_value_head=self.cfg.policy.model.path == self.cfg.critic.model.path, - sequence_parallel_size=self.cfg.critic.sequence_parallel_size, - use_sample_packing=self.cfg.use_sample_packing, - model_config_kwargs=self.cfg.critic.model_config_kwargs, - ) - self._seq_parallel_monkey_patch(model=critic, use_parent_class=True) - if self.cfg.gradient_checkpointing: - critic.gradient_checkpointing_enable( - gradient_checkpointing_kwargs={"use_reentrant": self.cfg.gradient_checkpointing_use_reentrant} - ) + critic = get_llm_for_sequence_regression( + model_path, + "critic", + use_flash_attention_2=self.cfg.flash_attn, + bf16=False, + 
lora_rank=self.cfg.critic.model.lora.rank, + lora_alpha=self.cfg.critic.model.lora.alpha, + lora_dropout=self.cfg.critic.model.lora.dropout, + target_modules=self.cfg.critic.model.lora.target_modules, + exclude_modules=self.cfg.critic.model.lora.exclude_modules, + value_head_prefix=self.cfg.algorithm.value_head_prefix, + init_value_head=self.cfg.policy.model.path == self.cfg.critic.model.path, + sequence_parallel_size=self.cfg.critic.sequence_parallel_size, + use_sample_packing=self.cfg.use_sample_packing, + model_config_kwargs=self.cfg.critic.model_config_kwargs, + meta_init=use_meta, + ) + self._seq_parallel_monkey_patch(model=critic, use_parent_class=True) + + if self.cfg.gradient_checkpointing: + critic.gradient_checkpointing_enable( + gradient_checkpointing_kwargs={"use_reentrant": self.cfg.gradient_checkpointing_use_reentrant} + ) # prepare models/optimizers... self.model, self.optimizer, self.scheduler = strategy.prepare( @@ -412,22 +408,22 @@ def init_model(self, model_path): self.strategy = strategy model_config = AutoConfig.from_pretrained(model_path, trust_remote_code=True) - init_context = get_init_weight_context_manager( + use_meta = should_use_meta_init( use_meta_tensor=not model_config.tie_word_embeddings, mesh=self.strategy.device_mesh ) - with init_context(): - wrapped_model = HFModelWrapper( - model_path, - use_flash_attention_2=self.cfg.flash_attn, - bf16=self.cfg.bf16, - sequence_parallel_size=self.cfg.ref.sequence_parallel_size, - use_sample_packing=self.cfg.use_sample_packing, - rope_scaling=get_rope_scaling_config(self.cfg), - rope_theta=get_rope_theta_config(self.cfg), - model_config_kwargs=self.cfg.ref.model_config_kwargs, - ) - self._seq_parallel_monkey_patch(model=wrapped_model.model) + wrapped_model = HFModelWrapper( + model_path, + use_flash_attention_2=self.cfg.flash_attn, + bf16=self.cfg.bf16, + sequence_parallel_size=self.cfg.ref.sequence_parallel_size, + use_sample_packing=self.cfg.use_sample_packing, + 
rope_scaling=get_rope_scaling_config(self.cfg), + rope_theta=get_rope_theta_config(self.cfg), + model_config_kwargs=self.cfg.ref.model_config_kwargs, + meta_init=use_meta, + ) + self._seq_parallel_monkey_patch(model=wrapped_model.model) self.model = strategy.prepare(wrapped_model) self.model.eval() diff --git a/skyrl/backends/skyrl_train/workers/megatron/megatron_worker.py b/skyrl/backends/skyrl_train/workers/megatron/megatron_worker.py index 1fb7235806..93589b46dc 100644 --- a/skyrl/backends/skyrl_train/workers/megatron/megatron_worker.py +++ b/skyrl/backends/skyrl_train/workers/megatron/megatron_worker.py @@ -288,6 +288,14 @@ def init_configs( if hasattr(provider, "q_lora_rank") and hasattr(hf_config, "q_lora_rank"): provider.q_lora_rank = hf_config.q_lora_rank + # Workaround for transformers v5 moving rope_theta into rope_parameters + # (previously it was a top-level config attribute). megatron-bridge's + # CONFIG_MAPPING reads config.rope_theta which no longer exists in v5, + # causing it to fall back to the default rotary_base of 10000. 
+ rope_params = getattr(hf_config, "rope_parameters", None) or getattr(hf_config, "rope_scaling", None) + if isinstance(rope_params, dict) and "rope_theta" in rope_params: + provider.rotary_base = rope_params["rope_theta"] + provider.tensor_model_parallel_size = megatron_config.tensor_model_parallel_size provider.pipeline_model_parallel_size = megatron_config.pipeline_model_parallel_size provider.pipeline_dtype = torch.bfloat16 if bf16 else torch.float32 diff --git a/skyrl/backends/skyrl_train/workers/model_wrapper.py b/skyrl/backends/skyrl_train/workers/model_wrapper.py index 3eb45f80a7..60b468bebe 100644 --- a/skyrl/backends/skyrl_train/workers/model_wrapper.py +++ b/skyrl/backends/skyrl_train/workers/model_wrapper.py @@ -78,6 +78,7 @@ def __init__( rope_scaling: Dict[str, Any] = {}, rope_theta: float | None = None, model_config_kwargs: dict = {}, + meta_init: bool = False, **kwargs, ) -> None: super().__init__() @@ -86,7 +87,6 @@ def __init__( self.attn_implementation = "flash_attention_2" if use_flash_attention_2 else "sdpa" self.use_sample_packing = use_sample_packing self.is_vlm = False - # packing samples using Flash Attention 2 if use_sample_packing: assert ( self.attn_implementation == "flash_attention_2" @@ -122,22 +122,26 @@ def __init__( # NOTE: In future transformers releases (> 5.0.0), all multimodal models can use AutoModelForMultimodalLM. 
model_class = AutoModelForImageTextToText - rope_scaling_kwargs = {} if rope_scaling: - rope_scaling_kwargs["rope_scaling"] = rope_scaling + model_config.rope_scaling = rope_scaling if rope_theta: - rope_scaling_kwargs["rope_theta"] = rope_theta - - self.model = model_class.from_pretrained( - pretrain_or_model, - config=model_config, - trust_remote_code=True, - attn_implementation=self.attn_implementation, - quantization_config=nf4_config, - torch_dtype=torch.bfloat16 if bf16 else torch.float32, - device_map=device_map, - **rope_scaling_kwargs, - ) + model_config.rope_theta = rope_theta + model_config._attn_implementation = self.attn_implementation + + if meta_init: + with torch.device("meta"): + self.model = model_class.from_config(model_config, trust_remote_code=True) + self.model.to(torch.bfloat16 if bf16 else torch.float32) + else: + self.model = model_class.from_pretrained( + pretrain_or_model, + config=model_config, + trust_remote_code=True, + attn_implementation=self.attn_implementation, + quantization_config=nf4_config, + torch_dtype=torch.bfloat16 if bf16 else torch.float32, + device_map=device_map, + ) # gpt oss if Version(transformers.__version__) >= Version("4.56.2"): @@ -303,6 +307,7 @@ def forward( entropy_requires_grad=True, pixel_values: Optional[TensorList] = None, image_grid_thw: Optional[TensorList] = None, + mm_token_type_ids: Optional[torch.Tensor] = None, ) -> torch.Tensor: """Returns action log probs""" if self.is_vlm: @@ -352,12 +357,17 @@ def forward( ) if self.is_vlm: + vlm_kwargs = dict( + pixel_values=pixel_values, + image_grid_thw=image_grid_thw, + ) + if mm_token_type_ids is not None: + vlm_kwargs["mm_token_type_ids"] = mm_token_type_ids output = self.model( sequences_fwd, attention_mask=attention_mask_fwd, position_ids=None, - pixel_values=pixel_values, - image_grid_thw=image_grid_thw, + **vlm_kwargs, ) # NOTE (sumanthrh): Once we have position_ids, we don't need attention mask with flash attention. 
elif self.use_sample_packing and self.attn_implementation == "flash_attention_2": @@ -480,6 +490,8 @@ def __init__(self, config: AutoConfig): if self.sequence_parallel_size > 1: logger.info("Critic model using sequence parallelism with size: ", self.sequence_parallel_size) + self.post_init() + def forward( self, input_ids: torch.LongTensor = None, @@ -579,6 +591,7 @@ def get_llm_for_sequence_regression( sequence_parallel_size=1, use_sample_packing: bool = False, model_config_kwargs: dict = {}, + meta_init: bool = False, **kwargs, ) -> nn.Module: """Get transformer with a sequence classification head on top (linear layer). @@ -618,15 +631,22 @@ def get_llm_for_sequence_regression( else: nf4_config = None - model = cls_class.from_pretrained( - model_name_or_path, - config=config, - trust_remote_code=True, - torch_dtype=torch.bfloat16 if bf16 else torch.float32, - quantization_config=nf4_config, - device_map=device_map, - **kwargs, - ) + if meta_init: + from transformers.modeling_utils import no_init_weights + + with no_init_weights(), torch.device("meta"): + model = cls_class(config) + model.to(dtype=torch.bfloat16 if bf16 else torch.float32) + else: + model = cls_class.from_pretrained( + model_name_or_path, + config=config, + trust_remote_code=True, + torch_dtype=torch.bfloat16 if bf16 else torch.float32, + quantization_config=nf4_config, + device_map=device_map, + **kwargs, + ) # LoRA if lora_rank > 0: diff --git a/skyrl/tx/models/configs.py b/skyrl/tx/models/configs.py index 6d90f28e9a..f16f079f90 100644 --- a/skyrl/tx/models/configs.py +++ b/skyrl/tx/models/configs.py @@ -38,10 +38,19 @@ def __init__( gradient_checkpointing: bool = False, mhc_expansion_rate: int = 1, ): - # `text_config` can come through as a raw dict from HF configs. 
super().__init__(**(config if isinstance(config, dict) else config.__dict__)) - # Add LoRA-specific parameters + # In transformers v5, rope_parameters may not contain rope_theta + # even when it exists as a top-level config attribute (e.g. DeepSeek v3). + # Inject it so model code can always use config.rope_parameters["rope_theta"]. + rope_params = getattr(self, "rope_parameters", None) or {} + if "rope_theta" not in rope_params: + rope_theta = getattr(self, "rope_theta", None) + if rope_theta is not None: + rope_params["rope_theta"] = rope_theta + if rope_params: + self.rope_parameters = rope_params + self.max_lora_adapters = max_lora_adapters self.max_lora_rank = max_lora_rank self.shard_attention_heads = shard_attention_heads @@ -53,10 +62,13 @@ def get_config(self) -> PretrainedConfig: """Return `text_config` when present, otherwise return this config.""" return self.get_text_config() if hasattr(self, "text_config") else self - def get_text_config(self) -> "ModelConfig": + def get_text_config(self, decoder=None, encoder=None) -> "ModelConfig": """Return a wrapped config built from `self.text_config`.""" + text_cfg = super().get_text_config(decoder=decoder, encoder=encoder) + if text_cfg is self or isinstance(text_cfg, ModelConfig): + return text_cfg return type(self)( - self.text_config, + text_cfg, max_lora_adapters=self.max_lora_adapters, max_lora_rank=self.max_lora_rank, shard_attention_heads=self.shard_attention_heads, diff --git a/skyrl/tx/models/deepseekv3.py b/skyrl/tx/models/deepseekv3.py index 8d16cbd807..6c975ffa97 100644 --- a/skyrl/tx/models/deepseekv3.py +++ b/skyrl/tx/models/deepseekv3.py @@ -119,7 +119,9 @@ def __init__(self, config: DeepseekV3Config, *, dtype: jnp.dtype, rngs: nnx.Rngs rngs=rngs, ) - self.rotary_emb, mscale = get_rope(self.qk_rope_head_dim, config.rope_theta, config.rope_scaling) + self.rotary_emb, mscale = get_rope( + self.qk_rope_head_dim, config.rope_parameters["rope_theta"], config.rope_parameters + ) self.scaling = 
self.qk_head_dim ** (-0.5) * mscale * mscale def __call__( diff --git a/skyrl/tx/models/llama3.py b/skyrl/tx/models/llama3.py index 2e8a0c0680..cb714fc14c 100644 --- a/skyrl/tx/models/llama3.py +++ b/skyrl/tx/models/llama3.py @@ -98,8 +98,8 @@ def __call__( v = self.v_proj(x, adapter_indices=adapter_indices).reshape(B, T, self.num_kv_heads, self.head_dim) # Apply RoPE - q = apply_rope(q, positions, self.head_dim, self.config.rope_theta) - k = apply_rope(k, positions, self.head_dim, self.config.rope_theta) + q = apply_rope(q, positions, self.head_dim, self.config.rope_parameters["rope_theta"]) + k = apply_rope(k, positions, self.head_dim, self.config.rope_parameters["rope_theta"]) # Handle KV cache if kv_cache is not None: diff --git a/skyrl/tx/models/qwen3.py b/skyrl/tx/models/qwen3.py index 844809e6e2..bb24959dbe 100644 --- a/skyrl/tx/models/qwen3.py +++ b/skyrl/tx/models/qwen3.py @@ -81,8 +81,8 @@ def __call__( v = v.reshape(B, T, self.num_kv_heads, self.head_dim) # Apply RoPE - q = apply_rope(q, positions, self.head_dim, self.config.rope_theta) - k = apply_rope(k, positions, self.head_dim, self.config.rope_theta) + q = apply_rope(q, positions, self.head_dim, self.config.rope_parameters["rope_theta"]) + k = apply_rope(k, positions, self.head_dim, self.config.rope_parameters["rope_theta"]) # Handle KV cache if kv_cache is not None: @@ -172,8 +172,10 @@ def __init__(self, config: Qwen3Config, *, dtype: jnp.dtype, rngs: nnx.Rngs) -> def __call__( self, hidden_states: jax.Array, router_logits: jax.Array, adapter_indices: jax.Array | None = None ) -> jax.Array: - routing_weights, selected_experts = jax.lax.top_k(router_logits, k=self.config.num_experts_per_tok) - routing_weights = nnx.softmax(routing_weights, axis=-1) + routing_weights = nnx.softmax(router_logits, axis=-1) + routing_weights, selected_experts = jax.lax.top_k(routing_weights, k=self.config.num_experts_per_tok) + if getattr(self.config, "norm_topk_prob", True): + routing_weights = routing_weights / 
routing_weights.sum(axis=-1, keepdims=True) num_experts = self.config.num_experts num_experts_per_tok = self.config.num_experts_per_tok diff --git a/tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_remote_inference_client_chat_template.py b/tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_remote_inference_client_chat_template.py index 5b4d172c7a..300fac372a 100644 --- a/tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_remote_inference_client_chat_template.py +++ b/tests/backends/skyrl_train/gpu/gpu_ci/inference_servers/test_remote_inference_client_chat_template.py @@ -99,8 +99,6 @@ async def test_custom_chat_template(ray_init_fixture, use_custom_template: bool) prompt_str = tokenizer.decode(prompt_token_ids) if use_custom_template: - # The custom template qwen3_acc_thinking.jinja2 will keep the thinking tokens. assert "<think>" in prompt_str and "</think>" in prompt_str else: - # Default template strips thinking tokens assert "<think>" not in prompt_str and "</think>" not in prompt_str diff --git a/tests/backends/skyrl_train/gpu/gpu_ci/test_megatron_worker.py b/tests/backends/skyrl_train/gpu/gpu_ci/test_megatron_worker.py index 76dac7191a..29e0ed5dcb 100644 --- a/tests/backends/skyrl_train/gpu/gpu_ci/test_megatron_worker.py +++ b/tests/backends/skyrl_train/gpu/gpu_ci/test_megatron_worker.py @@ -288,13 +288,14 @@ def run_hf_forward(batch, model_name): position_ids = attention_mask.long().cumsum(-1) - 1 position_ids.masked_fill_(attention_mask == 0, 1) - sequences_rolled = torch.roll(sequences_fwd, shifts=-1, dims=1).to("cuda") - - sequences_fwd, attention_mask, position_ids = ( + sequences_rolled = torch.roll(sequences_fwd, shifts=-1, dims=1) + sequences_fwd, attention_mask, position_ids, sequences_rolled = ( sequences_fwd.to("cuda"), attention_mask.to("cuda"), position_ids.to("cuda"), + sequences_rolled.to("cuda"), ) + with torch.no_grad(), torch.autocast(dtype=torch.bfloat16, device_type="cuda"): output = model(sequences_fwd, attention_mask=attention_mask, 
position_ids=position_ids) log_probs = logprobs_from_logits(output["logits"], sequences_rolled) diff --git a/tests/backends/skyrl_train/gpu/gpu_ci/test_training_step.py b/tests/backends/skyrl_train/gpu/gpu_ci/test_training_step.py index c1db2cc70e..77bb868a2e 100644 --- a/tests/backends/skyrl_train/gpu/gpu_ci/test_training_step.py +++ b/tests/backends/skyrl_train/gpu/gpu_ci/test_training_step.py @@ -1,6 +1,6 @@ """ Run with: -uv run --isolated --extra dev -- pytest tests/backends/skyrl_train/gpu/gpu_ci/test_training_step.py +uv run --isolated --extra dev --extra fsdp -- pytest tests/backends/skyrl_train/gpu/gpu_ci/test_training_step.py """ import pytest @@ -13,7 +13,7 @@ make_dummy_training_batch, ) -MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct" +MODEL_NAME = "Qwen/Qwen3-0.6B" MOE_MODEL_NAME = "Qwen/Qwen3-30B-A3B" diff --git a/tests/backends/skyrl_train/gpu/gpu_ci/test_vlm_model_wrapper.py b/tests/backends/skyrl_train/gpu/gpu_ci/test_vlm_model_wrapper.py index 75b5116a7a..e58e6d0466 100644 --- a/tests/backends/skyrl_train/gpu/gpu_ci/test_vlm_model_wrapper.py +++ b/tests/backends/skyrl_train/gpu/gpu_ci/test_vlm_model_wrapper.py @@ -78,6 +78,9 @@ def build_vlm_inputs(processor, prompt_text, response_text, image=None, device=" "num_actions": num_actions, } + if "mm_token_type_ids" in full_inputs: + result["mm_token_type_ids"] = full_inputs["mm_token_type_ids"].to(device) + if image is not None: result["pixel_values"] = TensorList([full_inputs["pixel_values"].to(device)]) result["image_grid_thw"] = TensorList([full_inputs["image_grid_thw"].to(device)]) @@ -104,6 +107,8 @@ def test_vlm_log_probs_match_manual(vlm_model, processor): pv = inputs["pixel_values"] igt = inputs["image_grid_thw"] + mm_token_type_ids = inputs.get("mm_token_type_ids") + # Wrapper path with torch.no_grad(): wrapper_log_probs = vlm_model( @@ -112,19 +117,23 @@ def test_vlm_log_probs_match_manual(vlm_model, processor): attention_mask, pixel_values=pv, image_grid_thw=igt, + 
mm_token_type_ids=mm_token_type_ids, ) # Manual path: run the raw model pv_cat = torch.cat(pv.tensors, dim=0) igt_cat = torch.cat(igt.tensors, dim=0) + manual_kwargs = dict(pixel_values=pv_cat, image_grid_thw=igt_cat) + if mm_token_type_ids is not None: + manual_kwargs["mm_token_type_ids"] = mm_token_type_ids + with torch.no_grad(): output = vlm_model.model( input_ids, attention_mask=attention_mask, position_ids=None, - pixel_values=pv_cat, - image_grid_thw=igt_cat, + **manual_kwargs, ) logits = output["logits"].float() @@ -165,6 +174,7 @@ def test_vlm_semantic_color_recognition(vlm_model, processor): inputs["attention_mask"], pixel_values=inputs["pixel_values"], image_grid_thw=inputs["image_grid_thw"], + mm_token_type_ids=inputs.get("mm_token_type_ids"), ) log_p[resp_color] = action_lp.sum().item() @@ -180,13 +190,16 @@ def _build_batched_vlm_inputs(processor, prompt, response, images, device="cuda" """Build batched model inputs from a list of images with shared prompt/response text.""" per_sample = [build_vlm_inputs(processor, prompt, response, image=img, device=device) for img in images] num_actions = per_sample[0]["num_actions"] - return { + result = { "input_ids": torch.cat([inp["input_ids"] for inp in per_sample], dim=0), "attention_mask": torch.cat([inp["attention_mask"] for inp in per_sample], dim=0), "num_actions": num_actions, "pixel_values": TensorList([inp["pixel_values"].tensors[0] for inp in per_sample]), "image_grid_thw": TensorList([inp["image_grid_thw"].tensors[0] for inp in per_sample]), } + if "mm_token_type_ids" in per_sample[0]: + result["mm_token_type_ids"] = torch.cat([inp["mm_token_type_ids"] for inp in per_sample], dim=0) + return result def test_vlm_forward_batched_vision(vlm_model, processor): @@ -212,6 +225,7 @@ def test_vlm_forward_batched_vision(vlm_model, processor): fwd["attention_mask"], pixel_values=fwd["pixel_values"], image_grid_thw=fwd["image_grid_thw"], + mm_token_type_ids=fwd.get("mm_token_type_ids"), ) # 2. 
Run batch in reversed order [blue, red] @@ -224,6 +238,7 @@ def test_vlm_forward_batched_vision(vlm_model, processor): rev["attention_mask"], pixel_values=rev["pixel_values"], image_grid_thw=rev["image_grid_thw"], + mm_token_type_ids=rev.get("mm_token_type_ids"), ) # 3. Basic shape / sanity checks diff --git a/tests/tx/layers/test_connectors.py b/tests/tx/layers/test_connectors.py index 05ff58de94..401475b334 100644 --- a/tests/tx/layers/test_connectors.py +++ b/tests/tx/layers/test_connectors.py @@ -118,6 +118,7 @@ def test_deepseek_connector_identity_expansion_rate(): norm_topk_prob=True, routed_scaling_factor=1.0, tie_word_embeddings=False, + rope_theta=10000.0, ) config_e1 = DeepseekV3Config(base_config, max_lora_adapters=4, max_lora_rank=8, shard_attention_heads=True) config_e4 = DeepseekV3Config(base_config, max_lora_adapters=4, max_lora_rank=8, shard_attention_heads=True) diff --git a/tests/tx/models/test_deepseekv3.py b/tests/tx/models/test_deepseekv3.py index c6ceef0028..598f04849e 100644 --- a/tests/tx/models/test_deepseekv3.py +++ b/tests/tx/models/test_deepseekv3.py @@ -26,7 +26,7 @@ def test_deepseekv3(tp: int): model_name = "yujiepan/deepseek-v3-tiny-random" tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True) hf_model = AutoModelForCausalLM.from_pretrained( - model_name, attn_implementation="eager", use_safetensors=True, trust_remote_code=True + model_name, attn_implementation="eager", use_safetensors=True, torch_dtype=torch.float32 ) inputs = ["The capital of France is", "The most popular programming language is"] @@ -40,7 +40,7 @@ def test_deepseekv3(tp: int): with tempfile.TemporaryDirectory() as tmp: hf_model.save_pretrained(tmp, safe_serialization=True) - base_config = PretrainedConfig.from_pretrained(model_name, trust_remote_code=True) + base_config = PretrainedConfig.from_pretrained(model_name) config = DeepseekV3Config(base_config, max_lora_adapters=32, max_lora_rank=32, shard_attention_heads=True) # EP axis required 
for MoE expert sharding mesh = jax.make_mesh((1, 1, tp), ("fsdp", "ep", "tp"), axis_types=(jax.sharding.AxisType.Auto,) * 3) @@ -52,30 +52,41 @@ def test_deepseekv3(tp: int): ) assert outputs.hidden_states is not None - assert np.allclose(hf_outputs.hidden_states[0], outputs.hidden_states[0], rtol=1e-6) - assert np.allclose(hf_outputs.hidden_states[1], outputs.hidden_states[1], rtol=1e-3, atol=1e-3) - assert np.allclose(hf_outputs.hidden_states[-1], outputs.hidden_states[-1], rtol=3e-2, atol=6e-2) + assert np.allclose(hf_outputs.hidden_states[0].float(), outputs.hidden_states[0], rtol=1e-6) + assert np.allclose(hf_outputs.hidden_states[1].float(), outputs.hidden_states[1], rtol=1e-3, atol=1e-3) + assert np.allclose(hf_outputs.hidden_states[-1].float(), outputs.hidden_states[-1], rtol=3e-2, atol=6e-2) def load_moe_base_weights(jax_moe_layer: DeepseekV3MoE, hf_moe_layer: HFDeepseekV3MoE) -> None: """Load base weights from HF MoE layer to JAX MoE layer.""" - jax_moe_layer.gate.weight[:] = hf_moe_layer.gate.weight.detach().numpy().T - jax_moe_layer.gate.e_score_correction_bias[:] = hf_moe_layer.gate.e_score_correction_bias.detach().numpy() - - for i, expert in enumerate(hf_moe_layer.experts): - jax_moe_layer.experts.gate_proj.weight[i, :, :] = expert.gate_proj.weight.detach().numpy().T - jax_moe_layer.experts.up_proj.weight[i, :, :] = expert.up_proj.weight.detach().numpy().T - jax_moe_layer.experts.down_proj.weight[i, :, :] = expert.down_proj.weight.detach().numpy().T + jax_moe_layer.gate.weight[:] = hf_moe_layer.gate.weight.detach().float().numpy().T + jax_moe_layer.gate.e_score_correction_bias[:] = hf_moe_layer.gate.e_score_correction_bias.detach().float().numpy() + + gate_up = hf_moe_layer.experts.gate_up_proj.detach().float().numpy() + intermediate = gate_up.shape[1] // 2 + jax_moe_layer.experts.gate_proj.weight[:] = gate_up[:, :intermediate, :].transpose(0, 2, 1) + jax_moe_layer.experts.up_proj.weight[:] = gate_up[:, intermediate:, :].transpose(0, 2, 1) + 
jax_moe_layer.experts.down_proj.weight[:] = ( + hf_moe_layer.experts.down_proj.detach().float().numpy().transpose(0, 2, 1) + ) - jax_moe_layer.shared_experts.gate_proj.kernel[:] = hf_moe_layer.shared_experts.gate_proj.weight.detach().numpy().T - jax_moe_layer.shared_experts.up_proj.kernel[:] = hf_moe_layer.shared_experts.up_proj.weight.detach().numpy().T - jax_moe_layer.shared_experts.down_proj.kernel[:] = hf_moe_layer.shared_experts.down_proj.weight.detach().numpy().T + jax_moe_layer.shared_experts.gate_proj.kernel[:] = ( + hf_moe_layer.shared_experts.gate_proj.weight.detach().float().numpy().T + ) + jax_moe_layer.shared_experts.up_proj.kernel[:] = ( + hf_moe_layer.shared_experts.up_proj.weight.detach().float().numpy().T + ) + jax_moe_layer.shared_experts.down_proj.kernel[:] = ( + hf_moe_layer.shared_experts.down_proj.weight.detach().float().numpy().T + ) @pytest.mark.parametrize("ep,tp", [(1, 1), (1, 2), (2, 1)]) def test_deepseekv3_moe_layer(ep: int, tp: int): model_name = "yujiepan/deepseek-v3-tiny-random" - hf_model = AutoModelForCausalLM.from_pretrained(model_name, attn_implementation="eager", use_safetensors=True) + hf_model = AutoModelForCausalLM.from_pretrained( + model_name, attn_implementation="eager", use_safetensors=True, torch_dtype=torch.float32 + ) base_config = PretrainedConfig.from_pretrained(model_name) config = DeepseekV3Config(base_config, max_lora_adapters=0, max_lora_rank=0, shard_attention_heads=True) @@ -94,7 +105,7 @@ def test_deepseekv3_moe_layer(ep: int, tp: int): jax_expert_output = moe_layer(x.numpy()) # Higher tolerance due to cross-platform BLAS differences - assert np.allclose(hf_expert_output.detach().numpy(), jax_expert_output, rtol=6e-3, atol=6e-3) + assert np.allclose(hf_expert_output.detach().float().numpy(), jax_expert_output, rtol=6e-3, atol=6e-3) def load_lora_weights( @@ -122,7 +133,9 @@ def load_lora_weights( def test_deepseekv3_moe_layer_lora(ep: int, tp: int): """Test MoE LoRA by merging adapter into base weights and 
comparing outputs.""" model_name = "yujiepan/deepseek-v3-tiny-random" - hf_model = AutoModelForCausalLM.from_pretrained(model_name, attn_implementation="eager", use_safetensors=True) + hf_model = AutoModelForCausalLM.from_pretrained( + model_name, attn_implementation="eager", use_safetensors=True, torch_dtype=torch.float32 + ) base_config = PretrainedConfig.from_pretrained(model_name) config = DeepseekV3Config(base_config, max_lora_adapters=3, max_lora_rank=4, shard_attention_heads=True) @@ -198,7 +211,7 @@ def test_deepseekv3_gradient_checkpointing(): that gradient checkpointing works correctly with heterogeneous layer types. """ model_name = "yujiepan/deepseek-v3-tiny-random" - base_config = PretrainedConfig.from_pretrained(model_name, trust_remote_code=True) + base_config = PretrainedConfig.from_pretrained(model_name) batch_size, seq_len = 2, 8 mesh = jax.make_mesh((1, 1, 1), ("fsdp", "ep", "tp"), axis_types=(jax.sharding.AxisType.Auto,) * 3) diff --git a/tests/tx/models/test_models_common.py b/tests/tx/models/test_models_common.py index a5462c23b5..1954ac9994 100644 --- a/tests/tx/models/test_models_common.py +++ b/tests/tx/models/test_models_common.py @@ -4,6 +4,7 @@ import jax.numpy as jnp import numpy as np import pytest +import torch from transformers import AutoModelForCausalLM, AutoTokenizer from skyrl.tx.models.configs import Llama3Config, ModelConfig, Qwen3Config @@ -110,7 +111,9 @@ def test_compute_logits( batch = tokenizer(inputs, return_tensors="pt", padding=True) # Load HF model, get logits, then delete to free memory - hf_model = AutoModelForCausalLM.from_pretrained(model_name, attn_implementation="eager", use_safetensors=True) + hf_model = AutoModelForCausalLM.from_pretrained( + model_name, attn_implementation="eager", use_safetensors=True, torch_dtype=torch.float32 + ) hf_outputs = hf_model(batch.input_ids, attention_mask=batch.attention_mask) hf_logits = hf_outputs.logits.detach().numpy() del hf_model, hf_outputs diff --git 
a/tests/tx/models/test_qwen3.py b/tests/tx/models/test_qwen3.py index ac4cb42dd0..7028fc2357 100644 --- a/tests/tx/models/test_qwen3.py +++ b/tests/tx/models/test_qwen3.py @@ -25,7 +25,7 @@ def test_qwen3(tp: int): tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-0.6B") hf_model = AutoModelForCausalLM.from_pretrained( - "Qwen/Qwen3-0.6B", attn_implementation="eager", use_safetensors=True + "Qwen/Qwen3-0.6B", attn_implementation="eager", use_safetensors=True, torch_dtype=torch.float32 ) inputs = ["The capital of France is", "The most popular programming language is"] @@ -48,31 +48,37 @@ def test_qwen3(tp: int): outputs = model(batch.input_ids.numpy(), attention_mask=batch.attention_mask.numpy(), output_hidden_states=True) assert outputs.hidden_states is not None - assert np.allclose(hf_outputs.hidden_states[0], outputs.hidden_states[0], rtol=1e-6) - assert np.allclose(hf_outputs.hidden_states[1], outputs.hidden_states[1], rtol=1e-3, atol=1e-3) - assert np.allclose(hf_outputs.hidden_states[-1], outputs.hidden_states[-1], rtol=1e-3, atol=1e-3) + assert np.allclose(hf_outputs.hidden_states[0].float(), outputs.hidden_states[0], rtol=1e-6) + assert np.allclose(hf_outputs.hidden_states[1].float(), outputs.hidden_states[1], rtol=1e-3, atol=1e-3) + assert np.allclose(hf_outputs.hidden_states[-1].float(), outputs.hidden_states[-1], rtol=1e-3, atol=1e-3) def load_moe_base_weights(jax_moe_layer: Qwen3MoeSparseMoeBlock, hf_moe_layer: HFQwen3MoeSparseMoeBlock) -> None: """Load base weights from HF MoE layer to JAX MoE layer.""" - jax_moe_layer.gate.kernel[:] = hf_moe_layer.gate.weight.detach().numpy().T - for i, expert in enumerate(hf_moe_layer.experts): - jax_moe_layer.experts.gate_proj.weight[i, :, :] = expert.gate_proj.weight.detach().numpy().T - jax_moe_layer.experts.up_proj.weight[i, :, :] = expert.up_proj.weight.detach().numpy().T - jax_moe_layer.experts.down_proj.weight[i, :, :] = expert.down_proj.weight.detach().numpy().T + jax_moe_layer.gate.kernel[:] = 
hf_moe_layer.gate.weight.detach().float().numpy().T + gate_up = hf_moe_layer.experts.gate_up_proj.detach().float().numpy() + intermediate = gate_up.shape[1] // 2 + jax_moe_layer.experts.gate_proj.weight[:] = gate_up[:, :intermediate, :].transpose(0, 2, 1) + jax_moe_layer.experts.up_proj.weight[:] = gate_up[:, intermediate:, :].transpose(0, 2, 1) + jax_moe_layer.experts.down_proj.weight[:] = ( + hf_moe_layer.experts.down_proj.detach().float().numpy().transpose(0, 2, 1) + ) @pytest.mark.parametrize("ep,tp", [(1, 1), (1, 2), (2, 1)]) def test_qwen3_moe_layer(ep: int, tp: int): model_name = "trl-internal-testing/tiny-Qwen3MoeForCausalLM" - hf_model = AutoModelForCausalLM.from_pretrained(model_name, attn_implementation="eager", use_safetensors=True) + hf_model = AutoModelForCausalLM.from_pretrained( + model_name, attn_implementation="eager", use_safetensors=True, torch_dtype=torch.float32 + ) base_config = PretrainedConfig.from_pretrained(model_name) config = Qwen3Config(base_config, max_lora_adapters=0, max_lora_rank=0, shard_attention_heads=True) hf_moe_layer = hf_model.model.layers[0].mlp x = torch.randn(4, 2, config.hidden_size) with torch.no_grad(): - hf_final_hidden_states, hf_router_logits = hf_moe_layer.forward(x) + hf_final_hidden_states = hf_moe_layer.forward(x) + hf_router_logits = torch.nn.functional.linear(x.view(-1, config.hidden_size), hf_moe_layer.gate.weight) mesh = jax.make_mesh((1, ep, tp), ("fsdp", "ep", "tp"), axis_types=(jax.sharding.AxisType.Auto,) * 3) with jax.set_mesh(mesh): @@ -117,7 +123,9 @@ def share_hf_lora_A(hf_modules: list) -> None: def test_qwen3_moe_layer_lora(ep: int, tp: int): """Test MoE LoRA by merging adapter into base weights and comparing outputs.""" model_name = "trl-internal-testing/tiny-Qwen3MoeForCausalLM" - hf_model = AutoModelForCausalLM.from_pretrained(model_name, attn_implementation="eager", use_safetensors=True) + hf_model = AutoModelForCausalLM.from_pretrained( + model_name, attn_implementation="eager", 
use_safetensors=True, torch_dtype=torch.float32 + ) base_config = PretrainedConfig.from_pretrained(model_name) config = Qwen3Config(base_config, max_lora_adapters=3, max_lora_rank=4, shard_attention_heads=True) @@ -200,7 +208,9 @@ def test_qwen3_lora(): lora_configs.append(lora_config) hf_model = get_peft_model( - AutoModelForCausalLM.from_pretrained(base_model_name, attn_implementation="eager", use_safetensors=True), + AutoModelForCausalLM.from_pretrained( + base_model_name, attn_implementation="eager", use_safetensors=True, torch_dtype=torch.float32 + ), lora_config, ) hf_model.eval() @@ -241,8 +251,8 @@ def test_qwen3_lora(): load_lora_weights( model.model.embed_tokens, adapter_idx=adapter_idx, - lora_A_weights=hf_embed_tokens.lora_embedding_A["default"].detach().numpy().T, - lora_B_weights=hf_embed_tokens.lora_embedding_B["default"].detach().numpy().T, + lora_A_weights=hf_embed_tokens.lora_embedding_A["default"].detach().float().numpy().T, + lora_B_weights=hf_embed_tokens.lora_embedding_B["default"].detach().float().numpy().T, scaling=lora_config.lora_alpha / lora_config.r, rank=lora_config.r, ) @@ -262,9 +272,9 @@ def test_qwen3_lora(): load_lora_weights( jax_proj, adapter_idx=adapter_idx, - lora_A_weights=hf_projs[0].lora_A["default"].weight.detach().numpy().T, + lora_A_weights=hf_projs[0].lora_A["default"].weight.detach().float().numpy().T, lora_B_weights=FusedLoRALinear.fuse( - *(p.lora_B["default"].weight.detach().numpy().T for p in hf_projs), + *(p.lora_B["default"].weight.detach().float().numpy().T for p in hf_projs), group_sizes=group_sizes, ), scaling=lora_config.lora_alpha / lora_config.r, @@ -284,4 +294,4 @@ def test_qwen3_lora(): # Compare outputs with corresponding adapters for idx in range(len(lora_adapters)): - assert np.allclose(hf_outputs_list[idx].logits[0], logits[idx], rtol=1e-3, atol=1e-3) + assert np.allclose(hf_outputs_list[idx].logits[0].float(), logits[idx], rtol=1e-3, atol=1e-3) diff --git a/tests/tx/models/test_qwen3_generate.py 
b/tests/tx/models/test_qwen3_generate.py index 5a544e6202..a1f7bfe7e9 100644 --- a/tests/tx/models/test_qwen3_generate.py +++ b/tests/tx/models/test_qwen3_generate.py @@ -20,7 +20,9 @@ def test_qwen3_generate(): """Test batched text generation with KV caching matches HuggingFace.""" model_name = "Qwen/Qwen3-0.6B" tokenizer = AutoTokenizer.from_pretrained(model_name) - hf_model = AutoModelForCausalLM.from_pretrained(model_name, attn_implementation="eager", use_safetensors=True) + hf_model = AutoModelForCausalLM.from_pretrained( + model_name, attn_implementation="eager", use_safetensors=True, torch_dtype=torch.float32 + ) inputs = ["My name is", "The capital of France is", "Test stopping", "Test stopping"] max_new_tokens = [10, 20, 50, 2] @@ -117,7 +119,7 @@ def test_qwen3_generate(): with torch.no_grad(): hf_logits = hf_model(tokens.input_ids).logits[0, :-1] hf_logprobs = torch.nn.functional.log_softmax(hf_logits, dim=-1) - expected = hf_logprobs[torch.arange(len(hf_logprobs)), tokens.input_ids[0, 1:]].numpy() + expected = hf_logprobs[torch.arange(len(hf_logprobs)), tokens.input_ids[0, 1:]].float().numpy() assert np.allclose(result_with_prompt_logprobs.prompt_logprobs[i], expected, rtol=1e-3, atol=1e-3) @@ -126,7 +128,9 @@ def test_qwen3_generate_speed(): """Profile batched text generation with KV caching.""" model_name = "Qwen/Qwen3-0.6B" tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="right") - hf_model = AutoModelForCausalLM.from_pretrained(model_name, attn_implementation="eager", use_safetensors=True) + hf_model = AutoModelForCausalLM.from_pretrained( + model_name, attn_implementation="eager", use_safetensors=True, torch_dtype=torch.float32 + ) base_config = PretrainedConfig.from_pretrained(model_name) config = Qwen3Config(base_config, max_lora_adapters=32, max_lora_rank=32, shard_attention_heads=True) diff --git a/uv.lock b/uv.lock index 01ab6b5081..d08eba2ebb 100644 --- a/uv.lock +++ b/uv.lock @@ -313,6 +313,7 @@ overrides = [ { name = 
"ml-dtypes", marker = "sys_platform == 'linux'", specifier = ">=0.5.0" }, { name = "nvidia-resiliency-ext", marker = "sys_platform == 'never'" }, { name = "transformer-engine", extras = ["pytorch"], marker = "sys_platform == 'linux'", specifier = "==2.10.0" }, + { name = "transformers", marker = "sys_platform == 'linux'", specifier = ">=5.0.0,<=5.3.0" }, ] [[package]] @@ -329,7 +330,8 @@ name = "accelerate" version = "1.12.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "huggingface-hub" }, + { name = "huggingface-hub", version = "0.36.2", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, + { name = "huggingface-hub", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra 
== 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'linux' and extra == 'extra-5-skyrl-fsdp') or (sys_platform == 'linux' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 
'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' or extra == 'extra-5-skyrl-flashrl' or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra != 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu') or (extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu') or (extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron')" }, { name = "packaging" }, @@ -1708,7 +1710,8 @@ dependencies = [ { name = "filelock" }, { name = "fsspec", extra = ["http"] }, { name = "httpx" }, - { name = "huggingface-hub" }, + { name = "huggingface-hub", version = "0.36.2", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 
'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, + { name = "huggingface-hub", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, { name = "multiprocess" }, { name = 
"numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'linux' and extra == 'extra-5-skyrl-fsdp') or (sys_platform == 'linux' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' or extra == 'extra-5-skyrl-flashrl' or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra != 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu') or (extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 
'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu') or (extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron')" }, @@ -3265,21 +3268,170 @@ wheels = [ name = "huggingface-hub" version = "0.36.2" source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.14' and platform_machine != 'arm64' and sys_platform == 'darwin'", + "python_full_version == '3.13.*' and platform_machine != 'arm64' and sys_platform == 'darwin'", + "python_full_version == '3.12.*' and platform_machine != 'arm64' and sys_platform == 'darwin'", + "python_full_version >= '3.14' and platform_machine == 'arm64' and sys_platform == 'darwin'", + "python_full_version == '3.13.*' and platform_machine == 'arm64' and sys_platform == 'darwin'", + "python_full_version == '3.12.*' and platform_machine == 'arm64' and sys_platform == 'darwin'", + "python_full_version < '3.12' and platform_machine != 'arm64' and sys_platform == 'darwin'", + "python_full_version < '3.12' and platform_machine == 'arm64' and sys_platform == 'darwin'", + "python_full_version >= '3.14' and sys_platform != 'darwin' and sys_platform != 'linux'", + "python_full_version == '3.13.*' and sys_platform != 'darwin' and sys_platform != 'linux'", + "python_full_version == '3.12.*' and sys_platform != 'darwin' and sys_platform != 'linux'", + "python_full_version < '3.12' and sys_platform != 'darwin' and sys_platform != 'linux'", +] dependencies = [ - { name = "filelock" }, - { name = "fsspec" }, - { name = "hf-xet", marker = "platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 
'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, - { name = "packaging" }, - { name = "pyyaml" }, - { name = "requests" }, - { name = "tqdm" }, - { name = "typing-extensions" }, + { name = "filelock", marker = "sys_platform != 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 
'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, + { name = "fsspec", marker = "sys_platform != 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, + { name = "hf-xet", marker = "(platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 
'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and 
platform_machine != 'x86_64' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu') or (platform_machine == 'aarch64' and sys_platform != 'linux') or (platform_machine == 'amd64' and sys_platform != 'linux') or (platform_machine == 'arm64' and sys_platform != 'linux') or (platform_machine == 'x86_64' and sys_platform != 'linux') or (sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (sys_platform == 'linux' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (sys_platform == 'linux' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (sys_platform == 'linux' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (sys_platform == 'linux' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (sys_platform == 'linux' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (sys_platform == 'linux' and extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (sys_platform == 'linux' and extra == 
'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (sys_platform == 'linux' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (sys_platform == 'linux' and extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, + { name = "packaging", marker = "sys_platform != 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, + { name = "pyyaml", marker = "sys_platform != 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' 
and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, + { name = "requests", marker = "sys_platform != 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, + { name = "tqdm", marker = "sys_platform != 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 
'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, + { name = "typing-extensions", marker = "sys_platform != 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, ] sdist = { url = 
"https://files.pythonhosted.org/packages/7c/b7/8cb61d2eece5fb05a83271da168186721c450eb74e3c31f7ef3169fa475b/huggingface_hub-0.36.2.tar.gz", hash = "sha256:1934304d2fb224f8afa3b87007d58501acfda9215b334eed53072dd5e815ff7a", size = 649782, upload-time = "2026-02-06T09:24:13.098Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/a8/af/48ac8483240de756d2438c380746e7130d1c6f75802ef22f3c6d49982787/huggingface_hub-0.36.2-py3-none-any.whl", hash = "sha256:48f0c8eac16145dfce371e9d2d7772854a4f591bcb56c9cf548accf531d54270", size = 566395, upload-time = "2026-02-06T09:24:11.133Z" }, ] +[[package]] +name = "huggingface-hub" +version = "1.8.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "(python_full_version >= '3.14' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu')", + "python_full_version >= '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.13.*' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and 
extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.12.*' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.13.*' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.12.*' and platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.14.*' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra == 
'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.13.*' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.12.*' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "(python_full_version < '3.12' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu') or (python_full_version < '3.12' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu')", + "python_full_version < '3.12' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version >= 
'3.14' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'", + "python_full_version == '3.13.*' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'", + "python_full_version == '3.12.*' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'", + "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'", + "python_full_version < '3.12' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'", + "python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.13.*' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and 
extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.12.*' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version < '3.12' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.13.*' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.12.*' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra == 
'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version < '3.12' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.13.*' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.12.*' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra == 
'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version < '3.12' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "(python_full_version >= '3.14' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')", + "python_full_version >= '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'", + "python_full_version == '3.13.*' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'", + "python_full_version == '3.12.*' and 
platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'", + "python_full_version == '3.13.*' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'", + "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'", + "python_full_version == '3.12.*' and platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'", + "python_full_version == '3.14.*' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'", + "python_full_version == '3.13.*' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and 
extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'", + "python_full_version == '3.12.*' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'", + "python_full_version < '3.12' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'", + "python_full_version < '3.12' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'", + "python_full_version < '3.12' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'", + "(python_full_version >= '3.14' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' 
and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu')", + "python_full_version >= '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.13.*' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.12.*' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.13.*' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == 
'3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.12.*' and platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.14.*' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.13.*' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.12.*' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version < '3.12' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and 
sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version < '3.12' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version < '3.12' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "(python_full_version >= '3.14' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu')", + "python_full_version >= '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 
'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.13.*' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.12.*' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.13.*' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.12.*' and platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + 
"python_full_version == '3.14.*' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.13.*' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.12.*' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version < '3.12' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version < '3.12' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version < '3.12' and platform_machine == 'aarch64' and 
platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "(python_full_version >= '3.14' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu') or (python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu')", + "python_full_version >= '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.13.*' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.12.*' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 
'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.13.*' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.12.*' and platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.14.*' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.13.*' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 
'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.12.*' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version < '3.12' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version < '3.12' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version < '3.12' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "(python_full_version >= '3.15' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu') 
or (python_full_version >= '3.14' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu') or (python_full_version >= '3.14' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu')", + "(python_full_version == '3.13.*' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu') or (python_full_version == '3.13.*' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu')", + "(python_full_version == '3.12.*' and platform_machine != 'aarch64' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu') or (python_full_version == '3.12.*' and platform_machine == 'aarch64' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' 
and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu')", + "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.14.*' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.13.*' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.12.*' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "(python_full_version < '3.12' and platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 
'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu') or (python_full_version < '3.12' and platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu')", + "python_full_version < '3.12' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'", + "python_full_version == '3.13.*' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'", + "python_full_version == '3.12.*' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'", + "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' 
and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'", + "python_full_version < '3.12' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu'", + "python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.13.*' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.12.*' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version < '3.12' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe' and 
extra != 'extra-5-skyrl-tpu'", + "python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.13.*' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.12.*' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version < '3.12' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.13.*' and 
sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.12.*' and platform_machine != 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version == '3.12.*' and platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", + "python_full_version < '3.12' and sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-gpu' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron' and extra != 'extra-5-skyrl-miniswe' and extra != 'extra-5-skyrl-tpu'", +] +dependencies = [ + { name = "filelock", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or 
(extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, + { name = "fsspec", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, + { name = "hf-xet", marker = "(platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and 
extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (platform_machine != 'AMD64' and 
platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (platform_machine != 'AMD64' and platform_machine != 'aarch64' and platform_machine != 'amd64' and platform_machine != 'arm64' and platform_machine != 'x86_64' and extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu') or (platform_machine == 'AMD64' and sys_platform == 'linux') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'amd64' and sys_platform == 'linux') or (platform_machine == 'arm64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (sys_platform != 'linux' and extra == 
'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, + { name = "httpx", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 
'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, + { name = "packaging", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, + { name = "pyyaml", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 
'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, + { name = "tqdm", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, + { name = "typer", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 
'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, + { name = "typing-extensions", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8e/2a/a847fd02261cd051da218baf99f90ee7c7040c109a01833db4f838f25256/huggingface_hub-1.8.0.tar.gz", hash = 
"sha256:c5627b2fd521e00caf8eff4ac965ba988ea75167fad7ee72e17f9b7183ec63f3", size = 735839, upload-time = "2026-03-25T16:01:28.152Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a9/ae/8a3a16ea4d202cb641b51d2681bdd3d482c1c592d7570b3fa264730829ce/huggingface_hub-1.8.0-py3-none-any.whl", hash = "sha256:d3eb5047bd4e33c987429de6020d4810d38a5bef95b3b40df9b17346b7f353f2", size = 625208, upload-time = "2026-03-25T16:01:26.603Z" }, +] + [[package]] name = "humanize" version = "4.15.0" @@ -7865,7 +8017,7 @@ version = "3.2.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "ftfy", marker = "sys_platform == 'linux'" }, - { name = "huggingface-hub", marker = "sys_platform == 'linux'" }, + { name = "huggingface-hub", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux'" }, { name = "regex", marker = "sys_platform == 'linux'" }, { name = "safetensors", marker = "sys_platform == 'linux'" }, { name = "timm", marker = "sys_platform == 'linux'" }, @@ -8371,7 +8523,8 @@ version = "0.18.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "accelerate" }, - { name = "huggingface-hub" }, + { name = "huggingface-hub", version = "0.36.2", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra 
== 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, + { name = "huggingface-hub", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'linux' and extra == 'extra-5-skyrl-fsdp') or (sys_platform == 'linux' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 
'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' or extra == 'extra-5-skyrl-flashrl' or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra != 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu') or (extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu') or (extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-jax' and 
extra != 'extra-5-skyrl-megatron')" }, { name = "packaging" }, @@ -8384,7 +8537,7 @@ dependencies = [ { name = "torch", version = "2.10.0+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(platform_machine != 'arm64' and sys_platform == 'darwin') or (platform_machine == 'arm64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (platform_machine == 'arm64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (platform_machine == 'arm64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (platform_machine == 'arm64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (platform_machine == 'arm64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (platform_machine == 'arm64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (platform_machine == 'arm64' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (platform_machine == 'arm64' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (platform_machine == 'arm64' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (platform_machine == 'arm64' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (platform_machine == 'arm64' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (platform_machine == 'arm64' and extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (platform_machine == 'arm64' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (platform_machine == 'arm64' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (platform_machine == 'arm64' and extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu') or (sys_platform != 'darwin' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (sys_platform != 'darwin' and extra == 
'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (sys_platform != 'darwin' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (sys_platform != 'darwin' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (sys_platform != 'darwin' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (sys_platform != 'darwin' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (sys_platform != 'darwin' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (sys_platform != 'darwin' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (sys_platform != 'darwin' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (sys_platform != 'darwin' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (sys_platform != 'darwin' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (sys_platform != 'darwin' and extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (sys_platform != 'darwin' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (sys_platform != 'darwin' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (sys_platform != 'darwin' and extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, { name = "torch", version = "2.10.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "(sys_platform == 'linux' and extra != 'extra-5-skyrl-flashrl') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 
'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu')" }, { name = "tqdm" }, - { name = "transformers" }, + { name = "transformers", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/d8/48/147b3ea999560b40a34fd78724c7777aa9d18409c2250bdcaf9c4f2db7fc/peft-0.18.1.tar.gz", hash = 
"sha256:2dd0d6bfce936d1850e48aaddbd250941c5c02fc8ef3237cd8fd5aac35e0bae2", size = 635030, upload-time = "2026-01-09T13:08:01.136Z" } wheels = [ @@ -10523,7 +10676,7 @@ dependencies = [ { name = "rich" }, { name = "safetensors" }, { name = "tokenizers" }, - { name = "transformers" }, + { name = "transformers", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, { name = "typer" }, ] @@ -10583,7 +10736,6 @@ flashrl = [ { name = "torchvision", version = "0.22.0", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "(python_full_version < '3.14' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl') or (python_full_version >= '3.14' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (python_full_version >= '3.14' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or 
(python_full_version >= '3.14' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (python_full_version >= '3.14' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (python_full_version >= '3.14' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (python_full_version >= '3.14' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (platform_machine != 'aarch64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (platform_machine != 'aarch64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (platform_machine != 'aarch64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (platform_machine != 'aarch64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (platform_machine != 'aarch64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (platform_machine != 'aarch64' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (platform_machine != 'aarch64' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (platform_machine != 'aarch64' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (platform_machine != 'aarch64' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (platform_machine != 'aarch64' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (platform_machine != 'aarch64' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (platform_machine != 'aarch64' and extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (platform_machine != 'aarch64' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (platform_machine != 'aarch64' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (platform_machine != 'aarch64' and extra == 'extra-5-skyrl-miniswe' and extra == 
'extra-5-skyrl-tpu') or (platform_python_implementation != 'CPython' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (platform_python_implementation != 'CPython' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (platform_python_implementation != 'CPython' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (platform_python_implementation != 'CPython' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (platform_python_implementation != 'CPython' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (platform_python_implementation != 'CPython' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (platform_python_implementation != 'CPython' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (platform_python_implementation != 'CPython' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (platform_python_implementation != 'CPython' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (platform_python_implementation != 'CPython' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (platform_python_implementation != 'CPython' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (platform_python_implementation != 'CPython' and extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (platform_python_implementation != 'CPython' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (platform_python_implementation != 'CPython' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (platform_python_implementation != 'CPython' and extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 
'extra-5-skyrl-gpu') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu') or (extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra != 
'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, { name = "torchvision", version = "0.22.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "(python_full_version < '3.14' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (python_full_version < '3.14' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (python_full_version < '3.14' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (python_full_version < '3.14' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (python_full_version < '3.14' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (python_full_version < '3.14' and platform_machine == 'aarch64' and platform_python_implementation == 'CPython' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (python_full_version >= '3.14' and sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl') or (platform_machine != 'aarch64' and sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl') or (platform_python_implementation != 'CPython' and sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or 
(sys_platform != 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (sys_platform != 'linux' and extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu') or (extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra != 'extra-5-skyrl-flashrl' and extra == 
'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra != 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, { name = "tqdm" }, - { name = "transformers" }, { name = "uvicorn" }, { name = "vllm-router", marker = "sys_platform == 'linux'" }, { name = "wandb" }, @@ -10617,7 +10769,6 @@ fsdp = [ { name = "torchdata" }, { name = "torchvision", version = "0.25.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, { name = "tqdm" }, - { name = "transformers" }, { name = "uvicorn" }, { name = "vllm", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, { name = "vllm-router", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, @@ -10670,7 +10821,6 @@ megatron = [ { name = "torchvision", version = "0.25.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux'" }, { name = "tqdm" }, { name = "transformer-engine", extra = ["pytorch"], marker = "(sys_platform == 'linux' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 
'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, - { name = "transformers" }, { name = "uvicorn" }, { name = "vllm", marker = "sys_platform == 'linux'" }, { name = "vllm-router", marker = "sys_platform == 'linux'" }, @@ -10703,7 +10853,6 @@ miniswe = [ { name = "tensordict" }, { name = "torchdata" }, { name = "tqdm" }, - { name = "transformers" }, { name = "uvicorn" }, { name = "vllm-router", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax')" }, { name = "wandb" }, @@ -10733,7 +10882,6 @@ skyrl-train = [ { name = "tensordict" }, { name = "torchdata" }, { name = "tqdm" }, - { name = "transformers" }, { name = "uvicorn" }, { name = "vllm-router", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 
'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, { name = "wandb" }, @@ -10842,8 +10990,7 @@ requires-dist = [ { name = "torchvision", marker = "sys_platform == 'linux' and extra == 'megatron'", index = "https://download.pytorch.org/whl/cu128" }, { name = "tqdm", marker = "extra == 'skyrl-train'" }, { name = "transformer-engine", extras = ["pytorch"], marker = "sys_platform == 'linux' and extra == 'megatron'", specifier = "==2.10.0" }, - { name = "transformers", specifier = ">=4.56.1,<5" }, - { name = "transformers", marker = "extra == 'skyrl-train'", specifier = ">=4.51.0" }, + { name = "transformers", specifier = ">=5.0.0,<=5.3.0" }, { name = "ty", marker = "extra == 'dev'" }, { name = "typer", specifier = ">=0.17.4" }, { name = "uvicorn", marker = "extra == 'skyrl-train'" }, @@ -11332,7 +11479,7 @@ name = "timm" version = "1.0.25" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "huggingface-hub", marker = "sys_platform == 'linux'" }, + { name = "huggingface-hub", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux'" }, { name = "pyyaml", marker = "sys_platform == 'linux'" }, { name = "safetensors", marker = "sys_platform == 'linux'" }, { name = "torch", version = 
"2.10.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux'" }, @@ -11357,7 +11504,7 @@ dependencies = [ { name = "pydantic" }, { name = "rich" }, { name = "sniffio" }, - { name = "transformers" }, + { name = "transformers", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, { name = "typing-extensions" }, ] sdist = { url = "https://files.pythonhosted.org/packages/e8/14/9abd320e01ec113dc383c407ea02261bfa97b9c43edbb299ce7ecaea3b61/tinker-0.13.1.tar.gz", hash = "sha256:d856cf99c37a46238a9d92cee719444ce657f5de9b45c4a8f233d3b6b1a482e1", size = 178940, upload-time = "2026-02-13T22:13:54.661Z" } @@ -11370,7 +11517,8 @@ name = "tokenizers" version = "0.22.2" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "huggingface-hub" }, + { name = "huggingface-hub", version = "0.36.2", source = { registry = "https://pypi.org/simple" }, marker = 
"sys_platform != 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, + { name = "huggingface-hub", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and 
extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, ] sdist = { url = "https://files.pythonhosted.org/packages/73/6f/f80cfef4a312e1fb34baf7d85c72d4411afde10978d4657f8cdd811d3ccc/tokenizers-0.22.2.tar.gz", hash = "sha256:473b83b915e547aa366d1eee11806deaf419e17be16310ac0a14077f1e28f917", size = 372115, upload-time = "2026-01-05T10:45:15.988Z" } wheels = [ @@ -12043,24 +12191,23 @@ sdist = { url = "https://files.pythonhosted.org/packages/18/94/609a7772569d3acdb [[package]] name = "transformers" -version = "4.57.6" +version = "5.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "filelock" }, - { name = "huggingface-hub" }, + { name = "huggingface-hub", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 
'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'linux' and extra == 'extra-5-skyrl-fsdp') or (sys_platform == 'linux' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, - { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' or extra == 'extra-5-skyrl-flashrl' or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra != 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu') or (extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra == 
'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu') or (extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron')" }, - { name = "packaging" }, - { name = "pyyaml" }, - { name = "regex" }, - { name = "requests" }, - { name = "safetensors" }, - { name = "tokenizers" }, - { name = "tqdm" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/c4/35/67252acc1b929dc88b6602e8c4a982e64f31e733b804c14bc24b47da35e6/transformers-4.57.6.tar.gz", hash = "sha256:55e44126ece9dc0a291521b7e5492b572e6ef2766338a610b9ab5afbb70689d3", size = 10134912, upload-time = "2026-01-16T10:38:39.284Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/03/b8/e484ef633af3887baeeb4b6ad12743363af7cce68ae51e938e00aaa0529d/transformers-4.57.6-py3-none-any.whl", hash = "sha256:4c9e9de11333ddfe5114bc872c9f370509198acf0b87a832a0ab9458e2bd0550", size = 11993498, upload-time = "2026-01-16T10:38:31.289Z" }, + { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "(sys_platform == 'linux' and extra == 'extra-5-skyrl-flashrl') or (sys_platform == 'linux' and extra != 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-gpu') or (sys_platform == 'linux' and extra != 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-jax') or (sys_platform == 'linux' and extra != 'extra-5-skyrl-fsdp' and extra != 'extra-5-skyrl-jax' and extra != 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 
'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu')" }, + { name = "packaging", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or 
(extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, + { name = "pyyaml", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, + { name = "regex", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 
'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, + { name = "safetensors", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, + { name = "tokenizers", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 
'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, + { name = "tqdm", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, + { name = "typer", marker = "sys_platform == 'linux' or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-fsdp') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-gpu') or (extra == 
'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-flashrl' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-jax') or (extra == 'extra-5-skyrl-fsdp' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-gpu' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-jax' and extra == 'extra-5-skyrl-megatron') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-miniswe') or (extra == 'extra-5-skyrl-megatron' and extra == 'extra-5-skyrl-tpu') or (extra == 'extra-5-skyrl-miniswe' and extra == 'extra-5-skyrl-tpu')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fc/1a/70e830d53ecc96ce69cfa8de38f163712d2b43ac52fbd743f39f56025c31/transformers-5.3.0.tar.gz", hash = "sha256:009555b364029da9e2946d41f1c5de9f15e6b1df46b189b7293f33a161b9c557", size = 8830831, upload-time = "2026-03-04T17:41:46.119Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b8/88/ae8320064e32679a5429a2c9ebbc05c2bf32cefb6e076f9b07f6d685a9b4/transformers-5.3.0-py3-none-any.whl", hash = "sha256:50ac8c89c3c7033444fb3f9f53138096b997ebb70d4b5e50a2e810bf12d3d29a", size = 10661827, upload-time = "2026-03-04T17:41:42.722Z" }, ] [[package]]