diff --git a/pyproject.toml b/pyproject.toml index 5f45626c01..7e4f24eb6d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -99,7 +99,7 @@ dependencies = [ # Common pygame-dep = ["pygame>=2.5.1,<2.7.0"] placo-dep = ["placo>=0.9.6,<0.9.17"] -transformers-dep = ["transformers>=5.3.0,<6.0.0"] +transformers-dep = ["transformers>=5.4.0,<6.0.0"] grpcio-dep = ["grpcio==1.73.1", "protobuf>=6.31.1,<6.32.0"] can-dep = ["python-can>=4.2.0,<5.0.0"] peft-dep = ["peft>=0.18.0,<1.0.0"] diff --git a/src/lerobot/policies/groot/action_head/flow_matching_action_head.py b/src/lerobot/policies/groot/action_head/flow_matching_action_head.py index bfc456ba0b..74d922988b 100644 --- a/src/lerobot/policies/groot/action_head/flow_matching_action_head.py +++ b/src/lerobot/policies/groot/action_head/flow_matching_action_head.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from dataclasses import dataclass, field +from dataclasses import field from typing import TYPE_CHECKING import torch @@ -110,7 +110,6 @@ def forward(self, actions, timesteps, cat_ids): return x -@dataclass class FlowmatchingActionHeadConfig(PretrainedConfig): """NOTE: N1.5 uses XEmbFlowmatchingPolicyHeadConfig as action head""" diff --git a/src/lerobot/policies/groot/groot_n1.py b/src/lerobot/policies/groot/groot_n1.py index 06ff5a04d6..38512b8a86 100644 --- a/src/lerobot/policies/groot/groot_n1.py +++ b/src/lerobot/policies/groot/groot_n1.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from dataclasses import dataclass, field +from dataclasses import field from pathlib import Path from typing import TYPE_CHECKING @@ -173,7 +173,6 @@ def forward(self, vl_input: BatchFeature) -> BatchFeature: # config -@dataclass class GR00TN15Config(PretrainedConfig): model_type = "gr00t_n1_5" backbone_cfg: dict = field(init=False, metadata={"help": "Backbone configuration."}) diff --git a/src/lerobot/policies/wall_x/qwen_model/qwen2_5_vl_moe.py b/src/lerobot/policies/wall_x/qwen_model/qwen2_5_vl_moe.py index ecf3eb3717..a80096514b 100644 --- a/src/lerobot/policies/wall_x/qwen_model/qwen2_5_vl_moe.py +++ b/src/lerobot/policies/wall_x/qwen_model/qwen2_5_vl_moe.py @@ -22,7 +22,7 @@ add_start_docstrings, add_start_docstrings_to_model_forward, is_flash_attn_2_available, - is_flash_attn_greater_or_equal_2_10, + is_flash_attn_greater_or_equal, is_torchdynamo_compiling, logging, replace_return_docstrings, @@ -890,7 +890,7 @@ def __init__(self, *args, **kwargs): # TODO: Should be removed once Flash Attention for RoCm is bumped to 2.1. # flash_attn<2.1 generates top-left aligned causal mask, while what is needed here is bottom-right alignment, that was made default for flash_attn>=2.1. This attribute is used to handle this difference. Reference: https://github.com/Dao-AILab/flash-attention/releases/tag/v2.1.0. # Beware that with flash_attn<2.1, using q_seqlen != k_seqlen (except for the case q_seqlen == 1) produces a wrong mask (top-left). - self._flash_attn_uses_top_left_mask = not is_flash_attn_greater_or_equal_2_10() + self._flash_attn_uses_top_left_mask = not is_flash_attn_greater_or_equal("2.1.0") def forward( self, diff --git a/src/lerobot/policies/xvla/modeling_florence2.py b/src/lerobot/policies/xvla/modeling_florence2.py index e33efe5c30..81f9c8234b 100644 --- a/src/lerobot/policies/xvla/modeling_florence2.py +++ b/src/lerobot/policies/xvla/modeling_florence2.py @@ -45,7 +45,7 @@ add_start_docstrings, add_start_docstrings_to_model_forward, is_flash_attn_2_available, - is_flash_attn_greater_or_equal_2_10, + is_flash_attn_greater_or_equal, logging, replace_return_docstrings, ) @@ -909,7 +909,7 @@ def __init__(self, *args, **kwargs): # TODO: Should be removed once Flash Attention for RoCm is bumped to 2.1. # flash_attn<2.1 generates top-left aligned causal mask, while what is needed here is bottom-right alignment, that was made default for flash_attn>=2.1. This attribute is used to handle this difference. Reference: https://github.com/Dao-AILab/flash-attention/releases/tag/v2.1.0. # Beware that with flash_attn<2.1, using q_seqlen != k_seqlen (except for the case q_seqlen == 1) produces a wrong mask (top-left). - self._flash_attn_uses_top_left_mask = not is_flash_attn_greater_or_equal_2_10() + self._flash_attn_uses_top_left_mask = not is_flash_attn_greater_or_equal("2.1.0") def _reshape(self, tensor: torch.Tensor, seq_len: int, bsz: int): return tensor.view(bsz, seq_len, self.num_heads, self.head_dim)