Reminder
I have read the above rules and searched the existing issues.
System Info
I selected bf16 in the WebUI, but at runtime the error says that only fp16 and bf16 data types are supported. What is going on here?
0%| | 0/35325 [00:00<?, ?it/s]
Traceback (most recent call last):
File "/opt/anaconda3/envs/factory/bin/llamafactory-cli", line 8, in
sys.exit(main())
^^^^^^
File "/home/being/github/LLaMA-Factory/src/llamafactory/cli.py", line 112, in main
run_exp()
File "/home/being/github/LLaMA-Factory/src/llamafactory/train/tuner.py", line 93, in run_exp
_training_function(config={"args": args, "callbacks": callbacks})
File "/home/being/github/LLaMA-Factory/src/llamafactory/train/tuner.py", line 67, in _training_function
run_sft(model_args, data_args, training_args, finetuning_args, generating_args, callbacks)
File "/home/being/github/LLaMA-Factory/src/llamafactory/train/sft/workflow.py", line 102, in run_sft
train_result = trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/transformers/trainer.py", line 2171, in train
return inner_training_loop(
^^^^^^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/transformers/trainer.py", line 2531, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs, num_items_in_batch)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/transformers/trainer.py", line 3675, in training_step
loss = self.compute_loss(model, inputs, num_items_in_batch=num_items_in_batch)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/transformers/trainer.py", line 3731, in compute_loss
outputs = model(**inputs)
^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1739, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1750, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/accelerate/utils/operations.py", line 823, in forward
return model_forward(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/accelerate/utils/operations.py", line 811, in call
return convert_to_fp32(self.model_forward(*args, **kwargs))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/torch/amp/autocast_mode.py", line 44, in decorate_autocast
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 816, in forward
outputs = self.model(
^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1739, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1750, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 562, in forward
layer_outputs = self._gradient_checkpointing_func(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/being/github/LLaMA-Factory/src/llamafactory/model/model_utils/checkpointing.py", line 97, in custom_gradient_checkpointing_func
return gradient_checkpointing_func(func, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/torch/_compile.py", line 32, in inner
return disable_fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/torch/_dynamo/eval_frame.py", line 745, in _fn
return fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/torch/utils/checkpoint.py", line 489, in checkpoint
return CheckpointFunction.apply(function, preserve, *args)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/torch/autograd/function.py", line 575, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/torch/utils/checkpoint.py", line 264, in forward
outputs = run_function(*args)
^^^^^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1739, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1750, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 259, in forward
hidden_states, self_attn_weights = self.self_attn(
^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1739, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1750, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 191, in forward
attn_output, attn_weights = attention_interface(
^^^^^^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/transformers/integrations/flash_attention.py", line 50, in flash_attention_forward
attn_output = _flash_attention_forward(
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/transformers/modeling_flash_attention_utils.py", line 311, in _flash_attention_forward
attn_output_unpad = flash_attn_varlen_func(
^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/flash_attn/flash_attn_interface.py", line 1448, in flash_attn_varlen_func
return FlashAttnVarlenFunc.apply(
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/torch/autograd/function.py", line 575, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/flash_attn/flash_attn_interface.py", line 930, in forward
out_padded, softmax_lse, S_dmask, rng_state = _wrapped_flash_attn_varlen_forward(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/torch/_ops.py", line 1123, in call
return self._op(*args, **(kwargs or {}))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/torch/_library/autograd.py", line 113, in autograd_impl
result = forward_no_grad(*args, Metadata(keyset, keyword_only_args))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/torch/_library/autograd.py", line 40, in forward_no_grad
result = op.redispatch(keyset & _C._after_autograd_keyset, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/torch/_ops.py", line 728, in redispatch
return self._handle.redispatch_boxed(keyset, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/torch/_library/custom_ops.py", line 305, in backend_impl
result = self._backend_fns[device_type](*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/torch/_compile.py", line 32, in inner
return disable_fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/torch/_dynamo/eval_frame.py", line 745, in _fn
return fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/torch/_library/custom_ops.py", line 337, in wrapped_fn
return fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/factory/lib/python3.12/site-packages/flash_attn/flash_attn_interface.py", line 170, in _flash_attn_varlen_forward
out, softmax_lse, S_dmask, rng_state = flash_attn_gpu.varlen_fwd(
^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: FlashAttention only support fp16 and bf16 data type
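For reference, the RuntimeError at the bottom is raised by the flash-attn CUDA kernel itself, which accepts only fp16 and bf16 inputs. Below is a minimal sketch of that check, assuming the flash-attn package and a CUDA GPU are available (the tensor shapes are arbitrary):

import torch
from flash_attn import flash_attn_func

# Layout is (batch, seqlen, num_heads, head_dim); fp32 on purpose to trip the check.
q = torch.randn(2, 128, 8, 64, device="cuda", dtype=torch.float32)
try:
    flash_attn_func(q, q, q)
except RuntimeError as e:
    print(e)  # FlashAttention only support fp16 and bf16 data type

# The identical call succeeds once the tensors really are bf16.
out = flash_attn_func(q.bfloat16(), q.bfloat16(), q.bfloat16())
print(out.dtype)  # torch.bfloat16

In other words, even though bf16 was selected in the WebUI, the tensors reaching the attention kernel were still fp32; one common cause is the model weights being loaded in fp32 because the bf16 setting never took effect at load time.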
Reproduction
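No reproduction steps were given in the report. As a purely hypothetical illustration (the model id and options below are assumed, not taken from this report; flash-attn and a CUDA GPU are required), the same dtype mismatch can be reproduced by loading a Qwen2 checkpoint with FlashAttention enabled but without a half-precision torch_dtype, so the weights stay in fp32:

import torch
from transformers import AutoModelForCausalLM

model_id = "Qwen/Qwen2-0.5B"  # hypothetical placeholder checkpoint

# torch_dtype is left at its default, so the weights load in fp32 and a
# forward pass would hit "FlashAttention only support fp16 and bf16 data type".
model = AutoModelForCausalLM.from_pretrained(
    model_id, attn_implementation="flash_attention_2"
).cuda()
print(next(model.parameters()).dtype)  # torch.float32

# Loading in bf16 instead gives the kernel a supported dtype.
model = AutoModelForCausalLM.from_pretrained(
    model_id, attn_implementation="flash_attention_2", torch_dtype=torch.bfloat16
).cuda()
print(next(model.parameters()).dtype)  # torch.bfloat16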
Others
No response