Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
"mode": "QUANTIZE",
"observer": "maxabs",
"scale_method": "maxabs_hw",
"whitelist": {"types": [], "names": ["gate","w1","w3","w2"]},
"blacklist": {"types": [], "names": [
"allowlist": {"types": [], "names": ["gate","w1","w3","w2"]},
"blocklist": {"types": [], "names": [
"model.layers.1.block_sparse_moe.experts.(3|4).w2",
"model.layers.[29-31].block_sparse_moe.experts.[0-7].w2"
]},
Expand Down
3 changes: 2 additions & 1 deletion optimum/habana/transformers/generation/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -604,7 +604,8 @@ def generate(
"llama",
"mistral",
"falcon",
], "reuse_cache only supported by llama, mistral and falcon at the moment"
"mixtral",
], "reuse_cache only supported by llama, mistral, falcon and mixtral at the moment"
if not generation_config.bucket_internal:
assert (
generation_config.bucket_size <= 0
Expand Down
4 changes: 2 additions & 2 deletions optimum/habana/transformers/modeling_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
GaudiMixtralAttention,
GaudiMixtralDecoderLayer,
GaudiMixtralForCausalLM,
GaudiMixtralModel,
GaudiMptForCausalLM,
GaudiMptModel,
GaudiOPTForCausalLM,
Expand Down Expand Up @@ -120,7 +121,6 @@
gaudi_llama_rmsnorm_forward,
gaudi_mistral_rmsnorm_forward,
gaudi_mixtral_block_sparse_moe_forward,
gaudi_mixtral_model_forward,
gaudi_mixtral_rmsnorm_forward,
gaudi_mpt_attention_forward,
gaudi_mpt_block_forward,
Expand Down Expand Up @@ -392,7 +392,7 @@ def adapt_transformers_to_gaudi():
# Optimization for mixtral on Gaudi
transformers.models.mixtral.modeling_mixtral.MixtralAttention = GaudiMixtralAttention
transformers.models.mixtral.modeling_mixtral.MixtralForCausalLM = GaudiMixtralForCausalLM
transformers.models.mixtral.modeling_mixtral.MixtralModel.forward = gaudi_mixtral_model_forward
transformers.models.mixtral.modeling_mixtral.MixtralModel = GaudiMixtralModel
transformers.models.mixtral.modeling_mixtral.MixtralSparseMoeBlock.forward = gaudi_mixtral_block_sparse_moe_forward
transformers.models.mixtral.modeling_mixtral.MixtralDecoderLayer = GaudiMixtralDecoderLayer
transformers.models.mixtral.modeling_mixtral.MixtralRMSNorm.forward = gaudi_mixtral_rmsnorm_forward
Expand Down
2 changes: 1 addition & 1 deletion optimum/habana/transformers/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,9 @@
GaudiMixtralAttention,
GaudiMixtralDecoderLayer,
GaudiMixtralForCausalLM,
GaudiMixtralModel,
MixtralConfig,
gaudi_mixtral_block_sparse_moe_forward,
gaudi_mixtral_model_forward,
gaudi_mixtral_rmsnorm_forward,
)
from .modeling_all_models import (
Expand Down
2 changes: 1 addition & 1 deletion optimum/habana/transformers/models/mixtral/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
GaudiMixtralAttention,
GaudiMixtralDecoderLayer,
GaudiMixtralForCausalLM,
GaudiMixtralModel,
gaudi_mixtral_block_sparse_moe_forward,
gaudi_mixtral_model_forward,
gaudi_mixtral_rmsnorm_forward,
)
Loading