Commit a889be3

format
1 parent 98d2233 commit a889be3

File tree

2 files changed: 13 additions & 8 deletions


python/mlc_chat/model/mixtral/mixtral_loader.py

Lines changed: 2 additions & 6 deletions
@@ -102,20 +102,17 @@ def combine_expert_gate_up(*hf_params, dtype):
                 dtype=mlc_param.dtype,
             ),
         )
-
+
         mlc_name = f"{mlc_mlp}.gate.weight"
         mlc_param = named_parameters[mlc_name]
         mapping.add_mapping(
             mlc_name,
-            [
-                f"{mlp}.gate.weight"
-            ],
+            [f"{mlp}.gate.weight"],
             functools.partial(
                 lambda x, dtype: x.astype(dtype),
                 dtype=mlc_param.dtype,
             ),
         )
-
         # inv_freq is not used in the model
         mapping.add_unused(f"{attn}.rotary_emb.inv_freq")
@@ -131,4 +128,3 @@ def combine_expert_gate_up(*hf_params, dtype):
             ),
         )
         return mapping
-
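For context on the pattern in this hunk: functools.partial pre-binds dtype so that each entry registered with mapping.add_mapping is a one-argument transform applied to the source weight at load time. A minimal, self-contained sketch of that idiom (NumPy stands in for the real tensor type, and the cast/to_half names are invented for illustration):

import functools

import numpy as np

def cast(x, dtype):
    """Cast a loaded parameter to the dtype the MLC side expects."""
    return x.astype(dtype)

# Binding dtype up front yields a one-argument function, matching the
# shape the loader's mapping machinery applies to each source tensor.
to_half = functools.partial(cast, dtype="float16")

param = np.ones((4, 4), dtype="float32")
print(to_half(param).dtype)  # float16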

python/mlc_chat/model/mixtral/mixtral_model.py

Lines changed: 11 additions & 2 deletions
@@ -16,7 +16,13 @@
 from mlc_chat.support.config import ConfigBase
 from mlc_chat.support.style import bold
 from mlc_chat.support import tensor_parallel as tp
-from mlc_chat.model.mistral.mistral_model import MistralConfig, RotaryEmbedding, MistralAttention, MistralModel, MistralForCasualLM
+from mlc_chat.model.mistral.mistral_model import (
+    MistralConfig,
+    RotaryEmbedding,
+    MistralAttention,
+    MistralModel,
+    MistralForCasualLM,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -31,6 +37,7 @@ class MixtralConfig(MistralConfig): # pylint: disable=too-many-instance-attribu
     def __post_init__(self):
         super().__post_init__()
 
+
 # pylint: disable=invalid-name,missing-docstring
 
 
@@ -187,8 +194,9 @@ def forward(self, x: Tensor):
         )
         return weighted_sum
 
+
 class MixtralDecoderLayer(nn.Module):
-    """ Mixtral decoder layer"""
+    """Mixtral decoder layer"""
 
     def __init__(self, config: MixtralConfig, rotary_embedding: RotaryEmbedding):
         rms_norm_eps = config.rms_norm_eps
@@ -253,6 +261,7 @@ def __init__(self, config: MixtralConfig):
             [MixtralDecoderLayer(config, rotary_embedding) for _ in range(config.num_hidden_layers)]
         )
 
+
 class MixtralForCasualLM(MistralForCasualLM):
     """Same as LlamaForCausalLM, except for the use of sliding window attention."""
 
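The remaining hunks are whitespace and docstring normalization, consistent with the commit message "format": PEP 8 asks for two blank lines around top-level definitions and no space after a docstring's opening quotes, and parenthesized imports keep one name per line so adding or removing a name touches a single diff line. A generic illustration (the Example class is invented, not part of the codebase):

import logging

logger = logging.getLogger(__name__)


# Two blank lines separate the import block from the top-level class.
class Example:
    """One-line docstring with no space after the opening quotes."""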