Commit 0fe64d3

tests
1 parent 59db0d6 commit 0fe64d3

2 files changed: 3 additions & 3 deletions

fast_llm/layers/common/normalization/normalization.py

Lines changed: 1 addition & 1 deletion
@@ -311,7 +311,7 @@ def __init__(self, config: ConfigType, hidden_dim: TensorDim, lr_scale: float |
         super().__init__(config, hidden_dim, lr_scale)

         if rms_norm_gated is not None:
-            self._forward_gated = self._forward_local
+            self._forward_gated = self._forward_fla
         else:
             self._forward_gated = self._forward_local

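The change above fixes the dispatch so that the fused gated RMS-norm kernel from FLA (flash-linear-attention) is actually used when its import succeeds; previously both branches pointed at the local implementation. Below is a minimal sketch of the pattern, using a hypothetical stand-in class and an assumed FLA import path and call signature (the real Fast-LLM layer and the exact rms_norm_gated signature are not shown in this diff):

import torch

try:
    # Optional dependency: fused gated RMS norm from flash-linear-attention.
    # The import path is an assumption; only the name rms_norm_gated appears in the diff.
    from fla.modules.fused_norm_gate import rms_norm_gated
except ImportError:
    rms_norm_gated = None


class GatedRMSNormSketch(torch.nn.Module):
    """Hypothetical stand-in for the Fast-LLM gated normalization layer."""

    def __init__(self, hidden_size: int, eps: float = 1e-5):
        super().__init__()
        self.weight = torch.nn.Parameter(torch.ones(hidden_size))
        self._eps = eps
        # The fix in this commit: dispatch to the FLA kernel when it is available,
        # instead of always falling back to the local implementation.
        if rms_norm_gated is not None:
            self._forward_gated = self._forward_fla
        else:
            self._forward_gated = self._forward_local

    def _forward_fla(self, input_: torch.Tensor, gate: torch.Tensor) -> torch.Tensor:
        # Assumed call signature; the real FLA API may differ.
        return rms_norm_gated(input_, gate, self.weight, None, eps=self._eps)

    def _forward_local(self, input_: torch.Tensor, gate: torch.Tensor) -> torch.Tensor:
        # Plain PyTorch reference: RMS-normalize, scale, then gate with SiLU(gate).
        normed = input_ * torch.rsqrt(input_.pow(2).mean(-1, keepdim=True) + self._eps)
        return normed * self.weight * torch.nn.functional.silu(gate)

    def forward(self, input_: torch.Tensor, gate: torch.Tensor) -> torch.Tensor:
        return self._forward_gated(input_, gate)

This also explains the looser compare_factor in the test config below: the fused fp16/bf16 kernel can drift numerically from the local reference implementation.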

tests/utils/model_configs.py

Lines changed: 2 additions & 2 deletions
@@ -729,9 +729,9 @@ def _update_and_add_testing_config(
             ModelTestingGroup.megatron: ModelTestingGroupAction.not_implemented,
             ModelTestingGroup.distributed: ModelTestingGroupAction.normal,
         },
-        compare_factor=10.0,  # with compare_factor 2, the fp16 and bf16 tests fail in the normalization layer when using rms_norm_gated from fla (passes with local non-fla norm)
+        compare_factor=10.0,  # with compare_factor 2, the fp16 and bf16 tests fail in the normalization layer when using rms_norm_gated from fla
         # note: tp is excluded because there are currently no gradient reductions implemented for tp norm in gdn.py (STP works though).
-        # we should be using STP with this model!
+        # we should be using STP with this model, not TP!
         skip_tests=(r"sdp", r"ms", r"^tp2$"),
     )

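For context, compare_factor presumably scales the numerical tolerance used when comparing runs, so a factor of 10.0 accepts more drift from the fused FLA kernel in fp16/bf16 than the previous factor of 2. A minimal sketch of that idea, with a hypothetical helper and thresholds (Fast-LLM's actual comparison logic is not shown in this diff):

import torch


def tensors_match(a: torch.Tensor, b: torch.Tensor,
                  base_rtol: float = 1e-3, base_atol: float = 1e-5,
                  compare_factor: float = 1.0) -> bool:
    # Hypothetical helper: compare_factor loosens both tolerances, so
    # compare_factor=10.0 tolerates roughly 10x more numerical drift.
    return torch.allclose(a, b,
                          rtol=base_rtol * compare_factor,
                          atol=base_atol * compare_factor)


# A constant drift of 4e-3 fails at factor 2 (tolerance ~2e-3)
# but passes at factor 10 (tolerance ~1e-2).
x = torch.ones(4, 8)
y = x + 4e-3
print(tensors_match(x, y, compare_factor=2.0))   # False
print(tensors_match(x, y, compare_factor=10.0))  # True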
