From 59c0ae49c523a63e3a03779a50fa73d5482efea8 Mon Sep 17 00:00:00 2001 From: ashors1 Date: Thu, 28 Aug 2025 17:15:11 -0700 Subject: [PATCH] remove unused fp8 training args Signed-off-by: ashors1 --- .../configs/sft_openmathinstruct2_megatron.yaml | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/examples/configs/sft_openmathinstruct2_megatron.yaml b/examples/configs/sft_openmathinstruct2_megatron.yaml index 17b7ddeaee..0abfa7cb6f 100644 --- a/examples/configs/sft_openmathinstruct2_megatron.yaml +++ b/examples/configs/sft_openmathinstruct2_megatron.yaml @@ -88,13 +88,14 @@ policy: env_vars: PYTORCH_CUDA_ALLOC_CONF: "expandable_segments:False" - fp8_cfg: - enabled: true - fp8: hybrid - fp8_recipe: delayed - fp8_param: true # false gives the following error: "RuntimeError: /TransformerEngine/transformer_engine/common/gemm/cublaslt_gemm.cu:116 in function CanonicalizeGemmInput: Assertion failed: !is_fp8_dtype(ret.Atype). Input A is missing column-wise usage" - fp8_dot_product_attention: false #true - fp8_multi_head_attention: false #true + ## fp8 training currently not supported + #fp8_cfg: + # enabled: true + # fp8: hybrid + # fp8_recipe: delayed + # fp8_param: true # false gives the following error: "RuntimeError: /TransformerEngine/transformer_engine/common/gemm/cublaslt_gemm.cu:116 in function CanonicalizeGemmInput: Assertion failed: !is_fp8_dtype(ret.Atype). Input A is missing column-wise usage" + # fp8_dot_product_attention: false #true + # fp8_multi_head_attention: false #true dynamic_batching: enabled: false