Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions examples/configs/sft_openmathinstruct2_megatron.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -88,13 +88,14 @@ policy:
env_vars:
PYTORCH_CUDA_ALLOC_CONF: "expandable_segments:False"

fp8_cfg:
enabled: true
fp8: hybrid
fp8_recipe: delayed
fp8_param: true # false gives the following error: "RuntimeError: /TransformerEngine/transformer_engine/common/gemm/cublaslt_gemm.cu:116 in function CanonicalizeGemmInput: Assertion failed: !is_fp8_dtype(ret.Atype). Input A is missing column-wise usage"
fp8_dot_product_attention: false #true
fp8_multi_head_attention: false #true
## fp8 training currently not supported
#fp8_cfg:
# enabled: true
# fp8: hybrid
# fp8_recipe: delayed
# fp8_param: true # false gives the following error: "RuntimeError: /TransformerEngine/transformer_engine/common/gemm/cublaslt_gemm.cu:116 in function CanonicalizeGemmInput: Assertion failed: !is_fp8_dtype(ret.Atype). Input A is missing column-wise usage"
# fp8_dot_product_attention: false #true
# fp8_multi_head_attention: false #true

dynamic_batching:
enabled: false
Expand Down