change fp8 defaults (NVIDIA#7894)
Signed-off-by: Chen Cui <[email protected]>
cuichenx committed Nov 16, 2023 · 1 parent 16f41f4 · commit 8f4a63f
Showing 6 changed files with 16 additions and 16 deletions.
6 changes: 3 additions & 3 deletions examples/nlp/language_modeling/conf/megatron_gpt_config.yaml
@@ -169,11 +169,11 @@ model:
   transformer_engine: False
   fp8: False # enables fp8 in TransformerLayer forward
   fp8_e4m3: False # sets fp8_format = recipe.Format.E4M3
-  fp8_hybrid: False # sets fp8_format = recipe.Format.HYBRID
+  fp8_hybrid: True # sets fp8_format = recipe.Format.HYBRID
   fp8_margin: 0 # scaling margin
   fp8_interval: 1 # scaling update interval
-  fp8_amax_history_len: 1 # Number of steps for which amax history is recorded per tensor
-  fp8_amax_compute_algo: most_recent # 'most_recent' or 'max'. Algorithm for computing amax from history
+  fp8_amax_history_len: 1024 # Number of steps for which amax history is recorded per tensor
+  fp8_amax_compute_algo: max # 'most_recent' or 'max'. Algorithm for computing amax from history
   reduce_amax: True # Perform reduction to sync amax tensors across GPUs after every iteration
   use_emha: False # Use fused multi-head attention for large sequence-length. Note this is not yet supported. Please set to False.
   ub_tp_comm_overlap: False
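
For reference, these YAML knobs correspond one-to-one to TransformerEngine's delayed-scaling FP8 recipe. Below is a minimal sketch of that mapping, not NeMo's actual wiring code: the recipe arguments mirror the config keys above, while the layer shape and input are made up for illustration (FP8 execution additionally requires supported hardware such as Hopper).

```python
# Minimal sketch: how the new defaults above translate into a
# TransformerEngine delayed-scaling recipe. Illustrative only.
import torch
import transformer_engine.pytorch as te
from transformer_engine.common import recipe

fp8_recipe = recipe.DelayedScaling(
    margin=0,                         # fp8_margin
    interval=1,                       # fp8_interval
    fp8_format=recipe.Format.HYBRID,  # fp8_hybrid: True
    amax_history_len=1024,            # fp8_amax_history_len
    amax_compute_algo="max",          # fp8_amax_compute_algo
)

# Hypothetical layer and input, just to show where the recipe is consumed.
layer = te.Linear(768, 768).cuda()
x = torch.randn(16, 768, device="cuda")
with te.fp8_autocast(enabled=True, fp8_recipe=fp8_recipe):
    y = layer(x)
```

HYBRID keeps forward-pass tensors in E4M3 and uses the wider-range E5M2 format for gradients, which is generally a safer default than pure E4M3 — hence the flip of `fp8_hybrid` to True here.
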
6 changes: 3 additions & 3 deletions (file path not captured in this view)
@@ -160,11 +160,11 @@ model:
   transformer_engine: True
   fp8: False # enables fp8 in TransformerLayer forward
   fp8_e4m3: False # sets fp8_format = recipe.Format.E4M3
-  fp8_hybrid: False # sets fp8_format = recipe.Format.HYBRID
+  fp8_hybrid: True # sets fp8_format = recipe.Format.HYBRID
   fp8_margin: 0 # scaling margin
   fp8_interval: 1 # scaling update interval
-  fp8_amax_history_len: 1 # Number of steps for which amax history is recorded per tensor
-  fp8_amax_compute_algo: most_recent # 'most_recent' or 'max'. Algorithm for computing amax from history
+  fp8_amax_history_len: 1024 # Number of steps for which amax history is recorded per tensor
+  fp8_amax_compute_algo: max # 'most_recent' or 'max'. Algorithm for computing amax from history
   reduce_amax: True # Perform reduction to sync amax tensors across GPUs after every iteration
   use_emha: False # Use fused multi-head attention for large sequence-length. Note this is not yet supported. Please set to False.

4 changes: 2 additions & 2 deletions (file path not captured in this view)
@@ -160,8 +160,8 @@ def __init__(
   fp8_hybrid=False,
   fp8_margin=0,
   fp8_interval=1,
-  fp8_amax_history_len=1,
-  fp8_amax_compute_algo='most_recent',
+  fp8_amax_history_len=1024,
+  fp8_amax_compute_algo='max',
   reduce_amax=True,
   use_emha=False,
   ub_tp_comm_overlap=False,
4 changes: 2 additions & 2 deletions (file path not captured in this view)
@@ -376,8 +376,8 @@ def model_provider_func(self, pre_process, post_process):
   fp8_hybrid=self.cfg.get('fp8_hybrid', False),
   fp8_margin=self.cfg.get('fp8_margin', 0),
   fp8_interval=self.cfg.get('fp8_interval', 1),
-  fp8_amax_history_len=self.cfg.get('fp8_amax_history_len', 1),
-  fp8_amax_compute_algo=self.cfg.get('fp8_amax_compute_algo', 'most_recent'),
+  fp8_amax_history_len=self.cfg.get('fp8_amax_history_len', 1024),
+  fp8_amax_compute_algo=self.cfg.get('fp8_amax_compute_algo', 'max'),
   reduce_amax=self.cfg.get('reduce_amax', True),
   use_emha=self.cfg.get('use_emha', False),
   ub_tp_comm_overlap=self.cfg.get('ub_tp_comm_overlap', False),
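
Note that the in-code fallbacks change in lockstep with the YAML defaults, so a user config that omits these keys resolves to the new values either way. A small sketch of that fallback behavior, assuming omegaconf (which NeMo configs are built on) and a hypothetical config that sets only `fp8`:

```python
# Sketch of the cfg.get fallback path: keys absent from the user config
# resolve to the new defaults. The config contents here are hypothetical.
from omegaconf import OmegaConf

cfg = OmegaConf.create({"fp8": True})  # no amax settings supplied

print(cfg.get("fp8_amax_history_len", 1024))    # -> 1024
print(cfg.get("fp8_amax_compute_algo", "max"))  # -> max
```
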
8 changes: 4 additions & 4 deletions (file path not captured in this view)
@@ -119,8 +119,8 @@ def get_language_model(
   fp8_hybrid=False,
   fp8_margin=0,
   fp8_interval=1,
-  fp8_amax_history_len=1,
-  fp8_amax_compute_algo='most_recent',
+  fp8_amax_history_len=1024,
+  fp8_amax_compute_algo='max',
   reduce_amax=True,
   use_emha=False,
   ub_tp_comm_overlap=False,
@@ -506,8 +506,8 @@ def __init__(
   fp8_hybrid=False,
   fp8_margin=0,
   fp8_interval=1,
-  fp8_amax_history_len=1,
-  fp8_amax_compute_algo='most_recent',
+  fp8_amax_history_len=1024,
+  fp8_amax_compute_algo='max',
   reduce_amax=True,
   use_emha=False,
   ub_tp_comm_overlap=False,
4 changes: 2 additions & 2 deletions nemo/collections/nlp/modules/common/megatron/transformer.py
@@ -930,8 +930,8 @@ def __init__(
   fp8_hybrid=False,
   fp8_margin=0,
   fp8_interval=1,
-  fp8_amax_history_len=1,
-  fp8_amax_compute_algo='most_recent',
+  fp8_amax_history_len=1024,
+  fp8_amax_compute_algo='max',
   reduce_amax=True,
   use_emha=False,
   ub_tp_comm_overlap=False,
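
As to why these defaults: with a one-step history and `most_recent`, the FP8 scaling factor chases the last observed amax, so a transient activation spike can overflow on the next step; a 1024-step window with `max` sizes the scale for the largest amax seen recently. A toy illustration of the difference — plain Python, not TransformerEngine's internal scaling code (which, among other things, rounds the scale to a power of two):

```python
# Toy illustration of delayed FP8 scaling: the effect of amax_history_len
# and amax_compute_algo on the scaling factor. Not TE internals.
from collections import deque

E4M3_MAX = 448.0  # largest finite magnitude representable in FP8 E4M3

def fp8_scale(history, algo="max", margin=0):
    """Scale chosen so the selected amax lands near the top of the FP8 range."""
    amax = max(history) if algo == "max" else history[-1]
    return E4M3_MAX / amax / (2 ** margin)

history = deque(maxlen=1024)  # fp8_amax_history_len: 1024
for observed_amax in (3.1, 2.7, 9.4, 2.9):  # 9.4 is a transient spike
    history.append(observed_amax)

print(fp8_scale(history, algo="max"))          # sized for the 9.4 spike
print(fp8_scale(history, algo="most_recent"))  # sized for 2.9; a repeated spike would clip
```
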
