Added sample cpu_offloading switch to YAML (#8148)
* Added sample cpu_offloading switch to YAML

Signed-off-by: Selvaraj Anandaraj <[email protected]>

* Added comments

Signed-off-by: Selvaraj Anandaraj <[email protected]>

* Removed arithmetic op

Signed-off-by: Selvaraj Anandaraj <[email protected]>

---------

Signed-off-by: Selvaraj Anandaraj <[email protected]>
Co-authored-by: Selvaraj Anandaraj <[email protected]>
Co-authored-by: Eric Harper <[email protected]>
Signed-off-by: stevehuang52 <[email protected]>
3 people authored and stevehuang52 committed Jan 31, 2024
1 parent 731dfe4 commit 1761d35
Showing 1 changed file with 6 additions and 0 deletions.
examples/nlp/language_modeling/conf/megatron_gpt_config.yaml (6 additions, 0 deletions)
@@ -202,6 +202,12 @@ model:
## Flash Attention
use_flash_attention: False # Use flash attention in the self-attention module; this config does nothing when transformer_engine=True

## Offloading Activations/Weights to CPU
cpu_offloading: False # Enable offloading of activations and/or weights to CPU to reduce GPU memory usage
cpu_offloading_num_layers: 11 # Must be in [1, num_layers-1]; the final layer is never offloaded, so its activations are not exposed to any offloading latency
cpu_offloading_activations: True # Offload activations to CPU
cpu_offloading_weights: True # Offload weights to CPU

## Network
sharp: False # Enable the use of SHARP for NCCL data-parallel communications. This is ignored if the network does not support SHARP.

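A minimal sketch of sanity-checking the new switches against the num_layers constraint described in the comment above, assuming a local checkout and OmegaConf (already a NeMo dependency); the snippet is illustrative and not part of this commit:

from omegaconf import OmegaConf

# Load the GPT config that this commit extends (path assumes the repository root as the working directory).
cfg = OmegaConf.load("examples/nlp/language_modeling/conf/megatron_gpt_config.yaml")
model_cfg = cfg.model

if model_cfg.cpu_offloading:
    # The final layer is never offloaded, so the count must stay within [1, num_layers - 1].
    assert 1 <= model_cfg.cpu_offloading_num_layers <= model_cfg.num_layers - 1, \
        "cpu_offloading_num_layers must be in [1, num_layers - 1]"
    print(f"Offloading {model_cfg.cpu_offloading_num_layers} layers "
          f"(activations={model_cfg.cpu_offloading_activations}, weights={model_cfg.cpu_offloading_weights})")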
