
Commit

updated batch sizes.
Signed-off-by: Vahid <[email protected]>
VahidooX committed Apr 19, 2023
1 parent 82326a1 commit 84608ab
Showing 4 changed files with 10 additions and 11 deletions.
2 changes: 1 addition & 1 deletion examples/asr/conf/conformer/conformer_ctc_bpe.yaml
@@ -25,7 +25,7 @@
# The checkpoint of the large model trained on LibriSpeech with this recipe can be found here: https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_conformer_ctc_large_ls

# We suggest to use trainer.precision=bf16 for GPUs which support it otherwise trainer.precision=16 is recommended.
-# Using bf16 or 16 would makes it possible to double the batch size and speedup training/inference. If fp16 is not stable and model diverges after some epochs, you may use fp32.
+# Using bf16 or 16 would make it possible to double the batch size and speedup training/inference. If fp16 is not stable and model diverges after some epochs, you may use fp32.
# Here are the suggested batch size per GPU for each precision and memory sizes:
# +-----------+------------+------------+
# | Precision | GPU Memory | Batch Size |
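The precision recommendation in the comments above can be applied directly in the config. A minimal illustrative fragment, assuming the standard NeMo config layout (`trainer` and `model.train_ds` sections); the exact batch size should come from the suggested-batch-size table for your GPU:

```yaml
# Illustrative excerpt only; key names mirror the usual NeMo config layout.
trainer:
  precision: bf16   # fall back to 16 if the GPU lacks bf16 support; use 32 if fp16 diverges
model:
  train_ds:
    batch_size: 32  # per-GPU value; pick from the suggested table for your precision/memory
```

The same keys can be overridden at launch time instead of edited in the file, which is convenient when sweeping precisions.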
12 changes: 6 additions & 6 deletions examples/asr/conf/conformer/conformer_transducer_bpe.yaml
@@ -31,13 +31,13 @@
# +-----------+------------+------------+
# | Precision | GPU Memory | Batch Size |
# +===========+============+============+
-# | 32        | 16GB       | 4          |
-# |           | 32GB       | 8          |
-# |           | 80GB       | 16         |
+# | 32        | 16GB       | 8          |
+# |           | 32GB       | 16         |
+# |           | 80GB       | 32         |
# +-----------+------------+------------+
-# | 16 or     | 16GB       | 8          |
-# | bf16      | 32GB       | 16         |
-# |           | 80GB       | 23         |
+# | 16 or     | 16GB       | 16         |
+# | bf16      | 32GB       | 32         |
+# |           | 80GB       | 64         |
# +-----------+------------+------------+
# Note: They are based on the assumption of max_duration of 20. If you have longer or shorter max_duration, then batch sizes may need to get updated accordingly.
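The updated Conformer-Transducer table and the max_duration note can be sketched as a small lookup. This is purely illustrative, not part of NeMo; the inverse scaling with `max_duration` is a heuristic reading of the note above, not an official formula:

```python
# Suggested per-GPU batch sizes for Conformer-Transducer, keyed by
# (precision, GPU memory in GB), taken from the table above.
# Values assume max_duration=20.
SUGGESTED_BATCH = {
    ("32", 16): 8,    ("32", 32): 16,   ("32", 80): 32,
    ("16", 16): 16,   ("16", 32): 32,   ("16", 80): 64,
    ("bf16", 16): 16, ("bf16", 32): 32, ("bf16", 80): 64,
}

def suggested_batch_size(precision: str, gpu_mem_gb: int, max_duration: float = 20.0) -> int:
    """Return a suggested per-GPU batch size.

    Scales inversely with max_duration as a rough heuristic: longer
    utterances need proportionally more memory per sample.
    """
    base = SUGGESTED_BATCH[(precision, gpu_mem_gb)]
    return max(1, int(base * 20.0 / max_duration))
```

For example, an 80GB GPU in bf16 gets a suggested batch size of 64 at the default max_duration of 20, and half that if max_duration is doubled to 40.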

4 changes: 2 additions & 2 deletions examples/asr/conf/fastconformer/fast-conformer_ctc_bpe.yaml
@@ -3,15 +3,15 @@
# You may find more info about FastConformer here: https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/asr/models.html#fast-conformer

# We suggest to use trainer.precision=bf16 for GPUs which support it otherwise trainer.precision=16 is recommended.
-# Using bf16 or 16 would makes it possible to double the batch size and speedup training/inference. If fp16 is not stable and model diverges after some epochs, you may use fp32.
+# Using bf16 or 16 would make it possible to double the batch size and speedup training/inference. If fp16 is not stable and model diverges after some epochs, you may use fp32.
# Here are the suggested batch size per GPU for each precision and memory sizes:

# +-----------+------------+------------+
# | Precision | GPU Memory | Batch Size |
# +===========+============+============+
# | 32        | 16GB       | 16         |
# |           | 32GB       | 32         |
-# |           | 80GB       | 128        |
+# |           | 80GB       | 64         |
# +-----------+------------+------------+
# | fp16 or   | 16GB       | 32         |
# | bf16      | 32GB       | 64         |
@@ -2,9 +2,8 @@

# You may find more info about FastConformer here: https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/asr/models.html#fast-conformer

-
-# Using bf16 or 16 would makes it possible to double the batch size and speedup training/inference. If fp16 is not stable and model diverges after some epochs, you may use fp32.
+# Using bf16 or 16 would make it possible to double the batch size and speedup training/inference. If fp16 is not stable and model diverges after some epochs, you may use fp32.
# Here are the suggested batch size per GPU for each precision and memory sizes:

# +-----------+------------+------------+
