Skip to content

Commit

Permalink
Add Packed Seq option to GPT based models (NVIDIA#11100)
Browse files Browse the repository at this point in the history
* add pack seq args/docstr

Signed-off-by: Ao Tang <[email protected]>

* Apply isort and black reformatting

Signed-off-by: suiyoubi <[email protected]>

* reword docstr

Signed-off-by: Ao Tang <[email protected]>

* space

Signed-off-by: Ao Tang <[email protected]>

---------

Signed-off-by: Ao Tang <[email protected]>
Signed-off-by: suiyoubi <[email protected]>
Co-authored-by: suiyoubi <[email protected]>
  • Loading branch information
2 people authored and XuesongYang committed Jan 18, 2025
1 parent e40e720 commit 56ab393
Show file tree
Hide file tree
Showing 18 changed files with 58 additions and 10 deletions.
1 change: 1 addition & 0 deletions nemo/collections/llm/recipes/baichuan2_7b.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,7 @@ def finetune_recipe(
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None.
packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048.
Returns:
run.Partial: Partial configuration for fine-tuning.
Expand Down
1 change: 1 addition & 0 deletions nemo/collections/llm/recipes/chatglm3_6b.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,7 @@ def finetune_recipe(
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None.
packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048.
Returns:
run.Partial: Partial configuration for fine-tuning.
Expand Down
6 changes: 5 additions & 1 deletion nemo/collections/llm/recipes/gemma2_27b.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ def finetune_recipe(
num_nodes: int = 1,
num_gpus_per_node: int = 8,
peft_scheme: Optional[str] = 'lora',
packed_sequence: bool = False,
) -> run.Partial:
"""
Create a fine-tuning recipe for Gemma2 27B model.
Expand All @@ -191,6 +192,7 @@ def finetune_recipe(
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None.
packed_sequence (bool): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048.
Returns:
run.Partial: Partial configuration for fine-tuning.
Expand All @@ -208,7 +210,9 @@ def finetune_recipe(
on fine-tuning LLMs with NeMo, see the fine-tuning guide in the
`examples/llm/finetune/` directory.
"""
recipe = default_finetune_recipe(model(), "google/gemma-2-27b", dir, name, num_nodes, num_gpus_per_node)
recipe = default_finetune_recipe(
model(), "google/gemma-2-27b", dir, name, num_nodes, num_gpus_per_node, packed_sequence
)
if peft_scheme is None or peft_scheme.lower() == 'none':
recipe.optim.config.lr = 5e-6
recipe.trainer.strategy.tensor_model_parallel_size = 8
Expand Down
6 changes: 5 additions & 1 deletion nemo/collections/llm/recipes/gemma2_2b.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ def finetune_recipe(
num_nodes: int = 1,
num_gpus_per_node: int = 8,
peft_scheme: Optional[str] = 'lora',
packed_sequence: bool = False,
) -> run.Partial:
"""
Create a fine-tuning recipe for Gemma2 2B model.
Expand All @@ -191,6 +192,7 @@ def finetune_recipe(
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None.
packed_sequence (bool): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048.
Returns:
run.Partial: Partial configuration for fine-tuning.
Expand All @@ -208,7 +210,9 @@ def finetune_recipe(
on fine-tuning LLMs with NeMo, see the fine-tuning guide in the
`examples/llm/finetune/` directory.
"""
recipe = default_finetune_recipe(model(), "google/gemma-2-2b", dir, name, num_nodes, num_gpus_per_node)
recipe = default_finetune_recipe(
model(), "google/gemma-2-2b", dir, name, num_nodes, num_gpus_per_node, packed_sequence
)
if peft_scheme is None or peft_scheme.lower() == 'none':
recipe.optim.config.lr = 5e-6
elif peft_scheme.lower() == 'lora':
Expand Down
6 changes: 5 additions & 1 deletion nemo/collections/llm/recipes/gemma2_9b.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ def finetune_recipe(
num_nodes: int = 1,
num_gpus_per_node: int = 8,
peft_scheme: Optional[str] = 'lora',
packed_sequence: bool = False,
) -> run.Partial:
"""
Create a fine-tuning recipe for Gemma2 9B model.
Expand All @@ -191,6 +192,7 @@ def finetune_recipe(
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None.
packed_sequence (bool): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048.
Returns:
run.Partial: Partial configuration for fine-tuning.
Expand All @@ -208,7 +210,9 @@ def finetune_recipe(
on fine-tuning LLMs with NeMo, see the fine-tuning guide in the
`examples/llm/finetune/` directory.
"""
recipe = default_finetune_recipe(model(), "google/gemma-2-9b", dir, name, num_nodes, num_gpus_per_node)
recipe = default_finetune_recipe(
model(), "google/gemma-2-9b", dir, name, num_nodes, num_gpus_per_node, packed_sequence
)
if peft_scheme is None or peft_scheme.lower() == 'none':
recipe.optim.config.lr = 5e-6
recipe.trainer.strategy.tensor_model_parallel_size = 4
Expand Down
1 change: 1 addition & 0 deletions nemo/collections/llm/recipes/gemma_2b.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,7 @@ def finetune_recipe(
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None.
packed_sequence (Optional[bool]): If true, fine-tuning sequences will be packed into batches up to the given maximum seq_length for better efficiency.
Returns:
run.Partial: Partial configuration for fine-tuning.
Expand Down
1 change: 1 addition & 0 deletions nemo/collections/llm/recipes/gemma_7b.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,7 @@ def finetune_recipe(
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None.
packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048.
Returns:
run.Partial: Partial configuration for fine-tuning.
Expand Down
1 change: 1 addition & 0 deletions nemo/collections/llm/recipes/mistral_7b.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ def finetune_recipe(
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None.
packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048.
Returns:
run.Partial: Partial configuration for fine-tuning.
Expand Down
1 change: 1 addition & 0 deletions nemo/collections/llm/recipes/mistral_nemo_12b.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,7 @@ def finetune_recipe(
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None.
packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048.
Returns:
run.Partial: Partial configuration for fine-tuning.
Expand Down
1 change: 1 addition & 0 deletions nemo/collections/llm/recipes/mixtral_8x22b.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,7 @@ def finetune_recipe(
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None.
packed_sequence (Optional[bool]): If true, fine-tuning sequences will be packed into batches up to the given maximum seq_length for better efficiency.
Returns:
run.Partial: Partial configuration for fine-tuning.
Expand Down
1 change: 1 addition & 0 deletions nemo/collections/llm/recipes/mixtral_8x7b.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,7 @@ def finetune_recipe(
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None.
packed_sequence (Optional[bool]): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048.
Returns:
run.Partial: Partial configuration for fine-tuning.
Expand Down
6 changes: 5 additions & 1 deletion nemo/collections/llm/recipes/qwen2_1p5b.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ def finetune_recipe(
num_nodes: int = 1,
num_gpus_per_node: int = 8,
peft_scheme: Optional[str] = 'lora',
packed_sequence: bool = False,
) -> run.Partial:
"""
Create a fine-tuning recipe for Qwen2 1.5b model.
Expand All @@ -194,6 +195,7 @@ def finetune_recipe(
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None.
packed_sequence (bool): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048.
Returns:
run.Partial: Partial configuration for fine-tuning.
Expand All @@ -211,7 +213,9 @@ def finetune_recipe(
on fine-tuning LLMs with NeMo, see the fine-tuning guide in the
`examples/llm/finetune/` directory.
"""
recipe = default_finetune_recipe(model(), "Qwen/Qwen2-1.5B", dir, name, num_nodes, num_gpus_per_node)
recipe = default_finetune_recipe(
model(), "Qwen/Qwen2-1.5B", dir, name, num_nodes, num_gpus_per_node, packed_sequence
)
if peft_scheme is None or peft_scheme.lower() == 'none':
recipe.optim.config.lr = 5e-6
elif peft_scheme.lower() == 'lora':
Expand Down
6 changes: 5 additions & 1 deletion nemo/collections/llm/recipes/qwen2_500m.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ def finetune_recipe(
num_nodes: int = 1,
num_gpus_per_node: int = 8,
peft_scheme: Optional[str] = 'lora',
packed_sequence: bool = False,
) -> run.Partial:
"""
Create a fine-tuning recipe for Qwen2 500m model.
Expand All @@ -194,6 +195,7 @@ def finetune_recipe(
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None.
packed_sequence (bool): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048.
Returns:
run.Partial: Partial configuration for fine-tuning.
Expand All @@ -211,7 +213,9 @@ def finetune_recipe(
on fine-tuning LLMs with NeMo, see the fine-tuning guide in the
`examples/llm/finetune/` directory.
"""
recipe = default_finetune_recipe(model(), "Qwen/Qwen2-0.5B", dir, name, num_nodes, num_gpus_per_node)
recipe = default_finetune_recipe(
model(), "Qwen/Qwen2-0.5B", dir, name, num_nodes, num_gpus_per_node, packed_sequence
)
if peft_scheme is None or peft_scheme.lower() == 'none':
recipe.optim.config.lr = 5e-6
elif peft_scheme.lower() == 'lora':
Expand Down
6 changes: 5 additions & 1 deletion nemo/collections/llm/recipes/qwen2_72b.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ def finetune_recipe(
num_nodes: int = 1,
num_gpus_per_node: int = 8,
peft_scheme: Optional[str] = 'lora',
packed_sequence: bool = False,
) -> run.Partial:
"""
Create a fine-tuning recipe for Qwen2 72b model.
Expand All @@ -194,6 +195,7 @@ def finetune_recipe(
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None.
packed_sequence (bool): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048.
Returns:
run.Partial: Partial configuration for fine-tuning.
Expand All @@ -211,7 +213,9 @@ def finetune_recipe(
on fine-tuning LLMs with NeMo, see the fine-tuning guide in the
`examples/llm/finetune/` directory.
"""
recipe = default_finetune_recipe(model(), "Qwen/Qwen2-72B", dir, name, num_nodes, num_gpus_per_node)
recipe = default_finetune_recipe(
model(), "Qwen/Qwen2-72B", dir, name, num_nodes, num_gpus_per_node, packed_sequence
)
if peft_scheme is None or peft_scheme.lower() == 'none':
assert num_nodes >= 4
recipe.trainer.strategy.tensor_model_parallel_size = 8
Expand Down
6 changes: 5 additions & 1 deletion nemo/collections/llm/recipes/qwen2_7b.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ def finetune_recipe(
num_nodes: int = 1,
num_gpus_per_node: int = 8,
peft_scheme: Optional[str] = 'lora',
packed_sequence: bool = False,
) -> run.Partial:
"""
Create a fine-tuning recipe for Qwen2 7b model.
Expand All @@ -194,6 +195,7 @@ def finetune_recipe(
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None.
packed_sequence (bool): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048.
Returns:
run.Partial: Partial configuration for fine-tuning.
Expand All @@ -211,7 +213,9 @@ def finetune_recipe(
on fine-tuning LLMs with NeMo, see the fine-tuning guide in the
`examples/llm/finetune/` directory.
"""
recipe = default_finetune_recipe(model(), "Qwen/Qwen2-7B", dir, name, num_nodes, num_gpus_per_node)
recipe = default_finetune_recipe(
model(), "Qwen/Qwen2-7B", dir, name, num_nodes, num_gpus_per_node, packed_sequence
)
if peft_scheme is None or peft_scheme.lower() == 'none':
recipe.trainer.strategy.tensor_model_parallel_size = 2
recipe.optim.config.lr = 5e-6
Expand Down
6 changes: 5 additions & 1 deletion nemo/collections/llm/recipes/starcoder2_15b.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ def finetune_recipe(
num_nodes: int = 1,
num_gpus_per_node: int = 8,
peft_scheme: Optional[str] = 'lora',
packed_sequence: bool = False,
) -> run.Partial:
"""
Create a fine-tuning recipe for Starcoder2 15B model.
Expand All @@ -194,6 +195,7 @@ def finetune_recipe(
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None.
packed_sequence (bool): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048.
Returns:
run.Partial: Partial configuration for fine-tuning.
Expand All @@ -211,7 +213,9 @@ def finetune_recipe(
on fine-tuning LLMs with NeMo, see the fine-tuning guide in the
`examples/llm/finetune/` directory.
"""
recipe = default_finetune_recipe(model(), "bigcode/starcoder2-15b", dir, name, num_nodes, num_gpus_per_node)
recipe = default_finetune_recipe(
model(), "bigcode/starcoder2-15b", dir, name, num_nodes, num_gpus_per_node, packed_sequence
)
if peft_scheme is None or peft_scheme.lower() == 'none':
recipe.trainer.strategy.tensor_model_parallel_size = 4
recipe.optim.config.lr = 5e-6
Expand Down
6 changes: 5 additions & 1 deletion nemo/collections/llm/recipes/starcoder2_3b.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ def finetune_recipe(
num_nodes: int = 1,
num_gpus_per_node: int = 8,
peft_scheme: Optional[str] = 'lora',
packed_sequence: bool = False,
) -> run.Partial:
"""
Create a fine-tuning recipe for Starcoder2 3B model.
Expand All @@ -194,6 +195,7 @@ def finetune_recipe(
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None.
packed_sequence (bool): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048.
Returns:
run.Partial: Partial configuration for fine-tuning.
Expand All @@ -211,7 +213,9 @@ def finetune_recipe(
on fine-tuning LLMs with NeMo, see the fine-tuning guide in the
`examples/llm/finetune/` directory.
"""
recipe = default_finetune_recipe(model(), "bigcode/starcoder2-3b", dir, name, num_nodes, num_gpus_per_node)
recipe = default_finetune_recipe(
model(), "bigcode/starcoder2-3b", dir, name, num_nodes, num_gpus_per_node, packed_sequence
)
if peft_scheme is None or peft_scheme.lower() == 'none':
recipe.trainer.strategy.tensor_model_parallel_size = 2
recipe.optim.config.lr = 5e-6
Expand Down
6 changes: 5 additions & 1 deletion nemo/collections/llm/recipes/starcoder2_7b.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ def finetune_recipe(
num_nodes: int = 1,
num_gpus_per_node: int = 8,
peft_scheme: Optional[str] = 'lora',
packed_sequence: bool = False,
) -> run.Partial:
"""
Create a fine-tuning recipe for Starcoder2 7B model.
Expand All @@ -194,6 +195,7 @@ def finetune_recipe(
num_nodes (int): Number of compute nodes to use.
num_gpus_per_node (int): Number of GPUs per node.
peft_scheme (Optional[str]): Name of the peft scheme to use for fine-tuning. Allowed values: 'lora', 'none'/None.
packed_sequence (bool): Packing multiple training sequences into one long sequence for training efficiency. Default sequence length is 2048.
Returns:
run.Partial: Partial configuration for fine-tuning.
Expand All @@ -211,7 +213,9 @@ def finetune_recipe(
on fine-tuning LLMs with NeMo, see the fine-tuning guide in the
`examples/llm/finetune/` directory.
"""
recipe = default_finetune_recipe(model(), "bigcode/starcoder2-7b", dir, name, num_nodes, num_gpus_per_node)
recipe = default_finetune_recipe(
model(), "bigcode/starcoder2-7b", dir, name, num_nodes, num_gpus_per_node, packed_sequence
)
if peft_scheme is None or peft_scheme.lower() == 'none':
recipe.trainer.strategy.tensor_model_parallel_size = 2
recipe.optim.config.lr = 5e-6
Expand Down

0 comments on commit 56ab393

Please sign in to comment.