From b7739a819cd8ecc1ae206aaed1cead9d7bca723c Mon Sep 17 00:00:00 2001
From: Shane A
Date: Tue, 9 Jul 2024 12:28:27 -0700
Subject: [PATCH 1/2] Default to FSDP strategy

---
 olmo/config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/olmo/config.py b/olmo/config.py
index 461fa2f54..e147b313f 100644
--- a/olmo/config.py
+++ b/olmo/config.py
@@ -1099,7 +1099,7 @@ class TrainConfig(BaseConfig):
     Settings for compiling the model with ``torch.compile()``.
     """
 
-    distributed_strategy: Optional[DistributedStrategy] = None
+    distributed_strategy: Optional[DistributedStrategy] = DistributedStrategy.fsdp
     """
     Distributed strategy for OLMo model (eg. single GPU, DDP, FSDP).
     """

From 75d45c6c3532ac3adf06867d80d73d04e1b6c668 Mon Sep 17 00:00:00 2001
From: Shane A
Date: Mon, 15 Jul 2024 14:08:24 -0700
Subject: [PATCH 2/2] Update CHANGELOG

---
 CHANGELOG.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index db8b4d828..b9de864d8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## Unreleased
 
+### Changed
+
+- Changed default distributed training strategy from single-GPU to FSDP
+
 ## [v0.4.0](https://github.com/allenai/OLMo/releases/tag/v0.4.0) - 2024-07-11
 
 ### Added