Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/accelerate/accelerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -648,6 +648,10 @@ def split_batches(self):
def dispatch_batches(self):
return self.dataloader_config.dispatch_batches

@property
def cp(self):
return self.dataloader_config.cp

@property
def even_batches(self):
return self.dataloader_config.even_batches
Expand Down Expand Up @@ -2410,6 +2414,7 @@ def prepare_data_loader(
non_blocking=self.non_blocking,
use_stateful_dataloader=self.use_stateful_dataloader,
torch_device_mesh=device_mesh,
cp=self.cp,
)
self._dataloaders.append(prepared_data_loader)
return prepared_data_loader
Expand Down
5 changes: 5 additions & 0 deletions src/accelerate/data_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -1007,6 +1007,7 @@ def prepare_data_loader(
non_blocking: bool = False,
use_stateful_dataloader: bool = False,
torch_device_mesh=None,
cp=False,
) -> DataLoader:
"""
Wraps a PyTorch `DataLoader` to generate batches for one of the processes only.
Expand Down Expand Up @@ -1137,6 +1138,10 @@ def prepare_data_loader(
process_index = process_index // submesh_tp_size
num_processes = submesh_fsdp_size * submesh_dp_size

if cp:
process_index = 0
num_processes = 1

Comment on lines 1138 to +1144
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn't forcing a single process here break n-d parallelism? Maybe something like this instead?

Suggested change
- process_index = process_index // submesh_tp_size
- num_processes = submesh_fsdp_size * submesh_dp_size
- if cp:
-     process_index = 0
-     num_processes = 1
+ process_index = process_index // (submesh_tp_size * submesh_cp_size)
+ num_processes = submesh_fsdp_size * submesh_dp_size // (submesh_tp_size * submesh_cp_size)
+ if cp:
+     process_index = 0
+     num_processes = 1

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed, we will have something like that. I only opened this PR so that I would not forget about it; we will upstream the changes to main in a separate PR once the n-d parallelism PR is finished.

# Sanity check
if split_batches:
if dataloader.batch_size is not None:
Expand Down
1 change: 1 addition & 0 deletions src/accelerate/utils/dataclasses.py
Original file line number Diff line number Diff line change
Expand Up @@ -815,6 +815,7 @@ class DataLoaderConfiguration:
" underlying dataset is an `IterableDataset`, `False` otherwise."
},
)
cp: bool = field(default=False)
even_batches: bool = field(
default=True,
metadata={
Expand Down
Loading