From ec5abee1d2345a970951295c802e15e1bc9ada78 Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Wed, 26 Oct 2022 23:42:14 +0000
Subject: [PATCH 1/5] add `buffers` support when computing `infer_auto_device_map`

---
 src/accelerate/utils/modeling.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/accelerate/utils/modeling.py b/src/accelerate/utils/modeling.py
index c1483355716..dc45d6c08d2 100644
--- a/src/accelerate/utils/modeling.py
+++ b/src/accelerate/utils/modeling.py
@@ -476,7 +476,9 @@ def infer_auto_device_map(
     current_memory_used = 0

     # Direct submodules and parameters
-    modules_to_treat = list(model.named_parameters(recurse=False)) + list(model.named_children())
+    modules_to_treat = (
+        list(model.named_parameters(recurse=False)) + list(model.named_children()) + list(model.named_buffers())
+    )
     # Initialize maximum largest layer, to know which space to keep in memory
     max_layer_size, max_layer_names = get_max_layer_size(modules_to_treat, module_sizes, no_split_module_classes)


From db6ab8a97e15a7a4f8ee1e56bc12ad114e2180af Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Thu, 27 Oct 2022 08:52:45 +0000
Subject: [PATCH 2/5] should fix broken test

---
 src/accelerate/utils/modeling.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/accelerate/utils/modeling.py b/src/accelerate/utils/modeling.py
index dc45d6c08d2..aea4740c164 100644
--- a/src/accelerate/utils/modeling.py
+++ b/src/accelerate/utils/modeling.py
@@ -485,6 +485,7 @@ def infer_auto_device_map(
     # Ready ? This is going to be a bit messy.
     while len(modules_to_treat) > 0:
         name, module = modules_to_treat.pop(0)
+
         # Max size in the remaining layers may have changed since we took one, so we maybe update it.
         max_layer_names = [n for n in max_layer_names if not n.startswith(name)]
         if len(max_layer_names) == 0:
@@ -504,6 +505,13 @@ def infer_auto_device_map(
         # Reduce max size available by the largest layer.
         if devices[current_device] in main_devices:
             current_max_size = current_max_size - max_layer_size
+
+        # Case 0: Put directly the buffer on the device_map - usually buffers are small
+        if name in model._buffers:
+            current_memory_used += module_size
+            device_map[name] = devices[current_device]
+            continue
+
         # Case 1 -> We're too big!
         if current_max_size is not None and current_memory_used + module_size > current_max_size:
             # Split or not split?
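Note on the two patches above (illustration only, not part of the series): tensors registered with `register_buffer` are never returned by `named_parameters`, so before this change any buffer attached directly to the root module was simply skipped when building the device map. A minimal sketch with a made-up toy module showing the distinction:

    import torch
    from torch import nn

    class ToyModel(nn.Module):
        def __init__(self):
            super().__init__()
            self.linear = nn.Linear(4, 4)
            # tracked by the module and saved in its state_dict, but not a trainable parameter
            self.register_buffer("scale", torch.ones(4))

    model = ToyModel()
    # Direct parameters of the root module: empty, the weight and bias live in `linear`
    print([n for n, _ in model.named_parameters(recurse=False)])  # []
    # The buffer is only visible through named_buffers()
    print([n for n, _ in model.named_buffers()])                  # ['scale']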
From 30e44e26800abd26d87def642902db103fa49a2a Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Thu, 27 Oct 2022 09:10:01 +0000
Subject: [PATCH 3/5] fix broken test

---
 src/accelerate/utils/modeling.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/accelerate/utils/modeling.py b/src/accelerate/utils/modeling.py
index aea4740c164..d84514f087a 100644
--- a/src/accelerate/utils/modeling.py
+++ b/src/accelerate/utils/modeling.py
@@ -475,10 +475,11 @@ def infer_auto_device_map(
     current_device = 0
     current_memory_used = 0

-    # Direct submodules and parameters
-    modules_to_treat = (
-        list(model.named_parameters(recurse=False)) + list(model.named_children()) + list(model.named_buffers())
-    )
+    # Direct submodules and parameters, including tensors that are registered as buffers
+    # but excluding buffers such as `running_mean` for batch_norm
+    filtered_buffers = list((m[0], m[1]) for m in model.named_buffers() if m[0] in model._buffers)
+    modules_to_treat = list(model.named_parameters(recurse=False)) + list(model.named_children()) + filtered_buffers
+
     # Initialize maximum largest layer, to know which space to keep in memory
     max_layer_size, max_layer_names = get_max_layer_size(modules_to_treat, module_sizes, no_split_module_classes)


From 771f35b0c80b5a8d3065f424bd7732f89c2efc74 Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Thu, 27 Oct 2022 16:55:30 +0000
Subject: [PATCH 4/5] simpler solution - use `model.named_buffers(recurse=False)` instead

Co-authored-by: Sylvain Gugger
---
 src/accelerate/utils/modeling.py | 19 ++++++------------
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/src/accelerate/utils/modeling.py b/src/accelerate/utils/modeling.py
index d84514f087a..e471731a4e9 100644
--- a/src/accelerate/utils/modeling.py
+++ b/src/accelerate/utils/modeling.py
@@ -475,18 +475,18 @@ def infer_auto_device_map(
     current_device = 0
     current_memory_used = 0

-    # Direct submodules and parameters, including tensors that are registered as buffers
-    # but excluding buffers such as `running_mean` for batch_norm
-    filtered_buffers = list((m[0], m[1]) for m in model.named_buffers() if m[0] in model._buffers)
-    modules_to_treat = list(model.named_parameters(recurse=False)) + list(model.named_children()) + filtered_buffers
-
+    # Direct submodules and parameters
+    modules_to_treat = (
+        list(model.named_parameters(recurse=False))
+        + list(model.named_children())
+        + list(model.named_buffers(recurse=False))
+    )
     # Initialize maximum largest layer, to know which space to keep in memory
     max_layer_size, max_layer_names = get_max_layer_size(modules_to_treat, module_sizes, no_split_module_classes)

     # Ready ? This is going to be a bit messy.
     while len(modules_to_treat) > 0:
         name, module = modules_to_treat.pop(0)
-
         # Max size in the remaining layers may have changed since we took one, so we maybe update it.
         max_layer_names = [n for n in max_layer_names if not n.startswith(name)]
         if len(max_layer_names) == 0:
@@ -506,13 +506,6 @@ def infer_auto_device_map(
         # Reduce max size available by the largest layer.
         if devices[current_device] in main_devices:
             current_max_size = current_max_size - max_layer_size
-
-        # Case 0: Put directly the buffer on the device_map - usually buffers are small
-        if name in model._buffers:
-            current_memory_used += module_size
-            device_map[name] = devices[current_device]
-            continue
-
         # Case 1 -> We're too big!
         if current_max_size is not None and current_memory_used + module_size > current_max_size:
             # Split or not split?
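Note on the simplification above (illustration only, not part of the series): `named_buffers()` recurses into submodules, so the first attempt also picked up nested buffers such as batch-norm statistics that are already counted as part of their parent submodule, which is presumably what broke the test. `named_buffers(recurse=False)` keeps only the buffers registered directly on the root module, which is what the earlier `model._buffers` filtering emulated by hand. A small sketch with a made-up module:

    import torch
    from torch import nn

    class ToyModel(nn.Module):
        def __init__(self):
            super().__init__()
            self.bn = nn.BatchNorm1d(4)                      # owns running_mean / running_var buffers
            self.register_buffer("offset", torch.zeros(4))   # direct buffer of the root module

    model = ToyModel()
    print([n for n, _ in model.named_buffers()])
    # ['offset', 'bn.running_mean', 'bn.running_var', 'bn.num_batches_tracked']
    print([n for n, _ in model.named_buffers(recurse=False)])
    # ['offset']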
From 20923ca5ddb6e7ee20a2663ee27a68e6cbd7503e Mon Sep 17 00:00:00 2001
From: sgugger
Date: Thu, 27 Oct 2022 17:01:10 +0000
Subject: [PATCH 5/5] forward contrib credits from suggestion
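For context, a hedged end-to-end sketch of how the patched function is typically called; the toy model and memory limit are illustrative and not taken from this patch series:

    import torch
    from torch import nn
    from accelerate.utils import infer_auto_device_map

    class ToyModel(nn.Module):
        def __init__(self):
            super().__init__()
            self.embed = nn.Embedding(1000, 64)
            self.register_buffer("position_ids", torch.arange(128))
            self.head = nn.Linear(64, 1000)

    model = ToyModel()
    # With this series applied, the root-level `position_ids` buffer is taken into
    # account when assigning devices instead of being silently skipped.
    device_map = infer_auto_device_map(model, max_memory={"cpu": 10**9})
    print(device_map)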