diff --git a/.github/actions/test/action.yml b/.github/actions/test/action.yml
index 4a4a29c52..6b2ed9e9d 100644
--- a/.github/actions/test/action.yml
+++ b/.github/actions/test/action.yml
@@ -23,7 +23,7 @@ runs:
       with:
           venv: ${{ inputs.venv }}
           name: compressed
-          extra: "[dev,accelerate]"
+          extra: "[dev]"
 
     - name: clean up
       run: |
diff --git a/.github/workflows/test-check.yaml b/.github/workflows/test-check.yaml
index 2407b9eaa..f3cc04f93 100644
--- a/.github/workflows/test-check.yaml
+++ b/.github/workflows/test-check.yaml
@@ -30,7 +30,7 @@ jobs:
         - name: Set Env
           run: pip3 install --upgrade pip setuptools
         - name: "⚙️ Install dependencies"
-          run: pip3 install .[dev,accelerate]
+          run: pip3 install .[dev]
         - name: clean up
           run: |
             echo "cleaning up disk space as GHA runner has limited disk size."
diff --git a/setup.py b/setup.py
index a37bd9d2d..fc59b3d84 100644
--- a/setup.py
+++ b/setup.py
@@ -92,7 +92,7 @@ def _setup_install_requires() -> List:
 
 def _setup_extras() -> Dict:
     return {
-        "dev": ["black==22.12.0", "isort==5.8.0", "wheel>=0.36.2", "flake8>=3.8.3", "pytest>=6.0.0", "nbconvert>=7.16.3", "transformers<5.0"],
+        "dev": ["black==22.12.0", "isort==5.8.0", "wheel>=0.36.2", "flake8>=3.8.3", "pytest>=6.0.0", "nbconvert>=7.16.3", "transformers<5.0", "accelerate"],
         "accelerate": ["accelerate"]
     }
 
diff --git a/src/compressed_tensors/linear/compressed_linear.py b/src/compressed_tensors/linear/compressed_linear.py
index d24df2fcd..014aeef9d 100644
--- a/src/compressed_tensors/linear/compressed_linear.py
+++ b/src/compressed_tensors/linear/compressed_linear.py
@@ -87,12 +87,6 @@ def from_linear(
         # mark module as compressed
         module.quantization_status = QuantizationStatus.COMPRESSED
 
-        # handles case where forward is wrapped in new_forward by accelerate hooks
-        if hasattr(module, "_old_forward"):
-            module._old_forward = CompressedLinear.forward.__get__(
-                module, CompressedLinear
-            )
-
         return module
 
     def forward(self, input: Tensor) -> Tensor:
diff --git a/src/compressed_tensors/offload/__init__.py b/src/compressed_tensors/offload/__init__.py
index 072dbdf7a..ab86b2d00 100644
--- a/src/compressed_tensors/offload/__init__.py
+++ b/src/compressed_tensors/offload/__init__.py
@@ -135,9 +135,7 @@ def register_offload_module(base: torch.nn.Module, name: str, module: torch.nn.M
     """
     cache = base._parameters
     if isinstance(cache, OffloadCache):
-        offload_module(
-            module, cache.onload_device, cache.offload_device, no_split=False
-        )
+        offload_module(module, cache.onload_device, cache.offload_device)
 
     base.register_module(name, module)
 
@@ -178,9 +176,12 @@ def align_module_device(
     if isinstance(module._parameters, OffloadCache):
         assert isinstance(module._buffers, OffloadCache)
         with module._parameters.disable_offloading():
-            with patch_attr(
-                module._parameters, "onload_device", execution_device
-            ), patch_attr(module._buffers, "onload_device", execution_device):
+            if execution_device is not None:
+                with patch_attr(
+                    module._parameters, "onload_device", execution_device
+                ), patch_attr(module._buffers, "onload_device", execution_device):
+                    yield
+            else:
                 yield
 
     else:
diff --git a/src/compressed_tensors/offload/dispatch.py b/src/compressed_tensors/offload/dispatch.py
index 5206e10f4..1f5e4fbf2 100644
--- a/src/compressed_tensors/offload/dispatch.py
+++ b/src/compressed_tensors/offload/dispatch.py
@@ -39,7 +39,7 @@
 def offload_model(
     model: ModelType,
     onload_device: torch.device | str,
-    offload_device: Optional[torch.device | str | Literal["disk"]] = None,
+    offload_device: torch.device | str | Literal["disk"] = torch.device("cpu"),
 ) -> ModelType:
     """
     Offload a model to the `offload_device`. During forward passes, model weights will
diff --git a/src/compressed_tensors/quantization/lifecycle/initialize.py b/src/compressed_tensors/quantization/lifecycle/initialize.py
index 8c1b251c5..f8ef79dcf 100644
--- a/src/compressed_tensors/quantization/lifecycle/initialize.py
+++ b/src/compressed_tensors/quantization/lifecycle/initialize.py
@@ -23,6 +23,7 @@
     QuantizedAttentionImpl,
     QuantizedKVCache,
 )
+from compressed_tensors.offload import unwrap_offload_forward
 from compressed_tensors.quantization import (
     ActivationOrdering,
     DynamicType,
@@ -37,7 +38,6 @@
 )
 from compressed_tensors.quantization.utils import strategy_cdiv
 from compressed_tensors.utils import (
-    disable_hf_hook,
     get_execution_device,
     get_head_dim,
     get_num_attn_heads,
@@ -134,7 +134,7 @@ def initialize_module_for_quantization(
                 force_zero_point=force_zero_point,
             )
 
-        with disable_hf_hook(module):
+        with unwrap_offload_forward(module):
             # wrap forward call of module to perform
             # quantized actions based on calltime status
             wrap_module_forward_quantized(module, scheme)
diff --git a/src/compressed_tensors/transform/apply.py b/src/compressed_tensors/transform/apply.py
index ade267234..e247e7029 100644
--- a/src/compressed_tensors/transform/apply.py
+++ b/src/compressed_tensors/transform/apply.py
@@ -12,12 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Dict
-
 import torch
 from compressed_tensors import TRANSFORM_CONFIG_NAME
 from compressed_tensors.transform import TransformConfig, TransformFactory
-from compressed_tensors.utils.offload import has_offloaded_params
 
 
 __all__ = ["apply_transform_config"]
@@ -37,35 +34,3 @@ def apply_transform_config(model: torch.nn.Module, config: TransformConfig):
 
     # attach config to model for compression/serialization
     setattr(model, TRANSFORM_CONFIG_NAME, config)
-
-    # ensure that tied weight transforms can be serialized without aliases
-    # In the future, this could be done by transformers or model compressor
-    # which would make this more robust to changing dispatches after transforms
-    _tie_offloaded_tensors(model)
-
-
-def _tie_offloaded_tensors(model: torch.nn.Module):
-    """
-    When accelerate replaces tensors with meta tensors during offloading, the meta
-    tensors may not be identical, even if the offloaded values are identical.
-
-    However, transformers can only serialize correctly if meta tensors are identical
-    (see transformers#39263).
-
-    This function collects all meta tensors which have shared offloaded values and sets
-    those tensors to be identical so that they can be removed during serialization
-
-    :param model: model potentially containing offloaded meta tensors to fix
-    """
-
-    # ensure that if a location shares an offloaded tensor pointers, that the
-    # meta tensor is also identical (assigned to the first instance of parameter)
-    ptr_to_meta: Dict[int, torch.nn.Parameter] = dict()
-    for module in model.modules():
-        if has_offloaded_params(module):
-            for key, _ in module.named_parameters(recurse=False):
-                offloaded_ptr = module._hf_hook.weights_map[key].data_ptr()
-
-                if offloaded_ptr not in ptr_to_meta:
-                    ptr_to_meta[offloaded_ptr] = getattr(module, key)
-                setattr(module, key, ptr_to_meta[offloaded_ptr])
diff --git a/src/compressed_tensors/transform/factory/base.py b/src/compressed_tensors/transform/factory/base.py
index 96f15c9da..c54328928 100644
--- a/src/compressed_tensors/transform/factory/base.py
+++ b/src/compressed_tensors/transform/factory/base.py
@@ -26,6 +26,7 @@
     initialize_hooked_kv_cache,
     register_key_hook,
 )
+from compressed_tensors.offload import OffloadCache
 from compressed_tensors.registry.registry import RegistryMixin, T
 from compressed_tensors.transform import (
     TransformArgs,
@@ -34,8 +35,6 @@
 )
 from compressed_tensors.utils import (
     align_module_device,
-    delete_offload_module,
-    has_offloaded_params,
     match_named_modules,
     patch_attr,
     register_offload_module,
@@ -116,13 +115,6 @@ def _apply_to_module(self, model: Module, module: Module, args: TransformArgs):
         :param module: target module to apply transforms to
         :param args: defines how the transform will be applied to the target module
         """
-        if has_offloaded_params(module):
-            if module._hf_hook.place_submodules:
-                raise NotImplementedError(
-                    "Applying transforms to offloaded submodules with "
-                    "`place_submodules=True` is not supported"
-                )
-
         # create transform as submodule
         transform_name = f"{self.name}_{args.location}"
         transform = self.create_transform(module, args)
@@ -150,13 +142,13 @@ def input_hook(_, args):
             if self.scheme.requires_grad:
                 # for training, the weight changes with every forward pass
                 # so we can leverage parametrization to propagate the gradient
-                if has_offloaded_params(module):
+                if isinstance(module._parameters, OffloadCache):
                     raise ValueError("Offloaded training is not supported")
                 P.register_parametrization(module, "weight", transform)
 
             else:
                 # transform is no longer needed (unfusing is not supported)
-                delete_offload_module(module, transform_name)
+                delattr(module, transform_name)
 
         # register output transformation hook
         elif args.location == TransformLocation.OUTPUT:
diff --git a/src/compressed_tensors/utils/offload.py b/src/compressed_tensors/utils/offload.py
index 01d833a77..e5c9bf13d 100644
--- a/src/compressed_tensors/utils/offload.py
+++ b/src/compressed_tensors/utils/offload.py
@@ -12,59 +12,31 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
-Utilities associated with offloading functionality provided by `accelerate`.
+Utilities associated with offloading functionality
 
 | ------------------------------------------------------------------------------------------------------ | # noqa: E501
 | Operation  | Without offloading support             | With offloading support                          | # noqa: E501
 | ---------- | -------------------------------------- | ------------------------------------------------ | # noqa: E501
-| Add        | module.register_parameter(name, param) | register_offload_parameter(module, name, param)  | # noqa: E501
-| Check      | N/A                                    | has_offloaded_params(module)                     | # noqa: E501
-| Onload     | N/A                                    | with align_module_device(module)                 | # noqa: E501
 | Update     | module.name.data.copy_(new_data)       | update_offload_parameter(module, name, new_data) | # noqa: E501
-| Delete     | del module.name                        | delete_offload_parameter(module, name)           | # noqa: E501
-| Add Module | module.register_module(name, child)    | register_offload_module(name, child)             | # noqa: E501
-| Del Module | del module.name                        | delete_offload_module(module, name)              | # noqa: E501
 | ------------------------------------------------------------------------------------------------------ | # noqa: E501
 """
 
 import contextlib
-import warnings
-from functools import wraps
-from operator import attrgetter
-from typing import Any, Callable, Dict, Iterable, Literal, Optional, Tuple, Union
+from typing import Literal, Optional
 
 import torch
-from compressed_tensors.utils import patch_attr
-
-
-try:
-    from accelerate.hooks import (
-        AlignDevicesHook,
-        add_hook_to_module,
-        attach_align_device_hook,
-        named_module_tensors,
-        remove_hook_from_module,
-    )
-    from accelerate.utils import (
-        OffloadedWeightsLoader,
-        PrefixedDataset,
-        find_tied_parameters,
-        set_module_tensor_to_device,
-    )
-
-    _has_accelerate = True
-
-except ImportError:
-    _has_accelerate = False
-    AlignDevicesHook = None
-    add_hook_to_module = None
-    remove_hook_from_module = None
-    OffloadedWeightsLoader = None
-    PrefixedDataset = None
-    set_module_tensor_to_device = None
-    named_module_tensors = None
-    attach_align_device_hook = None
-    find_tied_parameters = None
+from compressed_tensors.offload import (
+    align_module_device,
+    align_modules,
+    disable_offloading,
+    get_execution_device,
+    get_offloaded_device,
+    offload_model,
+    register_offload_module,
+    remove_dispatch,
+    update_offload_parameter,
+)
+from compressed_tensors.utils.helpers import deprecated
 
 
 __all__ = [
@@ -85,51 +57,11 @@
     "disable_offloading",
     "remove_dispatch",
     "cast_to_device",
+    "offload_to_weights_map",
+    "delete_from_weights_map",
 ]
 
 
-def check_accelerate(fallback: Any):
-    def decorator(func: Callable[[Any], Any]):
-        if not _has_accelerate:
-            if fallback == "error":
-
-                @wraps(func)
-                def fallback_fn(*args, **kwargs):
-                    raise ValueError(
-                        "Please install `accelerate` in order to use this function"
-                    )
-
-            else:
-
-                @wraps(func)
-                def fallback_fn(*args, **kwargs):
-                    return fallback
-
-            return fallback_fn
-
-        return func
-
-    return decorator
-
-
-""" Candidates for Depreciation """
-
-
-def get_offloaded_device(module: torch.nn.Module) -> torch.device:
-    """
-    :param module: module to check
-    :return: device module is offloaded to onto after forward pass
-    """
-    if has_offloaded_params(module):
-        first_key = list(module._hf_hook.weights_map.keys())[0]
-        prefix_dataset = module._hf_hook.weights_map.dataset
-        return prefix_dataset[first_key].device
-    else:
-        # if the module is not offloaded, then any addded weights
-        # should be placed the module's execution device
-        return get_execution_device(module)
-
-
 def update_parameter_data(
     module: torch.nn.Module, new_param_data: torch.Tensor, param_name: str
 ):
@@ -147,7 +79,8 @@ def update_parameter_data(
 """ Candidates for Upstreaming """
 
 
-def cast_to_device(device_spec: Union[int, torch.device]) -> torch.device:
+@deprecated()
+def cast_to_device(device_spec: int | torch.device) -> torch.device:
     """
     Convert an integer device index or torch.device into a torch.device object.
 
@@ -160,31 +93,12 @@ def cast_to_device(device_spec: Union[int, torch.device]) -> torch.device:
     return device_spec
 
 
-def get_execution_device(module: torch.nn.Module) -> torch.device:
-    """
-    Get the device which inputs should be moved to before module execution.
-    Assume that modules execute in the same order as returned by `model.modules()`
-
-    :param module: module to check, may be offloaded
-    :return: onload device of module
-    """
-    for submodule in module.modules():
-        if has_offloaded_params(submodule):
-            return cast_to_device(submodule._hf_hook.execution_device)
-
-        param = next(submodule.parameters(recurse=False), None)
-        if param is not None:
-            return param.device
-
-    warnings.warn(f"Unable to get execution device of {module}, falling back to CPU")
-    return torch.device("cpu")
-
-
+@deprecated("module.register_parameter(name, parameter)")
 def register_offload_parameter(
     module: torch.nn.Module,
     name: str,
     parameter: torch.nn.Parameter,
-    offload_device: Optional[Union[torch.device, Literal["disk"]]] = None,
+    offload_device: Optional[torch.device | Literal["disk"]] = None,
 ):
     """
     Register a parameter to the given module which may be offloaded
@@ -195,64 +109,13 @@ def register_offload_parameter(
     :param offload_device: device on which weight will be offloaded to. If None is
         provided, then infer device from parameters on module
     """
-    has_onload = any(p.device != torch.device("meta") for p in module.parameters())
-    module.register_parameter(name, parameter)
-
-    # do everything AlignDevicesHook.init_hook does
-    # https://github.com/huggingface/accelerate/blob/main/src/accelerate/hooks.py#L281
-    if has_offloaded_params(module):
-        hook: AlignDevicesHook = module._hf_hook
-        assert hook.weights_map is not None
-
-        # append to original_devices
-        hook.original_devices[name] = parameter.device
-
-        # append to weights map
-        offload_to_weights_map(hook.weights_map, name, parameter.data, offload_device)
-
-        # append to tied_params_map
-        offloaded = hook.weights_map[name]
-        if hook.tied_params_map is not None:
-            hook.tied_params_map[offloaded.data_ptr()] = {}  # (1)
-
-        # perform offloading
-        if not has_onload:
-            set_module_tensor_to_device(module, name, "meta")
-
-
-def update_offload_parameter(
-    module: torch.nn.Module,
-    name: str,
-    data: torch.Tensor,
-    offload_device: Optional[Union[torch.device, Literal["disk"]]] = None,
-):
-    """
-    Update the data of an existing parameter and its offload dict. Supports both
-    parameters of offloaded modules and non-offloaded modules
-
-    :param module: module containing the parameter to update
-    :param name: name of module parameter to update
-    :param data: tensor to update parameter with
-    :param offload_device: device on which weight will be offloaded to. If None is
-        provided, then infer device from parameters on module
-    """
-    param: torch.nn.Parameter = getattr(module, name)
-    if param.data.shape != data.shape:
-        warnings.warn(
-            f"Shape of parameter being updated {param.data.shape} does not match shape "
-            f"of update data {data.shape}"
-        )
-
-    # copy data into onloaded parameter if applicable
-    if param.device != torch.device("meta") and data is not param.data:
-        param.data.copy_(data)
+    if offload_device == "disk":
+        raise NotImplementedError("Disk offloading is not currently supported")
 
-    # update offload dict
-    if has_offloaded_params(module):
-        weights_map = module._hf_hook.weights_map
-        offload_to_weights_map(weights_map, name, data, offload_device)
+    module.register_parameter(name, parameter)
 
 
+@deprecated("delattr(module, name)")
 def delete_offload_parameter(module: torch.nn.Module, name: str):
     """
     Delete a parameter from a module which may be offloaded,
@@ -263,347 +126,60 @@ def delete_offload_parameter(module: torch.nn.Module, name: str):
     """
     delattr(module, name)
 
-    if has_offloaded_params(module):
-        weights_map = module._hf_hook.weights_map
-        delete_from_weights_map(weights_map, name)
 
-        module._hf_hook.tied_params_names -= set(name)
-        if name in module._hf_hook.original_devices:
-            del module._hf_hook.original_devices[name]
-        if name in module._hf_hook.param_original_devices:
-            del module._hf_hook.param_original_devices[name]
-        if name in module._hf_hook.buffer_original_devices:
-            del module._hf_hook.param_original_devices[name]
-
-
-@check_accelerate(fallback=contextlib.nullcontext())
+@deprecated("compressed_tensors.offload::unwrap_offload")
 @contextlib.contextmanager
 def disable_hf_hook(module: torch.nn.Module):
-    hooks = {}
-
-    def collect_hooks(module):
-        if hasattr(module, "_hf_hook"):
-            hooks[module] = module._hf_hook
-            remove_hook_from_module(module)
-
-    module.apply(collect_hooks)
-
-    yield
-
-    for submodule, hook in hooks.items():
-        add_hook_to_module(submodule, hook)
-
-
-@check_accelerate(fallback=None)
-def offload_to_weights_map(
-    weights_map: Union[PrefixedDataset, Dict, OffloadedWeightsLoader],
-    key: str,
-    value: torch.Tensor,
-    offload_device: Optional[Union[torch.device, Literal["disk"]]] = None,
-):
-    """
-    Helper function which implements offloaded item assignment for PrefixedDataset,
-    OffloadedWeightsLoader, and Dict types.
-
-    :param weights_map: weight map to be updated with offload information
-    :param key: key used to identify weight location
-    :param value: weight being offloaded
-    :param offload_device: device on which weight will be offloaded to. If None is
-        provided, then infer device from parameters in weights_map
-    """
-    if isinstance(weights_map, PrefixedDataset):
-        if offload_device == "disk":
-            raise ValueError(f"Cannot offload to disk with type {type(weights_map)}")
-
-        dataset = weights_map.dataset
-        key = f"{weights_map.prefix}{key}"
-        offload_to_weights_map(dataset, key, value, offload_device)
-
-    elif isinstance(weights_map, OffloadedWeightsLoader):
-        if key not in weights_map.all_keys:
-            weights_map.all_keys.append(key)
-
-        if len(weights_map.index) <= 0 and offload_device != "disk":
-            offload_to_weights_map(weights_map.state_dict, key, value, offload_device)
-
-        else:
-            raise NotImplementedError(
-                "Updating weights_map with disk offloading is not implemented yet"
-            )
-
-    elif isinstance(weights_map, dict):
-        if offload_device == "disk":
-            raise ValueError(f"Cannot offload to disk with type {type(weights_map)}")
-
-        # infer offload device
-        if offload_device is None:
-            if key in weights_map:
-                offload_device = weights_map[key].device
-            else:
-                tens = next(iter(weights_map.values()), None)
-                if tens is None:
-                    raise ValueError(
-                        "Cannot infer offload device from empty weights_map"
-                    )
-                offload_device = tens.device
-
-        weights_map[key] = value.to(device=offload_device)
-
-    else:
-        raise NotImplementedError(
-            "Updating offload data not implemented for weights_map of type "
-            f"{type(weights_map)}"
-        )
-
-
-@check_accelerate(fallback=None)
-def delete_from_weights_map(
-    weights_map: Union[PrefixedDataset, Dict, OffloadedWeightsLoader],
-    key: str,
-):
-    if isinstance(weights_map, PrefixedDataset):
-        dataset = weights_map.dataset
-        key = f"{weights_map.prefix}{key}"
-        delete_from_weights_map(dataset, key)
-
-    elif isinstance(weights_map, OffloadedWeightsLoader):
-        if len(weights_map.index) <= 0:
-            delete_from_weights_map(weights_map.state_dict, key)
-
-        else:
-            raise NotImplementedError(
-                "Delete from weights_map with disk offloading is not implemented yet"
-            )
-
-    elif isinstance(weights_map, dict):
-        del weights_map[key]
-
-    else:
-        raise NotImplementedError(
-            "Updating offload data not implemented for weights_map of type "
-            f"{type(weights_map)}"
-        )
-
-
-@check_accelerate(fallback=contextlib.nullcontext())
-@contextlib.contextmanager
-def disable_offload(module: torch.nn.Module):
-    """
-    Context manager to disable module onloading and offloading. Parameters will stay on
-    their current device
-
-    :param module: module to disable offloading for
-    """
-    if has_offloaded_params(module):
-        module._hf_hook.offload = False
-        yield
-        module._hf_hook.offload = True
-    else:
-        yield
-
-
-@check_accelerate(fallback=contextlib.nullcontext())
-@contextlib.contextmanager
-def align_modules(
-    modules: Union[torch.nn.Module, Iterable[torch.nn.Module]],
-    execution_device: Optional[torch.device] = None,
-):
-    """
-    Context manager for onloading modules to a device, and disabling onload and offload
-    attempts triggered by forward calls. Used for sequential onloading of layers
-
-    :param modules: `torch.nn.Module` or iterable of `torch.nn.Module`s to onload
-    :param execution_device: device to onload to
-    """
-    modules = (modules,) if isinstance(modules, torch.nn.Module) else modules
-
-    with contextlib.ExitStack() as stack:
-        for module in modules:
-            stack.enter_context(align_module_device(module, execution_device))
-            stack.enter_context(disable_offload(module))  # disable redundant onloading
-        yield
-
-
-def register_offload_module(base: torch.nn.Module, name: str, module: torch.nn.Module):
-    """
-    Register a submodule with offloading if the parent module is offloaded
-
-    :param base: module to attach submodule to
-    :param name: name of submodule
-    :param module: submodule to attach
-    """
-
-    if has_offloaded_params(base):
-        hook: AlignDevicesHook = base._hf_hook
-        assert hook.offload
-        assert hook.weights_map is not None
-
-        # offloading kwargs for submodule
-        place_submodules = False
-        offload_buffers = True
-
-        # copy device offloading arguments from parent
-        current_device = next(base.parameters()).device  # assume base has parameters
-        offload_device = get_offloaded_device(base)
-
-        # offload parameters to weights map
-        for param_name, param in named_module_tensors(
-            module, include_buffers=offload_buffers, recurse=place_submodules
-        ):
-            offloaded = param.to(offload_device)
-            if hook.tied_params_map is not None:
-                hook.tied_params_map[offloaded.data_ptr()] = {}  # (1)
-            offload_to_weights_map(hook.weights_map, f"{name}.{param_name}", offloaded)
-
-            # if the parent places submodules, offload here
-            if hook.place_submodules:
-                set_module_tensor_to_device(module, param_name, current_device)
-
-        # if the parent does not place submodules, then add a hook
-        # parameters are offloaded by `add_hook_to_module`
-        if not hook.place_submodules:
-            weights_map = PrefixedDataset(
-                hook.weights_map.dataset, prefix=f"{hook.weights_map.prefix}{name}."
-            )
-
-            submodule_hook = AlignDevicesHook(
-                execution_device=hook.execution_device,
-                offload=hook.offload,
-                io_same_device=False,
-                weights_map=weights_map,
-                offload_buffers=offload_buffers,
-                place_submodules=place_submodules,
-                skip_keys=None,
-                tied_params_map=hook.tied_params_map,
-            )
-            add_hook_to_module(module, submodule_hook)
-
-    base.register_module(name, module)
+    raise ValueError()
 
 
+@deprecated("delattr(base, name)")
 def delete_offload_module(base: torch.nn.Module, name: str):
     """
     Delete a submodule from a model which may contain offloading
     :param base: parent module to delete submodule from
     :param name: name of submodule on parent
     """
-    module: torch.nn.Module = getattr(base, name)
-
-    for param_name, _ in list(module.named_parameters()):
-        delete_offload_parameter(module, param_name)
-
     delattr(base, name)
 
 
-@check_accelerate(fallback="error")
+@deprecated("compressed_tensors.offload::offload_model")
 def offloaded_dispatch(
     module: torch.nn.Module,
     execution_device: torch.device,
-    offload_device: Union[torch.device, Literal["disk"]] = torch.device("cpu"),
+    offload_device: Optional[torch.device | Literal["disk"]] = None,
 ) -> torch.nn.Module:
     """
-    Unlike `dispatch_model`, this function forces a module (and its submodules) to
-    offload all parameters and replace them with meta tensors, utiliizing the
-    `AlignDevicesHook` to control onloading and offloading.
+    Dispatch a model, keeping device parameters offloaded on their current device
 
     :param module: module containing parameters to offload
     :param execution_device: device that modules will onload and execute on
     :param offload_device: device that module parameters will offload to
     :return: module with offloading device hooks
     """
-    if offload_device == "disk":
-        raise NotImplementedError("Disk offloading is not currently supported")
-
-    # remove any existing hooks
-    remove_dispatch(module)
-
-    # create weights map
-    state_dict = module.state_dict()
-    state_dict = {key: val.to(offload_device) for key, val in state_dict.items()}
-    weights_map = OffloadedWeightsLoader(state_dict=state_dict, device=offload_device)
-
-    # create tied params map
-    tied_params = find_tied_parameters(module)
-    tied_params_map = {}
-    for group in tied_params:
-        for param_name in group:
-            data_ptr = attrgetter(param_name)(module).data_ptr()
-            tied_params_map[data_ptr] = {}
-
-    # recursively attaches hooks to all submodules
-    attach_align_device_hook(
-        module,
-        execution_device=execution_device,
-        offload=True,
-        weights_map=weights_map,
-        tied_params_map=tied_params_map,
-    )
-
-    # when saving a model, `PretrainedModel.save_pretrained` will only
-    # onload weights if the following requirements are met
-    # if (
-    #     hasattr(self, "hf_device_map")
-    #     and len(set(self.hf_device_map.values())) > 1
-    #     and ("cpu" in self.hf_device_map.values()
-    #          or "disk" in self.hf_device_map.values())
-    # ):
-    # because this function always offloads, disregard actual devices and
-    # always use `cpu` and `cuda:0` to guarantee this condition passes
-    setattr(module, "hf_device_map", {"fake_offload": "cpu", "fake_exec": "cuda:0"})
-
-    return module
-
-
-def remove_dispatch(module: torch.nn.Module) -> torch.nn.Module:
-    """
-    Remove any existing dispatches from module
-
-    :param module: module which may be dispatched with hf hooks
-    :return: module without dispatch
-    """
-    remove_hook_from_module(module, recurse=True)
-    if hasattr(module, "hf_device_map"):
-        delattr(module, "hf_device_map")
-    module.to("cpu")
-
-    return module
-
+    if offload_device is not None:
+        raise ValueError(
+            "Passing offload_device to offloaded_dispatch is no longer supported"
+        )
+    offload_model(module, execution_device)
 
-@contextlib.contextmanager
-def disable_offloading():
-    """
-    Keep modules onloaded and disable offloading until this context exits.
-    Affects modules which have been hooked with accelerate's `AlignDevicesHook`
-    """
-    original_pre_forward = AlignDevicesHook.pre_forward
-    onloaded_modules: Dict[torch.nn.Module, Tuple[AlignDevicesHook, bool]] = dict()
 
-    # onload once and disable any future onloading/offloading steps
-    def keep_onload_pre_forward(self: AlignDevicesHook, module, *args, **kwargs):
-        ret = original_pre_forward(self, module, *args, **kwargs)
-        if module not in onloaded_modules:
-            onloaded_modules[module] = (self, self.offload)
-            self.offload = False
-        return ret
+@deprecated("compressed_tensors.offload::align_module_device")
+def disable_offload(module: torch.nn.Module):
+    raise ValueError()
 
-    # use the patched pre_forward function within the context
-    with patch_attr(AlignDevicesHook, "pre_forward", keep_onload_pre_forward):
-        yield
 
-    # manually offload all modules that were onloaded
-    # update any parameters which may have changed
-    for module, (hook, offload) in onloaded_modules.items():
-        hook.offload = offload
-        for name, param in module.named_parameters(recurse=False):
-            update_offload_parameter(module, name, param.data)
-        hook.post_forward(module, None)
+@deprecated()
+def offload_to_weights_map(*args, **kwargs):
+    raise ValueError()
 
 
-""" Upstreamed Functions """
+@deprecated()
+def delete_from_weights_map(*args, **kwargs):
+    raise ValueError()
 
 
-# introduced in accelerate v1.1.0
-@check_accelerate(fallback=False)
+@deprecated()
 def has_offloaded_params(module: torch.nn.Module) -> bool:
     """
     Checks if a module has offloaded parameters by checking if the given module has a
@@ -616,57 +192,4 @@ def has_offloaded_params(module: torch.nn.Module) -> bool:
         bool: `True` if the module has an offload hook and offloading is enabled,
         `False` otherwise.
     """
-    return (
-        hasattr(module, "_hf_hook")
-        and isinstance(module._hf_hook, AlignDevicesHook)
-        and module._hf_hook.offload
-    )
-
-
-# introduced in accelerate v1.1.0
-@check_accelerate(fallback=contextlib.nullcontext())
-@contextlib.contextmanager
-def align_module_device(
-    module: torch.nn.Module, execution_device: Optional[torch.device] = None
-):
-    """
-    Context manager that moves a module's parameters to the specified execution device.
-
-    Args:
-        module (`torch.nn.Module`):
-            Module with parameters to align.
-        execution_device (`torch.device`, *optional*):
-            If provided, overrides the module's execution device within the context.
-            Otherwise, use hook execution device or pass
-    """
-    if has_offloaded_params(module):
-        if execution_device is not None:
-            original_device = module._hf_hook.execution_device
-            module._hf_hook.execution_device = execution_device
-
-        try:
-            module._hf_hook.pre_forward(module)
-            yield
-        finally:
-            module._hf_hook.post_forward(module, None)
-            if execution_device is not None:
-                module._hf_hook.execution_device = original_device
-
-    elif execution_device is not None:
-        devices = {
-            name: param.device for name, param in module.named_parameters(recurse=False)
-        }
-        try:
-            for name in devices:
-                set_module_tensor_to_device(module, name, execution_device)
-            yield
-        finally:
-            for name, device in devices.items():
-                set_module_tensor_to_device(module, name, device)
-
-    else:
-        yield
-
-
-# (1): Since we cannot know which pointers are shared when we add parameters in an
-# online way, assume that all pointers are shared. This has virtually no runtime cost
+    return False
diff --git a/tests/test_quantization/lifecycle/test_apply.py b/tests/test_quantization/lifecycle/test_apply.py
index caf679781..d18b08c46 100644
--- a/tests/test_quantization/lifecycle/test_apply.py
+++ b/tests/test_quantization/lifecycle/test_apply.py
@@ -32,7 +32,6 @@
 )
 from compressed_tensors.quantization.lifecycle import apply_quantization_config
 from compressed_tensors.utils import is_match, match_named_modules
-from tests.testing_utils import requires_accelerate
 from transformers import AutoModelForCausalLM
 
 
@@ -322,7 +321,6 @@ def get_sample_tinyllama_quant_config(
     return QuantizationConfig.model_validate(config_dict)
 
 
-@requires_accelerate()
 @pytest.mark.parametrize(
     "target,should_raise_warning",
     [
@@ -462,12 +460,8 @@ def test_multi_apply_quantization_config():
             )
 
 
-@requires_accelerate()
 def test_apply_kv_cache():
-    from accelerate import init_empty_weights
-
-    with init_empty_weights():
-        model = AutoModelForCausalLM.from_pretrained("nm-testing/llama2.c-stories15M")
+    model = AutoModelForCausalLM.from_pretrained("nm-testing/llama2.c-stories15M")
 
     args = QuantizationArgs(
         num_bits=8,
@@ -486,12 +480,8 @@ def test_apply_kv_cache():
         assert hasattr(layer.self_attn, "v_scale")
 
 
-@requires_accelerate()
 def test_apply_attention():
-    from accelerate import init_empty_weights
-
-    with init_empty_weights():
-        model = AutoModelForCausalLM.from_pretrained("nm-testing/llama2.c-stories15M")
+    model = AutoModelForCausalLM.from_pretrained("nm-testing/llama2.c-stories15M")
 
     scheme = QuantizationScheme(
         targets=["LlamaAttention"],
diff --git a/tests/test_quantization/lifecycle/test_initialize.py b/tests/test_quantization/lifecycle/test_initialize.py
index e463ea2c2..3bb8d407e 100644
--- a/tests/test_quantization/lifecycle/test_initialize.py
+++ b/tests/test_quantization/lifecycle/test_initialize.py
@@ -17,6 +17,7 @@
 
 import pytest
 import torch
+from compressed_tensors.offload import offload_model
 from compressed_tensors.quantization import (
     FP8_E4M3_DATA,
     ActivationOrdering,
@@ -28,7 +29,7 @@
 from compressed_tensors.quantization.lifecycle.initialize import (
     initialize_module_for_quantization,
 )
-from tests.testing_utils import requires_accelerate
+from tests.testing_utils import requires_gpu
 from torch.nn import Linear
 
 
@@ -98,7 +99,7 @@ def test_initialize_module_for_quantization(
     assert layer.quantization_status == QuantizationStatus.INITIALIZED
 
 
-@requires_accelerate()
+@requires_gpu
 @pytest.mark.parametrize(
     "weights,input_activations",
     [
@@ -119,9 +120,7 @@ def test_initialize_module_for_quantization(
 def test_initialize_module_for_quantization_offloaded(
     create_quantization_scheme, weights, input_activations, layer
 ):
-    from accelerate.hooks import attach_align_device_hook
-
-    attach_align_device_hook(layer, offload=True)
+    offload_model(layer, "cuda:0")
 
     test_initialize_module_for_quantization(
         create_quantization_scheme,
diff --git a/tests/test_transform/factory/test_correctness.py b/tests/test_transform/factory/test_correctness.py
index 1fdbc3a00..0c6a7bc28 100644
--- a/tests/test_transform/factory/test_correctness.py
+++ b/tests/test_transform/factory/test_correctness.py
@@ -14,6 +14,7 @@
 
 import pytest
 import torch
+from compressed_tensors.offload import offload_model
 from compressed_tensors.transform import (
     TransformArgs,
     TransformConfig,
@@ -21,9 +22,8 @@
     TransformScheme,
     apply_transform_config,
 )
-from compressed_tensors.utils import offloaded_dispatch
 from tests.test_transform.conftest import MockAttention, MockAttentionModel
-from tests.testing_utils import requires_accelerate, requires_gpu
+from tests.testing_utils import requires_gpu
 
 
 @pytest.mark.parametrize("type", ("hadamard", "random-hadamard", "random-matrix"))
@@ -89,16 +89,16 @@ def test_correctness_embedding(type, randomize, embed_loc, linear_loc):
     assert torch.allclose(true_output, output, atol=1e-5, rtol=0.0)
 
 
+@requires_gpu
 @pytest.mark.parametrize("type", ("hadamard", "random-hadamard", "random-matrix"))
 @pytest.mark.parametrize("randomize", (True, False))
 @pytest.mark.parametrize("input_batch_size", (1, 5, 17))
-def test_correctness_model(
-    type, randomize, input_batch_size, model_apply, offload=False
-):
+@pytest.mark.parametrize("offload", (True, False))
+def test_correctness_model(type, randomize, input_batch_size, model_apply, offload):
     # load model
     model = model_apply[0]
     if offload:
-        model = offloaded_dispatch(model, torch.device("cuda"))
+        offload_model(model, torch.device("cuda"))
 
     # get output
     input = torch.rand((input_batch_size, 5, model.fcs[0].in_features))
@@ -119,15 +119,6 @@ def test_correctness_model(
     assert torch.allclose(true_output, output, atol=1e-5, rtol=0.0)
 
 
-@requires_gpu
-@requires_accelerate()
-@pytest.mark.parametrize("type", ("hadamard", "random-hadamard", "random-matrix"))
-@pytest.mark.parametrize("randomize", (True, False))
-@pytest.mark.parametrize("input_batch_size", (1, 5, 17))
-def test_correctness_model_offload(type, randomize, input_batch_size, model_apply):
-    test_correctness_model(type, randomize, input_batch_size, model_apply, offload=True)
-
-
 @pytest.mark.parametrize("type", ("hadamard", "random-hadamard", "random-matrix"))
 @pytest.mark.parametrize("randomize", (True, False))
 @pytest.mark.parametrize("head_dim", (4, 8))
diff --git a/tests/test_transform/factory/test_memory.py b/tests/test_transform/factory/test_memory.py
index 64a068c98..e373a752d 100644
--- a/tests/test_transform/factory/test_memory.py
+++ b/tests/test_transform/factory/test_memory.py
@@ -16,6 +16,11 @@
 
 import pytest
 import torch
+from compressed_tensors.offload import (
+    disable_offloading,
+    disable_onloading,
+    offload_model,
+)
 from compressed_tensors.transform import (
     TransformArgs,
     TransformBase,
@@ -23,19 +28,21 @@
     TransformScheme,
     apply_transform_config,
 )
-from compressed_tensors.utils import align_modules, offloaded_dispatch
 from tests.test_transform.conftest import TransformableModel
-from tests.testing_utils import requires_accelerate, requires_gpu
+from tests.testing_utils import requires_gpu
 
 
+@requires_gpu
 @pytest.mark.parametrize("type", ("hadamard", "random-hadamard"))
 @pytest.mark.parametrize("randomize", (True, False))
 @pytest.mark.parametrize("requires_grad", (True, False))
-def test_memory_sharing(type, randomize, requires_grad, offload=False):
+# @pytest.mark.parametrize("offload", (True, False))
+@pytest.mark.parametrize("offload", (True,))
+def test_memory_sharing(type, randomize, requires_grad, offload):
     # load model (maybe with offloading)
     model = TransformableModel(2, 2, 4, 4, 8, 8)
     if offload:
-        offloaded_dispatch(model, torch.device("cuda"))
+        offload_model(model, torch.device("cuda"))
 
     # add transforms to model
     config = TransformConfig(
@@ -53,40 +60,15 @@ def test_memory_sharing(type, randomize, requires_grad, offload=False):
     )
     apply_transform_config(model, config)
 
-    # check that memory is shared when onloaded
-    with align_modules(model.modules()):
-        weights = [m.weight for m in model.modules() if isinstance(m, TransformBase)]
-        weight_to_count = Counter(weights)
-        size_to_weight = {weight.size(0): weight for weight in weight_to_count}
-
-        assert len(weight_to_count) == len(size_to_weight) == 3
-        assert weight_to_count[size_to_weight[2]] == 3
-        assert weight_to_count[size_to_weight[4]] == 4
-        assert weight_to_count[size_to_weight[8]] == 3
-
-    # check that memory is shared in offloaded dict
-    if offload:
-        weights_map = dict(model.fcs[0]._hf_hook.weights_map.dataset)
-        offloaded_weights = [
-            value
-            for name, value in weights_map.items()
-            if name.endswith("_input.weight") or name.endswith("_output.weight")
-        ]
-        weight_to_count = Counter(offloaded_weights)
-        size_to_weight = {weight.size(0): weight for weight in weight_to_count}
-
-        assert len(weight_to_count) == len(size_to_weight) == 3
-        assert weight_to_count[size_to_weight[2]] == 3
-        assert weight_to_count[size_to_weight[4]] == 4
-        assert weight_to_count[size_to_weight[8]] == 3
-
+    for context in disable_onloading, disable_offloading:
+        with context():
+            weights = [
+                m.weight for m in model.modules() if isinstance(m, TransformBase)
+            ]
+            weight_to_count = Counter(weights)
+            size_to_weight = {weight.size(0): weight for weight in weight_to_count}
 
-@requires_gpu
-@requires_accelerate()
-@pytest.mark.parametrize("type", ("hadamard", "random-hadamard"))
-@pytest.mark.parametrize("randomize", (True, False))
-def test_memory_sharing_offload(
-    type,
-    randomize,
-):
-    test_memory_sharing(type, randomize, requires_grad=False, offload=True)
+            assert len(weight_to_count) == len(size_to_weight) == 3
+            assert weight_to_count[size_to_weight[2]] == 3
+            assert weight_to_count[size_to_weight[4]] == 4
+            assert weight_to_count[size_to_weight[8]] == 3
diff --git a/tests/test_transform/factory/test_serialization.py b/tests/test_transform/factory/test_serialization.py
index 15fa240ba..9adeb4cf8 100644
--- a/tests/test_transform/factory/test_serialization.py
+++ b/tests/test_transform/factory/test_serialization.py
@@ -16,24 +16,25 @@
 
 import pytest
 import torch
+from compressed_tensors.offload import offload_model
 from compressed_tensors.transform import (
     TransformConfig,
     TransformScheme,
     apply_transform_config,
 )
-from compressed_tensors.utils import offloaded_dispatch
 from safetensors import safe_open
-from tests.testing_utils import requires_accelerate, requires_gpu
+from tests.testing_utils import requires_gpu
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 
 @pytest.mark.parametrize("type", ("hadamard", "random-hadamard"))
 @pytest.mark.parametrize("randomize", (True, False))
-def test_serialization(type, randomize, model_apply, tmp_path, offload=False):
+@pytest.mark.parametrize("offload", (True, False))
+def test_serialization(type, randomize, model_apply, tmp_path, offload):
     # get model, maybe offload
     model, apply = model_apply
     if offload:
-        offloaded_dispatch(model, torch.device("cuda"))
+        offload_model(model, torch.device("cuda"))
 
     # apply transforms to model
     config = TransformConfig(
@@ -48,7 +49,8 @@ def test_serialization(type, randomize, model_apply, tmp_path, offload=False):
     # check that saved values match model values
     # note that shared weights are only serialized once
     safetensors_path = os.path.join(model_path, "model.safetensors")
-    with safe_open(safetensors_path, framework="pt", device="cpu") as file:
+    device = "cuda:0" if offload else "cpu"
+    with safe_open(safetensors_path, framework="pt", device=device) as file:
         saved_keys = set(file.keys())
         assert {
             "fcs.0.weight",
@@ -60,17 +62,7 @@ def test_serialization(type, randomize, model_apply, tmp_path, offload=False):
         for key in saved_keys:
             param = model.get_parameter(key)
             saved_param = file.get_tensor(key)
-
-            if param.device.type != "meta":  # skip testing values in offload case
-                assert torch.equal(param, saved_param)
-
-
-@requires_gpu
-@requires_accelerate()
-@pytest.mark.parametrize("type", ("hadamard", "random-hadamard"))
-@pytest.mark.parametrize("randomize", (True, False))
-def test_serialization_offload(type, randomize, model_apply, tmp_path):
-    test_serialization(type, randomize, model_apply, tmp_path, offload=True)
+            assert torch.equal(param, saved_param)
 
 
 @pytest.mark.skip("Requires transformers#40673")
diff --git a/tests/test_utils/test_match.py b/tests/test_utils/test_match.py
index 86bf639be..467d74e0c 100644
--- a/tests/test_utils/test_match.py
+++ b/tests/test_utils/test_match.py
@@ -43,41 +43,35 @@ class DummyModel(nn.Module):
     """Test model for unit tests. Weights are initialized on meta device"""
 
     def __init__(self):
-        try:
-            from accelerate import init_empty_weights
-        except ImportError:
-            pytest.skip("Skipping weight init requires accelerate")
-
         super().__init__()
-        with init_empty_weights():
-            self.layer1 = nn.Linear(10, 20)
-            self.layer2 = nn.Linear(20, 30)
-            self.norm = nn.LayerNorm(30)
-            self.attention = nn.MultiheadAttention(30, 2)
-
-            # Create nested structure
-            self.transformer = nn.ModuleDict(
-                {
-                    "layers": nn.ModuleList(
-                        [
-                            nn.ModuleDict(
-                                {
-                                    "self_attn": nn.ModuleDict(
-                                        {
-                                            "q_proj": nn.Linear(30, 30),
-                                            "k_proj": nn.Linear(30, 30),
-                                            "v_proj": nn.Linear(30, 30),
-                                        }
-                                    ),
-                                    "norm": nn.LayerNorm(30),
-                                    "mlp": nn.Linear(30, 30),
-                                }
-                            )
-                            for _ in range(3)
-                        ]
-                    )
-                }
-            )
+        self.layer1 = nn.Linear(10, 20)
+        self.layer2 = nn.Linear(20, 30)
+        self.norm = nn.LayerNorm(30)
+        self.attention = nn.MultiheadAttention(30, 2)
+
+        # Create nested structure
+        self.transformer = nn.ModuleDict(
+            {
+                "layers": nn.ModuleList(
+                    [
+                        nn.ModuleDict(
+                            {
+                                "self_attn": nn.ModuleDict(
+                                    {
+                                        "q_proj": nn.Linear(30, 30),
+                                        "k_proj": nn.Linear(30, 30),
+                                        "v_proj": nn.Linear(30, 30),
+                                    }
+                                ),
+                                "norm": nn.LayerNorm(30),
+                                "mlp": nn.Linear(30, 30),
+                            }
+                        )
+                        for _ in range(3)
+                    ]
+                )
+            }
+        )
 
 
 class DummyMoEModel(nn.Module):
diff --git a/tests/test_utils/test_offload.py b/tests/test_utils/test_offload.py
deleted file mode 100644
index aed0186b2..000000000
--- a/tests/test_utils/test_offload.py
+++ /dev/null
@@ -1,540 +0,0 @@
-# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import pytest
-import torch
-from compressed_tensors.utils import (
-    align_module_device,
-    align_modules,
-    delete_offload_module,
-    delete_offload_parameter,
-    disable_hf_hook,
-    disable_offloading,
-    get_execution_device,
-    has_offloaded_params,
-    offloaded_dispatch,
-    register_offload_module,
-    register_offload_parameter,
-    update_offload_parameter,
-)
-from compressed_tensors.utils.offload import offload_to_weights_map
-from tests.testing_utils import requires_accelerate, requires_gpu
-
-
-class ExampleModule(torch.nn.Module):
-    def __init__(self):
-        super().__init__()
-        self.a = torch.nn.Parameter(torch.tensor(0).float())
-        self.b = torch.nn.Parameter(torch.tensor(0).float())
-
-    def forward(self, x):
-        return x * self.a + self.b
-
-
-class ExampleModel(torch.nn.Module):
-    def __init__(self):
-        super().__init__()
-        self.linear = torch.nn.Linear(1, 2)
-
-    def forward(self, x):
-        return self.linear(x)
-
-
-@requires_accelerate()
-def test_has_offloaded_params():
-    from accelerate.hooks import attach_align_device_hook, remove_hook_from_module
-
-    module = ExampleModule()
-    assert not has_offloaded_params(module)
-
-    attach_align_device_hook(module, offload=False)
-    assert not has_offloaded_params(module)
-
-    remove_hook_from_module(module)
-    attach_align_device_hook(module, offload=True, weights_map=module.state_dict())
-    assert has_offloaded_params(module)
-
-
-@requires_gpu
-@requires_accelerate()
-def test_get_execution_device():
-    from accelerate import init_empty_weights
-    from accelerate.big_modeling import attach_align_device_hook
-
-    # no offloading
-    module = ExampleModule()
-    assert get_execution_device(module) == torch.device("cpu")
-
-    # with offloading
-    attach_align_device_hook(module, torch.device("cuda:0"))
-    assert get_execution_device(module) == torch.device("cuda:0")
-
-    # in meta context
-    with torch.device("meta"):
-        module = ExampleModule()
-        assert get_execution_device(module) == torch.device("meta")
-
-    # offloaded in meta context
-    module = ExampleModule()
-    attach_align_device_hook(module, torch.device("cuda:0"))
-    with torch.device("meta"):
-        assert get_execution_device(module) == torch.device("cuda:0")
-
-    # in empty weights context
-    with init_empty_weights():
-        module = ExampleModule()
-        assert get_execution_device(module) == torch.device("meta")
-
-    # offloaded in empty weights context
-    module = ExampleModule()
-    attach_align_device_hook(module, torch.device("cuda:0"))
-    with init_empty_weights():
-        assert get_execution_device(module) == torch.device("cuda:0")
-
-
-@requires_gpu
-@requires_accelerate()
-def test_get_execution_device_model():
-    class Model(torch.nn.Module):
-        def __init__(self):
-            super().__init__()
-            self.a = torch.nn.Linear(1, 2)
-            self.b = torch.nn.Linear(2, 2, device="cuda:0")
-
-        def forward(self, x):
-            return self.b(self.a(x).to("cuda:0"))
-
-    model = Model()
-    assert get_execution_device(model) == torch.device("cpu")
-
-    offloaded_dispatch(model.a, torch.device("cuda:0"))
-    assert get_execution_device(model) == torch.device("cuda:0")
-
-
-@requires_accelerate()
-def test_register_offload_parameter():
-    from accelerate import init_empty_weights
-    from accelerate.hooks import attach_align_device_hook
-
-    module = ExampleModule()
-    parameter = torch.nn.Parameter(torch.tensor(1.0))
-
-    # register a param prior to offloading
-    register_offload_parameter(module, "c", parameter)
-    assert module.c == parameter
-
-    # offloading, check that added param was offloaded
-    attach_align_device_hook(module, offload=True, weights_map=module.state_dict())
-    assert "c" in module._hf_hook.weights_map
-
-    # register a param after offloading, check that added param was offloaded
-    register_offload_parameter(module, "d", parameter)
-    assert module.d.device == torch.device("meta")
-    assert module._hf_hook.weights_map["d"].device == torch.device("cpu")
-
-    # added parameters can be onloaded and offloaded
-    with align_module_device(module, execution_device="cpu"):
-        assert module.c.device == torch.device("cpu")
-        assert module.d.device == torch.device("cpu")
-    assert module.c.device == torch.device("meta")
-    assert module.d.device == torch.device("meta")
-
-    # parameters can be added during onload
-    with align_module_device(module, execution_device="cpu"):
-        register_offload_parameter(module, "e", parameter)
-        assert module.e.device == torch.device("cpu")
-
-    # parameters can be added before onload and with explicit offload
-    register_offload_parameter(module, "f", parameter, offload_device="cpu")
-    assert module._hf_hook.weights_map["f"].device == torch.device("cpu")
-    with align_module_device(module, execution_device="cpu"):
-        assert module.f.device == torch.device("cpu")
-    assert module._hf_hook.weights_map["f"].device == torch.device("cpu")
-
-    # parameters registered in the empty init context are still empty
-    with init_empty_weights():
-        module = ExampleModule()
-        register_offload_parameter(module, "c", parameter)
-    assert module.a.device == module.b.device == module.c.device == torch.device("meta")
-
-
-@requires_accelerate()
-@requires_gpu
-def test_register_offload_parameter_hook_replacement():
-    module = ExampleModule()
-    parameter_c = torch.nn.Parameter(torch.tensor(1.0, device="cuda"))
-    parameter_d = torch.nn.Parameter(torch.tensor(1.0, device="cpu"))
-
-    offloaded_dispatch(module, "cuda")
-    register_offload_parameter(module, "c", parameter_c)
-    register_offload_parameter(module, "d", parameter_d)
-
-    with disable_hf_hook(module):
-        assert module.a.device == torch.device("cpu")
-        assert module.b.device == torch.device("cpu")
-        assert module.c.device == torch.device("cuda:0")
-        assert module.d.device == torch.device("cpu")
-
-    assert module.a.device == torch.device("meta")
-    assert module.b.device == torch.device("meta")
-    assert module.c.device == torch.device("meta")
-    assert module.d.device == torch.device("meta")
-    assert module._hf_hook.weights_map["a"].device == torch.device("cpu")
-    assert module._hf_hook.weights_map["b"].device == torch.device("cpu")
-    assert module._hf_hook.weights_map["c"].device == torch.device("cpu")
-    assert module._hf_hook.weights_map["d"].device == torch.device("cpu")
-
-
-@requires_accelerate()
-@requires_gpu
-def test_register_offload_parameter_shared():
-    module = ExampleModule()
-    parameter = torch.nn.Parameter(torch.tensor(1.0))
-
-    offloaded_dispatch(module, "cuda")
-    register_offload_parameter(module, "c", parameter)
-    register_offload_parameter(module, "d", parameter)
-
-    with align_module_device(module):
-        assert module.c is module.d
-
-
-@requires_accelerate()
-def test_update_offload_parameter():
-    from accelerate.hooks import attach_align_device_hook
-
-    module = ExampleModule()
-    tensor_a = torch.tensor(1.0)
-    tensor_b = torch.tensor(2.0)
-
-    # can update modules which are not offloaded
-    update_offload_parameter(module, "a", tensor_a)
-    assert module.a == tensor_a
-
-    # can update modules which are offloaded
-    attach_align_device_hook(module, offload=True, weights_map=module.state_dict())
-    update_offload_parameter(module, "b", tensor_b)
-    assert module.b.device == torch.device("meta")
-    assert module._hf_hook.weights_map["b"] == tensor_b
-
-    # data persists across onloading
-    with align_module_device(module, execution_device="cpu"):
-        assert module.a.data == tensor_a
-        assert module.b.data == tensor_b
-        assert module._hf_hook.weights_map["a"] == tensor_a
-        assert module._hf_hook.weights_map["b"] == tensor_b
-
-    # data persists across offloading
-    assert module.a.device == torch.device("meta")
-    assert module.b.device == torch.device("meta")
-    assert module._hf_hook.weights_map["a"] == tensor_a
-    assert module._hf_hook.weights_map["b"] == tensor_b
-
-    # can update with differnt shape with warning
-    with pytest.warns():
-        new_data = torch.tensor([3.0])
-        update_offload_parameter(module, "a", new_data)
-    assert module._hf_hook.weights_map["a"] == new_data
-
-
-@requires_accelerate()
-def test_delete_offload_parameter():
-    from accelerate.hooks import attach_align_device_hook
-
-    module = ExampleModule()
-    param_c = torch.nn.Parameter(torch.tensor(1.0))
-    param_d = torch.nn.Parameter(torch.tensor(2.0))
-    register_offload_parameter(module, "c", param_c)
-    register_offload_parameter(module, "d", param_d)
-
-    # parameters are deleted
-    delete_offload_parameter(module, "a")
-    delete_offload_parameter(module, "c")
-    assert not hasattr(module, "a")
-    assert hasattr(module, "b")
-    assert not hasattr(module, "c")
-    assert hasattr(module, "d")
-
-    # parameters and their offload are deleted
-    attach_align_device_hook(module, offload=True, weights_map=module.state_dict())
-    delete_offload_parameter(module, "b")
-    delete_offload_parameter(module, "d")
-    assert not hasattr(module, "a")
-    assert not hasattr(module, "b")
-    assert not hasattr(module, "c")
-    assert not hasattr(module, "d")
-    assert "a" not in module._hf_hook.weights_map
-    assert "b" not in module._hf_hook.weights_map
-    assert "c" not in module._hf_hook.weights_map
-    assert "d" not in module._hf_hook.weights_map
-
-
-@requires_accelerate()
-def test_disable_hf_hook():
-    from accelerate.hooks import attach_align_device_hook
-
-    module = ExampleModule()
-
-    def custom_forward():
-        pass
-
-    attach_align_device_hook(module, offload=True, weights_map=module.state_dict())
-    with disable_hf_hook(module):
-        assert not hasattr(module, "_hf_hook")
-        module.forward = custom_forward
-
-    assert hasattr(module, "_hf_hook")
-    assert module._old_forward == custom_forward
-
-
-@requires_accelerate()
-def test_disable_hf_hook_model_recurse():
-    from accelerate.hooks import attach_align_device_hook
-
-    module0 = ExampleModule()
-    module1 = ExampleModule()
-    module2 = ExampleModule()
-    model = torch.nn.Sequential(module0, torch.nn.Sequential(module1, module2))
-    attach_align_device_hook(model, offload=True, weights_map=model.state_dict())
-
-    with disable_hf_hook(model):
-        assert not hasattr(module0, "_hf_hook")
-        assert not hasattr(module1, "_hf_hook")
-        assert not hasattr(module2, "_hf_hook")
-
-    assert hasattr(module0, "_hf_hook")
-    assert hasattr(module1, "_hf_hook")
-    assert hasattr(module2, "_hf_hook")
-
-
-@requires_accelerate()
-def test_align_modules():
-    from accelerate.hooks import attach_align_device_hook
-
-    module0 = ExampleModule()
-    module1 = ExampleModule()
-    module2 = ExampleModule()
-    model = torch.nn.Sequential(module0, torch.nn.Sequential(module1, module2))
-    attach_align_device_hook(
-        model,
-        execution_device=torch.device("cpu"),
-        offload=True,
-        weights_map=model.state_dict(),
-    )
-
-    assert module0.a.device == torch.device("meta")
-    assert module1.a.device == torch.device("meta")
-    assert module2.a.device == torch.device("meta")
-
-    with align_modules((module0, module1)):
-        assert module0.a.device != torch.device("meta")
-        assert module1.a.device != torch.device("meta")
-        assert module2.a.device == torch.device("meta")
-
-    assert module0.a.device == torch.device("meta")
-    assert module1.a.device == torch.device("meta")
-    assert module2.a.device == torch.device("meta")
-
-
-@requires_accelerate()
-def test_offload_to_weights_map():
-    from accelerate.utils import OffloadedWeightsLoader, PrefixedDataset
-
-    name = "name"
-    old_value = torch.tensor(0.0)
-    new_value = torch.tensor(1.0)
-    prefix = "prefix"
-
-    # Dict empty
-    weights_map = {}
-    with pytest.raises(ValueError):
-        offload_to_weights_map(weights_map, name, new_value)
-    offload_to_weights_map(weights_map, name, new_value, offload_device="cpu")
-    assert weights_map[name] == new_value
-
-    # Dict populated
-    weights_map = {name: old_value}
-    offload_to_weights_map(weights_map, name, new_value)
-    assert weights_map[name] == new_value
-
-    # OffloadedWeightsLoader[Dict] empty
-    weights_map = OffloadedWeightsLoader({})
-    with pytest.raises(ValueError):
-        offload_to_weights_map(weights_map, name, new_value)
-    offload_to_weights_map(weights_map, name, new_value, offload_device="cpu")
-    assert weights_map[name] == new_value
-
-    # OffloadedWeightsLoader[Dict] populated
-    weights_map = OffloadedWeightsLoader({name: old_value})
-    offload_to_weights_map(weights_map, name, new_value)
-    assert weights_map[name] == new_value
-
-    # PrefixedDataset[Dict] empty
-    weights_map = PrefixedDataset({}, prefix)
-    with pytest.raises(ValueError):
-        offload_to_weights_map(weights_map, name, new_value)
-    offload_to_weights_map(weights_map, name, new_value, offload_device="cpu")
-    assert weights_map[name] == new_value
-
-    # PrefixedDataset[Dict] populated
-    weights_map = PrefixedDataset({name: old_value}, prefix)
-    offload_to_weights_map(weights_map, name, new_value)
-    assert weights_map[name] == new_value
-
-    # PrefixedDataset[OffloadedWeightsLoader[Dict]] empty
-    weights_map = PrefixedDataset(OffloadedWeightsLoader({}), prefix)
-    with pytest.raises(ValueError):
-        offload_to_weights_map(weights_map, name, new_value)
-    offload_to_weights_map(weights_map, name, new_value, offload_device="cpu")
-    assert weights_map[name] == new_value
-
-    # PrefixedDataset[OffloadedWeightsLoader[Dict]] populated
-    weights_map = PrefixedDataset(OffloadedWeightsLoader({name: old_value}), prefix)
-    offload_to_weights_map(weights_map, name, new_value)
-    assert weights_map[name] == new_value
-
-
-@requires_gpu
-@requires_accelerate()
-@pytest.mark.parametrize("exec_device", [torch.device("cpu"), torch.device("cuda")])
-def test_register_offload_module(exec_device):
-    # no offloading
-    model = ExampleModel()
-    child = torch.nn.Linear(2, 3)
-    register_offload_module(model, "child", child)
-    register_offload_module(model.linear, "child", child)
-    assert child in model.children()
-    assert child in model.linear.children()
-
-    # with offloading
-    model = ExampleModel()
-    child = torch.nn.Linear(2, 3)
-    offloaded_dispatch(model, exec_device)
-    register_offload_module(model, "child", child)
-    register_offload_module(model.linear, "child", child)
-    assert child in model.children()
-    assert child in model.linear.children()
-
-    # can run modules
-    model(torch.empty(1))
-    child(torch.empty(2, device=exec_device))
-
-
-@requires_gpu
-@requires_accelerate()
-@pytest.mark.parametrize("exec_device", [torch.device("cpu"), torch.device("cuda")])
-def test_delete_offload_module(exec_device):
-    # no offloading
-    model = ExampleModel()
-    child = torch.nn.Linear(2, 3)
-    register_offload_module(model, "child", child)
-    register_offload_module(model.linear, "child", child)
-    delete_offload_module(model, "child")
-    delete_offload_module(model.linear, "child")
-    assert child not in model.children()
-    assert child not in model.linear.children()
-
-    # with offloading
-    model = ExampleModel()
-    child = torch.nn.Linear(2, 3)
-    offloaded_dispatch(model, exec_device)
-    register_offload_module(model, "child", child)
-    register_offload_module(model.linear, "child", child)
-    delete_offload_module(model, "child")
-    delete_offload_module(model.linear, "child")
-    assert child not in model.children()
-    assert child not in model.linear.children()
-
-
-@requires_gpu
-@requires_accelerate()
-@pytest.mark.parametrize(
-    "exec_device,offload_device",
-    [
-        (torch.device("cpu"), torch.device("cpu")),
-        (torch.device("cpu"), torch.device("cuda:0")),
-        (torch.device("cuda:0"), torch.device("cpu")),
-        (torch.device("cuda:0"), torch.device("cuda:0")),
-    ],
-)
-def test_offloaded_dispatch(exec_device, offload_device):
-    # single module
-    module = torch.nn.Linear(1, 2, device=offload_device)
-    module = offloaded_dispatch(module, exec_device, offload_device)
-    assert has_offloaded_params(module)
-    assert module._hf_hook.offload
-    assert module.weight.device == torch.device("meta")
-    assert module._hf_hook.weights_map["weight"].device == offload_device
-    assert module._hf_hook.tied_params_map is not None
-
-    # can run
-    module(torch.empty(1, device=exec_device))
-
-    # model
-    model = ExampleModel()
-    model = offloaded_dispatch(model, exec_device, offload_device)
-    assert not has_offloaded_params(model)
-
-    assert has_offloaded_params(model.linear)
-    assert model.linear._hf_hook.offload
-    assert model.linear.weight.device == torch.device("meta")
-    assert model.linear._hf_hook.weights_map["weight"].device == offload_device
-    assert model.linear._hf_hook.tied_params_map is not None
-
-    # can run
-    model(torch.empty(1, device=exec_device))
-
-    # can add new params
-    parameter = torch.nn.Parameter(torch.tensor(1.0))
-    register_offload_parameter(module, "new_param", parameter)
-    assert module.new_param.device == torch.device("meta")
-    assert module._hf_hook.weights_map["new_param"].device == offload_device
-
-
-@requires_gpu
-@requires_accelerate()
-@pytest.mark.parametrize(
-    "exec_device,offload_device",
-    [
-        (torch.device("cpu"), torch.device("cpu")),
-        (torch.device("cpu"), torch.device("cuda:0")),
-        (torch.device("cuda:0"), torch.device("cpu")),
-        (torch.device("cuda:0"), torch.device("cuda:0")),
-    ],
-)
-def test_disable_offloading(exec_device, offload_device):
-    module = torch.nn.Linear(1, 2, device=exec_device)
-
-    # non-offloaded modules are unaffected
-    with disable_offloading():
-        output = module(torch.empty(1, device=exec_device))
-        assert module.weight.device == exec_device
-        assert output.device == exec_device
-
-    # offloaded modules stay on device until context exit
-    offloaded_dispatch(module, exec_device, offload_device)
-    assert module.weight.device == torch.device("meta")
-    assert module._hf_hook.weights_map["weight"].device == offload_device
-
-    with disable_offloading():
-        assert module.weight.device == torch.device("meta")
-        output = module(torch.empty(1, device=exec_device))
-        assert module.weight.device == exec_device
-        assert output.device == exec_device
-
-        output = module(torch.empty(1, device=exec_device))
-        assert module.weight.device == exec_device
-        assert output.device == exec_device
-
-    assert module.weight.device == torch.device("meta")
-    assert module._hf_hook.weights_map["weight"].device == offload_device
diff --git a/tests/testing_utils.py b/tests/testing_utils.py
index 40a9dc2fb..b28af78ae 100644
--- a/tests/testing_utils.py
+++ b/tests/testing_utils.py
@@ -28,18 +28,7 @@ def compressed_tensors_config_available():
         return False
 
 
-def accelerate_availabe():
-    try:
-        import accelerate  # noqa: F401
-
-        return True
-
-    except ImportError:
-        return False
-
-
 _is_compressed_tensors_config_available = compressed_tensors_config_available()
-_is_accelerate_available = accelerate_availabe()
 
 
 def requires_hf_quantizer():
@@ -49,13 +38,6 @@ def requires_hf_quantizer():
     )
 
 
-def requires_accelerate():
-    return pytest.mark.skipif(
-        not _is_accelerate_available,
-        reason="requires accelerate",
-    )
-
-
 def get_random_mat(M, K, dtype) -> "torch.Tensor":
     """
     :param M: number of rows