diff --git a/src/llmcompressor/entrypoints/model_free/helpers.py b/src/llmcompressor/entrypoints/model_free/helpers.py index e98fdd0d1b..ef45f09346 100644 --- a/src/llmcompressor/entrypoints/model_free/helpers.py +++ b/src/llmcompressor/entrypoints/model_free/helpers.py @@ -33,8 +33,13 @@ def gpu_if_available(device: torch.device | str | None) -> torch.device: elif hasattr(torch, "xpu") and torch.xpu.is_available(): return torch.device("xpu:0") + elif hasattr(torch, "npu") and torch.npu.is_available(): + return torch.device("npu:0") + else: - logger.warning("CUDA/XPU is not available! Compressing model on CPU instead") + logger.warning( + "CUDA/XPU/NPU is not available! Compressing model on CPU instead" + ) return torch.device("cpu") diff --git a/src/llmcompressor/pipelines/sequential/helpers.py b/src/llmcompressor/pipelines/sequential/helpers.py index cb6d0a3f21..9bfc3bcd99 100644 --- a/src/llmcompressor/pipelines/sequential/helpers.py +++ b/src/llmcompressor/pipelines/sequential/helpers.py @@ -543,8 +543,12 @@ def dispatch_for_sequential(model: PreTrainedModel) -> PreTrainedModel: offloaded_dispatch(model, execution_device=torch.device("cuda:0")) elif hasattr(torch, "xpu") and torch.xpu.is_available(): offloaded_dispatch(model, execution_device=torch.device("xpu:0")) + elif hasattr(torch, "npu") and torch.npu.is_available(): + offloaded_dispatch(model, execution_device=torch.device("npu:0")) else: - logger.warning("CUDA/XPU is not available! Compressing model on CPU instead") + logger.warning( + "CUDA/XPU/NPU is not available! Compressing model on CPU instead" + ) return model diff --git a/tools/collect_env.py b/tools/collect_env.py index b1762c6219..62a100ce3d 100644 --- a/tools/collect_env.py +++ b/tools/collect_env.py @@ -21,6 +21,7 @@ def get_torch_hardware_info(): cuda_devices = [] amd_devices = [] + npu_devices = [] if torch.cuda.is_available(): for i in range(torch.cuda.device_count()): name = torch.cuda.get_device_name(i) @@ -28,13 +29,17 @@ amd_devices.append(name) else: cuda_devices.append(name) - return cuda_devices, amd_devices + if hasattr(torch, "npu") and torch.npu.is_available(): + for i in range(torch.npu.device_count()): + name = torch.npu.get_device_name(i) + npu_devices.append(name) + return cuda_devices, amd_devices, npu_devices except ImportError: - return [], [] + return [], [], [] def collect_environment_info(): - cuda_devices, amd_devices = get_torch_hardware_info() + cuda_devices, amd_devices, npu_devices = get_torch_hardware_info() info = { "Operating System": platform.platform(), @@ -45,6 +50,7 @@ "torch Version": get_version("torch"), "CUDA Devices": cuda_devices if cuda_devices else "None", "AMD Devices": amd_devices if amd_devices else "None", + "NPU Devices": npu_devices if npu_devices else "None", } print("### Environment Information ###")