From 2fc98f7dde1d27e483e4e3817e7e427e54dbad2c Mon Sep 17 00:00:00 2001
From: Jiao Wang
Date: Fri, 17 Jan 2025 19:23:04 -0800
Subject: [PATCH] Add Intel XPU device support to generate and serve (#1361)

* add xpu

* add xpu device

* update

* profile

* update install

* update

* update

* update

---------

Co-authored-by: Jack-Khuu
Co-authored-by: Guoqiong
---
 install/install_requirements.sh | 36 +++++++++++++++++++++------------
 torchchat/cli/builder.py        |  7 ++++++-
 torchchat/cli/cli.py            |  4 ++--
 torchchat/generate.py           |  9 ++++++++-
 torchchat/utils/build_utils.py  |  8 ++++++--
 torchchat/utils/device_info.py  | 11 +++++++++-
 torchchat/utils/quantize.py     |  2 +-
 7 files changed, 56 insertions(+), 21 deletions(-)

diff --git a/install/install_requirements.sh b/install/install_requirements.sh
index b5ac414fd..146e11096 100755
--- a/install/install_requirements.sh
+++ b/install/install_requirements.sh
@@ -59,12 +59,6 @@ VISION_NIGHTLY_VERSION=dev20241218
 # Nightly version for torchtune
 TUNE_NIGHTLY_VERSION=dev20241218
 
-# Uninstall triton, as nightly will depend on pytorch-triton, which is one and the same
-(
-  set -x
-  $PIP_EXECUTABLE uninstall -y triton
-)
-
 # The pip repository that hosts nightly torch packages. cpu by default.
 # If cuda is available, based on presence of nvidia-smi, install the pytorch nightly
 # with cuda for faster execution on cuda GPUs.
@@ -74,16 +68,28 @@ then
 elif [[ -x "$(command -v rocminfo)" ]];
 then
   TORCH_NIGHTLY_URL="https://download.pytorch.org/whl/nightly/rocm6.2"
+elif [[ -x "$(command -v xpu-smi)" ]];
+then
+  TORCH_NIGHTLY_URL="https://download.pytorch.org/whl/nightly/xpu"
 else
   TORCH_NIGHTLY_URL="https://download.pytorch.org/whl/nightly/cpu"
 fi
 
 # pip packages needed by exir.
-REQUIREMENTS_TO_INSTALL=(
-  torch=="2.6.0.${PYTORCH_NIGHTLY_VERSION}"
-  torchvision=="0.22.0.${VISION_NIGHTLY_VERSION}"
-  torchtune=="0.5.0.${TUNE_NIGHTLY_VERSION}"
-)
+if [[ -x "$(command -v xpu-smi)" ]];
+then
+  REQUIREMENTS_TO_INSTALL=(
+    torch=="2.6.0.${PYTORCH_NIGHTLY_VERSION}"
+    torchvision=="0.22.0.${VISION_NIGHTLY_VERSION}"
+    torchtune=="0.5.0"
+  )
+else
+  REQUIREMENTS_TO_INSTALL=(
+    torch=="2.6.0.${PYTORCH_NIGHTLY_VERSION}"
+    torchvision=="0.22.0.${VISION_NIGHTLY_VERSION}"
+    torchtune=="0.5.0.${TUNE_NIGHTLY_VERSION}"
+  )
+fi
 
 #
 # First install requirements in install/requirements.txt. Older torch may be
@@ -95,6 +101,12 @@ REQUIREMENTS_TO_INSTALL=(
   $PIP_EXECUTABLE install -r install/requirements.txt --extra-index-url "${TORCH_NIGHTLY_URL}"
 )
 
+# Uninstall triton, as nightly will depend on pytorch-triton, which is one and the same
+(
+  set -x
+  $PIP_EXECUTABLE uninstall -y triton
+)
+
 # Install the requirements. --extra-index-url tells pip to look for package
 # versions on the provided URL if they aren't available on the default URL.
 (
@@ -116,8 +128,6 @@ if [[ -x "$(command -v nvidia-smi)" ]]; then
     $PYTHON_EXECUTABLE torchchat/utils/scripts/patch_triton.py
   )
 fi
-
-
 (
   set -x
   $PIP_EXECUTABLE install evaluate=="0.4.3" lm-eval=="0.4.2" psutil=="6.0.0"
diff --git a/torchchat/cli/builder.py b/torchchat/cli/builder.py
index 38d0e33b2..69db14a4b 100644
--- a/torchchat/cli/builder.py
+++ b/torchchat/cli/builder.py
@@ -72,7 +72,12 @@ class BuilderArgs:
 
     def __post_init__(self):
         if self.device is None:
-            self.device = "cuda" if torch.cuda.is_available() else "cpu"
+            if torch.cuda.is_available():
+                self.device = "cuda"
+            elif torch.xpu.is_available():
+                self.device = "xpu"
+            else:
+                self.device = "cpu"
 
         if not (
             (self.checkpoint_path and self.checkpoint_path.is_file())
diff --git a/torchchat/cli/cli.py b/torchchat/cli/cli.py
index 91bdcaf26..723f25ea4 100644
--- a/torchchat/cli/cli.py
+++ b/torchchat/cli/cli.py
@@ -176,8 +176,8 @@ def _add_model_config_args(parser, verb: str) -> None:
         "--device",
         type=str,
         default=None,
-        choices=["fast", "cpu", "cuda", "mps"],
-        help="Hardware device to use. Options: fast, cpu, cuda, mps",
+        choices=["fast", "cpu", "cuda", "mps", "xpu"],
+        help="Hardware device to use. Options: fast, cpu, cuda, mps, xpu",
     )
 
 
diff --git a/torchchat/generate.py b/torchchat/generate.py
index e271f5027..8ec4d4d5d 100644
--- a/torchchat/generate.py
+++ b/torchchat/generate.py
@@ -1203,8 +1203,10 @@ def callback(x, *, done_generating=False):
             if hasattr(prof, "export_chrome_trace"):
                 if self.builder_args.device == "cpu":
                     print(prof.key_averages().table(sort_by="self_cpu_time_total"))
-                else:
+                elif self.builder_args.device == "cuda":
                     print(prof.key_averages().table(sort_by="self_cuda_time_total"))
+                else:
+                    print(prof.key_averages().table(sort_by="self_xpu_time_total"))
                 prof.export_chrome_trace(f"{self.profile}.json")
 
             if start_pos >= max_seq_length:
@@ -1289,6 +1291,9 @@ def callback(x, *, done_generating=False):
         )
         if torch.cuda.is_available():
             print(f"Memory used: {torch.cuda.max_memory_reserved() / 1e9:.02f} GB")
+        if torch.xpu.is_available():
+            print(f"Memory used: {torch.xpu.max_memory_reserved() / 1e9:.02f} GB")
+
 
 
 class DistributedGenerator(LocalGenerator):
@@ -1615,6 +1620,8 @@ def run_generator(
     )
     if torch.cuda.is_available():
         torch.cuda.reset_peak_memory_stats()
+    if torch.xpu.is_available():
+        torch.xpu.reset_peak_memory_stats()
 
     for _ in gen.chat(generator_args):
         pass
diff --git a/torchchat/utils/build_utils.py b/torchchat/utils/build_utils.py
index 2685ec2f3..a0862ff94 100644
--- a/torchchat/utils/build_utils.py
+++ b/torchchat/utils/build_utils.py
@@ -231,6 +231,8 @@ def find_multiple(n: int, k: int) -> int:
 def device_sync(device="cpu"):
     if "cuda" in device:
         torch.cuda.synchronize(device)
+    elif "xpu" in device:
+        torch.xpu.synchronize(device)
     elif ("cpu" in device) or ("mps" in device):
         pass
     else:
@@ -279,7 +281,8 @@ def get_device_str(device) -> str:
         device = (
             "cuda"
             if torch.cuda.is_available()
-            else "mps" if is_mps_available() else "cpu"
+            else "mps" if is_mps_available()
+            else "xpu" if torch.xpu.is_available() else "cpu"
         )
         return device
     else:
@@ -291,7 +294,8 @@ def get_device(device) -> str:
         device = (
             "cuda"
             if torch.cuda.is_available()
-            else "mps" if is_mps_available() else "cpu"
+            else "mps" if is_mps_available()
+            else "xpu" if torch.xpu.is_available() else "cpu"
         )
     return torch.device(device)
 
diff --git a/torchchat/utils/device_info.py b/torchchat/utils/device_info.py
index 9c5953944..950c03002 100644
--- a/torchchat/utils/device_info.py
+++ b/torchchat/utils/device_info.py
@@ -14,7 +14,7 @@ def get_device_info(device: str) -> str:
     """Returns a human-readable description of the hardware based on a torch.device.type
 
     Args:
-        device: A torch.device.type string: one of {"cpu", "cuda"}.
+        device: A torch.device.type string: one of {"cpu", "cuda", "xpu"}.
     Returns:
         str: A human-readable description of the hardware or an empty string if the device type is unhandled.
 
@@ -37,4 +37,13 @@ def get_device_info(device: str) -> str:
         )
     if device == "cuda":
         return torch.cuda.get_device_name(0)
+    if device == "xpu":
+        return (
+            check_output(
+                ["xpu-smi discovery |grep 'Device Name:'"], shell=True
+            )
+            .decode("utf-8")
+            .split("\n")[0]
+            .split("Device Name:")[1]
+        )
     return ""
diff --git a/torchchat/utils/quantize.py b/torchchat/utils/quantize.py
index 171ce3742..b90d098b3 100644
--- a/torchchat/utils/quantize.py
+++ b/torchchat/utils/quantize.py
@@ -111,7 +111,7 @@ def quantize_model(
             raise RuntimeError(f"unknown quantizer {quantizer} specified")
         else:
             # Use tensor subclass API for int4 weight only.
-            if device == "cuda" and quantizer == "linear:int4":
+            if (device == "cuda" or device == "xpu") and quantizer == "linear:int4":
                 quantize_(model, int4_weight_only(q_kwargs["groupsize"]))
                 if not support_tensor_subclass:
                     unwrap_tensor_subclass(model)
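
Note (illustrative, not part of the patch): the standalone sketch below mirrors the device-resolution, synchronization, and peak-memory reporting pattern that this diff adds to builder.py, build_utils.py, and generate.py. It assumes a PyTorch build that ships the torch.xpu backend (for example one installed from the nightly/xpu wheel index selected above); the helper names resolve_device and report_peak_memory are hypothetical and exist only for this example.

    from typing import Optional

    import torch


    def resolve_device(device: Optional[str] = None) -> str:
        # Same fallback order the patch adds to BuilderArgs.__post_init__:
        # prefer CUDA, then XPU, then CPU.
        if device is None:
            if torch.cuda.is_available():
                return "cuda"
            if torch.xpu.is_available():
                return "xpu"
            return "cpu"
        return device


    def device_sync(device: str = "cpu") -> None:
        # Same shape as torchchat.utils.build_utils.device_sync after the patch.
        if "cuda" in device:
            torch.cuda.synchronize(device)
        elif "xpu" in device:
            torch.xpu.synchronize(device)
        elif ("cpu" in device) or ("mps" in device):
            pass
        else:
            print(f"device={device} is not yet supported")


    def report_peak_memory() -> None:
        # Mirrors the peak-memory reporting added to generate.py.
        if torch.cuda.is_available():
            print(f"Memory used: {torch.cuda.max_memory_reserved() / 1e9:.02f} GB")
        if torch.xpu.is_available():
            print(f"Memory used: {torch.xpu.max_memory_reserved() / 1e9:.02f} GB")


    if __name__ == "__main__":
        dev = resolve_device()
        print(f"Selected device: {dev}")
        device_sync(dev)
        report_peak_memory()

On a machine with an Intel GPU and the nightly xpu wheels installed, this should print "Selected device: xpu" and the XPU peak-memory figure; on CUDA or CPU-only hosts it falls back the same way the patched BuilderArgs does.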