From 4e520b3b8068747196c088ff2be3cb1db53351b4 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Wed, 8 May 2024 10:42:27 -0700 Subject: [PATCH 1/3] Update PyTorch pin And enable int8 and int4 acceleration on MPS --- install_requirements.sh | 2 +- qops.py | 14 ++++++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/install_requirements.sh b/install_requirements.sh index cfb2862fa..bbc1b48d9 100755 --- a/install_requirements.sh +++ b/install_requirements.sh @@ -39,7 +39,7 @@ $PIP_EXECUTABLE install -r requirements.txt --extra-index-url https://download.p # NOTE: If a newly-fetched version of the executorch repo changes the value of # NIGHTLY_VERSION, you should re-run this script to install the necessary # package versions. -NIGHTLY_VERSION=dev20240422 +NIGHTLY_VERSION=dev20240507 # The pip repository that hosts nightly torch packages. cpu by default. # If cuda is available, based on presence of nvidia-smi, install the pytorch nightly diff --git a/qops.py b/qops.py index ab86250ff..b4f172163 100644 --- a/qops.py +++ b/qops.py @@ -15,7 +15,7 @@ def linear_int8_aoti(input, weight, scales): scales = scales.view(-1) if ( torch.compiler.is_compiling() - or input.device.type != "cpu" + or input.device.type not in ["cpu", "mps"] or not hasattr(torch.ops.aten, "_weight_int8pack_mm") ): lin = F.linear(input, weight.to(dtype=input.dtype)) @@ -395,9 +395,15 @@ def _prepare_weight_and_scales_and_zeros( weight_int32, scales_and_zeros = group_quantize_tensor( weight_bf16, n_bit=4, groupsize=groupsize ) - weight_int4pack = torch.ops.aten._convert_weight_to_int4pack( - weight_int32, inner_k_tiles - ) + if weight_bf16.device.type == "mps": + # There is still no MPS-accelerated conversion op + weight_int4pack = torch.ops.aten._convert_weight_to_int4pack( + weight_int32.cpu(), inner_k_tiles + ).to("mps") + else: + weight_int4pack = torch.ops.aten._convert_weight_to_int4pack( + weight_int32, inner_k_tiles + ) return weight_int4pack, scales_and_zeros 
@classmethod From e4f771de6e9b92dca84b2f8fe4b6526a5227991e Mon Sep 17 00:00:00 2001 From: Nikita Shulga <2453524+malfet@users.noreply.github.com> Date: Wed, 8 May 2024 11:29:18 -0700 Subject: [PATCH 2/3] Update run-readme-pr.yml --- .github/workflows/run-readme-pr.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/run-readme-pr.yml b/.github/workflows/run-readme-pr.yml index d650bf7d4..b53a79d87 100644 --- a/.github/workflows/run-readme-pr.yml +++ b/.github/workflows/run-readme-pr.yml @@ -20,10 +20,10 @@ jobs: uname -a echo "::endgroup::" - # echo "::group::Install newer objcopy that supports --set-section-alignment" - # yum install -y devtoolset-10-binutils - # export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH - # echo "::endgroup::" + echo "::group::Install newer objcopy that supports --set-section-alignment" + yum install -y devtoolset-10-binutils + export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH + echo "::endgroup::" echo "::group::Create script to run README" python3 scripts/updown.py --file README.md --replace 'llama3:stories15M,-l 3:-l 2,meta-llama/Meta-Llama-3-8B-Instruct:stories15M' --suppress huggingface-cli,HF_TOKEN > ./run-readme.sh From 8485985e4c4476350fe6adf90a055ec73d940c42 Mon Sep 17 00:00:00 2001 From: Nikita Shulga <2453524+malfet@users.noreply.github.com> Date: Wed, 8 May 2024 22:50:47 -0700 Subject: [PATCH 3/3] Update install_requirements.sh --- install_requirements.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/install_requirements.sh b/install_requirements.sh index bbc1b48d9..7ab32a287 100755 --- a/install_requirements.sh +++ b/install_requirements.sh @@ -47,6 +47,8 @@ NIGHTLY_VERSION=dev20240507 if [[ -x "$(command -v nvidia-smi)" ]]; then TORCH_NIGHTLY_URL="https://download.pytorch.org/whl/nightly/cu121" + # Uninstall triton, as nightly will depend on pytorch-triton, which is one and the same + $PIP_EXECUTABLE uninstall -y triton else 
TORCH_NIGHTLY_URL="https://download.pytorch.org/whl/nightly/cpu" fi