From 4e520b3b8068747196c088ff2be3cb1db53351b4 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Wed, 8 May 2024 10:42:27 -0700 Subject: [PATCH 1/3] Update PyTorch pin And enable int8 and int4 acceleration on MPS --- install_requirements.sh | 2 +- qops.py | 14 ++++++++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/install_requirements.sh b/install_requirements.sh index cfb2862fa..bbc1b48d9 100755 --- a/install_requirements.sh +++ b/install_requirements.sh @@ -39,7 +39,7 @@ $PIP_EXECUTABLE install -r requirements.txt --extra-index-url https://download.p # NOTE: If a newly-fetched version of the executorch repo changes the value of # NIGHTLY_VERSION, you should re-run this script to install the necessary # package versions. -NIGHTLY_VERSION=dev20240422 +NIGHTLY_VERSION=dev20240507 # The pip repository that hosts nightly torch packages. cpu by default. # If cuda is available, based on presence of nvidia-smi, install the pytorch nightly diff --git a/qops.py b/qops.py index ab86250ff..b4f172163 100644 --- a/qops.py +++ b/qops.py @@ -15,7 +15,7 @@ def linear_int8_aoti(input, weight, scales): scales = scales.view(-1) if ( torch.compiler.is_compiling() - or input.device.type != "cpu" + or input.device.type not in ["cpu", "mps"] or not hasattr(torch.ops.aten, "_weight_int8pack_mm") ): lin = F.linear(input, weight.to(dtype=input.dtype)) @@ -395,9 +395,15 @@ def _prepare_weight_and_scales_and_zeros( weight_int32, scales_and_zeros = group_quantize_tensor( weight_bf16, n_bit=4, groupsize=groupsize ) - weight_int4pack = torch.ops.aten._convert_weight_to_int4pack( - weight_int32, inner_k_tiles - ) + if weight_bf16.device.type == "mps": + # There is still no MPS-accelerated conversion op + weight_int4pack = torch.ops.aten._convert_weight_to_int4pack( + weight_int32.cpu(), inner_k_tiles + ).to("mps") + else: + weight_int4pack = torch.ops.aten._convert_weight_to_int4pack( + weight_int32, inner_k_tiles + ) return weight_int4pack, scales_and_zeros 
@classmethod From e4f771de6e9b92dca84b2f8fe4b6526a5227991e Mon Sep 17 00:00:00 2001 From: Nikita Shulga <2453524+malfet@users.noreply.github.com> Date: Wed, 8 May 2024 11:29:18 -0700 Subject: [PATCH 2/3] Update run-readme-pr.yml --- .github/workflows/run-readme-pr.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/run-readme-pr.yml b/.github/workflows/run-readme-pr.yml index d650bf7d4..b53a79d87 100644 --- a/.github/workflows/run-readme-pr.yml +++ b/.github/workflows/run-readme-pr.yml @@ -20,10 +20,10 @@ jobs: uname -a echo "::endgroup::" - # echo "::group::Install newer objcopy that supports --set-section-alignment" - # yum install -y devtoolset-10-binutils - # export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH - # echo "::endgroup::" + echo "::group::Install newer objcopy that supports --set-section-alignment" + yum install -y devtoolset-10-binutils + export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH + echo "::endgroup::" echo "::group::Create script to run README" python3 scripts/updown.py --file README.md --replace 'llama3:stories15M,-l 3:-l 2,meta-llama/Meta-Llama-3-8B-Instruct:stories15M' --suppress huggingface-cli,HF_TOKEN > ./run-readme.sh From 8485985e4c4476350fe6adf90a055ec73d940c42 Mon Sep 17 00:00:00 2001 From: Nikita Shulga <2453524+malfet@users.noreply.github.com> Date: Wed, 8 May 2024 22:50:47 -0700 Subject: [PATCH 3/3] Update install_requirements.sh --- install_requirements.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/install_requirements.sh b/install_requirements.sh index bbc1b48d9..7ab32a287 100755 --- a/install_requirements.sh +++ b/install_requirements.sh @@ -47,6 +47,8 @@ NIGHTLY_VERSION=dev20240507 if [[ -x "$(command -v nvidia-smi)" ]]; then TORCH_NIGHTLY_URL="https://download.pytorch.org/whl/nightly/cu121" + # Uninstall triton, as nightly will depend on pytorch-triton, which is one and the same + $PIP_EXECUTABLE uninstall -y triton else 
TORCH_NIGHTLY_URL="https://download.pytorch.org/whl/nightly/cpu" fi