Skip to content
This repository was archived by the owner on Aug 15, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .lintrunner.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ merge_base_with = "origin/main"

[[linter]]
code = 'RUFF'
include_patterns = ['test/smoke_test/*.py', 's3_management/*.py']
include_patterns = ['test/smoke_test/*.py', 's3_management/*.py', 'aarch64_linux/*.py']
command = [
'python3',
'tools/linter/adapters/ruff_linter.py',
Expand Down
28 changes: 15 additions & 13 deletions aarch64_linux/aarch64_wheel_ci_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# encoding: UTF-8

import os
import subprocess
from subprocess import check_output
from pygit2 import Repository
from typing import List

Expand All @@ -11,18 +11,20 @@ def list_dir(path: str) -> List[str]:
''''
Helper for getting paths for Python
'''
return subprocess.check_output(["ls", "-1", path]).decode().split("\n")
return check_output(["ls", "-1", path]).decode().split("\n")


def build_ArmComputeLibrary(git_clone_flags: str = "") -> None:
'''
Using ArmComputeLibrary for aarch64 PyTorch
'''
print('Building Arm Compute Library')
acl_build_flags=" ".join(["debug=0", "neon=1", "opencl=0", "os=linux", "openmp=1", "cppthreads=0",
"arch=armv8a", "multi_isa=1", "fixed_format_kernels=1", "build=native"])
os.system("cd / && mkdir /acl")
os.system(f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v23.08 {git_clone_flags}")
os.system("cd ComputeLibrary; export acl_install_dir=/acl; "
"scons Werror=1 -j8 debug=0 neon=1 opencl=0 os=linux openmp=1 cppthreads=0 arch=armv8a multi_isa=1 fixed_format_kernels=1 build=native build_dir=$acl_install_dir/build; "
f"scons Werror=1 -j8 {acl_build_flags} build_dir=$acl_install_dir/build; "
"cp -r arm_compute $acl_install_dir; "
"cp -r include $acl_install_dir; "
"cp -r utils $acl_install_dir; "
Expand Down Expand Up @@ -86,13 +88,12 @@ def parse_arguments():
if override_package_version is not None:
version = override_package_version
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version} PYTORCH_BUILD_NUMBER=1 "
else:
if branch in ['nightly', 'master']:
build_date = subprocess.check_output(['git', 'log', '--pretty=format:%cs', '-1'], cwd='/pytorch').decode().replace('-', '')
version = subprocess.check_output(['cat', 'version.txt'], cwd='/pytorch').decode().strip()[:-2]
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1 "
if branch.startswith("v1.") or branch.startswith("v2."):
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1 "
elif branch in ['nightly', 'master']:
build_date = check_output(['git', 'log', '--pretty=format:%cs', '-1'], cwd='/pytorch').decode().replace('-', '')
version = check_output(['cat', 'version.txt'], cwd='/pytorch').decode().strip()[:-2]
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1 "
elif branch.startswith(("v1.", "v2.")):
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1 "

if enable_mkldnn:
build_ArmComputeLibrary(git_clone_flags)
Expand All @@ -105,9 +106,10 @@ def parse_arguments():
else:
print("build pytorch without mkldnn backend")

# work around to fix Raspberry pie crash
print("Applying mkl-dnn patch to fix readdir crash")
os.system("cd /pytorch/third_party/ideep/mkl-dnn && patch -p1 < /builder/mkldnn_fix/aarch64-fix-readdir-crash.patch")
# patch mkldnn to fix aarch64 mac and aws lambda crash
print("Applying mkl-dnn patch to fix crash due to /sys not accesible")
os.system("cd /pytorch/third_party/ideep/mkl-dnn && patch -p1 < /builder/mkldnn_fix/fix-xbyak-failure.patch")

os.system(f"cd /pytorch; {build_vars} python3 setup.py bdist_wheel")
pytorch_wheel_name = complete_wheel("pytorch")
print(f"Build Compelete. Created {pytorch_wheel_name}..")
33 changes: 18 additions & 15 deletions aarch64_linux/build_aarch64_wheel.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@

# This script is for building AARCH64 wheels using AWS EC2 instances.
# To generate binaries for the release follow these steps:
# 1. Update mappings for each of the Domain Libraries by adding new row to a table like this: "v1.11.0": ("0.11.0", "rc1"),
# 2. Run script with following arguments for each of the supported python versions and specify required RC tag for example: v1.11.0-rc3:
# build_aarch64_wheel.py --key-name <YourPemKey> --use-docker --python 3.8 --branch <RCtag>
# 1. Update mappings for each of the Domain Libraries by adding a new row to a table like this:
# "v1.11.0": ("0.11.0", "rc1"),
# 2. Run the script with the following arguments for each supported Python version and required tag, for example:
# build_aarch64_wheel.py --key-name <YourPemKey> --use-docker --python 3.8 --branch v1.11.0-rc3


import boto3
Expand Down Expand Up @@ -177,7 +178,7 @@ def wait_for_connection(addr, port, timeout=15, attempt_cnt=5):
try:
with socket.create_connection((addr, port), timeout=timeout):
return
except (ConnectionRefusedError, socket.timeout):
except (ConnectionRefusedError, socket.timeout): # noqa: PERF203
if i == attempt_cnt - 1:
raise
time.sleep(timeout)
Expand All @@ -203,7 +204,7 @@ def install_condaforge(host: RemoteHost,
if host.using_docker():
host.run_cmd("echo 'PATH=$HOME/miniforge3/bin:$PATH'>>.bashrc")
else:
host.run_cmd(['sed', '-i', '\'/^# If not running interactively.*/i PATH=$HOME/miniforge3/bin:$PATH\'', '.bashrc'])
host.run_cmd(['sed', '-i', '\'/^# If not running interactively.*/i PATH=$HOME/miniforge3/bin:$PATH\'', '.bashrc']) # noqa: E501


def install_condaforge_python(host: RemoteHost, python_version="3.8") -> None:
Expand All @@ -221,12 +222,13 @@ def build_OpenBLAS(host: RemoteHost, git_clone_flags: str = "") -> None:
print('Building OpenBLAS')
host.run_cmd(f"git clone https://github.com/xianyi/OpenBLAS -b v0.3.25 {git_clone_flags}")
make_flags = "NUM_THREADS=64 USE_OPENMP=1 NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=ARMV8"
host.run_cmd(f"pushd OpenBLAS && make {make_flags} -j8 && sudo make {make_flags} install && popd && rm -rf OpenBLAS")
host.run_cmd(f"pushd OpenBLAS && make {make_flags} -j8 && sudo make {make_flags} install && popd && rm -rf OpenBLAS") # noqa: E501


def build_ArmComputeLibrary(host: RemoteHost, git_clone_flags: str = "") -> None:
print('Building Arm Compute Library')
acl_build_flags="debug=0 neon=1 opencl=0 os=linux openmp=1 cppthreads=0 arch=armv8a multi_isa=1 fixed_format_kernels=1 build=native"
acl_build_flags=" ".join(["debug=0", "neon=1", "opencl=0", "os=linux", "openmp=1", "cppthreads=0",
"arch=armv8a", "multi_isa=1", "fixed_format_kernels=1", "build=native"])
host.run_cmd(f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v23.08 {git_clone_flags}")
host.run_cmd(f"cd ComputeLibrary && scons Werror=1 -j8 {acl_build_flags}")

Expand Down Expand Up @@ -301,7 +303,7 @@ def build_torchvision(host: RemoteHost, *,
# Remove .so files to force static linking
host.run_cmd("rm miniforge3/lib/libpng.so miniforge3/lib/libpng16.so miniforge3/lib/libjpeg.so")
# And patch setup.py to include libz dependency for libpng
host.run_cmd(['sed -i -e \'s/image_link_flags\.append("png")/image_link_flags += ["png", "z"]/\' vision/setup.py'])
host.run_cmd(['sed -i -e \'s/image_link_flags\\.append("png")/image_link_flags += ["png", "z"]/\' vision/setup.py']) # noqa: E501

build_vars = ""
if branch == "nightly":
Expand Down Expand Up @@ -525,7 +527,7 @@ def start_build(host: RemoteHost, *,
if host.using_docker():
print("Move libgfortant.a into a standard location")
# HACK: pypa gfortran.a is compiled without PIC, which leads to the following error
# libgfortran.a(error.o)(.text._gfortrani_st_printf+0x34): unresolvable R_AARCH64_ADR_PREL_PG_HI21 relocation against symbol `__stack_chk_guard@@GLIBC_2.17'
# libgfortran.a(error.o)(.text._gfortrani_st_printf+0x34): unresolvable R_AARCH64_ADR_PREL_PG_HI21 relocation against symbol `__stack_chk_guard@@GLIBC_2.17' # noqa: E501
# Workaround by copying gfortran library from the host
host.run_ssh_cmd("sudo apt-get install -y gfortran-8")
host.run_cmd("mkdir -p /usr/lib/gcc/aarch64-linux-gnu/8")
Expand All @@ -543,22 +545,23 @@ def start_build(host: RemoteHost, *,
# Breakpad build fails on aarch64
build_vars = "USE_BREAKPAD=0 "
if branch == 'nightly':
build_date = host.check_output("cd pytorch && git log --pretty=format:%s -1").strip().split()[0].replace("-", "")
build_date = host.check_output("cd pytorch && git log --pretty=format:%s -1").strip().split()[0].replace("-", "") # noqa: E501
version = host.check_output("cat pytorch/version.txt").strip()[:-2]
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1"
if branch.startswith("v1.") or branch.startswith("v2."):
if branch.startswith(("v1.", "v2.")):
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1"
if host.using_docker():
build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
if enable_mkldnn:
build_ArmComputeLibrary(host, git_clone_flags)
print("build pytorch with mkldnn+acl backend")
build_vars += " USE_MKLDNN=ON USE_MKLDNN_ACL=ON"
host.run_cmd(f"cd $HOME && git clone https://github.com/pytorch/builder.git")
host.run_cmd(f"cd $HOME/pytorch && export ACL_ROOT_DIR=$HOME/ComputeLibrary && {build_vars} python3 setup.py bdist_wheel{build_opts}")
host.run_cmd("cd $HOME && git clone https://github.com/pytorch/builder.git && cd builder && git checkout release/2.2") # noqa: E501
host.run_cmd("cd $HOME/pytorch/third_party/ideep/mkl-dnn && patch -p1 < $HOME/builder/mkldnn_fix/fix-xbyak-failure.patch") # noqa: E501
host.run_cmd(f"cd $HOME/pytorch && export ACL_ROOT_DIR=$HOME/ComputeLibrary && {build_vars} python3 setup.py bdist_wheel{build_opts}") # noqa: E501
print('Repair the wheel')
pytorch_wheel_name = host.list_dir("pytorch/dist")[0]
host.run_cmd(f"export LD_LIBRARY_PATH=$HOME/acl/build:$HOME/pytorch/build/lib && auditwheel repair $HOME/pytorch/dist/{pytorch_wheel_name}")
host.run_cmd(f"export LD_LIBRARY_PATH=$HOME/acl/build:$HOME/pytorch/build/lib && auditwheel repair $HOME/pytorch/dist/{pytorch_wheel_name}") # noqa: E501
print('replace the original wheel with the repaired one')
pytorch_repaired_wheel_name = host.list_dir("wheelhouse")[0]
host.run_cmd(f"cp $HOME/wheelhouse/{pytorch_repaired_wheel_name} $HOME/pytorch/dist/{pytorch_wheel_name}")
Expand Down Expand Up @@ -706,7 +709,7 @@ def parse_arguments():
parser.add_argument("--build-only", action="store_true")
parser.add_argument("--test-only", type=str)
parser.add_argument("--os", type=str, choices=list(os_amis.keys()), default='ubuntu20_04')
parser.add_argument("--python-version", type=str, choices=['3.6', '3.7', '3.8', '3.9', '3.10', '3.11'], default=None)
parser.add_argument("--python-version", type=str, choices=[f'3.{d}' for d in range(6, 12)], default=None)
parser.add_argument("--alloc-instance", action="store_true")
parser.add_argument("--list-instances", action="store_true")
parser.add_argument("--pytorch-only", action="store_true")
Expand Down
4 changes: 2 additions & 2 deletions aarch64_linux/embed_library.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@


def replace_tag(filename):
with open(filename, 'r') as f:
with open(filename) as f:
lines = f.read().split("\\n")
for i,line in enumerate(lines):
if not line.startswith("Tag: "):
Expand Down Expand Up @@ -42,7 +42,7 @@ def embed_library(whl_path, lib_soname, update_tag=False):
torchlib_path = os.path.join(ctx._tmpdir.name, 'torch', 'lib')
ctx.out_wheel=tmp_whl_name
new_lib_path, new_lib_soname = None, None
for filename, elf in elf_file_filter(ctx.iter_files()):
for filename, _ in elf_file_filter(ctx.iter_files()):
if not filename.startswith('torch/lib'):
continue
libtree = lddtree(filename)
Expand Down
14 changes: 0 additions & 14 deletions mkldnn_fix/aarch64-fix-readdir-crash.patch

This file was deleted.

96 changes: 96 additions & 0 deletions mkldnn_fix/fix-xbyak-failure.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
cpu: aarch64: fix xbyak functions for /sys access failures

There are platforms with /sys not mounted. Skip handling HW caps
for such platforms.

This fixes issue pytorch/pytorch#115482
---
.../xbyak_aarch64/src/util_impl_linux.h | 24 ++++++++++++++-----
.../aarch64/xbyak_aarch64/src/util_impl_mac.h | 9 ++++---
2 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/src/cpu/aarch64/xbyak_aarch64/src/util_impl_linux.h b/src/cpu/aarch64/xbyak_aarch64/src/util_impl_linux.h
index 2c7b28e58b..860a05700f 100644
--- a/src/cpu/aarch64/xbyak_aarch64/src/util_impl_linux.h
+++ b/src/cpu/aarch64/xbyak_aarch64/src/util_impl_linux.h
@@ -144,8 +144,13 @@ private:
regex_t regexBuf;
regmatch_t match[1];

- if (regcomp(&regexBuf, regex, REG_EXTENDED) != 0)
- throw ERR_INTERNAL;
+ if (regcomp(&regexBuf, regex, REG_EXTENDED) != 0) {
+ /* There are platforms with /sys not mounted. return empty buffers
+ * in these scenarios
+ */
+ buf[0] = '\0';
+ return 0;
+ }

const int retVal = regexec(&regexBuf, path, 1, match, 0);
regfree(&regexBuf);
@@ -187,8 +192,12 @@ private:
regex_t regexBuf;
regmatch_t match[2];

- if (regcomp(&regexBuf, "index[0-9]*$", REG_EXTENDED) != 0)
- throw ERR_INTERNAL;
+ if (regcomp(&regexBuf, "index[0-9]*$", REG_EXTENDED) != 0) {
+ /* There are platforms with /sys not mounted. return gracefully
+ * in these scenarios
+ */
+ goto init_and_return_false;
+ }

if (regexec(&regexBuf, dp->d_name, 1, match, 0) == 0) { // Found index[1-9][0-9]. directory
char *dir_name = buf0;
@@ -438,12 +447,15 @@ private:

FILE *file = fopen(path_midr_el1, "r");
if (file == nullptr) {
- throw Error(ERR_INTERNAL);
+ /* There are platforms with /sys not mounted. return empty buffer
+ * in these scenarios
+ */
+ cacheInfo_.midr_el1 = 0xFE << 24;
return;
}

if (fread(buf, sizeof(char), 64, file) == 0) {
- throw Error(ERR_INTERNAL);
+ cacheInfo_.midr_el1 = 0xFE << 24;
return;
}

diff --git a/src/cpu/aarch64/xbyak_aarch64/src/util_impl_mac.h b/src/cpu/aarch64/xbyak_aarch64/src/util_impl_mac.h
index ebd6dba7c0..93bdae1d7a 100644
--- a/src/cpu/aarch64/xbyak_aarch64/src/util_impl_mac.h
+++ b/src/cpu/aarch64/xbyak_aarch64/src/util_impl_mac.h
@@ -102,18 +102,21 @@ private:
size_t val = 0;
size_t len = sizeof(val);

+ /* There are platforms with /sys not mounted. skip
+ * handling HW caps for such platforms.
+ */
if (sysctlbyname(hw_opt_atomics, &val, &len, NULL, 0) != 0)
- throw Error(ERR_INTERNAL);
+ type_ = 0;
else
type_ |= (val == 1) ? (Type)XBYAK_AARCH64_HWCAP_ATOMIC : 0;

if (sysctlbyname(hw_opt_fp, &val, &len, NULL, 0) != 0)
- throw Error(ERR_INTERNAL);
+ type_ = 0;
else
type_ |= (val == 1) ? (Type)XBYAK_AARCH64_HWCAP_FP : 0;

if (sysctlbyname(hw_opt_neon, &val, &len, NULL, 0) != 0)
- throw Error(ERR_INTERNAL);
+ type_ = 0;
else
type_ |= (val == 1) ? (Type)XBYAK_AARCH64_HWCAP_ADVSIMD : 0;
}
--
2.34.1