Add support for Mac OSX on Apple Silicon #465

Merged: 21 commits, Feb 2, 2024
Changes from all commits
4 changes: 4 additions & 0 deletions Makefile
@@ -66,6 +66,10 @@ clobber: clean
# help:
# help: Style
# help: -------
# help: check-all - Performs all the style-related checks
.PHONY: check-all
check-all: check-style check-format check-sort-imports check-lint check-mypy
$(info All style checks PASSED)

# help: style - Sort imports and format with black
.PHONY: style
6 changes: 6 additions & 0 deletions doc/changelog.rst
@@ -26,6 +26,8 @@ Description
- Updated SmartSim's machine learning backends
- Added ONNX support for Python 3.10
- Added support for Python 3.11
- Added support for SmartSim with Torch on Apple Silicon


Detailed Notes

@@ -44,12 +46,16 @@ Detailed Notes
there is now an available ONNX wheel for use with Python 3.10, and wheels for
all of SmartSim's machine learning backends with Python 3.11.
(SmartSim-PR451_) (SmartSim-PR461_)
- SmartSim can now be built and used on platforms using Apple Silicon
(ARM64). Currently, only the PyTorch backend is supported. Note that libtorch
will be downloaded from a CrayLabs GitHub repository. (SmartSim-PR465_)

.. _SmartSim-PR446: https://github.com/CrayLabs/SmartSim/pull/446
.. _SmartSim-PR448: https://github.com/CrayLabs/SmartSim/pull/448
.. _SmartSim-PR451: https://github.com/CrayLabs/SmartSim/pull/451
.. _SmartSim-PR453: https://github.com/CrayLabs/SmartSim/pull/453
.. _SmartSim-PR461: https://github.com/CrayLabs/SmartSim/pull/461
.. _SmartSim-PR465: https://github.com/CrayLabs/SmartSim/pull/465
.. _SmartSim-PR472: https://github.com/CrayLabs/SmartSim/pull/472


173 changes: 138 additions & 35 deletions smartsim/_core/_install/builder.py
@@ -26,6 +26,7 @@

import concurrent.futures
import enum
import itertools
import os
import platform
import re
@@ -44,12 +45,10 @@
from shutil import which
from subprocess import SubprocessError

# NOTE: This will be imported by setup.py and hence no
# smartsim related items should be imported into
# this file.
# NOTE: This will be imported by setup.py and hence no smartsim related
# items should be imported into this file.

# TODO:
# - check cmake version and use system if possible to avoid conflicts
# TODO: check cmake version and use system if possible to avoid conflicts

TRedisAIBackendStr = t.Literal["tensorflow", "torch", "onnxruntime", "tflite"]
TDeviceStr = t.Literal["cpu", "gpu"]
@@ -84,6 +83,7 @@ class BuildError(Exception):

class Architecture(enum.Enum):
X64 = ("x86_64", "amd64")
ARM64 = ("arm64",)

@classmethod
def from_str(cls, string: str, /) -> "Architecture":
@@ -341,6 +341,10 @@ def __rai_dependency_name__(self) -> str: ...
@abstractmethod
def __place_for_rai__(self, target: t.Union[str, "os.PathLike[str]"]) -> Path: ...

@staticmethod
@abstractmethod
def supported_platforms() -> t.Sequence[t.Tuple[OperatingSystem, Architecture]]: ...


def _place_rai_dep_at(
target: t.Union[str, "os.PathLike[str]"], verbose: bool
@@ -366,6 +370,8 @@ class RedisAIBuilder(Builder):

def __init__(
self,
_os: OperatingSystem = OperatingSystem.from_str(platform.system()),
architecture: Architecture = Architecture.from_str(platform.machine()),
build_env: t.Optional[t.Dict[str, t.Any]] = None,
torch_dir: str = "",
libtf_dir: str = "",
@@ -376,7 +382,10 @@ def __init__(
verbose: bool = False,
) -> None:
super().__init__(build_env or {}, jobs=jobs, verbose=verbose)

self.rai_install_path: t.Optional[Path] = None
self._os = _os
self._architecture = architecture

# convert to int for RAI build script
self._torch = build_torch
@@ -385,10 +394,25 @@ def __init__(
self.libtf_dir = libtf_dir
self.torch_dir = torch_dir

# TODO: It might be worth making these constructor args so that users
# of this class can configure exactly _what_ they are building.
self._os = OperatingSystem.from_str(platform.system())
self._architecture = Architecture.from_str(platform.machine())
# Sanity checks
self._validate_platform()

def _validate_platform(self) -> None:
platform_ = (self._os, self._architecture)
unsupported = []
if platform_ not in _DLPackRepository.supported_platforms():
unsupported.append("DLPack")
if self.fetch_tf and (platform_ not in _TFArchive.supported_platforms()):
unsupported.append("Tensorflow")
if self.fetch_onnx and (platform_ not in _ORTArchive.supported_platforms()):
unsupported.append("ONNX")
if self.fetch_torch and (platform_ not in _PTArchive.supported_platforms()):
unsupported.append("PyTorch")
if unsupported:
raise BuildError(
f"The {', '.join(unsupported)} backend(s) are not "
f"supported on {self._os} with {self._architecture}"
)
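
A minimal sketch (not part of the diff) of how this check surfaces to callers: the `build_tf` keyword is assumed from the partially elided constructor signature above, and an unavailable backend now fails fast at construction time because __init__ ends by calling _validate_platform.

# Sketch only; `build_tf` is an assumed keyword argument.
from smartsim._core._install.builder import (
    Architecture,
    BuildError,
    OperatingSystem,
    RedisAIBuilder,
)

try:
    RedisAIBuilder(
        _os=OperatingSystem.DARWIN,
        architecture=Architecture.ARM64,
        build_tf=True,  # TensorFlow has no (DARWIN, ARM64) entry
    )
except BuildError as err:
    print(err)  # e.g. "The Tensorflow backend(s) are not supported on ..."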

@property
def rai_build_path(self) -> Path:
@@ -436,6 +460,8 @@ def fail_to_format(reason: str) -> BuildError: # pragma: no cover
raise fail_to_format(f"Unknown operating system: {self._os}")
if self._architecture == Architecture.X64:
arch = "x64"
elif self._architecture == Architecture.ARM64:
arch = "arm64v8"
else: # pragma: no cover
raise fail_to_format(f"Unknown architecture: {self._architecture}")
return self.rai_build_path / f"deps/{os_}-{arch}-{device}"
@@ -450,13 +476,18 @@ def _get_deps_to_fetch_for(
# dependency versions were declared in single location.
# Unfortunately importing into this module is non-trivial as it
# is used as script in the SmartSim `setup.py`.
fetchable_deps: t.Sequence[t.Tuple[bool, _RAIBuildDependency]] = (
(True, _DLPackRepository("v0.5_RAI")),
(self.fetch_torch, _PTArchive(os_, device, "2.0.1")),
(self.fetch_tf, _TFArchive(os_, arch, device, "2.13.1")),
(self.fetch_onnx, _ORTArchive(os_, device, "1.16.3")),
)
return tuple(dep for should_fetch, dep in fetchable_deps if should_fetch)

# DLPack is always required
fetchable_deps: t.List[_RAIBuildDependency] = [_DLPackRepository("v0.5_RAI")]
if self.fetch_torch:
pt_dep = _choose_pt_variant(os_)
fetchable_deps.append(pt_dep(arch, device, "2.0.1"))
if self.fetch_tf:
fetchable_deps.append(_TFArchive(os_, arch, device, "2.13.1"))
if self.fetch_onnx:
fetchable_deps.append(_ORTArchive(os_, device, "1.16.3"))

return tuple(fetchable_deps)
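
As a rough illustration (an assumption, not text from the PR), a Torch-only build on Apple Silicon with device "cpu" would end up fetching just DLPack plus the macOS ARM64 libtorch archive:

# Sketch of the tuple returned in that case, using values taken from the code above.
expected_deps = (
    _DLPackRepository("v0.5_RAI"),
    _PTArchiveMacOSX(Architecture.ARM64, "cpu", "2.0.1"),
)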

def symlink_libtf(self, device: str) -> None:
"""Add symbolic link to available libtensorflow in RedisAI deps.
@@ -698,6 +729,14 @@ def clone(
class _DLPackRepository(_WebGitRepository, _RAIBuildDependency):
version: str

@staticmethod
def supported_platforms() -> t.Sequence[t.Tuple[OperatingSystem, Architecture]]:
return (
(OperatingSystem.LINUX, Architecture.X64),
(OperatingSystem.DARWIN, Architecture.X64),
(OperatingSystem.DARWIN, Architecture.ARM64),
)

@property
def url(self) -> str:
return "https://github.com/RedisAI/dlpack.git"
@@ -756,30 +795,20 @@ def _extract_download(
zip_file.extractall(target)


@t.final
@dataclass(frozen=True)
class _PTArchive(_WebZip, _RAIBuildDependency):
os_: OperatingSystem
architecture: Architecture
device: TDeviceStr
version: str

@property
def url(self) -> str:
if self.os_ == OperatingSystem.LINUX:
if self.device == "gpu":
pt_build = "cu117"
else:
pt_build = "cpu"
# pylint: disable-next=line-too-long
libtorch_arch = f"libtorch-cxx11-abi-shared-without-deps-{self.version}%2B{pt_build}.zip"
elif self.os_ == OperatingSystem.DARWIN:
if self.device == "gpu":
raise BuildError("RedisAI does not currently support GPU on Macos")
pt_build = "cpu"
libtorch_arch = f"libtorch-macos-{self.version}.zip"
else:
raise BuildError(f"Unexpected OS for the PT Archive: {self.os_}")
return f"https://download.pytorch.org/libtorch/{pt_build}/{libtorch_arch}"
@staticmethod
def supported_platforms() -> t.Sequence[t.Tuple[OperatingSystem, Architecture]]:
# TODO: This will need to be revisited if the inheritance tree gets deeper
return tuple(
itertools.chain.from_iterable(
var.supported_platforms() for var in _PTArchive.__subclasses__()
)
)
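
For reference (illustrative, not in the diff), with the two variants defined further down as the only subclasses, this aggregation is simply the union of their platform tuples:

# _PTArchive.supported_platforms() combines _PTArchiveLinux and _PTArchiveMacOSX:
#   ((LINUX, X64), (DARWIN, ARM64), (DARWIN, X64))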

@property
def __rai_dependency_name__(self) -> str:
@@ -793,6 +822,66 @@ def __place_for_rai__(self, target: t.Union[str, "os.PathLike[str]"]) -> Path:
return target


@t.final
class _PTArchiveLinux(_PTArchive):
@staticmethod
def supported_platforms() -> t.Sequence[t.Tuple[OperatingSystem, Architecture]]:
return ((OperatingSystem.LINUX, Architecture.X64),)

@property
def url(self) -> str:
if self.device == "gpu":
pt_build = "cu117"
else:
pt_build = "cpu"
# pylint: disable-next=line-too-long
libtorch_archive = (
f"libtorch-cxx11-abi-shared-without-deps-{self.version}%2B{pt_build}.zip"
)
return f"https://download.pytorch.org/libtorch/{pt_build}/{libtorch_archive}"


@t.final
class _PTArchiveMacOSX(_PTArchive):
@staticmethod
def supported_platforms() -> t.Sequence[t.Tuple[OperatingSystem, Architecture]]:
return (
(OperatingSystem.DARWIN, Architecture.ARM64),
(OperatingSystem.DARWIN, Architecture.X64),
)

@property
def url(self) -> str:
if self.device == "gpu":
raise BuildError("RedisAI does not currently support GPU on Mac OSX")
if self.architecture == Architecture.X64:
pt_build = "cpu"
libtorch_archive = f"libtorch-macos-{self.version}.zip"
root_url = "https://download.pytorch.org/libtorch"
return f"{root_url}/{pt_build}/{libtorch_archive}"
if self.architecture == Architecture.ARM64:
libtorch_archive = f"libtorch-macos-arm64-{self.version}.zip"
# pylint: disable-next=line-too-long
root_url = (
"https://github.com/CrayLabs/ml_lib_builder/releases/download/v0.1/"
)
return f"{root_url}/{libtorch_archive}"

raise BuildError("Unsupported architecture for Pytorch: {self.architecture}")


def _choose_pt_variant(
os_: OperatingSystem,
) -> t.Union[t.Type[_PTArchiveLinux], t.Type[_PTArchiveMacOSX]]:

if os_ == OperatingSystem.DARWIN:
return _PTArchiveMacOSX
if os_ == OperatingSystem.LINUX:
return _PTArchiveLinux

raise BuildError(f"Unsupported OS for PyTorch: {os_}")


@t.final
@dataclass(frozen=True)
class _TFArchive(_WebTGZ, _RAIBuildDependency):
@@ -801,6 +890,13 @@ class _TFArchive(_WebTGZ, _RAIBuildDependency):
device: TDeviceStr
version: str

@staticmethod
def supported_platforms() -> t.Sequence[t.Tuple[OperatingSystem, Architecture]]:
return (
(OperatingSystem.LINUX, Architecture.X64),
(OperatingSystem.DARWIN, Architecture.X64),
)

@property
def url(self) -> str:
if self.architecture == Architecture.X64:
@@ -843,6 +939,13 @@ class _ORTArchive(_WebTGZ, _RAIBuildDependency):
device: TDeviceStr
version: str

@staticmethod
def supported_platforms() -> t.Sequence[t.Tuple[OperatingSystem, Architecture]]:
return (
(OperatingSystem.LINUX, Architecture.X64),
(OperatingSystem.DARWIN, Architecture.X64),
)

@property
def url(self) -> str:
ort_url_base = (