diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml
index 1f0b729ed..e3c808410 100644
--- a/.github/workflows/run_tests.yml
+++ b/.github/workflows/run_tests.yml
@@ -49,7 +49,7 @@ env:
jobs:
run_tests:
- name: Run tests ${{ matrix.subset }} with ${{ matrix.os }}, Python ${{ matrix.py_v}}, RedisAI ${{ matrix.rai }}
+ name: Run tests ${{ matrix.subset }} with ${{ matrix.os }}, Python ${{ matrix.py_v}}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
@@ -63,9 +63,6 @@ jobs:
- os: macos-14
py_v: "3.9"
- env:
- SMARTSIM_REDISAI: ${{ matrix.rai }}
-
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
@@ -109,15 +106,10 @@ jobs:
- name: Install SmartSim (with ML backends)
run: |
python -m pip install git+https://github.com/CrayLabs/SmartRedis.git@develop#egg=smartredis
- python -m pip install .[dev,mypy,ml]
-
- - name: Install ML Runtimes with Smart (with pt, tf, and onnx support)
- if: contains( matrix.os, 'ubuntu' ) || contains( matrix.os, 'macos-12')
- run: smart build --device cpu --onnx -v
+ python -m pip install .[dev,mypy]
- - name: Install ML Runtimes with Smart (no ONNX,TF on Apple Silicon)
- if: contains( matrix.os, 'macos-14' )
- run: smart build --device cpu --no_tf -v
+ - name: Install ML Runtimes
+ run: smart build --device cpu -v
- name: Run mypy
run: |
diff --git a/.gitignore b/.gitignore
index 77b91d586..97132aff7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,6 +12,7 @@ tests/test_output
# Dependencies
smartsim/_core/.third-party
smartsim/_core/.dragon
+smartsim/_core/build
# Docs
_build
diff --git a/README.md b/README.md
index c0986042e..610d6608c 100644
--- a/README.md
+++ b/README.md
@@ -643,11 +643,11 @@ from C, C++, Fortran and Python with the SmartRedis Clients:
1.2.7 |
PyTorch |
- 2.0.1 |
+ 2.1.0 |
TensorFlow\Keras |
- 2.13.1 |
+ 2.15.0 |
ONNX |
diff --git a/doc/changelog.md b/doc/changelog.md
index 26388a05e..8dcb08d3a 100644
--- a/doc/changelog.md
+++ b/doc/changelog.md
@@ -9,6 +9,39 @@ Jump to:
## SmartSim
+### Cuda 12 and ROCm support branch
+
+To be merged into `develop` at some future point in time
+
+Description
+
+- Refactor to the RedisAI build to allow more flexibility in versions
+ and sources of ML backends
+- Add Dockerfiles with GPU support
+- Fine grain build support for GPUs
+- Update Torch to 2.1.0, Tensorflow to 2.15.0
+- Better error messages in build process
+
+Detailed Notes
+
+- The RedisAIBuilder class was completely overhauled to allow users to
+ express a wider range of support for hardware/software stacks. This
+ will be extended to support ROCm, CUDA-11, and CUDA-12.
+- Versions for each of these packages are no longer specified in an
+ internal class. Instead a default set of JSON files specifies the
+ sources and versions. Users can specify their own custom specifications
+ at smart build time
+- Two new Dockerfiles are now provided (one each for 11.8 and 12.1) that
+ can be used to build a container to run the tutorials. No HPC support
+ should be expected at this time
+- SmartSim can now be built using Cuda version 11.8 or Cuda 12.1 by specifying
+ `smart build --device=cuda118` or `smart build --device=cuda121`. The
+ original `smart build --device=gpu` will default to using Cuda 11.8.
+- As a result of the previous change, SmartSim now requires C++17 and a
+ minimum Cuda version of 11.8 in order to build Torch 2.1.0.
+- Error messages were not being interpolated correctly. This has been
+ addressed to provide more context when exposing error messages to users.
+
### Development branch
To be released at some future point in time
diff --git a/doc/installation_instructions/basic.rst b/doc/installation_instructions/basic.rst
index 02c17e1fd..226ccb085 100644
--- a/doc/installation_instructions/basic.rst
+++ b/doc/installation_instructions/basic.rst
@@ -18,7 +18,7 @@ Prerequisites
Basic
=====
-The base prerequisites to install SmartSim and SmartRedis are:
+The base prerequisites to install SmartSim and SmartRedis with CPU-only support are:
- Python 3.9-3.11
- Pip
@@ -27,13 +27,11 @@ The base prerequisites to install SmartSim and SmartRedis are:
- C++ compiler
- GNU Make > 4.0
- git
- - `git-lfs`_
-
-.. _git-lfs: https://github.com/git-lfs/git-lfs?utm_source=gitlfs_site&utm_medium=installation_link&utm_campaign=gitlfs
.. note::
- GCC 5-9, 11, and 12 is recommended. There are known bugs with GCC 10.
+ GCC 9 or 11-13 is recommended (there are known issues compiling with GCC 10). For
+ CUDA 11.8, GCC 9 or 11 must be used.
.. warning::
@@ -43,66 +41,146 @@ The base prerequisites to install SmartSim and SmartRedis are:
`which gcc g++` do not point to Apple Clang.
-GPU Support
-===========
+ML Library Support
+==================
-The machine-learning backends have additional requirements in order to
-use GPUs for inference
+We currently support both Nvidia and AMD GPUs when using RedisAI for GPU inference. The support
+for these GPUs often depends on the version of the CUDA or ROCm stack that is available on your
+machine. In *most* cases, the versions are backwards compatible. If you encounter problems, please
+contact us and we can build the backend libraries for your desired version of CUDA and ROCm.
- - `CUDA Toolkit 11 (tested with 11.8) `_
- - `cuDNN 8 (tested with 8.9.1) `_
- - OS: Linux
- - GPU: Nvidia
+CPU backends are provided for Apple (both Intel and Apple Silicon) and Linux (x86_64).
-Be sure to reference the :ref:`installation notes ` for helpful
+Be sure to reference the table below to find which versions of the ML libraries are supported for
+your particular platform. Additionally, see :ref:`installation notes ` for helpful
information regarding various system types before installation.
-==================
-Supported Versions
-==================
+Linux
+-----
+.. tabs::
-.. list-table:: Supported System for Pre-built Wheels
- :widths: 50 50 50 50
- :header-rows: 1
- :align: center
+ .. group-tab:: CUDA 11
+
+ Additional requirements:
+
+ * GCC <= 11
+ * CUDA Toolkit 11.7 or 11.8
+ * cuDNN 8.9
+
+ .. list-table:: Nvidia CUDA 11
+ :widths: 50 50 50 50
+ :header-rows: 1
+ :align: center
+
+ * - Python Versions
+ - Torch
+ - Tensorflow
+ - ONNX Runtime
+ * - 3.9-3.11
+ - 2.3.1
+ - 2.14.1
+ - 1.17.3
+
+ .. group-tab:: CUDA 12
+
+ Additional requirements:
+
+ * CUDA Toolkit 12
+ * cuDNN 8.9
+
+ .. list-table:: Nvidia CUDA 12
+ :widths: 50 50 50 50
+ :header-rows: 1
+ :align: center
+
+ * - Python Versions
+ - Torch
+ - Tensorflow
+ - ONNX Runtime
+ * - 3.9-3.11
+ - 2.3.1
+ - 2.17
+ - 1.17.3
+
+ .. group-tab:: ROCm 6
+
+ .. list-table:: AMD ROCm 6.1
+ :widths: 50 50 50 50
+ :header-rows: 1
+ :align: center
+
+ * - Python Versions
+ - Torch
+ - Tensorflow
+ - ONNX Runtime
+ * - 3.9-3.11
+ - 2.4.1
+ - N/A
+ - N/A
+
+ .. group-tab:: CPU
+
+ .. list-table:: CPU-only
+ :widths: 50 50 50 50
+ :header-rows: 1
+ :align: center
+
+ * - Python Versions
+ - Torch
+ - Tensorflow
+ - ONNX Runtime
+ * - 3.9-3.11
+ - 2.4.0
+ - 2.15
+ - 1.17.3
+
+MacOSX
+------
- * - Platform
- - CPU
- - GPU
- - Python Versions
- * - MacOS
- - x86_64, aarch64
- - Not supported
- - 3.9 - 3.11
- * - Linux
- - x86_64
- - Nvidia
- - 3.9 - 3.11
+.. tabs::
+ .. group-tab:: Apple Silicon
-.. note::
+ .. list-table:: Apple Silicon ARM64 (no Metal support)
+ :widths: 50 50 50 50
+ :header-rows: 1
+ :align: center
- Users have succesfully run SmartSim on Windows using Windows Subsystem for Linux
- with Nvidia support. Generally, users should follow the Linux instructions here,
- however we make no guarantee or offer of support.
+ * - Python Versions
+ - Torch
+ - Tensorflow
+ - ONNX Runtime
+ * - 3.9-3.11
+ - 2.4.0
+ - 2.17
+ - 1.17.3
+ .. group-tab:: Intel Mac (x86)
-Native support for various machine learning libraries and their
-versions is dictated by our dependency on RedisAI_ 1.2.7.
+ .. list-table:: CPU-only
+ :widths: 50 50 50 50
+ :header-rows: 1
+ :align: center
-+------------------+----------+-------------+---------------+
-| RedisAI | PyTorch | Tensorflow | ONNX Runtime |
-+==================+==========+=============+===============+
-| 1.2.7 (default) | 2.0.1 | 2.13.1 | 1.16.3 |
-+------------------+----------+-------------+---------------+
+ * - Python Versions
+ - Torch
+ - Tensorflow
+ - ONNX Runtime
+ * - 3.9-3.11
+ - 2.2.0
+ - 2.15
+ - 1.17.3
-.. warning::
- On Apple Silicon, only the PyTorch backend is supported for now. Please contact us
- if you need support for other backends
+.. note::
-TensorFlow_ 2.0 and Keras_ are supported through `graph freezing`_.
+ Users have successfully run SmartSim on Windows using Windows Subsystem for Linux
+ with Nvidia support. Generally, users should follow the Linux instructions here,
+ however we make no guarantee or offer of support.
+
+
+TensorFlow_ and Keras_ are supported through `graph freezing`_.
ScikitLearn_ and Spark_ models are supported by SmartSim as well
through the use of the ONNX_ runtime (which is not built by
@@ -167,21 +245,8 @@ and install SmartSim from PyPI with the following command:
pip install smartsim
-If you would like SmartSim to also install python machine learning libraries
-that can be used outside SmartSim to build SmartSim-compatible models, you
-can request their installation through the ``[ml]`` optional dependencies,
-as follows:
-
-.. code-block:: bash
-
- # For bash
- pip install smartsim[ml]
- # For zsh
- pip install smartsim\[ml\]
-
-At this point, SmartSim is installed and can be used for more basic features.
-If you want to use the machine learning features of SmartSim, you will need
-to install the ML backends in the section below.
+At this point, SmartSim can be used for describing and launching experiments, but
+without any database/feature store functionality which allows for ML-enabled workflows.
Step 2: Build SmartSim
@@ -198,19 +263,19 @@ To see all the installation options:
smart --help
-CPU Install
------------
-
-To install the default ML backends for CPU, run
-
.. code-block:: bash
# run one of the following
- smart build --device cpu # install PT and TF for cpu
- smart build --device cpu --onnx # install all backends (PT, TF, ONNX) on cpu
+ smart build --device cpu # For unaccelerated AI/ML loads
+ smart build --device cuda118 # Nvidia Accelerator with CUDA 11.8
+ smart build --device cuda125 # Nvidia Accelerator with CUDA 12.5
+ smart build --device rocm57 # AMD Accelerator with ROCm 5.7.0
-By default, ``smart`` will install PyTorch and TensorFlow backends
-for use in SmartSim.
+By default, ``smart`` will install all backends available for the specified accelerator
+*and* the compatible versions of the Python packages associated with the backends. To
+disable support for a specific backend, ``smart build`` accepts the flags
+``--skip-torch``, ``--skip-tensorflow``, ``--skip-onnx`` which can also be used in
+combination.
.. note::
@@ -218,19 +283,6 @@ for use in SmartSim.
all of the previous installs for the ML backends and ``smart clobber`` will
remove all pre-built dependencies as well as the ML backends.
-
-GPU Install
------------
-
-With the proper environment setup (see :ref:`GPU support`) the only difference
-to building SmartSim with GPU support is to specify a different ``device``
-
-.. code-block:: bash
-
- # run one of the following
- smart build --device gpu # install PT and TF for gpu
- smart build --device gpu --onnx # install all backends (PT, TF, ONNX) on gpu
-
.. note::
GPU builds can be troublesome due to the way that RedisAI and the ML-package
@@ -251,9 +303,7 @@ For example, to install dragon alongside the RedisAI CPU backends, you can run
.. code-block:: bash
- # run one of the following
smart build --device cpu --dragon # install Dragon, PT and TF for cpu
- smart build --device cpu --onnx --dragon # install Dragon and all backends (PT, TF, ONNX) on cpu
.. note::
Dragon is only supported on Linux systems. For further information, you
@@ -319,35 +369,11 @@ source remains at the site of the clone instead of in site-packages.
.. code-block:: bash
cd smartsim
- pip install -e .[dev,ml] # for bash users
- pip install -e .\[dev,ml\] # for zsh users
-
-Use the now installed ``smart`` cli to install the machine learning runtimes and dragon.
-
-.. tabs::
-
- .. tab:: Linux
-
- .. code-block:: bash
-
- # run one of the following
- smart build --device cpu --onnx --dragon # install with cpu-only support
- smart build --device gpu --onnx --dragon # install with both cpu and gpu support
-
-
- .. tab:: MacOS (Intel x64)
-
- .. code-block:: bash
-
- smart build --device cpu --onnx # install all backends (PT, TF, ONNX) on gpu
-
-
- .. tab:: MacOS (Apple Silicon)
-
- .. code-block:: bash
-
- smart build --device cpu --no_tf # Only install PyTorch (TF/ONNX unsupported)
+ pip install -e .[dev] # for bash users
+ pip install -e ".[dev]" # for zsh users
+Use the now installed ``smart`` cli to install the machine learning runtimes and
+dragon. Refer to "Step 2: Build SmartSim" above.
Build the SmartRedis library
============================
diff --git a/doc/installation_instructions/platform.rst b/doc/installation_instructions/platform.rst
index 086fc2951..057a25d87 100644
--- a/doc/installation_instructions/platform.rst
+++ b/doc/installation_instructions/platform.rst
@@ -12,6 +12,8 @@ that SmartSim may be used on.
.. include:: platform/frontier.rst
+.. include:: platform/perlmutter.rst
+
.. include:: platform/cray.rst
.. include:: platform/ncar-cheyenne.rst
diff --git a/doc/installation_instructions/platform/frontier.rst b/doc/installation_instructions/platform/frontier.rst
index e23856155..d4db76a6d 100644
--- a/doc/installation_instructions/platform/frontier.rst
+++ b/doc/installation_instructions/platform/frontier.rst
@@ -1,23 +1,14 @@
OLCF Frontier
=============
-Summary
--------
-
-Frontier is an AMD CPU/AMD GPU system.
-
-As of 2023-07-06, users can use the following instructions, however we
-anticipate that all the SmartSim dependencies will be available system-wide via
-the modules system.
-
Known limitations
-----------------
We are continually working on getting all the features of SmartSim working on
Frontier, however we do have some known limitations:
-* For now, only Torch models are supported. We are working to find a recipe to
- install Tensorflow with ROCm support from scratch
+* For now, only Torch and ONNX runtime models are supported. If you need
+ Tensorflow support please contact us
* The colocated database will fail without specifying ``custom_pinning``. This
is because the default pinning assumes that processor 0 is available, but the
'low-noise' default on Frontier reserves the processor on each NUMA node.
@@ -30,8 +21,8 @@ Frontier, however we do have some known limitations:
Please raise an issue in the SmartSim Github or contact the developers if the above
issues are affecting your workflow or if you find any other problems.
-Build process
--------------
+One-time Setup
+--------------
To install the SmartRedis and SmartSim python packages on Frontier, please follow
these instructions, being sure to set the following variables
@@ -41,23 +32,22 @@ these instructions, being sure to set the following variables
export PROJECT_NAME=CHANGE_ME
export VENV_NAME=CHANGE_ME
-Then continue with the install:
+**Step 1:** Create and activate a virtual environment for SmartSim:
.. code:: bash
- module load PrgEnv-gnu-amd git-lfs cmake cray-python
- module unload xalt amd-mixed
- module load rocm/4.5.2
- export CC=gcc
- export CXX=g++
+ module load PrgEnv-gnu cray-python
+ module load rocm/6.1.3
export SCRATCH=/lustre/orion/$PROJECT_NAME/scratch/$USER/
export VENV_HOME=$SCRATCH/$VENV_NAME/
python3 -m venv $VENV_HOME
source $VENV_HOME/bin/activate
- pip install torch==1.11.0+rocm4.5.2 torchvision==0.12.0+rocm4.5.2 torchaudio==0.11.0 --extra-index-url https://download.pytorch.org/whl/rocm4.5.2
+**Step 2:** Install SmartSim in the virtual environment:
+
+.. code:: bash
cd $SCRATCH
git clone https://github.com/CrayLabs/SmartRedis.git
@@ -67,34 +57,33 @@ Then continue with the install:
# Download SmartSim and site-specific files
cd $SCRATCH
- git clone https://github.com/CrayLabs/site-deployments.git
- git clone https://github.com/CrayLabs/SmartSim.git
- cd SmartSim
- pip install -e .[dev]
+ pip install git+https://github.com/CrayLabs/SmartSim.git
-Next to finish the compilation, we need to manually modify one of the auxiliary
-cmake files that comes packaged with Torch
+**Step 3:** Build Redis, RedisAI, the backends, and all the Python packages:
.. code:: bash
- export TORCH_CMAKE_DIR=$(python -c 'import torch;print(torch.utils.cmake_prefix_path)')
- # Manual step: modify all references to the 'rocm' directory to rocm-4.5.2
- vim $TORCH_CMAKE_DIR/Caffe2/Caffe2Targets.cmake
+ smart build --device=rocm-6
-Finally, build Redis (or keydb for a more performant solution), RedisAI, and the
-machine-learning backends using:
+**Step 4:** Check that SmartSim has been installed and built correctly:
.. code:: bash
- KEYDB_FLAG="" # set this to --keydb if desired
- smart build --device gpu --torch_dir $TORCH_CMAKE_DIR --no_tf -v $(KEYDB_FLAG)
+ smart validate --device gpu
+
+The following output indicates a successful install:
+
+.. code:: bash
-Set up environment
-------------------
+ [SmartSim] INFO Verifying Tensor Transfer
+ [SmartSim] INFO Verifying Torch Backend
+ 16:26:35 login SmartSim[557020:MainThread] INFO Success!
+
+Post-installation
+-----------------
Before running SmartSim, the environment should match the one used to
-build, and some variables should be set to work around some ROCm PyTorch
-issues:
+build, and some variables should be set to optimize performance:
.. code:: bash
@@ -104,10 +93,10 @@ issues:
.. code:: bash
- module load PrgEnv-gnu-amd git-lfs cmake cray-python
- module unload xalt amd-mixed
- module load rocm/4.5.2
+ module load PrgEnv-gnu
+ module load rocm/6.1.3
+ # Optimizations for inference
export SCRATCH=/lustre/orion/$PROJECT_NAME/scratch/$USER/
export MIOPEN_USER_DB_PATH=/tmp/miopendb/
export MIOPEN_SYSTEM_DB_PATH=$MIOPEN_USER_DB_PATH
@@ -115,7 +104,6 @@ issues:
export MIOPEN_DISABLE_CACHE=1
export VENV_HOME=$SCRATCH/$VENV_NAME/
source $VENV_HOME/bin/activate
- export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$VENV_HOME/lib/python3.9/site-packages/torch/lib
Binding DBs to Slingshot
------------------------
@@ -129,17 +117,3 @@ following way:
exp = Experiment("my_exp", launcher="slurm")
orc = exp.create_database(db_nodes=3, interface=["hsn0","hsn1","hsn2","hsn3"], single_cmd=True)
-
-Running tests
--------------
-
-The same environment set to run SmartSim must be set to run tests. The
-environment variables needed to run the test suite are the following:
-
-.. code:: bash
-
- export SMARTSIM_TEST_ACCOUNT=PROJECT_NAME # Change this to above
- export SMARTSIM_TEST_LAUNCHER=slurm
- export SMARTSIM_TEST_DEVICE=gpu
- export SMARTSIM_TEST_PORT=6789
- export SMARTSIM_TEST_INTERFACE="hsn0,hsn1,hsn2,hsn3"
diff --git a/doc/installation_instructions/platform/perlmutter.rst b/doc/installation_instructions/platform/perlmutter.rst
new file mode 100644
index 000000000..6d1e22e1e
--- /dev/null
+++ b/doc/installation_instructions/platform/perlmutter.rst
@@ -0,0 +1,55 @@
+NERSC Perlmutter
+================
+
+One-time Setup
+--------------
+
+To install SmartSim on Perlmutter, follow these steps:
+
+**Step 1:** Create and activate a conda environment for SmartSim:
+
+.. code:: bash
+
+ module load conda
+ conda create -n smartsim python=3.11
+ conda activate smartsim
+
+**Step 2:** Install SmartSim in the conda environment:
+
+.. code:: bash
+
+ pip install git+https://github.com/CrayLabs/SmartSim.git
+
+**Step 3:** Build Redis, RedisAI, the backends, and all the Python packages:
+
+.. code:: bash
+
+ module load cudatoolkit/12.2 cudnn/8.9.3_cuda12
+ smart build --device=cuda-12
+
+**Step 4:** Check that SmartSim has been installed and built correctly:
+
+.. code:: bash
+
+ smart validate --device gpu
+
+The following output indicates a successful install:
+
+.. code:: bash
+
+ [SmartSim] INFO Verifying Tensor Transfer
+ [SmartSim] INFO Verifying Torch Backend
+ [SmartSim] INFO Verifying ONNX Backend
+ [SmartSim] INFO Verifying TensorFlow Backend
+ 16:26:35 login SmartSim[557020:MainThread] INFO Success!
+
+Post-installation
+-----------------
+
+After completing the above steps to install SmartSim in a conda environment, you
+can reload the conda environment by running the following commands:
+
+.. code:: bash
+
+ module load conda cudatoolkit/12.2 cudnn/8.9.3_cuda12
+ conda activate smartsim
diff --git a/doc/installation_instructions/site-install.rst b/doc/installation_instructions/site-install.rst
index 26ecd6c13..53e0ff8bf 100644
--- a/doc/installation_instructions/site-install.rst
+++ b/doc/installation_instructions/site-install.rst
@@ -11,5 +11,5 @@ from source with the following steps replacing ``COMPILER_VERSION`` and
module use -a /lus/scratch/smartsim/local/modulefiles
module load cudatoolkit/11.8 cudnn smartsim-deps/COMPILER_VERSION/SMARTSIM_VERSION
- pip install smartsim[ml]
- smart build --only_python_packages --device gpu [--onnx]
+ pip install smartsim
+ smart build --skip-backends --device gpu [--onnx]
diff --git a/doc/tutorials/ml_inference/Inference-in-SmartSim.ipynb b/doc/tutorials/ml_inference/Inference-in-SmartSim.ipynb
index 2d19cab13..2b5f0a3a5 100644
--- a/doc/tutorials/ml_inference/Inference-in-SmartSim.ipynb
+++ b/doc/tutorials/ml_inference/Inference-in-SmartSim.ipynb
@@ -132,7 +132,7 @@
"\n",
"ML Backends Requested\n",
"╒════════════╤════════╤══════╕\n",
- "│ PyTorch │ 2.0.1 │ \u001b[32mTrue\u001b[0m │\n",
+ "│ PyTorch │ 2.1.0 │ \u001b[32mTrue\u001b[0m │\n",
"│ TensorFlow │ 2.13.1 │ \u001b[32mTrue\u001b[0m │\n",
"│ ONNX │ 1.14.1 │ \u001b[32mTrue\u001b[0m │\n",
"╘════════════╧════════╧══════╛\n",
diff --git a/docker/prod-cuda11/Dockerfile b/docker/prod-cuda11/Dockerfile
new file mode 100644
index 000000000..ef73e2e01
--- /dev/null
+++ b/docker/prod-cuda11/Dockerfile
@@ -0,0 +1,61 @@
+# BSD 2-Clause License
+#
+# Copyright (c) 2021-2024, Hewlett Packard Enterprise
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+FROM ubuntu:22.04
+
+LABEL maintainer="Cray Labs"
+LABEL org.opencontainers.image.source https://github.com/CrayLabs/SmartSim
+
+ARG DEBIAN_FRONTEND="noninteractive"
+ENV TZ=US/Seattle
+
+# Make basic dependencies
+RUN apt-get update \
+ && apt-get install --no-install-recommends -y build-essential \
+ git gcc make git-lfs wget libopenmpi-dev openmpi-bin unzip \
+ python3-pip python3 python3-dev cmake wget apt-utils
+
+# # Install Cudatoolkit 11.8
+ENV TERM="xterm"
+RUN wget https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run && \
+ chmod +x ./cuda_11.8.0_520.61.05_linux.run && \
+ ./cuda_11.8.0_520.61.05_linux.run --silent --toolkit && \
+ rm ./cuda_11.8.0_520.61.05_linux.run
+
+# Install cuDNN 8.9.7
+RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/libcudnn8_8.9.7.29-1+cuda11.8_amd64.deb && \
+ dpkg -i libcudnn8_8.9.7.29-1+cuda11.8_amd64.deb && \
+ rm ./libcudnn8_8.9.7.29-1+cuda11.8_amd64.deb
+
+ # Install SmartSim and SmartRedis
+ RUN pip install git+https://github.com/CrayLabs/SmartRedis.git && \
+ pip install "smartsim[ml] @ git+https://github.com/CrayLabs/SmartSim.git"
+
+ ENV CUDA_HOME="/usr/local/cuda/"
+ ENV PATH="${PATH}:${CUDA_HOME}/bin"
+
+ # Build ML Backends
+ RUN smart build --device=gpu --onnx
diff --git a/docker/prod-cuda12/Dockerfile b/docker/prod-cuda12/Dockerfile
new file mode 100644
index 000000000..bbdfd3513
--- /dev/null
+++ b/docker/prod-cuda12/Dockerfile
@@ -0,0 +1,64 @@
+# BSD 2-Clause License
+#
+# Copyright (c) 2021-2024, Hewlett Packard Enterprise
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+FROM ubuntu:22.04
+
+LABEL maintainer="Cray Labs"
+LABEL org.opencontainers.image.source https://github.com/CrayLabs/SmartSim
+
+ARG DEBIAN_FRONTEND="noninteractive"
+ENV TZ=US/Seattle
+
+# Make basic dependencies
+RUN apt-get update \
+ && apt-get install --no-install-recommends -y build-essential \
+ git gcc make git-lfs wget libopenmpi-dev openmpi-bin unzip \
+ python3-pip python3 python3-dev cmake wget
+
+# Install Cudatoolkit 12.5
+RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb && \
+ dpkg -i cuda-keyring_1.1-1_all.deb && \
+ apt-get update -y && \
+ apt-get install -y cuda-toolkit-12-5
+
+# Install cuDNN 8.9.7
+RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/libcudnn8_8.9.7.29-1+cuda12.2_amd64.deb && \
+ dpkg -i libcudnn8_8.9.7.29-1+cuda12.2_amd64.deb
+
+# Install SmartSim and SmartRedis
+RUN pip install git+https://github.com/CrayLabs/SmartRedis.git && \
+ pip install git+https://github.com/CrayLabs/SmartSim.git@cuda-12-support
+
+ENV CUDA_HOME="/usr/local/cuda/"
+ENV PATH="${PATH}:${CUDA_HOME}/bin"
+
+# Install machine-learning python packages consistent with RedisAI
+# Note: pytorch gets installed in the smart build step
+# This step will be deprecated in a future update
+RUN pip install tensorflow==2.15.0
+
+# Build ML Backends
+RUN smart build --device=cuda121
diff --git a/setup.py b/setup.py
index 42892ed7a..5b23fca4c 100644
--- a/setup.py
+++ b/setup.py
@@ -137,7 +137,7 @@ class BuildError(Exception):
"types-redis",
"types-tabulate",
"types-tqdm",
- "types-tensorflow==2.12.0.9",
+ "types-tensorflow",
"types-setuptools",
"typing_extensions>=4.1.0",
],
@@ -151,7 +151,7 @@ class BuildError(Exception):
"nbsphinx==0.9.3",
"docutils==0.18.1",
"torch==2.0.1",
- "tensorflow==2.13.1",
+ "tensorflow>=2.14,<3.0",
"ipython",
"jinja2==3.1.2",
"sphinx-design",
@@ -159,8 +159,6 @@ class BuildError(Exception):
"sphinx-autodoc-typehints",
"myst_parser",
],
- # see smartsim/_core/_install/buildenv.py for more details
- **versions.ml_extras_required(),
}
@@ -175,10 +173,11 @@ class BuildError(Exception):
"redis>=4.5",
"tqdm>=4.50.2",
"filelock>=3.4.2",
- "protobuf~=3.20",
+ "GitPython<=3.1.43",
+ "protobuf<=3.20.3",
"jinja2>=3.1.2",
"watchdog>4,<5",
- "pydantic==1.10.14",
+ "pydantic>2",
"pyzmq>=25.1.2",
"pygithub>=2.3.0",
"numpy<2",
diff --git a/smartsim/_core/_cli/build.py b/smartsim/_core/_cli/build.py
index 951521f17..5d094b72f 100644
--- a/smartsim/_core/_cli/build.py
+++ b/smartsim/_core/_cli/build.py
@@ -25,26 +25,34 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import argparse
+import importlib.metadata
+import operator
import os
-import platform
-import sys
+import re
+import shutil
+import textwrap
import typing as t
from pathlib import Path
from tabulate import tabulate
from smartsim._core._cli.scripts.dragon_install import install_dragon
-from smartsim._core._cli.utils import SMART_LOGGER_FORMAT, color_bool, pip
+from smartsim._core._cli.utils import SMART_LOGGER_FORMAT
from smartsim._core._install import builder
-from smartsim._core._install.buildenv import (
- BuildEnv,
- DbEngine,
- SetupError,
- Version_,
- VersionConflictError,
- Versioner,
+from smartsim._core._install.buildenv import BuildEnv, DbEngine, Version_, Versioner
+from smartsim._core._install.mlpackages import (
+ DEFAULT_MLPACKAGE_PATH,
+ DEFAULT_MLPACKAGES,
+ MLPackageCollection,
+ load_platform_configs,
)
-from smartsim._core._install.builder import BuildError, Device
+from smartsim._core._install.platform import (
+ Architecture,
+ Device,
+ OperatingSystem,
+ Platform,
+)
+from smartsim._core._install.redisaiBuilder import RedisAIBuilder
from smartsim._core.config import CONFIG
from smartsim._core.utils.helpers import installed_redisai_backends
from smartsim.error import SSConfigError
@@ -55,25 +63,6 @@
# NOTE: all smartsim modules need full paths as the smart cli
# may be installed into a different directory.
-_TPinningStr = t.Literal["==", "!=", ">=", ">", "<=", "<", "~="]
-
-
-def check_py_onnx_version(versions: Versioner) -> None:
- """Check Python environment for ONNX installation"""
- _check_packages_in_python_env(
- {
- "onnx": Version_(versions.ONNX),
- "skl2onnx": Version_(versions.REDISAI.skl2onnx),
- "onnxmltools": Version_(versions.REDISAI.onnxmltools),
- "scikit-learn": Version_(getattr(versions.REDISAI, "scikit-learn")),
- },
- )
-
-
-def check_py_tf_version(versions: Versioner) -> None:
- """Check Python environment for TensorFlow installation"""
- _check_packages_in_python_env({"tensorflow": Version_(versions.TENSORFLOW)})
-
def check_backends_install() -> bool:
"""Checks if backends have already been installed.
@@ -115,8 +104,6 @@ def build_database(
database_builder = builder.DatabaseBuilder(
build_env(),
jobs=build_env.JOBS,
- _os=builder.OperatingSystem.from_str(platform.system()),
- architecture=builder.Architecture.from_str(platform.machine()),
malloc=build_env.MALLOC,
verbose=verbose,
)
@@ -125,220 +112,92 @@ def build_database(
f"Building {database_name} version {versions.REDIS} "
f"from {versions.REDIS_URL}"
)
- database_builder.build_from_git(versions.REDIS_URL, versions.REDIS_BRANCH)
+ database_builder.build_from_git(
+ versions.REDIS_URL, branch=versions.REDIS_BRANCH
+ )
database_builder.cleanup()
- logger.info(f"{database_name} build complete!")
+ logger.info(f"{database_name} build complete!")
+ else:
+ logger.warning(
+ f"{database_name} was previously built, run 'smart clobber' to rebuild"
+ )
def build_redis_ai(
+ platform: Platform,
+ mlpackages: MLPackageCollection,
build_env: BuildEnv,
- versions: Versioner,
- device: Device,
- use_torch: bool = True,
- use_tf: bool = True,
- use_onnx: bool = False,
- torch_dir: t.Union[str, Path, None] = None,
- libtf_dir: t.Union[str, Path, None] = None,
- verbose: bool = False,
- torch_with_mkl: bool = True,
+ verbose: bool,
) -> None:
- # make sure user isn't trying to do something silly on MacOS
- if build_env.PLATFORM == "darwin" and device == Device.GPU:
- raise BuildError("SmartSim does not support GPU on MacOS")
-
- # decide which runtimes to build
- print("\nML Backends Requested")
- backends_table = [
- ["PyTorch", versions.TORCH, color_bool(use_torch)],
- ["TensorFlow", versions.TENSORFLOW, color_bool(use_tf)],
- ["ONNX", versions.ONNX, color_bool(use_onnx)],
- ]
- print(tabulate(backends_table, tablefmt="fancy_outline"), end="\n\n")
- print(f"Building for GPU support: {color_bool(device == Device.GPU)}\n")
-
- if not check_backends_install():
- sys.exit(1)
-
- # TORCH
- if use_torch and torch_dir:
- torch_dir = Path(torch_dir).resolve()
- if not torch_dir.is_dir():
- raise SetupError(
- f"Could not find requested user Torch installation: {torch_dir}"
- )
-
- # TF
- if use_tf and libtf_dir:
- libtf_dir = Path(libtf_dir).resolve()
- if not libtf_dir.is_dir():
- raise SetupError(
- f"Could not find requested user TF installation: {libtf_dir}"
- )
-
- build_env_dict = build_env()
-
- rai_builder = builder.RedisAIBuilder(
- build_env=build_env_dict,
- jobs=build_env.JOBS,
- _os=builder.OperatingSystem.from_str(platform.system()),
- architecture=builder.Architecture.from_str(platform.machine()),
- torch_dir=str(torch_dir) if torch_dir else "",
- libtf_dir=str(libtf_dir) if libtf_dir else "",
- build_torch=use_torch,
- build_tf=use_tf,
- build_onnx=use_onnx,
- verbose=verbose,
- torch_with_mkl=torch_with_mkl,
+ logger.info("Building RedisAI and backends...")
+ rai_builder = RedisAIBuilder(
+ platform, mlpackages, build_env, CONFIG.build_path, verbose
)
-
- if rai_builder.is_built:
- logger.info("RedisAI installed. Run `smart clean` to remove.")
- else:
- # get the build environment, update with CUDNN env vars
- # if present and building for GPU, otherwise warn the user
- if device == Device.GPU:
- gpu_env = build_env.get_cudnn_env()
- cudnn_env_vars = [
- "CUDNN_LIBRARY",
- "CUDNN_INCLUDE_DIR",
- "CUDNN_INCLUDE_PATH",
- "CUDNN_LIBRARY_PATH",
- ]
- if not gpu_env:
- logger.warning(
- "CUDNN environment variables not found.\n"
- f"Looked for {cudnn_env_vars}"
- )
- else:
- build_env_dict.update(gpu_env)
- # update RAI build env with cudnn env vars
- rai_builder.env = build_env_dict
-
- logger.info(
- f"Building RedisAI version {versions.REDISAI}"
- f" from {versions.REDISAI_URL}"
- )
-
- # NOTE: have the option to add other builds here in the future
- # like "from_tarball"
- rai_builder.build_from_git(
- versions.REDISAI_URL, versions.REDISAI_BRANCH, device
- )
- logger.info("ML Backends and RedisAI build complete!")
-
-
-def check_py_torch_version(versions: Versioner, device: Device = Device.CPU) -> None:
- """Check Python environment for TensorFlow installation"""
- if BuildEnv.is_macos():
- if device == Device.GPU:
- raise BuildError("SmartSim does not support GPU on MacOS")
- device_suffix = ""
- else: # linux
- if device == Device.CPU:
- device_suffix = versions.TORCH_CPU_SUFFIX
- elif device == Device.GPU:
- device_suffix = versions.TORCH_CUDA_SUFFIX
- else:
- raise BuildError("Unrecognized device requested")
-
- torch_deps = {
- "torch": Version_(f"{versions.TORCH}{device_suffix}"),
- "torchvision": Version_(f"{versions.TORCHVISION}{device_suffix}"),
+ rai_builder.build()
+ rai_builder.cleanup_build()
+
+
+def parse_requirement(
+ requirement: str,
+) -> t.Tuple[str, t.Optional[str], t.Callable[[Version_], bool]]:
+ operators = {
+ "==": operator.eq,
+ "<=": operator.le,
+ ">=": operator.ge,
+ "<": operator.lt,
+ ">": operator.gt,
}
- missing, conflicts = _assess_python_env(
- torch_deps,
- package_pinning="==",
- validate_installed_version=_create_torch_version_validator(
- with_suffix=device_suffix
- ),
+ semantic_version_pattern = r"\d+(?:\.\d+(?:\.\d+)?)?([^\s]*)"
+ pattern = (
+ r"^" # Start
+ r"([a-zA-Z0-9_\-]+)" # Package name
+ r"(?:\[[a-zA-Z0-9_\-,]+\])?" # Any extras
+ r"(?:([<>=!~]{1,2})" # Pinning string
+ rf"({semantic_version_pattern}))?" # A version number
+ r"$" # End
)
+ match = re.match(pattern, requirement)
+ if match is None:
+ raise ValueError(f"Invalid requirement string: {requirement}")
+ module_name, cmp_op, version_str, suffix = match.groups()
+ version = Version_(version_str) if version_str is not None else None
+ if cmp_op is None:
+ is_compatible = lambda _: True # pylint: disable=unnecessary-lambda-assignment
+ elif (cmp := operators.get(cmp_op, None)) is None:
+ raise ValueError(f"Unrecognized comparison operator: {cmp_op}")
+ else:
- if len(missing) == len(torch_deps) and not conflicts:
- # All PyTorch deps are not installed and there are no conflicting
- # python packages. We can try to install torch deps into the current env.
- logger.info(
- "Torch version not found in python environment. "
- "Attempting to install via `pip`"
- )
- wheel_device = (
- device.value if device == Device.CPU else device_suffix.replace("+", "")
- )
- pip(
- "install",
- "--extra-index-url",
- f"https://download.pytorch.org/whl/{wheel_device}",
- *(f"{package}=={version}" for package, version in torch_deps.items()),
- )
- elif missing or conflicts:
- logger.warning(_format_incompatible_python_env_message(missing, conflicts))
-
-
-def _create_torch_version_validator(
- with_suffix: str,
-) -> t.Callable[[str, t.Optional[Version_]], bool]:
- def check_torch_version(package: str, version: t.Optional[Version_]) -> bool:
- if not BuildEnv.check_installed(package, version):
- return False
- # Default check only looks at major/minor version numbers,
- # Torch requires we look at the patch as well
- installed = BuildEnv.get_py_package_version(package)
- if with_suffix and with_suffix not in installed.patch:
- raise VersionConflictError(
- package,
- installed,
- version or Version_(f"X.X.X{with_suffix}"),
- msg=(
- f"{package}=={installed} does not satisfy device "
- f"suffix requirement: {with_suffix}"
- ),
+ def is_compatible(other: Version_) -> bool:
+ assert version is not None # For the type checker; should always be true
+ match_ = re.match(rf"^{semantic_version_pattern}$", other)
+ return (
+ cmp(other, version) and match_ is not None and match_.group(1) == suffix
)
- return True
- return check_torch_version
+ return module_name, f"{cmp_op}{version}" if version else None, is_compatible
-def _check_packages_in_python_env(
- packages: t.Mapping[str, t.Optional[Version_]],
- package_pinning: _TPinningStr = "==",
- validate_installed_version: t.Optional[
- t.Callable[[str, t.Optional[Version_]], bool]
- ] = None,
-) -> None:
- # TODO: Do not like how the default validation function will always look for
- # a `==` pinning. Maybe turn `BuildEnv.check_installed` into a factory
- # that takes a pinning and returns an appropriate validation fn?
- validate_installed_version = validate_installed_version or BuildEnv.check_installed
- missing, conflicts = _assess_python_env(
- packages,
- package_pinning,
- validate_installed_version,
- )
+def check_ml_python_packages(packages: MLPackageCollection) -> None:
+ missing = []
+ conflicts = []
+
+ for package in packages.values():
+ for requirement in package.python_packages:
+ module_name, version_spec, is_compatible = parse_requirement(requirement)
+ try:
+ installed = BuildEnv.get_py_package_version(module_name)
+ if not is_compatible(installed):
+ conflicts.append(
+ f"{module_name}: {installed} is installed, "
+ f"but {version_spec or 'Any'} is required"
+ )
+ except importlib.metadata.PackageNotFoundError:
+ missing.append(module_name)
if missing or conflicts:
logger.warning(_format_incompatible_python_env_message(missing, conflicts))
-def _assess_python_env(
- packages: t.Mapping[str, t.Optional[Version_]],
- package_pinning: _TPinningStr,
- validate_installed_version: t.Callable[[str, t.Optional[Version_]], bool],
-) -> t.Tuple[t.List[str], t.List[str]]:
- missing: t.List[str] = []
- conflicts: t.List[str] = []
-
- for name, version in packages.items():
- spec = f"{name}{package_pinning}{version}" if version else name
- try:
- if not validate_installed_version(name, version):
- # Not installed!
- missing.append(spec)
- except VersionConflictError:
- # Incompatible version found
- conflicts.append(spec)
-
- return missing, conflicts
-
-
def _format_incompatible_python_env_message(
missing: t.Collection[str], conflicting: t.Collection[str]
) -> str:
@@ -349,20 +208,24 @@ def _format_incompatible_python_env_message(
missing_str = fmt_list("Missing", missing)
conflict_str = fmt_list("Conflicting", conflicting)
sep = "\n" if missing_str and conflict_str else ""
- return (
- "Python Env Status Warning!\n"
- "Requested Packages are Missing or Conflicting:\n\n"
- f"{missing_str}{sep}{conflict_str}\n\n"
- "Consider installing packages at the requested versions via `pip` or "
- "uninstalling them, installing SmartSim with optional ML dependencies "
- "(`pip install smartsim[ml]`), and running `smart clean && smart build ...`"
- )
+
+ return textwrap.dedent(f"""\
+ Python Package Warning:
+
+ Requested packages are missing or have a version mismatch with
+ their respective backend:
+
+ {missing_str}{sep}{conflict_str}
+
+ Consider uninstalling any conflicting packages and rerunning
+ `smart build` if you encounter issues.
+ """)
def _configure_keydb_build(versions: Versioner) -> None:
"""Configure the redis versions to be used during the build operation"""
versions.REDIS = Version_("6.2.0")
- versions.REDIS_URL = "https://github.com/EQ-Alpha/KeyDB"
+ versions.REDIS_URL = "https://github.com/EQ-Alpha/KeyDB.git"
versions.REDIS_BRANCH = "v6.2.0"
CONFIG.conf_path = Path(CONFIG.core_path, "config", "keydb.conf")
@@ -376,14 +239,33 @@ def _configure_keydb_build(versions: Versioner) -> None:
def execute(
args: argparse.Namespace, _unparsed_args: t.Optional[t.List[str]] = None, /
) -> int:
+
+ # Unpack various arguments
verbose = args.v
keydb = args.keydb
- device = Device(args.device.lower())
+ device = Device.from_str(args.device.lower())
is_dragon_requested = args.dragon
- # torch and tf build by default
- pt = not args.no_pt # pylint: disable=invalid-name
- tf = not args.no_tf # pylint: disable=invalid-name
- onnx = args.onnx
+
+ if Path(CONFIG.build_path).exists():
+ logger.warning(f"Build path already exists, removing: {CONFIG.build_path}")
+ shutil.rmtree(CONFIG.build_path)
+
+ # The user should never have to specify the OS and Architecture
+ current_platform = Platform(
+ OperatingSystem.autodetect(), Architecture.autodetect(), device
+ )
+
+ # Configure the ML Packages
+ configs = load_platform_configs(Path(args.config_dir))
+ mlpackages = configs[current_platform]
+
+ # Build all backends by default; pop off the ones that the user wants skipped
+ if args.skip_torch and "libtorch" in mlpackages:
+ mlpackages.pop("libtorch")
+ if args.skip_tensorflow and "libtensorflow" in mlpackages:
+ mlpackages.pop("libtensorflow")
+ if args.skip_onnx and "onnxruntime" in mlpackages:
+ mlpackages.pop("onnxruntime")
build_env = BuildEnv(checks=True)
logger.info("Running SmartSim build process...")
@@ -409,6 +291,9 @@ def execute(
version_names = list(vers.keys())
print(tabulate(vers, headers=version_names, tablefmt="github"), "\n")
+ logger.info("ML Packages")
+ print(mlpackages)
+
if is_dragon_requested:
install_to = CONFIG.core_path / ".dragon"
return_code = install_dragon(install_to)
@@ -420,42 +305,25 @@ def execute(
else:
logger.warning("Dragon installation failed")
- try:
- if not args.only_python_packages:
- # REDIS/KeyDB
- build_database(build_env, versions, keydb, verbose)
-
- # REDISAI
- build_redis_ai(
- build_env,
- versions,
- device,
- pt,
- tf,
- onnx,
- args.torch_dir,
- args.libtensorflow_dir,
- verbose=verbose,
- torch_with_mkl=args.torch_with_mkl,
- )
- except (SetupError, BuildError) as e:
- logger.error(str(e))
- return os.EX_SOFTWARE
+ # REDIS/KeyDB
+ build_database(build_env, versions, keydb, verbose)
+
+ if (CONFIG.lib_path / "redisai.so").exists():
+ logger.warning("RedisAI was previously built, run 'smart clean' to rebuild")
+ elif not args.skip_backends:
+ build_redis_ai(current_platform, mlpackages, build_env, verbose)
+ else:
+ logger.info("Skipping compilation of RedisAI and backends")
backends = installed_redisai_backends()
backends_str = ", ".join(s.capitalize() for s in backends) if backends else "No"
- logger.info(f"{backends_str} backend(s) built")
-
- try:
- if "torch" in backends:
- check_py_torch_version(versions, device)
- if "tensorflow" in backends:
- check_py_tf_version(versions)
- if "onnxruntime" in backends:
- check_py_onnx_version(versions)
- except (SetupError, BuildError) as e:
- logger.error(str(e))
- return os.EX_SOFTWARE
+ logger.info(f"{backends_str} backend(s) available")
+
+ if not args.skip_python_packages:
+ for package in mlpackages.values():
+ logger.info(f"Installing python packages for {package.name}")
+ package.pip_install(quiet=not verbose)
+ check_ml_python_packages(mlpackages)
logger.info("SmartSim build complete!")
return os.EX_OK
@@ -463,7 +331,14 @@ def execute(
def configure_parser(parser: argparse.ArgumentParser) -> None:
"""Builds the parser for the command"""
- warn_usage = "(ONLY USE IF NEEDED)"
+
+ available_devices = []
+ for platform in DEFAULT_MLPACKAGES:
+ if (platform.operating_system == OperatingSystem.autodetect()) and (
+ platform.architecture == Architecture.autodetect()
+ ):
+ available_devices.append(platform.device.value)
+
parser.add_argument(
"-v",
action="store_true",
@@ -474,7 +349,7 @@ def configure_parser(parser: argparse.ArgumentParser) -> None:
"--device",
type=str.lower,
default=Device.CPU.value,
- choices=[device.value for device in Device],
+ choices=available_devices,
help="Device to build ML runtimes for",
)
parser.add_argument(
@@ -484,40 +359,35 @@ def configure_parser(parser: argparse.ArgumentParser) -> None:
help="Install the dragon runtime",
)
parser.add_argument(
- "--only_python_packages",
+ "--skip-python-packages",
action="store_true",
- default=False,
- help="Only evaluate the python packages (i.e. skip building backends)",
+ help="Do not install the python packages that match the backends",
)
parser.add_argument(
- "--no_pt",
+ "--skip-backends",
action="store_true",
- default=False,
- help="Do not build PyTorch backend",
+ help="Do not compile RedisAI and the backends",
)
parser.add_argument(
- "--no_tf",
+ "--skip-torch",
action="store_true",
- default=False,
- help="Do not build TensorFlow backend",
+ help="Do not build PyTorch backend",
)
parser.add_argument(
- "--onnx",
+ "--skip-tensorflow",
action="store_true",
- default=False,
- help="Build ONNX backend (off by default)",
+ help="Do not build TensorFlow backend",
)
parser.add_argument(
- "--torch_dir",
- default=None,
- type=str,
- help=f"Path to custom /torch/share/cmake/Torch/ directory {warn_usage}",
+ "--skip-onnx",
+ action="store_true",
+ help="Do not build the ONNX backend",
)
parser.add_argument(
- "--libtensorflow_dir",
- default=None,
+ "--config-dir",
+ default=str(DEFAULT_MLPACKAGE_PATH),
type=str,
- help=f"Path to custom libtensorflow directory {warn_usage}",
+ help="Path to directory with JSON files describing platform and packages",
)
parser.add_argument(
"--keydb",
@@ -525,9 +395,3 @@ def configure_parser(parser: argparse.ArgumentParser) -> None:
default=False,
help="Build KeyDB instead of Redis",
)
- parser.add_argument(
- "--no_torch_with_mkl",
- dest="torch_with_mkl",
- action="store_false",
- help="Do not build Torch with Intel MKL",
- )
diff --git a/smartsim/_core/_cli/scripts/dragon_install.py b/smartsim/_core/_cli/scripts/dragon_install.py
index a2e8ed36f..8028b8ecf 100644
--- a/smartsim/_core/_cli/scripts/dragon_install.py
+++ b/smartsim/_core/_cli/scripts/dragon_install.py
@@ -7,7 +7,7 @@
from github.GitReleaseAsset import GitReleaseAsset
from smartsim._core._cli.utils import pip
-from smartsim._core._install.builder import WebTGZ
+from smartsim._core._install.utils import retrieve
from smartsim._core.config import CONFIG
from smartsim._core.utils.helpers import check_platform, is_crayex_platform
from smartsim.error.errors import SmartSimCLIActionCancelled
@@ -159,8 +159,7 @@ def retrieve_asset(working_dir: pathlib.Path, asset: GitReleaseAsset) -> pathlib
if working_dir.exists() and list(working_dir.rglob("*.whl")):
return working_dir
- archive = WebTGZ(asset.browser_download_url)
- archive.extract(working_dir)
+ retrieve(asset.browser_download_url, working_dir)
logger.debug(f"Retrieved {asset.browser_download_url} to {working_dir}")
return working_dir
diff --git a/smartsim/_core/_cli/validate.py b/smartsim/_core/_cli/validate.py
index 6d7c72f17..b7905b773 100644
--- a/smartsim/_core/_cli/validate.py
+++ b/smartsim/_core/_cli/validate.py
@@ -27,7 +27,6 @@
import argparse
import contextlib
import io
-import multiprocessing as mp
import os
import os.path
import tempfile
@@ -39,7 +38,7 @@
from smartsim import Experiment
from smartsim._core._cli.utils import SMART_LOGGER_FORMAT
-from smartsim._core._install.builder import Device
+from smartsim._core.types import Device
from smartsim._core.utils.helpers import installed_redisai_backends
from smartsim._core.utils.network import find_free_port
from smartsim.log import get_logger
@@ -207,25 +206,8 @@ def _make_managed_local_orc(
def _test_tf_install(client: Client, tmp_dir: str, device: Device) -> None:
- recv_conn, send_conn = mp.Pipe(duplex=False)
- # Build the model in a subproc so that keras does not hog the gpu
- proc = mp.Process(target=_build_tf_frozen_model, args=(send_conn, tmp_dir))
- proc.start()
-
- # do not need the sending connection in this proc anymore
- send_conn.close()
-
- proc.join(timeout=600)
- if proc.is_alive():
- proc.terminate()
- raise Exception("Failed to build a simple keras model within 2 minutes")
- try:
- model_path, inputs, outputs = recv_conn.recv()
- except EOFError as e:
- raise Exception(
- "Failed to receive serialized model from subprocess. "
- "Is the `tensorflow` python package installed?"
- ) from e
+
+ model_path, inputs, outputs = _build_tf_frozen_model(tmp_dir)
client.set_model_from_file(
"keras-fcn",
@@ -240,8 +222,9 @@ def _test_tf_install(client: Client, tmp_dir: str, device: Device) -> None:
client.get_tensor("keras-output")
-def _build_tf_frozen_model(conn: "Connection", tmp_dir: str) -> None:
- from tensorflow import keras
+def _build_tf_frozen_model(tmp_dir: str) -> t.Tuple[str, t.List[str], t.List[str]]:
+
+ from tensorflow import keras # pylint: disable=no-name-in-module
from smartsim.ml.tf import freeze_model
@@ -258,7 +241,7 @@ def _build_tf_frozen_model(conn: "Connection", tmp_dir: str) -> None:
optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"]
)
model_path, inputs, outputs = freeze_model(fcn, tmp_dir, "keras_model.pb")
- conn.send((model_path, inputs, outputs))
+ return model_path, inputs, outputs
def _test_torch_install(client: Client, device: Device) -> None:
@@ -283,10 +266,12 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
net.eval()
forward_input = torch.rand(1, 1, 3, 3).to(device_)
- traced = torch.jit.trace(net, forward_input) # type: ignore[no-untyped-call]
+ traced = torch.jit.trace( # type: ignore[no-untyped-call, unused-ignore]
+ net, forward_input
+ )
buffer = io.BytesIO()
- torch.jit.save(traced, buffer) # type: ignore[no-untyped-call]
+ torch.jit.save(traced, buffer) # type: ignore[no-untyped-call, unused-ignore]
model = buffer.getvalue()
client.set_model("torch-nn", model, backend="TORCH", device=device.value.upper())
diff --git a/smartsim/_core/_install/buildenv.py b/smartsim/_core/_install/buildenv.py
index a066ab16a..ac5c345fc 100644
--- a/smartsim/_core/_install/buildenv.py
+++ b/smartsim/_core/_install/buildenv.py
@@ -55,30 +55,6 @@ class SetupError(Exception):
"""
-class VersionConflictError(SetupError):
- """An error for when version numbers of some library/package/program/etc
- do not match and build may not be able to continue
- """
-
- def __init__(
- self,
- name: str,
- current_version: "Version_",
- target_version: "Version_",
- msg: t.Optional[str] = None,
- ) -> None:
- if msg is None:
- msg = (
- f"Incompatible version for {name} detected: "
- f"{name} {target_version} requested but {name} {current_version} "
- "installed."
- )
- super().__init__(msg)
- self.name = name
- self.current_version = current_version
- self.target_version = target_version
-
-
# so as to not conflict with pkg_resources.packaging.version.Version
# pylint: disable-next=invalid-name
class Version_(str):
@@ -156,74 +132,6 @@ def get_env(var: str, default: str) -> str:
return os.environ.get(var, default)
-class RedisAIVersion(Version_):
- """A subclass of Version_ that holds the dependency sets for RedisAI
-
- this class serves two purposes:
-
- 1. It is used to populate the [ml] ``extras_require`` of the setup.py.
- This is because the RedisAI version will determine which ML based
- dependencies are required.
-
- 2. Used to set the default values for PyTorch, TF, and ONNX
- given the SMARTSIM_REDISAI env var set by the user.
-
- NOTE: Torch requires additional information depending on whether
- CPU or GPU support is requested
- """
-
- defaults = {
- "1.2.7": {
- "tensorflow": "2.13.1",
- "onnx": "1.14.1",
- "skl2onnx": "1.16.0",
- "onnxmltools": "1.12.0",
- "scikit-learn": "1.3.2",
- "torch": "2.0.1",
- "torch_cpu_suffix": "+cpu",
- "torch_cuda_suffix": "+cu117",
- "torchvision": "0.15.2",
- },
- }
-
- def __init__(self, vers: str) -> None: # pylint: disable=super-init-not-called
- min_rai_version = min(Version_(ver) for ver in self.defaults)
- if min_rai_version > vers:
- raise SetupError(
- f"RedisAI version must be greater than or equal to {min_rai_version}"
- )
- if vers not in self.defaults:
- if vers.startswith("1.2"):
- # resolve to latest version for 1.2.x
- # the str representation will still be 1.2.x
- self.version = "1.2.7"
- else:
- raise SetupError(
- (
- f"Invalid RedisAI version {vers}. Options are "
- f"{self.defaults.keys()}"
- )
- )
- else:
- self.version = vers
-
- def __getattr__(self, name: str) -> str:
- try:
- return self.defaults[self.version][name]
- except KeyError:
- raise AttributeError(
- f"'{type(self).__name__}' object has no attribute '{name}'\n\n"
- "This is likely a problem with the SmartSim build process;"
- "if this problem persists please log a new issue at "
- "https://github.com/CrayLabs/SmartSim/issues "
- "or get in contact with us at "
- "https://www.craylabs.org/docs/community.html"
- ) from None
-
- def get_defaults(self) -> t.Dict[str, str]:
- return self.defaults[self.version].copy()
-
-
class Versioner:
"""Versioner is responsible for managing all the versions
within SmartSim including SmartSim itself.
@@ -242,9 +150,7 @@ class Versioner:
``smart build`` command to determine which dependency versions
to look for and download.
- Default versions for SmartSim, Redis, and RedisAI are
- all set here. Setting a default version for RedisAI also dictates
- default versions of the machine learning libraries.
+ Default versions for SmartSim, Redis, and RedisAI are specified here.
"""
# compatible Python version
@@ -256,61 +162,24 @@ class Versioner:
# Redis
REDIS = Version_(get_env("SMARTSIM_REDIS", "7.2.4"))
- REDIS_URL = get_env("SMARTSIM_REDIS_URL", "https://github.com/redis/redis.git/")
+ REDIS_URL = get_env("SMARTSIM_REDIS_URL", "https://github.com/redis/redis.git")
REDIS_BRANCH = get_env("SMARTSIM_REDIS_BRANCH", REDIS)
# RedisAI
- REDISAI = RedisAIVersion(get_env("SMARTSIM_REDISAI", "1.2.7"))
+ REDISAI = "1.2.7"
REDISAI_URL = get_env(
- "SMARTSIM_REDISAI_URL", "https://github.com/RedisAI/RedisAI.git/"
+ "SMARTSIM_REDISAI_URL", "https://github.com/RedisAI/RedisAI.git"
)
REDISAI_BRANCH = get_env("SMARTSIM_REDISAI_BRANCH", f"v{REDISAI}")
- # ML/DL (based on RedisAI version defaults)
- # torch can be set by the user because we download that for them
- TORCH = Version_(get_env("SMARTSIM_TORCH", REDISAI.torch))
- TORCHVISION = Version_(get_env("SMARTSIM_TORCHVIS", REDISAI.torchvision))
- TORCH_CPU_SUFFIX = Version_(get_env("TORCH_CPU_SUFFIX", REDISAI.torch_cpu_suffix))
- TORCH_CUDA_SUFFIX = Version_(
- get_env("TORCH_CUDA_SUFFIX", REDISAI.torch_cuda_suffix)
- )
-
- # TensorFlow and ONNX only use the defaults, but these are not built into
- # the RedisAI package and therefore the user is free to pick other versions.
- TENSORFLOW = Version_(REDISAI.tensorflow)
- ONNX = Version_(REDISAI.onnx)
-
def as_dict(self, db_name: DbEngine = "REDIS") -> t.Dict[str, t.Tuple[str, ...]]:
pkg_map = {
"SMARTSIM": self.SMARTSIM,
db_name: self.REDIS,
"REDISAI": self.REDISAI,
- "TORCH": self.TORCH,
- "TENSORFLOW": self.TENSORFLOW,
- "ONNX": self.ONNX,
}
return {"Packages": tuple(pkg_map), "Versions": tuple(pkg_map.values())}
- def ml_extras_required(self) -> t.Dict[str, t.List[str]]:
- """Optional ML/DL dependencies we suggest for the user.
-
- The defaults are based on the RedisAI version
- """
- ml_defaults = self.REDISAI.get_defaults()
-
- # remove torch-related fields as they are subject to change
- # by having the user change hardware (cpu/gpu)
- _torch_fields = [
- "torch",
- "torchvision",
- "torch_cpu_suffix",
- "torch_cuda_suffix",
- ]
- for field in _torch_fields:
- ml_defaults.pop(field)
-
- return {"ml": [f"{lib}=={vers}" for lib, vers in ml_defaults.items()]}
-
@staticmethod
def get_sha(setup_py_dir: Path) -> str:
"""Get the git sha of the current branch"""
@@ -385,7 +254,7 @@ def __init__(self, checks: bool = True) -> None:
self.check_dependencies()
def check_dependencies(self) -> None:
- deps = ["git", "git-lfs", "make", "wget", "cmake", self.CC, self.CXX]
+ deps = ["git", "make", "wget", "cmake", self.CC, self.CXX]
if int(self.CHECKS) == 0:
for dep in deps:
self.check_build_dependency(dep)
@@ -498,23 +367,6 @@ def check_build_dependency(command: str) -> None:
except OSError:
raise SetupError(f"{command} must be installed to build SmartSim") from None
- @classmethod
- def check_installed(
- cls, package: str, version: t.Optional[Version_] = None
- ) -> bool:
- """Check if a package is installed. If version is provided, check if
- it's a compatible version. (major and minor the same)
- """
- try:
- installed = cls.get_py_package_version(package)
- except importlib.metadata.PackageNotFoundError:
- return False
- if version:
- # detect if major or minor versions differ
- if installed.major != version.major or installed.minor != version.minor:
- raise VersionConflictError(package, installed, version)
- return True
-
@staticmethod
def get_py_package_version(package: str) -> Version_:
return Version_(importlib.metadata.version(package))
diff --git a/smartsim/_core/_install/builder.py b/smartsim/_core/_install/builder.py
index 8f5bdc557..17036e825 100644
--- a/smartsim/_core/_install/builder.py
+++ b/smartsim/_core/_install/builder.py
@@ -26,98 +26,32 @@
# pylint: disable=too-many-lines
-import concurrent.futures
-import enum
-import fileinput
-import itertools
import os
-import platform
import re
import shutil
import stat
import subprocess
-import sys
-import tarfile
-import tempfile
import typing as t
-import urllib.request
-import zipfile
-from abc import ABC, abstractmethod
-from dataclasses import dataclass
from pathlib import Path
-from shutil import which
from subprocess import SubprocessError
-# NOTE: This will be imported by setup.py and hence no smartsim related
-# items should be imported into this file.
+from smartsim._core._install.utils import retrieve
+from smartsim._core.utils import expand_exe_path
+
+if t.TYPE_CHECKING:
+ from typing_extensions import Never
# TODO: check cmake version and use system if possible to avoid conflicts
-TRedisAIBackendStr = t.Literal["tensorflow", "torch", "onnxruntime", "tflite"]
_PathLike = t.Union[str, "os.PathLike[str]"]
_T = t.TypeVar("_T")
_U = t.TypeVar("_U")
-def expand_exe_path(exe: str) -> str:
- """Takes an executable and returns the full path to that executable
-
- :param exe: executable or file
- :raises TypeError: if file is not an executable
- :raises FileNotFoundError: if executable cannot be found
- """
-
- # which returns none if not found
- in_path = which(exe)
- if not in_path:
- if os.path.isfile(exe) and os.access(exe, os.X_OK):
- return os.path.abspath(exe)
- if os.path.isfile(exe) and not os.access(exe, os.X_OK):
- raise TypeError(f"File, {exe}, is not an executable")
- raise FileNotFoundError(f"Could not locate executable {exe}")
- return os.path.abspath(in_path)
-
-
class BuildError(Exception):
pass
-class Architecture(enum.Enum):
- X64 = ("x86_64", "amd64")
- ARM64 = ("arm64",)
-
- @classmethod
- def from_str(cls, string: str, /) -> "Architecture":
- string = string.lower()
- for type_ in cls:
- if string in type_.value:
- return type_
- raise BuildError(f"Unrecognized or unsupported architecture: {string}")
-
-
-class Device(enum.Enum):
- CPU = "cpu"
- GPU = "gpu"
-
-
-class OperatingSystem(enum.Enum):
- LINUX = ("linux", "linux2")
- DARWIN = ("darwin",)
-
- @classmethod
- def from_str(cls, string: str, /) -> "OperatingSystem":
- string = string.lower()
- for type_ in cls:
- if string in type_.value:
- return type_
- raise BuildError(f"Unrecognized or unsupported operating system: {string}")
-
-
-class Platform(t.NamedTuple):
- os: OperatingSystem
- architecture: Architecture
-
-
class Builder:
"""Base class for building third-party libraries"""
@@ -135,13 +69,10 @@ def __init__(
self,
env: t.Dict[str, str],
jobs: int = 1,
- _os: OperatingSystem = OperatingSystem.from_str(platform.system()),
- architecture: Architecture = Architecture.from_str(platform.machine()),
verbose: bool = False,
) -> None:
# build environment from buildenv
self.env = env
- self._platform = Platform(_os, architecture)
# Find _core directory and set up paths
_core_dir = Path(os.path.abspath(__file__)).parent.parent
@@ -176,11 +107,6 @@ def out(self) -> t.Optional[int]:
def is_built(self) -> bool:
raise NotImplementedError
- def build_from_git(
- self, git_url: str, branch: str, device: Device = Device.CPU
- ) -> None:
- raise NotImplementedError
-
@staticmethod
def binary_path(binary: str) -> str:
binary_ = shutil.which(binary)
@@ -256,15 +182,11 @@ def __init__(
build_env: t.Optional[t.Dict[str, str]] = None,
malloc: str = "libc",
jobs: int = 1,
- _os: OperatingSystem = OperatingSystem.from_str(platform.system()),
- architecture: Architecture = Architecture.from_str(platform.machine()),
verbose: bool = False,
) -> None:
super().__init__(
build_env or {},
jobs=jobs,
- _os=_os,
- architecture=architecture,
verbose=verbose,
)
self.malloc = malloc
@@ -277,9 +199,7 @@ def is_built(self) -> bool:
keydb_files = {"keydb-server", "keydb-cli"}
return redis_files.issubset(bin_files) or keydb_files.issubset(bin_files)
- def build_from_git(
- self, git_url: str, branch: str, device: Device = Device.CPU
- ) -> None:
+ def build_from_git(self, git_url: str, branch: str) -> None:
"""Build Redis from git
:param git_url: url from which to retrieve Redis
:param branch: branch to checkout
@@ -301,23 +221,7 @@ def build_from_git(
if not self.is_valid_url(git_url):
raise BuildError(f"Malformed {database_name} URL: {git_url}")
- clone_cmd = config_git_command(
- self._platform,
- [
- self.binary_path("git"),
- "clone",
- git_url,
- "--branch",
- branch,
- "--depth",
- "1",
- database_name,
- ],
- )
-
- # clone Redis
- self.run_command(clone_cmd, cwd=self.build_dir)
-
+ retrieve(git_url, self.build_dir / database_name, branch=branch, depth=1)
# build Redis
build_cmd = [
self.binary_path("make"),
@@ -354,724 +258,3 @@ def build_from_git(
_ = expand_exe_path(str(redis_cli))
except (TypeError, FileNotFoundError) as e:
raise BuildError("Installation of redis-cli failed!") from e
-
-
-class _RAIBuildDependency(ABC):
- """An interface with a collection of magic methods so that
- ``RedisAIBuilder`` can fetch and place its own dependencies
- """
-
- @property
- @abstractmethod
- def __rai_dependency_name__(self) -> str: ...
-
- @abstractmethod
- def __place_for_rai__(self, target: _PathLike) -> Path: ...
-
- @staticmethod
- @abstractmethod
- def supported_platforms() -> t.Sequence[t.Tuple[OperatingSystem, Architecture]]: ...
-
-
-def _place_rai_dep_at(
- target: _PathLike, verbose: bool
-) -> t.Callable[[_RAIBuildDependency], Path]:
- def _place(dep: _RAIBuildDependency) -> Path:
- if verbose:
- print(f"Placing: '{dep.__rai_dependency_name__}'")
- path = dep.__place_for_rai__(target)
- if verbose:
- print(f"Placed: '{dep.__rai_dependency_name__}' at '{path}'")
- return path
-
- return _place
-
-
-class RedisAIBuilder(Builder):
- """Class to build RedisAI from Source
- Supported build method:
- - from git
- See buildenv.py for buildtime configuration of RedisAI
- version and url.
- """
-
- def __init__(
- self,
- _os: OperatingSystem = OperatingSystem.from_str(platform.system()),
- architecture: Architecture = Architecture.from_str(platform.machine()),
- build_env: t.Optional[t.Dict[str, str]] = None,
- torch_dir: str = "",
- libtf_dir: str = "",
- build_torch: bool = True,
- build_tf: bool = True,
- build_onnx: bool = False,
- jobs: int = 1,
- verbose: bool = False,
- torch_with_mkl: bool = True,
- ) -> None:
- super().__init__(
- build_env or {},
- jobs=jobs,
- _os=_os,
- architecture=architecture,
- verbose=verbose,
- )
-
- self.rai_install_path: t.Optional[Path] = None
-
- # convert to int for RAI build script
- self._torch = build_torch
- self._tf = build_tf
- self._onnx = build_onnx
- self.libtf_dir = libtf_dir
- self.torch_dir = torch_dir
-
- # extra configuration options
- self.torch_with_mkl = torch_with_mkl
-
- # Sanity checks
- self._validate_platform()
-
- def _validate_platform(self) -> None:
- unsupported = []
- if self._platform not in _DLPackRepository.supported_platforms():
- unsupported.append("DLPack")
- if self.fetch_tf and (self._platform not in _TFArchive.supported_platforms()):
- unsupported.append("Tensorflow")
- if self.fetch_onnx and (
- self._platform not in _ORTArchive.supported_platforms()
- ):
- unsupported.append("ONNX")
- if self.fetch_torch and (
- self._platform not in _PTArchive.supported_platforms()
- ):
- unsupported.append("PyTorch")
- if unsupported:
- raise BuildError(
- f"The {', '.join(unsupported)} backend(s) are not supported "
- f"on {self._platform.os} with {self._platform.architecture}"
- )
-
- @property
- def rai_build_path(self) -> Path:
- return Path(self.build_dir, "RedisAI")
-
- @property
- def is_built(self) -> bool:
- server = self.lib_path.joinpath("backends").is_dir()
- cli = self.lib_path.joinpath("redisai.so").is_file()
- return server and cli
-
- @property
- def build_torch(self) -> bool:
- return self._torch
-
- @property
- def fetch_torch(self) -> bool:
- return self.build_torch and not self.torch_dir
-
- @property
- def build_tf(self) -> bool:
- return self._tf
-
- @property
- def fetch_tf(self) -> bool:
- return self.build_tf and not self.libtf_dir
-
- @property
- def build_onnx(self) -> bool:
- return self._onnx
-
- @property
- def fetch_onnx(self) -> bool:
- return self.build_onnx
-
- def get_deps_dir_path_for(self, device: Device) -> Path:
- def fail_to_format(reason: str) -> BuildError: # pragma: no cover
- return BuildError(f"Failed to format RedisAI dependency path: {reason}")
-
- _os, architecture = self._platform
- if _os == OperatingSystem.DARWIN:
- os_ = "macos"
- elif _os == OperatingSystem.LINUX:
- os_ = "linux"
- else: # pragma: no cover
- raise fail_to_format(f"Unknown operating system: {_os}")
- if architecture == Architecture.X64:
- arch = "x64"
- elif architecture == Architecture.ARM64:
- arch = "arm64v8"
- else: # pragma: no cover
- raise fail_to_format(f"Unknown architecture: {architecture}")
- return self.rai_build_path / f"deps/{os_}-{arch}-{device.value}"
-
- def _get_deps_to_fetch_for(
- self, device: Device
- ) -> t.Tuple[_RAIBuildDependency, ...]:
- os_, arch = self._platform
- # TODO: It would be nice if the backend version numbers were declared
- # alongside the python package version numbers so that all of the
- # dependency versions were declared in single location.
- # Unfortunately importing into this module is non-trivial as it
- # is used as script in the SmartSim `setup.py`.
-
- # DLPack is always required
- fetchable_deps: t.List[_RAIBuildDependency] = [_DLPackRepository("v0.5_RAI")]
- if self.fetch_torch:
- pt_dep = _choose_pt_variant(os_)(arch, device, "2.0.1", self.torch_with_mkl)
- fetchable_deps.append(pt_dep)
- if self.fetch_tf:
- fetchable_deps.append(_TFArchive(os_, arch, device, "2.13.1"))
- if self.fetch_onnx:
- fetchable_deps.append(_ORTArchive(os_, device, "1.16.3"))
-
- return tuple(fetchable_deps)
-
- def symlink_libtf(self, device: Device) -> None:
- """Add symbolic link to available libtensorflow in RedisAI deps.
-
- :param device: cpu or gpu
- """
- rai_deps_path = sorted(
- self.rai_build_path.glob(os.path.join("deps", f"*{device.value}*"))
- )
- if not rai_deps_path:
- raise FileNotFoundError("Could not find RedisAI 'deps' directory")
-
- # There should only be one path for a given device,
- # and this should hold even if in the future we use
- # an external build of RedisAI
- rai_libtf_path = rai_deps_path[0] / "libtensorflow"
- rai_libtf_path.resolve()
- if rai_libtf_path.is_dir():
- shutil.rmtree(rai_libtf_path)
-
- os.makedirs(rai_libtf_path)
- libtf_path = Path(self.libtf_dir).resolve()
-
- # Copy include directory to deps/libtensorflow
- include_src_path = libtf_path / "include"
- if not include_src_path.exists():
- raise FileNotFoundError(f"Could not find include directory in {libtf_path}")
- os.symlink(include_src_path, rai_libtf_path / "include")
-
- # RedisAI expects to find a lib directory, which is only
- # available in some distributions.
- rai_libtf_lib_dir = rai_libtf_path / "lib"
- os.makedirs(rai_libtf_lib_dir)
- src_libtf_lib_dir = libtf_path / "lib"
- # If the lib directory existed in the libtensorflow distribution,
- # copy its content, otherwise gather library files from
- # libtensorflow base dir and copy them into destination lib dir
- if src_libtf_lib_dir.is_dir():
- library_files = sorted(src_libtf_lib_dir.glob("*"))
- if not library_files:
- raise FileNotFoundError(
- f"Could not find libtensorflow library files in {src_libtf_lib_dir}"
- )
- else:
- library_files = sorted(libtf_path.glob("lib*.so*"))
- if not library_files:
- raise FileNotFoundError(
- f"Could not find libtensorflow library files in {libtf_path}"
- )
-
- for src_file in library_files:
- dst_file = rai_libtf_lib_dir / src_file.name
- if not dst_file.is_file():
- os.symlink(src_file, dst_file)
-
- def build_from_git(
- self, git_url: str, branch: str, device: Device = Device.CPU
- ) -> None:
- """Build RedisAI from git
-
- :param git_url: url from which to retrieve RedisAI
- :param branch: branch to checkout
- :param device: cpu or gpu
- """
- # delete previous build dir (should never be there)
- if self.rai_build_path.is_dir():
- shutil.rmtree(self.rai_build_path)
-
- # Check RedisAI URL
- if not self.is_valid_url(git_url):
- raise BuildError(f"Malformed RedisAI URL: {git_url}")
-
- # clone RedisAI
- clone_cmd = config_git_command(
- self._platform,
- [
- self.binary_path("env"),
- "GIT_LFS_SKIP_SMUDGE=1",
- "git",
- "clone",
- "--recursive",
- git_url,
- "--branch",
- branch,
- "--depth=1",
- os.fspath(self.rai_build_path),
- ],
- )
-
- self.run_command(clone_cmd, out=subprocess.DEVNULL, cwd=self.build_dir)
- self._fetch_deps_for(device)
-
- if self.libtf_dir and device.value:
- self.symlink_libtf(device)
-
- build_cmd = self._rai_build_env_prefix(
- with_pt=self.build_torch,
- with_tf=self.build_tf,
- with_ort=self.build_onnx,
- extra_env={"GPU": "1" if device == Device.GPU else "0"},
- )
-
- if self.torch_dir:
- self.env["Torch_DIR"] = str(self.torch_dir)
-
- build_cmd.extend(
- [
- self.binary_path("make"),
- "-C",
- str(self.rai_build_path / "opt"),
- "-j",
- f"{self.jobs}",
- "build",
- ]
- )
- self.run_command(build_cmd, cwd=self.rai_build_path)
-
- self._install_backends(device)
- if self.user_supplied_backend("torch"):
- self._move_torch_libs()
- self.cleanup()
-
- def user_supplied_backend(self, backend: TRedisAIBackendStr) -> bool:
- if backend == "torch":
- return bool(self.build_torch and not self.fetch_torch)
- if backend == "tensorflow":
- return bool(self.build_tf and not self.fetch_tf)
- if backend == "onnxruntime":
- return bool(self.build_onnx and not self.fetch_onnx)
- if backend == "tflite":
- return False
- raise BuildError(f"Unrecognized backend requested {backend}")
-
- def _rai_build_env_prefix(
- self,
- with_tf: bool,
- with_pt: bool,
- with_ort: bool,
- extra_env: t.Optional[t.Dict[str, str]] = None,
- ) -> t.List[str]:
- extra_env = extra_env or {}
- return [
- self.binary_path("env"),
- f"WITH_PT={1 if with_pt else 0}",
- f"WITH_TF={1 if with_tf else 0}",
- "WITH_TFLITE=0", # never use TF Lite (for now)
- f"WITH_ORT={1 if with_ort else 0}",
- *(f"{key}={val}" for key, val in extra_env.items()),
- ]
-
- def _fetch_deps_for(self, device: Device) -> None:
- if not self.rai_build_path.is_dir():
- raise BuildError("RedisAI build directory not found")
-
- deps_dir = self.get_deps_dir_path_for(device)
- deps_dir.mkdir(parents=True, exist_ok=True)
- if any(deps_dir.iterdir()):
- raise BuildError("RAI build dependency directory is not empty")
- to_fetch = self._get_deps_to_fetch_for(device)
- placed_paths = _threaded_map(
- _place_rai_dep_at(deps_dir, self.verbose), to_fetch
- )
- unique_placed_paths = {os.fspath(path.resolve()) for path in placed_paths}
- if len(unique_placed_paths) != len(to_fetch):
- raise BuildError(
- f"Expected to place {len(to_fetch)} dependencies, but only "
- f"found {len(unique_placed_paths)}"
- )
-
- def _install_backends(self, device: Device) -> None:
- """Move backend libraries to smartsim/_core/lib/
- :param device: cpu or cpu
- """
- self.rai_install_path = self.rai_build_path.joinpath(
- f"install-{device.value}"
- ).resolve()
- rai_lib = self.rai_install_path / "redisai.so"
- rai_backends = self.rai_install_path / "backends"
-
- if rai_backends.is_dir():
- self.copy_dir(rai_backends, self.lib_path / "backends", set_exe=True)
- if rai_lib.is_file():
- self.copy_file(rai_lib, self.lib_path / "redisai.so", set_exe=True)
-
- def _move_torch_libs(self) -> None:
- """Move pip install torch libraries
- Since we use pip installed torch libraries for building
- RedisAI, we need to move them into the LD_runpath of redisai.so
- in the smartsim/_core/lib directory.
- """
- ss_rai_torch_path = self.lib_path / "backends" / "redisai_torch"
- ss_rai_torch_lib_path = ss_rai_torch_path / "lib"
-
- # retrieve torch shared libraries and copy to the
- # smartsim/_core/lib/backends/redisai_torch/lib dir
- # self.torch_dir should be /path/to/torch/share/cmake/Torch
- # so we take the great grandparent here
- pip_torch_path = Path(self.torch_dir).parent.parent.parent
- pip_torch_lib_path = pip_torch_path / "lib"
-
- self.copy_dir(pip_torch_lib_path, ss_rai_torch_lib_path, set_exe=True)
-
- # also move the openmp files if on a mac
- if sys.platform == "darwin":
- dylibs = pip_torch_path / ".dylibs"
- self.copy_dir(dylibs, ss_rai_torch_path / ".dylibs", set_exe=True)
-
-
-def _threaded_map(fn: t.Callable[[_T], _U], items: t.Iterable[_T]) -> t.Sequence[_U]:
- items = tuple(items)
- if not items: # No items so no work to do
- return ()
- num_workers = min(len(items), (os.cpu_count() or 4) * 5)
- with concurrent.futures.ThreadPoolExecutor(num_workers) as pool:
- return tuple(pool.map(fn, items))
-
-
-class _WebLocation(ABC):
- @property
- @abstractmethod
- def url(self) -> str: ...
-
-
-class _WebGitRepository(_WebLocation):
- def clone(
- self,
- target: _PathLike,
- depth: t.Optional[int] = None,
- branch: t.Optional[str] = None,
- ) -> None:
- depth_ = ("--depth", str(depth)) if depth is not None else ()
- branch_ = ("--branch", branch) if branch is not None else ()
- _git("clone", "-q", *depth_, *branch_, self.url, os.fspath(target))
-
-
-@t.final
-@dataclass(frozen=True)
-class _DLPackRepository(_WebGitRepository, _RAIBuildDependency):
- version: str
-
- @staticmethod
- def supported_platforms() -> t.Sequence[t.Tuple[OperatingSystem, Architecture]]:
- return (
- (OperatingSystem.LINUX, Architecture.X64),
- (OperatingSystem.DARWIN, Architecture.X64),
- (OperatingSystem.DARWIN, Architecture.ARM64),
- )
-
- @property
- def url(self) -> str:
- return "https://github.com/RedisAI/dlpack.git"
-
- @property
- def __rai_dependency_name__(self) -> str:
- return f"dlpack@{self.url}"
-
- def __place_for_rai__(self, target: _PathLike) -> Path:
- target = Path(target) / "dlpack"
- self.clone(target, branch=self.version, depth=1)
- if not target.is_dir():
- raise BuildError("Failed to place dlpack")
- return target
-
-
-class _WebArchive(_WebLocation):
- @property
- def name(self) -> str:
- _, name = self.url.rsplit("/", 1)
- return name
-
- def download(self, target: _PathLike) -> Path:
- target = Path(target)
- if target.is_dir():
- target = target / self.name
- file, _ = urllib.request.urlretrieve(self.url, target)
- return Path(file).resolve()
-
-
-class _ExtractableWebArchive(_WebArchive, ABC):
- @abstractmethod
- def _extract_download(self, download_path: Path, target: _PathLike) -> None: ...
-
- def extract(self, target: _PathLike) -> None:
- with tempfile.TemporaryDirectory() as tmp_dir:
- arch_path = self.download(tmp_dir)
- self._extract_download(arch_path, target)
-
-
-class _WebTGZ(_ExtractableWebArchive):
- def _extract_download(self, download_path: Path, target: _PathLike) -> None:
- with tarfile.open(download_path, "r") as tgz_file:
- tgz_file.extractall(target)
-
-
-class _WebZip(_ExtractableWebArchive):
- def _extract_download(self, download_path: Path, target: _PathLike) -> None:
- with zipfile.ZipFile(download_path, "r") as zip_file:
- zip_file.extractall(target)
-
-
-class WebTGZ(_WebTGZ):
- def __init__(self, url: str) -> None:
- self._url = url
-
- @property
- def url(self) -> str:
- return self._url
-
-
-@dataclass(frozen=True)
-class _PTArchive(_WebZip, _RAIBuildDependency):
- architecture: Architecture
- device: Device
- version: str
- with_mkl: bool
-
- @staticmethod
- def supported_platforms() -> t.Sequence[t.Tuple[OperatingSystem, Architecture]]:
- # TODO: This will need to be revisited if the inheritance tree gets deeper
- return tuple(
- itertools.chain.from_iterable(
- var.supported_platforms() for var in _PTArchive.__subclasses__()
- )
- )
-
- @property
- def __rai_dependency_name__(self) -> str:
- return f"libtorch@{self.url}"
-
- @staticmethod
- def _patch_out_mkl(libtorch_root: Path) -> None:
- _modify_source_files(
- libtorch_root / "share/cmake/Caffe2/public/mkl.cmake",
- r"find_package\(MKL QUIET\)",
- "# find_package(MKL QUIET)",
- )
-
- def extract(self, target: _PathLike) -> None:
- super().extract(target)
- if not self.with_mkl:
- self._patch_out_mkl(Path(target))
-
- def __place_for_rai__(self, target: _PathLike) -> Path:
- self.extract(target)
- target = Path(target) / "libtorch"
- if not target.is_dir():
- raise BuildError("Failed to place RAI dependency: `libtorch`")
- return target
-
-
-@t.final
-class _PTArchiveLinux(_PTArchive):
- @staticmethod
- def supported_platforms() -> t.Sequence[t.Tuple[OperatingSystem, Architecture]]:
- return ((OperatingSystem.LINUX, Architecture.X64),)
-
- @property
- def url(self) -> str:
- if self.device == Device.GPU:
- pt_build = "cu117"
- else:
- pt_build = Device.CPU.value
- # pylint: disable-next=line-too-long
- libtorch_archive = (
- f"libtorch-cxx11-abi-shared-without-deps-{self.version}%2B{pt_build}.zip"
- )
- return f"https://download.pytorch.org/libtorch/{pt_build}/{libtorch_archive}"
-
-
-@t.final
-class _PTArchiveMacOSX(_PTArchive):
- @staticmethod
- def supported_platforms() -> t.Sequence[t.Tuple[OperatingSystem, Architecture]]:
- return (
- (OperatingSystem.DARWIN, Architecture.ARM64),
- (OperatingSystem.DARWIN, Architecture.X64),
- )
-
- @property
- def url(self) -> str:
- if self.device == Device.GPU:
- raise BuildError("RedisAI does not currently support GPU on Mac OSX")
- if self.architecture == Architecture.X64:
- pt_build = Device.CPU.value
- libtorch_archive = f"libtorch-macos-{self.version}.zip"
- root_url = "https://download.pytorch.org/libtorch"
- return f"{root_url}/{pt_build}/{libtorch_archive}"
- if self.architecture == Architecture.ARM64:
- libtorch_archive = f"libtorch-macos-arm64-{self.version}.zip"
- # pylint: disable-next=line-too-long
- root_url = (
- "https://github.com/CrayLabs/ml_lib_builder/releases/download/v0.1/"
- )
- return f"{root_url}/{libtorch_archive}"
-
- raise BuildError(f"Unsupported architecture for Pytorch: {self.architecture}")
-
-
-def _choose_pt_variant(
- os_: OperatingSystem,
-) -> t.Union[t.Type[_PTArchiveLinux], t.Type[_PTArchiveMacOSX]]:
- if os_ == OperatingSystem.DARWIN:
- return _PTArchiveMacOSX
- if os_ == OperatingSystem.LINUX:
- return _PTArchiveLinux
-
- raise BuildError(f"Unsupported OS for PyTorch: {os_}")
-
-
-@t.final
-@dataclass(frozen=True)
-class _TFArchive(_WebTGZ, _RAIBuildDependency):
- os_: OperatingSystem
- architecture: Architecture
- device: Device
- version: str
-
- @staticmethod
- def supported_platforms() -> t.Sequence[t.Tuple[OperatingSystem, Architecture]]:
- return (
- (OperatingSystem.LINUX, Architecture.X64),
- (OperatingSystem.DARWIN, Architecture.X64),
- )
-
- @property
- def url(self) -> str:
- if self.architecture == Architecture.X64:
- tf_arch = "x86_64"
- else:
- raise BuildError(
- f"Unexpected Architecture for TF Archive: {self.architecture}"
- )
-
- if self.os_ == OperatingSystem.LINUX:
- tf_os = "linux"
- tf_device = self.device
- elif self.os_ == OperatingSystem.DARWIN:
- tf_os = "darwin"
- if self.device == Device.GPU:
- raise BuildError("RedisAI does not currently support GPU on Macos")
- tf_device = Device.CPU
- else:
- raise BuildError(f"Unexpected OS for TF Archive: {self.os_}")
- return (
- "https://storage.googleapis.com/tensorflow/libtensorflow/"
- f"libtensorflow-{tf_device.value}-{tf_os}-{tf_arch}-{self.version}.tar.gz"
- )
-
- @property
- def __rai_dependency_name__(self) -> str:
- return f"libtensorflow@{self.url}"
-
- def __place_for_rai__(self, target: _PathLike) -> Path:
- target = Path(target) / "libtensorflow"
- target.mkdir()
- self.extract(target)
- return target
-
-
-@t.final
-@dataclass(frozen=True)
-class _ORTArchive(_WebTGZ, _RAIBuildDependency):
- os_: OperatingSystem
- device: Device
- version: str
-
- @staticmethod
- def supported_platforms() -> t.Sequence[t.Tuple[OperatingSystem, Architecture]]:
- return (
- (OperatingSystem.LINUX, Architecture.X64),
- (OperatingSystem.DARWIN, Architecture.X64),
- )
-
- @property
- def url(self) -> str:
- ort_url_base = (
- "https://github.com/microsoft/onnxruntime/releases/"
- f"download/v{self.version}"
- )
- if self.os_ == OperatingSystem.LINUX:
- ort_os = "linux"
- ort_arch = "x64"
- ort_build = "-gpu" if self.device == Device.GPU else ""
- elif self.os_ == OperatingSystem.DARWIN:
- ort_os = "osx"
- ort_arch = "x86_64"
- ort_build = ""
- if self.device == Device.GPU:
- raise BuildError("RedisAI does not currently support GPU on Macos")
- else:
- raise BuildError(f"Unexpected OS for TF Archive: {self.os_}")
- ort_archive = f"onnxruntime-{ort_os}-{ort_arch}{ort_build}-{self.version}.tgz"
- return f"{ort_url_base}/{ort_archive}"
-
- @property
- def __rai_dependency_name__(self) -> str:
- return f"onnxruntime@{self.url}"
-
- def __place_for_rai__(self, target: _PathLike) -> Path:
- target = Path(target).resolve() / "onnxruntime"
- self.extract(target)
- try:
- (extracted_dir,) = target.iterdir()
- except ValueError:
- raise BuildError(
- "Unexpected number of files extracted from ORT archive"
- ) from None
- for file in extracted_dir.iterdir():
- file.rename(target / file.name)
- extracted_dir.rmdir()
- return target
-
-
-def _git(*args: str) -> None:
- git = Builder.binary_path("git")
- cmd = (git,) + args
- with subprocess.Popen(cmd) as proc:
- proc.wait()
- if proc.returncode != 0:
- raise BuildError(
- f"Command `{' '.join(cmd)}` failed with exit code {proc.returncode}"
- )
-
-
-def config_git_command(plat: Platform, cmd: t.Sequence[str]) -> t.List[str]:
- """Modify git commands to include autocrlf when on a platform that needs
- autocrlf enabled to behave correctly
- """
- cmd = list(cmd)
- where = next((i for i, tok in enumerate(cmd) if tok.endswith("git")), len(cmd)) + 2
- if where >= len(cmd):
- raise ValueError(f"Failed to locate git command in '{' '.join(cmd)}'")
- if plat == Platform(OperatingSystem.DARWIN, Architecture.ARM64):
- cmd = (
- cmd[:where]
- + ["--config", "core.autocrlf=false", "--config", "core.eol=lf"]
- + cmd[where:]
- )
- return cmd
-
-
-def _modify_source_files(
- files: t.Union[_PathLike, t.Iterable[_PathLike]], regex: str, replacement: str
-) -> None:
- compiled_regex = re.compile(regex)
- with fileinput.input(files=files, inplace=True) as handles:
- for line in handles:
- line = compiled_regex.sub(replacement, line)
- print(line, end="")
diff --git a/smartsim/_core/_install/configs/mlpackages/DarwinARM64CPU.json b/smartsim/_core/_install/configs/mlpackages/DarwinARM64CPU.json
new file mode 100644
index 000000000..2f49a393e
--- /dev/null
+++ b/smartsim/_core/_install/configs/mlpackages/DarwinARM64CPU.json
@@ -0,0 +1,47 @@
+{
+ "platform": {
+ "operating_system":"darwin",
+ "architecture":"arm64",
+ "device":"cpu"
+ },
+ "ml_packages": [
+ {
+ "name": "dlpack",
+ "version": "v0.5_RAI",
+ "pip_index": "",
+ "python_packages": [],
+ "lib_source": "https://github.com/RedisAI/dlpack.git"
+ },
+ {
+ "name": "libtorch",
+ "version": "2.4.0",
+ "pip_index": "",
+ "python_packages": [
+ "torch==2.4.0",
+ "torchvision==0.19.0",
+ "torchaudio==2.4.0"
+ ],
+ "lib_source": "https://download.pytorch.org/libtorch/cpu/libtorch-macos-arm64-2.4.0.zip",
+ "rai_patches": [
+ {
+ "description": "Patch RedisAI module to require C++17 standard instead of C++14",
+ "source_file": "src/backends/libtorch_c/CMakeLists.txt",
+ "regex": "set_property\\(TARGET\\storch_c\\sPROPERTY\\sCXX_STANDARD\\s(98|11|14)\\)",
+ "replacement": "set_property(TARGET torch_c PROPERTY CXX_STANDARD 17)"
+ }
+ ]
+ },
+ {
+ "name": "onnxruntime",
+ "version": "1.17.3",
+ "pip_index": "",
+ "python_packages": [
+ "onnx==1.15",
+ "skl2onnx",
+ "scikit-learn",
+ "onnxmltools"
+ ],
+ "lib_source": "https://github.com/microsoft/onnxruntime/releases/download/v1.17.3/onnxruntime-osx-arm64-1.17.3.tgz"
+ }
+ ]
+}
diff --git a/smartsim/_core/_install/configs/mlpackages/DarwinX64CPU.json b/smartsim/_core/_install/configs/mlpackages/DarwinX64CPU.json
new file mode 100644
index 000000000..e7b67e35b
--- /dev/null
+++ b/smartsim/_core/_install/configs/mlpackages/DarwinX64CPU.json
@@ -0,0 +1,56 @@
+{
+ "platform": {
+ "operating_system":"darwin",
+ "architecture":"x86_64",
+ "device":"cpu"
+ },
+ "ml_packages": [
+ {
+ "name": "dlpack",
+ "version": "v0.5_RAI",
+ "pip_index": "",
+ "python_packages": [],
+ "lib_source": "https://github.com/RedisAI/dlpack.git"
+ },
+ {
+ "name": "libtorch",
+ "version": "2.2.2",
+ "pip_index": "",
+ "python_packages": [
+ "torch==2.2.2",
+ "torchvision==0.17.2",
+ "torchaudio==2.2.2"
+ ],
+ "lib_source": "https://download.pytorch.org/libtorch/cpu/libtorch-macos-x86_64-2.2.2.zip",
+ "rai_patches": [
+ {
+ "description": "Patch RedisAI module to require C++17 standard instead of C++14",
+ "source_file": "src/backends/libtorch_c/CMakeLists.txt",
+ "regex": "set_property\\(TARGET\\storch_c\\sPROPERTY\\sCXX_STANDARD\\s(98|11|14)\\)",
+ "replacement": "set_property(TARGET torch_c PROPERTY CXX_STANDARD 17)"
+ }
+ ]
+ },
+ {
+ "name": "libtensorflow",
+ "version": "2.15",
+ "pip_index": "",
+ "python_packages": [
+ "tensorflow==2.15"
+ ],
+ "lib_source": "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-cpu-darwin-x86_64-2.15.0.tar.gz"
+ },
+ {
+ "name": "onnxruntime",
+ "version": "1.17.3",
+ "pip_index": "",
+ "python_packages": [
+ "onnx==1.15",
+ "skl2onnx",
+ "scikit-learn",
+ "onnxmltools"
+ ],
+ "lib_source": "https://github.com/microsoft/onnxruntime/releases/download/v1.17.3/onnxruntime-osx-x86_64-1.17.3.tgz"
+ }
+ ]
+}
diff --git a/smartsim/_core/_install/configs/mlpackages/LinuxX64CPU.json b/smartsim/_core/_install/configs/mlpackages/LinuxX64CPU.json
new file mode 100644
index 000000000..cc2f81194
--- /dev/null
+++ b/smartsim/_core/_install/configs/mlpackages/LinuxX64CPU.json
@@ -0,0 +1,56 @@
+{
+ "platform": {
+ "operating_system":"linux",
+ "architecture":"x86_64",
+ "device":"cpu"
+ },
+ "ml_packages": [
+ {
+ "name": "dlpack",
+ "version": "v0.5_RAI",
+ "pip_index": "",
+ "python_packages": [],
+ "lib_source": "https://github.com/RedisAI/dlpack.git"
+ },
+ {
+ "name": "libtorch",
+ "version": "2.4.0",
+ "pip_index": "https://download.pytorch.org/whl/cpu",
+ "python_packages": [
+ "torch==2.4.0+cpu",
+ "torchvision==0.19.0+cpu",
+ "torchaudio==2.4.0+cpu"
+ ],
+ "lib_source": "https://download.pytorch.org/libtorch/cpu/libtorch-cxx11-abi-shared-with-deps-2.4.0%2Bcpu.zip",
+ "rai_patches": [
+ {
+ "description": "Patch RedisAI module to require C++17 standard instead of C++14",
+ "source_file": "src/backends/libtorch_c/CMakeLists.txt",
+ "regex": "set_property\\(TARGET\\storch_c\\sPROPERTY\\sCXX_STANDARD\\s(98|11|14)\\)",
+ "replacement": "set_property(TARGET torch_c PROPERTY CXX_STANDARD 17)"
+ }
+ ]
+ },
+ {
+ "name": "libtensorflow",
+ "version": "2.15",
+ "pip_index": "",
+ "python_packages": [
+ "tensorflow==2.15"
+ ],
+ "lib_source": "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-cpu-linux-x86_64-2.15.0.tar.gz"
+ },
+ {
+ "name": "onnxruntime",
+ "version": "1.17.3",
+ "pip_index": "",
+ "python_packages": [
+ "onnx<=1.15",
+ "skl2onnx",
+ "scikit-learn",
+ "onnxmltools"
+ ],
+ "lib_source": "https://github.com/microsoft/onnxruntime/releases/download/v1.17.3/onnxruntime-linux-x64-1.17.3.tgz"
+ }
+ ]
+}
diff --git a/smartsim/_core/_install/configs/mlpackages/LinuxX64CUDA11.json b/smartsim/_core/_install/configs/mlpackages/LinuxX64CUDA11.json
new file mode 100644
index 000000000..cf302534c
--- /dev/null
+++ b/smartsim/_core/_install/configs/mlpackages/LinuxX64CUDA11.json
@@ -0,0 +1,56 @@
+{
+ "platform": {
+ "operating_system":"linux",
+ "architecture":"x86_64",
+ "device":"cuda-11"
+ },
+ "ml_packages": [
+ {
+ "name": "dlpack",
+ "version": "v0.5_RAI",
+ "pip_index": "",
+ "python_packages": [],
+ "lib_source": "https://github.com/RedisAI/dlpack.git"
+ },
+ {
+ "name": "libtorch",
+ "version": "2.3.1",
+ "pip_index": "https://download.pytorch.org/whl/cu118",
+ "python_packages": [
+ "torch==2.3.1+cu118",
+ "torchvision==0.18.1+cu118",
+ "torchaudio==2.3.1+cu118"
+ ],
+ "lib_source": "https://download.pytorch.org/libtorch/cu118/libtorch-cxx11-abi-shared-with-deps-2.3.1%2Bcu118.zip",
+ "rai_patches": [
+ {
+ "description": "Patch RedisAI module to require C++17 standard instead of C++14",
+ "source_file": "src/backends/libtorch_c/CMakeLists.txt",
+ "regex": "set_property\\(TARGET\\storch_c\\sPROPERTY\\sCXX_STANDARD\\s(98|11|14)\\)",
+ "replacement": "set_property(TARGET torch_c PROPERTY CXX_STANDARD 17)"
+ }
+ ]
+ },
+ {
+ "name": "libtensorflow",
+ "version": "2.14.1",
+ "pip_index": "",
+ "python_packages": [
+ "tensorflow==2.14.1"
+ ],
+ "lib_source": "https://github.com/CrayLabs/ml_lib_builder/releases/download/v0.2/libtensorflow-2.14.1-linux-x64-cuda-11.8.0.tgz"
+ },
+ {
+ "name": "onnxruntime",
+ "version": "1.17.3",
+ "pip_index": "",
+ "python_packages": [
+ "onnx==1.15",
+ "skl2onnx",
+ "scikit-learn",
+ "onnxmltools"
+ ],
+ "lib_source": "https://github.com/microsoft/onnxruntime/releases/download/v1.17.3/onnxruntime-linux-x64-gpu-1.17.3.tgz"
+ }
+ ]
+}
diff --git a/smartsim/_core/_install/configs/mlpackages/LinuxX64CUDA12.json b/smartsim/_core/_install/configs/mlpackages/LinuxX64CUDA12.json
new file mode 100644
index 000000000..a415b3103
--- /dev/null
+++ b/smartsim/_core/_install/configs/mlpackages/LinuxX64CUDA12.json
@@ -0,0 +1,64 @@
+{
+ "platform": {
+ "operating_system":"linux",
+ "architecture":"x86_64",
+ "device":"cuda-12"
+ },
+ "ml_packages": [
+ {
+ "name": "dlpack",
+ "version": "v0.5_RAI",
+ "pip_index": "",
+ "python_packages": [],
+ "lib_source": "https://github.com/RedisAI/dlpack.git"
+ },
+ {
+ "name": "libtorch",
+ "version": "2.3.1",
+ "pip_index": "https://download.pytorch.org/whl/cu121",
+ "python_packages": [
+ "torch==2.3.1+cu121",
+ "torchvision==0.18.1+cu121",
+ "torchaudio==2.3.1+cu121"
+ ],
+ "lib_source": "https://download.pytorch.org/libtorch/cu121/libtorch-cxx11-abi-shared-with-deps-2.3.1%2Bcu121.zip",
+ "rai_patches": [
+ {
+ "description": "Patch RedisAI module to require C++17 standard instead of C++14",
+ "source_file": "src/backends/libtorch_c/CMakeLists.txt",
+ "regex": "set_property\\(TARGET\\storch_c\\sPROPERTY\\sCXX_STANDARD\\s(98|11|14)\\)",
+ "replacement": "set_property(TARGET torch_c PROPERTY CXX_STANDARD 17)"
+ }
+ ]
+ },
+ {
+ "name": "libtensorflow",
+ "version": "2.15",
+ "pip_index": "",
+ "python_packages": [
+ "tensorflow==2.15"
+ ],
+ "lib_source": "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-gpu-linux-x86_64-2.15.0.tar.gz",
+ "rai_patches": [
+ {
+ "description": "Patch RedisAI to point to correct tsl directory",
+ "source_file": "CMakeLists.txt",
+ "regex": "INCLUDE_DIRECTORIES\\(\\$\\{depsAbs\\}/libtensorflow/include\\)",
+ "replacement": "INCLUDE_DIRECTORIES(${depsAbs}/libtensorflow/include ${depsAbs}/libtensorflow/include/external/local_tsl)"
+ }
+ ]
+ },
+ {
+ "name": "onnxruntime",
+ "version": "1.17.3",
+ "pip_index": "",
+ "python_packages": [
+ "onnx==1.15",
+ "skl2onnx",
+ "scikit-learn",
+ "onnxmltools"
+ ],
+ "lib_source": "https://github.com/microsoft/onnxruntime/releases/download/v1.17.3/onnxruntime-linux-x64-gpu-cuda12-1.17.3.tgz"
+ }
+ ]
+}
diff --git a/smartsim/_core/_install/configs/mlpackages/LinuxX64ROCM6.json b/smartsim/_core/_install/configs/mlpackages/LinuxX64ROCM6.json
new file mode 100644
index 000000000..b4673e901
--- /dev/null
+++ b/smartsim/_core/_install/configs/mlpackages/LinuxX64ROCM6.json
@@ -0,0 +1,47 @@
+{
+ "platform": {
+ "operating_system":"linux",
+ "architecture":"x86_64",
+ "device":"rocm-6"
+ },
+ "ml_packages": [
+ {
+ "name": "dlpack",
+ "version": "v0.5_RAI",
+ "pip_index": "",
+ "python_packages": [],
+ "lib_source": "https://github.com/RedisAI/dlpack.git"
+ },
+ {
+ "name": "libtorch",
+ "version": "2.4.0",
+ "pip_index": "https://download.pytorch.org/whl/rocm6.1",
+ "python_packages": [
+ "torch==2.4.0+rocm6.1",
+ "torchvision==0.19.0+rocm6.1",
+ "torchaudio==2.4.0+rocm6.1"
+ ],
+ "lib_source": "https://download.pytorch.org/libtorch/rocm6.1/libtorch-cxx11-abi-shared-with-deps-2.4.0%2Brocm6.1.zip",
+ "rai_patches": [
+ {
+ "description": "Patch RedisAI module to require C++17 standard instead of C++14",
+ "source_file": "src/backends/libtorch_c/CMakeLists.txt",
+ "regex": "set_property\\(TARGET\\storch_c\\sPROPERTY\\sCXX_STANDARD\\s(98|11|14)\\)",
+ "replacement": "set_property(TARGET torch_c PROPERTY CXX_STANDARD 17)"
+ },
+ {
+ "description": "Fix Regex, Load HIP",
+ "source_file": "../package/libtorch/share/cmake/Caffe2/public/LoadHIP.cmake",
+ "regex": ".*string.*",
+ "replacement": ""
+ },
+ {
+ "description": "Replace `/opt/rocm` with `$ENV{ROCM_PATH}`",
+ "source_file": "../package/libtorch/share/cmake/Caffe2/Caffe2Targets.cmake",
+ "regex": "/opt/rocm",
+ "replacement": "$ENV{ROCM_PATH}"
+ }
+ ]
+ }
+ ]
+}
diff --git a/smartsim/_core/_install/mlpackages.py b/smartsim/_core/_install/mlpackages.py
new file mode 100644
index 000000000..04e3798d3
--- /dev/null
+++ b/smartsim/_core/_install/mlpackages.py
@@ -0,0 +1,198 @@
+# BSD 2-Clause License
+#
+# Copyright (c) 2021-2024, Hewlett Packard Enterprise
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import json
+import os
+import pathlib
+import re
+import subprocess
+import sys
+import typing as t
+from collections.abc import MutableMapping
+from dataclasses import dataclass
+
+from tabulate import tabulate
+
+from .platform import Platform
+from .types import PathLike
+from .utils import retrieve
+
+
+class RequireRelativePath(Exception):
+    """Error type for paths that are required to be relative
+
+    NOTE(review): nothing in this module raises it yet.
+    """
+
+
+@dataclass
+class RAIPatch:
+    """Description of a single regex-based edit to a RedisAI source file
+
+    ``source_file`` and ``regex`` may be supplied as plain strings; once the
+    dataclass is initialized they are converted to a ``pathlib.Path`` and a
+    compiled pattern respectively.
+
+    :param description: Human-readable description of the patch's purpose
+    :param replacement: The replacement for the text found by the regex
+    :param source_file: A relative path to the chosen file
+    :param regex: A regex pattern to match in the given file
+    """
+
+    description: str
+    replacement: str
+    source_file: pathlib.Path
+    regex: re.Pattern[str]
+
+    def __post_init__(self) -> None:
+        # Normalize the raw constructor arguments into their declared types
+        as_path = pathlib.Path(self.source_file)
+        self.source_file = as_path
+        compiled = re.compile(self.regex)
+        self.regex = compiled
+
+
+@dataclass
+class MLPackage:
+    """Pairs an ML backend's C/C++ library source with its python packages"""
+
+    name: str
+    version: str
+    pip_index: str
+    python_packages: t.List[str]
+    lib_source: PathLike
+    rai_patches: t.Tuple[RAIPatch, ...] = ()
+
+    def retrieve(self, destination: PathLike) -> None:
+        """Fetch the library source (archive or repository) of this package
+
+        :param destination: Path to place the extracted package or repository
+        """
+        target = pathlib.Path(destination)
+        # Inside the method body `retrieve` is the module-level helper
+        retrieve(self.lib_source, target)
+
+    def pip_install(self, quiet: bool = False) -> None:
+        """Install the python packages associated with this ML package
+
+        No command is run when the package lists no python packages.
+
+        :param quiet: If True, suppress most of the pip output, defaults to False
+        """
+        if not self.python_packages:
+            return
+
+        index_args = ["--index-url", self.pip_index] if self.pip_index else []
+        quiet_args = ["--quiet", "--no-warn-conflicts"] if quiet else []
+        subprocess.check_call(
+            [sys.executable, "-m", "pip", "install"]
+            + index_args
+            + quiet_args
+            + self.python_packages
+        )
+
+
+class MLPackageCollection(MutableMapping[str, MLPackage]):
+    """The set of MLPackages available for one specific platform
+
+    Behaves like a mapping from package name to MLPackage. Entries can be
+    looked up and deleted, but item assignment is rejected.
+    """
+
+    def __init__(self, platform: Platform, ml_packages: t.Sequence[MLPackage]):
+        self.platform = platform
+        self._ml_packages = {package.name: package for package in ml_packages}
+
+    @classmethod
+    def from_json_file(cls, json_file: PathLike) -> "MLPackageCollection":
+        """Build an MLPackageCollection from a JSON description
+
+        The file must provide a ``platform`` object and an ``ml_packages``
+        list; any ``rai_patches`` entries are converted to RAIPatch objects.
+
+        :param json_file: path to the JSON file
+        :return: An instance of MLPackageCollection for a platform
+        """
+        with open(json_file, "r", encoding="utf-8") as file_handle:
+            config_json = json.load(file_handle)
+        platform = Platform.from_strs(**config_json["platform"])
+
+        package_specs = config_json["ml_packages"]
+        for spec in package_specs:
+            # Convert the dictionary representation to RAIPatch instances
+            if "rai_patches" in spec:
+                raw_patches = spec.pop("rai_patches")
+                spec["rai_patches"] = [RAIPatch(**raw) for raw in raw_patches]
+
+        return cls(platform, [MLPackage(**spec) for spec in package_specs])
+
+    def __iter__(self) -> t.Iterator[str]:
+        """Iterate over the names of the packages in the collection
+
+        :return: Iterator over package names
+        """
+        return iter(self._ml_packages)
+
+    def __getitem__(self, key: str) -> MLPackage:
+        """Look up an MLPackage by name
+
+        :param key: Name of the package (e.g. libtorch)
+        :return: MLPackage with all requirements
+        """
+        return self._ml_packages[key]
+
+    def __len__(self) -> int:
+        return len(self._ml_packages)
+
+    def __delitem__(self, key: str) -> None:
+        del self._ml_packages[key]
+
+    def __setitem__(self, key: t.Any, value: t.Any) -> t.NoReturn:
+        # Packages can only be supplied at construction time
+        raise TypeError(f"{type(self).__name__} does not support item assignment")
+
+    def __contains__(self, key: object) -> bool:
+        return key in self._ml_packages
+
+    def __str__(self, tablefmt: str = "github") -> str:
+        """Render package names and versions as a table
+
+        :param tablefmt: Tabulate format, defaults to "github"
+        :return: The formatted table
+        """
+        rows = [
+            [name, package.version] for name, package in self._ml_packages.items()
+        ]
+        return tabulate(rows, headers=["Package", "Version"], tablefmt=tablefmt)
+
+
+def load_platform_configs(
+    config_file_path: pathlib.Path,
+) -> t.Dict[Platform, MLPackageCollection]:
+    """Load one MLPackageCollection per JSON file found in a directory
+
+    Only files matching ``*.json`` directly inside the directory are read.
+    If several files describe the same platform, the one read last is kept.
+
+    :param config_file_path: Directory with JSON files describing the
+                             configuration by platform
+    :return: Dictionary whose keys are the supported platform and values
+             are its associated MLPackageCollection
+    :raises FileNotFoundError: if ``config_file_path`` is not a directory
+    """
+    if config_file_path.is_dir():
+        collections = (
+            MLPackageCollection.from_json_file(json_file)
+            for json_file in config_file_path.glob("*.json")
+        )
+        return {collection.platform: collection for collection in collections}
+
+    path = os.fspath(config_file_path)
+    msg = f"Platform configuration directory `{path}` does not exist"
+    raise FileNotFoundError(msg)
+
+
+# Directory next to this module that holds the per-platform JSON descriptions
+DEFAULT_MLPACKAGE_PATH: t.Final = (
+ pathlib.Path(__file__).parent / "configs" / "mlpackages"
+)
+# Evaluated when this module is imported: every `*.json` file in the default
+# directory is parsed into an MLPackageCollection keyed by its Platform
+DEFAULT_MLPACKAGES: t.Final = load_platform_configs(DEFAULT_MLPACKAGE_PATH)
diff --git a/smartsim/_core/_install/platform.py b/smartsim/_core/_install/platform.py
new file mode 100644
index 000000000..bef13c6a0
--- /dev/null
+++ b/smartsim/_core/_install/platform.py
@@ -0,0 +1,226 @@
+# BSD 2-Clause License
+#
+# Copyright (c) 2021-2024, Hewlett Packard Enterprise
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import enum
+import json
+import os
+import pathlib
+import platform
+import typing as t
+from dataclasses import dataclass
+
+from typing_extensions import Self
+
+
+class PlatformError(Exception):
+    """Base exception for platform-related errors"""
+
+
+class UnsupportedError(PlatformError):
+    """PlatformError subtype for unsupported platforms or components"""
+
+
+class Architecture(enum.Enum):
+    """Identifiers for the supported CPU architectures"""
+
+    X64 = "x86_64"
+    ARM64 = "arm64"
+
+    @classmethod
+    def from_str(cls, string: str) -> "Architecture":
+        """Look up the member whose value matches the string, ignoring case
+
+        :param string: String representing the architecture, see platform.machine
+        :return: Enum for a specific architecture
+        :raises ValueError: if no member has the lowercased string as its value
+        """
+        return cls(string.lower())
+
+    @classmethod
+    def autodetect(cls) -> "Architecture":
+        """Determine the architecture of the machine running this code
+
+        :return: enum of this platform's architecture
+        """
+        machine = platform.machine()
+        return cls.from_str(machine)
+
+
+class Device(enum.Enum):
+    """Identifiers for the supported device stacks and their major versions"""
+
+    CPU = "cpu"
+    CUDA11 = "cuda-11"
+    CUDA12 = "cuda-12"
+    ROCM5 = "rocm-5"
+    ROCM6 = "rocm-6"
+
+    @classmethod
+    def from_str(cls, str_: str) -> "Device":
+        """Look up the member matching a device string, ignoring case
+
+        The generic string "gpu" is accepted and maps to CUDA 11.
+
+        :param str_: String representing the device and version
+        :return: Enum for a specific device
+        :raises ValueError: if the string matches no member
+        """
+        normalized = str_.lower()
+        # TODO: auto detect which device to use
+        # currently hard coded to `cuda11`
+        return cls.CUDA11 if normalized == "gpu" else cls(normalized)
+
+    @classmethod
+    def detect_cuda_version(cls) -> t.Optional["Device"]:
+        """Determine the CUDA member from the CUDA_HOME environment variable
+
+        The major version is read from ``version.json`` inside CUDA_HOME.
+
+        :return: Enum for the version of CUDA currently available, or None
+                 if CUDA_HOME is unset or empty
+        """
+        cuda_home = os.environ.get("CUDA_HOME")
+        if not cuda_home:
+            return None
+        version_file = pathlib.Path(cuda_home) / "version.json"
+        with open(version_file, "r", encoding="utf-8") as file_handle:
+            cuda_versions = json.load(file_handle)
+        major = cuda_versions["cuda"]["version"].split(".")[0]
+        return cls.from_str(f"cuda-{major}")
+
+    @classmethod
+    def detect_rocm_version(cls) -> t.Optional["Device"]:
+        """Determine the ROCm member from the ROCM_HOME environment variable
+
+        The major version is taken from the first line of ``.info/version``
+        inside ROCM_HOME.
+
+        :return: Enum for the version of ROCm currently available, or None
+                 if ROCM_HOME is unset or empty
+        """
+        rocm_home = os.environ.get("ROCM_HOME")
+        if not rocm_home:
+            return None
+        version_file = pathlib.Path(rocm_home) / ".info" / "version"
+        with open(version_file, "r", encoding="utf-8") as file_handle:
+            first_line = file_handle.readline()
+        # Keep what precedes the first "-", then the leading dotted component
+        major = first_line.split("-")[0].split(".")[0]
+        return cls.from_str(f"rocm-{major}")
+
+    def is_gpu(self) -> bool:
+        """Whether the enum is categorized as a GPU
+
+        :return: True for every member except CPU
+        """
+        return self is not type(self).CPU
+
+    def is_cuda(self) -> bool:
+        """Whether the enum is associated with a CUDA device
+
+        :return: True for any supported CUDA enums
+        """
+        return self in type(self).cuda_enums()
+
+    def is_rocm(self) -> bool:
+        """Whether the enum is associated with a ROCm device
+
+        :return: True for any supported ROCm enums
+        """
+        return self in type(self).rocm_enums()
+
+    @classmethod
+    def cuda_enums(cls) -> t.Tuple["Device", ...]:
+        """Collect the members whose value contains "cuda"
+
+        :return: all enums associated with CUDA
+        """
+        return tuple(filter(lambda member: "cuda" in member.value, cls))
+
+    @classmethod
+    def rocm_enums(cls) -> t.Tuple["Device", ...]:
+        """Collect the members whose value contains "rocm"
+
+        :return: all enums associated with ROCm
+        """
+        return tuple(filter(lambda member: "rocm" in member.value, cls))
+
+
+class OperatingSystem(enum.Enum):
+    """Identifiers for the supported operating systems"""
+
+    LINUX = "linux"
+    DARWIN = "darwin"
+
+    @classmethod
+    def from_str(cls, string: str, /) -> "OperatingSystem":
+        """Look up the member whose value matches the string, ignoring case
+
+        :param string: String representing the OS
+        :return: Enum for a specific OS
+        :raises ValueError: if no member has the lowercased string as its value
+        """
+        return cls(string.lower())
+
+    @classmethod
+    def autodetect(cls) -> "OperatingSystem":
+        """Determine the OS of the machine running this code
+
+        :return: enum of this platform's OS
+        """
+        system_name = platform.system()
+        return cls.from_str(system_name)
+
+
+@dataclass(frozen=True)
+class Platform:
+    """Immutable (and therefore hashable) description of a target platform"""
+
+    operating_system: OperatingSystem
+    architecture: Architecture
+    device: Device
+
+    @classmethod
+    def from_strs(cls, operating_system: str, architecture: str, device: str) -> Self:
+        """Factory method for Platform from string input
+
+        :param operating_system: String identifier for the OS
+        :param architecture: String identifier for the architecture
+        :param device: String identifier for the device and version
+        :return: Instance of Platform
+        """
+        return cls(
+            operating_system=OperatingSystem.from_str(operating_system),
+            architecture=Architecture.from_str(architecture),
+            device=Device.from_str(device),
+        )
+
+    def __str__(self) -> str:
+        """Human-readable representation of Platform
+
+        :return: The enum member names of the OS, architecture and device,
+                 joined by "-"
+        """
+        members = (self.operating_system, self.architecture, self.device)
+        return "-".join(member.name for member in members)
diff --git a/smartsim/_core/_install/redisaiBuilder.py b/smartsim/_core/_install/redisaiBuilder.py
new file mode 100644
index 000000000..1dce6ddb4
--- /dev/null
+++ b/smartsim/_core/_install/redisaiBuilder.py
@@ -0,0 +1,301 @@
+# BSD 2-Clause License
+#
+# Copyright (c) 2021-2024, Hewlett Packard Enterprise
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import fileinput
+import os
+import pathlib
+import shutil
+import stat
+import subprocess
+import typing as t
+from collections import deque
+
+from smartsim._core._cli.utils import SMART_LOGGER_FORMAT
+from smartsim._core._install.buildenv import BuildEnv
+from smartsim._core._install.mlpackages import MLPackageCollection, RAIPatch
+from smartsim._core._install.platform import OperatingSystem, Platform
+from smartsim._core._install.utils import retrieve
+from smartsim._core.config import CONFIG
+from smartsim.log import get_logger
+
+# Module logger, named "Smart" and using the `smart` CLI log format
+logger = get_logger("Smart", fmt=SMART_LOGGER_FORMAT)
+# Value used for PYTORCH_ROCM_ARCH when the user has not set it; any other
+# value only triggers a warning during the build
+_SUPPORTED_ROCM_ARCH = "gfx90a"
+
+
+class RedisAIBuildError(Exception):
+    """Raised when a command run as part of the RedisAI build fails"""
+
+
+class RedisAIBuilder:
+    """Class to build RedisAI from Source"""
+
+    def __init__(
+        self,
+        platform: Platform,
+        mlpackages: MLPackageCollection,
+        build_env: BuildEnv,
+        main_build_path: pathlib.Path,
+        verbose: bool = False,
+        source: t.Union[str, pathlib.Path] = "https://github.com/RedisAI/RedisAI.git",
+        version: str = "v1.2.7",
+    ) -> None:
+        """Store the build configuration and remove stale build directories
+
+        :param platform: Platform (OS, architecture, device) to build for
+        :param mlpackages: ML packages to retrieve and build backends for
+        :param build_env: Build environment supplying the C and C++ compilers
+        :param main_build_path: Directory in which the ``RedisAI`` build root
+                                is created
+        :param verbose: If True, echo the build commands and their output
+        :param source: URL or path from which RedisAI is retrieved
+        :param version: Passed as the ``branch`` when retrieving RedisAI
+        """
+        self.platform = platform
+        self.mlpackages = mlpackages
+        self.build_env = build_env
+        self.verbose = verbose
+        self.source = source
+        self.version = version
+        self._root_path = main_build_path / "RedisAI"
+
+        # Always start from a clean slate: remove leftovers of earlier builds
+        self.cleanup_build()
+
+    @property
+    def src_path(self) -> pathlib.Path:
+        """Directory into which the RedisAI source is retrieved"""
+        return pathlib.Path(self._root_path / "src")
+
+    @property
+    def build_path(self) -> pathlib.Path:
+        """Directory in which CMake and make are executed"""
+        return pathlib.Path(self._root_path / "build")
+
+    @property
+    def package_path(self) -> pathlib.Path:
+        """Directory holding the retrieved ML packages (CMake ``DEPS_PATH``)"""
+        return pathlib.Path(self._root_path / "package")
+
+    def cleanup_build(self) -> None:
+        """Removes all directories associated with the build"""
+        shutil.rmtree(self.src_path, ignore_errors=True)
+        shutil.rmtree(self.build_path, ignore_errors=True)
+        shutil.rmtree(self.package_path, ignore_errors=True)
+
+    @property
+    def is_built(self) -> bool:
+        """Determine whether RedisAI and backends were built
+
+        :return: True if all backends and RedisAI module are in
+                 the expected location
+        """
+        backend_dir = CONFIG.lib_path / "backends"
+        # NOTE(review): every package name (e.g. `dlpack`, `libtorch`) is
+        # checked as `redisai_<name>`; confirm this matches the directory
+        # names that the RedisAI install actually creates
+        rai_exists = [
+            (backend_dir / f"redisai_{backend_name}").is_dir()
+            for backend_name in self.mlpackages
+        ]
+        rai_exists.append((CONFIG.lib_path / "redisai.so").is_file())
+        return all(rai_exists)
+
+    @property
+    def build_torch(self) -> bool:
+        """Whether to build torch backend
+
+        :return: True if torch backend should be built
+        """
+        return "libtorch" in self.mlpackages
+
+    @property
+    def build_tensorflow(self) -> bool:
+        """Whether to build tensorflow backend
+
+        :return: True if tensorflow backend should be built
+        """
+        return "libtensorflow" in self.mlpackages
+
+    @property
+    def build_onnxruntime(self) -> bool:
+        """Whether to build onnx backend
+
+        :return: True if onnx backend should be built
+        """
+        return "onnxruntime" in self.mlpackages
+
+    def build(self) -> None:
+        """Build RedisAI and the backends of the configured ML packages
+
+        Retrieves the RedisAI source and every ML package, applies the
+        packages' patches, then runs the CMake configure and install steps.
+
+        :raises RedisAIBuildError: if the configure or build command exits
+                                   with a non-zero status
+        """
+
+        # Following is needed to make sure that the clone/checkout is not
+        # impeded by git LFS limits imposed by RedisAI
+        os.environ["GIT_LFS_SKIP_SMUDGE"] = "1"
+
+        self.src_path.mkdir(parents=True)
+        self.build_path.mkdir(parents=True)
+        self.package_path.mkdir(parents=True)
+
+        retrieve(self.source, self.src_path, depth=1, branch=self.version)
+
+        self._prepare_packages()
+
+        for package in self.mlpackages.values():
+            self._patch_source_files(package.rai_patches)
+        cmake_command = self._rai_cmake_cmd()
+        build_command = self._rai_build_cmd
+
+        if self.platform.device.is_rocm() and "libtorch" in self.mlpackages:
+            pytorch_rocm_arch = os.environ.get("PYTORCH_ROCM_ARCH")
+            if not pytorch_rocm_arch:
+                logger.info(
+                    f"PYTORCH_ROCM_ARCH not set. Defaulting to '{_SUPPORTED_ROCM_ARCH}'"
+                )
+                os.environ["PYTORCH_ROCM_ARCH"] = _SUPPORTED_ROCM_ARCH
+            elif pytorch_rocm_arch != _SUPPORTED_ROCM_ARCH:
+                # This setting concerns the torch backend, so the hint must
+                # point the user at libtorch
+                logger.warning(
+                    f"PYTORCH_ROCM_ARCH is not {_SUPPORTED_ROCM_ARCH} which is the "
+                    "only officially supported architecture. This may still work "
+                    "if you are supplying your own version of libtorch."
+                )
+
+        logger.info("Configuring CMake Build")
+        if self.verbose:
+            print(" ".join(cmake_command))
+        self.run_command(cmake_command, self.build_path)
+
+        logger.info("Building RedisAI")
+        if self.verbose:
+            print(" ".join(build_command))
+        self.run_command(build_command, self.build_path)
+
+        if self.platform.operating_system == OperatingSystem.LINUX:
+            self._set_execute(CONFIG.lib_path / "redisai.so")
+
+    @staticmethod
+    def _set_execute(target: pathlib.Path) -> None:
+        """Set execute permissions for file
+
+        :param target: The target file to add execute permission
+        """
+        # Only the owner-execute bit is added; existing bits are kept
+        permissions = os.stat(target).st_mode | stat.S_IXUSR
+        os.chmod(target, permissions)
+
+    @staticmethod
+    def _find_closest_object(
+        start_path: pathlib.Path, target_obj: str
+    ) -> t.Optional[pathlib.Path]:
+        """Breadth-first search for the shallowest directory holding an entry
+
+        :param start_path: Directory at which the search starts
+        :param target_obj: Name of the file or directory to look for
+        :return: The directory that contains ``target_obj``, or None if no
+                 directory below ``start_path`` contains it
+        """
+        queue = deque([start_path])
+        while queue:
+            current_dir = queue.popleft()
+            current_target = current_dir / target_obj
+            if current_target.exists():
+                return current_target.parent
+            for sub_dir in current_dir.iterdir():
+                if sub_dir.is_dir():
+                    queue.append(sub_dir)
+        return None
+
+    def _prepare_packages(self) -> None:
+        """Ensure that retrieved archives/packages are in the expected location
+
+        RedisAI requires that the root directory of the backend is at
+        DEP_PATH/example_backend. Due to difficulties in retrieval methods and
+        naming conventions from different sources, this cannot be standardized.
+        Instead we try to find the parent of the "include" directory and assume
+        this is the root.
+        """
+
+        for package in self.mlpackages.values():
+            logger.info(f"Retrieving package: {package.name} {package.version}")
+            target_dir = self.package_path / package.name
+            package.retrieve(target_dir)
+            # Move actual contents to root of the expected location
+            actual_root = self._find_closest_object(target_dir, "include")
+            if actual_root and actual_root != target_dir:
+                # Log one string; a tuple here would be logged as its repr
+                logger.debug(
+                    f"Non-standard location found: \n{actual_root} -> {target_dir}"
+                )
+                for file in actual_root.iterdir():
+                    file.rename(target_dir / file.name)
+
+    def run_command(self, cmd: t.Union[str, t.List[str]], cwd: pathlib.Path) -> None:
+        """Executor of commands used in the build
+
+        Unless verbose, stdout is discarded and stderr is captured and only
+        printed when the command fails.
+
+        :param cmd: The actual command to execute
+        :param cwd: The working directory to execute in
+        :raises RedisAIBuildError: if the command exits with a non-zero status
+        """
+        stdout = None if self.verbose else subprocess.DEVNULL
+        stderr = None if self.verbose else subprocess.PIPE
+        proc = subprocess.run(
+            cmd, cwd=str(cwd), stdout=stdout, stderr=stderr, check=False
+        )
+        if proc.returncode != 0:
+            if proc.stderr is not None:
+                print(proc.stderr.decode("utf-8"))
+            # Joining a plain string would put a space between every character
+            cmd_str = cmd if isinstance(cmd, str) else " ".join(cmd)
+            raise RedisAIBuildError(
+                f"RedisAI build failed during command: {cmd_str}"
+            )
+
+    def _rai_cmake_cmd(self) -> t.List[str]:
+        """Build the CMake configuration command
+
+        :return: CMake command with correct options
+        """
+
+        def on_off(expression: bool) -> t.Literal["ON", "OFF"]:
+            return "ON" if expression else "OFF"
+
+        cmake_args = {
+            "BUILD_TF": on_off(self.build_tensorflow),
+            "BUILD_ORT": on_off(self.build_onnxruntime),
+            "BUILD_TORCH": on_off(self.build_torch),
+            "BUILD_TFLITE": "OFF",
+            "DEPS_PATH": str(self.package_path),
+            "DEVICE": "gpu" if self.platform.device.is_gpu() else "cpu",
+            "INSTALL_PATH": str(CONFIG.lib_path),
+            "CMAKE_C_COMPILER": self.build_env.CC,
+            "CMAKE_CXX_COMPILER": self.build_env.CXX,
+        }
+        if self.platform.device.is_rocm():
+            cmake_args["Torch_DIR"] = str(self.package_path / "libtorch")
+        cmd = ["cmake"]
+        cmd += (f"-D{key}={value}" for key, value in cmake_args.items())
+        cmd.append(str(self.src_path))
+        return cmd
+
+    @property
+    def _rai_build_cmd(self) -> t.List[str]:
+        """Shell command to build RedisAI and modules
+
+        With the CMake based install, very little needs to be done here.
+        "make install" is used to ensure that all resulting RedisAI backends
+        and their dependencies end up in the same location with the correct
+        RPATH if applicable.
+
+        :return: Command used to compile RedisAI and backends
+        """
+        return "make install -j VERBOSE=1".split(" ")
+
+    def _patch_source_files(self, patches: t.Tuple[RAIPatch, ...]) -> None:
+        """Apply specified RedisAI patches
+
+        Each patched file is rewritten in place, line by line, with the
+        patch's regex substituted by its replacement.
+
+        :param patches: Patches to apply; each ``source_file`` is resolved
+                        relative to the RedisAI source directory
+        """
+        for patch in patches:
+            with fileinput.input(
+                str(self.src_path / patch.source_file), inplace=True
+            ) as file_handle:
+                for line in file_handle:
+                    line = patch.regex.sub(patch.replacement, line)
+                    # With inplace=True, stdout is redirected into the file
+                    print(line, end="")
diff --git a/smartsim/_core/_install/types.py b/smartsim/_core/_install/types.py
new file mode 100644
index 000000000..0266ace34
--- /dev/null
+++ b/smartsim/_core/_install/types.py
@@ -0,0 +1,30 @@
+# BSD 2-Clause License
+#
+# Copyright (c) 2021-2024, Hewlett Packard Enterprise
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import pathlib
+import typing as t
+
+# Type alias for values accepted as a path: either a str or a pathlib.Path
+PathLike = t.Union[str, pathlib.Path]
diff --git a/smartsim/_core/_install/utils/__init__.py b/smartsim/_core/_install/utils/__init__.py
new file mode 100644
index 000000000..4e47cf282
--- /dev/null
+++ b/smartsim/_core/_install/utils/__init__.py
@@ -0,0 +1,27 @@
+# BSD 2-Clause License
+#
+# Copyright (c) 2021-2024, Hewlett Packard Enterprise
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from .retrieve import retrieve
diff --git a/smartsim/_core/_install/utils/retrieve.py b/smartsim/_core/_install/utils/retrieve.py
new file mode 100644
index 000000000..fcac565d4
--- /dev/null
+++ b/smartsim/_core/_install/utils/retrieve.py
@@ -0,0 +1,185 @@
+# BSD 2-Clause License
+#
+# Copyright (c) 2021-2024, Hewlett Packard Enterprise
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+import pathlib
+import shutil
+import tarfile
+import typing as t
+import zipfile
+from urllib.parse import urlparse
+from urllib.request import urlretrieve
+
+import git
+from tqdm import tqdm
+
+from smartsim._core._install.platform import Architecture, OperatingSystem
+from smartsim._core._install.types import PathLike
+
+
+class UnsupportedArchive(Exception):
+    """Raised when a source is neither a supported archive nor a directory"""
+
+
+class _TqdmUpTo(tqdm):  # type: ignore[type-arg]
+    """Provides `update_to(n)` which uses `tqdm.update(delta_n)`
+
+    From tqdm documentation for progress bar when downloading
+    """
+
+    def update_to(
+        self, num_blocks: int = 1, bsize: int = 1, tsize: t.Optional[int] = None
+    ) -> t.Optional[bool]:
+        """Move the progress bar to an absolute position
+
+        :param num_blocks: number of blocks transferred so far, defaults to 1
+        :param bsize: size of each block (in tqdm units), defaults to 1
+        :param tsize: total size (in tqdm units), defaults to None
+        :return: Whatever `tqdm.update` returns
+        """
+        transferred = num_blocks * bsize
+        if tsize is not None:
+            self.total = tsize
+        # `update` takes an increment, so pass the distance from the current
+        # position; afterwards self.n == num_blocks * bsize
+        return self.update(transferred - self.n)
+
+
+def _from_local_archive(
+    source: PathLike,
+    destination: pathlib.Path,
+    **kwargs: t.Any,
+) -> None:
+    """Decompress a local archive
+
+    The archive type is detected from the file contents. A file is handled
+    as exactly one type: a tar archive is never additionally unpacked as a
+    zip file, which could otherwise happen for a tar that also passes the
+    zip check.
+
+    :param source: Path to the archive on a local system
+    :param destination: Where to unpack the archive
+    :param kwargs: Extra options forwarded to the archive's ``extractall``
+    :raises UnsupportedArchive: if the file is neither a tar nor a zip archive
+    """
+    if tarfile.is_tarfile(source):
+        with tarfile.open(source) as archive:
+            archive.extractall(path=destination, **kwargs)
+    elif zipfile.is_zipfile(source):
+        with zipfile.ZipFile(source) as archive:
+            archive.extractall(path=destination, **kwargs)
+    else:
+        # Fail loudly instead of silently leaving the destination empty
+        raise UnsupportedArchive(
+            f"Source ({source}) is not a supported archive or directory "
+        )
+
+
+def _from_local_directory(
+    source: PathLike,
+    destination: pathlib.Path,
+    **kwargs: t.Any,
+) -> None:
+    """Recursively copy a directory tree to a new location
+
+    :param source: source directory
+    :param destination: destination directory
+    :param kwargs: Extra options forwarded to ``shutil.copytree``
+    """
+    shutil.copytree(src=source, dst=destination, **kwargs)
+
+
+def _from_http(
+    source: str,
+    destination: pathlib.Path,
+    **kwargs: t.Any,
+) -> None:
+    """Download and decompress a package
+
+    The download is stored in a temporary file which is removed again once
+    extraction has finished, whether or not it succeeded.
+
+    :param source: URL to a particular package
+    :param destination: Where to unpack the archive
+    :param kwargs: Extra options forwarded to ``urlretrieve``
+    """
+    with _TqdmUpTo(
+        unit="B",
+        unit_scale=True,
+        unit_divisor=1024,
+        miniters=1,
+        desc=source.split("/")[-1],
+    ) as _t:  # all optional kwargs
+        local_file, _ = urlretrieve(source, reporthook=_t.update_to, **kwargs)
+        _t.total = _t.n
+
+    try:
+        _from_local_archive(local_file, destination)
+    finally:
+        # Do not leak the downloaded file when extraction raises
+        os.remove(local_file)
+
+
+def _from_git(source: str, destination: pathlib.Path, **clone_kwargs: t.Any) -> None:
+    """Clone a git repository into a directory
+
+    On macOS running on arm64 the clone is additionally configured with
+    ``core.autocrlf=false`` and ``core.eol=lf``.
+
+    :param source: Path to the remote (URL or local) repository
+    :param destination: where to clone the repository
+    :param clone_kwargs: various options to send to the clone command
+    """
+    running_on_mac = OperatingSystem.autodetect() == OperatingSystem.DARWIN
+    running_on_arm64 = Architecture.autodetect() == Architecture.ARM64
+    apple_silicon = running_on_mac and running_on_arm64
+
+    # Extra `--config` options count as unsafe options, so they must be
+    # explicitly allowed whenever they are passed
+    extra_options = (
+        ["--config core.autocrlf=false", "--config core.eol=lf"]
+        if apple_silicon
+        else None
+    )
+    git.Repo.clone_from(
+        source,
+        destination,
+        multi_options=extra_options,
+        allow_unsafe_options=apple_silicon,
+        **clone_kwargs,
+    )
+
+
+def retrieve(
+    source: PathLike, destination: pathlib.Path, **retrieve_kwargs: t.Any
+) -> None:
+    """Primary method for retrieval
+
+    Dispatches on the form of ``source``: a path ending in ``.git`` is
+    cloned, an http(s) URL is downloaded and unpacked, a local directory is
+    copied and a local ``.gz``/``.zip``/``.tgz`` file is unpacked.
+
+    :param source: URL or path to find the package
+    :param destination: where to place the package
+    :param retrieve_kwargs: Extra options forwarded to the chosen method
+    :raises UnsupportedArchive: Unknown archive type
+    :raises FileNotFoundError: Path to archive does not exist
+    """
+    source_str = str(source)
+    parsed_url = urlparse(source_str)
+
+    if parsed_url.path.endswith(".git"):
+        _from_git(source_str, destination, **retrieve_kwargs)
+        return
+    if parsed_url.scheme in ("http", "https"):
+        _from_http(source_str, destination, **retrieve_kwargs)
+        return
+
+    # This is probably a path
+    source_path = pathlib.Path(source)
+    if not source_path.exists():
+        raise FileNotFoundError(f"Package path or file does not exist: {source}")
+
+    archive_suffixes = (".gz", ".zip", ".tgz")
+    if source_path.is_dir():
+        _from_local_directory(source, destination, **retrieve_kwargs)
+    elif source_path.is_file() and source_path.suffix in archive_suffixes:
+        _from_local_archive(source, destination, **retrieve_kwargs)
+    else:
+        raise UnsupportedArchive(
+            f"Source ({source}) is not a supported archive or directory "
+        )
diff --git a/smartsim/_core/config/config.py b/smartsim/_core/config/config.py
index 9cf950b21..03c284edb 100644
--- a/smartsim/_core/config/config.py
+++ b/smartsim/_core/config/config.py
@@ -33,7 +33,7 @@
import psutil
from ...error import SSConfigError
-from ..utils.helpers import expand_exe_path
+from ..utils import expand_exe_path
# Configuration Values
#
@@ -94,13 +94,28 @@ class Config:
def __init__(self) -> None:
# SmartSim/smartsim/_core
self.core_path = Path(os.path.abspath(__file__)).parent.parent
+ # TODO: Turn this into a property. Need to modify the configuration
+ # of KeyDB vs Redis at build time
+ self.conf_dir = self.core_path / "config"
+ self.conf_path = self.conf_dir / "redis.conf"
- dependency_path = os.environ.get("SMARTSIM_DEP_INSTALL_PATH", self.core_path)
+ @property
+ def dependency_path(self) -> Path:
+ return Path(
+ os.environ.get("SMARTSIM_DEP_INSTALL_PATH", str(self.core_path))
+ ).resolve()
+
+ @property
+ def lib_path(self) -> Path:
+ return Path(self.dependency_path, "lib")
- self.lib_path = Path(dependency_path, "lib").resolve()
- self.bin_path = Path(dependency_path, "bin").resolve()
- self.conf_path = Path(dependency_path, "config", "redis.conf")
- self.conf_dir = Path(self.core_path, "config")
+ @property
+ def bin_path(self) -> Path:
+ return Path(self.dependency_path, "bin")
+
+ @property
+ def build_path(self) -> Path:
+ return Path(self.dependency_path, "build")
@property
def redisai(self) -> str:
@@ -157,7 +172,7 @@ def database_file_parse_interval(self) -> int:
@property
def dragon_dotenv(self) -> Path:
"""Returns the path to a .env file containing dragon environment variables"""
- return self.conf_dir / "dragon" / ".env"
+ return Path(self.conf_dir / "dragon" / ".env")
@property
def dragon_server_path(self) -> t.Optional[str]:
diff --git a/smartsim/_core/types.py b/smartsim/_core/types.py
new file mode 100644
index 000000000..d3dc029ea
--- /dev/null
+++ b/smartsim/_core/types.py
@@ -0,0 +1,32 @@
+# BSD 2-Clause License
+#
+# Copyright (c) 2021-2024, Hewlett Packard Enterprise
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import enum
+
+
+class Device(enum.Enum):
+ CPU = "cpu"
+ GPU = "gpu"
diff --git a/smartsim/_core/utils/__init__.py b/smartsim/_core/utils/__init__.py
index 3ea928797..cddbc4ce9 100644
--- a/smartsim/_core/utils/__init__.py
+++ b/smartsim/_core/utils/__init__.py
@@ -29,6 +29,7 @@
colorize,
delete_elements,
execute_platform_cmd,
+ expand_exe_path,
installed_redisai_backends,
is_crayex_platform,
)
diff --git a/smartsim/_core/utils/helpers.py b/smartsim/_core/utils/helpers.py
index df2c016a1..b17be763b 100644
--- a/smartsim/_core/utils/helpers.py
+++ b/smartsim/_core/utils/helpers.py
@@ -39,12 +39,11 @@
from pathlib import Path
from shutil import which
-from smartsim._core._install.builder import TRedisAIBackendStr as _TRedisAIBackendStr
-
if t.TYPE_CHECKING:
from types import FrameType
+_TRedisAIBackendStr = t.Literal["tensorflow", "torch", "onnxruntime"]
_TSignalHandlerFn = t.Callable[[int, t.Optional["FrameType"]], object]
@@ -230,7 +229,9 @@ def redis_install_base(backends_path: t.Optional[str] = None) -> Path:
# pylint: disable-next=import-outside-toplevel
from ..._core.config import CONFIG
- base_path = Path(backends_path) if backends_path else CONFIG.lib_path / "backends"
+ base_path: Path = (
+ Path(backends_path) if backends_path else CONFIG.lib_path / "backends"
+ )
return base_path
@@ -255,10 +256,10 @@ def installed_redisai_backends(
"tensorflow",
"torch",
"onnxruntime",
- "tflite",
}
- return {backend for backend in backends if _installed(base_path, backend)}
+ installed = {backend for backend in backends if _installed(base_path, backend)}
+ return installed
def get_ts_ms() -> int:
diff --git a/smartsim/entity/dbobject.py b/smartsim/entity/dbobject.py
index 5cb0d061f..fa9983c50 100644
--- a/smartsim/entity/dbobject.py
+++ b/smartsim/entity/dbobject.py
@@ -27,7 +27,8 @@
import typing as t
from pathlib import Path
-from .._core._install.builder import Device
+from smartsim._core.types import Device
+
from ..error import SSUnsupportedError
__all__ = ["DBObject", "DBModel", "DBScript"]
diff --git a/smartsim/entity/ensemble.py b/smartsim/entity/ensemble.py
index cab138685..965b10db7 100644
--- a/smartsim/entity/ensemble.py
+++ b/smartsim/entity/ensemble.py
@@ -31,7 +31,8 @@
from tabulate import tabulate
-from .._core._install.builder import Device
+from smartsim._core.types import Device
+
from ..error import (
EntityExistsError,
SmartSimError,
diff --git a/smartsim/entity/model.py b/smartsim/entity/model.py
index a11a594fc..3e8baad5c 100644
--- a/smartsim/entity/model.py
+++ b/smartsim/entity/model.py
@@ -35,7 +35,8 @@
from os import getcwd
from os import path as osp
-from .._core._install.builder import Device
+from smartsim._core.types import Device
+
from .._core.utils.helpers import cat_arg_and_value
from ..error import EntityExistsError, SSUnsupportedError
from ..log import get_logger
diff --git a/smartsim/ml/tf/__init__.py b/smartsim/ml/tf/__init__.py
index 46d89d733..ee791ea98 100644
--- a/smartsim/ml/tf/__init__.py
+++ b/smartsim/ml/tf/__init__.py
@@ -31,23 +31,12 @@
logger = get_logger(__name__)
vers = Versioner()
-TF_VERSION = vers.TENSORFLOW
try:
import tensorflow as tf
except ImportError: # pragma: no cover
raise ModuleNotFoundError(
- f"TensorFlow {TF_VERSION} is not installed. "
- "Please install it to use smartsim.ml.tf"
- ) from None
-
-try:
- installed_tf = Version_(tf.__version__)
- assert installed_tf >= TF_VERSION
-except AssertionError: # pragma: no cover
- raise SmartSimError(
- f"TensorFlow >= {TF_VERSION} is required for smartsim. "
- f"tf, you have {tf.__version__}"
+ "TensorFlow is not installed. Please install it to use smartsim.ml.tf"
) from None
diff --git a/smartsim/ml/tf/utils.py b/smartsim/ml/tf/utils.py
index cf69b65e5..4e45f1847 100644
--- a/smartsim/ml/tf/utils.py
+++ b/smartsim/ml/tf/utils.py
@@ -29,7 +29,7 @@
import keras
import tensorflow as tf
-from tensorflow.python.framework.convert_to_constants import (
+from tensorflow.python.framework.convert_to_constants import ( # type: ignore[import-not-found,unused-ignore]
convert_variables_to_constants_v2,
)
@@ -62,7 +62,7 @@ def freeze_model(
tf.TensorSpec(model.inputs[0].shape, model.inputs[0].dtype)
)
- frozen_func = convert_variables_to_constants_v2(full_model)
+ frozen_func = convert_variables_to_constants_v2(full_model) # type: ignore[no-untyped-call,unused-ignore]
frozen_func.graph.as_graph_def()
input_names = [x.name.split(":")[0] for x in frozen_func.inputs]
@@ -97,7 +97,7 @@ def serialize_model(model: keras.Model) -> t.Tuple[str, t.List[str], t.List[str]
tf.TensorSpec(model.inputs[0].shape, model.inputs[0].dtype)
)
- frozen_func = convert_variables_to_constants_v2(full_model)
+ frozen_func = convert_variables_to_constants_v2(full_model) # type: ignore[no-untyped-call,unused-ignore]
frozen_func.graph.as_graph_def()
input_names = [x.name.split(":")[0] for x in frozen_func.inputs]
diff --git a/tests/backends/run_torch.py b/tests/backends/run_torch.py
index 6e9ba2859..b3c0fc964 100644
--- a/tests/backends/run_torch.py
+++ b/tests/backends/run_torch.py
@@ -74,7 +74,7 @@ def calc_svd(input_tensor):
return input_tensor.svd()
-def run(device):
+def run(device, num_devices):
# connect a client to the database
client = Client(cluster=False)
@@ -92,9 +92,23 @@ def run(device):
net = create_torch_model()
# 20 samples of "image" data
example_forward_input = torch.rand(20, 1, 28, 28)
- client.set_model("cnn", net, "TORCH", device=device)
client.put_tensor("input", example_forward_input.numpy())
- client.run_model("cnn", inputs=["input"], outputs=["output"])
+ if device == "CPU":
+ client.set_model("cnn", net, "TORCH", device=device)
+ client.run_model("cnn", inputs=["input"], outputs=["output"])
+ else:
+ client.set_model_multigpu(
+ "cnn", net, "TORCH", first_gpu=0, num_gpus=num_devices
+ )
+ client.run_model_multigpu(
+ "cnn",
+ offset=1,
+ first_gpu=0,
+ num_gpus=num_devices,
+ inputs=["input"],
+ outputs=["output"],
+ )
+
output = client.get_tensor("output")
print(f"Prediction: {output}")
@@ -106,5 +120,11 @@ def run(device):
parser.add_argument(
"--device", type=str, default="CPU", help="device type for model execution"
)
+ parser.add_argument(
+ "--num-devices",
+ type=int,
+ default=1,
+ help="Number of devices to set the model on",
+ )
args = parser.parse_args()
- run(args.device)
+ run(args.device, args.num_devices)
diff --git a/tests/backends/test_cli_mini_exp.py b/tests/backends/test_cli_mini_exp.py
index 2fde2ff5f..3379bf2ee 100644
--- a/tests/backends/test_cli_mini_exp.py
+++ b/tests/backends/test_cli_mini_exp.py
@@ -32,6 +32,7 @@
import smartsim._core._cli.validate
import smartsim._core._install.builder as build
+from smartsim._core._install.platform import Device
from smartsim._core.utils.helpers import installed_redisai_backends
sklearn_available = True
@@ -79,7 +80,7 @@ def _mock_make_managed_local_orc(*a, **kw):
location=test_dir,
port=db_port,
# Always test on CPU, heads don't always have GPU
- device=build.Device.CPU,
+ device=Device.CPU,
# Test the backends the dev has installed
with_tf="tensorflow" in backends,
with_pt="torch" in backends,
diff --git a/tests/backends/test_torch.py b/tests/backends/test_torch.py
index c995f76ca..6aff6b0ba 100644
--- a/tests/backends/test_torch.py
+++ b/tests/backends/test_torch.py
@@ -65,9 +65,11 @@ def test_torch_model_and_script(
db = prepare_db(single_db).orchestrator
wlm_experiment.reconnect_orchestrator(db.checkpoint_file)
test_device = mlutils.get_test_device()
+ test_num_gpus = mlutils.get_test_num_gpus() if pytest.test_device == "GPU" else 1
run_settings = wlm_experiment.create_run_settings(
- "python", f"run_torch.py --device={test_device}"
+ "python",
+ ["run_torch.py", f"--device={test_device}", f"--num-devices={test_num_gpus}"],
)
if wlmutils.get_test_launcher() != "local":
run_settings.set_tasks(1)
diff --git a/tests/install/test_build.py b/tests/install/test_build.py
new file mode 100644
index 000000000..f8a5c4896
--- /dev/null
+++ b/tests/install/test_build.py
@@ -0,0 +1,148 @@
+# BSD 2-Clause License
+#
+# Copyright (c) 2021-2024, Hewlett Packard Enterprise
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import operator
+
+import pytest
+
+from smartsim._core._cli.build import parse_requirement
+from smartsim._core._install.buildenv import Version_
+
+# The tests in this file belong to the group_a group
+pytestmark = pytest.mark.group_a
+
+
+_SUPPORTED_OPERATORS = ("==", ">=", ">", "<=", "<")
+
+
+@pytest.mark.parametrize(
+ "spec, name, pin",
+ (
+ pytest.param("foo", "foo", None, id="Just Name"),
+ pytest.param("foo==1", "foo", "==1", id="With Major"),
+ pytest.param("foo==1.2", "foo", "==1.2", id="With Minor"),
+ pytest.param("foo==1.2.3", "foo", "==1.2.3", id="With Patch"),
+ pytest.param("foo[with-extras]==1.2.3", "foo", "==1.2.3", id="With Extra"),
+ pytest.param(
+ "foo[with,many,extras]==1.2.3", "foo", "==1.2.3", id="With Many Extras"
+ ),
+ *(
+ pytest.param(
+ f"foo{symbol}1.2.3{tag}",
+ "foo",
+ f"{symbol}1.2.3{tag}",
+ id=f"{symbol=} | {tag=}",
+ )
+ for symbol in _SUPPORTED_OPERATORS
+ for tag in ("", "+cuda", "+rocm", "+cpu")
+ ),
+ ),
+)
+def test_parse_requirement_name_and_version(spec, name, pin):
+ p_name, p_pin, _ = parse_requirement(spec)
+ assert p_name == name
+ assert p_pin == pin
+
+
+# fmt: off
+@pytest.mark.parametrize(
+ "spec, ver, should_pass",
+ (
+ pytest.param("foo" , Version_("1.2.3") , True, id="No spec"),
+ # EQ --------------------------------------------------------------------------
+ pytest.param("foo==1.2.3" , Version_("1.2.3") , True, id="EQ Spec, EQ Version"),
+ pytest.param("foo==1.2.3" , Version_("1.2.5") , False, id="EQ Spec, GT Version"),
+ pytest.param("foo==1.2.3" , Version_("1.2.2") , False, id="EQ Spec, LT Version"),
+ pytest.param("foo==1.2.3+rocm", Version_("1.2.3+rocm"), True, id="EQ Spec, Compatible Version with suffix"),
+ pytest.param("foo==1.2.3" , Version_("1.2.3+cuda"), False, id="EQ Spec, Compatible Version, Extra Suffix"),
+ pytest.param("foo==1.2.3+cuda", Version_("1.2.3") , False, id="EQ Spec, Compatible Version, Missing Suffix"),
+ pytest.param("foo==1.2.3+cuda", Version_("1.2.3+rocm"), False, id="EQ Spec, Compatible Version, Mismatched Suffix"),
+ # LT --------------------------------------------------------------------------
+ pytest.param("foo<1.2.3" , Version_("1.2.3") , False, id="LT Spec, EQ Version"),
+ pytest.param("foo<1.2.3" , Version_("1.2.5") , False, id="LT Spec, GT Version"),
+ pytest.param("foo<1.2.3" , Version_("1.2.2") , True, id="LT Spec, LT Version"),
+ pytest.param("foo<1.2.3+rocm" , Version_("1.2.2+rocm"), True, id="LT Spec, Compatible Version with suffix"),
+ pytest.param("foo<1.2.3" , Version_("1.2.2+cuda"), False, id="LT Spec, Compatible Version, Extra Suffix"),
+ pytest.param("foo<1.2.3+cuda" , Version_("1.2.2") , False, id="LT Spec, Compatible Version, Missing Suffix"),
+ pytest.param("foo<1.2.3+cuda" , Version_("1.2.2+rocm"), False, id="LT Spec, Compatible Version, Mismatched Suffix"),
+ # LE --------------------------------------------------------------------------
+ pytest.param("foo<=1.2.3" , Version_("1.2.3") , True, id="LE Spec, EQ Version"),
+ pytest.param("foo<=1.2.3" , Version_("1.2.5") , False, id="LE Spec, GT Version"),
+ pytest.param("foo<=1.2.3" , Version_("1.2.2") , True, id="LE Spec, LT Version"),
+ pytest.param("foo<=1.2.3+rocm", Version_("1.2.3+rocm"), True, id="LE Spec, Compatible Version with suffix"),
+ pytest.param("foo<=1.2.3" , Version_("1.2.3+cuda"), False, id="LE Spec, Compatible Version, Extra Suffix"),
+ pytest.param("foo<=1.2.3+cuda", Version_("1.2.3") , False, id="LE Spec, Compatible Version, Missing Suffix"),
+ pytest.param("foo<=1.2.3+cuda", Version_("1.2.3+rocm"), False, id="LE Spec, Compatible Version, Mismatched Suffix"),
+ # GT --------------------------------------------------------------------------
+ pytest.param("foo>1.2.3" , Version_("1.2.3") , False, id="GT Spec, EQ Version"),
+ pytest.param("foo>1.2.3" , Version_("1.2.5") , True, id="GT Spec, GT Version"),
+ pytest.param("foo>1.2.3" , Version_("1.2.2") , False, id="GT Spec, LT Version"),
+ pytest.param("foo>1.2.3+rocm" , Version_("1.2.4+rocm"), True, id="GT Spec, Compatible Version with suffix"),
+ pytest.param("foo>1.2.3" , Version_("1.2.4+cuda"), False, id="GT Spec, Compatible Version, Extra Suffix"),
+ pytest.param("foo>1.2.3+cuda" , Version_("1.2.4") , False, id="GT Spec, Compatible Version, Missing Suffix"),
+ pytest.param("foo>1.2.3+cuda" , Version_("1.2.4+rocm"), False, id="GT Spec, Compatible Version, Mismatched Suffix"),
+ # GE --------------------------------------------------------------------------
+ pytest.param("foo>=1.2.3" , Version_("1.2.3") , True, id="GE Spec, EQ Version"),
+ pytest.param("foo>=1.2.3" , Version_("1.2.5") , True, id="GE Spec, GT Version"),
+ pytest.param("foo>=1.2.3" , Version_("1.2.2") , False, id="GE Spec, LT Version"),
+ pytest.param("foo>=1.2.3+rocm", Version_("1.2.3+rocm"), True, id="GE Spec, Compatible Version with suffix"),
+ pytest.param("foo>=1.2.3" , Version_("1.2.3+cuda"), False, id="GE Spec, Compatible Version, Extra Suffix"),
+ pytest.param("foo>=1.2.3+cuda", Version_("1.2.3") , False, id="GE Spec, Compatible Version, Missing Suffix"),
+ pytest.param("foo>=1.2.3+cuda", Version_("1.2.3+rocm"), False, id="GE Spec, Compatible Version, Mismatched Suffix"),
+ )
+)
+# fmt: on
+def test_parse_requirement_comparison_fn(spec, ver, should_pass):
+ _, _, cmp = parse_requirement(spec)
+ assert cmp(ver) == should_pass
+
+
+@pytest.mark.parametrize(
+ "spec, ctx",
+ (
+ *(
+ pytest.param(
+ f"thing{symbol}",
+ pytest.raises(ValueError, match="Invalid requirement string:"),
+ id=f"No version w/ operator {symbol}",
+ )
+ for symbol in _SUPPORTED_OPERATORS
+ ),
+ pytest.param(
+ "thing>=>1.2.3",
+ pytest.raises(ValueError, match="Invalid requirement string:"),
+ id="Operator too long",
+ ),
+ pytest.param(
+ "thing<>1.2.3",
+ pytest.raises(ValueError, match="Unrecognized comparison operator: <>"),
+ id="Nonsense operator",
+ ),
+ ),
+)
+def test_parse_requirement_errors_on_invalid_spec(spec, ctx):
+ with ctx:
+ parse_requirement(spec)
diff --git a/tests/install/test_builder.py b/tests/install/test_builder.py
deleted file mode 100644
index feaf7e54f..000000000
--- a/tests/install/test_builder.py
+++ /dev/null
@@ -1,404 +0,0 @@
-# BSD 2-Clause License
-#
-# Copyright (c) 2021-2024, Hewlett Packard Enterprise
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-#
-# 1. Redistributions of source code must retain the above copyright notice, this
-# list of conditions and the following disclaimer.
-#
-# 2. Redistributions in binary form must reproduce the above copyright notice,
-# this list of conditions and the following disclaimer in the documentation
-# and/or other materials provided with the distribution.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-
-import functools
-import pathlib
-import textwrap
-import time
-
-import pytest
-
-import smartsim._core._install.builder as build
-from smartsim._core._install.buildenv import RedisAIVersion
-
-# The tests in this file belong to the group_a group
-pytestmark = pytest.mark.group_a
-
-RAI_VERSIONS = RedisAIVersion("1.2.7")
-
-for_each_device = pytest.mark.parametrize(
- "device", [build.Device.CPU, build.Device.GPU]
-)
-
-_toggle_build_optional_backend = lambda backend: pytest.mark.parametrize(
- f"build_{backend}",
- [
- pytest.param(switch, id=f"with{'' if switch else 'out'}-{backend}")
- for switch in (True, False)
- ],
-)
-toggle_build_tf = _toggle_build_optional_backend("tf")
-toggle_build_pt = _toggle_build_optional_backend("pt")
-toggle_build_ort = _toggle_build_optional_backend("ort")
-
-
-@pytest.mark.parametrize(
- "mock_os", [pytest.param(os_, id=f"os='{os_}'") for os_ in ("Windows", "Java", "")]
-)
-def test_os_enum_raises_on_unsupported(mock_os):
- with pytest.raises(build.BuildError, match="operating system") as err_info:
- build.OperatingSystem.from_str(mock_os)
-
-
-@pytest.mark.parametrize(
- "mock_arch",
- [
- pytest.param(arch_, id=f"arch='{arch_}'")
- for arch_ in ("i386", "i686", "i86pc", "aarch64", "armv7l", "")
- ],
-)
-def test_arch_enum_raises_on_unsupported(mock_arch):
- with pytest.raises(build.BuildError, match="architecture"):
- build.Architecture.from_str(mock_arch)
-
-
-@pytest.fixture
-def p_test_dir(test_dir):
- yield pathlib.Path(test_dir).resolve()
-
-
-@for_each_device
-def test_rai_builder_raises_if_attempting_to_place_deps_when_build_dir_dne(
- monkeypatch, p_test_dir, device
-):
- monkeypatch.setattr(build.RedisAIBuilder, "_validate_platform", lambda a: None)
- monkeypatch.setattr(
- build.RedisAIBuilder,
- "rai_build_path",
- property(lambda self: p_test_dir / "path/to/dir/that/dne"),
- )
- rai_builder = build.RedisAIBuilder()
- with pytest.raises(build.BuildError, match=r"build directory not found"):
- rai_builder._fetch_deps_for(device)
-
-
-@for_each_device
-def test_rai_builder_raises_if_attempting_to_place_deps_in_nonempty_dir(
- monkeypatch, p_test_dir, device
-):
- (p_test_dir / "some_file.txt").touch()
- monkeypatch.setattr(build.RedisAIBuilder, "_validate_platform", lambda a: None)
- monkeypatch.setattr(
- build.RedisAIBuilder, "rai_build_path", property(lambda self: p_test_dir)
- )
- monkeypatch.setattr(
- build.RedisAIBuilder, "get_deps_dir_path_for", lambda *a, **kw: p_test_dir
- )
- rai_builder = build.RedisAIBuilder()
-
- with pytest.raises(build.BuildError, match=r"is not empty"):
- rai_builder._fetch_deps_for(device)
-
-
-invalid_build_arm64 = [
- dict(build_tf=True, build_onnx=True),
- dict(build_tf=False, build_onnx=True),
- dict(build_tf=True, build_onnx=False),
-]
-invalid_build_ids = [
- ",".join([f"{key}={value}" for key, value in d.items()])
- for d in invalid_build_arm64
-]
-
-
-@pytest.mark.parametrize("build_options", invalid_build_arm64, ids=invalid_build_ids)
-def test_rai_builder_raises_if_unsupported_deps_on_arm64(build_options):
- with pytest.raises(build.BuildError, match=r"are not supported on.*ARM64"):
- build.RedisAIBuilder(
- _os=build.OperatingSystem.DARWIN,
- architecture=build.Architecture.ARM64,
- **build_options,
- )
-
-
-def _confirm_inst_presence(type_, should_be_present, seq):
- expected_num_occurrences = 1 if should_be_present else 0
- occurrences = filter(lambda item: isinstance(item, type_), seq)
- return expected_num_occurrences == len(tuple(occurrences))
-
-
-# Helper functions to check for the presence (or absence) of a
-# ``_RAIBuildDependency`` dependency in a list of dependencies that need to be
-# fetched by a ``RedisAIBuilder`` instance
-dlpack_dep_presence = functools.partial(
- _confirm_inst_presence, build._DLPackRepository, True
-)
-pt_dep_presence = functools.partial(_confirm_inst_presence, build._PTArchive)
-tf_dep_presence = functools.partial(_confirm_inst_presence, build._TFArchive)
-ort_dep_presence = functools.partial(_confirm_inst_presence, build._ORTArchive)
-
-
-@for_each_device
-@toggle_build_tf
-@toggle_build_pt
-@toggle_build_ort
-def test_rai_builder_will_add_dep_if_backend_requested_wo_duplicates(
- monkeypatch, device, build_tf, build_pt, build_ort
-):
- monkeypatch.setattr(build.RedisAIBuilder, "_validate_platform", lambda a: None)
-
- rai_builder = build.RedisAIBuilder(
- build_tf=build_tf, build_torch=build_pt, build_onnx=build_ort
- )
- requested_backends = rai_builder._get_deps_to_fetch_for(build.Device(device))
- assert dlpack_dep_presence(requested_backends)
- assert tf_dep_presence(build_tf, requested_backends)
- assert pt_dep_presence(build_pt, requested_backends)
- assert ort_dep_presence(build_ort, requested_backends)
-
-
-@for_each_device
-@toggle_build_tf
-@toggle_build_pt
-def test_rai_builder_will_not_add_dep_if_custom_dep_path_provided(
- monkeypatch, device, p_test_dir, build_tf, build_pt
-):
- monkeypatch.setattr(build.RedisAIBuilder, "_validate_platform", lambda a: None)
- mock_ml_lib = p_test_dir / "some/ml/lib"
- mock_ml_lib.mkdir(parents=True)
- rai_builder = build.RedisAIBuilder(
- build_tf=build_tf,
- build_torch=build_pt,
- build_onnx=False,
- libtf_dir=str(mock_ml_lib if build_tf else ""),
- torch_dir=str(mock_ml_lib if build_pt else ""),
- )
- requested_backends = rai_builder._get_deps_to_fetch_for(device)
- assert dlpack_dep_presence(requested_backends)
- assert tf_dep_presence(False, requested_backends)
- assert pt_dep_presence(False, requested_backends)
- assert ort_dep_presence(False, requested_backends)
- assert len(requested_backends) == 1
-
-
-def test_rai_builder_raises_if_it_fetches_an_unexpected_number_of_ml_deps(
- monkeypatch, p_test_dir
-):
- monkeypatch.setattr(build.RedisAIBuilder, "_validate_platform", lambda a: None)
- monkeypatch.setattr(
- build.RedisAIBuilder, "rai_build_path", property(lambda self: p_test_dir)
- )
- monkeypatch.setattr(
- build,
- "_place_rai_dep_at",
- lambda target, verbose: lambda dep: target
- / "whoops_all_ml_deps_extract_to_a_dir_with_this_name",
- )
- rai_builder = build.RedisAIBuilder(build_tf=True, build_torch=True, build_onnx=True)
- with pytest.raises(
- build.BuildError,
- match=r"Expected to place \d+ dependencies, but only found \d+",
- ):
- rai_builder._fetch_deps_for(build.Device.CPU)
-
-
-def test_threaded_map():
- def _some_io_op(x):
- return x * x
-
- assert (0, 1, 4, 9, 16) == tuple(build._threaded_map(_some_io_op, range(5)))
-
-
-def test_threaded_map_returns_early_if_nothing_to_map():
- sleep_duration = 60
-
- def _some_long_io_op(_):
- time.sleep(sleep_duration)
-
- start = time.time()
- build._threaded_map(_some_long_io_op, [])
- end = time.time()
- assert end - start < sleep_duration
-
-
-def test_correct_pt_variant_os():
- # Check that all Linux variants return Linux
- for linux_variant in build.OperatingSystem.LINUX.value:
- os_ = build.OperatingSystem.from_str(linux_variant)
- assert build._choose_pt_variant(os_) == build._PTArchiveLinux
-
- # Check that ARM64 and X86_64 Mac OSX return the Mac variant
- all_archs = (build.Architecture.ARM64, build.Architecture.X64)
- for arch in all_archs:
- os_ = build.OperatingSystem.DARWIN
- assert build._choose_pt_variant(os_) == build._PTArchiveMacOSX
-
-
-def test_PTArchiveMacOSX_url():
- arch = build.Architecture.X64
- pt_version = RAI_VERSIONS.torch
-
- pt_linux_cpu = build._PTArchiveLinux(
- build.Architecture.X64, build.Device.CPU, pt_version, False
- )
- x64_prefix = "https://download.pytorch.org/libtorch/"
- assert x64_prefix in pt_linux_cpu.url
-
- pt_macosx_cpu = build._PTArchiveMacOSX(
- build.Architecture.ARM64, build.Device.CPU, pt_version, False
- )
- arm64_prefix = "https://github.com/CrayLabs/ml_lib_builder/releases/download/"
- assert arm64_prefix in pt_macosx_cpu.url
-
-
-def test_PTArchiveMacOSX_gpu_error():
- with pytest.raises(build.BuildError, match="support GPU on Mac OSX"):
- build._PTArchiveMacOSX(
- build.Architecture.ARM64, build.Device.GPU, RAI_VERSIONS.torch, False
- ).url
-
-
-def test_valid_platforms():
- assert build.RedisAIBuilder(
- _os=build.OperatingSystem.LINUX,
- architecture=build.Architecture.X64,
- build_tf=True,
- build_torch=True,
- build_onnx=True,
- )
- assert build.RedisAIBuilder(
- _os=build.OperatingSystem.DARWIN,
- architecture=build.Architecture.X64,
- build_tf=True,
- build_torch=True,
- build_onnx=False,
- )
- assert build.RedisAIBuilder(
- _os=build.OperatingSystem.DARWIN,
- architecture=build.Architecture.X64,
- build_tf=False,
- build_torch=True,
- build_onnx=False,
- )
-
-
-@pytest.mark.parametrize(
- "plat,cmd,expected_cmd",
- [
- # Bare Word
- pytest.param(
- build.Platform(build.OperatingSystem.LINUX, build.Architecture.X64),
- ["git", "clone", "my-repo"],
- ["git", "clone", "my-repo"],
- id="git-Linux-X64",
- ),
- pytest.param(
- build.Platform(build.OperatingSystem.LINUX, build.Architecture.ARM64),
- ["git", "clone", "my-repo"],
- ["git", "clone", "my-repo"],
- id="git-Linux-Arm64",
- ),
- pytest.param(
- build.Platform(build.OperatingSystem.DARWIN, build.Architecture.X64),
- ["git", "clone", "my-repo"],
- ["git", "clone", "my-repo"],
- id="git-Darwin-X64",
- ),
- pytest.param(
- build.Platform(build.OperatingSystem.DARWIN, build.Architecture.ARM64),
- ["git", "clone", "my-repo"],
- [
- "git",
- "clone",
- "--config",
- "core.autocrlf=false",
- "--config",
- "core.eol=lf",
- "my-repo",
- ],
- id="git-Darwin-Arm64",
- ),
- # Abs path
- pytest.param(
- build.Platform(build.OperatingSystem.LINUX, build.Architecture.X64),
- ["/path/to/git", "clone", "my-repo"],
- ["/path/to/git", "clone", "my-repo"],
- id="Abs-Linux-X64",
- ),
- pytest.param(
- build.Platform(build.OperatingSystem.LINUX, build.Architecture.ARM64),
- ["/path/to/git", "clone", "my-repo"],
- ["/path/to/git", "clone", "my-repo"],
- id="Abs-Linux-Arm64",
- ),
- pytest.param(
- build.Platform(build.OperatingSystem.DARWIN, build.Architecture.X64),
- ["/path/to/git", "clone", "my-repo"],
- ["/path/to/git", "clone", "my-repo"],
- id="Abs-Darwin-X64",
- ),
- pytest.param(
- build.Platform(build.OperatingSystem.DARWIN, build.Architecture.ARM64),
- ["/path/to/git", "clone", "my-repo"],
- [
- "/path/to/git",
- "clone",
- "--config",
- "core.autocrlf=false",
- "--config",
- "core.eol=lf",
- "my-repo",
- ],
- id="Abs-Darwin-Arm64",
- ),
- ],
-)
-def test_git_commands_are_configered_correctly_for_platforms(plat, cmd, expected_cmd):
- assert build.config_git_command(plat, cmd) == expected_cmd
-
-
-def test_modify_source_files(p_test_dir):
- def make_text_blurb(food):
- return textwrap.dedent(f"""\
- My favorite food is {food}
- {food} is an important part of a healthy breakfast
- {food} {food} {food} {food}
- This line should be unchanged!
- --> {food} <--
- """)
-
- original_word = "SPAM"
- mutated_word = "EGGS"
-
- source_files = []
- for i in range(3):
- source_file = p_test_dir / f"test_{i}"
- source_file.touch()
- source_file.write_text(make_text_blurb(original_word))
- source_files.append(source_file)
- # Modify a single file
- build._modify_source_files(source_files[0], original_word, mutated_word)
- assert source_files[0].read_text() == make_text_blurb(mutated_word)
- assert source_files[1].read_text() == make_text_blurb(original_word)
- assert source_files[2].read_text() == make_text_blurb(original_word)
-
- # Modify multiple files
- build._modify_source_files(
- (source_files[1], source_files[2]), original_word, mutated_word
- )
- assert source_files[1].read_text() == make_text_blurb(mutated_word)
- assert source_files[2].read_text() == make_text_blurb(mutated_word)
diff --git a/tests/install/test_mlpackage.py b/tests/install/test_mlpackage.py
new file mode 100644
index 000000000..d27e69b2b
--- /dev/null
+++ b/tests/install/test_mlpackage.py
@@ -0,0 +1,122 @@
+# BSD 2-Clause License
+#
+# Copyright (c) 2021-2024, Hewlett Packard Enterprise
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+import pathlib
+from unittest.mock import MagicMock
+
+import pytest
+
+from smartsim._core._install.mlpackages import (
+ MLPackage,
+ MLPackageCollection,
+ RAIPatch,
+ load_platform_configs,
+)
+from smartsim._core._install.platform import Platform
+
+# The tests in this file belong to the group_a group
+pytestmark = pytest.mark.group_a
+
+mock_platform = MagicMock(spec=Platform)
+
+
+@pytest.fixture
+def mock_ml_packages():
+    """Yield two ``MLPackage``-spec'd mocks named "foo" and "bar"."""
+    packages = [MagicMock(spec=MLPackage) for _ in range(2)]
+    for package, pkg_name in zip(packages, ("foo", "bar")):
+        package.name = pkg_name  # set afterwards: `name=` is reserved by Mock
+    yield packages
+
+
+@pytest.mark.parametrize(
+    "patch",
+    [
+        pytest.param(MagicMock(spec=RAIPatch), id="one patch"),
+        pytest.param(
+            [MagicMock(spec=RAIPatch) for _ in range(3)], id="multiple patches"
+        ),
+        pytest.param((), id="no patch"),
+    ],
+)
+def test_mlpackage_constructor(patch):
+    """MLPackage can be built with one patch, a list of patches, or none."""
+    args = ("foo", "0.0.0", "https://nothing.com", ["bar==0.1", "baz==0.2"])
+    MLPackage(*args, pathlib.Path("/nothing/fake"), patch)
+
+
+def test_mlpackage_collection_constructor(mock_ml_packages):
+ MLPackageCollection(mock_platform, mock_ml_packages)  # smoke test: must not raise
+
+
+def test_mlpackage_collection_mutable_mapping_methods(mock_ml_packages):
+    """Exercise the mapping protocol and string rendering of MLPackageCollection."""
+    collection = MLPackageCollection(mock_platform, mock_ml_packages)
+    for stored in collection._ml_packages.values():
+        stored.version = "0.0.0"
+    assert collection._ml_packages == collection
+    expected_count = len(mock_ml_packages)
+
+    # __iter__ yields the package names in the order the packages were given
+    expected_names = [pkg.name for pkg in mock_ml_packages]
+    assert list(collection) == expected_names
+
+    # __getitem__ hands back the very object that was passed in
+    assert all(collection[pkg.name] is pkg for pkg in mock_ml_packages)
+
+    # __len__
+    assert len(collection) == expected_count
+
+    # __delitem__ removes the entry, so a later lookup raises KeyError
+    removed = mock_ml_packages[0].name
+    del collection[removed]
+    with pytest.raises(KeyError):
+        collection[removed]
+    assert len(collection) == expected_count - 1
+
+    # __setitem__ is expected to be rejected with a TypeError
+    with pytest.raises(TypeError):
+        collection["baz"] = MagicMock(spec=MLPackage)
+
+    # __contains__
+    name, package = next(iter(collection.items()))
+    assert name in collection
+
+    # __str__ mentions "Package", "Version" and the remaining entry's details
+    rendered = str(collection)
+    for fragment in ("Package", "Version", package.version, name):
+        assert fragment in rendered
+
+
+def test_load_configs_raises_when_dir_dne(test_dir):
+    """Loading configs from a missing directory raises FileNotFoundError."""
+    import re  # local import: `match` is a regex, so the path must be escaped
+
+    dne_dir = pathlib.Path(test_dir, "dne")
+    expected = f"Platform configuration directory `{os.fspath(dne_dir)}` does not exist"
+    with pytest.raises(FileNotFoundError, match=re.escape(expected)):
+        load_platform_configs(dne_dir)
diff --git a/tests/install/test_package_retriever.py b/tests/install/test_package_retriever.py
new file mode 100644
index 000000000..d415ae235
--- /dev/null
+++ b/tests/install/test_package_retriever.py
@@ -0,0 +1,106 @@
+# BSD 2-Clause License
+#
+# Copyright (c) 2021-2024, Hewlett Packard Enterprise
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import contextlib
+import filecmp
+import os
+import pathlib
+import random
+import string
+import tarfile
+import zipfile
+
+import pytest
+
+from smartsim._core._install.utils import retrieve
+
+# The tests in this file belong to the group_a group
+pytestmark = pytest.mark.group_a
+
+
+@contextlib.contextmanager
+def temp_cd(path):
+    previous_cwd = pathlib.Path.cwd()  # remembered so the cwd is restored on exit
+    os.chdir(path)
+    try:
+        yield
+    finally:
+        os.chdir(previous_cwd)
+
+
+def make_test_file(test_file):
+    """Fill ``test_file`` with 1024 random ASCII letters and digits."""
+    alphabet = string.ascii_letters + string.digits
+    pathlib.Path(test_file).write_text("".join(random.choices(alphabet, k=1024)))
+
+
+def test_local_archive_zip(test_dir):
+    """A local zip archive is unpacked into the destination byte-for-byte."""
+    payload, archive = "./test.data", "./test.zip"
+    destination = pathlib.Path("./output")
+    with temp_cd(test_dir):
+        make_test_file(payload)
+        with zipfile.ZipFile(archive, "w") as zipped:
+            zipped.write(payload)
+
+        retrieve(archive, destination)
+
+        # shallow=False compares file contents rather than os.stat() signatures
+        extracted = destination / "test.data"
+        assert filecmp.cmp(payload, extracted, shallow=False)
+
+
+def test_local_archive_tgz(test_dir):
+    """A local gzipped tarball is unpacked into the destination byte-for-byte."""
+    payload, archive = "./test.data", "./test.tgz"
+    destination = pathlib.Path("./output")
+    with temp_cd(test_dir):
+        make_test_file(payload)
+        with tarfile.open(archive, "w:gz") as tarball:
+            tarball.add(payload)
+
+        retrieve(archive, destination)
+
+        # shallow=False compares file contents rather than os.stat() signatures
+        extracted = destination / "test.data"
+        assert filecmp.cmp(payload, extracted, shallow=False)
+
+
+def test_git(test_dir):
+    """Retrieving a git URL leaves a directory at the destination."""
+    clone_dir = f"{test_dir}/smartsim_git"
+    # NOTE(review): targets GitHub, so this presumably needs network access
+    repo_url = "https://github.com/CrayLabs/SmartSim.git"
+    retrieve(repo_url, clone_dir, branch="master")
+    assert pathlib.Path(clone_dir).is_dir()
+
+
+def test_https(test_dir):
+    """Retrieving an https archive URL creates the destination path."""
+    archive_url = "https://github.com/CrayLabs/SmartSim/archive/refs/tags/v0.5.0.zip"
+    destination = pathlib.Path(test_dir) / "output"
+    retrieve(archive_url, destination)
+    assert destination.exists()
diff --git a/tests/install/test_platform.py b/tests/install/test_platform.py
new file mode 100644
index 000000000..76ff3f76b
--- /dev/null
+++ b/tests/install/test_platform.py
@@ -0,0 +1,89 @@
+# BSD 2-Clause License
+#
+# Copyright (c) 2021-2024, Hewlett Packard Enterprise
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import json
+import os
+import platform
+
+import pytest
+
+from smartsim._core._install.platform import Architecture, Device, OperatingSystem
+
+# The tests in this file belong to the group_a group
+pytestmark = pytest.mark.group_a
+
+
+def test_device_cpu():
+    """The CPU device reports itself as neither a GPU, CUDA nor ROCm device."""
+    device = Device.CPU
+    gpu_checks = (device.is_gpu(), device.is_cuda(), device.is_rocm())
+    assert not any(gpu_checks)
+
+
+@pytest.mark.parametrize("cuda_device", Device.cuda_enums())
+def test_cuda(monkeypatch, test_dir, cuda_device):
+    """detect_cuda_version() resolves a ``version.json`` placed in CUDA_HOME."""
+    version = cuda_device.value.split("-")[1]
+    fake_full_version = version + ".8888" + ".9999"
+    monkeypatch.setenv("CUDA_HOME", test_dir)
+
+    mock_version = dict(cuda=dict(version=fake_full_version))
+    with open(f"{test_dir}/version.json", "w") as outfile:
+        json.dump(mock_version, outfile)
+
+    assert Device.detect_cuda_version() == cuda_device
+    assert cuda_device.is_gpu()
+    assert cuda_device.is_cuda()
+    assert not cuda_device.is_rocm()
+
+
+@pytest.mark.parametrize("rocm_device", Device.rocm_enums())
+def test_rocm(monkeypatch, test_dir, rocm_device):
+    """detect_rocm_version() resolves a ``.info/version`` file under ROCM_HOME."""
+    short_version = rocm_device.value.split("-")[1]
+    monkeypatch.setenv("ROCM_HOME", test_dir)
+    info_dir = f"{test_dir}/.info"
+    os.mkdir(info_dir)
+    with open(f"{info_dir}/version", "w") as version_file:
+        version_file.write(f"{short_version}.8888-9999")
+
+    detected = Device.detect_rocm_version()
+    assert detected == rocm_device
+    assert rocm_device.is_gpu()
+    assert not rocm_device.is_cuda()
+    assert rocm_device.is_rocm()
+
+
+@pytest.mark.parametrize("os_name", ("linux", "darwin"))
+def test_operating_system(monkeypatch, os_name):  # `os` would shadow the module
+    monkeypatch.setattr(platform, "system", lambda: os_name)
+    assert OperatingSystem.autodetect().value == os_name
+
+
+@pytest.mark.parametrize("arch", ("x86_64", "arm64"))
+def test_architecture(monkeypatch, arch):
+ monkeypatch.setattr(platform, "machine", lambda: arch)  # fake the host arch
+ assert Architecture.autodetect().value == arch
diff --git a/tests/install/test_redisai_builder.py b/tests/install/test_redisai_builder.py
new file mode 100644
index 000000000..81673a7f1
--- /dev/null
+++ b/tests/install/test_redisai_builder.py
@@ -0,0 +1,60 @@
+# BSD 2-Clause License
+#
+# Copyright (c) 2021-2024, Hewlett Packard Enterprise
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+# list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from pathlib import Path
+
+import pytest
+
+from smartsim._core._install.buildenv import BuildEnv
+from smartsim._core._install.mlpackages import (
+ DEFAULT_MLPACKAGE_PATH,
+ MLPackage,
+ load_platform_configs,
+)
+from smartsim._core._install.platform import Platform
+from smartsim._core._install.redisaiBuilder import RedisAIBuilder
+
+# The tests in this file belong to the group_a group
+pytestmark = pytest.mark.group_a
+
+DEFAULT_MLPACKAGES = load_platform_configs(DEFAULT_MLPACKAGE_PATH)
+
+
+@pytest.mark.parametrize(
+    "platform", list(DEFAULT_MLPACKAGES), ids=list(map(str, DEFAULT_MLPACKAGES))
+)
+def test_backends_to_be_installed(monkeypatch, test_dir, platform):
+    """Each ``build_*`` toggle is on exactly when its backend package is listed."""
+    toggle_for_backend = {
+        "libtorch": "build_torch",
+        "libtensorflow": "build_tensorflow",
+        "onnxruntime": "build_onnxruntime",
+    }
+    mlpackages = DEFAULT_MLPACKAGES[platform]
+    monkeypatch.setattr(MLPackage, "retrieve", lambda *args, **kwargs: None)
+    builder = RedisAIBuilder(platform, mlpackages, BuildEnv(), Path(test_dir))
+    for backend, toggle in toggle_for_backend.items():
+        assert getattr(builder, toggle) == (backend in mlpackages)
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 710a9a659..1cead7625 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -436,24 +436,23 @@ def mock_execute(ns: argparse.Namespace, _unparsed: t.Optional[t.List[str]] = No
# fmt: off
@pytest.mark.parametrize(
- "command,mock_location,exp_output,optional_arg,exp_valid,exp_err_msg,check_prop,exp_prop_val",
+ "command, mock_location, exp_output, optional_arg, exp_valid, exp_err_msg, check_prop, exp_prop_val",
[
- pytest.param("build", "build_execute", "verbose mocked-build", "-v", True, "", "v", True, id="verbose 'on'"),
- pytest.param("build", "build_execute", "cpu mocked-build", "--device=cpu", True, "", "device", "cpu", id="device 'cpu'"),
- pytest.param("build", "build_execute", "gpu mocked-build", "--device=gpu", True, "", "device", "gpu", id="device 'gpu'"),
- pytest.param("build", "build_execute", "gpuX mocked-build", "--device=gpux", False, "invalid choice: 'gpux'", "", "", id="set bad device 'gpuX'"),
- pytest.param("build", "build_execute", "no tensorflow mocked-build", "--no_tf", True, "", "no_tf", True, id="set no TF"),
- pytest.param("build", "build_execute", "no torch mocked-build", "--no_pt", True, "", "no_pt", True, id="set no torch"),
- pytest.param("build", "build_execute", "onnx mocked-build", "--onnx", True, "", "onnx", True, id="set w/onnx"),
- pytest.param("build", "build_execute", "torch-dir mocked-build", "--torch_dir /foo/bar", True, "", "torch_dir", "/foo/bar", id="set torch dir"),
- pytest.param("build", "build_execute", "bad-torch-dir mocked-build", "--torch_dir", False, "error: argument --torch_dir", "", "", id="set torch dir, no path"),
- pytest.param("build", "build_execute", "keydb mocked-build", "--keydb", True, "", "keydb", True, id="keydb on"),
- pytest.param("clean", "clean_execute", "clobbering mocked-clean", "--clobber", True, "", "clobber", True, id="clean w/clobber"),
- pytest.param("validate", "validate_execute", "port mocked-validate", "--port=12345", True, "", "port", 12345, id="validate w/ manual port"),
- pytest.param("validate", "validate_execute", "abbrv port mocked-validate", "-p 12345", True, "", "port", 12345, id="validate w/ manual abbreviated port"),
- pytest.param("validate", "validate_execute", "cpu mocked-validate", "--device=cpu", True, "", "device", "cpu", id="validate: device 'cpu'"),
- pytest.param("validate", "validate_execute", "gpu mocked-validate", "--device=gpu", True, "", "device", "gpu", id="validate: device 'gpu'"),
- pytest.param("validate", "validate_execute", "gpuX mocked-validate", "--device=gpux", False, "invalid choice: 'gpux'", "", "", id="validate: set bad device 'gpuX'"),
+ pytest.param( "build", "build_execute", "verbose mocked-build", "-v", True, "", "v", True, id="verbose 'on'"),
+ pytest.param( "build", "build_execute", "cpu mocked-build", "--device=cpu", True, "", "device", "cpu", id="device 'cpu'"),
+ pytest.param( "build", "build_execute", "gpuX mocked-build", "--device=gpux", False, "invalid choice: 'gpux'", "", "", id="set bad device 'gpuX'"),
+ pytest.param( "build", "build_execute", "no tensorflow mocked-build", "--skip-tensorflow", True, "", "no_tf", True, id="Skip TF"),
+ pytest.param( "build", "build_execute", "no torch mocked-build", "--skip-torch", True, "", "no_pt", True, id="Skip Torch"),
+ pytest.param( "build", "build_execute", "onnx mocked-build", "--skip-onnx", True, "", "onnx", True, id="Skip Onnx"),
+ pytest.param( "build", "build_execute", "config-dir mocked-build", "--config-dir /foo/bar", True, "", "config-dir", "/foo/bar", id="set config dir"),
+ pytest.param( "build", "build_execute", "bad-config-dir mocked-build", "--config-dir", False, "error: argument --config-dir", "", "", id="set config dir w/o path"),
+ pytest.param( "build", "build_execute", "keydb mocked-build", "--keydb", True, "", "keydb", True, id="keydb on"),
+ pytest.param( "clean", "clean_execute", "clobbering mocked-clean", "--clobber", True, "", "clobber", True, id="clean w/clobber"),
+ pytest.param("validate", "validate_execute", "port mocked-validate", "--port=12345", True, "", "port", 12345, id="validate w/ manual port"),
+ pytest.param("validate", "validate_execute", "abbrv port mocked-validate", "-p 12345", True, "", "port", 12345, id="validate w/ manual abbreviated port"),
+ pytest.param("validate", "validate_execute", "cpu mocked-validate", "--device=cpu", True, "", "device", "cpu", id="validate: device 'cpu'"),
+ pytest.param("validate", "validate_execute", "gpu mocked-validate", "--device=gpu", True, "", "device", "gpu", id="validate: device 'gpu'"),
+ pytest.param("validate", "validate_execute", "gpuX mocked-validate", "--device=gpux", False, "invalid choice: 'gpux'", "", "", id="validate: set bad device 'gpuX'"),
]
)
# fmt: on
@@ -735,15 +734,6 @@ def mock_operation(*args, **kwargs) -> int:
monkeypatch.setattr(smartsim._core._cli.build, "tabulate", mock_operation)
monkeypatch.setattr(smartsim._core._cli.build, "build_database", mock_operation)
monkeypatch.setattr(smartsim._core._cli.build, "build_redis_ai", mock_operation)
- monkeypatch.setattr(
- smartsim._core._cli.build, "check_py_torch_version", mock_operation
- )
- monkeypatch.setattr(
- smartsim._core._cli.build, "check_py_tf_version", mock_operation
- )
- monkeypatch.setattr(
- smartsim._core._cli.build, "check_py_onnx_version", mock_operation
- )
command = "build"
cfg = MenuItemConfig(
diff --git a/tests/test_dragon_launcher.py b/tests/test_dragon_launcher.py
index 4fe8bf71b..4bd07e920 100644
--- a/tests/test_dragon_launcher.py
+++ b/tests/test_dragon_launcher.py
@@ -593,11 +593,14 @@ def test_run_step_fail(test_dir: str) -> None:
step0 = DragonStep("step0", test_dir, rs)
step0.meta["status_dir"] = status_dir
- mock_connector = MagicMock() # DragonConnector()
+ mock_connector = MagicMock(spec=DragonConnector)
mock_connector.is_connected = True
mock_connector.send_request = MagicMock(
return_value=DragonRunResponse(step_id=step0.name, error_message="mock fail!")
)
+ mock_connector.merge_persisted_env = MagicMock(
+ return_value={"FOO": "bar", "BAZ": "boop"}
+ )
launcher = DragonLauncher()
launcher._connector = mock_connector
@@ -676,7 +679,7 @@ def test_run_step_success(test_dir: str) -> None:
step0 = DragonStep("step0", test_dir, rs)
step0.meta["status_dir"] = status_dir
- mock_connector = MagicMock() # DragonConnector()
+ mock_connector = MagicMock(spec=DragonConnector)
mock_connector.is_connected = True
mock_connector.send_request = MagicMock(
return_value=DragonRunResponse(step_id=step0.name)
@@ -684,6 +687,9 @@ def test_run_step_success(test_dir: str) -> None:
launcher = DragonLauncher()
launcher._connector = mock_connector
+ mock_connector.merge_persisted_env = MagicMock(
+ return_value={"FOO": "bar", "BAZ": "boop"}
+ )
result = launcher.run(step0)
diff --git a/tests/test_dragon_run_request_nowlm.py b/tests/test_dragon_run_request_nowlm.py
index afd25aa9d..3dd7099c8 100644
--- a/tests/test_dragon_run_request_nowlm.py
+++ b/tests/test_dragon_run_request_nowlm.py
@@ -101,5 +101,5 @@ def test_run_request_with_negative_affinity(
),
)
- assert f"{device}_affinity" in str(ex.value.args[0])
- assert "NumberNotGeError" in str(ex.value.args[0])
+ assert f"{device}_affinity" in str(ex.value)
+ assert "greater than or equal to 0" in str(ex.value)