From f2abb555efcb0a59594e92b00c054c3b23e50543 Mon Sep 17 00:00:00 2001 From: Simon Zhao Date: Thu, 14 Nov 2024 11:15:30 +0800 Subject: [PATCH] Merge multiple Dockerfiles into a single one (#2167) Merge multiple Dockerfiles into a single one --------- Signed-off-by: Simon Zhao Co-authored-by: Miguel Fierro <3491412+miguelgfierro@users.noreply.github.com> --- .devcontainer/devcontainer.json | 54 ++-- SETUP.md | 85 ++++++ tests/README.md | 23 +- tests/ci/azureml_tests/aml_utils.py | 110 +++----- .../submit_groupwise_azureml_pytest.py | 28 +- tools/docker/Dockerfile | 262 +++++++----------- tools/docker/README.md | 148 +++++----- 7 files changed, 360 insertions(+), 350 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 12d6ed822..80ee30f02 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,22 +1,18 @@ { "name": "Recommenders", - // Version list: https://github.com/devcontainers/images/tree/main/src/base-ubuntu - // Includes: curl, wget, ca-certificates, git, Oh My Zsh!, - "image": "mcr.microsoft.com/devcontainers/base:ubuntu-24.04", - "hostRequirements": { - "cpus": 4, - "memory": "16gb", - "storage": "32gb" - }, - "features": { - // https://github.com/devcontainers/features/blob/main/src/anaconda/devcontainer-feature.json - "ghcr.io/devcontainers/features/anaconda:1": { - "version": "2024.06-1" + "build": { + "dockerfile": "../tools/docker/Dockerfile", + "context": "..", + "target": "deps", + "args": { + "COMPUTE": "cpu", + "PYTHON_VERSION": "3.11" } }, "customizations": { "vscode": { - // Set *default* container specific settings.json values on container create. + // Set default container specific settings.json values on container + // create "settings": { "[python]": { "editor.defaultFormatter": "ms-python.black-formatter", @@ -27,24 +23,32 @@ }, "isort.args": ["--profile", "black"], "python.analysis.autoImportCompletions": true, - "python.defaultInterpreterPath": "/usr/local/conda/envs/Recommenders/bin/python", + // Conda env name *must* align with the one in Dockerfle + "python.defaultInterpreterPath": "/root/conda/envs/Recommenders/bin/python", "python.testing.pytestEnabled": true, - // set the directory where all tests are + // Test directory "python.testing.pytestArgs": ["tests"] }, - // Add the IDs of extensions you want installed when the container is created. 
+ // VS Code extensions to install on container create "extensions": [ - "ms-python.black-formatter", // https://marketplace.visualstudio.com/items?itemName=ms-python.black-formatter - "ms-python.isort", // https://marketplace.visualstudio.com/items?itemName=ms-python.isort - "ms-python.mypy-type-checker", // https://marketplace.visualstudio.com/items?itemName=ms-python.mypy-type-checker - "ms-python.pylint", // https://marketplace.visualstudio.com/items?itemName=ms-python.pylint - "ms-python.python", // https://marketplace.visualstudio.com/items?itemName=ms-python.python - "ms-toolsai.datawrangler", // https://marketplace.visualstudio.com/items?itemName=ms-toolsai.datawrangler - "ms-toolsai.jupyter" // https://marketplace.visualstudio.com/items?itemName=ms-toolsai.jupyter + // https://marketplace.visualstudio.com/items?itemName=ms-python.black-formatter + "ms-python.black-formatter", + // https://marketplace.visualstudio.com/items?itemName=ms-python.isort + "ms-python.isort", + // https://marketplace.visualstudio.com/items?itemName=ms-python.mypy-type-checker + "ms-python.mypy-type-checker", + // https://marketplace.visualstudio.com/items?itemName=ms-python.pylint + "ms-python.pylint", + // https://marketplace.visualstudio.com/items?itemName=ms-python.python + "ms-python.python", + // https://marketplace.visualstudio.com/items?itemName=ms-toolsai.datawrangler + "ms-toolsai.datawrangler", + // https://marketplace.visualstudio.com/items?itemName=ms-toolsai.jupyter + "ms-toolsai.jupyter" ] } }, - // Use 'postCreateCommand' to run commands after the container is created. - "postCreateCommand": "conda create -n Recommenders -c conda-forge -y python=3.10 openjdk=21 pip && conda init bash && bash -c -i 'conda activate Recommenders && pip install -e .[dev,spark]' && conda config --set auto_activate_base false" + // Install Recommenders in development mode after container create + "postCreateCommand": "bash -i -c 'conda activate Recommenders && conda install -c conda-forge -y openjdk=21 && pip install -e .[dev,spark]'" } diff --git a/SETUP.md b/SETUP.md index 323aefddf..4593b4cfb 100644 --- a/SETUP.md +++ b/SETUP.md @@ -145,6 +145,91 @@ git checkout staging pip install -e .[all] ``` +We also provide a [devcontainer.json](./.devcontainer/devcontainer.json) +and [Dockerfile](./tools/docker/Dockerfile) for developers to +facilitate the development on +[Dev Containers with VS Code](https://code.visualstudio.com/docs/devcontainers/containers) +and [GitHub Codespaces](https://github.com/features/codespaces). + +
+<details>
+<summary>VS Code Dev Containers</summary>
+
+The typical scenario for using Docker containers in development is as
+follows. Say we want to develop applications for a specific
+environment, so
+1. we create a container with the required dependencies,
+1. and mount the folder containing the code into the container,
+1. then code parsing, debugging and testing are all performed against
+   the container.
+
+This workflow separates the development environment from your local
+environment, so that your local environment is not affected. The
+container used for this purpose is called a Dev Container in the
+VS Code Dev Containers extension, and the extension automates this
+development workflow with Docker containers.
+
+To use VS Code Dev Containers, your local machine must have the
+following applications installed:
+* [Docker](https://docs.docker.com/get-started/get-docker/)
+* [VS Code Remote Development Extension Pack](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.vscode-remote-extensionpack)
+
+Then
+* When you open your local Recommenders folder in VS Code, it will
+  detect [devcontainer.json](./.devcontainer/devcontainer.json), and
+  prompt you to **Reopen in Container**. If you choose to reopen,
+  it will create a container with the required environment described
+  in `devcontainer.json`, install a VS Code server in the container,
+  and mount the folder into the container.
+
+  If you don't see the prompt, you can use the command
+  **Dev Containers: Reopen in Container**.
+* If you don't have a local clone of Recommenders, you can also use
+  the command **Dev Containers: Clone Repository in Container Volume**,
+  and type in a branch/PR URL of Recommenders you'd like to develop
+  on, such as https://github.com/recommenders-team/recommenders,
+  https://github.com/recommenders-team/recommenders/tree/staging, or
+  https://github.com/recommenders-team/recommenders/pull/2098. VS
+  Code will create a container with the environment described in
+  `devcontainer.json`, and clone the specified branch of Recommenders
+  into the container.
+
+Once everything is set up, VS Code will act as a client to the server
+in the container, and all subsequent operations in VS Code will be
+performed against the container.
+
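+If you prefer working from a terminal, the same `devcontainer.json`
+can also be consumed by the [Dev Containers CLI](https://github.com/devcontainers/cli).
+The following is only a sketch; it assumes Node.js/npm is available
+locally and is not required for the VS Code workflow above:
+
+```bash
+# Install the Dev Containers CLI (assumes npm is available)
+npm install -g @devcontainers/cli
+
+# Build and start the dev container described by .devcontainer/devcontainer.json
+devcontainer up --workspace-folder .
+
+# Run the unit tests inside the running dev container
+devcontainer exec --workspace-folder . bash -i -c \
+    'conda activate Recommenders && pytest tests/unit -m "not spark and not gpu and not notebooks and not experimental"'
+```
+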
+</details>
+
+<details>
+<summary>GitHub Codespaces</summary>
+
+GitHub Codespaces also uses the `devcontainer.json` and Dockerfile in
+the repo to create the environment on a VM for you to develop in
+VS Code for the Web. To use GitHub Codespaces with Recommenders, go to
+[Recommenders](https://github.com/recommenders-team/recommenders)
+$\to$ switch to the branch of interest $\to$ Code $\to$ Codespaces
+$\to$ Create codespace on the branch.
+
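+Alternatively, a codespace can be created from a terminal with the
+[GitHub CLI](https://cli.github.com/). This is only a sketch; it
+assumes the CLI is installed and authenticated (`gh auth login`) and
+that Codespaces is available for your account:
+
+```bash
+# Create a codespace on the staging branch of Recommenders
+gh codespace create --repo recommenders-team/recommenders --branch staging
+
+# List your codespaces and open one in your local VS Code
+gh codespace list
+gh codespace code
+```
+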
+</details>
+
+<details>
+<summary>devcontainer.json & Dockerfile</summary>
+
+[devcontainer.json](./.devcontainer/devcontainer.json) describes:
+* the Dockerfile to use, with configurable build arguments such as
+  `COMPUTE` and `PYTHON_VERSION`;
+* settings of the VS Code server, such as the Python interpreter path
+  in the container and the Python formatter;
+* extensions of the VS Code server, such as black-formatter and pylint;
+* how to finish setting up the Conda environment for Recommenders in
+  `postCreateCommand`.
+
+[Dockerfile](./tools/docker/Dockerfile) is used in 3 places:
+* Dev Containers on VS Code and GitHub Codespaces
+* [Testing workflows on AzureML](./tests/README.md)
+* [Jupyter notebook examples on Docker](./tools/docker/README.md)
+
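+For reference, the build that VS Code or GitHub Codespaces performs
+from `devcontainer.json` is roughly equivalent to the following manual
+build from the repository root (the image tag is only an example):
+
+```bash
+# Build only the "deps" stage, as the dev container does
+docker build \
+    --target deps \
+    --build-arg COMPUTE=cpu \
+    --build-arg PYTHON_VERSION=3.11 \
+    -f tools/docker/Dockerfile \
+    -t recommenders:devcontainer .
+```
+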
+</details>
+
+
 ## Test Environments
 Depending on the type of recommender system and the notebook that needs to be run, there are different computational requirements.
diff --git a/tests/README.md b/tests/README.md
index 893df94c2..cb938c427 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -63,9 +63,26 @@ GitHub workflows `azureml-unit-tests.yml`, `azureml-cpu-nightly.yml`, `azureml-g
 
 There are three scripts used with each workflow, all of them are located in [ci/azureml_tests](./ci/azureml_tests/):
 
-* `submit_groupwise_azureml_pytest.py`: this script uses parameters in the workflow yml to set up the AzureML environment for testing using the AzureML SDK.
-* `run_groupwise_pytest.py`: this script uses pytest to run the tests of the libraries and notebooks. This script runs in an AzureML workspace with the environment created by the script above.
-* `test_groups.py`: this script defines the groups of tests. If the tests are part of the unit tests, the total compute time of each group should be less than 15min. If the tests are part of the nightly builds, the total time of each group should be less than 35min.
+* [`submit_groupwise_azureml_pytest.py`](./ci/azureml_tests/submit_groupwise_azureml_pytest.py):
+  this script uses parameters in the workflow yml to set up the
+  AzureML environment for testing using the AzureML SDK.
+* [`run_groupwise_pytest.py`](./ci/azureml_tests/run_groupwise_pytest.py):
+  this script uses pytest to run the tests of the libraries and
+  notebooks. This script runs in an AzureML workspace with the
+  environment created by the script above.
+* [`aml_utils.py`](./ci/azureml_tests/aml_utils.py): this script
+  defines several utility functions using
+  [the AzureML Python SDK v2](https://learn.microsoft.com/en-us/azure/machine-learning/concept-v2?view=azureml-api-2).
+  These functions are used by the scripts above to set up the compute and
+  the environment for the tests on AzureML. For example, the
+  environment with all dependencies of Recommenders is created by the
+  function `get_or_create_environment` via the [Dockerfile](../tools/docker/Dockerfile).
+  More details on Docker support can be found at [tools/docker/README.md](../tools/docker/README.md).
+* [`test_groups.py`](./ci/azureml_tests/test_groups.py): this script
+  defines the groups of tests. If the tests are part of the unit
+  tests, the total compute time of each group should be less than
+  15min. If the tests are part of the nightly builds, the total time
+  of each group should be less than 35min.
 
 ## How to contribute tests to the repository
diff --git a/tests/ci/azureml_tests/aml_utils.py b/tests/ci/azureml_tests/aml_utils.py
index 5a4d488e3..3e9a6782e 100644
--- a/tests/ci/azureml_tests/aml_utils.py
+++ b/tests/ci/azureml_tests/aml_utils.py
@@ -8,7 +8,7 @@
 * https://learn.microsoft.com/en-us/azure/machine-learning/reference-migrate-sdk-v1-mlflow-tracking?view=azureml-api-2&tabs=aml%2Ccli%2Cmlflow
 """
 import pathlib
-import tempfile
+import re
 
 from azure.ai.ml import MLClient, command
 from azure.ai.ml.entities import AmlCompute, BuildContext, Environment, Workspace
@@ -16,6 +16,7 @@ from azure.core.exceptions import ResourceExistsError
 from azure.identity import DefaultAzureCredential
 
+
 def get_client(subscription_id, resource_group, workspace_name):
     """
     Get the client with specified AzureML workspace, or create one if not existing. 
@@ -61,9 +62,8 @@ def get_or_create_environment( environment_name, use_gpu, use_spark, - conda_pkg_jdk, + conda_openjdk_version, python_version, - commit_sha, ): """ AzureML requires the run environment to be setup prior to submission. @@ -77,81 +77,39 @@ def get_or_create_environment( added to the conda environment, else False use_spark (bool): True if PySpark packages should be added to the conda environment, else False - conda_pkg_jdk (str): "openjdk=8" by default - python_version (str): python version, such as "3.9" - commit_sha (str): the commit that triggers the workflow + conda_openjdk_version (str): "21" by default + python_version (str): python version, such as "3.11" """ - conda_env_name = "reco" - conda_env_yml = "environment.yml" - condafile = fr""" -name: {conda_env_name} -channels: - - conda-forge -dependencies: - - python={python_version} - - {conda_pkg_jdk} - - pip - - pip: - - recommenders[dev{",gpu" if use_gpu else ""}{",spark" if use_spark else ""}]@git+https://github.com/recommenders-team/recommenders.git@{commit_sha} -""" - # See https://github.com/Azure/AzureML-Containers/blob/master/base/cpu/openmpi4.1.0-ubuntu22.04 - image = "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu22.04" - # See https://github.com/Azure/AzureML-Containers/blob/master/base/gpu/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04 - dockerfile = fr"""# syntax=docker/dockerfile:1 -FROM nvcr.io/nvidia/cuda:12.5.1-devel-ubuntu22.04 -SHELL ["/bin/bash", "-c"] -USER root:root -ENV NVIDIA_VISIBLE_DEVICES all -ENV NVIDIA_DRIVER_CAPABILITIES compute,utility -ENV LANG=C.UTF-8 LC_ALL=C.UTF-8 -ENV DEBIAN_FRONTEND noninteractive -RUN apt-get update && \ - apt-get install -y wget git-all && \ - apt-get clean -y && \ - rm -rf /var/lib/apt/lists/* - -# Install Conda -ENV CONDA_PREFIX /opt/miniconda -RUN wget -qO /tmp/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-py311_24.5.0-0-Linux-x86_64.sh && \ - bash /tmp/miniconda.sh -bf -p ${{CONDA_PREFIX}} && \ - ${{CONDA_PREFIX}}/bin/conda update --all -c conda-forge -y && \ - ${{CONDA_PREFIX}}/bin/conda clean -ay && \ - rm -rf ${{CONDA_PREFIX}}/pkgs && \ - rm /tmp/miniconda.sh && \ - find / -type d -name __pycache__ | xargs rm -rf - -# Create Conda environment -COPY {conda_env_yml} /tmp/{conda_env_yml} -RUN ${{CONDA_PREFIX}}/bin/conda env create -f /tmp/{conda_env_yml} - -# Activate Conda environment -ENV CONDA_DEFAULT_ENV {conda_env_name} -ENV CONDA_PREFIX ${{CONDA_PREFIX}}/envs/${{CONDA_DEFAULT_ENV}} -ENV PATH="${{CONDA_PREFIX}}/bin:${{PATH}}" LD_LIBRARY_PATH="${{CONDA_PREFIX}}/lib:$LD_LIBRARY_PATH" -""" - - with tempfile.TemporaryDirectory() as tmpdir: - tmpdir = pathlib.Path(tmpdir) - dockerfile_path = tmpdir / "Dockerfile" - condafile_path = tmpdir / conda_env_yml - build = BuildContext(path=tmpdir, dockerfile_path=dockerfile_path.name) - - with open(dockerfile_path, "w") as file: - file.write(dockerfile) - with open(condafile_path, "w") as file: - file.write(condafile) - - try: - client.environments.create_or_update( - Environment( - name=environment_name, - image=None if use_gpu else image, - build=build if use_gpu else None, - conda_file=None if use_gpu else condafile_path, - ) + compute = "gpu" if use_gpu else "cpu" + extras = ( + "[dev" + (",gpu" if use_gpu else "") + (",spark" if use_spark else "") + "]" + ) + dockerfile = pathlib.Path("tools/docker/Dockerfile") + + # Docker's --build-args is not supported by AzureML Python SDK v2 as shown + # in [the issue #33902](https://github.com/Azure/azure-sdk-for-python/issues/33902) + # so the build args are 
configured by regex substituion + text = dockerfile.read_text() + text = re.sub(r"(ARG\sCOMPUTE=).*", rf'\1"{compute}"', text) + text = re.sub(r"(ARG\sEXTRAS=).*", rf'\1"{extras}"', text) + text = re.sub(r"(ARG\sGIT_REF=).*", r'\1""', text) + text = re.sub(r"(ARG\sJDK_VERSION=).*", rf'\1"{conda_openjdk_version}"', text) + text = re.sub(r"(ARG\sPYTHON_VERSION=).*", rf'\1"{python_version}"', text) + dockerfile.write_text(text) + + try: + client.environments.create_or_update( + Environment( + name=environment_name, + build=BuildContext( + # Set path for Docker to access to Recommenders root + path=".", + dockerfile_path=dockerfile, + ), ) - except ResourceExistsError: - pass + ) + except ResourceExistsError: + pass def run_tests( diff --git a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py index 4ce6106bf..02698015e 100644 --- a/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py +++ b/tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py @@ -108,15 +108,15 @@ def parse_args(): help="Environment name on AzureML", ) parser.add_argument( - "--conda_pkg_jdk", + "--conda-openjdk-version", action="store", - default="openjdk=8", - help="Conda package for JDK", + default="21", + help="Conda OpenJDK package version", ) parser.add_argument( "--python-version", action="store", - default="3.8", + default="3.11", help="Python version", ) parser.add_argument( @@ -133,19 +133,16 @@ def parse_args(): logger = logging.getLogger("submit_groupwise_azureml_pytest.py") args = parse_args() - logger.info(f"Setting up workspace {args.ws}") + logger.info("Setting up workspace %s", args.ws) client = get_client( subscription_id=args.subid, resource_group=args.rg, workspace_name=args.ws, ) - logger.info(f"Setting up compute {args.cluster}") + logger.info("Setting up compute %s", args.cluster) create_or_start_compute( - client=client, - name=args.cluster, - size=args.vmsize, - max_instances=args.maxnodes + client=client, name=args.cluster, size=args.vmsize, max_instances=args.maxnodes ) # TODO: Unlike Azure DevOps pipelines, GitHub Actions only has simple @@ -159,19 +156,18 @@ def parse_args(): # * on AzureML # recommenders-unit-group_cpu_001-python3_8-c8adeafabc011b549f875dc145313ffbe3fc53a8 environment_name = correct_resource_name(args.envname) - logger.info(f"Setting up environment {environment_name}") + logger.info("Setting up environment %s", environment_name) get_or_create_environment( client=client, environment_name=environment_name, - use_gpu=True if "gpu" in args.testgroup else False, - use_spark=True if "spark" in args.testgroup else False, - conda_pkg_jdk=args.conda_pkg_jdk, + use_gpu="gpu" in args.testgroup, + use_spark="spark" in args.testgroup, + conda_openjdk_version=args.conda_openjdk_version, python_version=args.python_version, - commit_sha=args.sha, ) experiment_name = correct_resource_name(args.expname) - logger.info(f"Running experiment {experiment_name}") + logger.info("Running experiment %s", experiment_name) run_tests( client=client, compute=args.cluster, diff --git a/tools/docker/Dockerfile b/tools/docker/Dockerfile index fee64adfb..fc5ba4abf 100644 --- a/tools/docker/Dockerfile +++ b/tools/docker/Dockerfile @@ -1,189 +1,115 @@ +# syntax=docker/dockerfile:1 + # Copyright (c) Recommenders contributors. # Licensed under the MIT License. 
-ARG ENV="cpu" -ARG HOME="/root" +##################################################################### +# Stage build order depending on the compute: +# Compute Stage (CPU/GPU) -> Dependencies Stage -> Final Stage +##################################################################### +# Valid computes: cpu, gpu +ARG COMPUTE="cpu" -FROM mcr.microsoft.com/mirror/docker/library/ubuntu:18.04 AS base -LABEL maintainer="Microsoft Recommender Project " +##################################################################### +# Compute Stage - CPU +# Choose an appropriate CPU compute image +##################################################################### +# * [buildpack-deps:24.04](https://github.com/docker-library/buildpack-deps/blob/master/ubuntu/noble/Dockerfile) +# + [Created on 2024-08-17](https://hub.docker.com/layers/library/buildpack-deps/noble/images/sha256-dbfee7e7ee2340b0d6567efd3a8a9281ce45ee78598485b4d7a7f09fe641811a) +FROM buildpack-deps@sha256:dbfee7e7ee2340b0d6567efd3a8a9281ce45ee78598485b4d7a7f09fe641811a AS cpu -ARG HOME -ARG VIRTUAL_ENV -ENV HOME="${HOME}" -WORKDIR ${HOME} -# Exit if VIRTUAL_ENV is not specified correctly -RUN if [ "${VIRTUAL_ENV}" != "conda" ] && [ "${VIRTUAL_ENV}" != "venv" ] && [ "${VIRTUAL_ENV}" != "virtualenv" ]; then \ - echo 'VIRTUAL_ENV argument should be either "conda", "venv" or "virtualenv"'; exit 1; fi +##################################################################### +# Compute Stage - GPU +# Choose an appropriate GPU compute image +##################################################################### +# * [nvidia/cuda:12.6.1-devel-ubuntu24.04](https://gitlab.com/nvidia/container-images/cuda/-/blob/master/dist/12.6.1/ubuntu2404/devel/Dockerfile) +# + [Created on 2024-09-13](https://hub.docker.com/layers/nvidia/cuda/12.6.1-devel-ubuntu24.04/images/sha256-bfc293f21611f3c47a3442cf6516ebfe99d529926a4bef4bc389ef02fd038800) +# * See also [AML GPU Base Image](https://github.com/Azure/AzureML-Containers/blob/master/base/gpu/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04) +FROM nvcr.io/nvidia/cuda:12.6.1-devel-ubuntu24.04@sha256:bfc293f21611f3c47a3442cf6516ebfe99d529926a4bef4bc389ef02fd038800 AS gpu -# Install base dependencies and libpython (for cornac) -RUN apt-get update && \ - apt-get install -y curl build-essential -RUN apt-get install -y libpython3.7-dev -RUN apt-get install -y python3-dev - -# Install Anaconda -ARG ANACONDA="https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh" -RUN if [ "${VIRTUAL_ENV}" = "conda" ] ; then curl ${ANACONDA} -o anaconda.sh && \ - /bin/bash anaconda.sh -b -p conda && \ - rm anaconda.sh && \ - echo ". 
${HOME}/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \ - echo "conda activate base" >> ~/.bashrc ; fi - -ENV PATH="${HOME}/${VIRTUAL_ENV}/bin:${PATH}" - -# --login option used to source bashrc (thus activating conda env) at every RUN statement -SHELL ["/bin/bash", "--login", "-c"] - -# Python version supported by recommenders -RUN if [ "${VIRTUAL_ENV}" = "conda" ] ; then conda install python=3.7; fi -RUN if [ "${VIRTUAL_ENV}" = "venv" ] ; then apt-get -y install python3.7; \ - apt-get -y install python3-pip; \ - apt-get -y install python3.7-venv; fi -RUN if [ "${VIRTUAL_ENV}" = "virtualenv" ] ; then apt-get -y install python3.7; \ - apt-get -y install python3-pip; \ - python3.7 -m pip install --user virtualenv; fi - -# Activate the virtual environment -RUN if [ "${VIRTUAL_ENV}" = "venv" ] ; then python3.7 -m venv $HOME/${VIRTUAL_ENV}; \ - source $HOME/${VIRTUAL_ENV}/bin/activate; \ - pip install --upgrade pip; \ - pip install --upgrade setuptools wheel; fi -RUN if [ "${VIRTUAL_ENV}" = "virtualenv" ] ; then python3.7 -m virtualenv $HOME/${VIRTUAL_ENV}; \ - source $HOME/${VIRTUAL_ENV}/bin/activate; \ - pip install --upgrade pip; \ - pip install --upgrade setuptools wheel; fi - -########### -# CPU Stage -########### -FROM base AS cpu - -RUN if [ "${VIRTUAL_ENV}" = "venv" ] || [ "${VIRTUAL_ENV}" = "virtualenv" ]; then source $HOME/${VIRTUAL_ENV}/bin/activate; \ - pip install recommenders[examples]; fi -RUN if [ "${VIRTUAL_ENV}" = "conda" ] ; then pip install recommenders[examples]; fi - - -############### -# PySpark Stage -############### -FROM base AS pyspark - -# Install Java version 8 RUN apt-get update && \ - apt-get install -y libgomp1 openjdk-8-jre - -ENV JAVA_HOME="/usr/lib/jvm/java-8-openjdk-amd64" \ - PYSPARK_PYTHON="${HOME}/${VIRTUAL_ENV}/bin/python" \ - PYSPARK_DRIVER_PYTHON="${HOME}/${VIRTUAL_ENV}/bin/python" + DEBIAN_FRONTEND=noninteractive \ + apt-get install -y wget git && \ + apt-get clean -y && \ + rm -rf /var/lib/apt/lists/* -# Install dependencies in virtual environment -RUN if [ "${VIRTUAL_ENV}" = "venv" ] || [ "${VIRTUAL_ENV}" = "virtualenv" ]; then source $HOME/${VIRTUAL_ENV}/bin/activate; \ - pip install recommenders[spark,examples]; fi -RUN if [ "${VIRTUAL_ENV}" = "conda" ] ; then pip install recommenders[spark,examples]; fi +##################################################################### +# Dependencies Stage +# Set up all dependencies. This Stage is used by dev containers, +# because editable installation is required. 
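+# The stage can also be built on its own, e.g. (illustrative command):
+#   docker build --target deps --build-arg COMPUTE=cpu \
+#     -f tools/docker/Dockerfile -t recommenders:deps .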
+##################################################################### +FROM ${COMPUTE} AS deps -########### -# GPU Stage -########### -FROM mcr.microsoft.com/mirror/nvcr/nvidia/cuda:11.4.2-cudnn8-runtime-ubuntu18.04 AS gpu +# Valid versions: 3.8, 3.9, 3.10, 3.11 +ARG PYTHON_VERSION="3.11" -ARG HOME -ARG VIRTUAL_ENV -ENV HOME="${HOME}" -WORKDIR ${HOME} +ENV LANG=C.UTF-8 LC_ALL=C.UTF-8 -# Exit if VIRTUAL_ENV is not specified correctly -RUN if [ "${VIRTUAL_ENV}" != "conda" ] && [ "${VIRTUAL_ENV}" != "venv" ] && [ "${VIRTUAL_ENV}" != "virtualenv" ]; then \ - echo 'VIRTUAL_ENV argument should be either "conda", "venv" or "virtualenv"'; exit 1; fi +WORKDIR /root +USER root:root -RUN apt-get update && \ - apt-get install -y curl build-essential -RUN apt-get install -y libpython3.7-dev -RUN apt-get install -y python3-dev - -# Install Anaconda -ARG ANACONDA="https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh" -RUN if [ "${VIRTUAL_ENV}" = "conda" ] ; then curl ${ANACONDA} -o anaconda.sh && \ - /bin/bash anaconda.sh -b -p conda && \ - rm anaconda.sh && \ - echo ". ${HOME}/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \ - echo "conda activate base" >> ~/.bashrc; fi - -ENV PATH="${HOME}/${VIRTUAL_ENV}/bin:${PATH}" - -SHELL ["/bin/bash", "--login", "-c"] - -RUN if [ "${VIRTUAL_ENV}" = "conda" ] ; then conda install python=3.7; fi -RUN if [ "${VIRTUAL_ENV}" = "venv" ] ; then apt-get -y install python3.7; \ - apt-get -y install python3-pip; \ - apt-get -y install python3.7-venv; fi -RUN if [ "${VIRTUAL_ENV}" = "virtualenv" ] ; then apt-get -y install python3.7; \ - apt-get -y install python3-pip; \ - python3.7 -m pip install --user virtualenv; fi - -# Activate the virtual environment -RUN if [ "${VIRTUAL_ENV}" = "venv" ] ; then python3.7 -m venv $HOME/${VIRTUAL_ENV}; \ - source $HOME/${VIRTUAL_ENV}/bin/activate; \ - pip install --upgrade pip; \ - pip install --upgrade setuptools wheel; \ - pip install recommenders[gpu,examples]; fi -RUN if [ "${VIRTUAL_ENV}" = "virtualenv" ] ; then python3.7 -m virtualenv $HOME/${VIRTUAL_ENV}; \ - source $HOME/${VIRTUAL_ENV}/bin/activate; \ - pip install --upgrade pip; \ - pip install --upgrade setuptools wheel; \ - pip install recommenders[gpu,examples]; fi - -RUN if [ "${VIRTUAL_ENV}" = "conda" ] ; then \ - pip install recommenders[gpu,examples] -f https://download.pytorch.org/whl/cu111/torch_stable.html ; fi - - -############ -# Full Stage -############ -FROM gpu AS full - -ARG HOME -WORKDIR ${HOME} - -SHELL ["/bin/bash", "--login", "-c"] - -# Install Java version 8 -RUN apt-get update && \ - apt-get install -y libgomp1 openjdk-8-jre +SHELL ["/bin/bash", "-c"] -ENV JAVA_HOME="/usr/lib/jvm/java-8-openjdk-amd64" \ - PYSPARK_PYTHON="${HOME}/${VIRTUAL_ENV}/bin/python" \ - PYSPARK_DRIVER_PYTHON="${HOME}/${VIRTUAL_ENV}/bin/python" +# Install Conda +RUN wget -qO /tmp/conda.sh "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh" && \ + bash /tmp/conda.sh -bf -p /root/conda && \ + /root/conda/bin/conda clean -ay && \ + rm -rf /root/conda/pkgs && \ + rm /tmp/conda.sh && \ + /root/conda/bin/conda init bash && \ + /root/conda/bin/conda config --set auto_activate_base false -# Install dependencies in virtual environment -RUN if [ "${VIRTUAL_ENV}" = "venv" ] || [ "${VIRTUAL_ENV}" = "virtualenv" ]; then source $HOME/${VIRTUAL_ENV}/bin/activate; \ - pip install recommenders[all]; fi -RUN if [ "${VIRTUAL_ENV}" = "conda" ] ; then pip install recommenders[all]; fi +# Create Conda environment +RUN /root/conda/bin/conda create -n Recommenders -c 
conda-forge -y python=${PYTHON_VERSION} pip -############# +##################################################################### # Final Stage -############# -FROM $ENV AS final - -# Setup Jupyter notebook configuration -ENV NOTEBOOK_CONFIG="${HOME}/.jupyter/jupyter_notebook_config.py" -RUN mkdir ${HOME}/.jupyter && \ - echo "c.NotebookApp.token = ''" >> ${NOTEBOOK_CONFIG} && \ - echo "c.NotebookApp.ip = '0.0.0.0'" >> ${NOTEBOOK_CONFIG} && \ - echo "c.NotebookApp.allow_root = True" >> ${NOTEBOOK_CONFIG} && \ - echo "c.NotebookApp.open_browser = False" >> ${NOTEBOOK_CONFIG} && \ - echo "c.MultiKernelManager.default_kernel_name = 'python3'" >> ${NOTEBOOK_CONFIG} - -# Register the environment with Jupyter -RUN if [ "${VIRTUAL_ENV}" = "conda" ]; then python -m ipykernel install --user --name base --display-name "Python (base)"; fi -RUN if [ "${VIRTUAL_ENV}" = "venv" ] || [ "${VIRTUAL_ENV}" = "virtualenv" ]; then source $HOME/${VIRTUAL_ENV}/bin/activate; \ - python -m ipykernel install --user --name venv --display-name "Python (venv)"; fi - -ARG HOME -WORKDIR ${HOME} - +# Install Recommenders +##################################################################### +FROM deps AS final + +# Extra dependencies: dev, gpu, spark +ARG EXTRAS="" + +# Git ref of Recommenders to install: main, staging, etc. +# Empty value ("") indicates editable installation of current clone +ARG GIT_REF="main" + +ARG JDK_VERSION="21" + +ARG RECO_DIR="/root/Recommenders" + +# Copy Recommenders into the image +COPY ./ ${RECO_DIR} + +# Install Recommenders and its dependencies +RUN source /root/conda/bin/activate && \ + conda activate Recommenders && \ + if [[ "${EXTRAS}" =~ spark ]]; then conda install -c conda-forge -y "openjdk=${JDK_VERSION}"; fi && \ + if [ -z "${GIT_REF}" ]; then \ + pip install ${RECO_DIR}${EXTRAS}; \ + else \ + pip install recommenders${EXTRAS}@git+https://github.com/recommenders-team/recommenders.git@${GIT_REF}; \ + fi && \ + jupyter notebook --generate-config && \ + echo "c.MultiKernelManager.default_kernel_name = 'Recommenders'" >> /root/.jupyter/jupyter_notebook_config.py && \ + python -m ipykernel install --user --name Recommenders --display-name "Python (Recommenders)" + +# Activate Recommenders Conda environment +ENV PS1='(Recommenders) \[\]\[\e]0;\u@\h: \w\a\]${debian_chroot:+($debian_chroot)}\u@\h:\w\$ \[\]' +ENV PATH="/root/conda/envs/Recommenders/bin:/root/conda/condabin:${PATH}" +ENV CONDA_SHLVL='1' +ENV CONDA_PROMPT_MODIFIER='(Recommenders) ' +ENV CONDA_PREFIX="/root/conda/envs/Recommenders" +ENV CONDA_EXE="/root/conda/bin/conda" +ENV CONDA_PYTHON_EXE="/root/conda/bin/python" +ENV JAVA_HOME="/root/conda/envs/Recommenders/lib/jvm" +ENV JAVA_LD_LIBRARY_PATH="${JAVA_HOME}/lib/server" + +# Setup Jupyter notebook EXPOSE 8888 -CMD ["jupyter", "notebook"] +CMD ["jupyter", "notebook", "--ip=0.0.0.0", "--port=8888", "--no-browser", "--allow-root", "--ServerApp.allow_origin='*'", "--IdentityProvider.token=''"] diff --git a/tools/docker/README.md b/tools/docker/README.md index adf5997a7..9ce38a00a 100644 --- a/tools/docker/README.md +++ b/tools/docker/README.md @@ -1,92 +1,116 @@ Docker Support ============== -The Dockerfile in this directory will build Docker images with all the dependencies and code needed to run example notebooks or unit tests included in this repository. +The Dockerfile in this directory will build Docker images with all +the dependencies and code needed to run example notebooks or unit +tests included in this repository. 
It is also used by
+* [.devcontainer/devcontainer.json](../../.devcontainer/devcontainer.json)
+  to build
+  [VS Code Dev Containers](https://code.visualstudio.com/docs/devcontainers/containers)
+  that can facilitate the development of Recommenders
+  (see [Setup Guide](../../SETUP.md)),
+* and [tests/ci/azureml_tests/aml_utils.py](../../tests/ci/azureml_tests/aml_utils.py)
+  to create the environment in [the testing workflows of Recommenders](../../.github/workflows/) (see [Tests](../../tests/README.md)).
+
+Multiple environments are supported by using
+[multistage builds](https://docs.docker.com/build/building/multi-stage/).
+The following examples show how to build and run the Docker image for
+CPU, PySpark, and GPU environments.
+
+Once the container is running you can access Jupyter notebooks at
+http://localhost:8888.
 
-Multiple environments are supported by using [multistage builds](https://docs.docker.com/develop/develop-images/multistage-build/). In order to efficiently build the Docker images in this way, [Docker BuildKit](https://docs.docker.com/develop/develop-images/build_enhancements/) is necessary.
-The following examples show how to build and run the Docker image for CPU, PySpark, and GPU environments.
-
-Note: On some platforms, one needs to manually specify the environment variable for `DOCKER_BUILDKIT`to make sure the build runs well. For example, on a Windows machine, this can be done by the powershell command as below, before building the image
-```
-$env:DOCKER_BUILDKIT=1
-```
-
-Warning: On some platforms using Docker Buildkit interferes with Anaconda environment installation. If you find that the docker build is hanging during Anaconda environment setup stage try building the container without Buildkit enabled.
-
-Once the container is running you can access Jupyter notebooks at http://localhost:8888.
 
 Building and Running with Docker
 --------------------------------
-See examples below for the case of conda. If you use venv or virtualenv instead, replace `--build-arg VIRTUAL_ENV=conda` with `--build-arg VIRTUAL_ENV=venv` or `--build-arg VIRTUAL_ENV=virtualenv`, respectively.
-
-CPU environment - -``` -DOCKER_BUILDKIT=1 docker build -t recommenders:cpu --build-arg ENV=cpu --build-arg VIRTUAL_ENV=conda . -docker run -p 8888:8888 -d recommenders:cpu -``` +* **CPU environment** -
+ ```bash + docker build -t recommenders:cpu . + docker run -v ../../examples:/root/examples -p 8888:8888 -d recommenders:cpu + ``` -
-PySpark environment -``` -DOCKER_BUILDKIT=1 docker build -t recommenders:pyspark --build-arg ENV=pyspark --build-arg VIRTUAL_ENV=conda . -docker run -p 8888:8888 -d recommenders:pyspark -``` +* **PySpark environment** -
+ ```bash + docker build -t recommenders:pyspark --build-arg EXTRAS=[spark] . + docker run -v ../../examples:/root/examples -p 8888:8888 -d recommenders:pyspark + ``` -
-GPU environment +* **GPU environment** -``` -DOCKER_BUILDKIT=1 docker build -t recommenders:gpu --build-arg ENV=gpu --build-arg VIRTUAL_ENV=conda . -docker run --runtime=nvidia -p 8888:8888 -d recommenders:gpu -``` + ```bash + docker build -t recommenders:gpu --build-arg COMPUTE=gpu . + docker run --runtime=nvidia -v ../../examples:/root/examples -p 8888:8888 -d recommenders:gpu + ``` -
-
-GPU + PySpark environment +* **GPU + PySpark environment** -``` -DOCKER_BUILDKIT=1 docker build -t recommenders:full --build-arg ENV=full --build-arg VIRTUAL_ENV=conda . -docker run --runtime=nvidia -p 8888:8888 -d recommenders:full -``` + ```bash + docker build -t recommenders:gpu-pyspark --build-arg COMPUTE=gpu --build-arg EXTRAS=[gpu,spark] . + docker run --runtime=nvidia -v ../../examples:/root/examples -p 8888:8888 -d recommenders:gpu-pyspark + ``` -
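+Note: depending on your Docker version, the host path of a bind mount
+may need to be absolute rather than relative. If the `-v` option above
+is rejected, an absolute path can be derived from the current
+directory, for example:
+
+```bash
+docker run -v "$(pwd)/../../examples:/root/examples" -p 8888:8888 -d recommenders:cpu
+```
+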
 
 Build Arguments
 ---------------
-There are several build arguments which can change how the image is built. Similar to the `ENV` build argument these are specified during the docker build command.
+There are several build arguments which can change how the image is
+built. Similar to the `COMPUTE` build argument these are specified
+during the docker build command.
 
 Build Arg|Description|
 ---------|-----------|
-ENV|Environment to use, options: cpu, pyspark, gpu, full (defaults to cpu)|
-VIRTUAL_ENV|Virtual environment to use; mandatory argument, must be one of "conda", "venv", "virtualenv"|
-ANACONDA|Anaconda installation script (defaults to miniconda3 4.6.14)|
+`COMPUTE`|Compute to use, options: `cpu`, `gpu` (defaults to `cpu`)|
+`EXTRAS`|Extra dependencies to use, options: `dev`, `gpu`, `spark` (defaults to none ("")); For example, `[gpu,spark]`|
+`GIT_REF`|Git ref of Recommenders to install, options: `main`, `staging`, etc. (defaults to `main`); Empty value means editable installation of current clone|
+`JDK_VERSION`|OpenJDK version to use (defaults to `21`)|
+`PYTHON_VERSION`|Python version to use (defaults to `3.11`)|
+`RECO_DIR`|Path to the copy of Recommenders in the container when `GIT_REF` is empty (defaults to `/root/Recommenders`)|
+
+Examples:
+* Install Python 3.10 and the Recommenders package from the staging branch.
+
+  ```bash
+  docker build -t recommenders:staging --build-arg GIT_REF=staging --build-arg PYTHON_VERSION=3.10 .
+  ```
+
+* Install the current local clone of Recommenders and its extra 'dev' dependencies.
 
-Example:
+  ```bash
+  # Go to the root directory of Recommenders to copy the local clone into the Docker image
+  cd ../../
+  docker build -t recommenders:dev --build-arg GIT_REF= --build-arg EXTRAS=[dev] -f tools/docker/Dockerfile .
+  ```
 
-```
-DOCKER_BUILDKIT=1 docker build -t recommenders:cpu --build-arg ENV=cpu --build-arg VIRTUAL_ENV=conda .
-```
+In order to see detailed progress you can provide a flag during the
+build command: ```--progress=plain```
 
-In order to see detailed progress with BuildKit you can provide a flag during the build command: ```--progress=plain```
 
-Running tests with docker
+Running tests with Docker
 -------------------------
-To run the tests using e.g. the CPU image, do the following:
-```
-docker run -it recommenders:cpu bash -c 'pip install pytest; \
-pip install pytest-cov; \
-pip install pytest-mock; \
-apt-get install -y git; \
-git clone https://github.com/recommenders-team/recommenders.git; \
-cd recommenders; \
-pytest tests/unit -m "not spark and not gpu and not notebooks and not experimental"'
-``` \ No newline at end of file
+* Run the tests using the `recommenders:cpu` image built above.
+  NOTE: The `recommenders:cpu` image only installs the Recommenders
+  package under [../../recommenders/](../../recommenders/), not the
+  tests, so the repository is cloned first to get them.
+
+  ```bash
+  docker run -it recommenders:cpu bash -c 'pip install pytest; \
+  pip install pytest-cov; \
+  pip install pytest-mock; \
+  apt-get install -y git; \
+  git clone https://github.com/recommenders-team/recommenders.git; \
+  cd recommenders; \
+  pytest tests/unit -m "not spark and not gpu and not notebooks and not experimental"'
+  ```
+
+* Run the tests using the `recommenders:dev` image built above.
+  NOTE: The `recommenders:dev` image has a full copy of your local
+  Recommenders repository.
+
+  ```bash
+  docker run -it recommenders:dev bash -c 'cd Recommenders; \
+  pytest tests/unit -m "not spark and not gpu and not notebooks and not experimental"'
+  ```
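+
+* Run the GPU tests with a GPU-enabled image. This is a sketch and
+  assumes an NVIDIA runtime is available on the host; the image tag
+  `recommenders:dev-gpu` is just an example built from a local clone:
+
+  ```bash
+  # Build from the repository root with GPU compute and dev+gpu extras
+  cd ../../
+  docker build -t recommenders:dev-gpu --build-arg COMPUTE=gpu --build-arg GIT_REF= --build-arg EXTRAS=[dev,gpu] -f tools/docker/Dockerfile .
+
+  # Run the GPU-marked unit tests inside the container
+  docker run --runtime=nvidia -it recommenders:dev-gpu bash -c 'cd Recommenders; \
+  pytest tests/unit -m "gpu and not spark and not notebooks and not experimental"'
+  ```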