diff --git a/huggingface/base/inference/artifacts/config.properties b/huggingface/base/inference/artifacts/config.properties new file mode 100644 index 000000000000..8f7d753388bf --- /dev/null +++ b/huggingface/base/inference/artifacts/config.properties @@ -0,0 +1,5 @@ +vmargs=-XX:+UseContainerSupport -XX:InitialRAMPercentage=8.0 -XX:MaxRAMPercentage=10.0 -XX:-UseLargePages -XX:+UseG1GC -XX:+ExitOnOutOfMemoryError +model_store=/opt/ml/model +load_models=ALL +inference_address=http://0.0.0.0:8080 +management_address=http://0.0.0.0:8081 diff --git a/huggingface/base/inference/artifacts/mms-entrypoint.py b/huggingface/base/inference/artifacts/mms-entrypoint.py new file mode 100644 index 000000000000..f9252aace31f --- /dev/null +++ b/huggingface/base/inference/artifacts/mms-entrypoint.py @@ -0,0 +1,25 @@ +# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"). You +# may not use this file except in compliance with the License. A copy of +# the License is located at +# +# http://aws.amazon.com/apache2.0/ +# +# or in the "license" file accompanying this file. This file is +# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF +# ANY KIND, either express or implied. See the License for the specific +# language governing permissions and limitations under the License. 
+ +import shlex +import subprocess +import sys + +if sys.argv[1] == 'serve': + from sagemaker_huggingface_serving_container import serving + serving.main() +else: + subprocess.check_call(sys.argv[1:])  # pass argv through verbatim; ' '.join + shlex.split mangled args containing spaces/quotes + +# prevent docker exit +subprocess.call(['tail', '-f', '/dev/null']) diff --git a/huggingface/base/inference/docker/py3.6/Dockerfile.cpu b/huggingface/base/inference/docker/py3.6/Dockerfile.cpu new file mode 100644 index 000000000000..b471cd3f8bf5 --- /dev/null +++ b/huggingface/base/inference/docker/py3.6/Dockerfile.cpu @@ -0,0 +1,110 @@ +FROM ubuntu:18.04 + +LABEL maintainer="Amazon AI" +LABEL dlc_major_version="1" + +# Specify accept-bind-to-port LABEL for inference pipelines to use SAGEMAKER_BIND_TO_PORT +# https://docs.aws.amazon.com/sagemaker/latest/dg/inference-pipeline-real-time.html +LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true +# Specify multi-models LABEL to indicate container is capable of loading and serving multiple models concurrently +# https://docs.aws.amazon.com/sagemaker/latest/dg/build-multi-model-build-container.html +LABEL com.amazonaws.sagemaker.capabilities.multi-models=true + +ARG MMS_VERSION=1.1.2 +ARG PYTHON=python3 +ARG PYTHON_VERSION=3.6.13 +ARG OPEN_MPI_VERSION=4.0.1 + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + LD_LIBRARY_PATH="/opt/conda/lib/:${LD_LIBRARY_PATH}:/usr/local/lib" \ + PYTHONIOENCODING=UTF-8 \ + LANG=C.UTF-8 \ + LC_ALL=C.UTF-8 \ + TEMP=/home/model-server/tmp \ + DEBIAN_FRONTEND=noninteractive + +ENV PATH /opt/conda/bin:$PATH + +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + ca-certificates \ + build-essential \ + openssl \ + openjdk-8-jdk-headless \ + vim \ + wget \ + curl \ + unzip \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +RUN curl -L -o ~/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \ + && chmod +x ~/miniconda.sh \ + && ~/miniconda.sh -b -p /opt/conda \ + && rm ~/miniconda.sh \ + && 
/opt/conda/bin/conda update conda \ + && /opt/conda/bin/conda install -c conda-forge \ + python=$PYTHON_VERSION \ + && /opt/conda/bin/conda install -y \ + # conda 4.10.0 requires ruamel_yaml to be installed. Currently pinned at latest. + ruamel_yaml==0.15.100 \ + cython==0.29.12 \ + mkl-include==2019.4 \ + mkl==2019.4 \ + botocore \ + && /opt/conda/bin/conda clean -ya + +RUN pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org \ + && ln -s /opt/conda/bin/pip /usr/local/bin/pip3 \ + && pip install packaging==20.4 \ + enum-compat==0.0.3 \ + "cryptography>3.2" + +RUN wget https://www.open-mpi.org/software/ompi/v4.0/downloads/openmpi-$OPEN_MPI_VERSION.tar.gz \ + && gunzip -c openmpi-$OPEN_MPI_VERSION.tar.gz | tar xf - \ + && cd openmpi-$OPEN_MPI_VERSION \ + && ./configure --prefix=/home/.openmpi \ + && make all install \ + && cd .. \ + && rm openmpi-$OPEN_MPI_VERSION.tar.gz \ + && rm -rf openmpi-$OPEN_MPI_VERSION + +# The ENV variables declared below are changed in the previous section +# Grouping these ENV variables in the first section causes +# ompi_info to fail. 
This is only observed in CPU containers +ENV PATH="$PATH:/home/.openmpi/bin" +ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/.openmpi/lib/" +RUN ompi_info --parsable --all | grep mpi_built_with_cuda_support:value + +WORKDIR / + +RUN pip install --no-cache-dir \ + multi-model-server==$MMS_VERSION \ + sagemaker-inference + +RUN useradd -m model-server \ + && mkdir -p /home/model-server/tmp \ + && chown -R model-server /home/model-server + +COPY mms-entrypoint.py /usr/local/bin/dockerd-entrypoint.py +COPY config.properties /home/model-server + +RUN chmod +x /usr/local/bin/dockerd-entrypoint.py + +ADD https://raw.githubusercontent.com/aws/deep-learning-containers/master/src/deep_learning_container.py /usr/local/bin/deep_learning_container.py + +RUN chmod +x /usr/local/bin/deep_learning_container.py + +RUN HOME_DIR=/root \ + && curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \ + && unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \ + && cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \ + && chmod +x /usr/local/bin/testOSSCompliance \ + && chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \ + && ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \ + && rm -rf ${HOME_DIR}/oss_compliance* + +EXPOSE 8080 8081 +ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"] +CMD ["multi-model-server", "--start", "--mms-config", "/home/model-server/config.properties"] diff --git a/huggingface/base/inference/docker/py3.6/cu110/Dockerfile.gpu b/huggingface/base/inference/docker/py3.6/cu110/Dockerfile.gpu new file mode 100644 index 000000000000..6a19f9fa94c9 --- /dev/null +++ b/huggingface/base/inference/docker/py3.6/cu110/Dockerfile.gpu @@ -0,0 +1,107 @@ +FROM nvidia/cuda:11.0-cudnn8-runtime-ubuntu18.04 + +LABEL maintainer="Amazon AI" +LABEL dlc_major_version="1" + +# Specify accept-bind-to-port LABEL for inference pipelines to use 
SAGEMAKER_BIND_TO_PORT +# https://docs.aws.amazon.com/sagemaker/latest/dg/inference-pipeline-real-time.html +LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true +# Specify multi-models LABEL to indicate container is capable of loading and serving multiple models concurrently +# https://docs.aws.amazon.com/sagemaker/latest/dg/build-multi-model-build-container.html +LABEL com.amazonaws.sagemaker.capabilities.multi-models=true + +ARG MMS_VERSION=1.1.2 +ARG PYTHON=python3 +ARG PYTHON_VERSION=3.6.13 +ARG OPEN_MPI_VERSION=4.0.1 + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + LD_LIBRARY_PATH="/opt/conda/lib/:${LD_LIBRARY_PATH}:/usr/local/lib" \ + PYTHONIOENCODING=UTF-8 \ + LANG=C.UTF-8 \ + LC_ALL=C.UTF-8 \ + TEMP=/home/model-server/tmp \ + DEBIAN_FRONTEND=noninteractive + +ENV PATH /opt/conda/bin:$PATH + +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + ca-certificates \ + build-essential \ + openssl \ + openjdk-8-jdk-headless \ + vim \ + wget \ + curl \ + unzip \ + libnuma1 \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +RUN curl -L -o ~/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \ + && chmod +x ~/miniconda.sh \ + && ~/miniconda.sh -b -p /opt/conda \ + && rm ~/miniconda.sh \ + && /opt/conda/bin/conda update conda \ + && /opt/conda/bin/conda install -c conda-forge \ + python=$PYTHON_VERSION \ + && /opt/conda/bin/conda install -y \ + # conda 4.10.0 requires ruamel_yaml to be installed. Currently pinned at latest. 
+ ruamel_yaml==0.15.100 \ + cython==0.29.12 \ + botocore \ + mkl-include==2019.4 \ + mkl==2019.4 \ + && /opt/conda/bin/conda clean -ya + +RUN pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org \ + && ln -s /opt/conda/bin/pip /usr/local/bin/pip3 \ + && pip install packaging==20.4 \ + enum-compat==0.0.3 \ + "cryptography>3.2" + +RUN wget https://www.open-mpi.org/software/ompi/v4.0/downloads/openmpi-$OPEN_MPI_VERSION.tar.gz \ + && gunzip -c openmpi-$OPEN_MPI_VERSION.tar.gz | tar xf - \ + && cd openmpi-$OPEN_MPI_VERSION \ + && ./configure --prefix=/home/.openmpi \ + && make all install \ + && cd .. \ + && rm openmpi-$OPEN_MPI_VERSION.tar.gz \ + && rm -rf openmpi-$OPEN_MPI_VERSION + +ENV PATH="$PATH:/home/.openmpi/bin" +ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/.openmpi/lib/" + +WORKDIR / + +RUN pip install --no-cache-dir \ + multi-model-server==$MMS_VERSION \ + sagemaker-inference + +RUN useradd -m model-server \ + && mkdir -p /home/model-server/tmp \ + && chown -R model-server /home/model-server + +COPY mms-entrypoint.py /usr/local/bin/dockerd-entrypoint.py +COPY config.properties /home/model-server + +RUN chmod +x /usr/local/bin/dockerd-entrypoint.py + +ADD https://raw.githubusercontent.com/aws/deep-learning-containers/master/src/deep_learning_container.py /usr/local/bin/deep_learning_container.py + +RUN chmod +x /usr/local/bin/deep_learning_container.py + +RUN HOME_DIR=/root \ + && curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \ + && unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \ + && cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \ + && chmod +x /usr/local/bin/testOSSCompliance \ + && chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \ + && ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \ + && rm -rf ${HOME_DIR}/oss_compliance* + +EXPOSE 8080 8081 +ENTRYPOINT ["python", 
"/usr/local/bin/dockerd-entrypoint.py"] +CMD ["multi-model-server", "--start", "--mms-config", "/home/model-server/config.properties"] diff --git a/huggingface/base/inference/docker/py3.7/Dockerfile.cpu b/huggingface/base/inference/docker/py3.7/Dockerfile.cpu new file mode 100644 index 000000000000..8fb46c5dd7d4 --- /dev/null +++ b/huggingface/base/inference/docker/py3.7/Dockerfile.cpu @@ -0,0 +1,110 @@ +FROM ubuntu:18.04 + +LABEL maintainer="Amazon AI" +LABEL dlc_major_version="1" + +# Specify accept-bind-to-port LABEL for inference pipelines to use SAGEMAKER_BIND_TO_PORT +# https://docs.aws.amazon.com/sagemaker/latest/dg/inference-pipeline-real-time.html +LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true +# Specify multi-models LABEL to indicate container is capable of loading and serving multiple models concurrently +# https://docs.aws.amazon.com/sagemaker/latest/dg/build-multi-model-build-container.html +LABEL com.amazonaws.sagemaker.capabilities.multi-models=true + +ARG MMS_VERSION=1.1.2 +ARG PYTHON=python3 +ARG PYTHON_VERSION=3.7.10 +ARG OPEN_MPI_VERSION=4.0.1 + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + LD_LIBRARY_PATH="/opt/conda/lib/:${LD_LIBRARY_PATH}:/usr/local/lib" \ + PYTHONIOENCODING=UTF-8 \ + LANG=C.UTF-8 \ + LC_ALL=C.UTF-8 \ + TEMP=/home/model-server/tmp \ + DEBIAN_FRONTEND=noninteractive + +ENV PATH /opt/conda/bin:$PATH + +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + ca-certificates \ + build-essential \ + openssl \ + openjdk-8-jdk-headless \ + vim \ + wget \ + curl \ + unzip \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +RUN curl -L -o ~/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \ + && chmod +x ~/miniconda.sh \ + && ~/miniconda.sh -b -p /opt/conda \ + && rm ~/miniconda.sh \ + && /opt/conda/bin/conda update conda \ + && /opt/conda/bin/conda install -c conda-forge \ + python=$PYTHON_VERSION \ + && /opt/conda/bin/conda install -y \ + # 
conda 4.10.0 requires ruamel_yaml to be installed. Currently pinned at latest. + ruamel_yaml==0.15.100 \ + cython==0.29.12 \ + mkl-include==2019.4 \ + mkl==2019.4 \ + botocore \ + && /opt/conda/bin/conda clean -ya + +RUN pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org \ + && ln -s /opt/conda/bin/pip /usr/local/bin/pip3 \ + && pip install packaging==20.4 \ + enum-compat==0.0.3 \ + "cryptography>3.2" + +RUN wget https://www.open-mpi.org/software/ompi/v4.0/downloads/openmpi-$OPEN_MPI_VERSION.tar.gz \ + && gunzip -c openmpi-$OPEN_MPI_VERSION.tar.gz | tar xf - \ + && cd openmpi-$OPEN_MPI_VERSION \ + && ./configure --prefix=/home/.openmpi \ + && make all install \ + && cd .. \ + && rm openmpi-$OPEN_MPI_VERSION.tar.gz \ + && rm -rf openmpi-$OPEN_MPI_VERSION + +# The ENV variables declared below are changed in the previous section +# Grouping these ENV variables in the first section causes +# ompi_info to fail. This is only observed in CPU containers +ENV PATH="$PATH:/home/.openmpi/bin" +ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/.openmpi/lib/" +RUN ompi_info --parsable --all | grep mpi_built_with_cuda_support:value + +WORKDIR / + +RUN pip install --no-cache-dir \ + multi-model-server==$MMS_VERSION \ + sagemaker-inference + +RUN useradd -m model-server \ + && mkdir -p /home/model-server/tmp \ + && chown -R model-server /home/model-server + +COPY mms-entrypoint.py /usr/local/bin/dockerd-entrypoint.py +COPY config.properties /home/model-server + +RUN chmod +x /usr/local/bin/dockerd-entrypoint.py + +ADD https://raw.githubusercontent.com/aws/deep-learning-containers/master/src/deep_learning_container.py /usr/local/bin/deep_learning_container.py + +RUN chmod +x /usr/local/bin/deep_learning_container.py + +RUN HOME_DIR=/root \ + && curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \ + && unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \ + && cp 
${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \ + && chmod +x /usr/local/bin/testOSSCompliance \ + && chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \ + && ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \ + && rm -rf ${HOME_DIR}/oss_compliance* + +EXPOSE 8080 8081 +ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"] +CMD ["multi-model-server", "--start", "--mms-config", "/home/model-server/config.properties"] diff --git a/huggingface/base/inference/docker/py3.7/cu110/Dockerfile.gpu b/huggingface/base/inference/docker/py3.7/cu110/Dockerfile.gpu new file mode 100644 index 000000000000..1fc5b2ef3cad --- /dev/null +++ b/huggingface/base/inference/docker/py3.7/cu110/Dockerfile.gpu @@ -0,0 +1,107 @@ +FROM nvidia/cuda:11.0-cudnn8-runtime-ubuntu18.04 + +LABEL maintainer="Amazon AI" +LABEL dlc_major_version="1" + +# Specify accept-bind-to-port LABEL for inference pipelines to use SAGEMAKER_BIND_TO_PORT +# https://docs.aws.amazon.com/sagemaker/latest/dg/inference-pipeline-real-time.html +LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true +# Specify multi-models LABEL to indicate container is capable of loading and serving multiple models concurrently +# https://docs.aws.amazon.com/sagemaker/latest/dg/build-multi-model-build-container.html +LABEL com.amazonaws.sagemaker.capabilities.multi-models=true + +ARG MMS_VERSION=1.1.2 +ARG PYTHON=python3 +ARG PYTHON_VERSION=3.7.10 +ARG OPEN_MPI_VERSION=4.0.1 + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + LD_LIBRARY_PATH="/opt/conda/lib/:${LD_LIBRARY_PATH}:/usr/local/lib" \ + PYTHONIOENCODING=UTF-8 \ + LANG=C.UTF-8 \ + LC_ALL=C.UTF-8 \ + TEMP=/home/model-server/tmp \ + DEBIAN_FRONTEND=noninteractive + +ENV PATH /opt/conda/bin:$PATH + +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + ca-certificates \ + build-essential \ + openssl \ + openjdk-8-jdk-headless \ + vim \ + wget \ + curl \ + unzip \ 
+ libnuma1 \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +RUN curl -L -o ~/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh \ + && chmod +x ~/miniconda.sh \ + && ~/miniconda.sh -b -p /opt/conda \ + && rm ~/miniconda.sh \ + && /opt/conda/bin/conda update conda \ + && /opt/conda/bin/conda install -c conda-forge \ + python=$PYTHON_VERSION \ + && /opt/conda/bin/conda install -y \ + # conda 4.10.0 requires ruamel_yaml to be installed. Currently pinned at latest. + ruamel_yaml==0.15.100 \ + cython==0.29.12 \ + botocore \ + mkl-include==2019.4 \ + mkl==2019.4 \ + && /opt/conda/bin/conda clean -ya + +RUN pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org \ + && ln -s /opt/conda/bin/pip /usr/local/bin/pip3 \ + && pip install packaging==20.4 \ + enum-compat==0.0.3 \ + "cryptography>3.2" + +RUN wget https://www.open-mpi.org/software/ompi/v4.0/downloads/openmpi-$OPEN_MPI_VERSION.tar.gz \ + && gunzip -c openmpi-$OPEN_MPI_VERSION.tar.gz | tar xf - \ + && cd openmpi-$OPEN_MPI_VERSION \ + && ./configure --prefix=/home/.openmpi \ + && make all install \ + && cd .. 
\ + && rm openmpi-$OPEN_MPI_VERSION.tar.gz \ + && rm -rf openmpi-$OPEN_MPI_VERSION + +ENV PATH="$PATH:/home/.openmpi/bin" +ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/.openmpi/lib/" + +WORKDIR / + +RUN pip install --no-cache-dir \ + multi-model-server==$MMS_VERSION \ + sagemaker-inference + +RUN useradd -m model-server \ + && mkdir -p /home/model-server/tmp \ + && chown -R model-server /home/model-server + +COPY mms-entrypoint.py /usr/local/bin/dockerd-entrypoint.py +COPY config.properties /home/model-server + +RUN chmod +x /usr/local/bin/dockerd-entrypoint.py + +ADD https://raw.githubusercontent.com/aws/deep-learning-containers/master/src/deep_learning_container.py /usr/local/bin/deep_learning_container.py + +RUN chmod +x /usr/local/bin/deep_learning_container.py + +RUN HOME_DIR=/root \ + && curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \ + && unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \ + && cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \ + && chmod +x /usr/local/bin/testOSSCompliance \ + && chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \ + && ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} ${PYTHON} \ + && rm -rf ${HOME_DIR}/oss_compliance* + +EXPOSE 8080 8081 +ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"] +CMD ["multi-model-server", "--start", "--mms-config", "/home/model-server/config.properties"]