From bbb8be6e5afacb3e690fa3bf7dc557905a02078e Mon Sep 17 00:00:00 2001 From: Louis Bergelson Date: Mon, 3 Apr 2023 15:34:43 -0400 Subject: [PATCH] Updates to reduce size of docker (#8259) * reduce unnecessary files in our docker * update base docker to broadinstitute/gatk:gatkbase-3.1.0 --- Dockerfile | 35 ++++++++++++++++++++---------- scripts/docker/gatkbase/Dockerfile | 20 +++++++++-------- 2 files changed, 34 insertions(+), 21 deletions(-) diff --git a/Dockerfile b/Dockerfile index 98f8a3ffdfc..a513863128d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,7 @@ +ARG BASE_DOCKER=broadinstitute/gatk:gatkbase-3.1.0 + # stage 1 for constructing the GATK zip -FROM broadinstitute/gatk:gatkbase-3.0.0 AS gradleBuild +FROM ${BASE_DOCKER} AS gradleBuild LABEL stage=gatkIntermediateBuildImage ARG RELEASE=false @@ -8,12 +10,16 @@ ADD . /gatk WORKDIR /gatk # Get an updated gcloud signing key, in case the one in the base image has expired -RUN rm /etc/apt/sources.list.d/google-cloud-sdk.list -RUN apt update -RUN apt-key list -RUN curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key --keyring /usr/share/keyrings/cloud.google.gpg add - -RUN add-apt-repository universe && apt update -RUN apt-get --assume-yes install git-lfs +RUN rm /etc/apt/sources.list.d/google-cloud-sdk.list && \ + apt update &&\ + apt-key list && \ + curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key --keyring /usr/share/keyrings/cloud.google.gpg add - && \ + add-apt-repository universe && apt update && \ + apt-get --assume-yes install git-lfs && \ + apt-get -y clean && \ + apt-get -y autoclean && \ + apt-get -y autoremove && \ + rm -rf /var/lib/apt/lists/* RUN git lfs install --force #Download only resources required for the build, not for testing @@ -22,15 +28,21 @@ RUN git lfs pull --include src/main/resources/large RUN export GRADLE_OPTS="-Xmx4048m -Dorg.gradle.daemon=false" && /gatk/gradlew clean collectBundleIntoDir shadowTestClassJar shadowTestJar -Drelease=$RELEASE RUN cp -r $( find /gatk/build -name "*bundle-files-collected" )/ /gatk/unzippedJar/ RUN unzip -o -j $( find /gatk/unzippedJar -name "gatkPython*.zip" ) -d /gatk/unzippedJar/scripts +RUN chmod -R a+rw /gatk/unzippedJar -FROM broadinstitute/gatk:gatkbase-3.0.0 +FROM ${BASE_DOCKER} -RUN rm /etc/apt/sources.list.d/google-cloud-sdk.list -RUN apt update -RUN apt-key list +RUN rm /etc/apt/sources.list.d/google-cloud-sdk.list && \ + apt update && \ + apt-key list && \ + apt-get -y clean && \ + apt-get -y autoclean && \ + apt-get -y autoremove && \ + rm -rf /var/lib/apt/lists/* WORKDIR /gatk +RUN chmod -R a+rw /gatk # Location of the unzipped gatk bundle files COPY --from=gradleBuild /gatk/unzippedJar . @@ -76,7 +88,6 @@ ENV CLASSPATH /gatk/gatk.jar:$CLASSPATH # Start GATK Python environment WORKDIR /gatk -RUN chmod -R a+rw /gatk ENV PATH $CONDA_PATH/envs/gatk/bin:$CONDA_PATH/bin:$PATH RUN conda env create -n gatk -f /gatk/gatkcondaenv.yml && \ echo "source activate gatk" >> /gatk/gatkenv.rc && \ diff --git a/scripts/docker/gatkbase/Dockerfile b/scripts/docker/gatkbase/Dockerfile index 95bafaa9350..7f4d8cb6a7b 100644 --- a/scripts/docker/gatkbase/Dockerfile +++ b/scripts/docker/gatkbase/Dockerfile @@ -22,7 +22,8 @@ RUN apt-get update && \ software-properties-common && \ apt-get -y clean && \ apt-get -y autoclean && \ - apt-get -y autoremove + apt-get -y autoremove && \ + rm -rf /var/lib/apt/lists/* RUN java -version @@ -35,6 +36,10 @@ RUN echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] http://packages.c apt-get -y clean && \ apt-get -y autoclean && \ apt-get -y autoremove && \ + rm -rf /var/lib/apt/lists/* && \ +# Remove the anthos cli tool and related files since they are very large and we / anyone using the docker are unlikely to use them +# Remove the bundled python because we have python installed separately + rm -rf /usr/lib/google-cloud-sdk/bin/anthoscli /usr/lib/google-cloud-sdk/platform/anthoscli_licenses /usr/lib/google-cloud-sdk/platform/bundledpythonunix && \ find / -wholename "*__pycache__/*.pyc" -exec rm {} + # Set environment variables. @@ -50,19 +55,16 @@ ENV JAVA_LIBRARY_PATH /usr/lib/jni # Install miniconda ENV DOWNLOAD_DIR /downloads -ENV CONDA_URL https://repo.continuum.io/miniconda/Miniconda3-4.7.12.1-Linux-x86_64.sh -ENV CONDA_MD5 = "81c773ff87af5cfac79ab862942ab6b3" +ENV CONDA_URL https://repo.anaconda.com/miniconda/Miniconda3-py310_23.1.0-1-Linux-x86_64.sh +ENV CONDA_MD5 = "32d73e1bc33fda089d7cd9ef4c1be542616bd8e437d1f77afeeaf7afdb019787" ENV CONDA_PATH /opt/miniconda +ENV PATH $CONDA_PATH/bin:$PATH RUN mkdir $DOWNLOAD_DIR && \ wget -nv -O $DOWNLOAD_DIR/miniconda.sh $CONDA_URL && \ test "`md5sum $DOWNLOAD_DIR/miniconda.sh | awk -v FS=' ' '{print $1}'` = $CONDA_MD5" && \ bash $DOWNLOAD_DIR/miniconda.sh -p $CONDA_PATH -b && \ rm $DOWNLOAD_DIR/miniconda.sh && \ - ${CONDA_PATH}/bin/conda clean -afy && \ + conda clean -afy && \ find /opt/miniconda/ -follow -type f -name '*.a' -delete && \ find /opt/miniconda/ -follow -type f -name '*.pyc' -delete && \ - rm -rf /root/.cache/pip - - -# Deleting unneeded caches -RUN rm -rf /var/lib/apt/lists/* + rm -rf /root/.cache/pip