Skip to content

Commit

Permalink
Updates to reduce size of docker (#8259)
Browse files Browse the repository at this point in the history
* reduce unnecessary files in our docker
* update base docker to broadinstitute/gatk:gatkbase-3.1.0
  • Loading branch information
lbergelson authored Apr 3, 2023
1 parent 56693ff commit bbb8be6
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 21 deletions.
35 changes: 23 additions & 12 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
ARG BASE_DOCKER=broadinstitute/gatk:gatkbase-3.1.0

# stage 1 for constructing the GATK zip
FROM broadinstitute/gatk:gatkbase-3.0.0 AS gradleBuild
FROM ${BASE_DOCKER} AS gradleBuild
LABEL stage=gatkIntermediateBuildImage
ARG RELEASE=false

Expand All @@ -8,12 +10,16 @@ ADD . /gatk
WORKDIR /gatk

# Get an updated gcloud signing key, in case the one in the base image has expired
RUN rm /etc/apt/sources.list.d/google-cloud-sdk.list
RUN apt update
RUN apt-key list
RUN curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key --keyring /usr/share/keyrings/cloud.google.gpg add -
RUN add-apt-repository universe && apt update
RUN apt-get --assume-yes install git-lfs
RUN rm /etc/apt/sources.list.d/google-cloud-sdk.list && \
apt update &&\
apt-key list && \
curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key --keyring /usr/share/keyrings/cloud.google.gpg add - && \
add-apt-repository universe && apt update && \
apt-get --assume-yes install git-lfs && \
apt-get -y clean && \
apt-get -y autoclean && \
apt-get -y autoremove && \
rm -rf /var/lib/apt/lists/*
RUN git lfs install --force

#Download only resources required for the build, not for testing
Expand All @@ -22,15 +28,21 @@ RUN git lfs pull --include src/main/resources/large
RUN export GRADLE_OPTS="-Xmx4048m -Dorg.gradle.daemon=false" && /gatk/gradlew clean collectBundleIntoDir shadowTestClassJar shadowTestJar -Drelease=$RELEASE
RUN cp -r $( find /gatk/build -name "*bundle-files-collected" )/ /gatk/unzippedJar/
RUN unzip -o -j $( find /gatk/unzippedJar -name "gatkPython*.zip" ) -d /gatk/unzippedJar/scripts
RUN chmod -R a+rw /gatk/unzippedJar

FROM broadinstitute/gatk:gatkbase-3.0.0
FROM ${BASE_DOCKER}

RUN rm /etc/apt/sources.list.d/google-cloud-sdk.list
RUN apt update
RUN apt-key list
# Remove the google-cloud-sdk apt source list, refresh the package index, print the
# trusted apt keys, then clean the apt caches and delete the downloaded index files.
# Everything is chained in one RUN so the cleanup happens in the same layer.
RUN rm /etc/apt/sources.list.d/google-cloud-sdk.list && \
apt update && \
apt-key list && \
apt-get -y clean && \
apt-get -y autoclean && \
apt-get -y autoremove && \
rm -rf /var/lib/apt/lists/*

WORKDIR /gatk

RUN chmod -R a+rw /gatk
# Location of the unzipped gatk bundle files
COPY --from=gradleBuild /gatk/unzippedJar .

Expand Down Expand Up @@ -76,7 +88,6 @@ ENV CLASSPATH /gatk/gatk.jar:$CLASSPATH
# Start GATK Python environment

WORKDIR /gatk
RUN chmod -R a+rw /gatk
ENV PATH $CONDA_PATH/envs/gatk/bin:$CONDA_PATH/bin:$PATH
RUN conda env create -n gatk -f /gatk/gatkcondaenv.yml && \
echo "source activate gatk" >> /gatk/gatkenv.rc && \
Expand Down
20 changes: 11 additions & 9 deletions scripts/docker/gatkbase/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ RUN apt-get update && \
software-properties-common && \
apt-get -y clean && \
apt-get -y autoclean && \
apt-get -y autoremove
apt-get -y autoremove && \
rm -rf /var/lib/apt/lists/*

RUN java -version

Expand All @@ -35,6 +36,10 @@ RUN echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] http://packages.c
apt-get -y clean && \
apt-get -y autoclean && \
apt-get -y autoremove && \
rm -rf /var/lib/apt/lists/* && \
# Remove the Anthos CLI tool and its related files: they are very large, and neither we nor users of this image are likely to need them.
# Remove the bundled Python because Python is installed separately.
rm -rf /usr/lib/google-cloud-sdk/bin/anthoscli /usr/lib/google-cloud-sdk/platform/anthoscli_licenses /usr/lib/google-cloud-sdk/platform/bundledpythonunix && \
find / -wholename "*__pycache__/*.pyc" -exec rm {} +

# Set environment variables.
Expand All @@ -50,19 +55,16 @@ ENV JAVA_LIBRARY_PATH /usr/lib/jni

# Install miniconda
# Downloads the Miniconda installer from $CONDA_URL into $DOWNLOAD_DIR, runs it with
# `-p $CONDA_PATH -b`, then deletes the installer, runs `conda clean -afy`, and removes
# *.a and *.pyc files under /opt/miniconda plus /root/.cache/pip, all in one RUN.
# NOTE(review): the checksum step below does not appear to verify anything; confirm and fix:
#  - `ENV CONDA_MD5 = "..."` uses the space-separated ENV form, so the stored value
#    presumably starts with "= " instead of being just the digest.
#  - The newer CONDA_MD5 value is 64 hex digits (the length of a SHA-256 digest), while the
#    RUN compares it against `md5sum` output (32 hex digits).
#  - `test "... = $CONDA_MD5"` wraps the whole comparison in one pair of double quotes, so
#    `test` receives a single non-empty argument and succeeds whatever the digest is; a
#    corrupted or tampered download would not stop the build.
ENV DOWNLOAD_DIR /downloads
ENV CONDA_URL https://repo.continuum.io/miniconda/Miniconda3-4.7.12.1-Linux-x86_64.sh
ENV CONDA_MD5 = "81c773ff87af5cfac79ab862942ab6b3"
ENV CONDA_URL https://repo.anaconda.com/miniconda/Miniconda3-py310_23.1.0-1-Linux-x86_64.sh
ENV CONDA_MD5 = "32d73e1bc33fda089d7cd9ef4c1be542616bd8e437d1f77afeeaf7afdb019787"
ENV CONDA_PATH /opt/miniconda
ENV PATH $CONDA_PATH/bin:$PATH
RUN mkdir $DOWNLOAD_DIR && \
wget -nv -O $DOWNLOAD_DIR/miniconda.sh $CONDA_URL && \
test "`md5sum $DOWNLOAD_DIR/miniconda.sh | awk -v FS=' ' '{print $1}'` = $CONDA_MD5" && \
bash $DOWNLOAD_DIR/miniconda.sh -p $CONDA_PATH -b && \
rm $DOWNLOAD_DIR/miniconda.sh && \
${CONDA_PATH}/bin/conda clean -afy && \
conda clean -afy && \
find /opt/miniconda/ -follow -type f -name '*.a' -delete && \
find /opt/miniconda/ -follow -type f -name '*.pyc' -delete && \
rm -rf /root/.cache/pip


# Deleting unneeded caches
RUN rm -rf /var/lib/apt/lists/*
rm -rf /root/.cache/pip

0 comments on commit bbb8be6

Please sign in to comment.