diff --git a/Dockerfile.tmpl b/Dockerfile.tmpl index 5597ff59..4d75a47a 100644 --- a/Dockerfile.tmpl +++ b/Dockerfile.tmpl @@ -99,12 +99,24 @@ RUN conda config --add channels nvidia && \ mamba install -y mkl cartopy imagemagick pyproj "shapely<2" && \ /tmp/clean-layer.sh +# Install spacy +{{ if eq .Accelerator "gpu" }} +RUN mamba install -y -c conda-forge spacy cupy cuda-version=$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION && \ + /tmp/clean-layer.sh +{{ else }} +RUN pip install spacy && \ + /tmp/clean-layer.sh +{{ end}} {{ if eq .Accelerator "gpu" }} # b/232247930: uninstall pyarrow to avoid double installation with the GPU specific version. RUN pip uninstall -y pyarrow && \ mamba install -y cudf cuml && \ /tmp/clean-layer.sh + +# TODO: b/296444923 - Resolve pandas dependency another way +RUN sed -i 's/^is_extension_type/# is_extension_type/g' /opt/conda/lib/python3.10/site-packages/cudf/api/types.py \ + && sed -i 's/^is_categorical/# is_categorical/g' /opt/conda/lib/python3.10/site-packages/cudf/api/types.py {{ end }} # Install PyTorch @@ -150,14 +162,6 @@ RUN pip install jax[cpu] && \ /tmp/clean-layer.sh {{ end }} -# Install spacy -{{ if eq .Accelerator "gpu" }} -RUN mamba install -y -c conda-forge spacy cupy && \ - /tmp/clean-layer.sh -{{ else }} -RUN pip install spacy && \ - /tmp/clean-layer.sh -{{ end}} # Install GPU specific packages {{ if eq .Accelerator "gpu" }} @@ -177,12 +181,13 @@ RUN JAXVER=$(pip freeze | grep -e "^jax==") && \ pandas \ polars \ flax \ - "${JAXVER}" && \ + "${JAXVER}" + +RUN apt-get update && apt-get install -y default-jre && /tmp/clean-layer.sh - # Install h2o from source. - # Use `conda install -c h2oai h2o` once Python 3.7 version is released to conda.
- apt-get install -y default-jre-headless && \ - pip install -f https://h2o-release.s3.amazonaws.com/h2o/latest_stable_Py.html h2o \ +RUN pip install -f https://h2o-release.s3.amazonaws.com/h2o/latest_stable_Py.html h2o && /tmp/clean-layer.sh + +RUN pip install \ "tensorflow-gcs-config<=${TENSORFLOW_VERSION}" \ "tensorflow==${TENSORFLOW_VERSION}" \ tensorflow-addons \ @@ -248,7 +253,6 @@ RUN pip install scipy \ datashader \ # Boruta (python implementation) Boruta && \ - apt-get install -y graphviz && pip install graphviz && \ # Pandoc is a dependency of deap apt-get install -y pandoc && \ @@ -470,6 +474,7 @@ RUN pip install bleach \ pyarrow \ feather-format \ fastai + RUN python -m spacy download en_core_web_sm && python -m spacy download en_core_web_lg && \ apt-get update && apt-get install -y ffmpeg && \ /tmp/clean-layer.sh @@ -678,4 +683,4 @@ RUN echo "$GIT_COMMIT" > /etc/git_commit && echo "$BUILD_DATE" > /etc/build_date {{ if eq .Accelerator "gpu" }} # Remove the CUDA stubs. ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH_NO_STUBS" -{{ end }} +{{ end }} \ No newline at end of file