Skip to content

Commit

Permalink
TF 2.16 with Torch 2.4.0 (#1415)
Browse files Browse the repository at this point in the history
Upgraded our CPU and GPU base images

-updated the torch ecosystem packages.
-removed the patches and pins that were put in place for tf 2.15
-removed torchtext; it is no longer maintained and is incompatible with
torch 2.4.0
-geopandas deprecated its datasets methods, so the test needed to be updated
  • Loading branch information
calderjo authored Aug 16, 2024
1 parent 3a9e7ed commit 9587f69
Show file tree
Hide file tree
Showing 10 changed files with 29 additions and 118 deletions.
34 changes: 10 additions & 24 deletions Dockerfile.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ ARG GPU_BASE_IMAGE_NAME
ARG LIGHTGBM_VERSION
ARG TORCH_VERSION
ARG TORCHAUDIO_VERSION
ARG TORCHTEXT_VERSION
ARG TORCHVISION_VERSION
ARG JAX_VERSION

Expand Down Expand Up @@ -38,16 +37,15 @@ RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/lib
{{ end }}

# Keep these variables in sync if base image is updated.
ENV TENSORFLOW_VERSION=2.15.0
ENV TENSORFLOW_VERSION=2.16.1
# See https://github.com/tensorflow/io#tensorflow-version-compatibility
ENV TENSORFLOW_IO_VERSION=0.35.0
ENV TENSORFLOW_IO_VERSION=0.37.0

# We need to redefine the ARG here to get the ARG value defined above the FROM instruction.
# See: https://docs.docker.com/engine/reference/builder/#understand-how-arg-and-from-interact
ARG LIGHTGBM_VERSION
ARG TORCH_VERSION
ARG TORCHAUDIO_VERSION
ARG TORCHTEXT_VERSION
ARG TORCHVISION_VERSION
ARG JAX_VERSION

Expand All @@ -62,7 +60,6 @@ ENV KMP_SETTINGS=false
ENV PIP_ROOT_USER_ACTION=ignore

ADD clean-layer.sh /tmp/clean-layer.sh
ADD patches/keras_patch.sh /tmp/keras_patch.sh
ADD patches/nbconvert-extensions.tpl /opt/kaggle/nbconvert-extensions.tpl
ADD patches/template_conf.json /opt/kaggle/conf.json

Expand Down Expand Up @@ -122,21 +119,20 @@ RUN pip install spacy && \
{{ end}}

# Install PyTorch
# b/356397043: magma-cuda121 is the latest version
{{ if eq .Accelerator "gpu" }}
COPY --from=torch_whl /tmp/whl/*.whl /tmp/torch/
RUN mamba install -y -c pytorch magma-cuda${CUDA_MAJOR_VERSION}${CUDA_MINOR_VERSION} && \
RUN mamba install -y -c pytorch magma-cuda121 && \
pip install /tmp/torch/*.whl && \
# b/255757999 openmp (libomp.so) is a dependency of libtorchaudio (and formerly libtorchtext), so it is installed explicitly.
mamba install -y openmp && \
sudo apt -y install libsox-dev && \
rm -rf /tmp/torch && \
/tmp/clean-layer.sh
{{ else }}
RUN pip install \
torch==$TORCH_VERSION+cpu \
torchvision==$TORCHVISION_VERSION+cpu \
torchaudio==$TORCHAUDIO_VERSION+cpu \
torchtext==$TORCHTEXT_VERSION \
-f https://download.pytorch.org/whl/torch_stable.html && \
--index-url https://download.pytorch.org/whl/cpu && \
/tmp/clean-layer.sh
{{ end }}

Expand Down Expand Up @@ -199,32 +195,22 @@ RUN apt-get update && \

RUN pip install -f http://h2o-release.s3.amazonaws.com/h2o/latest_stable_Py.html h2o && /tmp/clean-layer.sh

# b/318672158 Use simply tensorflow-probability once > 0.23.0 is released.
RUN pip install \
"tensorflow==${TENSORFLOW_VERSION}" \
"tensorflow-io==${TENSORFLOW_IO_VERSION}" \
git+https://github.com/tensorflow/probability.git@fbc5ebe9b1d343113fb917010096cfd88b32eecf \
tensorflow_text \
tensorflow-probability \
tensorflow_decision_forests \
tensorflow-text \
"tensorflow_hub>=0.16.0" \
# b/331799280 remove once other packages move over to dm-tree
optree \
tf-keras && \
/tmp/clean-layer.sh

# b/318672158 Use simply tensorflow_decision_forests on next release, expected with tf 2.16
RUN pip install tensorflow_decision_forests==1.8.1 --no-deps && \
/tmp/clean-layer.sh

RUN chmod +x /tmp/keras_patch.sh && \
/tmp/keras_patch.sh

ADD patches/keras_internal.py /opt/conda/lib/python3.10/site-packages/tensorflow_decision_forests/keras/keras_internal.py
ADD patches/keras_internal_test.py /opt/conda/lib/python3.10/site-packages/tensorflow_decision_forests/keras/keras_internal_test.py

# Remove "--no-deps" flag and "namex" package once Keras 3.* is included in our base image.
# We ignore dependencies since tf2.15 and Keras 3.* should work despite pip saying it won't.
# Currently, keras tries to install a nightly version of tf 2.16: https://github.com/keras-team/keras/blob/fe2f54aa5bc42fb23a96449cf90434ab9bb6a2cd/requirements.txt#L2
RUN pip install --no-deps "keras>3" keras-cv keras-nlp namex && \
RUN pip install "keras>3" keras-cv keras-nlp && \
/tmp/clean-layer.sh

# b/328788268 libpysal 4.10 seems to fail with "module 'shapely' has no attribute 'Geometry'. Did you mean: 'geometry'"
Expand Down
1 change: 0 additions & 1 deletion Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ pipeline {
--package torch \
--version $TORCH_VERSION \
--build-arg TORCHAUDIO_VERSION=$TORCHAUDIO_VERSION \
--build-arg TORCHTEXT_VERSION=$TORCHTEXT_VERSION \
--build-arg TORCHVISION_VERSION=$TORCHVISION_VERSION \
--build-arg CUDA_MAJOR_VERSION=$CUDA_MAJOR_VERSION \
--build-arg CUDA_MINOR_VERSION=$CUDA_MINOR_VERSION \
Expand Down
15 changes: 7 additions & 8 deletions config.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
BASE_IMAGE_REPO=gcr.io/deeplearning-platform-release
BASE_IMAGE_TAG=m114
CPU_BASE_IMAGE_NAME=tf2-cpu.2-15.py310
GPU_BASE_IMAGE_NAME=tf2-gpu.2-15.py310
BASE_IMAGE_TAG=m122
CPU_BASE_IMAGE_NAME=tf2-cpu.2-16.py310
GPU_BASE_IMAGE_NAME=tf2-gpu.2-16.py310
LIGHTGBM_VERSION=4.2.0
TORCH_VERSION=2.1.2
TORCHAUDIO_VERSION=2.1.2
TORCHTEXT_VERSION=0.16.2
TORCHVISION_VERSION=0.16.2
TORCH_VERSION=2.4.0
TORCHAUDIO_VERSION=2.4.0
TORCHVISION_VERSION=0.19.0
JAX_VERSION=0.4.26
CUDA_MAJOR_VERSION=12
CUDA_MINOR_VERSION=1
CUDA_MINOR_VERSION=3
6 changes: 4 additions & 2 deletions packages/jaxlib.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@ ENV LIBRARY_PATH="$LIBRARY_PATH:/opt/conda/lib"
ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/conda/lib"

# Instructions: https://jax.readthedocs.io/en/latest/developer.html#building-jaxlib-from-source
RUN apt-get update && \
apt-get install -y g++ python python3-dev
RUN sudo ln -s /usr/bin/python3 /usr/bin/python

RUN apt-get update && \
apt-get install -y g++ python3 python3-dev

RUN pip install numpy wheel build

Expand Down
16 changes: 1 addition & 15 deletions packages/torch.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ FROM ${BASE_IMAGE} AS builder

ARG PACKAGE_VERSION
ARG TORCHAUDIO_VERSION
ARG TORCHTEXT_VERSION
ARG TORCHVISION_VERSION
ARG CUDA_MAJOR_VERSION
ARG CUDA_MINOR_VERSION
Expand All @@ -20,7 +19,7 @@ RUN conda install -c conda-forge mamba

# Build instructions: https://github.com/pytorch/pytorch#from-source
RUN mamba install astunparse numpy ninja pyyaml mkl mkl-include setuptools cmake cffi typing_extensions future six requests dataclasses
RUN mamba install -c pytorch magma-cuda${CUDA_MAJOR_VERSION}${CUDA_MINOR_VERSION}
RUN mamba install -c pytorch magma-cuda121

# By default, it uses the version from version.txt which includes the `a0` (alpha zero) suffix and part of the git hash.
# This causes dependency conflicts like these: https://paste.googleplex.com/4786486378496000
Expand Down Expand Up @@ -63,18 +62,6 @@ RUN sudo apt-get update && \
RUN sed -i 's/set(envs/set(envs\n "LIBS=-ltinfo"/' /usr/local/src/audio/third_party/sox/CMakeLists.txt
RUN cd /usr/local/src/audio && python setup.py bdist_wheel

# Build torchtext
# Instructions: https://github.com/pytorch/text#building-from-source
# See comment above for PYTORCH_BUILD_VERSION.
ENV BUILD_VERSION=$TORCHTEXT_VERSION
RUN cd /usr/local/src && \
git clone https://github.com/pytorch/text && \
cd text && \
git checkout tags/v$TORCHTEXT_VERSION && \
git submodule sync && \
git submodule update --init --recursive --jobs 1 && \
python setup.py bdist_wheel

# Build torchvision.
# Instructions: https://github.com/pytorch/vision/tree/main#installation
# See comment above for PYTORCH_BUILD_VERSION.
Expand All @@ -93,7 +80,6 @@ FROM alpine:latest
RUN mkdir -p /tmp/whl/
COPY --from=builder /usr/local/src/pytorch/dist/*.whl /tmp/whl
COPY --from=builder /usr/local/src/audio/dist/*.whl /tmp/whl
COPY --from=builder /usr/local/src/text/dist/*.whl /tmp/whl
COPY --from=builder /usr/local/src/vision/dist/*.whl /tmp/whl

# Print out the built .whl file.
Expand Down
41 changes: 0 additions & 41 deletions patches/keras_patch.sh

This file was deleted.

17 changes: 6 additions & 11 deletions tests/test_geopandas.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,11 @@
import unittest

import geopandas
from shapely.geometry import Polygon

class TestGeopandas(unittest.TestCase):
def test_read(self):
df = geopandas.read_file(geopandas.datasets.get_path('nybb'))
self.assertTrue(df.size > 1)

def test_spatial_join(self):
cities = geopandas.read_file(geopandas.datasets.get_path('naturalearth_cities'))
world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))
countries = world[['geometry', 'name']]
countries = countries.rename(columns={'name':'country'})
cities_with_country = geopandas.sjoin(cities, countries, how="inner", op='intersects')
self.assertTrue(cities_with_country.size > 1)
def test_GeoSeries(self):
p1 = Polygon([(0, 0), (1, 0), (1, 1)])
p2 = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
p3 = Polygon([(2, 0), (3, 0), (3, 1), (2, 1)])
g = geopandas.GeoSeries([p1, p2, p3])
12 changes: 0 additions & 12 deletions tests/test_torchtext.py

This file was deleted.

3 changes: 1 addition & 2 deletions tpu/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ ARG TENSORFLOW_VERSION
ARG TF_LIBTPU_VERSION
ARG JAX_VERSION
ARG TORCHVISION_VERSION
ARG TORCHTEXT_VERSION
ARG TORCHAUDIO_VERSION

ENV ISTPUVM=1
Expand Down Expand Up @@ -60,7 +59,7 @@ RUN apt-get update && apt-get install ffmpeg libsm6 libxext6 -y
# Additional useful packages should be added here

RUN pip install tensorflow_hub https://storage.googleapis.com/cloud-tpu-tpuvm-artifacts/tensorflow/tf-${TENSORFLOW_VERSION}/tensorflow-${TENSORFLOW_VERSION}-${PYTHON_WHEEL_VERSION}-${PYTHON_WHEEL_VERSION}-${TF_LINUX_WHEEL_VERSION}.whl tensorflow-probability tensorflow-io \
torch~=${TORCH_VERSION} https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-${TORCH_VERSION}+libtpu-${PYTHON_WHEEL_VERSION}-${PYTHON_WHEEL_VERSION}-${TORCH_LINUX_WHEEL_VERSION}.whl torchvision==${TORCHVISION_VERSION} torchtext==${TORCHTEXT_VERSION} torchaudio==${TORCHAUDIO_VERSION} \
torch~=${TORCH_VERSION} https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-${TORCH_VERSION}+libtpu-${PYTHON_WHEEL_VERSION}-${PYTHON_WHEEL_VERSION}-${TORCH_LINUX_WHEEL_VERSION}.whl torchvision==${TORCHVISION_VERSION} torchaudio==${TORCHAUDIO_VERSION} \
jax[tpu]==${JAX_VERSION} -f https://storage.googleapis.com/jax-releases/libtpu_releases.html trax flax optax git+https://github.com/deepmind/dm-haiku jraph distrax \
papermill jupyterlab python-lsp-server[all] "jupyter-lsp==1.5.1" \
pandas matplotlib opencv-python-headless librosa accelerate diffusers scikit-learn transformers \
Expand Down
2 changes: 0 additions & 2 deletions tpu/config.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@ JAX_VERSION=0.4.23
TORCH_VERSION=2.4.0
# https://github.com/pytorch/audio supports nightly
TORCHAUDIO_VERSION=2.4.0
# https://github.com/pytorch/text supports main
TORCHTEXT_VERSION=0.18.0
# https://github.com/pytorch/vision supports nightly
TORCHVISION_VERSION=0.19.0
TORCH_LINUX_WHEEL_VERSION=manylinux_2_28_x86_64

0 comments on commit 9587f69

Please sign in to comment.