TF 2.16 with Torch 2.4.0 (#1415)

Upgraded our CPU and GPU base images. - Updated the torch ecosystem. - Removed the patches and pins placed for TF 2.15. - Removed torchtext; it is no longer maintained and is incompatible with torch 2.4.0. - geopandas deprecated its datasets methods, so the test needed to be updated.
Kaggle · Aug 16, 2024 · 9587f69 · 9587f69
1 parent 3a9e7ed
commit 9587f69
Show file tree

Hide file tree

Showing 10 changed files with 29 additions and 118 deletions.
diff --git a/Dockerfile.tmpl b/Dockerfile.tmpl
@@ -5,7 +5,6 @@ ARG GPU_BASE_IMAGE_NAME
 ARG LIGHTGBM_VERSION
 ARG TORCH_VERSION
 ARG TORCHAUDIO_VERSION
-ARG TORCHTEXT_VERSION
 ARG TORCHVISION_VERSION
 ARG JAX_VERSION
 
@@ -38,16 +37,15 @@ RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/lib
 {{ end }}
 
 # Keep these variables in sync if base image is updated.
-ENV TENSORFLOW_VERSION=2.15.0
+ENV TENSORFLOW_VERSION=2.16.1
 # See https://github.com/tensorflow/io#tensorflow-version-compatibility
-ENV TENSORFLOW_IO_VERSION=0.35.0
+ENV TENSORFLOW_IO_VERSION=0.37.0
 
 # We need to redefine the ARG here to get the ARG value defined above the FROM instruction.
 # See: https://docs.docker.com/engine/reference/builder/#understand-how-arg-and-from-interact
 ARG LIGHTGBM_VERSION
 ARG TORCH_VERSION
 ARG TORCHAUDIO_VERSION
-ARG TORCHTEXT_VERSION
 ARG TORCHVISION_VERSION
 ARG JAX_VERSION
 
@@ -62,7 +60,6 @@ ENV KMP_SETTINGS=false
 ENV PIP_ROOT_USER_ACTION=ignore
 
 ADD clean-layer.sh  /tmp/clean-layer.sh
-ADD patches/keras_patch.sh  /tmp/keras_patch.sh
 ADD patches/nbconvert-extensions.tpl /opt/kaggle/nbconvert-extensions.tpl
 ADD patches/template_conf.json /opt/kaggle/conf.json
 
@@ -122,21 +119,20 @@ RUN pip install spacy && \
 {{ end}}
 
 # Install PyTorch
+# b/356397043: magma-cuda121 is the latest version
 {{ if eq .Accelerator "gpu" }}
 COPY --from=torch_whl /tmp/whl/*.whl /tmp/torch/
-RUN mamba install -y -c pytorch magma-cuda${CUDA_MAJOR_VERSION}${CUDA_MINOR_VERSION} && \
+RUN mamba install -y -c pytorch magma-cuda121 && \
     pip install /tmp/torch/*.whl && \
-    # b/255757999 openmp (libomp.so) is an dependency of libtorchtext and libtorchaudio but
-    mamba install -y openmp && \
+    sudo apt -y install libsox-dev && \
     rm -rf /tmp/torch && \
     /tmp/clean-layer.sh
 {{ else }}
 RUN pip install \
         torch==$TORCH_VERSION+cpu \
         torchvision==$TORCHVISION_VERSION+cpu \
         torchaudio==$TORCHAUDIO_VERSION+cpu \
-        torchtext==$TORCHTEXT_VERSION \
-    -f https://download.pytorch.org/whl/torch_stable.html && \
+        --index-url https://download.pytorch.org/whl/cpu && \
     /tmp/clean-layer.sh
 {{ end }}
 
@@ -199,32 +195,22 @@ RUN apt-get update && \
 
 RUN pip install -f http://h2o-release.s3.amazonaws.com/h2o/latest_stable_Py.html h2o && /tmp/clean-layer.sh
 
-# b/318672158 Use simply tensorflow-probability once > 0.23.0 is released.
 RUN pip install \
         "tensorflow==${TENSORFLOW_VERSION}" \
         "tensorflow-io==${TENSORFLOW_IO_VERSION}" \
-        git+https://github.com/tensorflow/probability.git@fbc5ebe9b1d343113fb917010096cfd88b32eecf \
-        tensorflow_text \
+        tensorflow-probability \
+        tensorflow_decision_forests \
+        tensorflow-text \
         "tensorflow_hub>=0.16.0" \
         # b/331799280 remove once other packages over to dm-tre
         optree \
         tf-keras && \
     /tmp/clean-layer.sh
 
-# b/318672158 Use simply tensorflow_decision_forests on next release, expected with tf 2.16
-RUN pip install tensorflow_decision_forests==1.8.1 --no-deps && \
-    /tmp/clean-layer.sh
-
-RUN chmod +x /tmp/keras_patch.sh && \
-    /tmp/keras_patch.sh
-
 ADD patches/keras_internal.py /opt/conda/lib/python3.10/site-packages/tensorflow_decision_forests/keras/keras_internal.py
 ADD patches/keras_internal_test.py /opt/conda/lib/python3.10/site-packages/tensorflow_decision_forests/keras/keras_internal_test.py
 
-# Remove "--no-deps" flag and "namex" package once Keras 3.* is included in our base image.
-# We ignore dependencies since tf2.15 and Keras 3.* should work despite pip saying it won't.
-# Currently, keras tries to install a nightly version of tf 2.16: https://github.com/keras-team/keras/blob/fe2f54aa5bc42fb23a96449cf90434ab9bb6a2cd/requirements.txt#L2
-RUN pip install --no-deps "keras>3" keras-cv keras-nlp namex && \
+RUN pip install "keras>3" keras-cv keras-nlp && \
     /tmp/clean-layer.sh
 
 # b/328788268 libpysal 4.10 seems to fail with "module 'shapely' has no attribute 'Geometry'. Did you mean: 'geometry'"

diff --git a/Jenkinsfile b/Jenkinsfile
@@ -36,7 +36,6 @@ pipeline {
                 --package torch \
                 --version $TORCH_VERSION \
                 --build-arg TORCHAUDIO_VERSION=$TORCHAUDIO_VERSION \
-                --build-arg TORCHTEXT_VERSION=$TORCHTEXT_VERSION \
                 --build-arg TORCHVISION_VERSION=$TORCHVISION_VERSION \
                 --build-arg CUDA_MAJOR_VERSION=$CUDA_MAJOR_VERSION \
                 --build-arg CUDA_MINOR_VERSION=$CUDA_MINOR_VERSION \

diff --git a/config.txt b/config.txt
@@ -1,12 +1,11 @@
 BASE_IMAGE_REPO=gcr.io/deeplearning-platform-release
-BASE_IMAGE_TAG=m114
-CPU_BASE_IMAGE_NAME=tf2-cpu.2-15.py310
-GPU_BASE_IMAGE_NAME=tf2-gpu.2-15.py310
+BASE_IMAGE_TAG=m122
+CPU_BASE_IMAGE_NAME=tf2-cpu.2-16.py310
+GPU_BASE_IMAGE_NAME=tf2-gpu.2-16.py310
 LIGHTGBM_VERSION=4.2.0
-TORCH_VERSION=2.1.2
-TORCHAUDIO_VERSION=2.1.2
-TORCHTEXT_VERSION=0.16.2
-TORCHVISION_VERSION=0.16.2
+TORCH_VERSION=2.4.0
+TORCHAUDIO_VERSION=2.4.0
+TORCHVISION_VERSION=0.19.0
 JAX_VERSION=0.4.26
 CUDA_MAJOR_VERSION=12
-CUDA_MINOR_VERSION=1
+CUDA_MINOR_VERSION=3
diff --git a/packages/jaxlib.Dockerfile b/packages/jaxlib.Dockerfile
@@ -15,8 +15,10 @@ ENV LIBRARY_PATH="$LIBRARY_PATH:/opt/conda/lib"
 ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/conda/lib"
 
 # Instructions: https://jax.readthedocs.io/en/latest/developer.html#building-jaxlib-from-source
-RUN apt-get update && \
-    apt-get install -y g++ python python3-dev
+RUN sudo ln -s /usr/bin/python3 /usr/bin/python
+
+RUN apt-get update && \ 
+    apt-get install -y g++ python3 python3-dev
 
 RUN pip install numpy wheel build
 

diff --git a/packages/torch.Dockerfile b/packages/torch.Dockerfile
@@ -4,7 +4,6 @@ FROM ${BASE_IMAGE} AS builder
 
 ARG PACKAGE_VERSION
 ARG TORCHAUDIO_VERSION
-ARG TORCHTEXT_VERSION
 ARG TORCHVISION_VERSION
 ARG CUDA_MAJOR_VERSION
 ARG CUDA_MINOR_VERSION
@@ -20,7 +19,7 @@ RUN conda install -c conda-forge mamba
 
 # Build instructions: https://github.com/pytorch/pytorch#from-source
 RUN mamba install astunparse numpy ninja pyyaml mkl mkl-include setuptools cmake cffi typing_extensions future six requests dataclasses
-RUN mamba install -c pytorch magma-cuda${CUDA_MAJOR_VERSION}${CUDA_MINOR_VERSION}
+RUN mamba install -c pytorch magma-cuda121
 
 # By default, it uses the version from version.txt which includes the `a0` (alpha zero) suffix and part of the git hash.
 # This causes dependency conflicts like these: https://paste.googleplex.com/4786486378496000
@@ -63,18 +62,6 @@ RUN sudo apt-get update && \
 RUN sed -i 's/set(envs/set(envs\n  "LIBS=-ltinfo"/' /usr/local/src/audio/third_party/sox/CMakeLists.txt 
 RUN cd /usr/local/src/audio && python setup.py bdist_wheel
 
-# Build torchtext
-# Instructions: https://github.com/pytorch/text#building-from-source
-# See comment above for PYTORCH_BUILD_VERSION.
-ENV BUILD_VERSION=$TORCHTEXT_VERSION
-RUN cd /usr/local/src && \
-    git clone https://github.com/pytorch/text && \
-    cd text && \
-    git checkout tags/v$TORCHTEXT_VERSION && \
-    git submodule sync && \
-    git submodule update --init --recursive --jobs 1 && \
-    python setup.py bdist_wheel
-
 # Build torchvision.
 # Instructions: https://github.com/pytorch/vision/tree/main#installation
 # See comment above for PYTORCH_BUILD_VERSION.
@@ -93,7 +80,6 @@ FROM alpine:latest
 RUN mkdir -p /tmp/whl/
 COPY --from=builder /usr/local/src/pytorch/dist/*.whl /tmp/whl
 COPY --from=builder /usr/local/src/audio/dist/*.whl /tmp/whl
-COPY --from=builder /usr/local/src/text/dist/*.whl /tmp/whl
 COPY --from=builder /usr/local/src/vision/dist/*.whl /tmp/whl
 
 # Print out the built .whl file.

diff --git a/patches/keras_patch.sh b/patches/keras_patch.sh
diff --git a/tests/test_geopandas.py b/tests/test_geopandas.py
@@ -1,16 +1,11 @@
 import unittest
 
 import geopandas
+from shapely.geometry import Polygon
 
 class TestGeopandas(unittest.TestCase):
-    def test_read(self):
-        df = geopandas.read_file(geopandas.datasets.get_path('nybb'))
-        self.assertTrue(df.size > 1)
-
-    def test_spatial_join(self):
-        cities = geopandas.read_file(geopandas.datasets.get_path('naturalearth_cities'))
-        world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))
-        countries = world[['geometry', 'name']]
-        countries = countries.rename(columns={'name':'country'})
-        cities_with_country = geopandas.sjoin(cities, countries, how="inner", op='intersects')
-        self.assertTrue(cities_with_country.size > 1)
+    def test_GeoSeries(self):
+        p1 = Polygon([(0, 0), (1, 0), (1, 1)])
+        p2 = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
+        p3 = Polygon([(2, 0), (3, 0), (3, 1), (2, 1)])
+        g = geopandas.GeoSeries([p1, p2, p3])
diff --git a/tests/test_torchtext.py b/tests/test_torchtext.py
diff --git a/tpu/Dockerfile b/tpu/Dockerfile
@@ -13,7 +13,6 @@ ARG TENSORFLOW_VERSION
 ARG TF_LIBTPU_VERSION
 ARG JAX_VERSION
 ARG TORCHVISION_VERSION
-ARG TORCHTEXT_VERSION
 ARG TORCHAUDIO_VERSION
 
 ENV ISTPUVM=1
@@ -60,7 +59,7 @@ RUN apt-get update && apt-get install ffmpeg libsm6 libxext6  -y
 # Additional useful packages should be added here
 
 RUN pip install tensorflow_hub https://storage.googleapis.com/cloud-tpu-tpuvm-artifacts/tensorflow/tf-${TENSORFLOW_VERSION}/tensorflow-${TENSORFLOW_VERSION}-${PYTHON_WHEEL_VERSION}-${PYTHON_WHEEL_VERSION}-${TF_LINUX_WHEEL_VERSION}.whl tensorflow-probability tensorflow-io \
-    torch~=${TORCH_VERSION} https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-${TORCH_VERSION}+libtpu-${PYTHON_WHEEL_VERSION}-${PYTHON_WHEEL_VERSION}-${TORCH_LINUX_WHEEL_VERSION}.whl torchvision==${TORCHVISION_VERSION} torchtext==${TORCHTEXT_VERSION} torchaudio==${TORCHAUDIO_VERSION} \
+    torch~=${TORCH_VERSION} https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-${TORCH_VERSION}+libtpu-${PYTHON_WHEEL_VERSION}-${PYTHON_WHEEL_VERSION}-${TORCH_LINUX_WHEEL_VERSION}.whl torchvision==${TORCHVISION_VERSION} torchaudio==${TORCHAUDIO_VERSION} \
     jax[tpu]==${JAX_VERSION} -f https://storage.googleapis.com/jax-releases/libtpu_releases.html trax flax optax git+https://github.com/deepmind/dm-haiku jraph distrax \
     papermill jupyterlab python-lsp-server[all] "jupyter-lsp==1.5.1" \
     pandas matplotlib opencv-python-headless librosa accelerate diffusers scikit-learn transformers \

diff --git a/tpu/config.txt b/tpu/config.txt
@@ -12,8 +12,6 @@ JAX_VERSION=0.4.23
 TORCH_VERSION=2.4.0
 # https://github.com/pytorch/audio supports nightly
 TORCHAUDIO_VERSION=2.4.0
-# https://github.com/pytorch/text supports main
-TORCHTEXT_VERSION=0.18.0
 # https://github.com/pytorch/vision supports nightly
 TORCHVISION_VERSION=0.19.0
 TORCH_LINUX_WHEEL_VERSION=manylinux_2_28_x86_64