diff --git a/Dockerfile.tmpl b/Dockerfile.tmpl
index 8b3c338..c003783 100644
--- a/Dockerfile.tmpl
+++ b/Dockerfile.tmpl
@@ -1,12 +1,12 @@
-ARG BASE_IMAGE_REPO
-ARG BASE_IMAGE_TAG
-ARG CPU_BASE_IMAGE_NAME
-ARG GPU_BASE_IMAGE_NAME
-ARG LIGHTGBM_VERSION
-ARG TORCH_VERSION
-ARG TORCHAUDIO_VERSION
-ARG TORCHVISION_VERSION
-ARG JAX_VERSION
+ARG BASE_IMAGE_REPO \
+    BASE_IMAGE_TAG \
+    CPU_BASE_IMAGE_NAME \
+    GPU_BASE_IMAGE_NAME \
+    LIGHTGBM_VERSION \
+    TORCH_VERSION \
+    TORCHAUDIO_VERSION \
+    TORCHVISION_VERSION \
+    JAX_VERSION
 
 {{ if eq .Accelerator "gpu" }}
 FROM gcr.io/kaggle-images/python-lightgbm-whl:${GPU_BASE_IMAGE_NAME}-${BASE_IMAGE_TAG}-${LIGHTGBM_VERSION} AS lightgbm_whl
@@ -18,61 +18,50 @@ FROM ${BASE_IMAGE_REPO}/${CPU_BASE_IMAGE_NAME}:${BASE_IMAGE_TAG}
 {{ end }}
 
 # Ensures shared libraries installed with conda can be found by the dynamic link loader.
-ENV LIBRARY_PATH="$LIBRARY_PATH:/opt/conda/lib"
-ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/conda/lib"
+ENV LIBRARY_PATH="$LIBRARY_PATH:/opt/conda/lib" \
+    LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/conda/lib"
 
 {{ if eq .Accelerator "gpu" }}
-ARG CUDA_MAJOR_VERSION
-ARG CUDA_MINOR_VERSION
-ENV CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION}
-ENV CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION}
+ARG CUDA_MAJOR_VERSION \
+    CUDA_MINOR_VERSION
+ENV CUDA_MAJOR_VERSION=${CUDA_MAJOR_VERSION} \
+    CUDA_MINOR_VERSION=${CUDA_MINOR_VERSION}
 
 # Make sure we are on the right version of CUDA
 RUN update-alternatives --set cuda /usr/local/cuda-$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION
 
 # NVIDIA binaries from the host are mounted to /opt/bin.
-ENV PATH=/opt/bin:${PATH}
-# Add CUDA stubs to LD_LIBRARY_PATH to support building the GPU image on a CPU machine.
-ENV LD_LIBRARY_PATH_NO_STUBS="$LD_LIBRARY_PATH"
-ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/cuda/lib64/stubs"
+ENV PATH=/opt/bin:${PATH} \
+    # Add CUDA stubs to LD_LIBRARY_PATH to support building the GPU image on a CPU machine.
+    LD_LIBRARY_PATH_NO_STUBS="$LD_LIBRARY_PATH" \
+    LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/cuda/lib64/stubs"
 RUN ln -s /usr/local/cuda/lib64/stubs/libcuda.so /usr/local/cuda/lib64/stubs/libcuda.so.1
 {{ end }}
 
 # Keep these variables in sync if base image is updated.
-ENV TENSORFLOW_VERSION=2.16.1
-# See https://github.com/tensorflow/io#tensorflow-version-compatibility
-ENV TENSORFLOW_IO_VERSION=0.37.0
+ENV TENSORFLOW_VERSION=2.16.1 \
+    # See https://github.com/tensorflow/io#tensorflow-version-compatibility
+    TENSORFLOW_IO_VERSION=0.37.0
 
 # We need to redefine the ARG here to get the ARG value defined above the FROM instruction.
 # See: https://docs.docker.com/engine/reference/builder/#understand-how-arg-and-from-interact
-ARG LIGHTGBM_VERSION
-ARG TORCH_VERSION
-ARG TORCHAUDIO_VERSION
-ARG TORCHVISION_VERSION
-ARG JAX_VERSION
+ARG LIGHTGBM_VERSION \
+    TORCH_VERSION \
+    TORCHAUDIO_VERSION \
+    TORCHVISION_VERSION \
+    JAX_VERSION
 
 # Disable pesky logs like: KMP_AFFINITY: pid 6121 tid 6121 thread 0 bound to OS proc set 0
 # See: https://stackoverflow.com/questions/57385766/disable-tensorflow-log-information
-ENV KMP_WARNINGS=0
-# Also make the KMP logs noverbose.
-# https://stackoverflow.com/questions/70250304/stop-tensorflow-from-printing-warning-message
-ENV KMP_SETTINGS=false
-
-# Remove the pip as the root user warning.
-ENV PIP_ROOT_USER_ACTION=ignore
+ENV KMP_WARNINGS=0 \
+    # Also make the KMP logs non-verbose.
+    # https://stackoverflow.com/questions/70250304/stop-tensorflow-from-printing-warning-message
+    KMP_SETTINGS=false \
+    # Remove the "pip as the root user" warning.
+    PIP_ROOT_USER_ACTION=ignore
 
 ADD clean-layer.sh /tmp/clean-layer.sh
 ADD patches/nbconvert-extensions.tpl /opt/kaggle/nbconvert-extensions.tpl
 ADD patches/template_conf.json /opt/kaggle/conf.json
 
-# b/276344496: Install specific version of boto3, because 1.26.103 is broken.
-RUN pip install boto3==1.26.100 && \
-    /tmp/clean-layer.sh
-
-{{ if eq .Accelerator "gpu" }}
-# b/200968891 Keeps horovod once torch is upgraded.
-RUN pip uninstall -y horovod && \
-    /tmp/clean-layer.sh
-{{ end }}
-
 # Update GPG key per documentation at https://cloud.google.com/compute/docs/troubleshooting/known-issues
 RUN curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -
 RUN curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key --keyring /usr/share/keyrings/cloud.google.gpg add -
 
@@ -98,7 +87,7 @@ ENV PROJ_DATA=/opt/conda/share/proj
 RUN conda config --add channels nvidia && \
     conda config --add channels rapidsai && \
     conda config --set solver libmamba && \
-    # b/299991198 remove curl/libcurl install once DLVM base image includes version >= 7.86
+    # b/299991198: remove curl/libcurl install once DLVM base image includes version >= 7.86
    conda install -c conda-forge mamba curl libcurl && \
     # Base image channel order: conda-forge (highest priority), defaults.
     # End state: rapidsai (highest priority), nvidia, conda-forge, defaults.
@@ -178,12 +167,15 @@ RUN export PATH=/usr/local/cuda/bin:$PATH && \
     /tmp/clean-layer.sh
 {{ end }}
 
-# (b/308525631) Pin Matplotlib until seaborn can be upgraded
+# b/308525631: Pin Matplotlib until seaborn can be upgraded
 # to >0.13.0 (now it's stuck by a package conflict with ydata-profiling 4.5.1).
 RUN JAXVER=$(pip freeze | grep -e "^jax==") && \
     pip install --upgrade \
     "matplotlib<3.8.0" \
+    # ipympl adds interactive widget support for matplotlib
+    ipympl==0.7.0 \
     "seaborn==0.12.2" \
+    pyupset \
     python-dateutil dask dask-expr igraph \
     pyyaml joblib geopy mne pyshp \
     pandas \
@@ -205,21 +197,17 @@ RUN pip install \
     tensorflow_decision_forests \
     tensorflow-text \
     "tensorflow_hub>=0.16.0" \
-    # b/331799280 remove once other packages over to dm-tre
-    optree \
-    tf-keras && \
+    tf-keras \
+    "keras>3" \
+    keras-cv \
+    keras-nlp && \
     /tmp/clean-layer.sh
 
-ADD patches/keras_internal.py /opt/conda/lib/python3.10/site-packages/tensorflow_decision_forests/keras/keras_internal.py
-ADD patches/keras_internal_test.py /opt/conda/lib/python3.10/site-packages/tensorflow_decision_forests/keras/keras_internal_test.py
+ADD patches/keras_internal.py \
+    patches/keras_internal_test.py \
+    /opt/conda/lib/python3.10/site-packages/tensorflow_decision_forests/keras/
 
-RUN pip install "keras>3" keras-cv keras-nlp && \
-    /tmp/clean-layer.sh
-
-# b/328788268 libpysal 4.10 seems to fail with "module 'shapely' has no attribute 'Geometry'. Did you mean: 'geometry'"
-RUN pip install pysal "libpysal==4.9.2"
-
-# b/350573866 xgboost v2.1.0 breaks learntools
+# b/350573866: xgboost v2.1.0 breaks learntools
 RUN apt-get install -y libfreetype6-dev && \
     apt-get install -y libglib2.0-0 libxext6 libsm6 libxrender1 libfontconfig1 --fix-missing && \
     pip install gensim \
@@ -247,16 +235,15 @@ RUN apt-get install -y libfreetype6-dev && \
     state_union stopwords subjectivity swadesh switchboard tagsets timit toolbox treebank \
     twitter_samples udhr2 udhr unicode_samples universal_tagset universal_treebanks_v20 \
     vader_lexicon verbnet webtext word2vec_sample wordnet wordnet_ic words ycoe && \
-    # Stop-words
-    pip install stop-words \
-    scikit-image && \
+    pip install scikit-image && \
+    pip install opencv-contrib-python opencv-python && \
     /tmp/clean-layer.sh
 
-RUN pip install opencv-contrib-python opencv-python && \
-    /tmp/clean-layer.sh
-
-# Pin scipy until we update JAX b/335003097
-RUN pip install "scipy==1.12.0" \
+RUN pip install cython \
+    fasttext \
+    opencv-contrib-python \
+    opencv-python \
+    "scipy<1.14.0" \
     # Scikit-learn accelerated library for x86
     "scikit-learn-intelex>=2023.0.1" \
     # HDF5 support
@@ -269,17 +256,18 @@ RUN pip install "scipy==1.12.0" \
     bokeh \
     numba \
     datashader \
-    # Boruta (python implementation)
+    # b/328788268: libpysal 4.10 seems to fail with "module 'shapely' has no attribute 'Geometry'. Did you mean: 'geometry'"
+    "libpysal==4.9.2" \
+    # b/276344496: Install specific version of boto3, because 1.26.103 is broken.
+    "boto3==1.26.100" \
     Boruta && \
     # Pandoc is a dependency of deap
     apt-get install -y pandoc && \
-    pip install essentia
-
-RUN apt-get install -y git-lfs && \
     /tmp/clean-layer.sh
 
-# vtk with dependencies
-RUN apt-get install -y libgl1-mesa-glx && \
+RUN apt-get install -y git-lfs && \
+    # vtk with dependencies
+    apt-get install -y libgl1-mesa-glx && \
     pip install vtk && \
     # xvfbwrapper with dependencies
     apt-get install -y xvfb && \
@@ -295,22 +283,19 @@ RUN pip install mpld3 \
     nibabel \
     imgaug \
     preprocessing \
-    path.py \
-    Geohash && \
+    path.py && \
     pip install deap \
-    # b/302136621 Fix eli5 import for learntools, newer version require scikit-learn > 1.3
+    # b/302136621: Fix eli5 import for learntools; newer versions require scikit-learn > 1.3
     "tpot==0.12.1" \
     scikit-optimize \
     haversine \
     toolz cytoolz \
     plotly \
     hyperopt \
-    fitter \
     langid \
     # Useful data exploration libraries (for missing data and generating reports)
     missingno \
     pandas-profiling \
-    s2sphere \
     bayesian-optimization \
     matplotlib-venn \
     pyldavis \
@@ -320,32 +305,20 @@ RUN pip install mpld3 \
     ecos \
     CVXcanon \
     pymc3 \
-    imagecodecs \
     tifffile \
-    spectral \
-    descartes \
     geojson \
     pydicom \
     wavio \
     SimpleITK \
-    hmmlearn \
-    gplearn \
     squarify \
     fuzzywuzzy \
     python-louvain \
     pyexcel-ods \
     sklearn-pandas \
-    stemming \
-    # b/266272046 prophet 1.1.2 breaks the test
-    prophet==1.1.1 \
-    # b/283847935 holidays >0.24 is broken
-    "holidays==0.24" \
+    prophet \
+    holidays \
     holoviews \
-    geoviews \
-    hypertools \
-    mlens \
     scikit-multilearn \
-    cleverhans \
     leven \
     catboost \
     folium \
@@ -354,7 +327,6 @@ RUN pip install mpld3 \
     plotnine \
     scikit-surprise \
     pymongo \
-    geoplot \
     eli5 \
     kaggle \
     kagglehub \
@@ -362,22 +334,16 @@ RUN pip install mpld3 \
     pytest && \
     /tmp/clean-layer.sh
 
-RUN rm -rf /opt/conda/lib/python3.10/site-packages/numpy-1.23.5.dist-info*
 
 # Add google PAIR-code Facets
 RUN cd /opt/ && git clone https://github.com/PAIR-code/facets && cd facets/ && jupyter nbextension install facets-dist/ --user && \
     export PYTHONPATH=$PYTHONPATH:/opt/facets/facets_overview/python/ && \
-    pip install kmodes --no-dependencies && \
     pip install librosa \
-    polyglot \
     sentencepiece \
     cufflinks \
     lime \
     memory_profiler && \
     /tmp/clean-layer.sh
 
-RUN pip install cython \
-    fasttext && \
-    apt-get install -y libhunspell-dev && pip install hunspell
 RUN pip install annoy \
     category_encoders && \
     # b/183041606#comment5: the Kaggle data proxy doesn't support these APIs. If the library is missing, it falls back to using a regular BigQuery query to fetch data.
@@ -390,33 +356,26 @@ RUN pip install annoy \
     google-cloud-bigquery \
     google-cloud-storage && \
     # Split these installations to avoid `pip._vendor.resolvelib.resolvers.ResolutionTooDeep: 200000`
-    # TODO(b/315753846) Unpin translate package.
+    # b/315753846: Unpin translate package.
     pip install google-cloud-translate==3.12.1 \
     google-cloud-language==2.* \
     google-cloud-videointelligence==2.* \
     google-cloud-vision==2.* \
     protobuf==3.20.3 \
-    ortools \
-    scattertext \
     # Pandas data reader
     pandas-datareader \
-    wordsegment \
     emoji \
     # Add Japanese morphological analysis engine
     janome \
-    wfdb \
-    vecstack \
     # yellowbrick machine learning visualization library
     yellowbrick \
     mlcrate && \
     /tmp/clean-layer.sh
 
-# b/273059949 The pre-installed nbconvert is slow on html conversions and has to be force-uninstalled.
-# b/274619697 learntools also requires a specific nbconvert right now
+# b/273059949: The pre-installed nbconvert is slow on html conversions and has to be force-uninstalled.
+# b/274619697: learntools also requires a specific nbconvert right now
 RUN rm -rf /opt/conda/lib/python3.10/site-packages/{nbconvert,nbclient,mistune,platformdirs}*
 
-# Fix qgrid by pinning ipywidgets https://github.com/quantopian/qgrid/issues/376
-# allennlp \
 RUN pip install bleach \
     certifi \
     cycler \
@@ -426,6 +385,7 @@ RUN pip install bleach \
     ipykernel \
     ipython \
     ipython-genutils \
+    # Fix qgrid by pinning ipywidgets https://github.com/quantopian/qgrid/issues/376
     ipywidgets==7.7.1 \
     isoweek \
     jedi \
@@ -447,7 +407,6 @@ RUN pip install bleach \
     pandocfilters \
     pexpect \
     pickleshare \
-    # TODO(b/290035631) unpin when EasyOCR did a release.
     Pillow && \
     # Install openslide and its python binding
     apt-get install -y openslide-tools && \
@@ -469,7 +428,6 @@ RUN pip install bleach \
     widgetsnbextension \
     # Require pyarrow newer than https://github.com/advisories/GHSA-5wvp-7f3h-6wmm
     {{ if eq .Accelerator "gpu" }} pyarrow {{ else }} "pyarrow>=14.0.1" {{ end }} \
-    feather-format \
     fastai
 
 RUN python -m spacy download en_core_web_sm && python -m spacy download en_core_web_lg && \
@@ -484,20 +442,14 @@ RUN python -m spacy download en_core_web_sm && python -m spacy download en_core_
 #
 ###########
 
-RUN rm /opt/conda/lib/python3.10/site-packages/google*/direct_url.json
-RUN rm /opt/conda/lib/python3.10/site-packages/google*/REQUESTED
-
+RUN rm /opt/conda/lib/python3.10/site-packages/google*/direct_url.json && \
+    rm /opt/conda/lib/python3.10/site-packages/google*/REQUESTED
 
 # dlib has a libmkl incompatibility:
 # test_dlib_face_detector (test_dlib.TestDLib) ... INTEL MKL ERROR: /opt/conda/bin/../lib/libmkl_avx512.so.2: undefined symbol: mkl_sparse_optimize_bsr_trsm_i8.
 # Intel MKL FATAL ERROR: Cannot load libmkl_avx512.so.2 or libmkl_def.so.2.
 # nnabla breaks protobuf compatibility:
-RUN pip install flashtext \
-    wandb \
-    # b/214080882 blake3 0.3.0 is not compatible with vaex.
-    blake3==0.2.1 \
-    vaex \
+RUN pip install wandb \
     pyemd \
-    pyupset \
     pympler \
     featuretools \
     #-e git+https://github.com/SohierDane/BigQuery_Helper#egg=bq_helper \
@@ -506,30 +458,24 @@ RUN pip install flashtext \
     gym \
     pyarabic \
     pandasql \
-    # b/302136621 Fix eli5 import for learntools
+    # b/302136621: Fix eli5 import for learntools
     scikit-learn==1.2.2 \
-    hpsklearn \
-    kmapper \
-    # b/329869023 shap 0.45.0 breaks learntools
+    # b/329869023: shap 0.45.0 breaks learntools
     shap==0.44.1 \
     cesium \
     rgf_python \
     jieba \
-    # ggplot is broken and main repo does not merge and release https://github.com/yhat/ggpy/pull/668
-    https://github.com/hbasria/ggpy/archive/0.11.5.zip \
     tsfresh \
-    pykalman \
     optuna \
     plotly_express \
     albumentations \
-    accelerate \
-    # b/290207097 switch back to the pip catalyst package when bug fixed
-    # https://github.com/catalyst-team/catalyst/issues/1440
-    git+https://github.com/Philmod/catalyst.git@fix-fp16#egg=catalyst \
-    osmnx && \
+    Rtree \
+    accelerate && \
     apt-get -y install libspatialindex-dev
 
-RUN pip install pytorch-ignite \
+RUN rm -rf /opt/conda/lib/python3.10/site-packages/numpy* && \
+    pip install "numpy==1.26.4" && \
+    pip install pytorch-ignite \
     qgrid \
     bqplot \
     earthengine-api \
@@ -541,7 +487,6 @@ RUN pip install pytorch-ignite \
     # geopandas > v0.14.4 breaks learn tools
     geopandas==v0.14.4 \
     "shapely<2" \
-    vowpalwabbit \
     pydub \
     pydegensac \
     torchmetrics \
@@ -552,14 +497,12 @@ RUN pip install pytorch-ignite \
     # pycrypto is used by competitions team.
     pycryptodome \
     easyocr \
-    # ipympl adds interactive widget support for matplotlib
-    ipympl==0.7.0 \
     onnx \
     tables \
     openpyxl \
     timm \
     torchinfo && \
-    pip install git+https://github.com/facebookresearch/segment-anything.git && \
+    pip install git+https://github.com/facebookresearch/segment-anything.git && \
     # b/343971718: remove duplicate aiohttp installs, and reinstall it
     rm -rf /opt/conda/lib/python3.10/site-packages/aiohttp* && \
     mamba install --force-reinstall -y aiohttp && \
@@ -586,12 +529,12 @@ RUN apt-get install tesseract-ocr -y && \
     pdf2image \
     PyPDF && \
     /tmp/clean-layer.sh
-ENV TESSERACT_PATH=/usr/bin/tesseract
-# For Facets
-ENV PYTHONPATH=$PYTHONPATH:/opt/facets/facets_overview/python/
-# For Theano with MKL
-ENV MKL_THREADING_LAYER=GNU
+ENV TESSERACT_PATH=/usr/bin/tesseract \
+    # For Facets
+    PYTHONPATH=$PYTHONPATH:/opt/facets/facets_overview/python/ \
+    # For Theano with MKL
+    MKL_THREADING_LAYER=GNU
 
 # Temporary fixes and patches
 # Temporary patch for Dask getting downgraded, which breaks Keras
@@ -605,7 +548,7 @@ RUN pip install --upgrade dask && \
     mkdir -p /etc/ipython/ && echo "c = get_config(); c.IPKernelApp.matplotlib = 'inline'" > /etc/ipython/ipython_config.py && \
     # Temporary patch for broken libpixman 0.38 in conda-forge, symlink to system libpixman 0.34 until the conda package gets updated to 0.38.5 or higher.
     ln -sf /usr/lib/x86_64-linux-gnu/libpixman-1.so.0.34.0 /opt/conda/lib/libpixman-1.so.0.38.0 && \
-    # pin jupyter-server to version 2.12.5; later versions break LSP (b/333854354)
+    # b/333854354: pin jupyter-server to version 2.12.5; later versions break LSP
     pip install --force-reinstall --no-deps jupyter_server==2.12.5 && \
     /tmp/clean-layer.sh
 
@@ -620,13 +563,15 @@ RUN mkdir -p ~/src && git clone https://github.com/SohierDane/BigQuery_Helper ~/
 
 # Add BigQuery client proxy settings
 ENV PYTHONUSERBASE "/root/.local"
-ADD patches/kaggle_gcp.py /root/.local/lib/python3.10/site-packages/kaggle_gcp.py
-ADD patches/kaggle_secrets.py /root/.local/lib/python3.10/site-packages/kaggle_secrets.py
-ADD patches/kaggle_session.py /root/.local/lib/python3.10/site-packages/kaggle_session.py
-ADD patches/kaggle_web_client.py /root/.local/lib/python3.10/site-packages/kaggle_web_client.py
-ADD patches/kaggle_datasets.py /root/.local/lib/python3.10/site-packages/kaggle_datasets.py
-ADD patches/log.py /root/.local/lib/python3.10/site-packages/log.py
-ADD patches/sitecustomize.py /root/.local/lib/python3.10/site-packages/sitecustomize.py
+ADD patches/kaggle_gcp.py \
+    patches/kaggle_secrets.py \
+    patches/kaggle_session.py \
+    patches/kaggle_web_client.py \
+    patches/kaggle_datasets.py \
+    patches/log.py \
+    patches/sitecustomize.py \
+    /root/.local/lib/python3.10/site-packages/
+
 # Override default imagemagick policies
 ADD patches/imagemagick-policy.xml /etc/ImageMagick-6/policy.xml
 
@@ -635,20 +580,6 @@ ADD patches/kaggle_module_resolver.py /opt/conda/lib/python3.10/site-packages/te
 RUN sed -i '/from tensorflow_hub import uncompressed_module_resolver/a from tensorflow_hub import kaggle_module_resolver' /opt/conda/lib/python3.10/site-packages/tensorflow_hub/config.py && \
     sed -i '/_install_default_resolvers()/a \ \ registry.resolver.add_implementation(kaggle_module_resolver.KaggleFileResolver())' /opt/conda/lib/python3.10/site-packages/tensorflow_hub/config.py
 
-# TensorBoard Jupyter extension. Should be replaced with TensorBoard's provided magic once we have
-# worker tunneling support in place.
-# b/139212522 re-enable TensorBoard once solution for slowdown is implemented.
-# ENV JUPYTER_CONFIG_DIR "/root/.jupyter/"
-# RUN pip install jupyter_tensorboard && \
-#     jupyter serverextension enable jupyter_tensorboard && \
-#     jupyter tensorboard enable
-# ADD patches/tensorboard/notebook.py /opt/conda/lib/python3.10/site-packages/tensorboard/notebook.py
-
-# Disable unnecessary jupyter extensions
-#RUN jupyter-nbextension disable nb_conda --py --sys-prefix && \
-#    jupyter-serverextension disable nb_conda --py --sys-prefix && \
-#    python -m nb_conda_kernels.install --disable
-
 # Disable preloaded jupyter modules (they add to startup, and break when they are missing)
 RUN sed -i /bq_stats/d /etc/ipython/ipython_kernel_config.py && \
     sed -i /beatrix/d /etc/ipython/ipython_kernel_config.py && \
@@ -662,37 +593,37 @@ RUN rm /opt/conda/bin/../lib/libcusolver.so.11 && ln -s /usr/local/cuda/lib64/li
 RUN ln -s /usr/local/cuda/lib64/libcusolver.so.11 /opt/conda/bin/../lib/libcusolver.so.11
 {{ end }}
 
-# b/270147159 conda ships with a version of libtinfo which is missing version info causing warnings, replace it with a good version.
+# b/270147159: conda ships with a version of libtinfo which is missing version info, causing warnings; replace it with a good version.
 RUN rm /opt/conda/lib/libtinfo.so.6 && ln -s /usr/lib/x86_64-linux-gnu/libtinfo.so.6 /opt/conda/lib/libtinfo.so.6
 
-# b/276358430 fix Jupyter lsp freezing up the jupyter server
+# b/276358430: fix Jupyter lsp freezing up the jupyter server
 RUN pip install "jupyter-lsp==1.5.1"
 
 # Set backend for matplotlib
-ENV MPLBACKEND "agg"
+ENV MPLBACKEND="agg" \
+    # Set LC_ALL
+    # https://github.com/explosion/spaCy/issues/12872#issuecomment-1661847588
+    LC_ALL="POSIX"
 
-# Set LC_ALL
-# https://github.com/explosion/spaCy/issues/12872#issuecomment-1661847588
-ENV LC_ALL "POSIX"
+ARG GIT_COMMIT=unknown \
+    BUILD_DATE=unknown
 
-ARG GIT_COMMIT=unknown
-ARG BUILD_DATE=unknown
+LABEL git-commit=$GIT_COMMIT \
+    build-date=$BUILD_DATE
 
-LABEL git-commit=$GIT_COMMIT
-LABEL build-date=$BUILD_DATE
-ENV GIT_COMMIT=${GIT_COMMIT}
-ENV BUILD_DATE=${BUILD_DATE}
+ENV GIT_COMMIT=${GIT_COMMIT} \
+    BUILD_DATE=${BUILD_DATE}
 
-LABEL tensorflow-version=$TENSORFLOW_VERSION
-# Used in the Jenkins `Docker GPU Build` step to restrict the images being pruned.
-LABEL kaggle-lang=python
+LABEL tensorflow-version=$TENSORFLOW_VERSION \
+    # Used in the Jenkins `Docker GPU Build` step to restrict the images being pruned.
+    kaggle-lang=python
 
 # Correlate current release with the git hash inside the kernel editor by running `!cat /etc/git_commit`.
 RUN echo "$GIT_COMMIT" > /etc/git_commit && echo "$BUILD_DATE" > /etc/build_date
 
 {{ if eq .Accelerator "gpu" }}
 # Remove the CUDA stubs.
-ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH_NO_STUBS"
-# Add the CUDA home.
-ENV CUDA_HOME=/usr/local/cuda
+ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH_NO_STUBS" \
+    # Add the CUDA home.
+    CUDA_HOME=/usr/local/cuda
 {{ end }}
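[Reviewer note] The Dockerfile changes above wipe site-packages/numpy* and force-reinstall numpy==1.26.4, but nothing in the test suite guards that pin. Below is a minimal sketch of such a guard, modeled on the test_version pattern that tests/test_matplotlib.py adopts later in this diff; the file name (tests/test_numpy.py) is hypothetical and not part of this PR:

    import unittest

    from distutils.version import StrictVersion

    import numpy as np

    class TestNumpy(unittest.TestCase):
        def test_version(self):
            # Hypothetical guard for the Dockerfile's `pip install "numpy==1.26.4"`:
            # any other version means a later pip/mamba step clobbered the pin.
            self.assertEqual(StrictVersion(np.__version__), StrictVersion("1.26.4"))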
diff --git a/tests/test_catalyst.py b/tests/test_catalyst.py
deleted file mode 100644
index 3b9c97d..0000000
--- a/tests/test_catalyst.py
+++ /dev/null
@@ -1,158 +0,0 @@
-import unittest
-import collections
-import json
-import numpy as np
-
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-import torchvision
-import torchvision.transforms as transforms
-
-import catalyst
-from catalyst.dl import SupervisedRunner, CheckpointCallback
-from catalyst import utils
-
-
-def _to_categorical(y, num_classes=None, dtype='float32'):
-    """
-    Taken from
-    github.com/keras-team/keras/blob/master/keras/utils/np_utils.py
-    Converts a class vector (integers) to binary class matrix.
-    E.g. for use with categorical_crossentropy.
-    # Arguments
-        y: class vector to be converted into a matrix
-            (integers from 0 to num_classes).
-        num_classes: total number of classes.
-        dtype: The data type expected by the input, as a string
-            (`float32`, `float64`, `int32`...)
-    # Returns
-        A binary matrix representation of the input. The classes axis
-        is placed last.
-    # Example
-    ```python
-    # Consider an array of 5 labels out of a set of 3 classes {0, 1, 2}:
-    > labels
-    array([0, 2, 1, 2, 0])
-    # `to_categorical` converts this into a matrix with as many
-    # columns as there are classes. The number of rows
-    # stays the same.
-    > to_categorical(labels)
-    array([[ 1., 0., 0.],
-           [ 0., 0., 1.],
-           [ 0., 1., 0.],
-           [ 0., 0., 1.],
-           [ 1., 0., 0.]], dtype=float32)
-    ```
-    """
-
-    y = np.array(y, dtype='int')
-    input_shape = y.shape
-    if input_shape and input_shape[-1] == 1 and len(input_shape) > 1:
-        input_shape = tuple(input_shape[:-1])
-    y = y.ravel()
-    if not num_classes:
-        num_classes = np.max(y) + 1
-    n = y.shape[0]
-    categorical = np.zeros((n, num_classes), dtype=dtype)
-    categorical[np.arange(n), y] = 1
-    output_shape = input_shape + (num_classes,)
-    categorical = np.reshape(categorical, output_shape)
-    return categorical
-
-
-class Net(nn.Module):
-    def __init__(self):
-        super().__init__()
-        self.conv1 = nn.Conv2d(1, 20, 5, 1)
-        self.conv2 = nn.Conv2d(20, 50, 5, 1)
-        self.fc1 = nn.Linear(4 * 4 * 50, 500)
-        self.fc2 = nn.Linear(500, 10)
-
-    def forward(self, x):
-        x = F.relu(self.conv1(x))
-        x = F.max_pool2d(x, 2, 2)
-        x = F.relu(self.conv2(x))
-        x = F.max_pool2d(x, 2, 2)
-        x = x.view(-1, 4 * 4 * 50)
-        x = F.relu(self.fc1(x))
-        x = self.fc2(x)
-        return x
-
-
-class TestCatalyst(unittest.TestCase):
-
-    def test_version(self):
-        self.assertIsNotNone(catalyst.__version__)
-
-    def test_mnist(self):
-        utils.set_global_seed(42)
-        x_train = np.random.random((100, 1, 28, 28)).astype(np.float32)
-        y_train = _to_categorical(
-            np.random.randint(10, size=(100, 1)),
-            num_classes=10
-        ).astype(np.float32)
-        x_valid = np.random.random((20, 1, 28, 28)).astype(np.float32)
-        y_valid = _to_categorical(
-            np.random.randint(10, size=(20, 1)),
-            num_classes=10
-        ).astype(np.float32)
-
-        x_train, y_train, x_valid, y_valid = \
-            list(map(torch.tensor, [x_train, y_train, x_valid, y_valid]))
-
-        bs = 32
-        num_workers = 4
-        data_transform = transforms.ToTensor()
-
-        loaders = collections.OrderedDict()
-
-        trainset = torch.utils.data.TensorDataset(x_train, y_train)
-        trainloader = torch.utils.data.DataLoader(
-            trainset, batch_size=bs,
-            shuffle=True, num_workers=num_workers)
-
-        validset = torch.utils.data.TensorDataset(x_valid, y_valid)
-        validloader = torch.utils.data.DataLoader(
-            validset, batch_size=bs,
-            shuffle=False, num_workers=num_workers)
-
-        loaders["train"] = trainloader
-        loaders["valid"] = validloader
-
-        # experiment setup
-        num_epochs = 3
-        logdir = "./logs"
-
-        # model, criterion, optimizer
-        model = Net()
-        criterion = nn.BCEWithLogitsLoss()
-        optimizer = torch.optim.Adam(model.parameters())
-
-        # model runner
-        runner = SupervisedRunner()
-
-        # model training
-        runner.train(
-            model=model,
-            criterion=criterion,
-            optimizer=optimizer,
-            loaders=loaders,
-            logdir=logdir,
-            num_epochs=num_epochs,
-            verbose=False,
-            callbacks=[CheckpointCallback(
-                logdir,
-                topk=3,
-                save_best=True,
-                loader_key="valid",
-                metric_key="loss",
-                minimize=True)]
-        )
-
-        with open('./logs/model.storage.json') as f:
-            metrics = json.load(f)
-            storage = metrics['storage']
-            self.assertEqual(3, len(storage))
-            self.assertTrue(storage[0]['metric'] < storage[2]['metric'])
-            self.assertTrue(storage[0]['metric']< 0.35)
diff --git a/tests/test_essentia.py b/tests/test_essentia.py
deleted file mode 100644
index 749b946..0000000
--- a/tests/test_essentia.py
+++ /dev/null
@@ -1,7 +0,0 @@
-import unittest
-
-from essentia.standard import Windowing
-
-class TestEssentia(unittest.TestCase):
-    def test_windowing(self):
-        Windowing(type = 'hann')
diff --git a/tests/test_geoviews.py b/tests/test_geoviews.py
deleted file mode 100644
index 2636cc6..0000000
--- a/tests/test_geoviews.py
+++ /dev/null
@@ -1,17 +0,0 @@
-import unittest
-
-from common import p100_exempt
-
-class TestGeoviews(unittest.TestCase):
-
-    @p100_exempt # b/342143152: Uses cuDF(>=24.4v), which is no longer capitble with p100 GPUs.
-
-    def test_viz(self):
-        import geoviews.feature as gf
-        import holoviews as hv
-        from cartopy import crs
-
-        hv.extension('matplotlib')
-        (gf.ocean + gf.land + gf.ocean * gf.land * gf.coastline * gf.borders).options(
-            'Feature', projection=crs.Geostationary(), global_extent=True
-        ).cols(3)
diff --git a/tests/test_ggplot.py b/tests/test_ggplot.py
deleted file mode 100644
index 30aec29..0000000
--- a/tests/test_ggplot.py
+++ /dev/null
@@ -1,12 +0,0 @@
-import unittest
-import os.path
-
-from ggplot import *
-
-class TestGgplot(unittest.TestCase):
-
-    def test_plot(self):
-        p = ggplot(aes(x='mpg'), data=mtcars) + geom_histogram()
-        p.save("myplot.png")
-
-        self.assertTrue(os.path.isfile("myplot.png"))
diff --git a/tests/test_imports.py b/tests/test_imports.py
index 4977ff9..b22ebe7 100644
--- a/tests/test_imports.py
+++ b/tests/test_imports.py
@@ -4,6 +4,5 @@ class TestImport(unittest.TestCase):
     # Basic import tests for packages without any.
     def test_basic(self):
         import bq_helper
-        import cleverhans
         import tensorflow_datasets
         import segment_anything
diff --git a/tests/test_kmapper.py b/tests/test_kmapper.py
deleted file mode 100644
index c75deea..0000000
--- a/tests/test_kmapper.py
+++ /dev/null
@@ -1,7 +0,0 @@
-import unittest
-
-import kmapper as km
-
-class TestKMapper(unittest.TestCase):
-    def test_init(self):
-        km.KeplerMapper()
diff --git a/tests/test_matplotlib.py b/tests/test_matplotlib.py
index 1cbc939..c04f3f2 100644
--- a/tests/test_matplotlib.py
+++ b/tests/test_matplotlib.py
@@ -1,10 +1,17 @@
 import unittest
 import os.path
 
+from distutils.version import StrictVersion
+
+import matplotlib
 import matplotlib.pyplot as plt
 import numpy as np
 
 class TestMatplotlib(unittest.TestCase):
+    def test_version(self):
+        # b/308525631: newer versions of Matplotlib cause learntools to fail
+        self.assertLess(StrictVersion(matplotlib.__version__), StrictVersion("3.8.0"))
+
     def test_plot(self):
         plt.plot(np.linspace(0,1,50), np.random.rand(50))
         plt.savefig("plot1.png")
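[Reviewer note] The new test_version check above works on this image's Python 3.10, but distutils.version.StrictVersion is deprecated and distutils is removed entirely in Python 3.12 (PEP 632). An equivalent check could use packaging, which is already in the image since matplotlib depends on it; a sketch, not a requested change:

    import matplotlib
    from packaging.version import Version

    # Same assertion as test_version above, without the deprecated distutils import.
    assert Version(matplotlib.__version__) < Version("3.8.0")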
diff --git a/tests/test_pykalman.py b/tests/test_pykalman.py
deleted file mode 100644
index 26d8600..0000000
--- a/tests/test_pykalman.py
+++ /dev/null
@@ -1,47 +0,0 @@
-import unittest
-import numpy as np
-from pykalman import KalmanFilter
-from pykalman import UnscentedKalmanFilter
-from pykalman.sqrt import CholeskyKalmanFilter, AdditiveUnscentedKalmanFilter
-
-class TestPyKalman(unittest.TestCase):
-    def test_kalman_filter(self):
-        kf = KalmanFilter(transition_matrices = [[1, 1], [0, 1]], observation_matrices = [[0.1, 0.5], [-0.3, 0.0]])
-        measurements = np.asarray([[1,0], [0,0], [0,1]]) # 3 observations
-        kf = kf.em(measurements, n_iter=5)
-        (filtered_state_means, filtered_state_covariances) = kf.filter(measurements)
-        (smoothed_state_means, smoothed_state_covariances) = kf.smooth(measurements)
-        return filtered_state_means
-
-    def test_kalman_missing(self):
-        kf = KalmanFilter(transition_matrices = [[1, 1], [0, 1]], observation_matrices = [[0.1, 0.5], [-0.3, 0.0]])
-        measurements = np.asarray([[1,0], [0,0], [0,1]]) # 3 observations
-        measurements = np.ma.asarray(measurements)
-        measurements[1] = np.ma.masked
-        kf = kf.em(measurements, n_iter=5)
-        (filtered_state_means, filtered_state_covariances) = kf.filter(measurements)
-        (smoothed_state_means, smoothed_state_covariances) = kf.smooth(measurements)
-        return filtered_state_means
-
-    def test_unscented_kalman(self):
-        ukf = UnscentedKalmanFilter(lambda x, w: x + np.sin(w), lambda x, v: x + v, transition_covariance=0.1)
-        (filtered_state_means, filtered_state_covariances) = ukf.filter([0, 1, 2])
-        (smoothed_state_means, smoothed_state_covariances) = ukf.smooth([0, 1, 2])
-        return filtered_state_means
-
-    def test_online_update(self):
-        kf = KalmanFilter(transition_matrices = [[1, 1], [0, 1]], observation_matrices = [[0.1, 0.5], [-0.3, 0.0]])
-        measurements = np.asarray([[1,0], [0,0], [0,1]]) # 3 observations
-        measurements = np.ma.asarray(measurements)
-        measurements[1] = np.ma.masked # measurement at timestep 1 is unobserved
-        kf = kf.em(measurements, n_iter=5)
-        (filtered_state_means, filtered_state_covariances) = kf.filter(measurements)
-        for t in range(1, 3):
-            filtered_state_means[t], filtered_state_covariances[t] = \
-                kf.filter_update(filtered_state_means[t-1], filtered_state_covariances[t-1], measurements[t])
-        return filtered_state_means
-
-    def test_robust_sqrt(self):
-        kf = CholeskyKalmanFilter(transition_matrices = [[1, 1], [0, 1]], observation_matrices = [[0.1, 0.5], [-0.3, 0.0]])
-        ukf = AdditiveUnscentedKalmanFilter(lambda x, w: x + np.sin(w), lambda x, v: x + v, observation_covariance=0.1)
-
diff --git a/tests/test_vaex.py b/tests/test_vaex.py
deleted file mode 100644
index b64061b..0000000
--- a/tests/test_vaex.py
+++ /dev/null
@@ -1,10 +0,0 @@
-import unittest
-
-import vaex
-
-class TestVaex(unittest.TestCase):
-    def test_read_csv(self):
-        df = vaex.read_csv("/input/tests/data/train.csv")
-
-        self.assertEqual((100, 785), df.shape)
-        self.assertEqual(10, df['label'].nunique())
\ No newline at end of file
diff --git a/tests/test_vowpalwabbit.py b/tests/test_vowpalwabbit.py
deleted file mode 100644
index 839aed0..0000000
--- a/tests/test_vowpalwabbit.py
+++ /dev/null
@@ -1,10 +0,0 @@
-import unittest
-
-from vowpalwabbit import pyvw
-
-class TestVowpalwabbit(unittest.TestCase):
-    def test_basic(self):
-        vw = pyvw.vw(quiet=True)
-        ex = vw.example('1 | a b c')
-        vw.learn(ex)
-        self.assertGreater(vw.predict(ex), 0)
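[Reviewer note] This PR folds "keras>3", keras-cv, and keras-nlp into the main TensorFlow pip run alongside tf-keras, but no test exercises the resulting Keras 3 / tf-keras split. Below is a hypothetical smoke test in the suite's unittest style; the file name and the tf_keras version assertion are assumptions, not part of this PR:

    import unittest

    class TestKerasPackages(unittest.TestCase):
        def test_keras3_and_tf_keras_coexist(self):
            # The image installs standalone Keras 3 ("keras>3") next to the
            # Keras 2 compatibility package (tf-keras); both should import,
            # with the expected major versions.
            import keras
            import tf_keras

            self.assertGreaterEqual(int(keras.__version__.split(".")[0]), 3)
            self.assertTrue(tf_keras.__version__.startswith("2."))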