diff --git a/.cloudbuild/Dockerfile b/.cloudbuild/Dockerfile deleted file mode 100644 index 456a354c84..0000000000 --- a/.cloudbuild/Dockerfile +++ /dev/null @@ -1,4 +0,0 @@ -ARG IMAGE_NAME -FROM $IMAGE_NAME -COPY . /kerasnlp -WORKDIR /kerasnlp diff --git a/.cloudbuild/README.md b/.cloudbuild/README.md deleted file mode 100644 index 064caf5f33..0000000000 --- a/.cloudbuild/README.md +++ /dev/null @@ -1,48 +0,0 @@ -# KerasNLP Accelerators Testing - -This `cloudbuild/` directory contains configurations for accelerators (GPU/TPU) -testing. Briefly, for each PR, it copies the PR's code to a base docker image -which contains KerasNLP dependencies to make a new docker image, and deploys the -new image to Google Kubernetes Engine cluster, then run all tests in -`keras_nlp/` via Google Cloud Build. - -- `cloudbuild.yaml`: The cloud build configuration that specifies steps to run - by cloud build. -- `Dockerfile`: The configuration to build the docker image for deployment. -- `requirements.txt`: Dependencies of KerasNLP. -- `unit_test_jobs.jsonnet`: Jsonnet config that tells GKE cluster to run all - unit tests in `keras_nlp/`. - -This test is powered by [ml-testing-accelerators](https://github.com/GoogleCloudPlatform/ml-testing-accelerators). - -### Adding Test Dependencies - -You must be authorized to run builds in the `keras-team-test` GCP project. -If you are not, please open a GitHub issue and ping a team member. -To authorize yourself with `keras-team-test`, run: - -```bash -gcloud config set project keras-team-test -``` - -To add/update dependency for GPU tests for a given framework: -- Add/update dependencies in `requirements.txt`. -- Add/update dependencies in `.cloudbuild/{framework}/Dockerfile`. -- Run the following: -``` -gcloud builds submit --region=us-west1 --tag us-west1-docker.pkg.dev/keras-team-test/keras-nlp-test/keras-nlp-image-{framework}:deps --timeout=30m -``` - -Alternately, to update all docker images at once, just run: -``` -./cloudbuild/update_images.sh -``` - -### Run TPU Testing - -Because of the TPU capacity limit, we cannot set automatic TPU testing. To -trigger the TPU testing, run the following command: - -``` -gcloud builds submit --config .cloudbuild/tpu_cloudbuild.yaml . --project=keras-team-test -``` diff --git a/.cloudbuild/cloudbuild.yaml b/.cloudbuild/cloudbuild.yaml deleted file mode 100644 index 474cf0e32a..0000000000 --- a/.cloudbuild/cloudbuild.yaml +++ /dev/null @@ -1,77 +0,0 @@ -substitutions: - # GCS bucket name. - _GCS_BUCKET: 'gs://keras-nlp-github-test' - # GKE cluster name. - _CLUSTER_NAME: 'keras-nlp-test-cluster' - # Location of GKE cluster. - _CLUSTER_ZONE: 'us-west1-b' - # Image name. - _IMAGE_NAME: 'us-west1-docker.pkg.dev/keras-team-test/keras-nlp-test/keras-nlp-image-${_BACKEND}' -steps: -- name: 'gcr.io/cloud-builders/docker' - id: build-image - entrypoint: 'bash' - args: - ['-c', 'docker build -f .cloudbuild/Dockerfile -t $_IMAGE_NAME:$BUILD_ID --build-arg IMAGE_NAME=$_IMAGE_NAME:deps .'] -- name: 'gcr.io/cloud-builders/docker' - id: push-image - waitFor: - - build-image - args: ['push', '$_IMAGE_NAME:$BUILD_ID'] -- name: 'golang' - id: download-jsonnet - waitFor: ['-'] - entrypoint: 'go' - args: [ - 'install', - 'github.com/google/go-jsonnet/cmd/jsonnet@latest', - ] -- name: 'gcr.io/cloud-builders/gcloud' - id: clone-templates - waitFor: ['-'] - entrypoint: 'git' - args: [ - 'clone', - 'https://github.com/GoogleCloudPlatform/ml-testing-accelerators.git', - ] -- name: 'golang' - id: build-templates - waitFor: - - download-jsonnet - - clone-templates - entrypoint: 'jsonnet' - args: [ - '.cloudbuild/unit_test_jobs.jsonnet', - '--string', - '-J', 'ml-testing-accelerators', - '--ext-str', 'image=$_IMAGE_NAME', - '--ext-str', 'tag_name=$BUILD_ID', - '--ext-str', 'gcs_bucket=$_GCS_BUCKET', - '--ext-str', 'backend=$_BACKEND', - '-o', 'output.yaml', - ] -- name: 'gcr.io/cloud-builders/gcloud' - id: create-job - waitFor: - - push-image - - build-templates - entrypoint: bash - args: - - -c - - | - set -u - set -e - set -x - gcloud container clusters get-credentials $_CLUSTER_NAME --zone $_CLUSTER_ZONE --project keras-team-test - job_name=$(kubectl create -f output.yaml -o name) - sleep 5 - pod_name=$(kubectl wait --for condition=ready --timeout=120m pod -l job-name=${job_name#job.batch/} -o name) - kubectl logs -f $pod_name --container=train - sleep 5 - gcloud artifacts docker images delete $_IMAGE_NAME:$BUILD_ID - exit $(kubectl get $pod_name -o jsonpath={.status.containerStatuses[0].state.terminated.exitCode}) -timeout: 120m -options: - volumes: - - name: go-modules - path: /go diff --git a/.cloudbuild/cloudbuild_tpu.yaml b/.cloudbuild/cloudbuild_tpu.yaml deleted file mode 100644 index c715d71fb7..0000000000 --- a/.cloudbuild/cloudbuild_tpu.yaml +++ /dev/null @@ -1,79 +0,0 @@ -substitutions: - # GCS bucket name. - _GCS_BUCKET: 'gs://keras-nlp-github-test' - # GKE cluster name. - _CLUSTER_NAME: 'keras-nlp-tpu-test-cluster' - # Location of GKE cluster. - _CLUSTER_ZONE: 'us-central1-a' - # Image name. - _IMAGE_NAME: 'us-west1-docker.pkg.dev/keras-team-test/keras-nlp-test/keras-nlp-image' -steps: -- name: 'docker' - id: build-image - args: [ - 'build', - '.', - '-f', '.cloudbuild/Dockerfile', - '-t', '$_IMAGE_NAME:$BUILD_ID', - ] -- name: 'docker' - id: push-image - waitFor: - - build-image - args: ['push', '$_IMAGE_NAME:$BUILD_ID'] -- name: 'golang' - id: download-jsonnet - waitFor: ['-'] - entrypoint: 'go' - args: [ - 'install', - 'github.com/google/go-jsonnet/cmd/jsonnet@latest', - ] -- name: 'gcr.io/cloud-builders/gcloud' - id: clone-templates - waitFor: ['-'] - entrypoint: 'git' - args: [ - 'clone', - 'https://github.com/GoogleCloudPlatform/ml-testing-accelerators.git', - ] -- name: 'golang' - id: build-templates - waitFor: - - download-jsonnet - - clone-templates - entrypoint: 'jsonnet' - args: [ - '.cloudbuild/unit_test_jobs_tpu.jsonnet', - '--string', - '-J', 'ml-testing-accelerators', - '--ext-str', 'image=$_IMAGE_NAME', - '--ext-str', 'tag_name=$BUILD_ID', - '--ext-str', 'gcs_bucket=$_GCS_BUCKET', - '-o', 'output.yaml', - ] -- name: 'gcr.io/cloud-builders/gcloud' - id: create-job - waitFor: - - push-image - - build-templates - entrypoint: bash - args: - - -c - - | - set -u - set -e - set -x - gcloud container clusters get-credentials $_CLUSTER_NAME --zone $_CLUSTER_ZONE --project keras-team-test - job_name=$(kubectl create -f output.yaml -o name) - sleep 5 - pod_name=$(kubectl wait --for condition=ready --timeout=120m pod -l job-name=${job_name#job.batch/} -o name) - kubectl logs -f $pod_name --container=train - sleep 5 - # gcloud artifacts docker images delete $_IMAGE_NAME:$BUILD_ID - exit $(kubectl get $pod_name -o jsonpath={.status.containerStatuses[0].state.terminated.exitCode}) -timeout: 120m -options: - volumes: - - name: go-modules - path: /go diff --git a/.cloudbuild/jax/Dockerfile b/.cloudbuild/jax/Dockerfile deleted file mode 100644 index ec84817cc2..0000000000 --- a/.cloudbuild/jax/Dockerfile +++ /dev/null @@ -1,8 +0,0 @@ -FROM nvidia/cuda:11.7.1-base-ubuntu20.04 -RUN apt-get update -RUN apt-get install -y python3 python3-pip -RUN apt-get install -y git -RUN git clone https://github.com/keras-team/keras-nlp.git -RUN cd keras-nlp -RUN pip install -r keras-nlp/requirements.txt -RUN pip install --upgrade "jax[cuda11_pip]" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html diff --git a/.cloudbuild/tensorflow/Dockerfile b/.cloudbuild/tensorflow/Dockerfile deleted file mode 100644 index d452d3761b..0000000000 --- a/.cloudbuild/tensorflow/Dockerfile +++ /dev/null @@ -1,6 +0,0 @@ -FROM tensorflow/tensorflow:2.13.0-gpu -RUN apt-get -y update -RUN apt-get -y install git -RUN git clone https://github.com/keras-team/keras-nlp.git -RUN cd keras-nlp -RUN pip install -r keras-nlp/requirements.txt diff --git a/.cloudbuild/torch/Dockerfile b/.cloudbuild/torch/Dockerfile deleted file mode 100644 index ecd88b81a3..0000000000 --- a/.cloudbuild/torch/Dockerfile +++ /dev/null @@ -1,8 +0,0 @@ -FROM nvidia/cuda:11.7.1-base-ubuntu20.04 -RUN apt-get update -RUN apt-get install -y python3 python3-pip -RUN apt-get install -y git -RUN git clone https://github.com/keras-team/keras-nlp.git -RUN cd keras-nlp -RUN pip install -r keras-nlp/requirements.txt -RUN pip install torch diff --git a/.cloudbuild/unit_test_jobs.jsonnet b/.cloudbuild/unit_test_jobs.jsonnet deleted file mode 100644 index 560581c2ef..0000000000 --- a/.cloudbuild/unit_test_jobs.jsonnet +++ /dev/null @@ -1,43 +0,0 @@ -local base = import 'templates/base.libsonnet'; -local gpus = import 'templates/gpus.libsonnet'; - -local image = std.extVar('image'); -local tagName = std.extVar('tag_name'); -local gcsBucket = std.extVar('gcs_bucket'); -local backend = std.extVar('backend'); - -local unittest = base.BaseTest { - // Configure job name. - frameworkPrefix: backend, - modelName: "keras-nlp", - mode: "unit-tests", - timeout: 7200, # 2 hours, in seconds - - // Set up runtime environment. - image: image, - imageTag: tagName, - accelerator: gpus.teslaT4, - outputBucket: gcsBucket, - - entrypoint: [ - 'bash', - '-c', - std.format( - ||| - export KERAS_BACKEND=%s - - # Run whatever is in `command` here. - cd keras-nlp - ${@:0} - |||, - backend - ) - ], - command: [ - 'pytest', - 'keras_nlp', - '--run_large', - ], -}; - -std.manifestYamlDoc(unittest.oneshotJob, quote_keys=false) diff --git a/.cloudbuild/unit_test_jobs_tpu.jsonnet b/.cloudbuild/unit_test_jobs_tpu.jsonnet deleted file mode 100644 index e429da40cc..0000000000 --- a/.cloudbuild/unit_test_jobs_tpu.jsonnet +++ /dev/null @@ -1,42 +0,0 @@ -local base = import 'templates/base.libsonnet'; -local tpus = import 'templates/tpus.libsonnet'; - -local image = std.extVar('image'); -local tagName = std.extVar('tag_name'); -local gcsBucket = std.extVar('gcs_bucket'); - -local unittest = base.BaseTest { - // Configure job name. - frameworkPrefix: "tf", - modelName: "keras-nlp", - mode: "unit-tests", - timeout: 7200, # 2 hours, in seconds - - // Set up runtime environment. - image: image, - imageTag: tagName, - accelerator: tpus.v3_8, - outputBucket: gcsBucket, - tpuSettings+: { - softwareVersion: '2.10.0', - }, - - entrypoint: [ - 'bash', - '-c', - ||| - # Run whatever is in `command` here. - cd keras-nlp - ${@:0} - ||| - ], - command: [ - 'pytest', - '-m', - 'tpu', - 'keras_nlp', - '--run_tpu', - ], -}; - -std.manifestYamlDoc(unittest.oneshotJob, quote_keys=false) diff --git a/.cloudbuild/update_images.sh b/.cloudbuild/update_images.sh deleted file mode 100755 index 2876df81e8..0000000000 --- a/.cloudbuild/update_images.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash -ex - -base_dir=$(dirname $0) - -for platform in "jax" "tensorflow" "torch"; do - pushd "${base_dir}/${platform}" > /dev/null - gcloud builds submit \ - --region=us-west1 \ - --project=keras-team-test \ - --tag "us-west1-docker.pkg.dev/keras-team-test/keras-nlp-test/keras-nlp-image-${platform}:deps" \ - --timeout=30m - popd -done