Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new CI scripts for running Bazel CPU presubmits #24484

Merged
merged 1 commit into from
Nov 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions .github/workflows/bazel_cpu_rbe.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Manually triggered workflow that runs ci/run_bazel_test_cpu_rbe.sh on a
# Linux x86 runner and a Linux Arm64 runner, each inside its own container.
name: CI - Bazel CPU tests (RBE)

on:
  workflow_dispatch:
    inputs:
      # Forwarded to the "Wait For Connection" step below.
      halt-for-connection:
        description: 'Should this workflow run wait for a remote connection?'
        type: choice
        required: true
        default: 'no'
        options:
        - 'yes'
        - 'no'

# Only one run per workflow/ref at a time; a newer run cancels the older one.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
  cancel-in-progress: true

jobs:
  run_tests:
    # Do not run on forks of the repository.
    if: github.event.repository.fork == false
    strategy:
      matrix:
        runner: ["linux-x86-n2-16", "linux-arm64-t2a-16"]

    runs-on: ${{ matrix.runner }}
    # The container image is selected from the runner name.
    # TODO(b/369382309): Replace Linux Arm64 container with the ml-build container once it is available
    container: ${{ (contains(matrix.runner, 'linux-x86') && 'us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/ml-build:latest') ||
                   (contains(matrix.runner, 'linux-arm64') && 'us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/linux-arm64-arc-container:latest') }}

    env:
      # Read by ci/envs/default.env; takes precedence over the system python3
      # version detected there.
      JAXCI_HERMETIC_PYTHON_VERSION: "3.12"

    steps:
      - uses: actions/checkout@v3
      - name: Wait For Connection
        uses: google-ml-infra/actions/ci_connection@main
        with:
          halt-dispatch-input: ${{ inputs.halt-for-connection }}
      - name: Run Bazel CPU Tests with RBE
        run: ./ci/run_bazel_test_cpu_rbe.sh
10 changes: 10 additions & 0 deletions ci/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# JAX continuous integration

> [!WARNING]
> This folder is still under construction. It is part of an ongoing
> effort to improve the structure of CI and build related files within the
> JAX repo. This warning will be removed when the contents of this
> directory are stable and appropriate documentation around its usage is in
> place.

********************************************************************************
37 changes: 37 additions & 0 deletions ci/envs/default.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Copyright 2024 The JAX Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# This file contains the default values for all "JAXCI_" environment variables
# used in the CI scripts. These variables control the behavior of the CI
# scripts, such as the Python version used, the paths to the JAX and XLA
# repositories, whether to clone the XLA repository, etc.

# Path of the JAX git repository. Always taken from the current working
# directory at the time this file is sourced.
export JAXCI_JAX_GIT_DIR="$(pwd)"

# Hermetic Python version ("major.minor") to use. A non-empty value already in
# the environment is kept; otherwise the version is derived from the system
# python3 (e.g. "Python 3.12.1" -> "3.12").
if [ -z "${JAXCI_HERMETIC_PYTHON_VERSION:-}" ]; then
  export JAXCI_HERMETIC_PYTHON_VERSION="$(python3 -V | awk '{ split($2, parts, "."); print parts[1] "." parts[2] }')"
fi
export JAXCI_HERMETIC_PYTHON_VERSION

# Root of a local XLA git repository to use instead of the version pinned in
# the WORKSPACE. Defaults to empty. When JAXCI_CLONE_MAIN_XLA=1, this gets set
# automatically.
: "${JAXCI_XLA_GIT_DIR:=}"
export JAXCI_XLA_GIT_DIR

# When 1, the builds clone the XLA repository at HEAD and store its path in
# JAXCI_XLA_GIT_DIR. Defaults to 0.
: "${JAXCI_CLONE_MAIN_XLA:=0}"
export JAXCI_CLONE_MAIN_XLA

# Optional override for the XLA commit that is used. Defaults to empty.
: "${JAXCI_XLA_COMMIT:=}"
export JAXCI_XLA_COMMIT
68 changes: 68 additions & 0 deletions ci/run_bazel_test_cpu_rbe.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#!/bin/bash
# Copyright 2024 The JAX Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Runs Bazel CPU tests with RBE.
#
# The `source` paths below are relative, and ci/envs/default.env records
# $(pwd) as the JAX repository path, so this script has to be launched from
# the root of the JAX repository.
#
# -e: abort script if one command fails
# -u: error if undefined variable used
# -x: log all commands
# -o history: record shell history
# -o allexport: export all functions and variables to be available to subscripts
set -exu -o history -o allexport

# Source default JAXCI environment variables.
source ci/envs/default.env

# The Bazel commands below always override the XLA repository with a local
# copy, so clone XLA at HEAD if a path to a local XLA is not provided.
if [[ -z "$JAXCI_XLA_GIT_DIR" ]]; then
  export JAXCI_CLONE_MAIN_XLA=1
fi

# Set up the build environment.
source "ci/utilities/setup_build_environment.sh"

# Host platform; used to pick the matching RBE Bazel config.
os=$(uname -s | awk '{print tolower($0)}')
arch=$(uname -m)

# Flags and targets common to `bazel build` and `bazel test`. They are kept in
# one place so the two invocations cannot drift apart.
bazel_flags=(
  --repo_env=HERMETIC_PYTHON_VERSION="$JAXCI_HERMETIC_PYTHON_VERSION"
  --override_repository=xla="${JAXCI_XLA_GIT_DIR}"
  --test_env=JAX_NUM_GENERATED_CASES=25
  --test_env=JAX_SKIP_SLOW_TESTS=true
  --action_env=JAX_ENABLE_X64=0
  --test_output=errors
  --color=yes
)
bazel_targets=(
  //tests:cpu_tests
  //tests:backend_independent_tests
)

# When running on Mac or Linux Aarch64, we only build the test targets and
# do not run them. These platforms do not have native RBE support so we
# RBE cross-compile them on remote Linux x86 machines. As the tests still
# need to be run on the host machine and because running the tests on a
# single machine can take a long time, we skip running them on these
# platforms.
if [[ $os == "darwin" || ( $os == "linux" && $arch == "aarch64" ) ]]; then
  echo "Building RBE CPU tests..."
  bazel build --config="rbe_cross_compile_${os}_${arch}" \
    "${bazel_flags[@]}" \
    "${bazel_targets[@]}"
else
  echo "Running RBE CPU tests..."
  bazel test --config="rbe_${os}_${arch}" \
    "${bazel_flags[@]}" \
    "${bazel_targets[@]}"
fi
71 changes: 71 additions & 0 deletions ci/utilities/setup_build_environment.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#!/bin/bash
# Copyright 2024 The JAX Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# Set up the build environment for JAX CI jobs. This script depends on the
# "JAXCI_" environment variables set or sourced in the build script.

# Pre-emptively mark the JAX git directory as safe. This is necessary for JAX CI
# jobs running on Linux runners in GitHub Actions. Without this, git complains
# that the directory has dubious ownership and refuses to run any commands.
# Avoid running on Windows runners as git runs into issues with not being able
# to lock the config file. Other git commands seem to work on the Windows
# runners so we can skip this step for Windows.
# TODO(b/375073267): Remove this once we understand why git repositories are
# being marked as unsafe inside the self-hosted runners.
if [[ ! $(uname -s) =~ "MSYS_NT" ]]; then
  # Quoted so that a checkout path containing whitespace is passed to git as a
  # single argument.
  git config --global --add safe.directory "$JAXCI_JAX_GIT_DIR"
fi

#######################################
# Shallow-clones XLA at HEAD into "$(pwd)/xla" and points JAXCI_XLA_GIT_DIR at
# the new checkout.
# Globals:   JAXCI_XLA_GIT_DIR (written, exported) - only on a successful clone
# Arguments: none
# Outputs:   a progress line on stdout, plus whatever git prints
# Returns:   git's exit status if the clone fails, 0 otherwise
#######################################
function clone_main_xla() {
  local xla_dir
  xla_dir="$(pwd)/xla"

  echo "Cloning XLA at HEAD to ${xla_dir}"
  # The destination is quoted so a working directory containing whitespace is
  # passed to git as one argument. Bail out before exporting if the clone
  # failed, so callers never see a path to a checkout that does not exist.
  git clone --depth=1 https://github.com/openxla/xla.git "${xla_dir}" || return
  export JAXCI_XLA_GIT_DIR="${xla_dir}"
}

# When JAXCI_CLONE_MAIN_XLA is 1, make sure an XLA checkout is available at
# $(pwd)/xla and point JAXCI_XLA_GIT_DIR at it.
if [[ "$JAXCI_CLONE_MAIN_XLA" == 1 ]]; then
  if [[ -d $(pwd)/xla ]]; then
    # The folder is already there (e.g. on a re-run); reuse it, since cloning
    # into it again would fail.
    echo "JAXCI_CLONE_MAIN_XLA set but local XLA folder already exists: $(pwd)/xla so using that instead."
    export JAXCI_XLA_GIT_DIR=$(pwd)/xla
  else
    clone_main_xla
  fi
fi

# When an XLA commit is requested, fetch it and check it out in the local XLA
# repository.
if [[ -n "$JAXCI_XLA_COMMIT" ]]; then
  # No local XLA path known yet: clone XLA at HEAD first.
  [[ -n "$JAXCI_XLA_GIT_DIR" ]] || clone_main_xla

  pushd "$JAXCI_XLA_GIT_DIR"

  git fetch --depth=1 origin "$JAXCI_XLA_COMMIT"
  echo "JAXCI_XLA_COMMIT is set. Checking out XLA at $JAXCI_XLA_COMMIT"
  git checkout "$JAXCI_XLA_COMMIT"

  popd
fi

# Tell the user whenever a local XLA will be used in place of the pinned one.
if [[ -n "${JAXCI_XLA_GIT_DIR}" ]]; then
  printf '%s\n' \
    "INFO: Overriding XLA to be read from $JAXCI_XLA_GIT_DIR instead of the" \
    "pinned version in the WORKSPACE." \
    "If you would like to revert this behavior, unset JAXCI_CLONE_MAIN_XLA" \
    "and JAXCI_XLA_COMMIT in your environment. Note that the Bazel RBE test" \
    "commands overrides the XLA repository and thus require a local copy of" \
    "XLA to run."
fi
Loading