-
Notifications
You must be signed in to change notification settings - Fork 17
sccache: integrate with Velox build
#52
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
02c005f
398dae0
2cb9835
3d3c7a7
0c690f5
f412028
eadaefa
4350283
188fdaf
f7eb417
a458d0d
48d0224
5a27fe2
251e566
3df2a85
7cdbd64
700d2ee
cf288c4
9a74c25
165ad9e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,85 @@ | ||
#!/bin/bash
#
# Configure and start sccache inside the build container.
#
# Inputs:
#   /sccache_auth/github_token     token written to [dist.auth] when
#                                  distributed compilation is enabled
#   /sccache_auth/aws_credentials  copied to ~/.aws/credentials
#   SCCACHE_DISABLE_DIST           "ON" (default) omits the [dist] section;
#                                  any other value enables it
#
# Exits non-zero when an auth file is missing, when an sccache command fails,
# or when distributed compilation was requested but `--dist-status` fails.
set -euo pipefail

readonly token_file="/sccache_auth/github_token"
readonly aws_file="/sccache_auth/aws_credentials"
dist_disabled="${SCCACHE_DISABLE_DIST:-ON}"

# Print an error on stderr (so it is not lost in captured stdout) and abort.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# Check for required auth files
[[ -f "${token_file}" ]] || die "GitHub token not found at ${token_file}"
[[ -f "${aws_file}" ]] || die "AWS credentials not found at ${aws_file}"

# Set up directories
mkdir -p ~/.config/sccache ~/.aws

# Install AWS credentials; keep them readable by the owner only.
cp -- "${aws_file}" ~/.aws/credentials
chmod 600 ~/.aws/credentials

# Read GitHub token, dropping newlines, carriage returns and spaces.
GITHUB_TOKEN=$(tr -d '\n\r ' < "${token_file}")

# NOTE(review): only x86_64 is mapped (to amd64); any other `uname -m` value
# is used verbatim in the scheduler URL -- confirm that is what the scheduler
# host names expect on non-x86 machines.
SCCACHE_ARCH=$(uname -m | sed 's/x86_64/amd64/')

# Create sccache config. The file may contain the token, so restrict its
# permissions before anything is written to it.
config_file=~/.config/sccache/config
: > "${config_file}"
chmod 600 "${config_file}"

# Sections shared by both modes.
cat >> "${config_file}" << SCCACHE_EOF
[cache.disk]
size = 107374182400

[cache.disk.preprocessor_cache_mode]
use_preprocessor_cache_mode = true

[cache.s3]
bucket = "rapids-sccache-devs"
region = "us-east-2"
no_credentials = false

SCCACHE_EOF

# Distributed compilation is disabled by default: without a [dist] section
# sccache only uses the caches configured above.
if [[ "${dist_disabled}" == "ON" ]]; then
  cat >> "${config_file}" << SCCACHE_EOF
# No [dist] section -> disables distributed compilation
SCCACHE_EOF
else
  cat >> "${config_file}" << SCCACHE_EOF
[dist]
scheduler_url = "https://${SCCACHE_ARCH}.linux.sccache.rapids.nvidia.com"

[dist.auth]
type = "token"
token = "${GITHUB_TOKEN}"
SCCACHE_EOF
fi

# Configure sccache for high parallelism:
# raise the soft file descriptor limit to the hard limit (best effort).
ulimit -n "$(ulimit -Hn)" || echo "Could not increase file descriptor limit"

# Start sccache server
sccache --start-server

# Test sccache
sccache --show-stats

# Report distributed compilation status (checked only if enabled)
if [[ "${dist_disabled}" == "ON" ]]; then
  echo "Distributed compilation is DISABLED by default - using local compilation with remote S3 caching"
elif sccache --dist-status; then
  echo "Distributed compilation is available"
else
  echo "Error: Distributed compilation not available, check connectivity" >&2
  exit 1
fi
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this Dockerfile based on an existing Dockerfile or documentation?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This Dockerfile is largely based on documentation in a Slack channel, which I can link offline. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,51 @@ | ||
# Image providing the GitHub CLI (`gh`) together with the gh-nv-gha-aws
# extension, installed for the root user.
# NOTE(review): /output is presumably where generated credentials are written
# by whatever runs in this image -- confirm against the script that uses it.
#
# Every heredoc script below starts with `set -e`: a heredoc RUN is executed
# as one shell script, so without it a failing intermediate command would not
# fail the layer.
FROM ubuntu:22.04

# Prevent interactive prompts during package installation
ENV DEBIAN_FRONTEND=noninteractive

# Install basic dependencies
RUN <<EOF
set -e
apt-get update
apt-get install -y \
    curl \
    wget \
    ca-certificates \
    gnupg \
    lsb-release
rm -rf /var/lib/apt/lists/*
EOF

# Install GitHub CLI
RUN <<EOF
set -e
# Download straight to the keyring path so a failed download fails the build
# instead of being hidden behind a pipeline.
curl -fsSL -o /usr/share/keyrings/githubcli-archive-keyring.gpg \
    https://cli.github.com/packages/githubcli-archive-keyring.gpg
chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg
echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" > /etc/apt/sources.list.d/github-cli.list
apt-get update
apt-get install gh -y
rm -rf /var/lib/apt/lists/*
EOF

# Install gh-nv-gha-aws plugin manually
RUN <<EOF
set -e
NV_GHA_AWS_VERSION="0.1.1"
# The dpkg architecture name is used directly as the release asset suffix.
ARCH=$(dpkg --print-architecture)
mkdir -p /root/.local/share/gh/extensions/gh-nv-gha-aws
wget --no-hsts -q -O /root/.local/share/gh/extensions/gh-nv-gha-aws/gh-nv-gha-aws \
    "https://github.com/nv-gha-runners/gh-nv-gha-aws/releases/download/v${NV_GHA_AWS_VERSION}/gh-nv-gha-aws_v${NV_GHA_AWS_VERSION}_linux-${ARCH}"
chmod 0755 /root/.local/share/gh/extensions/gh-nv-gha-aws/gh-nv-gha-aws
EOF

# Create plugin manifest
# The inner delimiter is quoted, so "$HOME" is written literally to the file.
# Keep `tag` in sync with NV_GHA_AWS_VERSION above.
RUN <<EOF
set -e
cat > /root/.local/share/gh/extensions/gh-nv-gha-aws/manifest.yml << 'MANIFEST'
owner: nv-gha-runners
name: gh-nv-gha-aws
host: github.com
tag: v0.1.1
ispinned: false
path: $HOME/.local/share/gh/extensions/gh-nv-gha-aws/gh-nv-gha-aws
MANIFEST
EOF

# Create output directory for credentials
RUN mkdir -p /output
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -27,6 +27,18 @@ BUILD_TYPE="release" | |
| LOG_ENABLED=false | ||
| TREAT_WARNINGS_AS_ERRORS="${TREAT_WARNINGS_AS_ERRORS:-1}" | ||
| LOGFILE="./build_velox.log" | ||
| ENABLE_SCCACHE=false | ||
| SCCACHE_AUTH_DIR="${SCCACHE_AUTH_DIR:-$HOME/.sccache-auth}" | ||
| SCCACHE_ENABLE_DIST=false | ||
|
|
||
#######################################
# Remove the sccache auth files that were copied into the Docker build
# context, so credentials do not linger on disk after the build.
# Globals:   ENABLE_SCCACHE (read)
# Arguments: none
# Returns:   0 when there is nothing to remove; otherwise the status of rm
# Note:      the path is relative to the current working directory.
#######################################
cleanup_sccache_auth() {
  if [[ "$ENABLE_SCCACHE" == true && -d "../docker/sccache/sccache_auth/" ]]; then
    rm -rf -- ../docker/sccache/sccache_auth/
  fi
}

# Clean up once, on any exit path.
trap cleanup_sccache_auth EXIT
# A signal handler that merely returns lets the script keep running after
# Ctrl-C or a kill. Exit explicitly with the conventional 128+signal status;
# the EXIT trap above then performs the cleanup.
trap 'exit 130' SIGINT
trap 'exit 131' SIGQUIT
trap 'exit 143' SIGTERM
|
|
||
|
|
||
| print_help() { | ||
|
|
@@ -44,6 +56,8 @@ Options: | |
| --gpu Build with GPU support (enables CUDF; sets BUILD_WITH_VELOX_ENABLE_CUDF=ON) [default]. | ||
| -j|--num-threads NUM Number of threads to use for building (default: 3/4 of CPU cores). | ||
| --benchmarks true|false Enable benchmarks and nsys profiling tools (default: true). | ||
| --sccache Enable sccache distributed compilation caching (requires auth files in ~/.sccache-auth/). | ||
| --sccache-enable-dist Enable distributed compilation (WARNING: may cause compilation differences like additional warnings that could lead to build failures). | ||
| --build-type TYPE Build type: Release, Debug, or RelWithDebInfo (case insensitive, default: release). | ||
| -h, --help Show this help message and exit. | ||
|
|
||
|
|
@@ -58,6 +72,8 @@ Examples: | |
| $(basename "$0") --log mybuild.log --all-cuda-archs | ||
| $(basename "$0") -j 8 --gpu | ||
| $(basename "$0") --num-threads 16 --no-cache | ||
| $(basename "$0") --sccache # Build with sccache (remote S3 cache, local compilation) | ||
| $(basename "$0") --sccache --sccache-enable-dist # Build with sccache including distributed compilation (may cause build differences) | ||
| $(basename "$0") --build-type Debug | ||
| $(basename "$0") --build-type debug --gpu | ||
| $(basename "$0") --build-type RELWITHDEBINFO --gpu | ||
|
|
@@ -128,6 +144,14 @@ parse_args() { | |
| exit 1 | ||
| fi | ||
| ;; | ||
| --sccache) | ||
| ENABLE_SCCACHE=true | ||
| shift | ||
| ;; | ||
| --sccache-enable-dist) | ||
| SCCACHE_ENABLE_DIST=true | ||
| shift | ||
| ;; | ||
| --build-type) | ||
| if [[ -n "${2:-}" && ! "${2}" =~ ^- ]]; then | ||
| # Convert to lowercase first, then validate | ||
|
|
@@ -160,6 +184,30 @@ parse_args() { | |
| done | ||
| } | ||
|
|
||
#######################################
# Verify that the sccache credential files are present before building.
# Does nothing unless sccache was requested.
# Globals:   ENABLE_SCCACHE (read), SCCACHE_AUTH_DIR (read)
# Arguments: none
# Outputs:   a progress line on stdout; error details on stderr
# Returns:   0 when sccache is disabled or all files exist; exits the
#            script with status 1 on the first missing item
#######################################
validate_sccache_auth() {
  [[ "$ENABLE_SCCACHE" == true ]] || return 0

  echo "Checking for sccache authentication files in: $SCCACHE_AUTH_DIR"

  # Find the first missing item, checked in order: directory, token, AWS file.
  local problem=""
  if [[ ! -d "$SCCACHE_AUTH_DIR" ]]; then
    problem="sccache auth directory not found: $SCCACHE_AUTH_DIR"
  elif [[ ! -f "$SCCACHE_AUTH_DIR/github_token" ]]; then
    problem="GitHub token not found: $SCCACHE_AUTH_DIR/github_token"
  elif [[ ! -f "$SCCACHE_AUTH_DIR/aws_credentials" ]]; then
    problem="AWS credentials not found: $SCCACHE_AUTH_DIR/aws_credentials"
  fi

  if [[ -n "$problem" ]]; then
    echo "ERROR: $problem" >&2
    echo "Run setup_sccache_auth.sh to set up authentication." >&2
    exit 1
  fi
}
|
|
||
| # Detect CUDA architecture since native architecture detection doesn't work | ||
| # inside Docker containers | ||
|
|
@@ -178,10 +226,11 @@ detect_cuda_architecture() { | |
| fi | ||
| } | ||
|
|
||
|
|
||
|
|
||
| parse_args "$@" | ||
|
|
||
| # Validate sccache authentication if sccache is enabled | ||
| validate_sccache_auth | ||
|
|
||
| # Validate repo layout using shared script | ||
| ../../scripts/validate_directories_exist.sh "../../../velox" | ||
|
|
||
|
|
@@ -207,6 +256,26 @@ DOCKER_BUILD_OPTS+=(--build-arg BUILD_TYPE="${BUILD_TYPE}") | |
| export DOCKER_BUILDKIT=1 | ||
| export COMPOSE_DOCKER_CLI_BUILD=1 | ||
|
|
||
| # Add sccache build arguments | ||
| if [[ "$ENABLE_SCCACHE" == true ]]; then | ||
| DOCKER_BUILD_OPTS+=(--build-arg ENABLE_SCCACHE="ON") | ||
| # Copy auth files to build context | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should this be cleaned up later?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sure, I put in a trap. |
||
| mkdir -p ../docker/sccache/sccache_auth/ | ||
| cp "$SCCACHE_AUTH_DIR/github_token" ../docker/sccache/sccache_auth/ | ||
| cp "$SCCACHE_AUTH_DIR/aws_credentials" ../docker/sccache/sccache_auth/ | ||
|
|
||
| # Add distributed compilation control (disabled by default) | ||
| if [[ "$SCCACHE_ENABLE_DIST" == true ]]; then | ||
| DOCKER_BUILD_OPTS+=(--build-arg SCCACHE_DISABLE_DIST="OFF") | ||
| echo "WARNING: sccache distributed compilation enabled - may cause compilation differences" | ||
| else | ||
| DOCKER_BUILD_OPTS+=(--build-arg SCCACHE_DISABLE_DIST="ON") | ||
| fi | ||
| else | ||
| DOCKER_BUILD_OPTS+=(--build-arg ENABLE_SCCACHE="OFF") | ||
| DOCKER_BUILD_OPTS+=(--build-arg SCCACHE_DISABLE_DIST="ON") | ||
| fi | ||
|
|
||
| if [[ "$LOG_ENABLED" == true ]]; then | ||
| echo "Logging build output to $LOGFILE" | ||
| docker compose -f "$COMPOSE_FILE" build "${DOCKER_BUILD_OPTS[@]}" | tee "$LOGFILE" | ||
|
|
@@ -216,6 +285,7 @@ else | |
| BUILD_EXIT_CODE=$? | ||
| fi | ||
|
|
||
|
|
||
| if [[ "$BUILD_EXIT_CODE" == "0" ]]; then | ||
| # Update EXPECTED_OUTPUT_DIR to use the correct build directory | ||
| EXPECTED_OUTPUT_DIR="/opt/velox-build/${BUILD_TYPE}" | ||
|
|
@@ -238,6 +308,13 @@ if [[ "$BUILD_EXIT_CODE" == "0" ]]; then | |
| else | ||
| echo " Benchmarks and nsys profiling are disabled in this build." | ||
| fi | ||
| if [[ "$ENABLE_SCCACHE" == true ]]; then | ||
| echo " sccache distributed compilation caching was enabled for this build." | ||
| if [[ -n "$SCCACHE_AUTH_DIR" ]]; then | ||
| echo " To check sccache stats, run:" | ||
| echo " docker compose -f $COMPOSE_FILE run --rm ${CONTAINER_NAME} sccache --show-stats" | ||
| fi | ||
| fi | ||
| echo "" | ||
| else | ||
| echo " ERROR: Build succeeded but ${EXPECTED_OUTPUT_DIR} not found in the container." | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Running this script results in the following error:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh oops I think I forgot to include a dockerfile in this PR, my bad.