diff --git a/README.md b/README.md index 84cdb256..ccad009f 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,29 @@ A Docker-based build infrastructure has been added to facilitate building Velox Specifically, the `velox-testing` and `velox` repositories must be checked out as sibling directories under the same parent directory. Once that is done, navigate (`cd`) into the `velox-testing/velox/scripts` directory and execute the build script `build_velox.sh`. After a successful build, the Velox libraries and executables are available in the container at `/opt/velox-build/release`. +## `sccache` Usage +`sccache` has been integrated to significantly accelerate builds using remote S3 caching and optional distributed compilation. Currently supported for Velox builds only (not Presto). + +The fork `rapidsai/sccache` is integrated and configured for use with the `NVIDIA` GitHub organization. + +### Setup and Usage +First, set up authentication credentials: +```bash +cd velox-testing/velox/scripts +./setup_sccache_auth.sh +``` + +Then build Velox with sccache enabled: +```bash +# Default: Remote S3 cache + local compilation (recommended) +./build_velox.sh --sccache + +# Optional: Enable distributed compilation (may cause build differences such as additional warnings) +./build_velox.sh --sccache --sccache-enable-dist +``` + +Authentication files are stored in `~/.sccache-auth/` by default and credentials are valid for 12 hours. By default, distributed compilation is disabled to avoid compiler version differences that can cause build failures. + ## Velox Benchmarking A Docker-based benchmarking infrastructure has been added to facilitate running Velox benchmarks with support for CPU/GPU execution engines and profiling capabilities. The infrastructure uses a dedicated `velox-benchmark` Docker service with pre-configured volume mounts that automatically sync benchmark data and results. The data follows Hive directory structure, making it compatible with Presto. 
Currently, only TPC-H is implemented, but the infrastructure is designed to be easily extended to support additional benchmarks in the future. diff --git a/velox/docker/adapters_build.dockerfile b/velox/docker/adapters_build.dockerfile index 945b3ae9..7397c7dd 100644 --- a/velox/docker/adapters_build.dockerfile +++ b/velox/docker/adapters_build.dockerfile @@ -11,6 +11,8 @@ ARG TREAT_WARNINGS_AS_ERRORS=1 ARG VELOX_ENABLE_BENCHMARKS=ON ARG BUILD_BASE_DIR=/opt/velox-build ARG BUILD_TYPE=release +ARG ENABLE_SCCACHE=OFF +ARG SCCACHE_DISABLE_DIST=ON # Environment mirroring upstream CI defaults and incorporating build args ENV VELOX_DEPENDENCY_SOURCE=SYSTEM \ @@ -40,11 +42,13 @@ ENV VELOX_DEPENDENCY_SOURCE=SYSTEM \ -DVELOX_ENABLE_CUDF=${BUILD_WITH_VELOX_ENABLE_CUDF} \ -DVELOX_ENABLE_FAISS=ON" \ LD_LIBRARY_PATH="${BUILD_BASE_DIR}/${BUILD_TYPE}/lib:\ - ${BUILD_BASE_DIR}/${BUILD_TYPE}/_deps/cudf-build:\ - ${BUILD_BASE_DIR}/${BUILD_TYPE}/_deps/rmm-build:\ - ${BUILD_BASE_DIR}/${BUILD_TYPE}/_deps/rapids_logger-build:\ - ${BUILD_BASE_DIR}/${BUILD_TYPE}/_deps/kvikio-build:\ - ${BUILD_BASE_DIR}/${BUILD_TYPE}/_deps/nvcomp_proprietary_binary-src/lib64" \ +${BUILD_BASE_DIR}/${BUILD_TYPE}/_deps/cudf-build:\ +${BUILD_BASE_DIR}/${BUILD_TYPE}/_deps/rmm-build:\ +${BUILD_BASE_DIR}/${BUILD_TYPE}/_deps/rapids_logger-build:\ +${BUILD_BASE_DIR}/${BUILD_TYPE}/_deps/kvikio-build:\ +${BUILD_BASE_DIR}/${BUILD_TYPE}/_deps/nvcomp_proprietary_binary-src/lib64" \ + ENABLE_SCCACHE=${ENABLE_SCCACHE} \ + SCCACHE_DISABLE_DIST=${SCCACHE_DISABLE_DIST} \ CCACHE_DIR=/ccache WORKDIR /workspace/velox @@ -52,6 +56,19 @@ WORKDIR /workspace/velox # Print environment variables for debugging RUN printenv | sort +# Install sccache if enabled +RUN if [ "$ENABLE_SCCACHE" = "ON" ]; then \ + set -euxo pipefail && \ + # Install RAPIDS sccache fork + wget --no-hsts -q -O- "https://github.com/rapidsai/sccache/releases/download/v0.10.0-rapids.68/sccache-v0.10.0-rapids.68-$(uname -m)-unknown-linux-musl.tar.gz" | \ + tar -C 
/usr/bin -zf - --wildcards --strip-components=1 -x '*/sccache' && \ + chmod +x /usr/bin/sccache && \ + # Verify installation + sccache --version; \ + else \ + echo "Skipping sccache installation (ENABLE_SCCACHE=OFF)"; \ + fi + # Install NVIDIA Nsight Systems (nsys) for profiling - only if benchmarks are enabled RUN if [ "$VELOX_ENABLE_BENCHMARKS" = "ON" ]; then \ set -euxo pipefail && \ @@ -68,9 +85,32 @@ RUN if [ "$VELOX_ENABLE_BENCHMARKS" = "ON" ]; then \ echo "Skipping nsys installation (VELOX_ENABLE_BENCHMARKS=OFF)"; \ fi -# Build using the specified build type and directory +# Copy sccache setup script (always copied; only made executable when sccache is enabled) +COPY velox-testing/velox/docker/sccache/sccache_setup.sh /sccache_setup.sh +RUN if [ "$ENABLE_SCCACHE" = "ON" ]; then chmod +x /sccache_setup.sh; fi + +# Copy sccache auth files (source must be within the docker build context). NOTE: COPY stores these credentials in an image layer - do not push or share the resulting image +COPY velox-testing/velox/docker/sccache/sccache_auth/ /sccache_auth/ + +# Build using the specified build type into ${BUILD_BASE_DIR}/${BUILD_TYPE} RUN --mount=type=bind,source=velox,target=/workspace/velox,ro \ --mount=type=cache,target=/ccache \ set -euxo pipefail && \ - make cmake BUILD_DIR="${BUILD_TYPE}" BUILD_TYPE="${BUILD_TYPE}" EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS[*]}" BUILD_BASE_DIR="${BUILD_BASE_DIR}" && \ - make build BUILD_DIR="${BUILD_TYPE}" BUILD_BASE_DIR="${BUILD_BASE_DIR}" + # Configure sccache if enabled + if [ "$ENABLE_SCCACHE" = "ON" ]; then \ + # Run sccache setup script + /sccache_setup.sh && \ + # Add sccache CMake flags + EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS} -DCMAKE_C_COMPILER_LAUNCHER=sccache -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_CUDA_COMPILER_LAUNCHER=sccache" && \ + echo "sccache distributed status:" && \ + sccache --dist-status && \ + echo "Pre-build sccache (zeroed out) statistics:" && \ + sccache --show-stats; \ + fi && \ + make cmake BUILD_DIR="${BUILD_TYPE}" BUILD_TYPE="${BUILD_TYPE}" EXTRA_CMAKE_FLAGS="${EXTRA_CMAKE_FLAGS}" BUILD_BASE_DIR="${BUILD_BASE_DIR}" && \ + make build 
BUILD_DIR="${BUILD_TYPE}" BUILD_BASE_DIR="${BUILD_BASE_DIR}" && \ + # Show final sccache stats if enabled + if [ "$ENABLE_SCCACHE" = "ON" ]; then \ + echo "Post-build sccache statistics:" && \ + sccache --show-stats; \ + fi diff --git a/velox/docker/sccache/sccache_setup.sh b/velox/docker/sccache/sccache_setup.sh new file mode 100644 index 00000000..94908af2 --- /dev/null +++ b/velox/docker/sccache/sccache_setup.sh @@ -0,0 +1,85 @@ +#!/bin/bash +set -euo pipefail + +# Check for required auth files (diagnostics go to stderr) +if [[ ! -f /sccache_auth/github_token ]]; then + echo "ERROR: GitHub token not found at /sccache_auth/github_token" >&2 + exit 1 +fi + +if [[ ! -f /sccache_auth/aws_credentials ]]; then + echo "ERROR: AWS credentials not found at /sccache_auth/aws_credentials" >&2 + exit 1 +fi + +# Set up directories +mkdir -p ~/.config/sccache ~/.aws + +# Install AWS credentials (safe in Docker container environment) +cp /sccache_auth/aws_credentials ~/.aws/credentials + +# Read GitHub token, stripping newlines, carriage returns and spaces +GITHUB_TOKEN=$(tr -d '\n\r ' < /sccache_auth/github_token) + +# Create sccache config +SCCACHE_ARCH=$(uname -m | sed 's/x86_64/amd64/') + +# Check if we should disable distributed compilation (disabled by default) +if [[ "${SCCACHE_DISABLE_DIST:-ON}" == "ON" ]]; then + cat > ~/.config/sccache/config << SCCACHE_EOF +[cache.disk] +size = 107374182400 + +[cache.disk.preprocessor_cache_mode] +use_preprocessor_cache_mode = true + +[cache.s3] +bucket = "rapids-sccache-devs" +region = "us-east-2" +no_credentials = false + +# No [dist] section -> disables distributed compilation +SCCACHE_EOF +else + cat > ~/.config/sccache/config << SCCACHE_EOF +[cache.disk] +size = 107374182400 + +[cache.disk.preprocessor_cache_mode] +use_preprocessor_cache_mode = true + +[cache.s3] +bucket = "rapids-sccache-devs" +region = "us-east-2" +no_credentials = false + +[dist] +scheduler_url = "https://${SCCACHE_ARCH}.linux.sccache.rapids.nvidia.com" + +[dist.auth] +type = "token" +token = "${GITHUB_TOKEN}" +SCCACHE_EOF +fi + +#
Configure sccache for high parallelism +# Increase file descriptor limit for high parallelism (if possible) +ulimit -n "$(ulimit -Hn)" || echo "Could not increase file descriptor limit" >&2 + +# Start sccache server +sccache --start-server + +# Test sccache +sccache --show-stats + +# Testing distributed compilation status (only if enabled) +if [[ "${SCCACHE_DISABLE_DIST:-ON}" == "ON" ]]; then + echo "Distributed compilation is DISABLED by default - using local compilation with remote S3 caching" +else + if sccache --dist-status; then + echo "Distributed compilation is available" + else + echo "Error: Distributed compilation not available, check connectivity" >&2 + exit 1 + fi +fi diff --git a/velox/docker/sccache_auth.dockerfile b/velox/docker/sccache_auth.dockerfile new file mode 100644 index 00000000..d6c3c09c --- /dev/null +++ b/velox/docker/sccache_auth.dockerfile @@ -0,0 +1,51 @@ +FROM ubuntu:22.04 + +# Prevent interactive prompts during package installation +ENV DEBIAN_FRONTEND=noninteractive + +# Install basic dependencies +RUN < /dev/null +apt-get update +apt-get install gh -y +rm -rf /var/lib/apt/lists/* +EOF + +# Install gh-nv-gha-aws plugin manually +RUN < /root/.local/share/gh/extensions/gh-nv-gha-aws/manifest.yml << 'MANIFEST' +owner: nv-gha-runners +name: gh-nv-gha-aws +host: github.com +tag: v0.1.1 +ispinned: false +path: $HOME/.local/share/gh/extensions/gh-nv-gha-aws/gh-nv-gha-aws +MANIFEST +EOF + +# Create output directory for credentials +RUN mkdir -p /output diff --git a/velox/scripts/build_velox.sh b/velox/scripts/build_velox.sh index 6b08da55..bc0dd54a 100755 --- a/velox/scripts/build_velox.sh +++ b/velox/scripts/build_velox.sh @@ -27,6 +27,18 @@ BUILD_TYPE="release" LOG_ENABLED=false TREAT_WARNINGS_AS_ERRORS="${TREAT_WARNINGS_AS_ERRORS:-1}" LOGFILE="./build_velox.log" +ENABLE_SCCACHE=false +SCCACHE_AUTH_DIR="${SCCACHE_AUTH_DIR:-$HOME/.sccache-auth}" +SCCACHE_ENABLE_DIST=false + +# Cleanup function to remove copied sccache auth files 
+cleanup_sccache_auth() { + if [[ "$ENABLE_SCCACHE" == true && -d "../docker/sccache/sccache_auth/" ]]; then + rm -fr ../docker/sccache/sccache_auth/ + fi +} + +trap cleanup_sccache_auth EXIT; trap 'exit 130' SIGINT; trap 'exit 143' SIGTERM; trap 'exit 131' SIGQUIT # signals must exit (which fires the EXIT trap); a handler that just returns lets the script keep running print_help() { @@ -44,6 +56,8 @@ Options: --gpu Build with GPU support (enables CUDF; sets BUILD_WITH_VELOX_ENABLE_CUDF=ON) [default]. -j|--num-threads NUM Number of threads to use for building (default: 3/4 of CPU cores). --benchmarks true|false Enable benchmarks and nsys profiling tools (default: true). + --sccache Enable sccache distributed compilation caching (requires auth files in ~/.sccache-auth/, or in the directory named by the SCCACHE_AUTH_DIR environment variable). + --sccache-enable-dist Enable distributed compilation (WARNING: may cause compilation differences like additional warnings that could lead to build failures). --build-type TYPE Build type: Release, Debug, or RelWithDebInfo (case insensitive, default: release). -h, --help Show this help message and exit. @@ -58,6 +72,8 @@ Examples: $(basename "$0") --log mybuild.log --all-cuda-archs $(basename "$0") -j 8 --gpu $(basename "$0") --num-threads 16 --no-cache + $(basename "$0") --sccache # Build with sccache (remote S3 cache, local compilation) + $(basename "$0") --sccache --sccache-enable-dist # Build with sccache including distributed compilation (may cause build differences) $(basename "$0") --build-type Debug $(basename "$0") --build-type debug --gpu $(basename "$0") --build-type RELWITHDEBINFO --gpu @@ -128,6 +144,14 @@ parse_args() { exit 1 fi ;; + --sccache) + ENABLE_SCCACHE=true + shift + ;; + --sccache-enable-dist) + SCCACHE_ENABLE_DIST=true + shift + ;; --build-type) if [[ -n "${2:-}" && ! "${2}" =~ ^- ]]; then # Convert to lowercase first, then validate @@ -160,6 +184,30 @@ parse_args() { done } +# Validate sccache authentication +validate_sccache_auth() { + if [[ "$ENABLE_SCCACHE" == true ]]; then + echo "Checking for sccache authentication files in: $SCCACHE_AUTH_DIR" + + if [[ ! 
-d "$SCCACHE_AUTH_DIR" ]]; then + echo "ERROR: sccache auth directory not found: $SCCACHE_AUTH_DIR" >&2 + echo "Run setup_sccache_auth.sh to set up authentication." >&2 + exit 1 + fi + + if [[ ! -f "$SCCACHE_AUTH_DIR/github_token" ]]; then + echo "ERROR: GitHub token not found: $SCCACHE_AUTH_DIR/github_token" >&2 + echo "Run setup_sccache_auth.sh to set up authentication." >&2 + exit 1 + fi + + if [[ ! -f "$SCCACHE_AUTH_DIR/aws_credentials" ]]; then + echo "ERROR: AWS credentials not found: $SCCACHE_AUTH_DIR/aws_credentials" >&2 + echo "Run setup_sccache_auth.sh to set up authentication." >&2 + exit 1 + fi + fi +} # Detect CUDA architecture since native architecture detection doesn't work # inside Docker containers @@ -178,10 +226,11 @@ detect_cuda_architecture() { fi } - - parse_args "$@" +# Validate sccache authentication if sccache is enabled +validate_sccache_auth + # Validate repo layout using shared script ../../scripts/validate_directories_exist.sh "../../../velox" @@ -207,6 +256,26 @@ DOCKER_BUILD_OPTS+=(--build-arg BUILD_TYPE="${BUILD_TYPE}") export DOCKER_BUILDKIT=1 export COMPOSE_DOCKER_CLI_BUILD=1 +# Add sccache build arguments; the auth dir is always created because the Dockerfile COPYs it unconditionally (it stays empty when sccache is off) +mkdir -p ../docker/sccache/sccache_auth/ +if [[ "$ENABLE_SCCACHE" == true ]]; then + DOCKER_BUILD_OPTS+=(--build-arg ENABLE_SCCACHE="ON") + # Copy auth files to build context + cp "$SCCACHE_AUTH_DIR/github_token" ../docker/sccache/sccache_auth/ + cp "$SCCACHE_AUTH_DIR/aws_credentials" ../docker/sccache/sccache_auth/ + + # Add distributed compilation control (disabled by default) + if [[ "$SCCACHE_ENABLE_DIST" == true ]]; then + DOCKER_BUILD_OPTS+=(--build-arg SCCACHE_DISABLE_DIST="OFF") + echo "WARNING: sccache distributed compilation enabled - may cause compilation differences" + else + DOCKER_BUILD_OPTS+=(--build-arg SCCACHE_DISABLE_DIST="ON") + fi +else + DOCKER_BUILD_OPTS+=(--build-arg ENABLE_SCCACHE="OFF") + DOCKER_BUILD_OPTS+=(--build-arg SCCACHE_DISABLE_DIST="ON") +fi + if [[ "$LOG_ENABLED" == true ]]; then echo "Logging 
build output to $LOGFILE" docker compose -f "$COMPOSE_FILE" build "${DOCKER_BUILD_OPTS[@]}" | tee "$LOGFILE" @@ -216,6 +285,7 @@ else BUILD_EXIT_CODE=$? fi + if [[ "$BUILD_EXIT_CODE" == "0" ]]; then # Update EXPECTED_OUTPUT_DIR to use the correct build directory EXPECTED_OUTPUT_DIR="/opt/velox-build/${BUILD_TYPE}" @@ -238,6 +308,13 @@ if [[ "$BUILD_EXIT_CODE" == "0" ]]; then else echo " Benchmarks and nsys profiling are disabled in this build." fi + if [[ "$ENABLE_SCCACHE" == true ]]; then + echo " sccache distributed compilation caching was enabled for this build." + if [[ -n "$SCCACHE_AUTH_DIR" ]]; then + echo " To check sccache stats, run:" + echo " docker compose -f $COMPOSE_FILE run --rm ${CONTAINER_NAME} sccache --show-stats" + fi + fi echo "" else echo " ERROR: Build succeeded but ${EXPECTED_OUTPUT_DIR} not found in the container." diff --git a/velox/scripts/setup_sccache_auth.sh b/velox/scripts/setup_sccache_auth.sh new file mode 100755 index 00000000..7ea69de3 --- /dev/null +++ b/velox/scripts/setup_sccache_auth.sh @@ -0,0 +1,135 @@ +#!/bin/bash +set -euo pipefail + +# Default output directory +DEFAULT_OUTPUT_DIR="$HOME/.sccache-auth" + +# Output directory +OUTPUT_DIR="${1:-$DEFAULT_OUTPUT_DIR}" + +# Timeout for AWS credentials +AWS_CREDENTIALS_TIMEOUT=43200 # 12 hours + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +echo -e "${BLUE}sccache Authentication Setup${NC}" +echo "This script will help you set up authentication for distributed compilation caching." 
+echo "Output directory: $OUTPUT_DIR" +echo + +# Create output directory +mkdir -p "$OUTPUT_DIR" + +if [[ -f "$OUTPUT_DIR/github_token" || -f "$OUTPUT_DIR/aws_credentials" ]]; then + echo -e "${YELLOW}Warning: Existing authentication files detected in $OUTPUT_DIR.${NC}" + echo -e "${YELLOW}Continuing will overwrite your current GitHub and AWS credentials.${NC}" + echo -e "${YELLOW}Press Enter to continue or Ctrl+C to abort.${NC}" + read -r + rm -f "$OUTPUT_DIR/github_token" "$OUTPUT_DIR/aws_credentials" +fi + +# Use Docker BuildKit to ensure heredoc RUN commands are available +export DOCKER_BUILDKIT=1 + +# Build the authentication container +echo -e "${YELLOW}Building sccache authentication container...${NC}" +docker build -f ../docker/sccache_auth.dockerfile -t sccache-auth . + +echo -e "${GREEN}Authentication container built successfully${NC}" +echo + +# Step 1: GitHub Authentication (inner shell runs with -e so a failed gh step aborts instead of writing an empty token file) +echo -e "${BLUE}GitHub Authentication${NC}" +echo "Please follow the instructions to authenticate with GitHub." +echo "A device code will be displayed for you to enter in your browser." +echo + +docker run --rm -it \ + -v "$OUTPUT_DIR:/output" \ + sccache-auth \ + bash -ec ' + echo "GitHub is authenticating with required scopes: gist, repo, read:org, read:enterprise" + echo + + BROWSER="false" gh auth login --git-protocol ssh --skip-ssh-key --web --scopes gist --scopes repo --scopes read:org --scopes read:enterprise + + echo + echo "Verifying authentication" + gh auth status + + gh auth token > /output/github_token + echo "GitHub token saved to /output/github_token" + ' + +if [[ ! -s "$OUTPUT_DIR/github_token" ]]; then + echo -e "${RED}GitHub token not found. Authentication has failed.${NC}" + exit 1 +fi + +echo -e "${GREEN}GitHub authentication successful${NC}" +echo + +# Step 2: AWS Credential Generation +echo -e "${BLUE}AWS Credential Generation${NC}" +echo "Using the gh-nv-gha-aws plugin for GitHub to generate required AWS credentials." 
+echo + +docker run --rm -it \ + -v "$OUTPUT_DIR:/output" \ + sccache-auth \ + bash -c ' + if [[ ! -f /output/github_token ]]; then + echo "Error: GitHub token not found" + exit 1 + fi + + # Authenticate with the saved token + cat /output/github_token | gh auth login --with-token + + # Verify GitHub CLI authentication + gh auth status + + #Generate AWS credentials + mkdir -p /root/.aws + + gh nv-gha-aws org nvidia \ + --profile default \ + --output creds-file \ + --duration '$AWS_CREDENTIALS_TIMEOUT' \ + --aud sts.amazonaws.com \ + --idp-url https://token.gha-runners.nvidia.com \ + --role-arn arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs \ + > /root/.aws/credentials + + # Copy AWS credentials to output + cp /root/.aws/credentials /output/aws_credentials + ' + +if [[ ! -f "$OUTPUT_DIR/aws_credentials" ]]; then + echo -e "${RED}AWS credentials not found. Generation has failed.${NC}" + exit 1 +fi + +echo -e "${GREEN}AWS credentials generated successfully${NC}" +echo + +# Summary +echo -e "${BLUE}Authentication Setup Complete${NC}" +echo "Authentication files created in: $OUTPUT_DIR" +echo + +echo -e "${YELLOW}Next steps:${NC}" +echo "1. Use these credentials with build_velox.sh:" +echo " ./build_velox.sh --sccache --sccache-auth-dir \"$OUTPUT_DIR\"" +echo +echo "2. Or set the environment variable:" +echo " export SCCACHE_AUTH_DIR=\"$OUTPUT_DIR\"" +echo " ./build_velox.sh --sccache" +echo + +echo -e "${GREEN}Setup complete!${NC}"