diff --git a/.github/workflows/containers.yml b/.github/workflows/containers.yml
index cdfd1b81..7460c5c7 100644
--- a/.github/workflows/containers.yml
+++ b/.github/workflows/containers.yml
@@ -10,12 +10,18 @@ on:
   # Allows manual triggering of the workflow
   workflow_dispatch:
 
+  # Run when this workflow or the build script changes (images are built but not pushed)
+  push:
+    paths:
+      - '.github/workflows/containers.yml'
+      - 'docker/build-container.sh'
+
 jobs:
   build-and-push:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        platform: [intel, cuda, vulkan, cpu, musa]
+        platform: [intel, cuda, vulkan, cpu, musa, rocm]
         fail-fast: false
     steps:
       - name: Checkout code
@@ -31,7 +37,7 @@ jobs:
       - name: Run build-container
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: ./docker/build-container.sh ${{ matrix.platform }} true
+        run: ./docker/build-container.sh ${{ matrix.platform }} ${{ github.event_name != 'push' }}
 
       # note make sure mostlygeek/llama-swap has admin rights to the llama-swap package
       # see: https://github.com/actions/delete-package-versions/issues/74
diff --git a/CLAUDE.md b/CLAUDE.md
index 4e83db46..7e82a0fa 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -12,7 +12,7 @@ llama-swap is a light weight, transparent proxy server that provides automatic m
 - when summarizing changes only include details that require further action
 - just say "Done." when there is no further action
 - use `gh` to create PRs and load issues
-- do not mention "created by claude" in commit messages
+- do not include Co-Authored-By or created by when committing changes or creating PRs
 - keep PR descriptions short and focused on changes.
 - never include a test plan
 
diff --git a/docker/build-container.sh b/docker/build-container.sh
index 66235ad3..3998644d 100755
--- a/docker/build-container.sh
+++ b/docker/build-container.sh
@@ -2,21 +2,41 @@
 
 cd $(dirname "$0")
 
+# use this to test locally, example:
+# GITHUB_TOKEN=$(gh auth token) LOG_DEBUG=1 DEBUG_ABORT_BUILD=1 ./docker/build-container.sh rocm
+# you need read:packages scope on the token. Generate a personal access token with
+# the scopes: gist, read:org, repo, write:packages
+# then: gh auth login (and copy/paste the new token)
+
+# Logging helpers write to stderr so that functions whose stdout is captured
+# with $(...), such as fetch_llama_tag, hand back only their result.
+# log_debug prints "[DEBUG] <args>" only when LOG_DEBUG=1
+log_debug() {
+    if [ "$LOG_DEBUG" = "1" ]; then
+        echo "[DEBUG] $*" >&2
+    fi
+}
+
+# log_info always prints "[INFO] <args>"
+log_info() {
+    echo "[INFO] $*" >&2
+}
+
 ARCH=$1
 PUSH_IMAGES=${2:-false}
 
 # List of allowed architectures
-ALLOWED_ARCHS=("intel" "vulkan" "musa" "cuda" "cpu")
+ALLOWED_ARCHS=("intel" "vulkan" "musa" "cuda" "cpu" "rocm")
 
 # Check if ARCH is in the allowed list
 if [[ ! " ${ALLOWED_ARCHS[@]} " =~ " ${ARCH} " ]]; then
-  echo "Error: ARCH must be one of the following: ${ALLOWED_ARCHS[@]}"
+  log_info "Error: ARCH must be one of the following: ${ALLOWED_ARCHS[@]}"
   exit 1
 fi
 
 # Check if GITHUB_TOKEN is set and not empty
 if [[ -z "$GITHUB_TOKEN" ]]; then
-  echo "Error: GITHUB_TOKEN is not set or is empty."
+  log_info "Error: GITHUB_TOKEN is not set or is empty."
   exit 1
 fi
 
@@ -32,25 +52,88 @@ LS_REPO=${GITHUB_REPOSITORY:-mostlygeek/llama-swap}
 # have to strip out the 'v' due to .tar.gz file naming
 LS_VER=$(curl -s https://api.github.com/repos/${LS_REPO}/releases/latest | jq -r .tag_name | sed 's/v//')
 
+# Fetches the most recent llama.cpp container tag matching the given prefix.
+# Pages through the GitHub packages API (per_page versions per request, at most
+# max_pages requests) and stops at the first page that has a matching tag.
+# $1 - tag_prefix (e.g., "server" or "server-vulkan")
+# Prints: the version number, i.e. the last '-' separated field of the
+#         matching tag that sorts highest
+# Returns: 0 when a tag was found, 1 on a request/API error, when the listing
+#          is exhausted, or when the page limit is reached
+# NOTE(review): the match is a plain prefix test, so "server" also matches
+# "server-<arch>-..." tags; presumably every flavour shares the version - confirm
+fetch_llama_tag() {
+    local tag_prefix=$1
+    local page=1
+    local per_page=100
+    local max_pages=50
+    # declared before assignment so 'local' cannot mask a command's exit status
+    local response found_tag error_msg
+
+    while [ "$page" -le "$max_pages" ]; do
+        log_debug "Fetching page $page for tag prefix: $tag_prefix"
+
+        if ! response=$(curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \
+            "https://api.github.com/users/ggml-org/packages/container/llama.cpp/versions?per_page=${per_page}&page=${page}"); then
+            log_info "GitHub API request failed"
+            return 1
+        fi
+
+        # A page of versions is a JSON array; an error object or a body that is
+        # not JSON at all is reported as an API error
+        if ! echo "$response" | jq -e 'type == "array"' > /dev/null 2>&1; then
+            error_msg=$(echo "$response" | jq -r '.message? // empty' 2> /dev/null)
+            log_info "GitHub API error: ${error_msg:-unexpected response}"
+            return 1
+        fi
+
+        # Check if response is empty array (no more pages)
+        if [ "$(echo "$response" | jq 'length')" -eq 0 ]; then
+            log_debug "No more pages (empty response)"
+            return 1
+        fi
+
+        # Extract matching tag from this page; the prefix goes in via --arg so
+        # it is never parsed as part of the jq program
+        found_tag=$(echo "$response" | jq -r --arg prefix "$tag_prefix" \
+            '.[] | .metadata.container.tags[]? | select(startswith($prefix))' \
+            | sort -r | head -n1)
+
+        if [ -n "$found_tag" ]; then
+            log_debug "Found tag: $found_tag on page $page"
+            echo "$found_tag" | awk -F '-' '{print $NF}'
+            return 0
+        fi
+
+        page=$((page + 1))
+    done
+
+    # Safety limit to prevent unbounded paging
+    log_info "Reached pagination safety limit ($max_pages pages)"
+    return 1
+}
+
 if [ "$ARCH" == "cpu" ]; then
     # cpu only containers just use the server tag
-    LCPP_TAG=$(curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \
-        "https://api.github.com/users/ggml-org/packages/container/llama.cpp/versions" \
-        | jq -r '.[] | select(.metadata.container.tags[] | startswith("server")) | .metadata.container.tags[]' \
-        | sort -r | head -n1 | awk -F '-' '{print $3}')
+    LCPP_TAG=$(fetch_llama_tag "server")
     BASE_TAG=server-${LCPP_TAG}
 else
-    LCPP_TAG=$(curl -s -H "Authorization: Bearer $GITHUB_TOKEN" \
-        "https://api.github.com/users/ggml-org/packages/container/llama.cpp/versions" \
-        | jq -r --arg arch "$ARCH" '.[] | select(.metadata.container.tags[] | startswith("server-\($arch)")) | .metadata.container.tags[]' \
-        | sort -r | head -n1 | awk -F '-' '{print $3}')
+    LCPP_TAG=$(fetch_llama_tag "server-${ARCH}")
     BASE_TAG=server-${ARCH}-${LCPP_TAG}
 fi
 
 # Abort if LCPP_TAG is empty.
 if [[ -z "$LCPP_TAG" ]]; then
-    echo "Abort: Could not find llama-server container for arch: $ARCH"
+    log_info "Abort: Could not find llama-server container for arch: $ARCH"
     exit 1
+else
+    log_info "LCPP_TAG: $LCPP_TAG"
+fi
+
+# Local dry run: stop after tag resolution when DEBUG_ABORT_BUILD is non-empty
+if [[ -n "$DEBUG_ABORT_BUILD" ]]; then
+    log_info "Abort: DEBUG_ABORT_BUILD set"
+    exit 0
 fi
 
 for CONTAINER_TYPE in non-root root; do
@@ -68,7 +151,7 @@ for CONTAINER_TYPE in non-root root; do
         USER_HOME=/app
     fi
 
-    echo "Building $CONTAINER_TYPE $CONTAINER_TAG $LS_VER"
+    log_info "Building $CONTAINER_TYPE $CONTAINER_TAG $LS_VER"
     docker build -f llama-swap.Containerfile --build-arg BASE_TAG=${BASE_TAG} --build-arg LS_VER=${LS_VER} --build-arg UID=${USER_UID} \
         --build-arg LS_REPO=${LS_REPO} --build-arg GID=${USER_GID} --build-arg USER_HOME=${USER_HOME} -t ${CONTAINER_TAG} -t ${CONTAINER_LATEST} \
         --build-arg BASE_IMAGE=${BASE_IMAGE} .