From 190b09bc67aa9ffa4d34ba7a701e577a3f2e6159 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto <mudler@localai.io>
Date: Wed, 25 Jun 2025 11:46:19 +0200
Subject: [PATCH] chore(ci): fix latest tag by using docker meta action

Also unify tag naming

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
---
 .github/workflows/image-pr.yml                |  2 +-
 .github/workflows/image.yml                   | 20 ++--------
 .github/workflows/image_build.yml             | 40 ++-----------------
 .../content/docs/features/GPU-acceleration.md | 16 ++++----
 .../docs/getting-started/container-images.md  | 16 ++++----
 docs/content/docs/whats-new.md                |  8 ++--
 docs/static/install.sh                        |  4 +-
 7 files changed, 31 insertions(+), 75 deletions(-)

diff --git a/.github/workflows/image-pr.yml b/.github/workflows/image-pr.yml
index 0fa9474488fa..0a3ed2708a6f 100644
--- a/.github/workflows/image-pr.yml
+++ b/.github/workflows/image-pr.yml
@@ -40,7 +40,7 @@ jobs:
             cuda-minor-version: "0"
             platforms: 'linux/amd64'
             tag-latest: 'false'
-            tag-suffix: '-cublas-cuda12-ffmpeg'
+            tag-suffix: '-gpu-nvidia-cuda12-ffmpeg'
             ffmpeg: 'true'
             runs-on: 'ubuntu-latest'
             base-image: "ubuntu:22.04"
diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml
index 036973f7c028..8906d90f3b2d 100644
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -86,8 +86,6 @@ jobs:
             base-image: "ubuntu:22.04"
             runs-on: 'ubuntu-latest'
             aio: "-aio-cpu"
-            latest-image: 'latest-cpu'
-            latest-image-aio: 'latest-aio-cpu'
             makeflags: "--jobs=4 --output-sync=target"
             skip-drivers: 'false'
           - build-type: 'cublas'
@@ -95,29 +93,25 @@ jobs:
             cuda-minor-version: "7"
             platforms: 'linux/amd64'
             tag-latest: 'false'
-            tag-suffix: '-cublas-cuda11'
+            tag-suffix: '-gpu-nvidia-cuda11'
             ffmpeg: 'true'
             runs-on: 'ubuntu-latest'
             base-image: "ubuntu:22.04"
             makeflags: "--jobs=4 --output-sync=target"
             skip-drivers: 'false'
-            latest-image: 'latest-gpu-nvidia-cuda-11'
             aio: "-aio-gpu-nvidia-cuda-11"
-            latest-image-aio: 'latest-aio-gpu-nvidia-cuda-11'
           - build-type: 'cublas'
             cuda-major-version: "12"
             cuda-minor-version: "0"
             platforms: 'linux/amd64'
             tag-latest: 'false'
-            tag-suffix: '-cublas-cuda12'
+            tag-suffix: '-gpu-nvidia-cuda12'
             ffmpeg: 'true'
             runs-on: 'ubuntu-latest'
             base-image: "ubuntu:22.04"
             skip-drivers: 'false'
             makeflags: "--jobs=4 --output-sync=target"
-            latest-image: 'latest-gpu-nvidia-cuda-12'
             aio: "-aio-gpu-nvidia-cuda-12"
-            latest-image-aio: 'latest-aio-gpu-nvidia-cuda-12'
           - build-type: 'vulkan'
             platforms: 'linux/amd64'
             tag-latest: 'false'
@@ -127,33 +121,27 @@ jobs:
             base-image: "ubuntu:22.04"
             skip-drivers: 'false'
             makeflags: "--jobs=4 --output-sync=target"
-            latest-image: 'latest-gpu-vulkan'
             aio: "-aio-gpu-vulkan"
-            latest-image-aio: 'latest-aio-gpu-vulkan'
           - build-type: 'sycl_f16'
             platforms: 'linux/amd64'
             tag-latest: 'false'
             base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
             grpc-base-image: "ubuntu:22.04"
-            tag-suffix: '-sycl-f16'
+            tag-suffix: '-gpu-intel-f16'
             ffmpeg: 'true'
             runs-on: 'ubuntu-latest'
             makeflags: "--jobs=3 --output-sync=target"
-            latest-image: 'latest-gpu-intel-f16'
             aio: "-aio-gpu-intel-f16"
-            latest-image-aio: 'latest-aio-gpu-intel-f16'
           - build-type: 'sycl_f32'
             platforms: 'linux/amd64'
             tag-latest: 'false'
             base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
             grpc-base-image: "ubuntu:22.04"
-            tag-suffix: '-sycl-f32'
+            tag-suffix: '-gpu-intel-f32'
             ffmpeg: 'true'
             runs-on: 'ubuntu-latest'
             makeflags: "--jobs=3 --output-sync=target"
-            latest-image: 'latest-gpu-intel-f32'
             aio: "-aio-gpu-intel-f32"
-            latest-image-aio: 'latest-aio-gpu-intel-f32'
 
   gh-runner:
     uses: ./.github/workflows/image_build.yml
diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml
index 020695fae1bf..fe021823b9c7 100644
--- a/.github/workflows/image_build.yml
+++ b/.github/workflows/image_build.yml
@@ -33,14 +33,6 @@ on:
         description: 'Tag latest'
         default: ''
         type: string
-      latest-image:
-          description: 'Tag latest'
-          default: ''
-          type: string
-      latest-image-aio:
-          description: 'Tag latest'
-          default: ''
-          type: string
       tag-suffix:
         description: 'Tag suffix'
         default: ''
@@ -164,7 +156,7 @@ jobs:
             type=sha
           flavor: |
             latest=${{ inputs.tag-latest }}
-            suffix=${{ inputs.tag-suffix }}
+            suffix=${{ inputs.tag-suffix }},onlatest=true
       - name: Docker meta for PR
         id: meta_pull_request
         if: github.event_name == 'pull_request'
@@ -191,7 +183,7 @@ jobs:
             type=semver,pattern={{raw}}
           flavor: |
             latest=${{ inputs.tag-latest }}
-            suffix=${{ inputs.aio }}
+            suffix=${{ inputs.aio }},onlatest=true
 
       - name: Docker meta AIO (dockerhub)
         if: inputs.aio != ''
@@ -204,7 +196,8 @@ jobs:
             type=ref,event=branch
             type=semver,pattern={{raw}}
           flavor: |
-            suffix=${{ inputs.aio }}
+            latest=${{ inputs.tag-latest }}
+            suffix=${{ inputs.aio }},onlatest=true
 
       - name: Set up QEMU
         uses: docker/setup-qemu-action@master
@@ -255,7 +248,6 @@ jobs:
           cache-from: type=gha
           platforms: ${{ inputs.platforms }}
           push: ${{ github.event_name != 'pull_request' }}
-          load: ${{ github.event_name != 'pull_request' && inputs.latest-image != '' && github.ref_type == 'tag' }}
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
 ### Start testing image
@@ -299,7 +291,6 @@ jobs:
           file: ./Dockerfile.aio
           platforms: ${{ inputs.platforms }}
           push: ${{ github.event_name != 'pull_request' }}
-          load: ${{ github.event_name != 'pull_request' && inputs.latest-image-aio != '' && github.ref_type == 'tag' }}
           tags: ${{ steps.meta_aio.outputs.tags }}
           labels: ${{ steps.meta_aio.outputs.labels }}
 
@@ -315,32 +306,9 @@ jobs:
           file: ./Dockerfile.aio
           platforms: ${{ inputs.platforms }}
           push: ${{ github.event_name != 'pull_request' }}
-          load: ${{ github.event_name != 'pull_request' && inputs.latest-image-aio != '' && github.ref_type == 'tag' }}
           tags: ${{ steps.meta_aio_dockerhub.outputs.tags }}
           labels: ${{ steps.meta_aio_dockerhub.outputs.labels }}
 
-      - name: Cleanup
-        run: |
-          docker builder prune -f
-          docker system prune --force --volumes --all
-
-      - name: Latest tag
-        # run this on branches, when it is a tag and there is a latest-image defined
-        if: github.event_name != 'pull_request' && inputs.latest-image != ''  && github.ref_type == 'tag'
-        run: |
-          docker tag localai/localai:${{ steps.meta.outputs.version }} localai/localai:${{ inputs.latest-image }}
-          docker push localai/localai:${{ inputs.latest-image }}
-          docker tag quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image }}
-          docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image }}
-      - name: Latest AIO tag
-        # run this on branches, when it is a tag and there is a latest-image defined
-        if: github.event_name != 'pull_request' && inputs.latest-image-aio != ''  && github.ref_type == 'tag'
-        run: |
-          docker tag localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }} localai/localai:${{ inputs.latest-image-aio }}
-          docker push localai/localai:${{ inputs.latest-image-aio }}
-          docker tag quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
-          docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
-
       - name: job summary
         run: |
           echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
diff --git a/docs/content/docs/features/GPU-acceleration.md b/docs/content/docs/features/GPU-acceleration.md
index f8f9ca160c81..4fd3c039aec7 100644
--- a/docs/content/docs/features/GPU-acceleration.md
+++ b/docs/content/docs/features/GPU-acceleration.md
@@ -71,15 +71,15 @@ To use CUDA, use the images with the `cublas` tag, for example.
 
 The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=tags):
 
-- CUDA `11` tags: `master-cublas-cuda11`, `v1.40.0-cublas-cuda11`, ...
-- CUDA `12` tags: `master-cublas-cuda12`, `v1.40.0-cublas-cuda12`, ...
-- CUDA `11` + FFmpeg tags: `master-cublas-cuda11-ffmpeg`, `v1.40.0-cublas-cuda11-ffmpeg`, ...
-- CUDA `12` + FFmpeg tags: `master-cublas-cuda12-ffmpeg`, `v1.40.0-cublas-cuda12-ffmpeg`, ...
+- CUDA `11` tags: `master-gpu-nvidia-cuda11`, `v1.40.0-gpu-nvidia-cuda11`, ...
+- CUDA `12` tags: `master-gpu-nvidia-cuda12`, `v1.40.0-gpu-nvidia-cuda12`, ...
+- CUDA `11` + FFmpeg tags: `master-gpu-nvidia-cuda11-ffmpeg`, `v1.40.0-gpu-nvidia-cuda11-ffmpeg`, ...
+- CUDA `12` + FFmpeg tags: `master-gpu-nvidia-cuda12-ffmpeg`, `v1.40.0-gpu-nvidia-cuda12-ffmpeg`, ...
 
 In addition to the commands to run LocalAI normally, you need to specify `--gpus all` to docker, for example:
 
 ```bash
-docker run --rm -ti --gpus all -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -v $PWD/models:/models quay.io/go-skynet/local-ai:v1.40.0-cublas-cuda12
+docker run --rm -ti --gpus all -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -v $PWD/models:/models quay.io/go-skynet/local-ai:v1.40.0-gpu-nvidia-cuda12
 ```
 
 If the GPU inferencing is working, you should be able to see something like:
@@ -259,7 +259,7 @@ If building from source, you need to install [Intel oneAPI Base Toolkit](https:/
 
 ### Container images
 
-To use SYCL, use the images with the `sycl-f16` or `sycl-f32` tag, for example `{{< version >}}-sycl-f32-core`, `{{< version >}}-sycl-f16-ffmpeg-core`, ...
+To use SYCL, use the images with the `gpu-intel-f16` or `gpu-intel-f32` tag, for example `{{< version >}}-gpu-intel-f32-core`, `{{< version >}}-gpu-intel-f16-ffmpeg-core`, ...
 
 The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=tags).
 
@@ -268,7 +268,7 @@ The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=ta
 To run LocalAI with Docker and sycl starting `phi-2`, you can use the following command as an example:
 
 ```bash
-docker run -e DEBUG=true --privileged -ti -v $PWD/models:/build/models -p 8080:8080  -v /dev/dri:/dev/dri --rm quay.io/go-skynet/local-ai:master-sycl-f32-ffmpeg-core phi-2
+docker run -e DEBUG=true --privileged -ti -v $PWD/models:/build/models -p 8080:8080  -v /dev/dri:/dev/dri --rm quay.io/go-skynet/local-ai:master-gpu-intel-f32-ffmpeg-core phi-2
 ```
 
 ### Notes
@@ -276,7 +276,7 @@ docker run -e DEBUG=true --privileged -ti -v $PWD/models:/build/models -p 8080:8
 In addition to the commands to run LocalAI normally, you need to specify `--device /dev/dri` to docker, for example:
 
 ```bash
-docker run --rm -ti --device /dev/dri -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -v $PWD/models:/models quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16-ffmpeg-core
+docker run --rm -ti --device /dev/dri -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -v $PWD/models:/models quay.io/go-skynet/local-ai:{{< version >}}-gpu-intel-f16-ffmpeg-core
 ```
 
 Note also that sycl does have a known issue to hang with `mmap: true`. You have to disable it in the model configuration if explicitly enabled.
diff --git a/docs/content/docs/getting-started/container-images.md b/docs/content/docs/getting-started/container-images.md
index b0b7be9d6758..4edbc6191ffc 100644
--- a/docs/content/docs/getting-started/container-images.md
+++ b/docs/content/docs/getting-started/container-images.md
@@ -165,9 +165,9 @@ Standard container images do not have pre-installed models.
 
 | Description | Quay | Docker Hub                                                  |
 | --- | --- |-------------------------------------------------------------|
-| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-cublas-cuda11` | `localai/localai:master-cublas-cuda11`                      |
+| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-nvidia-cuda11` | `localai/localai:master-gpu-nvidia-cuda11`                      |
 | Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-nvidia-cuda-11` | `localai/localai:latest-gpu-nvidia-cuda-11`                      |
-| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11` | `localai/localai:{{< version >}}-cublas-cuda11`             |
+| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-nvidia-cuda11` | `localai/localai:{{< version >}}-gpu-nvidia-cuda11`             |
 
 {{% /tab %}}
 
@@ -175,9 +175,9 @@ Standard container images do not have pre-installed models.
 
 | Description | Quay | Docker Hub                                                  |
 | --- | --- |-------------------------------------------------------------|
-| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-cublas-cuda12` | `localai/localai:master-cublas-cuda12`                      |
+| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-nvidia-cuda12` | `localai/localai:master-gpu-nvidia-cuda12`                      |
 | Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-nvidia-cuda-12` | `localai/localai:latest-gpu-nvidia-cuda-12`                 |
-| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12` | `localai/localai:{{< version >}}-cublas-cuda12`             |
+| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-nvidia-cuda12` | `localai/localai:{{< version >}}-gpu-nvidia-cuda12`             |
 
 {{% /tab %}}
 
@@ -185,9 +185,9 @@ Standard container images do not have pre-installed models.
 
 | Description | Quay | Docker Hub                                                  |
 | --- | --- |-------------------------------------------------------------|
-| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-sycl-f16` | `localai/localai:master-sycl-f16`                      |
+| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-intel-f16` | `localai/localai:master-gpu-intel-f16`                      |
 | Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-intel-f16` | `localai/localai:latest-gpu-intel-f16`                      |
-| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16` | `localai/localai:{{< version >}}-sycl-f16`             |
+| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-intel-f16` | `localai/localai:{{< version >}}-gpu-intel-f16`             |
 
 {{% /tab %}}
 
@@ -195,9 +195,9 @@ Standard container images do not have pre-installed models.
 
 | Description | Quay | Docker Hub                                                  |
 | --- | --- |-------------------------------------------------------------|
-| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-sycl-f32` | `localai/localai:master-sycl-f32`                      |
+| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-intel-f32` | `localai/localai:master-gpu-intel-f32`                      |
 | Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-intel-f32` | `localai/localai:latest-gpu-intel-f32`                      |
-| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f32` | `localai/localai:{{< version >}}-sycl-f32`             |
+| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-intel-f32` | `localai/localai:{{< version >}}-gpu-intel-f32`             |
 
 {{% /tab %}}
 
diff --git a/docs/content/docs/whats-new.md b/docs/content/docs/whats-new.md
index 8f07b1cb5d05..e4f7ab25cb51 100644
--- a/docs/content/docs/whats-new.md
+++ b/docs/content/docs/whats-new.md
@@ -288,8 +288,8 @@ From this release the default behavior of images has changed. Compilation is not
 ### Container images
 - Standard (GPT + `stablediffusion`): `quay.io/go-skynet/local-ai:v1.20.0`
 - FFmpeg: `quay.io/go-skynet/local-ai:v1.20.0-ffmpeg`
-- CUDA 11+FFmpeg: `quay.io/go-skynet/local-ai:v1.20.0-cublas-cuda11-ffmpeg`
-- CUDA 12+FFmpeg: `quay.io/go-skynet/local-ai:v1.20.0-cublas-cuda12-ffmpeg`
+- CUDA 11+FFmpeg: `quay.io/go-skynet/local-ai:v1.20.0-gpu-nvidia-cuda11-ffmpeg`
+- CUDA 12+FFmpeg: `quay.io/go-skynet/local-ai:v1.20.0-gpu-nvidia-cuda12-ffmpeg`
 
 ### Updates
 
@@ -339,8 +339,8 @@ You can check the full changelog in [Github](https://github.com/go-skynet/LocalA
 Container images:
 - Standard (GPT + `stablediffusion`): `quay.io/go-skynet/local-ai:v1.19.2`
 - FFmpeg: `quay.io/go-skynet/local-ai:v1.19.2-ffmpeg`
-- CUDA 11+FFmpeg: `quay.io/go-skynet/local-ai:v1.19.2-cublas-cuda11-ffmpeg`
-- CUDA 12+FFmpeg: `quay.io/go-skynet/local-ai:v1.19.2-cublas-cuda12-ffmpeg`
+- CUDA 11+FFmpeg: `quay.io/go-skynet/local-ai:v1.19.2-gpu-nvidia-cuda11-ffmpeg`
+- CUDA 12+FFmpeg: `quay.io/go-skynet/local-ai:v1.19.2-gpu-nvidia-cuda12-ffmpeg`
 
 --- 
 
diff --git a/docs/static/install.sh b/docs/static/install.sh
index 7f8258359381..d0c21b63a3bc 100755
--- a/docs/static/install.sh
+++ b/docs/static/install.sh
@@ -672,7 +672,7 @@ install_docker() {
             -d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG $STARTCOMMAND
     elif [ "$HAS_CUDA" ]; then
         # Default to CUDA 12
-        IMAGE_TAG=${LOCALAI_VERSION}-cublas-cuda12
+        IMAGE_TAG=${LOCALAI_VERSION}-gpu-nvidia-cuda12
         # AIO
         if [ "$USE_AIO" = true ]; then
             IMAGE_TAG=${LOCALAI_VERSION}-aio-gpu-nvidia-cuda-12
@@ -716,7 +716,7 @@ install_docker() {
             -d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG $STARTCOMMAND
     elif [ "$HAS_INTEL" ]; then
         # Default to FP32 for better compatibility
-        IMAGE_TAG=${LOCALAI_VERSION}-sycl-f32
+        IMAGE_TAG=${LOCALAI_VERSION}-gpu-intel-f32
         # AIO
         if [ "$USE_AIO" = true ]; then
             IMAGE_TAG=${LOCALAI_VERSION}-aio-gpu-intel-f32