2 changes: 1 addition & 1 deletion .github/workflows/image-pr.yml
@@ -40,7 +40,7 @@ jobs:
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'false'
- tag-suffix: '-cublas-cuda12-ffmpeg'
+ tag-suffix: '-gpu-nvidia-cuda12-ffmpeg'
ffmpeg: 'true'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
20 changes: 4 additions & 16 deletions .github/workflows/image.yml
@@ -86,38 +86,32 @@ jobs:
base-image: "ubuntu:22.04"
runs-on: 'ubuntu-latest'
aio: "-aio-cpu"
- latest-image: 'latest-cpu'
- latest-image-aio: 'latest-aio-cpu'
makeflags: "--jobs=4 --output-sync=target"
skip-drivers: 'false'
- build-type: 'cublas'
cuda-major-version: "11"
cuda-minor-version: "7"
platforms: 'linux/amd64'
tag-latest: 'false'
- tag-suffix: '-cublas-cuda11'
+ tag-suffix: '-gpu-nvidia-cuda11'
ffmpeg: 'true'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
makeflags: "--jobs=4 --output-sync=target"
skip-drivers: 'false'
- latest-image: 'latest-gpu-nvidia-cuda-11'
aio: "-aio-gpu-nvidia-cuda-11"
- latest-image-aio: 'latest-aio-gpu-nvidia-cuda-11'
- build-type: 'cublas'
cuda-major-version: "12"
cuda-minor-version: "0"
platforms: 'linux/amd64'
tag-latest: 'false'
- tag-suffix: '-cublas-cuda12'
+ tag-suffix: '-gpu-nvidia-cuda12'
ffmpeg: 'true'
runs-on: 'ubuntu-latest'
base-image: "ubuntu:22.04"
skip-drivers: 'false'
makeflags: "--jobs=4 --output-sync=target"
- latest-image: 'latest-gpu-nvidia-cuda-12'
aio: "-aio-gpu-nvidia-cuda-12"
- latest-image-aio: 'latest-aio-gpu-nvidia-cuda-12'
- build-type: 'vulkan'
platforms: 'linux/amd64'
tag-latest: 'false'
@@ -127,33 +121,27 @@
base-image: "ubuntu:22.04"
skip-drivers: 'false'
makeflags: "--jobs=4 --output-sync=target"
- latest-image: 'latest-gpu-vulkan'
aio: "-aio-gpu-vulkan"
- latest-image-aio: 'latest-aio-gpu-vulkan'
- build-type: 'sycl_f16'
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
- tag-suffix: '-sycl-f16'
+ tag-suffix: '-gpu-intel-f16'
ffmpeg: 'true'
runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target"
- latest-image: 'latest-gpu-intel-f16'
aio: "-aio-gpu-intel-f16"
- latest-image-aio: 'latest-aio-gpu-intel-f16'
- build-type: 'sycl_f32'
platforms: 'linux/amd64'
tag-latest: 'false'
base-image: "quay.io/go-skynet/intel-oneapi-base:latest"
grpc-base-image: "ubuntu:22.04"
- tag-suffix: '-sycl-f32'
+ tag-suffix: '-gpu-intel-f32'
ffmpeg: 'true'
runs-on: 'ubuntu-latest'
makeflags: "--jobs=3 --output-sync=target"
- latest-image: 'latest-gpu-intel-f32'
aio: "-aio-gpu-intel-f32"
- latest-image-aio: 'latest-aio-gpu-intel-f32'

gh-runner:
uses: ./.github/workflows/image_build.yml
40 changes: 4 additions & 36 deletions .github/workflows/image_build.yml
@@ -33,14 +33,6 @@ on:
description: 'Tag latest'
default: ''
type: string
- latest-image:
- description: 'Tag latest'
- default: ''
- type: string
- latest-image-aio:
- description: 'Tag latest'
- default: ''
- type: string
tag-suffix:
description: 'Tag suffix'
default: ''
@@ -164,7 +156,7 @@ jobs:
type=sha
flavor: |
latest=${{ inputs.tag-latest }}
- suffix=${{ inputs.tag-suffix }}
+ suffix=${{ inputs.tag-suffix }},onlatest=true
- name: Docker meta for PR
id: meta_pull_request
if: github.event_name == 'pull_request'
@@ -191,7 +183,7 @@
type=semver,pattern={{raw}}
flavor: |
latest=${{ inputs.tag-latest }}
- suffix=${{ inputs.aio }}
+ suffix=${{ inputs.aio }},onlatest=true

- name: Docker meta AIO (dockerhub)
if: inputs.aio != ''
@@ -204,7 +196,8 @@
type=ref,event=branch
type=semver,pattern={{raw}}
flavor: |
- suffix=${{ inputs.aio }}
+ latest=${{ inputs.tag-latest }}
+ suffix=${{ inputs.aio }},onlatest=true

- name: Set up QEMU
uses: docker/setup-qemu-action@master
@@ -255,7 +248,6 @@
cache-from: type=gha
platforms: ${{ inputs.platforms }}
push: ${{ github.event_name != 'pull_request' }}
- load: ${{ github.event_name != 'pull_request' && inputs.latest-image != '' && github.ref_type == 'tag' }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
### Start testing image
@@ -299,7 +291,6 @@
file: ./Dockerfile.aio
platforms: ${{ inputs.platforms }}
push: ${{ github.event_name != 'pull_request' }}
- load: ${{ github.event_name != 'pull_request' && inputs.latest-image-aio != '' && github.ref_type == 'tag' }}
tags: ${{ steps.meta_aio.outputs.tags }}
labels: ${{ steps.meta_aio.outputs.labels }}

@@ -315,32 +306,9 @@
file: ./Dockerfile.aio
platforms: ${{ inputs.platforms }}
push: ${{ github.event_name != 'pull_request' }}
- load: ${{ github.event_name != 'pull_request' && inputs.latest-image-aio != '' && github.ref_type == 'tag' }}
tags: ${{ steps.meta_aio_dockerhub.outputs.tags }}
labels: ${{ steps.meta_aio_dockerhub.outputs.labels }}

- name: Cleanup
run: |
docker builder prune -f
docker system prune --force --volumes --all

- - name: Latest tag
- # run this on branches, when it is a tag and there is a latest-image defined
- if: github.event_name != 'pull_request' && inputs.latest-image != '' && github.ref_type == 'tag'
- run: |
- docker tag localai/localai:${{ steps.meta.outputs.version }} localai/localai:${{ inputs.latest-image }}
- docker push localai/localai:${{ inputs.latest-image }}
- docker tag quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image }}
- docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image }}
- - name: Latest AIO tag
- # run this on branches, when it is a tag and there is a latest-image defined
- if: github.event_name != 'pull_request' && inputs.latest-image-aio != '' && github.ref_type == 'tag'
- run: |
- docker tag localai/localai:${{ steps.meta_aio_dockerhub.outputs.version }} localai/localai:${{ inputs.latest-image-aio }}
- docker push localai/localai:${{ inputs.latest-image-aio }}
- docker tag quay.io/go-skynet/local-ai:${{ steps.meta_aio.outputs.version }} quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}
- docker push quay.io/go-skynet/local-ai:${{ inputs.latest-image-aio }}

- name: job summary
run: |
echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY
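With `suffix=...,onlatest=true`, docker/metadata-action appends the configured suffix to the generated `latest` tag itself, which is why the dedicated `latest-image`/`latest-image-aio` inputs and the manual `docker tag`/`docker push` steps removed above become redundant. A minimal sketch of the mechanism, assuming docker/metadata-action v5 semantics — the image name and suffix here are illustrative only, not the workflow's exact values:

```yaml
# Sketch: with latest=true the action emits a `latest` tag, and
# onlatest=true appends the suffix to that tag as well, so a v2.0.0
# release would be tagged both v2.0.0-gpu-nvidia-cuda12 and
# latest-gpu-nvidia-cuda12.
- name: Docker meta
  id: meta
  uses: docker/metadata-action@v5
  with:
    images: quay.io/go-skynet/local-ai
    tags: |
      type=semver,pattern={{raw}}
    flavor: |
      latest=true
      suffix=-gpu-nvidia-cuda12,onlatest=true
```

For matrix entries where `tag-latest` resolves to false, no `latest` tag is emitted at all, so the suffix then only affects the branch and version tags.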
16 changes: 8 additions & 8 deletions docs/content/docs/features/GPU-acceleration.md
@@ -71,15 +71,15 @@ To use CUDA, use the images with the `cublas` tag, for example.

The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=tags):

-- CUDA `11` tags: `master-cublas-cuda11`, `v1.40.0-cublas-cuda11`, ...
-- CUDA `12` tags: `master-cublas-cuda12`, `v1.40.0-cublas-cuda12`, ...
-- CUDA `11` + FFmpeg tags: `master-cublas-cuda11-ffmpeg`, `v1.40.0-cublas-cuda11-ffmpeg`, ...
-- CUDA `12` + FFmpeg tags: `master-cublas-cuda12-ffmpeg`, `v1.40.0-cublas-cuda12-ffmpeg`, ...
+- CUDA `11` tags: `master-gpu-nvidia-cuda11`, `v1.40.0-gpu-nvidia-cuda11`, ...
+- CUDA `12` tags: `master-gpu-nvidia-cuda12`, `v1.40.0-gpu-nvidia-cuda12`, ...
+- CUDA `11` + FFmpeg tags: `master-gpu-nvidia-cuda11-ffmpeg`, `v1.40.0-gpu-nvidia-cuda11-ffmpeg`, ...
+- CUDA `12` + FFmpeg tags: `master-gpu-nvidia-cuda12-ffmpeg`, `v1.40.0-gpu-nvidia-cuda12-ffmpeg`, ...

In addition to the commands to run LocalAI normally, you need to specify `--gpus all` to docker, for example:

```bash
-docker run --rm -ti --gpus all -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -v $PWD/models:/models quay.io/go-skynet/local-ai:v1.40.0-cublas-cuda12
+docker run --rm -ti --gpus all -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -v $PWD/models:/models quay.io/go-skynet/local-ai:v1.40.0-gpu-nvidia-cuda12
```

If the GPU inferencing is working, you should be able to see something like:
@@ -259,7 +259,7 @@ If building from source, you need to install [Intel oneAPI Base Toolkit](https:/

### Container images

-To use SYCL, use the images with the `sycl-f16` or `sycl-f32` tag, for example `{{< version >}}-sycl-f32-core`, `{{< version >}}-sycl-f16-ffmpeg-core`, ...
+To use SYCL, use the images with the `gpu-intel-f16` or `gpu-intel-f32` tag, for example `{{< version >}}-gpu-intel-f32-core`, `{{< version >}}-gpu-intel-f16-ffmpeg-core`, ...

The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=tags).

@@ -268,15 +268,15 @@ The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=ta
To run LocalAI with Docker and sycl starting `phi-2`, you can use the following command as an example:

```bash
-docker run -e DEBUG=true --privileged -ti -v $PWD/models:/build/models -p 8080:8080 -v /dev/dri:/dev/dri --rm quay.io/go-skynet/local-ai:master-sycl-f32-ffmpeg-core phi-2
+docker run -e DEBUG=true --privileged -ti -v $PWD/models:/build/models -p 8080:8080 -v /dev/dri:/dev/dri --rm quay.io/go-skynet/local-ai:master-gpu-intel-f32-ffmpeg-core phi-2
```

### Notes

In addition to the commands to run LocalAI normally, you need to specify `--device /dev/dri` to docker, for example:

```bash
-docker run --rm -ti --device /dev/dri -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -v $PWD/models:/models quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16-ffmpeg-core
+docker run --rm -ti --device /dev/dri -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -v $PWD/models:/models quay.io/go-skynet/local-ai:{{< version >}}-gpu-intel-f16-ffmpeg-core
```

Note also that sycl does have a known issue to hang with `mmap: true`. You have to disable it in the model configuration if explicitly enabled.
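A hypothetical model configuration illustrating that workaround — the file name and model are examples only; `mmap` is the relevant field, assuming your backend honours it:

```yaml
# models/phi-2.yaml — illustrative sketch
name: phi-2
parameters:
  model: phi-2.Q4_K_M.gguf
mmap: false  # avoid the known SYCL hang triggered by mmap: true
```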
16 changes: 8 additions & 8 deletions docs/content/docs/getting-started/container-images.md
@@ -165,39 +165,39 @@ Standard container images do not have pre-installed models.

| Description | Quay | Docker Hub |
| --- | --- |-------------------------------------------------------------|
-| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-cublas-cuda11` | `localai/localai:master-cublas-cuda11` |
+| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-nvidia-cuda11` | `localai/localai:master-gpu-nvidia-cuda11` |
| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-nvidia-cuda-11` | `localai/localai:latest-gpu-nvidia-cuda-11` |
-| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11` | `localai/localai:{{< version >}}-cublas-cuda11` |
+| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-nvidia-cuda11` | `localai/localai:{{< version >}}-gpu-nvidia-cuda11` |

{{% /tab %}}

{{% tab tabName="GPU Images CUDA 12" %}}

| Description | Quay | Docker Hub |
| --- | --- |-------------------------------------------------------------|
-| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-cublas-cuda12` | `localai/localai:master-cublas-cuda12` |
+| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-nvidia-cuda12` | `localai/localai:master-gpu-nvidia-cuda12` |
| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-nvidia-cuda-12` | `localai/localai:latest-gpu-nvidia-cuda-12` |
-| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12` | `localai/localai:{{< version >}}-cublas-cuda12` |
+| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-nvidia-cuda12` | `localai/localai:{{< version >}}-gpu-nvidia-cuda12` |

{{% /tab %}}

{{% tab tabName="Intel GPU (sycl f16)" %}}

| Description | Quay | Docker Hub |
| --- | --- |-------------------------------------------------------------|
-| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-sycl-f16` | `localai/localai:master-sycl-f16` |
+| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-intel-f16` | `localai/localai:master-gpu-intel-f16` |
| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-intel-f16` | `localai/localai:latest-gpu-intel-f16` |
-| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f16` | `localai/localai:{{< version >}}-sycl-f16` |
+| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-intel-f16` | `localai/localai:{{< version >}}-gpu-intel-f16` |

{{% /tab %}}

{{% tab tabName="Intel GPU (sycl f32)" %}}

| Description | Quay | Docker Hub |
| --- | --- |-------------------------------------------------------------|
-| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-sycl-f32` | `localai/localai:master-sycl-f32` |
+| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-gpu-intel-f32` | `localai/localai:master-gpu-intel-f32` |
| Latest tag | `quay.io/go-skynet/local-ai:latest-gpu-intel-f32` | `localai/localai:latest-gpu-intel-f32` |
-| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-sycl-f32` | `localai/localai:{{< version >}}-sycl-f32` |
+| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-gpu-intel-f32` | `localai/localai:{{< version >}}-gpu-intel-f32` |

{{% /tab %}}
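To fetch one of the images above, pull the tag that matches your GPU from either registry; the two commands below are examples only:

```bash
# Rolling "latest" CUDA 12 image from quay
docker pull quay.io/go-skynet/local-ai:latest-gpu-nvidia-cuda-12

# Development Intel SYCL f32 image from Docker Hub
docker pull localai/localai:master-gpu-intel-f32
```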

8 changes: 4 additions & 4 deletions docs/content/docs/whats-new.md
@@ -288,8 +288,8 @@ From this release the default behavior of images has changed. Compilation is not
### Container images
- Standard (GPT + `stablediffusion`): `quay.io/go-skynet/local-ai:v1.20.0`
- FFmpeg: `quay.io/go-skynet/local-ai:v1.20.0-ffmpeg`
-- CUDA 11+FFmpeg: `quay.io/go-skynet/local-ai:v1.20.0-cublas-cuda11-ffmpeg`
-- CUDA 12+FFmpeg: `quay.io/go-skynet/local-ai:v1.20.0-cublas-cuda12-ffmpeg`
+- CUDA 11+FFmpeg: `quay.io/go-skynet/local-ai:v1.20.0-gpu-nvidia-cuda11-ffmpeg`
+- CUDA 12+FFmpeg: `quay.io/go-skynet/local-ai:v1.20.0-gpu-nvidia-cuda12-ffmpeg`

### Updates

@@ -339,8 +339,8 @@ You can check the full changelog in [Github](https://github.com/go-skynet/LocalA
Container images:
- Standard (GPT + `stablediffusion`): `quay.io/go-skynet/local-ai:v1.19.2`
- FFmpeg: `quay.io/go-skynet/local-ai:v1.19.2-ffmpeg`
-- CUDA 11+FFmpeg: `quay.io/go-skynet/local-ai:v1.19.2-cublas-cuda11-ffmpeg`
-- CUDA 12+FFmpeg: `quay.io/go-skynet/local-ai:v1.19.2-cublas-cuda12-ffmpeg`
+- CUDA 11+FFmpeg: `quay.io/go-skynet/local-ai:v1.19.2-gpu-nvidia-cuda11-ffmpeg`
+- CUDA 12+FFmpeg: `quay.io/go-skynet/local-ai:v1.19.2-gpu-nvidia-cuda12-ffmpeg`

---

4 changes: 2 additions & 2 deletions docs/static/install.sh
@@ -672,7 +672,7 @@ install_docker() {
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG $STARTCOMMAND
elif [ "$HAS_CUDA" ]; then
# Default to CUDA 12
- IMAGE_TAG=${LOCALAI_VERSION}-cublas-cuda12
+ IMAGE_TAG=${LOCALAI_VERSION}-gpu-nvidia-cuda12
# AIO
if [ "$USE_AIO" = true ]; then
IMAGE_TAG=${LOCALAI_VERSION}-aio-gpu-nvidia-cuda-12
@@ -716,7 +716,7 @@ install_docker() {
-d -p $PORT:8080 --name local-ai localai/localai:$IMAGE_TAG $STARTCOMMAND
elif [ "$HAS_INTEL" ]; then
# Default to FP32 for better compatibility
- IMAGE_TAG=${LOCALAI_VERSION}-sycl-f32
+ IMAGE_TAG=${LOCALAI_VERSION}-gpu-intel-f32
# AIO
if [ "$USE_AIO" = true ]; then
IMAGE_TAG=${LOCALAI_VERSION}-aio-gpu-intel-f32
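With these renames, a Docker-based install picks up the new tags automatically. A hedged usage sketch — `USE_AIO` and `LOCALAI_VERSION` appear in the script excerpt above, but the exact set of environment variables the published installer honours may differ by release:

```bash
# Example: run the installer and opt into the all-in-one (AIO) image
curl -fsSL https://localai.io/install.sh | USE_AIO=true sh
```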