diff --git a/Dockerfile b/Dockerfile
index 3bfc4a388b95..a31ce0a8c243 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,82 +2,17 @@
 ARG BASE_IMAGE=ubuntu:22.04
 ARG GRPC_BASE_IMAGE=${BASE_IMAGE}
 ARG INTEL_BASE_IMAGE=${BASE_IMAGE}

-# The requirements-core target is common to all images. It should not be placed in requirements-core unless every single build will use it.
 FROM ${BASE_IMAGE} AS requirements
-USER root
-
-ARG GO_VERSION=1.22.6
-ARG CMAKE_VERSION=3.26.4
-ARG CMAKE_FROM_SOURCE=false
-ARG TARGETARCH
-ARG TARGETVARIANT
-
 ENV DEBIAN_FRONTEND=noninteractive

 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
-        build-essential \
-        ccache \
-        ca-certificates espeak-ng \
-        curl libssl-dev \
-        git \
-        git-lfs \
-        unzip upx-ucl python3 python-is-python3 && \
+        ca-certificates curl wget espeak-ng libgomp1 \
+        python3 python-is-python3 ffmpeg && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*

-# Install CMake (the version in 22.04 is too old)
-RUN <
 Starting LocalAI[$PROFILE] with the following models: $MODELS"
-exec /build/entrypoint.sh "$@"
+exec /entrypoint.sh "$@"
diff --git a/docs/content/docs/features/GPU-acceleration.md b/docs/content/docs/features/GPU-acceleration.md
index 4fd3c039aec7..550c013c1cdb 100644
--- a/docs/content/docs/features/GPU-acceleration.md
+++ b/docs/content/docs/features/GPU-acceleration.md
@@ -268,7 +268,7 @@ The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=ta
 To run LocalAI with Docker and sycl starting `phi-2`, you can use the following command as an example:

 ```bash
-docker run -e DEBUG=true --privileged -ti -v $PWD/models:/build/models -p 8080:8080 -v /dev/dri:/dev/dri --rm quay.io/go-skynet/local-ai:master-gpu-intel-f32-ffmpeg-core phi-2
+docker run -e DEBUG=true --privileged -ti -v $PWD/models:/models -p 8080:8080 -v /dev/dri:/dev/dri --rm quay.io/go-skynet/local-ai:master-gpu-intel-f32-ffmpeg-core phi-2
 ```

 ### Notes
@@ -296,7 +296,7 @@ To use Vulkan, use the images with the `vulkan` tag, for example `{{< version >}
 To run LocalAI with Docker and Vulkan, you can use the following command as an example:

 ```bash
-docker run -p 8080:8080 -e DEBUG=true -v $PWD/models:/build/models localai/localai:latest-vulkan-ffmpeg-core
+docker run -p 8080:8080 -e DEBUG=true -v $PWD/models:/models localai/localai:latest-vulkan-ffmpeg-core
 ```

 ### Notes
@@ -308,7 +308,7 @@ These flags are the same as the sections above, depending on the hardware, for [
 If you have mixed hardware, you can pass flags for multiple GPUs, for example:

 ```bash
-docker run -p 8080:8080 -e DEBUG=true -v $PWD/models:/build/models \
+docker run -p 8080:8080 -e DEBUG=true -v $PWD/models:/models \
 --gpus=all \ # nvidia passthrough
 --device /dev/dri --device /dev/kfd \ # AMD/Intel passthrough
 localai/localai:latest-vulkan-ffmpeg-core
diff --git a/docs/content/docs/getting-started/container-images.md b/docs/content/docs/getting-started/container-images.md
index 4edbc6191ffc..67b0c44ee54b 100644
--- a/docs/content/docs/getting-started/container-images.md
+++ b/docs/content/docs/getting-started/container-images.md
@@ -92,7 +92,7 @@ services:
       - DEBUG=true
       # ...
     volumes:
-      - ./models:/build/models:cached
+      - ./models:/models:cached
     # decomment the following piece if running with Nvidia GPUs
     # deploy:
     #   resources:
@@ -105,21 +105,21 @@ services:

 {{% alert icon="💡" %}}

-**Models caching**: The **AIO** image will download the needed models on the first run if not already present and store those in `/build/models` inside the container. The AIO models will be automatically updated with new versions of AIO images.
+**Models caching**: The **AIO** image will download the needed models on the first run if not already present and store those in `/models` inside the container. The AIO models will be automatically updated with new versions of AIO images.

 You can change the directory inside the container by specifying a `MODELS_PATH` environment variable (or `--models-path`).

-If you want to use a named model or a local directory, you can mount it as a volume to `/build/models`:
+If you want to use a named model or a local directory, you can mount it as a volume to `/models`:

 ```bash
-docker run -p 8080:8080 --name local-ai -ti -v $PWD/models:/build/models localai/localai:latest-aio-cpu
+docker run -p 8080:8080 --name local-ai -ti -v $PWD/models:/models localai/localai:latest-aio-cpu
 ```

 or associate a volume:

 ```bash
 docker volume create localai-models
-docker run -p 8080:8080 --name local-ai -ti -v localai-models:/build/models localai/localai:latest-aio-cpu
+docker run -p 8080:8080 --name local-ai -ti -v localai-models:/models localai/localai:latest-aio-cpu
 ```

 {{% /alert %}}
diff --git a/docs/content/docs/reference/nvidia-l4t.md b/docs/content/docs/reference/nvidia-l4t.md
index ce0fd5e95c6f..b019aa70cb3f 100644
--- a/docs/content/docs/reference/nvidia-l4t.md
+++ b/docs/content/docs/reference/nvidia-l4t.md
@@ -35,7 +35,7 @@ docker pull quay.io/go-skynet/local-ai:master-nvidia-l4t-arm64-core
 Run the LocalAI container on Nvidia ARM64 devices using the following command, where `/data/models` is the directory containing the models:

 ```bash
-docker run -e DEBUG=true -p 8080:8080 -v /data/models:/build/models -ti --restart=always --name local-ai --runtime nvidia --gpus all quay.io/go-skynet/local-ai:master-nvidia-l4t-arm64-core
+docker run -e DEBUG=true -p 8080:8080 -v /data/models:/models -ti --restart=always --name local-ai --runtime nvidia --gpus all quay.io/go-skynet/local-ai:master-nvidia-l4t-arm64-core
 ```

 Note: `/data/models` is the directory containing the models. You can replace it with the directory containing your models.
diff --git a/docs/static/install.sh b/docs/static/install.sh
index d0c21b63a3bc..357444387572 100755
--- a/docs/static/install.sh
+++ b/docs/static/install.sh
@@ -663,7 +663,7 @@ install_docker() {
         IMAGE_TAG=${LOCALAI_VERSION}-vulkan

         info "Starting LocalAI Docker container..."
-        $SUDO docker run -v local-ai-data:/build/models \
+        $SUDO docker run -v local-ai-data:/models \
             --device /dev/dri \
             --restart=always \
             -e API_KEY=$API_KEY \
@@ -690,7 +690,7 @@ install_docker() {
         fi

         info "Starting LocalAI Docker container..."
-        $SUDO docker run -v local-ai-data:/build/models \
+        $SUDO docker run -v local-ai-data:/models \
             --gpus all \
             --restart=always \
             -e API_KEY=$API_KEY \
@@ -705,7 +705,7 @@ install_docker() {
         fi

         info "Starting LocalAI Docker container..."
-        $SUDO docker run -v local-ai-data:/build/models \
+        $SUDO docker run -v local-ai-data:/models \
             --device /dev/dri \
             --device /dev/kfd \
             --group-add=video \
             --restart=always \
             -e API_KEY=$API_KEY \
@@ -723,7 +723,7 @@
         fi

         info "Starting LocalAI Docker container..."
-        $SUDO docker run -v local-ai-data:/build/models \
+        $SUDO docker run -v local-ai-data:/models \
             --device /dev/dri \
             --restart=always \
             -e API_KEY=$API_KEY \
diff --git a/entrypoint.sh b/entrypoint.sh
index 389c846d53f3..fdaa92eba0f1 100755
--- a/entrypoint.sh
+++ b/entrypoint.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 set -e

-cd /build
+cd /

 # If we have set EXTRA_BACKENDS, then we need to prepare the backends
 if [ -n "$EXTRA_BACKENDS" ]; then
@@ -13,38 +13,23 @@ if [ -n "$EXTRA_BACKENDS" ]; then
     done
 fi

-if [ "$REBUILD" != "false" ]; then
-    rm -rf ./local-ai
-    make build -j${BUILD_PARALLELISM:-1}
+echo "CPU info:"
+grep -e "model\sname" /proc/cpuinfo | head -1
+grep -e "flags" /proc/cpuinfo | head -1
+if grep -q -e "\savx\s" /proc/cpuinfo ; then
+    echo "CPU: AVX found OK"
 else
-    echo "@@@@@"
-    echo "Skipping rebuild"
-    echo "@@@@@"
-    echo "If you are experiencing issues with the pre-compiled builds, try setting REBUILD=true"
-    echo "If you are still experiencing issues with the build, try setting CMAKE_ARGS and disable the instructions set as needed:"
-    echo 'CMAKE_ARGS="-DGGML_F16C=OFF -DGGML_AVX512=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF"'
-    echo "see the documentation at: https://localai.io/basics/build/index.html"
-    echo "Note: See also https://github.com/go-skynet/LocalAI/issues/288"
-    echo "@@@@@"
-    echo "CPU info:"
-    grep -e "model\sname" /proc/cpuinfo | head -1
-    grep -e "flags" /proc/cpuinfo | head -1
-    if grep -q -e "\savx\s" /proc/cpuinfo ; then
-        echo "CPU: AVX found OK"
-    else
-        echo "CPU: no AVX found"
-    fi
-    if grep -q -e "\savx2\s" /proc/cpuinfo ; then
-        echo "CPU: AVX2 found OK"
-    else
-        echo "CPU: no AVX2 found"
-    fi
-    if grep -q -e "\savx512" /proc/cpuinfo ; then
-        echo "CPU: AVX512 found OK"
-    else
-        echo "CPU: no AVX512 found"
-    fi
-    echo "@@@@@"
+    echo "CPU: no AVX found"
+fi
+if grep -q -e "\savx2\s" /proc/cpuinfo ; then
+    echo "CPU: AVX2 found OK"
+else
+    echo "CPU: no AVX2 found"
+fi
+if grep -q -e "\savx512" /proc/cpuinfo ; then
+    echo "CPU: AVX512 found OK"
+else
+    echo "CPU: no AVX512 found"
 fi

 exec ./local-ai "$@"