From 9dc1c3f16479ac3e0f779bb6f0a717cadb1795b8 Mon Sep 17 00:00:00 2001 From: coffeerunhobby Date: Mon, 5 Jan 2026 00:31:04 +0200 Subject: [PATCH 1/2] Fix BMI2 crash on AVX-only CPUs (Intel Ivy Bridge/Sandy Bridge) Signed-off-by: coffeerunhobby --- Dockerfile | 2 +- backend/Dockerfile.llama-cpp | 52 +++++++++++++------ backend/cpp/llama-cpp/CMakeLists.txt | 4 +- backend/cpp/llama-cpp/Makefile | 14 ++--- .../go/stablediffusion-ggml/CMakeLists.txt | 3 +- backend/go/whisper/CMakeLists.txt | 2 +- 6 files changed, 49 insertions(+), 28 deletions(-) diff --git a/Dockerfile b/Dockerfile index a253237c974c..28147e75b856 100644 --- a/Dockerfile +++ b/Dockerfile @@ -142,7 +142,7 @@ ENV PATH=/opt/rocm/bin:${PATH} FROM requirements-drivers AS build-requirements ARG GO_VERSION=1.22.6 -ARG CMAKE_VERSION=3.26.4 +ARG CMAKE_VERSION=3.31.10 ARG CMAKE_FROM_SOURCE=false ARG TARGETARCH ARG TARGETVARIANT diff --git a/backend/Dockerfile.llama-cpp b/backend/Dockerfile.llama-cpp index 8d5e0b7fbdb0..7990720af00f 100644 --- a/backend/Dockerfile.llama-cpp +++ b/backend/Dockerfile.llama-cpp @@ -10,7 +10,8 @@ FROM ${GRPC_BASE_IMAGE} AS grpc ARG GRPC_MAKEFLAGS="-j4 -Otarget" ARG GRPC_VERSION=v1.65.0 ARG CMAKE_FROM_SOURCE=false -ARG CMAKE_VERSION=3.26.4 +# CUDA Toolkit 13.x compatibility: CMake 3.31.9+ fixes toolchain detection/arch table issues +ARG CMAKE_VERSION=3.31.10 ENV MAKEFLAGS=${GRPC_MAKEFLAGS} @@ -26,7 +27,7 @@ RUN apt-get update && \ # Install CMake (the version in 22.04 is too old) RUN </dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 1) ARCH?=$(shell uname -m) # Disable Shared libs as we are linking on static gRPC and we can't mix shared and static @@ -109,10 +109,10 @@ llama-cpp-avx: llama.cpp $(info ${GREEN}I llama-cpp build info:avx${RESET}) ifeq ($(OS),Darwin) CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-avx-build" build-llama-cpp-grpc-server -else ifeq ($(ARCH),$(filter $(ARCH),aarch64 arm64)) +else ifneq ($(filter $(ARCH),aarch64 arm64),) CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-avx-build" build-llama-cpp-grpc-server else - CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DCMAKE_C_FLAGS=-mno-bmi2 -DCMAKE_CXX_FLAGS=-mno-bmi2" $(MAKE) VARIANT="llama-cpp-avx-build" build-llama-cpp-grpc-server + CFLAGS="-mno-bmi2" CXXFLAGS="-mno-bmi2" CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI=off -DGGML_BMI2=off" $(MAKE) VARIANT="llama-cpp-avx-build" build-llama-cpp-grpc-server endif cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build/grpc-server llama-cpp-avx @@ -122,10 +122,10 @@ llama-cpp-fallback: llama.cpp $(info ${GREEN}I llama-cpp build info:fallback${RESET}) ifeq ($(OS),Darwin) CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server -else ifeq ($(ARCH),$(filter $(ARCH),aarch64 arm64)) +else ifneq ($(filter $(ARCH),aarch64 arm64),) CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server else - CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DCMAKE_C_FLAGS='-mno-bmi -mno-bmi2' -DCMAKE_CXX_FLAGS='-mno-bmi -mno-bmi2'" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server + CFLAGS="-mno-bmi -mno-bmi2" CXXFLAGS="-mno-bmi -mno-bmi2" CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI=off -DGGML_BMI2=off" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server endif cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build/grpc-server llama-cpp-fallback @@ -135,10 +135,10 @@ llama-cpp-grpc: llama.cpp $(info ${GREEN}I llama-cpp build info:grpc${RESET}) ifeq ($(OS),Darwin) CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server -else ifeq ($(ARCH),$(filter $(ARCH),aarch64 arm64)) +else ifneq ($(filter $(ARCH),aarch64 arm64),) CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server else - CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DCMAKE_C_FLAGS='-mno-bmi -mno-bmi2' -DCMAKE_CXX_FLAGS='-mno-bmi -mno-bmi2'" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server + CFLAGS="-mno-bmi -mno-bmi2" CXXFLAGS="-mno-bmi -mno-bmi2" CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI=off -DGGML_BMI2=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server endif cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/grpc-server llama-cpp-grpc diff --git a/backend/go/stablediffusion-ggml/CMakeLists.txt b/backend/go/stablediffusion-ggml/CMakeLists.txt index 0d1d003e18eb..0316fffa2383 100644 --- a/backend/go/stablediffusion-ggml/CMakeLists.txt +++ b/backend/go/stablediffusion-ggml/CMakeLists.txt @@ -1,4 +1,5 @@ -cmake_minimum_required(VERSION 3.12) +# CUDA Toolkit 13.x compatibility: CMake 3.31.9+ fixes toolchain detection/arch table issues +cmake_minimum_required(VERSION 3.31.10) project(gosd LANGUAGES C CXX) set(CMAKE_POSITION_INDEPENDENT_CODE ON) diff --git a/backend/go/whisper/CMakeLists.txt b/backend/go/whisper/CMakeLists.txt index 60cc178f2b23..4857a55f2c81 100644 --- a/backend/go/whisper/CMakeLists.txt +++ b/backend/go/whisper/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.12) +cmake_minimum_required(VERSION 3.31.10) project(gowhisper LANGUAGES C CXX) set(CMAKE_POSITION_INDEPENDENT_CODE ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) From 988922608ed6030d4da30ff10f4cca9fb228652d Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Mon, 5 Jan 2026 22:44:26 +0000 Subject: [PATCH 2/2] Address feedback from review Signed-off-by: Ettore Di Giacinto --- backend/cpp/llama-cpp/CMakeLists.txt | 6 +++--- backend/cpp/llama-cpp/Makefile | 12 ++++++------ backend/go/stablediffusion-ggml/CMakeLists.txt | 3 +-- backend/go/whisper/CMakeLists.txt | 2 +- 4 files changed, 11 insertions(+), 12 deletions(-) diff --git a/backend/cpp/llama-cpp/CMakeLists.txt b/backend/cpp/llama-cpp/CMakeLists.txt index 97bed6b62ce1..598461975532 100644 --- a/backend/cpp/llama-cpp/CMakeLists.txt +++ b/backend/cpp/llama-cpp/CMakeLists.txt @@ -1,6 +1,6 @@ -# CUDA Toolkit 13.x compatibility: CMake 3.31.9+ fixes toolchain detection/arch table issues -cmake_minimum_required(VERSION 3.31.10) +set(TARGET grpc-server) set(CMAKE_CXX_STANDARD 17) +cmake_minimum_required(VERSION 3.15) set(TARGET grpc-server) set(_PROTOBUF_LIBPROTOBUF libprotobuf) set(_REFLECTION grpc++_reflection) @@ -70,4 +70,4 @@ target_link_libraries(${TARGET} PRIVATE common llama mtmd ${CMAKE_THREAD_LIBS_IN target_compile_features(${TARGET} PRIVATE cxx_std_11) if(TARGET BUILD_INFO) add_dependencies(${TARGET} BUILD_INFO) -endif() \ No newline at end of file +endif() diff --git a/backend/cpp/llama-cpp/Makefile b/backend/cpp/llama-cpp/Makefile index 1e109ba4fb49..f709661740d5 100644 --- a/backend/cpp/llama-cpp/Makefile +++ b/backend/cpp/llama-cpp/Makefile @@ -109,10 +109,10 @@ llama-cpp-avx: llama.cpp $(info ${GREEN}I llama-cpp build info:avx${RESET}) ifeq ($(OS),Darwin) CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-avx-build" build-llama-cpp-grpc-server -else ifneq ($(filter $(ARCH),aarch64 arm64),) +else ifeq ($(ARCH),$(filter $(ARCH),aarch64 arm64)) CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-avx-build" build-llama-cpp-grpc-server else - CFLAGS="-mno-bmi2" CXXFLAGS="-mno-bmi2" CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI=off -DGGML_BMI2=off" $(MAKE) VARIANT="llama-cpp-avx-build" build-llama-cpp-grpc-server + CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) VARIANT="llama-cpp-avx-build" build-llama-cpp-grpc-server endif cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-avx-build/grpc-server llama-cpp-avx @@ -122,10 +122,10 @@ llama-cpp-fallback: llama.cpp $(info ${GREEN}I llama-cpp build info:fallback${RESET}) ifeq ($(OS),Darwin) CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server -else ifneq ($(filter $(ARCH),aarch64 arm64),) +else ifeq ($(ARCH),$(filter $(ARCH),aarch64 arm64)) CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server else - CFLAGS="-mno-bmi -mno-bmi2" CXXFLAGS="-mno-bmi -mno-bmi2" CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI=off -DGGML_BMI2=off" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server + CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" $(MAKE) VARIANT="llama-cpp-fallback-build" build-llama-cpp-grpc-server endif cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-fallback-build/grpc-server llama-cpp-fallback @@ -135,10 +135,10 @@ llama-cpp-grpc: llama.cpp $(info ${GREEN}I llama-cpp build info:grpc${RESET}) ifeq ($(OS),Darwin) CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server -else ifneq ($(filter $(ARCH),aarch64 arm64),) +else ifeq ($(ARCH),$(filter $(ARCH),aarch64 arm64)) CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server else - CFLAGS="-mno-bmi -mno-bmi2" CXXFLAGS="-mno-bmi -mno-bmi2" CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI=off -DGGML_BMI2=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server + CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_BMI2=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-cpp-grpc-build" build-llama-cpp-grpc-server endif cp -rfv $(CURRENT_MAKEFILE_DIR)/../llama-cpp-grpc-build/grpc-server llama-cpp-grpc diff --git a/backend/go/stablediffusion-ggml/CMakeLists.txt b/backend/go/stablediffusion-ggml/CMakeLists.txt index 0316fffa2383..0d1d003e18eb 100644 --- a/backend/go/stablediffusion-ggml/CMakeLists.txt +++ b/backend/go/stablediffusion-ggml/CMakeLists.txt @@ -1,5 +1,4 @@ -# CUDA Toolkit 13.x compatibility: CMake 3.31.9+ fixes toolchain detection/arch table issues -cmake_minimum_required(VERSION 3.31.10) +cmake_minimum_required(VERSION 3.12) project(gosd LANGUAGES C CXX) set(CMAKE_POSITION_INDEPENDENT_CODE ON) diff --git a/backend/go/whisper/CMakeLists.txt b/backend/go/whisper/CMakeLists.txt index 4857a55f2c81..60cc178f2b23 100644 --- a/backend/go/whisper/CMakeLists.txt +++ b/backend/go/whisper/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.31.10) +cmake_minimum_required(VERSION 3.12) project(gowhisper LANGUAGES C CXX) set(CMAKE_POSITION_INDEPENDENT_CODE ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON)