delphix
diff --git a/‎.github/workflows/test.yaml‎
Lines changed: 34 additions & 1 deletion b/‎.github/workflows/test.yaml‎
Lines changed: 34 additions & 1 deletion
diff --git a/‎CMakeLists.txt‎
Lines changed: 12 additions & 0 deletions b/‎CMakeLists.txt‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎CMakePresets.json‎
Lines changed: 9 additions & 0 deletions b/‎CMakePresets.json‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎Dockerfile‎
Lines changed: 22 additions & 2 deletions b/‎Dockerfile‎
Lines changed: 22 additions & 2 deletions
diff --git a/‎discover/gpu.go‎
Lines changed: 30 additions & 1 deletion b/‎discover/gpu.go‎
Lines changed: 30 additions & 1 deletion
diff --git a/‎discover/runner.go‎
Lines changed: 53 additions & 4 deletions b/‎discover/runner.go‎
Lines changed: 53 additions & 4 deletions
diff --git a/‎discover/types.go‎
Lines changed: 3 additions & 2 deletions b/‎discover/types.go‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎envconfig/config.go‎
Lines changed: 2 additions & 0 deletions b/‎envconfig/config.go‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎llama/llama.go‎
Lines changed: 3 additions & 1 deletion b/‎llama/llama.go‎
Lines changed: 3 additions & 1 deletion
@@ -52,14 +52,32 @@ jobs:
             container: rocm/dev-ubuntu-22.04:6.1.2
             extra-packages: rocm-libs
             flags: '-DAMDGPU_TARGETS=gfx1010 -DCMAKE_PREFIX_PATH=/opt/rocm'
+          # Vulkan backend build in a plain ubuntu:22.04 container.
+          # vulkan-sdk is not in the stock Ubuntu archive; the install step
+          # adds the LunarG apt repository for this preset before installing
+          # these packages. cmake and ccache are also installed
+          # unconditionally by that step, so listing them here is redundant
+          # but harmless.
+          - preset: Vulkan
+            container: ubuntu:22.04
+            extra-packages: >
+              mesa-vulkan-drivers vulkan-tools
+              libvulkan1 libvulkan-dev
+              vulkan-sdk cmake ccache g++ make
     runs-on: linux
     container: ${{ matrix.container }}
     steps:
       - uses: actions/checkout@v4
       - run: |
           [ -n "${{ matrix.container }}" ] || sudo=sudo
           $sudo apt-get update
+          # Add LunarG Vulkan SDK apt repo for Ubuntu 22.04
+          # (only for the Vulkan preset; must happen before the main install
+          # below so that the vulkan-sdk package can be resolved)
+          if [ "${{ matrix.preset }}" = "Vulkan" ]; then
+            $sudo apt-get install -y --no-install-recommends wget gnupg ca-certificates software-properties-common
+            wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | $sudo gpg --dearmor -o /usr/share/keyrings/lunarg-archive-keyring.gpg
+            # Use signed-by to bind the repo to the installed keyring to avoid NO_PUBKEY
+            echo "deb [signed-by=/usr/share/keyrings/lunarg-archive-keyring.gpg]  https://packages.lunarg.com/vulkan/1.4.313 jammy main" | $sudo tee /etc/apt/sources.list.d/lunarg-vulkan-1.4.313-jammy.list > /dev/null
+            # Refresh the package index so the newly added repo is visible
+            $sudo apt-get update
+          fi
           $sudo apt-get install -y cmake ccache ${{ matrix.extra-packages }}
+          # Export VULKAN_SDK if provided by LunarG package (defensive)
+          if [ -d "/usr/lib/x86_64-linux-gnu/vulkan" ] && [ "${{ matrix.preset }}" = "Vulkan" ]; then
+            # Quote the target so a path containing spaces cannot split the redirect
+            echo "VULKAN_SDK=/usr" >> "$GITHUB_ENV"
+          fi
         env:
           DEBIAN_FRONTEND: noninteractive
       - uses: actions/cache@v4
@@ -92,18 +110,21 @@ jobs:
           - preset: ROCm
             install: https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q4-WinSvr2022-For-HIP.exe
             flags: '-DAMDGPU_TARGETS=gfx1010 -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_FLAGS="-parallel-jobs=4 -Wno-ignored-attributes -Wno-deprecated-pragma" -DCMAKE_CXX_FLAGS="-parallel-jobs=4 -Wno-ignored-attributes -Wno-deprecated-pragma"'
+          - preset: Vulkan
+            install: https://sdk.lunarg.com/sdk/download/1.4.321.1/windows/vulkansdk-windows-X64-1.4.321.1.exe
     runs-on: windows
     steps:
       - run: |
           choco install -y --no-progress ccache ninja
           ccache -o cache_dir=${{ github.workspace }}\.ccache
-      - if: matrix.preset == 'CUDA' || matrix.preset == 'ROCm'
+      - if: matrix.preset == 'CUDA' || matrix.preset == 'ROCm' || matrix.preset == 'Vulkan'
         id: cache-install
         uses: actions/cache/restore@v4
         with:
           path: |
             C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA
             C:\Program Files\AMD\ROCm
+            C:\VulkanSDK
           key: ${{ matrix.install }}
       - if: matrix.preset == 'CUDA'
         name: Install CUDA ${{ matrix.cuda-version }}
@@ -133,6 +154,18 @@ jobs:
           echo "HIPCXX=$hipPath\bin\clang++.exe" | Out-File -FilePath $env:GITHUB_ENV -Append
           echo "HIP_PLATFORM=amd" | Out-File -FilePath $env:GITHUB_ENV -Append
           echo "CMAKE_PREFIX_PATH=$hipPath" | Out-File -FilePath $env:GITHUB_ENV -Append
+      - if: matrix.preset == 'Vulkan'
+        name: Install Vulkan SDK
+        run: |
+          $ErrorActionPreference = "Stop"
+          # Only download and run the installer when the SDK was not restored from cache
+          if ("${{ steps.cache-install.outputs.cache-hit }}" -ne 'true') {
+            Invoke-WebRequest -Uri "${{ matrix.install }}" -OutFile "install.exe"
+            Start-Process -FilePath .\install.exe -ArgumentList "-c","--am","--al","in" -NoNewWindow -Wait
+          }
+
+          # The SDK lives in a version-named directory below C:\VulkanSDK
+          $vulkanPath = (Resolve-Path "C:\VulkanSDK\*").path
+          # A wildcard Resolve-Path with no match returns nothing instead of failing,
+          # so fail loudly here rather than exporting a broken "\bin" PATH entry
+          if (-not $vulkanPath) { throw "Vulkan SDK not found under C:\VulkanSDK" }
+          echo "$vulkanPath\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
+          echo "VULKAN_SDK=$vulkanPath" | Out-File -FilePath $env:GITHUB_ENV -Append
       - if: ${{ !cancelled() && steps.cache-install.outputs.cache-hit != 'true' }}
         uses: actions/cache/save@v4
         with:
 
@@ -139,3 +139,15 @@ if(CMAKE_HIP_COMPILER)
         endforeach()
     endif()
 endif()
+
+# The Vulkan backend is optional: find_package is not REQUIRED, so the backend
+# is only configured and installed when a Vulkan SDK is found.
+find_package(Vulkan)
+if(Vulkan_FOUND)
+    add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/ml/backend/ggml/ggml/src/ggml-vulkan)
+    # Install ggml-vulkan under the "Vulkan" component. Of its runtime
+    # dependencies, only those whose name matches "vulkan" are bundled; the
+    # catch-all exclude regex drops every other dependency.
+    install(TARGETS ggml-vulkan
+        RUNTIME_DEPENDENCIES
+            PRE_INCLUDE_REGEXES vulkan
+            PRE_EXCLUDE_REGEXES ".*"
+        RUNTIME DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT Vulkan
+        LIBRARY DESTINATION ${OLLAMA_INSTALL_DIR} COMPONENT Vulkan
+    )
+endif()
@@ -70,6 +70,10 @@
         "CMAKE_HIP_FLAGS": "-parallel-jobs=4",
         "AMDGPU_TARGETS": "gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151;gfx1200;gfx1201;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-"
       }
+    },
+    {
+      "name": "Vulkan",
+      "inherits": [ "Default" ]
     }
   ],
   "buildPresets": [
@@ -122,6 +126,11 @@
       "name": "ROCm 6",
       "inherits": [ "ROCm" ],
       "configurePreset": "ROCm 6"
+    },
+    {
+      "name": "Vulkan",
+      "targets": [ "ggml-vulkan" ],
+      "configurePreset": "Vulkan"
     }
   ]
 }
@@ -7,6 +7,7 @@ ARG ROCMVERSION=6.3.3
 ARG JETPACK5VERSION=r35.4.1
 ARG JETPACK6VERSION=r36.4.0
 ARG CMAKEVERSION=3.31.2
+ARG VULKANVERSION=1.4.321.1
 
 # We require gcc v10 minimum.  v10.3 has regressions, so the rockylinux 8.5 AppStream has the latest compatible version
 FROM --platform=linux/amd64 rocm/dev-almalinux-8:${ROCMVERSION}-complete AS base-amd64
@@ -17,6 +18,16 @@ RUN yum install -y yum-utils \
     && dnf install -y ccache \
     && yum-config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo
 ENV PATH=/opt/rh/gcc-toolset-10/root/usr/bin:$PATH
+# Re-declare the global build arg so it is visible inside this stage
+ARG VULKANVERSION
+# Download the LunarG SDK tarball, unpack it at / (the later steps address it
+# as /${VULKANVERSION}), drop the tarball so it does not bloat the layer, then
+# build the vulkan-headers and shaderc SDK components from source.
+# ninja-build and a "python" executable are installed first for that build.
+RUN wget https://sdk.lunarg.com/sdk/download/${VULKANVERSION}/linux/vulkansdk-linux-x86_64-${VULKANVERSION}.tar.xz -O /tmp/vulkansdk-linux-x86_64-${VULKANVERSION}.tar.xz \
+    && tar xf /tmp/vulkansdk-linux-x86_64-${VULKANVERSION}.tar.xz -C / \
+    && rm -f /tmp/vulkansdk-linux-x86_64-${VULKANVERSION}.tar.xz \
+    && dnf -y install ninja-build \
+    && ln -s /usr/bin/python3 /usr/bin/python \
+    && /${VULKANVERSION}/vulkansdk -j 8 vulkan-headers \
+    && /${VULKANVERSION}/vulkansdk -j 8 shaderc
+# Copy the SDK headers and libraries into /usr/local
+RUN cp -r /${VULKANVERSION}/x86_64/include/* /usr/local/include/ \
+    && cp -r /${VULKANVERSION}/x86_64/lib/* /usr/local/lib
+# Put the SDK's bin directory on PATH
+ENV PATH=/${VULKANVERSION}/x86_64/bin:$PATH
 
 FROM --platform=linux/arm64 almalinux:8 AS base-arm64
 # install epel-release for ccache
@@ -106,6 +117,13 @@ RUN --mount=type=cache,target=/root/.ccache \
         && cmake --build --parallel ${PARALLEL} --preset 'JetPack 6' \
         && cmake --install build --component CUDA --strip --parallel ${PARALLEL}
 
+# Build and install only the Vulkan backend (component "Vulkan").
+FROM base AS vulkan
+# Use the same job-count build arg as the sibling backend stages instead of a
+# hard-coded value. NOTE(review): assumes PARALLEL has a global default, as
+# the other stages' use of ${PARALLEL} implies - confirm.
+ARG PARALLEL
+RUN --mount=type=cache,target=/root/.ccache \
+    cmake --preset 'Vulkan' -DOLLAMA_RUNNER_DIR="vulkan" \
+        && cmake --build --parallel ${PARALLEL} --preset 'Vulkan' \
+        && cmake --install build --component Vulkan --strip --parallel ${PARALLEL}
+
+
 FROM base AS build
 WORKDIR /go/src/github.com/ollama/ollama
 COPY go.mod go.sum .
@@ -123,7 +141,8 @@ RUN --mount=type=cache,target=/root/.cache/go-build \
 FROM --platform=linux/amd64 scratch AS amd64
 # COPY --from=cuda-11 dist/lib/ollama/ /lib/ollama/
 COPY --from=cuda-12 dist/lib/ollama /lib/ollama/
-COPY --from=cuda-13 dist/lib/ollama/ /lib/ollama/
+COPY --from=cuda-13 dist/lib/ollama /lib/ollama/
+COPY --from=vulkan  dist/lib/ollama  /lib/ollama/
 
 FROM --platform=linux/arm64 scratch AS arm64
 # COPY --from=cuda-11 dist/lib/ollama/ /lib/ollama/
@@ -136,12 +155,13 @@ FROM scratch AS rocm
 COPY --from=rocm-6 dist/lib/ollama /lib/ollama
 
 FROM ${FLAVOR} AS archive
+ARG VULKANVERSION
 COPY --from=cpu dist/lib/ollama /lib/ollama
 COPY --from=build /bin/ollama /bin/ollama
 
 FROM ubuntu:24.04
 RUN apt-get update \
-    && apt-get install -y ca-certificates \
+    && apt-get install -y ca-certificates libvulkan1 \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/*
 COPY --from=archive /bin /usr/bin
 
@@ -70,6 +70,7 @@ func devInfoToInfoList(devs []ml.DeviceInfo) GpuInfoList {
 		if dev.Library == "ROCm" && rocmDir != "" {
 			info.DependencyPath = append(info.DependencyPath, rocmDir)
 		}
+		// TODO any special processing of Vulkan devices?
 		resp = append(resp, info)
 	}
 	if len(resp) == 0 {
@@ -97,7 +98,16 @@ func (l GpuInfoList) GetVisibleDevicesEnv() []string {
 	if len(l) == 0 {
 		return nil
 	}
-	return []string{rocmGetVisibleDevicesEnv(l)}
+	res := []string{}
+	envVar := rocmGetVisibleDevicesEnv(l)
+	if envVar != "" {
+		res = append(res, envVar)
+	}
+	envVar = vkGetVisibleDevicesEnv(l)
+	if envVar != "" {
+		res = append(res, envVar)
+	}
+	return res
 }
 
 func rocmGetVisibleDevicesEnv(gpuInfo []GpuInfo) string {
@@ -127,6 +137,25 @@ func rocmGetVisibleDevicesEnv(gpuInfo []GpuInfo) string {
 	return envVar + strings.Join(ids, ",")
 }
 
+// vkGetVisibleDevicesEnv builds the "GGML_VK_VISIBLE_DEVICES=<ids>" assignment
+// for every entry of gpuInfo whose Library is "Vulkan". Each device contributes
+// its filterID when that is set and its ID otherwise; the ids are joined with
+// commas in input order. It returns "" when gpuInfo has no Vulkan devices.
+func vkGetVisibleDevicesEnv(gpuInfo []GpuInfo) string {
+	var selected []string
+	for i := range gpuInfo {
+		gpu := &gpuInfo[i]
+		if gpu.Library == "Vulkan" {
+			// Prefer the filter ID; fall back to the regular device ID
+			id := gpu.ID
+			if gpu.filterID != "" {
+				id = gpu.filterID
+			}
+			selected = append(selected, id)
+		}
+	}
+	if len(selected) > 0 {
+		return "GGML_VK_VISIBLE_DEVICES=" + strings.Join(selected, ",")
+	}
+	return ""
+}
+
 // GetSystemInfo returns the last cached state of the GPUs on the system
 func GetSystemInfo() SystemInfo {
 	deviceMu.Lock()
 
@@ -86,6 +86,7 @@ func GPUDevices(ctx context.Context, runners []FilteredRunnerDiscovery) []ml.Dev
 		// are enumerated, but not actually supported.
 		// We run this in serial to avoid potentially initializing a GPU multiple
 		// times concurrently leading to memory contention
+		// TODO refactor so we group the lib dirs and do serial per version, but parallel for different libs
 		for dir := range libDirs {
 			var dirs []string
 			if dir != "" {
@@ -131,19 +132,25 @@ func GPUDevices(ctx context.Context, runners []FilteredRunnerDiscovery) []ml.Dev
 			go func(i int) {
 				defer wg.Done()
 				var envVar string
+				id := devices[i].ID
 				if devices[i].Library == "ROCm" {
 					if runtime.GOOS != "linux" {
 						envVar = "HIP_VISIBLE_DEVICES"
 					} else {
 						envVar = "ROCR_VISIBLE_DEVICES"
 					}
-				} else {
+				} else if devices[i].Library == "CUDA" {
 					envVar = "CUDA_VISIBLE_DEVICES"
+				} else if devices[i].Library == "Vulkan" {
+					id = devices[i].FilteredID
+					envVar = "GGML_VK_VISIBLE_DEVICES"
+				} else {
+					slog.Error("Unknown Library:" + devices[i].Library)
 				}
 
 				extraEnvs := []string{
-					"GGML_CUDA_INIT=1",           // force deep initialization to trigger crash on unsupported GPUs
-					envVar + "=" + devices[i].ID, // Filter to just this one GPU
+					"GGML_CUDA_INIT=1", // force deep initialization to trigger crash on unsupported GPUs
+					envVar + "=" + id,  // Filter to just this one GPU
 				}
 				if len(bootstrapDevices(ctx2ndPass, devices[i].LibraryPath, extraEnvs)) == 0 {
 					needsDelete[i] = true
@@ -163,6 +170,8 @@ func GPUDevices(ctx context.Context, runners []FilteredRunnerDiscovery) []ml.Dev
 		wg.Wait()
 		logutil.Trace("supported GPU library combinations", "supported", supported)
 
+		filterOutVulkanThatAreSupportedByOtherGPU(needsDelete)
+
 		// Mark for deletion any overlaps - favoring the library version that can cover all GPUs if possible
 		filterOverlapByLibrary(supported, needsDelete)
 
@@ -184,7 +193,7 @@ func GPUDevices(ctx context.Context, runners []FilteredRunnerDiscovery) []ml.Dev
 			}
 		}
 
-		// Now filter out any overlap with different libraries (favor CUDA/ROCm over others)
+		// Now filter out any overlap with different libraries (favor CUDA/HIP over others)
 		for i := 0; i < len(devices); i++ {
 			for j := i + 1; j < len(devices); j++ {
 				// For this pass, we only drop exact duplicates
@@ -346,6 +355,37 @@ func GPUDevices(ctx context.Context, runners []FilteredRunnerDiscovery) []ml.Dev
 	return devices
 }
 
+// filterOutVulkanThatAreSupportedByOtherGPU marks Vulkan entries of the
+// devices slice (declared outside this function) for deletion when another
+// entry that is not Vulkan and not itself marked for deletion reports the
+// same non-empty PCI ID.
+//
+// needsDelete is indexed in parallel with devices and is updated in place.
+// Entries with an empty PCIID are never matched.
+func filterOutVulkanThatAreSupportedByOtherGPU(needsDelete []bool) {
+	for i := range devices {
+		if devices[i].Library != "Vulkan" || needsDelete[i] {
+			continue
+		}
+		if devices[i].PCIID == "" {
+			continue
+		}
+		for j := range devices {
+			if i == j {
+				continue
+			}
+			if devices[j].PCIID == "" {
+				continue
+			}
+			if devices[j].PCIID == devices[i].PCIID && devices[j].Library != "Vulkan" && !needsDelete[j] {
+				needsDelete[i] = true
+				// LibraryPath may be empty; guard the index so that
+				// debug logging can never panic on an empty slice.
+				libDir := ""
+				if n := len(devices[i].LibraryPath); n > 0 {
+					libDir = devices[i].LibraryPath[n-1]
+				}
+				slog.Debug("dropping Vulkan duplicate by PCI ID",
+					"vulkan_id", devices[i].ID,
+					"vulkan_libdir", libDir,
+					"pci_id", devices[i].PCIID,
+					"kept_library", devices[j].Library,
+					"kept_id", devices[j].ID,
+				)
+				// One surviving non-Vulkan match is enough
+				break
+			}
+		}
+	}
+}
+
 func filterOverlapByLibrary(supported map[string]map[string]map[string]int, needsDelete []bool) {
 	// For multi-GPU systems, use the newest version that supports all the GPUs
 	for _, byLibDirs := range supported {
@@ -451,6 +491,7 @@ func bootstrapDevices(ctx context.Context, ollamaLibDirs []string, extraEnvs []s
 		cmd.Stdout = os.Stdout
 		cmd.Stderr = os.Stderr
 	}
+
 	// cmd.SysProcAttr = llm.LlamaServerSysProcAttr // circular dependency - bring back once refactored
 	pathEnvVal := strings.Join(libraryPaths, string(filepath.ListSeparator))
 	pathNeeded := true
@@ -508,6 +549,14 @@ func bootstrapDevices(ctx context.Context, ollamaLibDirs []string, extraEnvs []s
 		}
 	}
 	logutil.Trace("runner enumerated devices", "OLLAMA_LIBRARY_PATH", ollamaLibDirs, "devices", devices)
+
+	// Enumerate returned devices starting at 0 per library and assign the per-library index as FilteredID
+	libCounts := make(map[string]int)
+	for i := range devices {
+		lib := devices[i].Library
+		devices[i].FilteredID = strconv.Itoa(libCounts[lib])
+		libCounts[lib]++
+	}
 	return devices
 }
 
 
@@ -37,7 +37,7 @@ type GpuInfo struct { // TODO better name maybe "InferenceProcessor"?
 	UnreliableFreeMemory bool
 
 	// GPU information
-	filterID     string // AMD Workaround: The numeric ID of the device used to filter out other devices
+	filterID     string // AMD/Vulkan Workaround: The numeric ID of the device used to filter out other devices
 	Name         string `json:"name"`          // user friendly name if available
 	ComputeMajor int    `json:"compute_major"` // Compute Capability or gfx
 	ComputeMinor int    `json:"compute_minor"`
@@ -175,7 +175,8 @@ func (l GpuInfoList) FlashAttentionSupported() bool {
 		supportsFA := gpu.Library == "cpu" ||
 			gpu.Name == "Metal" || gpu.Library == "Metal" ||
 			(gpu.Library == "CUDA" && gpu.DriverMajor >= 7 && !(gpu.ComputeMajor == 7 && gpu.ComputeMinor == 2)) || // We don't have kernels for Jetson Xavier
-			gpu.Library == "ROCm"
+			gpu.Library == "ROCm" ||
+			gpu.Library == "Vulkan"
 
 		if !supportsFA {
 			return false
 
@@ -217,6 +217,7 @@ var (
 	CudaVisibleDevices    = String("CUDA_VISIBLE_DEVICES")
 	HipVisibleDevices     = String("HIP_VISIBLE_DEVICES")
 	RocrVisibleDevices    = String("ROCR_VISIBLE_DEVICES")
+	VkVisibleDevices      = String("GGML_VK_VISIBLE_DEVICES")
 	GpuDeviceOrdinal      = String("GPU_DEVICE_ORDINAL")
 	HsaOverrideGfxVersion = String("HSA_OVERRIDE_GFX_VERSION")
 )
@@ -307,6 +308,7 @@ func AsMap() map[string]EnvVar {
 		ret["CUDA_VISIBLE_DEVICES"] = EnvVar{"CUDA_VISIBLE_DEVICES", CudaVisibleDevices(), "Set which NVIDIA devices are visible"}
 		ret["HIP_VISIBLE_DEVICES"] = EnvVar{"HIP_VISIBLE_DEVICES", HipVisibleDevices(), "Set which AMD devices are visible by numeric ID"}
 		ret["ROCR_VISIBLE_DEVICES"] = EnvVar{"ROCR_VISIBLE_DEVICES", RocrVisibleDevices(), "Set which AMD devices are visible by UUID or numeric ID"}
+		ret["GGML_VK_VISIBLE_DEVICES"] = EnvVar{"GGML_VK_VISIBLE_DEVICES", VkVisibleDevices(), "Set which Vulkan devices are visible by numeric ID"}
 		ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal(), "Set which AMD devices are visible by numeric ID"}
 		ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion(), "Override the gfx used for all detected AMD GPUs"}
 		ret["OLLAMA_INTEL_GPU"] = EnvVar{"OLLAMA_INTEL_GPU", IntelGPU(), "Enable experimental Intel GPU detection"}
 
@@ -69,7 +69,9 @@ func EnumerateGPUs() []ml.DeviceID {
 	for i := range C.ggml_backend_dev_count() {
 		device := C.ggml_backend_dev_get(i)
 
-		if C.ggml_backend_dev_type(device) == C.GGML_BACKEND_DEVICE_TYPE_GPU {
+		switch C.ggml_backend_dev_type(device) {
+		case C.GGML_BACKEND_DEVICE_TYPE_GPU,
+			C.GGML_BACKEND_DEVICE_TYPE_IGPU:
 			var props C.struct_ggml_backend_dev_props
 			C.ggml_backend_dev_get_props(device, &props)
 			ids = append(ids, ml.DeviceID{
Original file line number	Diff line number	Diff line change
`@@ -70,6 +70,10 @@`
`70`	`70`	`"CMAKE_HIP_FLAGS": "-parallel-jobs=4",`
`71`	`71`	`"AMDGPU_TARGETS": "gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151;gfx1200;gfx1201;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-"`
`72`	`72`	`}`
	`73`	`+ },`
	`74`	`+ {`
	`75`	`+ "name": "Vulkan",`
	`76`	`+ "inherits": [ "Default" ]`
`73`	`77`	`}`
`74`	`78`	`],`
`75`	`79`	`"buildPresets": [`
`@@ -122,6 +126,11 @@`
`122`	`126`	`"name": "ROCm 6",`
`123`	`127`	`"inherits": [ "ROCm" ],`
`124`	`128`	`"configurePreset": "ROCm 6"`
	`129`	`+ },`
	`130`	`+ {`
	`131`	`+ "name": "Vulkan",`
	`132`	`+ "targets": [ "ggml-vulkan" ],`
	`133`	`+ "configurePreset": "Vulkan"`
`125`	`134`	`}`
`126`	`135`	`]`
`127`	`136`	`}`