-
-
Notifications
You must be signed in to change notification settings - Fork 5.9k
Studio: add Vulkan llama.cpp support #5819
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
oobabooga
wants to merge
16
commits into
unslothai:main
Choose a base branch
from
oobabooga:vulkan-support
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+577
−13
Open
Changes from all commits
Commits
Show all changes
16 commits
Select commit
Hold shift + click to select a range
9e9729b
Studio: add Vulkan llama.cpp support
oobabooga c401f10
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] 84d98a7
Address gemini's feedback
oobabooga 11acf22
Studio: move the Vulkan VRAM probe into a standalone script
oobabooga 7dd21f3
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] 2401515
Merge branch 'main' into vulkan-support
Imagineer99 e50b0af
Improve Vulkan probe error reporting
oobabooga 4fefeeb
Resolve llama-server symlink so Vulkan build is detected
oobabooga ea7cd94
Merge branch 'main' into vulkan-support
oobabooga 10faad1
Drop unreachable Vulkan fallback in GPU free-memory dispatcher
oobabooga dafeb79
Skip the Intel GPU probe when NVIDIA or ROCm is present
oobabooga 1980e59
Reserve host RAM headroom for Vulkan integrated GPUs
oobabooga 31f4a36
Add a `UNSLOTH_FORCE_VULKAN` environment variable
oobabooga c3482d4
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] cca5ca5
Merge branch 'main' into vulkan-support
oobabooga 7563d91
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,104 @@ | ||
| """Standalone free-VRAM probe for the bundled ggml Vulkan backend. | ||
|
|
||
| Run in a short-lived subprocess (``python _vulkan_probe.py <bindir>``) so the | ||
| Vulkan instance never lives in the long-running backend process. Loads the | ||
| bundled ggml Vulkan backend from ``<bindir>`` and prints one | ||
| ``<idx>\\t<free_bytes>\\t<is_igpu>`` line per device to stdout. The indices | ||
| are ggml's own Vulkan device ordinals (the space GGML_VK_VISIBLE_DEVICES | ||
| expects), which need not match nvidia-smi order. ``is_igpu`` is ``1`` for an | ||
| integrated GPU (shared system RAM) and ``0`` otherwise, taken from ggml's own | ||
| device type so the reader needn't guess from VRAM-vs-RAM ratios. | ||
|
|
||
| Uses only the standard library so it stays runnable as a bare script without | ||
| importing the backend package. | ||
| """ | ||
|
|
||
| import ctypes | ||
| import os | ||
| import sys | ||
|
|
||
# Value of GGML_BACKEND_DEVICE_TYPE_IGPU in ggml's ggml_backend_dev_type enum
# (ggml-backend.h), which numbers CPU=0, GPU=1, IGPU=2, ...
_GGML_BACKEND_DEVICE_TYPE_IGPU = 2


def _igpu_flags(base, lib, count: int) -> list[bool]:
    """Report, per Vulkan device ordinal, whether ggml classes it as an iGPU.

    The device type is read through ggml's backend registry. The Vulkan reg
    lists devices in the same order ``ggml_backend_vk_get_device_memory``
    uses (ggml-vulkan builds each device context with ``ctx->device = i``),
    so a registry index doubles as the device ordinal.

    Args:
        base: Library object exposing ``ggml_backend_reg_dev_count``,
            ``ggml_backend_reg_dev_get`` and ``ggml_backend_dev_type``.
        lib: Library object exposing ``ggml_backend_vk_reg``.
        count: Length of the returned list (number of devices to describe).

    Returns:
        A list of ``count`` booleans. Detection is best-effort: an entry is
        ``True`` only when its device type was read and equals IGPU. If
        anything fails, every entry not resolved yet stays ``False``, so a
        discrete card is never over-capped just because its type couldn't
        be read.
    """
    is_igpu = [False] * count
    # (library, symbol, restype, argtypes) for every C entry point used below.
    prototypes = (
        (lib, "ggml_backend_vk_reg", ctypes.c_void_p, []),
        (base, "ggml_backend_reg_dev_count", ctypes.c_size_t, [ctypes.c_void_p]),
        (
            base,
            "ggml_backend_reg_dev_get",
            ctypes.c_void_p,
            [ctypes.c_void_p, ctypes.c_size_t],
        ),
        (base, "ggml_backend_dev_type", ctypes.c_int, [ctypes.c_void_p]),
    )
    try:
        for library, symbol, restype, argtypes in prototypes:
            entry_point = getattr(library, symbol)
            entry_point.restype = restype
            entry_point.argtypes = argtypes

        registry = lib.ggml_backend_vk_reg()
        if registry:
            known = base.ggml_backend_reg_dev_count(registry)
            # Never index past the caller's device count or the registry's.
            for ordinal in range(min(count, known)):
                device = base.ggml_backend_reg_dev_get(registry, ordinal)
                if device:
                    kind = base.ggml_backend_dev_type(device)
                    is_igpu[ordinal] = kind == _GGML_BACKEND_DEVICE_TYPE_IGPU
    except Exception:
        # Deliberately broad: a missing symbol or a failing registry call
        # must degrade to "discrete" rather than crash the probe, so the
        # memory readings still get through.
        pass
    return is_igpu
|
|
||
|
|
||
def main() -> int:
    """Print free memory and iGPU status for every ggml Vulkan device.

    Loads the ggml base and Vulkan libraries from the directory given as
    ``sys.argv[1]`` and writes one ``<idx>\\t<free_bytes>\\t<is_igpu>`` row per
    device to stdout, newline-separated with no trailing newline.

    Returns:
        ``0`` once the rows are written, or when no directory argument was
        supplied (nothing is printed then). ``1`` when a library cannot be
        loaded or does not export the Vulkan entry points this probe needs;
        the reason is written to stderr.
    """
    if len(sys.argv) < 2:
        return 0
    bindir = sys.argv[1]

    if sys.platform == "win32":
        base_name, vk_name = "ggml-base.dll", "ggml-vulkan.dll"
        try:
            # Put bindir on the DLL search path before loading from it.
            os.add_dll_directory(bindir)
        except Exception:
            # Best-effort: if the directory cannot be registered, the load
            # below still runs and reports its own failure if there is one.
            pass
    else:
        base_name, vk_name = "libggml-base.so", "libggml-vulkan.so"

    try:
        base = ctypes.CDLL(os.path.join(bindir, base_name), mode = ctypes.RTLD_GLOBAL)
        lib = ctypes.CDLL(os.path.join(bindir, vk_name), mode = ctypes.RTLD_GLOBAL)
    except OSError as e:
        print(f"ggml-vulkan load failed: {e}", file = sys.stderr)
        return 1

    # ctypes resolves a symbol on first attribute access and raises
    # AttributeError when the library does not export it. Report that the
    # same way as a load failure (message on stderr, exit status 1) instead
    # of dying with a traceback.
    try:
        get_device_count = lib.ggml_backend_vk_get_device_count
        get_device_memory = lib.ggml_backend_vk_get_device_memory
    except AttributeError as e:
        print(f"ggml-vulkan symbol lookup failed: {e}", file = sys.stderr)
        return 1

    get_device_count.restype = ctypes.c_int
    get_device_count.argtypes = []
    get_device_memory.restype = None
    get_device_memory.argtypes = [
        ctypes.c_int,
        ctypes.POINTER(ctypes.c_size_t),
        ctypes.POINTER(ctypes.c_size_t),
    ]

    count = get_device_count()
    igpu = _igpu_flags(base, lib, count)
    rows = []
    for i in range(count):
        free, total = ctypes.c_size_t(0), ctypes.c_size_t(0)
        # total is a required out-param of the C call but unused: the reader
        # leaves a flat per-device margin, not a fraction of total.
        get_device_memory(i, ctypes.byref(free), ctypes.byref(total))
        rows.append(f"{i}\t{free.value}\t{int(igpu[i])}")
    sys.stdout.write("\n".join(rows))
    return 0
|
|
||
|
|
||
if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the exit status.
    sys.exit(main())
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.