heiervang-technologies · marksverdhei · Jun 7, 2026 · Jun 4, 2026 · Jun 4, 2026 · Jun 4, 2026
diff --git a/.github/workflows/build-msys.yml b/.github/workflows/build-msys.yml
@@ -27,8 +27,8 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - { sys: UCRT64,  env: ucrt-x86_64,  build: Release }
-          - { sys: CLANG64, env: clang-x86_64, build: Release }
+          - { sys: UCRT64,  env: ucrt-x86_64,  compiler: gcc,   build: Release }
+          - { sys: CLANG64, env: clang-x86_64, compiler: clang, build: Release }
 
     steps:
       - name: Clone
@@ -48,9 +48,7 @@ jobs:
           update: true
           msystem: ${{matrix.sys}}
           install: >-
-            base-devel
-            git
-            mingw-w64-${{matrix.env}}-toolchain
+            mingw-w64-${{matrix.env}}-${{matrix.compiler}}
             mingw-w64-${{matrix.env}}-cmake
             mingw-w64-${{matrix.env}}-openblas
 

diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
@@ -82,8 +82,8 @@ jobs:
             { "tag": "cpu", "dockerfile": ".devops/s390x.Dockerfile", "platforms": "linux/s390x", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04-s390x" },
             { "tag": "cuda cuda12", "dockerfile": ".devops/cuda.Dockerfile", "cuda_version": "12.8.1", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
             { "tag": "cuda cuda12", "dockerfile": ".devops/cuda.Dockerfile", "cuda_version": "12.8.1", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04-arm" },
-            { "tag": "cuda13", "dockerfile": ".devops/cuda.Dockerfile", "cuda_version": "13.1.1", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
-            { "tag": "cuda13", "dockerfile": ".devops/cuda.Dockerfile", "cuda_version": "13.1.1", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04-arm" },
+            { "tag": "cuda13", "dockerfile": ".devops/cuda.Dockerfile", "cuda_version": "13.3.0", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
+            { "tag": "cuda13", "dockerfile": ".devops/cuda.Dockerfile", "cuda_version": "13.3.0", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04-arm" },
             { "tag": "musa", "dockerfile": ".devops/musa.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
             { "tag": "intel", "dockerfile": ".devops/intel.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
             { "tag": "vulkan", "dockerfile": ".devops/vulkan.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04" },

diff --git a/.pi/gg/SYSTEM.md b/.pi/gg/SYSTEM.md
@@ -16,12 +16,12 @@ Pull requests (PRs):
 - New branch names are prefixed with "gg/"
 - Before opening a pull request, ask the user to confirm the description
 - When creating a pull request, look for the repository's PR template and follow it
-- For the AI usage disclosure section, write "YES. llama.cpp + pi + [MODEL]"
+- For the AI usage disclosure section, write "YES. pi:llama.cpp/[MODEL]"
 - Ask the user to tell you what model was used and write it in place of [MODEL]
 - Always create the pull requests in draft mode
 
 Commits:
-- On every commit that you make, include a "Assisted-by: llama.cpp:local pi" tag
+- On every commit that you make, include an "Assisted-by: pi:llama.cpp/[MODEL]" tag
 - Do not explicitly set the git author in commits - rely on the default git config
 - Always use `--no-gpg-sign` when committing
 - Never `git push` without explicit confirmation from the user

diff --git a/build-xcframework.sh b/build-xcframework.sh
@@ -130,14 +130,7 @@ setup_framework_structure() {
     # Create module map (common for all platforms)
     cat > ${module_path}module.modulemap << EOF
 framework module llama {
-    header "llama.h"
-    header "ggml.h"
-    header "ggml-alloc.h"
-    header "ggml-backend.h"
-    header "ggml-metal.h"
-    header "ggml-cpu.h"
-    header "ggml-blas.h"
-    header "gguf.h"
+    umbrella "Headers"
 
     link "c++"
     link framework "Accelerate"

diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt
@@ -78,6 +78,8 @@ add_library(${TARGET}
     hf-cache.cpp
     hf-cache.h
     http.h
+    imatrix-loader.cpp
+    imatrix-loader.h
     json-partial.cpp
     json-partial.h
     json-schema-to-grammar.cpp

diff --git a/common/arg.cpp b/common/arg.cpp
@@ -446,7 +446,13 @@ bool common_params_handle_models(common_params & params, llama_example curr_ex)
     opts.offline         = params.offline;
     opts.skip_download   = params.skip_download;
     opts.download_mtp    = spec_type_draft_mtp;
-    opts.download_mmproj = !params.no_mmproj;
+    opts.download_mmproj = !params.no_mmproj && params.mmproj.path.empty() && params.mmproj.url.empty();
+
+    // sub-models (draft, mmproj, vocoder) are explicitly specified by the user,
+    // so we should not auto-discover mtp/mmproj siblings for them
+    common_download_opts sub_opts = opts;
+    sub_opts.download_mtp    = false;
+    sub_opts.download_mmproj = false;
 
     try {
         auto res = common_params_handle_model(params.model, opts);
@@ -459,7 +465,7 @@ bool common_params_handle_models(common_params & params, llama_example curr_ex)
         // only download mmproj if the current example is using it
         for (const auto & ex : mmproj_examples) {
             if (curr_ex == ex) {
-                common_params_handle_model(params.mmproj, opts);
+                common_params_handle_model(params.mmproj, sub_opts);
                 break;
             }
         }
@@ -472,8 +478,8 @@ bool common_params_handle_models(common_params & params, llama_example curr_ex)
             params.speculative.draft.mparams.url.empty()) {
             params.speculative.draft.mparams.path = res.mtp.path;
         }
-        common_params_handle_model(params.speculative.draft.mparams, opts);
-        common_params_handle_model(params.vocoder.model,             opts);
+        common_params_handle_model(params.speculative.draft.mparams, sub_opts);
+        common_params_handle_model(params.vocoder.model,             sub_opts);
         return true;
     } catch (const common_skip_download_exception &) {
         return false;

diff --git a/common/chat-peg-parser.cpp b/common/chat-peg-parser.cpp
@@ -87,6 +87,8 @@ static std::string normalize_quotes_to_json(const std::string & input) {
     bool in_single_quoted = false;
     bool in_double_quoted = false;
 
+    auto is_word_char = [](char ch) { return std::isalnum(static_cast<unsigned char>(ch)) || ch == '_'; };
+
     for (size_t i = 0; i < input.size(); ++i) {
         char c = input[i];
 
@@ -151,6 +153,29 @@ static std::string normalize_quotes_to_json(const std::string & input) {
                 in_single_quoted = true;
                 result += '"';
             }
+        } else if (!in_single_quoted && !in_double_quoted && (c == 'T' || c == 'F' || c == 'N') &&
+                   (i == 0 || !is_word_char(input[i - 1]))) {
+            // Python literals -> JSON; prefix match keeps streamed partials monotonic.
+            static constexpr std::pair<std::string_view, std::string_view> literals[] = {
+                { "True", "true" }, { "False", "false" }, { "None", "null" },
+            };
+            size_t n = 0;
+            while (i + n < input.size() && is_word_char(input[i + n])) {
+                ++n;
+            }
+            std::string_view token(input.data() + i, n);
+            bool matched = false;
+            for (const auto & [py, js] : literals) {
+                if (py.substr(0, n) == token) {
+                    result += js.substr(0, n);
+                    i += n - 1;
+                    matched = true;
+                    break;
+                }
+            }
+            if (!matched) {
+                result += c;
+            }
         } else {
             result += c;
         }
@@ -353,12 +378,8 @@ void common_chat_peg_mapper::map(const common_peg_ast_node & node) {
             }
             value_to_add += escape_json_string_inner(value_content);
         } else if (!value_content.empty()) {
-            // For potential containers, normalize Python-style single quotes to JSON double quotes
-            bool is_potential_container = value_content[0] == '[' || value_content[0] == '{';
-            if (is_potential_container) {
-                value_content = normalize_container_value(value_content);
-            }
-            value_to_add += value_content;
+            // Pythonic scalars/containers -> JSON.
+            value_to_add += normalize_container_value(value_content);
         }
 
         args_target() += value_to_add;
@@ -466,11 +487,34 @@ common_peg_parser common_chat_peg_builder::standard_constructed_tools(
     return force_tool_calls ? section : optional(section);
 }
 
+// Like python_value(), but leaf values also accept JSON-cased true/false/null via json_bool()/json_null(); used by LFM2/LFM2.5
+common_peg_parser common_chat_peg_builder::python_or_json_value() {
+    return rule("python-or-json-value", [this]() {
+        auto ws    = space();
+        auto value = python_or_json_value();  // self-reference so dicts/arrays can nest values of this same rule
+
+        auto member  = sequence({ python_string(), ws, literal(":"), ws, value });  // dict keys must be python_string()
+        auto members = sequence({ member, zero_or_more(sequence({ ws, literal(","), ws, member })) });
+        auto dict    = rule("python-or-json-dict", [&]() {  // "{" then either "}" or members followed by "}"
+            return sequence({ literal("{"), ws, choice({ literal("}"), sequence({ members, ws, literal("}") }) }), ws });
+        });
+
+        auto elements = sequence({ value, zero_or_more(sequence({ literal(","), ws, value })) });  // NOTE(review): unlike members, no ws before ","; confirm this is intended
+        auto array    = rule("python-or-json-array", [&]() {  // "[" then either "]" or elements followed by "]"
+            return sequence({ literal("["), ws, choice({ literal("]"), sequence({ elements, ws, literal("]") }) }), ws });
+        });
+
+        return choice({ dict, array, python_string(), python_number(),
+                        python_bool(), python_null(), json_bool(), json_null() });  // alternatives listed containers first, JSON-cased literals last
+    });
+}
+
 // Python-style tool calls: name(arg1="value1", arg2=123)
 // Used only by LFM2 for now, so we don't merge it into autoparser
 common_peg_parser common_chat_peg_builder::python_style_tool_calls(
     const ordered_json & tools,
-    bool                 parallel_tool_calls) {
+    bool                 parallel_tool_calls,
+    bool                 allow_json_literals) {
     if (!tools.is_array() || tools.empty()) {
         return eps();
     }
@@ -504,7 +548,7 @@ common_peg_parser common_chat_peg_builder::python_style_tool_calls(
                 if (is_string_type) {
                     arg_value_parser = string_value_parser;
                 } else {
-                    arg_value_parser = tool_arg_value(python_value());
+                    arg_value_parser = tool_arg_value(allow_json_literals ? python_or_json_value() : python_value());
                 }
 
                 // Full argument: name="value" or name=value

diff --git a/common/chat-peg-parser.h b/common/chat-peg-parser.h
@@ -132,9 +132,13 @@ class common_chat_peg_builder : public common_peg_parser_builder {
     // Helper for Python-style function call format: name(arg1="value1", arg2=123)
     // Used by LFM2 and similar templates
     common_peg_parser python_style_tool_calls(const nlohmann::ordered_json & tools,
-                                              bool                           parallel_tool_calls);
+                                              bool                           parallel_tool_calls,
+                                              bool                           allow_json_literals);
 
   private:
+    // Python values plus JSON true/false/null.
+    common_peg_parser python_or_json_value();
+
     // Implementation helpers for standard_json_tools — one per JSON tool call layout mode
     common_peg_parser build_json_tools_function_is_key(const nlohmann::ordered_json & tools,
                                                        const std::string &            args_key,
@@ -195,4 +199,3 @@ struct tagged_peg_parser {
 
 tagged_peg_parser build_tagged_peg_parser(
     const std::function<common_peg_parser(common_peg_parser_builder & builder)> & fn);
-