tetherto · maxim-smotrov · Feb 28, 2026 · Feb 28, 2026
diff --git a/.github/workflows/benchmark-qvac-lib-infer-llamacpp-embed.yml b/.github/workflows/benchmark-qvac-lib-infer-llamacpp-embed.yml
@@ -422,11 +422,12 @@ jobs:
       - name: Display results summary
         if: always()
         shell: bash
-        working-directory: ${{ env.WORKDIR }}
         run: |
-          LATEST_MD=$(ls -t benchmarks/results/*.md 2>/dev/null | head -n 1 || true)
-          if [[ -n "${LATEST_MD:-}" ]]; then
-            cat "$LATEST_MD" >> "$GITHUB_STEP_SUMMARY"
+          if [ -d "$WORKDIR/benchmarks/results" ]; then  # no results directory: nothing to publish
+            LATEST_MD=$(find "$WORKDIR/benchmarks/results" -type f -name "*.md" -mtime -1 -exec ls -t {} + 2>/dev/null | head -n 1 || true)  # newest report from the last day, empty if none
+            if [[ -n "$LATEST_MD" ]]; then
+              cat "$LATEST_MD" >> "$GITHUB_STEP_SUMMARY"
+            fi
           fi
 
       - name: Upload benchmark results

diff --git a/.github/workflows/benchmark-qvac-lib-infer-llamacpp-llm.yml b/.github/workflows/benchmark-qvac-lib-infer-llamacpp-llm.yml
@@ -455,9 +455,11 @@ jobs:
         shell: bash
         working-directory: ${{ inputs.workdir }}
         run: |
-          LATEST_MD=$(ls -t benchmarks/results/*.md 2>/dev/null | head -n 1 || true)
-          if [[ -n "${LATEST_MD:-}" ]]; then
-            cat "$LATEST_MD" >> "$GITHUB_STEP_SUMMARY"
+          if [ -d "benchmarks/results" ]; then  # no results directory: nothing to publish
+            LATEST_MD=$(find benchmarks/results -type f -name "*.md" -mtime -1 -exec ls -t {} + 2>/dev/null | head -n 1 || true)  # newest report from the last day, empty if none
+            if [[ -n "$LATEST_MD" ]]; then
+              cat "$LATEST_MD" >> "$GITHUB_STEP_SUMMARY"
+            fi
           fi
 
       - name: Upload benchmark results

diff --git a/packages/qvac-lib-infer-llamacpp-embed/benchmarks/client/model_handler.py b/packages/qvac-lib-infer-llamacpp-embed/benchmarks/client/model_handler.py
@@ -6,6 +6,7 @@
 import os
 import time
 import yaml
+from types import SimpleNamespace
 from sentence_transformers import SentenceTransformer
 from huggingface_hub import hf_hub_download, list_repo_files
 
@@ -511,6 +512,20 @@ def __init__(self, model_name: str = "thenlper/gte-large"):
         self.tags = ["sentence-transformers", "embedding"]
 
 
+class _TokenizerProxyModule:
+    """Holder for a bare ``torch.nn.Module`` carrying a stub ``tokenizer.vocab``, used to satisfy SentenceTransformer tokenizer access."""
+
+    def __init__(self, vocab_size: int):
+        import torch  # imported inside the constructor rather than at module level
+
+        self._module = torch.nn.Module()
+        self._module.tokenizer = SimpleNamespace(vocab=range(vocab_size))  # a range supplies a length only, not real tokens
+
+    @property
+    def module(self):  # the nn.Module instance that carries the stub tokenizer
+        return self._module
+
+
 class MTEBModelWrapper(SentenceTransformer):
     """
     Wrapper to make QvacEmbedHandler compatible with MTEB.
@@ -522,6 +537,7 @@ class MTEBModelWrapper(SentenceTransformer):
     # Default embedding dimension (GTE-large = 1024)
     _embedding_dim: int = 1024
     _max_seq_length: int = 512
+    _vocab_size: int = 30522  # GTE/BERT-family default tokenizer vocab size
 
     def __init__(self, handler, batch_size: int = 32, embedding_dim: int = 1024, max_seq_length: int = 512):
         """
@@ -553,6 +569,11 @@ def __init__(self, handler, batch_size: int = 32, embedding_dim: int = 1024, max
 
         # Required for MTEB metadata extraction
         self.model_card_data = MockModelCardData(self.model_name_or_path)
+        # Newer MTEB versions estimate embedding parameters via len(model.tokenizer.vocab).
+        # SentenceTransformer resolves `tokenizer` via the first registered module, so we
+        # register a tiny proxy module exposing a vocab with a deterministic length.
+        tokenizer_proxy = _TokenizerProxyModule(self._vocab_size)
+        self.add_module("_mteb_tokenizer_proxy", tokenizer_proxy.module)
 
     @property
     def max_seq_length(self) -> int: