diff --git a/.github/workflows/benchmark-qvac-lib-infer-llamacpp-embed.yml b/.github/workflows/benchmark-qvac-lib-infer-llamacpp-embed.yml
index e8ffcd1bbf..e555b06b04 100644
--- a/.github/workflows/benchmark-qvac-lib-infer-llamacpp-embed.yml
+++ b/.github/workflows/benchmark-qvac-lib-infer-llamacpp-embed.yml
@@ -422,11 +422,13 @@ jobs:
       - name: Display results summary
         if: always()
         shell: bash
-        working-directory: ${{ env.WORKDIR }}
         run: |
-          LATEST_MD=$(ls -t benchmarks/results/*.md 2>/dev/null | head -n 1 || true)
-          if [[ -n "${LATEST_MD:-}" ]]; then
-            cat "$LATEST_MD" >> "$GITHUB_STEP_SUMMARY"
+          if [ -d "$WORKDIR/benchmarks/results" ]; then
+            # find's output order is arbitrary, so let `ls -t` put the newest report first; stay best-effort.
+            LATEST_MD=$(find "$WORKDIR/benchmarks/results" -type f -name "*.md" -mtime -1 -exec ls -t {} + 2>/dev/null | head -n 1 || true)
+            if [[ -n "$LATEST_MD" ]]; then
+              cat "$LATEST_MD" >> "$GITHUB_STEP_SUMMARY"
+            fi
           fi
 
       - name: Upload benchmark results
diff --git a/.github/workflows/benchmark-qvac-lib-infer-llamacpp-llm.yml b/.github/workflows/benchmark-qvac-lib-infer-llamacpp-llm.yml
index a27408ace9..882065a004 100644
--- a/.github/workflows/benchmark-qvac-lib-infer-llamacpp-llm.yml
+++ b/.github/workflows/benchmark-qvac-lib-infer-llamacpp-llm.yml
@@ -455,9 +455,12 @@ jobs:
         shell: bash
         working-directory: ${{ inputs.workdir }}
         run: |
-          LATEST_MD=$(ls -t benchmarks/results/*.md 2>/dev/null | head -n 1 || true)
-          if [[ -n "${LATEST_MD:-}" ]]; then
-            cat "$LATEST_MD" >> "$GITHUB_STEP_SUMMARY"
+          if [ -d "benchmarks/results" ]; then
+            # find's output order is arbitrary, so let `ls -t` put the newest report first; stay best-effort.
+            LATEST_MD=$(find benchmarks/results -type f -name "*.md" -mtime -1 -exec ls -t {} + 2>/dev/null | head -n 1 || true)
+            if [[ -n "$LATEST_MD" ]]; then
+              cat "$LATEST_MD" >> "$GITHUB_STEP_SUMMARY"
+            fi
           fi
 
       - name: Upload benchmark results
diff --git a/packages/qvac-lib-infer-llamacpp-embed/benchmarks/client/model_handler.py b/packages/qvac-lib-infer-llamacpp-embed/benchmarks/client/model_handler.py
index 4f21a14842..0c24124683 100644
--- a/packages/qvac-lib-infer-llamacpp-embed/benchmarks/client/model_handler.py
+++ b/packages/qvac-lib-infer-llamacpp-embed/benchmarks/client/model_handler.py
@@ -6,6 +6,7 @@
 import os
 import time
 import yaml
+from types import SimpleNamespace
 from sentence_transformers import SentenceTransformer
 from huggingface_hub import hf_hub_download, list_repo_files
 
@@ -511,6 +512,20 @@ def __init__(self, model_name: str = "thenlper/gte-large"):
         self.tags = ["sentence-transformers", "embedding"]
 
 
+class _TokenizerProxyModule:
+    """Minimal module used to satisfy SentenceTransformer tokenizer access."""
+
+    def __init__(self, vocab_size: int):
+        import torch
+
+        self._module = torch.nn.Module()
+        self._module.tokenizer = SimpleNamespace(vocab=range(vocab_size))
+
+    @property
+    def module(self):
+        return self._module
+
+
 class MTEBModelWrapper(SentenceTransformer):
     """
     Wrapper to make QvacEmbedHandler compatible with MTEB.
@@ -522,6 +537,7 @@ class MTEBModelWrapper(SentenceTransformer):
     # Default embedding dimension (GTE-large = 1024)
     _embedding_dim: int = 1024
     _max_seq_length: int = 512
+    _vocab_size: int = 30522  # GTE/BERT-family default tokenizer vocab size
 
     def __init__(self, handler, batch_size: int = 32, embedding_dim: int = 1024, max_seq_length: int = 512):
         """
@@ -553,6 +569,11 @@ def __init__(self, handler, batch_size: int = 32, embedding_dim: int = 1024, max
 
         # Required for MTEB metadata extraction
         self.model_card_data = MockModelCardData(self.model_name_or_path)
+        # Newer MTEB versions estimate embedding parameters via len(model.tokenizer.vocab).
+        # SentenceTransformer resolves `tokenizer` via the first registered module, so we
+        # register a tiny proxy module exposing a vocab with a deterministic length.
+        tokenizer_proxy = _TokenizerProxyModule(self._vocab_size)
+        self.add_module("_mteb_tokenizer_proxy", tokenizer_proxy.module)
 
     @property
     def max_seq_length(self) -> int: