From c1c3d99ef61bb37f2d6af7f02ebd57a6fc770498 Mon Sep 17 00:00:00 2001 From: Sam Malayek Date: Fri, 31 Oct 2025 17:42:24 -0700 Subject: [PATCH 1/3] Add e2e tests for embedding raw flag --- .github/workflows/embeddings.yml | 59 ++++++++++ examples/tests/__init__.py | 0 examples/tests/test_embedding.py | 184 +++++++++++++++++++++++++++++++ 3 files changed, 243 insertions(+) create mode 100644 .github/workflows/embeddings.yml create mode 100644 examples/tests/__init__.py create mode 100644 examples/tests/test_embedding.py diff --git a/.github/workflows/embeddings.yml b/.github/workflows/embeddings.yml new file mode 100644 index 0000000000000..c530a696d6f9e --- /dev/null +++ b/.github/workflows/embeddings.yml @@ -0,0 +1,59 @@ +# Embedding CLI build and tests +name: Embedding CLI + +on: + workflow_dispatch: + push: + branches: + - feature/* + - master + paths: + - '.github/workflows/embeddings.yml' + - 'examples/embedding/**' + - 'examples/tests/**' + pull_request: + types: [opened, synchronize, reopened] + paths: + - '.github/workflows/embeddings.yml' + - 'examples/embedding/**' + - 'examples/tests/**' + +jobs: + embedding-cli-tests: + runs-on: ubuntu-latest + + steps: + - name: Install system deps + run: | + sudo apt-get update + sudo apt-get -y install \ + build-essential \ + cmake \ + curl \ + libcurl4-openssl-dev \ + python3-pip + + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install Python deps + run: | + pip install -r requirements.txt || echo "No extra requirements found" + pip install pytest + + - name: Build llama-embedding + run: | + cmake -B build \ + -DCMAKE_BUILD_TYPE=Release + cmake --build build --target llama-embedding -j $(nproc) + + - name: Run embedding tests + run: | + pytest -v examples/tests diff --git a/examples/tests/__init__.py b/examples/tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/examples/tests/test_embedding.py b/examples/tests/test_embedding.py new file mode 100644 index 0000000000000..414b01d4d475f --- /dev/null +++ b/examples/tests/test_embedding.py @@ -0,0 +1,184 @@ +import os, json, subprocess, hashlib +from pathlib import Path +import numpy as np +import pytest + +# --------------------------------------------------------------------------- +# Configuration constants +# --------------------------------------------------------------------------- + +EPS = 1e-3 +REPO_ROOT = Path(__file__).resolve().parents[2] +EXE = REPO_ROOT / ("build/bin/llama-embedding.exe" if os.name == "nt" else "build/bin/llama-embedding") +DEFAULT_ENV = {**os.environ, "LLAMA_CACHE": os.environ.get("LLAMA_CACHE", "tmp")} +SEED = "42" + + +# --------------------------------------------------------------------------- +# Model setup helpers +# --------------------------------------------------------------------------- + +def get_model_hf_params(): + """Default lightweight embedding model.""" + return { + "hf_repo": "ggml-org/embeddinggemma-300M-qat-q4_0-GGUF", + "hf_file": "embeddinggemma-300M-qat-Q4_0.gguf", + } + + +@pytest.fixture(scope="session") +def embedding_model(): + """Download/cache model once per session.""" + exe_path = EXE + if not exe_path.exists(): + alt = REPO_ROOT / "build/bin/Release/llama-embedding.exe" + if alt.exists(): + exe_path = alt + else: + raise FileNotFoundError(f"llama-embedding binary not found under {REPO_ROOT}/build/bin") + + params = get_model_hf_params() + cmd = [ + str(exe_path), + "-hfr", params["hf_repo"], + "-hff", params["hf_file"], + "--ctx-size", "16", + "--embd-output-format", "json", + "--no-warmup", + "--threads", "1", + "--seed", SEED, + ] + res = subprocess.run(cmd, input="ok", capture_output=True, text=True, env=DEFAULT_ENV) + assert res.returncode == 0, f"model download failed: {res.stderr}" + return params + + +# --------------------------------------------------------------------------- +# Utility functions +# --------------------------------------------------------------------------- + +def run_embedding(text: str, fmt: str = "raw", params=None) -> str: + """Runs llama-embedding and returns stdout (string).""" + exe_path = EXE + if not exe_path.exists(): + raise FileNotFoundError(f"Missing binary: {exe_path}") + params = params or get_model_hf_params() + cmd = [ + str(exe_path), + "-hfr", params["hf_repo"], + "-hff", params["hf_file"], + "--ctx-size", "2048", + "--embd-output-format", fmt, + "--threads", "1", + "--seed", SEED, + ] + result = subprocess.run(cmd, input=text, capture_output=True, text=True, env=DEFAULT_ENV) + if result.returncode: + raise AssertionError(f"embedding failed ({result.returncode}):\n{result.stderr[:400]}") + out = result.stdout.strip() + assert out, f"empty output for text={text!r}, fmt={fmt}" + return out + + +def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float: + return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))) + + +def embedding_hash(vec: np.ndarray) -> str: + """Return short deterministic signature for regression tracking.""" + return hashlib.sha256(vec[:8].tobytes()).hexdigest()[:16] + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + +# Register custom mark so pytest doesn't warn about it +pytestmark = pytest.mark.filterwarnings("ignore::pytest.PytestUnknownMarkWarning") + +@pytest.mark.slow +@pytest.mark.parametrize("fmt", ["raw", "json"]) +@pytest.mark.parametrize("text", ["hello world", "hi 🌎", "line1\nline2\nline3"]) +def test_embedding_runs_and_finite(fmt, text, embedding_model): + """Ensure embeddings run end-to-end and produce finite floats.""" + out = run_embedding(text, fmt, embedding_model) + floats = ( + np.array(out.split(), float) + if fmt == "raw" + else np.array(json.loads(out)["data"][0]["embedding"], float) + ) + assert len(floats) > 100 + assert np.all(np.isfinite(floats)), f"non-finite values in {fmt} output" + assert 0.1 < np.linalg.norm(floats) < 10 + + +def test_raw_vs_json_consistency(embedding_model): + """Compare raw vs JSON embedding output for same text.""" + text = "hello world" + raw = np.array(run_embedding(text, "raw", embedding_model).split(), float) + jsn = np.array(json.loads(run_embedding(text, "json", embedding_model))["data"][0]["embedding"], float) + + assert raw.shape == jsn.shape + cos = cosine_similarity(raw, jsn) + assert cos > 0.999, f"divergence: cos={cos:.4f}" + assert embedding_hash(raw) == embedding_hash(jsn), "hash mismatch → possible nondeterminism" + + +def test_empty_input_deterministic(embedding_model): + """Empty input should yield finite, deterministic vector.""" + v1 = np.array(run_embedding("", "raw", embedding_model).split(), float) + v2 = np.array(run_embedding("", "raw", embedding_model).split(), float) + assert np.all(np.isfinite(v1)) + cos = cosine_similarity(v1, v2) + assert cos > 0.9999, f"Empty input not deterministic (cos={cos:.5f})" + assert 0.1 < np.linalg.norm(v1) < 10 + + +@pytest.mark.slow +def test_very_long_input_stress(embedding_model): + """Stress test: large input near context window.""" + text = "lorem " * 2000 + vec = np.array(run_embedding(text, "raw", embedding_model).split(), float) + assert len(vec) > 100 + assert np.isfinite(np.linalg.norm(vec)) + + +@pytest.mark.parametrize( + "text", + [" ", "\n\n\n", "123 456 789"], +) +def test_low_information_inputs_stable(text, embedding_model): + """Whitespace/numeric inputs should yield stable embeddings.""" + v1 = np.array(run_embedding(text, "raw", embedding_model).split(), float) + v2 = np.array(run_embedding(text, "raw", embedding_model).split(), float) + cos = cosine_similarity(v1, v2) + assert cos > 0.999, f"unstable embedding for {text!r}" + + +@pytest.mark.parametrize("flag", ["--no-such-flag", "--help"]) +def test_invalid_or_help_flag(flag): + """Invalid flags should fail; help should succeed.""" + res = subprocess.run([str(EXE), flag], capture_output=True, text=True) + if flag == "--no-such-flag": + assert res.returncode != 0 + assert any(k in res.stderr.lower() for k in ("error", "invalid", "unknown")) + else: + assert res.returncode == 0 + assert "usage" in (res.stdout.lower() + res.stderr.lower()) + + +@pytest.mark.parametrize("fmt", ["raw", "json"]) +@pytest.mark.parametrize("text", ["deterministic test", "deterministic test again"]) +def test_repeated_call_consistent(fmt, text, embedding_model): + """Same input → same hash across repeated runs.""" + out1 = run_embedding(text, fmt, embedding_model) + out2 = run_embedding(text, fmt, embedding_model) + + if fmt == "json": + v1 = np.array(json.loads(out1)["data"][0]["embedding"], float) + v2 = np.array(json.loads(out2)["data"][0]["embedding"], float) + else: + v1 = np.array(out1.split(), float) + v2 = np.array(out2.split(), float) + + assert embedding_hash(v1) == embedding_hash(v2) From 2de1e6871f99d324ebfa703495cb06a6ed0a2521 Mon Sep 17 00:00:00 2001 From: Sam Malayek Date: Sun, 2 Nov 2025 13:55:01 -0800 Subject: [PATCH 2/3] Increase scope of embedding cli tests --- .../{embeddings.yml => embedding.yml} | 26 ++++++++++++------- examples/tests/__init__.py | 0 .../e2e/embedding/test_embedding_cli.py | 10 ++++--- 3 files changed, 23 insertions(+), 13 deletions(-) rename .github/workflows/{embeddings.yml => embedding.yml} (71%) delete mode 100644 examples/tests/__init__.py rename examples/tests/test_embedding.py => tests/e2e/embedding/test_embedding_cli.py (98%) diff --git a/.github/workflows/embeddings.yml b/.github/workflows/embedding.yml similarity index 71% rename from .github/workflows/embeddings.yml rename to .github/workflows/embedding.yml index c530a696d6f9e..db566b36d5f77 100644 --- a/.github/workflows/embeddings.yml +++ b/.github/workflows/embedding.yml @@ -4,19 +4,25 @@ name: Embedding CLI on: workflow_dispatch: push: - branches: - - feature/* - - master + branches: [master, feature/**] paths: - - '.github/workflows/embeddings.yml' - - 'examples/embedding/**' - - 'examples/tests/**' + - '.github/workflows/embedding.yml' + - 'examples/**' + - 'src/**' + - 'ggml/**' + - 'include/**' + - '**/CMakeLists.txt' + - 'tests/e2e/embedding/**' pull_request: types: [opened, synchronize, reopened] paths: - - '.github/workflows/embeddings.yml' - - 'examples/embedding/**' - - 'examples/tests/**' + - '.github/workflows/embedding.yml' + - 'examples/**' + - 'src/**' + - 'ggml/**' + - 'include/**' + - '**/CMakeLists.txt' + - 'tests/e2e/embedding/**' jobs: embedding-cli-tests: @@ -56,4 +62,4 @@ jobs: - name: Run embedding tests run: | - pytest -v examples/tests + pytest -v tests/e2e/embedding diff --git a/examples/tests/__init__.py b/examples/tests/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/examples/tests/test_embedding.py b/tests/e2e/embedding/test_embedding_cli.py similarity index 98% rename from examples/tests/test_embedding.py rename to tests/e2e/embedding/test_embedding_cli.py index 414b01d4d475f..cf4731d6bfd65 100644 --- a/examples/tests/test_embedding.py +++ b/tests/e2e/embedding/test_embedding_cli.py @@ -1,14 +1,17 @@ -import os, json, subprocess, hashlib +import json +import hashlib +import os +import pytest +import subprocess from pathlib import Path import numpy as np -import pytest # --------------------------------------------------------------------------- # Configuration constants # --------------------------------------------------------------------------- EPS = 1e-3 -REPO_ROOT = Path(__file__).resolve().parents[2] +REPO_ROOT = Path(__file__).resolve().parents[3] EXE = REPO_ROOT / ("build/bin/llama-embedding.exe" if os.name == "nt" else "build/bin/llama-embedding") DEFAULT_ENV = {**os.environ, "LLAMA_CACHE": os.environ.get("LLAMA_CACHE", "tmp")} SEED = "42" @@ -96,6 +99,7 @@ def embedding_hash(vec: np.ndarray) -> str: # Register custom mark so pytest doesn't warn about it pytestmark = pytest.mark.filterwarnings("ignore::pytest.PytestUnknownMarkWarning") + @pytest.mark.slow @pytest.mark.parametrize("fmt", ["raw", "json"]) @pytest.mark.parametrize("text", ["hello world", "hi 🌎", "line1\nline2\nline3"]) From 5ce810ee516d87134bd5a1536a38a9564204c26a Mon Sep 17 00:00:00 2001 From: Sam Malayek Date: Mon, 3 Nov 2025 00:35:33 -0800 Subject: [PATCH 3/3] Update test and workflow to match new RFC --- .github/workflows/embedding.yml | 193 +++++++++++++++-- tests/e2e/embedding/test_embedding_cli.py | 247 ++++++++++++++-------- 2 files changed, 329 insertions(+), 111 deletions(-) diff --git a/.github/workflows/embedding.yml b/.github/workflows/embedding.yml index db566b36d5f77..f3689461774af 100644 --- a/.github/workflows/embedding.yml +++ b/.github/workflows/embedding.yml @@ -25,41 +25,196 @@ on: - 'tests/e2e/embedding/**' jobs: - embedding-cli-tests: + embedding-cli-tests-linux: runs-on: ubuntu-latest + env: + LLAMA_CACHE: tmp # stable path for cache + EMBD_TEST_DEBUG: "1" steps: + - uses: actions/checkout@v4 + with: { fetch-depth: 0 } + + - name: Restore model cache + uses: actions/cache@v4 + with: + path: | + ~/.cache/llama.cpp + tmp + key: hf-${{ runner.os }}-embeddinggemma-300M-q4_0-v1 + restore-keys: | + hf-${{ runner.os }}- + hf- + - name: Install system deps run: | sudo apt-get update sudo apt-get -y install \ - build-essential \ - cmake \ - curl \ - libcurl4-openssl-dev \ - python3-pip - - - name: Checkout repository - uses: actions/checkout@v4 - with: - fetch-depth: 0 + build-essential cmake curl libcurl4-openssl-dev python3-pip - name: Set up Python uses: actions/setup-python@v5 - with: - python-version: '3.11' + with: { python-version: '3.11' } - name: Install Python deps run: | - pip install -r requirements.txt || echo "No extra requirements found" - pip install pytest + python -m pip install -r requirements.txt || echo "No extra requirements found" + python -m pip install pytest numpy pytest-timeout - name: Build llama-embedding run: | - cmake -B build \ - -DCMAKE_BUILD_TYPE=Release + cmake -B build -DCMAKE_BUILD_TYPE=Release cmake --build build --target llama-embedding -j $(nproc) - - name: Run embedding tests + - name: Pre-download tiny model (retry x3 on network) + run: | + set -e + tries=0 + until ./build/bin/llama-embedding \ + -hfr ggml-org/embeddinggemma-300M-qat-q4_0-GGUF \ + -hff embeddinggemma-300M-qat-Q4_0.gguf \ + --ctx-size 16 --embd-output-format json --no-warmup --threads 1 --seed 42 <<< "ok"; do + tries=$((tries+1)) + if [ $tries -ge 3 ]; then + echo "Pre-download failed after $tries attempts" + exit 1 + fi + echo "Retrying download ($tries/3)..." + sleep 3 + done + + - name: Run embedding tests (30s per-test cap) + shell: bash + run: | + set -o pipefail + pytest -v tests/e2e/embedding \ + --timeout=30 \ + --durations=10 \ + --junitxml=pytest-report.xml | tee pytest-output.txt + + - name: Upload test artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: linux-embedding-tests + path: | + pytest-output.txt + pytest-report.xml + + - name: Save model cache + if: always() + uses: actions/cache@v4 + with: + path: | + ~/.cache/llama.cpp + tmp + key: hf-${{ runner.os }}-embeddinggemma-300M-q4_0-v1 + + embedding-cli-tests-windows: + runs-on: windows-latest + continue-on-error: true + env: + LLAMA_CACHE: tmp + EMBD_TEST_DEBUG: "1" + + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: { python-version: '3.11' } + + # --- vcpkg plain bootstrap (no actions, no submodules) --- + - name: Bootstrap vcpkg + shell: pwsh + run: | + $env:VCPKG_ROOT = "$env:RUNNER_TEMP\vcpkg" + git clone https://github.com/microsoft/vcpkg $env:VCPKG_ROOT + & "$env:VCPKG_ROOT\bootstrap-vcpkg.bat" -disableMetrics + echo "VCPKG_ROOT=$env:VCPKG_ROOT" | Out-File -FilePath $env:GITHUB_ENV -Append + + - name: Install curl with OpenSSL via vcpkg + shell: pwsh + run: | + & "$env:VCPKG_ROOT\vcpkg.exe" install curl[openssl]:x64-windows + + - name: Restore model cache + uses: actions/cache@v4 + with: + path: | + $HOME/.cache/llama.cpp + tmp + key: hf-${{ runner.os }}-embeddinggemma-300M-q4_0-v1 + restore-keys: | + hf-${{ runner.os }}- + hf- + + - name: Install Python deps + run: pip install pytest numpy + + - name: Configure & Build (Release) + shell: pwsh + run: | + cmake -B build -DCMAKE_BUILD_TYPE=Release ` + -DCMAKE_TOOLCHAIN_FILE="$env:VCPKG_ROOT\scripts\buildsystems\vcpkg.cmake" + cmake --build build --target llama-embedding --config Release -j 2 + + - name: Pre-download tiny model (retry x3) + shell: bash + run: | + set -e + tries=0 + until ./build/bin/Release/llama-embedding.exe \ + -hfr ggml-org/embeddinggemma-300M-qat-q4_0-GGUF \ + -hff embeddinggemma-300M-qat-Q4_0.gguf \ + --ctx-size 16 --embd-output-format json --no-warmup --threads 1 --seed 42 <<< "ok"; do + tries=$((tries+1)) + if [ $tries -ge 3 ]; then + echo "Pre-download failed after $tries attempts"; exit 1 + fi + echo "Retrying download ($tries/3)..."; sleep 3 + done + + - name: Run smoke tests + shell: bash + run: | + pytest -q tests/e2e/embedding -k raw_vs_json_consistency + + + + embedding-cli-tests-macos: + runs-on: macos-latest + continue-on-error: true + env: + LLAMA_CACHE: tmp + EMBD_TEST_DEBUG: "1" + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: { python-version: '3.11' } + + - name: Install Python deps + run: pip install pytest numpy + + - name: Build + run: | + cmake -B build -DCMAKE_BUILD_TYPE=Release + cmake --build build --target llama-embedding -j 3 + + - name: Pre-download tiny model (retry x3) + run: | + set -e + tries=0 + until ./build/bin/llama-embedding \ + -hfr ggml-org/embeddinggemma-300M-qat-q4_0-GGUF \ + -hff embeddinggemma-300M-qat-Q4_0.gguf \ + --ctx-size 16 --embd-output-format json --no-warmup --threads 1 --seed 42 <<< "ok"; do + tries=$((tries+1)) + if [ $tries -ge 3 ]; then + echo "Pre-download failed after $tries attempts"; exit 1 + fi + echo "Retrying download ($tries/3)..."; sleep 3 + done + + - name: Warm cache & run a tiny smoke run: | - pytest -v tests/e2e/embedding + ./build/bin/llama-embedding --help >/dev/null 2>&1 + pytest -q tests/e2e/embedding -k raw_vs_json_consistency diff --git a/tests/e2e/embedding/test_embedding_cli.py b/tests/e2e/embedding/test_embedding_cli.py index cf4731d6bfd65..80f986ec86bb6 100644 --- a/tests/e2e/embedding/test_embedding_cli.py +++ b/tests/e2e/embedding/test_embedding_cli.py @@ -1,10 +1,13 @@ import json import hashlib +import logging import os import pytest import subprocess from pathlib import Path import numpy as np +import time +from typing import Optional, List # --------------------------------------------------------------------------- # Configuration constants @@ -15,72 +18,111 @@ EXE = REPO_ROOT / ("build/bin/llama-embedding.exe" if os.name == "nt" else "build/bin/llama-embedding") DEFAULT_ENV = {**os.environ, "LLAMA_CACHE": os.environ.get("LLAMA_CACHE", "tmp")} SEED = "42" +ALLOWED_DIMS = {384, 768, 1024, 4096} +SMALL_CTX = 16 # preflight/cache +TEST_CTX = 1024 # main tests + +log = logging.getLogger(__name__) # --------------------------------------------------------------------------- -# Model setup helpers +# Shared helpers (single source of truth for command building) # --------------------------------------------------------------------------- -def get_model_hf_params(): - """Default lightweight embedding model.""" + +def resolve_exe() -> Path: + exe = EXE + if not exe.exists() and os.name == "nt": + alt = REPO_ROOT / "build/bin/Release/llama-embedding.exe" + if alt.exists(): + exe = alt + if not exe.exists(): + raise FileNotFoundError(f"llama-embedding not found under {REPO_ROOT}/build/bin") + return exe + + +def hf_params_default(): return { "hf_repo": "ggml-org/embeddinggemma-300M-qat-q4_0-GGUF", "hf_file": "embeddinggemma-300M-qat-Q4_0.gguf", } -@pytest.fixture(scope="session") -def embedding_model(): - """Download/cache model once per session.""" - exe_path = EXE - if not exe_path.exists(): - alt = REPO_ROOT / "build/bin/Release/llama-embedding.exe" - if alt.exists(): - exe_path = alt - else: - raise FileNotFoundError(f"llama-embedding binary not found under {REPO_ROOT}/build/bin") - - params = get_model_hf_params() +def build_cmd( + *, + exe: Path, + params: dict, + fmt: str, + threads: int, + ctx: int, + seed: str, + extra: Optional[List[str]] = None, # was: list[str] | None +) -> List[str]: # was: list[str] + assert fmt in {"raw", "json"}, f"unsupported fmt={fmt}" cmd = [ - str(exe_path), + str(exe), "-hfr", params["hf_repo"], "-hff", params["hf_file"], - "--ctx-size", "16", - "--embd-output-format", "json", - "--no-warmup", - "--threads", "1", - "--seed", SEED, + "--ctx-size", str(ctx), + "--embd-output-format", fmt, + "--threads", str(threads), + "--seed", seed, ] - res = subprocess.run(cmd, input="ok", capture_output=True, text=True, env=DEFAULT_ENV) - assert res.returncode == 0, f"model download failed: {res.stderr}" - return params + if extra: + cmd.extend(extra) + return cmd + + +def run_cmd(cmd: list[str], text: str, timeout: int = 60) -> str: + t0 = time.perf_counter() + res = subprocess.run(cmd, input=text, capture_output=True, text=True, + env=DEFAULT_ENV, timeout=timeout) + dur_ms = (time.perf_counter() - t0) * 1000.0 + if os.environ.get("EMBD_TEST_DEBUG") == "1": + log.debug("embedding cmd finished in %.1f ms", dur_ms) + + if res.returncode != 0: + raise AssertionError(f"embedding failed ({res.returncode}):\n{res.stderr[:400]}") + out = res.stdout.strip() + assert out, "empty stdout from llama-embedding" + return out + +# --------------------------------------------------------------------------- +# Session model preflight/cache +# --------------------------------------------------------------------------- +@pytest.fixture(scope="session") +def embedding_model(): + """Download/cache model once per session with a tiny ctx + no warmup.""" + exe = resolve_exe() + params = hf_params_default() + cmd = build_cmd( + exe=exe, params=params, fmt="json", + threads=1, ctx=SMALL_CTX, seed=SEED, + extra=["--no-warmup"], + ) + _ = run_cmd(cmd, text="ok") + return params + # --------------------------------------------------------------------------- # Utility functions # --------------------------------------------------------------------------- -def run_embedding(text: str, fmt: str = "raw", params=None) -> str: - """Runs llama-embedding and returns stdout (string).""" - exe_path = EXE - if not exe_path.exists(): - raise FileNotFoundError(f"Missing binary: {exe_path}") - params = params or get_model_hf_params() - cmd = [ - str(exe_path), - "-hfr", params["hf_repo"], - "-hff", params["hf_file"], - "--ctx-size", "2048", - "--embd-output-format", fmt, - "--threads", "1", - "--seed", SEED, - ] - result = subprocess.run(cmd, input=text, capture_output=True, text=True, env=DEFAULT_ENV) - if result.returncode: - raise AssertionError(f"embedding failed ({result.returncode}):\n{result.stderr[:400]}") - out = result.stdout.strip() - assert out, f"empty output for text={text!r}, fmt={fmt}" - return out + +def run_embedding( + text: str, + *, + fmt: str = "raw", + threads: int = 1, + ctx: int = TEST_CTX, + params: Optional[dict] = None, # was: dict | None + timeout: int = 60, +) -> str: + exe = resolve_exe() + params = params or hf_params_default() + cmd = build_cmd(exe=exe, params=params, fmt=fmt, threads=threads, ctx=ctx, seed=SEED) + return run_cmd(cmd, text, timeout=timeout) def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float: @@ -92,69 +134,65 @@ def embedding_hash(vec: np.ndarray) -> str: return hashlib.sha256(vec[:8].tobytes()).hexdigest()[:16] +def parse_vec(out: str, fmt: str) -> np.ndarray: + if fmt == "raw": + arr = np.array(out.split(), dtype=np.float32) + else: + arr = np.array(json.loads(out)["data"][0]["embedding"], dtype=np.float32) + return arr + # --------------------------------------------------------------------------- # Tests # --------------------------------------------------------------------------- + # Register custom mark so pytest doesn't warn about it pytestmark = pytest.mark.filterwarnings("ignore::pytest.PytestUnknownMarkWarning") -@pytest.mark.slow @pytest.mark.parametrize("fmt", ["raw", "json"]) @pytest.mark.parametrize("text", ["hello world", "hi 🌎", "line1\nline2\nline3"]) def test_embedding_runs_and_finite(fmt, text, embedding_model): - """Ensure embeddings run end-to-end and produce finite floats.""" - out = run_embedding(text, fmt, embedding_model) - floats = ( - np.array(out.split(), float) - if fmt == "raw" - else np.array(json.loads(out)["data"][0]["embedding"], float) - ) - assert len(floats) > 100 - assert np.all(np.isfinite(floats)), f"non-finite values in {fmt} output" - assert 0.1 < np.linalg.norm(floats) < 10 + out = run_embedding(text, fmt=fmt, threads=1, ctx=TEST_CTX, params=embedding_model) + vec = parse_vec(out, fmt) + assert vec.dtype == np.float32 + # dim & finiteness + assert len(vec) in ALLOWED_DIMS, f"unexpected dim={len(vec)}" + assert np.all(np.isfinite(vec)) + assert 0.1 < np.linalg.norm(vec) < 10 def test_raw_vs_json_consistency(embedding_model): - """Compare raw vs JSON embedding output for same text.""" text = "hello world" - raw = np.array(run_embedding(text, "raw", embedding_model).split(), float) - jsn = np.array(json.loads(run_embedding(text, "json", embedding_model))["data"][0]["embedding"], float) - + raw = parse_vec(run_embedding(text, fmt="raw", params=embedding_model), "raw") + jsn = parse_vec(run_embedding(text, fmt="json", params=embedding_model), "json") assert raw.shape == jsn.shape cos = cosine_similarity(raw, jsn) - assert cos > 0.999, f"divergence: cos={cos:.4f}" - assert embedding_hash(raw) == embedding_hash(jsn), "hash mismatch → possible nondeterminism" + assert cos > 0.999, f"raw/json divergence: cos={cos:.6f}" + assert embedding_hash(raw) == embedding_hash(jsn) def test_empty_input_deterministic(embedding_model): - """Empty input should yield finite, deterministic vector.""" - v1 = np.array(run_embedding("", "raw", embedding_model).split(), float) - v2 = np.array(run_embedding("", "raw", embedding_model).split(), float) + v1 = parse_vec(run_embedding("", fmt="raw", params=embedding_model), "raw") + v2 = parse_vec(run_embedding("", fmt="raw", params=embedding_model), "raw") assert np.all(np.isfinite(v1)) - cos = cosine_similarity(v1, v2) - assert cos > 0.9999, f"Empty input not deterministic (cos={cos:.5f})" - assert 0.1 < np.linalg.norm(v1) < 10 + assert embedding_hash(v1) == embedding_hash(v2) + assert cosine_similarity(v1, v2) > 0.99999 -@pytest.mark.slow def test_very_long_input_stress(embedding_model): """Stress test: large input near context window.""" text = "lorem " * 2000 - vec = np.array(run_embedding(text, "raw", embedding_model).split(), float) - assert len(vec) > 100 + vec = parse_vec(run_embedding(text, fmt="raw", params=embedding_model), "raw") + assert len(vec) in ALLOWED_DIMS assert np.isfinite(np.linalg.norm(vec)) -@pytest.mark.parametrize( - "text", - [" ", "\n\n\n", "123 456 789"], -) +@pytest.mark.parametrize("text", [" ", "\n\n\n", "123 456 789"]) def test_low_information_inputs_stable(text, embedding_model): """Whitespace/numeric inputs should yield stable embeddings.""" - v1 = np.array(run_embedding(text, "raw", embedding_model).split(), float) - v2 = np.array(run_embedding(text, "raw", embedding_model).split(), float) + v1 = parse_vec(run_embedding(text, fmt="raw", params=embedding_model), "raw") + v2 = parse_vec(run_embedding(text, fmt="raw", params=embedding_model), "raw") cos = cosine_similarity(v1, v2) assert cos > 0.999, f"unstable embedding for {text!r}" @@ -162,7 +200,8 @@ def test_low_information_inputs_stable(text, embedding_model): @pytest.mark.parametrize("flag", ["--no-such-flag", "--help"]) def test_invalid_or_help_flag(flag): """Invalid flags should fail; help should succeed.""" - res = subprocess.run([str(EXE), flag], capture_output=True, text=True) + exe = resolve_exe() + res = subprocess.run([str(exe), flag], capture_output=True, text=True, env=DEFAULT_ENV) if flag == "--no-such-flag": assert res.returncode != 0 assert any(k in res.stderr.lower() for k in ("error", "invalid", "unknown")) @@ -172,17 +211,41 @@ def test_invalid_or_help_flag(flag): @pytest.mark.parametrize("fmt", ["raw", "json"]) -@pytest.mark.parametrize("text", ["deterministic test", "deterministic test again"]) -def test_repeated_call_consistent(fmt, text, embedding_model): - """Same input → same hash across repeated runs.""" - out1 = run_embedding(text, fmt, embedding_model) - out2 = run_embedding(text, fmt, embedding_model) - - if fmt == "json": - v1 = np.array(json.loads(out1)["data"][0]["embedding"], float) - v2 = np.array(json.loads(out2)["data"][0]["embedding"], float) - else: - v1 = np.array(out1.split(), float) - v2 = np.array(out2.split(), float) - - assert embedding_hash(v1) == embedding_hash(v2) +def test_threads_two_similarity_vs_single(fmt, embedding_model): + text = "determinism vs threads" + single = parse_vec(run_embedding(text, fmt=fmt, threads=1, params=embedding_model), fmt) + multi = parse_vec(run_embedding(text, fmt=fmt, threads=2, params=embedding_model), fmt) + assert single.shape == multi.shape + cos = cosine_similarity(single, multi) + assert cos >= 0.999, f"threads>1 similarity too low: {cos:.6f}" + + +def test_json_shape_schema_minimal(embedding_model): + js = json.loads(run_embedding("schema check", fmt="json", params=embedding_model)) + assert isinstance(js, dict) + + # Top-level “object” (present in CLI) is optional for us + if "object" in js: + assert js["object"] in ("list", "embeddings", "embedding_list") + + # Required: data[0].embedding + index + assert "data" in js and isinstance(js["data"], list) and len(js["data"]) >= 1 + item0 = js["data"][0] + assert isinstance(item0, dict) + if "object" in item0: + assert item0["object"] in ("embedding",) + assert "index" in item0 and item0["index"] == 0 + assert "embedding" in item0 and isinstance(item0["embedding"], list) + assert len(item0["embedding"]) in ALLOWED_DIMS + + # Optional fields: tolerate absence in current CLI output + if "model" in js: + assert isinstance(js["model"], str) + if "dim" in js: + assert js["dim"] == len(item0["embedding"]) + usage = js.get("usage", {}) + if usage: + assert isinstance(usage, dict) + # if present, prompt_tokens should be int + if "prompt_tokens" in usage: + assert isinstance(usage["prompt_tokens"], int)