|
| 1 | +from __future__ import annotations |
| 2 | + |
| 3 | +import pytest |
| 4 | + |
| 5 | +pytest.importorskip("openai") |
| 6 | + |
| 7 | +from unittest.mock import patch |
| 8 | + |
| 9 | +import numpy as np |
| 10 | +from openai.types.create_embedding_response import CreateEmbeddingResponse |
| 11 | +from openai.types.embedding import Embedding as OpenAIEmbedding |
| 12 | + |
| 13 | +from daft.ai.openai import LMStudioProvider |
| 14 | +from daft.ai.protocols import TextEmbedder, TextEmbedderDescriptor |
| 15 | + |
| 16 | + |
@pytest.mark.parametrize(
    "model, embedding_dim",
    [
        ("text-embedding-qwen3-embedding-0.6b", 1024),
        ("text-embedding-nomic-embed-text-v1.5", 768),
    ],
)
def test_lm_studio_text_embedder(model, embedding_dim):
    """Check the LM Studio text-embedder descriptor and embedder against a mocked client.

    ``openai.resources.embeddings.Embeddings.create`` is replaced with a mock
    that returns one constant vector of length ``embedding_dim`` per input
    text. The test then verifies the descriptor metadata (provider, model,
    dimensions) and that ``embed_text`` yields one ``np.ndarray`` of the
    expected length for every input sentence.
    """
    passages = [
        "Alice was beginning to get very tired of sitting by her sister on the bank.",
        "So she was considering in her own mind (as well as she could, for the hot day made her feel very sleepy and stupid),",
        "whether the pleasure of making a daisy-chain would be worth the trouble of getting up and picking the daisies,",
        "when suddenly a White Rabbit with pink eyes ran close by her.",
        "There was nothing so very remarkable in that;",
        "nor did Alice think it so very much out of the way to hear the Rabbit say to itself, 'Oh dear! Oh dear! I shall be late!'",
    ]

    def fake_create(**kwargs):
        # The mock only inspects the ``input`` keyword: a list yields one
        # vector per element, anything else is treated as a single text.
        payload = kwargs.get("input")
        count = len(payload) if isinstance(payload, list) else 1
        vectors = [
            OpenAIEmbedding(embedding=[0.1] * embedding_dim, index=position, object="embedding")
            for position in range(count)
        ]
        return CreateEmbeddingResponse(
            data=vectors,
            model=model,
            object="list",
            usage={"prompt_tokens": 0, "total_tokens": 0},
        )

    with patch("openai.resources.embeddings.Embeddings.create") as create_mock:
        create_mock.side_effect = fake_create

        # Descriptor-level checks.
        descriptor = LMStudioProvider().get_text_embedder(model=model)
        assert isinstance(descriptor, TextEmbedderDescriptor)
        assert descriptor.get_provider() == "lm_studio"
        assert descriptor.get_model() == model
        assert descriptor.get_dimensions().size == embedding_dim

        # Embedder-level checks: one ndarray of the right length per input.
        embedder = descriptor.instantiate()
        assert isinstance(embedder, TextEmbedder)
        results = embedder.embed_text(passages)
        assert len(results) == len(passages)
        assert all(isinstance(vector, np.ndarray) for vector in results)
        assert all(len(vector) == embedding_dim for vector in results)
0 commit comments