diff --git a/tests/transports-integrations/README.md b/tests/transports-integrations/README.md index 0282116c29..aa105e5231 100644 --- a/tests/transports-integrations/README.md +++ b/tests/transports-integrations/README.md @@ -1,6 +1,6 @@ # Bifrost Integration Tests -Production-ready end-to-end test suite for testing AI integrations through Bifrost proxy. This test suite provides uniform testing across multiple AI integrations with comprehensive coverage of chat, tool calling, image processing, and multimodal workflows. +Production-ready end-to-end test suite for testing AI integrations through Bifrost proxy. This test suite provides uniform testing across multiple AI integrations with comprehensive coverage of chat, tool calling, image processing, embeddings, speech synthesis, and multimodal workflows. ## 🌉 Architecture Overview @@ -27,15 +27,16 @@ The Bifrost integration tests use a centralized configuration system that routes - **🌉 Bifrost Gateway Integration**: All integrations route through Bifrost proxy - **🤖 Centralized Configuration**: YAML-based configuration with environment variable support - **🔧 Integration-Specific Clients**: Type-safe, integration-optimized implementations -- **📋 Comprehensive Test Coverage**: 13 categories covering all major AI functionality +- **📋 Comprehensive Test Coverage**: 14 categories covering all major AI functionality - **⚙️ Flexible Execution**: Selective test running with command-line flags - **🛡️ Robust Error Handling**: Graceful error handling and detailed error reporting - **🎯 Production-Ready**: Async support, timeouts, retries, and logging - **🎵 Speech & Audio Support**: Text-to-speech synthesis and speech-to-text transcription testing +- **🔗 Embeddings Support**: Text-to-vector conversion and similarity analysis testing ## 📋 Test Categories -Our test suite covers 21 comprehensive scenarios for each integration: +Our test suite covers 30 comprehensive scenarios for each integration: ### Core Chat & Conversation Tests 1. **Simple Chat** - Basic single-message conversations @@ -62,10 +63,22 @@ Our test suite covers 21 comprehensive scenarios for each integration: 16. **Transcription Error Handling** - Invalid audio format and model error handling 17. **Voice & Format Testing** - Multiple voices and audio format validation +### Embeddings Tests (OpenAI) +18. **Single Text Embedding** - Basic text-to-vector conversion +19. **Batch Text Embeddings** - Multiple text embeddings in single request +20. **Embedding Similarity Analysis** - Cosine similarity testing for similar texts +21. **Embedding Dissimilarity Analysis** - Validation of different topic embeddings +22. **Different Embedding Models** - Testing various embedding model capabilities +23. **Long Text Embedding** - Handling of longer text inputs and token usage +24. **Embedding Error Handling** - Invalid model and input error processing +25. **Dimensionality Reduction** - Custom embedding dimensions (if supported) +26. **Encoding Format Testing** - Different embedding output formats +27. **Usage Tracking** - Token consumption and batch processing validation + ### Integration & Error Tests -19. **Complex End-to-End** - Comprehensive multimodal workflows -20. **Integration-Specific Features** - Integration-unique capabilities -21. **Error Handling** - Invalid request error processing and propagation +28. **Complex End-to-End** - Comprehensive multimodal workflows +29. **Integration-Specific Features** - Integration-unique capabilities +30. 
**Error Handling** - Invalid request error processing and propagation ## 📁 Directory Structure @@ -649,10 +662,10 @@ vision_model = get_model("anthropic", "vision") #### OpenAI - ✅ **Full Bifrost Integration**: Complete base URL support -- ✅ **Models**: gpt-3.5-turbo, gpt-4, gpt-4o, gpt-4o-mini -- ✅ **Features**: Chat, tools, vision +- ✅ **Models**: gpt-3.5-turbo, gpt-4, gpt-4o, gpt-4o-mini, text-embedding-3-small, tts-1, whisper-1 +- ✅ **Features**: Chat, tools, vision, speech synthesis, transcription, embeddings - ✅ **Settings**: Organization/project IDs, timeouts, retries -- ✅ **All Test Categories**: 11/11 scenarios supported +- ✅ **All Test Categories**: 30/30 scenarios supported (including speech & embeddings) #### Anthropic @@ -815,6 +828,12 @@ pytest tests/integrations/test_google.py::TestGoogleIntegration::test_07_image_u # Test 9: Multiple Images pytest tests/integrations/test_litellm.py::TestLiteLLMIntegration::test_09_multiple_images -v + +# Test 21: Single Text Embedding (OpenAI only) +pytest tests/integrations/test_openai.py::TestOpenAIIntegration::test_21_single_text_embedding -v + +# Test 23: Embedding Similarity Analysis (OpenAI only) +pytest tests/integrations/test_openai.py::TestOpenAIIntegration::test_23_embedding_similarity_analysis -v ``` #### Running Test Categories by Pattern @@ -829,11 +848,17 @@ pytest tests/integrations/ -k "tool_call" -v # Run all image-related tests pytest tests/integrations/ -k "image" -v +# Run all embedding tests (OpenAI only) +pytest tests/integrations/test_openai.py -k "embedding" -v + +# Run all speech and audio tests (OpenAI only) +pytest tests/integrations/test_openai.py -k "speech or transcription" -v + # Run all end-to-end tests pytest tests/integrations/ -k "end2end" -v # Run integration-specific feature tests -pytest tests/integrations/ -k "test_11_integration_specific" -v +pytest tests/integrations/ -k "integration_specific" -v ``` #### Running Tests by Integration diff --git a/tests/transports-integrations/config.yml b/tests/transports-integrations/config.yml index 301f0d97f0..cd4b869f90 100644 --- a/tests/transports-integrations/config.yml +++ b/tests/transports-integrations/config.yml @@ -34,6 +34,7 @@ models: tools: "gpt-3.5-turbo" speech: "tts-1" transcription: "whisper-1" + embeddings: "text-embedding-3-small" alternatives: - "gpt-4" - "gpt-4-turbo-preview" @@ -43,6 +44,9 @@ models: - "tts-1-hd" transcription_alternatives: - "whisper-1" + embeddings_alternatives: + - "text-embedding-3-large" + - "text-embedding-ada-002" anthropic: chat: "claude-3-haiku-20240307" @@ -144,9 +148,47 @@ model_capabilities: streaming: false speech: false transcription: true + embeddings: false max_tokens: null context_window: null + # OpenAI Embedding Models + "text-embedding-3-small": + chat: false + tools: false + vision: false + streaming: false + speech: false + transcription: false + embeddings: true + max_tokens: null + context_window: 8191 + dimensions: 1536 + + "text-embedding-3-large": + chat: false + tools: false + vision: false + streaming: false + speech: false + transcription: false + embeddings: true + max_tokens: null + context_window: 8191 + dimensions: 3072 + + "text-embedding-ada-002": + chat: false + tools: false + vision: false + streaming: false + speech: false + transcription: false + embeddings: true + max_tokens: null + context_window: 8191 + dimensions: 1536 + # Anthropic Models "claude-3-haiku-20240307": chat: true @@ -207,6 +249,7 @@ test_settings: complex: 300 speech: null # Speech doesn't use token limits 
transcription: null # Transcription doesn't use token limits + embeddings: null # Embeddings don't use token limits (text is the input) # Timeout settings for tests timeouts: diff --git a/tests/transports-integrations/tests/integrations/test_openai.py b/tests/transports-integrations/tests/integrations/test_openai.py index 0899cf524f..4a9a61ea0b 100644 --- a/tests/transports-integrations/tests/integrations/test_openai.py +++ b/tests/transports-integrations/tests/integrations/test_openai.py @@ -5,9 +5,12 @@ - Chat: gpt-3.5-turbo - Vision: gpt-4o - Tools: gpt-3.5-turbo +- Speech: tts-1 +- Transcription: whisper-1 +- Embeddings: text-embedding-3-small - Alternatives: gpt-4, gpt-4-turbo-preview, gpt-4o, gpt-4o-mini -Tests all 11 core scenarios using OpenAI SDK directly: +Tests all core scenarios using OpenAI SDK directly: 1. Simple chat 2. Multi turn conversation 3. Tool calls @@ -19,6 +22,25 @@ 9. Multiple images 10. Complete end2end test with conversation history, tool calls, tool results and images 11. Integration specific tests +12. Error handling +13. Streaming chat +14. Speech synthesis +15. Audio transcription +16. Transcription streaming +17. Speech-transcription round trip +18. Speech error handling +19. Transcription error handling +20. Different voices and audio formats +21. Single text embedding +22. Batch text embeddings +23. Embedding similarity analysis +24. Embedding dissimilarity analysis +25. Different embedding models +26. Long text embedding +27. Embedding error handling +28. Embedding dimensionality reduction +29. Embedding encoding formats +30. Embedding usage tracking """ import pytest @@ -67,6 +89,18 @@ assert_valid_streaming_transcription_response, collect_streaming_speech_content, collect_streaming_transcription_content, + # Embeddings utilities + EMBEDDINGS_SINGLE_TEXT, + EMBEDDINGS_MULTIPLE_TEXTS, + EMBEDDINGS_SIMILAR_TEXTS, + EMBEDDINGS_DIFFERENT_TEXTS, + EMBEDDINGS_EMPTY_TEXTS, + EMBEDDINGS_LONG_TEXT, + assert_valid_embedding_response, + assert_valid_embeddings_batch_response, + calculate_cosine_similarity, + assert_embeddings_similarity, + assert_embeddings_dissimilarity, ) from ..utils.config_loader import get_model @@ -720,3 +754,303 @@ def test_20_speech_different_voices_and_formats(self, openai_client, test_config # At least MP3 should be supported assert "mp3" in format_results, "MP3 format should be supported" + + @skip_if_no_api_key("openai") + def test_21_single_text_embedding(self, openai_client, test_config): + """Test Case 21: Single text embedding generation""" + response = openai_client.embeddings.create( + model=get_model("openai", "embeddings"), input=EMBEDDINGS_SINGLE_TEXT + ) + + assert_valid_embedding_response(response, expected_dimensions=1536) + + # Verify response structure + assert len(response.data) == 1, "Should have exactly one embedding" + assert response.data[0].index == 0, "First embedding should have index 0" + assert ( + response.data[0].object == "embedding" + ), "Object type should be 'embedding'" + + # Verify model in response + assert response.model is not None, "Response should include model name" + assert "text-embedding" in response.model, "Model should be an embedding model" + + @skip_if_no_api_key("openai") + def test_22_batch_text_embeddings(self, openai_client, test_config): + """Test Case 22: Batch text embedding generation""" + response = openai_client.embeddings.create( + model=get_model("openai", "embeddings"), input=EMBEDDINGS_MULTIPLE_TEXTS + ) + + expected_count = len(EMBEDDINGS_MULTIPLE_TEXTS) + 
assert_valid_embeddings_batch_response( + response, expected_count, expected_dimensions=1536 + ) + + # Verify each embedding has correct index + for i, embedding_obj in enumerate(response.data): + assert embedding_obj.index == i, f"Embedding {i} should have index {i}" + assert ( + embedding_obj.object == "embedding" + ), f"Embedding {i} should have object type 'embedding'" + + @skip_if_no_api_key("openai") + def test_23_embedding_similarity_analysis(self, openai_client, test_config): + """Test Case 23: Embedding similarity analysis with similar texts""" + response = openai_client.embeddings.create( + model=get_model("openai", "embeddings"), input=EMBEDDINGS_SIMILAR_TEXTS + ) + + assert_valid_embeddings_batch_response( + response, len(EMBEDDINGS_SIMILAR_TEXTS), expected_dimensions=1536 + ) + + embeddings = [item.embedding for item in response.data] + + # Test similarity between the first two embeddings (similar weather texts) + similarity_1_2 = calculate_cosine_similarity(embeddings[0], embeddings[1]) + similarity_1_3 = calculate_cosine_similarity(embeddings[0], embeddings[2]) + similarity_2_3 = calculate_cosine_similarity(embeddings[1], embeddings[2]) + + # Similar texts should have high similarity (> 0.7) + assert ( + similarity_1_2 > 0.7 + ), f"Similar texts should have high similarity, got {similarity_1_2:.4f}" + assert ( + similarity_1_3 > 0.7 + ), f"Similar texts should have high similarity, got {similarity_1_3:.4f}" + assert ( + similarity_2_3 > 0.7 + ), f"Similar texts should have high similarity, got {similarity_2_3:.4f}" + + @skip_if_no_api_key("openai") + def test_24_embedding_dissimilarity_analysis(self, openai_client, test_config): + """Test Case 24: Embedding dissimilarity analysis with different texts""" + response = openai_client.embeddings.create( + model=get_model("openai", "embeddings"), input=EMBEDDINGS_DIFFERENT_TEXTS + ) + + assert_valid_embeddings_batch_response( + response, len(EMBEDDINGS_DIFFERENT_TEXTS), expected_dimensions=1536 + ) + + embeddings = [item.embedding for item in response.data] + + # Test dissimilarity between different topic embeddings + # Weather vs Programming + weather_prog_similarity = calculate_cosine_similarity( + embeddings[0], embeddings[1] + ) + # Weather vs Stock Market + weather_stock_similarity = calculate_cosine_similarity( + embeddings[0], embeddings[2] + ) + # Programming vs Machine Learning (should be more similar) + prog_ml_similarity = calculate_cosine_similarity(embeddings[1], embeddings[3]) + + # Different topics should have lower similarity + assert ( + weather_prog_similarity < 0.8 + ), f"Different topics should have lower similarity, got {weather_prog_similarity:.4f}" + assert ( + weather_stock_similarity < 0.8 + ), f"Different topics should have lower similarity, got {weather_stock_similarity:.4f}" + + # Programming and ML should be more similar than completely different topics + assert ( + prog_ml_similarity > weather_prog_similarity + ), "Related tech topics should be more similar than unrelated topics" + + @skip_if_no_api_key("openai") + def test_25_embedding_different_models(self, openai_client, test_config): + """Test Case 25: Test different embedding models""" + test_text = EMBEDDINGS_SINGLE_TEXT + + # Test with text-embedding-3-small (default) + response_small = openai_client.embeddings.create( + model="text-embedding-3-small", input=test_text + ) + assert_valid_embedding_response(response_small, expected_dimensions=1536) + + # Test with text-embedding-3-large if available + try: + response_large = 
openai_client.embeddings.create( + model="text-embedding-3-large", input=test_text + ) + assert_valid_embedding_response(response_large, expected_dimensions=3072) + + # Verify different models produce different embeddings + embedding_small = response_small.data[0].embedding + embedding_large = response_large.data[0].embedding + + # They should have different dimensions + assert len(embedding_small) != len( + embedding_large + ), "Different models should produce different dimension embeddings" + + except Exception as e: + # If text-embedding-3-large is not available, just log it + print(f"text-embedding-3-large not available: {e}") + + @skip_if_no_api_key("openai") + def test_26_embedding_long_text(self, openai_client, test_config): + """Test Case 26: Embedding generation with longer text""" + response = openai_client.embeddings.create( + model=get_model("openai", "embeddings"), input=EMBEDDINGS_LONG_TEXT + ) + + assert_valid_embedding_response(response, expected_dimensions=1536) + + # Verify token usage is reported for longer text + assert response.usage is not None, "Usage should be reported for longer text" + assert ( + response.usage.total_tokens > 20 + ), "Longer text should consume more tokens" + + @skip_if_no_api_key("openai") + def test_27_embedding_error_handling(self, openai_client, test_config): + """Test Case 27: Embedding error handling""" + + # Test with invalid model + with pytest.raises(Exception) as exc_info: + openai_client.embeddings.create( + model="invalid-embedding-model", input=EMBEDDINGS_SINGLE_TEXT + ) + + error = exc_info.value + assert_valid_error_response(error, "invalid-embedding-model") + + # Test with empty text (depending on implementation, might be handled) + try: + response = openai_client.embeddings.create( + model=get_model("openai", "embeddings"), input="" + ) + # If it doesn't throw an error, check that response is still valid + if response: + assert_valid_embedding_response(response) + + except Exception as e: + # Empty input might be rejected, which is acceptable + assert ( + "empty" in str(e).lower() or "invalid" in str(e).lower() + ), "Error should mention empty or invalid input" + + @skip_if_no_api_key("openai") + def test_28_embedding_dimensionality_reduction(self, openai_client, test_config): + """Test Case 28: Embedding with custom dimensions (if supported)""" + try: + # Test custom dimensions with text-embedding-3-small + custom_dimensions = 512 + response = openai_client.embeddings.create( + model="text-embedding-3-small", + input=EMBEDDINGS_SINGLE_TEXT, + dimensions=custom_dimensions, + ) + + assert_valid_embedding_response( + response, expected_dimensions=custom_dimensions + ) + + # Compare with default dimensions + response_default = openai_client.embeddings.create( + model="text-embedding-3-small", input=EMBEDDINGS_SINGLE_TEXT + ) + + embedding_custom = response.data[0].embedding + embedding_default = response_default.data[0].embedding + + assert ( + len(embedding_custom) == custom_dimensions + ), f"Custom dimensions should be {custom_dimensions}" + assert len(embedding_default) == 1536, "Default dimensions should be 1536" + assert len(embedding_custom) != len( + embedding_default + ), "Custom and default dimensions should be different" + + except Exception as e: + # Custom dimensions might not be supported by all models + print(f"Custom dimensions not supported: {e}") + + @skip_if_no_api_key("openai") + def test_29_embedding_encoding_format(self, openai_client, test_config): + """Test Case 29: Different encoding formats (if supported)""" 
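+        # A note on the two formats (hedged; exact behavior depends on the
+        # OpenAI SDK version in use): "float" yields a JSON array of floats,
+        # while "base64" packs the same vector as little-endian float32 bytes,
+        # base64-encoded. If the raw base64 string is surfaced, it can be
+        # decoded with the standard library alone:
+        #
+        #   import base64, struct
+        #   raw = base64.b64decode(response_base64.data[0].embedding)
+        #   floats = struct.unpack(f"<{len(raw) // 4}f", raw)
+        #
+        # Some SDK versions decode base64 transparently, in which case the
+        # embedding is already a list of floats.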
+ try: + # Test with float encoding (default) + response_float = openai_client.embeddings.create( + model=get_model("openai", "embeddings"), + input=EMBEDDINGS_SINGLE_TEXT, + encoding_format="float", + ) + + assert_valid_embedding_response(response_float, expected_dimensions=1536) + embedding_float = response_float.data[0].embedding + assert all( + isinstance(x, float) for x in embedding_float + ), "Float encoding should return float values" + + # Test with base64 encoding if supported + try: + response_base64 = openai_client.embeddings.create( + model=get_model("openai", "embeddings"), + input=EMBEDDINGS_SINGLE_TEXT, + encoding_format="base64", + ) + + # Base64 encoding returns string data + assert ( + response_base64.data[0].embedding is not None + ), "Base64 encoding should return data" + + except Exception as base64_error: + print(f"Base64 encoding not supported: {base64_error}") + + except Exception as e: + # Encoding format parameter might not be supported + print(f"Encoding format parameter not supported: {e}") + + @skip_if_no_api_key("openai") + def test_30_embedding_usage_tracking(self, openai_client, test_config): + """Test Case 30: Embedding usage tracking and token counting""" + # Single text embedding + response_single = openai_client.embeddings.create( + model=get_model("openai", "embeddings"), input=EMBEDDINGS_SINGLE_TEXT + ) + + assert_valid_embedding_response(response_single) + assert ( + response_single.usage is not None + ), "Single embedding should have usage data" + assert ( + response_single.usage.total_tokens > 0 + ), "Single embedding should consume tokens" + single_tokens = response_single.usage.total_tokens + + # Batch embedding + response_batch = openai_client.embeddings.create( + model=get_model("openai", "embeddings"), input=EMBEDDINGS_MULTIPLE_TEXTS + ) + + assert_valid_embeddings_batch_response( + response_batch, len(EMBEDDINGS_MULTIPLE_TEXTS) + ) + assert ( + response_batch.usage is not None + ), "Batch embedding should have usage data" + assert ( + response_batch.usage.total_tokens > 0 + ), "Batch embedding should consume tokens" + batch_tokens = response_batch.usage.total_tokens + + # Batch should consume more tokens than single + assert ( + batch_tokens > single_tokens + ), f"Batch embedding ({batch_tokens} tokens) should consume more than single ({single_tokens} tokens)" + + # Verify proportional token usage + texts_ratio = len(EMBEDDINGS_MULTIPLE_TEXTS) + token_ratio = batch_tokens / single_tokens + + # Token ratio should be roughly proportional to text count (allowing for some variance) + assert ( + 0.5 * texts_ratio <= token_ratio <= 2.0 * texts_ratio + ), f"Token usage ratio ({token_ratio:.2f}) should be roughly proportional to text count ({texts_ratio})" diff --git a/tests/transports-integrations/tests/utils/common.py b/tests/transports-integrations/tests/utils/common.py index ab53c701e9..a909f0d86a 100644 --- a/tests/transports-integrations/tests/utils/common.py +++ b/tests/transports-integrations/tests/utils/common.py @@ -80,6 +80,41 @@ class Config: ALL_TOOLS = [WEATHER_TOOL, CALCULATOR_TOOL, SEARCH_TOOL] +# Embeddings Test Data +EMBEDDINGS_SINGLE_TEXT = "The quick brown fox jumps over the lazy dog." 
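+
+# How the constants below are exercised (a worked sketch, not normative):
+# the similarity tests embed each string and compare vectors via cosine
+# similarity, dot(a, b) / (|a| * |b|). With toy 3-d vectors:
+#
+#   a = [1.0, 0.0, 1.0]; b = [1.0, 0.5, 1.0]
+#   cos = (1 + 0 + 1) / (sqrt(2) * sqrt(2.25)) ≈ 0.943  -> "similar"
+#
+# EMBEDDINGS_SIMILAR_TEXTS paraphrase one topic (tests expect cosine > 0.7);
+# EMBEDDINGS_DIFFERENT_TEXTS span unrelated topics (tests expect < 0.8).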
+ +EMBEDDINGS_MULTIPLE_TEXTS = [ + "Artificial intelligence is transforming our world.", + "Machine learning algorithms learn from data to make predictions.", + "Natural language processing helps computers understand human language.", + "Computer vision enables machines to interpret and analyze visual information.", + "Robotics combines AI with mechanical engineering to create autonomous systems.", +] + +EMBEDDINGS_SIMILAR_TEXTS = [ + "The weather is sunny and warm today.", + "Today has bright sunshine and pleasant temperatures.", + "It's a beautiful day with clear skies and warmth.", +] + +EMBEDDINGS_DIFFERENT_TEXTS = [ + "The weather is sunny and warm today.", + "Python is a popular programming language.", + "The stock market closed higher yesterday.", + "Machine learning requires large datasets.", +] + +EMBEDDINGS_EMPTY_TEXTS = ["", " ", "\n\t", ""] + +EMBEDDINGS_LONG_TEXT = """ +This is a longer text sample designed to test how embedding models handle +larger inputs. It contains multiple sentences with various topics including +technology, science, literature, and general knowledge. The purpose is to +ensure that the embedding generation works correctly with substantial text +inputs that might be closer to real-world usage scenarios where users +embed entire paragraphs or documents rather than just short phrases. +""".strip() + # Tool Call Test Messages SINGLE_TOOL_CALL_MESSAGES = [ {"role": "user", "content": "What's the weather like in San Francisco?"} @@ -993,6 +1028,168 @@ def assert_valid_transcription_response(response: Any, min_text_length: int = 1) ), f"Transcribed text should be at least {min_text_length} characters, got: '{text_content}'" +def assert_valid_embedding_response( + response: Any, expected_dimensions: Optional[int] = None +) -> None: + """Assert that an embedding response is valid""" + assert response is not None, "Embedding response should not be None" + + # Check if it's an OpenAI-style response object + if hasattr(response, "data"): + assert ( + len(response.data) > 0 + ), "Embedding response should contain at least one embedding" + + embedding = response.data[0].embedding + assert isinstance( + embedding, list + ), f"Embedding should be a list, got {type(embedding)}" + assert len(embedding) > 0, "Embedding should not be empty" + assert all( + isinstance(x, (int, float)) for x in embedding + ), "All embedding values should be numeric" + + if expected_dimensions: + assert ( + len(embedding) == expected_dimensions + ), f"Expected {expected_dimensions} dimensions, got {len(embedding)}" + + # Check if usage information is present + if hasattr(response, "usage") and response.usage: + assert hasattr( + response.usage, "total_tokens" + ), "Usage should include total_tokens" + assert ( + response.usage.total_tokens > 0 + ), "Token usage should be greater than 0" + + # Check if it's a direct list (embedding vector) + elif isinstance(response, list): + assert len(response) > 0, "Embedding should not be empty" + assert all( + isinstance(x, (int, float)) for x in response + ), "All embedding values should be numeric" + + if expected_dimensions: + assert ( + len(response) == expected_dimensions + ), f"Expected {expected_dimensions} dimensions, got {len(response)}" + + else: + raise AssertionError(f"Invalid embedding response format: {type(response)}") + + +def assert_valid_embeddings_batch_response( + response: Any, expected_count: int, expected_dimensions: Optional[int] = None +) -> None: + """Assert that a batch embeddings response is valid""" + assert response is not 
None, "Embeddings batch response should not be None" + + # Check if it's an OpenAI-style response object + if hasattr(response, "data"): + assert ( + len(response.data) == expected_count + ), f"Expected {expected_count} embeddings, got {len(response.data)}" + + for i, embedding_obj in enumerate(response.data): + assert hasattr( + embedding_obj, "embedding" + ), f"Embedding object {i} should have 'embedding' attribute" + embedding = embedding_obj.embedding + + assert isinstance( + embedding, list + ), f"Embedding {i} should be a list, got {type(embedding)}" + assert len(embedding) > 0, f"Embedding {i} should not be empty" + assert all( + isinstance(x, (int, float)) for x in embedding + ), f"All values in embedding {i} should be numeric" + + if expected_dimensions: + assert ( + len(embedding) == expected_dimensions + ), f"Embedding {i}: expected {expected_dimensions} dimensions, got {len(embedding)}" + + # Check usage information + if hasattr(response, "usage") and response.usage: + assert hasattr( + response.usage, "total_tokens" + ), "Usage should include total_tokens" + assert ( + response.usage.total_tokens > 0 + ), "Token usage should be greater than 0" + + # Check if it's a direct list of embeddings + elif isinstance(response, list): + assert ( + len(response) == expected_count + ), f"Expected {expected_count} embeddings, got {len(response)}" + + for i, embedding in enumerate(response): + assert isinstance( + embedding, list + ), f"Embedding {i} should be a list, got {type(embedding)}" + assert len(embedding) > 0, f"Embedding {i} should not be empty" + assert all( + isinstance(x, (int, float)) for x in embedding + ), f"All values in embedding {i} should be numeric" + + if expected_dimensions: + assert ( + len(embedding) == expected_dimensions + ), f"Embedding {i}: expected {expected_dimensions} dimensions, got {len(embedding)}" + + else: + raise AssertionError( + f"Invalid embeddings batch response format: {type(response)}" + ) + + +def calculate_cosine_similarity( + embedding1: List[float], embedding2: List[float] +) -> float: + """Calculate cosine similarity between two embedding vectors""" + import math + + assert len(embedding1) == len(embedding2), "Embeddings must have the same dimension" + + # Calculate dot product + dot_product = sum(a * b for a, b in zip(embedding1, embedding2)) + + # Calculate magnitudes + magnitude1 = math.sqrt(sum(a * a for a in embedding1)) + magnitude2 = math.sqrt(sum(b * b for b in embedding2)) + + # Avoid division by zero + if magnitude1 == 0 or magnitude2 == 0: + return 0.0 + + return dot_product / (magnitude1 * magnitude2) + + +def assert_embeddings_similarity( + embedding1: List[float], + embedding2: List[float], + min_similarity: float = 0.8, + max_similarity: float = 1.0, +) -> None: + """Assert that two embeddings have expected similarity""" + similarity = calculate_cosine_similarity(embedding1, embedding2) + assert ( + min_similarity <= similarity <= max_similarity + ), f"Embedding similarity {similarity:.4f} should be between {min_similarity} and {max_similarity}" + + +def assert_embeddings_dissimilarity( + embedding1: List[float], embedding2: List[float], max_similarity: float = 0.5 +) -> None: + """Assert that two embeddings are sufficiently different""" + similarity = calculate_cosine_similarity(embedding1, embedding2) + assert ( + similarity <= max_similarity + ), f"Embedding similarity {similarity:.4f} should be at most {max_similarity} for dissimilar texts" + + def assert_valid_streaming_speech_response(chunk: Any, integration: str): 
"""Assert that a streaming speech response chunk is valid""" assert chunk is not None, "Streaming speech chunk should not be None" diff --git a/transports/bifrost-http/integrations/openai/router.go b/transports/bifrost-http/integrations/openai/router.go index 226e8abbb9..1d228be6cc 100644 --- a/transports/bifrost-http/integrations/openai/router.go +++ b/transports/bifrost-http/integrations/openai/router.go @@ -55,6 +55,32 @@ func NewOpenAIRouter(client *bifrost.Bifrost) *OpenAIRouter { }) } + // Embeddings endpoint + for _, path := range []string{ + "/openai/v1/embeddings", + "/openai/embeddings", + } { + routes = append(routes, integrations.RouteConfig{ + Path: path, + Method: "POST", + GetRequestTypeInstance: func() interface{} { + return &OpenAIEmbeddingRequest{} + }, + RequestConverter: func(req interface{}) (*schemas.BifrostRequest, error) { + if embeddingReq, ok := req.(*OpenAIEmbeddingRequest); ok { + return embeddingReq.ConvertToBifrostRequest(), nil + } + return nil, errors.New("invalid embedding request type") + }, + ResponseConverter: func(resp *schemas.BifrostResponse) (interface{}, error) { + return DeriveOpenAIEmbeddingFromBifrostResponse(resp), nil + }, + ErrorConverter: func(err *schemas.BifrostError) interface{} { + return DeriveOpenAIErrorFromBifrostError(err) + }, + }) + } + // Speech synthesis endpoint for _, path := range []string{ "/openai/v1/audio/speech", diff --git a/transports/bifrost-http/integrations/openai/types.go b/transports/bifrost-http/integrations/openai/types.go index b11ae1594f..7133920d23 100644 --- a/transports/bifrost-http/integrations/openai/types.go +++ b/transports/bifrost-http/integrations/openai/types.go @@ -52,6 +52,15 @@ type OpenAITranscriptionRequest struct { Stream *bool `json:"stream,omitempty"` } +// OpenAIEmbeddingRequest represents an OpenAI embedding request +type OpenAIEmbeddingRequest struct { + Model string `json:"model"` + Input interface{} `json:"input"` // Can be string or []string + EncodingFormat *string `json:"encoding_format,omitempty"` + Dimensions *int `json:"dimensions,omitempty"` + User *string `json:"user,omitempty"` +} + // IsStreamingRequested implements the StreamingRequest interface func (r *OpenAIChatRequest) IsStreamingRequested() bool { return r.Stream != nil && *r.Stream @@ -67,6 +76,12 @@ func (r *OpenAITranscriptionRequest) IsStreamingRequested() bool { return r.Stream != nil && *r.Stream } +// IsStreamingRequested implements the StreamingRequest interface for embeddings +// Note: Embeddings don't support streaming in OpenAI API +func (r *OpenAIEmbeddingRequest) IsStreamingRequested() bool { + return false +} + // OpenAIChatResponse represents an OpenAI chat completion response type OpenAIChatResponse struct { ID string `json:"id"` @@ -79,6 +94,23 @@ type OpenAIChatResponse struct { SystemFingerprint *string `json:"system_fingerprint,omitempty"` } +// OpenAIEmbeddingResponse represents an OpenAI embedding response +type OpenAIEmbeddingResponse struct { + Object string `json:"object"` + Data []OpenAIEmbedding `json:"data"` + Model string `json:"model"` + Usage *schemas.LLMUsage `json:"usage,omitempty"` + ServiceTier *string `json:"service_tier,omitempty"` + SystemFingerprint *string `json:"system_fingerprint,omitempty"` +} + +// OpenAIEmbedding represents a single embedding in the response +type OpenAIEmbedding struct { + Object string `json:"object"` + Embedding []float32 `json:"embedding"` + Index int `json:"index"` +} + // OpenAIChatError represents an OpenAI chat completion error response type 
OpenAIChatError struct { EventID string `json:"event_id"` // Unique identifier for the error event @@ -215,6 +247,46 @@ func (r *OpenAITranscriptionRequest) ConvertToBifrostRequest() *schemas.BifrostR return bifrostReq } +// ConvertToBifrostRequest converts an OpenAI embedding request to Bifrost format +func (r *OpenAIEmbeddingRequest) ConvertToBifrostRequest() *schemas.BifrostRequest { + provider, model := integrations.ParseModelString(r.Model, schemas.OpenAI) + + // Prepare input texts array + var texts []string + switch input := r.Input.(type) { + case string: + texts = []string{input} + case []string: + texts = input + case []interface{}: + // Handle JSON unmarshaling which converts arrays to []interface{} + texts = make([]string, len(input)) + for i, v := range input { + if str, ok := v.(string); ok { + texts[i] = str + } + } + } + + // Create embedding input + embeddingInput := &schemas.EmbeddingInput{ + Texts: texts, + } + + bifrostReq := &schemas.BifrostRequest{ + Provider: provider, + Model: model, + Input: schemas.RequestInput{ + EmbeddingInput: embeddingInput, + }, + } + + // Map parameters + bifrostReq.Params = r.convertEmbeddingParameters() + + return bifrostReq +} + // convertParameters converts OpenAI request parameters to Bifrost ModelParameters // using direct field access for better performance and type safety. func (r *OpenAIChatRequest) convertParameters() *schemas.ModelParameters { @@ -303,6 +375,26 @@ func (r *OpenAITranscriptionRequest) convertTranscriptionParameters() *schemas.M return params } +// convertEmbeddingParameters converts OpenAI embedding request parameters to Bifrost ModelParameters +func (r *OpenAIEmbeddingRequest) convertEmbeddingParameters() *schemas.ModelParameters { + params := &schemas.ModelParameters{ + ExtraParams: make(map[string]interface{}), + } + + // Add embedding-specific parameters + if r.EncodingFormat != nil { + params.EncodingFormat = r.EncodingFormat + } + if r.Dimensions != nil { + params.Dimensions = r.Dimensions + } + if r.User != nil { + params.User = r.User + } + + return params +} + // DeriveOpenAIFromBifrostResponse converts a Bifrost response to OpenAI format func DeriveOpenAIFromBifrostResponse(bifrostResp *schemas.BifrostResponse) *OpenAIChatResponse { if bifrostResp == nil { @@ -340,6 +432,31 @@ func DeriveOpenAITranscriptionFromBifrostResponse(bifrostResp *schemas.BifrostRe return bifrostResp.Transcribe } +// DeriveOpenAIEmbeddingFromBifrostResponse converts a Bifrost embedding response to OpenAI format +func DeriveOpenAIEmbeddingFromBifrostResponse(bifrostResp *schemas.BifrostResponse) *OpenAIEmbeddingResponse { + if bifrostResp == nil || bifrostResp.Embedding == nil { + return nil + } + + var embeddingData []OpenAIEmbedding + for i, embedding := range bifrostResp.Embedding { + embeddingData = append(embeddingData, OpenAIEmbedding{ + Object: "embedding", + Embedding: embedding, + Index: i, + }) + } + + return &OpenAIEmbeddingResponse{ + Object: "list", + Data: embeddingData, + Model: bifrostResp.Model, + Usage: bifrostResp.Usage, + ServiceTier: bifrostResp.ServiceTier, + SystemFingerprint: bifrostResp.SystemFingerprint, + } +} + // DeriveOpenAIErrorFromBifrostError derives a OpenAIChatError from a BifrostError func DeriveOpenAIErrorFromBifrostError(bifrostErr *schemas.BifrostError) *OpenAIChatError { if bifrostErr == nil { diff --git a/transports/bifrost-http/integrations/utils.go b/transports/bifrost-http/integrations/utils.go index d81ba8260e..bad4cad511 100644 --- a/transports/bifrost-http/integrations/utils.go +++ 
b/transports/bifrost-http/integrations/utils.go @@ -300,6 +300,8 @@ func (g *GenericRouter) handleNonStreamingRequest(ctx *fasthttp.RequestCtx, conf result, bifrostErr = g.client.TextCompletionRequest(*bifrostCtx, bifrostReq) } else if bifrostReq.Input.ChatCompletionInput != nil { result, bifrostErr = g.client.ChatCompletionRequest(*bifrostCtx, bifrostReq) + } else if bifrostReq.Input.EmbeddingInput != nil { + result, bifrostErr = g.client.EmbeddingRequest(*bifrostCtx, bifrostReq) } else if bifrostReq.Input.SpeechInput != nil { result, bifrostErr = g.client.SpeechRequest(*bifrostCtx, bifrostReq) } else if bifrostReq.Input.TranscriptionInput != nil { diff --git a/transports/bifrost-http/lib/store.go b/transports/bifrost-http/lib/store.go index 29aba1fcf1..5369daca1e 100644 --- a/transports/bifrost-http/lib/store.go +++ b/transports/bifrost-http/lib/store.go @@ -1977,7 +1977,7 @@ func (s *ConfigStore) LoadConfiguration() error { return s.loadWithFileCheck(s.configPath) } else { // No file - load from DB only - s.logger.Info("No config.json file found, loading from database only") + s.logger.Info("No config.json file found, loading from database") return s.loadFromDatabaseInternal() } }
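
Taken together, the router, types, and dispatch changes above expose a drop-in OpenAI-compatible embeddings endpoint. A minimal smoke-test sketch against a local gateway — the host/port and placeholder API key are assumptions about the deployment, not values taken from this diff:

```python
from openai import OpenAI

# Assumed local Bifrost gateway; adjust base_url for your deployment.
# The /openai/v1 prefix matches the routes registered in router.go above.
client = OpenAI(base_url="http://localhost:8080/openai/v1", api_key="placeholder")

response = client.embeddings.create(
    model="text-embedding-3-small",
    input=["Bifrost routes this request through the new embeddings endpoint."],
)

# Expect a single "embedding" object with 1536 dimensions for this model.
print(response.data[0].index, len(response.data[0].embedding))
```

Since `OpenAIEmbeddingRequest.Input` accepts either a string or a list of strings, the same call also works with `input="a single text"`.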