diff --git a/.github/workflows/scripts/release-core.sh b/.github/workflows/scripts/release-core.sh
index 158ab9b9c5..7b29f2237b 100755
--- a/.github/workflows/scripts/release-core.sh
+++ b/.github/workflows/scripts/release-core.sh
@@ -35,15 +35,11 @@ go build ./...
 cd ..
 echo "✅ Core build validation successful"
 
-# Run core provider tests
+# Run core tests
 echo "🔧 Running core tests..."
 cd core
-# go test -v ./...
+go test -v ./...
 cd ..
-echo "🔧 Running core provider tests..."
-cd tests/core-providers
-go test -v -run .
-cd ../..
 
 # Capturing changelog
 CHANGELOG_BODY=$(cat core/changelog.md)
diff --git a/Makefile b/Makefile
index 45930908fa..5d66379bd1 100644
--- a/Makefile
+++ b/Makefile
@@ -315,10 +315,10 @@ test-core: install-gotestsum ## Run core tests (Usage: make test-core PROVIDER=o
 	REPORT_FILE=""; \
 	if [ -n "$(PROVIDER)" ]; then \
 		echo "$(CYAN)Running tests for provider: $(PROVIDER)$(NC)"; \
-		if [ ! -f "tests/core-providers/$(PROVIDER)_test.go" ]; then \
-			echo "$(RED)Error: Provider test file '$(PROVIDER)_test.go' not found$(NC)"; \
+		if [ ! -f "core/providers/$(PROVIDER)/$(PROVIDER)_test.go" ]; then \
+			echo "$(RED)Error: Provider test file '$(PROVIDER)_test.go' not found in core/providers/$(PROVIDER)/$(NC)"; \
 			echo "$(YELLOW)Available providers:$(NC)"; \
-			ls tests/core-providers/*_test.go 2>/dev/null | grep -v cross_provider | xargs -n 1 basename | sed 's/_test\.go//' | sed 's/^/  - /'; \
+			find core/providers -name "*_test.go" -type f 2>/dev/null | sed 's|core/providers/\([^/]*\)/.*|\1|' | sort -u | sed 's/^/  - /'; \
 			exit 1; \
 		fi; \
 	fi; \
@@ -335,11 +335,11 @@ test-core: install-gotestsum ## Run core tests (Usage: make test-core PROVIDER=o
 			CLEAN_TESTCASE=$$(echo "$$CLEAN_TESTCASE" | sed 's|^Test[A-Z][A-Za-z]*/[A-Z][A-Za-z]*Tests/||'); \
 			echo "$(CYAN)Running Test$${PROVIDER_TEST_NAME}/$${PROVIDER_TEST_NAME}Tests/$$CLEAN_TESTCASE...$(NC)"; \
 			REPORT_FILE="$(TEST_REPORTS_DIR)/core-$(PROVIDER)-$$(echo $$CLEAN_TESTCASE | sed 's|/|_|g').xml"; \
-			cd tests/core-providers && GOWORK=off gotestsum \
+			cd core/providers/$(PROVIDER) && GOWORK=off gotestsum \
 				--format=$(GOTESTSUM_FORMAT) \
-				--junitfile=../../$$REPORT_FILE \
+				--junitfile=../../../$$REPORT_FILE \
 				-- -v -run "^Test$${PROVIDER_TEST_NAME}$$/.*Tests/$$CLEAN_TESTCASE$$" || TEST_FAILED=1; \
-			cd ../..; \
+			cd ../../..; \
 			$(MAKE) cleanup-junit-xml REPORT_FILE=$$REPORT_FILE; \
 			if [ -z "$$CI" ] && [ -z "$$GITHUB_ACTIONS" ] && [ -z "$$GITLAB_CI" ] && [ -z "$$CIRCLECI" ] && [ -z "$$JENKINS_HOME" ]; then \
 				if which junit-viewer > /dev/null 2>&1; then \
@@ -359,11 +359,11 @@ test-core: install-gotestsum ## Run core tests (Usage: make test-core PROVIDER=o
 		else \
 			echo "$(CYAN)Running Test$${PROVIDER_TEST_NAME}...$(NC)"; \
 			REPORT_FILE="$(TEST_REPORTS_DIR)/core-$(PROVIDER).xml"; \
-			cd tests/core-providers && GOWORK=off gotestsum \
+			cd core/providers/$(PROVIDER) && GOWORK=off gotestsum \
 				--format=$(GOTESTSUM_FORMAT) \
-				--junitfile=../../$$REPORT_FILE \
+				--junitfile=../../../$$REPORT_FILE \
 				-- -v -run "^Test$${PROVIDER_TEST_NAME}$$" || TEST_FAILED=1; \
-			cd ../..; \
+			cd ../../..; \
 			$(MAKE) cleanup-junit-xml REPORT_FILE=$$REPORT_FILE; \
 			if [ -z "$$CI" ] && [ -z "$$GITHUB_ACTIONS" ] && [ -z "$$GITLAB_CI" ] && [ -z "$$CIRCLECI" ] && [ -z "$$JENKINS_HOME" ]; then \
 				if which junit-viewer > /dev/null 2>&1; then \
@@ -388,11 +388,11 @@ test-core: install-gotestsum ## Run core tests (Usage: make test-core PROVIDER=o
 			exit 1; \
 		fi; \
 		REPORT_FILE="$(TEST_REPORTS_DIR)/core-all.xml"; \
-		cd tests/core-providers && GOWORK=off gotestsum \
+		cd core && GOWORK=off gotestsum \
 			--format=$(GOTESTSUM_FORMAT) \
-			--junitfile=../../$$REPORT_FILE \
-			-- -v ./... || TEST_FAILED=1; \
-		cd ../..; \
+			--junitfile=../$$REPORT_FILE \
+			-- -v ./providers/... || TEST_FAILED=1; \
+		cd ..; \
 		$(MAKE) cleanup-junit-xml REPORT_FILE=$$REPORT_FILE; \
 		if [ -z "$$CI" ] && [ -z "$$GITHUB_ACTIONS" ] && [ -z "$$GITLAB_CI" ] && [ -z "$$CIRCLECI" ] && [ -z "$$JENKINS_HOME" ]; then \
 			if which junit-viewer > /dev/null 2>&1; then \
@@ -525,6 +525,24 @@ test-all: test-core test-plugins test ## Run all tests
 		echo ""; \
 	fi
 
+test-chatbot: ## Run interactive chatbot integration test (Usage: RUN_CHATBOT_TEST=1 make test-chatbot)
+	@echo "$(GREEN)Running interactive chatbot integration test...$(NC)"
+	@if [ -z "$(RUN_CHATBOT_TEST)" ]; then \
+		echo "$(YELLOW)⚠️  This is an interactive test. Set RUN_CHATBOT_TEST=1 to run it.$(NC)"; \
+		echo "$(CYAN)Usage: RUN_CHATBOT_TEST=1 make test-chatbot$(NC)"; \
+		echo ""; \
+		echo "$(YELLOW)Required environment variables:$(NC)"; \
+		echo "  - OPENAI_API_KEY (required)"; \
+		echo "  - ANTHROPIC_API_KEY (optional)"; \
+		echo "  - Additional provider keys as needed"; \
+		exit 0; \
+	fi
+	@if [ -f .env ]; then \
+		echo "$(YELLOW)Loading environment variables from .env...$(NC)"; \
+		set -a; . ./.env; set +a; \
+	fi
+	@cd core && RUN_CHATBOT_TEST=1 go test -v -run TestChatbot
+
 # Quick start with example config
 quick-start: ## Quick start with example config and maxim plugin
 	@echo "$(GREEN)Quick starting Bifrost with example configuration...$(NC)"
diff --git a/core/bifrost.go b/core/bifrost.go
index 16b71dde22..0bd3718711 100644
--- a/core/bifrost.go
+++ b/core/bifrost.go
@@ -15,16 +15,21 @@ import (
 	"time"
 
 	"github.com/google/uuid"
-	"github.com/maximhq/bifrost/core/providers"
 	"github.com/maximhq/bifrost/core/providers/anthropic"
 	"github.com/maximhq/bifrost/core/providers/azure"
 	"github.com/maximhq/bifrost/core/providers/bedrock"
+	"github.com/maximhq/bifrost/core/providers/cerebras"
 	"github.com/maximhq/bifrost/core/providers/cohere"
 	"github.com/maximhq/bifrost/core/providers/elevenlabs"
 	"github.com/maximhq/bifrost/core/providers/gemini"
+	"github.com/maximhq/bifrost/core/providers/groq"
 	"github.com/maximhq/bifrost/core/providers/mistral"
+	"github.com/maximhq/bifrost/core/providers/ollama"
 	"github.com/maximhq/bifrost/core/providers/openai"
+	"github.com/maximhq/bifrost/core/providers/openrouter"
+	"github.com/maximhq/bifrost/core/providers/parasail"
 	"github.com/maximhq/bifrost/core/providers/perplexity"
+	"github.com/maximhq/bifrost/core/providers/sgl"
 	providerUtils "github.com/maximhq/bifrost/core/providers/utils"
 	"github.com/maximhq/bifrost/core/providers/vertex"
 	schemas "github.com/maximhq/bifrost/core/schemas"
@@ -1301,21 +1306,21 @@ func (bifrost *Bifrost) createBaseProvider(providerKey schemas.ModelProvider, co
 	case schemas.Mistral:
 		return mistral.NewMistralProvider(config, bifrost.logger), nil
 	case schemas.Ollama:
-		return providers.NewOllamaProvider(config, bifrost.logger)
+		return ollama.NewOllamaProvider(config, bifrost.logger)
 	case schemas.Groq:
-		return providers.NewGroqProvider(config, bifrost.logger)
+		return groq.NewGroqProvider(config, bifrost.logger)
 	case schemas.SGL:
-		return providers.NewSGLProvider(config, bifrost.logger)
+		return sgl.NewSGLProvider(config, bifrost.logger)
 	case schemas.Parasail:
-		return providers.NewParasailProvider(config, bifrost.logger)
+		return parasail.NewParasailProvider(config, bifrost.logger)
 	case schemas.Perplexity:
 		return perplexity.NewPerplexityProvider(config, bifrost.logger)
 	case schemas.Cerebras:
-		return providers.NewCerebrasProvider(config, bifrost.logger)
+		return cerebras.NewCerebrasProvider(config, bifrost.logger)
 	case schemas.Gemini:
 		return gemini.NewGeminiProvider(config, bifrost.logger), nil
 	case schemas.OpenRouter:
-		return providers.NewOpenRouterProvider(config, bifrost.logger), nil
+		return openrouter.NewOpenRouterProvider(config, bifrost.logger), nil
 	case schemas.Elevenlabs:
 		return elevenlabs.NewElevenlabsProvider(config, bifrost.logger), nil
 	default:
diff --git a/tests/core-chatbot/main.go b/core/chatbot_test.go
similarity index 98%
rename from tests/core-chatbot/main.go
rename to core/chatbot_test.go
index 1344c23fc5..9f5ad7679b 100644
--- a/tests/core-chatbot/main.go
+++ b/core/chatbot_test.go
@@ -1,4 +1,4 @@
-package main
+package bifrost_test
 
 import (
 	"bufio"
@@ -10,6 +10,7 @@ import (
 	"strings"
 	"sync"
 	"syscall"
+	"testing"
 	"time"
 
 	bifrost "github.com/maximhq/bifrost/core"
@@ -581,7 +582,7 @@ func (s *ChatSession) handleToolCalls(assistantMessage schemas.ChatMessage) (str
 			errorResult := schemas.ChatMessage{
 				Role: schemas.ChatMessageRoleTool,
 				Content: &schemas.ChatMessageContent{
-					ContentStr: bifrost.Ptr(fmt.Sprintf("Error executing tool: %v", err)),					
+					ContentStr: bifrost.Ptr(fmt.Sprintf("Error executing tool: %v", err)),
 				},
 				ChatToolMessage: &schemas.ChatToolMessage{
 					ToolCallID: toolCall.ID,
@@ -630,9 +631,11 @@ func (s *ChatSession) synthesizeToolResults() (string, error) {
 
 	// Create synthesis request
 	synthesisRequest := &schemas.BifrostChatRequest{
-		Input:    conversationWithSynthesis,
-		Params:   &schemas.ChatParameters{
-			Temperature: s.config.Temperature,
+		Provider: s.config.Provider,
+		Model:    s.config.Model,
+		Input: conversationWithSynthesis,
+		Params: &schemas.ChatParameters{
+			Temperature:         s.config.Temperature,
 			MaxCompletionTokens: s.config.MaxTokens,
 		},
 	}
@@ -826,7 +829,7 @@ func stopLoader(stopChan chan bool, wg *sync.WaitGroup) {
 	wg.Wait()
 }
 
-func main() {
+func runChatbot() {
 	// Check for required environment variables
 	if os.Getenv("OPENAI_API_KEY") == "" {
 		fmt.Println("❌ Error: OPENAI_API_KEY environment variable is required")
@@ -934,3 +937,13 @@ func main() {
 	// Cleanup
 	session.Cleanup()
 }
+
+// TestChatbot is the test wrapper for the interactive chatbot
+func TestChatbot(t *testing.T) {
+	// Skip by default as this is an interactive integration test
+	if os.Getenv("RUN_CHATBOT_TEST") == "" {
+		t.Skip("Skipping interactive chatbot test. Set RUN_CHATBOT_TEST=1 to run")
+	}
+
+	runChatbot()
+}
diff --git a/core/go.mod b/core/go.mod
index 3e5f09328a..cada6c53e3 100644
--- a/core/go.mod
+++ b/core/go.mod
@@ -11,6 +11,7 @@ require (
 	github.com/google/uuid v1.6.0
 	github.com/mark3labs/mcp-go v0.41.1
 	github.com/rs/zerolog v1.34.0
+	github.com/stretchr/testify v1.11.1
 	github.com/valyala/fasthttp v1.67.0
 	golang.org/x/oauth2 v0.32.0
 )
@@ -43,7 +44,6 @@ require (
 	github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
 	github.com/rogpeppe/go-internal v1.14.1 // indirect
 	github.com/spf13/cast v1.10.0 // indirect
-	github.com/stretchr/testify v1.11.1 // indirect
 	github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
 	github.com/valyala/bytebufferpool v1.0.0 // indirect
 	github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect
diff --git a/tests/core-providers/config/account.go b/core/internal/testutil/account.go
similarity index 99%
rename from tests/core-providers/config/account.go
rename to core/internal/testutil/account.go
index 738d672bcc..c6966162e7 100644
--- a/tests/core-providers/config/account.go
+++ b/core/internal/testutil/account.go
@@ -1,7 +1,7 @@
-// Package config provides comprehensive test account and configuration management for the Bifrost system.
+// Package testutil provides comprehensive test account and configuration management for the Bifrost system.
 // It implements account functionality for testing purposes, supporting multiple AI providers
 // and comprehensive test scenarios.
-package config
+package testutil
 
 import (
 	"context"
diff --git a/tests/core-providers/scenarios/automatic_function_calling.go b/core/internal/testutil/automatic_function_calling.go
similarity index 94%
rename from tests/core-providers/scenarios/automatic_function_calling.go
rename to core/internal/testutil/automatic_function_calling.go
index fcac87c4e0..93b9711a5e 100644
--- a/tests/core-providers/scenarios/automatic_function_calling.go
+++ b/core/internal/testutil/automatic_function_calling.go
@@ -1,4 +1,4 @@
-package scenarios
+package testutil
 
 import (
 	"context"
@@ -6,14 +6,12 @@ import (
 	"strings"
 	"testing"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
-
 	bifrost "github.com/maximhq/bifrost/core"
 	"github.com/maximhq/bifrost/core/schemas"
 )
 
 // RunAutomaticFunctionCallingTest executes the automatic function calling test scenario using dual API testing framework
-func RunAutomaticFunctionCallingTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig config.ComprehensiveTestConfig) {
+func RunAutomaticFunctionCallingTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
 	if !testConfig.Scenarios.AutomaticFunctionCall {
 		t.Logf("Automatic function calling not supported for provider %s", testConfig.Provider)
 		return
@@ -161,11 +159,12 @@ func RunAutomaticFunctionCallingTest(t *testing.T, client *bifrost.Bifrost, ctx
 }
 
 func validateAutomaticToolCall(t *testing.T, toolCalls []ToolCallInfo, apiName string) {
-	foundValidToolCall := false
-
+	// Validation for tool call already happened inside WithDualAPITestRetry
+	// If we reach here, the tool call was successful
+	// This function just provides additional logging for tool call details
+	
 	for _, toolCall := range toolCalls {
 		if toolCall.Name == string(SampleToolTypeTime) {
-			foundValidToolCall = true
 			t.Logf("✅ %s automatic function call: %s", apiName, toolCall.Arguments)
 
 			// Additional validation for timezone argument
@@ -178,8 +177,4 @@ func validateAutomaticToolCall(t *testing.T, toolCalls []ToolCallInfo, apiName s
 			break
 		}
 	}
-
-	if !foundValidToolCall {
-		t.Fatalf("Expected %s API to have automatic tool call for 'time'", apiName)
-	}
 }
diff --git a/tests/core-providers/scenarios/chat_completion_stream.go b/core/internal/testutil/chat_completion_stream.go
similarity index 98%
rename from tests/core-providers/scenarios/chat_completion_stream.go
rename to core/internal/testutil/chat_completion_stream.go
index b6a909ca56..d95f890ad4 100644
--- a/tests/core-providers/scenarios/chat_completion_stream.go
+++ b/core/internal/testutil/chat_completion_stream.go
@@ -1,4 +1,4 @@
-package scenarios
+package testutil
 
 import (
 	"context"
@@ -8,14 +8,13 @@ import (
 	"testing"
 	"time"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
 
 	bifrost "github.com/maximhq/bifrost/core"
 	"github.com/maximhq/bifrost/core/schemas"
 )
 
 // RunChatCompletionStreamTest executes the chat completion stream test scenario
-func RunChatCompletionStreamTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig config.ComprehensiveTestConfig) {
+func RunChatCompletionStreamTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
 	if !testConfig.Scenarios.CompletionStream {
 		t.Logf("Chat completion stream not supported for provider %s", testConfig.Provider)
 		return
diff --git a/tests/core-providers/scenarios/complete_end_to_end.go b/core/internal/testutil/complete_end_to_end.go
similarity index 99%
rename from tests/core-providers/scenarios/complete_end_to_end.go
rename to core/internal/testutil/complete_end_to_end.go
index 344e6da6cf..852135f492 100644
--- a/tests/core-providers/scenarios/complete_end_to_end.go
+++ b/core/internal/testutil/complete_end_to_end.go
@@ -1,4 +1,4 @@
-package scenarios
+package testutil
 
 import (
 	"context"
@@ -6,14 +6,13 @@ import (
 	"strings"
 	"testing"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
 
 	bifrost "github.com/maximhq/bifrost/core"
 	"github.com/maximhq/bifrost/core/schemas"
 )
 
 // RunCompleteEnd2EndTest executes the complete end-to-end test scenario
-func RunCompleteEnd2EndTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig config.ComprehensiveTestConfig) {
+func RunCompleteEnd2EndTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
 	if !testConfig.Scenarios.CompleteEnd2End {
 		t.Logf("Complete end-to-end not supported for provider %s", testConfig.Provider)
 		return
diff --git a/tests/core-providers/scenarios/cross_provider_scenarios.go b/core/internal/testutil/cross_provider_scenarios.go
similarity index 99%
rename from tests/core-providers/scenarios/cross_provider_scenarios.go
rename to core/internal/testutil/cross_provider_scenarios.go
index 70c5504c15..b8fa6fc789 100644
--- a/tests/core-providers/scenarios/cross_provider_scenarios.go
+++ b/core/internal/testutil/cross_provider_scenarios.go
@@ -1,4 +1,4 @@
-package scenarios
+package testutil
 
 import (
 	"context"
diff --git a/tests/core-providers/cross_provider_test.go b/core/internal/testutil/cross_provider_test.go
similarity index 69%
rename from tests/core-providers/cross_provider_test.go
rename to core/internal/testutil/cross_provider_test.go
index 7351e52bcc..0244ffc4df 100644
--- a/tests/core-providers/cross_provider_test.go
+++ b/core/internal/testutil/cross_provider_test.go
@@ -1,11 +1,8 @@
-package tests
+package testutil
 
 import (
 	"testing"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
-	"github.com/maximhq/bifrost/tests/core-providers/scenarios"
-
 	"github.com/maximhq/bifrost/core/schemas"
 )
 
@@ -14,7 +11,7 @@ func TestCrossProviderScenarios(t *testing.T) {
 	t.Skip("Skipping cross provider scenarios test")
 	return
 
-	client, ctx, cancel, err := config.SetupTest()
+	client, ctx, cancel, err := SetupTest()
 	if err != nil {
 		t.Fatalf("Error initializing test setup: %v", err)
 	}
@@ -22,7 +19,7 @@ func TestCrossProviderScenarios(t *testing.T) {
 	defer client.Shutdown()
 
 	// Define available providers for cross-provider testing
-	providers := []scenarios.ProviderConfig{
+	providers := []ProviderConfig{
 		{
 			Provider:        schemas.OpenAI,
 			ChatModel:       "gpt-4o-mini",
@@ -80,35 +77,35 @@ func TestCrossProviderScenarios(t *testing.T) {
 	}
 
 	// Test configuration
-	testConfig := scenarios.CrossProviderTestConfig{
+	testConfig := CrossProviderTestConfig{
 		Providers: providers,
-		ConversationSettings: scenarios.ConversationSettings{
+		ConversationSettings: ConversationSettings{
 			MaxMessages:                25,
 			ConversationGeneratorModel: "gpt-4o",
-			RequiredMessageTypes: []scenarios.MessageModality{
-				scenarios.ModalityText,
-				scenarios.ModalityTool,
-				scenarios.ModalityVision,
+			RequiredMessageTypes: []MessageModality{
+				ModalityText,
+				ModalityTool,
+				ModalityVision,
 			},
 		},
-		TestSettings: scenarios.TestSettings{
+		TestSettings: TestSettings{
 			EnableRetries:        true,
 			MaxRetriesPerMessage: 2,
-			ValidationStrength:   scenarios.ValidationModerate,
+			ValidationStrength:   ValidationModerate,
 		},
 	}
 
 	// Get predefined scenarios
-	scenariosList := scenarios.GetPredefinedScenarios()
+	scenariosList := GetPredefinedScenarios()
 
 	for _, scenario := range scenariosList {
 		// Test each scenario with both Chat Completions and Responses API
 		t.Run(scenario.Name+"_ChatCompletions", func(t *testing.T) {
-			scenarios.RunCrossProviderScenarioTest(t, client, ctx, testConfig, scenario, false) // false = Chat Completions API
+			RunCrossProviderScenarioTest(t, client, ctx, testConfig, scenario, false) // false = Chat Completions API
 		})
 
 		t.Run(scenario.Name+"_ResponsesAPI", func(t *testing.T) {
-			scenarios.RunCrossProviderScenarioTest(t, client, ctx, testConfig, scenario, true) // true = Responses API
+			RunCrossProviderScenarioTest(t, client, ctx, testConfig, scenario, true) // true = Responses API
 		})
 	}
 }
@@ -118,33 +115,33 @@ func TestCrossProviderConsistency(t *testing.T) {
 	t.Skip("Skipping cross provider consistency test")
 	return
 
-	client, ctx, cancel, err := config.SetupTest()
+	client, ctx, cancel, err := SetupTest()
 	if err != nil {
 		t.Fatalf("Error initializing test setup: %v", err)
 	}
 	defer cancel()
 	defer client.Shutdown()
 
-	providers := []scenarios.ProviderConfig{
+	providers := []ProviderConfig{
 		{Provider: schemas.OpenAI, ChatModel: "gpt-4o-mini", Available: true},
 		{Provider: schemas.Anthropic, ChatModel: "claude-3-5-sonnet-20241022", Available: true},
 		{Provider: schemas.Groq, ChatModel: "llama-3.1-70b-versatile", Available: true},
 		{Provider: schemas.Gemini, ChatModel: "gemini-1.5-pro", Available: true},
 	}
 
-	testConfig := scenarios.CrossProviderTestConfig{
+	testConfig := CrossProviderTestConfig{
 		Providers: providers,
-		TestSettings: scenarios.TestSettings{
-			ValidationStrength: scenarios.ValidationLenient, // More lenient for consistency testing
+		TestSettings: TestSettings{
+			ValidationStrength: ValidationLenient, // More lenient for consistency testing
 		},
 	}
 
 	// Test same prompt across different providers
 	t.Run("SamePrompt_DifferentProviders_ChatCompletions", func(t *testing.T) {
-		scenarios.RunCrossProviderConsistencyTest(t, client, ctx, testConfig, false) // Chat Completions
+		RunCrossProviderConsistencyTest(t, client, ctx, testConfig, false) // Chat Completions
 	})
 
 	t.Run("SamePrompt_DifferentProviders_ResponsesAPI", func(t *testing.T) {
-		scenarios.RunCrossProviderConsistencyTest(t, client, ctx, testConfig, true) // Responses API
+		RunCrossProviderConsistencyTest(t, client, ctx, testConfig, true) // Responses API
 	})
 }
diff --git a/tests/core-providers/scenarios/embedding.go b/core/internal/testutil/embedding.go
similarity index 97%
rename from tests/core-providers/scenarios/embedding.go
rename to core/internal/testutil/embedding.go
index e46684ba66..b044858b97 100644
--- a/tests/core-providers/scenarios/embedding.go
+++ b/core/internal/testutil/embedding.go
@@ -1,4 +1,4 @@
-package scenarios
+package testutil
 
 import (
 	"context"
@@ -8,7 +8,6 @@ import (
 	"strings"
 	"testing"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
 
 	bifrost "github.com/maximhq/bifrost/core"
 	"github.com/maximhq/bifrost/core/schemas"
@@ -38,7 +37,7 @@ func cosineSimilarity(a, b []float32) float64 {
 }
 
 // RunEmbeddingTest executes the embedding test scenario
-func RunEmbeddingTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig config.ComprehensiveTestConfig) {
+func RunEmbeddingTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
 	if !testConfig.Scenarios.Embedding {
 		t.Logf("Embedding not supported for provider %s", testConfig.Provider)
 		return
diff --git a/tests/core-providers/scenarios/end_to_end_tool_calling.go b/core/internal/testutil/end_to_end_tool_calling.go
similarity index 98%
rename from tests/core-providers/scenarios/end_to_end_tool_calling.go
rename to core/internal/testutil/end_to_end_tool_calling.go
index 70f9c4500c..cd9294253d 100644
--- a/tests/core-providers/scenarios/end_to_end_tool_calling.go
+++ b/core/internal/testutil/end_to_end_tool_calling.go
@@ -1,4 +1,4 @@
-package scenarios
+package testutil
 
 import (
 	"context"
@@ -6,14 +6,13 @@ import (
 	"strings"
 	"testing"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
 
 	bifrost "github.com/maximhq/bifrost/core"
 	"github.com/maximhq/bifrost/core/schemas"
 )
 
 // RunEnd2EndToolCallingTest executes the end-to-end tool calling test scenario
-func RunEnd2EndToolCallingTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig config.ComprehensiveTestConfig) {
+func RunEnd2EndToolCallingTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
 	if !testConfig.Scenarios.End2EndToolCalling {
 		t.Logf("End-to-end tool calling not supported for provider %s", testConfig.Provider)
 		return
diff --git a/tests/core-providers/scenarios/error_parser.go b/core/internal/testutil/error_parser.go
similarity index 99%
rename from tests/core-providers/scenarios/error_parser.go
rename to core/internal/testutil/error_parser.go
index cc0c6c57fe..af4e5ca788 100644
--- a/tests/core-providers/scenarios/error_parser.go
+++ b/core/internal/testutil/error_parser.go
@@ -1,4 +1,4 @@
-package scenarios
+package testutil
 
 import (
 	"fmt"
diff --git a/tests/core-providers/scenarios/image_base64.go b/core/internal/testutil/image_base64.go
similarity index 97%
rename from tests/core-providers/scenarios/image_base64.go
rename to core/internal/testutil/image_base64.go
index a01d504c13..b11265319e 100644
--- a/tests/core-providers/scenarios/image_base64.go
+++ b/core/internal/testutil/image_base64.go
@@ -1,4 +1,4 @@
-package scenarios
+package testutil
 
 import (
 	"context"
@@ -6,14 +6,13 @@ import (
 	"strings"
 	"testing"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
 
 	bifrost "github.com/maximhq/bifrost/core"
 	"github.com/maximhq/bifrost/core/schemas"
 )
 
 // RunImageBase64Test executes the image base64 test scenario using dual API testing framework
-func RunImageBase64Test(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig config.ComprehensiveTestConfig) {
+func RunImageBase64Test(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
 	if !testConfig.Scenarios.ImageBase64 {
 		t.Logf("Image base64 not supported for provider %s", testConfig.Provider)
 		return
diff --git a/tests/core-providers/scenarios/image_url.go b/core/internal/testutil/image_url.go
similarity index 97%
rename from tests/core-providers/scenarios/image_url.go
rename to core/internal/testutil/image_url.go
index 3661ebfc9b..db9d931950 100644
--- a/tests/core-providers/scenarios/image_url.go
+++ b/core/internal/testutil/image_url.go
@@ -1,4 +1,4 @@
-package scenarios
+package testutil
 
 import (
 	"context"
@@ -6,14 +6,13 @@ import (
 	"strings"
 	"testing"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
 
 	bifrost "github.com/maximhq/bifrost/core"
 	"github.com/maximhq/bifrost/core/schemas"
 )
 
 // RunImageURLTest executes the image URL test scenario using dual API testing framework
-func RunImageURLTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig config.ComprehensiveTestConfig) {
+func RunImageURLTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
 	if !testConfig.Scenarios.ImageURL {
 		t.Logf("Image URL not supported for provider %s", testConfig.Provider)
 		return
diff --git a/tests/core-providers/scenarios/list_models.go b/core/internal/testutil/list_models.go
similarity index 97%
rename from tests/core-providers/scenarios/list_models.go
rename to core/internal/testutil/list_models.go
index 662dffe9ec..43be92f2a5 100644
--- a/tests/core-providers/scenarios/list_models.go
+++ b/core/internal/testutil/list_models.go
@@ -1,18 +1,17 @@
-package scenarios
+package testutil
 
 import (
 	"context"
 	"os"
 	"testing"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
 
 	bifrost "github.com/maximhq/bifrost/core"
 	"github.com/maximhq/bifrost/core/schemas"
 )
 
 // RunListModelsTest executes the list models test scenario
-func RunListModelsTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig config.ComprehensiveTestConfig) {
+func RunListModelsTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
 	if !testConfig.Scenarios.ListModels {
 		t.Logf("List models not supported for provider %s", testConfig.Provider)
 		return
@@ -110,7 +109,7 @@ func RunListModelsTest(t *testing.T, client *bifrost.Bifrost, ctx context.Contex
 }
 
 // RunListModelsPaginationTest executes pagination test for list models
-func RunListModelsPaginationTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig config.ComprehensiveTestConfig) {
+func RunListModelsPaginationTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
 	if !testConfig.Scenarios.ListModels {
 		t.Logf("List models not supported for provider %s", testConfig.Provider)
 		return
diff --git a/tests/core-providers/scenarios/multi_turn_conversation.go b/core/internal/testutil/multi_turn_conversation.go
similarity index 81%
rename from tests/core-providers/scenarios/multi_turn_conversation.go
rename to core/internal/testutil/multi_turn_conversation.go
index a1194e97db..e7d471c92e 100644
--- a/tests/core-providers/scenarios/multi_turn_conversation.go
+++ b/core/internal/testutil/multi_turn_conversation.go
@@ -1,19 +1,16 @@
-package scenarios
+package testutil
 
 import (
 	"context"
 	"os"
-	"strings"
 	"testing"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
-
 	bifrost "github.com/maximhq/bifrost/core"
 	"github.com/maximhq/bifrost/core/schemas"
 )
 
 // RunMultiTurnConversationTest executes the multi-turn conversation test scenario
-func RunMultiTurnConversationTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig config.ComprehensiveTestConfig) {
+func RunMultiTurnConversationTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
 	if !testConfig.Scenarios.MultiTurnConversation {
 		t.Logf("Multi-turn conversation not supported for provider %s", testConfig.Provider)
 		return
@@ -135,25 +132,18 @@ func RunMultiTurnConversationTest(t *testing.T, client *bifrost.Bifrost, ctx con
 		expectations2.ShouldContainKeywords = []string{"alice"}                                  // Case insensitive
 		expectations2.ShouldNotContainWords = []string{"don't know", "can't remember", "forgot"} // Memory failure indicators
 
-		response2, bifrostErr := WithChatTestRetry(t, chatRetryConfig2, retryContext2, expectations2, "MultiTurnConversation_Step2", func() (*schemas.BifrostChatResponse, *schemas.BifrostError) {
-			return client.ChatCompletionRequest(ctx, secondRequest)
-		})
-
-		if bifrostErr != nil {
-			t.Fatalf("MultiTurnConversation_Step2 request failed after retries: %v", GetErrorMessage(bifrostErr))
-		}
-
-		content := GetChatContent(response2)
+	response2, bifrostErr := WithChatTestRetry(t, chatRetryConfig2, retryContext2, expectations2, "MultiTurnConversation_Step2", func() (*schemas.BifrostChatResponse, *schemas.BifrostError) {
+		return client.ChatCompletionRequest(ctx, secondRequest)
+	})
 
-		// Specific memory validation
-		contentLower := strings.ToLower(content)
-		if strings.Contains(contentLower, "alice") {
-			t.Logf("✅ Model successfully remembered the name: %s", content)
-		} else {
-			// This is a critical failure for multi-turn conversation
-			t.Fatalf("❌ Model failed to remember the name 'Alice' in multi-turn conversation. Response: %s", content)
-		}
+	if bifrostErr != nil {
+		t.Fatalf("❌ MultiTurnConversation_Step2 request failed after retries: %v", GetErrorMessage(bifrostErr))
+	}
 
-		t.Logf("✅ Multi-turn conversation completed successfully")
+	// Validation already happened inside WithChatTestRetry via expectations2
+	// If we reach here, the model successfully remembered "Alice"
+	content := GetChatContent(response2)
+	t.Logf("✅ Model successfully remembered the name: %s", content)
+	t.Logf("✅ Multi-turn conversation completed successfully")
 	})
 }
diff --git a/tests/core-providers/scenarios/multiple_images.go b/core/internal/testutil/multiple_images.go
similarity index 97%
rename from tests/core-providers/scenarios/multiple_images.go
rename to core/internal/testutil/multiple_images.go
index 7f50d72723..9c1ea07b3a 100644
--- a/tests/core-providers/scenarios/multiple_images.go
+++ b/core/internal/testutil/multiple_images.go
@@ -1,4 +1,4 @@
-package scenarios
+package testutil
 
 import (
 	"context"
@@ -6,14 +6,13 @@ import (
 	"strings"
 	"testing"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
 
 	bifrost "github.com/maximhq/bifrost/core"
 	"github.com/maximhq/bifrost/core/schemas"
 )
 
 // RunMultipleImagesTest executes the multiple images test scenario
-func RunMultipleImagesTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig config.ComprehensiveTestConfig) {
+func RunMultipleImagesTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
 	if !testConfig.Scenarios.MultipleImages {
 		t.Logf("Multiple images not supported for provider %s", testConfig.Provider)
 		return
diff --git a/tests/core-providers/scenarios/multiple_tool_calls.go b/core/internal/testutil/multiple_tool_calls.go
similarity index 97%
rename from tests/core-providers/scenarios/multiple_tool_calls.go
rename to core/internal/testutil/multiple_tool_calls.go
index 4a10e17784..d1cf7bfbdb 100644
--- a/tests/core-providers/scenarios/multiple_tool_calls.go
+++ b/core/internal/testutil/multiple_tool_calls.go
@@ -1,11 +1,10 @@
-package scenarios
+package testutil
 
 import (
 	"context"
 	"os"
 	"testing"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
 
 	bifrost "github.com/maximhq/bifrost/core"
 	"github.com/maximhq/bifrost/core/schemas"
@@ -21,7 +20,7 @@ func getKeysFromMap(m map[string]bool) []string {
 }
 
 // RunMultipleToolCallsTest executes the multiple tool calls test scenario using dual API testing framework
-func RunMultipleToolCallsTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig config.ComprehensiveTestConfig) {
+func RunMultipleToolCallsTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
 	if !testConfig.Scenarios.MultipleToolCalls {
 		t.Logf("Multiple tool calls not supported for provider %s", testConfig.Provider)
 		return
diff --git a/tests/core-providers/scenarios/reasoning.go b/core/internal/testutil/reasoning.go
similarity index 98%
rename from tests/core-providers/scenarios/reasoning.go
rename to core/internal/testutil/reasoning.go
index 5fd13c8bdb..fb0abc0583 100644
--- a/tests/core-providers/scenarios/reasoning.go
+++ b/core/internal/testutil/reasoning.go
@@ -1,18 +1,17 @@
-package scenarios
+package testutil
 
 import (
 	"context"
 	"os"
 	"testing"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
 
 	bifrost "github.com/maximhq/bifrost/core"
 	"github.com/maximhq/bifrost/core/schemas"
 )
 
 // RunReasoningTest executes the reasoning test scenario to test thinking capabilities via Responses API only
-func RunReasoningTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig config.ComprehensiveTestConfig) {
+func RunReasoningTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
 	if !testConfig.Scenarios.Reasoning {
 		t.Logf("⏭️ Reasoning not supported for provider %s", testConfig.Provider)
 		return
diff --git a/tests/core-providers/scenarios/response_validation.go b/core/internal/testutil/response_validation.go
similarity index 99%
rename from tests/core-providers/scenarios/response_validation.go
rename to core/internal/testutil/response_validation.go
index 74721ed7f7..fcb722bc75 100644
--- a/tests/core-providers/scenarios/response_validation.go
+++ b/core/internal/testutil/response_validation.go
@@ -1,4 +1,4 @@
-package scenarios
+package testutil
 
 import (
 	"encoding/json"
diff --git a/tests/core-providers/scenarios/responses_stream.go b/core/internal/testutil/responses_stream.go
similarity index 99%
rename from tests/core-providers/scenarios/responses_stream.go
rename to core/internal/testutil/responses_stream.go
index 98cc1f0859..72aff15239 100644
--- a/tests/core-providers/scenarios/responses_stream.go
+++ b/core/internal/testutil/responses_stream.go
@@ -1,4 +1,4 @@
-package scenarios
+package testutil
 
 import (
 	"context"
@@ -8,14 +8,13 @@ import (
 	"testing"
 	"time"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
 
 	bifrost "github.com/maximhq/bifrost/core"
 	"github.com/maximhq/bifrost/core/schemas"
 )
 
 // RunResponsesStreamTest executes the responses streaming test scenario
-func RunResponsesStreamTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig config.ComprehensiveTestConfig) {
+func RunResponsesStreamTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
 	if !testConfig.Scenarios.CompletionStream {
 		t.Logf("Responses completion stream not supported for provider %s", testConfig.Provider)
 		return
@@ -819,7 +818,7 @@ type StreamingValidationResult struct {
 }
 
 // validateResponsesStreamingResponse validates streaming-specific aspects of responses API
-func validateResponsesStreamingResponse(t *testing.T, eventTypes map[schemas.ResponsesStreamResponseType]int, sequenceNumbers []int, finalContent string, lastResponse *schemas.BifrostStream, testConfig config.ComprehensiveTestConfig) StreamingValidationResult {
+func validateResponsesStreamingResponse(t *testing.T, eventTypes map[schemas.ResponsesStreamResponseType]int, sequenceNumbers []int, finalContent string, lastResponse *schemas.BifrostStream, testConfig ComprehensiveTestConfig) StreamingValidationResult {
 	var errors []string
 
 	// Basic content validation
diff --git a/tests/core-providers/scenarios/media/lion_base64.txt b/core/internal/testutil/scenarios/media/lion_base64.txt
similarity index 100%
rename from tests/core-providers/scenarios/media/lion_base64.txt
rename to core/internal/testutil/scenarios/media/lion_base64.txt
diff --git a/tests/core-providers/config/setup.go b/core/internal/testutil/setup.go
similarity index 94%
rename from tests/core-providers/config/setup.go
rename to core/internal/testutil/setup.go
index ed793615bf..24410d86cc 100644
--- a/tests/core-providers/config/setup.go
+++ b/core/internal/testutil/setup.go
@@ -1,7 +1,7 @@
-// Package config provides comprehensive test utilities and configurations for the Bifrost system.
+// Package testutil provides comprehensive test utilities and configurations for the Bifrost system.
 // It includes comprehensive test implementations covering all major AI provider scenarios,
 // including text completion, chat, tool calling, image processing, and end-to-end workflows.
-package config
+package testutil
 
 import (
 	"context"
diff --git a/tests/core-providers/scenarios/simple_chat.go b/core/internal/testutil/simple_chat.go
similarity index 97%
rename from tests/core-providers/scenarios/simple_chat.go
rename to core/internal/testutil/simple_chat.go
index 33ca7f7cbe..df9f95d81b 100644
--- a/tests/core-providers/scenarios/simple_chat.go
+++ b/core/internal/testutil/simple_chat.go
@@ -1,18 +1,17 @@
-package scenarios
+package testutil
 
 import (
 	"context"
 	"os"
 	"testing"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
 
 	bifrost "github.com/maximhq/bifrost/core"
 	"github.com/maximhq/bifrost/core/schemas"
 )
 
 // RunSimpleChatTest executes the simple chat test scenario using dual API testing framework
-func RunSimpleChatTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig config.ComprehensiveTestConfig) {
+func RunSimpleChatTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
 	if !testConfig.Scenarios.SimpleChat {
 		t.Logf("Simple chat not supported for provider %s", testConfig.Provider)
 		return
diff --git a/tests/core-providers/scenarios/speech_synthesis.go b/core/internal/testutil/speech_synthesis.go
similarity index 98%
rename from tests/core-providers/scenarios/speech_synthesis.go
rename to core/internal/testutil/speech_synthesis.go
index 748e66a052..f04bba8575 100644
--- a/tests/core-providers/scenarios/speech_synthesis.go
+++ b/core/internal/testutil/speech_synthesis.go
@@ -1,4 +1,4 @@
-package scenarios
+package testutil
 
 import (
 	"context"
@@ -6,7 +6,6 @@ import (
 	"path/filepath"
 	"testing"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
 	"github.com/stretchr/testify/require"
 
 	bifrost "github.com/maximhq/bifrost/core"
@@ -14,7 +13,7 @@ import (
 )
 
 // RunSpeechSynthesisTest executes the speech synthesis test scenario
-func RunSpeechSynthesisTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig config.ComprehensiveTestConfig) {
+func RunSpeechSynthesisTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
 	if !testConfig.Scenarios.SpeechSynthesis {
 		t.Logf("Speech synthesis not supported for provider %s", testConfig.Provider)
 		return
@@ -148,7 +147,7 @@ func RunSpeechSynthesisTest(t *testing.T, client *bifrost.Bifrost, ctx context.C
 }
 
 // RunSpeechSynthesisAdvancedTest executes advanced speech synthesis test scenarios
-func RunSpeechSynthesisAdvancedTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig config.ComprehensiveTestConfig) {
+func RunSpeechSynthesisAdvancedTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
 	if !testConfig.Scenarios.SpeechSynthesis {
 		t.Logf("Speech synthesis not supported for provider %s", testConfig.Provider)
 		return
diff --git a/tests/core-providers/scenarios/speech_synthesis_stream.go b/core/internal/testutil/speech_synthesis_stream.go
similarity index 98%
rename from tests/core-providers/scenarios/speech_synthesis_stream.go
rename to core/internal/testutil/speech_synthesis_stream.go
index e7121b626b..728bf7a24c 100644
--- a/tests/core-providers/scenarios/speech_synthesis_stream.go
+++ b/core/internal/testutil/speech_synthesis_stream.go
@@ -1,4 +1,4 @@
-package scenarios
+package testutil
 
 import (
 	"context"
@@ -7,14 +7,13 @@ import (
 	"strings"
 	"testing"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
 
 	bifrost "github.com/maximhq/bifrost/core"
 	"github.com/maximhq/bifrost/core/schemas"
 )
 
 // RunSpeechSynthesisStreamTest executes the streaming speech synthesis test scenario
-func RunSpeechSynthesisStreamTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig config.ComprehensiveTestConfig) {
+func RunSpeechSynthesisStreamTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
 	if !testConfig.Scenarios.SpeechSynthesisStream {
 		t.Logf("Speech synthesis streaming not supported for provider %s", testConfig.Provider)
 		return
@@ -227,7 +226,7 @@ func RunSpeechSynthesisStreamTest(t *testing.T, client *bifrost.Bifrost, ctx con
 }
 
 // RunSpeechSynthesisStreamAdvancedTest executes advanced streaming speech synthesis test scenarios
-func RunSpeechSynthesisStreamAdvancedTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig config.ComprehensiveTestConfig) {
+func RunSpeechSynthesisStreamAdvancedTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
 	if !testConfig.Scenarios.SpeechSynthesisStream {
 		t.Logf("Speech synthesis streaming not supported for provider %s", testConfig.Provider)
 		return
diff --git a/tests/core-providers/scenarios/test_retry_conditions.go b/core/internal/testutil/test_retry_conditions.go
similarity index 99%
rename from tests/core-providers/scenarios/test_retry_conditions.go
rename to core/internal/testutil/test_retry_conditions.go
index f03805a289..908bdc0253 100644
--- a/tests/core-providers/scenarios/test_retry_conditions.go
+++ b/core/internal/testutil/test_retry_conditions.go
@@ -1,4 +1,4 @@
-package scenarios
+package testutil
 
 import (
 	"encoding/json"
diff --git a/tests/core-providers/scenarios/test_retry_framework.go b/core/internal/testutil/test_retry_framework.go
similarity index 99%
rename from tests/core-providers/scenarios/test_retry_framework.go
rename to core/internal/testutil/test_retry_framework.go
index c7b106829b..d612fc4276 100644
--- a/tests/core-providers/scenarios/test_retry_framework.go
+++ b/core/internal/testutil/test_retry_framework.go
@@ -1,4 +1,4 @@
-package scenarios
+package testutil
 
 import (
 	"fmt"
@@ -8,8 +8,6 @@ import (
 	"testing"
 	"time"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
-
 	"github.com/maximhq/bifrost/core/schemas"
 )
 
@@ -1115,7 +1113,7 @@ func WithDualAPITestRetry(
 }
 
 // GetTestRetryConfigForScenario returns an appropriate retry config for a scenario
-func GetTestRetryConfigForScenario(scenarioName string, testConfig config.ComprehensiveTestConfig) TestRetryConfig {
+func GetTestRetryConfigForScenario(scenarioName string, testConfig ComprehensiveTestConfig) TestRetryConfig {
 	switch scenarioName {
 	case "ToolCalls", "SingleToolCall":
 		return ToolCallRetryConfig("") // Will be set by specific test
diff --git a/tests/core-providers/tests.go b/core/internal/testutil/tests.go
similarity index 67%
rename from tests/core-providers/tests.go
rename to core/internal/testutil/tests.go
index 260d390177..9ad659c1ef 100644
--- a/tests/core-providers/tests.go
+++ b/core/internal/testutil/tests.go
@@ -1,21 +1,18 @@
-package tests
+package testutil
 
 import (
 	"context"
 	"strings"
 	"testing"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
-	"github.com/maximhq/bifrost/tests/core-providers/scenarios"
-
 	bifrost "github.com/maximhq/bifrost/core"
 )
 
 // TestScenarioFunc defines the function signature for test scenario functions
-type TestScenarioFunc func(*testing.T, *bifrost.Bifrost, context.Context, config.ComprehensiveTestConfig)
+type TestScenarioFunc func(*testing.T, *bifrost.Bifrost, context.Context, ComprehensiveTestConfig)
 
-// runAllComprehensiveTests executes all comprehensive test scenarios for a given configuration
-func runAllComprehensiveTests(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig config.ComprehensiveTestConfig) {
+// RunAllComprehensiveTests executes all comprehensive test scenarios for a given configuration
+func RunAllComprehensiveTests(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
 	if testConfig.SkipReason != "" {
 		t.Skipf("Skipping %s: %s", testConfig.Provider, testConfig.SkipReason)
 		return
@@ -25,33 +22,33 @@ func runAllComprehensiveTests(t *testing.T, client *bifrost.Bifrost, ctx context
 
 	// Define all test scenario functions in a slice
 	testScenarios := []TestScenarioFunc{
-		scenarios.RunTextCompletionTest,
-		scenarios.RunTextCompletionStreamTest,
-		scenarios.RunSimpleChatTest,
-		scenarios.RunChatCompletionStreamTest,
-		scenarios.RunResponsesStreamTest,
-		scenarios.RunMultiTurnConversationTest,
-		scenarios.RunToolCallsTest,
-		scenarios.RunToolCallsStreamingTest,
-		scenarios.RunMultipleToolCallsTest,
-		scenarios.RunEnd2EndToolCallingTest,
-		scenarios.RunAutomaticFunctionCallingTest,
-		scenarios.RunImageURLTest,
-		scenarios.RunImageBase64Test,
-		scenarios.RunMultipleImagesTest,
-		scenarios.RunCompleteEnd2EndTest,
-		scenarios.RunSpeechSynthesisTest,
-		scenarios.RunSpeechSynthesisAdvancedTest,
-		scenarios.RunSpeechSynthesisStreamTest,
-		scenarios.RunSpeechSynthesisStreamAdvancedTest,
-		scenarios.RunTranscriptionTest,
-		scenarios.RunTranscriptionAdvancedTest,
-		scenarios.RunTranscriptionStreamTest,
-		scenarios.RunTranscriptionStreamAdvancedTest,
-		scenarios.RunEmbeddingTest,
-		scenarios.RunReasoningTest,
-		scenarios.RunListModelsTest,
-		scenarios.RunListModelsPaginationTest,
+		RunTextCompletionTest,
+		RunTextCompletionStreamTest,
+		RunSimpleChatTest,
+		RunChatCompletionStreamTest,
+		RunResponsesStreamTest,
+		RunMultiTurnConversationTest,
+		RunToolCallsTest,
+		RunToolCallsStreamingTest,
+		RunMultipleToolCallsTest,
+		RunEnd2EndToolCallingTest,
+		RunAutomaticFunctionCallingTest,
+		RunImageURLTest,
+		RunImageBase64Test,
+		RunMultipleImagesTest,
+		RunCompleteEnd2EndTest,
+		RunSpeechSynthesisTest,
+		RunSpeechSynthesisAdvancedTest,
+		RunSpeechSynthesisStreamTest,
+		RunSpeechSynthesisStreamAdvancedTest,
+		RunTranscriptionTest,
+		RunTranscriptionAdvancedTest,
+		RunTranscriptionStreamTest,
+		RunTranscriptionStreamAdvancedTest,
+		RunEmbeddingTest,
+		RunReasoningTest,
+		RunListModelsTest,
+		RunListModelsPaginationTest,
 	}
 
 	// Execute all test scenarios
@@ -64,7 +61,7 @@ func runAllComprehensiveTests(t *testing.T, client *bifrost.Bifrost, ctx context
 }
 
 // printTestSummary prints a detailed summary of all test scenarios
-func printTestSummary(t *testing.T, testConfig config.ComprehensiveTestConfig) {
+func printTestSummary(t *testing.T, testConfig ComprehensiveTestConfig) {
 	testScenarios := []struct {
 		name      string
 		supported bool
diff --git a/tests/core-providers/scenarios/text_completion.go b/core/internal/testutil/text_completion.go
similarity index 94%
rename from tests/core-providers/scenarios/text_completion.go
rename to core/internal/testutil/text_completion.go
index 2f7956bf50..6f5eab740c 100644
--- a/tests/core-providers/scenarios/text_completion.go
+++ b/core/internal/testutil/text_completion.go
@@ -1,18 +1,17 @@
-package scenarios
+package testutil
 
 import (
 	"context"
 	"os"
 	"testing"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
 
 	bifrost "github.com/maximhq/bifrost/core"
 	"github.com/maximhq/bifrost/core/schemas"
 )
 
 // RunTextCompletionTest tests text completion functionality
-func RunTextCompletionTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig config.ComprehensiveTestConfig) {
+func RunTextCompletionTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
 	if !testConfig.Scenarios.TextCompletion || testConfig.TextModel == "" {
 		t.Logf("⏭️ Text completion not supported for provider %s", testConfig.Provider)
 		return
diff --git a/tests/core-providers/scenarios/text_completion_stream.go b/core/internal/testutil/text_completion_stream.go
similarity index 99%
rename from tests/core-providers/scenarios/text_completion_stream.go
rename to core/internal/testutil/text_completion_stream.go
index a45c6291f8..c731bd3382 100644
--- a/tests/core-providers/scenarios/text_completion_stream.go
+++ b/core/internal/testutil/text_completion_stream.go
@@ -1,4 +1,4 @@
-package scenarios
+package testutil
 
 import (
 	"context"
@@ -8,14 +8,13 @@ import (
 	"testing"
 	"time"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
 
 	bifrost "github.com/maximhq/bifrost/core"
 	"github.com/maximhq/bifrost/core/schemas"
 )
 
 // RunTextCompletionStreamTest executes the text completion streaming test scenario
-func RunTextCompletionStreamTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig config.ComprehensiveTestConfig) {
+func RunTextCompletionStreamTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
 	if !testConfig.Scenarios.TextCompletionStream {
 		t.Logf("Text completion stream not supported for provider %s", testConfig.Provider)
 		return
diff --git a/tests/core-providers/scenarios/tool_calls.go b/core/internal/testutil/tool_calls.go
similarity index 97%
rename from tests/core-providers/scenarios/tool_calls.go
rename to core/internal/testutil/tool_calls.go
index d873f89074..739684b1f4 100644
--- a/tests/core-providers/scenarios/tool_calls.go
+++ b/core/internal/testutil/tool_calls.go
@@ -1,4 +1,4 @@
-package scenarios
+package testutil
 
 import (
 	"context"
@@ -7,7 +7,6 @@ import (
 	"strings"
 	"testing"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
 
 	bifrost "github.com/maximhq/bifrost/core"
 	"github.com/maximhq/bifrost/core/schemas"
@@ -15,7 +14,7 @@ import (
 )
 
 // RunToolCallsTest executes the tool calls test scenario using dual API testing framework
-func RunToolCallsTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig config.ComprehensiveTestConfig) {
+func RunToolCallsTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
 	if !testConfig.Scenarios.ToolCalls {
 		t.Logf("Tool calls not supported for provider %s", testConfig.Provider)
 		return
diff --git a/tests/core-providers/scenarios/tool_calls_streaming.go b/core/internal/testutil/tool_calls_streaming.go
similarity index 99%
rename from tests/core-providers/scenarios/tool_calls_streaming.go
rename to core/internal/testutil/tool_calls_streaming.go
index 28f5432f39..522fd6af85 100644
--- a/tests/core-providers/scenarios/tool_calls_streaming.go
+++ b/core/internal/testutil/tool_calls_streaming.go
@@ -1,4 +1,4 @@
-package scenarios
+package testutil
 
 import (
 	"context"
@@ -7,7 +7,6 @@ import (
 	"strings"
 	"testing"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
 
 	bifrost "github.com/maximhq/bifrost/core"
 	"github.com/maximhq/bifrost/core/schemas"
@@ -209,7 +208,7 @@ func (acc *StreamingToolCallAccumulator) GetFinalResponsesToolCalls() []ToolCall
 }
 
 // RunToolCallsStreamingTest executes the tool calls streaming test scenario
-func RunToolCallsStreamingTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig config.ComprehensiveTestConfig) {
+func RunToolCallsStreamingTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
 	if !testConfig.Scenarios.ToolCallsStreaming {
 		t.Logf("Tool calls streaming not supported for provider %s", testConfig.Provider)
 		return
diff --git a/tests/core-providers/scenarios/transcription.go b/core/internal/testutil/transcription.go
similarity index 98%
rename from tests/core-providers/scenarios/transcription.go
rename to core/internal/testutil/transcription.go
index 182390a58e..0e5c1d18bc 100644
--- a/tests/core-providers/scenarios/transcription.go
+++ b/core/internal/testutil/transcription.go
@@ -1,4 +1,4 @@
-package scenarios
+package testutil
 
 import (
 	"context"
@@ -8,7 +8,6 @@ import (
 	"strings"
 	"testing"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
 	"github.com/stretchr/testify/require"
 
 	bifrost "github.com/maximhq/bifrost/core"
@@ -16,7 +15,7 @@ import (
 )
 
 // RunTranscriptionTest executes the transcription test scenario
-func RunTranscriptionTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig config.ComprehensiveTestConfig) {
+func RunTranscriptionTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
 	if !testConfig.Scenarios.Transcription {
 		t.Logf("Transcription not supported for provider %s", testConfig.Provider)
 		return
@@ -276,7 +275,7 @@ func RunTranscriptionTest(t *testing.T, client *bifrost.Bifrost, ctx context.Con
 }
 
 // RunTranscriptionAdvancedTest executes advanced transcription test scenarios
-func RunTranscriptionAdvancedTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig config.ComprehensiveTestConfig) {
+func RunTranscriptionAdvancedTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
 	if !testConfig.Scenarios.Transcription {
 		t.Logf("Transcription not supported for provider %s", testConfig.Provider)
 		return
@@ -499,7 +498,7 @@ func RunTranscriptionAdvancedTest(t *testing.T, client *bifrost.Bifrost, ctx con
 
 // validateTranscriptionRoundTrip performs round-trip validation for transcription responses
 // This is complementary to the main validation framework and focuses on transcription accuracy
-func validateTranscriptionRoundTrip(t *testing.T, response *schemas.BifrostTranscriptionResponse, originalText string, testName string, testConfig config.ComprehensiveTestConfig) {
+func validateTranscriptionRoundTrip(t *testing.T, response *schemas.BifrostTranscriptionResponse, originalText string, testName string, testConfig ComprehensiveTestConfig) {
 	if response == nil || response.Text == "" {
 		t.Fatal("Transcription response missing transcribed text")
 	}
diff --git a/tests/core-providers/scenarios/transcription_stream.go b/core/internal/testutil/transcription_stream.go
similarity index 98%
rename from tests/core-providers/scenarios/transcription_stream.go
rename to core/internal/testutil/transcription_stream.go
index a53187bf76..25d4e31dd7 100644
--- a/tests/core-providers/scenarios/transcription_stream.go
+++ b/core/internal/testutil/transcription_stream.go
@@ -1,4 +1,4 @@
-package scenarios
+package testutil
 
 import (
 	"context"
@@ -9,14 +9,13 @@ import (
 	"testing"
 	"time"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
 
 	bifrost "github.com/maximhq/bifrost/core"
 	"github.com/maximhq/bifrost/core/schemas"
 )
 
 // RunTranscriptionStreamTest executes the streaming transcription test scenario
-func RunTranscriptionStreamTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig config.ComprehensiveTestConfig) {
+func RunTranscriptionStreamTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
 	if !testConfig.Scenarios.TranscriptionStream {
 		t.Logf("Transcription streaming not supported for provider %s", testConfig.Provider)
 		return
@@ -324,7 +323,7 @@ func RunTranscriptionStreamTest(t *testing.T, client *bifrost.Bifrost, ctx conte
 }
 
 // RunTranscriptionStreamAdvancedTest executes advanced streaming transcription test scenarios
-func RunTranscriptionStreamAdvancedTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig config.ComprehensiveTestConfig) {
+func RunTranscriptionStreamAdvancedTest(t *testing.T, client *bifrost.Bifrost, ctx context.Context, testConfig ComprehensiveTestConfig) {
 	if !testConfig.Scenarios.TranscriptionStream {
 		t.Logf("Transcription streaming not supported for provider %s", testConfig.Provider)
 		return
diff --git a/tests/core-providers/scenarios/utils.go b/core/internal/testutil/utils.go
similarity index 98%
rename from tests/core-providers/scenarios/utils.go
rename to core/internal/testutil/utils.go
index 6c7c54e4fb..5b670643e6 100644
--- a/tests/core-providers/scenarios/utils.go
+++ b/core/internal/testutil/utils.go
@@ -1,9 +1,11 @@
-package scenarios
+package testutil
 
 import (
 	"context"
 	"fmt"
 	"os"
+	"path/filepath"
+	"runtime"
 	"strings"
 	"testing"
 
@@ -190,14 +192,21 @@ const TestImageBase64 = "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQEAYABgAAD/2wBD
 
 // GetLionBase64Image loads and returns the lion base64 image data from file
 func GetLionBase64Image() (string, error) {
-	data, err := os.ReadFile("scenarios/media/lion_base64.txt")
+	_, filename, _, ok := runtime.Caller(0)
+	if !ok {
+		return "", fmt.Errorf("failed to get current file path")
+	}
+	dir := filepath.Dir(filename)
+	filePath := filepath.Join(dir, "scenarios", "media", "lion_base64.txt")
+	
+	data, err := os.ReadFile(filePath)
 	if err != nil {
 		return "", err
 	}
 	return "data:image/png;base64," + string(data), nil
 }
 
-// CreateSpeechInput creates a basic speech input for testing
+// CreateSpeechRequest creates a basic speech input for testing
 func CreateSpeechRequest(text, voice, format string) *schemas.BifrostSpeechRequest {
 	return &schemas.BifrostSpeechRequest{
 		Input: &schemas.SpeechInput{
diff --git a/tests/core-providers/scenarios/validation_presets.go b/core/internal/testutil/validation_presets.go
similarity index 98%
rename from tests/core-providers/scenarios/validation_presets.go
rename to core/internal/testutil/validation_presets.go
index 4735369307..c096be72f1 100644
--- a/tests/core-providers/scenarios/validation_presets.go
+++ b/core/internal/testutil/validation_presets.go
@@ -1,9 +1,8 @@
-package scenarios
+package testutil
 
 import (
 	"regexp"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
 
 	"github.com/maximhq/bifrost/core/schemas"
 )
@@ -206,7 +205,7 @@ func ReasoningExpectations() ResponseExpectations {
 // =============================================================================
 
 // GetExpectationsForScenario returns appropriate validation expectations for a given scenario
-func GetExpectationsForScenario(scenarioName string, testConfig config.ComprehensiveTestConfig, customParams map[string]interface{}) ResponseExpectations {
+func GetExpectationsForScenario(scenarioName string, testConfig ComprehensiveTestConfig, customParams map[string]interface{}) ResponseExpectations {
 	switch scenarioName {
 	case "SimpleChat":
 		return BasicChatExpectations()
diff --git a/tests/core-providers/anthropic_test.go b/core/providers/anthropic/anthropic_test.go
similarity index 82%
rename from tests/core-providers/anthropic_test.go
rename to core/providers/anthropic/anthropic_test.go
index e07acf4960..22e44de3bc 100644
--- a/tests/core-providers/anthropic_test.go
+++ b/core/providers/anthropic/anthropic_test.go
@@ -1,10 +1,10 @@
-package tests
+package anthropic_test
 
 import (
 	"os"
 	"testing"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
+	"github.com/maximhq/bifrost/core/internal/testutil"
 
 	"github.com/maximhq/bifrost/core/schemas"
 )
@@ -15,13 +15,13 @@ func TestAnthropic(t *testing.T) {
 		t.Skip("Skipping Anthropic tests because ANTHROPIC_API_KEY is not set")
 	}
 
-	client, ctx, cancel, err := config.SetupTest()
+	client, ctx, cancel, err := testutil.SetupTest()
 	if err != nil {
 		t.Fatalf("Error initializing test setup: %v", err)
 	}
 	defer cancel()
 
-	testConfig := config.ComprehensiveTestConfig{
+	testConfig := testutil.ComprehensiveTestConfig{
 		Provider:  schemas.Anthropic,
 		ChatModel: "claude-sonnet-4-20250514",
 		Fallbacks: []schemas.Fallback{
@@ -29,7 +29,7 @@ func TestAnthropic(t *testing.T) {
 			{Provider: schemas.Anthropic, Model: "claude-sonnet-4-20250514"},
 		},
 		VisionModel: "claude-3-7-sonnet-20250219", // Same model supports vision
-		Scenarios: config.TestScenarios{
+		Scenarios: testutil.TestScenarios{
 			TextCompletion:        false, // Not supported
 			SimpleChat:            true,
 			CompletionStream:      true,
@@ -50,7 +50,7 @@ func TestAnthropic(t *testing.T) {
 	}
 
 	t.Run("AnthropicTests", func(t *testing.T) {
-		runAllComprehensiveTests(t, client, ctx, testConfig)
+		testutil.RunAllComprehensiveTests(t, client, ctx, testConfig)
 	})
 	client.Shutdown()
 }
diff --git a/tests/core-providers/azure_test.go b/core/providers/azure/azure_test.go
similarity index 83%
rename from tests/core-providers/azure_test.go
rename to core/providers/azure/azure_test.go
index b7faa8a021..d56e4a7d91 100644
--- a/tests/core-providers/azure_test.go
+++ b/core/providers/azure/azure_test.go
@@ -1,29 +1,28 @@
-package tests
+package azure_test
 
 import (
 	"os"
 	"testing"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
+	"github.com/maximhq/bifrost/core/internal/testutil"
 
 	"github.com/maximhq/bifrost/core/schemas"
 )
 
 func TestAzure(t *testing.T) {
 	t.Parallel()
-	t.Skip("Skipping Azure tests because Azure.")
-
+	
 	if os.Getenv("AZURE_API_KEY") == "" {
 		t.Skip("Skipping Azure tests because AZURE_API_KEY is not set")
 	}
 
-	client, ctx, cancel, err := config.SetupTest()
+	client, ctx, cancel, err := testutil.SetupTest()
 	if err != nil {
 		t.Fatalf("Error initializing test setup: %v", err)
 	}
 	defer cancel()
 
-	testConfig := config.ComprehensiveTestConfig{
+	testConfig := testutil.ComprehensiveTestConfig{
 		Provider:    schemas.Azure,
 		ChatModel:   "gpt-4o-backup",
 		VisionModel: "gpt-4o",
@@ -33,7 +32,7 @@ func TestAzure(t *testing.T) {
 		TextModel:      "", // Azure OpenAI doesn't support text completion in newer models
 		EmbeddingModel: "text-embedding-ada-002",
 		ReasoningModel: "o1",
-		Scenarios: config.TestScenarios{
+		Scenarios: testutil.TestScenarios{
 			TextCompletion:        false, // Not supported
 			SimpleChat:            true,
 			CompletionStream:      true,
@@ -61,7 +60,7 @@ func TestAzure(t *testing.T) {
 	}
 
 	t.Run("AzureTests", func(t *testing.T) {
-		runAllComprehensiveTests(t, client, ctx, testConfig)
+		testutil.RunAllComprehensiveTests(t, client, ctx, testConfig)
 	})
 	client.Shutdown()
 }
diff --git a/core/providers/cerebras.go b/core/providers/cerebras/cerebras.go
similarity index 98%
rename from core/providers/cerebras.go
rename to core/providers/cerebras/cerebras.go
index e29bddadc8..a154657962 100644
--- a/core/providers/cerebras.go
+++ b/core/providers/cerebras/cerebras.go
@@ -1,6 +1,5 @@
-// Package providers implements various LLM providers and their utility functions.
-// This file contains the Cerebras provider implementation.
-package providers
+// Package cerebras implements the Cerebras LLM provider.
+package cerebras
 
 import (
 	"context"
diff --git a/tests/core-providers/cerebras_test.go b/core/providers/cerebras/cerebras_test.go
similarity index 81%
rename from tests/core-providers/cerebras_test.go
rename to core/providers/cerebras/cerebras_test.go
index 17089ad80e..0f7185efde 100644
--- a/tests/core-providers/cerebras_test.go
+++ b/core/providers/cerebras/cerebras_test.go
@@ -1,10 +1,10 @@
-package tests
+package cerebras_test
 
 import (
 	"os"
 	"testing"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
+	"github.com/maximhq/bifrost/core/internal/testutil"
 
 	"github.com/maximhq/bifrost/core/schemas"
 )
@@ -15,13 +15,13 @@ func TestCerebras(t *testing.T) {
 		t.Skip("Skipping Cerebras tests because CEREBRAS_API_KEY is not set")
 	}
 
-	client, ctx, cancel, err := config.SetupTest()
+	client, ctx, cancel, err := testutil.SetupTest()
 	if err != nil {
 		t.Fatalf("Error initializing test setup: %v", err)
 	}
 	defer cancel()
 
-	testConfig := config.ComprehensiveTestConfig{
+	testConfig := testutil.ComprehensiveTestConfig{
 		Provider:  schemas.Cerebras,
 		ChatModel: "llama-3.3-70b",
 		Fallbacks: []schemas.Fallback{
@@ -30,7 +30,7 @@ func TestCerebras(t *testing.T) {
 		},
 		TextModel:      "llama3.1-8b",
 		EmbeddingModel: "", // Cerebras doesn't support embedding
-		Scenarios: config.TestScenarios{
+		Scenarios: testutil.TestScenarios{
 			TextCompletion:        true,
 			TextCompletionStream:  true,
 			SimpleChat:            true,
@@ -51,7 +51,7 @@ func TestCerebras(t *testing.T) {
 	}
 
 	t.Run("CerebrasTests", func(t *testing.T) {
-		runAllComprehensiveTests(t, client, ctx, testConfig)
+		testutil.RunAllComprehensiveTests(t, client, ctx, testConfig)
 	})
 	client.Shutdown()
 }
diff --git a/tests/core-providers/cohere_test.go b/core/providers/cohere/cohere_test.go
similarity index 83%
rename from tests/core-providers/cohere_test.go
rename to core/providers/cohere/cohere_test.go
index 797da03408..6fc5d971e9 100644
--- a/tests/core-providers/cohere_test.go
+++ b/core/providers/cohere/cohere_test.go
@@ -1,10 +1,10 @@
-package tests
+package cohere_test
 
 import (
 	"os"
 	"testing"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
+	"github.com/maximhq/bifrost/core/internal/testutil"
 
 	"github.com/maximhq/bifrost/core/schemas"
 )
@@ -15,19 +15,19 @@ func TestCohere(t *testing.T) {
 		t.Skip("Skipping Cohere tests because COHERE_API_KEY is not set")
 	}
 
-	client, ctx, cancel, err := config.SetupTest()
+	client, ctx, cancel, err := testutil.SetupTest()
 	if err != nil {
 		t.Fatalf("Error initializing test setup: %v", err)
 	}
 	defer cancel()
 
-	testConfig := config.ComprehensiveTestConfig{
+	testConfig := testutil.ComprehensiveTestConfig{
 		Provider:       schemas.Cohere,
 		ChatModel:      "command-a-03-2025",
 		VisionModel:    "command-a-vision-07-2025", // Cohere's latest vision model
 		TextModel:      "",                         // Cohere focuses on chat
 		EmbeddingModel: "embed-v4.0",
-		Scenarios: config.TestScenarios{
+		Scenarios: testutil.TestScenarios{
 			TextCompletion:        false, // Not typical for Cohere
 			SimpleChat:            true,
 			CompletionStream:      true,
@@ -48,7 +48,7 @@ func TestCohere(t *testing.T) {
 	}
 
 	t.Run("CohereTests", func(t *testing.T) {
-		runAllComprehensiveTests(t, client, ctx, testConfig)
+		testutil.RunAllComprehensiveTests(t, client, ctx, testConfig)
 	})
 	client.Shutdown()
 }
diff --git a/tests/core-providers/elevenlabs_test.go b/core/providers/elevenlabs/elevenlabs_test.go
similarity index 81%
rename from tests/core-providers/elevenlabs_test.go
rename to core/providers/elevenlabs/elevenlabs_test.go
index 909381c593..5243181686 100644
--- a/tests/core-providers/elevenlabs_test.go
+++ b/core/providers/elevenlabs/elevenlabs_test.go
@@ -1,10 +1,10 @@
-package tests
+package elevenlabs_test
 
 import (
 	"os"
 	"testing"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
+	"github.com/maximhq/bifrost/core/internal/testutil"
 
 	"github.com/maximhq/bifrost/core/schemas"
 )
@@ -15,17 +15,17 @@ func TestElevenlabs(t *testing.T) {
 		t.Skip("Skipping Elevenlabs tests because ELEVENLABS_API_KEY is not set")
 	}
 
-	client, ctx, cancel, err := config.SetupTest()
+	client, ctx, cancel, err := testutil.SetupTest()
 	if err != nil {
 		t.Fatalf("Error initializing test setup: %v", err)
 	}
 	defer cancel()
 
-	testConfig := config.ComprehensiveTestConfig{
+	testConfig := testutil.ComprehensiveTestConfig{
 		Provider:             schemas.Elevenlabs,
 		SpeechSynthesisModel: "eleven_turbo_v2_5",
 		TranscriptionModel:   "scribe_v1",
-		Scenarios: config.TestScenarios{
+		Scenarios: testutil.TestScenarios{
 			TextCompletion:        false,
 			TextCompletionStream:  false,
 			SimpleChat:            false,
@@ -50,7 +50,7 @@ func TestElevenlabs(t *testing.T) {
 	}
 
 	t.Run("ElevenlabsTests", func(t *testing.T) {
-		runAllComprehensiveTests(t, client, ctx, testConfig)
+		testutil.RunAllComprehensiveTests(t, client, ctx, testConfig)
 	})
 	client.Shutdown()
 }
\ No newline at end of file
diff --git a/tests/core-providers/gemini_test.go b/core/providers/gemini/gemini_test.go
similarity index 85%
rename from tests/core-providers/gemini_test.go
rename to core/providers/gemini/gemini_test.go
index f4d9a133ff..c76d37a576 100644
--- a/tests/core-providers/gemini_test.go
+++ b/core/providers/gemini/gemini_test.go
@@ -1,10 +1,10 @@
-package tests
+package gemini_test
 
 import (
 	"os"
 	"testing"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
+	"github.com/maximhq/bifrost/core/internal/testutil"
 
 	"github.com/maximhq/bifrost/core/schemas"
 )
@@ -15,13 +15,13 @@ func TestGemini(t *testing.T) {
 		t.Skip("Skipping Gemini tests because GEMINI_API_KEY is not set")
 	}
 
-	client, ctx, cancel, err := config.SetupTest()
+	client, ctx, cancel, err := testutil.SetupTest()
 	if err != nil {
 		t.Fatalf("Error initializing test setup: %v", err)
 	}
 	defer cancel()
 
-	testConfig := config.ComprehensiveTestConfig{
+	testConfig := testutil.ComprehensiveTestConfig{
 		Provider:             schemas.Gemini,
 		ChatModel:            "gemini-2.0-flash",
 		VisionModel:          "gemini-2.0-flash",
@@ -32,7 +32,7 @@ func TestGemini(t *testing.T) {
 			{Provider: schemas.Gemini, Model: "gemini-2.5-pro-preview-tts"},
 		},
 		ReasoningModel: "gemini-2.5-pro",
-		Scenarios: config.TestScenarios{
+		Scenarios: testutil.TestScenarios{
 			TextCompletion:        false, // Not supported
 			SimpleChat:            true,
 			CompletionStream:      true,
@@ -57,7 +57,7 @@ func TestGemini(t *testing.T) {
 	}
 
 	t.Run("GeminiTests", func(t *testing.T) {
-		runAllComprehensiveTests(t, client, ctx, testConfig)
+		testutil.RunAllComprehensiveTests(t, client, ctx, testConfig)
 	})
 	client.Shutdown()
 }
\ No newline at end of file
diff --git a/core/providers/groq.go b/core/providers/groq/groq.go
similarity index 98%
rename from core/providers/groq.go
rename to core/providers/groq/groq.go
index b4033ebc5d..b7cec4e8e0 100644
--- a/core/providers/groq.go
+++ b/core/providers/groq/groq.go
@@ -1,6 +1,5 @@
-// Package providers implements various LLM providers and their utility functions.
-// This file contains the Groq provider implementation.
-package providers
+// Package groq implements the Groq provider and its utility functions.
+package groq
 
 import (
 	"context"
diff --git a/tests/core-providers/groq_test.go b/core/providers/groq/groq_test.go
similarity index 84%
rename from tests/core-providers/groq_test.go
rename to core/providers/groq/groq_test.go
index 4af8f1155a..799011c897 100644
--- a/tests/core-providers/groq_test.go
+++ b/core/providers/groq/groq_test.go
@@ -1,11 +1,11 @@
-package tests
+package groq_test
 
 import (
 	"context"
 	"os"
 	"testing"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
+	"github.com/maximhq/bifrost/core/internal/testutil"
 
 	"github.com/maximhq/bifrost/core/schemas"
 )
@@ -16,13 +16,13 @@ func TestGroq(t *testing.T) {
 		t.Skip("Skipping Groq tests because GROQ_API_KEY is not set")
 	}
 
-	client, ctx, cancel, err := config.SetupTest()
+	client, ctx, cancel, err := testutil.SetupTest()
 	if err != nil {
 		t.Fatalf("Error initializing test setup: %v", err)
 	}
 	defer cancel()
 
-	testConfig := config.ComprehensiveTestConfig{
+	testConfig := testutil.ComprehensiveTestConfig{
 		Provider:  schemas.Groq,
 		ChatModel: "llama-3.3-70b-versatile",
 		Fallbacks: []schemas.Fallback{
@@ -33,7 +33,7 @@ func TestGroq(t *testing.T) {
 			{Provider: schemas.Groq, Model: "openai/gpt-oss-20b"},
 		},
 		EmbeddingModel: "", // Groq doesn't support embedding
-		Scenarios: config.TestScenarios{
+		Scenarios: testutil.TestScenarios{
 			TextCompletion:        true, // Supported via chat completion conversion
 			TextCompletionStream:  true, // Supported via chat completion streaming conversion
 			SimpleChat:            true,
@@ -56,7 +56,7 @@ func TestGroq(t *testing.T) {
 	ctx = context.WithValue(ctx, schemas.BifrostContextKey("x-litellm-fallback"), "true")
 
 	t.Run("GroqTests", func(t *testing.T) {
-		runAllComprehensiveTests(t, client, ctx, testConfig)
+		testutil.RunAllComprehensiveTests(t, client, ctx, testConfig)
 	})
 	client.Shutdown()
 }
diff --git a/tests/core-providers/mistral_test.go b/core/providers/mistral/mistral_test.go
similarity index 80%
rename from tests/core-providers/mistral_test.go
rename to core/providers/mistral/mistral_test.go
index 070437f55c..bbaa104158 100644
--- a/tests/core-providers/mistral_test.go
+++ b/core/providers/mistral/mistral_test.go
@@ -1,10 +1,10 @@
-package tests
+package mistral_test
 
 import (
 	"os"
 	"testing"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
+	"github.com/maximhq/bifrost/core/internal/testutil"
 
 	"github.com/maximhq/bifrost/core/schemas"
 )
@@ -15,13 +15,13 @@ func TestMistral(t *testing.T) {
 		t.Skip("Skipping Mistral tests because MISTRAL_API_KEY is not set")
 	}
 
-	client, ctx, cancel, err := config.SetupTest()
+	client, ctx, cancel, err := testutil.SetupTest()
 	if err != nil {
 		t.Fatalf("Error initializing test setup: %v", err)
 	}
 	defer cancel()
 
-	testConfig := config.ComprehensiveTestConfig{
+	testConfig := testutil.ComprehensiveTestConfig{
 		Provider:  schemas.Mistral,
 		ChatModel: "mistral-medium-2508",
 		Fallbacks: []schemas.Fallback{
@@ -29,7 +29,7 @@ func TestMistral(t *testing.T) {
 		},
 		VisionModel:    "pixtral-12b-latest",
 		EmbeddingModel: "codestral-embed",
-		Scenarios: config.TestScenarios{
+		Scenarios: testutil.TestScenarios{
 			TextCompletion:        false, // Not supported
 			SimpleChat:            true,
 			CompletionStream:      true,
@@ -49,7 +49,7 @@ func TestMistral(t *testing.T) {
 	}
 
 	t.Run("MistralTests", func(t *testing.T) {
-		runAllComprehensiveTests(t, client, ctx, testConfig)
+		testutil.RunAllComprehensiveTests(t, client, ctx, testConfig)
 	})
 	client.Shutdown()
 }
diff --git a/core/providers/ollama.go b/core/providers/ollama/ollama.go
similarity index 99%
rename from core/providers/ollama.go
rename to core/providers/ollama/ollama.go
index 1a933e83e4..0bcc399c95 100644
--- a/core/providers/ollama.go
+++ b/core/providers/ollama/ollama.go
@@ -1,6 +1,6 @@
 // Package providers implements various LLM providers and their utility functions.
 // This file contains the Ollama provider implementation.
-package providers
+package ollama
 
 import (
 	"context"
diff --git a/tests/core-providers/ollama_test.go b/core/providers/ollama/ollama_test.go
similarity index 80%
rename from tests/core-providers/ollama_test.go
rename to core/providers/ollama/ollama_test.go
index a133b293e4..de1bdbad55 100644
--- a/tests/core-providers/ollama_test.go
+++ b/core/providers/ollama/ollama_test.go
@@ -1,10 +1,10 @@
-package tests
+package ollama_test
 
 import (
 	"os"
 	"testing"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
+	"github.com/maximhq/bifrost/core/internal/testutil"
 
 	"github.com/maximhq/bifrost/core/schemas"
 )
@@ -15,18 +15,18 @@ func TestOllama(t *testing.T) {
 		t.Skip("Skipping Ollama tests because OLLAMA_BASE_URL is not set")
 	}
 
-	client, ctx, cancel, err := config.SetupTest()
+	client, ctx, cancel, err := testutil.SetupTest()
 	if err != nil {
 		t.Fatalf("Error initializing test setup: %v", err)
 	}
 	defer cancel()
 
-	testConfig := config.ComprehensiveTestConfig{
+	testConfig := testutil.ComprehensiveTestConfig{
 		Provider:       schemas.Ollama,
 		ChatModel:      "llama3.1:latest",
 		TextModel:      "", // Ollama doesn't support text completion in newer models
 		EmbeddingModel: "", // Ollama doesn't support embedding
-		Scenarios: config.TestScenarios{
+		Scenarios: testutil.TestScenarios{
 			TextCompletion:        false, // Not supported
 			SimpleChat:            true,
 			CompletionStream:      true,
@@ -46,7 +46,7 @@ func TestOllama(t *testing.T) {
 	}
 
 	t.Run("OllamaTests", func(t *testing.T) {
-		runAllComprehensiveTests(t, client, ctx, testConfig)
+		testutil.RunAllComprehensiveTests(t, client, ctx, testConfig)
 	})
 	client.Shutdown()
 }
diff --git a/tests/core-providers/openai_test.go b/core/providers/openai/openai_test.go
similarity index 85%
rename from tests/core-providers/openai_test.go
rename to core/providers/openai/openai_test.go
index 36a5451b45..fadbf1c520 100644
--- a/tests/core-providers/openai_test.go
+++ b/core/providers/openai/openai_test.go
@@ -1,10 +1,10 @@
-package tests
+package openai_test
 
 import (
 	"os"
 	"testing"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
+	"github.com/maximhq/bifrost/core/internal/testutil"
 
 	"github.com/maximhq/bifrost/core/schemas"
 )
@@ -15,13 +15,13 @@ func TestOpenAI(t *testing.T) {
 		t.Skip("Skipping OpenAI tests because OPENAI_API_KEY is not set")
 	}
 
-	client, ctx, cancel, err := config.SetupTest()
+	client, ctx, cancel, err := testutil.SetupTest()
 	if err != nil {
 		t.Fatalf("Error initializing test setup: %v", err)
 	}
 	defer cancel()
 
-	testConfig := config.ComprehensiveTestConfig{
+	testConfig := testutil.ComprehensiveTestConfig{
 		Provider:  schemas.OpenAI,
 		TextModel: "gpt-3.5-turbo-instruct",
 		ChatModel: "gpt-4o-mini",
@@ -36,7 +36,7 @@ func TestOpenAI(t *testing.T) {
 		},
 		SpeechSynthesisModel: "gpt-4o-mini-tts",
 		ReasoningModel:       "gpt-5",
-		Scenarios: config.TestScenarios{
+		Scenarios: testutil.TestScenarios{
 			TextCompletion:        true,
 			TextCompletionStream:  true,
 			SimpleChat:            true,
@@ -62,7 +62,7 @@ func TestOpenAI(t *testing.T) {
 	}
 
 	t.Run("OpenAITests", func(t *testing.T) {
-		runAllComprehensiveTests(t, client, ctx, testConfig)
+		testutil.RunAllComprehensiveTests(t, client, ctx, testConfig)
 	})
 	client.Shutdown()
 }
diff --git a/core/providers/openrouter.go b/core/providers/openrouter/openrouter.go
similarity index 98%
rename from core/providers/openrouter.go
rename to core/providers/openrouter/openrouter.go
index a2512d3211..af019e2d7d 100644
--- a/core/providers/openrouter.go
+++ b/core/providers/openrouter/openrouter.go
@@ -1,6 +1,5 @@
-// Package providers implements various LLM providers and their utility functions.
-// This file contains the OpenRouter provider implementation.
-package providers
+// Package openrouter implements the OpenRouter LLM provider.
+package openrouter
 
 import (
 	"context"
diff --git a/tests/core-providers/openrouter_test.go b/core/providers/openrouter/openrouter_test.go
similarity index 82%
rename from tests/core-providers/openrouter_test.go
rename to core/providers/openrouter/openrouter_test.go
index b00bdc90cb..f5123c76c4 100644
--- a/tests/core-providers/openrouter_test.go
+++ b/core/providers/openrouter/openrouter_test.go
@@ -1,10 +1,10 @@
-package tests
+package openrouter_test
 
 import (
 	"os"
 	"testing"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
+	"github.com/maximhq/bifrost/core/internal/testutil"
 
 	"github.com/maximhq/bifrost/core/schemas"
 )
@@ -15,20 +15,20 @@ func TestOpenRouter(t *testing.T) {
 		t.Skip("Skipping OpenRouter tests because OPENROUTER_API_KEY is not set")
 	}
 
-	client, ctx, cancel, err := config.SetupTest()
+	client, ctx, cancel, err := testutil.SetupTest()
 	if err != nil {
 		t.Fatalf("Error initializing test setup: %v", err)
 	}
 	defer cancel()
 
-	testConfig := config.ComprehensiveTestConfig{
+	testConfig := testutil.ComprehensiveTestConfig{
 		Provider:       schemas.OpenRouter,
 		ChatModel:      "openai/gpt-4o",
 		VisionModel:    "openai/gpt-4o",
 		TextModel:      "google/gemini-2.5-flash",
 		EmbeddingModel: "",
 		ReasoningModel: "openai/o1",
-		Scenarios: config.TestScenarios{
+		Scenarios: testutil.TestScenarios{
 			TextCompletion:        true,
 			SimpleChat:            true,
 			CompletionStream:      true,
@@ -48,7 +48,7 @@ func TestOpenRouter(t *testing.T) {
 	}
 
 	t.Run("OpenRouterTests", func(t *testing.T) {
-		runAllComprehensiveTests(t, client, ctx, testConfig)
+		testutil.RunAllComprehensiveTests(t, client, ctx, testConfig)
 	})
 	client.Shutdown()
 }
diff --git a/core/providers/parasail.go b/core/providers/parasail/parasail.go
similarity index 99%
rename from core/providers/parasail.go
rename to core/providers/parasail/parasail.go
index 5e06a745e2..62da04b4bb 100644
--- a/core/providers/parasail.go
+++ b/core/providers/parasail/parasail.go
@@ -1,6 +1,6 @@
 // Package providers implements various LLM providers and their utility functions.
 // This file contains the Parasail provider implementation.
-package providers
+package parasail
 
 import (
 	"context"
diff --git a/tests/core-providers/parasail_test.go b/core/providers/parasail/parasail_test.go
similarity index 81%
rename from tests/core-providers/parasail_test.go
rename to core/providers/parasail/parasail_test.go
index 2d917d39b2..b5628dc3f0 100644
--- a/tests/core-providers/parasail_test.go
+++ b/core/providers/parasail/parasail_test.go
@@ -1,10 +1,10 @@
-package tests
+package parasail_test
 
 import (
 	"os"
 	"testing"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
+	"github.com/maximhq/bifrost/core/internal/testutil"
 
 	"github.com/maximhq/bifrost/core/schemas"
 )
@@ -15,18 +15,18 @@ func TestParasail(t *testing.T) {
 		t.Skip("Skipping Parasail tests because PARASAIL_API_KEY is not set")
 	}
 
-	client, ctx, cancel, err := config.SetupTest()
+	client, ctx, cancel, err := testutil.SetupTest()
 	if err != nil {
 		t.Fatalf("Error initializing test setup: %v", err)
 	}
 	defer cancel()
 
-	testConfig := config.ComprehensiveTestConfig{
+	testConfig := testutil.ComprehensiveTestConfig{
 		Provider:       schemas.Parasail,
 		ChatModel:      "Qwen/Qwen3-VL-30B-A3B-Instruct-FP8",
 		TextModel:      "", // Parasail doesn't support text completion
 		EmbeddingModel: "", // Parasail doesn't support embedding
-		Scenarios: config.TestScenarios{
+		Scenarios: testutil.TestScenarios{
 			TextCompletion:        false, // Not supported
 			SimpleChat:            true,
 			CompletionStream:      true,
@@ -46,7 +46,7 @@ func TestParasail(t *testing.T) {
 	}
 
 	t.Run("ParasailTests", func(t *testing.T) {
-		runAllComprehensiveTests(t, client, ctx, testConfig)
+		testutil.RunAllComprehensiveTests(t, client, ctx, testConfig)
 	})
 	client.Shutdown()
 }
diff --git a/tests/core-providers/perplexity_test.go b/core/providers/perplexity/perplexity_test.go
similarity index 80%
rename from tests/core-providers/perplexity_test.go
rename to core/providers/perplexity/perplexity_test.go
index 548dfb53c9..6a24236993 100644
--- a/tests/core-providers/perplexity_test.go
+++ b/core/providers/perplexity/perplexity_test.go
@@ -1,10 +1,10 @@
-package tests
+package perplexity_test
 
 import (
 	"os"
 	"testing"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
+	"github.com/maximhq/bifrost/core/internal/testutil"
 
 	"github.com/maximhq/bifrost/core/schemas"
 )
@@ -15,18 +15,18 @@ func TestPerplexity(t *testing.T) {
 		t.Skip("Skipping Perplexity tests because PERPLEXITY_API_KEY is not set")
 	}
 
-	client, ctx, cancel, err := config.SetupTest()
+	client, ctx, cancel, err := testutil.SetupTest()
 	if err != nil {
 		t.Fatalf("Error initializing test setup: %v", err)
 	}
 	defer cancel()
 
-	testConfig := config.ComprehensiveTestConfig{
+	testConfig := testutil.ComprehensiveTestConfig{
 		Provider:       schemas.Perplexity,
 		ChatModel:      "sonar-pro",
 		TextModel:      "", // Perplexity doesn't support text completion
 		EmbeddingModel: "", // Perplexity doesn't support embedding
-		Scenarios: config.TestScenarios{
+		Scenarios: testutil.TestScenarios{
 			TextCompletion:        false, // Not supported
 			SimpleChat:            true,
 			CompletionStream:      true,
@@ -45,7 +45,7 @@ func TestPerplexity(t *testing.T) {
 	}
 
 	t.Run("PerplexityTests", func(t *testing.T) {
-		runAllComprehensiveTests(t, client, ctx, testConfig)
+		testutil.RunAllComprehensiveTests(t, client, ctx, testConfig)
 	})
 	client.Shutdown()
 }
diff --git a/core/providers/sgl.go b/core/providers/sgl/sgl.go
similarity index 99%
rename from core/providers/sgl.go
rename to core/providers/sgl/sgl.go
index 24a442f860..6864197118 100644
--- a/core/providers/sgl.go
+++ b/core/providers/sgl/sgl.go
@@ -1,6 +1,6 @@
 // Package providers implements various LLM providers and their utility functions.
 // This file contains the SGL provider implementation.
-package providers
+package sgl
 
 import (
 	"context"
diff --git a/tests/core-providers/sgl_test.go b/core/providers/sgl/sgl_test.go
similarity index 80%
rename from tests/core-providers/sgl_test.go
rename to core/providers/sgl/sgl_test.go
index 247fffd020..1c37a439ba 100644
--- a/tests/core-providers/sgl_test.go
+++ b/core/providers/sgl/sgl_test.go
@@ -1,10 +1,10 @@
-package tests
+package sgl_test
 
 import (
 	"os"
 	"testing"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
+	"github.com/maximhq/bifrost/core/internal/testutil"
 
 	"github.com/maximhq/bifrost/core/schemas"
 )
@@ -15,19 +15,19 @@ func TestSGL(t *testing.T) {
 		t.Skip("Skipping SGL tests because SGL_BASE_URL is not set")
 	}
 
-	client, ctx, cancel, err := config.SetupTest()
+	client, ctx, cancel, err := testutil.SetupTest()
 	if err != nil {
 		t.Fatalf("Error initializing test setup: %v", err)
 	}
 	defer cancel()
 
-	testConfig := config.ComprehensiveTestConfig{
+	testConfig := testutil.ComprehensiveTestConfig{
 		Provider:       schemas.SGL,
 		ChatModel:      "qwen/qwen2.5-0.5b-instruct",
 		VisionModel:    "Qwen/Qwen2.5-VL-7B-Instruct",
 		TextModel:      "qwen/qwen2.5-0.5b-instruct",
 		EmbeddingModel: "Alibaba-NLP/gte-Qwen2-1.5B-instruct",
-		Scenarios: config.TestScenarios{
+		Scenarios: testutil.TestScenarios{
 			TextCompletion:        true,
 			SimpleChat:            true,
 			CompletionStream:      true,
@@ -47,7 +47,7 @@ func TestSGL(t *testing.T) {
 	}
 
 	t.Run("SGLTests", func(t *testing.T) {
-		runAllComprehensiveTests(t, client, ctx, testConfig)
+		testutil.RunAllComprehensiveTests(t, client, ctx, testConfig)
 	})
 	client.Shutdown()
 }
diff --git a/tests/core-providers/vertex_test.go b/core/providers/vertex/vertex_test.go
similarity index 82%
rename from tests/core-providers/vertex_test.go
rename to core/providers/vertex/vertex_test.go
index 634f1fb75c..3ba4822576 100644
--- a/tests/core-providers/vertex_test.go
+++ b/core/providers/vertex/vertex_test.go
@@ -1,10 +1,10 @@
-package tests
+package vertex_test
 
 import (
 	"os"
 	"testing"
 
-	"github.com/maximhq/bifrost/tests/core-providers/config"
+	"github.com/maximhq/bifrost/core/internal/testutil"
 
 	"github.com/maximhq/bifrost/core/schemas"
 )
@@ -15,19 +15,19 @@ func TestVertex(t *testing.T) {
 		t.Skip("Skipping Vertex tests because VERTEX_API_KEY is not set and VERTEX_PROJECT_ID or VERTEX_CREDENTIALS is not set")
 	}
 
-	client, ctx, cancel, err := config.SetupTest()
+	client, ctx, cancel, err := testutil.SetupTest()
 	if err != nil {
 		t.Fatalf("Error initializing test setup: %v", err)
 	}
 	defer cancel()
 
-	testConfig := config.ComprehensiveTestConfig{
+	testConfig := testutil.ComprehensiveTestConfig{
 		Provider:       schemas.Vertex,
 		ChatModel:      "google/gemini-2.0-flash-001",
 		VisionModel:    "google/gemini-2.0-flash-001",
 		TextModel:      "", // Vertex doesn't support text completion in newer models
 		EmbeddingModel: "text-multilingual-embedding-002",
-		Scenarios: config.TestScenarios{
+		Scenarios: testutil.TestScenarios{
 			TextCompletion:        false, // Not supported
 			SimpleChat:            true,
 			CompletionStream:      true,
@@ -47,7 +47,7 @@ func TestVertex(t *testing.T) {
 	}
 
 	t.Run("VertexTests", func(t *testing.T) {
-		runAllComprehensiveTests(t, client, ctx, testConfig)
+		testutil.RunAllComprehensiveTests(t, client, ctx, testConfig)
 	})
 	client.Shutdown()
 }
diff --git a/tests/core-chatbot/go.mod b/tests/core-chatbot/go.mod
deleted file mode 100644
index cf6c4444cd..0000000000
--- a/tests/core-chatbot/go.mod
+++ /dev/null
@@ -1,53 +0,0 @@
-module github.com/maximhq/bifrost/tests/core-chatbot
-
-go 1.24.3
-
-require (
-	github.com/maximhq/bifrost/core v1.2.22
-	golang.org/x/text v0.30.0
-)
-
-require (
-	cloud.google.com/go/compute/metadata v0.9.0 // indirect
-	github.com/andybalholm/brotli v1.2.0 // indirect
-	github.com/aws/aws-sdk-go-v2 v1.39.5 // indirect
-	github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.2 // indirect
-	github.com/aws/aws-sdk-go-v2/config v1.31.13 // indirect
-	github.com/aws/aws-sdk-go-v2/credentials v1.18.17 // indirect
-	github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.10 // indirect
-	github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.12 // indirect
-	github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.12 // indirect
-	github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 // indirect
-	github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.2 // indirect
-	github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.10 // indirect
-	github.com/aws/aws-sdk-go-v2/service/sso v1.29.7 // indirect
-	github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.2 // indirect
-	github.com/aws/aws-sdk-go-v2/service/sts v1.38.7 // indirect
-	github.com/aws/smithy-go v1.23.1 // indirect
-	github.com/bahlo/generic-list-go v0.2.0 // indirect
-	github.com/buger/jsonparser v1.1.1 // indirect
-	github.com/bytedance/gopkg v0.1.3 // indirect
-	github.com/bytedance/sonic v1.14.1 // indirect
-	github.com/bytedance/sonic/loader v0.3.0 // indirect
-	github.com/cloudwego/base64x v0.1.6 // indirect
-	github.com/google/uuid v1.6.0 // indirect
-	github.com/invopop/jsonschema v0.13.0 // indirect
-	github.com/klauspost/compress v1.18.0 // indirect
-	github.com/klauspost/cpuid/v2 v2.3.0 // indirect
-	github.com/mailru/easyjson v0.9.1 // indirect
-	github.com/mark3labs/mcp-go v0.41.1 // indirect
-	github.com/mattn/go-colorable v0.1.14 // indirect
-	github.com/mattn/go-isatty v0.0.20 // indirect
-	github.com/rs/zerolog v1.34.0 // indirect
-	github.com/spf13/cast v1.10.0 // indirect
-	github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
-	github.com/valyala/bytebufferpool v1.0.0 // indirect
-	github.com/valyala/fasthttp v1.67.0 // indirect
-	github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect
-	github.com/yosida95/uritemplate/v3 v3.0.2 // indirect
-	golang.org/x/arch v0.22.0 // indirect
-	golang.org/x/net v0.46.0 // indirect
-	golang.org/x/oauth2 v0.32.0 // indirect
-	golang.org/x/sys v0.37.0 // indirect
-	gopkg.in/yaml.v3 v3.0.1 // indirect
-)
diff --git a/tests/core-chatbot/go.sum b/tests/core-chatbot/go.sum
deleted file mode 100644
index 11cf9a0b84..0000000000
--- a/tests/core-chatbot/go.sum
+++ /dev/null
@@ -1,129 +0,0 @@
-cloud.google.com/go/compute/metadata v0.9.0 h1:pDUj4QMoPejqq20dK0Pg2N4yG9zIkYGdBtwLoEkH9Zs=
-cloud.google.com/go/compute/metadata v0.9.0/go.mod h1:E0bWwX5wTnLPedCKqk3pJmVgCBSM6qQI1yTBdEb3C10=
-github.com/andybalholm/brotli v1.2.0 h1:ukwgCxwYrmACq68yiUqwIWnGY0cTPox/M94sVwToPjQ=
-github.com/andybalholm/brotli v1.2.0/go.mod h1:rzTDkvFWvIrjDXZHkuS16NPggd91W3kUSvPlQ1pLaKY=
-github.com/aws/aws-sdk-go-v2 v1.39.5 h1:e/SXuia3rkFtapghJROrydtQpfQaaUgd1cUvyO1mp2w=
-github.com/aws/aws-sdk-go-v2 v1.39.5/go.mod h1:yWSxrnioGUZ4WVv9TgMrNUeLV3PFESn/v+6T/Su8gnM=
-github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.2 h1:t9yYsydLYNBk9cJ73rgPhPWqOh/52fcWDQB5b1JsKSY=
-github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.2/go.mod h1:IusfVNTmiSN3t4rhxWFaBAqn+mcNdwKtPcV16eYdgko=
-github.com/aws/aws-sdk-go-v2/config v1.31.13 h1:wcqQB3B0PgRPUF5ZE/QL1JVOyB0mbPevHFoAMpemR9k=
-github.com/aws/aws-sdk-go-v2/config v1.31.13/go.mod h1:ySB5D5ybwqGbT6c3GszZ+u+3KvrlYCUQNo62+hkKOFk=
-github.com/aws/aws-sdk-go-v2/credentials v1.18.17 h1:skpEwzN/+H8cdrrtT8y+rvWJGiWWv0DeNAe+4VTf+Vs=
-github.com/aws/aws-sdk-go-v2/credentials v1.18.17/go.mod h1:Ed+nXsaYa5uBINovJhcAWkALvXw2ZLk36opcuiSZfJM=
-github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.10 h1:UuGVOX48oP4vgQ36oiKmW9RuSeT8jlgQgBFQD+HUiHY=
-github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.10/go.mod h1:vM/Ini41PzvudT4YkQyE/+WiQJiQ6jzeDyU8pQKwCac=
-github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.12 h1:p/9flfXdoAnwJnuW9xHEAFY22R3A6skYkW19JFF9F+8=
-github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.12/go.mod h1:ZTLHakoVCTtW8AaLGSwJ3LXqHD9uQKnOcv1TrpO6u2k=
-github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.12 h1:2lTWFvRcnWFFLzHWmtddu5MTchc5Oj2OOey++99tPZ0=
-github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.12/go.mod h1:hI92pK+ho8HVcWMHKHrK3Uml4pfG7wvL86FzO0LVtQQ=
-github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 h1:WKuaxf++XKWlHWu9ECbMlha8WOEGm0OUEZqm4K/Gcfk=
-github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4/go.mod h1:ZWy7j6v1vWGmPReu0iSGvRiise4YI5SkR3OHKTZ6Wuc=
-github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.2 h1:xtuxji5CS0JknaXoACOunXOYOQzgfTvGAc9s2QdCJA4=
-github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.2/go.mod h1:zxwi0DIR0rcRcgdbl7E2MSOvxDyyXGBlScvBkARFaLQ=
-github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.10 h1:DRND0dkCKtJzCj4Xl4OpVbXZgfttY5q712H9Zj7qc/0=
-github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.10/go.mod h1:tGGNmJKOTernmR2+VJ0fCzQRurcPZj9ut60Zu5Fi6us=
-github.com/aws/aws-sdk-go-v2/service/sso v1.29.7 h1:fspVFg6qMx0svs40YgRmE7LZXh9VRZvTT35PfdQR6FM=
-github.com/aws/aws-sdk-go-v2/service/sso v1.29.7/go.mod h1:BQTKL3uMECaLaUV3Zc2L4Qybv8C6BIXjuu1dOPyxTQs=
-github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.2 h1:scVnW+NLXasGOhy7HhkdT9AGb6kjgW7fJ5xYkUaqHs0=
-github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.2/go.mod h1:FRNCY3zTEWZXBKm2h5UBUPvCVDOecTad9KhynDyGBc0=
-github.com/aws/aws-sdk-go-v2/service/sts v1.38.7 h1:VEO5dqFkMsl8QZ2yHsFDJAIZLAkEbaYDB+xdKi0Feic=
-github.com/aws/aws-sdk-go-v2/service/sts v1.38.7/go.mod h1:L1xxV3zAdB+qVrVW/pBIrIAnHFWHo6FBbFe4xOGsG/o=
-github.com/aws/smithy-go v1.23.1 h1:sLvcH6dfAFwGkHLZ7dGiYF7aK6mg4CgKA/iDKjLDt9M=
-github.com/aws/smithy-go v1.23.1/go.mod h1:LEj2LM3rBRQJxPZTB4KuzZkaZYnZPnvgIhb4pu07mx0=
-github.com/bahlo/generic-list-go v0.2.0 h1:5sz/EEAK+ls5wF+NeqDpk5+iNdMDXrh3z3nPnH1Wvgk=
-github.com/bahlo/generic-list-go v0.2.0/go.mod h1:2KvAjgMlE5NNynlg/5iLrrCCZ2+5xWbdbCW3pNTGyYg=
-github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs=
-github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0=
-github.com/bytedance/gopkg v0.1.3 h1:TPBSwH8RsouGCBcMBktLt1AymVo2TVsBVCY4b6TnZ/M=
-github.com/bytedance/gopkg v0.1.3/go.mod h1:576VvJ+eJgyCzdjS+c4+77QF3p7ubbtiKARP3TxducM=
-github.com/bytedance/sonic v1.14.1 h1:FBMC0zVz5XUmE4z9wF4Jey0An5FueFvOsTKKKtwIl7w=
-github.com/bytedance/sonic v1.14.1/go.mod h1:gi6uhQLMbTdeP0muCnrjHLeCUPyb70ujhnNlhOylAFc=
-github.com/bytedance/sonic/loader v0.3.0 h1:dskwH8edlzNMctoruo8FPTJDF3vLtDT0sXZwvZJyqeA=
-github.com/bytedance/sonic/loader v0.3.0/go.mod h1:N8A3vUdtUebEY2/VQC0MyhYeKUFosQU6FxH2JmUe6VI=
-github.com/cloudwego/base64x v0.1.6 h1:t11wG9AECkCDk5fMSoxmufanudBtJ+/HemLstXDLI2M=
-github.com/cloudwego/base64x v0.1.6/go.mod h1:OFcloc187FXDaYHvrNIjxSe8ncn0OOM8gEHfghB2IPU=
-github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
-github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
-github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
-github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
-github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
-github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
-github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
-github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
-github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
-github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
-github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
-github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
-github.com/invopop/jsonschema v0.13.0 h1:KvpoAJWEjR3uD9Kbm2HWJmqsEaHt8lBUpd0qHcIi21E=
-github.com/invopop/jsonschema v0.13.0/go.mod h1:ffZ5Km5SWWRAIN6wbDXItl95euhFz2uON45H2qjYt+0=
-github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
-github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
-github.com/klauspost/cpuid/v2 v2.3.0 h1:S4CRMLnYUhGeDFDqkGriYKdfoFlDnMtqTiI/sFzhA9Y=
-github.com/klauspost/cpuid/v2 v2.3.0/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0=
-github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
-github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
-github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
-github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
-github.com/mailru/easyjson v0.9.1 h1:LbtsOm5WAswyWbvTEOqhypdPeZzHavpZx96/n553mR8=
-github.com/mailru/easyjson v0.9.1/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU=
-github.com/mark3labs/mcp-go v0.41.1 h1:w78eWfiQam2i8ICL7AL0WFiq7KHNJQ6UB53ZVtH4KGA=
-github.com/mark3labs/mcp-go v0.41.1/go.mod h1:T7tUa2jO6MavG+3P25Oy/jR7iCeJPHImCZHRymCn39g=
-github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
-github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE=
-github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8=
-github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
-github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
-github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
-github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
-github.com/maximhq/bifrost/core v1.2.22 h1:bwY7gYPlWTH06Esd7Qn6flarbTloI802vomP+KTKTjw=
-github.com/maximhq/bifrost/core v1.2.22/go.mod h1:tCsM7mGAUgs+jY9yfotSsE0HFr7J7SjzEItKhVDvLPo=
-github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
-github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
-github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
-github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
-github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
-github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
-github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0=
-github.com/rs/zerolog v1.34.0 h1:k43nTLIwcTVQAncfCw4KZ2VY6ukYoZaBPNOE8txlOeY=
-github.com/rs/zerolog v1.34.0/go.mod h1:bJsvje4Z08ROH4Nhs5iH600c3IkWhwp44iRc54W6wYQ=
-github.com/spf13/cast v1.10.0 h1:h2x0u2shc1QuLHfxi+cTJvs30+ZAHOGRic8uyGTDWxY=
-github.com/spf13/cast v1.10.0/go.mod h1:jNfB8QC9IA6ZuY2ZjDp0KtFO2LZZlg4S/7bzP6qqeHo=
-github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
-github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
-github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
-github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
-github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
-github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
-github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
-github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
-github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
-github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
-github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
-github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
-github.com/valyala/fasthttp v1.67.0 h1:tqKlJMUP6iuNG8hGjK/s9J4kadH7HLV4ijEcPGsezac=
-github.com/valyala/fasthttp v1.67.0/go.mod h1:qYSIpqt/0XNmShgo/8Aq8E3UYWVVwNS2QYmzd8WIEPM=
-github.com/wk8/go-ordered-map/v2 v2.1.8 h1:5h/BUHu93oj4gIdvHHHGsScSTMijfx5PeYkE/fJgbpc=
-github.com/wk8/go-ordered-map/v2 v2.1.8/go.mod h1:5nJHM5DyteebpVlHnWMV0rPz6Zp7+xBAnxjb1X5vnTw=
-github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU=
-github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E=
-github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4=
-github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4=
-golang.org/x/arch v0.22.0 h1:c/Zle32i5ttqRXjdLyyHZESLD/bB90DCU1g9l/0YBDI=
-golang.org/x/arch v0.22.0/go.mod h1:dNHoOeKiyja7GTvF9NJS1l3Z2yntpQNzgrjh1cU103A=
-golang.org/x/net v0.46.0 h1:giFlY12I07fugqwPuWJi68oOnpfqFnJIJzaIIm2JVV4=
-golang.org/x/net v0.46.0/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210=
-golang.org/x/oauth2 v0.32.0 h1:jsCblLleRMDrxMN29H3z/k1KliIvpLgCkE6R8FXXNgY=
-golang.org/x/oauth2 v0.32.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA=
-golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ=
-golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
-golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k=
-golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM=
-gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
-gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
-gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
-gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
-gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
-gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/tests/core-providers/README.md b/tests/core-providers/README.md
deleted file mode 100644
index ba9a786ef2..0000000000
--- a/tests/core-providers/README.md
+++ /dev/null
@@ -1,476 +0,0 @@
-# Bifrost Core Providers Test Suite 🚀
-
-This directory contains comprehensive tests for all Bifrost AI providers, ensuring compatibility and functionality across different AI services.
-
-## 📋 Supported Providers
-
-- **OpenAI** - GPT models and function calling
-- **Anthropic** - Claude models
-- **Azure OpenAI** - Azure-hosted OpenAI models
-- **AWS Bedrock** - Amazon's managed AI service
-- **Cohere** - Cohere's language models
-- **Google Vertex AI** - Google Cloud's AI platform
-- **Mistral** - Mistral AI models with vision capabilities
-- **Ollama** - Local LLM serving platform
-- **Groq** - OSS models
-- **SGLang** - OSS models
-- **Parasail** - OSS models
-- **Perplexity** - Sonar models
-- **Cerebras** - Llama, Qwen and GPT-OSS models
-- **Gemini** - Gemini models
-- **OpenRouter** - Models supported by OpenRouter
-
-## 🏃‍♂️ Running Tests
-
-### Parallel Test Execution
-
-All provider tests are configured to run in parallel using Go's `t.Parallel()` function. This allows multiple provider tests to execute concurrently, significantly reducing total test execution time.
-
-**Benefits:**
-- Faster test execution when testing multiple providers
-- Better resource utilization
-- Isolated test execution (each test creates its own client instance)
-
-**Usage:**
-```bash
-# Default: Tests run in parallel (up to GOMAXPROCS concurrent tests)
-go test -v ./tests/core-providers/
-
-# Explicitly set number of parallel tests
-go test -v ./tests/core-providers/ -parallel 10
-
-# Run tests sequentially (disable parallel execution)
-go test -v ./tests/core-providers/ -parallel 1
-```
-
-**Note:** Each test function creates its own isolated Bifrost client instance via `config.SetupTest()`, ensuring no shared state between parallel test executions.
-
-### Development with Local Bifrost Core
-
-To test changes with a forked or local version of bifrost-core:
-
-1. **Uncomment the replace directive** in `tests/core-providers/go.mod`:
-
-   ```go
-   // Uncomment this line to use your local bifrost-core
-   replace github.com/maximhq/bifrost/core => ../../core
-   ```
-
-2. **Update dependencies**:
-
-   ```bash
-   cd tests/core-providers
-   go mod tidy
-   ```
-
-3. **Run tests** with your local changes:
-
-   ```bash
-   go test -v ./tests/core-providers/
-   ```
-
-⚠️ **Important**: Ensure your local `../../core` directory contains your bifrost-core implementation. The path should be relative to the `tests/core-providers` directory.
-
-### Prerequisites
-
-Set up environment variables for the providers you want to test:
-
-```bash
-# OpenAI
-export OPENAI_API_KEY="your-openai-key"
-
-# Anthropic
-export ANTHROPIC_API_KEY="your-anthropic-key"
-
-# Azure OpenAI
-export AZURE_API_KEY="your-azure-key"
-export AZURE_ENDPOINT="your-azure-endpoint"
-
-# AWS Bedrock
-export AWS_ACCESS_KEY_ID_ID="your-aws-access-key"
-export AWS_SECRET_ACCESS_KEY="your-aws-secret-key"
-export AWS_REGION="us-east-1"
-
-# Cohere
-export COHERE_API_KEY="your-cohere-key"
-
-# Google Vertex AI
-export GOOGLE_APPLICATION_CREDENTIALS="path/to/service-account.json"
-export GOOGLE_PROJECT_ID="your-project-id"
-
-# Mistral AI
-export MISTRAL_API_KEY="your-mistral-key"
-
-# Gemini
-export GEMINI_API_KEY="your-gemini-key"
-
-# Ollama (local installation)
-# No API key required - ensure Ollama is running locally
-# Default endpoint: http://localhost:11434
-```
-
-### Run All Provider Tests
-
-```bash
-# Run all tests with verbose output (recommended)
-go test -v ./tests/core-providers/
-
-# Run all tests in parallel (faster execution)
-# Tests are configured to run in parallel by default
-go test -v ./tests/core-providers/ -parallel 10
-
-# Run with debug logs
-go test -v ./tests/core-providers/ -debug
-```
-
-**Note**: All provider tests are configured to run in parallel using `t.Parallel()`. This means multiple provider tests can execute concurrently, significantly reducing total test execution time. The number of parallel tests can be controlled using the `-parallel` flag (default is the number of CPUs).
-
-### Run Specific Provider Tests
-
-```bash
-# Test only OpenAI
-go test -v ./tests/core-providers/ -run TestOpenAI
-
-# Test only Anthropic
-go test -v ./tests/core-providers/ -run TestAnthropic
-
-# Test only Azure
-go test -v ./tests/core-providers/ -run TestAzure
-
-# Test only Bedrock
-go test -v ./tests/core-providers/ -run TestBedrock
-
-# Test only Cohere
-go test -v ./tests/core-providers/ -run TestCohere
-
-# Test only Vertex AI
-go test -v ./tests/core-providers/ -run TestVertex
-
-# Test only Mistral
-go test -v ./tests/core-providers/ -run TestMistral
-
-# Test only Gemini
-go test -v ./tests/core-providers/ -run TestGemini
-
-# Test only Ollama
-go test -v ./tests/core-providers/ -run TestOllama
-```
-
-### Run Specific Test Scenarios
-
-You can run specific scenarios across all providers:
-
-```bash
-# Test only chat completion
-go test -v ./tests/core-providers/ -run "Chat"
-
-# Test only function calling
-go test -v ./tests/core-providers/ -run "Function"
-```
-
-### Run Specific Scenario for Specific Provider
-
-You can combine provider and scenario filters to test specific functionality:
-
-```bash
-# Test only OpenAI simple chat
-go test -v ./tests/core-providers/ -run "TestOpenAI/SimpleChat"
-
-# Test only Anthropic tool calls
-go test -v ./tests/core-providers/ -run "TestAnthropic/ToolCalls"
-
-# Test only Azure multi-turn conversation
-go test -v ./tests/core-providers/ -run "TestAzure/MultiTurnConversation"
-
-# Test only Bedrock text completion
-go test -v ./tests/core-providers/ -run "TestBedrock/TextCompletion"
-
-# Test only Cohere image URL processing
-go test -v ./tests/core-providers/ -run "TestCohere/ImageURL"
-
-# Test only Vertex automatic function calling
-go test -v ./tests/core-providers/ -run "TestVertex/AutomaticFunctionCalling"
-
-# Test only Mistral image processing
-go test -v ./tests/core-providers/ -run "TestMistral/ImageURL"
-
-# Test only Gemini simple chat
-go test -v ./tests/core-providers/ -run "TestGemini/SimpleChat"
-
-# Test only Ollama simple chat
-go test -v ./tests/core-providers/ -run "TestOllama/SimpleChat"
-
-# Test only OpenAI reasoning capabilities
-go test -v ./tests/core-providers/ -run "TestOpenAI/Reasoning"
-```
-
-**Available Scenario Names:**
-
-- `SimpleChat` - Basic chat completion
-- `TextCompletion` - Text completion (legacy models)
-- `MultiTurnConversation` - Multi-turn chat conversations
-- `ToolCalls` - Basic function/tool calling
-- `MultipleToolCalls` - Multiple tool calls in one request
-- `End2EndToolCalling` - Complete tool calling workflow
-- `AutomaticFunctionCalling` - Automatic function selection
-- `ImageURL` - Image processing from URLs
-- `ImageBase64` - Image processing from base64
-- `MultipleImages` - Multiple image processing
-- `CompleteEnd2End` - Full end-to-end test
-- `ProviderSpecific` - Provider-specific features
-- `Embedding` - Basic embedding request
-- `Reasoning` - Step-by-step reasoning and thinking capabilities via Responses API
-
-## 🧪 Test Scenarios
-
-Each provider is tested against these scenarios when supported:
-
-✅ **Supported by Most Providers:**
-
-- Simple Text Completion
-- Simple Chat Completion
-- Multi-turn Chat Conversation
-- Chat with System Message
-- Text Completion with Parameters
-- Chat Completion with Parameters
-- Error Handling (Invalid Model)
-- Model Information Retrieval
-- Simple Function Calling
-
-❌ **Provider-Specific Support:**
-
-- **Automatic Function Calling**: OpenAI, Anthropic, Bedrock, Azure, Vertex, Mistral, Ollama, Gemini
-- **Vision/Image Analysis**: OpenAI, Anthropic, Bedrock, Azure, Vertex, Mistral, Gemini (limited support for Cohere and Ollama)
-- **Text Completion**: Legacy models only (most providers now focus on chat completion)
-- **Reasoning/Thinking**: Advanced reasoning models with step-by-step thinking capabilities via Responses API (provider support varies)
-
-## 📊 Understanding Test Output
-
-The test suite provides rich visual feedback:
-
-- 🚀 **Test suite starting**
-- ✅ **Successful operations and supported tests**
-- ❌ **Failed operations and unsupported features**
-- ⏭️ **Skipped scenarios (not supported by provider)**
-- 📊 **Summary statistics**
-- ℹ️ **Informational notes**
-
-Example output:
-
-```text
-=== RUN   TestOpenAI
-🚀 Starting comprehensive test suite for OpenAI provider...
-✅ Simple Text Completion test completed successfully
-✅ Simple Chat Completion test completed successfully
-⏭️ Automatic Function Calling not supported by this provider
-📊 Test Summary for OpenAI:
-✅✅ Supported Tests: 11
-❌ Unsupported Tests: 1
-```
-
-## 🔧 Adding New Providers
-
-To add a new provider to the test suite:
-
-### 1. Create Provider Test File
-
-Create a new file `{provider}_test.go`:
-
-```go
-package tests
-
-import (
-    "testing"
-    "github.com/BifrostDev/bifrost/pkg/client"
-)
-
-func TestNewProvider(t *testing.T) {
-    config := client.Config{
-        Provider: "newprovider",
-        APIKey:   getEnvVar("NEW_PROVIDER_API_KEY"),
-        // Add other required config fields
-    }
-
-    // Skip if no API key provided
-    if config.APIKey == "" {
-        t.Skip("NEW_PROVIDER_API_KEY not set, skipping NewProvider tests")
-    }
-
-    runProviderTests(t, config, "NewProvider")
-}
-```
-
-### 2. Update Provider Configuration
-
-Add your provider's capabilities in `tests.go`:
-
-```go
-func getProviderCapabilities(providerName string) ProviderCapabilities {
-    switch providerName {
-    case "NewProvider":
-        return ProviderCapabilities{
-            SupportsTextCompletion:       true,
-            SupportsChatCompletion:       true,
-            SupportsFunctionCalling:     false, // Update based on provider
-            SupportsAutomaticFunctions:  false,
-            SupportsVision:              false,
-            SupportsSystemMessages:      true,
-            SupportsMultiTurn:           true,
-            SupportsParameters:          true,
-            SupportsModelInfo:           true,
-            SupportsErrorHandling:       true,
-        }
-    // ... other cases
-    }
-}
-```
-
-### 3. Add Default Models
-
-Add default models for your provider:
-
-```go
-func getDefaultModel(providerName string) string {
-    switch providerName {
-    case "NewProvider":
-        return "newprovider-model-name"
-    // ... other cases
-    }
-}
-```
-
-### 4. Environment Variables
-
-Document any required environment variables in this README and ensure they're handled in the test setup.
-
-### 5. Test Your Implementation
-
-Run your new provider tests:
-
-```bash
-go test -v ./tests/core-providers/ -run TestNewProvider
-```
-
-## 🛠️ Troubleshooting
-
-### Common Issues
-
-1. **Tests being skipped**: Make sure environment variables are set correctly
-2. **Connection timeouts**: Check your network connection and API endpoints
-3. **Authentication errors**: Verify your API keys are valid and have proper permissions
-4. **Missing logs**: Use `-v` flag to see detailed test output
-5. **Rate limiting**: Some providers have rate limits; tests may need delays
-6. **Ollama connection issues**: Ensure Ollama is running locally (`ollama serve`)
-7. **Mistral vision failures**: Check if your account has access to Pixtral models
-
-### Debug Mode
-
-Enable debug logging to see detailed API interactions:
-
-```bash
-go test -v ./tests/core-providers/ -debug
-```
-
-### Provider-Specific Considerations
-
-#### Mistral AI
-
-- **Models**: Uses `pixtral-12b-latest` for vision tasks
-- **Capabilities**: Full support for chat, tools, and vision
-- **API Key**: Required via `MISTRAL_API_KEY` environment variable
-
-#### Gemini
-
-- **Models**: Uses `gemini-2.0-flash` for chat and `text-embedding-004` for embeddings
-- **Capabilities**: Full support for chat, tools, vision (base64), speech synthesis, and transcription
-- **API Key**: Required via `GEMINI_API_KEY` environment variable
-- **Limitations**: No text completion support, limited image URL support (base64 preferred)
-
-#### Ollama
-
-- **Local Setup**: Requires Ollama to be running locally (default: `http://localhost:11434`)
-- **Models**: Uses `llama3.2` model by default
-- **No API Key**: Authentication not required for local instances
-- **Limitations**: No vision/image processing support
-- **Installation**: [Download from ollama.ai](https://ollama.ai/) and ensure the service is running
-
-### Checking Provider Status
-
-If a provider seems to be failing, you can check their status pages:
-
-- [OpenAI Status](https://status.openai.com/)
-- [Anthropic Status](https://status.anthropic.com/)
-- [Azure Status](https://status.azure.com/)
-- [AWS Status](https://status.aws.amazon.com/)
-- [Mistral Status](https://status.mistral.ai/)
-
-## 📝 Test Coverage
-
-The comprehensive test suite covers:
-
-- ✅ **Text Completion** - Legacy completion models (where supported)
-- ✅ **Simple Chat** - Basic chat completion functionality
-- ✅ **Multi-Turn Conversations** - Context maintenance across messages
-- ✅ **Tool Calls** - Basic function/tool calling capabilities
-- ✅ **Multiple Tool Calls** - Multiple tools in a single request
-- ✅ **End-to-End Tool Calling** - Complete tool workflow with result integration
-- ✅ **Automatic Function Calling** - Provider-managed tool execution
-- ✅ **Image URL Processing** - Image analysis from URLs
-- ✅ **Image Base64 Processing** - Image analysis from base64 encoded data
-- ✅ **Multiple Images** - Multi-image analysis and comparison
-- ✅ **Complete End-to-End** - Full multimodal workflows
-- ✅ **Provider-Specific Features** - Integration-unique capabilities
-
-### Provider Capability Matrix
-
-| Provider  | Chat | Tools | Vision | Text Completion | Auto Functions |
-| --------- | ---- | ----- | ------ | --------------- | -------------- |
-| OpenAI    | ✅   | ✅    | ✅     | ❌              | ✅             |
-| Anthropic | ✅   | ✅    | ✅     | ✅              | ✅             |
-| Azure     | ✅   | ✅    | ✅     | ✅              | ✅             |
-| Bedrock   | ✅   | ✅    | ✅     | ✅              | ✅             |
-| Vertex    | ✅   | ✅    | ✅     | ❌              | ✅             |
-| Cohere    | ✅   | ✅    | ❌     | ❌              | ❌             |
-| Mistral   | ✅   | ✅    | ✅     | ❌              | ✅             |
-| Ollama    | ✅   | ✅    | ❌     | ❌              | ✅             |
-| Gemini    | ✅   | ✅    | ✅     | ❌              | ✅             |
-
-## 🤝 Contributing
-
-When adding new providers or test scenarios:
-
-### Adding New Providers
-
-1. **Create test file**: Add `{provider}_test.go` following the existing pattern
-2. **Update config**: Add provider configuration in `config/account.go`:
-   - Add to `GetKeysForProvider()` (if API key required)
-   - Add to `GetConfigForProvider()`
-   - Add to `GetConfiguredProviders()` list
-3. **Test scenarios**: Configure supported scenarios in the test file
-4. **Documentation**: Update this README with environment variables and capabilities
-5. **Testing**: Test with multiple scenarios to verify integration
-
-### Adding New Test Scenarios
-
-1. **Implement scenario**: Add new test function in `scenarios/` directory
-2. **Update structure**: Add scenario to `TestScenarios` struct in `config/account.go`
-3. **Configure providers**: Update each provider's scenario configuration
-4. **Update runner**: Add scenario call to `runAllComprehensiveTests()` in `tests.go`
-5. **Documentation**: Update README with scenario description and examples
-
-### Testing Your Changes
-
-```bash
-# Test specific provider
-go test -v ./tests/core-providers/ -run TestYourProvider
-
-# Test all providers
-go test -v ./tests/core-providers/
-
-# Test with debug output
-go test -v ./tests/core-providers/ -debug
-```
-
-## 📄 License
-
-This test suite is part of the Bifrost project and follows the same license terms.
diff --git a/tests/core-providers/bedrock_test.go b/tests/core-providers/bedrock_test.go
deleted file mode 100644
index 483788d5b6..0000000000
--- a/tests/core-providers/bedrock_test.go
+++ /dev/null
@@ -1,58 +0,0 @@
-package tests
-
-import (
-	"os"
-	"testing"
-
-	"github.com/maximhq/bifrost/tests/core-providers/config"
-
-	"github.com/maximhq/bifrost/core/schemas"
-)
-
-func TestBedrock(t *testing.T) {
-	t.Parallel()
-	if os.Getenv("AWS_ACCESS_KEY_ID") == "" || os.Getenv("AWS_SECRET_ACCESS_KEY") == "" {
-		t.Skip("Skipping Bedrock embedding: AWS credentials not set")
-	}
-
-	client, ctx, cancel, err := config.SetupTest()
-	if err != nil {
-		t.Fatalf("Error initializing test setup: %v", err)
-	}
-	defer cancel()
-
-	testConfig := config.ComprehensiveTestConfig{
-		Provider:    schemas.Bedrock,
-		ChatModel:   "anthropic.claude-3-5-sonnet-20240620-v1:0",
-		VisionModel: "claude-sonnet-4",
-		Fallbacks: []schemas.Fallback{
-			{Provider: schemas.Bedrock, Model: "claude-3.7-sonnet"},
-		},
-		TextModel:      "mistral.mistral-7b-instruct-v0:2", // Bedrock Claude doesn't support text completion
-		EmbeddingModel: "cohere.embed-v4:0",
-		ReasoningModel: "claude-sonnet-4",
-		Scenarios: config.TestScenarios{
-			TextCompletion:        false, // Not supported for Claude
-			SimpleChat:            true,
-			CompletionStream:      true,
-			MultiTurnConversation: true,
-			ToolCalls:             true,
-			ToolCallsStreaming:    true,
-			MultipleToolCalls:     true,
-			End2EndToolCalling:    true,
-			AutomaticFunctionCall: true,
-			ImageURL:              false, // Direct Image URL is not supported for Bedrock
-			ImageBase64:           true,
-			MultipleImages:        false, // Direct Image URL is not supported for Bedrock
-			CompleteEnd2End:       true,
-			Embedding:             true,
-			Reasoning:             true,
-			ListModels:            true,
-		},
-	}
-
-	t.Run("BedrockTests", func(t *testing.T) {
-		runAllComprehensiveTests(t, client, ctx, testConfig)
-	})
-	client.Shutdown()
-}
diff --git a/tests/core-providers/go.mod b/tests/core-providers/go.mod
deleted file mode 100644
index 65bf495563..0000000000
--- a/tests/core-providers/go.mod
+++ /dev/null
@@ -1,58 +0,0 @@
-module github.com/maximhq/bifrost/tests/core-providers
-
-go 1.24.3
-
-replace github.com/maximhq/bifrost/core => ../../core
-
-require (
-	github.com/maximhq/bifrost/core v0.0.0-00010101000000-000000000000
-	github.com/stretchr/testify v1.11.1
-)
-
-require (
-	cloud.google.com/go/compute/metadata v0.9.0 // indirect
-	github.com/andybalholm/brotli v1.2.0 // indirect
-	github.com/aws/aws-sdk-go-v2 v1.39.5 // indirect
-	github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.2 // indirect
-	github.com/aws/aws-sdk-go-v2/config v1.31.13 // indirect
-	github.com/aws/aws-sdk-go-v2/credentials v1.18.17 // indirect
-	github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.10 // indirect
-	github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.12 // indirect
-	github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.12 // indirect
-	github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 // indirect
-	github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.2 // indirect
-	github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.10 // indirect
-	github.com/aws/aws-sdk-go-v2/service/sso v1.29.7 // indirect
-	github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.2 // indirect
-	github.com/aws/aws-sdk-go-v2/service/sts v1.38.7 // indirect
-	github.com/aws/smithy-go v1.23.1 // indirect
-	github.com/bahlo/generic-list-go v0.2.0 // indirect
-	github.com/buger/jsonparser v1.1.1 // indirect
-	github.com/bytedance/gopkg v0.1.3 // indirect
-	github.com/bytedance/sonic v1.14.1 // indirect
-	github.com/bytedance/sonic/loader v0.3.0 // indirect
-	github.com/cloudwego/base64x v0.1.6 // indirect
-	github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
-	github.com/google/uuid v1.6.0 // indirect
-	github.com/invopop/jsonschema v0.13.0 // indirect
-	github.com/klauspost/compress v1.18.1 // indirect
-	github.com/klauspost/cpuid/v2 v2.3.0 // indirect
-	github.com/mailru/easyjson v0.9.1 // indirect
-	github.com/mark3labs/mcp-go v0.41.1 // indirect
-	github.com/mattn/go-colorable v0.1.14 // indirect
-	github.com/mattn/go-isatty v0.0.20 // indirect
-	github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
-	github.com/rs/zerolog v1.34.0 // indirect
-	github.com/spf13/cast v1.10.0 // indirect
-	github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
-	github.com/valyala/bytebufferpool v1.0.0 // indirect
-	github.com/valyala/fasthttp v1.67.0 // indirect
-	github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect
-	github.com/yosida95/uritemplate/v3 v3.0.2 // indirect
-	golang.org/x/arch v0.22.0 // indirect
-	golang.org/x/net v0.47.0 // indirect
-	golang.org/x/oauth2 v0.32.0 // indirect
-	golang.org/x/sys v0.38.0 // indirect
-	golang.org/x/text v0.31.0 // indirect
-	gopkg.in/yaml.v3 v3.0.1 // indirect
-)
diff --git a/tests/core-providers/go.sum b/tests/core-providers/go.sum
deleted file mode 100644
index 2a4b1b95a1..0000000000
--- a/tests/core-providers/go.sum
+++ /dev/null
@@ -1,127 +0,0 @@
-cloud.google.com/go/compute/metadata v0.9.0 h1:pDUj4QMoPejqq20dK0Pg2N4yG9zIkYGdBtwLoEkH9Zs=
-cloud.google.com/go/compute/metadata v0.9.0/go.mod h1:E0bWwX5wTnLPedCKqk3pJmVgCBSM6qQI1yTBdEb3C10=
-github.com/andybalholm/brotli v1.2.0 h1:ukwgCxwYrmACq68yiUqwIWnGY0cTPox/M94sVwToPjQ=
-github.com/andybalholm/brotli v1.2.0/go.mod h1:rzTDkvFWvIrjDXZHkuS16NPggd91W3kUSvPlQ1pLaKY=
-github.com/aws/aws-sdk-go-v2 v1.39.5 h1:e/SXuia3rkFtapghJROrydtQpfQaaUgd1cUvyO1mp2w=
-github.com/aws/aws-sdk-go-v2 v1.39.5/go.mod h1:yWSxrnioGUZ4WVv9TgMrNUeLV3PFESn/v+6T/Su8gnM=
-github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.2 h1:t9yYsydLYNBk9cJ73rgPhPWqOh/52fcWDQB5b1JsKSY=
-github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.2/go.mod h1:IusfVNTmiSN3t4rhxWFaBAqn+mcNdwKtPcV16eYdgko=
-github.com/aws/aws-sdk-go-v2/config v1.31.13 h1:wcqQB3B0PgRPUF5ZE/QL1JVOyB0mbPevHFoAMpemR9k=
-github.com/aws/aws-sdk-go-v2/config v1.31.13/go.mod h1:ySB5D5ybwqGbT6c3GszZ+u+3KvrlYCUQNo62+hkKOFk=
-github.com/aws/aws-sdk-go-v2/credentials v1.18.17 h1:skpEwzN/+H8cdrrtT8y+rvWJGiWWv0DeNAe+4VTf+Vs=
-github.com/aws/aws-sdk-go-v2/credentials v1.18.17/go.mod h1:Ed+nXsaYa5uBINovJhcAWkALvXw2ZLk36opcuiSZfJM=
-github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.10 h1:UuGVOX48oP4vgQ36oiKmW9RuSeT8jlgQgBFQD+HUiHY=
-github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.10/go.mod h1:vM/Ini41PzvudT4YkQyE/+WiQJiQ6jzeDyU8pQKwCac=
-github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.12 h1:p/9flfXdoAnwJnuW9xHEAFY22R3A6skYkW19JFF9F+8=
-github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.12/go.mod h1:ZTLHakoVCTtW8AaLGSwJ3LXqHD9uQKnOcv1TrpO6u2k=
-github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.12 h1:2lTWFvRcnWFFLzHWmtddu5MTchc5Oj2OOey++99tPZ0=
-github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.12/go.mod h1:hI92pK+ho8HVcWMHKHrK3Uml4pfG7wvL86FzO0LVtQQ=
-github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 h1:WKuaxf++XKWlHWu9ECbMlha8WOEGm0OUEZqm4K/Gcfk=
-github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4/go.mod h1:ZWy7j6v1vWGmPReu0iSGvRiise4YI5SkR3OHKTZ6Wuc=
-github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.2 h1:xtuxji5CS0JknaXoACOunXOYOQzgfTvGAc9s2QdCJA4=
-github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.2/go.mod h1:zxwi0DIR0rcRcgdbl7E2MSOvxDyyXGBlScvBkARFaLQ=
-github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.10 h1:DRND0dkCKtJzCj4Xl4OpVbXZgfttY5q712H9Zj7qc/0=
-github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.10/go.mod h1:tGGNmJKOTernmR2+VJ0fCzQRurcPZj9ut60Zu5Fi6us=
-github.com/aws/aws-sdk-go-v2/service/sso v1.29.7 h1:fspVFg6qMx0svs40YgRmE7LZXh9VRZvTT35PfdQR6FM=
-github.com/aws/aws-sdk-go-v2/service/sso v1.29.7/go.mod h1:BQTKL3uMECaLaUV3Zc2L4Qybv8C6BIXjuu1dOPyxTQs=
-github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.2 h1:scVnW+NLXasGOhy7HhkdT9AGb6kjgW7fJ5xYkUaqHs0=
-github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.2/go.mod h1:FRNCY3zTEWZXBKm2h5UBUPvCVDOecTad9KhynDyGBc0=
-github.com/aws/aws-sdk-go-v2/service/sts v1.38.7 h1:VEO5dqFkMsl8QZ2yHsFDJAIZLAkEbaYDB+xdKi0Feic=
-github.com/aws/aws-sdk-go-v2/service/sts v1.38.7/go.mod h1:L1xxV3zAdB+qVrVW/pBIrIAnHFWHo6FBbFe4xOGsG/o=
-github.com/aws/smithy-go v1.23.1 h1:sLvcH6dfAFwGkHLZ7dGiYF7aK6mg4CgKA/iDKjLDt9M=
-github.com/aws/smithy-go v1.23.1/go.mod h1:LEj2LM3rBRQJxPZTB4KuzZkaZYnZPnvgIhb4pu07mx0=
-github.com/bahlo/generic-list-go v0.2.0 h1:5sz/EEAK+ls5wF+NeqDpk5+iNdMDXrh3z3nPnH1Wvgk=
-github.com/bahlo/generic-list-go v0.2.0/go.mod h1:2KvAjgMlE5NNynlg/5iLrrCCZ2+5xWbdbCW3pNTGyYg=
-github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs=
-github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0=
-github.com/bytedance/gopkg v0.1.3 h1:TPBSwH8RsouGCBcMBktLt1AymVo2TVsBVCY4b6TnZ/M=
-github.com/bytedance/gopkg v0.1.3/go.mod h1:576VvJ+eJgyCzdjS+c4+77QF3p7ubbtiKARP3TxducM=
-github.com/bytedance/sonic v1.14.1 h1:FBMC0zVz5XUmE4z9wF4Jey0An5FueFvOsTKKKtwIl7w=
-github.com/bytedance/sonic v1.14.1/go.mod h1:gi6uhQLMbTdeP0muCnrjHLeCUPyb70ujhnNlhOylAFc=
-github.com/bytedance/sonic/loader v0.3.0 h1:dskwH8edlzNMctoruo8FPTJDF3vLtDT0sXZwvZJyqeA=
-github.com/bytedance/sonic/loader v0.3.0/go.mod h1:N8A3vUdtUebEY2/VQC0MyhYeKUFosQU6FxH2JmUe6VI=
-github.com/cloudwego/base64x v0.1.6 h1:t11wG9AECkCDk5fMSoxmufanudBtJ+/HemLstXDLI2M=
-github.com/cloudwego/base64x v0.1.6/go.mod h1:OFcloc187FXDaYHvrNIjxSe8ncn0OOM8gEHfghB2IPU=
-github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
-github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
-github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
-github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
-github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
-github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
-github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
-github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
-github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
-github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
-github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
-github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
-github.com/invopop/jsonschema v0.13.0 h1:KvpoAJWEjR3uD9Kbm2HWJmqsEaHt8lBUpd0qHcIi21E=
-github.com/invopop/jsonschema v0.13.0/go.mod h1:ffZ5Km5SWWRAIN6wbDXItl95euhFz2uON45H2qjYt+0=
-github.com/klauspost/compress v1.18.1 h1:bcSGx7UbpBqMChDtsF28Lw6v/G94LPrrbMbdC3JH2co=
-github.com/klauspost/compress v1.18.1/go.mod h1:ZQFFVG+MdnR0P+l6wpXgIL4NTtwiKIdBnrBd8Nrxr+0=
-github.com/klauspost/cpuid/v2 v2.3.0 h1:S4CRMLnYUhGeDFDqkGriYKdfoFlDnMtqTiI/sFzhA9Y=
-github.com/klauspost/cpuid/v2 v2.3.0/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0=
-github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
-github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
-github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
-github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
-github.com/mailru/easyjson v0.9.1 h1:LbtsOm5WAswyWbvTEOqhypdPeZzHavpZx96/n553mR8=
-github.com/mailru/easyjson v0.9.1/go.mod h1:1+xMtQp2MRNVL/V1bOzuP3aP8VNwRW55fQUto+XFtTU=
-github.com/mark3labs/mcp-go v0.41.1 h1:w78eWfiQam2i8ICL7AL0WFiq7KHNJQ6UB53ZVtH4KGA=
-github.com/mark3labs/mcp-go v0.41.1/go.mod h1:T7tUa2jO6MavG+3P25Oy/jR7iCeJPHImCZHRymCn39g=
-github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
-github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE=
-github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8=
-github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
-github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
-github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
-github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
-github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
-github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
-github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
-github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
-github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
-github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
-github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0=
-github.com/rs/zerolog v1.34.0 h1:k43nTLIwcTVQAncfCw4KZ2VY6ukYoZaBPNOE8txlOeY=
-github.com/rs/zerolog v1.34.0/go.mod h1:bJsvje4Z08ROH4Nhs5iH600c3IkWhwp44iRc54W6wYQ=
-github.com/spf13/cast v1.10.0 h1:h2x0u2shc1QuLHfxi+cTJvs30+ZAHOGRic8uyGTDWxY=
-github.com/spf13/cast v1.10.0/go.mod h1:jNfB8QC9IA6ZuY2ZjDp0KtFO2LZZlg4S/7bzP6qqeHo=
-github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
-github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
-github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
-github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
-github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
-github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
-github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
-github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
-github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
-github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
-github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw=
-github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc=
-github.com/valyala/fasthttp v1.67.0 h1:tqKlJMUP6iuNG8hGjK/s9J4kadH7HLV4ijEcPGsezac=
-github.com/valyala/fasthttp v1.67.0/go.mod h1:qYSIpqt/0XNmShgo/8Aq8E3UYWVVwNS2QYmzd8WIEPM=
-github.com/wk8/go-ordered-map/v2 v2.1.8 h1:5h/BUHu93oj4gIdvHHHGsScSTMijfx5PeYkE/fJgbpc=
-github.com/wk8/go-ordered-map/v2 v2.1.8/go.mod h1:5nJHM5DyteebpVlHnWMV0rPz6Zp7+xBAnxjb1X5vnTw=
-github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU=
-github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E=
-github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4=
-github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4=
-golang.org/x/arch v0.22.0 h1:c/Zle32i5ttqRXjdLyyHZESLD/bB90DCU1g9l/0YBDI=
-golang.org/x/arch v0.22.0/go.mod h1:dNHoOeKiyja7GTvF9NJS1l3Z2yntpQNzgrjh1cU103A=
-golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY=
-golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU=
-golang.org/x/oauth2 v0.32.0 h1:jsCblLleRMDrxMN29H3z/k1KliIvpLgCkE6R8FXXNgY=
-golang.org/x/oauth2 v0.32.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA=
-golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
-golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
-golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM=
-golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM=
-gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
-gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
-gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
-gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
-gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
-gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/tests/governance/README.md b/tests/governance/README.md
deleted file mode 100644
index 1cbc0f988d..0000000000
--- a/tests/governance/README.md
+++ /dev/null
@@ -1,388 +0,0 @@
-# Bifrost Governance Plugin Test Suite
-
-A comprehensive test suite for the Bifrost Governance Plugin, testing hierarchical governance, budgets, rate limiting, usage tracking, and CRUD operations.
-
-## Overview
-
-This test suite provides extensive coverage of the Bifrost governance system including:
-
-- **Virtual Key Management**: Complete CRUD operations with comprehensive field update testing
-- **Team Management**: Team CRUD with customer relationships and budget inheritance
-- **Customer Management**: Customer CRUD with team hierarchies and budget controls
-- **Usage Tracking**: Real-time usage monitoring and audit logging
-- **Rate Limiting**: Flexible token and request rate limiting with configurable reset periods
-- **Budget Enforcement**: Hierarchical budget controls (Customer → Team → Virtual Key)
-- **Integration Testing**: End-to-end testing with chat completion API
-- **Edge Cases**: Boundary conditions, concurrency, and error scenarios
-
-## Test Structure
-
-### Test Files
-
-1. **`test_virtual_keys_crud.py`** - Virtual Key CRUD operations
-   - Complete CRUD lifecycle testing
-   - Comprehensive field update testing (individual and batch)
-   - Mutual exclusivity validation (team_id vs customer_id)
-   - Budget and rate limit management
-   - Relationship testing with teams and customers
-
-2. **`test_teams_crud.py`** - Team CRUD operations
-   - Team lifecycle management
-   - Customer association testing
-   - Budget inheritance and conflicts
-   - Comprehensive field updates
-   - Filtering and relationships
-
-3. **`test_customers_crud.py`** - Customer CRUD operations
-   - Customer lifecycle management
-   - Team relationship management
-   - Budget management and hierarchies
-   - Comprehensive field updates
-   - Cascading operations
-
-4. **`test_usage_tracking.py`** - Usage tracking and monitoring
-   - Chat completion integration with governance headers
-   - Usage tracking and budget enforcement
-   - Rate limiting enforcement
-   - Monitoring endpoints
-   - Reset functionality
-   - Debug and health endpoints
-
-### Configuration Files
-
-- **`conftest.py`** - Test fixtures, utilities, and configuration
-- **`pytest.ini`** - pytest configuration with markers and settings
-- **`requirements.txt`** - Test dependencies
-- **`__init__.py`** - Package initialization
-
-## Key Features
-
-### Comprehensive Field Update Testing
-
-Each entity (Virtual Key, Team, Customer) has exhaustive field update tests that verify:
-
-- **Individual field updates** - Each field updated independently
-- **Unchanged field verification** - Other fields remain unmodified
-- **Relationship preservation** - Associated data maintained correctly
-- **Timestamp validation** - updated_at changes, created_at preserved
-- **Multiple field updates** - Batch field modifications
-- **Nested object updates** - Budget and rate limit sub-objects
-- **Edge cases** - Empty updates, null values, invalid data
-
-### Mutual Exclusivity Testing
-
-Critical validation of Virtual Key constraints:
-- VK can have `team_id` OR `customer_id`, but NEVER both
-- Switching between team and customer associations
-- Validation error scenarios for invalid combinations
-
-### Hierarchical Testing
-
-Testing the Customer → Team → Virtual Key hierarchy:
-- Budget inheritance and override scenarios
-- Rate limit cascading and conflicts
-- Usage tracking across hierarchy levels
-- Permission and access control validation
-
-### Integration Testing
-
-End-to-end testing with actual chat completion requests:
-- Governance header validation (`x-bf-vk`)
-- Usage tracking during real requests
-- Budget enforcement during streaming
-- Rate limiting during concurrent requests
-- Provider and model access control
-
-## Setup and Usage
-
-### Prerequisites
-
-1. **Bifrost Server Running**: The governance plugin must be running on `localhost:8080`
-2. **Python 3.8+**: Required for the test suite
-3. **Dependencies**: Install via `pip install -r requirements.txt`
-
-### Environment Configuration
-
-Set the following environment variables (optional):
-
-```bash
-export BIFROST_BASE_URL="http://localhost:8080"  # Default
-export GOVERNANCE_TEST_TIMEOUT="300"             # Test timeout in seconds
-export GOVERNANCE_TEST_CLEANUP="true"            # Auto-cleanup entities
-```
-
-### Running Tests
-
-```bash
-# Install dependencies
-pip install -r requirements.txt
-
-# Run all governance tests
-pytest
-
-# Run specific test files
-pytest test_virtual_keys_crud.py
-pytest test_teams_crud.py
-pytest test_customers_crud.py
-pytest test_usage_tracking.py
-
-# Run with specific markers
-pytest -m "virtual_keys"
-pytest -m "field_updates"
-pytest -m "edge_cases"
-pytest -m "integration"
-
-# Run with coverage
-pytest --cov=. --cov-report=html
-
-# Run in parallel
-pytest -n auto
-
-# Run with verbose output
-pytest -v
-
-# Run smoke tests only
-pytest -m "smoke"
-```
-
-### Test Markers
-
-The test suite uses pytest markers for categorization:
-
-- `@pytest.mark.virtual_keys` - Virtual Key related tests
-- `@pytest.mark.teams` - Team related tests
-- `@pytest.mark.customers` - Customer related tests
-- `@pytest.mark.field_updates` - Comprehensive field update tests
-- `@pytest.mark.mutual_exclusivity` - Mutual exclusivity constraint tests
-- `@pytest.mark.budget` - Budget related tests
-- `@pytest.mark.rate_limit` - Rate limiting tests
-- `@pytest.mark.usage_tracking` - Usage tracking tests
-- `@pytest.mark.integration` - Integration tests
-- `@pytest.mark.edge_cases` - Edge case tests
-- `@pytest.mark.concurrency` - Concurrency tests
-- `@pytest.mark.slow` - Slow running tests (>5s)
-- `@pytest.mark.smoke` - Quick smoke tests
-
-## API Endpoints Tested
-
-### Virtual Key Endpoints
-- `GET /api/governance/virtual-keys` - List all VKs with relationships
-- `POST /api/governance/virtual-keys` - Create VK with optional budget/rate limits
-- `GET /api/governance/virtual-keys/{vk_id}` - Get specific VK
-- `PUT /api/governance/virtual-keys/{vk_id}` - Update VK
-- `DELETE /api/governance/virtual-keys/{vk_id}` - Delete VK
-
-### Team Endpoints
-- `GET /api/governance/teams` - List teams with optional customer filter
-- `POST /api/governance/teams` - Create team with optional customer/budget
-- `GET /api/governance/teams/{team_id}` - Get specific team
-- `PUT /api/governance/teams/{team_id}` - Update team
-- `DELETE /api/governance/teams/{team_id}` - Delete team
-
-### Customer Endpoints
-- `GET /api/governance/customers` - List customers with teams/budgets
-- `POST /api/governance/customers` - Create customer with optional budget
-- `GET /api/governance/customers/{customer_id}` - Get specific customer
-- `PUT /api/governance/customers/{customer_id}` - Update customer
-- `DELETE /api/governance/customers/{customer_id}` - Delete customer
-
-### Monitoring Endpoints
-- `GET /api/governance/usage-stats` - Usage statistics with optional VK filter
-- `POST /api/governance/usage-reset` - Reset VK usage counters
-- `GET /api/governance/debug/stats` - Debug statistics
-- `GET /api/governance/debug/counters` - All VK usage counters
-- `GET /api/governance/debug/health` - Health check
-
-### Integration Endpoints
-- `POST /v1/chat/completions` - Chat completion with governance headers
-
-## Test Data and Schemas
-
-### Virtual Key Request Schema
-```json
-{
-  "name": "string (required)",
-  "description": "string (optional)",
-  "allowed_models": ["string"] (optional),
-  "allowed_providers": ["string"] (optional),
-  "team_id": "string (optional, mutually exclusive with customer_id)",
-  "customer_id": "string (optional, mutually exclusive with team_id)",
-  "budget": {
-    "max_limit": "integer (cents)",
-    "reset_duration": "string (e.g., '1h', '1d')"
-  },
-  "rate_limit": {
-    "token_max_limit": "integer (optional)",
-    "token_reset_duration": "string (optional)",
-    "request_max_limit": "integer (optional)", 
-    "request_reset_duration": "string (optional)"
-  },
-  "is_active": "boolean (optional, default true)"
-}
-```
-
-### Team Request Schema
-```json
-{
-  "name": "string (required)",
-  "customer_id": "string (optional)",
-  "budget": {
-    "max_limit": "integer (cents)",
-    "reset_duration": "string"
-  }
-}
-```
-
-### Customer Request Schema
-```json
-{
-  "name": "string (required)",
-  "budget": {
-    "max_limit": "integer (cents)",
-    "reset_duration": "string"
-  }
-}
-```
-
-## Edge Cases Covered
-
-### Budget Edge Cases
-- Boundary values: 0, negative, max int64, overflow
-- Reset timing: exact boundaries, concurrent resets
-- Hierarchical conflicts: VK vs Team vs Customer budgets
-- Fractional costs: proper cents handling
-- Concurrent usage: multiple requests hitting limits
-- Reset during flight: budget resets while processing
-- Streaming cost tracking: partial vs final costs
-
-### Rate Limiting Edge Cases
-- Independent limits: token vs request limits with different resets
-- Sub-second precision: very short reset durations
-- Burst scenarios: simultaneous requests
-- Provider variations: different limits per provider/model
-- Streaming rate limits: token counting across chunks
-- Reset race conditions: limits resetting during validation
-
-### Relationship Edge Cases
-- Orphaned entities: VKs without parent relationships
-- Invalid references: team_id pointing to non-existent team
-- Mutual exclusivity: VK with both team_id and customer_id (MUST FAIL)
-- Circular dependencies: prevention testing
-- Deep hierarchies: Customer → Team → VK inheritance
-
-### Update Edge Cases
-- Partial updates: only some fields updated
-- Null handling: null values clearing optional fields
-- Type validation: wrong data types in requests
-- Concurrent updates: multiple clients updating same entity
-- Cache invalidation: in-memory cache updates after DB changes
-- Rollback scenarios: failed updates don't leave partial changes
-
-### Integration Edge Cases
-- Missing headers: requests without x-bf-vk header
-- Invalid headers: malformed or non-existent VK values
-- Provider/model validation: invalid combinations
-- Error propagation: governance vs completion errors
-- Streaming interruption: governance blocking mid-stream
-- Context preservation: headers passed through request lifecycle
-
-## Utilities and Helpers
-
-### Test Fixtures
-- `governance_client` - API client for governance endpoints
-- `cleanup_tracker` - Automatic entity cleanup after tests
-- `sample_customer` - Pre-created customer for testing
-- `sample_team` - Pre-created team for testing
-- `sample_virtual_key` - Pre-created virtual key for testing
-- `field_update_tester` - Helper for comprehensive field update testing
-
-### Utility Functions
-- `generate_unique_name()` - Generate unique test entity names
-- `wait_for_condition()` - Wait for async conditions
-- `assert_response_success()` - Assert HTTP response success
-- `deep_compare_entities()` - Deep comparison of entity data
-- `verify_unchanged_fields()` - Verify fields remain unchanged
-- `create_complete_virtual_key_data()` - Generate complete VK data
-
-### Error Handling
-- Comprehensive error assertion helpers
-- Automatic retry for transient failures
-- Detailed error logging and reporting
-- Clean failure modes with proper cleanup
-
-## Performance and Concurrency
-
-### Performance Testing
-- Response time benchmarks for all endpoints
-- Memory usage monitoring during tests
-- Database query optimization validation
-- Cache performance verification
-
-### Concurrency Testing
-- Race condition detection
-- Concurrent entity creation/updates
-- Simultaneous budget usage scenarios
-- Rate limit burst testing
-- Cache consistency under load
-
-## Debugging and Monitoring
-
-### Test Logging
-- Comprehensive test execution logging
-- API request/response logging
-- Error details and stack traces
-- Performance metrics and timing
-
-### Debug Endpoints
-- Test coverage of debug/stats endpoint
-- Usage counter validation
-- Health check verification
-- Database state inspection
-
-## Contributing
-
-When adding new tests:
-
-1. **Follow naming conventions**: `test_<feature>_<scenario>.py`
-2. **Use appropriate markers**: Mark tests with relevant pytest markers
-3. **Include cleanup**: Use `cleanup_tracker` fixture for entity cleanup
-4. **Document edge cases**: Comment complex test scenarios
-5. **Add field update tests**: For any new entity fields, add comprehensive update tests
-6. **Test relationships**: Verify entity relationships and cascading effects
-7. **Include negative tests**: Test validation and error scenarios
-
-### Test Development Guidelines
-
-1. **Comprehensive Coverage**: Test all CRUD operations, field updates, and edge cases
-2. **Isolation**: Tests should be independent and not rely on other test state
-3. **Cleanup**: Always clean up created entities to avoid test interference
-4. **Documentation**: Comment complex test logic and expected behaviors
-5. **Performance**: Mark slow tests appropriately and optimize where possible
-6. **Error Scenarios**: Test both success and failure paths
-7. **Relationships**: Verify entity relationships are properly maintained
-
-## Troubleshooting
-
-### Common Issues
-
-1. **Server Not Running**: Ensure Bifrost server is running on localhost:8080
-2. **Permission Errors**: Check that test has access to create/delete entities
-3. **Cleanup Failures**: Manually clean up test entities if auto-cleanup fails
-4. **Timeout Errors**: Increase timeout for slow-running tests
-5. **Concurrency Issues**: Use appropriate locks for shared resource tests
-
-### Debug Commands
-
-```bash
-# Run with maximum verbosity
-pytest -vvv --tb=long
-
-# Run single test with debugging
-pytest -s test_virtual_keys_crud.py::test_vk_create_basic
-
-# Run with profiling
-pytest --profile-svg
-
-# Check test coverage
-pytest --cov=. --cov-report=term-missing
-```
\ No newline at end of file
diff --git a/tests/governance/__init__.py b/tests/governance/__init__.py
deleted file mode 100644
index 2936e67c98..0000000000
--- a/tests/governance/__init__.py
+++ /dev/null
@@ -1,31 +0,0 @@
-"""
-Bifrost Governance Plugin Test Suite
-
-Comprehensive test suite for the Bifrost governance system covering:
-- Virtual Key CRUD operations with comprehensive field updates
-- Team CRUD operations with hierarchical relationships
-- Customer CRUD operations with budget management
-- Usage tracking and monitoring
-- Rate limiting and budget enforcement
-- Integration testing with chat completions
-- Edge cases and validation testing
-- Concurrency and race condition testing
-
-Test Structure:
-- test_virtual_keys_crud.py: Virtual Key CRUD and field update tests
-- test_teams_crud.py: Team CRUD and field update tests
-- test_customers_crud.py: Customer CRUD and field update tests
-- test_usage_tracking.py: Usage tracking, monitoring, and integration tests
-- conftest.py: Test fixtures and utilities
-
-Key Features:
-- Comprehensive field update testing for all entities
-- Mutual exclusivity validation (VK team_id vs customer_id)
-- Hierarchical budget and rate limit testing
-- Automatic test entity cleanup
-- Concurrent testing support
-- Edge case and boundary condition coverage
-"""
-
-__version__ = "1.0.0"
-__author__ = "Bifrost Team"
diff --git a/tests/governance/conftest.py b/tests/governance/conftest.py
deleted file mode 100644
index 84d77c2d08..0000000000
--- a/tests/governance/conftest.py
+++ /dev/null
@@ -1,668 +0,0 @@
-"""
-Pytest configuration for Bifrost Governance Plugin testing.
-
-Provides comprehensive setup, fixtures, and utilities for testing the
-Bifrost governance system with hierarchical budgets, rate limiting,
-usage tracking, and CRUD operations for Virtual Keys, Teams, and Customers.
-"""
-
-import pytest
-import requests
-import json
-import uuid
-import time
-import os
-from datetime import datetime, timedelta
-from typing import Dict, List, Optional, Any, Tuple
-from concurrent.futures import ThreadPoolExecutor
-import threading
-from dataclasses import dataclass
-import copy
-
-
-# Test Configuration
-BIFROST_BASE_URL = os.getenv("BIFROST_BASE_URL", "http://localhost:8080")
-GOVERNANCE_API_BASE = f"{BIFROST_BASE_URL}/api/governance"
-COMPLETION_API_BASE = f"{BIFROST_BASE_URL}/v1"
-
-
-def pytest_configure(config):
-    """Configure pytest with custom markers for governance testing"""
-    markers = [
-        "governance: mark test as governance-related",
-        "virtual_keys: mark test as virtual key test",
-        "teams: mark test as team test",
-        "customers: mark test as customer test",
-        "budget: mark test as budget-related",
-        "rate_limit: mark test as rate limit-related",
-        "usage_tracking: mark test as usage tracking test",
-        "crud: mark test as CRUD operation test",
-        "field_updates: mark test as comprehensive field update test",
-        "validation: mark test as validation test",
-        "integration: mark test as integration test",
-        "edge_cases: mark test as edge case test",
-        "concurrency: mark test as concurrency test",
-        "mutual_exclusivity: mark test as mutual exclusivity test",
-        "hierarchical: mark test as hierarchical governance test",
-        "slow: mark test as slow running (>5s)",
-        "smoke: mark test as smoke test",
-    ]
-
-    for marker in markers:
-        config.addinivalue_line("markers", marker)
-
-
-@dataclass
-class TestEntity:
-    """Base class for test entities"""
-
-    id: str
-    created_at: Optional[str] = None
-    updated_at: Optional[str] = None
-
-
-@dataclass
-class TestBudget(TestEntity):
-    """Test budget entity"""
-
-    max_limit: int = 0
-    reset_duration: str = ""
-    current_usage: int = 0
-    last_reset: Optional[str] = None
-
-
-@dataclass
-class TestRateLimit(TestEntity):
-    """Test rate limit entity"""
-
-    token_max_limit: Optional[int] = None
-    token_reset_duration: Optional[str] = None
-    request_max_limit: Optional[int] = None
-    request_reset_duration: Optional[str] = None
-    token_current_usage: int = 0
-    request_current_usage: int = 0
-    token_last_reset: Optional[str] = None
-    request_last_reset: Optional[str] = None
-
-
-@dataclass
-class TestCustomer(TestEntity):
-    """Test customer entity"""
-
-    name: str = ""
-    budget_id: Optional[str] = None
-    budget: Optional[TestBudget] = None
-    teams: Optional[List["TestTeam"]] = None
-
-
-@dataclass
-class TestTeam(TestEntity):
-    """Test team entity"""
-
-    name: str = ""
-    customer_id: Optional[str] = None
-    budget_id: Optional[str] = None
-    customer: Optional[TestCustomer] = None
-    budget: Optional[TestBudget] = None
-
-
-@dataclass
-class TestVirtualKey(TestEntity):
-    """Test virtual key entity"""
-
-    name: str = ""
-    value: str = ""
-    description: str = ""
-    allowed_models: Optional[List[str]] = None
-    allowed_providers: Optional[List[str]] = None
-    team_id: Optional[str] = None
-    customer_id: Optional[str] = None
-    budget_id: Optional[str] = None
-    rate_limit_id: Optional[str] = None
-    is_active: bool = True
-    team: Optional[TestTeam] = None
-    customer: Optional[TestCustomer] = None
-    budget: Optional[TestBudget] = None
-    rate_limit: Optional[TestRateLimit] = None
-
-
-class GovernanceTestClient:
-    """HTTP client for governance API testing with comprehensive error handling"""
-
-    def __init__(self, base_url: str = GOVERNANCE_API_BASE):
-        self.base_url = base_url
-        self.session = requests.Session()
-        self.session.headers.update({"Content-Type": "application/json"})
-
-    def request(self, method: str, endpoint: str, **kwargs) -> requests.Response:
-        """Make HTTP request with comprehensive error handling"""
-        url = f"{self.base_url}/{endpoint.lstrip('/')}"
-        try:
-            response = self.session.request(method, url, **kwargs)
-            return response
-        except requests.exceptions.RequestException as e:
-            pytest.fail(f"Request failed: {method} {url} - {str(e)}")
-
-    # Virtual Key operations
-    def list_virtual_keys(self, **params) -> requests.Response:
-        """List all virtual keys"""
-        return self.request("GET", "/virtual-keys", params=params)
-
-    def create_virtual_key(self, data: Dict[str, Any]) -> requests.Response:
-        """Create a virtual key"""
-        return self.request("POST", "/virtual-keys", json=data)
-
-    def get_virtual_key(self, vk_id: str) -> requests.Response:
-        """Get virtual key by ID"""
-        return self.request("GET", f"/virtual-keys/{vk_id}")
-
-    def update_virtual_key(self, vk_id: str, data: Dict[str, Any]) -> requests.Response:
-        """Update virtual key"""
-        return self.request("PUT", f"/virtual-keys/{vk_id}", json=data)
-
-    def delete_virtual_key(self, vk_id: str) -> requests.Response:
-        """Delete virtual key"""
-        return self.request("DELETE", f"/virtual-keys/{vk_id}")
-
-    # Team operations
-    def list_teams(self, **params) -> requests.Response:
-        """List all teams"""
-        return self.request("GET", "/teams", params=params)
-
-    def create_team(self, data: Dict[str, Any]) -> requests.Response:
-        """Create a team"""
-        return self.request("POST", "/teams", json=data)
-
-    def get_team(self, team_id: str) -> requests.Response:
-        """Get team by ID"""
-        return self.request("GET", f"/teams/{team_id}")
-
-    def update_team(self, team_id: str, data: Dict[str, Any]) -> requests.Response:
-        """Update team"""
-        return self.request("PUT", f"/teams/{team_id}", json=data)
-
-    def delete_team(self, team_id: str) -> requests.Response:
-        """Delete team"""
-        return self.request("DELETE", f"/teams/{team_id}")
-
-    # Customer operations
-    def list_customers(self, **params) -> requests.Response:
-        """List all customers"""
-        return self.request("GET", "/customers", params=params)
-
-    def create_customer(self, data: Dict[str, Any]) -> requests.Response:
-        """Create a customer"""
-        return self.request("POST", "/customers", json=data)
-
-    def get_customer(self, customer_id: str) -> requests.Response:
-        """Get customer by ID"""
-        return self.request("GET", f"/customers/{customer_id}")
-
-    def update_customer(
-        self, customer_id: str, data: Dict[str, Any]
-    ) -> requests.Response:
-        """Update customer"""
-        return self.request("PUT", f"/customers/{customer_id}", json=data)
-
-    def delete_customer(self, customer_id: str) -> requests.Response:
-        """Delete customer"""
-        return self.request("DELETE", f"/customers/{customer_id}")
-
-    # Monitoring and usage operations
-    def get_usage_stats(self, **params) -> requests.Response:
-        """Get usage statistics"""
-        return self.request("GET", "/usage-stats", params=params)
-
-    def reset_usage(self, data: Dict[str, Any]) -> requests.Response:
-        """Reset usage counters"""
-        return self.request("POST", "/usage-reset", json=data)
-
-    def get_debug_stats(self) -> requests.Response:
-        """Get debug statistics"""
-        return self.request("GET", "/debug/stats")
-
-    def get_debug_counters(self) -> requests.Response:
-        """Get debug counters"""
-        return self.request("GET", "/debug/counters")
-
-    def get_health_check(self) -> requests.Response:
-        """Get health check"""
-        return self.request("GET", "/debug/health")
-
-    # Chat completion for integration testing
-    def chat_completion(
-        self,
-        messages: List[Dict],
-        model: str = "gpt-3.5-turbo",
-        headers: Optional[Dict] = None,
-        **kwargs,
-    ) -> requests.Response:
-        """Make chat completion request"""
-        data = {"model": model, "messages": messages, **kwargs}
-
-        session_headers = self.session.headers.copy()
-        if headers:
-            session_headers.update(headers)
-
-        url = f"{COMPLETION_API_BASE}/chat/completions"
-        try:
-            response = requests.post(url, json=data, headers=session_headers)
-            return response
-        except requests.exceptions.RequestException as e:
-            pytest.fail(f"Chat completion request failed: {url} - {str(e)}")
-
-
-class CleanupTracker:
-    """Tracks entities created during tests for cleanup"""
-
-    def __init__(self):
-        self.virtual_keys = []
-        self.teams = []
-        self.customers = []
-        self._lock = threading.Lock()
-
-    def add_virtual_key(self, vk_id: str):
-        """Add virtual key for cleanup"""
-        with self._lock:
-            if vk_id not in self.virtual_keys:
-                self.virtual_keys.append(vk_id)
-
-    def add_team(self, team_id: str):
-        """Add team for cleanup"""
-        with self._lock:
-            if team_id not in self.teams:
-                self.teams.append(team_id)
-
-    def add_customer(self, customer_id: str):
-        """Add customer for cleanup"""
-        with self._lock:
-            if customer_id not in self.customers:
-                self.customers.append(customer_id)
-
-    def cleanup(self, client: GovernanceTestClient):
-        """Cleanup all tracked entities"""
-        with self._lock:
-            # Delete in dependency order: VKs -> Teams -> Customers
-            for vk_id in self.virtual_keys:
-                try:
-                    client.delete_virtual_key(vk_id)
-                except Exception:
-                    pass  # Ignore cleanup errors
-
-            for team_id in self.teams:
-                try:
-                    client.delete_team(team_id)
-                except Exception:
-                    pass
-
-            for customer_id in self.customers:
-                try:
-                    client.delete_customer(customer_id)
-                except Exception:
-                    pass
-
-            # Clear lists
-            self.virtual_keys.clear()
-            self.teams.clear()
-            self.customers.clear()
-
-
-# Fixtures
-
-
-@pytest.fixture(scope="session")
-def governance_client():
-    """Governance API client for the session"""
-    return GovernanceTestClient()
-
-
-@pytest.fixture
-def cleanup_tracker():
-    """Cleanup tracker for test entities"""
-    return CleanupTracker()
-
-
-@pytest.fixture(autouse=True)
-def auto_cleanup(cleanup_tracker, governance_client):
-    """Automatically cleanup test entities after each test"""
-    yield
-    cleanup_tracker.cleanup(governance_client)
-
-
-@pytest.fixture
-def sample_budget_data():
-    """Sample budget data for testing"""
-    return {"max_limit": 10000, "reset_duration": "1h"}  # $100.00 in cents
-
-
-@pytest.fixture
-def sample_rate_limit_data():
-    """Sample rate limit data for testing"""
-    return {
-        "token_max_limit": 1000,
-        "token_reset_duration": "1m",
-        "request_max_limit": 100,
-        "request_reset_duration": "1h",
-    }
-
-
-@pytest.fixture
-def sample_customer(governance_client, cleanup_tracker):
-    """Create a sample customer for testing"""
-    data = {"name": f"Test Customer {uuid.uuid4().hex[:8]}"}
-    response = governance_client.create_customer(data)
-    assert response.status_code == 201
-    customer_data = response.json()["customer"]
-    cleanup_tracker.add_customer(customer_data["id"])
-    return customer_data
-
-
-@pytest.fixture
-def sample_team(governance_client, cleanup_tracker):
-    """Create a sample team for testing"""
-    data = {"name": f"Test Team {uuid.uuid4().hex[:8]}"}
-    response = governance_client.create_team(data)
-    assert response.status_code == 201
-    team_data = response.json()["team"]
-    cleanup_tracker.add_team(team_data["id"])
-    return team_data
-
-
-@pytest.fixture
-def sample_team_with_customer(governance_client, cleanup_tracker, sample_customer):
-    """Create a sample team associated with a customer"""
-    data = {
-        "name": f"Test Team with Customer {uuid.uuid4().hex[:8]}",
-        "customer_id": sample_customer["id"],
-    }
-    response = governance_client.create_team(data)
-    assert response.status_code == 201
-    team_data = response.json()["team"]
-    cleanup_tracker.add_team(team_data["id"])
-    return team_data
-
-
-@pytest.fixture
-def sample_virtual_key(governance_client, cleanup_tracker):
-    """Create a sample virtual key for testing"""
-    data = {"name": f"Test VK {uuid.uuid4().hex[:8]}"}
-    response = governance_client.create_virtual_key(data)
-    assert response.status_code == 201
-    vk_data = response.json()["virtual_key"]
-    cleanup_tracker.add_virtual_key(vk_data["id"])
-    return vk_data
-
-
-@pytest.fixture
-def sample_virtual_key_with_team(governance_client, cleanup_tracker, sample_team):
-    """Create a sample virtual key associated with a team"""
-    data = {
-        "name": f"Test VK with Team {uuid.uuid4().hex[:8]}",
-        "team_id": sample_team["id"],
-    }
-    response = governance_client.create_virtual_key(data)
-    assert response.status_code == 201
-    vk_data = response.json()["virtual_key"]
-    cleanup_tracker.add_virtual_key(vk_data["id"])
-    return vk_data
-
-
-@pytest.fixture
-def sample_virtual_key_with_customer(
-    governance_client, cleanup_tracker, sample_customer
-):
-    """Create a sample virtual key associated with a customer"""
-    data = {
-        "name": f"Test VK with Customer {uuid.uuid4().hex[:8]}",
-        "customer_id": sample_customer["id"],
-    }
-    response = governance_client.create_virtual_key(data)
-    assert response.status_code == 201
-    vk_data = response.json()["virtual_key"]
-    cleanup_tracker.add_virtual_key(vk_data["id"])
-    return vk_data
-
-
-# Utility functions
-
-
-def generate_unique_name(prefix: str = "Test") -> str:
-    """Generate a unique name for testing"""
-    return f"{prefix} {uuid.uuid4().hex[:8]} {int(time.time())}"
-
-
-def wait_for_condition(
-    condition_func, timeout: float = 5.0, interval: float = 0.1
-) -> bool:
-    """Wait for a condition to be true"""
-    start_time = time.time()
-    while time.time() - start_time < timeout:
-        if condition_func():
-            return True
-        time.sleep(interval)
-    return False
-
-
-def assert_response_success(response: requests.Response, expected_status: int = 200):
-    """Assert that response is successful with expected status"""
-    if response.status_code != expected_status:
-        try:
-            error_data = response.json()
-            pytest.fail(
-                f"Expected status {expected_status}, got {response.status_code}: {error_data}"
-            )
-        except:
-            pytest.fail(
-                f"Expected status {expected_status}, got {response.status_code}: {response.text}"
-            )
-
-
-def assert_field_unchanged(actual_value, expected_value, field_name: str):
-    """Assert that a field value hasn't changed"""
-    if actual_value != expected_value:
-        pytest.fail(
-            f"Field '{field_name}' changed unexpectedly. Expected: {expected_value}, Got: {actual_value}"
-        )
-
-
-def deep_compare_entities(
-    entity1: Dict, entity2: Dict, ignore_fields: List[str] = None
-) -> List[str]:
-    """Deep compare two entities and return list of differences"""
-    if ignore_fields is None:
-        ignore_fields = ["updated_at", "created_at"]
-
-    differences = []
-
-    def compare_values(path: str, val1, val2):
-        if isinstance(val1, dict) and isinstance(val2, dict):
-            for key in set(val1.keys()) | set(val2.keys()):
-                if key in ignore_fields:
-                    continue
-                new_path = f"{path}.{key}" if path else key
-                if key not in val1:
-                    differences.append(f"{new_path}: missing in first entity")
-                elif key not in val2:
-                    differences.append(f"{new_path}: missing in second entity")
-                else:
-                    compare_values(new_path, val1[key], val2[key])
-        elif isinstance(val1, list) and isinstance(val2, list):
-            if len(val1) != len(val2):
-                differences.append(
-                    f"{path}: list length differs ({len(val1)} vs {len(val2)})"
-                )
-            else:
-                for i, (item1, item2) in enumerate(zip(val1, val2)):
-                    compare_values(f"{path}[{i}]", item1, item2)
-        elif val1 != val2:
-            differences.append(f"{path}: {val1} != {val2}")
-
-    compare_values("", entity1, entity2)
-    return differences
-
-
-def create_complete_virtual_key_data(
-    name: str = None,
-    team_id: str = None,
-    customer_id: str = None,
-    include_budget: bool = True,
-    include_rate_limit: bool = True,
-) -> Dict[str, Any]:
-    """Create complete virtual key data for testing"""
-    data = {
-        "name": name or generate_unique_name("Complete VK"),
-        "description": "Complete test virtual key with all fields",
-        "allowed_models": ["gpt-4", "claude-3-5-sonnet-20240620"],
-        "allowed_providers": ["openai", "anthropic"],
-        "is_active": True,
-    }
-
-    if team_id:
-        data["team_id"] = team_id
-    elif customer_id:
-        data["customer_id"] = customer_id
-
-    if include_budget:
-        data["budget"] = {
-            "max_limit": 50000,  # $500.00 in cents
-            "reset_duration": "1d",
-        }
-
-    if include_rate_limit:
-        data["rate_limit"] = {
-            "token_max_limit": 5000,
-            "token_reset_duration": "1h",
-            "request_max_limit": 500,
-            "request_reset_duration": "1h",
-        }
-
-    return data
-
-
-def verify_entity_relationships(
-    entity: Dict[str, Any], expected_relationships: Dict[str, Any]
-):
-    """Verify that entity has expected relationship data loaded"""
-    for rel_name, expected_data in expected_relationships.items():
-        if expected_data is None:
-            assert entity.get(rel_name) is None, f"Expected {rel_name} to be None"
-        else:
-            assert entity.get(rel_name) is not None, f"Expected {rel_name} to be loaded"
-            if isinstance(expected_data, dict):
-                for key, value in expected_data.items():
-                    assert (
-                        entity[rel_name].get(key) == value
-                    ), f"Expected {rel_name}.{key} to be {value}"
-
-
-def verify_unchanged_fields(
-    updated_entity: Dict, original_entity: Dict, exclude_fields: List[str]
-):
-    """Verify that all fields except specified ones remain unchanged"""
-    ignore_fields = ["updated_at", "created_at"] + exclude_fields
-
-    def check_field(path: str, updated_val, original_val):
-        if path in ignore_fields:
-            return
-
-        if isinstance(updated_val, dict) and isinstance(original_val, dict):
-            for key in original_val.keys():
-                if key not in ignore_fields:
-                    new_path = f"{path}.{key}" if path else key
-                    if key in updated_val:
-                        check_field(new_path, updated_val[key], original_val[key])
-        elif updated_val != original_val:
-            pytest.fail(
-                f"Field '{path}' should not have changed. Expected: {original_val}, Got: {updated_val}"
-            )
-
-    for field in original_entity.keys():
-        if field not in ignore_fields:
-            if field in updated_entity:
-                check_field(field, updated_entity[field], original_entity[field])
-
-
-class FieldUpdateTester:
-    """Helper class for comprehensive field update testing"""
-
-    def __init__(self, client: GovernanceTestClient, cleanup_tracker: CleanupTracker):
-        self.client = client
-        self.cleanup_tracker = cleanup_tracker
-
-    def test_individual_field_updates(
-        self, entity_type: str, entity_id: str, field_test_cases: List[Dict]
-    ):
-        """Test updating individual fields one by one"""
-
-        # Get original entity state
-        if entity_type == "virtual_key":
-            original_response = self.client.get_virtual_key(entity_id)
-            update_func = self.client.update_virtual_key
-        elif entity_type == "team":
-            original_response = self.client.get_team(entity_id)
-            update_func = self.client.update_team
-        elif entity_type == "customer":
-            original_response = self.client.get_customer(entity_id)
-            update_func = self.client.update_customer
-        else:
-            raise ValueError(f"Unknown entity type: {entity_type}")
-
-        assert original_response.status_code == 200
-        original_entity = original_response.json()[entity_type]
-
-        for test_case in field_test_cases:
-            # Reset entity to original state if needed
-            if test_case.get("reset_before", True):
-                self._reset_entity_state(entity_type, entity_id, original_entity)
-
-            # Perform field update
-            update_data = test_case["update_data"]
-            response = update_func(entity_id, update_data)
-
-            # Verify update succeeded
-            assert (
-                response.status_code == 200
-            ), f"Field update failed for {test_case['field']}: {response.json()}"
-            updated_entity = response.json()[entity_type]
-
-            # Verify target field was updated
-            if test_case.get("custom_validation"):
-                test_case["custom_validation"](updated_entity)
-            else:
-                self._verify_field_updated(
-                    updated_entity, test_case["field"], test_case["expected_value"]
-                )
-
-            # Verify other fields unchanged if specified
-            if test_case.get("verify_unchanged", True):
-                exclude_fields = test_case.get(
-                    "exclude_from_unchanged_check", [test_case["field"]]
-                )
-                verify_unchanged_fields(updated_entity, original_entity, exclude_fields)
-
-    def _reset_entity_state(self, entity_type: str, entity_id: str, target_state: Dict):
-        """Reset entity to target state"""
-        # This would require implementing a reset mechanism
-        # For now, we'll rely on test isolation
-        pass
-
-    def _verify_field_updated(self, entity: Dict, field_path: str, expected_value):
-        """Verify that a field was updated to expected value"""
-        field_parts = field_path.split(".")
-        current_value = entity
-
-        for part in field_parts:
-            if isinstance(current_value, dict):
-                current_value = current_value.get(part)
-            else:
-                pytest.fail(f"Cannot access field '{field_path}' in entity")
-
-        assert (
-            current_value == expected_value
-        ), f"Field '{field_path}' not updated correctly. Expected: {expected_value}, Got: {current_value}"
-
-
-@pytest.fixture
-def field_update_tester(governance_client, cleanup_tracker):
-    """Field update testing helper"""
-    return FieldUpdateTester(governance_client, cleanup_tracker)
diff --git a/tests/governance/pytest.ini b/tests/governance/pytest.ini
deleted file mode 100644
index 2f6bde1484..0000000000
--- a/tests/governance/pytest.ini
+++ /dev/null
@@ -1,88 +0,0 @@
-[tool:pytest]
-# Pytest configuration for Bifrost Governance Plugin Testing
-
-# Test discovery
-testpaths = .
-python_files = test_*.py
-python_classes = Test*
-python_functions = test_*
-
-# Minimum version
-minversion = 7.0
-
-# Add options
-addopts = 
-    -ra
-    --strict-markers
-    --strict-config
-    --color=yes
-    --tb=short
-    --maxfail=10
-    --durations=10
-    --verbose
-
-# Markers for test categorization
-markers =
-    governance: Tests for governance functionality
-    virtual_keys: Virtual Key CRUD and management tests
-    teams: Team CRUD and management tests
-    customers: Customer CRUD and management tests
-    budget: Budget-related tests
-    rate_limit: Rate limiting tests
-    usage_tracking: Usage tracking and monitoring tests
-    crud: CRUD operation tests
-    field_updates: Comprehensive field update tests
-    validation: Validation and constraint tests
-    integration: Integration and end-to-end tests
-    edge_cases: Edge cases and boundary condition tests
-    concurrency: Concurrency and race condition tests
-    mutual_exclusivity: Mutual exclusivity constraint tests
-    hierarchical: Hierarchical governance tests
-    slow: Tests that run slowly (> 5 seconds)
-    smoke: Smoke tests for quick validation
-    regression: Regression tests
-    api: API endpoint tests
-    relationships: Entity relationship tests
-    cleanup: Tests that require special cleanup
-    security: Security-related tests
-
-# Test timeout (in seconds)
-timeout = 300
-
-# Warnings configuration
-filterwarnings =
-    error
-    ignore::UserWarning
-    ignore::DeprecationWarning
-    ignore::PendingDeprecationWarning
-    ignore::requests.packages.urllib3.disable_warnings
-
-# Logging configuration
-log_cli = true
-log_cli_level = INFO
-log_cli_format = %(asctime)s [%(levelname)8s] %(name)s: %(message)s
-log_cli_date_format = %Y-%m-%d %H:%M:%S
-
-log_file = governance_tests.log
-log_file_level = DEBUG
-log_file_format = %(asctime)s [%(levelname)8s] %(filename)s:%(lineno)d %(funcName)s(): %(message)s
-log_file_date_format = %Y-%m-%d %H:%M:%S
-
-# Coverage configuration (when using --cov)
-[coverage:run]
-source = .
-omit = 
-    */tests/*
-    */test_*
-    */__pycache__/*
-    */venv/*
-    */env/*
-    .tox/*
-
-[coverage:report]
-precision = 2
-show_missing = true
-skip_covered = false
-
-[coverage:html]
-directory = htmlcov
\ No newline at end of file
diff --git a/tests/governance/requirements.txt b/tests/governance/requirements.txt
deleted file mode 100644
index c25a0301fb..0000000000
--- a/tests/governance/requirements.txt
+++ /dev/null
@@ -1,52 +0,0 @@
-# Bifrost Governance Plugin Test Suite Dependencies
-
-# Core testing framework
-pytest>=7.4.0
-pytest-asyncio>=0.21.0
-pytest-xdist>=3.3.0  # For parallel test execution
-pytest-cov>=4.1.0    # For coverage reporting
-pytest-html>=3.2.0   # For HTML reports
-pytest-json-report>=1.5.0  # For JSON reports
-pytest-timeout>=2.1.0  # For test timeouts
-
-# HTTP client and API testing
-requests>=2.31.0
-urllib3>=2.0.0
-
-# Concurrency and async support
-aiohttp>=3.8.0
-
-# Data handling and validation
-pydantic>=2.0.0
-jsonschema>=4.18.0
-
-# Performance monitoring
-psutil>=5.9.0  # For system metrics
-memory-profiler>=0.61.0  # For memory profiling
-
-# Date/time handling
-python-dateutil>=2.8.0
-
-# Utilities
-faker>=19.0.0  # For generating test data
-factory-boy>=3.3.0  # For test data factories
-
-# Development and debugging
-ipdb>=0.13.0  # Debugger
-rich>=13.0.0  # Rich console output
-
-# Configuration management
-python-dotenv>=1.0.0  # For environment configuration
-pyyaml>=6.0  # For YAML configuration files
-
-# Type checking (development)
-mypy>=1.5.0  # Static type checking
-types-requests>=2.31.0  # Type stubs for requests
-
-# Testing utilities
-pytest-mock>=3.11.0  # For mocking
-pytest-benchmark>=4.0.0  # For benchmarking
-freezegun>=1.2.0  # For time mocking
-
-# Load testing
-locust>=2.15.0  # For load testing scenarios
\ No newline at end of file
diff --git a/tests/governance/test_customers_crud.py b/tests/governance/test_customers_crud.py
deleted file mode 100644
index 7040b7f1fd..0000000000
--- a/tests/governance/test_customers_crud.py
+++ /dev/null
@@ -1,981 +0,0 @@
-"""
-Comprehensive Customer CRUD Tests for Bifrost Governance Plugin
-
-This module provides exhaustive testing of Customer operations including:
-- Complete CRUD lifecycle testing
-- Comprehensive field update testing (individual and batch)
-- Team relationship management
-- Budget management and hierarchies
-- Cascading operations
-- Edge cases and validation scenarios
-- Concurrency and race condition testing
-"""
-
-import pytest
-import time
-import uuid
-from typing import Dict, Any, List
-from concurrent.futures import ThreadPoolExecutor
-import copy
-
-from conftest import (
-    assert_response_success,
-    verify_unchanged_fields,
-    generate_unique_name,
-    verify_entity_relationships,
-    deep_compare_entities,
-)
-
-
-class TestCustomerBasicCRUD:
-    """Test basic CRUD operations for Customers"""
-
-    @pytest.mark.customers
-    @pytest.mark.crud
-    @pytest.mark.smoke
-    def test_customer_create_minimal(self, governance_client, cleanup_tracker):
-        """Test creating customer with minimal required data"""
-        data = {"name": generate_unique_name("Minimal Customer")}
-
-        response = governance_client.create_customer(data)
-        assert_response_success(response, 201)
-
-        customer_data = response.json()["customer"]
-        cleanup_tracker.add_customer(customer_data["id"])
-
-        # Verify required fields
-        assert customer_data["name"] == data["name"]
-        assert customer_data["id"] is not None
-        assert customer_data["created_at"] is not None
-        assert customer_data["updated_at"] is not None
-
-        # Verify optional fields are None/empty
-        assert customer_data["teams"] == []
-        assert customer_data["virtual_keys"] is None
-
-    @pytest.mark.customers
-    @pytest.mark.crud
-    @pytest.mark.budget
-    def test_customer_create_with_budget(self, governance_client, cleanup_tracker):
-        """Test creating customer with budget"""
-        data = {
-            "name": generate_unique_name("Budget Customer"),
-            "budget": {
-                "max_limit": 500000,  # $5000.00 in cents
-                "reset_duration": "1M",
-            },
-        }
-
-        response = governance_client.create_customer(data)
-        assert_response_success(response, 201)
-
-        customer_data = response.json()["customer"]
-        cleanup_tracker.add_customer(customer_data["id"])
-
-        # Verify budget was created
-        assert customer_data["budget"] is not None
-        assert customer_data["budget"]["max_limit"] == 500000
-        assert customer_data["budget"]["reset_duration"] == "1M"
-        assert customer_data["budget"]["current_usage"] == 0
-        assert customer_data["budget_id"] is not None
-
-    @pytest.mark.customers
-    @pytest.mark.crud
-    def test_customer_list_all(self, governance_client, sample_customer):
-        """Test listing all customers"""
-        response = governance_client.list_customers()
-        assert_response_success(response, 200)
-
-        data = response.json()
-        assert "customers" in data
-        assert "count" in data
-        assert isinstance(data["customers"], list)
-        assert data["count"] >= 1
-
-        # Find our test customer
-        test_customer = next(
-            (
-                customer
-                for customer in data["customers"]
-                if customer["id"] == sample_customer["id"]
-            ),
-            None,
-        )
-        assert test_customer is not None
-
-    @pytest.mark.customers
-    @pytest.mark.crud
-    def test_customer_get_by_id(self, governance_client, sample_customer):
-        """Test getting customer by ID with relationships loaded"""
-        response = governance_client.get_customer(sample_customer["id"])
-        assert_response_success(response, 200)
-
-        customer_data = response.json()["customer"]
-        assert customer_data["id"] == sample_customer["id"]
-        assert customer_data["name"] == sample_customer["name"]
-
-        # Verify teams relationship is loaded (empty list if no teams)
-        assert "teams" in customer_data
-        assert (
-            isinstance(customer_data["teams"], list) or customer_data["teams"] is None
-        )
-
-    @pytest.mark.customers
-    @pytest.mark.crud
-    def test_customer_get_nonexistent(self, governance_client):
-        """Test getting non-existent customer returns 404"""
-        fake_id = str(uuid.uuid4())
-        response = governance_client.get_customer(fake_id)
-        assert response.status_code == 404
-
-    @pytest.mark.customers
-    @pytest.mark.crud
-    def test_customer_delete(self, governance_client, cleanup_tracker):
-        """Test deleting a customer"""
-        # Create customer to delete
-        data = {"name": generate_unique_name("Delete Test Customer")}
-        create_response = governance_client.create_customer(data)
-        assert_response_success(create_response, 201)
-        customer_id = create_response.json()["customer"]["id"]
-
-        # Delete customer
-        delete_response = governance_client.delete_customer(customer_id)
-        assert_response_success(delete_response, 200)
-
-        # Verify customer is gone
-        get_response = governance_client.get_customer(customer_id)
-        assert get_response.status_code == 404
-
-    @pytest.mark.customers
-    @pytest.mark.crud
-    def test_customer_delete_nonexistent(self, governance_client):
-        """Test deleting non-existent customer returns 404"""
-        fake_id = str(uuid.uuid4())
-        response = governance_client.delete_customer(fake_id)
-        assert response.status_code == 404
-
-
-class TestCustomerValidation:
-    """Test validation rules for Customer operations"""
-
-    @pytest.mark.customers
-    @pytest.mark.validation
-    def test_customer_create_missing_name(self, governance_client):
-        """Test creating customer without name fails"""
-        data = {"budget": {"max_limit": 1000, "reset_duration": "1h"}}
-        response = governance_client.create_customer(data)
-        assert response.status_code == 400
-
-    @pytest.mark.customers
-    @pytest.mark.validation
-    def test_customer_create_empty_name(self, governance_client):
-        """Test creating customer with empty name fails"""
-        data = {"name": ""}
-        response = governance_client.create_customer(data)
-        assert response.status_code == 400
-
-    @pytest.mark.customers
-    @pytest.mark.validation
-    def test_customer_create_invalid_budget(self, governance_client):
-        """Test creating customer with invalid budget data"""
-        # Test negative budget
-        data = {
-            "name": generate_unique_name("Negative Budget Customer"),
-            "budget": {"max_limit": -10000, "reset_duration": "1h"},
-        }
-        response = governance_client.create_customer(data)
-        assert response.status_code == 400
-
-        # Test invalid reset duration
-        data = {
-            "name": generate_unique_name("Invalid Duration Customer"),
-            "budget": {"max_limit": 10000, "reset_duration": "invalid_duration"},
-        }
-        response = governance_client.create_customer(data)
-        assert response.status_code == 400
-
-    @pytest.mark.customers
-    @pytest.mark.validation
-    def test_customer_create_invalid_json(self, governance_client):
-        """Test creating customer with invalid data types"""
-        data = {
-            "name": 12345,  # Should be string
-            "budget": "not_an_object",  # Should be object
-        }
-        response = governance_client.create_customer(data)
-        assert response.status_code == 400
-
-
-class TestCustomerFieldUpdates:
-    """Comprehensive tests for Customer field updates"""
-
-    @pytest.mark.customers
-    @pytest.mark.field_updates
-    def test_customer_update_individual_fields(
-        self, governance_client, cleanup_tracker
-    ):
-        """Test updating each customer field individually"""
-        # Create customer with all fields for testing
-        original_data = {
-            "name": generate_unique_name("Complete Update Test Customer"),
-            "budget": {"max_limit": 250000, "reset_duration": "1w"},
-        }
-        create_response = governance_client.create_customer(original_data)
-        assert_response_success(create_response, 201)
-        customer_id = create_response.json()["customer"]["id"]
-        cleanup_tracker.add_customer(customer_id)
-
-        # Get original state
-        original_response = governance_client.get_customer(customer_id)
-        original_customer = original_response.json()["customer"]
-
-        # Test individual field updates
-        field_test_cases = [
-            {
-                "field": "name",
-                "update_data": {"name": "Updated Customer Name"},
-                "expected_value": "Updated Customer Name",
-            }
-        ]
-
-        for test_case in field_test_cases:
-            # Reset customer to original state
-            reset_data = {"name": original_customer["name"]}
-            governance_client.update_customer(customer_id, reset_data)
-
-            # Perform field update
-            response = governance_client.update_customer(
-                customer_id, test_case["update_data"]
-            )
-            assert_response_success(response, 200)
-            updated_customer = response.json()["customer"]
-
-            # Verify target field was updated
-            if test_case.get("custom_validation"):
-                test_case["custom_validation"](updated_customer)
-            else:
-                field_parts = test_case["field"].split(".")
-                current_value = updated_customer
-                for part in field_parts:
-                    current_value = current_value[part]
-                assert (
-                    current_value == test_case["expected_value"]
-                ), f"Field {test_case['field']} not updated correctly"
-
-            # Verify other fields unchanged (if specified)
-            if test_case.get("verify_unchanged", True):
-                exclude_fields = test_case.get(
-                    "exclude_from_unchanged_check", [test_case["field"]]
-                )
-                verify_unchanged_fields(
-                    updated_customer, original_customer, exclude_fields
-                )
-
-    @pytest.mark.customers
-    @pytest.mark.field_updates
-    @pytest.mark.budget
-    def test_customer_budget_updates(self, governance_client, cleanup_tracker):
-        """Test comprehensive budget creation, update, and modification"""
-        # Create customer without budget
-        data = {"name": generate_unique_name("Budget Update Test Customer")}
-        create_response = governance_client.create_customer(data)
-        assert_response_success(create_response, 201)
-        customer_id = create_response.json()["customer"]["id"]
-        cleanup_tracker.add_customer(customer_id)
-
-        # Test 1: Add budget to customer without budget
-        budget_data = {"max_limit": 100000, "reset_duration": "1M"}
-        response = governance_client.update_customer(
-            customer_id, {"budget": budget_data}
-        )
-        assert_response_success(response, 200)
-        updated_customer = response.json()["customer"]
-        assert updated_customer["budget"]["max_limit"] == 100000
-        assert updated_customer["budget"]["reset_duration"] == "1M"
-        assert updated_customer["budget_id"] is not None
-
-        # Test 2: Update existing budget completely
-        new_budget_data = {"max_limit": 200000, "reset_duration": "3M"}
-        response = governance_client.update_customer(
-            customer_id, {"budget": new_budget_data}
-        )
-        assert_response_success(response, 200)
-        updated_customer = response.json()["customer"]
-        assert updated_customer["budget"]["max_limit"] == 200000
-        assert updated_customer["budget"]["reset_duration"] == "3M"
-
-        # Test 3: Partial budget update (only max_limit)
-        response = governance_client.update_customer(
-            customer_id, {"budget": {"max_limit": 300000}}
-        )
-        assert_response_success(response, 200)
-        updated_customer = response.json()["customer"]
-        assert updated_customer["budget"]["max_limit"] == 300000
-        assert (
-            updated_customer["budget"]["reset_duration"] == "3M"
-        )  # Should remain unchanged
-
-        # Test 4: Partial budget update (only reset_duration)
-        response = governance_client.update_customer(
-            customer_id, {"budget": {"reset_duration": "6M"}}
-        )
-        assert_response_success(response, 200)
-        updated_customer = response.json()["customer"]
-        assert (
-            updated_customer["budget"]["max_limit"] == 300000
-        )  # Should remain unchanged
-        assert updated_customer["budget"]["reset_duration"] == "6M"
-
-    @pytest.mark.customers
-    @pytest.mark.field_updates
-    def test_customer_multiple_field_updates(self, governance_client, cleanup_tracker):
-        """Test updating multiple fields simultaneously"""
-        # Create customer with initial data
-        initial_data = {
-            "name": generate_unique_name("Multi-Field Test Customer"),
-        }
-        create_response = governance_client.create_customer(initial_data)
-        assert_response_success(create_response, 201)
-        customer_id = create_response.json()["customer"]["id"]
-        cleanup_tracker.add_customer(customer_id)
-
-        # Update multiple fields at once
-        update_data = {
-            "name": "Updated Multi-Field Customer Name",
-            "budget": {"max_limit": 500000, "reset_duration": "1Y"},
-        }
-
-        response = governance_client.update_customer(customer_id, update_data)
-        assert_response_success(response, 200)
-
-        updated_customer = response.json()["customer"]
-        assert updated_customer["name"] == "Updated Multi-Field Customer Name"
-        assert updated_customer["budget"]["max_limit"] == 500000
-        assert updated_customer["budget"]["reset_duration"] == "1Y"
-
-    @pytest.mark.customers
-    @pytest.mark.field_updates
-    @pytest.mark.edge_cases
-    def test_customer_update_edge_cases(self, governance_client, cleanup_tracker):
-        """Test edge cases in customer updates"""
-        # Create test customer
-        data = {"name": generate_unique_name("Edge Case Customer")}
-        create_response = governance_client.create_customer(data)
-        assert_response_success(create_response, 201)
-        customer_id = create_response.json()["customer"]["id"]
-        cleanup_tracker.add_customer(customer_id)
-
-        original_response = governance_client.get_customer(customer_id)
-        original_customer = original_response.json()["customer"]
-
-        # Test 1: Empty update (should return unchanged customer)
-        response = governance_client.update_customer(customer_id, {})
-        assert_response_success(response, 200)
-        updated_customer = response.json()["customer"]
-
-        # Compare ignoring timestamps
-        differences = deep_compare_entities(
-            updated_customer, original_customer, ignore_fields=["updated_at"]
-        )
-        assert len(differences) == 0, f"Empty update changed fields: {differences}"
-
-        # Test 2: Update with same values
-        response = governance_client.update_customer(
-            customer_id, {"name": original_customer["name"]}
-        )
-        assert_response_success(response, 200)
-
-        # Test 3: Very long customer name (test field length limits)
-        long_name = "x" * 1000  # Adjust based on actual field limits
-        response = governance_client.update_customer(customer_id, {"name": long_name})
-        # Expected behavior depends on API validation rules
-
-    @pytest.mark.customers
-    @pytest.mark.field_updates
-    def test_customer_update_nonexistent(self, governance_client):
-        """Test updating non-existent customer returns 404"""
-        fake_id = str(uuid.uuid4())
-        response = governance_client.update_customer(fake_id, {"name": "test"})
-        assert response.status_code == 404
-
-
-class TestCustomerBudgetManagement:
-    """Test customer budget specific functionality"""
-
-    @pytest.mark.customers
-    @pytest.mark.budget
-    def test_customer_budget_creation_and_validation(
-        self, governance_client, cleanup_tracker
-    ):
-        """Test budget creation with various configurations"""
-        # Test valid budget configurations
-        budget_test_cases = [
-            {"max_limit": 50000, "reset_duration": "1d"},
-            {"max_limit": 250000, "reset_duration": "1w"},
-            {"max_limit": 1000000, "reset_duration": "1M"},
-            {"max_limit": 5000000, "reset_duration": "3M"},
-            {"max_limit": 10000000, "reset_duration": "1Y"},
-        ]
-
-        for budget_config in budget_test_cases:
-            data = {
-                "name": generate_unique_name(
-                    f"Budget Customer {budget_config['reset_duration']}"
-                ),
-                "budget": budget_config,
-            }
-
-            response = governance_client.create_customer(data)
-            assert_response_success(response, 201)
-
-            customer_data = response.json()["customer"]
-            cleanup_tracker.add_customer(customer_data["id"])
-
-            assert customer_data["budget"]["max_limit"] == budget_config["max_limit"]
-            assert (
-                customer_data["budget"]["reset_duration"]
-                == budget_config["reset_duration"]
-            )
-            assert customer_data["budget"]["current_usage"] == 0
-            assert customer_data["budget"]["last_reset"] is not None
-
-    @pytest.mark.customers
-    @pytest.mark.budget
-    @pytest.mark.edge_cases
-    def test_customer_budget_edge_cases(self, governance_client, cleanup_tracker):
-        """Test budget edge cases and boundary conditions"""
-        # Test boundary values
-        edge_case_budgets = [
-            {"max_limit": 0, "reset_duration": "1h"},  # Zero budget
-            {"max_limit": 1, "reset_duration": "1s"},  # Minimal values
-            {"max_limit": 9223372036854775807, "reset_duration": "1h"},  # Max int64
-        ]
-
-        for budget_config in edge_case_budgets:
-            data = {
-                "name": generate_unique_name(
-                    f"Edge Budget Customer {budget_config['max_limit']}"
-                ),
-                "budget": budget_config,
-            }
-
-            response = governance_client.create_customer(data)
-            # Adjust assertions based on API validation rules
-            if (
-                budget_config["max_limit"] >= 0
-            ):  # Assuming non-negative budgets are valid
-                assert_response_success(response, 201)
-                cleanup_tracker.add_customer(response.json()["customer"]["id"])
-            else:
-                assert response.status_code == 400
-
-    @pytest.mark.customers
-    @pytest.mark.budget
-    @pytest.mark.hierarchical
-    def test_customer_budget_hierarchy_foundation(
-        self, governance_client, cleanup_tracker
-    ):
-        """Test customer budget as foundation of hierarchical budget system"""
-        # Create customer with large budget (top of hierarchy)
-        customer_data = {
-            "name": generate_unique_name("Hierarchy Foundation Customer"),
-            "budget": {"max_limit": 1000000, "reset_duration": "1M"},  # $10,000
-        }
-        customer_response = governance_client.create_customer(customer_data)
-        assert_response_success(customer_response, 201)
-        customer = customer_response.json()["customer"]
-        cleanup_tracker.add_customer(customer["id"])
-
-        # Create teams under this customer with smaller budgets
-        team1_data = {
-            "name": generate_unique_name("Sub-Team 1"),
-            "customer_id": customer["id"],
-            "budget": {"max_limit": 300000, "reset_duration": "1M"},  # $3,000
-        }
-        team1_response = governance_client.create_team(team1_data)
-        assert_response_success(team1_response, 201)
-        team1 = team1_response.json()["team"]
-        cleanup_tracker.add_team(team1["id"])
-
-        team2_data = {
-            "name": generate_unique_name("Sub-Team 2"),
-            "customer_id": customer["id"],
-            "budget": {"max_limit": 200000, "reset_duration": "1M"},  # $2,000
-        }
-        team2_response = governance_client.create_team(team2_data)
-        assert_response_success(team2_response, 201)
-        team2 = team2_response.json()["team"]
-        cleanup_tracker.add_team(team2["id"])
-
-        # Create VKs under teams with even smaller budgets
-        vk1_data = {
-            "name": generate_unique_name("Team1 VK"),
-            "team_id": team1["id"],
-            "budget": {"max_limit": 100000, "reset_duration": "1M"},  # $1,000
-        }
-        vk1_response = governance_client.create_virtual_key(vk1_data)
-        assert_response_success(vk1_response, 201)
-        vk1 = vk1_response.json()["virtual_key"]
-        cleanup_tracker.add_virtual_key(vk1["id"])
-
-        # Verify hierarchy structure
-        assert customer["budget"]["max_limit"] == 1000000
-        assert team1["budget"]["max_limit"] == 300000
-        assert team2["budget"]["max_limit"] == 200000
-        assert vk1["budget"]["max_limit"] == 100000
-
-        # Verify relationships
-        assert team1["customer_id"] == customer["id"]
-        assert team2["customer_id"] == customer["id"]
-        assert vk1["team_id"] == team1["id"]
-
-    @pytest.mark.customers
-    @pytest.mark.budget
-    def test_customer_budget_large_scale(self, governance_client, cleanup_tracker):
-        """Test customer budgets for large enterprise scenarios"""
-        # Test very large budget for enterprise customer
-        enterprise_data = {
-            "name": generate_unique_name("Enterprise Customer"),
-            "budget": {
-                "max_limit": 100000000000,  # $1 billion in cents
-                "reset_duration": "1Y",
-            },
-        }
-
-        response = governance_client.create_customer(enterprise_data)
-        assert_response_success(response, 201)
-        customer = response.json()["customer"]
-        cleanup_tracker.add_customer(customer["id"])
-
-        assert customer["budget"]["max_limit"] == 100000000000
-        assert customer["budget"]["reset_duration"] == "1Y"
-
-
-class TestCustomerTeamRelationships:
-    """Test customer relationships with teams"""
-
-    @pytest.mark.customers
-    @pytest.mark.relationships
-    def test_customer_teams_relationship_loading(
-        self, governance_client, cleanup_tracker
-    ):
-        """Test that customer properly loads teams relationships"""
-        # Create customer
-        customer_data = {"name": generate_unique_name("Team Parent Customer")}
-        customer_response = governance_client.create_customer(customer_data)
-        assert_response_success(customer_response, 201)
-        customer = customer_response.json()["customer"]
-        cleanup_tracker.add_customer(customer["id"])
-
-        # Create teams under this customer
-        team_names = []
-        for i in range(3):
-            team_name = generate_unique_name(f"Customer Team {i}")
-            team_names.append(team_name)
-            team_data = {"name": team_name, "customer_id": customer["id"]}
-            team_response = governance_client.create_team(team_data)
-            assert_response_success(team_response, 201)
-            cleanup_tracker.add_team(team_response.json()["team"]["id"])
-
-        # Fetch customer with teams loaded
-        customer_response = governance_client.get_customer(customer["id"])
-        assert_response_success(customer_response, 200)
-        customer_with_teams = customer_response.json()["customer"]
-
-        # Verify teams relationship loaded
-        assert "teams" in customer_with_teams
-        teams = customer_with_teams["teams"]
-        assert isinstance(teams, list)
-        assert len(teams) == 3
-
-        # Verify all team names are present
-        loaded_team_names = {team["name"] for team in teams}
-        for name in team_names:
-            assert name in loaded_team_names
-
-        # Verify all teams have correct customer_id
-        for team in teams:
-            assert team["customer_id"] == customer["id"]
-
-    @pytest.mark.customers
-    @pytest.mark.relationships
-    def test_customer_with_no_teams(self, governance_client, cleanup_tracker):
-        """Test customer with no teams has empty teams list"""
-        # Create customer without teams
-        customer_data = {"name": generate_unique_name("No Teams Customer")}
-        customer_response = governance_client.create_customer(customer_data)
-        assert_response_success(customer_response, 201)
-        customer = customer_response.json()["customer"]
-        cleanup_tracker.add_customer(customer["id"])
-
-        # Fetch customer with teams loaded
-        customer_response = governance_client.get_customer(customer["id"])
-        assert_response_success(customer_response, 200)
-        customer_data = customer_response.json()["customer"]
-
-        # Teams should be empty list or None
-        teams = customer_data.get("teams")
-        assert teams == [] or teams is None
-
-    @pytest.mark.customers
-    @pytest.mark.relationships
-    def test_customer_teams_cascading_operations(
-        self, governance_client, cleanup_tracker
-    ):
-        """Test cascading operations between customers and teams"""
-        # Create customer
-        customer_data = {"name": generate_unique_name("Cascade Test Customer")}
-        customer_response = governance_client.create_customer(customer_data)
-        assert_response_success(customer_response, 201)
-        customer = customer_response.json()["customer"]
-        cleanup_tracker.add_customer(customer["id"])
-
-        # Create teams under customer
-        team_ids = []
-        for i in range(2):
-            team_data = {
-                "name": generate_unique_name(f"Cascade Team {i}"),
-                "customer_id": customer["id"],
-            }
-            team_response = governance_client.create_team(team_data)
-            assert_response_success(team_response, 201)
-            team_id = team_response.json()["team"]["id"]
-            team_ids.append(team_id)
-            cleanup_tracker.add_team(team_id)
-
-        # Create VKs under teams
-        vk_ids = []
-        for team_id in team_ids:
-            vk_data = {"name": generate_unique_name("Cascade VK"), "team_id": team_id}
-            vk_response = governance_client.create_virtual_key(vk_data)
-            assert_response_success(vk_response, 201)
-            vk_id = vk_response.json()["virtual_key"]["id"]
-            vk_ids.append(vk_id)
-            cleanup_tracker.add_virtual_key(vk_id)
-
-        # Verify all entities exist and are properly linked
-        customer_response = governance_client.get_customer(customer["id"])
-        customer_with_teams = customer_response.json()["customer"]
-        assert len(customer_with_teams["teams"]) == 2
-
-        for vk_id in vk_ids:
-            vk_response = governance_client.get_virtual_key(vk_id)
-            vk = vk_response.json()["virtual_key"]
-            assert vk["team"] is not None
-            assert vk["team"]["customer_id"] == customer["id"]
-
-    @pytest.mark.customers
-    @pytest.mark.relationships
-    @pytest.mark.edge_cases
-    def test_customer_orphaned_teams_handling(self, governance_client, cleanup_tracker):
-        """Test customer behavior when teams reference non-existent customer"""
-        # This test simulates data integrity issues
-        # In practice, this would be prevented by foreign key constraints
-
-        # Create customer and team
-        customer_data = {"name": generate_unique_name("Temp Customer")}
-        customer_response = governance_client.create_customer(customer_data)
-        assert_response_success(customer_response, 201)
-        customer = customer_response.json()["customer"]
-        cleanup_tracker.add_customer(customer["id"])
-
-        team_data = {
-            "name": generate_unique_name("Orphan Test Team"),
-            "customer_id": customer["id"],
-        }
-        team_response = governance_client.create_team(team_data)
-        assert_response_success(team_response, 201)
-        team = team_response.json()["team"]
-        cleanup_tracker.add_team(team["id"])
-
-        # If we were to delete the customer, what happens to the team?
-        # This depends on database constraints and API implementation
-        # For now, we just verify the relationship exists correctly
-        assert team["customer_id"] == customer["id"]
-        assert team["customer"]["id"] == customer["id"]
-
-
-class TestCustomerConcurrency:
-    """Test concurrent operations on Customers"""
-
-    @pytest.mark.customers
-    @pytest.mark.concurrency
-    @pytest.mark.slow
-    def test_customer_concurrent_creation(self, governance_client, cleanup_tracker):
-        """Test creating multiple customers concurrently"""
-
-        def create_customer(index):
-            data = {"name": generate_unique_name(f"Concurrent Customer {index}")}
-            response = governance_client.create_customer(data)
-            return response
-
-        # Create 10 customers concurrently
-        with ThreadPoolExecutor(max_workers=10) as executor:
-            futures = [executor.submit(create_customer, i) for i in range(10)]
-            responses = [future.result() for future in futures]
-
-        # Verify all succeeded
-        created_customers = []
-        for response in responses:
-            assert_response_success(response, 201)
-            customer_data = response.json()["customer"]
-            created_customers.append(customer_data)
-            cleanup_tracker.add_customer(customer_data["id"])
-
-        # Verify all customers have unique IDs
-        customer_ids = [customer["id"] for customer in created_customers]
-        assert len(set(customer_ids)) == 10  # All unique IDs
-
-    @pytest.mark.customers
-    @pytest.mark.concurrency
-    @pytest.mark.slow
-    def test_customer_concurrent_updates(self, governance_client, cleanup_tracker):
-        """Test updating same customer concurrently"""
-        # Create customer to update
-        data = {"name": generate_unique_name("Concurrent Update Customer")}
-        create_response = governance_client.create_customer(data)
-        assert_response_success(create_response, 201)
-        customer_id = create_response.json()["customer"]["id"]
-        cleanup_tracker.add_customer(customer_id)
-
-        # Update concurrently with different names
-        def update_customer(index):
-            update_data = {"name": f"Updated by thread {index}"}
-            response = governance_client.update_customer(customer_id, update_data)
-            return response, index
-
-        with ThreadPoolExecutor(max_workers=5) as executor:
-            futures = [executor.submit(update_customer, i) for i in range(5)]
-            results = [future.result() for future in futures]
-
-        # All updates should succeed (last one wins)
-        for response, index in results:
-            assert_response_success(response, 200)
-
-        # Verify final state
-        final_response = governance_client.get_customer(customer_id)
-        final_customer = final_response.json()["customer"]
-        assert final_customer["name"].startswith("Updated by thread")
-
-    @pytest.mark.customers
-    @pytest.mark.concurrency
-    @pytest.mark.slow
-    def test_customer_concurrent_budget_updates(
-        self, governance_client, cleanup_tracker
-    ):
-        """Test concurrent budget updates on same customer"""
-        # Create customer with budget
-        data = {
-            "name": generate_unique_name("Concurrent Budget Customer"),
-            "budget": {"max_limit": 100000, "reset_duration": "1d"},
-        }
-        create_response = governance_client.create_customer(data)
-        assert_response_success(create_response, 201)
-        customer_id = create_response.json()["customer"]["id"]
-        cleanup_tracker.add_customer(customer_id)
-
-        # Update budget concurrently with different limits
-        def update_budget(index):
-            limit = 100000 + (index * 10000)  # Different limits
-            update_data = {"budget": {"max_limit": limit}}
-            response = governance_client.update_customer(customer_id, update_data)
-            return response, limit
-
-        with ThreadPoolExecutor(max_workers=5) as executor:
-            futures = [executor.submit(update_budget, i) for i in range(5)]
-            results = [future.result() for future in futures]
-
-        # All updates should succeed
-        for response, limit in results:
-            assert_response_success(response, 200)
-
-        # Verify final state has one of the updated limits
-        final_response = governance_client.get_customer(customer_id)
-        final_customer = final_response.json()["customer"]
-        final_limit = final_customer["budget"]["max_limit"]
-        expected_limits = [100000 + (i * 10000) for i in range(5)]
-        assert final_limit in expected_limits
-
-
-class TestCustomerComplexScenarios:
-    """Test complex scenarios involving customers"""
-
-    @pytest.mark.customers
-    @pytest.mark.hierarchical
-    @pytest.mark.slow
-    def test_customer_large_hierarchy_creation(
-        self, governance_client, cleanup_tracker
-    ):
-        """Test creating large hierarchical structure under customer"""
-        # Create customer
-        customer_data = {
-            "name": generate_unique_name("Large Hierarchy Customer"),
-            "budget": {"max_limit": 10000000, "reset_duration": "1M"},  # $100,000
-        }
-        customer_response = governance_client.create_customer(customer_data)
-        assert_response_success(customer_response, 201)
-        customer = customer_response.json()["customer"]
-        cleanup_tracker.add_customer(customer["id"])
-
-        # Create multiple teams
-        team_ids = []
-        for i in range(5):
-            team_data = {
-                "name": generate_unique_name(f"Large Hierarchy Team {i}"),
-                "customer_id": customer["id"],
-                "budget": {
-                    "max_limit": 1000000,
-                    "reset_duration": "1M",
-                },  # $10,000 each
-            }
-            team_response = governance_client.create_team(team_data)
-            assert_response_success(team_response, 201)
-            team_id = team_response.json()["team"]["id"]
-            team_ids.append(team_id)
-            cleanup_tracker.add_team(team_id)
-
-        # Create multiple VKs per team
-        vk_count = 0
-        for team_id in team_ids:
-            for j in range(3):  # 3 VKs per team
-                vk_data = {
-                    "name": generate_unique_name(f"Large Hierarchy VK {team_id}-{j}"),
-                    "team_id": team_id,
-                    "budget": {
-                        "max_limit": 100000,
-                        "reset_duration": "1M",
-                    },  # $1,000 each
-                }
-                vk_response = governance_client.create_virtual_key(vk_data)
-                assert_response_success(vk_response, 201)
-                vk_id = vk_response.json()["virtual_key"]["id"]
-                cleanup_tracker.add_virtual_key(vk_id)
-                vk_count += 1
-
-        # Verify hierarchy structure
-        customer_response = governance_client.get_customer(customer["id"])
-        customer_with_teams = customer_response.json()["customer"]
-
-        assert len(customer_with_teams["teams"]) == 5
-        assert vk_count == 15  # 5 teams * 3 VKs each
-
-        # Verify budget hierarchy makes sense
-        total_team_budgets = sum(
-            team.get("budget", {}).get("max_limit", 0)
-            for team in customer_with_teams["teams"]
-        )
-        assert (
-            total_team_budgets <= customer["budget"]["max_limit"]
-        )  # Teams shouldn't exceed customer
-
-    @pytest.mark.customers
-    @pytest.mark.performance
-    @pytest.mark.slow
-    def test_customer_performance_with_many_teams(
-        self, governance_client, cleanup_tracker
-    ):
-        """Test customer performance when loading many teams"""
-        # Create customer
-        customer_data = {"name": generate_unique_name("Performance Test Customer")}
-        customer_response = governance_client.create_customer(customer_data)
-        assert_response_success(customer_response, 201)
-        customer = customer_response.json()["customer"]
-        cleanup_tracker.add_customer(customer["id"])
-
-        # Create many teams
-        team_count = 50  # Adjust based on performance requirements
-        start_time = time.time()
-
-        for i in range(team_count):
-            team_data = {
-                "name": generate_unique_name(f"Perf Team {i}"),
-                "customer_id": customer["id"],
-            }
-            team_response = governance_client.create_team(team_data)
-            assert_response_success(team_response, 201)
-            cleanup_tracker.add_team(team_response.json()["team"]["id"])
-
-        creation_time = time.time() - start_time
-
-        # Test customer loading performance
-        start_time = time.time()
-        customer_response = governance_client.get_customer(customer["id"])
-        assert_response_success(customer_response, 200)
-        load_time = time.time() - start_time
-
-        customer_with_teams = customer_response.json()["customer"]
-        assert len(customer_with_teams["teams"]) == team_count
-
-        # Log performance metrics (adjust thresholds based on requirements)
-        print(f"Created {team_count} teams in {creation_time:.2f}s")
-        print(f"Loaded customer with {team_count} teams in {load_time:.2f}s")
-
-        # Performance assertions (adjust based on requirements)
-        assert (
-            load_time < 5.0
-        ), f"Loading customer with {team_count} teams took too long: {load_time}s"
-
-    @pytest.mark.customers
-    @pytest.mark.integration
-    def test_customer_full_lifecycle_scenario(self, governance_client, cleanup_tracker):
-        """Test complete customer lifecycle scenario"""
-        # 1. Create customer with budget
-        customer_data = {
-            "name": generate_unique_name("Lifecycle Customer"),
-            "budget": {"max_limit": 1000000, "reset_duration": "1M"},
-        }
-        customer_response = governance_client.create_customer(customer_data)
-        assert_response_success(customer_response, 201)
-        customer = customer_response.json()["customer"]
-        cleanup_tracker.add_customer(customer["id"])
-
-        # 2. Update customer name and budget
-        update_data = {
-            "name": "Updated Lifecycle Customer",
-            "budget": {"max_limit": 2000000, "reset_duration": "3M"},
-        }
-        update_response = governance_client.update_customer(customer["id"], update_data)
-        assert_response_success(update_response, 200)
-        updated_customer = update_response.json()["customer"]
-        assert updated_customer["name"] == "Updated Lifecycle Customer"
-        assert updated_customer["budget"]["max_limit"] == 2000000
-
-        # 3. Create teams under customer
-        team_data = {
-            "name": generate_unique_name("Lifecycle Team"),
-            "customer_id": customer["id"],
-            "budget": {"max_limit": 500000, "reset_duration": "1M"},
-        }
-        team_response = governance_client.create_team(team_data)
-        assert_response_success(team_response, 201)
-        team = team_response.json()["team"]
-        cleanup_tracker.add_team(team["id"])
-
-        # 4. Create VKs under team
-        vk_data = {
-            "name": generate_unique_name("Lifecycle VK"),
-            "team_id": team["id"],
-            "budget": {"max_limit": 100000, "reset_duration": "1d"},
-        }
-        vk_response = governance_client.create_virtual_key(vk_data)
-        assert_response_success(vk_response, 201)
-        vk = vk_response.json()["virtual_key"]
-        cleanup_tracker.add_virtual_key(vk["id"])
-
-        # 5. Verify complete hierarchy
-        final_customer_response = governance_client.get_customer(customer["id"])
-        final_customer = final_customer_response.json()["customer"]
-
-        assert final_customer["name"] == "Updated Lifecycle Customer"
-        assert len(final_customer["teams"]) == 1
-        assert final_customer["teams"][0]["id"] == team["id"]
-
-        final_vk_response = governance_client.get_virtual_key(vk["id"])
-        final_vk = final_vk_response.json()["virtual_key"]
-
-        # Verify VK belongs to team (customer relationship not preloaded in VK->team)
-        assert final_vk["team"]["id"] == team["id"]
-        assert final_vk["team"].get("customer_id") == customer["id"]
-
-        # 6. Clean up (automatic via cleanup_tracker)
-        # This tests the full CRUD lifecycle
diff --git a/tests/governance/test_helpers.py b/tests/governance/test_helpers.py
deleted file mode 100644
index 605f8f3986..0000000000
--- a/tests/governance/test_helpers.py
+++ /dev/null
@@ -1,644 +0,0 @@
-"""
-Helper utilities and test data generators for Bifrost Governance Plugin tests.
-
-This module provides additional utilities for test data generation, validation,
-and common test operations to support the comprehensive governance test suite.
-"""
-
-import pytest
-import uuid
-import time
-import json
-import random
-from typing import Dict, Any, List, Optional, Union
-from datetime import datetime, timedelta
-from faker import Faker
-
-from conftest import assert_response_success, generate_unique_name, GovernanceTestClient
-
-# Initialize Faker for generating test data
-fake = Faker()
-
-
-class TestDataFactory:
-    """Factory for generating realistic test data"""
-
-    @staticmethod
-    def generate_budget_config(
-        min_limit: int = 1000,
-        max_limit: int = 1000000,
-        duration_options: List[str] = None,
-    ) -> Dict[str, Any]:
-        """Generate realistic budget configuration"""
-        if duration_options is None:
-            duration_options = ["1h", "1d", "1w", "1M", "3M", "6M", "1Y"]
-
-        return {
-            "max_limit": random.randint(min_limit, max_limit),
-            "reset_duration": random.choice(duration_options),
-        }
-
-    @staticmethod
-    def generate_rate_limit_config(
-        include_tokens: bool = True, include_requests: bool = True
-    ) -> Dict[str, Any]:
-        """Generate realistic rate limit configuration"""
-        config = {}
-
-        if include_tokens:
-            config.update(
-                {
-                    "token_max_limit": random.randint(100, 100000),
-                    "token_reset_duration": random.choice(["1m", "5m", "1h", "1d"]),
-                }
-            )
-
-        if include_requests:
-            config.update(
-                {
-                    "request_max_limit": random.randint(10, 10000),
-                    "request_reset_duration": random.choice(["1m", "5m", "1h", "1d"]),
-                }
-            )
-
-        return config
-
-    @staticmethod
-    def generate_customer_data(include_budget: bool = False) -> Dict[str, Any]:
-        """Generate realistic customer data"""
-        data = {"name": f"{fake.company()} ({generate_unique_name('Customer')})"}
-
-        if include_budget:
-            data["budget"] = TestDataFactory.generate_budget_config(
-                min_limit=100000, max_limit=10000000  # Customers have larger budgets
-            )
-
-        return data
-
-    @staticmethod
-    def generate_team_data(
-        customer_id: Optional[str] = None, include_budget: bool = False
-    ) -> Dict[str, Any]:
-        """Generate realistic team data"""
-        team_types = [
-            "Engineering",
-            "Marketing",
-            "Sales",
-            "Research",
-            "Support",
-            "Operations",
-        ]
-        data = {
-            "name": f"{random.choice(team_types)} Team ({generate_unique_name('Team')})"
-        }
-
-        if customer_id:
-            data["customer_id"] = customer_id
-
-        if include_budget:
-            data["budget"] = TestDataFactory.generate_budget_config(
-                min_limit=10000, max_limit=1000000  # Teams have medium budgets
-            )
-
-        return data
-
-    @staticmethod
-    def generate_virtual_key_data(
-        team_id: Optional[str] = None,
-        customer_id: Optional[str] = None,
-        include_budget: bool = False,
-        include_rate_limit: bool = False,
-        model_restrictions: bool = False,
-    ) -> Dict[str, Any]:
-        """Generate realistic virtual key data"""
-        purposes = [
-            "Development",
-            "Production",
-            "Testing",
-            "Staging",
-            "Demo",
-            "Research",
-        ]
-        data = {
-            "name": f"{random.choice(purposes)} VK ({generate_unique_name('VK')})",
-            "description": fake.sentence(),
-            "is_active": random.choice([True, True, True, False]),  # 75% active
-        }
-
-        if team_id:
-            data["team_id"] = team_id
-        elif customer_id:
-            data["customer_id"] = customer_id
-
-        if model_restrictions:
-            all_models = [
-                "gpt-4",
-                "gpt-3.5-turbo",
-                "gpt-4-turbo",
-                "claude-3-5-sonnet-20240620",
-                "claude-3-7-sonnet-20250219",
-            ]
-            all_providers = ["openai", "anthropic"]
-
-            data["allowed_models"] = random.sample(
-                all_models, random.randint(1, len(all_models))
-            )
-            data["allowed_providers"] = random.sample(
-                all_providers, random.randint(1, len(all_providers))
-            )
-
-        if include_budget:
-            data["budget"] = TestDataFactory.generate_budget_config(
-                min_limit=1000, max_limit=100000  # VKs have smaller budgets
-            )
-
-        if include_rate_limit:
-            data["rate_limit"] = TestDataFactory.generate_rate_limit_config()
-
-        return data
-
-
-class ValidationHelper:
-    """Helper functions for validating test results"""
-
-    @staticmethod
-    def validate_entity_structure(
-        entity: Dict[str, Any], entity_type: str
-    ) -> List[str]:
-        """Validate that entity has expected structure"""
-        errors = []
-
-        # Common fields all entities should have
-        required_fields = ["id", "created_at", "updated_at"]
-        for field in required_fields:
-            if field not in entity:
-                errors.append(f"Missing required field: {field}")
-            elif entity[field] is None:
-                errors.append(f"Required field is None: {field}")
-
-        # Entity-specific validation
-        if entity_type == "virtual_key":
-            vk_fields = ["name", "value", "is_active"]
-            for field in vk_fields:
-                if field not in entity:
-                    errors.append(f"VK missing field: {field}")
-
-        elif entity_type == "team":
-            team_fields = ["name"]
-            for field in team_fields:
-                if field not in entity:
-                    errors.append(f"Team missing field: {field}")
-
-        elif entity_type == "customer":
-            customer_fields = ["name"]
-            for field in customer_fields:
-                if field not in entity:
-                    errors.append(f"Customer missing field: {field}")
-
-        return errors
-
-    @staticmethod
-    def validate_budget_structure(budget: Dict[str, Any]) -> List[str]:
-        """Validate budget structure"""
-        errors = []
-        required_fields = [
-            "id",
-            "max_limit",
-            "reset_duration",
-            "current_usage",
-            "last_reset",
-        ]
-
-        for field in required_fields:
-            if field not in budget:
-                errors.append(f"Budget missing field: {field}")
-
-        if budget.get("max_limit") is not None and budget["max_limit"] < 0:
-            errors.append("Budget max_limit cannot be negative")
-
-        if budget.get("current_usage") is not None and budget["current_usage"] < 0:
-            errors.append("Budget current_usage cannot be negative")
-
-        return errors
-
-    @staticmethod
-    def validate_rate_limit_structure(rate_limit: Dict[str, Any]) -> List[str]:
-        """Validate rate limit structure"""
-        errors = []
-        required_fields = ["id"]
-
-        for field in required_fields:
-            if field not in rate_limit:
-                errors.append(f"Rate limit missing field: {field}")
-
-        # At least one limit should be specified
-        token_fields = ["token_max_limit", "token_reset_duration"]
-        request_fields = ["request_max_limit", "request_reset_duration"]
-
-        has_token_limits = any(
-            rate_limit.get(field) is not None for field in token_fields
-        )
-        has_request_limits = any(
-            rate_limit.get(field) is not None for field in request_fields
-        )
-
-        if not has_token_limits and not has_request_limits:
-            errors.append("Rate limit must have either token or request limits")
-
-        return errors
-
-    @staticmethod
-    def validate_hierarchy_consistency(
-        customer: Dict, teams: List[Dict], vks: List[Dict]
-    ) -> List[str]:
-        """Validate hierarchical consistency"""
-        errors = []
-
-        # Check team customer references
-        for team in teams:
-            if team.get("customer_id") != customer["id"]:
-                errors.append(f"Team {team['id']} has incorrect customer_id")
-
-        # Check VK team references
-        team_ids = {team["id"] for team in teams}
-        for vk in vks:
-            if vk.get("team_id") and vk["team_id"] not in team_ids:
-                errors.append(f"VK {vk['id']} references non-existent team")
-
-        return errors
-
-
-class TestScenarioBuilder:
-    """Builder for complex test scenarios"""
-
-    def __init__(self, client: GovernanceTestClient, cleanup_tracker):
-        self.client = client
-        self.cleanup_tracker = cleanup_tracker
-        self.created_entities = {"customers": [], "teams": [], "virtual_keys": []}
-
-    def create_customer(self, **kwargs) -> Dict[str, Any]:
-        """Create a customer with automatic cleanup tracking"""
-        data = TestDataFactory.generate_customer_data(**kwargs)
-        response = self.client.create_customer(data)
-        assert_response_success(response, 201)
-
-        customer = response.json()["customer"]
-        self.cleanup_tracker.add_customer(customer["id"])
-        self.created_entities["customers"].append(customer)
-        return customer
-
-    def create_team(
-        self, customer_id: Optional[str] = None, **kwargs
-    ) -> Dict[str, Any]:
-        """Create a team with automatic cleanup tracking"""
-        data = TestDataFactory.generate_team_data(customer_id=customer_id, **kwargs)
-        response = self.client.create_team(data)
-        assert_response_success(response, 201)
-
-        team = response.json()["team"]
-        self.cleanup_tracker.add_team(team["id"])
-        self.created_entities["teams"].append(team)
-        return team
-
-    def create_virtual_key(
-        self, team_id: Optional[str] = None, customer_id: Optional[str] = None, **kwargs
-    ) -> Dict[str, Any]:
-        """Create a virtual key with automatic cleanup tracking"""
-        data = TestDataFactory.generate_virtual_key_data(
-            team_id=team_id, customer_id=customer_id, **kwargs
-        )
-        response = self.client.create_virtual_key(data)
-        assert_response_success(response, 201)
-
-        vk = response.json()["virtual_key"]
-        self.cleanup_tracker.add_virtual_key(vk["id"])
-        self.created_entities["virtual_keys"].append(vk)
-        return vk
-
-    def create_simple_hierarchy(self) -> Dict[str, Any]:
-        """Create a simple Customer -> Team -> VK hierarchy"""
-        customer = self.create_customer(include_budget=True)
-        team = self.create_team(customer_id=customer["id"], include_budget=True)
-        vk = self.create_virtual_key(
-            team_id=team["id"], include_budget=True, include_rate_limit=True
-        )
-
-        return {"customer": customer, "team": team, "virtual_key": vk}
-
-    def create_complex_hierarchy(
-        self, team_count: int = 3, vk_per_team: int = 2
-    ) -> Dict[str, Any]:
-        """Create a complex hierarchy with multiple teams and VKs"""
-        customer = self.create_customer(include_budget=True)
-
-        teams = []
-        for i in range(team_count):
-            team = self.create_team(customer_id=customer["id"], include_budget=True)
-            teams.append(team)
-
-        vks = []
-        for team in teams:
-            for j in range(vk_per_team):
-                vk = self.create_virtual_key(
-                    team_id=team["id"],
-                    include_budget=True,
-                    include_rate_limit=True,
-                    model_restrictions=random.choice([True, False]),
-                )
-                vks.append(vk)
-
-        return {"customer": customer, "teams": teams, "virtual_keys": vks}
-
-    def create_mixed_vk_associations(self) -> Dict[str, Any]:
-        """Create VKs with mixed team/customer associations"""
-        customer = self.create_customer(include_budget=True)
-        team = self.create_team(customer_id=customer["id"], include_budget=True)
-
-        # VK directly associated with customer
-        customer_vk = self.create_virtual_key(
-            customer_id=customer["id"], include_budget=True
-        )
-
-        # VK associated with team (indirect customer association)
-        team_vk = self.create_virtual_key(team_id=team["id"], include_budget=True)
-
-        # Standalone VK
-        standalone_vk = self.create_virtual_key(
-            include_budget=True, include_rate_limit=True
-        )
-
-        return {
-            "customer": customer,
-            "team": team,
-            "customer_vk": customer_vk,
-            "team_vk": team_vk,
-            "standalone_vk": standalone_vk,
-        }
-
-
-class PerformanceTracker:
-    """Track performance metrics during tests"""
-
-    def __init__(self):
-        self.measurements = []
-
-    def time_operation(self, operation_name: str, operation_func, *args, **kwargs):
-        """Time an operation and record the measurement"""
-        start_time = time.time()
-        try:
-            result = operation_func(*args, **kwargs)
-            success = True
-            error = None
-        except Exception as e:
-            result = None
-            success = False
-            error = str(e)
-
-        end_time = time.time()
-        duration = end_time - start_time
-
-        measurement = {
-            "operation": operation_name,
-            "duration": duration,
-            "success": success,
-            "error": error,
-            "timestamp": datetime.now().isoformat(),
-        }
-
-        self.measurements.append(measurement)
-        return result, measurement
-
-    def get_stats(self) -> Dict[str, Any]:
-        """Get performance statistics"""
-        if not self.measurements:
-            return {"count": 0}
-
-        durations = [m["duration"] for m in self.measurements]
-        successes = [m for m in self.measurements if m["success"]]
-        failures = [m for m in self.measurements if not m["success"]]
-
-        return {
-            "count": len(self.measurements),
-            "success_count": len(successes),
-            "failure_count": len(failures),
-            "success_rate": len(successes) / len(self.measurements),
-            "avg_duration": sum(durations) / len(durations),
-            "min_duration": min(durations),
-            "max_duration": max(durations),
-            "total_duration": sum(durations),
-        }
-
-    def print_report(self):
-        """Print performance report"""
-        stats = self.get_stats()
-        if stats["count"] == 0:
-            print("No measurements recorded")
-            return
-
-        print(f"\nPerformance Report:")
-        print(f"  Total operations: {stats['count']}")
-        print(f"  Success rate: {stats['success_rate']:.2%}")
-        print(f"  Average duration: {stats['avg_duration']:.3f}s")
-        print(f"  Min duration: {stats['min_duration']:.3f}s")
-        print(f"  Max duration: {stats['max_duration']:.3f}s")
-        print(f"  Total duration: {stats['total_duration']:.3f}s")
-
-
-class ChatCompletionHelper:
-    """Helper for chat completion testing"""
-
-    @staticmethod
-    def generate_test_messages(
-        complexity: str = "simple", token_count_estimate: int = None
-    ) -> List[Dict[str, str]]:
-        """Generate test messages of varying complexity"""
-        if complexity == "simple":
-            return [{"role": "user", "content": "Hello, how are you?"}]
-
-        elif complexity == "medium":
-            return [
-                {"role": "user", "content": "Can you explain quantum computing?"},
-                {
-                    "role": "assistant",
-                    "content": "Quantum computing is a type of computation that harnesses quantum mechanics...",
-                },
-                {
-                    "role": "user",
-                    "content": "How does it differ from classical computing?",
-                },
-            ]
-
-        elif complexity == "complex":
-            content = fake.text(max_nb_chars=2000)
-            return [
-                {"role": "system", "content": "You are a helpful AI assistant."},
-                {"role": "user", "content": content},
-                {
-                    "role": "assistant",
-                    "content": "I understand. Let me help you with that.",
-                },
-                {"role": "user", "content": "Please provide a detailed analysis."},
-            ]
-
-        elif complexity == "custom" and token_count_estimate:
-            # Rough estimate: 4 characters per token
-            char_count = token_count_estimate * 4
-            content = fake.text(max_nb_chars=char_count)
-            return [{"role": "user", "content": content}]
-
-        else:
-            return [{"role": "user", "content": fake.sentence()}]
-
-    @staticmethod
-    def make_test_request(
-        client: GovernanceTestClient,
-        vk_value: str,
-        model: str = "gpt-3.5-turbo",
-        max_tokens: int = 50,
-        **kwargs,
-    ) -> Dict[str, Any]:
-        """Make a standardized test chat completion request"""
-        messages = (
-            kwargs.get("messages") or ChatCompletionHelper.generate_test_messages()
-        )
-        headers = {"x-bf-vk": vk_value}
-
-        response = client.chat_completion(
-            messages=messages,
-            model=model,
-            headers=headers,
-            max_tokens=max_tokens,
-            **{k: v for k, v in kwargs.items() if k != "messages"},
-        )
-
-        return {
-            "response": response,
-            "status_code": response.status_code,
-            "success": response.status_code == 200,
-            "rate_limited": response.status_code == 429,
-            "budget_exceeded": response.status_code == 402,
-            "unauthorized": response.status_code in [401, 403],
-            "data": (
-                response.json()
-                if response.headers.get("content-type", "").startswith(
-                    "application/json"
-                )
-                else response.text
-            ),
-        }
-
-
-# Pytest fixtures for helpers
-
-
-@pytest.fixture
-def test_data_factory():
-    """Test data factory fixture"""
-    return TestDataFactory()
-
-
-@pytest.fixture
-def validation_helper():
-    """Validation helper fixture"""
-    return ValidationHelper()
-
-
-@pytest.fixture
-def scenario_builder(governance_client, cleanup_tracker):
-    """Test scenario builder fixture"""
-    return TestScenarioBuilder(governance_client, cleanup_tracker)
-
-
-@pytest.fixture
-def performance_tracker():
-    """Performance tracker fixture"""
-    return PerformanceTracker()
-
-
-@pytest.fixture
-def chat_completion_helper():
-    """Chat completion helper fixture"""
-    return ChatCompletionHelper()
-
-
-# Test helper usage examples
-class TestHelperExamples:
-    """Examples of how to use the test helpers"""
-
-    @pytest.mark.helpers
-    def test_data_factory_usage(
-        self, test_data_factory, governance_client, cleanup_tracker
-    ):
-        """Example of using TestDataFactory"""
-        # Generate and create customer
-        customer_data = test_data_factory.generate_customer_data(include_budget=True)
-        customer_response = governance_client.create_customer(customer_data)
-        assert_response_success(customer_response, 201)
-        customer = customer_response.json()["customer"]
-        cleanup_tracker.add_customer(customer["id"])
-
-        # Verify data structure
-        assert customer["name"].endswith("Customer")
-        assert customer["budget"] is not None
-
-    @pytest.mark.helpers
-    def test_scenario_builder_usage(self, scenario_builder):
-        """Example of using TestScenarioBuilder"""
-        # Create simple hierarchy
-        hierarchy = scenario_builder.create_simple_hierarchy()
-
-        # Verify hierarchy structure
-        assert hierarchy["customer"]["id"] is not None
-        assert hierarchy["team"]["customer_id"] == hierarchy["customer"]["id"]
-        assert hierarchy["virtual_key"]["team_id"] == hierarchy["team"]["id"]
-
-    @pytest.mark.helpers
-    def test_validation_helper_usage(self, validation_helper, sample_virtual_key):
-        """Example of using ValidationHelper"""
-        # Validate VK structure
-        errors = validation_helper.validate_entity_structure(
-            sample_virtual_key, "virtual_key"
-        )
-        assert len(errors) == 0, f"VK validation errors: {errors}"
-
-        # Validate budget if present
-        if sample_virtual_key.get("budget"):
-            budget_errors = validation_helper.validate_budget_structure(
-                sample_virtual_key["budget"]
-            )
-            assert len(budget_errors) == 0, f"Budget validation errors: {budget_errors}"
-
-    @pytest.mark.helpers
-    def test_performance_tracker_usage(self, performance_tracker, governance_client):
-        """Example of using PerformanceTracker"""
-        # Time an operation
-        result, measurement = performance_tracker.time_operation(
-            "list_customers", governance_client.list_customers
-        )
-
-        assert measurement["success"] is True
-        assert measurement["duration"] > 0
-
-        # Get performance stats
-        stats = performance_tracker.get_stats()
-        assert stats["count"] == 1
-        assert stats["success_rate"] == 1.0
-
-    @pytest.mark.helpers
-    def test_chat_completion_helper_usage(
-        self, chat_completion_helper, governance_client, sample_virtual_key
-    ):
-        """Example of using ChatCompletionHelper"""
-        # Generate test messages
-        simple_messages = chat_completion_helper.generate_test_messages("simple")
-        assert len(simple_messages) == 1
-        assert simple_messages[0]["role"] == "user"
-
-        # Make test request
-        result = chat_completion_helper.make_test_request(
-            governance_client, sample_virtual_key["value"], max_tokens=10
-        )
-
-        assert "status_code" in result
-        assert "success" in result
-        assert isinstance(result["success"], bool)
diff --git a/tests/governance/test_teams_crud.py b/tests/governance/test_teams_crud.py
deleted file mode 100644
index 169e6b63a9..0000000000
--- a/tests/governance/test_teams_crud.py
+++ /dev/null
@@ -1,897 +0,0 @@
-"""
-Comprehensive Team CRUD Tests for Bifrost Governance Plugin
-
-This module provides exhaustive testing of Team operations including:
-- Complete CRUD lifecycle testing
-- Comprehensive field update testing (individual and batch)
-- Customer association testing
-- Budget inheritance and management
-- Filtering and query operations
-- Edge cases and validation scenarios
-- Concurrency and race condition testing
-"""
-
-import pytest
-import time
-import uuid
-from typing import Dict, Any, List
-from concurrent.futures import ThreadPoolExecutor
-import copy
-
-from conftest import (
-    assert_response_success,
-    verify_unchanged_fields,
-    generate_unique_name,
-    verify_entity_relationships,
-    deep_compare_entities,
-)
-
-
-class TestTeamBasicCRUD:
-    """Test basic CRUD operations for Teams"""
-
-    @pytest.mark.teams
-    @pytest.mark.crud
-    @pytest.mark.smoke
-    def test_team_create_minimal(self, governance_client, cleanup_tracker):
-        """Test creating team with minimal required data"""
-        data = {"name": generate_unique_name("Minimal Team")}
-
-        response = governance_client.create_team(data)
-        assert_response_success(response, 201)
-
-        team_data = response.json()["team"]
-        cleanup_tracker.add_team(team_data["id"])
-
-        # Verify required fields
-        assert team_data["name"] == data["name"]
-        assert team_data["id"] is not None
-        assert team_data["created_at"] is not None
-        assert team_data["updated_at"] is not None
-
-        # Verify optional fields are None/empty
-        assert team_data["virtual_keys"] is None
-
-    @pytest.mark.teams
-    @pytest.mark.crud
-    def test_team_create_with_customer(
-        self, governance_client, cleanup_tracker, sample_customer
-    ):
-        """Test creating team associated with a customer"""
-        data = {
-            "name": generate_unique_name("Customer Team"),
-            "customer_id": sample_customer["id"],
-        }
-
-        response = governance_client.create_team(data)
-        assert_response_success(response, 201)
-
-        team_data = response.json()["team"]
-        cleanup_tracker.add_team(team_data["id"])
-
-        # Verify customer association
-        assert team_data["customer_id"] == sample_customer["id"]
-        assert team_data["customer"] is not None
-        assert team_data["customer"]["id"] == sample_customer["id"]
-        assert team_data["customer"]["name"] == sample_customer["name"]
-
-    @pytest.mark.teams
-    @pytest.mark.crud
-    @pytest.mark.budget
-    def test_team_create_with_budget(self, governance_client, cleanup_tracker):
-        """Test creating team with budget"""
-        data = {
-            "name": generate_unique_name("Budget Team"),
-            "budget": {"max_limit": 25000, "reset_duration": "1d"},  # $250.00 in cents
-        }
-
-        response = governance_client.create_team(data)
-        assert_response_success(response, 201)
-
-        team_data = response.json()["team"]
-        cleanup_tracker.add_team(team_data["id"])
-
-        # Verify budget was created
-        assert team_data["budget"] is not None
-        assert team_data["budget"]["max_limit"] == 25000
-        assert team_data["budget"]["reset_duration"] == "1d"
-        assert team_data["budget"]["current_usage"] == 0
-        assert team_data["budget_id"] is not None
-
-    @pytest.mark.teams
-    @pytest.mark.crud
-    @pytest.mark.budget
-    def test_team_create_complete(
-        self, governance_client, cleanup_tracker, sample_customer
-    ):
-        """Test creating team with all possible fields"""
-        data = {
-            "name": generate_unique_name("Complete Team"),
-            "customer_id": sample_customer["id"],
-            "budget": {
-                "max_limit": 100000,  # $1000.00 in cents
-                "reset_duration": "1w",
-            },
-        }
-
-        response = governance_client.create_team(data)
-        assert_response_success(response, 201)
-
-        team_data = response.json()["team"]
-        cleanup_tracker.add_team(team_data["id"])
-
-        # Verify all fields
-        assert team_data["name"] == data["name"]
-        assert team_data["customer_id"] == sample_customer["id"]
-        assert team_data["customer"]["id"] == sample_customer["id"]
-        assert team_data["budget"]["max_limit"] == 100000
-        assert team_data["budget"]["reset_duration"] == "1w"
-
-    @pytest.mark.teams
-    @pytest.mark.crud
-    def test_team_list_all(self, governance_client, sample_team):
-        """Test listing all teams"""
-        response = governance_client.list_teams()
-        assert_response_success(response, 200)
-
-        data = response.json()
-        assert "teams" in data
-        assert "count" in data
-        assert isinstance(data["teams"], list)
-        assert data["count"] >= 1
-
-        # Find our test team
-        test_team = next(
-            (team for team in data["teams"] if team["id"] == sample_team["id"]), None
-        )
-        assert test_team is not None
-
-    @pytest.mark.teams
-    @pytest.mark.crud
-    def test_team_list_filter_by_customer(
-        self, governance_client, sample_team_with_customer
-    ):
-        """Test listing teams filtered by customer"""
-        customer_id = sample_team_with_customer["customer_id"]
-        response = governance_client.list_teams(customer_id=customer_id)
-        assert_response_success(response, 200)
-
-        data = response.json()
-        teams = data["teams"]
-
-        # All returned teams should belong to the specified customer
-        for team in teams:
-            assert team["customer_id"] == customer_id
-
-        # Our test team should be in the results
-        test_team = next(
-            (team for team in teams if team["id"] == sample_team_with_customer["id"]),
-            None,
-        )
-        assert test_team is not None
-
-    @pytest.mark.teams
-    @pytest.mark.crud
-    def test_team_get_by_id(self, governance_client, sample_team):
-        """Test getting team by ID with relationships loaded"""
-        response = governance_client.get_team(sample_team["id"])
-        assert_response_success(response, 200)
-
-        team_data = response.json()["team"]
-        assert team_data["id"] == sample_team["id"]
-        assert team_data["name"] == sample_team["name"]
-
-    @pytest.mark.teams
-    @pytest.mark.crud
-    def test_team_get_nonexistent(self, governance_client):
-        """Test getting non-existent team returns 404"""
-        fake_id = str(uuid.uuid4())
-        response = governance_client.get_team(fake_id)
-        assert response.status_code == 404
-
-    @pytest.mark.teams
-    @pytest.mark.crud
-    def test_team_delete(self, governance_client, cleanup_tracker):
-        """Test deleting a team"""
-        # Create team to delete
-        data = {"name": generate_unique_name("Delete Test Team")}
-        create_response = governance_client.create_team(data)
-        assert_response_success(create_response, 201)
-        team_id = create_response.json()["team"]["id"]
-
-        # Delete team
-        delete_response = governance_client.delete_team(team_id)
-        assert_response_success(delete_response, 200)
-
-        # Verify team is gone
-        get_response = governance_client.get_team(team_id)
-        assert get_response.status_code == 404
-
-    @pytest.mark.teams
-    @pytest.mark.crud
-    def test_team_delete_nonexistent(self, governance_client):
-        """Test deleting non-existent team returns 404"""
-        fake_id = str(uuid.uuid4())
-        response = governance_client.delete_team(fake_id)
-        assert response.status_code == 404
-
-
-class TestTeamValidation:
-    """Test validation rules for Team operations"""
-
-    @pytest.mark.teams
-    @pytest.mark.validation
-    def test_team_create_missing_name(self, governance_client):
-        """Test creating team without name fails"""
-        data = {"customer_id": str(uuid.uuid4())}
-        response = governance_client.create_team(data)
-        assert response.status_code == 400
-
-    @pytest.mark.teams
-    @pytest.mark.validation
-    def test_team_create_empty_name(self, governance_client):
-        """Test creating team with empty name fails"""
-        data = {"name": ""}
-        response = governance_client.create_team(data)
-        assert response.status_code == 400
-
-    @pytest.mark.teams
-    @pytest.mark.validation
-    def test_team_create_invalid_customer_id(self, governance_client):
-        """Test creating team with non-existent customer_id"""
-        data = {
-            "name": generate_unique_name("Invalid Customer Team"),
-            "customer_id": str(uuid.uuid4()),
-        }
-        response = governance_client.create_team(data)
-        # Note: Depending on implementation, this might succeed with warning or fail
-        # Adjust assertion based on actual API behavior
-
-    @pytest.mark.teams
-    @pytest.mark.validation
-    def test_team_create_invalid_budget(self, governance_client):
-        """Test creating team with invalid budget data"""
-        # Test negative budget (should be rejected)
-        data = {
-            "name": generate_unique_name("Negative Budget Team"),
-            "budget": {"max_limit": -1000, "reset_duration": "1h"},
-        }
-        response = governance_client.create_team(data)
-        assert response.status_code == 400  # API should reject negative budgets
-
-        # Test invalid reset duration
-        data = {
-            "name": generate_unique_name("Invalid Duration Team"),
-            "budget": {"max_limit": 1000, "reset_duration": "invalid"},
-        }
-        response = governance_client.create_team(data)
-        assert response.status_code == 400
-
-
-class TestTeamFieldUpdates:
-    """Comprehensive tests for Team field updates"""
-
-    @pytest.mark.teams
-    @pytest.mark.field_updates
-    def test_team_update_individual_fields(
-        self, governance_client, cleanup_tracker, sample_customer
-    ):
-        """Test updating each team field individually"""
-        # Create team with all fields for testing
-        original_data = {
-            "name": generate_unique_name("Complete Update Test Team"),
-            "customer_id": sample_customer["id"],
-            "budget": {"max_limit": 50000, "reset_duration": "1d"},
-        }
-        create_response = governance_client.create_team(original_data)
-        assert_response_success(create_response, 201)
-        team_id = create_response.json()["team"]["id"]
-        cleanup_tracker.add_team(team_id)
-
-        # Get original state
-        original_response = governance_client.get_team(team_id)
-        original_team = original_response.json()["team"]
-
-        # Create another customer for testing customer_id updates
-        other_customer_data = {"name": generate_unique_name("Other Customer")}
-        other_customer_response = governance_client.create_customer(other_customer_data)
-        assert_response_success(other_customer_response, 201)
-        other_customer = other_customer_response.json()["customer"]
-        cleanup_tracker.add_customer(other_customer["id"])
-
-        # Test individual field updates
-        field_test_cases = [
-            {
-                "field": "name",
-                "update_data": {"name": "Updated Team Name"},
-                "expected_value": "Updated Team Name",
-            },
-            {
-                "field": "customer_id",
-                "update_data": {"customer_id": other_customer["id"]},
-                "expected_value": other_customer["id"],
-                "exclude_from_unchanged_check": ["customer_id", "customer"],
-            },
-            {
-                "field": "customer_id_clear",
-                "update_data": {"customer_id": None},
-                "expected_value": None,
-                "exclude_from_unchanged_check": ["customer_id", "customer"],
-                "custom_validation": lambda team: team["customer_id"] is None
-                and team["customer"] is None,
-            },
-        ]
-
-        for test_case in field_test_cases:
-            # Reset team to original state
-            reset_data = {
-                "name": original_team["name"],
-                "customer_id": original_team["customer_id"],
-            }
-            governance_client.update_team(team_id, reset_data)
-
-            # Perform field update
-            response = governance_client.update_team(team_id, test_case["update_data"])
-            assert_response_success(response, 200)
-            updated_team = response.json()["team"]
-
-            # Verify target field was updated
-            if test_case.get("custom_validation"):
-                test_case["custom_validation"](updated_team)
-            else:
-                field_parts = test_case["field"].split(".")
-                current_value = updated_team
-                for part in field_parts:
-                    if part != "clear":  # Skip suffix indicators
-                        current_value = current_value[part]
-                assert (
-                    current_value == test_case["expected_value"]
-                ), f"Field {test_case['field']} not updated correctly"
-
-            # Verify other fields unchanged (if specified)
-            if test_case.get("verify_unchanged", True):
-                exclude_fields = test_case.get(
-                    "exclude_from_unchanged_check", [test_case["field"]]
-                )
-                verify_unchanged_fields(updated_team, original_team, exclude_fields)
-
-    @pytest.mark.teams
-    @pytest.mark.field_updates
-    @pytest.mark.budget
-    def test_team_budget_updates(self, governance_client, cleanup_tracker):
-        """Test comprehensive budget creation, update, and modification"""
-        # Create team without budget
-        data = {"name": generate_unique_name("Budget Update Test Team")}
-        create_response = governance_client.create_team(data)
-        assert_response_success(create_response, 201)
-        team_id = create_response.json()["team"]["id"]
-        cleanup_tracker.add_team(team_id)
-
-        # Test 1: Add budget to team without budget
-        budget_data = {"max_limit": 15000, "reset_duration": "1h"}
-        response = governance_client.update_team(team_id, {"budget": budget_data})
-        assert_response_success(response, 200)
-        updated_team = response.json()["team"]
-        assert updated_team["budget"]["max_limit"] == 15000
-        assert updated_team["budget"]["reset_duration"] == "1h"
-        assert updated_team["budget_id"] is not None
-
-        # Test 2: Update existing budget completely
-        new_budget_data = {"max_limit": 30000, "reset_duration": "2h"}
-        response = governance_client.update_team(team_id, {"budget": new_budget_data})
-        assert_response_success(response, 200)
-        updated_team = response.json()["team"]
-        assert updated_team["budget"]["max_limit"] == 30000
-        assert updated_team["budget"]["reset_duration"] == "2h"
-
-        # Test 3: Partial budget update (only max_limit)
-        response = governance_client.update_team(
-            team_id, {"budget": {"max_limit": 45000}}
-        )
-        assert_response_success(response, 200)
-        updated_team = response.json()["team"]
-        assert updated_team["budget"]["max_limit"] == 45000
-        assert (
-            updated_team["budget"]["reset_duration"] == "2h"
-        )  # Should remain unchanged
-
-        # Test 4: Partial budget update (only reset_duration)
-        response = governance_client.update_team(
-            team_id, {"budget": {"reset_duration": "1d"}}
-        )
-        assert_response_success(response, 200)
-        updated_team = response.json()["team"]
-        assert updated_team["budget"]["max_limit"] == 45000  # Should remain unchanged
-        assert updated_team["budget"]["reset_duration"] == "1d"
-
-    @pytest.mark.teams
-    @pytest.mark.field_updates
-    def test_team_multiple_field_updates(
-        self, governance_client, cleanup_tracker, sample_customer
-    ):
-        """Test updating multiple fields simultaneously"""
-        # Create team with initial data
-        initial_data = {
-            "name": generate_unique_name("Multi-Field Test Team"),
-        }
-        create_response = governance_client.create_team(initial_data)
-        assert_response_success(create_response, 201)
-        team_id = create_response.json()["team"]["id"]
-        cleanup_tracker.add_team(team_id)
-
-        # Update multiple fields at once
-        update_data = {
-            "name": "Updated Multi-Field Team Name",
-            "customer_id": sample_customer["id"],
-            "budget": {"max_limit": 75000, "reset_duration": "1w"},
-        }
-
-        response = governance_client.update_team(team_id, update_data)
-        assert_response_success(response, 200)
-
-        updated_team = response.json()["team"]
-        assert updated_team["name"] == "Updated Multi-Field Team Name"
-        assert updated_team["customer_id"] == sample_customer["id"]
-        assert updated_team["customer"]["id"] == sample_customer["id"]
-        assert updated_team["budget"]["max_limit"] == 75000
-        assert updated_team["budget"]["reset_duration"] == "1w"
-
-    @pytest.mark.teams
-    @pytest.mark.field_updates
-    @pytest.mark.edge_cases
-    def test_team_update_edge_cases(self, governance_client, cleanup_tracker):
-        """Test edge cases in team updates"""
-        # Create test team
-        data = {"name": generate_unique_name("Edge Case Team")}
-        create_response = governance_client.create_team(data)
-        assert_response_success(create_response, 201)
-        team_id = create_response.json()["team"]["id"]
-        cleanup_tracker.add_team(team_id)
-
-        original_response = governance_client.get_team(team_id)
-        original_team = original_response.json()["team"]
-
-        # Test 1: Empty update (should return unchanged team)
-        response = governance_client.update_team(team_id, {})
-        assert_response_success(response, 200)
-        updated_team = response.json()["team"]
-
-        # Compare ignoring timestamps
-        differences = deep_compare_entities(
-            updated_team, original_team, ignore_fields=["updated_at"]
-        )
-        assert len(differences) == 0, f"Empty update changed fields: {differences}"
-
-        # Test 2: Update with same values
-        response = governance_client.update_team(
-            team_id, {"name": original_team["name"]}
-        )
-        assert_response_success(response, 200)
-
-        # Test 3: Very long team name (test field length limits)
-        long_name = "x" * 1000  # Adjust based on actual field limits
-        response = governance_client.update_team(team_id, {"name": long_name})
-        # Expected behavior depends on API validation rules
-
-    @pytest.mark.teams
-    @pytest.mark.field_updates
-    def test_team_update_nonexistent(self, governance_client):
-        """Test updating non-existent team returns 404"""
-        fake_id = str(uuid.uuid4())
-        response = governance_client.update_team(fake_id, {"name": "test"})
-        assert response.status_code == 404
-
-
-class TestTeamBudgetManagement:
-    """Test team budget specific functionality"""
-
-    @pytest.mark.teams
-    @pytest.mark.budget
-    def test_team_budget_creation_and_validation(
-        self, governance_client, cleanup_tracker
-    ):
-        """Test budget creation with various configurations"""
-        # Test valid budget configurations
-        budget_test_cases = [
-            {"max_limit": 5000, "reset_duration": "1h"},
-            {"max_limit": 25000, "reset_duration": "1d"},
-            {"max_limit": 100000, "reset_duration": "1w"},
-            {"max_limit": 500000, "reset_duration": "1M"},
-        ]
-
-        for budget_config in budget_test_cases:
-            data = {
-                "name": generate_unique_name(
-                    f"Budget Team {budget_config['reset_duration']}"
-                ),
-                "budget": budget_config,
-            }
-
-            response = governance_client.create_team(data)
-            assert_response_success(response, 201)
-
-            team_data = response.json()["team"]
-            cleanup_tracker.add_team(team_data["id"])
-
-            assert team_data["budget"]["max_limit"] == budget_config["max_limit"]
-            assert (
-                team_data["budget"]["reset_duration"] == budget_config["reset_duration"]
-            )
-            assert team_data["budget"]["current_usage"] == 0
-            assert team_data["budget"]["last_reset"] is not None
-
-    @pytest.mark.teams
-    @pytest.mark.budget
-    @pytest.mark.edge_cases
-    def test_team_budget_edge_cases(self, governance_client, cleanup_tracker):
-        """Test budget edge cases and boundary conditions"""
-        # Test boundary values
-        edge_case_budgets = [
-            {"max_limit": 0, "reset_duration": "1h"},  # Zero budget
-            {"max_limit": 1, "reset_duration": "1s"},  # Minimal values
-            {"max_limit": 9223372036854775807, "reset_duration": "1h"},  # Max int64
-        ]
-
-        for budget_config in edge_case_budgets:
-            data = {
-                "name": generate_unique_name(
-                    f"Edge Budget Team {budget_config['max_limit']}"
-                ),
-                "budget": budget_config,
-            }
-
-            response = governance_client.create_team(data)
-            # Adjust assertions based on API validation rules
-            if (
-                budget_config["max_limit"] >= 0
-            ):  # Assuming non-negative budgets are valid
-                assert_response_success(response, 201)
-                cleanup_tracker.add_team(response.json()["team"]["id"])
-            else:
-                assert response.status_code == 400
-
-    @pytest.mark.teams
-    @pytest.mark.budget
-    def test_team_budget_inheritance_simulation(
-        self, governance_client, cleanup_tracker
-    ):
-        """Test team budget in context of hierarchical inheritance"""
-        # This test simulates budget inheritance behavior
-        # Actual inheritance testing would be in integration tests
-
-        # Create customer with budget
-        customer_data = {
-            "name": generate_unique_name("Budget Customer"),
-            "budget": {"max_limit": 100000, "reset_duration": "1d"},
-        }
-        customer_response = governance_client.create_customer(customer_data)
-        assert_response_success(customer_response, 201)
-        customer = customer_response.json()["customer"]
-        cleanup_tracker.add_customer(customer["id"])
-
-        # Create team with smaller budget under customer
-        team_data = {
-            "name": generate_unique_name("Sub-Budget Team"),
-            "customer_id": customer["id"],
-            "budget": {
-                "max_limit": 25000,
-                "reset_duration": "1d",
-            },  # Smaller than customer
-        }
-        team_response = governance_client.create_team(team_data)
-        assert_response_success(team_response, 201)
-        team = team_response.json()["team"]
-        cleanup_tracker.add_team(team["id"])
-
-        # Verify both budgets exist independently
-        assert team["budget"]["max_limit"] == 25000
-        # Note: Customer budget not preloaded in team response (use customer endpoint to verify)
-        customer_response = governance_client.get_customer(customer["id"])
-        customer_with_budget = customer_response.json()["customer"]
-        assert customer_with_budget["budget"]["max_limit"] == 100000
-
-        # Create team without budget under customer (should inherit)
-        no_budget_team_data = {
-            "name": generate_unique_name("Inherit Budget Team"),
-            "customer_id": customer["id"],
-        }
-        no_budget_response = governance_client.create_team(no_budget_team_data)
-        assert_response_success(no_budget_response, 201)
-        no_budget_team = no_budget_response.json()["team"]
-        cleanup_tracker.add_team(no_budget_team["id"])
-
-        # Team without explicit budget should not have budget field (omitempty)
-        assert no_budget_team.get("budget") is None
-        # Verify customer has budget (need to fetch customer directly due to preloading limits)
-        customer_check = governance_client.get_customer(customer["id"])
-        assert customer_check.json()["customer"]["budget"]["max_limit"] == 100000
-
-
-class TestTeamRelationships:
-    """Test team relationships with customers"""
-
-    @pytest.mark.teams
-    @pytest.mark.relationships
-    def test_team_customer_relationship_loading(
-        self, governance_client, cleanup_tracker, sample_customer
-    ):
-        """Test that team properly loads customer relationships"""
-        data = {
-            "name": generate_unique_name("Customer Relationship Team"),
-            "customer_id": sample_customer["id"],
-        }
-
-        response = governance_client.create_team(data)
-        assert_response_success(response, 201)
-        team_data = response.json()["team"]
-        cleanup_tracker.add_team(team_data["id"])
-
-        # Verify customer relationship loaded
-        assert team_data["customer"] is not None
-        assert team_data["customer"]["id"] == sample_customer["id"]
-        assert team_data["customer"]["name"] == sample_customer["name"]
-
-        # Verify customer budget relationship loaded if it exists
-        if sample_customer.get("budget"):
-            assert team_data["customer"]["budget"] is not None
-
-    @pytest.mark.teams
-    @pytest.mark.relationships
-    def test_team_orphaned_customer_reference(self, governance_client, cleanup_tracker):
-        """Test team behavior with orphaned customer reference"""
-        # Create team with non-existent customer_id
-        fake_customer_id = str(uuid.uuid4())
-        data = {
-            "name": generate_unique_name("Orphaned Team"),
-            "customer_id": fake_customer_id,
-        }
-
-        response = governance_client.create_team(data)
-        # Behavior depends on API implementation:
-        # - Might succeed with warning
-        # - Might fail with validation error
-        # Adjust assertion based on actual behavior
-
-        if response.status_code == 201:
-            cleanup_tracker.add_team(response.json()["team"]["id"])
-            # Verify team was created but customer relationship is null/missing
-            team_data = response.json()["team"]
-            assert team_data.get("customer") is None
-        else:
-            assert response.status_code == 400  # Validation error expected
-
-    @pytest.mark.teams
-    @pytest.mark.relationships
-    def test_team_customer_association_changes(
-        self, governance_client, cleanup_tracker, sample_customer
-    ):
-        """Test changing team customer associations"""
-        # Create standalone team
-        data = {"name": generate_unique_name("Association Test Team")}
-        create_response = governance_client.create_team(data)
-        assert_response_success(create_response, 201)
-        team_id = create_response.json()["team"]["id"]
-        cleanup_tracker.add_team(team_id)
-
-        # Create another customer
-        other_customer_data = {"name": generate_unique_name("Other Customer")}
-        other_customer_response = governance_client.create_customer(other_customer_data)
-        assert_response_success(other_customer_response, 201)
-        other_customer = other_customer_response.json()["customer"]
-        cleanup_tracker.add_customer(other_customer["id"])
-
-        # Test 1: Associate with first customer
-        response = governance_client.update_team(
-            team_id, {"customer_id": sample_customer["id"]}
-        )
-        assert_response_success(response, 200)
-        updated_team = response.json()["team"]
-        assert updated_team["customer_id"] == sample_customer["id"]
-        assert updated_team["customer"]["id"] == sample_customer["id"]
-
-        # Test 2: Switch to other customer
-        response = governance_client.update_team(
-            team_id, {"customer_id": other_customer["id"]}
-        )
-        assert_response_success(response, 200)
-        updated_team = response.json()["team"]
-        assert updated_team["customer_id"] == other_customer["id"]
-        assert updated_team["customer"]["id"] == other_customer["id"]
-
-        # Test 3: Remove customer association
-        response = governance_client.update_team(team_id, {"customer_id": None})
-        # Note: Behavior depends on API implementation
-        # Adjust assertion based on actual behavior
-
-
-class TestTeamConcurrency:
-    """Test concurrent operations on Teams"""
-
-    @pytest.mark.teams
-    @pytest.mark.concurrency
-    @pytest.mark.slow
-    def test_team_concurrent_creation(self, governance_client, cleanup_tracker):
-        """Test creating multiple teams concurrently"""
-
-        def create_team(index):
-            data = {"name": generate_unique_name(f"Concurrent Team {index}")}
-            response = governance_client.create_team(data)
-            return response
-
-        # Create 10 teams concurrently
-        with ThreadPoolExecutor(max_workers=10) as executor:
-            futures = [executor.submit(create_team, i) for i in range(10)]
-            responses = [future.result() for future in futures]
-
-        # Verify all succeeded
-        created_teams = []
-        for response in responses:
-            assert_response_success(response, 201)
-            team_data = response.json()["team"]
-            created_teams.append(team_data)
-            cleanup_tracker.add_team(team_data["id"])
-
-        # Verify all teams have unique IDs
-        team_ids = [team["id"] for team in created_teams]
-        assert len(set(team_ids)) == 10  # All unique IDs
-
-    @pytest.mark.teams
-    @pytest.mark.concurrency
-    @pytest.mark.slow
-    def test_team_concurrent_updates(self, governance_client, cleanup_tracker):
-        """Test updating same team concurrently"""
-        # Create team to update
-        data = {"name": generate_unique_name("Concurrent Update Team")}
-        create_response = governance_client.create_team(data)
-        assert_response_success(create_response, 201)
-        team_id = create_response.json()["team"]["id"]
-        cleanup_tracker.add_team(team_id)
-
-        # Update concurrently with different names
-        def update_team(index):
-            update_data = {"name": f"Updated by thread {index}"}
-            response = governance_client.update_team(team_id, update_data)
-            return response, index
-
-        with ThreadPoolExecutor(max_workers=5) as executor:
-            futures = [executor.submit(update_team, i) for i in range(5)]
-            results = [future.result() for future in futures]
-
-        # All updates should succeed (last one wins)
-        for response, index in results:
-            assert_response_success(response, 200)
-
-        # Verify final state
-        final_response = governance_client.get_team(team_id)
-        final_team = final_response.json()["team"]
-        assert final_team["name"].startswith("Updated by thread")
-
-    @pytest.mark.teams
-    @pytest.mark.concurrency
-    @pytest.mark.slow
-    def test_team_concurrent_customer_association(
-        self, governance_client, cleanup_tracker, sample_customer
-    ):
-        """Test concurrent customer association updates"""
-        # Create multiple teams to associate with same customer
-        teams = []
-        for i in range(5):
-            data = {"name": generate_unique_name(f"Concurrent Association Team {i}")}
-            response = governance_client.create_team(data)
-            assert_response_success(response, 201)
-            team_data = response.json()["team"]
-            teams.append(team_data)
-            cleanup_tracker.add_team(team_data["id"])
-
-        # Associate all teams with customer concurrently
-        def associate_team(team):
-            update_data = {"customer_id": sample_customer["id"]}
-            response = governance_client.update_team(team["id"], update_data)
-            return response, team["id"]
-
-        with ThreadPoolExecutor(max_workers=5) as executor:
-            futures = [executor.submit(associate_team, team) for team in teams]
-            results = [future.result() for future in futures]
-
-        # All associations should succeed
-        for response, team_id in results:
-            assert_response_success(response, 200)
-            updated_team = response.json()["team"]
-            assert updated_team["customer_id"] == sample_customer["id"]
-
-
-class TestTeamFiltering:
-    """Test team filtering and query operations"""
-
-    @pytest.mark.teams
-    @pytest.mark.api
-    def test_team_filter_by_customer_comprehensive(
-        self, governance_client, cleanup_tracker
-    ):
-        """Test comprehensive customer filtering scenarios"""
-        # Create customers
-        customer1_data = {"name": generate_unique_name("Filter Customer 1")}
-        customer1_response = governance_client.create_customer(customer1_data)
-        assert_response_success(customer1_response, 201)
-        customer1 = customer1_response.json()["customer"]
-        cleanup_tracker.add_customer(customer1["id"])
-
-        customer2_data = {"name": generate_unique_name("Filter Customer 2")}
-        customer2_response = governance_client.create_customer(customer2_data)
-        assert_response_success(customer2_response, 201)
-        customer2 = customer2_response.json()["customer"]
-        cleanup_tracker.add_customer(customer2["id"])
-
-        # Create teams for customer1
-        for i in range(3):
-            team_data = {
-                "name": generate_unique_name(f"Customer1 Team {i}"),
-                "customer_id": customer1["id"],
-            }
-            response = governance_client.create_team(team_data)
-            assert_response_success(response, 201)
-            cleanup_tracker.add_team(response.json()["team"]["id"])
-
-        # Create teams for customer2
-        for i in range(2):
-            team_data = {
-                "name": generate_unique_name(f"Customer2 Team {i}"),
-                "customer_id": customer2["id"],
-            }
-            response = governance_client.create_team(team_data)
-            assert_response_success(response, 201)
-            cleanup_tracker.add_team(response.json()["team"]["id"])
-
-        # Create standalone team
-        standalone_data = {"name": generate_unique_name("Standalone Team")}
-        response = governance_client.create_team(standalone_data)
-        assert_response_success(response, 201)
-        cleanup_tracker.add_team(response.json()["team"]["id"])
-
-        # Test filtering by customer1
-        response = governance_client.list_teams(customer_id=customer1["id"])
-        assert_response_success(response, 200)
-        teams = response.json()["teams"]
-        assert len(teams) == 3
-        for team in teams:
-            assert team["customer_id"] == customer1["id"]
-
-        # Test filtering by customer2
-        response = governance_client.list_teams(customer_id=customer2["id"])
-        assert_response_success(response, 200)
-        teams = response.json()["teams"]
-        assert len(teams) == 2
-        for team in teams:
-            assert team["customer_id"] == customer2["id"]
-
-        # Test filtering by non-existent customer
-        fake_customer_id = str(uuid.uuid4())
-        response = governance_client.list_teams(customer_id=fake_customer_id)
-        assert_response_success(response, 200)
-        teams = response.json()["teams"]
-        assert len(teams) == 0
-
-    @pytest.mark.teams
-    @pytest.mark.api
-    def test_team_list_pagination_and_sorting(self, governance_client, cleanup_tracker):
-        """Test team list with pagination and sorting (if supported by API)"""
-        # Create multiple teams for testing
-        team_names = []
-        for i in range(10):
-            name = generate_unique_name(f"Sort Test Team {i:02d}")
-            team_names.append(name)
-            data = {"name": name}
-            response = governance_client.create_team(data)
-            assert_response_success(response, 201)
-            cleanup_tracker.add_team(response.json()["team"]["id"])
-
-        # Test basic list (should include our teams)
-        response = governance_client.list_teams()
-        assert_response_success(response, 200)
-        teams = response.json()["teams"]
-        assert len(teams) >= 10
-
-        # Verify our teams are in the response
-        response_team_names = {team["name"] for team in teams}
-        for name in team_names:
-            assert name in response_team_names
diff --git a/tests/governance/test_usage_tracking.py b/tests/governance/test_usage_tracking.py
deleted file mode 100644
index aaa5724cc4..0000000000
--- a/tests/governance/test_usage_tracking.py
+++ /dev/null
@@ -1,1061 +0,0 @@
-"""
-Comprehensive Usage Tracking and Monitoring Tests for Bifrost Governance Plugin
-
-This module provides exhaustive testing of usage tracking, monitoring, and integration including:
-- Chat completion integration with governance headers
-- Usage tracking and budget enforcement
-- Rate limiting enforcement during real requests
-- Monitoring endpoints testing
-- Reset functionality testing
-- Debug and health endpoints
-- Integration edge cases and error scenarios
-- Performance and concurrency testing
-"""
-
-import pytest
-import time
-import uuid
-import json
-from typing import Dict, Any, List
-from concurrent.futures import ThreadPoolExecutor
-import threading
-
-from conftest import (
-    assert_response_success,
-    generate_unique_name,
-    wait_for_condition,
-    BIFROST_BASE_URL,
-)
-
-
-class TestUsageStatsEndpoints:
-    """Test usage statistics and monitoring endpoints"""
-
-    @pytest.mark.usage_tracking
-    @pytest.mark.api
-    @pytest.mark.smoke
-    def test_get_usage_stats_general(self, governance_client):
-        """Test getting general usage statistics"""
-        response = governance_client.get_usage_stats()
-        assert_response_success(response, 200)
-
-        stats = response.json()
-        # Stats structure depends on implementation, but should be valid JSON
-        assert isinstance(stats, dict)
-
-    @pytest.mark.usage_tracking
-    @pytest.mark.api
-    def test_get_usage_stats_for_vk(self, governance_client, sample_virtual_key):
-        """Test getting usage statistics for specific VK"""
-        response = governance_client.get_usage_stats(
-            virtual_key_id=sample_virtual_key["id"]
-        )
-        assert_response_success(response, 200)
-
-        data = response.json()
-        assert "virtual_key_id" in data
-        assert data["virtual_key_id"] == sample_virtual_key["id"]
-        assert "usage_stats" in data
-
-    @pytest.mark.usage_tracking
-    @pytest.mark.api
-    def test_get_usage_stats_nonexistent_vk(self, governance_client):
-        """Test getting usage stats for non-existent VK"""
-        fake_vk_id = str(uuid.uuid4())
-        response = governance_client.get_usage_stats(virtual_key_id=fake_vk_id)
-        # Behavior depends on implementation - might return empty stats or 404
-        assert response.status_code in [200, 404]
-
-    @pytest.mark.usage_tracking
-    @pytest.mark.api
-    def test_reset_usage_basic(self, governance_client, sample_virtual_key):
-        """Test basic usage reset functionality"""
-        reset_data = {"virtual_key_id": sample_virtual_key["id"]}
-
-        response = governance_client.reset_usage(reset_data)
-        assert_response_success(response, 200)
-
-        result = response.json()
-        assert "message" in result
-        assert "successfully" in result["message"].lower()
-
-    @pytest.mark.usage_tracking
-    @pytest.mark.api
-    def test_reset_usage_with_provider_and_model(
-        self, governance_client, sample_virtual_key
-    ):
-        """Test usage reset with specific provider and model"""
-        reset_data = {
-            "virtual_key_id": sample_virtual_key["id"],
-            "provider": "openai",
-            "model": "gpt-4",
-        }
-
-        response = governance_client.reset_usage(reset_data)
-        assert_response_success(response, 200)
-
-    @pytest.mark.usage_tracking
-    @pytest.mark.api
-    def test_reset_usage_invalid_vk(self, governance_client):
-        """Test usage reset with invalid VK ID"""
-        reset_data = {"virtual_key_id": str(uuid.uuid4())}
-
-        response = governance_client.reset_usage(reset_data)
-        assert response.status_code in [400, 404, 500]  # Expected error
-
-
-class TestDebugEndpoints:
-    """Test debug and monitoring endpoints"""
-
-    @pytest.mark.usage_tracking
-    @pytest.mark.api
-    @pytest.mark.smoke
-    def test_get_debug_stats(self, governance_client):
-        """Test debug statistics endpoint"""
-        response = governance_client.get_debug_stats()
-        assert_response_success(response, 200)
-
-        data = response.json()
-        assert "plugin_stats" in data
-        assert "database_stats" in data
-        assert "timestamp" in data
-
-    @pytest.mark.usage_tracking
-    @pytest.mark.api
-    def test_get_debug_counters(self, governance_client):
-        """Test debug counters endpoint"""
-        response = governance_client.get_debug_counters()
-        assert_response_success(response, 200)
-
-        data = response.json()
-        assert "counters" in data
-        assert "count" in data
-        assert "timestamp" in data
-        assert isinstance(data["counters"], list)
-
-    @pytest.mark.usage_tracking
-    @pytest.mark.api
-    @pytest.mark.smoke
-    def test_get_health_check(self, governance_client):
-        """Test health check endpoint"""
-        response = governance_client.get_health_check()
-        # Health check should return 200 for healthy or 503 for unhealthy
-        assert response.status_code in [200, 503]
-
-        data = response.json()
-        assert "status" in data
-        assert "timestamp" in data
-        assert "checks" in data
-        assert data["status"] in ["healthy", "unhealthy"]
-
-
-class TestChatCompletionIntegration:
-    """Test chat completion integration with governance headers"""
-
-    @pytest.mark.integration
-    @pytest.mark.usage_tracking
-    @pytest.mark.smoke
-    def test_chat_completion_with_vk_header(
-        self, governance_client, sample_virtual_key
-    ):
-        """Test chat completion with valid VK header"""
-        messages = [{"role": "user", "content": "Hello, world!"}]
-        headers = {"x-bf-vk": sample_virtual_key["value"]}
-
-        response = governance_client.chat_completion(
-            messages=messages,
-            model="openai/gpt-3.5-turbo",
-            headers=headers,
-            max_tokens=10,
-        )
-
-        # Response should be successful, rate limited, budget exceeded, or VK not found
-        assert response.status_code in [200, 429, 402, 403]
-
-        if response.status_code == 200:
-            data = response.json()
-            assert "choices" in data
-            assert len(data["choices"]) > 0
-
-    @pytest.mark.integration
-    @pytest.mark.usage_tracking
-    def test_chat_completion_without_vk_header(self, governance_client):
-        """Test chat completion without VK header"""
-        messages = [{"role": "user", "content": "Hello, world!"}]
-
-        response = governance_client.chat_completion(
-            messages=messages, model="openai/gpt-3.5-turbo", max_tokens=10
-        )
-
-        # Should succeed without VK header (governance skipped)
-        assert response.status_code in [
-            200,
-            400,
-        ]  # 200 if no governance, 400 if provider issues
-
-    @pytest.mark.integration
-    @pytest.mark.usage_tracking
-    def test_chat_completion_invalid_vk_header(self, governance_client):
-        """Test chat completion with invalid VK header"""
-        messages = [{"role": "user", "content": "Hello, world!"}]
-        headers = {"x-bf-vk": "invalid-vk-value"}
-
-        response = governance_client.chat_completion(
-            messages=messages,
-            model="openai/gpt-3.5-turbo",
-            headers=headers,
-            max_tokens=10,
-        )
-
-        # Should fail with invalid VK (governance blocks)
-        assert response.status_code == 403
-
-    @pytest.mark.integration
-    @pytest.mark.usage_tracking
-    def test_chat_completion_inactive_vk(self, governance_client, cleanup_tracker):
-        """Test chat completion with inactive VK"""
-        # Create inactive VK
-        vk_data = {"name": generate_unique_name("Inactive VK"), "is_active": False}
-        create_response = governance_client.create_virtual_key(vk_data)
-        assert_response_success(create_response, 201)
-        inactive_vk = create_response.json()["virtual_key"]
-        cleanup_tracker.add_virtual_key(inactive_vk["id"])
-
-        messages = [{"role": "user", "content": "Hello, world!"}]
-        headers = {"x-bf-vk": inactive_vk["value"]}
-
-        response = governance_client.chat_completion(
-            messages=messages,
-            model="openai/gpt-3.5-turbo",
-            headers=headers,
-            max_tokens=10,
-        )
-
-        # Should fail with inactive VK (governance blocks)
-        assert response.status_code == 403
-
-    @pytest.mark.integration
-    @pytest.mark.usage_tracking
-    def test_chat_completion_with_model_restrictions(
-        self, governance_client, cleanup_tracker
-    ):
-        """Test chat completion with model restrictions"""
-        # Create VK with model restrictions
-        vk_data = {
-            "name": generate_unique_name("Restricted VK"),
-            "allowed_models": ["gpt-4"],  # Only allow GPT-4
-            "allowed_providers": ["openai"],
-        }
-        create_response = governance_client.create_virtual_key(vk_data)
-        assert_response_success(create_response, 201)
-        restricted_vk = create_response.json()["virtual_key"]
-        cleanup_tracker.add_virtual_key(restricted_vk["id"])
-
-        # Test with allowed model
-        messages = [{"role": "user", "content": "Hello, world!"}]
-        headers = {"x-bf-vk": restricted_vk["value"]}
-
-        response = governance_client.chat_completion(
-            messages=messages, model="gpt-4", headers=headers, max_tokens=10
-        )
-
-        # Should work with allowed model
-        assert response.status_code in [200, 429, 402]  # Success or limits
-
-        # Test with disallowed model
-        response = governance_client.chat_completion(
-            messages=messages,
-            model="openai/gpt-3.5-turbo",  # Not in allowed_models
-            headers=headers,
-            max_tokens=10,
-        )
-
-        # Should fail with disallowed model
-        assert response.status_code in [400, 403]
-
-
-class TestBudgetEnforcement:
-    """Test budget enforcement during chat completions"""
-
-    @pytest.mark.integration
-    @pytest.mark.budget
-    @pytest.mark.usage_tracking
-    def test_budget_enforcement_basic(self, governance_client, cleanup_tracker):
-        """Test basic budget enforcement"""
-        # Create VK with very small budget
-        vk_data = {
-            "name": generate_unique_name("Small Budget VK"),
-            "budget": {
-                "max_limit": 1,  # 1 cent - very small budget
-                "reset_duration": "1h",
-            },
-        }
-        create_response = governance_client.create_virtual_key(vk_data)
-        assert_response_success(create_response, 201)
-        small_budget_vk = create_response.json()["virtual_key"]
-        cleanup_tracker.add_virtual_key(small_budget_vk["id"])
-
-        messages = [
-            {
-                "role": "user",
-                "content": "Write a very long story about artificial intelligence" * 10,
-            }
-        ]
-        headers = {"x-bf-vk": small_budget_vk["value"]}
-
-        response = governance_client.chat_completion(
-            messages=messages,
-            model="openai/gpt-3.5-turbo",
-            headers=headers,
-            max_tokens=1000,  # Request expensive completion
-        )
-
-        # Should fail due to budget exceeded
-        if response.status_code == 402:  # Budget exceeded
-            error_data = response.json()
-            assert "budget" in error_data.get("error", "").lower()
-        elif response.status_code == 200:
-            # If it succeeded, check that budget was tracked
-            stats_response = governance_client.get_usage_stats(
-                virtual_key_id=small_budget_vk["id"]
-            )
-            if stats_response.status_code == 200:
-                # Verify usage was tracked
-                pass
-
-    @pytest.mark.integration
-    @pytest.mark.budget
-    @pytest.mark.usage_tracking
-    def test_hierarchical_budget_enforcement(self, governance_client, cleanup_tracker):
-        """Test hierarchical budget enforcement (Customer -> Team -> VK)"""
-        # Create customer with budget
-        customer_data = {
-            "name": generate_unique_name("Budget Test Customer"),
-            "budget": {"max_limit": 10000, "reset_duration": "1h"},
-        }
-        customer_response = governance_client.create_customer(customer_data)
-        assert_response_success(customer_response, 201)
-        customer = customer_response.json()["customer"]
-        cleanup_tracker.add_customer(customer["id"])
-
-        # Create team under customer with smaller budget
-        team_data = {
-            "name": generate_unique_name("Budget Test Team"),
-            "customer_id": customer["id"],
-            "budget": {"max_limit": 5000, "reset_duration": "1h"},
-        }
-        team_response = governance_client.create_team(team_data)
-        assert_response_success(team_response, 201)
-        team = team_response.json()["team"]
-        cleanup_tracker.add_team(team["id"])
-
-        # Create VK under team with even smaller budget
-        vk_data = {
-            "name": generate_unique_name("Budget Test VK"),
-            "team_id": team["id"],
-            "budget": {"max_limit": 1, "reset_duration": "1h"},  # Smallest budget
-        }
-        vk_response = governance_client.create_virtual_key(vk_data)
-        assert_response_success(vk_response, 201)
-        vk = vk_response.json()["virtual_key"]
-        cleanup_tracker.add_virtual_key(vk["id"])
-
-        # Test request that should hit VK budget first
-        messages = [{"role": "user", "content": "Expensive request" * 50}]
-        headers = {"x-bf-vk": vk["value"]}
-
-        response = governance_client.chat_completion(
-            messages=messages,
-            model="gpt-4",  # More expensive model
-            headers=headers,
-            max_tokens=1000,
-        )
-
-        # Should be limited by VK budget (smallest in hierarchy)
-        # Actual behavior depends on implementation
-
-    @pytest.mark.integration
-    @pytest.mark.budget
-    @pytest.mark.usage_tracking
-    def test_budget_reset_functionality(self, governance_client, cleanup_tracker):
-        """Test budget reset functionality"""
-        # Create VK with small budget
-        vk_data = {
-            "name": generate_unique_name("Reset Budget VK"),
-            "budget": {"max_limit": 100, "reset_duration": "1h"},  # Small but not tiny
-        }
-        create_response = governance_client.create_virtual_key(vk_data)
-        assert_response_success(create_response, 201)
-        vk = create_response.json()["virtual_key"]
-        cleanup_tracker.add_virtual_key(vk["id"])
-
-        # Make a request to use some budget
-        messages = [{"role": "user", "content": "Hello"}]
-        headers = {"x-bf-vk": vk["value"]}
-
-        response = governance_client.chat_completion(
-            messages=messages,
-            model="openai/gpt-3.5-turbo",
-            headers=headers,
-            max_tokens=5,
-        )
-
-        # Reset the usage
-        reset_data = {"virtual_key_id": vk["id"]}
-        reset_response = governance_client.reset_usage(reset_data)
-        assert_response_success(reset_response, 200)
-
-        # Budget should be reset - could make another request
-        response2 = governance_client.chat_completion(
-            messages=messages,
-            model="openai/gpt-3.5-turbo",
-            headers=headers,
-            max_tokens=5,
-        )
-
-        # Should work after reset (unless other limits apply)
-        assert response2.status_code in [200, 429]  # Success or rate limited
-
-
-class TestRateLimitEnforcement:
-    """Test rate limiting enforcement during chat completions"""
-
-    @pytest.mark.integration
-    @pytest.mark.rate_limit
-    @pytest.mark.usage_tracking
-    def test_request_rate_limiting(self, governance_client, cleanup_tracker):
-        """Test request rate limiting"""
-        # Create VK with very restrictive request rate limit
-        vk_data = {
-            "name": generate_unique_name("Rate Limited VK"),
-            "rate_limit": {
-                "request_max_limit": 2,  # Only 2 requests allowed
-                "request_reset_duration": "1m",
-            },
-        }
-        create_response = governance_client.create_virtual_key(vk_data)
-        assert_response_success(create_response, 201)
-        rate_limited_vk = create_response.json()["virtual_key"]
-        cleanup_tracker.add_virtual_key(rate_limited_vk["id"])
-
-        messages = [{"role": "user", "content": "Hello"}]
-        headers = {"x-bf-vk": rate_limited_vk["value"]}
-
-        # Make requests up to the limit
-        responses = []
-        for i in range(3):  # Try 3 requests, limit is 2
-            response = governance_client.chat_completion(
-                messages=messages,
-                model="openai/gpt-3.5-turbo",
-                headers=headers,
-                max_tokens=5,
-            )
-            responses.append(response)
-            time.sleep(0.1)  # Small delay
-
-        # First 2 should succeed, 3rd should be rate limited
-        success_count = sum(1 for r in responses if r.status_code == 200)
-        rate_limited_count = sum(1 for r in responses if r.status_code == 429)
-
-        # Depending on implementation, might be exactly enforced or allow some variance
-        assert rate_limited_count > 0 or success_count <= 2
-
-    @pytest.mark.integration
-    @pytest.mark.rate_limit
-    @pytest.mark.usage_tracking
-    def test_token_rate_limiting(self, governance_client, cleanup_tracker):
-        """Test token rate limiting"""
-        # Create VK with restrictive token rate limit
-        vk_data = {
-            "name": generate_unique_name("Token Rate Limited VK"),
-            "rate_limit": {
-                "token_max_limit": 100,  # Only 100 tokens allowed
-                "token_reset_duration": "1m",
-            },
-        }
-        create_response = governance_client.create_virtual_key(vk_data)
-        assert_response_success(create_response, 201)
-        token_limited_vk = create_response.json()["virtual_key"]
-        cleanup_tracker.add_virtual_key(token_limited_vk["id"])
-
-        # Make request that would exceed token limit
-        messages = [
-            {"role": "user", "content": "Write a very long response about AI" * 10}
-        ]
-        headers = {"x-bf-vk": token_limited_vk["value"]}
-
-        response = governance_client.chat_completion(
-            messages=messages,
-            model="openai/gpt-3.5-turbo",
-            headers=headers,
-            max_tokens=500,  # Request more tokens than limit
-        )
-
-        # Should be limited by token rate limit
-        if response.status_code == 429:
-            error_data = response.json()
-            # Check if error mentions tokens or rate limit
-            error_text = error_data.get("error", "").lower()
-            assert "token" in error_text or "rate" in error_text
-
-    @pytest.mark.integration
-    @pytest.mark.rate_limit
-    @pytest.mark.usage_tracking
-    def test_independent_rate_limits(self, governance_client, cleanup_tracker):
-        """Test that token and request rate limits are independent"""
-        # Create VK with different token and request limits
-        vk_data = {
-            "name": generate_unique_name("Independent Limits VK"),
-            "rate_limit": {
-                "token_max_limit": 1000,
-                "token_reset_duration": "1h",
-                "request_max_limit": 5,
-                "request_reset_duration": "1m",
-            },
-        }
-        create_response = governance_client.create_virtual_key(vk_data)
-        assert_response_success(create_response, 201)
-        independent_vk = create_response.json()["virtual_key"]
-        cleanup_tracker.add_virtual_key(independent_vk["id"])
-
-        messages = [{"role": "user", "content": "Short"}]
-        headers = {"x-bf-vk": independent_vk["value"]}
-
-        # Make multiple small requests (should hit request limit before token limit)
-        responses = []
-        for i in range(10):  # More than request limit
-            response = governance_client.chat_completion(
-                messages=messages,
-                model="openai/gpt-3.5-turbo",
-                headers=headers,
-                max_tokens=5,  # Small token count
-            )
-            responses.append(response)
-            time.sleep(0.1)
-
-        # Should be limited by request count, not tokens
-        rate_limited_responses = [r for r in responses if r.status_code == 429]
-        assert len(rate_limited_responses) > 0
-
-    @pytest.mark.integration
-    @pytest.mark.rate_limit
-    @pytest.mark.usage_tracking
-    def test_rate_limit_reset(self, governance_client, cleanup_tracker):
-        """Test rate limit reset functionality"""
-        # Create VK with short reset duration for testing
-        vk_data = {
-            "name": generate_unique_name("Reset Test VK"),
-            "rate_limit": {
-                "request_max_limit": 1,
-                "request_reset_duration": "5s",  # Short duration for testing
-            },
-        }
-        create_response = governance_client.create_virtual_key(vk_data)
-        assert_response_success(create_response, 201)
-        reset_vk = create_response.json()["virtual_key"]
-        cleanup_tracker.add_virtual_key(reset_vk["id"])
-
-        messages = [{"role": "user", "content": "Hello"}]
-        headers = {"x-bf-vk": reset_vk["value"]}
-
-        # Make first request (should succeed)
-        response1 = governance_client.chat_completion(
-            messages=messages,
-            model="openai/gpt-3.5-turbo",
-            headers=headers,
-            max_tokens=5,
-        )
-
-        # Make second request immediately (should be rate limited)
-        response2 = governance_client.chat_completion(
-            messages=messages,
-            model="openai/gpt-3.5-turbo",
-            headers=headers,
-            max_tokens=5,
-        )
-
-        # Reset the rate limit manually
-        reset_data = {"virtual_key_id": reset_vk["id"]}
-        reset_response = governance_client.reset_usage(reset_data)
-        assert_response_success(reset_response, 200)
-
-        # Make third request after reset (should succeed)
-        response3 = governance_client.chat_completion(
-            messages=messages,
-            model="openai/gpt-3.5-turbo",
-            headers=headers,
-            max_tokens=5,
-        )
-
-        # Should work after reset
-        assert response3.status_code in [200, 429]  # Success or different limit
-
-
-class TestConcurrentUsageTracking:
-    """Test concurrent usage tracking and limits"""
-
-    @pytest.mark.integration
-    @pytest.mark.concurrency
-    @pytest.mark.usage_tracking
-    @pytest.mark.slow
-    def test_concurrent_requests_same_vk(self, governance_client, cleanup_tracker):
-        """Test concurrent requests using same VK"""
-        # Create VK with moderate limits
-        vk_data = {
-            "name": generate_unique_name("Concurrent VK"),
-            "rate_limit": {"request_max_limit": 10, "request_reset_duration": "1m"},
-            "budget": {"max_limit": 10000, "reset_duration": "1h"},
-        }
-        create_response = governance_client.create_virtual_key(vk_data)
-        assert_response_success(create_response, 201)
-        concurrent_vk = create_response.json()["virtual_key"]
-        cleanup_tracker.add_virtual_key(concurrent_vk["id"])
-
-        messages = [{"role": "user", "content": "Hello"}]
-        headers = {"x-bf-vk": concurrent_vk["value"]}
-
-        def make_request(index):
-            try:
-                response = governance_client.chat_completion(
-                    messages=messages,
-                    model="openai/gpt-3.5-turbo",
-                    headers=headers,
-                    max_tokens=5,
-                )
-                return response.status_code, index
-            except Exception as e:
-                return str(e), index
-
-        # Make 15 concurrent requests (more than rate limit)
-        with ThreadPoolExecutor(max_workers=15) as executor:
-            futures = [executor.submit(make_request, i) for i in range(15)]
-            results = [future.result() for future in futures]
-
-        # Count success vs rate limited responses
-        success_codes = [r[0] for r in results if r[0] == 200]
-        rate_limited_codes = [r[0] for r in results if r[0] == 429]
-
-        # Should have some successful and some rate limited
-        total_responses = len(success_codes) + len(rate_limited_codes)
-        assert total_responses > 0
-
-        # Rate limiting should have kicked in for some requests
-        assert len(success_codes) <= 10  # Shouldn't exceed rate limit
-
-    @pytest.mark.integration
-    @pytest.mark.concurrency
-    @pytest.mark.usage_tracking
-    @pytest.mark.slow
-    def test_concurrent_budget_tracking(self, governance_client, cleanup_tracker):
-        """Test concurrent budget tracking accuracy"""
-        # Create VK with small budget for testing
-        vk_data = {
-            "name": generate_unique_name("Budget Tracking VK"),
-            "budget": {"max_limit": 1000, "reset_duration": "1h"},  # Small budget
-        }
-        create_response = governance_client.create_virtual_key(vk_data)
-        assert_response_success(create_response, 201)
-        budget_vk = create_response.json()["virtual_key"]
-        cleanup_tracker.add_virtual_key(budget_vk["id"])
-
-        messages = [{"role": "user", "content": "Count to 10"}]
-        headers = {"x-bf-vk": budget_vk["value"]}
-
-        def make_budget_request(index):
-            try:
-                response = governance_client.chat_completion(
-                    messages=messages,
-                    model="openai/gpt-3.5-turbo",
-                    headers=headers,
-                    max_tokens=20,
-                )
-                return (
-                    response.status_code,
-                    index,
-                    response.json() if response.status_code == 200 else None,
-                )
-            except Exception as e:
-                return str(e), index, None
-
-        # Make concurrent requests that should consume budget
-        with ThreadPoolExecutor(max_workers=5) as executor:
-            futures = [executor.submit(make_budget_request, i) for i in range(5)]
-            results = [future.result() for future in futures]
-
-        # Check budget tracking consistency
-        success_count = sum(1 for r in results if r[0] == 200)
-        budget_exceeded_count = sum(1 for r in results if r[0] == 402)
-
-        # Should have proper budget enforcement
-        assert success_count + budget_exceeded_count > 0
-
-
-class TestStreamingIntegration:
-    """Test streaming integration with governance"""
-
-    @pytest.mark.integration
-    @pytest.mark.usage_tracking
-    def test_streaming_chat_completion_with_governance(
-        self, governance_client, sample_virtual_key
-    ):
-        """Test streaming chat completion with governance headers"""
-        messages = [{"role": "user", "content": "Count from 1 to 5"}]
-        headers = {"x-bf-vk": sample_virtual_key["value"]}
-
-        response = governance_client.chat_completion(
-            messages=messages,
-            model="openai/gpt-3.5-turbo",
-            headers=headers,
-            max_tokens=50,
-            stream=True,
-        )
-
-        # Streaming should work with governance
-        if response.status_code == 200:
-            # For streaming, response should be text/event-stream
-            content_type = response.headers.get("content-type", "")
-            assert (
-                "text/event-stream" in content_type
-                or "application/json" in content_type
-            )
-        else:
-            # Should be properly governed (rate limited, budget exceeded, etc.)
-            assert response.status_code in [402, 403, 429]
-
-    @pytest.mark.integration
-    @pytest.mark.usage_tracking
-    @pytest.mark.rate_limit
-    def test_streaming_rate_limit_enforcement(self, governance_client, cleanup_tracker):
-        """Test rate limiting during streaming requests"""
-        # Create VK with token rate limit
-        vk_data = {
-            "name": generate_unique_name("Streaming Rate Limit VK"),
-            "rate_limit": {"token_max_limit": 50, "token_reset_duration": "1m"},
-        }
-        create_response = governance_client.create_virtual_key(vk_data)
-        assert_response_success(create_response, 201)
-        streaming_vk = create_response.json()["virtual_key"]
-        cleanup_tracker.add_virtual_key(streaming_vk["id"])
-
-        messages = [{"role": "user", "content": "Write a long story about AI"}]
-        headers = {"x-bf-vk": streaming_vk["value"]}
-
-        response = governance_client.chat_completion(
-            messages=messages,
-            model="openai/gpt-3.5-turbo",
-            headers=headers,
-            max_tokens=200,  # More than token limit
-            stream=True,
-        )
-
-        # Should be limited by token rate limit
-        if response.status_code == 429:
-            error_data = response.json()
-            assert "token" in error_data.get("error", "").lower()
-
-
-class TestProviderModelValidation:
-    """Test provider and model validation during integration"""
-
-    @pytest.mark.integration
-    @pytest.mark.validation
-    def test_anthropic_model_integration(self, governance_client, cleanup_tracker):
-        """Test integration with Anthropic models"""
-        # Create VK allowing Anthropic
-        vk_data = {
-            "name": generate_unique_name("Anthropic VK"),
-            "allowed_providers": ["anthropic"],
-            "allowed_models": ["claude-3-5-sonnet-20240620"],
-        }
-        create_response = governance_client.create_virtual_key(vk_data)
-        assert_response_success(create_response, 201)
-        anthropic_vk = create_response.json()["virtual_key"]
-        cleanup_tracker.add_virtual_key(anthropic_vk["id"])
-
-        messages = [{"role": "user", "content": "Hello Claude"}]
-        headers = {"x-bf-vk": anthropic_vk["value"]}
-
-        response = governance_client.chat_completion(
-            messages=messages,
-            model="claude-3-5-sonnet-20240620",
-            headers=headers,
-            max_tokens=10,
-        )
-
-        # Should work if Anthropic is properly configured
-        assert response.status_code in [200, 400, 402, 429, 503]
-
-    @pytest.mark.integration
-    @pytest.mark.validation
-    def test_openai_model_integration(self, governance_client, cleanup_tracker):
-        """Test integration with OpenAI models"""
-        # Create VK allowing OpenAI
-        vk_data = {
-            "name": generate_unique_name("OpenAI VK"),
-            "allowed_providers": ["openai"],
-            "allowed_models": ["gpt-4", "gpt-3.5-turbo"],
-        }
-        create_response = governance_client.create_virtual_key(vk_data)
-        assert_response_success(create_response, 201)
-        openai_vk = create_response.json()["virtual_key"]
-        cleanup_tracker.add_virtual_key(openai_vk["id"])
-
-        messages = [{"role": "user", "content": "Hello GPT"}]
-        headers = {"x-bf-vk": openai_vk["value"]}
-
-        # Test GPT-4
-        response = governance_client.chat_completion(
-            messages=messages, model="gpt-4", headers=headers, max_tokens=10
-        )
-
-        # Should work if OpenAI is properly configured
-        assert response.status_code in [200, 400, 402, 429, 503]
-
-    @pytest.mark.integration
-    @pytest.mark.validation
-    def test_disallowed_provider_model_combination(
-        self, governance_client, cleanup_tracker
-    ):
-        """Test disallowed provider/model combinations"""
-        # Create VK only allowing OpenAI
-        vk_data = {
-            "name": generate_unique_name("OpenAI Only VK"),
-            "allowed_providers": ["openai"],
-            "allowed_models": ["gpt-4"],
-        }
-        create_response = governance_client.create_virtual_key(vk_data)
-        assert_response_success(create_response, 201)
-        restricted_vk = create_response.json()["virtual_key"]
-        cleanup_tracker.add_virtual_key(restricted_vk["id"])
-
-        messages = [{"role": "user", "content": "Hello"}]
-        headers = {"x-bf-vk": restricted_vk["value"]}
-
-        # Try to use Anthropic model (should fail)
-        response = governance_client.chat_completion(
-            messages=messages,
-            model="claude-3-5-sonnet-20240620",
-            headers=headers,
-            max_tokens=10,
-        )
-
-        # Should be rejected for disallowed model
-        assert response.status_code in [400, 403]
-
-
-class TestErrorHandlingAndEdgeCases:
-    """Test error handling and edge cases in usage tracking"""
-
-    @pytest.mark.integration
-    @pytest.mark.edge_cases
-    def test_malformed_vk_header(self, governance_client):
-        """Test malformed VK header handling"""
-        messages = [{"role": "user", "content": "Hello"}]
-
-        malformed_headers = [
-            {"x-bf-vk": ""},  # Empty
-            {"x-bf-vk": " "},  # Whitespace
-            {"x-bf-vk": "short"},  # Too short
-            {"x-bf-vk": "x" * 100},  # Too long
-            {"x-bf-vk": "invalid-characters-#@!"},  # Invalid chars
-        ]
-
-        for headers in malformed_headers:
-            response = governance_client.chat_completion(
-                messages=messages,
-                model="openai/gpt-3.5-turbo",
-                headers=headers,
-                max_tokens=5,
-            )
-
-            # Should properly reject malformed headers
-            assert response.status_code in [400, 403]
-
-    @pytest.mark.integration
-    @pytest.mark.edge_cases
-    def test_concurrent_vk_updates_during_requests(
-        self, governance_client, cleanup_tracker
-    ):
-        """Test VK updates during active requests"""
-        # Create VK
-        vk_data = {"name": generate_unique_name("Update Test VK")}
-        create_response = governance_client.create_virtual_key(vk_data)
-        assert_response_success(create_response, 201)
-        update_vk = create_response.json()["virtual_key"]
-        cleanup_tracker.add_virtual_key(update_vk["id"])
-
-        messages = [{"role": "user", "content": "Hello"}]
-        headers = {"x-bf-vk": update_vk["value"]}
-
-        def make_request():
-            return governance_client.chat_completion(
-                messages=messages,
-                model="openai/gpt-3.5-turbo",
-                headers=headers,
-                max_tokens=5,
-            )
-
-        def update_vk_config():
-            update_data = {"description": "Updated during request"}
-            return governance_client.update_virtual_key(update_vk["id"], update_data)
-
-        # Start request and update concurrently
-        with ThreadPoolExecutor(max_workers=2) as executor:
-            request_future = executor.submit(make_request)
-            update_future = executor.submit(update_vk_config)
-
-            request_response = request_future.result()
-            update_response = update_future.result()
-
-        # Both should handle gracefully
-        assert request_response.status_code in [200, 402, 403, 429]
-        assert_response_success(update_response, 200)
-
-    @pytest.mark.integration
-    @pytest.mark.edge_cases
-    def test_extreme_token_counts(self, governance_client, sample_virtual_key):
-        """Test extreme token count scenarios"""
-        headers = {"x-bf-vk": sample_virtual_key["value"]}
-
-        # Test with 0 max_tokens
-        response = governance_client.chat_completion(
-            messages=[{"role": "user", "content": "Hello"}],
-            model="openai/gpt-3.5-turbo",
-            headers=headers,
-            max_tokens=0,
-        )
-
-        # Should handle 0 tokens gracefully
-        assert response.status_code in [200, 400]
-
-        # Test with very large max_tokens
-        response = governance_client.chat_completion(
-            messages=[{"role": "user", "content": "Hello"}],
-            model="openai/gpt-3.5-turbo",
-            headers=headers,
-            max_tokens=100000,  # Very large
-        )
-
-        # Should handle large token requests
-        assert response.status_code in [200, 400, 402, 429]
-
-    @pytest.mark.integration
-    @pytest.mark.edge_cases
-    def test_empty_and_large_messages(self, governance_client, sample_virtual_key):
-        """Test empty and very large message scenarios"""
-        headers = {"x-bf-vk": sample_virtual_key["value"]}
-
-        # Test with empty message
-        response = governance_client.chat_completion(
-            messages=[{"role": "user", "content": ""}],
-            model="openai/gpt-3.5-turbo",
-            headers=headers,
-            max_tokens=5,
-        )
-
-        # Should handle empty messages
-        assert response.status_code in [200, 400]
-
-        # Test with very large message
-        large_content = "This is a very long message. " * 1000
-        response = governance_client.chat_completion(
-            messages=[{"role": "user", "content": large_content}],
-            model="openai/gpt-3.5-turbo",
-            headers=headers,
-            max_tokens=5,
-        )
-
-        # Should handle large messages
-        assert response.status_code in [200, 400, 402, 429]
-
-
-class TestPerformanceAndScaling:
-    """Test performance and scaling of usage tracking"""
-
-    @pytest.mark.integration
-    @pytest.mark.performance
-    @pytest.mark.slow
-    def test_high_frequency_requests(self, governance_client, cleanup_tracker):
-        """Test high frequency requests performance"""
-        # Create VK with high limits
-        vk_data = {
-            "name": generate_unique_name("High Frequency VK"),
-            "rate_limit": {
-                "request_max_limit": 1000,
-                "request_reset_duration": "1h",
-                "token_max_limit": 100000,
-                "token_reset_duration": "1h",
-            },
-            "budget": {"max_limit": 1000000, "reset_duration": "1h"},
-        }
-        create_response = governance_client.create_virtual_key(vk_data)
-        assert_response_success(create_response, 201)
-        high_freq_vk = create_response.json()["virtual_key"]
-        cleanup_tracker.add_virtual_key(high_freq_vk["id"])
-
-        messages = [{"role": "user", "content": "Hi"}]
-        headers = {"x-bf-vk": high_freq_vk["value"]}
-
-        # Measure performance of rapid requests
-        start_time = time.time()
-        responses = []
-
-        for i in range(20):  # Make 20 rapid requests
-            response = governance_client.chat_completion(
-                messages=messages,
-                model="openai/gpt-3.5-turbo",
-                headers=headers,
-                max_tokens=1,
-            )
-            responses.append(response.status_code)
-            if i % 5 == 0:
-                time.sleep(0.1)  # Brief pause every 5 requests
-
-        total_time = time.time() - start_time
-
-        # Performance assertions
-        assert total_time < 30.0, f"20 requests took too long: {total_time}s"
-
-        # Most requests should succeed (unless rate limited)
-        success_count = sum(1 for code in responses if code == 200)
-        print(
-            f"High frequency test: {success_count}/20 requests succeeded in {total_time:.2f}s"
-        )
-
-    @pytest.mark.integration
-    @pytest.mark.performance
-    @pytest.mark.slow
-    def test_usage_stats_performance(self, governance_client, cleanup_tracker):
-        """Test usage statistics endpoint performance"""
-        # Create multiple VKs and make requests
-        vk_ids = []
-        for i in range(10):
-            vk_data = {"name": generate_unique_name(f"Stats Perf VK {i}")}
-            response = governance_client.create_virtual_key(vk_data)
-            assert_response_success(response, 201)
-            vk_id = response.json()["virtual_key"]["id"]
-            vk_ids.append(vk_id)
-            cleanup_tracker.add_virtual_key(vk_id)
-
-        # Test general stats performance
-        start_time = time.time()
-        response = governance_client.get_usage_stats()
-        stats_time = time.time() - start_time
-
-        assert_response_success(response, 200)
-        assert stats_time < 2.0, f"Usage stats took too long: {stats_time}s"
-
-        # Test individual VK stats performance
-        start_time = time.time()
-        for vk_id in vk_ids[:5]:  # Test 5 VKs
-            response = governance_client.get_usage_stats(virtual_key_id=vk_id)
-            assert_response_success(response, 200)
-
-        individual_stats_time = time.time() - start_time
-        assert (
-            individual_stats_time < 5.0
-        ), f"Individual VK stats took too long: {individual_stats_time}s"
-
-        print(
-            f"Performance test: General stats: {stats_time:.2f}s, 5 individual stats: {individual_stats_time:.2f}s"
-        )
diff --git a/tests/governance/test_virtual_keys_crud.py b/tests/governance/test_virtual_keys_crud.py
deleted file mode 100644
index f2b0259566..0000000000
--- a/tests/governance/test_virtual_keys_crud.py
+++ /dev/null
@@ -1,928 +0,0 @@
-"""
-Comprehensive Virtual Key CRUD Tests for Bifrost Governance Plugin
-
-This module provides exhaustive testing of Virtual Key operations including:
-- Complete CRUD lifecycle testing
-- Comprehensive field update testing (individual and batch)
-- Mutual exclusivity validation (team_id vs customer_id)
-- Budget and rate limit management
-- Relationship testing with teams and customers
-- Edge cases and validation scenarios
-- Concurrency and race condition testing
-"""
-
-import pytest
-import time
-import uuid
-from typing import Dict, Any, List
-from concurrent.futures import ThreadPoolExecutor
-import copy
-
-from conftest import (
-    assert_response_success,
-    verify_unchanged_fields,
-    generate_unique_name,
-    create_complete_virtual_key_data,
-    verify_entity_relationships,
-    deep_compare_entities,
-)
-
-
-class TestVirtualKeyBasicCRUD:
-    """Test basic CRUD operations for Virtual Keys"""
-
-    @pytest.mark.virtual_keys
-    @pytest.mark.crud
-    @pytest.mark.smoke
-    def test_vk_create_minimal(self, governance_client, cleanup_tracker):
-        """Test creating VK with minimal required data"""
-        data = {"name": generate_unique_name("Minimal VK")}
-
-        response = governance_client.create_virtual_key(data)
-        assert_response_success(response, 201)
-
-        vk_data = response.json()["virtual_key"]
-        cleanup_tracker.add_virtual_key(vk_data["id"])
-
-        # Verify required fields
-        assert vk_data["name"] == data["name"]
-        assert vk_data["value"] is not None  # Auto-generated
-        assert vk_data["is_active"] is True  # Default value
-        assert vk_data["id"] is not None
-        assert vk_data["created_at"] is not None
-        assert vk_data["updated_at"] is not None
-
-        # Verify optional fields are None/empty
-        assert vk_data["allowed_models"] is None
-        assert vk_data["allowed_providers"] is None
-
-    @pytest.mark.virtual_keys
-    @pytest.mark.crud
-    def test_vk_create_complete(self, governance_client, cleanup_tracker):
-        """Test creating VK with all possible fields"""
-        data = create_complete_virtual_key_data()
-
-        response = governance_client.create_virtual_key(data)
-        assert_response_success(response, 201)
-
-        vk_data = response.json()["virtual_key"]
-        cleanup_tracker.add_virtual_key(vk_data["id"])
-
-        # Verify all fields are set correctly
-        assert vk_data["name"] == data["name"]
-        assert vk_data["description"] == data["description"]
-        assert vk_data["allowed_models"] == data["allowed_models"]
-        assert vk_data["allowed_providers"] == data["allowed_providers"]
-        assert vk_data["is_active"] == data["is_active"]
-
-        # Verify budget was created
-        assert vk_data["budget"] is not None
-        assert vk_data["budget"]["max_limit"] == data["budget"]["max_limit"]
-        assert vk_data["budget"]["reset_duration"] == data["budget"]["reset_duration"]
-
-        # Verify rate limit was created
-        assert vk_data["rate_limit"] is not None
-        assert (
-            vk_data["rate_limit"]["token_max_limit"]
-            == data["rate_limit"]["token_max_limit"]
-        )
-        assert (
-            vk_data["rate_limit"]["request_max_limit"]
-            == data["rate_limit"]["request_max_limit"]
-        )
-
-    @pytest.mark.virtual_keys
-    @pytest.mark.crud
-    def test_vk_create_with_team(self, governance_client, cleanup_tracker, sample_team):
-        """Test creating VK associated with a team"""
-        data = {"name": generate_unique_name("Team VK"), "team_id": sample_team["id"]}
-
-        response = governance_client.create_virtual_key(data)
-        assert_response_success(response, 201)
-
-        vk_data = response.json()["virtual_key"]
-        cleanup_tracker.add_virtual_key(vk_data["id"])
-
-        # Verify team association
-        assert vk_data["team_id"] == sample_team["id"]
-        assert vk_data.get("customer_id") is None
-        assert vk_data["team"] is not None
-        assert vk_data["team"]["id"] == sample_team["id"]
-
-    @pytest.mark.virtual_keys
-    @pytest.mark.crud
-    def test_vk_create_with_customer(
-        self, governance_client, cleanup_tracker, sample_customer
-    ):
-        """Test creating VK associated with a customer"""
-        data = {
-            "name": generate_unique_name("Customer VK"),
-            "customer_id": sample_customer["id"],
-        }
-
-        response = governance_client.create_virtual_key(data)
-        assert_response_success(response, 201)
-
-        vk_data = response.json()["virtual_key"]
-        cleanup_tracker.add_virtual_key(vk_data["id"])
-
-        # Verify customer association
-        assert vk_data["customer_id"] == sample_customer["id"]
-        assert vk_data.get("team_id") is None
-        assert vk_data["customer"] is not None
-        assert vk_data["customer"]["id"] == sample_customer["id"]
-
-    @pytest.mark.virtual_keys
-    @pytest.mark.crud
-    @pytest.mark.mutual_exclusivity
-    def test_vk_create_mutual_exclusivity_violation(
-        self, governance_client, sample_team, sample_customer
-    ):
-        """Test that VK cannot be created with both team_id and customer_id"""
-        data = {
-            "name": generate_unique_name("Invalid VK"),
-            "team_id": sample_team["id"],
-            "customer_id": sample_customer["id"],
-        }
-
-        response = governance_client.create_virtual_key(data)
-        assert response.status_code == 400
-        error_data = response.json()
-        assert "cannot be attached to both" in error_data["error"].lower()
-
-    @pytest.mark.virtual_keys
-    @pytest.mark.crud
-    def test_vk_list_all(self, governance_client, sample_virtual_key):
-        """Test listing all virtual keys"""
-        response = governance_client.list_virtual_keys()
-        assert_response_success(response, 200)
-
-        data = response.json()
-        assert "virtual_keys" in data
-        assert "count" in data
-        assert isinstance(data["virtual_keys"], list)
-        assert data["count"] >= 1
-
-        # Find our test VK
-        test_vk = next(
-            (vk for vk in data["virtual_keys"] if vk["id"] == sample_virtual_key["id"]),
-            None,
-        )
-        assert test_vk is not None
-
-    @pytest.mark.virtual_keys
-    @pytest.mark.crud
-    def test_vk_get_by_id(self, governance_client, sample_virtual_key):
-        """Test getting VK by ID with relationships loaded"""
-        response = governance_client.get_virtual_key(sample_virtual_key["id"])
-        assert_response_success(response, 200)
-
-        vk_data = response.json()["virtual_key"]
-        assert vk_data["id"] == sample_virtual_key["id"]
-        assert vk_data["name"] == sample_virtual_key["name"]
-
-    @pytest.mark.virtual_keys
-    @pytest.mark.crud
-    def test_vk_get_nonexistent(self, governance_client):
-        """Test getting non-existent VK returns 404"""
-        fake_id = str(uuid.uuid4())
-        response = governance_client.get_virtual_key(fake_id)
-        assert response.status_code == 404
-
-    @pytest.mark.virtual_keys
-    @pytest.mark.crud
-    def test_vk_delete(self, governance_client, cleanup_tracker):
-        """Test deleting a virtual key"""
-        # Create VK to delete
-        data = {"name": generate_unique_name("Delete Test VK")}
-        create_response = governance_client.create_virtual_key(data)
-        assert_response_success(create_response, 201)
-        vk_id = create_response.json()["virtual_key"]["id"]
-
-        # Delete VK
-        delete_response = governance_client.delete_virtual_key(vk_id)
-        assert_response_success(delete_response, 200)
-
-        # Verify VK is gone
-        get_response = governance_client.get_virtual_key(vk_id)
-        assert get_response.status_code == 404
-
-    @pytest.mark.virtual_keys
-    @pytest.mark.crud
-    def test_vk_delete_nonexistent(self, governance_client):
-        """Test deleting non-existent VK returns 404"""
-        fake_id = str(uuid.uuid4())
-        response = governance_client.delete_virtual_key(fake_id)
-        assert response.status_code == 404
-
-
-class TestVirtualKeyValidation:
-    """Test validation rules for Virtual Key operations"""
-
-    @pytest.mark.virtual_keys
-    @pytest.mark.validation
-    def test_vk_create_missing_name(self, governance_client):
-        """Test creating VK without name fails"""
-        data = {"description": "VK without name"}
-        response = governance_client.create_virtual_key(data)
-        assert response.status_code == 400
-
-    @pytest.mark.virtual_keys
-    @pytest.mark.validation
-    def test_vk_create_empty_name(self, governance_client):
-        """Test creating VK with empty name fails"""
-        data = {"name": ""}
-        response = governance_client.create_virtual_key(data)
-        assert response.status_code == 400
-
-    @pytest.mark.virtual_keys
-    @pytest.mark.validation
-    def test_vk_create_invalid_team_id(self, governance_client):
-        """Test creating VK with non-existent team_id"""
-        data = {
-            "name": generate_unique_name("Invalid Team VK"),
-            "team_id": str(uuid.uuid4()),
-        }
-        response = governance_client.create_virtual_key(data)
-        # Note: Depending on implementation, this might succeed with warning or fail
-        # Adjust assertion based on actual API behavior
-
-    @pytest.mark.virtual_keys
-    @pytest.mark.validation
-    def test_vk_create_invalid_customer_id(self, governance_client):
-        """Test creating VK with non-existent customer_id"""
-        data = {
-            "name": generate_unique_name("Invalid Customer VK"),
-            "customer_id": str(uuid.uuid4()),
-        }
-        response = governance_client.create_virtual_key(data)
-        # Note: Adjust assertion based on actual API behavior
-
-    @pytest.mark.virtual_keys
-    @pytest.mark.validation
-    def test_vk_create_invalid_json(self, governance_client):
-        """Test creating VK with malformed JSON"""
-        # This would be tested at the HTTP level, but pytest requests handles JSON encoding
-        # So we test with invalid data types instead
-        data = {
-            "name": 123,  # Should be string
-            "is_active": "not_boolean",  # Should be boolean
-        }
-        response = governance_client.create_virtual_key(data)
-        assert response.status_code == 400
-
-
-class TestVirtualKeyFieldUpdates:
-    """Comprehensive tests for Virtual Key field updates"""
-
-    @pytest.mark.virtual_keys
-    @pytest.mark.field_updates
-    def test_vk_update_individual_fields(
-        self, governance_client, cleanup_tracker, sample_team, sample_customer
-    ):
-        """Test updating each VK field individually"""
-        # Create complete VK for testing
-        original_data = create_complete_virtual_key_data()
-        create_response = governance_client.create_virtual_key(original_data)
-        assert_response_success(create_response, 201)
-        vk_id = create_response.json()["virtual_key"]["id"]
-        cleanup_tracker.add_virtual_key(vk_id)
-
-        # Get original state
-        original_response = governance_client.get_virtual_key(vk_id)
-        original_vk = original_response.json()["virtual_key"]
-
-        # Test individual field updates
-        field_test_cases = [
-            {
-                "field": "description",
-                "update_data": {"description": "Updated description"},
-                "expected_value": "Updated description",
-            },
-            {
-                "field": "allowed_models",
-                "update_data": {"allowed_models": ["gpt-4", "claude-3-opus"]},
-                "expected_value": ["gpt-4", "claude-3-opus"],
-            },
-            {
-                "field": "allowed_providers",
-                "update_data": {"allowed_providers": ["openai"]},
-                "expected_value": ["openai"],
-            },
-            {
-                "field": "is_active",
-                "update_data": {"is_active": False},
-                "expected_value": False,
-            },
-            {
-                "field": "team_id",
-                "update_data": {"team_id": sample_team["id"]},
-                "expected_value": sample_team["id"],
-                "exclude_from_unchanged_check": [
-                    "team_id",
-                    "customer_id",
-                    "team",
-                    "customer",
-                ],
-            },
-            {
-                "field": "customer_id",
-                "update_data": {"customer_id": sample_customer["id"]},
-                "expected_value": sample_customer["id"],
-                "exclude_from_unchanged_check": [
-                    "team_id",
-                    "customer_id",
-                    "team",
-                    "customer",
-                ],
-            },
-        ]
-
-        for test_case in field_test_cases:
-            # Reset VK to original state by updating all fields back
-            reset_data = {
-                "description": original_vk.get("description", ""),
-                "allowed_models": original_vk["allowed_models"],
-                "allowed_providers": original_vk["allowed_providers"],
-                "is_active": original_vk["is_active"],
-                "team_id": original_vk.get("team_id"),
-                "customer_id": original_vk.get("customer_id"),
-            }
-            governance_client.update_virtual_key(vk_id, reset_data)
-
-            # Perform field update
-            response = governance_client.update_virtual_key(
-                vk_id, test_case["update_data"]
-            )
-            assert_response_success(response, 200)
-            updated_vk = response.json()["virtual_key"]
-
-            # Verify target field was updated
-            field_parts = test_case["field"].split(".")
-            current_value = updated_vk
-            for part in field_parts:
-                current_value = current_value[part]
-            assert (
-                current_value == test_case["expected_value"]
-            ), f"Field {test_case['field']} not updated correctly"
-
-            # Verify other fields unchanged (if specified)
-            if test_case.get("verify_unchanged", True):
-                exclude_fields = test_case.get(
-                    "exclude_from_unchanged_check", [test_case["field"]]
-                )
-                verify_unchanged_fields(updated_vk, original_vk, exclude_fields)
-
-    @pytest.mark.virtual_keys
-    @pytest.mark.field_updates
-    def test_vk_budget_updates(self, governance_client, cleanup_tracker):
-        """Test comprehensive budget creation, update, and modification"""
-        # Create VK without budget
-        data = {"name": generate_unique_name("Budget Test VK")}
-        create_response = governance_client.create_virtual_key(data)
-        assert_response_success(create_response, 201)
-        vk_id = create_response.json()["virtual_key"]["id"]
-        cleanup_tracker.add_virtual_key(vk_id)
-
-        # Test 1: Add budget to VK without budget
-        budget_data = {"max_limit": 10000, "reset_duration": "1h"}
-        response = governance_client.update_virtual_key(vk_id, {"budget": budget_data})
-        assert_response_success(response, 200)
-        updated_vk = response.json()["virtual_key"]
-        assert updated_vk["budget"]["max_limit"] == 10000
-        assert updated_vk["budget"]["reset_duration"] == "1h"
-        assert updated_vk["budget_id"] is not None
-
-        # Test 2: Update existing budget completely
-        new_budget_data = {"max_limit": 20000, "reset_duration": "2h"}
-        response = governance_client.update_virtual_key(
-            vk_id, {"budget": new_budget_data}
-        )
-        assert_response_success(response, 200)
-        updated_vk = response.json()["virtual_key"]
-        assert updated_vk["budget"]["max_limit"] == 20000
-        assert updated_vk["budget"]["reset_duration"] == "2h"
-
-        # Test 3: Partial budget update (only max_limit)
-        response = governance_client.update_virtual_key(
-            vk_id, {"budget": {"max_limit": 30000}}
-        )
-        assert_response_success(response, 200)
-        updated_vk = response.json()["virtual_key"]
-        assert updated_vk["budget"]["max_limit"] == 30000
-        assert updated_vk["budget"]["reset_duration"] == "2h"  # Should remain unchanged
-
-        # Test 4: Partial budget update (only reset_duration)
-        response = governance_client.update_virtual_key(
-            vk_id, {"budget": {"reset_duration": "24h"}}
-        )
-        assert_response_success(response, 200)
-        updated_vk = response.json()["virtual_key"]
-        assert updated_vk["budget"]["max_limit"] == 30000  # Should remain unchanged
-        assert updated_vk["budget"]["reset_duration"] == "24h"
-
-    @pytest.mark.virtual_keys
-    @pytest.mark.field_updates
-    def test_vk_rate_limit_updates(self, governance_client, cleanup_tracker):
-        """Test comprehensive rate limit creation, update, and field-level modifications"""
-        # Create VK without rate limit
-        data = {"name": generate_unique_name("Rate Limit Test VK")}
-        create_response = governance_client.create_virtual_key(data)
-        assert_response_success(create_response, 201)
-        vk_id = create_response.json()["virtual_key"]["id"]
-        cleanup_tracker.add_virtual_key(vk_id)
-
-        # Test 1: Add rate limit to VK
-        rate_limit_data = {
-            "token_max_limit": 1000,
-            "token_reset_duration": "1m",
-            "request_max_limit": 100,
-            "request_reset_duration": "1h",
-        }
-        response = governance_client.update_virtual_key(
-            vk_id, {"rate_limit": rate_limit_data}
-        )
-        assert_response_success(response, 200)
-        updated_vk = response.json()["virtual_key"]
-        assert updated_vk["rate_limit"]["token_max_limit"] == 1000
-        assert updated_vk["rate_limit"]["request_max_limit"] == 100
-        assert updated_vk["rate_limit_id"] is not None
-
-        # Test 2: Update only token limits
-        response = governance_client.update_virtual_key(
-            vk_id,
-            {"rate_limit": {"token_max_limit": 2000, "token_reset_duration": "2m"}},
-        )
-        assert_response_success(response, 200)
-        updated_vk = response.json()["virtual_key"]
-        assert updated_vk["rate_limit"]["token_max_limit"] == 2000
-        assert updated_vk["rate_limit"]["token_reset_duration"] == "2m"
-        assert updated_vk["rate_limit"]["request_max_limit"] == 100  # Unchanged
-        assert updated_vk["rate_limit"]["request_reset_duration"] == "1h"  # Unchanged
-
-        # Test 3: Update only request limits
-        response = governance_client.update_virtual_key(
-            vk_id,
-            {"rate_limit": {"request_max_limit": 200, "request_reset_duration": "2h"}},
-        )
-        assert_response_success(response, 200)
-        updated_vk = response.json()["virtual_key"]
-        assert updated_vk["rate_limit"]["token_max_limit"] == 2000  # Unchanged
-        assert updated_vk["rate_limit"]["request_max_limit"] == 200
-        assert updated_vk["rate_limit"]["request_reset_duration"] == "2h"
-
-        # Test 4: Partial rate limit update (single field)
-        response = governance_client.update_virtual_key(
-            vk_id, {"rate_limit": {"token_max_limit": 5000}}
-        )
-        assert_response_success(response, 200)
-        updated_vk = response.json()["virtual_key"]
-        assert updated_vk["rate_limit"]["token_max_limit"] == 5000
-        assert updated_vk["rate_limit"]["token_reset_duration"] == "2m"  # Unchanged
-        assert updated_vk["rate_limit"]["request_max_limit"] == 200  # Unchanged
-        assert updated_vk["rate_limit"]["request_reset_duration"] == "2h"  # Unchanged
-
-    @pytest.mark.virtual_keys
-    @pytest.mark.field_updates
-    def test_vk_multiple_field_updates(self, governance_client, cleanup_tracker):
-        """Test updating multiple fields simultaneously"""
-        # Create VK with some initial data
-        initial_data = {
-            "name": generate_unique_name("Multi-Field Test VK"),
-            "description": "Initial description",
-            "allowed_models": ["gpt-3.5-turbo"],
-            "is_active": True,
-        }
-        create_response = governance_client.create_virtual_key(initial_data)
-        assert_response_success(create_response, 201)
-        vk_id = create_response.json()["virtual_key"]["id"]
-        cleanup_tracker.add_virtual_key(vk_id)
-
-        # Update multiple fields at once
-        update_data = {
-            "description": "Updated description via multi-field",
-            "allowed_models": ["gpt-4", "claude-3-5-sonnet-20240620"],
-            "allowed_providers": ["openai", "anthropic"],
-            "is_active": False,
-            "budget": {"max_limit": 50000, "reset_duration": "1d"},
-            "rate_limit": {
-                "token_max_limit": 5000,
-                "request_max_limit": 500,
-                "token_reset_duration": "1h",
-                "request_reset_duration": "1h",
-            },
-        }
-
-        response = governance_client.update_virtual_key(vk_id, update_data)
-        assert_response_success(response, 200)
-
-        updated_vk = response.json()["virtual_key"]
-        assert updated_vk["description"] == "Updated description via multi-field"
-        assert updated_vk["allowed_models"] == ["gpt-4", "claude-3-5-sonnet-20240620"]
-        assert updated_vk["allowed_providers"] == ["openai", "anthropic"]
-        assert updated_vk["is_active"] is False
-        assert updated_vk["budget"]["max_limit"] == 50000
-        assert updated_vk["rate_limit"]["token_max_limit"] == 5000
-
-    @pytest.mark.virtual_keys
-    @pytest.mark.field_updates
-    @pytest.mark.mutual_exclusivity
-    def test_vk_relationship_updates(
-        self, governance_client, cleanup_tracker, sample_team, sample_customer
-    ):
-        """Test updating VK relationships with mutual exclusivity validation"""
-        # Create standalone VK
-        data = {"name": generate_unique_name("Relationship Test VK")}
-        create_response = governance_client.create_virtual_key(data)
-        assert_response_success(create_response, 201)
-        vk_id = create_response.json()["virtual_key"]["id"]
-        cleanup_tracker.add_virtual_key(vk_id)
-
-        # Test 1: Add team relationship
-        response = governance_client.update_virtual_key(
-            vk_id, {"team_id": sample_team["id"]}
-        )
-        assert_response_success(response, 200)
-        updated_vk = response.json()["virtual_key"]
-        assert updated_vk["team_id"] == sample_team["id"]
-        assert updated_vk.get("customer_id") is None
-        assert updated_vk["team"]["id"] == sample_team["id"]
-
-        # Test 2: Switch to customer (should clear team)
-        response = governance_client.update_virtual_key(
-            vk_id, {"customer_id": sample_customer["id"]}
-        )
-        assert_response_success(response, 200)
-        updated_vk = response.json()["virtual_key"]
-        assert updated_vk["customer_id"] == sample_customer["id"]
-        assert updated_vk.get("team_id") is None
-        assert updated_vk["customer"]["id"] == sample_customer["id"]
-        assert updated_vk.get("team") is None
-
-        # Test 3: Try to set both (should fail)
-        response = governance_client.update_virtual_key(
-            vk_id, {"team_id": sample_team["id"], "customer_id": sample_customer["id"]}
-        )
-        assert response.status_code == 400
-        error_data = response.json()
-        assert "cannot be attached to both" in error_data["error"].lower()
-
-        # Test 4: Clear both relationships
-        response = governance_client.update_virtual_key(
-            vk_id, {"team_id": None, "customer_id": None}
-        )
-        # Note: Behavior depends on API implementation - adjust based on actual behavior
-        # Some APIs might not support explicit null setting
-
-    @pytest.mark.virtual_keys
-    @pytest.mark.field_updates
-    @pytest.mark.edge_cases
-    def test_vk_update_edge_cases(self, governance_client, cleanup_tracker):
-        """Test edge cases in VK updates"""
-        # Create test VK
-        data = {"name": generate_unique_name("Edge Case VK")}
-        create_response = governance_client.create_virtual_key(data)
-        assert_response_success(create_response, 201)
-        vk_id = create_response.json()["virtual_key"]["id"]
-        cleanup_tracker.add_virtual_key(vk_id)
-
-        original_response = governance_client.get_virtual_key(vk_id)
-        original_vk = original_response.json()["virtual_key"]
-
-        # Test 1: Empty update (should return unchanged VK)
-        response = governance_client.update_virtual_key(vk_id, {})
-        assert_response_success(response, 200)
-        updated_vk = response.json()["virtual_key"]
-
-        # Compare ignoring timestamps
-        differences = deep_compare_entities(
-            updated_vk, original_vk, ignore_fields=["updated_at"]
-        )
-        assert len(differences) == 0, f"Empty update changed fields: {differences}"
-
-        # Test 2: Invalid field values
-        response = governance_client.update_virtual_key(vk_id, {"is_active": "invalid"})
-        assert response.status_code == 400
-
-        # Test 3: Update with same values (should succeed but might not change updated_at)
-        response = governance_client.update_virtual_key(
-            vk_id,
-            {
-                "description": original_vk.get("description", ""),
-            },
-        )
-        # Note: Adjust based on API behavior for no-op updates
-
-        # Test 4: Very long values (test field length limits)
-        long_description = "x" * 10000  # Adjust based on actual field limits
-        response = governance_client.update_virtual_key(
-            vk_id, {"description": long_description}
-        )
-        # Expected behavior depends on API validation rules
-
-    @pytest.mark.virtual_keys
-    @pytest.mark.field_updates
-    def test_vk_update_nonexistent(self, governance_client):
-        """Test updating non-existent VK returns 404"""
-        fake_id = str(uuid.uuid4())
-        response = governance_client.update_virtual_key(
-            fake_id, {"description": "test"}
-        )
-        assert response.status_code == 404
-
-
-class TestVirtualKeyBudgetAndRateLimit:
-    """Test budget and rate limit specific functionality"""
-
-    @pytest.mark.virtual_keys
-    @pytest.mark.budget
-    def test_vk_budget_creation_and_validation(
-        self, governance_client, cleanup_tracker
-    ):
-        """Test budget creation with various configurations"""
-        # Test valid budget configurations
-        budget_test_cases = [
-            {"max_limit": 1000, "reset_duration": "1h"},
-            {"max_limit": 50000, "reset_duration": "1d"},
-            {"max_limit": 100000, "reset_duration": "1w"},
-            {"max_limit": 1000000, "reset_duration": "1M"},
-        ]
-
-        for budget_config in budget_test_cases:
-            data = {
-                "name": generate_unique_name(
-                    f"Budget VK {budget_config['reset_duration']}"
-                ),
-                "budget": budget_config,
-            }
-
-            response = governance_client.create_virtual_key(data)
-            assert_response_success(response, 201)
-
-            vk_data = response.json()["virtual_key"]
-            cleanup_tracker.add_virtual_key(vk_data["id"])
-
-            assert vk_data["budget"]["max_limit"] == budget_config["max_limit"]
-            assert (
-                vk_data["budget"]["reset_duration"] == budget_config["reset_duration"]
-            )
-            assert vk_data["budget"]["current_usage"] == 0
-            assert vk_data["budget"]["last_reset"] is not None
-
-    @pytest.mark.virtual_keys
-    @pytest.mark.budget
-    @pytest.mark.edge_cases
-    def test_vk_budget_edge_cases(self, governance_client, cleanup_tracker):
-        """Test budget edge cases and boundary conditions"""
-        # Test boundary values
-        edge_case_budgets = [
-            {"max_limit": 0, "reset_duration": "1h"},  # Zero budget
-            {"max_limit": 1, "reset_duration": "1s"},  # Minimal values
-            {"max_limit": 9223372036854775807, "reset_duration": "1h"},  # Max int64
-        ]
-
-        for budget_config in edge_case_budgets:
-            data = {
-                "name": generate_unique_name(
-                    f"Edge Budget VK {budget_config['max_limit']}"
-                ),
-                "budget": budget_config,
-            }
-
-            response = governance_client.create_virtual_key(data)
-            # Adjust assertions based on API validation rules
-            if (
-                budget_config["max_limit"] >= 0
-            ):  # Assuming non-negative budgets are valid
-                assert_response_success(response, 201)
-                cleanup_tracker.add_virtual_key(response.json()["virtual_key"]["id"])
-            else:
-                assert response.status_code == 400
-
-    @pytest.mark.virtual_keys
-    @pytest.mark.rate_limit
-    def test_vk_rate_limit_creation_and_validation(
-        self, governance_client, cleanup_tracker
-    ):
-        """Test rate limit creation with various configurations"""
-        # Test different rate limit configurations
-        rate_limit_test_cases = [
-            {
-                "token_max_limit": 1000,
-                "token_reset_duration": "1m",
-                "request_max_limit": 100,
-                "request_reset_duration": "1h",
-            },
-            {
-                "token_max_limit": 10000,
-                "token_reset_duration": "1h",
-                # Only token limits
-            },
-            {
-                "request_max_limit": 500,
-                "request_reset_duration": "1d",
-                # Only request limits
-            },
-            {
-                "token_max_limit": 5000,
-                "token_reset_duration": "30s",
-                "request_max_limit": 1000,
-                "request_reset_duration": "5m",
-            },
-        ]
-
-        for rate_limit_config in rate_limit_test_cases:
-            data = {
-                "name": generate_unique_name("Rate Limit VK"),
-                "rate_limit": rate_limit_config,
-            }
-
-            response = governance_client.create_virtual_key(data)
-            assert_response_success(response, 201)
-
-            vk_data = response.json()["virtual_key"]
-            cleanup_tracker.add_virtual_key(vk_data["id"])
-
-            rate_limit = vk_data["rate_limit"]
-            for key, value in rate_limit_config.items():
-                assert rate_limit[key] == value
-
-    @pytest.mark.virtual_keys
-    @pytest.mark.rate_limit
-    @pytest.mark.edge_cases
-    def test_vk_rate_limit_edge_cases(self, governance_client, cleanup_tracker):
-        """Test rate limit edge cases and boundary conditions"""
-        # Test minimal rate limits
-        minimal_rate_limit = {
-            "token_max_limit": 1,
-            "token_reset_duration": "1s",
-            "request_max_limit": 1,
-            "request_reset_duration": "1s",
-        }
-
-        data = {
-            "name": generate_unique_name("Minimal Rate Limit VK"),
-            "rate_limit": minimal_rate_limit,
-        }
-
-        response = governance_client.create_virtual_key(data)
-        assert_response_success(response, 201)
-        cleanup_tracker.add_virtual_key(response.json()["virtual_key"]["id"])
-
-        # Test large rate limits
-        large_rate_limit = {
-            "token_max_limit": 1000000,
-            "token_reset_duration": "1h",
-            "request_max_limit": 100000,
-            "request_reset_duration": "1h",
-        }
-
-        data = {
-            "name": generate_unique_name("Large Rate Limit VK"),
-            "rate_limit": large_rate_limit,
-        }
-
-        response = governance_client.create_virtual_key(data)
-        assert_response_success(response, 201)
-        cleanup_tracker.add_virtual_key(response.json()["virtual_key"]["id"])
-
-
-class TestVirtualKeyConcurrency:
-    """Test concurrent operations on Virtual Keys"""
-
-    @pytest.mark.virtual_keys
-    @pytest.mark.concurrency
-    @pytest.mark.slow
-    def test_vk_concurrent_creation(self, governance_client, cleanup_tracker):
-        """Test creating multiple VKs concurrently"""
-
-        def create_vk(index):
-            data = {"name": generate_unique_name(f"Concurrent VK {index}")}
-            response = governance_client.create_virtual_key(data)
-            return response
-
-        # Create 10 VKs concurrently
-        with ThreadPoolExecutor(max_workers=10) as executor:
-            futures = [executor.submit(create_vk, i) for i in range(10)]
-            responses = [future.result() for future in futures]
-
-        # Verify all succeeded
-        created_vks = []
-        for response in responses:
-            assert_response_success(response, 201)
-            vk_data = response.json()["virtual_key"]
-            created_vks.append(vk_data)
-            cleanup_tracker.add_virtual_key(vk_data["id"])
-
-        # Verify all VKs have unique IDs and values
-        vk_ids = [vk["id"] for vk in created_vks]
-        vk_values = [vk["value"] for vk in created_vks]
-        assert len(set(vk_ids)) == 10  # All unique IDs
-        assert len(set(vk_values)) == 10  # All unique values
-
-    @pytest.mark.virtual_keys
-    @pytest.mark.concurrency
-    @pytest.mark.slow
-    def test_vk_concurrent_updates(self, governance_client, cleanup_tracker):
-        """Test updating same VK concurrently"""
-        # Create VK to update
-        data = {"name": generate_unique_name("Concurrent Update VK")}
-        create_response = governance_client.create_virtual_key(data)
-        assert_response_success(create_response, 201)
-        vk_id = create_response.json()["virtual_key"]["id"]
-        cleanup_tracker.add_virtual_key(vk_id)
-
-        # Update concurrently with different descriptions
-        def update_vk(index):
-            update_data = {"description": f"Updated by thread {index}"}
-            response = governance_client.update_virtual_key(vk_id, update_data)
-            return response, index
-
-        with ThreadPoolExecutor(max_workers=5) as executor:
-            futures = [executor.submit(update_vk, i) for i in range(5)]
-            results = [future.result() for future in futures]
-
-        # All updates should succeed (last one wins)
-        for response, index in results:
-            assert_response_success(response, 200)
-
-        # Verify final state
-        final_response = governance_client.get_virtual_key(vk_id)
-        final_vk = final_response.json()["virtual_key"]
-        assert final_vk["description"].startswith("Updated by thread")
-
-
-class TestVirtualKeyRelationships:
-    """Test VK relationships with teams and customers"""
-
-    @pytest.mark.virtual_keys
-    @pytest.mark.relationships
-    def test_vk_team_relationship_loading(
-        self, governance_client, cleanup_tracker, sample_team_with_customer
-    ):
-        """Test that VK properly loads team and customer relationships"""
-        data = {
-            "name": generate_unique_name("Relationship VK"),
-            "team_id": sample_team_with_customer["id"],
-        }
-
-        response = governance_client.create_virtual_key(data)
-        assert_response_success(response, 201)
-        vk_data = response.json()["virtual_key"]
-        cleanup_tracker.add_virtual_key(vk_data["id"])
-
-        # Verify team relationship loaded
-        assert vk_data["team"] is not None
-        assert vk_data["team"]["id"] == sample_team_with_customer["id"]
-        assert vk_data["team"]["name"] == sample_team_with_customer["name"]
-
-        # Verify team's customer_id is present (nested customer not preloaded)
-        if sample_team_with_customer.get("customer_id"):
-            # Note: API only preloads one level deep, so customer object isn't nested here
-            assert (
-                vk_data["team"].get("customer_id")
-                == sample_team_with_customer["customer_id"]
-            )
-
-    @pytest.mark.virtual_keys
-    @pytest.mark.relationships
-    def test_vk_customer_relationship_loading(
-        self, governance_client, cleanup_tracker, sample_customer
-    ):
-        """Test that VK properly loads customer relationships"""
-        data = {
-            "name": generate_unique_name("Customer Relationship VK"),
-            "customer_id": sample_customer["id"],
-        }
-
-        response = governance_client.create_virtual_key(data)
-        assert_response_success(response, 201)
-        vk_data = response.json()["virtual_key"]
-        cleanup_tracker.add_virtual_key(vk_data["id"])
-
-        # Verify customer relationship loaded
-        assert vk_data["customer"] is not None
-        assert vk_data["customer"]["id"] == sample_customer["id"]
-        assert vk_data["customer"]["name"] == sample_customer["name"]
-
-    @pytest.mark.virtual_keys
-    @pytest.mark.relationships
-    def test_vk_orphaned_relationships(self, governance_client, cleanup_tracker):
-        """Test VK behavior with orphaned team/customer references"""
-        # Create VK with non-existent team_id
-        fake_team_id = str(uuid.uuid4())
-        data = {"name": generate_unique_name("Orphaned VK"), "team_id": fake_team_id}
-
-        response = governance_client.create_virtual_key(data)
-        # Behavior depends on API implementation:
-        # - Might succeed with warning
-        # - Might fail with validation error
-        # Adjust assertion based on actual behavior
-
-        if response.status_code == 201:
-            cleanup_tracker.add_virtual_key(response.json()["virtual_key"]["id"])
-            # Verify VK was created but team relationship is null/missing
-            vk_data = response.json()["virtual_key"]
-            assert vk_data.get("team") is None
-        else:
-            assert response.status_code == 400  # Validation error expected