diff --git a/core/internal/llmtests/account.go b/core/internal/llmtests/account.go index 050fa9bfc5..ec5b30a001 100644 --- a/core/internal/llmtests/account.go +++ b/core/internal/llmtests/account.go @@ -228,7 +228,7 @@ func (account *ComprehensiveTestAccount) GetKeysForProvider(ctx context.Context, UseForBatchAPI: bifrost.Ptr(true), }, { - Models: []string{"cohere.embed-v4:0", "amazon.nova-canvas-v1:0"}, + Models: []string{"cohere.embed-v4:0", "amazon.nova-canvas-v1:0", "anthropic.claude-sonnet-4-20250514-v1:0"}, Weight: 1.0, BedrockKeyConfig: &schemas.BedrockKeyConfig{ AccessKey: *schemas.NewEnvVar("env.AWS_ACCESS_KEY_ID"), diff --git a/core/internal/llmtests/prompt_caching.go b/core/internal/llmtests/prompt_caching.go index 605c27cc3a..70db9cdb0f 100644 --- a/core/internal/llmtests/prompt_caching.go +++ b/core/internal/llmtests/prompt_caching.go @@ -361,7 +361,7 @@ func RunPromptCachingTest(t *testing.T, client *bifrost.Bifrost, ctx context.Con // For the second query (index 1), add cached tokens validation if i == 1 { expectations.ProviderSpecific = map[string]interface{}{ - "min_cached_tokens_percentage": 0.90, // 90% minimum + "min_cached_tokens_percentage": 0.80, // 80% minimum "query_index": i, } } @@ -421,17 +421,17 @@ func RunPromptCachingTest(t *testing.T, client *bifrost.Bifrost, ctx context.Con t.Logf(" ℹ️ First request has %d cached tokens (cache from previous test run)", cachedTokens) } } else if i == 1 { - // Query 2: Verify cached tokens are >90% of prompt tokens + // Query 2: Verify cached tokens are >80% of prompt tokens // This validation is also done in the retry framework, but we verify here as well if promptTokens > 0 { cachedPercentage := float64(cachedTokens) / float64(promptTokens) t.Logf(" Cached tokens percentage: %.2f%%", cachedPercentage*100) - require.GreaterOrEqual(t, cachedPercentage, 0.90, - "Query 2 should have at least 90%% cached tokens (got %.2f%%, cached: %d, prompt: %d)", + require.GreaterOrEqual(t, cachedPercentage, 0.80, 
+ "Query 2 should have at least 80%% cached tokens (got %.2f%%, cached: %d, prompt: %d)", cachedPercentage*100, cachedTokens, promptTokens) - t.Logf(" ✅ Cached tokens percentage: %.2f%% (>= 90%%)", cachedPercentage*100) + t.Logf(" ✅ Cached tokens percentage: %.2f%% (>= 80%%)", cachedPercentage*100) } else { t.Fatalf("Prompt tokens is 0, cannot calculate cached percentage") } diff --git a/core/providers/bedrock/bedrock_test.go b/core/providers/bedrock/bedrock_test.go index 204ccffa09..482f23b2fa 100644 --- a/core/providers/bedrock/bedrock_test.go +++ b/core/providers/bedrock/bedrock_test.go @@ -126,6 +126,7 @@ func TestBedrock(t *testing.T) { Fallbacks: []schemas.Fallback{ {Provider: schemas.Bedrock, Model: "claude-4-sonnet"}, {Provider: schemas.Bedrock, Model: "claude-4.5-sonnet"}, + {Provider: schemas.Bedrock, Model: "anthropic.claude-sonnet-4-20250514-v1:0"}, // Used for count tokens }, EmbeddingModel: "cohere.embed-v4:0", RerankModel: rerankModelARN,