diff --git a/core/internal/llmtests/account.go b/core/internal/llmtests/account.go index 050fa9bfc5..ec5b30a001 100644 --- a/core/internal/llmtests/account.go +++ b/core/internal/llmtests/account.go @@ -228,7 +228,7 @@ func (account *ComprehensiveTestAccount) GetKeysForProvider(ctx context.Context, UseForBatchAPI: bifrost.Ptr(true), }, { - Models: []string{"cohere.embed-v4:0", "amazon.nova-canvas-v1:0"}, + Models: []string{"cohere.embed-v4:0", "amazon.nova-canvas-v1:0", "anthropic.claude-sonnet-4-20250514-v1:0"}, Weight: 1.0, BedrockKeyConfig: &schemas.BedrockKeyConfig{ AccessKey: *schemas.NewEnvVar("env.AWS_ACCESS_KEY_ID"), diff --git a/core/internal/llmtests/prompt_caching.go b/core/internal/llmtests/prompt_caching.go index 605c27cc3a..70db9cdb0f 100644 --- a/core/internal/llmtests/prompt_caching.go +++ b/core/internal/llmtests/prompt_caching.go @@ -361,7 +361,7 @@ func RunPromptCachingTest(t *testing.T, client *bifrost.Bifrost, ctx context.Con // For the second query (index 1), add cached tokens validation if i == 1 { expectations.ProviderSpecific = map[string]interface{}{ - "min_cached_tokens_percentage": 0.90, // 90% minimum + "min_cached_tokens_percentage": 0.80, // 80% minimum "query_index": i, } } @@ -421,17 +421,17 @@ func RunPromptCachingTest(t *testing.T, client *bifrost.Bifrost, ctx context.Con t.Logf(" ℹ️ First request has %d cached tokens (cache from previous test run)", cachedTokens) } } else if i == 1 { - // Query 2: Verify cached tokens are >90% of prompt tokens + // Query 2: Verify cached tokens are >80% of prompt tokens // This validation is also done in the retry framework, but we verify here as well if promptTokens > 0 { cachedPercentage := float64(cachedTokens) / float64(promptTokens) t.Logf(" Cached tokens percentage: %.2f%%", cachedPercentage*100) - require.GreaterOrEqual(t, cachedPercentage, 0.90, - "Query 2 should have at least 90%% cached tokens (got %.2f%%, cached: %d, prompt: %d)", + require.GreaterOrEqual(t, cachedPercentage, 0.80, 
+ "Query 2 should have at least 80%% cached tokens (got %.2f%%, cached: %d, prompt: %d)", cachedPercentage*100, cachedTokens, promptTokens) - t.Logf(" ✅ Cached tokens percentage: %.2f%% (>= 90%%)", cachedPercentage*100) + t.Logf(" ✅ Cached tokens percentage: %.2f%% (>= 80%%)", cachedPercentage*100) } else { t.Fatalf("Prompt tokens is 0, cannot calculate cached percentage") } diff --git a/core/providers/bedrock/bedrock_test.go b/core/providers/bedrock/bedrock_test.go index 204ccffa09..482f23b2fa 100644 --- a/core/providers/bedrock/bedrock_test.go +++ b/core/providers/bedrock/bedrock_test.go @@ -126,6 +126,7 @@ func TestBedrock(t *testing.T) { Fallbacks: []schemas.Fallback{ {Provider: schemas.Bedrock, Model: "claude-4-sonnet"}, {Provider: schemas.Bedrock, Model: "claude-4.5-sonnet"}, + {Provider: schemas.Bedrock, Model: "anthropic.claude-sonnet-4-20250514-v1:0"}, // Used for count tokens }, EmbeddingModel: "cohere.embed-v4:0", RerankModel: rerankModelARN,