From fece72640f8ebfdda199f689160ca09ee4ef385f Mon Sep 17 00:00:00 2001
From: Dorien Koelemeijer <dkoelemeijer@squareup.com>
Date: Mon, 12 Jan 2026 13:12:39 +1000
Subject: [PATCH 1/5] If context is safe, don't flag prompt injection unless
 the tool call result has a critical pattern match

---
 crates/goose/src/security/scanner.rs | 51 ++++++++++++++++++++++------
 1 file changed, 41 insertions(+), 10 deletions(-)
diff --git a/crates/goose/src/security/scanner.rs b/crates/goose/src/security/scanner.rs
index 2e658b399909..73c5ee583e44 100644
--- a/crates/goose/src/security/scanner.rs
+++ b/crates/goose/src/security/scanner.rs
@@ -16,6 +16,7 @@ pub struct ScanResult {
     pub explanation: String,
 }
 
+#[derive(Clone)]
 struct DetailedScanResult {
     confidence: f32,
     pattern_matches: Vec<PatternMatch>,
@@ -106,20 +107,23 @@ impl PromptInjectionScanner {
             self.scan_conversation(messages)
         );
 
-        let highest_confidence_result =
-            self.select_highest_confidence_result(tool_result?, context_result?);
+        let tool_result = tool_result?;
+        let context_result = context_result?;
         let threshold = self.get_threshold_from_config();
 
+        let final_result =
+            self.select_result_with_context_awareness(tool_result, context_result, threshold);
+
         tracing::info!(
-            "✅ Security analysis complete: confidence={:.3}, malicious={}",
-            highest_confidence_result.confidence,
-            highest_confidence_result.confidence >= threshold
+            "Security analysis complete: confidence={:.3}, malicious={}",
+            final_result.confidence,
+            final_result.confidence >= threshold
         );
 
         Ok(ScanResult {
-            is_malicious: highest_confidence_result.confidence >= threshold,
-            confidence: highest_confidence_result.confidence,
-            explanation: self.build_explanation(&highest_confidence_result, threshold),
+            is_malicious: final_result.confidence >= threshold,
+            confidence: final_result.confidence,
+            explanation: self.build_explanation(&final_result, threshold),
         })
     }
 
@@ -169,12 +173,39 @@ impl PromptInjectionScanner {
         })
     }
 
-    fn select_highest_confidence_result(
+    fn select_result_with_context_awareness(
         &self,
         tool_result: DetailedScanResult,
         context_result: DetailedScanResult,
+        threshold: f32,
     ) -> DetailedScanResult {
-        if tool_result.confidence >= context_result.confidence {
+        let context_is_safe = context_result
+            .ml_confidence
+            .is_some_and(|conf| conf < threshold);
+
+        let tool_has_only_non_critical = !tool_result.pattern_matches.is_empty()
+            && tool_result
+                .pattern_matches
+                .iter()
+                .all(|m| m.threat.risk_level != crate::security::patterns::RiskLevel::Critical);
+
+        if context_is_safe && tool_has_only_non_critical {
+            tracing::info!(
+                "Suppressing non-critical pattern match due to safe context evaluation"
+            );
+            tracing::debug!(
+                context_ml_confidence = ?context_result.ml_confidence,
+                pattern_count = tool_result.pattern_matches.len(),
+                max_pattern_risk = ?tool_result.pattern_matches.first().map(|m| &m.threat.risk_level),
+                "Suppression conditions met: safe context + non-critical patterns"
+            );
+
+            DetailedScanResult {
+                confidence: 0.0,
+                pattern_matches: Vec::new(),
+                ml_confidence: context_result.ml_confidence,
+            }
+        } else if tool_result.confidence >= context_result.confidence {
             tool_result
         } else {
             context_result

From 836884bc69907c4942c7b40689e4f951451b693d Mon Sep 17 00:00:00 2001
From: Dorien Koelemeijer <dkoelemeijer@squareup.com>
Date: Mon, 12 Jan 2026 13:57:11 +1000
Subject: [PATCH 2/5] Fix: drop Clone derive and collapse log call onto one line

---
 crates/goose/src/security/scanner.rs | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/crates/goose/src/security/scanner.rs b/crates/goose/src/security/scanner.rs
index 73c5ee583e44..3c0f8ac83ed8 100644
--- a/crates/goose/src/security/scanner.rs
+++ b/crates/goose/src/security/scanner.rs
@@ -16,7 +16,6 @@ pub struct ScanResult {
     pub explanation: String,
 }
 
-#[derive(Clone)]
 struct DetailedScanResult {
     confidence: f32,
     pattern_matches: Vec<PatternMatch>,
@@ -190,9 +189,7 @@ impl PromptInjectionScanner {
                 .all(|m| m.threat.risk_level != crate::security::patterns::RiskLevel::Critical);
 
         if context_is_safe && tool_has_only_non_critical {
-            tracing::info!(
-                "Suppressing non-critical pattern match due to safe context evaluation"
-            );
+            tracing::info!("Suppressing non-critical pattern match due to safe context evaluation");
             tracing::debug!(
                 context_ml_confidence = ?context_result.ml_confidence,
                 pattern_count = tool_result.pattern_matches.len(),

From ce9e44fbba181346367d5c42d7438103ff4d6fa9 Mon Sep 17 00:00:00 2001
From: Dorien Koelemeijer <dkoelemeijer@squareup.com>
Date: Mon, 12 Jan 2026 14:30:45 +1000
Subject: [PATCH 3/5] Default classifier model to first
 'SECURITY_ML_MODEL_MAPPING' entry when 'SECURITY_PROMPT_CLASSIFIER_MODEL' is
 unset or empty

---
 crates/goose/src/security/scanner.rs | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/crates/goose/src/security/scanner.rs b/crates/goose/src/security/scanner.rs
index 3c0f8ac83ed8..e43266b52a5b 100644
--- a/crates/goose/src/security/scanner.rs
+++ b/crates/goose/src/security/scanner.rs
@@ -46,7 +46,7 @@ impl PromptInjectionScanner {
     fn create_classifier_from_config() -> Result<ClassificationClient> {
         let config = Config::global();
 
-        let model_name = config
+        let mut model_name = config
             .get_param::<String>("SECURITY_PROMPT_CLASSIFIER_MODEL")
             .ok()
             .filter(|s| !s.trim().is_empty());
@@ -59,6 +59,23 @@ impl PromptInjectionScanner {
             .ok()
             .filter(|s| !s.trim().is_empty());
 
+        if model_name.is_none() {
+            if let Ok(mapping_json) = std::env::var("SECURITY_ML_MODEL_MAPPING") {
+                if let Ok(mapping) = serde_json::from_str::<
+                    crate::security::classification_client::ModelMappingConfig,
+                >(&mapping_json)
+                {
+                    if let Some(first_model) = mapping.models.keys().next() {
+                        tracing::info!(
+                            default_model = %first_model,
+                            "SECURITY_ML_MODEL_MAPPING available but no model selected - using first available model as default"
+                        );
+                        model_name = Some(first_model.clone());
+                    }
+                }
+            }
+        }
+
         tracing::debug!(
             model_name = ?model_name,
             has_endpoint = endpoint.is_some(),

From 7143412f804681b7dcba66c1c9ee6f3ef6c4c833 Mon Sep 17 00:00:00 2001
From: Dorien Koelemeijer <dkoelemeijer@squareup.com>
Date: Mon, 12 Jan 2026 14:50:44 +1000
Subject: [PATCH 4/5] Add test for context-aware suppression

---
 crates/goose/src/security/scanner.rs | 128 +++++++++++++++++++++++++++
 1 file changed, 128 insertions(+)

diff --git a/crates/goose/src/security/scanner.rs b/crates/goose/src/security/scanner.rs
index e43266b52a5b..72194b61dcaa 100644
--- a/crates/goose/src/security/scanner.rs
+++ b/crates/goose/src/security/scanner.rs
@@ -359,4 +359,132 @@ mod tests {
         assert!(result.is_malicious);
         assert!(result.explanation.contains("Security threat"));
     }
+
+    #[test]
+    fn test_context_aware_suppression() {
+        let scanner = PromptInjectionScanner::new();
+        let threshold = 0.8;
+
+        // Test case 1: Safe context + non-critical pattern → should suppress
+        let result = scanner.select_result_with_context_awareness(
+            DetailedScanResult {
+                confidence: 0.6,
+                pattern_matches: vec![PatternMatch {
+                    matched_text: "test".to_string(),
+                    threat: crate::security::patterns::ThreatPattern {
+                        name: "test_pattern",
+                        pattern: "test",
+                        description: "Test pattern",
+                        risk_level: crate::security::patterns::RiskLevel::Medium,
+                        category: crate::security::patterns::ThreatCategory::CommandInjection,
+                    },
+                    start_pos: 0,
+                    end_pos: 4,
+                }],
+                ml_confidence: None,
+            },
+            DetailedScanResult {
+                confidence: 0.3,
+                pattern_matches: Vec::new(),
+                ml_confidence: Some(0.3),
+            },
+            threshold,
+        );
+        assert_eq!(
+            result.confidence, 0.0,
+            "Should suppress non-critical pattern with safe context"
+        );
+        assert!(result.pattern_matches.is_empty());
+
+        // Test case 2: Safe context + critical pattern → should NOT suppress
+        let result = scanner.select_result_with_context_awareness(
+            DetailedScanResult {
+                confidence: 0.95,
+                pattern_matches: vec![PatternMatch {
+                    matched_text: "rm -rf /".to_string(),
+                    threat: crate::security::patterns::ThreatPattern {
+                        name: "rm_rf_root",
+                        pattern: r"rm\s+-rf",
+                        description: "Dangerous command",
+                        risk_level: crate::security::patterns::RiskLevel::Critical,
+                        category: crate::security::patterns::ThreatCategory::FileSystemDestruction,
+                    },
+                    start_pos: 0,
+                    end_pos: 9,
+                }],
+                ml_confidence: None,
+            },
+            DetailedScanResult {
+                confidence: 0.3,
+                pattern_matches: Vec::new(),
+                ml_confidence: Some(0.3),
+            },
+            threshold,
+        );
+        assert!(
+            result.confidence > 0.0,
+            "Should NOT suppress critical pattern even with safe context"
+        );
+        assert!(!result.pattern_matches.is_empty());
+
+        // Test case 3: Unsafe context + non-critical pattern → should NOT suppress
+        let result = scanner.select_result_with_context_awareness(
+            DetailedScanResult {
+                confidence: 0.6,
+                pattern_matches: vec![PatternMatch {
+                    matched_text: "test".to_string(),
+                    threat: crate::security::patterns::ThreatPattern {
+                        name: "test_pattern",
+                        pattern: "test",
+                        description: "Test pattern",
+                        risk_level: crate::security::patterns::RiskLevel::Medium,
+                        category: crate::security::patterns::ThreatCategory::CommandInjection,
+                    },
+                    start_pos: 0,
+                    end_pos: 4,
+                }],
+                ml_confidence: None,
+            },
+            DetailedScanResult {
+                confidence: 0.9,
+                pattern_matches: Vec::new(),
+                ml_confidence: Some(0.9),
+            },
+            threshold,
+        );
+        assert!(
+            result.confidence > 0.0,
+            "Should NOT suppress with unsafe context"
+        );
+
+        // Test case 4: No ML confidence (ML disabled) + non-critical pattern → should NOT suppress
+        let result = scanner.select_result_with_context_awareness(
+            DetailedScanResult {
+                confidence: 0.6,
+                pattern_matches: vec![PatternMatch {
+                    matched_text: "test".to_string(),
+                    threat: crate::security::patterns::ThreatPattern {
+                        name: "test_pattern",
+                        pattern: "test",
+                        description: "Test pattern",
+                        risk_level: crate::security::patterns::RiskLevel::Medium,
+                        category: crate::security::patterns::ThreatCategory::CommandInjection,
+                    },
+                    start_pos: 0,
+                    end_pos: 4,
+                }],
+                ml_confidence: None,
+            },
+            DetailedScanResult {
+                confidence: 0.0,
+                pattern_matches: Vec::new(),
+                ml_confidence: None,
+            },
+            threshold,
+        );
+        assert!(
+            result.confidence > 0.0,
+            "Should NOT suppress when ML is disabled"
+        );
+    }
 }

From 5a703efaceced9db3dbf72178e64aa016eeca431 Mon Sep 17 00:00:00 2001
From: Dorien Koelemeijer <dkoelemeijer@squareup.com>
Date: Tue, 13 Jan 2026 07:57:18 +1000
Subject: [PATCH 5/5] Address PR comments: remove suppression logging and test

---
 crates/goose/src/security/scanner.rs | 136 ---------------------------
 1 file changed, 136 deletions(-)

diff --git a/crates/goose/src/security/scanner.rs b/crates/goose/src/security/scanner.rs
index 72194b61dcaa..3411b89a6a5a 100644
--- a/crates/goose/src/security/scanner.rs
+++ b/crates/goose/src/security/scanner.rs
@@ -206,14 +206,6 @@ impl PromptInjectionScanner {
                 .all(|m| m.threat.risk_level != crate::security::patterns::RiskLevel::Critical);
 
         if context_is_safe && tool_has_only_non_critical {
-            tracing::info!("Suppressing non-critical pattern match due to safe context evaluation");
-            tracing::debug!(
-                context_ml_confidence = ?context_result.ml_confidence,
-                pattern_count = tool_result.pattern_matches.len(),
-                max_pattern_risk = ?tool_result.pattern_matches.first().map(|m| &m.threat.risk_level),
-                "Suppression conditions met: safe context + non-critical patterns"
-            );
-
             DetailedScanResult {
                 confidence: 0.0,
                 pattern_matches: Vec::new(),
@@ -359,132 +351,4 @@ mod tests {
         assert!(result.is_malicious);
         assert!(result.explanation.contains("Security threat"));
     }
-
-    #[test]
-    fn test_context_aware_suppression() {
-        let scanner = PromptInjectionScanner::new();
-        let threshold = 0.8;
-
-        // Test case 1: Safe context + non-critical pattern → should suppress
-        let result = scanner.select_result_with_context_awareness(
-            DetailedScanResult {
-                confidence: 0.6,
-                pattern_matches: vec![PatternMatch {
-                    matched_text: "test".to_string(),
-                    threat: crate::security::patterns::ThreatPattern {
-                        name: "test_pattern",
-                        pattern: "test",
-                        description: "Test pattern",
-                        risk_level: crate::security::patterns::RiskLevel::Medium,
-                        category: crate::security::patterns::ThreatCategory::CommandInjection,
-                    },
-                    start_pos: 0,
-                    end_pos: 4,
-                }],
-                ml_confidence: None,
-            },
-            DetailedScanResult {
-                confidence: 0.3,
-                pattern_matches: Vec::new(),
-                ml_confidence: Some(0.3),
-            },
-            threshold,
-        );
-        assert_eq!(
-            result.confidence, 0.0,
-            "Should suppress non-critical pattern with safe context"
-        );
-        assert!(result.pattern_matches.is_empty());
-
-        // Test case 2: Safe context + critical pattern → should NOT suppress
-        let result = scanner.select_result_with_context_awareness(
-            DetailedScanResult {
-                confidence: 0.95,
-                pattern_matches: vec![PatternMatch {
-                    matched_text: "rm -rf /".to_string(),
-                    threat: crate::security::patterns::ThreatPattern {
-                        name: "rm_rf_root",
-                        pattern: r"rm\s+-rf",
-                        description: "Dangerous command",
-                        risk_level: crate::security::patterns::RiskLevel::Critical,
-                        category: crate::security::patterns::ThreatCategory::FileSystemDestruction,
-                    },
-                    start_pos: 0,
-                    end_pos: 9,
-                }],
-                ml_confidence: None,
-            },
-            DetailedScanResult {
-                confidence: 0.3,
-                pattern_matches: Vec::new(),
-                ml_confidence: Some(0.3),
-            },
-            threshold,
-        );
-        assert!(
-            result.confidence > 0.0,
-            "Should NOT suppress critical pattern even with safe context"
-        );
-        assert!(!result.pattern_matches.is_empty());
-
-        // Test case 3: Unsafe context + non-critical pattern → should NOT suppress
-        let result = scanner.select_result_with_context_awareness(
-            DetailedScanResult {
-                confidence: 0.6,
-                pattern_matches: vec![PatternMatch {
-                    matched_text: "test".to_string(),
-                    threat: crate::security::patterns::ThreatPattern {
-                        name: "test_pattern",
-                        pattern: "test",
-                        description: "Test pattern",
-                        risk_level: crate::security::patterns::RiskLevel::Medium,
-                        category: crate::security::patterns::ThreatCategory::CommandInjection,
-                    },
-                    start_pos: 0,
-                    end_pos: 4,
-                }],
-                ml_confidence: None,
-            },
-            DetailedScanResult {
-                confidence: 0.9,
-                pattern_matches: Vec::new(),
-                ml_confidence: Some(0.9),
-            },
-            threshold,
-        );
-        assert!(
-            result.confidence > 0.0,
-            "Should NOT suppress with unsafe context"
-        );
-
-        // Test case 4: No ML confidence (ML disabled) + non-critical pattern → should NOT suppress
-        let result = scanner.select_result_with_context_awareness(
-            DetailedScanResult {
-                confidence: 0.6,
-                pattern_matches: vec![PatternMatch {
-                    matched_text: "test".to_string(),
-                    threat: crate::security::patterns::ThreatPattern {
-                        name: "test_pattern",
-                        pattern: "test",
-                        description: "Test pattern",
-                        risk_level: crate::security::patterns::RiskLevel::Medium,
-                        category: crate::security::patterns::ThreatCategory::CommandInjection,
-                    },
-                    start_pos: 0,
-                    end_pos: 4,
-                }],
-                ml_confidence: None,
-            },
-            DetailedScanResult {
-                confidence: 0.0,
-                pattern_matches: Vec::new(),
-                ml_confidence: None,
-            },
-            threshold,
-        );
-        assert!(
-            result.confidence > 0.0,
-            "Should NOT suppress when ML is disabled"
-        );
-    }
 }