10 changes: 8 additions & 2 deletions crates/goose/src/security/mod.rs
@@ -43,9 +43,15 @@ impl SecurityManager
     fn is_ml_scanning_enabled(&self) -> bool {
         let config = Config::global();
 
-        config
+        let prompt_enabled = config
             .get_param::<bool>("SECURITY_PROMPT_CLASSIFIER_ENABLED")
-            .unwrap_or(false)
+            .unwrap_or(false);
+
+        let command_enabled = config
+            .get_param::<bool>("SECURITY_COMMAND_CLASSIFIER_ENABLED")
+            .unwrap_or(false);
+
+        prompt_enabled || command_enabled
     }
 
     pub async fn analyze_tool_requests(
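Net effect of the `mod.rs` change: ML scanning now runs when either classifier flag is enabled, where it previously keyed off `SECURITY_PROMPT_CLASSIFIER_ENABLED` alone. A minimal self-contained sketch of the new predicate, with plain `Option<bool>` arguments standing in for the `Config::get_param` lookups (the free-function signature is illustrative, not the crate's API):

```rust
// Sketch only: Option<bool> models a config key that may be unset,
// mirroring get_param::<bool>(..).unwrap_or(false) in the real code.
fn is_ml_scanning_enabled(prompt_flag: Option<bool>, command_flag: Option<bool>) -> bool {
    let prompt_enabled = prompt_flag.unwrap_or(false);
    let command_enabled = command_flag.unwrap_or(false);
    // Either classifier being on is now enough to enable ML scanning.
    prompt_enabled || command_enabled
}

fn main() {
    assert!(!is_ml_scanning_enabled(None, None)); // nothing configured
    assert!(is_ml_scanning_enabled(Some(true), None)); // prompt classifier only
    assert!(is_ml_scanning_enabled(None, Some(true))); // command classifier only
    assert!(!is_ml_scanning_enabled(Some(false), Some(false))); // both explicitly off
}
```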
15 changes: 10 additions & 5 deletions crates/goose/src/security/scanner.rs
@@ -149,18 +149,19 @@ impl PromptInjectionScanner
         tracing::info!(
             "Classifier Results - Command: {:.3}, Prompt: {:.3}, Threshold: {:.3}",
             tool_result.confidence,
-            context_result.confidence,
+            context_result.ml_confidence.unwrap_or(0.0),
             threshold
         );
 
         let final_confidence =
-            self.combine_confidences(tool_result.confidence, context_result.confidence);
+            self.combine_confidences(tool_result.confidence, context_result.ml_confidence);
 
         tracing::info!(
             tool_confidence = %tool_result.confidence,
-            context_confidence = %context_result.confidence,
+            context_confidence = ?context_result.ml_confidence,
             final_confidence = %final_confidence,
-            has_ml = tool_result.ml_confidence.is_some(),
+            has_command_ml = tool_result.ml_confidence.is_some(),
+            has_prompt_ml = context_result.ml_confidence.is_some(),
             has_patterns = !tool_result.pattern_matches.is_empty(),
             threshold = %threshold,
             malicious = final_confidence >= threshold,
@@ -239,7 +240,11 @@ impl PromptInjectionScanner
         })
     }
 
-    fn combine_confidences(&self, tool_confidence: f32, context_confidence: f32) -> f32 {
+    fn combine_confidences(&self, tool_confidence: f32, context_confidence: Option<f32>) -> f32 {
+        let Some(context_confidence) = context_confidence else {
+            return tool_confidence;
+        };
+
         // If tool is safe, context is not taken into account
         if tool_confidence < 0.3 {
             return tool_confidence;
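Two details in this file are easy to miss. First, the structured-log sigil for `context_confidence` changes from `%` (Display) to `?` (Debug), which is required because `Option<f32>` implements `Debug` but not `Display`; splitting `has_ml` into `has_command_ml` / `has_prompt_ml` likewise lets the logs show which classifier actually produced a score. Second, `combine_confidences` now short-circuits when the prompt classifier supplied no score. A hedged sketch of the updated logic as a free function follows; the hunk is truncated before the actual combination rule, so the trailing `max` is an illustrative assumption, not the crate's formula:

```rust
// context_confidence is None when the prompt classifier is disabled
// or returned no ML score.
fn combine_confidences(tool_confidence: f32, context_confidence: Option<f32>) -> f32 {
    // No prompt-classifier score: the command-classifier score stands alone.
    let Some(context_confidence) = context_confidence else {
        return tool_confidence;
    };

    // If the tool call already looks safe, context is ignored
    // (0.3 cutoff taken from the diff).
    if tool_confidence < 0.3 {
        return tool_confidence;
    }

    // ASSUMPTION: the hunk ends before the real combination rule;
    // taking the max of the two scores is one plausible stand-in.
    tool_confidence.max(context_confidence)
}

fn main() {
    assert_eq!(combine_confidences(0.8, None), 0.8); // prompt classifier off
    assert_eq!(combine_confidences(0.1, Some(0.9)), 0.1); // safe tool short-circuits
    assert_eq!(combine_confidences(0.6, Some(0.9)), 0.9); // assumed max rule
}
```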
117 changes: 59 additions & 58 deletions ui/desktop/src/components/settings/security/SecurityToggle.tsx
@@ -271,17 +271,73 @@ export const SecurityToggle = () => {
               />
             </div>
 
-            {/* ML Detection Toggle */}
+            {/* Command Injection Detection Toggle */}
             <div className="border-t border-border-default pt-4">
               <div className="flex items-center justify-between py-2 hover:bg-background-muted rounded-lg transition-all">
                 <div>
                   <h4
                     className={`text-sm font-medium ${enabled ? 'text-text-default' : 'text-text-muted'}`}
                   >
-                    Enable ML-Based Detection
+                    Enable Command Injection ML Detection
                   </h4>
                   <p className="text-xs text-text-muted max-w-md mt-[2px]">
+                    Use ML models to detect malicious shell commands
+                  </p>
+                </div>
+                <div className="flex items-center">
+                  <Switch
+                    checked={effectiveCommandClassifierEnabled}
+                    onCheckedChange={handleCommandClassifierToggle}
+                    disabled={!enabled}
+                    variant="mono"
+                  />
+                </div>
+              </div>
+
+              {hasCommandModel ? (
+                enabled &&
+                effectiveCommandClassifierEnabled && (
+                  <div className="text-sm text-gray-700 dark:text-gray-300 mt-2">
+                    ✓ Command classifier active (auto-configured from environment)
+                  </div>
+                )
+              ) : (
+                <div
+                  className={`overflow-hidden transition-all duration-300 ease-in-out ${
+                    enabled && effectiveCommandClassifierEnabled
+                      ? 'max-h-[32rem] opacity-100 mt-3'
+                      : 'max-h-0 opacity-0'
+                  }`}
+                >
+                  <div className={enabled && effectiveCommandClassifierEnabled ? '' : 'opacity-50'}>
+                    <ClassifierEndpointInputs
+                      endpointValue={commandEndpointInput}
+                      tokenValue={commandTokenInput}
+                      onEndpointChange={setCommandEndpointInput}
+                      onTokenChange={setCommandTokenInput}
+                      onEndpointBlur={handleCommandEndpointChange}
+                      onTokenBlur={handleCommandTokenChange}
+                      disabled={!enabled || !effectiveCommandClassifierEnabled}
+                      endpointPlaceholder="https://example.com/classify"
+                      tokenPlaceholder="token..."
+                      endpointDescription="Enter the full URL for your command injection classification service"
+                    />
+                  </div>
+                </div>
+              )}
+            </div>
+
+            {/* Prompt Injection Detection Toggle */}
+            <div className="border-t border-border-default pt-4">
+              <div className="flex items-center justify-between py-2 hover:bg-background-muted rounded-lg transition-all">
+                <div>
+                  <h4
+                    className={`text-sm font-medium ${enabled ? 'text-text-default' : 'text-text-muted'}`}
+                  >
+                    Enable Prompt Injection ML Detection
+                  </h4>
+                  <p className="text-xs text-text-muted max-w-md mt-[2px]">
-                    Use machine learning models for more accurate detection
+                    Use ML models to detect potential prompt injection in your chat
                   </p>
                 </div>
                 <div className="flex items-center">
@@ -348,61 +404,6 @@ export const SecurityToggle = () => {
                 </div>
               </div>
             </div>
-
-            <div className="border-t border-border-default pt-4">
-              <div className="flex items-center justify-between py-2 hover:bg-background-muted rounded-lg transition-all">
-                <div>
-                  <h4
-                    className={`text-sm font-medium ${enabled ? 'text-text-default' : 'text-text-muted'}`}
-                  >
-                    Enable Command Injection ML Detection
-                  </h4>
-                  <p className="text-xs text-text-muted max-w-md mt-[2px]">
-                    Use ML models to detect malicious shell commands
-                  </p>
-                </div>
-                <div className="flex items-center">
-                  <Switch
-                    checked={effectiveCommandClassifierEnabled}
-                    onCheckedChange={handleCommandClassifierToggle}
-                    disabled={!enabled}
-                    variant="mono"
-                  />
-                </div>
-              </div>
-
-              {hasCommandModel ? (
-                enabled &&
-                effectiveCommandClassifierEnabled && (
-                  <div className="text-sm text-gray-700 dark:text-gray-300 mt-2">
-                    ✓ Command classifier active (auto-configured from environment)
-                  </div>
-                )
-              ) : (
-                <div
-                  className={`overflow-hidden transition-all duration-300 ease-in-out ${
-                    enabled && effectiveCommandClassifierEnabled
-                      ? 'max-h-[32rem] opacity-100 mt-3'
-                      : 'max-h-0 opacity-0'
-                  }`}
-                >
-                  <div className={enabled && effectiveCommandClassifierEnabled ? '' : 'opacity-50'}>
-                    <ClassifierEndpointInputs
-                      endpointValue={commandEndpointInput}
-                      tokenValue={commandTokenInput}
-                      onEndpointChange={setCommandEndpointInput}
-                      onTokenChange={setCommandTokenInput}
-                      onEndpointBlur={handleCommandEndpointChange}
-                      onTokenBlur={handleCommandTokenChange}
-                      disabled={!enabled || !effectiveCommandClassifierEnabled}
-                      endpointPlaceholder="https://example.com/classify"
-                      tokenPlaceholder="token..."
-                      endpointDescription="Enter the full URL for your command injection classification service"
-                    />
-                  </div>
-                </div>
-              )}
-            </div>
           </div>
         </div>
       </div>