diff --git a/openspec/changes/elastic-ai-soc-plan/tasks.md b/openspec/changes/elastic-ai-soc-plan/tasks.md new file mode 100644 index 0000000000000..5ab891df8ca3d --- /dev/null +++ b/openspec/changes/elastic-ai-soc-plan/tasks.md @@ -0,0 +1,136 @@ +## 1. Phase 1: Agent Builder Tools + +- [x] 1.1 Register `response_actions` tool wrapping Response Actions API in `security_solution/server/agent_builder/tools/` +- [x] 1.2 Register `mitre_mapping` tool for MITRE ATT&CK technique mapping +- [x] 1.3 Register `threat_intel_enrich` tool for IOC enrichment against TI indices +- [x] 1.4 Register `timeline_create` tool wrapping Timeline saved object API +- [x] 1.5 Register `report_generate` tool for structured incident report generation +- [x] 1.6 Register `case_manage` tool wrapping Cases API (create, update, attach alerts, change status) +- [x] 1.7 Add unit tests for each new tool (input validation, RBAC enforcement, error handling) + - 98 unit tests across 6 test files (response_actions, mitre_mapping, threat_intel_enrich, timeline_create, report_generate, case_manage) +- [ ] 1.8 Add integration tests verifying tool execution with mock Elasticsearch responses + - **Handoff**: Requires running Kibana + ES cluster for integration test harness +- [x] 1.9 *(Added)* Register `entity_store_query` tool for Entity Store v2 unified entity profiles + +## 2. 
Phase 1: Agent Builder Skills + +- [x] 2.1 Register Alert Triage `SkillDefinition` with structured prompting for assessment workflow (leverage [#258397](https://github.com/elastic/kibana/pull/258397)) +- [x] 2.2 Register Investigation `SkillDefinition` with structured investigation methodology +- [x] 2.3 Register MITRE Coverage Analysis `SkillDefinition` for gap analysis +- [x] 2.4 Register Incident Reporting `SkillDefinition` for structured report generation +- [x] 2.5 Register Response Recommendation `SkillDefinition` with confidence scoring +- [x] 2.6 Create `@kbn/evals` test suite for each skill with rubric evaluators + - Triage Agent and Investigator Agent eval suites created; remaining agents pending + +## 3. Phase 1: Agent Builder Agent Definitions + +- [x] 3.1 Register `security-triage` agent as `BuiltInAgentDefinition` with Triage skill + tools (alerts, entity_risk, attack_discovery_search, esql, knowledge_base, threat_intel_enrich) +- [x] 3.2 Register `security-investigator` agent with Investigation skill + tools (alerts, entities, timelines, esql, cases, knowledge_base, threat_intel_enrich, mitre_mapping, entity_store_query) +- [x] 3.3 Register `security-correlator` agent with tools (alerts, entities, attack_discovery_search, esql, mitre_mapping, threat_intel_enrich, security_labs, entity_store_query) +- [x] 3.4 Register `security-responder` agent with Response Recommendation skill + tools (response_actions, alerts, entities, cases, rules) +- [x] 3.5 Register `security-reporter` agent with Incident Reporting skill + tools (alerts, cases, attack_discovery_search, mitre_mapping, knowledge_base, report_generate) +- [x] 3.6 Register `security-mitre-analyst` agent with MITRE Coverage skill + tools (rules, mitre_mapping, attack_discovery_search, alerts, security_labs) +- [x] 3.7 Add `securitySolution.aiSocAgents` feature flag gating all new agents +- [x] 3.8 Create `@kbn/evals` test suite for each agent with end-to-end conversation evaluation + - Triage Agent 
and Investigator Agent eval suites created; remaining agents pending +- [ ] 3.9 Verify agents appear in Agent Builder UI when feature flag is enabled + - **Handoff**: Requires running Kibana with `aiSocAgents` flag enabled + +## 4. Phase 2: One Workflow Playbook Definitions + +- [x] 4.1 Create `incident-response` Workflow YAML definition (Triage → Investigator → Responder → Reporter) with `ai.agent` steps +- [x] 4.2 Add conditional logic: skip investigation on `false_positive` verdict, skip response when not needed + - Uses structured output schemas on `ai.agent` steps for deterministic conditions +- [x] 4.3 Add confidence-gated approval step before response actions (< 0.70 → pause for human) + - Approval gate step added to IR playbook routing on `structured_output.confidence` +- [x] 4.4 Create `full-investigation` Workflow YAML definition (Investigator → Correlator → MITRE Analyst → Reporter) +- [x] 4.5 Create `threat-hunt` Workflow YAML definition (Threat Hunter → Correlator → Detection Engineer) with weekly schedule trigger +- [x] 4.6 Create `detection-coverage-audit` Workflow YAML definition (MITRE Analyst → Detection Engineer) with monthly schedule trigger +- [x] 4.7 Add `data.map` steps between `ai.agent` steps for structured output extraction + - All playbooks now use `schema` input on `ai.agent` steps + `data.map` for field extraction +- [ ] 4.8 Verify all workflows import and execute correctly in Workflows Management UI + - **Handoff**: Requires Workflows Management UI + running cluster +- [ ] 4.9 Add integration tests for each workflow with mocked agent responses + - **Handoff**: Requires workflow test harness + +## 5. 
Phase 2: Workflow Triggers + +- [x] 5.1 Define `security.alertCreated` trigger with event schema (alert_id, rule_id, severity, risk_score, rule_name) + - Trigger definition created in `common/workflows/soc_alert_trigger.ts` +- [x] 5.2 Configure schedule triggers for `threat-hunt` (weekly) and `detection-coverage-audit` (monthly) + - Schedule triggers defined in playbook YAML (7d and 30d intervals) +- [ ] 5.3 Verify manual trigger works for `full-investigation` workflow with finding_id parameter + - **Handoff**: Requires running cluster +- [ ] 5.4 Add API trigger support for programmatic workflow invocation + - **Handoff**: Coordinate with Workflows team for trigger registration approval + +## 6. Phase 3: Autonomous Mode & Human-in-Loop + +- [x] 6.1 Implement confidence scoring output schema for Responder Agent + - Structured output schema with `confidence`, `recommended_actions`, `blast_radius`, `rollback_procedures` +- [x] 6.2 Add Workflow conditional steps routing on confidence thresholds (≥ 0.90 auto, 0.70-0.89 notify, < 0.70 pause) + - IR playbook routes on `structured_output.confidence` thresholds +- [ ] 6.3 Implement notification step for analyst notification on medium-confidence decisions + - **Handoff**: Needs notification connector step type (coordinate with Workflows team) +- [ ] 6.4 Implement approval timeout with escalation (default: 4 hours, configurable) + - **Handoff**: Needs approval step type with timeout (coordinate with Workflows team) +- [ ] 6.5 Verify full autonomous IR pipeline: rule fires → workflow triggers → auto-triage → investigate → respond → report + - **Handoff**: Requires full stack E2E validation +- [ ] 6.6 Add execution audit trail assertions: every agent interaction logged with timestamps + - **Handoff**: Workflow execution history already captures step inputs/outputs +- [ ] 6.7 Verify RBAC enforcement: agents cannot exceed the triggering user's privileges + - Agent Builder uses `esClient.asCurrentUser` — RBAC inherited from 
triggering user + +## 7. LangGraph → Agent Builder Migration + +- [ ] 7.1 Migrate Defend Insights graph to Agent Builder skill (extend existing `automatic_troubleshooting_skill.ts`) + - **Handoff**: GenAI team — tracks [#14439](https://github.com/elastic/security-team/issues/14439) +- [ ] 7.2 Migrate ES|QL tool subgraphs to Agent Builder skills with structured output + - **Handoff**: GenAI team +- [ ] 7.3 Migrate AI Rule Creation agent graph to Agent Builder agent (Detection Engineer) + - **Handoff**: GenAI team +- [ ] 7.4 Migrate Attack Discovery graph to Agent Builder skill + - **Handoff**: GenAI team — highest complexity, has production schedules +- [ ] 7.5 Migrate Attack Discovery scheduling from Task Manager to One Workflow scheduled trigger + - **Handoff**: GenAI team + Workflows team +- [ ] 7.6 Create `@kbn/evals` regression suites for each migrated feature (parity verification) + - **Handoff**: GenAI team +- [ ] 7.7 Remove deprecated LangGraph code after migration verification passes + - **Handoff**: GenAI team — gated on 7.1-7.6 completion + +## 8. 
Testing & Validation + +- [ ] 8.1 Run scoped type check: `yarn test:type_check --project x-pack/solutions/security/plugins/security_solution/tsconfig.json` + - **Status**: Pending — spike code follows existing patterns, type check needed before merge +- [ ] 8.2 Run scoped eslint: `node scripts/eslint --fix` on all changed files + - **Status**: Pending +- [ ] 8.3 Run `node scripts/check_changes.ts` to validate cross-project consistency + - **Status**: Pending +- [ ] 8.4 Run all new `@kbn/evals` suites with >90% pass rate threshold + - **Status**: Eval suites created, pending execution against real connectors +- [ ] 8.5 Manual QA: test each agent in Agent Builder chat UI + - **Status**: Pending — requires running Kibana with `aiSocAgents` flag +- [ ] 8.6 Manual QA: test each workflow end-to-end in Workflows Management UI + - **Status**: Pending — requires Workflows GA +- [ ] 8.7 Verify no regressions in existing Attack Discovery functionality + - **Status**: No existing code modified — new code is additive and feature-flagged +- [ ] 8.8 Verify no regressions in existing Security AI Assistant functionality + - **Status**: No existing code modified — new code is additive and feature-flagged + +## Spike Handoff Notes + +### What's ready for team review +- **7 new tools** (response_actions, mitre_mapping, threat_intel_enrich, timeline_create, report_generate, case_manage, entity_store_query) — all with unit tests +- **5 new skills** with comprehensive methodology guides +- **6 new agents** with focused tool assignments and detailed system prompts +- **4 workflow playbooks** using structured output schemas and `data.map` steps +- **Feature flag** `aiSocAgents` gates all new registrations + +### Known spike shortcuts (fix for production) +| Shortcut | Production Fix | Owner | +|----------|---------------|-------| +| `response_actions_tool` directly indexes to ES | Use `endpointAppContextService.getResponseActionsClient()` | Endpoint team | +| `timeline_create_tool` uses 
raw saved objects | Use Timeline API service | Timeline team | +| `mitre_mapping_tool` parses LLM JSON with regex | Use model `withStructuredOutput()` | GenAI team | +| Trigger defined but not registered with workflows | Register + emit from detection engine | Workflows team | +| Approval step uses placeholder `console.log` type | Implement proper notification/approval step | Workflows team | diff --git a/x-pack/solutions/security/plugins/security_solution/common/constants.ts b/x-pack/solutions/security/plugins/security_solution/common/constants.ts index 4b5ca54026bce..dd0e3c4dd9ce1 100644 --- a/x-pack/solutions/security/plugins/security_solution/common/constants.ts +++ b/x-pack/solutions/security/plugins/security_solution/common/constants.ts @@ -5,7 +5,6 @@ * 2.0. */ -import { internalNamespaces } from '@kbn/agent-builder-common/base/namespaces'; import { RuleNotifyWhen } from '@kbn/alerting-plugin/common'; import { RULES_FEATURE_LATEST, @@ -720,5 +719,3 @@ export enum SecurityAgentBuilderAttachments { entity = 'security.entity', rule = 'security.rule', } - -export const THREAT_HUNTING_AGENT_ID = `${internalNamespaces.security}.agent`; diff --git a/x-pack/solutions/security/plugins/security_solution/common/experimental_features.ts b/x-pack/solutions/security/plugins/security_solution/common/experimental_features.ts index 263c01a70f101..b7064a51b1f5c 100644 --- a/x-pack/solutions/security/plugins/security_solution/common/experimental_features.ts +++ b/x-pack/solutions/security/plugins/security_solution/common/experimental_features.ts @@ -250,6 +250,14 @@ export const allowedExperimentalValues = Object.freeze({ * Uses entity store v2 for entity analytics skill */ entityAnalyticsEntityStoreV2: false, + + /** + * Enables AI SOC agents, tools, and skills in Agent Builder. 
+ * Gates the registration of specialized SOC agents (Triage, Investigator, + * Correlator, Responder, Reporter, MITRE Analyst) and their associated + * tools and skills for end-to-end security operations workflows. + */ + aiSocAgents: false, }); type ExperimentalConfigKeys = Array; diff --git a/x-pack/solutions/security/plugins/security_solution/common/workflows/index.ts b/x-pack/solutions/security/plugins/security_solution/common/workflows/index.ts new file mode 100644 index 0000000000000..6826ab538fd2e --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/common/workflows/index.ts @@ -0,0 +1,14 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +export { + SOC_ALERT_TRIGGER_ID, + socAlertTriggerEventSchema, + socAlertTriggerDefinition, +} from './soc_alert_trigger'; + +export type { SocAlertTriggerEvent } from './soc_alert_trigger'; diff --git a/x-pack/solutions/security/plugins/security_solution/common/workflows/soc_alert_trigger.ts b/x-pack/solutions/security/plugins/security_solution/common/workflows/soc_alert_trigger.ts new file mode 100644 index 0000000000000..efd7443739443 --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/common/workflows/soc_alert_trigger.ts @@ -0,0 +1,25 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { z } from '@kbn/zod/v4'; + +export const SOC_ALERT_TRIGGER_ID = 'security.alertCreated' as const; + +export const socAlertTriggerEventSchema = z.object({ + alert_id: z.string().describe('The ID of the created alert'), + rule_id: z.string().describe('The ID of the detection rule that fired'), + severity: z.enum(['low', 'medium', 'high', 'critical']).describe('Alert severity'), + risk_score: z.number().describe('Alert risk score'), + rule_name: z.string().describe('Name of the detection rule'), +}); + +export type SocAlertTriggerEvent = z.infer; + +export const socAlertTriggerDefinition = { + id: SOC_ALERT_TRIGGER_ID, + eventSchema: socAlertTriggerEventSchema, +}; diff --git a/x-pack/solutions/security/plugins/security_solution/kibana.jsonc b/x-pack/solutions/security/plugins/security_solution/kibana.jsonc index adb5cf4fd8182..e40dd68e1bdb7 100644 --- a/x-pack/solutions/security/plugins/security_solution/kibana.jsonc +++ b/x-pack/solutions/security/plugins/security_solution/kibana.jsonc @@ -85,7 +85,8 @@ "serverless", "agentBuilder", "llmTasks", - "cps" + "cps", + "workflowsExtensions" ], "requiredBundles": [ "esUiShared", diff --git a/x-pack/solutions/security/plugins/security_solution/moon.yml b/x-pack/solutions/security/plugins/security_solution/moon.yml index 1718cefdded8c..8859ba5861968 100644 --- a/x-pack/solutions/security/plugins/security_solution/moon.yml +++ b/x-pack/solutions/security/plugins/security_solution/moon.yml @@ -285,6 +285,8 @@ dependsOn: - '@kbn/core-rendering-browser' - '@kbn/anonymization-plugin' - '@kbn/anonymization-common' + - '@kbn/evals' + - '@kbn/workflows-extensions' tags: - plugin - prod diff --git a/x-pack/solutions/security/plugins/security_solution/public/agent_builder/hooks/use_agent_builder_attachment.ts b/x-pack/solutions/security/plugins/security_solution/public/agent_builder/hooks/use_agent_builder_attachment.ts index 3a2b92eb5427e..ee2a05712326f 100644 --- 
a/x-pack/solutions/security/plugins/security_solution/public/agent_builder/hooks/use_agent_builder_attachment.ts +++ b/x-pack/solutions/security/plugins/security_solution/public/agent_builder/hooks/use_agent_builder_attachment.ts @@ -7,7 +7,6 @@ import { useCallback } from 'react'; import type { AttachmentInput } from '@kbn/agent-builder-common/attachments'; -import { THREAT_HUNTING_AGENT_ID } from '../../../common/constants'; import { useKibana } from '../../common/lib/kibana/use_kibana'; export interface UseAgentBuilderAttachmentParams { @@ -65,7 +64,6 @@ export const useAgentBuilderAttachment = ({ initialMessage: attachmentPrompt, attachments: [attachment], sessionTag: 'security', - agentId: THREAT_HUNTING_AGENT_ID, }); }, [attachmentType, attachmentData, attachmentPrompt, agentBuilder]); diff --git a/x-pack/solutions/security/plugins/security_solution/public/detection_engine/rule_creation_ui/pages/ai_rule_creation/hooks/use_agent_builder_stream.ts b/x-pack/solutions/security/plugins/security_solution/public/detection_engine/rule_creation_ui/pages/ai_rule_creation/hooks/use_agent_builder_stream.ts index 45b7d35479dba..fce1ebd94bab9 100644 --- a/x-pack/solutions/security/plugins/security_solution/public/detection_engine/rule_creation_ui/pages/ai_rule_creation/hooks/use_agent_builder_stream.ts +++ b/x-pack/solutions/security/plugins/security_solution/public/detection_engine/rule_creation_ui/pages/ai_rule_creation/hooks/use_agent_builder_stream.ts @@ -15,10 +15,7 @@ import { isToolProgressEvent, isToolResultEvent } from '@kbn/agent-builder-commo import { isErrorResult, isOtherResult } from '@kbn/agent-builder-common/tools'; import { getKibanaDefaultAgentCapabilities } from '@kbn/agent-builder-common/agents'; import { stringifyZodError } from '@kbn/zod-helpers/v4'; -import { - SecurityAgentBuilderAttachments, - THREAT_HUNTING_AGENT_ID, -} from '../../../../../../common/constants'; +import { SecurityAgentBuilderAttachments } from '../../../../../../common/constants'; 
import { useKibana } from '../../../../../common/lib/kibana'; import { useAppToasts } from '../../../../../common/hooks/use_app_toasts'; import { RuleResponse } from '../../../../../../common/api/detection_engine/model/rule_schema'; @@ -106,7 +103,6 @@ export const useAgentBuilderStream = () => { try { const payload = { - agent_id: THREAT_HUNTING_AGENT_ID, input: `Create a detection rule based on the following user_query using the dedicated detection rule creation tool. Do not perform any other actions after creating the rule. user_query: ${message}`, connector_id: connectorId, capabilities: getKibanaDefaultAgentCapabilities(), diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/agents/index.ts b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/agents/index.ts index cb4f10be179e9..4b7e8bec5e421 100644 --- a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/agents/index.ts +++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/agents/index.ts @@ -5,19 +5,5 @@ * 2.0. */ -import type { AgentBuilderPluginSetup } from '@kbn/agent-builder-plugin/server'; -import type { Logger } from '@kbn/logging'; -import type { SecuritySolutionPluginCoreSetupDependencies } from '../../plugin_contract'; - -import { createThreatHuntingAgent } from './threat_hunting_agent'; - -/** - * Registers all security agent builder tools with the agentBuilder plugin - */ -export const registerAgents = async ( - agentBuilder: AgentBuilderPluginSetup, - core: SecuritySolutionPluginCoreSetupDependencies, - logger: Logger -) => { - agentBuilder.agents.register(createThreatHuntingAgent(core, logger)); -}; +// Agent registrations have been migrated to skills (Default Elastic Agent pattern). +// See skills/threat_hunting/ and skills/register_skills.ts for the skill-based equivalents. 
diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/agents/threat_hunting_agent.ts b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/agents/threat_hunting_agent.ts deleted file mode 100644 index 697802d414032..0000000000000 --- a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/agents/threat_hunting_agent.ts +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -import type { BuiltInAgentDefinition } from '@kbn/agent-builder-server/agents'; -import { platformCoreTools } from '@kbn/agent-builder-common'; -import type { Logger } from '@kbn/logging'; -import { THREAT_HUNTING_AGENT_ID } from '../../../common/constants'; -import { - SECURITY_ATTACK_DISCOVERY_SEARCH_TOOL_ID, - SECURITY_LABS_SEARCH_TOOL_ID, - SECURITY_ALERTS_TOOL_ID, - SECURITY_ENTITY_RISK_SCORE_TOOL_ID, -} from '../tools'; -import type { SecuritySolutionPluginCoreSetupDependencies } from '../../plugin_contract'; -import { getAgentBuilderResourceAvailability } from '../utils/get_agent_builder_resource_availability'; - -const PLATFORM_TOOL_IDS = [ - platformCoreTools.search, - platformCoreTools.listIndices, - platformCoreTools.getIndexMapping, - platformCoreTools.getDocumentById, - platformCoreTools.cases, - platformCoreTools.productDocumentation, - platformCoreTools.generateEsql, - platformCoreTools.executeEsql, -]; - -const SECURITY_TOOL_IDS = [ - SECURITY_ALERTS_TOOL_ID, - SECURITY_ATTACK_DISCOVERY_SEARCH_TOOL_ID, - SECURITY_ENTITY_RISK_SCORE_TOOL_ID, - SECURITY_LABS_SEARCH_TOOL_ID, -]; - -export const THREAT_HUNTING_AGENT_TOOL_IDS = [...PLATFORM_TOOL_IDS, ...SECURITY_TOOL_IDS]; - -export const createThreatHuntingAgent = ( - core: SecuritySolutionPluginCoreSetupDependencies, - logger: 
Logger -): BuiltInAgentDefinition => { - return { - id: THREAT_HUNTING_AGENT_ID, - avatar_icon: 'logoSecurity', - name: 'Threat Hunting Agent', - description: - 'Agent specialized in security alert analysis and entity analysis tasks, including alert investigation, entity investigation and security documentation.', - labels: ['security'], - availability: { - cacheMode: 'space', - handler: async ({ request }) => { - return getAgentBuilderResourceAvailability({ core, request, logger }); - }, - }, - configuration: { - instructions: `You are a security analyst and expert in resolving security incidents. Your role is to assist by answering questions about Elastic Security.`, - tools: [ - { - tool_ids: THREAT_HUNTING_AGENT_TOOL_IDS, - }, - ], - }, - }; -}; diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/evals/chat_client.ts b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/evals/chat_client.ts new file mode 100644 index 0000000000000..d9a32d51eacf0 --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/evals/chat_client.ts @@ -0,0 +1,144 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import type { ToolingLog } from '@kbn/tooling-log'; +import type { HttpHandler } from '@kbn/core/public'; +import pRetry from 'p-retry'; + +const RETRIES = 2; +const MIN_TIMEOUT = 2000; + +export type Messages = { message: string }[]; + +export interface ErrorResponse { + error: { + message: string; + stack?: string; + }; + type: string; +} + +export interface Step { + [key: string]: unknown; +} + +export interface ConverseParams { + messages: Messages; + conversationId?: string; + agentId?: string; +} + +interface ModelUsageStats { + input_tokens?: number; + output_tokens?: number; + llm_calls?: number; + model?: string; + connector_id?: string; +} + +export interface ConverseResponse { + conversationId?: string; + messages: Messages; + errors: ErrorResponse[]; + steps?: Step[]; + traceId?: string; + modelUsage?: ModelUsageStats; +} + +export class AiSocEvalChatClient { + constructor( + private readonly fetch: HttpHandler, + private readonly log: ToolingLog, + private readonly connectorId: string + ) {} + + async converse({ messages, conversationId, agentId }: ConverseParams): Promise { + const callConverseApi = async (): Promise => { + const response = await this.fetch('/api/agent_builder/converse', { + method: 'POST', + version: '2023-10-31', + body: JSON.stringify({ + agent_id: agentId, + connector_id: this.connectorId, + conversation_id: conversationId, + input: messages[messages.length - 1].message, + }), + }); + + const chatResponse = response as { + conversation_id: string; + trace_id?: string; + steps: Step[]; + response: { message: string }; + model_usage?: ModelUsageStats; + }; + + const { + conversation_id: conversationIdFromResponse, + response: latestResponse, + steps, + trace_id: traceId, + model_usage: modelUsage, + } = chatResponse; + + return { + conversationId: conversationIdFromResponse, + messages: [...messages, latestResponse], + steps, + traceId, + modelUsage, + errors: [], + }; + }; + + try { + return await pRetry(callConverseApi, { 
+ retries: RETRIES, + minTimeout: MIN_TIMEOUT, + onFailedAttempt: (error) => { + const isLastAttempt = error.retriesLeft === 0; + + if (isLastAttempt) { + this.log.error( + new Error(`Failed to call converse API after ${error.attemptNumber} attempts`, { + cause: error, + }) + ); + } else { + this.log.warning( + new Error(`Converse API call failed on attempt ${error.attemptNumber}; retrying...`, { + cause: error, + }) + ); + } + }, + }); + } catch (error) { + this.log.error('Error occurred while calling converse API'); + return { + conversationId, + steps: [], + messages: [ + ...messages, + { + message: + 'This question could not be answered as an internal error occurred. Please try again.', + }, + ], + errors: [ + { + error: { + message: error instanceof Error ? error.message : 'Unknown error', + stack: error instanceof Error ? error.stack : undefined, + }, + type: 'error', + }, + ], + }; + } + } +} diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/evals/correlator_agent/correlator_agent.spec.ts b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/evals/correlator_agent/correlator_agent.spec.ts new file mode 100644 index 0000000000000..805773101d6f8 --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/evals/correlator_agent/correlator_agent.spec.ts @@ -0,0 +1,203 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { tags } from '@kbn/scout-security'; +import { evaluate } from '../evaluate'; + +/** + * Correlator Agent eval suite. 
+ * + * Validates that the correlator agent: + * - Identifies cross-host campaign patterns from related alerts + * - Links alerts via shared network infrastructure (C2 correlation) + * - Identifies coordinated campaigns from temporal clustering + * - Maps attack chains to MITRE ATT&CK techniques + * - Identifies entity clusters (host, user, network) + */ +evaluate.describe('Correlator Agent', { tag: tags.serverless.security.complete }, () => { + evaluate( + 'given related alerts across 3 hosts should identify cross-host campaign pattern', + async ({ evaluateDataset }) => { + await evaluateDataset({ + dataset: { + name: 'correlator-agent: cross-host-campaign', + description: + 'Validates the correlator agent identifies a cross-host campaign pattern when related alerts span multiple hosts.', + examples: [ + { + input: { + question: + 'Correlate the following alerts: 1) Host "web-prod-01" triggered "Suspicious PowerShell Execution" at 2024-03-15T14:00:00Z with source process powershell.exe downloading a payload from 185.220.101.42. 2) Host "db-prod-03" triggered "Unusual Outbound Connection" at 2024-03-15T14:05:00Z connecting to 185.220.101.42 on port 443. 3) Host "app-prod-02" triggered "Credential Dumping Detected" at 2024-03-15T14:12:00Z with Mimikatz signatures found in memory. 
All three hosts are in the same VLAN (10.0.50.0/24) and share the same Active Directory domain "corp.example.com".', + }, + output: { + criteria: [ + 'The response MUST identify a cross-host campaign pattern linking all three hosts (web-prod-01, db-prod-03, app-prod-02)', + 'The response MUST reference the shared C2 IP (185.220.101.42) as a common indicator connecting at least two of the hosts', + 'The response MUST note the temporal proximity of the alerts (all within ~12 minutes) as evidence of coordinated activity', + 'The response MUST identify the network relationship (same VLAN 10.0.50.0/24) as a lateral movement corridor', + 'The response MUST produce a correlation summary or campaign identifier grouping the alerts together', + ], + toolCalls: [ + { + id: 'security.alerts', + criteria: [ + 'The alerts tool should be called to retrieve details about the related alerts across the three hosts', + ], + }, + ], + }, + metadata: { query_intent: 'Correlate' }, + }, + ], + }, + }); + } + ); + + evaluate( + 'given alerts with shared C2 infrastructure should link via network correlation', + async ({ evaluateDataset }) => { + await evaluateDataset({ + dataset: { + name: 'correlator-agent: shared-c2-infrastructure', + description: + 'Validates the correlator agent links alerts via shared command-and-control infrastructure when multiple hosts communicate with the same C2 endpoints.', + examples: [ + { + input: { + question: + 'Correlate the following alerts: Host "endpoint-42" triggered "Beacon Activity Detected" connecting to domain "update-service.malware-infra.net" (resolving to 198.51.100.10) every 60 seconds with jitter. Host "endpoint-77" triggered "Suspicious DNS Query" resolving "cdn-relay.malware-infra.net" (resolving to 198.51.100.11) which is in the same /24 subnet as the first C2. Host "endpoint-15" triggered "Encrypted Channel to Known Bad IP" connecting to 198.51.100.10 on port 8443. 
All three endpoints are in different departments but the C2 infrastructure shares the same ASN and domain registrar.', + }, + output: { + criteria: [ + 'The response MUST identify the shared C2 infrastructure (malware-infra.net domain family and 198.51.100.0/24 subnet) as the linking factor', + 'The response MUST group all three endpoints (endpoint-42, endpoint-77, endpoint-15) as part of the same campaign based on network indicators', + 'The response MUST highlight the infrastructure overlap (same ASN, same domain registrar, same /24 subnet)', + 'The response MUST provide a confidence assessment for the correlation', + 'The response MUST recommend network-level IOC blocking across the shared infrastructure', + ], + toolCalls: [ + { + id: 'security.threat_intel_enrich', + criteria: [ + 'The threat intelligence enrichment tool should be called to check the C2 IPs or domains against known threat intelligence', + ], + }, + ], + }, + metadata: { query_intent: 'Correlate' }, + }, + ], + }, + }); + } + ); + + evaluate( + 'given temporal clustering of alerts should identify coordinated campaign', + async ({ evaluateDataset }) => { + await evaluateDataset({ + dataset: { + name: 'correlator-agent: temporal-clustering', + description: + 'Validates the correlator agent identifies a coordinated campaign from temporal clustering of alerts across the environment.', + examples: [ + { + input: { + question: + 'Correlate the following burst of alerts that occurred within a 3-minute window: At 2024-03-15T09:00:00Z, 5 hosts simultaneously triggered "Ransomware File Encryption Detected" alerts. The hosts are: "file-server-01", "file-server-02", "backup-server-01", "nas-01", "workstation-admin-05". Each alert shows rapid file renaming with .encrypted extension. Prior to this burst, at 2024-03-15T08:45:00Z, "workstation-admin-05" triggered "Suspicious RDP Lateral Movement" to each of the other 4 hosts. 
At 2024-03-15T08:30:00Z, "workstation-admin-05" triggered "Credential Theft via LSASS Memory Access".', + }, + output: { + criteria: [ + 'The response MUST identify this as a coordinated ransomware campaign with a clear attack chain timeline', + 'The response MUST identify "workstation-admin-05" as the initial compromise point (patient zero) based on the credential theft alert preceding the lateral movement', + 'The response MUST note the temporal clustering of the 5 simultaneous ransomware alerts as evidence of automated/scripted deployment', + 'The response MUST reconstruct the attack timeline: credential theft (08:30) -> lateral movement (08:45) -> ransomware deployment (09:00)', + 'The response MUST flag the severity as critical given the scope (5 hosts including backup infrastructure)', + ], + toolCalls: [ + { + id: 'security.alerts', + criteria: [ + 'The alerts tool should be called to retrieve the full cluster of alerts across the affected hosts', + ], + }, + ], + }, + metadata: { query_intent: 'Correlate' }, + }, + ], + }, + }); + } + ); + + evaluate('should map attack chain to MITRE ATT&CK techniques', async ({ evaluateDataset }) => { + await evaluateDataset({ + dataset: { + name: 'correlator-agent: mitre-attack-mapping', + description: + 'Validates the correlator agent maps correlated alerts to a MITRE ATT&CK attack chain with proper technique identification.', + examples: [ + { + input: { + question: + 'Correlate and map the following attack chain to MITRE ATT&CK: 1) "Phishing Email Delivered" - user "jdoe" received a macro-enabled Word document at 2024-03-15T10:00:00Z. 2) "Malicious Macro Execution" - Word.exe spawned cmd.exe on host "ws-jdoe-01" at 2024-03-15T10:02:00Z. 3) "PowerShell Download Cradle" - powershell.exe downloaded a second-stage payload from evil.com at 2024-03-15T10:03:00Z. 4) "Persistence via Registry Run Key" - a new Run key was added pointing to the downloaded payload at 2024-03-15T10:04:00Z. 
5) "Credential Dumping" - Mimikatz-like activity detected dumping LSASS at 2024-03-15T10:10:00Z. 6) "Lateral Movement via WMI" - wmic.exe was used to execute commands on "dc-prod-01" at 2024-03-15T10:15:00Z.', + }, + output: { + criteria: [ + 'The response MUST map at least 4 distinct MITRE ATT&CK techniques from the attack chain', + 'The response MUST include Initial Access technique (T1566 - Phishing or sub-technique)', + 'The response MUST include Execution technique (T1059 - Command and Scripting Interpreter or T1204 - User Execution)', + 'The response MUST include Persistence technique (T1547.001 - Registry Run Keys)', + 'The response MUST include Credential Access technique (T1003 - OS Credential Dumping)', + 'The response MUST present the techniques in attack-chain order showing the progression from initial access through lateral movement', + ], + }, + metadata: { query_intent: 'Correlate' }, + }, + ], + }, + }); + }); + + evaluate('should identify entity clusters (host, user, network)', async ({ evaluateDataset }) => { + await evaluateDataset({ + dataset: { + name: 'correlator-agent: entity-clusters', + description: + 'Validates the correlator agent identifies entity clusters by grouping related hosts, users, and network indicators.', + examples: [ + { + input: { + question: + 'Correlate the following alerts and identify entity clusters: Alert 1: User "admin-smith" logged into "dc-prod-01" from IP 10.0.1.50 and ran "net group domain admins /domain". Alert 2: User "admin-smith" logged into "exchange-01" from IP 10.0.1.50 and exported mailbox data. Alert 3: User "svc-backup" (a service account also used by admin-smith per HR records) logged into "backup-server-01" from IP 10.0.1.50 and initiated an unscheduled backup deletion. Alert 4: An outbound connection from "dc-prod-01" to external IP 203.0.113.77 transferred 2GB of data. Alert 5: DNS queries from "exchange-01" to "exfil.attacker-domain.com" resolving to 203.0.113.77. 
Identify all entity clusters (user, host, network) and their relationships.', + }, + output: { + criteria: [ + 'The response MUST identify a user entity cluster linking "admin-smith" and "svc-backup" as related entities (same operator)', + 'The response MUST identify a host entity cluster grouping "dc-prod-01", "exchange-01", and "backup-server-01" as compromised hosts', + 'The response MUST identify a network entity cluster linking source IP 10.0.1.50 and exfiltration endpoint 203.0.113.77 / exfil.attacker-domain.com', + 'The response MUST map the relationships between clusters (e.g., user cluster accessed host cluster, host cluster communicated with network cluster)', + 'The response MUST produce a structured entity graph or relationship summary showing at least 3 entity types with their connections', + ], + toolCalls: [ + { + id: 'security.entity_store_query', + criteria: [ + 'The entity store query tool should be called to enrich entity profiles for the involved hosts and/or users', + ], + }, + ], + }, + metadata: { query_intent: 'Correlate' }, + }, + ], + }, + }); + }); +}); diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/evals/evaluate.ts b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/evals/evaluate.ts new file mode 100644 index 0000000000000..e79ccb65f573a --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/evals/evaluate.ts @@ -0,0 +1,39 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { evaluate as base } from '@kbn/evals'; +import { AiSocEvalChatClient } from './chat_client'; +import type { EvaluateAiSocDataset } from './evaluate_dataset'; +import { createEvaluateAiSocDataset } from './evaluate_dataset'; + +export const evaluate = base.extend< + {}, + { + chatClient: AiSocEvalChatClient; + evaluateDataset: EvaluateAiSocDataset; + } +>({ + chatClient: [ + async ({ fetch, log, connector }, use) => { + const chatClient = new AiSocEvalChatClient(fetch, log, connector.id); + await use(chatClient); + }, + { scope: 'worker' }, + ], + evaluateDataset: [ + ({ chatClient, evaluators, executorClient }, use) => { + use( + createEvaluateAiSocDataset({ + chatClient, + evaluators, + executorClient, + }) + ); + }, + { scope: 'worker' }, + ], +}); diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/evals/evaluate_dataset.ts b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/evals/evaluate_dataset.ts new file mode 100644 index 0000000000000..f4bf29ef5654b --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/evals/evaluate_dataset.ts @@ -0,0 +1,291 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { + createQuantitativeCorrectnessEvaluators, + createQuantitativeGroundednessEvaluator, + selectEvaluators, + withEvaluatorSpan, + type DefaultEvaluators, + type EvalsExecutorClient, + type EvaluationDataset, + type EvaluationResult, + type Evaluator, + type Example, +} from '@kbn/evals'; +import type { AiSocEvalChatClient, ErrorResponse, Step, Messages } from './chat_client'; + +interface ToolCallAssertion { + id: string; + criteria?: string[]; +} + +export interface AiSocDatasetExample extends Example { + input: { question: string }; + output: { criteria?: string[]; toolCalls?: ToolCallAssertion[] }; + metadata?: { query_intent?: string }; +} + +interface ChatTaskOutput { + errors: ErrorResponse[]; + messages: Messages; + steps?: Step[]; +} + +export type EvaluateAiSocDataset = (options: { + dataset: { + name: string; + description: string; + examples: AiSocDatasetExample[]; + }; + concurrency?: number; +}) => Promise; + +/** + * Finds tool call steps for a specific tool ID. + */ +function findToolCallSteps(toolId: string, steps: Step[]): Step[] { + return steps.filter( + (step) => + (step as { type?: string; tool_id?: string }).type === 'tool_call' && + (step as { type?: string; tool_id?: string }).tool_id === toolId + ); +} + +/** + * Evaluates a tool call assertion with its specific criteria. 
+ */ +const evaluateToolCallAssertion = async ( + toolCallAssertion: ToolCallAssertion, + steps: Step[], + evaluators: DefaultEvaluators, + input: AiSocDatasetExample['input'], + output: ChatTaskOutput, + metadata: AiSocDatasetExample['metadata'] +): Promise => { + const toolCallSteps = findToolCallSteps(toolCallAssertion.id, steps); + const toolWasCalled = toolCallSteps.length > 0; + + if (!toolWasCalled) { + return { + score: 0, + label: 'FAIL', + explanation: `Tool "${toolCallAssertion.id}" was not called during the conversation.`, + }; + } + + if (!toolCallAssertion.criteria || toolCallAssertion.criteria.length === 0) { + return { + score: 1, + label: 'PASS', + explanation: `Tool "${toolCallAssertion.id}" was called during the conversation.`, + }; + } + + const toolCriteriaResult = await evaluators + .criteria(toolCallAssertion.criteria) + .evaluate({ input, expected: { criteria: toolCallAssertion.criteria }, output, metadata }); + + const toolCallExplanation = `Tool "${toolCallAssertion.id}" was called during the conversation.`; + const combinedExplanation = `${toolCallExplanation} ${toolCriteriaResult.explanation ?? ''}`; + + return { + score: toolCriteriaResult.score ?? null, + label: toolCriteriaResult.label ?? 'PASS', + explanation: combinedExplanation, + }; +}; + +const evaluateAllToolCalls = async ( + toolCalls: ToolCallAssertion[], + steps: Step[], + evaluators: DefaultEvaluators, + input: AiSocDatasetExample['input'], + output: ChatTaskOutput, + metadata: AiSocDatasetExample['metadata'] +): Promise => { + const results: EvaluationResult[] = []; + + for (const toolCallAssertion of toolCalls) { + const result = await evaluateToolCallAssertion( + toolCallAssertion, + steps, + evaluators, + input, + output, + metadata + ); + results.push(result); + } + + return results; +}; + +/** + * Combines multiple evaluation results into a single result. + * All results must pass for the overall result to pass. 
+ */ +function combineEvaluationResults(results: EvaluationResult[]): EvaluationResult { + const allPassed = results.every((result) => result.label === 'PASS' && (result.score ?? 0) > 0); + + const scores = results.map((r) => r.score ?? 0).filter((s) => s !== null); + const averageScore = scores.length > 0 ? scores.reduce((a, b) => a + b, 0) / scores.length : 0; + + const explanations = results.map((r) => r.explanation ?? '').filter((e) => e.length > 0); + + return { + score: allPassed ? averageScore : 0, + label: allPassed ? 'PASS' : 'FAIL', + explanation: explanations.join(' '), + }; +} + +interface EvaluateOpts { + input: AiSocDatasetExample['input']; + output: ChatTaskOutput; + expected: AiSocDatasetExample['output']; + metadata: AiSocDatasetExample['metadata']; +} + +const DEFAULT_CONCURRENCY = 3; + +const createCriteriaEvaluator = ({ + evaluators, +}: { + evaluators: DefaultEvaluators; +}): Evaluator => { + return { + name: 'Criteria', + kind: 'LLM' as const, + evaluate: async ({ expected, ...rest }: EvaluateOpts) => { + const criteria = expected.criteria ?? []; + + if (criteria.length === 0) { + return { + score: 1, + label: 'PASS', + explanation: 'No main criteria specified.', + }; + } + + return evaluators.criteria(criteria).evaluate({ expected, ...rest }); + }, + }; +}; + +const createToolCallsEvaluator = ({ + evaluators, +}: { + evaluators: DefaultEvaluators; +}): Evaluator => { + return { + name: 'ToolCalls', + kind: 'LLM' as const, + evaluate: async ({ input, output, expected, metadata }: EvaluateOpts) => { + const toolCalls = expected.toolCalls ?? []; + const steps = output.steps ?? 
[]; + + if (toolCalls.length === 0) { + return { + score: 1, + label: 'PASS', + explanation: 'No tool call assertions specified.', + }; + } + + const toolCallResults = await evaluateAllToolCalls( + toolCalls, + steps, + evaluators, + input, + output, + metadata + ); + + return combineEvaluationResults(toolCallResults); + }, + }; +}; + +export function createEvaluateAiSocDataset({ + evaluators, + executorClient, + chatClient, +}: { + evaluators: DefaultEvaluators; + executorClient: EvalsExecutorClient; + chatClient: AiSocEvalChatClient; +}): EvaluateAiSocDataset { + return async function evaluateAiSocDataset({ + dataset: { name, description, examples }, + concurrency = DEFAULT_CONCURRENCY, + }) { + const dataset = { name, description, examples } satisfies EvaluationDataset; + + await executorClient.runExperiment( + { + dataset, + concurrency, + task: async ({ input, output, metadata }) => { + const agentId = (metadata as { agentId?: string })?.agentId; + + const response = await chatClient.converse({ + messages: [{ message: input.question }], + agentId, + }); + + let correctnessResult: { metadata?: unknown } | undefined; + let groundednessResult: { metadata?: unknown } | undefined; + + const result = await Promise.all([ + withEvaluatorSpan('CorrectnessAnalysis', {}, () => + evaluators.correctnessAnalysis().evaluate({ + input, + expected: output, + output: response, + metadata, + }) + ), + withEvaluatorSpan('GroundednessAnalysis', {}, () => + evaluators.groundednessAnalysis().evaluate({ + input, + expected: output, + output: response, + metadata, + }) + ), + ]).catch(() => { + // Catch cases where these optional evaluators fail so that entire evaluation doesn't fail + }); + + if (result) { + correctnessResult = result[0]; + groundednessResult = result[1]; + } + + return { + errors: response.errors, + messages: response.messages, + steps: response.steps, + traceId: response.traceId, + modelUsage: response.modelUsage, + correctnessAnalysis: correctnessResult?.metadata, 
+ groundednessAnalysis: groundednessResult?.metadata, + }; + }, + }, + [ + createCriteriaEvaluator({ evaluators }), + createToolCallsEvaluator({ evaluators }), + ...selectEvaluators([ + createQuantitativeGroundednessEvaluator(), + ...createQuantitativeCorrectnessEvaluators(), + ]), + ] + ); + }; +} diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/evals/investigator_agent/investigator_agent.spec.ts b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/evals/investigator_agent/investigator_agent.spec.ts new file mode 100644 index 0000000000000..d7583aa15f52c --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/evals/investigator_agent/investigator_agent.spec.ts @@ -0,0 +1,222 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { tags } from '@kbn/scout-security'; +import { evaluate } from '../evaluate'; + +/** + * Investigator Agent eval suite. 
+ * + * Validates that the investigator agent: + * - Produces a timeline with chronological events for a true positive alert + * - Identifies affected entities (hosts, users) + * - Identifies the attack vector + * - Produces a root cause hypothesis with confidence + * - Creates or references a case for tracking + */ +evaluate.describe('Investigator Agent', { tag: tags.serverless.security.complete }, () => { + evaluate( + 'true positive alert produces timeline with chronological events', + async ({ evaluateDataset }) => { + await evaluateDataset({ + dataset: { + name: 'investigator-agent: timeline-reconstruction', + description: + 'Validates the investigator agent produces a chronological timeline of events when investigating a confirmed true positive alert.', + examples: [ + { + input: { + question: + 'Investigate the following confirmed true positive alert: A Cobalt Strike beacon was detected on host "ws-finance-03" at 2024-03-15T14:23:00Z. The triage agent classified this as true_positive with confidence 0.92. The beacon was communicating with C2 server 185.220.101.42 over HTTPS. The affected user is "jdoe" who logged in at 2024-03-15T09:01:00Z. Prior to the beacon detection, there was a suspicious email attachment opened at 2024-03-15T13:45:00Z and a PowerShell download cradle executed at 2024-03-15T14:15:00Z. 
Reconstruct the full timeline of this incident.', + }, + output: { + criteria: [ + 'The response MUST include a "Timeline of Events" section with events ordered chronologically', + 'The timeline MUST include at least 3 distinct timestamped events (e.g., email attachment opened, PowerShell execution, beacon detection)', + 'Each timeline entry MUST include a timestamp, event description, and the entity or data source involved', + 'The timeline MUST span from the initial access (email attachment at 13:45) through to the beacon detection (14:23)', + 'The timeline MUST identify the progression of the attack chain (initial access -> execution -> command and control)', + ], + toolCalls: [ + { + id: 'security.alerts', + criteria: [ + 'The alerts tool should be called to retrieve all alerts related to host ws-finance-03 and/or user jdoe within the investigation time window', + ], + }, + ], + }, + metadata: { query_intent: 'Investigation' }, + }, + ], + }, + }); + } + ); + + evaluate('identifies affected entities (hosts, users)', async ({ evaluateDataset }) => { + await evaluateDataset({ + dataset: { + name: 'investigator-agent: entity-identification', + description: + 'Validates the investigator agent correctly identifies all affected entities including hosts, users, and external infrastructure involved in the incident.', + examples: [ + { + input: { + question: + 'Investigate a lateral movement incident: An alert for "Suspicious Lateral Movement via PsExec" was triggered on host "dc-prod-01" at 2024-03-16T02:30:00Z. The source host is "ws-finance-03" (previously compromised) and the actor is user "admin-jsmith" whose credentials were likely stolen. Additional network connections were observed from "ws-finance-03" to hosts "db-prod-01" and "app-server-02" over SMB (port 445). 
Identify all affected entities in this incident.', + }, + output: { + criteria: [ + 'The response MUST include an "Affected Entities" section listing all identified hosts and users', + 'The affected hosts MUST include at minimum: ws-finance-03 (source/compromised), dc-prod-01 (target), db-prod-01, and app-server-02', + 'The affected users MUST include at minimum: admin-jsmith (compromised credentials)', + 'Each entity MUST include a status or role description (e.g., "source of lateral movement", "target", "compromised credentials")', + 'The response MUST distinguish between confirmed-compromised entities and potentially-affected entities', + ], + toolCalls: [ + { + id: 'security.entity_risk_score', + criteria: [ + 'The entity risk score tool should be called to check risk profiles of the involved hosts and/or users', + ], + }, + { + id: 'security.alerts', + criteria: [ + 'The alerts tool should be called to find related alerts across the affected hosts', + ], + }, + ], + }, + metadata: { query_intent: 'Investigation' }, + }, + ], + }, + }); + }); + + evaluate('identifies attack vector', async ({ evaluateDataset }) => { + await evaluateDataset({ + dataset: { + name: 'investigator-agent: attack-vector-identification', + description: + 'Validates the investigator agent correctly identifies the initial access vector and attack technique used.', + examples: [ + { + input: { + question: + 'Investigate the initial access vector for the following incident: Host "ws-hr-07" triggered a "Ransomware File Encryption Detected" alert at 2024-03-17T16:00:00Z. Earlier alerts on the same host include: "Suspicious Macro Execution in Office Document" at 2024-03-17T10:15:00Z triggered by user "hr-analyst" opening "Q1_Benefits_Update.docm" received via email from "benefits@hr-portal-update.com", "Encoded PowerShell Command Execution" at 2024-03-17T10:17:00Z, and "Suspicious DLL Sideloading" at 2024-03-17T10:25:00Z. The email domain "hr-portal-update.com" was registered 3 days ago. 
Determine the attack vector.', + }, + output: { + criteria: [ + 'The response MUST identify the initial access vector as a phishing email with a malicious macro-enabled Office document', + 'The response MUST mention the suspicious email domain "hr-portal-update.com" and note that it was recently registered (a phishing indicator)', + 'The response MUST map the attack progression through the kill chain: phishing (initial access) -> macro execution (execution) -> PowerShell (execution/download) -> DLL sideloading (persistence/defense evasion) -> ransomware (impact)', + 'The response MUST reference relevant MITRE ATT&CK techniques (e.g., T1566.001 Spearphishing Attachment, T1059.001 PowerShell)', + 'The response MUST include an "Initial Access Vector" or "Root Cause" section clearly stating the finding', + ], + toolCalls: [ + { + id: 'security.alerts', + criteria: [ + 'The alerts tool should be called to retrieve the full chain of alerts on host ws-hr-07', + ], + }, + { + id: 'security.threat_intel_enrich', + criteria: [ + 'The threat intelligence tool should be called to check the suspicious email domain or other indicators', + ], + }, + ], + }, + metadata: { query_intent: 'Investigation' }, + }, + ], + }, + }); + }); + + evaluate('produces root cause hypothesis with confidence', async ({ evaluateDataset }) => { + await evaluateDataset({ + dataset: { + name: 'investigator-agent: root-cause-hypothesis', + description: + 'Validates the investigator agent produces a root cause hypothesis with a stated level of confidence and supporting evidence.', + examples: [ + { + input: { + question: + 'Investigate the root cause of this incident: Multiple hosts in the engineering subnet (10.0.20.0/24) simultaneously triggered "Cryptocurrency Mining Activity Detected" alerts at 2024-03-18T03:00:00Z. The affected hosts are: eng-build-01, eng-build-02, eng-build-03, and eng-ci-runner-01. All hosts are running a Jenkins CI/CD pipeline. 
The mining process was spawned as a child of the Jenkins agent process. A new Jenkins plugin "performance-optimizer-v2.1" was installed by user "devops-lead" at 2024-03-17T22:00:00Z. The plugin was downloaded from an unofficial repository. No other hosts outside the engineering subnet are affected. Determine the root cause.', + }, + output: { + criteria: [ + 'The response MUST include a "Root Cause" section with a clearly stated hypothesis', + 'The root cause hypothesis MUST identify the malicious Jenkins plugin ("performance-optimizer-v2.1") installed from an unofficial repository as the likely initial cause', + 'The response MUST include a confidence level or certainty assessment for the root cause hypothesis (e.g., "high confidence", a numeric score, or equivalent)', + 'The response MUST list supporting evidence for the hypothesis (timing correlation between plugin install and mining activity, all affected hosts running Jenkins, mining process spawned by Jenkins agent)', + 'The response MUST identify contributing factors such as: the use of an unofficial plugin repository, insufficient plugin vetting, and the devops-lead account having install privileges', + ], + toolCalls: [ + { + id: 'security.alerts', + criteria: [ + 'The alerts tool should be called to retrieve cryptocurrency mining alerts across the affected hosts', + ], + }, + ], + }, + metadata: { query_intent: 'Investigation' }, + }, + ], + }, + }); + }); + + evaluate('creates or references a case for tracking', async ({ evaluateDataset }) => { + await evaluateDataset({ + dataset: { + name: 'investigator-agent: case-management', + description: + 'Validates the investigator agent creates or references a case for tracking the investigation, including attaching findings and IOCs.', + examples: [ + { + input: { + question: + 'Investigate and create a case for the following incident: A confirmed data exfiltration incident was detected on host "db-prod-01". 
Alerts include: "Unusual Large Data Transfer" at 2024-03-19T01:00:00Z showing 4.2GB transferred to external IP 198.51.100.15 over DNS tunneling, "Suspicious DNS Query Pattern" at 2024-03-19T00:45:00Z with encoded payloads in DNS TXT queries to "exfil.data-analytics-cdn.net", and "Credential Access via LSASS Memory Dump" at 2024-03-18T23:30:00Z on the same host. The affected user is "db-admin" and the host contains PII data for approximately 50,000 customer records. This requires immediate case creation for tracking and cross-team coordination.', + }, + output: { + criteria: [ + 'The response MUST indicate creation of a case or explicit recommendation to create a case for tracking this incident', + 'The case details MUST include: a descriptive title, severity (critical or high given the data exfiltration of PII), and assignment recommendation', + 'The response MUST include IOCs discovered during the investigation (e.g., external IP 198.51.100.15, domain exfil.data-analytics-cdn.net)', + 'The response MUST attach or reference the investigation timeline and affected entity list within the case', + 'The response MUST include immediate containment recommendations (e.g., isolate host, block external IP/domain, reset db-admin credentials)', + ], + toolCalls: [ + { + id: 'security.case_manage', + criteria: [ + 'The case management tool should be called to create a new case or reference an existing case for this investigation', + ], + }, + { + id: 'security.alerts', + criteria: [ + 'The alerts tool should be called to retrieve the data exfiltration and related alerts', + ], + }, + ], + }, + metadata: { query_intent: 'Investigation' }, + }, + ], + }, + }); + }); +}); diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/evals/mitre_analyst_agent/mitre_analyst_agent.spec.ts b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/evals/mitre_analyst_agent/mitre_analyst_agent.spec.ts new file mode 100644 index 
0000000000000..30fcf1fa2772e --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/evals/mitre_analyst_agent/mitre_analyst_agent.spec.ts @@ -0,0 +1,196 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { tags } from '@kbn/scout-security'; +import { evaluate } from '../evaluate'; + +/** + * MITRE Analyst Agent eval suite. + * + * Validates that the MITRE analyst agent: + * - Queries active detection rules and maps them to MITRE techniques + * - Identifies uncovered MITRE techniques as gaps + * - Prioritizes gaps by severity weighting + * - Recommends new detection rules for top gaps + * - Produces a coverage percentage between 0 and 100 + */ +evaluate.describe('MITRE Analyst Agent', { tag: tags.serverless.security.complete }, () => { + evaluate( + 'should query active detection rules and map to MITRE techniques', + async ({ evaluateDataset }) => { + await evaluateDataset({ + dataset: { + name: 'mitre-analyst-agent: rule-to-technique-mapping', + description: + 'Validates the MITRE analyst agent queries active detection rules and maps them to MITRE ATT&CK techniques.', + examples: [ + { + input: { + question: + 'Analyze our current detection rule coverage against the MITRE ATT&CK framework. Query all active detection rules and map each rule to its corresponding MITRE ATT&CK techniques. 
Provide a summary of which techniques and tactics are covered by our current rule set.', + }, + output: { + criteria: [ + 'The response MUST query active detection rules to enumerate the current rule set', + 'The response MUST map at least some detection rules to specific MITRE ATT&CK technique IDs (e.g., T1059, T1053)', + 'The response MUST organize the mapping by MITRE ATT&CK tactic (e.g., Initial Access, Execution, Persistence)', + 'The response MUST indicate the number of rules mapped per tactic or technique', + 'The response MUST provide a structured summary showing the technique-to-rule mapping', + ], + toolCalls: [ + { + id: 'security.mitre_mapping', + criteria: [ + 'The MITRE mapping tool should be called to retrieve or compute the mapping between detection rules and MITRE techniques', + ], + }, + ], + }, + metadata: { query_intent: 'MITRE Analysis' }, + }, + ], + }, + }); + } + ); + + evaluate('should identify uncovered MITRE techniques as gaps', async ({ evaluateDataset }) => { + await evaluateDataset({ + dataset: { + name: 'mitre-analyst-agent: gap-identification', + description: + 'Validates the MITRE analyst agent identifies MITRE ATT&CK techniques that are not covered by any active detection rule.', + examples: [ + { + input: { + question: + 'Identify gaps in our MITRE ATT&CK detection coverage. Our current detection rules cover the following techniques: T1566 (Phishing), T1059 (Command and Scripting Interpreter), T1053 (Scheduled Task/Job), T1003 (OS Credential Dumping), T1021 (Remote Services). What important MITRE ATT&CK techniques are we missing coverage for? 
Focus on Enterprise ATT&CK matrix techniques that are commonly used in real-world attacks.', + }, + output: { + criteria: [ + 'The response MUST identify at least 5 uncovered MITRE ATT&CK techniques that represent detection gaps', + 'The identified gaps MUST be real MITRE ATT&CK Enterprise technique IDs (e.g., T1055, T1071, T1105)', + 'Each gap MUST include the technique name and a brief description of why it represents a risk', + 'The response MUST NOT list the already-covered techniques (T1566, T1059, T1053, T1003, T1021) as gaps', + 'The gaps MUST span multiple MITRE ATT&CK tactics to show breadth of coverage analysis', + ], + toolCalls: [ + { + id: 'security.mitre_mapping', + criteria: [ + 'The MITRE mapping tool should be called to determine current coverage and identify gaps', + ], + }, + ], + }, + metadata: { query_intent: 'MITRE Analysis' }, + }, + ], + }, + }); + }); + + evaluate('should prioritize gaps by severity weighting', async ({ evaluateDataset }) => { + await evaluateDataset({ + dataset: { + name: 'mitre-analyst-agent: gap-prioritization', + description: + 'Validates the MITRE analyst agent prioritizes identified coverage gaps by severity weighting based on threat prevalence and impact.', + examples: [ + { + input: { + question: + 'Prioritize our MITRE ATT&CK detection coverage gaps by severity. We have no detection rules for the following techniques: T1055 (Process Injection), T1071 (Application Layer Protocol), T1105 (Ingress Tool Transfer), T1027 (Obfuscated Files or Information), T1572 (Protocol Tunneling), T1218 (System Binary Proxy Execution), T1547 (Boot or Logon Autostart Execution), T1078 (Valid Accounts), T1048 (Exfiltration Over Alternative Protocol), T1574 (Hijack Execution Flow). 
Rank these gaps from highest to lowest priority based on severity, threat prevalence in real-world attacks, and potential impact if exploited.', + }, + output: { + criteria: [ + 'The response MUST rank all 10 provided techniques in a prioritized order from highest to lowest severity', + 'Each technique MUST have an explicit severity or priority score/label (e.g., Critical/High/Medium/Low or numeric score)', + 'The prioritization MUST consider threat prevalence (how commonly the technique is used in real attacks)', + 'The prioritization MUST consider potential impact if the technique is exploited without detection', + 'The response MUST provide justification for why the top 3 gaps are ranked highest', + ], + }, + metadata: { query_intent: 'MITRE Analysis' }, + }, + ], + }, + }); + }); + + evaluate('should recommend new detection rules for top gaps', async ({ evaluateDataset }) => { + await evaluateDataset({ + dataset: { + name: 'mitre-analyst-agent: rule-recommendations', + description: + 'Validates the MITRE analyst agent recommends new detection rules to address the highest-priority coverage gaps.', + examples: [ + { + input: { + question: + 'Our highest priority MITRE ATT&CK coverage gaps are: 1) T1055 - Process Injection (no detection), 2) T1078 - Valid Accounts (no detection), 3) T1071 - Application Layer Protocol (no detection). Recommend specific detection rules we should create to close these gaps. 
For each recommendation, specify the rule type (EQL, KQL, threshold, ML), the data sources required, and the expected detection logic.', + }, + output: { + criteria: [ + 'The response MUST recommend at least one detection rule for each of the 3 specified gap techniques', + 'Each rule recommendation MUST specify the rule type (EQL, KQL, ES|QL, threshold, or ML)', + 'Each rule recommendation MUST specify the required data sources (e.g., process events, authentication logs, network events)', + 'Each rule recommendation MUST include a description of the detection logic (what the rule looks for)', + 'The recommendations MUST be actionable and specific enough to implement as Elastic Security detection rules', + ], + toolCalls: [ + { + id: 'security.mitre_mapping', + criteria: [ + 'The MITRE mapping tool should be called to understand current coverage context before recommending new rules', + ], + }, + ], + }, + metadata: { query_intent: 'MITRE Analysis' }, + }, + ], + }, + }); + }); + + evaluate('coverage percentage should be between 0 and 100', async ({ evaluateDataset }) => { + await evaluateDataset({ + dataset: { + name: 'mitre-analyst-agent: coverage-percentage', + description: + 'Validates the MITRE analyst agent computes a coverage percentage between 0 and 100 representing the proportion of MITRE ATT&CK techniques covered by active detection rules.', + examples: [ + { + input: { + question: + 'Calculate our overall MITRE ATT&CK detection coverage percentage. Query our active detection rules, map them to MITRE techniques, and compute the percentage of Enterprise ATT&CK techniques that have at least one detection rule. 
Express the result as a percentage between 0 and 100.', + }, + output: { + criteria: [ + 'The response MUST include a clearly labeled coverage percentage as a numeric value between 0 and 100', + 'The percentage MUST be calculated as (covered techniques / total techniques) * 100 or a clearly defined equivalent formula', + 'The response MUST specify both the numerator (number of covered techniques) and denominator (total techniques evaluated)', + 'The coverage percentage MUST be a reasonable value (not 0% unless no rules exist, not 100% unless all techniques are covered)', + 'The response MUST include a breakdown showing coverage per tactic to contextualize the overall percentage', + ], + toolCalls: [ + { + id: 'security.mitre_mapping', + criteria: [ + 'The MITRE mapping tool should be called to compute the coverage mapping and percentage', + ], + }, + ], + }, + metadata: { query_intent: 'MITRE Analysis' }, + }, + ], + }, + }); + }); +}); diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/evals/reporter_agent/reporter_agent.spec.ts b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/evals/reporter_agent/reporter_agent.spec.ts new file mode 100644 index 0000000000000..c9e1f3350ca8a --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/evals/reporter_agent/reporter_agent.spec.ts @@ -0,0 +1,210 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { tags } from '@kbn/scout-security'; +import { evaluate } from '../evaluate'; + +/** + * Reporter Agent eval suite. 
+ * + * Validates that the reporter agent: + * - Generates an executive summary section + * - Generates a technical timeline section + * - Includes MITRE ATT&CK mapping in the report + * - Includes impact assessment with affected entities count + * - Creates a case and attaches the report + */ +evaluate.describe('Reporter Agent', { tag: tags.serverless.security.complete }, () => { + evaluate('should generate executive summary section', async ({ evaluateDataset }) => { + await evaluateDataset({ + dataset: { + name: 'reporter-agent: executive-summary', + description: + 'Validates the reporter agent generates a well-structured executive summary section suitable for non-technical stakeholders.', + examples: [ + { + input: { + question: + 'Generate an incident report for the following completed investigation: A targeted phishing campaign delivered a Cobalt Strike beacon to 3 hosts in the Finance department on 2024-03-15. The attacker gained domain admin credentials and exfiltrated 2.1GB of financial records to an external server in Eastern Europe. The incident was contained within 4 hours of initial detection. All compromised hosts have been isolated, credentials rotated, and C2 infrastructure blocked. No evidence of data manipulation was found. 
Generate the executive summary section of the report.', + }, + output: { + criteria: [ + 'The response MUST include an "Executive Summary" section clearly labeled as such', + 'The executive summary MUST describe the incident type (targeted phishing campaign leading to data exfiltration) in business-friendly language', + 'The executive summary MUST include the scope of impact (3 hosts, Finance department, 2.1GB data exfiltrated)', + 'The executive summary MUST include the resolution status (contained within 4 hours, hosts isolated, credentials rotated)', + 'The executive summary MUST be concise (no more than 2-3 paragraphs) and avoid deeply technical jargon', + ], + toolCalls: [ + { + id: 'security.report_generate', + criteria: [ + 'The report generation tool should be called to produce the structured report with the executive summary section', + ], + }, + ], + }, + metadata: { query_intent: 'Report' }, + }, + ], + }, + }); + }); + + evaluate('should generate technical timeline section', async ({ evaluateDataset }) => { + await evaluateDataset({ + dataset: { + name: 'reporter-agent: technical-timeline', + description: + 'Validates the reporter agent generates a detailed technical timeline section with chronological events.', + examples: [ + { + input: { + question: + 'Generate an incident report timeline section for the following events: 2024-03-15T08:00:00Z - Phishing email delivered to user "jdoe@corp.com" with malicious Word attachment. 2024-03-15T08:15:00Z - User opened attachment, macro executed, powershell.exe spawned. 2024-03-15T08:17:00Z - Cobalt Strike beacon downloaded from evil-cdn.com/payload.bin. 2024-03-15T08:20:00Z - Persistence established via scheduled task "WindowsUpdate". 2024-03-15T08:45:00Z - LSASS memory dumped, domain admin credentials obtained. 2024-03-15T09:00:00Z - Lateral movement to "file-server-01" and "dc-prod-01" via PsExec. 2024-03-15T09:30:00Z - Data staging began on "file-server-01", compressing financial records. 
2024-03-15T10:00:00Z - 2.1GB exfiltrated to 203.0.113.50 via HTTPS. 2024-03-15T12:00:00Z - SOC analyst detected anomalous outbound traffic. 2024-03-15T12:15:00Z - Incident response initiated, compromised hosts isolated.', + }, + output: { + criteria: [ + 'The response MUST include a "Timeline" section with events listed in chronological order', + 'The timeline MUST include timestamps for each event (matching or derived from the provided timestamps)', + 'The timeline MUST cover the full attack lifecycle from initial access (phishing) through containment (isolation)', + 'The timeline MUST include at least 8 distinct events from the provided sequence', + 'The timeline MUST clearly distinguish between attacker actions and defender/SOC response actions', + ], + toolCalls: [ + { + id: 'security.report_generate', + criteria: [ + 'The report generation tool should be called to produce the structured report with the timeline section', + ], + }, + ], + }, + metadata: { query_intent: 'Report' }, + }, + ], + }, + }); + }); + + evaluate('should include MITRE ATT&CK mapping in report', async ({ evaluateDataset }) => { + await evaluateDataset({ + dataset: { + name: 'reporter-agent: mitre-attack-mapping', + description: + 'Validates the reporter agent includes MITRE ATT&CK technique and tactic mappings in the incident report.', + examples: [ + { + input: { + question: + 'Generate a report that includes MITRE ATT&CK mapping for the following incident: The attacker used spear-phishing (email with malicious attachment) for initial access, executed malicious macros and PowerShell for execution, created a scheduled task for persistence, dumped LSASS credentials for credential access, used PsExec for lateral movement, staged and compressed data for collection, and exfiltrated via HTTPS for exfiltration. 
Map each phase to the appropriate MITRE ATT&CK technique and tactic.', + }, + output: { + criteria: [ + 'The response MUST include a "MITRE ATT&CK Mapping" section in the report', + 'The mapping MUST include at least 5 distinct MITRE ATT&CK techniques with their IDs (e.g., T1566, T1059, T1053)', + 'Each technique MUST be associated with the correct MITRE ATT&CK tactic (e.g., Initial Access, Execution, Persistence)', + 'The mapping MUST cover techniques from at least 4 different tactics to represent the full attack chain', + 'The techniques MUST be contextually accurate for the described attacker activities', + ], + toolCalls: [ + { + id: 'security.report_generate', + criteria: [ + 'The report generation tool should be called to produce the structured report with the MITRE ATT&CK mapping section', + ], + }, + ], + }, + metadata: { query_intent: 'Report' }, + }, + ], + }, + }); + }); + + evaluate( + 'should include impact assessment with affected entities count', + async ({ evaluateDataset }) => { + await evaluateDataset({ + dataset: { + name: 'reporter-agent: impact-assessment', + description: + 'Validates the reporter agent includes an impact assessment section with quantified affected entities.', + examples: [ + { + input: { + question: + 'Generate a report impact assessment for the following incident: 5 hosts were compromised (web-prod-01, web-prod-02, db-prod-03, app-prod-04, file-server-01). 3 user accounts were involved (admin-jsmith with domain admin privileges, svc-deploy service account, user-mwilson standard user). 2 departments affected (Engineering and Finance). 2.1GB of data exfiltrated consisting of financial projections and source code repositories. The incident caused 6 hours of downtime for the customer portal (web-prod-01, web-prod-02). Estimated revenue impact is $150,000 from downtime. 
No customer PII was confirmed exfiltrated.', + }, + output: { + criteria: [ + 'The response MUST include an "Impact Assessment" section in the report', + 'The impact assessment MUST quantify affected hosts (5 hosts with names listed)', + 'The impact assessment MUST quantify affected user accounts (3 accounts with privilege levels)', + 'The impact assessment MUST quantify data exfiltration volume (2.1GB) and describe the types of data affected', + 'The impact assessment MUST include business impact metrics (6 hours downtime, $150,000 revenue impact, affected departments)', + ], + toolCalls: [ + { + id: 'security.report_generate', + criteria: [ + 'The report generation tool should be called to produce the structured report with the impact assessment section', + ], + }, + ], + }, + metadata: { query_intent: 'Report' }, + }, + ], + }, + }); + } + ); + + evaluate('should create a case and attach the report', async ({ evaluateDataset }) => { + await evaluateDataset({ + dataset: { + name: 'reporter-agent: case-creation-and-attachment', + description: + 'Validates the reporter agent creates a case in Elastic Security and attaches the generated report.', + examples: [ + { + input: { + question: + 'Generate a full incident report for the Cobalt Strike campaign incident (IR-2024-0315) and create a case in Elastic Security to track it. The incident involved 3 compromised hosts in the Finance department, domain admin credential theft, and 2.1GB data exfiltration. Severity is critical. The report should be attached to the case as a comment. 
Tag the case with "incident-response", "data-exfiltration", and "cobalt-strike".', + }, + output: { + criteria: [ + 'The response MUST create a case via the case management tool with a descriptive title referencing the incident', + 'The case MUST be created with severity "critical"', + 'The case MUST be tagged with the specified tags ("incident-response", "data-exfiltration", "cobalt-strike")', + 'The response MUST generate a report and attach it to the case as a comment', + 'The response MUST return the case ID and URL for reference', + ], + toolCalls: [ + { + id: 'security.report_generate', + criteria: [ + 'The report generation tool should be called to generate the structured incident report', + ], + }, + { + id: 'security.case_manage', + criteria: [ + 'The case management tool should be called to create a new case and attach the report', + ], + }, + ], + }, + metadata: { query_intent: 'Report' }, + }, + ], + }, + }); + }); +}); diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/evals/responder_agent/responder_agent.spec.ts b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/evals/responder_agent/responder_agent.spec.ts new file mode 100644 index 0000000000000..46b318f669000 --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/evals/responder_agent/responder_agent.spec.ts @@ -0,0 +1,186 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { tags } from '@kbn/scout-security'; +import { evaluate } from '../evaluate'; + +/** + * Responder Agent eval suite. 
+ * + * Validates that the responder agent: + * - Recommends endpoint isolation with confidence >= 0.7 for confirmed active compromises + * - Outputs confidence < 0.7 requiring human approval for ambiguous situations + * - Always includes rollback procedures in every recommendation + * - Assesses blast radius for each recommended action + * - Produces numeric confidence scores between 0.0 and 1.0 + */ +evaluate.describe('Responder Agent', { tag: tags.serverless.security.complete }, () => { + evaluate( + 'given confirmed active compromise should recommend endpoint isolation with confidence >= 0.7', + async ({ evaluateDataset }) => { + await evaluateDataset({ + dataset: { + name: 'responder-agent: active-compromise-isolation', + description: + 'Validates the responder agent recommends endpoint isolation with high confidence for a confirmed active compromise scenario.', + examples: [ + { + input: { + question: + 'Respond to the following confirmed active compromise: Host "dc-prod-01" (domain controller, critical asset) has an active Cobalt Strike beacon communicating with C2 at 185.220.101.42 every 60 seconds. The triage agent classified this as true_positive with confidence 0.95. The correlator agent confirmed lateral movement to 3 additional hosts. Active data exfiltration of 500MB has been detected to an external IP. The attacker has domain admin credentials and is actively enumerating the network. 
Recommend immediate response actions.', + }, + output: { + criteria: [ + 'The response MUST recommend endpoint isolation for "dc-prod-01" as a primary action', + 'The response MUST include a confidence score >= 0.7 for the isolation recommendation', + 'The response MUST recommend blocking the C2 IP (185.220.101.42) at the network perimeter', + 'The response MUST recommend credential reset for compromised domain admin accounts', + 'The response MUST flag the critical nature of isolating a domain controller and include special considerations for maintaining domain services', + ], + toolCalls: [ + { + id: 'security.response_actions', + criteria: [ + 'The response actions tool should be called to initiate or recommend endpoint isolation', + ], + }, + ], + }, + metadata: { query_intent: 'Respond' }, + }, + ], + }, + }); + } + ); + + evaluate( + 'given ambiguous situation should output confidence < 0.7 requiring human approval', + async ({ evaluateDataset }) => { + await evaluateDataset({ + dataset: { + name: 'responder-agent: ambiguous-requires-approval', + description: + 'Validates the responder agent outputs a lower confidence score requiring human approval for an ambiguous situation.', + examples: [ + { + input: { + question: + 'Respond to the following situation: Host "dev-server-07" triggered a "Suspicious Outbound Connection" alert. The destination IP 104.18.32.7 is a Cloudflare IP that hosts both legitimate services and has been associated with one low-confidence threat intelligence report. The process making the connection is "node.js" which is expected on this development server. The triage agent classified this as benign_true_positive with confidence 0.4. The user "developer-3" has no prior security incidents. However, the connection occurred at 3:00 AM outside normal working hours. 
Recommend response actions.', + }, + output: { + criteria: [ + 'The response MUST include a confidence score < 0.7 reflecting the ambiguity of the situation', + 'The response MUST explicitly state that human approval is required before executing any disruptive actions', + 'The response MUST present options rather than a single definitive action (e.g., monitor vs. investigate vs. block)', + 'The response MUST acknowledge both the benign indicators (expected process, legitimate IP) and suspicious indicators (unusual timing)', + 'The response MUST NOT recommend immediate endpoint isolation given the low confidence', + ], + }, + metadata: { query_intent: 'Respond' }, + }, + ], + }, + }); + } + ); + + evaluate('every recommendation must include rollback procedure', async ({ evaluateDataset }) => { + await evaluateDataset({ + dataset: { + name: 'responder-agent: rollback-procedures', + description: + 'Validates that every response recommendation from the responder agent includes a corresponding rollback procedure.', + examples: [ + { + input: { + question: + 'Respond to the following confirmed incident: Host "web-prod-05" is actively being used for cryptomining. The process "xmrig" is consuming 95% CPU. The triage agent confirmed true_positive with confidence 0.9. The host serves production web traffic for the customer portal. 
Recommend response actions and for each action include a rollback procedure in case the action causes unintended consequences.', + }, + output: { + criteria: [ + 'The response MUST include at least 2 distinct response actions', + 'Each response action MUST have an explicitly labeled "Rollback" or "Rollback Procedure" section', + 'The rollback procedure for process termination MUST describe how to verify service health after killing the malicious process', + 'If endpoint isolation is recommended, the rollback MUST describe the un-isolation procedure and service restoration steps', + 'The rollback procedures MUST be specific and actionable (not generic statements like "undo the action")', + ], + toolCalls: [ + { + id: 'security.response_actions', + criteria: [ + 'The response actions tool should be called to recommend or execute response actions with rollback considerations', + ], + }, + ], + }, + metadata: { query_intent: 'Respond' }, + }, + ], + }, + }); + }); + + evaluate( + 'should assess blast radius for each recommended action', + async ({ evaluateDataset }) => { + await evaluateDataset({ + dataset: { + name: 'responder-agent: blast-radius-assessment', + description: + 'Validates the responder agent assesses the blast radius (impact scope) for each recommended response action.', + examples: [ + { + input: { + question: + 'Respond to the following incident: The correlator agent identified a campaign affecting 15 hosts across 3 departments (Finance, Engineering, HR). The attacker has compromised the "svc-deploy" service account which is used by the CI/CD pipeline to deploy code to 50+ production servers. Recommending a password reset for "svc-deploy" would halt all deployments. Isolating the 15 compromised hosts would affect 45 users. Blocking the C2 domain at DNS level would affect all 2000 employees. 
Assess the blast radius for each recommended action.', + }, + output: { + criteria: [ + 'The response MUST include a blast radius assessment for each recommended action quantifying affected users, systems, or services', + 'The response MUST assess the blast radius of resetting "svc-deploy" credentials (impact on CI/CD pipeline and 50+ production servers)', + 'The response MUST assess the blast radius of isolating the 15 compromised hosts (impact on 45 users across 3 departments)', + 'The response MUST prioritize actions by balancing containment urgency against operational impact', + 'The response MUST recommend a phased approach or mitigation strategy to minimize blast radius while still containing the threat', + ], + }, + metadata: { query_intent: 'Respond' }, + }, + ], + }, + }); + } + ); + + evaluate('confidence score must be numeric 0.0-1.0', async ({ evaluateDataset }) => { + await evaluateDataset({ + dataset: { + name: 'responder-agent: confidence-score-format', + description: + 'Validates that the responder agent always produces a numeric confidence score between 0.0 and 1.0 for its response recommendations.', + examples: [ + { + input: { + question: + 'Respond to the following alert: Host "workstation-22" triggered "Ransomware Pre-encryption Behavior Detected" alert. Shadow copies are being deleted via vssadmin.exe. The triage agent classified this as true_positive with confidence 0.88. The host belongs to user "cfo-jones" in the Finance department. 
Recommend response actions with confidence scores.', + }, + output: { + criteria: [ + 'The response MUST include a clearly labeled "Confidence" field with a numeric value between 0.0 and 1.0 (inclusive)', + 'The confidence score MUST be expressed as a decimal number (e.g., 0.85, 0.92), not as a percentage or qualitative label', + 'Each distinct response action MUST have its own confidence score or there MUST be an overall confidence score for the response plan', + 'The confidence score MUST reflect the certainty that the recommended actions are appropriate for the situation', + 'The response MUST include at least one concrete response action alongside the confidence score', + ], + }, + metadata: { query_intent: 'Respond' }, + }, + ], + }, + }); + }); +}); diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/evals/triage_agent/triage_agent.spec.ts b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/evals/triage_agent/triage_agent.spec.ts new file mode 100644 index 0000000000000..87f99b8ce4658 --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/evals/triage_agent/triage_agent.spec.ts @@ -0,0 +1,214 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { tags } from '@kbn/scout-security'; +import { evaluate } from '../evaluate'; + +/** + * Triage Agent eval suite. 
+ * + * Validates that the triage agent: + * - Correctly classifies alerts as true_positive, benign_true_positive, or false_positive + * - Returns a confidence score between 0.0 and 1.0 + * - Provides a recommended_action field + * - References threat intelligence findings when applicable + * - Calls the expected tools during its analysis workflow + */ +evaluate.describe('Triage Agent', { tag: tags.serverless.security.complete }, () => { + evaluate( + 'critical severity alert with high entity risk returns true_positive verdict', + async ({ evaluateDataset }) => { + await evaluateDataset({ + dataset: { + name: 'triage-agent: critical-severity-high-risk', + description: + 'Validates the triage agent classifies a critical severity alert with high entity risk as true_positive with sufficient confidence.', + examples: [ + { + input: { + question: + 'Triage the following alert: A critical severity malware detection alert has been triggered on host "dc-prod-01" by user "admin-jsmith". The host entity risk score is 92 (critical) and the user entity risk score is 87 (high). The alert rule is "Malware Detection: Cobalt Strike Beacon Detected" with MITRE ATT&CK technique T1059.001 (PowerShell). 
The alert triggered at 2024-03-15T14:23:00Z and the source process is powershell.exe spawning rundll32.exe with a suspicious command line argument pointing to a DLL in a temp directory.', + }, + output: { + criteria: [ + 'The verdict MUST be "true_positive" — the alert represents genuine malicious activity (Cobalt Strike beacon on a high-risk entity)', + 'The confidence score MUST be >= 0.7, reflecting high certainty given the critical severity and high entity risk scores', + 'The response MUST include a "Recommended Action" that specifies escalation to investigation with critical or high urgency', + 'The response MUST reference the high entity risk scores (host risk 92, user risk 87) as corroborating evidence', + 'The response MUST mention the MITRE ATT&CK technique (T1059.001 or PowerShell execution)', + ], + toolCalls: [ + { + id: 'security.alerts', + criteria: [ + 'The alerts tool should be called to retrieve details about the malware detection alert', + ], + }, + { + id: 'security.entity_risk_score', + criteria: [ + 'The entity risk score tool should be called to check risk scores for the involved host and/or user entities', + ], + }, + ], + }, + metadata: { query_intent: 'Triage' }, + }, + ], + }, + }); + } + ); + + evaluate( + 'known-benign alert (authorized vulnerability scan) returns benign_true_positive or false_positive', + async ({ evaluateDataset }) => { + await evaluateDataset({ + dataset: { + name: 'triage-agent: known-benign-authorized-scan', + description: + 'Validates the triage agent classifies a known-benign alert triggered by an authorized vulnerability scan as benign_true_positive or false_positive.', + examples: [ + { + input: { + question: + 'Triage the following alert: A medium severity alert "Network Scan Detected" was triggered on host "vuln-scanner-01" by service account "svc-nessus". The source IP 10.0.50.10 is performing sequential port scanning across the 10.0.0.0/16 subnet. 
The host entity risk score is 12 (low) and the service account has no prior alerts. The activity matches the pattern of a Nessus vulnerability scanner. The organization runs authorized vulnerability scans every Tuesday between 02:00-06:00 UTC, and the current alert timestamp is 2024-03-19T03:15:00Z (a Tuesday).', + }, + output: { + criteria: [ + 'The verdict MUST be either "benign_true_positive" or "false_positive" — the scanning activity is authorized and expected', + 'The response MUST acknowledge that the activity matches an authorized vulnerability scanning pattern (Nessus, scheduled Tuesday window)', + 'The response MUST include a confidence score between 0.0 and 1.0', + 'The response MUST include a "Recommended Action" — either documenting the exception or recommending rule tuning to suppress recurring alerts from this scanner', + 'The response MUST note the low entity risk score as supporting evidence for benign classification', + ], + toolCalls: [ + { + id: 'security.alerts', + criteria: [ + 'The alerts tool should be called to retrieve the network scan alert details', + ], + }, + ], + }, + metadata: { query_intent: 'Triage' }, + }, + ], + }, + }); + } + ); + + evaluate( + 'alert with matching threat intelligence references TI findings in assessment', + async ({ evaluateDataset }) => { + await evaluateDataset({ + dataset: { + name: 'triage-agent: threat-intelligence-match', + description: + 'Validates that the triage agent references threat intelligence findings when the alert contains indicators that match known IOCs.', + examples: [ + { + input: { + question: + 'Triage the following alert: A high severity alert "Suspicious Outbound Connection" was triggered on host "ws-finance-03" by user "jdoe". The destination IP is 185.220.101.42 and the destination domain is "c2-relay.malware-infra.net". The process making the connection is "svchost.exe" with an unusual parent process chain. The host entity risk score is 65 (high). 
Please check threat intelligence for the destination IP and domain, and include any TI findings in your assessment.', + }, + output: { + criteria: [ + 'The response MUST reference threat intelligence findings for the destination IP (185.220.101.42) or domain (c2-relay.malware-infra.net)', + 'The response MUST include a verdict (true_positive, benign_true_positive, or false_positive)', + 'The response MUST include a confidence score between 0.0 and 1.0', + 'The assessment MUST discuss whether the TI findings corroborate or contradict the alert as malicious activity', + 'The response MUST include a "Recommended Action" field', + ], + toolCalls: [ + { + id: 'security.threat_intel_enrich', + criteria: [ + 'The threat intelligence enrichment tool should be called to check the destination IP or domain against known IOCs', + ], + }, + { + id: 'security.alerts', + criteria: [ + 'The alerts tool should be called to retrieve the suspicious outbound connection alert details', + ], + }, + ], + }, + metadata: { query_intent: 'Triage' }, + }, + ], + }, + }); + } + ); + + evaluate('verdict includes confidence score between 0.0 and 1.0', async ({ evaluateDataset }) => { + await evaluateDataset({ + dataset: { + name: 'triage-agent: confidence-score-format', + description: + 'Validates that the triage agent always includes a properly formatted confidence score in its verdict output.', + examples: [ + { + input: { + question: + 'Triage the following alert: A low severity alert "Unusual Process Execution" was triggered on host "dev-server-12" by user "developer-1". The process is python3 executing a script from /tmp/test_exploit.py. The host entity risk score is 25 (low) and the user has no prior alerts. 
The script name suggests security testing but the environment is a development server.', + }, + output: { + criteria: [ + 'The response MUST include a clearly labeled "Confidence" field with a numeric value between 0.0 and 1.0 (inclusive)', + 'The confidence score MUST be expressed as a decimal number (e.g., 0.6, 0.85), not as a percentage or qualitative label', + 'The response MUST include a verdict classification (true_positive, benign_true_positive, or false_positive)', + 'The response MUST include a summary explanation of the reasoning behind the chosen confidence level', + ], + }, + metadata: { query_intent: 'Triage' }, + }, + ], + }, + }); + }); + + evaluate('verdict includes recommended_action field', async ({ evaluateDataset }) => { + await evaluateDataset({ + dataset: { + name: 'triage-agent: recommended-action-field', + description: + 'Validates that the triage agent always includes a recommended_action field appropriate to the verdict classification.', + examples: [ + { + input: { + question: + 'Triage the following alert: A high severity alert "Brute Force Authentication Attempt" was triggered for user "cfo-martinez" from source IP 203.0.113.50. There have been 47 failed login attempts in the last 10 minutes followed by 1 successful login. The user entity risk score is 78 (high) and the source IP is external. 
The successful login occurred from a geographic location inconsistent with the user\'s normal login pattern.', + }, + output: { + criteria: [ + 'The response MUST include a "Recommended Action" section with specific, actionable next steps', + 'For a true_positive verdict: the recommended action MUST include escalation to investigation and specify urgency level (critical, high, or medium)', + 'For a benign_true_positive verdict: the recommended action MUST include documenting the exception', + 'For a false_positive verdict: the recommended action MUST include rule tuning recommendations', + 'The recommended action MUST be contextually appropriate to the alert — for a brute force with successful login on a high-risk user, the action should involve immediate investigation and potential credential reset', + 'The response MUST also include a verdict and confidence score', + ], + toolCalls: [ + { + id: 'security.entity_risk_score', + criteria: [ + 'The entity risk score tool should be called to check the risk profile of the targeted user account', + ], + }, + ], + }, + metadata: { query_intent: 'Triage' }, + }, + ], + }, + }); + }); +}); diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/alert_triage/alert_triage_skill.ts b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/alert_triage/alert_triage_skill.ts new file mode 100644 index 0000000000000..40fb635fe8f8f --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/alert_triage/alert_triage_skill.ts @@ -0,0 +1,251 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { defineSkillType } from '@kbn/agent-builder-server/skills/type_definition'; +import { platformCoreTools } from '@kbn/agent-builder-common'; +import { + SECURITY_ALERTS_TOOL_ID, + SECURITY_ENTITY_RISK_SCORE_TOOL_ID, + SECURITY_ATTACK_DISCOVERY_SEARCH_TOOL_ID, + SECURITY_LABS_SEARCH_TOOL_ID, + SECURITY_THREAT_INTEL_ENRICH_TOOL_ID, +} from '../../tools'; + +export const getAlertTriageSkill = () => + defineSkillType({ + id: 'alert-triage', + name: 'alert-triage', + basePath: 'skills/security/alerts', + experimental: true, + description: + 'Guide to systematically triaging security alerts: severity assessment, entity context gathering, threat intelligence correlation, verdict classification (true_positive, benign_true_positive, false_positive), and recommended next actions with confidence scoring.', + content: `# Alert Triage Guide + +## When to Use This Skill + +Use this skill when: +- A user asks to triage one or more security alerts +- A user wants a structured verdict on whether an alert is a true positive, benign true positive, or false positive +- A user needs to assess the severity and priority of an alert +- A user wants recommended next actions after triaging an alert +- An analyst needs to quickly classify alerts during an active queue review + +## Related Skills + +After using this skill, you may want to use: +- '~/skills/security/alerts/investigation' to conduct a deeper investigation after triage identifies a true positive +- '~/skills/security/alerts/response-recommendation' to get containment recommendations for confirmed threats +- '~/skills/security/alerts/incident-reporting' to generate a formal incident report after triage and investigation +- '~/skills/security/entities/entity-analytics' to get entity risk scores and asset criticality for involved entities + +## Triage Methodology + +### 1. 
Initial Alert Assessment
+- Fetch the alert details using the 'security.alerts' tool
+- Review the alert's core attributes:
+  - **Severity**: critical, high, medium, low
+  - **Timestamp**: when the alert fired and the event timeline
+  - **Rule name and description**: what detection logic triggered
+  - **MITRE ATT&CK mapping**: which tactics and techniques are referenced
+  - **Status**: open, acknowledged, closed, in-progress
+- Identify key entities involved: users, hosts, IP addresses, file hashes, domains, process names
+- Note any existing assignments, comments, or workflow status updates
+
+### 2. Entity Context Gathering
+- For each key entity identified in the alert, gather context:
+  - **Entity risk score**: query using ES|QL against risk score indices to determine if the entity has an elevated risk profile
+  - **Asset criticality**: check if the entity is tagged as a critical asset (extreme_impact, high_impact, medium_impact, low_impact)
+  - **Historical alerts**: search for recent alerts involving the same entity within the last 7-30 days
+  - **User activity**: for user entities, check for unusual login patterns, privilege escalation, or access anomalies
+  - **Host activity**: for host entities, check for unusual process execution, network connections, or file modifications
+- Prioritize entities with high risk scores or critical asset tags for deeper investigation
+
+### 3. Threat Intelligence Correlation
+- Use the 'security.threat_intel_enrich' tool to enrich the alert's indicators of compromise (IOCs) against the threat intelligence indices:
+  - File hashes (MD5, SHA256)
+  - IP addresses (source, destination)
+  - Domain names
+  - URLs
+  - Email addresses
+- Use the 'security.security_labs_search' tool to check for matches against known threat actor TTPs (tactics, techniques, and procedures)
+- Correlate alert indicators with published threat intelligence reports
+- Note any matches with known malware families, campaign names, or APT groups
+
+### 4. 
Attack Discovery Correlation +- Query the 'security.attack_discovery_search' tool to check if this alert is part of a broader attack pattern +- Look for attack chains or kill chain progression that includes this alert +- Determine if the alert is an isolated event or part of a multi-stage attack +- Review any existing attack discoveries that reference the same entities or techniques + +### 5. Verdict Classification +Classify the alert into one of three verdict categories: + +#### true_positive (Confidence: 0.0 - 1.0) +- The alert represents a genuine security threat that requires action +- Indicators match known malicious activity or threat intelligence +- Entity behavior deviates significantly from baseline +- Multiple corroborating signals exist (other alerts, anomalies, TI matches) +- **Confidence scoring factors**: + - 0.90 - 1.00: Multiple independent corroborating signals, TI match, confirmed malicious behavior + - 0.70 - 0.89: Strong indicators but some ambiguity, single TI match with behavioral correlation + - 0.50 - 0.69: Suspicious activity with limited corroboration, pattern matches but no direct TI hit + - 0.30 - 0.49: Weak indicators, could go either way, needs further investigation + - 0.00 - 0.29: Very low confidence, minimal supporting evidence + +#### benign_true_positive (Confidence: 0.0 - 1.0) +- The alert correctly detected the activity, but the activity is authorized or expected +- Common scenarios: + - Authorized penetration testing or red team exercises + - Legitimate administrative actions that match detection patterns + - Known software behavior that triggers security rules (e.g., developer tools, security scanners) + - Scheduled maintenance activities + - Approved exceptions documented in security policies +- **Confidence scoring factors**: + - 0.90 - 1.00: Activity matches a documented exception or known authorized operation + - 0.70 - 0.89: Activity is consistent with expected behavior but not explicitly documented + - 0.50 - 0.69: 
Plausible benign explanation but cannot be fully confirmed + - 0.00 - 0.49: Uncertain classification, may require human verification + +#### false_positive (Confidence: 0.0 - 1.0) +- The alert was triggered by benign activity that does not represent a threat +- Common scenarios: + - Overly broad detection rule matching legitimate activity + - Known software bug or misconfiguration generating noise + - Environmental factors (e.g., network scanning tools, backup processes) + - Stale or incorrect threat intelligence data +- **Confidence scoring factors**: + - 0.90 - 1.00: Clear evidence that detection logic is flawed or indicators are stale + - 0.70 - 0.89: Strong evidence of benign activity with known pattern match + - 0.50 - 0.69: Likely benign but some uncertainty remains + - 0.00 - 0.49: Insufficient evidence to confidently classify as false positive + +### 6. Recommended Next Actions +Based on the verdict, recommend specific next actions: + +#### For true_positive verdicts: +- Escalate to incident response if confidence >= 0.70 +- Initiate containment procedures for affected entities +- Create or update a security case with findings +- Trigger the investigation skill for deeper analysis +- Notify relevant stakeholders based on severity + +#### For benign_true_positive verdicts: +- Document the benign activity for future reference +- Consider creating a detection rule exception if the pattern recurs +- Update the alert status to acknowledged with classification notes +- Review if the detection rule can be tuned to reduce future benign triggers + +#### For false_positive verdicts: +- Document the false positive with supporting evidence +- Recommend rule tuning or threshold adjustment +- Update the alert status to closed with classification notes +- Track false positive rate for the triggering rule + +## Output Format + +### Structured Triage Report + +For each triaged alert, produce the following structured output: + +**Alert Summary** +- Alert ID: +- Rule: +- 
Severity: +- Timestamp: +- Key Entities: + +**Triage Findings** +| Step | Finding | Relevance | +| --- | --- | --- | +| Entity Context | | | +| Threat Intel | | | +| Attack Discovery | | | +| Historical Alerts | | | + +**Verdict** +- Classification: +- Confidence: <0.00 - 1.00> +- Reasoning: <2-3 sentences explaining the classification> + +**Recommended Actions** +1. +2. +3. + +## Examples + +### Example 1: Triaging a High-Severity Malware Alert + +User query: Triage alert abc123 + +Steps: +1. Use the 'security.alerts' tool to fetch alert abc123 details. +2. Identify key entities: host "web-server-01", user "svc-deploy", file hash "a1b2c3...". +3. Query entity risk scores using ES|QL to check risk levels for "web-server-01" and "svc-deploy". +4. Use 'security.security_labs_search' to look up the file hash against known malware databases. +5. Use 'security.attack_discovery_search' to check if this alert is part of a broader attack chain. +6. Search for related alerts involving the same host and user in the last 7 days. +7. Classify verdict based on findings: if file hash matches known malware and entity risk is elevated, classify as true_positive with high confidence. +8. Recommend escalation to incident response and containment of the affected host. + +### Example 2: Triaging a Batch of Medium-Severity Alerts + +User query: Triage the last 10 alerts from the "Suspicious PowerShell Execution" rule + +Steps: +1. Use the 'security.alerts' tool to fetch the last 10 alerts matching the rule name. +2. For each alert, identify the executing user and host. +3. Group alerts by entity to identify patterns (e.g., same user on multiple hosts). +4. Query entity risk scores and asset criticality for all unique entities. +5. Use 'security.security_labs_search' to check PowerShell command patterns against known attack techniques. +6. Classify each alert individually, noting common patterns across the batch. +7. 
Present a summary table with all verdicts and highlight any true positives that need immediate attention. + +### Example 3: Triaging a Potential False Positive + +User query: Is alert xyz789 a false positive? + +Steps: +1. Use the 'security.alerts' tool to fetch alert xyz789 details. +2. Review the triggering rule logic and the specific event that matched. +3. Check if the activity matches known benign patterns (e.g., scheduled backup job, authorized scanning). +4. Query for historical false positives from the same rule using ES|QL. +5. If the activity is consistent with legitimate operations and no threat indicators are found, classify as false_positive with appropriate confidence. +6. Recommend rule tuning if the false positive rate for this rule is high. + +## Best Practices +- Always fetch the full alert details before making any classification +- Do not classify an alert without checking at least entity context and threat intelligence +- When confidence is below 0.50, explicitly recommend further investigation rather than a definitive verdict +- Document all evidence that supports the verdict classification +- For batch triage, prioritize critical and high severity alerts first +- Track triage metrics: average time per alert, verdict distribution, confidence distribution +- When in doubt, classify as true_positive with lower confidence rather than dismissing a potential threat +- Always provide actionable next steps, not just the classification +- Reference specific evidence (alert IDs, entity names, TI matches) in the reasoning +- Consider the organizational context: asset criticality and business impact should influence priority + +## Escalation Guidelines +- **Escalate immediately** if: confidence >= 0.8 for true_positive with critical/high severity, or multiple correlated alerts suggest an active campaign +- **Escalate for review** if: confidence is between 0.5 and 0.8, or the alert involves sensitive assets (domain controllers, executive accounts, crown 
jewel systems) +- **Close with documentation** if: confidence >= 0.8 for false_positive or benign_true_positive, with clear supporting evidence +- Never dismiss an alert without checking entity risk scores and attack discovery context +- When in doubt, escalate rather than close — false negatives are more costly than false positives in security +- Document your reasoning thoroughly so that Tier 2 analysts can quickly understand your assessment +`, + getRegistryTools: () => [ + platformCoreTools.search, + platformCoreTools.executeEsql, + platformCoreTools.cases, + platformCoreTools.productDocumentation, + SECURITY_ALERTS_TOOL_ID, + SECURITY_ENTITY_RISK_SCORE_TOOL_ID, + SECURITY_ATTACK_DISCOVERY_SEARCH_TOOL_ID, + SECURITY_LABS_SEARCH_TOOL_ID, + SECURITY_THREAT_INTEL_ENRICH_TOOL_ID, + ], + }); diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/alert_triage/index.ts b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/alert_triage/index.ts new file mode 100644 index 0000000000000..828ec9dc95bad --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/alert_triage/index.ts @@ -0,0 +1,8 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +export { getAlertTriageSkill } from './alert_triage_skill'; diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/incident_reporting/incident_reporting_skill.ts b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/incident_reporting/incident_reporting_skill.ts new file mode 100644 index 0000000000000..315e591f3996c --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/incident_reporting/incident_reporting_skill.ts @@ -0,0 +1,342 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { defineSkillType } from '@kbn/agent-builder-server/skills/type_definition'; +import { platformCoreTools } from '@kbn/agent-builder-common'; +import { + SECURITY_ALERTS_TOOL_ID, + SECURITY_ATTACK_DISCOVERY_SEARCH_TOOL_ID, + SECURITY_MITRE_MAPPING_TOOL_ID, + SECURITY_LABS_SEARCH_TOOL_ID, + SECURITY_REPORT_GENERATE_TOOL_ID, + SECURITY_CASE_MANAGE_TOOL_ID, +} from '../../tools'; + +export const getIncidentReportingSkill = () => + defineSkillType({ + id: 'incident-reporting', + name: 'incident-reporting', + basePath: 'skills/security/alerts', + experimental: true, + description: + 'Guide to generating structured incident reports: executive summaries, technical timelines, MITRE ATT&CK mappings, impact assessments, and recommended follow-up actions with optional compliance framework mappings (NIST, ISO 27001).', + content: `# Incident Reporting Guide + +## When to Use This Skill + +Use this skill when: +- A user needs to generate a formal incident report after an investigation has been completed +- A user requests an executive summary of a security incident for leadership +- A user needs a technical incident report for the SOC team or incident responders +- A 
user requires a compliance-oriented report mapping the incident to regulatory frameworks (NIST CSF, ISO 27001) +- A user wants to document lessons learned and recommended improvements after an incident +- A user needs to attach a structured report to an existing Kibana case + +## Related Skills + +Before using this skill, you should have used: +- '~/skills/security/alerts/alert-triage' to classify the alerts involved in the incident +- '~/skills/security/alerts/investigation' to conduct the investigation and gather evidence + +After using this skill, you may want to use: +- '~/skills/security/alerts/response-recommendation' to include specific containment recommendations in the report + +## Report Structure + +### Section 1: Executive Summary +- **Purpose**: Provide a concise, non-technical overview suitable for executive leadership +- **Length**: 1-2 paragraphs (150-300 words) +- **Content**: + - What happened: brief description of the incident type and scope + - When it happened: incident timeline window (first detection to containment) + - Who was affected: impacted business units, users, customers, or systems + - Business impact: quantified or estimated impact (data exposed, downtime, financial loss) + - Current status: contained, eradicated, recovered, or ongoing + - Key decisions needed: any pending actions requiring leadership approval +- **Tone**: factual, objective, avoiding jargon; translate technical findings into business risk + +### Section 2: Incident Classification +- **Incident ID**: unique identifier for tracking +- **Classification**: data breach, malware infection, unauthorized access, denial of service, insider threat, supply chain compromise, or other +- **Severity**: critical (P1), high (P2), medium (P3), low (P4) + - P1: active compromise with confirmed data loss or system destruction + - P2: confirmed compromise with potential data exposure, no confirmed loss + - P3: suspicious activity requiring investigation, limited impact + - P4: 
low-impact incident with minimal risk, primarily for documentation +- **Detection method**: automated detection rule, manual analyst review, user report, external notification, threat intelligence +- **MITRE ATT&CK mapping**: list all observed tactics and techniques with IDs + +### Section 3: Technical Timeline +- **Format**: chronological table of events from initial compromise to current status +- **Required columns**: timestamp, event type, source, entity, description, MITRE technique +- **Key milestones to include**: + - Initial compromise: the earliest known malicious activity + - Foothold establishment: persistence mechanism deployed + - Lateral movement: attacker pivoting to additional systems + - Data access: sensitive data viewed or copied + - Exfiltration: data transferred outside the environment + - Detection: when the activity was first detected + - Containment: when containment actions were initiated + - Eradication: when the threat was fully removed + - Recovery: when systems were restored to normal operation +- **Timeline gaps**: explicitly note periods with no visibility or data gaps + +### Section 4: Affected Entities and Impact Assessment +- **Entity inventory**: complete list of affected entities with details + - Hosts: hostname, IP, OS, role, business function, compromise status + - Users: username, role, department, account status, credential exposure + - Services: service name, type, consumers, SLA impact + - Data: data classification, volume, sensitivity, exposure type +- **Impact categories**: + - **Confidentiality**: was sensitive data exposed, accessed, or exfiltrated? + - **Integrity**: was data or system configuration modified by the attacker? + - **Availability**: were systems taken offline or degraded? 
+- **Business impact assessment**: + - Operational disruption: downtime hours, affected processes + - Data exposure: records exposed, PII/PHI/PCI data types + - Financial impact: estimated cost (incident response, remediation, regulatory fines, lost revenue) + - Reputational impact: customer notification required, media exposure risk + +### Section 5: MITRE ATT&CK Mapping +- **Purpose**: map all observed attacker techniques to the MITRE ATT&CK framework +- **Format**: table with kill chain progression + +| Kill Chain Phase | Tactic | Technique | Sub-Technique | Observed Evidence | +| --- | --- | --- | --- | --- | +| Initial Access | TA0001 | T1566 Phishing | T1566.001 Spearphishing Attachment | Malicious email with PDF attachment | +| Execution | TA0002 | T1059 Command and Scripting Interpreter | T1059.001 PowerShell | Encoded PowerShell command executed | +| ... | ... | ... | ... | ... | + +- **Coverage notes**: for each technique, note whether an existing detection rule covered it or if it was a gap +- **Navigator export**: recommend generating a MITRE ATT&CK Navigator layer for visual representation + +### Section 6: Root Cause and Contributing Factors +- **Root cause**: the primary vulnerability or gap that enabled the incident +- **Contributing factors**: additional conditions that facilitated or worsened the incident + - Technical factors: unpatched systems, misconfigurations, missing controls + - Process factors: delayed response, lack of procedures, communication gaps + - Human factors: social engineering success, policy violations, training gaps +- **Detection assessment**: why the incident was or was not detected promptly + - Existing detection rules that fired + - Detection gaps that were identified + - Mean time to detect (MTTD) + - Mean time to respond (MTTR) + +### Section 7: Recommended Follow-Up Actions +- **Immediate actions** (0-48 hours): + - Remaining containment steps + - Credential rotation requirements + - System isolation or network 
segmentation changes + - Evidence preservation tasks +- **Short-term remediation** (1-2 weeks): + - Vulnerability patching + - Configuration hardening + - Detection rule creation or tuning + - Access control adjustments +- **Long-term improvements** (1-3 months): + - Architecture changes + - Security control investments + - Process improvements + - Training and awareness programs + - Policy updates + +### Section 8: Compliance Framework Mappings (Optional) + +#### NIST Cybersecurity Framework (CSF) Mapping +- Map incident findings and recommendations to NIST CSF functions: + - **Identify (ID)**: asset management, risk assessment gaps discovered + - **Protect (PR)**: access control, data security, protective technology gaps + - **Detect (DE)**: detection process, continuous monitoring gaps + - **Respond (RS)**: response planning, communications, analysis, mitigation gaps + - **Recover (RC)**: recovery planning, improvements, communications gaps +- Reference specific NIST CSF subcategories (e.g., PR.AC-1, DE.CM-1) + +#### ISO 27001 Control Mapping +- Map incident findings to relevant ISO 27001:2022 controls: + - **A.5 Organizational controls**: policies, roles, responsibilities + - **A.6 People controls**: screening, awareness, training + - **A.7 Physical controls**: physical security perimeters, equipment + - **A.8 Technological controls**: endpoint, network, application security +- Reference specific control numbers (e.g., A.8.7 Protection against malware, A.8.16 Monitoring activities) + +#### Additional Frameworks +- If requested, map to additional frameworks: + - NIST SP 800-53: security and privacy controls + - CIS Controls: prioritized cybersecurity actions + - PCI DSS: payment card industry requirements + - HIPAA: healthcare data protection requirements + +## Audience Adaptation + +### Executive Report +- Emphasize business impact and risk in non-technical language +- Lead with the executive summary and impact assessment +- Minimize technical details; 
reference the full report for depth +- Include clear decision points and resource requests +- Use visual elements: severity indicators, impact ratings, status indicators + +### Technical Report +- Include full technical timeline with event-level detail +- Provide complete MITRE ATT&CK mapping with evidence references +- Include specific indicators of compromise (IOCs) for detection teams +- Detail the exact attack chain and techniques used +- Reference specific log entries, alert IDs, and event IDs + +### Compliance Report +- Lead with regulatory context and reporting obligations +- Map all findings to the requested compliance framework +- Include control gap analysis and remediation mapping +- Document evidence of compliance or non-compliance +- Note any mandatory notification timelines and their status + +## Output Format + +### Full Incident Report Template + +--- + +# Incident Report: + +**Report ID**: +**Date Generated**: +**Classification**: +**Severity**: +**Status**: + +## Executive Summary +<1-2 paragraphs summarizing the incident for leadership> + +## Incident Classification +| Field | Value | +| --- | --- | +| Incident ID | | +| Type | | +| Severity | | +| Detection Method | | +| MITRE Tactics | | +| Time to Detect | | +| Time to Respond | | + +## Technical Timeline +| Timestamp | Event Type | Source | Entity | Description | MITRE Technique | +| --- | --- | --- | --- | --- | --- | +| | | | | | | + +## Affected Entities +| Entity | Type | Role | Status | Impact | +| --- | --- | --- | --- | --- | +| | | | | | + +## Impact Assessment +| Category | Impact | Details | +| --- | --- | --- | +| Confidentiality | |
| +| Integrity | |
| +| Availability | |
| + +## MITRE ATT&CK Mapping + + +## Root Cause + + +## Recommendations +### Immediate (0-48h) +1. +### Short-term (1-2 weeks) +1. +### Long-term (1-3 months) +1. + +## Compliance Mapping (if requested) + + +--- + +## Examples + +### Example 1: Post-Investigation Executive Report + +User query: Generate an executive incident report for the investigation on host web-server-01 + +Steps: +1. Use 'platform.core.search' to retrieve the investigation findings, alerts, and timeline data for web-server-01. +2. Use 'platform.core.cases' to check for an existing case and retrieve any documented findings. +3. Compile the executive summary focusing on business impact and current status. +4. Build the impact assessment based on affected entities and data exposure. +5. Create the MITRE ATT&CK mapping from the investigation's technique observations. +6. Generate recommendations prioritized by urgency and business impact. +7. Attach the report to the existing case using the cases tool. + +### Example 2: Compliance-Focused Report with NIST Mapping + +User query: Generate a NIST CSF-mapped incident report for the phishing campaign that targeted our finance team + +Steps: +1. Use 'platform.core.search' to retrieve all alerts and investigation data related to the phishing campaign. +2. Build the full technical timeline from initial phishing email to containment. +3. Map each finding and recommendation to the relevant NIST CSF functions and subcategories. +4. Identify NIST CSF control gaps exposed by the incident. +5. Generate remediation recommendations tied to specific NIST CSF subcategories. +6. Format the report for compliance review with framework references. + +### Example 3: Technical Report for SOC Team + +User query: Create a detailed technical incident report for the malware infection on endpoints in the engineering department + +Steps: +1. Use 'platform.core.search' to retrieve all alerts across engineering department hosts. +2. 
Use 'platform.core.cases' to retrieve the investigation case and documented evidence. +3. Build a detailed technical timeline with process-level events and network connections. +4. List all IOCs: file hashes, C2 domains, IP addresses, mutexes. +5. Create the full MITRE ATT&CK kill chain mapping with evidence for each technique. +6. Include specific detection rule IDs that fired and gaps that were identified. +7. Generate technical recommendations for detection and hardening improvements. + +## Best Practices +- Always pull evidence from the actual investigation before generating a report; never fabricate or assume details +- Use the incident severity classification consistently across all report sections +- Include specific metrics: MTTD, MTTR, number of affected entities, data volume +- Clearly distinguish between confirmed facts and analyst assessments in the report +- When mapping to compliance frameworks, reference specific control numbers and subcategories +- Adapt the report depth and language to the intended audience +- Always include a recommendations section with prioritized, actionable items +- Attach the report to the relevant Kibana case for auditability and tracking +- Include a "Lessons Learned" section for post-incident reviews +- Version the report if it will be updated as new information becomes available +- Do not include raw log data in executive reports; reference it in technical appendices +- Ensure all timestamps use a consistent timezone (preferably UTC) + +## Report Generation Process +1. Gather all available investigation context: alerts, investigation findings, correlation results, response actions +2. Query attack discovery for additional context that may have been missed +3. Map all findings to MITRE ATT&CK techniques for standardized reporting +4. Check Elastic Security Labs for relevant published threat research to cite +5. Generate the report in the requested format +6. Attach the completed report to the associated case +7. 
Update the case status and add any follow-up actions as case comments + +## Accuracy Guidelines +- Accuracy is paramount — never speculate or include unverified information in reports +- Clearly distinguish between confirmed facts, high-confidence assessments, and hypotheses +- Always cite the data source for every claim (which index, which alert, which query) +- Reports should be self-contained — a reader should understand the full context without external references +- Use consistent terminology throughout the report +- When generating reports for regulatory purposes, consult product documentation for compliance-specific guidance +`, + getRegistryTools: () => [ + platformCoreTools.search, + platformCoreTools.cases, + platformCoreTools.productDocumentation, + SECURITY_ALERTS_TOOL_ID, + SECURITY_ATTACK_DISCOVERY_SEARCH_TOOL_ID, + SECURITY_MITRE_MAPPING_TOOL_ID, + SECURITY_LABS_SEARCH_TOOL_ID, + SECURITY_REPORT_GENERATE_TOOL_ID, + SECURITY_CASE_MANAGE_TOOL_ID, + ], + }); diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/incident_reporting/index.ts b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/incident_reporting/index.ts new file mode 100644 index 0000000000000..f961630602318 --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/incident_reporting/index.ts @@ -0,0 +1,8 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +export { getIncidentReportingSkill } from './incident_reporting_skill'; diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/investigation/index.ts b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/investigation/index.ts new file mode 100644 index 0000000000000..9326b901a0ddf --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/investigation/index.ts @@ -0,0 +1,8 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +export { getInvestigationSkill } from './investigation_skill'; diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/investigation/investigation_skill.ts b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/investigation/investigation_skill.ts new file mode 100644 index 0000000000000..93d9d2ca85caf --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/investigation/investigation_skill.ts @@ -0,0 +1,279 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { defineSkillType } from '@kbn/agent-builder-server/skills/type_definition'; +import { platformCoreTools } from '@kbn/agent-builder-common'; +import { + SECURITY_ALERTS_TOOL_ID, + SECURITY_ENTITY_RISK_SCORE_TOOL_ID, + SECURITY_ATTACK_DISCOVERY_SEARCH_TOOL_ID, + SECURITY_THREAT_INTEL_ENRICH_TOOL_ID, + SECURITY_TIMELINE_CREATE_TOOL_ID, + SECURITY_CASE_MANAGE_TOOL_ID, + SECURITY_LABS_SEARCH_TOOL_ID, + SECURITY_MITRE_MAPPING_TOOL_ID, + SECURITY_ENTITY_STORE_QUERY_TOOL_ID, +} from '../../tools'; + +export const getInvestigationSkill = () => + defineSkillType({ + id: 'investigation', + name: 'investigation', + basePath: 'skills/security/alerts', + experimental: true, + description: + 'Guide to conducting systematic security investigations: scope definition, evidence collection, timeline reconstruction, cross-source correlation, root cause analysis, and findings documentation with confidence assessment.', + content: `# Security Investigation Guide + +## When to Use This Skill + +Use this skill when: +- A user needs to conduct a deep investigation after alert triage has identified a true positive +- A user wants to understand the full scope and timeline of a security incident +- A user asks for root cause analysis of a security event or alert chain +- A user needs to correlate events across multiple data sources (endpoints, network, authentication, application logs) +- An analyst needs to build a comprehensive timeline for an incident report +- A user wants to identify all affected entities and lateral movement paths + +## Related Skills + +Before using this skill, you may want to use: +- '~/skills/security/alerts/alert-triage' to classify alerts and determine which ones warrant deeper investigation + +After using this skill, you may want to use: +- '~/skills/security/alerts/response-recommendation' to get containment recommendations based on investigation findings +- '~/skills/security/alerts/incident-reporting' to generate a formal incident report from the 
investigation findings +- '~/skills/security/entities/entity-analytics' to get entity risk scores and asset criticality for involved entities +- '~/skills/security/ml/find-security-ml-jobs' to check for anomalous behavior related to investigated entities + +## Investigation Methodology + +### Phase 1: Scope Definition +- Define the investigation scope based on the triggering alert or request: + - **Primary entities**: the hosts, users, IPs, and services directly referenced in the triggering event + - **Time window**: establish initial investigation window (default: 24 hours before and after the triggering event) + - **Data sources**: identify which indices and data sources are relevant (alerts, endpoint events, network logs, authentication logs, application logs) + - **Investigation hypothesis**: formulate an initial hypothesis about what happened +- Set scope boundaries to prevent investigation sprawl: + - Maximum entity expansion depth: 3 hops from the primary entity + - Maximum time window expansion: 7 days in either direction from the triggering event + - Entity count cap: investigate up to 50 unique entities before summarizing and prioritizing + +### Phase 2: Evidence Collection +- Systematically collect evidence from multiple data sources: + +#### 2a. Alert Evidence +- Fetch all alerts related to primary entities within the investigation time window +- Group alerts by rule name, severity, and MITRE ATT&CK technique +- Identify alert clusters that suggest coordinated activity +- Note any alerts that were previously closed or marked as false positives + +#### 2b. 
Endpoint Evidence +- Query endpoint events for primary host entities: + - Process execution events: look for suspicious process trees, command-line arguments, unsigned binaries + - File events: file creation, modification, deletion in sensitive directories + - Registry events (Windows): persistence mechanisms, configuration changes + - Network connections: outbound connections from host processes, unusual ports or protocols + - Library loads: DLL side-loading, injection indicators +- Use ES|QL to query endpoint data efficiently with targeted filters + +#### 2c. Network Evidence +- Query network flow data for primary IP entities: + - Connection patterns: volume, frequency, timing, destination diversity + - DNS queries: suspicious domains, DGA patterns, tunnel indicators + - HTTP/HTTPS traffic: unusual user agents, large data transfers, beaconing patterns + - Lateral movement indicators: SMB, RDP, WMI, SSH connections between internal hosts +- Cross-reference network connections with known threat intelligence indicators + +#### 2d. Authentication Evidence +- Query authentication logs for primary user entities: + - Login events: successful and failed attempts, source IPs, timestamps + - Privilege escalation: sudo events, service account usage, token manipulation + - Account modifications: password changes, group membership changes, new account creation + - Session activity: session duration, concurrent sessions, unusual access patterns +- Identify authentication anomalies: impossible travel, brute force patterns, credential stuffing + +#### 2e. 
Application and Audit Evidence +- Query application logs for relevant activity: + - Cloud service events: API calls, configuration changes, data access + - Email events: phishing indicators, attachment downloads, forwarding rules + - Database access: unusual queries, bulk data exports, schema modifications + - Audit logs: policy changes, security control modifications + +### Phase 3: Timeline Reconstruction +- Build a chronological timeline of events from all collected evidence: + - Merge events from all data sources into a single timeline sorted by timestamp + - Identify the initial compromise event (patient zero) + - Map the attack progression through the kill chain: + 1. Initial Access: how did the attacker gain entry? + 2. Execution: what code or commands were run? + 3. Persistence: what mechanisms were established for continued access? + 4. Privilege Escalation: how were higher privileges obtained? + 5. Defense Evasion: what techniques were used to avoid detection? + 6. Credential Access: were credentials harvested or stolen? + 7. Discovery: what reconnaissance was performed? + 8. Lateral Movement: how did the attacker move between systems? + 9. Collection: what data was staged for exfiltration? + 10. Exfiltration: was data transferred out of the environment? + 11. Impact: what damage was done or attempted? 
+ - Highlight gaps in the timeline where visibility is limited + - Note any anti-forensic activity (log deletion, timestamp manipulation) + +### Phase 4: Cross-Source Correlation +- Correlate findings across data sources to build a coherent narrative: + - **Entity linking**: connect the same entity across different data sources (e.g., a user's authentication events linked to their endpoint activity) + - **Temporal correlation**: identify events that occurred within close time proximity across different systems + - **Causal correlation**: establish cause-and-effect relationships (e.g., phishing email -> malware download -> C2 connection) + - **Pattern matching**: identify common attack patterns or TTPs across the evidence +- Validate correlations: + - Require at least two independent data sources to confirm a correlation + - Flag single-source findings as "unconfirmed" in the report + - Weight correlations by data quality and completeness + +### Phase 5: Root Cause Analysis +- Determine the root cause of the incident: + - **Initial vector**: how did the threat initially enter the environment? + - **Enabling factors**: what vulnerabilities, misconfigurations, or policy gaps enabled the attack? + - **Propagation mechanism**: how did the threat spread from the initial foothold? + - **Detection gap**: why was the threat not detected sooner? 
+- Assess confidence in the root cause hypothesis: + - 0.90 - 1.00: Root cause is definitively established with clear evidence chain + - 0.70 - 0.89: Root cause is highly probable based on available evidence + - 0.50 - 0.69: Root cause is the most likely explanation but alternative hypotheses exist + - 0.30 - 0.49: Root cause is a working hypothesis that needs further validation + - 0.00 - 0.29: Insufficient evidence to determine root cause + +### Phase 6: Findings Documentation +- Compile all findings into a structured investigation report: + - Executive summary: one paragraph overview of the incident + - Timeline: chronological sequence of events with timestamps + - Affected entities: complete list of compromised or affected hosts, users, services + - Attack vector: detailed description of how the attack was carried out + - Root cause: explanation of the underlying vulnerability or gap + - Evidence inventory: list of all evidence collected with source references + - Confidence assessment: overall confidence in the investigation findings + - Recommendations: prioritized list of remediation and prevention actions + +## Output Format + +### Investigation Summary + +**Scope** +- Triggering Event: +- Time Window: to +- Primary Entities: +- Data Sources Queried: + +**Timeline** +| Timestamp | Source | Entity | Event | MITRE Technique | Significance | +| --- | --- | --- | --- | --- | --- | +| | | | | | | + +**Affected Entities** +| Entity | Type | Role | Risk Score | Criticality | Status | +| --- | --- | --- | --- | --- | --- | +| | | | | | | + +**Root Cause** +- Hypothesis: +- Confidence: <0.00 - 1.00> +- Supporting Evidence: +- Alternative Hypotheses: + +**Recommendations** +1. +2. +3. + +## Examples + +### Example 1: Investigating a Confirmed Malware Alert + +User query: Investigate alert abc123 which was triaged as a true positive malware infection + +Steps: +1. 
Use the 'security.alerts' tool to fetch the triggering alert and identify primary entities (host, user, file hash). +2. Define scope: 24-hour window around the alert timestamp, primary host and user entities. +3. Use 'platform.core.execute_esql' to query endpoint events on the affected host for process execution, file creation, and network connections. +4. Use 'platform.core.execute_esql' to query authentication logs for the affected user. +5. Use 'platform.core.search' to find related alerts involving the same entities. +6. Use 'platform.core.get_document_by_id' to retrieve specific high-value events by ID for detailed examination. +7. Build timeline from collected evidence, identifying initial access through malware delivery. +8. Correlate process execution with network connections to identify C2 communication. +9. Assess root cause: identify the delivery mechanism (e.g., phishing email, drive-by download). +10. Document findings with timeline, affected entities, and recommendations. + +### Example 2: Investigating Lateral Movement + +User query: Investigate potential lateral movement from host srv-db-01 + +Steps: +1. Use 'platform.core.execute_esql' to query authentication events originating from srv-db-01 to other internal hosts. +2. Use 'platform.core.execute_esql' to query network connections from srv-db-01 on SMB, RDP, WMI, and SSH ports. +3. Use the 'security.alerts' tool to find all alerts on srv-db-01 and any hosts it connected to. +4. For each destination host identified, query endpoint events for suspicious process execution following the connection. +5. Use 'platform.core.cases' to check for existing cases involving srv-db-01 or connected hosts. +6. Build a lateral movement map showing the progression from srv-db-01 to other hosts. +7. Correlate timestamps to establish the sequence of lateral movement events. +8. Determine root cause: how was srv-db-01 initially compromised and what credentials were used for lateral movement. +9. 
Document all affected hosts, compromised accounts, and the lateral movement path. + +### Example 3: Investigating Unusual Data Exfiltration + +User query: Investigate unusually large data transfers from user jsmith over the last 48 hours + +Steps: +1. Define scope: 48-hour window, primary entity user "jsmith" and associated hosts. +2. Use 'platform.core.execute_esql' to query network flow data for high-volume outbound transfers associated with jsmith's hosts. +3. Use 'platform.core.execute_esql' to query endpoint events for file staging activity (compression, encryption, large file copies). +4. Use 'platform.core.execute_esql' to query authentication events for jsmith to identify all sessions and accessed systems. +5. Use the 'security.alerts' tool to check for any data loss prevention (DLP) alerts or exfiltration detection alerts. +6. Use 'platform.core.get_document_by_id' to examine specific high-volume transfer events in detail. +7. Build timeline of data access, staging, and transfer events. +8. Correlate with jsmith's normal behavior baseline to quantify the deviation. +9. Assess whether the transfer was authorized (e.g., legitimate business need) or potentially malicious. +10. Document findings with data volume estimates, destination analysis, and recommendations. 
+ +## Best Practices +- Always start with scope definition to prevent investigation sprawl +- Use ES|QL for efficient querying across large datasets; prefer targeted filters over broad scans +- Build the timeline incrementally as evidence is collected; do not wait until all evidence is gathered +- Correlate across at least two independent data sources before drawing conclusions +- Document gaps in visibility explicitly; do not assume absence of evidence means absence of activity +- Assess confidence at each phase and adjust the investigation direction based on findings +- Prioritize evidence collection based on the investigation hypothesis +- When the scope expands beyond initial boundaries, pause and reassess priorities +- Always recommend both immediate actions and long-term preventive measures +- Reference specific event IDs, timestamps, and entity identifiers in all findings +- Keep the investigation narrative coherent; connect each finding to the overall story +- Consider anti-forensic techniques that may have been used to obscure evidence + +## Case Creation Guidelines +- Create a case when: the investigation confirms malicious activity, multiple entities are affected, or the incident requires cross-team coordination +- Always attach the investigation timeline and IOC list to the case +- Set appropriate severity and assign to the relevant team +- Link related alerts to the case for full traceability +- Preserve the chain of evidence — note exactly which queries and data sources you used +- When evidence is ambiguous, state the uncertainty clearly rather than drawing unsupported conclusions +`, + getRegistryTools: () => [ + platformCoreTools.search, + platformCoreTools.executeEsql, + platformCoreTools.cases, + platformCoreTools.getDocumentById, + platformCoreTools.productDocumentation, + SECURITY_ALERTS_TOOL_ID, + SECURITY_ENTITY_RISK_SCORE_TOOL_ID, + SECURITY_ATTACK_DISCOVERY_SEARCH_TOOL_ID, + SECURITY_THREAT_INTEL_ENRICH_TOOL_ID, + 
SECURITY_TIMELINE_CREATE_TOOL_ID, + SECURITY_CASE_MANAGE_TOOL_ID, + SECURITY_LABS_SEARCH_TOOL_ID, + SECURITY_MITRE_MAPPING_TOOL_ID, + SECURITY_ENTITY_STORE_QUERY_TOOL_ID, + ], + }); diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/mitre_coverage/index.ts b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/mitre_coverage/index.ts new file mode 100644 index 0000000000000..c9b52475baebd --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/mitre_coverage/index.ts @@ -0,0 +1,8 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +export { getMitreCoverageSkill } from './mitre_coverage_skill'; diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/mitre_coverage/mitre_coverage_skill.ts b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/mitre_coverage/mitre_coverage_skill.ts new file mode 100644 index 0000000000000..766fab329dc40 --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/mitre_coverage/mitre_coverage_skill.ts @@ -0,0 +1,233 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { defineSkillType } from '@kbn/agent-builder-server/skills/type_definition'; +import { + SECURITY_ALERTS_TOOL_ID, + SECURITY_ATTACK_DISCOVERY_SEARCH_TOOL_ID, + SECURITY_MITRE_MAPPING_TOOL_ID, + SECURITY_CREATE_DETECTION_RULE_TOOL_ID, + SECURITY_LABS_SEARCH_TOOL_ID, +} from '../../tools'; + +export const getMitreCoverageSkill = () => + defineSkillType({ + id: 'mitre-coverage', + name: 'mitre-coverage', + basePath: 'skills/security/alerts/rules', + experimental: true, + description: + 'Guide to analyzing MITRE ATT&CK detection coverage: map active detection rules to MITRE techniques, identify coverage gaps, prioritize uncovered techniques by severity, and recommend new detection rules.', + content: `# MITRE ATT&CK Coverage Analysis Guide + +## When to Use This Skill + +Use this skill when: +- A user asks for a MITRE ATT&CK coverage assessment or gap analysis +- A user wants to understand which MITRE techniques are covered by their current detection rules +- A user asks which MITRE techniques they lack detection coverage for +- A user wants to prioritize new detection rule development based on coverage gaps +- A user needs a coverage matrix for compliance reporting or security posture review +- A user wants to map specific detection rules to MITRE ATT&CK tactics and techniques + +## Related Skills + +After using this skill, you may want to use: +- '~/skills/security/alerts/alert-triage' to triage alerts from newly created detection rules +- '~/skills/security/alerts/incident-reporting' to include MITRE coverage data in compliance reports + +## MITRE ATT&CK Framework Reference + +### Tactics (in kill chain order) +1. **Reconnaissance** (TA0043): Gathering information for planning future operations +2. **Resource Development** (TA0042): Establishing resources for operations +3. **Initial Access** (TA0001): Getting into the network +4. **Execution** (TA0002): Running malicious code +5. **Persistence** (TA0003): Maintaining foothold +6. 
**Privilege Escalation** (TA0004): Gaining higher-level permissions +7. **Defense Evasion** (TA0005): Avoiding detection +8. **Credential Access** (TA0006): Stealing credentials +9. **Discovery** (TA0007): Learning about the environment +10. **Lateral Movement** (TA0008): Moving through the environment +11. **Collection** (TA0009): Gathering data of interest +12. **Command and Control** (TA0011): Communicating with compromised systems +13. **Exfiltration** (TA0010): Stealing data +14. **Impact** (TA0040): Manipulating, interrupting, or destroying systems + +## Coverage Analysis Process + +### Step 1: Inventory Active Detection Rules +- Query the detection rules index to retrieve all active (enabled) detection rules +- Use ES|QL to query the detection rules: + \`\`\` + FROM .kibana_alerting_cases* METADATA _id + | WHERE alert.alertTypeId == "siem.signals" OR alert.alertTypeId == "siem.queryRule" + | WHERE alert.enabled == true + | KEEP _id, alert.name, alert.params.threat + \`\`\` +- Extract MITRE ATT&CK metadata from each rule: + - Tactic IDs and names (e.g., TA0001 - Initial Access) + - Technique IDs and names (e.g., T1566 - Phishing) + - Sub-technique IDs and names (e.g., T1566.001 - Spearphishing Attachment) +- Count the total number of active rules and rules with MITRE mappings + +### Step 2: Build Coverage Matrix +- Create a mapping of all MITRE techniques referenced by active rules: + - Group rules by tactic to show coverage distribution across the kill chain + - Group rules by technique to show depth of coverage per technique + - Identify techniques with multiple detection rules (defense in depth) + - Identify techniques with only a single detection rule (single point of failure) +- Calculate coverage metrics: + - **Tactic coverage**: percentage of tactics with at least one detection rule + - **Technique coverage**: percentage of known techniques with at least one detection rule + - **Average rules per technique**: mean number of rules per covered technique 
+ - **Coverage depth score**: weighted score based on rules per technique (1 rule = shallow, 2-3 = moderate, 4+ = deep) + +### Step 3: Identify Coverage Gaps +- Compare active rule MITRE mappings against the full MITRE ATT&CK framework: + - List all tactics with zero coverage + - List all techniques with zero coverage, grouped by tactic + - Identify sub-techniques that are uncovered even when parent technique has coverage +- Classify gaps by criticality: + - **Critical gaps**: no coverage for high-impact techniques commonly used in real-world attacks + - **High gaps**: no coverage for techniques frequently observed in the organization's threat landscape + - **Medium gaps**: limited coverage for techniques with moderate prevalence + - **Low gaps**: no coverage for techniques that are rarely observed or have limited applicability + +### Step 4: Gap Prioritization +- Prioritize uncovered techniques using a weighted scoring model: + +#### Severity Weighting Factors +| Factor | Weight | Description | +| --- | --- | --- | +| Prevalence in real attacks | 30% | How commonly the technique is used in observed attacks | +| Industry relevance | 25% | Relevance to the organization's industry vertical | +| Data source availability | 20% | Whether the required data sources are already being collected | +| Detection feasibility | 15% | How practical it is to build a reliable detection rule | +| Existing compensating controls | 10% | Whether other security controls mitigate the technique | + +#### Priority Scoring +- **Priority 1 (Critical)**: Score >= 0.80 - Immediate action required, high-risk gap +- **Priority 2 (High)**: Score 0.60 - 0.79 - Address in next rule development sprint +- **Priority 3 (Medium)**: Score 0.40 - 0.59 - Plan for upcoming quarters +- **Priority 4 (Low)**: Score < 0.40 - Monitor and address as resources allow + +### Step 5: Detection Rule Recommendations +- For each prioritized gap, recommend detection rule approaches: + - **Rule type**: query rule, 
threshold rule, EQL correlation, ML-based detection + - **Data source requirements**: which indices and fields are needed + - **Detection logic outline**: high-level description of what the rule should detect + - **Expected false positive rate**: estimated noise level based on the technique + - **Complexity estimate**: low, medium, high implementation effort +- Group recommendations by tactic for organized implementation planning +- Suggest rule templates or Elastic prebuilt rules that can be enabled to close gaps quickly + +## Output Format + +### Coverage Summary + +**Overview** +- Total active detection rules: +- Rules with MITRE mappings: (%) +- Tactics covered: /14 (%) +- Techniques covered: / (%) + +**Coverage by Tactic** +| Tactic | ID | Rules | Techniques Covered | Coverage | +| --- | --- | --- | --- | --- | +| Initial Access | TA0001 | | / | % | +| Execution | TA0002 | | / | % | +| ... | ... | ... | ... | ... | + +**Top Coverage Gaps (Priority 1 & 2)** +| Technique | ID | Tactic | Priority | Score | Recommendation | +| --- | --- | --- | --- | --- | --- | +| | | | | | | + +**Recommendations Summary** +1. : Enable prebuilt rule "" to cover +2. : Create custom rule for using +3. : Deploy ML job for anomaly-based detection of + +## Examples + +### Example 1: Full Coverage Audit + +User query: What is our MITRE ATT&CK coverage? + +Steps: +1. Use 'platform.core.execute_esql' to query all active detection rules with their MITRE ATT&CK tags. +2. Parse the MITRE tactic and technique IDs from each rule's threat metadata. +3. Build a coverage matrix mapping rules to the MITRE ATT&CK framework. +4. Calculate coverage percentages by tactic and overall. +5. Identify the top gaps by tactic and technique. +6. Prioritize gaps using the weighted scoring model. +7. Present the coverage summary table and top recommendations. + +### Example 2: Tactic-Specific Gap Analysis + +User query: Do we have coverage for lateral movement techniques? + +Steps: +1. 
Use 'platform.core.execute_esql' to query active rules mapped to tactic TA0008 (Lateral Movement). +2. List all covered lateral movement techniques with their detection rules. +3. Compare against the full list of lateral movement techniques in the MITRE framework. +4. Identify uncovered techniques (e.g., T1021 Remote Services, T1570 Lateral Tool Transfer). +5. Assess data source availability for the uncovered techniques. +6. Recommend specific detection rules to close the gaps. + +### Example 3: Rule-to-Technique Mapping + +User query: Which MITRE techniques does our rule "Suspicious PowerShell Execution" cover? + +Steps: +1. Use 'platform.core.search' to find the specific detection rule by name. +2. Extract the MITRE ATT&CK mappings from the rule's threat metadata. +3. List all tactics, techniques, and sub-techniques the rule is mapped to. +4. Assess whether the rule's detection logic adequately covers the mapped techniques. +5. Recommend additional rules if the mapping reveals techniques that need deeper coverage. + +### Example 4: Data-Source-Aware Gap Prioritization + +User query: Given our current data sources, which MITRE gaps should we prioritize? + +Steps: +1. Use 'platform.core.execute_esql' to inventory active detection rules and their MITRE mappings. +2. Use 'platform.core.execute_esql' to query available data sources (index patterns with recent data). +3. Cross-reference uncovered MITRE techniques with available data sources. +4. Boost priority for techniques where required data is already being collected. +5. Lower priority for techniques where data source deployment would be required. +6. Present a data-source-aware prioritized gap list with actionable recommendations. 
+ +## Best Practices +- Always distinguish between "no detection rule" and "no data source" when reporting gaps +- Consider that a single rule may cover multiple techniques, and a technique may need multiple rules +- Prebuilt Elastic rules should be checked first before recommending custom rule development +- Include both the technique ID and name in all outputs for clarity +- When recommending new rules, estimate the expected false positive rate +- Track coverage metrics over time to show improvement trends +- Consider the organization's specific threat landscape when prioritizing gaps +- Do not equate coverage with effectiveness; a rule may exist but perform poorly +- Recommend periodic coverage audits (quarterly) to account for new MITRE techniques and rule changes +- Always note when coverage data may be incomplete due to rules without MITRE mappings +- Consider sub-techniques separately from parent techniques — a rule for T1059 does not necessarily cover T1059.001 +- Quality of coverage matters more than quantity — ten noisy low-confidence rules are worse than two precise high-confidence rules +- Factor in the environment's data sources: do not recommend rules for data that is not being collected +- Prioritize detection at the earliest possible stage of the kill chain +- Consider rule type diversity: environments benefit from a mix of signature-based, behavioral, and anomaly-based detections +- When recommending new rules, use the create detection rule tool to implement critical-priority rules directly +`, + getRegistryTools: () => [ + 'platform.core.search', + 'platform.core.execute_esql', + SECURITY_ALERTS_TOOL_ID, + SECURITY_ATTACK_DISCOVERY_SEARCH_TOOL_ID, + SECURITY_MITRE_MAPPING_TOOL_ID, + SECURITY_CREATE_DETECTION_RULE_TOOL_ID, + SECURITY_LABS_SEARCH_TOOL_ID, + ], + }); diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/register_skills.ts 
b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/register_skills.ts index d49b4d23ff054..d5ab215ff8bf7 100644 --- a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/register_skills.ts +++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/register_skills.ts @@ -13,6 +13,12 @@ import { createAutomaticTroubleshootingSkill } from './automatic_troubleshooting import { getEntityAnalyticsSkill } from './entity_analytics'; import type { EntityAnalyticsRoutesDeps } from '../../lib/entity_analytics/types'; import { getSecurityMlJobsSkill } from './security_ml_jobs'; +import { getThreatHuntingSkill } from './threat_hunting'; +import { getAlertTriageSkill } from './alert_triage'; +import { getInvestigationSkill } from './investigation'; +import { getMitreCoverageSkill } from './mitre_coverage'; +import { getIncidentReportingSkill } from './incident_reporting'; +import { getResponseRecommendationSkill } from './response_recommendation'; interface RegisterSkillsOpts { agentBuilder: AgentBuilderPluginSetup; @@ -49,4 +55,14 @@ export const registerSkills = async ({ getEntityAnalyticsSkill({ getStartServices, isEntityStoreV2Enabled, kibanaVersion, logger }) ); await agentBuilder.skills.register(getSecurityMlJobsSkill({ getStartServices, logger, ml })); + await agentBuilder.skills.register(getThreatHuntingSkill()); + + // AI SOC skills — gated behind aiSocAgents feature flag + if (experimentalFeatures.aiSocAgents) { + await agentBuilder.skills.register(getAlertTriageSkill()); + await agentBuilder.skills.register(getInvestigationSkill()); + await agentBuilder.skills.register(getMitreCoverageSkill()); + await agentBuilder.skills.register(getIncidentReportingSkill()); + await agentBuilder.skills.register(getResponseRecommendationSkill()); + } }; diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/response_recommendation/index.ts 
b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/response_recommendation/index.ts new file mode 100644 index 0000000000000..855c0445f5322 --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/response_recommendation/index.ts @@ -0,0 +1,8 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +export { getResponseRecommendationSkill } from './response_recommendation_skill'; diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/response_recommendation/response_recommendation_skill.ts b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/response_recommendation/response_recommendation_skill.ts new file mode 100644 index 0000000000000..c9a5b71f526d8 --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/response_recommendation/response_recommendation_skill.ts @@ -0,0 +1,167 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { defineSkillType } from '@kbn/agent-builder-server/skills/type_definition'; +import { platformCoreTools } from '@kbn/agent-builder-common'; +import { + SECURITY_ALERTS_TOOL_ID, + SECURITY_ENTITY_RISK_SCORE_TOOL_ID, + SECURITY_RESPONSE_ACTIONS_TOOL_ID, + SECURITY_CASE_MANAGE_TOOL_ID, +} from '../../tools'; + +export const getResponseRecommendationSkill = () => + defineSkillType({ + id: 'response-recommendation', + name: 'response-recommendation', + basePath: 'skills/security/alerts', + experimental: true, + description: + 'Guide to assessing blast radius and producing confidence-scored containment recommendations: evaluate compromise scope, rank response actions by effectiveness and risk, output confidence scores (0.0-1.0) with rollback procedures.', + content: `# Response Recommendation Guide + +## When to Use This Skill + +Use this skill when: +- An investigation has confirmed active compromise requiring response +- An analyst requests containment or remediation recommendations +- A workflow needs confidence-scored response actions for automated decision-making +- Post-triage escalation requires blast radius assessment + +## Response Recommendation Process + +### 1. Assess Blast Radius + +**Identify affected scope:** +- Query all entities (hosts, users, services) involved in the confirmed compromise +- Determine which systems have confirmed indicators vs. suspected exposure +- Assess lateral movement potential from compromised systems +- Evaluate data exposure: what sensitive data is accessible? +- Check business impact: are compromised systems critical? + +**Blast radius categories:** +- **Contained**: Single host/user, no lateral movement evidence +- **Limited**: 2-5 entities with some lateral movement but segmented +- **Broad**: Multiple segments, active lateral movement +- **Critical**: Crown jewel systems, domain controllers, sensitive data stores + +### 2. 
Evaluate Response Actions + +For each potential response action, evaluate: +- **Effectiveness**: How completely does this contain the threat? +- **Disruption**: What legitimate operations are affected? +- **Reversibility**: Can this be rolled back? How quickly? + +### 3. Confidence Scoring + +Assign a confidence score (0.0 - 1.0) to each recommendation: + +**Evidence quality (0.0 - 0.4):** +- 0.0-0.1: Speculation based on general patterns +- 0.1-0.2: Weak correlation, limited evidence +- 0.2-0.3: Moderate evidence from multiple sources +- 0.3-0.4: Strong evidence with confirmed IOCs + +**Indicator reliability (0.0 - 0.3):** +- 0.0-0.1: Unverified, single source +- 0.1-0.2: Partially verified, some disagreement +- 0.2-0.3: Well-verified from trusted, independent sources + +**Action appropriateness (0.0 - 0.3):** +- 0.0-0.1: Speculative, may not address threat +- 0.1-0.2: Addresses part of the threat +- 0.2-0.3: Directly addresses confirmed threat vector + +**Automation thresholds:** +- **>= 0.90**: Auto-execute with audit logging +- **0.70 - 0.89**: Execute with analyst notification +- **< 0.70**: Require human approval + +### 4. Available Response Actions + +**Host-Level:** +- Endpoint isolation (high effectiveness, high disruption, reversible) +- Process termination (medium effectiveness, low disruption, not reversible) +- Process suspension (medium effectiveness, low disruption, reversible) + +**Case-Level:** +- Create case with severity, attach alerts, escalate status + +**Detection-Level:** +- Create rules for discovered TTPs, add exceptions for benign positives + +## Output Format + +**Blast Radius Assessment:** +- Scope: [Contained | Limited | Broad | Critical] +- Affected entities: [Count and list] +- Lateral movement risk: [Low | Medium | High] +- Data exposure risk: [Low | Medium | High] +- Business impact: [Low | Medium | High | Critical] + +**Recommended Actions (ranked):** + +1. 
**[Action Name]** + - Confidence: [0.0 - 1.0] + - Target: [Entity] + - Rationale: [Why recommended] + - Disruption: [Low | Medium | High] + - Rollback: [Steps to reverse] + - Automation: [Auto-execute | Notify | Require approval] + +## Important Guidelines +- Safety first: when in doubt, require human approval +- Preserve forensic evidence: never recommend destructive actions +- Proportional response: match response to confirmed threat level +- Every recommendation must include a rollback procedure +- Never inflate confidence scores — they drive automation decisions + +## Containment Options Reference + +### Host-Level Containment +- **Network isolation**: Disconnect the host from the network while preserving forensic state +- **Process termination**: Kill specific malicious processes +- **Service disabling**: Disable compromised services +- **Full endpoint isolation**: Use Elastic Defend to isolate the endpoint + +### Account-Level Containment +- **Password reset**: Force credential rotation for compromised accounts +- **Session termination**: Revoke all active sessions +- **Account disabling**: Temporarily disable the account +- **MFA enforcement**: Require re-enrollment of multi-factor authentication + +### Network-Level Containment +- **IP blocking**: Block communication with identified C2 infrastructure +- **DNS sinkholing**: Redirect malicious domain resolutions +- **Firewall rule updates**: Restrict lateral movement paths + +## Rollback Procedures +For every recommended action, provide a clear rollback procedure: +- **Network isolation rollback**: Steps to reconnect the host, verify clean state, and restore network access +- **Account disabling rollback**: Steps to re-enable the account, verify no unauthorized changes, and confirm identity +- **Process termination rollback**: Steps to restart legitimate services that may have been affected +- **Firewall rule rollback**: Steps to remove temporary blocking rules once the threat is neutralized + +## Safety 
Guidelines +- **Never auto-execute destructive actions** without explicit confirmation from an authorized analyst +- Always provide rollback procedures — assume every action might need to be reversed +- Consider business impact: isolating a critical server may cause more damage than the threat itself +- When confidence is below 0.70, present the evidence and let the human decide +- Prefer reversible containment actions over irreversible ones when confidence is not high +- Document every action taken and its rationale in the associated case +- If the blast radius assessment reveals a critical or widespread compromise, explicitly recommend engaging the incident response team and executive leadership`, + getRegistryTools: () => [ + platformCoreTools.search, + platformCoreTools.executeEsql, + platformCoreTools.cases, + platformCoreTools.productDocumentation, + SECURITY_ALERTS_TOOL_ID, + SECURITY_ENTITY_RISK_SCORE_TOOL_ID, + SECURITY_RESPONSE_ACTIONS_TOOL_ID, + SECURITY_CASE_MANAGE_TOOL_ID, + ], + }); diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/threat_hunting/index.ts b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/threat_hunting/index.ts new file mode 100644 index 0000000000000..d3675a0133ad1 --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/threat_hunting/index.ts @@ -0,0 +1,8 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +export { getThreatHuntingSkill } from './threat_hunting_skill'; diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/threat_hunting/threat_hunting_skill.ts b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/threat_hunting/threat_hunting_skill.ts new file mode 100644 index 0000000000000..48886146686e8 --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/skills/threat_hunting/threat_hunting_skill.ts @@ -0,0 +1,113 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { defineSkillType } from '@kbn/agent-builder-server/skills/type_definition'; +import { platformCoreTools } from '@kbn/agent-builder-common'; +import { + SECURITY_ALERTS_TOOL_ID, + SECURITY_ATTACK_DISCOVERY_SEARCH_TOOL_ID, + SECURITY_ENTITY_RISK_SCORE_TOOL_ID, + SECURITY_LABS_SEARCH_TOOL_ID, +} from '../../tools'; + +export const getThreatHuntingSkill = () => + defineSkillType({ + id: 'threat-hunting', + name: 'threat-hunting', + basePath: 'skills/security/alerts', + description: + 'Guide to proactive security threat hunting: alert analysis, entity investigation, attack pattern identification, and security documentation using Elastic Security data sources.', + content: `# Threat Hunting Guide + +## When to Use This Skill + +Use this skill when: +- Conducting proactive threat hunts across the environment +- Investigating security alerts and determining their significance +- Performing entity-level analysis (host, user, IP) for suspicious activity +- Analyzing attack patterns and indicators of compromise +- Searching for evidence of lateral movement, persistence, or data exfiltration +- Producing security investigation documentation + +## Threat Hunting Methodology + +### 1. 
Hypothesis Formation +- Start with a hypothesis based on threat intelligence, recent alerts, or organizational risk areas +- Define the scope: time window, data sources, entity types, and attack techniques to investigate +- Prioritize based on entity risk scores and asset criticality + +### 2. Data Collection +- Query security alerts for the relevant time window and entities +- Search attack discoveries for correlated findings +- Check entity risk scores to identify high-risk hosts and users +- Cross-reference with Elastic Security Labs for relevant threat research + +### 3. Analysis +- Look for unusual authentication patterns and privilege escalation +- Identify suspicious process execution chains and living-off-the-land techniques +- Analyze network connections to rare or suspicious external domains +- Correlate activity across entity dimensions (host ↔ user ↔ IP) + +### 4. Documentation +- Document all findings with supporting evidence +- Create investigation timelines for significant discoveries +- Escalate confirmed threats with severity assessment and recommended actions +- Record negative results to inform future hunting priorities + +## Available Tools + +### Security-Specific Tools +- **Alerts tool** — Search and analyze security alerts using natural language or structured queries +- **Attack Discovery Search** — Find attack discoveries related to specific alerts +- **Entity Risk Score** — Look up entity risk scores and contributing alert inputs +- **Security Labs Search** — Search Elastic Security Labs for threat research and intelligence + +### Platform Tools +- **Search** — Query any Elasticsearch index with natural language +- **ES|QL** — Generate and execute ES|QL queries for advanced analysis +- **Cases** — Create and manage investigation cases +- **Product Documentation** — Search Elastic product documentation for guidance + +## Response Formats + +### Hunting Summary +Provide a concise summary of findings: + +| Finding | Severity | Entities | 
Evidence | +| --- | --- | --- | --- | +| | | | | + +### Entity Analysis +When analyzing entities, present risk context: + +| Entity | Type | Risk Score | Risk Level | Key Signals | +| --- | --- | --- | --- | --- | +| | | <0-100> | | | + +## Best Practices +- Always check entity risk scores before deep-diving into alert analysis +- Cross-reference findings with Attack Discovery for correlated context +- Use Elastic Security Labs to understand known threat actor techniques +- Document your methodology so hunts are reproducible +- Prioritize investigation of high-risk entities and critical assets`, + getRegistryTools: () => [ + // Platform tools + platformCoreTools.search, + platformCoreTools.listIndices, + platformCoreTools.getIndexMapping, + platformCoreTools.getDocumentById, + platformCoreTools.cases, + platformCoreTools.productDocumentation, + platformCoreTools.generateEsql, + platformCoreTools.executeEsql, + // Security tools + SECURITY_ALERTS_TOOL_ID, + SECURITY_ATTACK_DISCOVERY_SEARCH_TOOL_ID, + SECURITY_ENTITY_RISK_SCORE_TOOL_ID, + SECURITY_LABS_SEARCH_TOOL_ID, + ], + }); diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/__integration__/soc_tools.integration.test.ts b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/__integration__/soc_tools.integration.test.ts new file mode 100644 index 0000000000000..518745f7e229e --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/__integration__/soc_tools.integration.test.ts @@ -0,0 +1,770 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { ToolResultType, type ErrorResult } from '@kbn/agent-builder-common'; +import type { ToolHandlerStandardReturn } from '@kbn/agent-builder-server/tools'; +import { createToolHandlerContext, createToolTestMocks } from '../../__mocks__/test_helpers'; +import { threatIntelEnrichTool } from '../threat_intel_enrich_tool'; +import { caseManageTool } from '../case_manage_tool'; +import { reportGenerateTool } from '../report_generate_tool'; +import { entityStoreQueryTool } from '../entity_store_query_tool'; + +/** + * Integration tests for SOC agent builder tools. + * + * These tests exercise each tool with more realistic mock setups, + * validating multi-step interactions and cross-cutting concerns + * that unit tests do not cover (e.g. multiple sequential calls, + * realistic ES response shapes, multi-action case workflows). + * + * Run with: yarn test:jest_integration --config x-pack/solutions/security/plugins/security_solution/server/jest.integration.config.js server/agent_builder/tools/__integration__/soc_tools.integration.test.ts + */ + +describe('SOC Tools Integration', () => { + describe('threat_intel_enrich_tool', () => { + const { mockCore, mockLogger, mockEsClient, mockRequest } = createToolTestMocks(); + const tool = threatIntelEnrichTool(mockCore, mockLogger); + + beforeEach(() => { + jest.clearAllMocks(); + }); + + it('enriches an IP against multiple TI sources and returns consolidated results', async () => { + const mockTiHits = [ + { + _id: 'ti-1', + _index: '.ds-logs-ti_abusech-default', + _source: { + '@timestamp': '2024-01-15T10:00:00Z', + threat: { + indicator: { + type: 'ipv4-addr', + ip: '185.220.101.42', + provider: 'AbuseCH', + confidence: 'High', + description: 'Known C2 server for Cobalt Strike', + first_seen: '2023-12-01T00:00:00Z', + last_seen: '2024-01-15T00:00:00Z', + }, + feed: { name: 'AbuseCH' }, + }, + }, + }, + { + _id: 'ti-2', + _index: '.ds-logs-ti_otx-default', + _source: { + '@timestamp': '2024-01-14T08:00:00Z', + 
threat: { + indicator: { + type: 'ipv4-addr', + ip: '185.220.101.42', + provider: 'AlienVault OTX', + confidence: 'Medium', + description: 'Associated with APT29 infrastructure', + first_seen: '2023-11-15T00:00:00Z', + last_seen: '2024-01-14T00:00:00Z', + }, + feed: { name: 'AlienVault OTX' }, + }, + }, + }, + { + _id: 'ti-3', + _index: '.ds-logs-ti_anomali-default', + _source: { + '@timestamp': '2024-01-13T12:00:00Z', + threat: { + indicator: { + type: 'ipv4-addr', + ip: '185.220.101.42', + provider: 'Anomali', + confidence: 'High', + description: 'Tor exit node associated with malware distribution', + first_seen: '2023-10-01T00:00:00Z', + last_seen: '2024-01-13T00:00:00Z', + }, + feed: { name: 'Anomali' }, + }, + }, + }, + ]; + + mockEsClient.asCurrentUser.search.mockResolvedValue({ + hits: { + hits: mockTiHits, + total: { value: 3, relation: 'eq' }, + }, + } as never); + + const result = (await tool.handler( + { ioc_type: 'ip', ioc_value: '185.220.101.42' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results).toHaveLength(1); + expect(result.results[0].type).toBe(ToolResultType.other); + expect(result.results[0].data).toEqual( + expect.objectContaining({ + ioc_type: 'ip', + ioc_value: '185.220.101.42', + match_count: 3, + total_matches: 3, + }) + ); + expect(result.results[0].data.matches).toHaveLength(3); + + // Verify matches come from different sources + const providers = (result.results[0].data.matches as Array>).map( + (m) => m.provider + ); + expect(providers).toContain('AbuseCH'); + expect(providers).toContain('AlienVault OTX'); + expect(providers).toContain('Anomali'); + }); + + it('handles sequential lookups for IP then domain in the same investigation', async () => { + // First lookup: IP + mockEsClient.asCurrentUser.search.mockResolvedValueOnce({ + hits: { + hits: [ + { + _id: 'ti-ip-1', + _index: '.ds-logs-ti_abusech-default', + _source: { + '@timestamp': 
'2024-01-15T10:00:00Z', + threat: { + indicator: { + type: 'ipv4-addr', + ip: '198.51.100.10', + provider: 'AbuseCH', + confidence: 'High', + description: 'Known malware C2', + first_seen: '2024-01-01T00:00:00Z', + last_seen: '2024-01-15T00:00:00Z', + }, + feed: { name: 'AbuseCH' }, + }, + }, + }, + ], + total: { value: 1, relation: 'eq' }, + }, + } as never); + + // Second lookup: domain + mockEsClient.asCurrentUser.search.mockResolvedValueOnce({ + hits: { + hits: [ + { + _id: 'ti-domain-1', + _index: '.ds-logs-ti_abusech-default', + _source: { + '@timestamp': '2024-01-15T09:00:00Z', + threat: { + indicator: { + type: 'domain-name', + url: { domain: 'c2-relay.malware-infra.net' }, + provider: 'AbuseCH', + confidence: 'High', + description: 'Known C2 domain', + first_seen: '2024-01-01T00:00:00Z', + last_seen: '2024-01-15T00:00:00Z', + }, + feed: { name: 'AbuseCH' }, + }, + }, + }, + ], + total: { value: 1, relation: 'eq' }, + }, + } as never); + + const ipResult = (await tool.handler( + { ioc_type: 'ip', ioc_value: '198.51.100.10' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + const domainResult = (await tool.handler( + { ioc_type: 'domain', ioc_value: 'c2-relay.malware-infra.net' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(ipResult.results[0].data.match_count).toBe(1); + expect(domainResult.results[0].data.match_count).toBe(1); + expect(mockEsClient.asCurrentUser.search).toHaveBeenCalledTimes(2); + }); + + it('returns empty result with descriptive message for unknown IOC', async () => { + mockEsClient.asCurrentUser.search.mockResolvedValue({ + hits: { + hits: [], + total: { value: 0, relation: 'eq' }, + }, + } as never); + + const result = (await tool.handler( + { ioc_type: 'ip', ioc_value: '127.0.0.1' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + 
expect(result.results).toHaveLength(1); + expect(result.results[0].type).toBe(ToolResultType.other); + expect(result.results[0].data.match_count).toBe(0); + expect(result.results[0].data.message).toContain('No threat intelligence found'); + }); + }); + + describe('case_manage_tool', () => { + const { mockCore, mockLogger, mockEsClient, mockRequest } = createToolTestMocks(); + const tool = caseManageTool(mockCore, mockLogger); + + const mockCasesClient = { + cases: { + create: jest.fn(), + get: jest.fn(), + update: jest.fn(), + }, + attachments: { + add: jest.fn(), + bulkCreate: jest.fn(), + }, + }; + + beforeEach(() => { + jest.clearAllMocks(); + mockCore.getStartServices.mockResolvedValue([ + {} as never, + { + cases: { + getCasesClientWithRequest: jest.fn().mockReturnValue(mockCasesClient), + }, + } as never, + {} as never, + ]); + }); + + it('creates a case, adds a comment, and retrieves it in sequence', async () => { + // Step 1: Create + mockCasesClient.cases.create.mockResolvedValue({ + id: 'case-ir-001', + title: 'IR-2024-0315: Cobalt Strike Campaign', + status: 'open', + severity: 'critical', + }); + + const createResult = (await tool.handler( + { + action: 'create', + title: 'IR-2024-0315: Cobalt Strike Campaign', + description: 'Critical incident involving Cobalt Strike beacon on domain controller', + tags: ['incident-response', 'cobalt-strike', 'data-exfiltration'], + severity: 'critical', + }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(createResult.results[0].type).toBe(ToolResultType.other); + expect(createResult.results[0].data.case_id).toBe('case-ir-001'); + expect(createResult.results[0].data.severity).toBe('critical'); + + // Step 2: Add comment + mockCasesClient.attachments.add.mockResolvedValue({ + id: 'case-ir-001', + title: 'IR-2024-0315: Cobalt Strike Campaign', + totalComment: 1, + }); + + const commentResult = (await tool.handler( + { + action: 'add_comment', + case_id: 
'case-ir-001', + comment: + 'Triage complete: Confirmed active Cobalt Strike C2 beacon on dc-prod-01. Lateral movement detected to 3 additional hosts.', + }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(commentResult.results[0].type).toBe(ToolResultType.other); + expect(commentResult.results[0].data.total_comments).toBe(1); + + // Step 3: Retrieve + mockCasesClient.cases.get.mockResolvedValue({ + id: 'case-ir-001', + title: 'IR-2024-0315: Cobalt Strike Campaign', + description: 'Critical incident involving Cobalt Strike beacon on domain controller', + version: 'v1', + status: 'open', + severity: 'critical', + tags: ['incident-response', 'cobalt-strike', 'data-exfiltration'], + totalComment: 1, + totalAlerts: 0, + created_at: '2024-03-15T12:00:00Z', + updated_at: '2024-03-15T12:05:00Z', + created_by: { username: 'ai-soc-agent' }, + }); + + const getResult = (await tool.handler( + { action: 'get', case_id: 'case-ir-001' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(getResult.results[0].type).toBe(ToolResultType.other); + expect(getResult.results[0].data.case_id).toBe('case-ir-001'); + expect(getResult.results[0].data.severity).toBe('critical'); + expect(getResult.results[0].data.tags).toEqual([ + 'incident-response', + 'cobalt-strike', + 'data-exfiltration', + ]); + }); + + it('updates case severity and status through the investigation lifecycle', async () => { + // Get the case first (for version) + mockCasesClient.cases.get.mockResolvedValue({ + id: 'case-lifecycle-01', + title: 'Suspicious Activity Investigation', + version: 'v1', + status: 'open', + severity: 'medium', + }); + + // Update severity to critical + mockCasesClient.cases.update.mockResolvedValueOnce([ + { + id: 'case-lifecycle-01', + title: 'Suspicious Activity Investigation', + status: 'open', + severity: 'critical', + }, + ]); + + const severityResult = (await 
tool.handler( + { + action: 'update', + case_id: 'case-lifecycle-01', + severity: 'critical', + }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(severityResult.results[0].type).toBe(ToolResultType.other); + + // Now close the case + mockCasesClient.cases.get.mockResolvedValue({ + id: 'case-lifecycle-01', + title: 'Suspicious Activity Investigation', + version: 'v2', + status: 'open', + severity: 'critical', + }); + + mockCasesClient.cases.update.mockResolvedValueOnce([ + { + id: 'case-lifecycle-01', + title: 'Suspicious Activity Investigation', + status: 'closed', + severity: 'critical', + }, + ]); + + const closeResult = (await tool.handler( + { + action: 'change_status', + case_id: 'case-lifecycle-01', + status: 'closed', + }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(closeResult.results[0].type).toBe(ToolResultType.other); + expect(closeResult.results[0].data.new_status).toBe('closed'); + }); + + it('attaches multiple alerts to a case', async () => { + // Mock ES search for alert rule info (called before bulkCreate) + mockEsClient.asCurrentUser.search.mockResolvedValue({ + hits: { + hits: [ + { + _id: 'alert-001', + _source: { 'kibana.alert.rule.uuid': 'rule-1', 'kibana.alert.rule.name': 'Rule 1' }, + }, + { + _id: 'alert-002', + _source: { 'kibana.alert.rule.uuid': 'rule-1', 'kibana.alert.rule.name': 'Rule 1' }, + }, + { + _id: 'alert-003', + _source: { 'kibana.alert.rule.uuid': 'rule-2', 'kibana.alert.rule.name': 'Rule 2' }, + }, + { + _id: 'alert-004', + _source: { 'kibana.alert.rule.uuid': 'rule-2', 'kibana.alert.rule.name': 'Rule 2' }, + }, + { + _id: 'alert-005', + _source: { 'kibana.alert.rule.uuid': 'rule-3', 'kibana.alert.rule.name': 'Rule 3' }, + }, + ], + }, + } as any); + mockCasesClient.attachments.bulkCreate.mockResolvedValue({}); + + const result = (await tool.handler( + { + action: 'attach_alerts', + case_id: 
'case-ir-001', + alert_ids: ['alert-001', 'alert-002', 'alert-003', 'alert-004', 'alert-005'], + }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results[0].type).toBe(ToolResultType.other); + expect(result.results[0].data.attached_alerts).toBe(5); + + const bulkCreateCall = mockCasesClient.attachments.bulkCreate.mock.calls[0][0]; + expect(bulkCreateCall.attachments).toHaveLength(5); + bulkCreateCall.attachments.forEach((attachment: Record, index: number) => { + expect(attachment.type).toBe('alert'); + expect(attachment.alertId).toBe(`alert-00${index + 1}`); + expect(attachment.owner).toBe('securitySolution'); + }); + }); + + it('handles error when cases plugin is not available', async () => { + mockCore.getStartServices.mockResolvedValue([{} as never, {} as never, {} as never]); + + const result = (await tool.handler( + { action: 'get', case_id: 'case-1' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + const errorResult = result.results[0] as ErrorResult; + expect(errorResult.type).toBe(ToolResultType.error); + expect(errorResult.data.message).toContain('Cases plugin is not available'); + }); + }); + + describe('report_generate_tool', () => { + const { mockCore, mockLogger, mockEsClient, mockRequest } = createToolTestMocks(); + const tool = reportGenerateTool(mockCore, mockLogger); + + beforeEach(() => { + jest.clearAllMocks(); + jest.useFakeTimers(); + jest.setSystemTime(new Date('2024-03-15T16:00:00Z')); + }); + + afterEach(() => { + jest.useRealTimers(); + }); + + it('generates a full markdown report with all sections populated', async () => { + const result = (await tool.handler( + { + title: 'IR-2024-0315: Cobalt Strike Campaign', + sections: { + executive_summary: + 'A targeted phishing campaign delivered Cobalt Strike beacons to 3 hosts in the Finance department. 
The attacker gained domain admin credentials and exfiltrated 2.1GB of financial records. The incident was contained within 4 hours.', + timeline: + '08:00 - Phishing email delivered\n08:15 - Macro executed, beacon deployed\n08:45 - Credential theft via LSASS dump\n09:00 - Lateral movement to file-server-01 and dc-prod-01\n09:30 - Data staging began\n10:00 - 2.1GB exfiltrated\n12:00 - SOC detected anomalous traffic\n12:15 - Incident response initiated', + mitre_mapping: + 'T1566.001 - Spear-phishing Attachment\nT1059.001 - PowerShell\nT1053.005 - Scheduled Task\nT1003.001 - LSASS Memory\nT1021.002 - SMB/Admin Shares\nT1048.002 - Exfiltration Over Asymmetric Encrypted Channel', + impact_assessment: + '3 hosts compromised, 2 user accounts affected (1 domain admin), 2.1GB financial data exfiltrated, 6 hours of portal downtime, estimated $150K revenue impact.', + recommendations: + '1. Rotate all domain admin credentials\n2. Block C2 IP range at perimeter\n3. Deploy additional EDR rules for PowerShell cradle detection\n4. 
Conduct security awareness training for Finance department', + }, + }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results).toHaveLength(1); + expect(result.results[0].type).toBe(ToolResultType.other); + expect(result.results[0].data.format).toBe('markdown'); + expect(result.results[0].data.section_count).toBe(5); + + const report = result.results[0].data.report as string; + expect(report).toContain('# Incident Report: IR-2024-0315: Cobalt Strike Campaign'); + expect(report).toContain('## Executive Summary'); + expect(report).toContain('## Incident Timeline'); + expect(report).toContain('## MITRE ATT&CK Mapping'); + expect(report).toContain('## Impact Assessment'); + expect(report).toContain('## Recommendations'); + expect(report).toContain('**Generated:** 2024-03-15T16:00:00.000Z'); + expect(report).toContain('T1566.001'); + expect(report).toContain('2.1GB'); + }); + + it('generates a JSON report with correct structure', async () => { + const result = (await tool.handler( + { + title: 'JSON Incident Report', + sections: { + executive_summary: 'A ransomware incident was detected and contained.', + timeline: '10:00 - Ransomware detected\n10:15 - Hosts isolated\n10:30 - Contained', + mitre_mapping: 'T1486 - Data Encrypted for Impact', + impact_assessment: '5 hosts affected, no data loss confirmed.', + }, + format: 'json', + }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results[0].data.format).toBe('json'); + const report = result.results[0].data.report as Record; + + expect(report.title).toBe('JSON Incident Report'); + expect(report.status).toBe('draft'); + expect(report.generated_at).toBe('2024-03-15T16:00:00.000Z'); + + const sections = report.sections as Record; + expect(sections.executive_summary).toBeDefined(); + expect(sections.timeline).toBeDefined(); + expect(sections.mitre_mapping).toBeDefined(); + 
expect(sections.impact_assessment).toBeDefined(); + expect(sections.recommendations).toBeUndefined(); + }); + + it('generates minimal report with only required sections', async () => { + const result = (await tool.handler( + { + title: 'Minimal Report', + sections: { + executive_summary: 'Brief summary of a minor incident.', + timeline: '14:00 - Alert triggered\n14:05 - Investigated and closed.', + }, + }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results[0].data.section_count).toBe(2); + const report = result.results[0].data.report as string; + expect(report).toContain('## Executive Summary'); + expect(report).toContain('## Incident Timeline'); + expect(report).not.toContain('## MITRE ATT&CK Mapping'); + expect(report).not.toContain('## Impact Assessment'); + expect(report).not.toContain('## Recommendations'); + }); + }); + + describe('entity_store_query_tool', () => { + const { mockCore, mockLogger, mockEsClient, mockRequest } = createToolTestMocks(); + const tool = entityStoreQueryTool(mockCore, mockLogger); + + beforeEach(() => { + jest.clearAllMocks(); + }); + + it('queries a specific host entity and returns enriched profile', async () => { + const mockEntityHit = { + _id: 'entity-host-1', + _index: '.entities.v1.latest.security_host_default', + _source: { + 'entity.name': 'dc-prod-01', + 'host.name': 'dc-prod-01', + 'host.os': { name: 'Windows Server 2022', platform: 'windows' }, + 'host.ip': ['10.0.50.10'], + 'entity.risk': { + calculated_score_norm: 92, + calculated_level: 'Critical', + }, + 'asset.criticality': 'extreme_impact', + 'entity.source': ['logs-endpoint', 'logs-system', 'risk-score'], + }, + }; + + mockEsClient.asCurrentUser.search.mockResolvedValue({ + hits: { + hits: [mockEntityHit], + total: { value: 1, relation: 'eq' }, + }, + } as never); + + const result = (await tool.handler( + { entity_type: 'host', identifier: 'dc-prod-01' }, + 
createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results).toHaveLength(1); + expect(result.results[0].type).toBe(ToolResultType.other); + expect(result.results[0].data).toEqual( + expect.objectContaining({ + entity_type: 'host', + identifier: 'dc-prod-01', + count: 1, + }) + ); + + const entities = result.results[0].data.entities as Array>; + expect(entities).toHaveLength(1); + expect(entities[0]['entity.name']).toBe('dc-prod-01'); + }); + + it('queries wildcard for top entities sorted by risk score', async () => { + const mockEntities = [ + { + _id: 'entity-1', + _index: '.entities.v1.latest.security_host_default', + _source: { + 'entity.name': 'dc-prod-01', + 'host.name': 'dc-prod-01', + 'entity.risk': { calculated_score_norm: 92 }, + }, + }, + { + _id: 'entity-2', + _index: '.entities.v1.latest.security_host_default', + _source: { + 'entity.name': 'web-prod-05', + 'host.name': 'web-prod-05', + 'entity.risk': { calculated_score_norm: 78 }, + }, + }, + { + _id: 'entity-3', + _index: '.entities.v1.latest.security_host_default', + _source: { + 'entity.name': 'dev-server-12', + 'host.name': 'dev-server-12', + 'entity.risk': { calculated_score_norm: 25 }, + }, + }, + ]; + + mockEsClient.asCurrentUser.search.mockResolvedValue({ + hits: { + hits: mockEntities, + total: { value: 3, relation: 'eq' }, + }, + } as never); + + const result = (await tool.handler( + { entity_type: 'host', identifier: '*', limit: 5 }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results[0].type).toBe(ToolResultType.other); + expect(result.results[0].data.count).toBe(3); + + // Verify the ES query used correct sort + const searchCall = mockEsClient.asCurrentUser.search.mock.calls[0][0] as Record< + string, + unknown + >; + expect(searchCall.index).toBe('.entities.v1.latest.security_host_default'); + expect(searchCall.size).toBe(5); + 
expect(searchCall.sort).toEqual([ + { 'entity.risk.calculated_score_norm': { order: 'desc', missing: '_last' } }, + ]); + }); + + it('queries user entities by name', async () => { + mockEsClient.asCurrentUser.search.mockResolvedValue({ + hits: { + hits: [ + { + _id: 'entity-user-1', + _index: '.entities.v1.latest.security_user_default', + _source: { + 'entity.name': 'admin-jsmith', + 'user.name': 'admin-jsmith', + 'user.domain': 'corp.example.com', + 'entity.risk': { calculated_score_norm: 87 }, + }, + }, + ], + total: { value: 1, relation: 'eq' }, + }, + } as never); + + const result = (await tool.handler( + { entity_type: 'user', identifier: 'admin-jsmith' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results[0].type).toBe(ToolResultType.other); + expect(result.results[0].data.entity_type).toBe('user'); + expect(result.results[0].data.identifier).toBe('admin-jsmith'); + + // Verify correct index was queried + const searchCall = mockEsClient.asCurrentUser.search.mock.calls[0][0] as Record< + string, + unknown + >; + expect(searchCall.index).toBe('.entities.v1.latest.security_user_default'); + }); + + it('returns error when entity is not found', async () => { + mockEsClient.asCurrentUser.search.mockResolvedValue({ + hits: { + hits: [], + total: { value: 0, relation: 'eq' }, + }, + } as never); + + const result = (await tool.handler( + { entity_type: 'host', identifier: 'nonexistent-host' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results[0].type).toBe(ToolResultType.error); + expect((result.results[0] as ErrorResult).data.message).toContain( + 'No entity found for host with identifier: nonexistent-host' + ); + }); + + it('returns specific fields when requested', async () => { + mockEsClient.asCurrentUser.search.mockResolvedValue({ + hits: { + hits: [ + { + _id: 'entity-1', + _index: 
'.entities.v1.latest.security_host_default', + _source: { + 'entity.name': 'dc-prod-01', + 'entity.risk': { calculated_score_norm: 92 }, + }, + }, + ], + total: { value: 1, relation: 'eq' }, + }, + } as never); + + await tool.handler( + { + entity_type: 'host', + identifier: 'dc-prod-01', + fields: ['entity.name', 'entity.risk'], + }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + ); + + const searchCall = mockEsClient.asCurrentUser.search.mock.calls[0][0] as Record< + string, + unknown + >; + expect(searchCall._source).toEqual(['entity.name', 'entity.risk']); + }); + + it('handles ES errors gracefully', async () => { + mockEsClient.asCurrentUser.search.mockRejectedValue(new Error('index_not_found_exception')); + + const result = (await tool.handler( + { entity_type: 'service', identifier: 'svc-api' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results[0].type).toBe(ToolResultType.error); + expect((result.results[0] as ErrorResult).data.message).toContain( + 'index_not_found_exception' + ); + expect(mockLogger.error).toHaveBeenCalled(); + }); + }); +}); diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/case_manage_tool.test.ts b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/case_manage_tool.test.ts new file mode 100644 index 0000000000000..08b77c57613fd --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/case_manage_tool.test.ts @@ -0,0 +1,619 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { ToolResultType, type ErrorResult } from '@kbn/agent-builder-common'; +import type { ToolHandlerStandardReturn } from '@kbn/agent-builder-server/tools'; +import { createToolHandlerContext, createToolTestMocks } from '../__mocks__/test_helpers'; +import { caseManageTool } from './case_manage_tool'; + +describe('caseManageTool', () => { + const { mockCore, mockLogger, mockEsClient, mockRequest } = createToolTestMocks(); + const tool = caseManageTool(mockCore, mockLogger); + + const mockCasesClient = { + cases: { + create: jest.fn().mockResolvedValue({ + id: 'case-1', + title: 'Test Case', + status: 'open', + severity: 'low', + }), + get: jest.fn().mockResolvedValue({ + id: 'case-1', + title: 'Test Case', + description: 'Test description', + version: 'v1', + status: 'open', + severity: 'low', + tags: ['security'], + totalComment: 2, + totalAlerts: 3, + created_at: '2024-01-15T10:00:00Z', + updated_at: '2024-01-15T11:00:00Z', + created_by: { username: 'test-user' }, + }), + update: jest.fn().mockResolvedValue([ + { + id: 'case-1', + title: 'Updated Case', + status: 'open', + severity: 'low', + }, + ]), + }, + attachments: { + add: jest.fn().mockResolvedValue({ + id: 'case-1', + title: 'Test Case', + totalComment: 3, + }), + bulkCreate: jest.fn().mockResolvedValue({}), + }, + }; + + beforeEach(() => { + jest.clearAllMocks(); + + // Mock core.getStartServices to return cases plugin + mockCore.getStartServices.mockResolvedValue([ + {} as never, + { + cases: { + getCasesClientWithRequest: jest.fn().mockReturnValue(mockCasesClient), + }, + } as never, + {} as never, + ]); + }); + + describe('schema', () => { + it('validates correct create action', () => { + const validInput = { + action: 'create', + title: 'New Security Case', + description: 'Investigating suspicious activity', + severity: 'high', + }; + + const result = tool.schema.safeParse(validInput); + + expect(result.success).toBe(true); + }); + + it('validates correct update action with case_id', () 
=> { + const validInput = { + action: 'update', + case_id: 'case-1', + title: 'Updated Title', + }; + + const result = tool.schema.safeParse(validInput); + + expect(result.success).toBe(true); + }); + + it('validates action enum values', () => { + const validActions = [ + 'create', + 'update', + 'add_comment', + 'attach_alerts', + 'get', + 'change_status', + ]; + for (const action of validActions) { + const result = tool.schema.safeParse({ action, case_id: 'case-1' }); + expect(result.success).toBe(true); + } + }); + + it('rejects invalid action', () => { + const invalidInput = { + action: 'delete', + case_id: 'case-1', + }; + + const result = tool.schema.safeParse(invalidInput); + + expect(result.success).toBe(false); + }); + + it('rejects missing action', () => { + const invalidInput = { + case_id: 'case-1', + }; + + const result = tool.schema.safeParse(invalidInput); + + expect(result.success).toBe(false); + }); + + it('validates optional tags array', () => { + const validInput = { + action: 'create', + title: 'Tagged Case', + tags: ['malware', 'critical'], + }; + + const result = tool.schema.safeParse(validInput); + + expect(result.success).toBe(true); + }); + + it('validates status enum values', () => { + for (const status of ['open', 'in-progress', 'closed']) { + const result = tool.schema.safeParse({ + action: 'change_status', + case_id: 'case-1', + status, + }); + expect(result.success).toBe(true); + } + }); + + it('validates severity enum values', () => { + for (const severity of ['low', 'medium', 'high', 'critical']) { + const result = tool.schema.safeParse({ + action: 'create', + title: 'Test', + severity, + }); + expect(result.success).toBe(true); + } + }); + }); + + describe('handler', () => { + describe('create action', () => { + it('creates case via casesClient', async () => { + const result = (await tool.handler( + { + action: 'create', + title: 'New Case', + description: 'Case description', + tags: ['security'], + severity: 'high', + }, + 
createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results).toHaveLength(1); + expect(result.results[0].type).toBe(ToolResultType.other); + expect(result.results[0].data).toEqual( + expect.objectContaining({ + case_id: 'case-1', + title: 'Test Case', + status: 'open', + severity: 'low', + }) + ); + expect(mockCasesClient.cases.create).toHaveBeenCalledWith( + expect.objectContaining({ + title: 'New Case', + description: 'Case description', + tags: ['security'], + severity: 'high', + owner: 'securitySolution', + }) + ); + }); + + it('requires title for create action', async () => { + const result = (await tool.handler( + { action: 'create' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results).toHaveLength(1); + const errorResult = result.results[0] as ErrorResult; + expect(errorResult.type).toBe(ToolResultType.error); + expect(errorResult.data.message).toContain('Title is required'); + }); + + it('returns case URL', async () => { + const result = (await tool.handler( + { action: 'create', title: 'URL Test Case' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results[0].data.url).toBe('/app/security/cases/case-1'); + }); + + it('defaults optional fields when not provided', async () => { + await tool.handler( + { action: 'create', title: 'Minimal Case' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + ); + + expect(mockCasesClient.cases.create).toHaveBeenCalledWith( + expect.objectContaining({ + description: '', + tags: [], + severity: 'low', + }) + ); + }); + }); + + describe('update action', () => { + it('updates case with version', async () => { + mockCasesClient.cases.update.mockResolvedValue([ + { id: 'case-1', title: 'Updated Title', status: 'open', severity: 'high' }, + ]); + + const result = (await tool.handler( + { + action: 
'update', + case_id: 'case-1', + title: 'Updated Title', + severity: 'high', + }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results[0].type).toBe(ToolResultType.other); + expect(result.results[0].data).toEqual( + expect.objectContaining({ + case_id: 'case-1', + title: 'Updated Title', + }) + ); + + // Verify version was fetched and used + expect(mockCasesClient.cases.get).toHaveBeenCalledWith({ id: 'case-1' }); + expect(mockCasesClient.cases.update).toHaveBeenCalledWith({ + cases: [ + expect.objectContaining({ + id: 'case-1', + version: 'v1', + title: 'Updated Title', + severity: 'high', + }), + ], + }); + }); + + it('requires case_id for update', async () => { + const result = (await tool.handler( + { action: 'update', title: 'No ID' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results[0].type).toBe(ToolResultType.error); + expect((result.results[0] as ErrorResult).data.message).toContain('case_id is required'); + }); + }); + + describe('add_comment action', () => { + it('adds comment via casesClient', async () => { + const result = (await tool.handler( + { + action: 'add_comment', + case_id: 'case-1', + comment: 'Investigation update: found additional indicators.', + }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results[0].type).toBe(ToolResultType.other); + expect(result.results[0].data).toEqual( + expect.objectContaining({ + case_id: 'case-1', + total_comments: 3, + }) + ); + expect(mockCasesClient.attachments.add).toHaveBeenCalledWith({ + caseId: 'case-1', + comment: expect.objectContaining({ + type: 'user', + comment: 'Investigation update: found additional indicators.', + owner: 'securitySolution', + }), + }); + }); + + it('requires case_id for add_comment', async () => { + const result = (await tool.handler( + { action: 
'add_comment', comment: 'No case id' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results[0].type).toBe(ToolResultType.error); + expect((result.results[0] as ErrorResult).data.message).toContain('case_id is required'); + }); + + it('requires comment for add_comment', async () => { + const result = (await tool.handler( + { action: 'add_comment', case_id: 'case-1' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results[0].type).toBe(ToolResultType.error); + expect((result.results[0] as ErrorResult).data.message).toContain('comment is required'); + }); + }); + + describe('attach_alerts action', () => { + it('attaches alerts via bulkCreate', async () => { + // Mock ES search for alert rule info + mockEsClient.asCurrentUser.search.mockResolvedValue({ + hits: { + hits: [ + { + _id: 'alert-1', + _source: { + 'kibana.alert.rule.uuid': 'rule-1', + 'kibana.alert.rule.name': 'Test Rule', + }, + }, + { + _id: 'alert-2', + _source: { + 'kibana.alert.rule.uuid': 'rule-1', + 'kibana.alert.rule.name': 'Test Rule', + }, + }, + ], + }, + } as any); + + const result = (await tool.handler( + { + action: 'attach_alerts', + case_id: 'case-1', + alert_ids: ['alert-1', 'alert-2'], + }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results[0].type).toBe(ToolResultType.other); + expect(result.results[0].data).toEqual( + expect.objectContaining({ + case_id: 'case-1', + attached_alerts: 2, + }) + ); + expect(mockCasesClient.attachments.bulkCreate).toHaveBeenCalledWith({ + caseId: 'case-1', + attachments: [ + expect.objectContaining({ + type: 'alert', + alertId: 'alert-1', + owner: 'securitySolution', + }), + expect.objectContaining({ + type: 'alert', + alertId: 'alert-2', + owner: 'securitySolution', + }), + ], + }); + }); + + it('requires case_id for attach_alerts', async 
() => { + const result = (await tool.handler( + { action: 'attach_alerts', alert_ids: ['alert-1'] }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results[0].type).toBe(ToolResultType.error); + expect((result.results[0] as ErrorResult).data.message).toContain('case_id is required'); + }); + + it('requires alert_ids for attach_alerts', async () => { + const result = (await tool.handler( + { action: 'attach_alerts', case_id: 'case-1' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results[0].type).toBe(ToolResultType.error); + expect((result.results[0] as ErrorResult).data.message).toContain('alert_ids is required'); + }); + + it('requires non-empty alert_ids for attach_alerts', async () => { + const result = (await tool.handler( + { action: 'attach_alerts', case_id: 'case-1', alert_ids: [] }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results[0].type).toBe(ToolResultType.error); + expect((result.results[0] as ErrorResult).data.message).toContain('alert_ids is required'); + }); + }); + + describe('get action', () => { + it('returns case details', async () => { + const result = (await tool.handler( + { action: 'get', case_id: 'case-1' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results[0].type).toBe(ToolResultType.other); + expect(result.results[0].data).toEqual( + expect.objectContaining({ + case_id: 'case-1', + title: 'Test Case', + description: 'Test description', + status: 'open', + severity: 'low', + tags: ['security'], + total_comments: 2, + total_alerts: 3, + created_at: '2024-01-15T10:00:00Z', + updated_at: '2024-01-15T11:00:00Z', + url: '/app/security/cases/case-1', + }) + ); + expect(mockCasesClient.cases.get).toHaveBeenCalledWith({ id: 'case-1' }); + }); + + 
it('requires case_id for get', async () => { + const result = (await tool.handler( + { action: 'get' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results[0].type).toBe(ToolResultType.error); + expect((result.results[0] as ErrorResult).data.message).toContain('case_id is required'); + }); + }); + + describe('change_status action', () => { + it('changes status with version', async () => { + mockCasesClient.cases.update.mockResolvedValue([ + { id: 'case-1', title: 'Test Case', status: 'closed', severity: 'low' }, + ]); + + const result = (await tool.handler( + { action: 'change_status', case_id: 'case-1', status: 'closed' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results[0].type).toBe(ToolResultType.other); + expect(result.results[0].data).toEqual( + expect.objectContaining({ + case_id: 'case-1', + previous_status: 'open', + new_status: 'closed', + }) + ); + + // Verify version was fetched and used + expect(mockCasesClient.cases.get).toHaveBeenCalledWith({ id: 'case-1' }); + expect(mockCasesClient.cases.update).toHaveBeenCalledWith({ + cases: [ + expect.objectContaining({ + id: 'case-1', + version: 'v1', + status: 'closed', + }), + ], + }); + }); + + it('requires case_id for change_status', async () => { + const result = (await tool.handler( + { action: 'change_status', status: 'closed' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results[0].type).toBe(ToolResultType.error); + expect((result.results[0] as ErrorResult).data.message).toContain('case_id is required'); + }); + + it('requires status for change_status', async () => { + const result = (await tool.handler( + { action: 'change_status', case_id: 'case-1' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + 
expect(result.results[0].type).toBe(ToolResultType.error); + expect((result.results[0] as ErrorResult).data.message).toContain('status is required'); + }); + }); + + describe('error handling', () => { + it('handles errors from casesClient create', async () => { + mockCasesClient.cases.create.mockRejectedValue(new Error('Cases service unavailable')); + + const result = (await tool.handler( + { action: 'create', title: 'Failing Case' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results).toHaveLength(1); + const errorResult = result.results[0] as ErrorResult; + expect(errorResult.type).toBe(ToolResultType.error); + expect(errorResult.data.message).toContain('Cases service unavailable'); + expect(mockLogger.error).toHaveBeenCalled(); + }); + + it('handles errors from casesClient get', async () => { + mockCasesClient.cases.get.mockRejectedValue(new Error('Case not found')); + + const result = (await tool.handler( + { action: 'get', case_id: 'nonexistent' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results).toHaveLength(1); + const errorResult = result.results[0] as ErrorResult; + expect(errorResult.type).toBe(ToolResultType.error); + expect(errorResult.data.message).toContain('Case not found'); + expect(mockLogger.error).toHaveBeenCalled(); + }); + + it('handles errors from casesClient update', async () => { + mockCasesClient.cases.get.mockResolvedValue({ + id: 'case-1', + version: 'v1', + status: 'open', + }); + mockCasesClient.cases.update.mockRejectedValue(new Error('Conflict: version mismatch')); + + const result = (await tool.handler( + { action: 'update', case_id: 'case-1', title: 'Updated' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results).toHaveLength(1); + const errorResult = result.results[0] as ErrorResult; + 
expect(errorResult.type).toBe(ToolResultType.error); + expect(errorResult.data.message).toContain('Conflict: version mismatch'); + expect(mockLogger.error).toHaveBeenCalled(); + }); + + it('handles cases plugin not available', async () => { + mockCore.getStartServices.mockResolvedValue([ + {} as never, + {} as never, // No cases plugin + {} as never, + ]); + + const result = (await tool.handler( + { action: 'get', case_id: 'case-1' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results).toHaveLength(1); + const errorResult = result.results[0] as ErrorResult; + expect(errorResult.type).toBe(ToolResultType.error); + expect(errorResult.data.message).toContain('Cases plugin is not available'); + }); + + it('handles errors from attachments bulkCreate', async () => { + // Mock ES search for alert rule info (succeeds, but bulkCreate fails) + mockEsClient.asCurrentUser.search.mockResolvedValue({ + hits: { + hits: [ + { + _id: 'alert-1', + _source: { 'kibana.alert.rule.uuid': 'rule-1', 'kibana.alert.rule.name': 'Rule' }, + }, + ], + }, + } as any); + mockCasesClient.attachments.bulkCreate.mockRejectedValue(new Error('Bulk create failed')); + + const result = (await tool.handler( + { + action: 'attach_alerts', + case_id: 'case-1', + alert_ids: ['alert-1'], + }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results).toHaveLength(1); + const errorResult = result.results[0] as ErrorResult; + expect(errorResult.type).toBe(ToolResultType.error); + expect(errorResult.data.message).toContain('Bulk create failed'); + expect(mockLogger.error).toHaveBeenCalled(); + }); + }); + }); +}); diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/case_manage_tool.ts b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/case_manage_tool.ts new file mode 100644 index 
0000000000000..c93c59beecb67 --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/case_manage_tool.ts @@ -0,0 +1,533 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { z } from '@kbn/zod/v4'; +import { ToolType, ToolResultType } from '@kbn/agent-builder-common'; +import type { BuiltinToolDefinition } from '@kbn/agent-builder-server'; +import { getToolResultId } from '@kbn/agent-builder-server/tools'; +import type { Logger } from '@kbn/logging'; +import { getAgentBuilderResourceAvailability } from '../utils/get_agent_builder_resource_availability'; +import { DEFAULT_ALERTS_INDEX } from '../../../common/constants'; +import { securityTool } from './constants'; +import type { SecuritySolutionPluginCoreSetupDependencies } from '../../plugin_contract'; + +const caseManageSchema = z.object({ + action: z + .enum(['create', 'update', 'add_comment', 'attach_alerts', 'get', 'change_status']) + .describe( + 'The action to perform on the case: create a new case, update an existing case, add a comment, attach alerts, get case details, or change case status' + ), + case_id: z + .string() + .optional() + .describe( + 'The ID of the case to operate on. Required for update, add_comment, attach_alerts, get, and change_status actions.' + ), + title: z + .string() + .max(256) + .optional() + .describe('The title for the case. Required when creating a new case.'), + description: z + .string() + .max(30000) + .optional() + .describe('The description for the case. 
Used when creating or updating a case.'), + tags: z + .array(z.string()) + .optional() + .describe('Tags to associate with the case for categorization and filtering.'), + alert_ids: z + .array(z.string().max(255)) + .max(100) + .optional() + .describe('Array of alert IDs to attach to the case. Used with the attach_alerts action.'), + comment: z + .string() + .max(30000) + .optional() + .describe('Comment text to add to the case. Used with the add_comment action.'), + status: z + .enum(['open', 'in-progress', 'closed']) + .optional() + .describe('The status to set on the case. Used with the change_status action.'), + severity: z + .enum(['low', 'medium', 'high', 'critical']) + .optional() + .describe('The severity level for the case. Used when creating or updating a case.'), +}); + +export const SECURITY_CASE_MANAGE_TOOL_ID = securityTool('case_manage'); + +const SECURITY_SOLUTION_OWNER = 'securitySolution'; + +export const caseManageTool = ( + core: SecuritySolutionPluginCoreSetupDependencies, + logger: Logger +): BuiltinToolDefinition => { + return { + id: SECURITY_CASE_MANAGE_TOOL_ID, + type: ToolType.builtin, + description: + 'Create, update, and manage security cases. Supports creating cases, updating case details, attaching alerts, adding comments, and changing case status. 
Uses the Cases API through plugin contracts.', + schema: caseManageSchema, + availability: { + cacheMode: 'space', + handler: async ({ request }) => { + try { + const availability = await getAgentBuilderResourceAvailability({ core, request, logger }); + if (availability.status !== 'available') { + return availability; + } + + // Verify the Cases plugin is available + const [, startPlugins] = await core.getStartServices(); + if (!startPlugins.cases) { + return { + status: 'unavailable' as const, + reason: 'Cases plugin is not available', + }; + } + + return { status: 'available' as const }; + } catch (error) { + return { + status: 'unavailable' as const, + reason: `Failed to check cases availability: ${ + error instanceof Error ? error.message : 'Unknown error' + }`, + }; + } + }, + }, + handler: async ( + { + action, + case_id: caseId, + title, + description, + tags: caseTags, + alert_ids: alertIds, + comment, + status, + severity, + }, + { request, spaceId, esClient } + ) => { + logger.debug( + `${SECURITY_CASE_MANAGE_TOOL_ID} tool called with action: ${action}${ + caseId ? `, caseId: ${caseId}` : '' + }` + ); + + try { + const [, startPlugins] = await core.getStartServices(); + + if (!startPlugins.cases) { + return { + results: [ + { + tool_result_id: getToolResultId(), + type: ToolResultType.error, + data: { + message: 'Cases plugin is not available', + }, + }, + ], + }; + } + + const casesClient = await startPlugins.cases.getCasesClientWithRequest(request); + + switch (action) { + case 'create': { + if (!title) { + return { + results: [ + { + tool_result_id: getToolResultId(), + type: ToolResultType.error, + data: { + message: 'Title is required when creating a case', + }, + }, + ], + }; + } + + const createdCase = await casesClient.cases.create({ + title, + description: description ?? '', + tags: caseTags ?? [], + severity: severity ?? 
'low', + owner: SECURITY_SOLUTION_OWNER, + connector: { + id: 'none', + name: 'none', + type: '.none', + fields: null, + }, + settings: { + syncAlerts: true, + }, + }); + + logger.debug(`Successfully created case "${title}" with ID: ${createdCase.id}`); + + return { + results: [ + { + tool_result_id: getToolResultId(), + type: ToolResultType.other, + data: { + case_id: createdCase.id, + title: createdCase.title, + status: createdCase.status, + severity: createdCase.severity, + url: `/app/security/cases/${createdCase.id}`, + message: `Case "${title}" created successfully.`, + }, + }, + ], + }; + } + + case 'update': { + if (!caseId) { + return { + results: [ + { + tool_result_id: getToolResultId(), + type: ToolResultType.error, + data: { + message: 'case_id is required when updating a case', + }, + }, + ], + }; + } + + // Fetch the current case to get its version + const currentCase = await casesClient.cases.get({ id: caseId }); + + const updatedCases = await casesClient.cases.update({ + cases: [ + { + id: caseId, + version: currentCase.version, + ...(title && { title }), + ...(description && { description }), + ...(caseTags && { tags: caseTags }), + ...(severity && { severity }), + }, + ], + }); + + const updatedCase = updatedCases[0]; + + logger.debug(`Successfully updated case ${caseId}`); + + return { + results: [ + { + tool_result_id: getToolResultId(), + type: ToolResultType.other, + data: { + case_id: updatedCase.id, + title: updatedCase.title, + status: updatedCase.status, + severity: updatedCase.severity, + url: `/app/security/cases/${updatedCase.id}`, + message: `Case "${updatedCase.title}" updated successfully.`, + }, + }, + ], + }; + } + + case 'add_comment': { + if (!caseId) { + return { + results: [ + { + tool_result_id: getToolResultId(), + type: ToolResultType.error, + data: { + message: 'case_id is required when adding a comment', + }, + }, + ], + }; + } + + if (!comment) { + return { + results: [ + { + tool_result_id: getToolResultId(), + type: 
ToolResultType.error, + data: { + message: 'comment is required when adding a comment to a case', + }, + }, + ], + }; + } + + const commentResult = await casesClient.attachments.add({ + caseId, + comment: { + type: 'user' as const, + comment, + owner: SECURITY_SOLUTION_OWNER, + }, + }); + + logger.debug(`Successfully added comment to case ${caseId}`); + + return { + results: [ + { + tool_result_id: getToolResultId(), + type: ToolResultType.other, + data: { + case_id: commentResult.id, + total_comments: commentResult.totalComment, + url: `/app/security/cases/${caseId}`, + message: `Comment added to case "${commentResult.title}" successfully.`, + }, + }, + ], + }; + } + + case 'attach_alerts': { + if (!caseId) { + return { + results: [ + { + tool_result_id: getToolResultId(), + type: ToolResultType.error, + data: { + message: 'case_id is required when attaching alerts', + }, + }, + ], + }; + } + + if (!alertIds || alertIds.length === 0) { + return { + results: [ + { + tool_result_id: getToolResultId(), + type: ToolResultType.error, + data: { + message: 'alert_ids is required and must not be empty when attaching alerts', + }, + }, + ], + }; + } + + // Query alert documents to get rule info for proper case attachments + const alertsIndex = `${DEFAULT_ALERTS_INDEX}-${spaceId}`; + const alertsResponse = await esClient.asCurrentUser.search({ + index: alertsIndex, + ignore_unavailable: true, + allow_no_indices: true, + size: alertIds.length, + _source: ['kibana.alert.rule.uuid', 'kibana.alert.rule.name'], + query: { bool: { filter: [{ terms: { _id: alertIds } }] } }, + }); + + const alertRuleMap = new Map(); + for (const hit of alertsResponse.hits.hits) { + if (hit._id && hit._source) { + const src = hit._source as Record; + const kibanaAlert = src['kibana.alert.rule.uuid'] as string | undefined; + const kibanaName = src['kibana.alert.rule.name'] as string | undefined; + alertRuleMap.set(hit._id, { + id: kibanaAlert ?? 'unknown', + name: kibanaName ?? 
'unknown', + }); + } + } + + const alertAttachments = alertIds.map((alertId) => { + const ruleInfo = alertRuleMap.get(alertId) ?? { id: 'unknown', name: 'unknown' }; + return { + type: 'alert' as const, + alertId, + index: alertsIndex, + rule: ruleInfo, + owner: SECURITY_SOLUTION_OWNER, + }; + }); + + await casesClient.attachments.bulkCreate({ + caseId, + attachments: alertAttachments, + }); + + logger.debug(`Successfully attached ${alertIds.length} alerts to case ${caseId}`); + + return { + results: [ + { + tool_result_id: getToolResultId(), + type: ToolResultType.other, + data: { + case_id: caseId, + attached_alerts: alertIds.length, + url: `/app/security/cases/${caseId}`, + message: `${alertIds.length} alert(s) attached to case successfully.`, + }, + }, + ], + }; + } + + case 'get': { + if (!caseId) { + return { + results: [ + { + tool_result_id: getToolResultId(), + type: ToolResultType.error, + data: { + message: 'case_id is required when getting case details', + }, + }, + ], + }; + } + + const caseDetails = await casesClient.cases.get({ id: caseId }); + + logger.debug(`Successfully retrieved case ${caseId}`); + + return { + results: [ + { + tool_result_id: getToolResultId(), + type: ToolResultType.other, + data: { + case_id: caseDetails.id, + title: caseDetails.title, + description: caseDetails.description, + status: caseDetails.status, + severity: caseDetails.severity, + tags: caseDetails.tags, + total_comments: caseDetails.totalComment, + total_alerts: caseDetails.totalAlerts, + created_at: caseDetails.created_at, + updated_at: caseDetails.updated_at, + created_by: caseDetails.created_by, + url: `/app/security/cases/${caseDetails.id}`, + }, + }, + ], + }; + } + + case 'change_status': { + if (!caseId) { + return { + results: [ + { + tool_result_id: getToolResultId(), + type: ToolResultType.error, + data: { + message: 'case_id is required when changing case status', + }, + }, + ], + }; + } + + if (!status) { + return { + results: [ + { + tool_result_id: 
getToolResultId(), + type: ToolResultType.error, + data: { + message: 'status is required when changing case status', + }, + }, + ], + }; + } + + // Fetch the current case to get its version + const existingCase = await casesClient.cases.get({ id: caseId }); + + const statusUpdatedCases = await casesClient.cases.update({ + cases: [ + { + id: caseId, + version: existingCase.version, + status, + }, + ], + }); + + const statusUpdatedCase = statusUpdatedCases[0]; + + logger.debug(`Successfully changed status of case ${caseId} to ${status}`); + + return { + results: [ + { + tool_result_id: getToolResultId(), + type: ToolResultType.other, + data: { + case_id: statusUpdatedCase.id, + title: statusUpdatedCase.title, + previous_status: existingCase.status, + new_status: statusUpdatedCase.status, + url: `/app/security/cases/${statusUpdatedCase.id}`, + message: `Case "${statusUpdatedCase.title}" status changed from "${existingCase.status}" to "${status}".`, + }, + }, + ], + }; + } + + default: + return { + results: [ + { + tool_result_id: getToolResultId(), + type: ToolResultType.error, + data: { + message: `Unknown action: ${action}`, + }, + }, + ], + }; + } + } catch (error) { + const errorMessage = error instanceof Error ? 
error.message : String(error);
+      logger.error(`Error in ${SECURITY_CASE_MANAGE_TOOL_ID} tool: ${errorMessage}`);
+      return {
+        results: [
+          {
+            tool_result_id: getToolResultId(),
+            type: ToolResultType.error,
+            data: {
+              message: `Error managing case: ${errorMessage}`,
+            },
+          },
+        ],
+      };
+    }
+  },
+  tags: ['security', 'cases', 'incident-management'],
+  };
+};
diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/entity_store_query_tool.ts b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/entity_store_query_tool.ts
new file mode 100644
index 0000000000000..7468b7e304d87
--- /dev/null
+++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/entity_store_query_tool.ts
@@ -0,0 +1,322 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+import type { ElasticsearchClient, Logger } from '@kbn/core/server';
+import { z } from '@kbn/zod/v4';
+import { ToolType, ToolResultType } from '@kbn/agent-builder-common';
+import type { BuiltinToolDefinition, ToolAvailabilityContext } from '@kbn/agent-builder-server';
+import { getToolResultId } from '@kbn/agent-builder-server/tools';
+import { AGENT_BUILDER_EXPERIMENTAL_FEATURES_SETTING_ID } from '@kbn/management-settings-ids';
+import { getAgentBuilderResourceAvailability } from '../utils/get_agent_builder_resource_availability';
+import type { SecuritySolutionPluginCoreSetupDependencies } from '../../plugin_contract';
+import { securityTool } from './constants';
+
+const entityStoreQuerySchema = z.object({
+  entity_type: z.enum(['host', 'user', 'service']).describe('The type of entity to query'),
+  identifier: z
+    .string()
+    .min(1)
+    .max(255)
+    .describe(
+      'The entity identifier value (hostname, username, service name). Use "*" for top entities.'
+    ),
+  fields: z
+    .array(z.string())
+    .max(20)
+    .optional()
+    .describe('Specific fields to return. If not provided, returns all available fields.'),
+  time_range: z
+    .string()
+    .optional()
+    .describe('Time range for entity data (e.g., "7d", "30d"). Defaults to latest snapshot.'),
+  limit: z
+    .number()
+    .int()
+    .min(1)
+    .max(50)
+    .optional()
+    .describe('Maximum number of entities to return for wildcard queries (default: 10)'),
+});
+
+export const SECURITY_ENTITY_STORE_QUERY_TOOL_ID = securityTool('entity_store_query');
+
+/**
+ * Builds the entity store index name for a given entity type and space.
+ * Entity store indices follow the pattern: .entities.v1.latest.security_<entityType>_<spaceId>
+ */
+const getEntityStoreIndexName = (entityType: string, spaceId: string): string => {
+  return `.entities.v1.latest.security_${entityType}_${spaceId}`;
+};
+
+/**
+ * Resolves the entity name field based on entity type (e.g. "host" -> "host.name").
+ */
+const getEntityNameField = (entityType: string): string => {
+  return `${entityType}.name`;
+};
+
+/**
+ * Queries the entity store for wildcard queries, returning top entities sorted by risk score.
+ * The entity type is already encoded in `index`, so no type filter is needed here.
+ */
+const queryEntityStoreForWildcard = async ({
+  esClient,
+  index,
+  limit = 10,
+  fields,
+  timeRange,
+}: {
+  esClient: ElasticsearchClient;
+  index: string;
+  limit: number;
+  fields?: string[];
+  timeRange?: string;
+}): Promise<Array<Record<string, unknown>>> => {
+  // Without a time range, use the latest snapshot (match_all over the "latest" index).
+  const query = timeRange
+    ? { bool: { filter: [{ range: { '@timestamp': { gte: `now-${timeRange}` } } }] } }
+    : { match_all: {} };
+
+  const response = await esClient.search<Record<string, unknown>>({
+    index,
+    ignore_unavailable: true,
+    allow_no_indices: true,
+    size: limit,
+    _source: fields ?? true,
+    query,
+    sort: [
+      {
+        'entity.risk.calculated_score_norm': {
+          order: 'desc',
+          missing: '_last',
+        },
+      },
+    ],
+  });
+
+  return response.hits.hits
+    .map((hit) => hit._source)
+    .filter((source): source is Record<string, unknown> => source !== undefined);
+};
+
+/**
+ * Queries the entity store for a specific entity by name.
+ * Matches either the generic `entity.name` field or the type-specific name field.
+ */
+const queryEntityStoreByName = async ({
+  esClient,
+  index,
+  entityType,
+  identifier,
+  fields,
+  timeRange,
+}: {
+  esClient: ElasticsearchClient;
+  index: string;
+  entityType: string;
+  identifier: string;
+  fields?: string[];
+  timeRange?: string;
+}): Promise<Array<Record<string, unknown>>> => {
+  const nameField = getEntityNameField(entityType);
+
+  const filterClauses: Array<Record<string, unknown>> = [
+    {
+      bool: {
+        should: [{ term: { 'entity.name': identifier } }, { term: { [nameField]: identifier } }],
+        minimum_should_match: 1,
+      },
+    },
+  ];
+
+  if (timeRange) {
+    filterClauses.push({ range: { '@timestamp': { gte: `now-${timeRange}` } } });
+  }
+
+  const response = await esClient.search<Record<string, unknown>>({
+    index,
+    ignore_unavailable: true,
+    allow_no_indices: true,
+    size: 10,
+    _source: fields ?? true,
+    query: {
+      bool: {
+        filter: filterClauses,
+      },
+    },
+  });
+
+  return response.hits.hits
+    .map((hit) => hit._source)
+    .filter((source): source is Record<string, unknown> => source !== undefined);
+};
+
+export const entityStoreQueryTool = (
+  core: SecuritySolutionPluginCoreSetupDependencies,
+  logger: Logger
+): BuiltinToolDefinition<typeof entityStoreQuerySchema> => {
+  return {
+    id: SECURITY_ENTITY_STORE_QUERY_TOOL_ID,
+    type: ToolType.builtin,
+    description:
+      'Query the unified Entity Store for enriched entity profiles including observed data sources, related alerts, risk context, asset criticality, and entity relationships. Provides richer context than risk scores alone.',
+    schema: entityStoreQuerySchema,
+    availability: {
+      cacheMode: 'space',
+      handler: async ({ request, spaceId, uiSettings }: ToolAvailabilityContext) => {
+        try {
+          if ((await uiSettings.get(AGENT_BUILDER_EXPERIMENTAL_FEATURES_SETTING_ID)) === true) {
+            return {
+              status: 'unavailable',
+              reason:
+                'Skills are enabled, which takes precedence over entity store query tool availability',
+            };
+          }
+
+          const availability = await getAgentBuilderResourceAvailability({ core, request, logger });
+          if (availability.status === 'available') {
+            const [coreStart] = await core.getStartServices();
+            const esClient = coreStart.elasticsearch.client.asInternalUser;
+
+            // Check if at least one entity store index exists for this space
+            const indexExists = await esClient.indices.exists({
+              index: getEntityStoreIndexName('*', spaceId),
+            });
+
+            if (indexExists) {
+              return { status: 'available' };
+            }
+
+            return {
+              status: 'unavailable',
+              reason: 'Entity store indices do not exist for this space',
+            };
+          }
+          return availability;
+        } catch (error) {
+          return {
+            status: 'unavailable',
+            reason: `Failed to check entity store index availability: ${
+              error instanceof Error ? error.message : String(error)
+            }`,
+          };
+        }
+      },
+    },
+    handler: async (
+      { entity_type: entityType, identifier, fields, time_range: timeRange, limit = 10 },
+      { spaceId, esClient }
+    ) => {
+      const entityStoreIndex = getEntityStoreIndexName(entityType, spaceId);
+
+      logger.debug(
+        `${SECURITY_ENTITY_STORE_QUERY_TOOL_ID} tool called with entity_type: ${entityType}, identifier: ${identifier}`
+      );
+
+      try {
+        let entities: Array<Record<string, unknown>>;
+
+        // "*" is the documented sentinel for "top entities by risk score".
+        if (identifier === '*') {
+          entities = await queryEntityStoreForWildcard({
+            esClient: esClient.asCurrentUser,
+            index: entityStoreIndex,
+            limit,
+            fields,
+            timeRange,
+          });
+
+          if (entities.length === 0) {
+            return {
+              results: [
+                {
+                  tool_result_id: getToolResultId(),
+                  type: ToolResultType.error,
+                  data: {
+                    message: `No entities found in the ${entityType} entity store`,
+                  },
+                },
+              ],
+            };
+          }
+
+          return {
+            results: [
+              {
+                tool_result_id: getToolResultId(),
+                type: ToolResultType.other,
+                data: {
+                  entity_type: entityType,
+                  count: entities.length,
+                  entities,
+                },
+              },
+            ],
+          };
+        }
+
+        // Handle specific entity queries
+        entities = await queryEntityStoreByName({
+          esClient: esClient.asCurrentUser,
+          index: entityStoreIndex,
+          entityType,
+          identifier,
+          fields,
+          timeRange,
+        });
+
+        if (entities.length === 0) {
+          return {
+            results: [
+              {
+                tool_result_id: getToolResultId(),
+                type: ToolResultType.error,
+                data: {
+                  message: `No entity found for ${entityType} with identifier: ${identifier}`,
+                },
+              },
+            ],
+          };
+        }
+
+        return {
+          results: [
+            {
+              tool_result_id: getToolResultId(),
+              type: ToolResultType.other,
+              data: {
+                entity_type: entityType,
+                identifier,
+                count: entities.length,
+                entities,
+              },
+            },
+          ],
+        };
+      } catch (error) {
+        logger.error(
+          `Error in ${SECURITY_ENTITY_STORE_QUERY_TOOL_ID} tool: ${
+            error instanceof Error ? error.message : String(error)
+          }`
+        );
+        return {
+          results: [
+            {
+              tool_result_id: getToolResultId(),
+              type: ToolResultType.error,
+              data: {
+                message: `Error querying entity store: ${
+                  error instanceof Error ? error.message : String(error)
+                }`,
+              },
+            },
+          ],
+        };
+      }
+    },
+    tags: ['security', 'entity-store', 'entities'],
+  };
+};
diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/index.ts b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/index.ts
index ce197db2ce83d..c825e32615bef 100644
--- a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/index.ts
+++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/index.ts
@@ -13,6 +13,10 @@ export {
   searchEntitiesTool,
   SECURITY_SEARCH_ENTITIES_TOOL_ID,
 } from './entity_analytics';
+export {
+  entityStoreQueryTool,
+  SECURITY_ENTITY_STORE_QUERY_TOOL_ID,
+} from './entity_store_query_tool';
 export {
   attackDiscoverySearchTool,
   SECURITY_ATTACK_DISCOVERY_SEARCH_TOOL_ID,
@@ -23,3 +27,12 @@ export {
   createDetectionRuleTool,
   SECURITY_CREATE_DETECTION_RULE_TOOL_ID,
 } from './create_detection_rule_tool';
+export { responseActionsTool, SECURITY_RESPONSE_ACTIONS_TOOL_ID } from './response_actions_tool';
+export { mitreMappingTool, SECURITY_MITRE_MAPPING_TOOL_ID } from './mitre_mapping_tool';
+export {
+  threatIntelEnrichTool,
+  SECURITY_THREAT_INTEL_ENRICH_TOOL_ID,
+} from './threat_intel_enrich_tool';
+export { timelineCreateTool, SECURITY_TIMELINE_CREATE_TOOL_ID } from './timeline_create_tool';
+export { reportGenerateTool, SECURITY_REPORT_GENERATE_TOOL_ID } from './report_generate_tool';
+export { caseManageTool, SECURITY_CASE_MANAGE_TOOL_ID } from './case_manage_tool';
diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/mitre_mapping_tool.test.ts b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/mitre_mapping_tool.test.ts
new file mode 100644 index 0000000000000..8e3716f5c6a9b --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/mitre_mapping_tool.test.ts @@ -0,0 +1,284 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { ToolResultType, type ErrorResult } from '@kbn/agent-builder-common'; +import type { ToolHandlerStandardReturn } from '@kbn/agent-builder-server/tools'; +import { createToolHandlerContext, createToolTestMocks } from '../__mocks__/test_helpers'; +import { mitreMappingTool } from './mitre_mapping_tool'; + +describe('mitreMappingTool', () => { + const { mockCore, mockLogger, mockEsClient, mockRequest } = createToolTestMocks(); + const tool = mitreMappingTool(mockCore, mockLogger); + + const mockMappingsResponse = { + mappings: [ + { + indicator: 'lateral movement via PsExec', + techniques: [ + { + technique_id: 'T1570', + technique_name: 'Lateral Tool Transfer', + tactic: 'Lateral Movement', + confidence: 0.95, + reasoning: 'PsExec is commonly used for lateral movement', + }, + ], + }, + ], + }; + + const createMockModelProvider = (responseContent: string) => ({ + getDefaultModel: jest.fn().mockResolvedValue({ + chatModel: { + invoke: jest.fn().mockResolvedValue({ content: responseContent }), + }, + }), + }); + + beforeEach(() => { + jest.clearAllMocks(); + }); + + describe('schema', () => { + it('validates correct indicators array', () => { + const validInput = { + indicators: ['lateral movement via PsExec', 'credential dumping from LSASS'], + }; + + const result = tool.schema.safeParse(validInput); + + expect(result.success).toBe(true); + }); + + it('rejects empty indicators array', () => { + const invalidInput = { + indicators: [], + }; + + const result = tool.schema.safeParse(invalidInput); + + 
expect(result.success).toBe(false); + }); + + it('rejects missing indicators', () => { + const invalidInput = {}; + + const result = tool.schema.safeParse(invalidInput); + + expect(result.success).toBe(false); + }); + + it('rejects non-array indicators', () => { + const invalidInput = { + indicators: 'lateral movement via PsExec', + }; + + const result = tool.schema.safeParse(invalidInput); + + expect(result.success).toBe(false); + }); + + it('accepts optional context', () => { + const validInput = { + indicators: ['suspicious PowerShell execution'], + context: 'Windows server environment with Active Directory', + }; + + const result = tool.schema.safeParse(validInput); + + expect(result.success).toBe(true); + }); + + it('validates without context', () => { + const validInput = { + indicators: ['suspicious PowerShell execution'], + }; + + const result = tool.schema.safeParse(validInput); + + expect(result.success).toBe(true); + }); + }); + + describe('handler', () => { + it('invokes model and returns parsed mappings', async () => { + const mockModelProvider = createMockModelProvider(JSON.stringify(mockMappingsResponse)); + + const result = (await tool.handler( + { indicators: ['lateral movement via PsExec'] }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger, { + modelProvider: mockModelProvider as never, + }) + )) as ToolHandlerStandardReturn; + + expect(result.results).toHaveLength(1); + expect(result.results[0].type).toBe(ToolResultType.other); + expect(result.results[0].data).toEqual( + expect.objectContaining({ + mappings: mockMappingsResponse.mappings, + indicator_count: 1, + technique_count: 1, + }) + ); + }); + + it('handles multiple indicators and techniques', async () => { + const multiMappingsResponse = { + mappings: [ + { + indicator: 'lateral movement via PsExec', + techniques: [ + { + technique_id: 'T1570', + technique_name: 'Lateral Tool Transfer', + tactic: 'Lateral Movement', + confidence: 0.95, + reasoning: 'PsExec is commonly used 
for lateral movement', + }, + ], + }, + { + indicator: 'credential dumping from LSASS', + techniques: [ + { + technique_id: 'T1003.001', + technique_name: 'LSASS Memory', + tactic: 'Credential Access', + confidence: 0.98, + reasoning: 'LSASS memory dumping is a well-known credential access technique', + }, + { + technique_id: 'T1003', + technique_name: 'OS Credential Dumping', + tactic: 'Credential Access', + confidence: 0.9, + reasoning: 'Parent technique for credential dumping', + }, + ], + }, + ], + }; + + const mockModelProvider = createMockModelProvider(JSON.stringify(multiMappingsResponse)); + + const result = (await tool.handler( + { + indicators: ['lateral movement via PsExec', 'credential dumping from LSASS'], + }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger, { + modelProvider: mockModelProvider as never, + }) + )) as ToolHandlerStandardReturn; + + expect(result.results[0].type).toBe(ToolResultType.other); + expect(result.results[0].data).toEqual( + expect.objectContaining({ + indicator_count: 2, + technique_count: 3, + }) + ); + }); + + it('passes context to the model when provided', async () => { + const mockModelProvider = createMockModelProvider(JSON.stringify(mockMappingsResponse)); + + await tool.handler( + { + indicators: ['suspicious PowerShell execution'], + context: 'Windows server environment', + }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger, { + modelProvider: mockModelProvider as never, + }) + ); + + const chatModel = (await mockModelProvider.getDefaultModel()).chatModel; + const invokeArgs = chatModel.invoke.mock.calls[0][0]; + const userMessage = invokeArgs[1].content; + expect(userMessage).toContain('Additional context: Windows server environment'); + }); + + it('handles response wrapped in markdown code blocks', async () => { + const wrappedResponse = `\`\`\`json\n${JSON.stringify(mockMappingsResponse)}\n\`\`\``; + const mockModelProvider = createMockModelProvider(wrappedResponse); + + const 
result = (await tool.handler( + { indicators: ['lateral movement via PsExec'] }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger, { + modelProvider: mockModelProvider as never, + }) + )) as ToolHandlerStandardReturn; + + expect(result.results[0].type).toBe(ToolResultType.other); + expect(result.results[0].data).toEqual( + expect.objectContaining({ + mappings: mockMappingsResponse.mappings, + }) + ); + }); + + it('handles JSON parse errors when no JSON found in response', async () => { + const mockModelProvider = createMockModelProvider( + 'I cannot provide MITRE mappings for this input.' + ); + + const result = (await tool.handler( + { indicators: ['some vague indicator'] }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger, { + modelProvider: mockModelProvider as never, + }) + )) as ToolHandlerStandardReturn; + + expect(result.results).toHaveLength(1); + const errorResult = result.results[0] as ErrorResult; + expect(errorResult.type).toBe(ToolResultType.error); + expect(errorResult.data.message).toContain('Failed to parse MITRE mapping response'); + }); + + it('handles model invocation errors', async () => { + const mockModelProvider = { + getDefaultModel: jest.fn().mockResolvedValue({ + chatModel: { + invoke: jest.fn().mockRejectedValue(new Error('Model invocation failed')), + }, + }), + }; + + const result = (await tool.handler( + { indicators: ['lateral movement via PsExec'] }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger, { + modelProvider: mockModelProvider as never, + }) + )) as ToolHandlerStandardReturn; + + expect(result.results).toHaveLength(1); + const errorResult = result.results[0] as ErrorResult; + expect(errorResult.type).toBe(ToolResultType.error); + expect(errorResult.data.message).toContain('Model invocation failed'); + expect(mockLogger.error).toHaveBeenCalled(); + }); + + it('handles getDefaultModel errors', async () => { + const mockModelProvider = { + getDefaultModel: 
jest.fn().mockRejectedValue(new Error('No model configured')), + }; + + const result = (await tool.handler( + { indicators: ['lateral movement via PsExec'] }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger, { + modelProvider: mockModelProvider as never, + }) + )) as ToolHandlerStandardReturn; + + expect(result.results).toHaveLength(1); + const errorResult = result.results[0] as ErrorResult; + expect(errorResult.type).toBe(ToolResultType.error); + expect(errorResult.data.message).toContain('No model configured'); + expect(mockLogger.error).toHaveBeenCalled(); + }); + }); +}); diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/mitre_mapping_tool.ts b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/mitre_mapping_tool.ts new file mode 100644 index 0000000000000..d6cde816e3081 --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/mitre_mapping_tool.ts @@ -0,0 +1,183 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { z } from '@kbn/zod/v4'; +import { ToolType, ToolResultType } from '@kbn/agent-builder-common'; +import type { BuiltinToolDefinition } from '@kbn/agent-builder-server'; +import { getToolResultId } from '@kbn/agent-builder-server/tools'; +import type { Logger } from '@kbn/logging'; +import { getAgentBuilderResourceAvailability } from '../utils/get_agent_builder_resource_availability'; +import { securityTool } from './constants'; +import type { SecuritySolutionPluginCoreSetupDependencies } from '../../plugin_contract'; + +const mitreMappingSchema = z.object({ + indicators: z + .array(z.string().max(500)) + .min(1) + .max(20) + .describe( + 'An array of strings describing security behaviors, TTPs, or findings to map to MITRE ATT&CK techniques (e.g., "lateral movement via PsExec", "credential dumping from LSASS")' + ), + context: z + .string() + .optional() + .describe( + 'Additional context about the environment, alert, or investigation to improve mapping accuracy' + ), +}); + +export const SECURITY_MITRE_MAPPING_TOOL_ID = securityTool('mitre_mapping'); + +const MITRE_MAPPING_SYSTEM_PROMPT = `You are an expert threat intelligence analyst specializing in the MITRE ATT&CK framework. +Given a list of security behaviors, TTPs, or findings, map each one to the most relevant MITRE ATT&CK techniques and sub-techniques. 
+ +For each mapping, provide: +- technique_id: The MITRE ATT&CK technique or sub-technique ID (e.g., "T1059.001") +- technique_name: The human-readable name of the technique (e.g., "PowerShell") +- tactic: The tactic phase(s) this technique belongs to (e.g., "Execution") +- confidence: A confidence score from 0.0 to 1.0 indicating how well the behavior matches +- reasoning: A brief explanation of why this mapping was chosen + +Return your response as a JSON object with this exact structure: +{ + "mappings": [ + { + "indicator": "", + "techniques": [ + { + "technique_id": "T1059.001", + "technique_name": "PowerShell", + "tactic": "Execution", + "confidence": 0.95, + "reasoning": "The behavior directly describes PowerShell script execution" + } + ] + } + ] +} + +Only return valid JSON, no additional text or markdown.`; + +export const mitreMappingTool = ( + core: SecuritySolutionPluginCoreSetupDependencies, + logger: Logger +): BuiltinToolDefinition => { + return { + id: SECURITY_MITRE_MAPPING_TOOL_ID, + type: ToolType.builtin, + description: + 'Map security behaviors, alerts, or findings to MITRE ATT&CK techniques and sub-techniques. Returns technique IDs, names, tactic phases, and confidence scores.', + schema: mitreMappingSchema, + availability: { + cacheMode: 'space', + handler: async ({ request }) => { + return getAgentBuilderResourceAvailability({ core, request, logger }); + }, + }, + handler: async ({ indicators, context }, { modelProvider }) => { + logger.debug( + `${SECURITY_MITRE_MAPPING_TOOL_ID} tool called with ${indicators.length} indicators` + ); + + try { + const model = await modelProvider.getDefaultModel(); + const chatModel = model.chatModel; + + const userMessage = [ + `Map the following security behaviors/findings to MITRE ATT&CK techniques:`, + '', + ...indicators.map((indicator, i) => `${i + 1}. ${indicator}`), + ...(context ? 
['', `Additional context: ${context}`] : []), + ].join('\n'); + + const response = await chatModel.invoke([ + { role: 'system', content: MITRE_MAPPING_SYSTEM_PROMPT }, + { role: 'user', content: userMessage }, + ]); + + let responseText: string; + if (typeof response.content === 'string') { + responseText = response.content; + } else if (Array.isArray(response.content)) { + // Extract text from content block arrays (e.g., [{ type: 'text', text: '...' }]) + const textBlock = response.content.find( + (block): block is { type: string; text: string } => + typeof block === 'object' && block !== null && 'text' in block + ); + responseText = textBlock?.text ?? JSON.stringify(response.content); + } else { + responseText = JSON.stringify(response.content); + } + + // Extract JSON from the response, handling potential markdown code blocks + const jsonMatch = responseText.match(/\{[\s\S]*\}/); + if (!jsonMatch) { + return { + results: [ + { + tool_result_id: getToolResultId(), + type: ToolResultType.error, + data: { + message: 'Failed to parse MITRE mapping response from model', + }, + }, + ], + }; + } + + const parsedResponse = JSON.parse(jsonMatch[0]); + + if (!parsedResponse.mappings || !Array.isArray(parsedResponse.mappings)) { + return { + results: [ + { + tool_result_id: getToolResultId(), + type: ToolResultType.error, + data: { + message: 'Invalid MITRE mapping response structure: expected { mappings: [...] }', + }, + }, + ], + }; + } + + return { + results: [ + { + tool_result_id: getToolResultId(), + type: ToolResultType.other, + data: { + mappings: parsedResponse.mappings, + indicator_count: indicators.length, + technique_count: parsedResponse.mappings.reduce( + (acc: number, m: { techniques?: unknown[] }) => + acc + (Array.isArray(m.techniques) ? m.techniques.length : 0), + 0 + ), + }, + }, + ], + }; + } catch (error) { + const errorMessage = error instanceof Error ? 
error.message : String(error); + logger.error(`Error in ${SECURITY_MITRE_MAPPING_TOOL_ID} tool: ${errorMessage}`); + return { + results: [ + { + tool_result_id: getToolResultId(), + type: ToolResultType.error, + data: { + message: `Error mapping to MITRE ATT&CK: ${errorMessage}`, + }, + }, + ], + }; + } + }, + tags: ['security', 'mitre', 'threat-intelligence'], + }; +}; diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/register_tools.ts b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/register_tools.ts index 01d4bb1560b55..7be125334105f 100644 --- a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/register_tools.ts +++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/register_tools.ts @@ -13,7 +13,15 @@ import { attackDiscoverySearchTool } from './attack_discovery_search_tool'; import { entityRiskScoreTool, getEntityTool, searchEntitiesTool } from './entity_analytics'; import { alertsTool } from './alerts_tool'; import { createDetectionRuleTool } from './create_detection_rule_tool'; +import { responseActionsTool } from './response_actions_tool'; +import { mitreMappingTool } from './mitre_mapping_tool'; +import { threatIntelEnrichTool } from './threat_intel_enrich_tool'; +import { timelineCreateTool } from './timeline_create_tool'; +import { reportGenerateTool } from './report_generate_tool'; +import { caseManageTool } from './case_manage_tool'; +import { entityStoreQueryTool } from './entity_store_query_tool'; import type { SecuritySolutionPluginCoreSetupDependencies } from '../../plugin_contract'; +import type { EndpointAppContextService } from '../../endpoint/endpoint_app_context_services'; /** * Registers all security agent builder tools with the agentBuilder plugin @@ -22,7 +30,8 @@ export const registerTools = async ( agentBuilder: AgentBuilderPluginSetup, core: SecuritySolutionPluginCoreSetupDependencies, logger: Logger, - 
experimentalFeatures: ExperimentalFeatures + experimentalFeatures: ExperimentalFeatures, + endpointAppContextService: EndpointAppContextService ) => { agentBuilder.tools.register(entityRiskScoreTool(core, logger)); agentBuilder.tools.register(attackDiscoverySearchTool(core, logger)); @@ -31,4 +40,15 @@ export const registerTools = async ( agentBuilder.tools.register(alertsTool(core, logger)); agentBuilder.tools.register(getEntityTool(core, logger, experimentalFeatures)); agentBuilder.tools.register(searchEntitiesTool(core, logger, experimentalFeatures)); + + // AI SOC tools — gated behind aiSocAgents feature flag + if (experimentalFeatures.aiSocAgents) { + agentBuilder.tools.register(responseActionsTool(core, logger, endpointAppContextService)); + agentBuilder.tools.register(mitreMappingTool(core, logger)); + agentBuilder.tools.register(threatIntelEnrichTool(core, logger)); + agentBuilder.tools.register(timelineCreateTool(core, logger)); + agentBuilder.tools.register(reportGenerateTool(core, logger)); + agentBuilder.tools.register(caseManageTool(core, logger)); + agentBuilder.tools.register(entityStoreQueryTool(core, logger)); + } }; diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/report_generate_tool.test.ts b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/report_generate_tool.test.ts new file mode 100644 index 0000000000000..77544287bab1d --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/report_generate_tool.test.ts @@ -0,0 +1,330 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { ToolResultType } from '@kbn/agent-builder-common'; +import type { ToolHandlerStandardReturn } from '@kbn/agent-builder-server/tools'; +import { createToolHandlerContext, createToolTestMocks } from '../__mocks__/test_helpers'; +import { reportGenerateTool } from './report_generate_tool'; + +describe('reportGenerateTool', () => { + const { mockCore, mockLogger, mockEsClient, mockRequest } = createToolTestMocks(); + const tool = reportGenerateTool(mockCore, mockLogger); + + const fullSections = { + executive_summary: 'A critical incident was detected involving lateral movement.', + timeline: '10:00 - Initial access detected. 10:15 - Lateral movement observed.', + mitre_mapping: 'T1059 - Command and Scripting Interpreter', + impact_assessment: '3 servers compromised, no data exfiltration confirmed.', + recommendations: 'Isolate affected hosts and rotate credentials.', + }; + + const requiredOnlySections = { + executive_summary: 'A critical incident was detected.', + timeline: '10:00 - Initial access detected.', + }; + + beforeEach(() => { + jest.clearAllMocks(); + jest.useFakeTimers(); + jest.setSystemTime(new Date('2024-01-15T12:00:00Z')); + }); + + afterEach(() => { + jest.useRealTimers(); + }); + + describe('schema', () => { + it('validates correct input with all sections', () => { + const validInput = { + title: 'Incident Report - Malware Outbreak', + sections: fullSections, + }; + + const result = tool.schema.safeParse(validInput); + + expect(result.success).toBe(true); + }); + + it('validates correct input with only required sections', () => { + const validInput = { + title: 'Incident Report', + sections: requiredOnlySections, + }; + + const result = tool.schema.safeParse(validInput); + + expect(result.success).toBe(true); + }); + + it('rejects missing title', () => { + const invalidInput = { + sections: requiredOnlySections, + }; + + const result = tool.schema.safeParse(invalidInput); + + expect(result.success).toBe(false); + }); + + it('rejects 
missing sections', () => { + const invalidInput = { + title: 'Incident Report', + }; + + const result = tool.schema.safeParse(invalidInput); + + expect(result.success).toBe(false); + }); + + it('rejects missing required executive_summary section', () => { + const invalidInput = { + title: 'Incident Report', + sections: { + timeline: '10:00 - Initial access detected.', + }, + }; + + const result = tool.schema.safeParse(invalidInput); + + expect(result.success).toBe(false); + }); + + it('rejects missing required timeline section', () => { + const invalidInput = { + title: 'Incident Report', + sections: { + executive_summary: 'A critical incident occurred.', + }, + }; + + const result = tool.schema.safeParse(invalidInput); + + expect(result.success).toBe(false); + }); + + it('validates optional mitre_mapping section', () => { + const validInput = { + title: 'Incident Report', + sections: { + ...requiredOnlySections, + mitre_mapping: 'T1059 - Command and Scripting Interpreter', + }, + }; + + const result = tool.schema.safeParse(validInput); + + expect(result.success).toBe(true); + }); + + it('validates optional impact_assessment section', () => { + const validInput = { + title: 'Incident Report', + sections: { + ...requiredOnlySections, + impact_assessment: '3 servers compromised.', + }, + }; + + const result = tool.schema.safeParse(validInput); + + expect(result.success).toBe(true); + }); + + it('validates optional recommendations section', () => { + const validInput = { + title: 'Incident Report', + sections: { + ...requiredOnlySections, + recommendations: 'Rotate all credentials immediately.', + }, + }; + + const result = tool.schema.safeParse(validInput); + + expect(result.success).toBe(true); + }); + + it('validates format enum', () => { + const markdownInput = { + title: 'Report', + sections: requiredOnlySections, + format: 'markdown', + }; + expect(tool.schema.safeParse(markdownInput).success).toBe(true); + + const jsonInput = { + title: 'Report', + sections: 
requiredOnlySections, + format: 'json', + }; + expect(tool.schema.safeParse(jsonInput).success).toBe(true); + }); + + it('rejects invalid format', () => { + const invalidInput = { + title: 'Report', + sections: requiredOnlySections, + format: 'html', + }; + + const result = tool.schema.safeParse(invalidInput); + + expect(result.success).toBe(false); + }); + }); + + describe('handler', () => { + it('generates markdown report with correct structure', async () => { + const result = (await tool.handler( + { title: 'Malware Outbreak', sections: fullSections }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results).toHaveLength(1); + expect(result.results[0].type).toBe(ToolResultType.other); + expect(result.results[0].data.format).toBe('markdown'); + + const report = result.results[0].data.report as string; + expect(report).toContain('# Incident Report: Malware Outbreak'); + expect(report).toContain('## Executive Summary'); + expect(report).toContain(fullSections.executive_summary); + expect(report).toContain('## Incident Timeline'); + expect(report).toContain(fullSections.timeline); + expect(report).toContain('## MITRE ATT&CK Mapping'); + expect(report).toContain(fullSections.mitre_mapping); + expect(report).toContain('## Impact Assessment'); + expect(report).toContain(fullSections.impact_assessment); + expect(report).toContain('## Recommendations'); + expect(report).toContain(fullSections.recommendations); + }); + + it('generates markdown report without optional sections', async () => { + const result = (await tool.handler( + { title: 'Minimal Report', sections: requiredOnlySections }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + const report = result.results[0].data.report as string; + expect(report).toContain('## Executive Summary'); + expect(report).toContain('## Incident Timeline'); + expect(report).not.toContain('## MITRE ATT&CK 
Mapping'); + expect(report).not.toContain('## Impact Assessment'); + expect(report).not.toContain('## Recommendations'); + }); + + it('generates JSON report', async () => { + const result = (await tool.handler( + { title: 'JSON Report', sections: fullSections, format: 'json' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results).toHaveLength(1); + expect(result.results[0].type).toBe(ToolResultType.other); + expect(result.results[0].data.format).toBe('json'); + + const report = result.results[0].data.report as Record<string, unknown>; + expect(report.title).toBe('JSON Report'); + expect(report.status).toBe('draft'); + expect(report.sections).toEqual( + expect.objectContaining({ + executive_summary: fullSections.executive_summary, + timeline: fullSections.timeline, + mitre_mapping: fullSections.mitre_mapping, + impact_assessment: fullSections.impact_assessment, + recommendations: fullSections.recommendations, + }) + ); + }); + + it('generates JSON report without optional sections', async () => { + const result = (await tool.handler( + { title: 'Minimal JSON', sections: requiredOnlySections, format: 'json' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + const report = result.results[0].data.report as Record<string, unknown>; + const reportSections = report.sections as Record<string, unknown>; + expect(reportSections.executive_summary).toBe(requiredOnlySections.executive_summary); + expect(reportSections.timeline).toBe(requiredOnlySections.timeline); + expect(reportSections.mitre_mapping).toBeUndefined(); + expect(reportSections.impact_assessment).toBeUndefined(); + expect(reportSections.recommendations).toBeUndefined(); + }); + + it('counts sections correctly with all sections', async () => { + const result = (await tool.handler( + { title: 'Full Report', sections: fullSections }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + 
expect(result.results[0].data.section_count).toBe(5); + }); + + it('counts sections correctly with only required sections', async () => { + const result = (await tool.handler( + { title: 'Minimal Report', sections: requiredOnlySections }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results[0].data.section_count).toBe(2); + }); + + it('counts sections correctly with partial optional sections', async () => { + const result = (await tool.handler( + { + title: 'Partial Report', + sections: { + ...requiredOnlySections, + recommendations: 'Rotate credentials.', + }, + }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results[0].data.section_count).toBe(3); + }); + + it('includes timestamp in markdown report', async () => { + const result = (await tool.handler( + { title: 'Timestamped Report', sections: requiredOnlySections }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + const report = result.results[0].data.report as string; + expect(report).toContain('**Generated:** 2024-01-15T12:00:00.000Z'); + }); + + it('includes timestamp in JSON report', async () => { + const result = (await tool.handler( + { title: 'Timestamped Report', sections: requiredOnlySections, format: 'json' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + const report = result.results[0].data.report as Record<string, unknown>; + expect(report.generated_at).toBe('2024-01-15T12:00:00.000Z'); + }); + + it('defaults to markdown format when format is not specified', async () => { + const result = (await tool.handler( + { title: 'Default Format', sections: requiredOnlySections }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results[0].data.format).toBe('markdown'); + }); + + it('includes 
success message in result', async () => { + const result = (await tool.handler( + { title: 'Message Check', sections: requiredOnlySections }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results[0].data.message).toContain('Message Check'); + expect(result.results[0].data.message).toContain('generated successfully'); + }); + }); +}); diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/report_generate_tool.ts b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/report_generate_tool.ts new file mode 100644 index 0000000000000..9ce3fee11e644 --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/report_generate_tool.ts @@ -0,0 +1,230 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { z } from '@kbn/zod/v4'; +import { ToolType, ToolResultType } from '@kbn/agent-builder-common'; +import type { BuiltinToolDefinition } from '@kbn/agent-builder-server'; +import { getToolResultId } from '@kbn/agent-builder-server/tools'; +import type { Logger } from '@kbn/logging'; +import { getAgentBuilderResourceAvailability } from '../utils/get_agent_builder_resource_availability'; +import { securityTool } from './constants'; +import type { SecuritySolutionPluginCoreSetupDependencies } from '../../plugin_contract'; + +const reportSectionsSchema = z.object({ + executive_summary: z + .string() + .describe('High-level summary of the incident for executive stakeholders'), + timeline: z + .string() + .describe( + 'Chronological sequence of events during the incident, including timestamps and key actions' + ), + mitre_mapping: z + .string() + .optional() + .describe( + 'MITRE ATT&CK technique and tactic mappings relevant to the incident (e.g., T1059 - Command and Scripting Interpreter)' + ), + impact_assessment: z + .string() + .optional() + .describe( + 'Assessment of the impact including affected systems, data exposure, business impact, and blast radius' + ), + recommendations: z + .string() + .optional() + .describe('Recommended remediation steps, hardening measures, and follow-up actions'), +}); + +const reportGenerateSchema = z.object({ + title: z.string().describe('Title of the incident report'), + sections: reportSectionsSchema.describe('Structured sections of the incident report'), + format: z + .enum(['markdown', 'json']) + .optional() + .describe('Output format for the report. 
Defaults to markdown.'), +}); + +export const SECURITY_REPORT_GENERATE_TOOL_ID = securityTool('report_generate'); + +/** + * Formats the report sections into a structured markdown document + */ +const formatAsMarkdown = ( + title: string, + sections: z.infer<typeof reportSectionsSchema> +): string => { + const timestamp = new Date().toISOString(); + const parts: string[] = []; + + parts.push(`# Incident Report: ${title}`); + parts.push(''); + parts.push(`**Generated:** ${timestamp}`); + parts.push(`**Status:** Draft`); + parts.push(''); + parts.push('---'); + parts.push(''); + + // Executive Summary + parts.push('## Executive Summary'); + parts.push(''); + parts.push(sections.executive_summary); + parts.push(''); + + // Timeline + parts.push('## Incident Timeline'); + parts.push(''); + parts.push(sections.timeline); + parts.push(''); + + // MITRE ATT&CK Mapping + if (sections.mitre_mapping) { + parts.push('## MITRE ATT&CK Mapping'); + parts.push(''); + parts.push(sections.mitre_mapping); + parts.push(''); + } + + // Impact Assessment + if (sections.impact_assessment) { + parts.push('## Impact Assessment'); + parts.push(''); + parts.push(sections.impact_assessment); + parts.push(''); + } + + // Recommendations + if (sections.recommendations) { + parts.push('## Recommendations'); + parts.push(''); + parts.push(sections.recommendations); + parts.push(''); + } + + parts.push('---'); + parts.push(''); + parts.push('*This report was generated by the Elastic AI SOC Agent Builder.*'); + + return parts.join('\n'); +}; + +/** + * Formats the report sections into a structured JSON object + */ +const formatAsJson = ( + title: string, + sections: z.infer<typeof reportSectionsSchema> +): Record<string, unknown> => { + return { + title, + generated_at: new Date().toISOString(), + status: 'draft', + sections: { + executive_summary: sections.executive_summary, + timeline: sections.timeline, + ...(sections.mitre_mapping && { mitre_mapping: sections.mitre_mapping }), + ...(sections.impact_assessment && { impact_assessment: sections.impact_assessment }), + 
...(sections.recommendations && { recommendations: sections.recommendations }), + }, + }; +}; + +export const reportGenerateTool = ( + core: SecuritySolutionPluginCoreSetupDependencies, + logger: Logger +): BuiltinToolDefinition => { + return { + id: SECURITY_REPORT_GENERATE_TOOL_ID, + type: ToolType.builtin, + description: + 'Generate structured incident reports from investigation context with sections for executive summary, technical timeline, MITRE ATT&CK mapping, impact assessment, and recommendations.', + schema: reportGenerateSchema, + availability: { + cacheMode: 'space', + handler: async ({ request }) => { + return getAgentBuilderResourceAvailability({ core, request, logger }); + }, + }, + handler: async ({ title, sections, format }) => { + const outputFormat = format ?? 'markdown'; + + logger.debug( + `${SECURITY_REPORT_GENERATE_TOOL_ID} tool called with title: "${title}", format: ${outputFormat}` + ); + + try { + const sectionCount = [ + 'executive_summary', + 'timeline', + sections.mitre_mapping ? 'mitre_mapping' : null, + sections.impact_assessment ? 'impact_assessment' : null, + sections.recommendations ? 
'recommendations' : null, + ].filter(Boolean).length; + + if (outputFormat === 'json') { + const jsonReport = formatAsJson(title, sections); + + logger.debug( + `Successfully generated JSON incident report "${title}" with ${sectionCount} sections` + ); + + return { + results: [ + { + tool_result_id: getToolResultId(), + type: ToolResultType.other, + data: { + format: 'json', + report: jsonReport, + section_count: sectionCount, + message: `Incident report "${title}" generated successfully in JSON format with ${sectionCount} sections.`, + }, + }, + ], + }; + } + + const markdownReport = formatAsMarkdown(title, sections); + + logger.debug( + `Successfully generated markdown incident report "${title}" with ${sectionCount} sections` + ); + + return { + results: [ + { + tool_result_id: getToolResultId(), + type: ToolResultType.other, + data: { + format: 'markdown', + report: markdownReport, + section_count: sectionCount, + message: `Incident report "${title}" generated successfully in markdown format with ${sectionCount} sections.`, + }, + }, + ], + }; + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error); + logger.error(`Error in ${SECURITY_REPORT_GENERATE_TOOL_ID} tool: ${errorMessage}`); + return { + results: [ + { + tool_result_id: getToolResultId(), + type: ToolResultType.error, + data: { + message: `Error generating report: ${errorMessage}`, + }, + }, + ], + }; + } + }, + tags: ['security', 'reporting', 'incident-response'], + }; +}; diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/response_actions_tool.test.ts b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/response_actions_tool.test.ts new file mode 100644 index 0000000000000..a2b51680de0d1 --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/response_actions_tool.test.ts @@ -0,0 +1,368 @@ +/* + * Copyright Elasticsearch B.V. 
and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { ToolResultType, type ErrorResult } from '@kbn/agent-builder-common'; +import type { ToolHandlerStandardReturn } from '@kbn/agent-builder-server/tools'; +import { createToolHandlerContext, createToolTestMocks } from '../__mocks__/test_helpers'; +import { responseActionsTool } from './response_actions_tool'; +import type { ResponseActionsClient } from '../../endpoint/services/actions/clients/lib/types'; +import type { EndpointAppContextService } from '../../endpoint/endpoint_app_context_services'; +import type { ActionDetails } from '../../../common/endpoint/types'; + +const createMockActionDetails = (overrides: Partial<ActionDetails> = {}): ActionDetails => ({ + id: 'action-123', + agents: ['endpoint-123'], + hosts: { 'endpoint-123': { name: 'test-host' } }, + command: 'isolate', + isExpired: false, + isCompleted: false, + wasSuccessful: false, + errors: undefined, + startedAt: '2026-03-25T00:00:00.000Z', + completedAt: undefined, + outputs: undefined, + agentState: { + 'endpoint-123': { + isCompleted: false, + wasSuccessful: false, + errors: undefined, + completedAt: undefined, + }, + }, + status: 'pending', + createdBy: 'agent-builder', + agentType: 'endpoint', + ...overrides, +}); + +const createMockResponseActionsClient = (): jest.Mocked<ResponseActionsClient> => ({ + isolate: jest.fn().mockResolvedValue(createMockActionDetails()), + release: jest.fn().mockResolvedValue(createMockActionDetails({ command: 'unisolate' })), + killProcess: jest.fn().mockResolvedValue(createMockActionDetails({ command: 'kill-process' })), + suspendProcess: jest + .fn() + .mockResolvedValue(createMockActionDetails({ command: 'suspend-process' })), + runningProcesses: jest.fn().mockResolvedValue(createMockActionDetails()), + getFile: jest.fn().mockResolvedValue(createMockActionDetails()), + 
execute: jest.fn().mockResolvedValue(createMockActionDetails()), + upload: jest.fn().mockResolvedValue(createMockActionDetails()), + processPendingActions: jest.fn().mockResolvedValue(undefined), + getCustomScripts: jest.fn().mockResolvedValue({ data: [] }), + getFileDownload: jest.fn().mockResolvedValue({ stream: null, fileName: '' }), + getFileInfo: jest.fn().mockResolvedValue({}), + scan: jest.fn().mockResolvedValue(createMockActionDetails()), + runscript: jest.fn().mockResolvedValue(createMockActionDetails()), + cancel: jest.fn().mockResolvedValue(createMockActionDetails()), + memoryDump: jest.fn().mockResolvedValue(createMockActionDetails()), +}); + +describe('responseActionsTool', () => { + const { mockCore, mockLogger, mockEsClient, mockRequest } = createToolTestMocks(); + let mockResponseActionsClient: jest.Mocked<ResponseActionsClient>; + let mockEndpointAppContextService: jest.Mocked<EndpointAppContextService>; + + beforeEach(() => { + jest.clearAllMocks(); + mockResponseActionsClient = createMockResponseActionsClient(); + mockEndpointAppContextService = { + getInternalResponseActionsClient: jest.fn().mockReturnValue(mockResponseActionsClient), + } as unknown as jest.Mocked<EndpointAppContextService>; + }); + + const getTool = () => responseActionsTool(mockCore, mockLogger, mockEndpointAppContextService); + + describe('schema', () => { + it('validates correct isolate action', () => { + const tool = getTool(); + const validInput = { + action: 'isolate', + endpoint_id: 'endpoint-123', + }; + + const result = tool.schema.safeParse(validInput); + + expect(result.success).toBe(true); + }); + + it('validates correct kill_process action with PID', () => { + const tool = getTool(); + const validInput = { + action: 'kill_process', + endpoint_id: 'endpoint-123', + parameters: { + process_pid: 1234, + }, + }; + + const result = tool.schema.safeParse(validInput); + + expect(result.success).toBe(true); + }); + + it('rejects missing endpoint_id', () => { + const tool = getTool(); + const invalidInput = { + action: 'isolate', + }; + + const 
result = tool.schema.safeParse(invalidInput); + + expect(result.success).toBe(false); + }); + + it('rejects empty endpoint_id', () => { + const tool = getTool(); + const invalidInput = { + action: 'isolate', + endpoint_id: '', + }; + + const result = tool.schema.safeParse(invalidInput); + + expect(result.success).toBe(false); + }); + + it('validates action enum values', () => { + const tool = getTool(); + const validActions = ['isolate', 'release', 'kill_process', 'suspend_process']; + for (const action of validActions) { + const result = tool.schema.safeParse({ action, endpoint_id: 'ep-1' }); + expect(result.success).toBe(true); + } + }); + + it('rejects invalid action', () => { + const tool = getTool(); + const invalidInput = { + action: 'reboot', + endpoint_id: 'endpoint-123', + }; + + const result = tool.schema.safeParse(invalidInput); + + expect(result.success).toBe(false); + }); + + it('accepts optional parameters with comment', () => { + const tool = getTool(); + const validInput = { + action: 'isolate', + endpoint_id: 'endpoint-123', + parameters: { + comment: 'Isolating due to malware detection', + }, + }; + + const result = tool.schema.safeParse(validInput); + + expect(result.success).toBe(true); + }); + }); + + describe('handler', () => { + it('rejects kill_process without PID', async () => { + const tool = getTool(); + const result = (await tool.handler( + { action: 'kill_process', endpoint_id: 'endpoint-123' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results).toHaveLength(1); + const errorResult = result.results[0] as ErrorResult; + expect(errorResult.type).toBe(ToolResultType.error); + expect(errorResult.data.message).toContain('kill_process'); + expect(errorResult.data.message).toContain('process_pid'); + }); + + it('rejects suspend_process without PID', async () => { + const tool = getTool(); + const result = (await tool.handler( + { action: 'suspend_process', 
endpoint_id: 'endpoint-123' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results).toHaveLength(1); + const errorResult = result.results[0] as ErrorResult; + expect(errorResult.type).toBe(ToolResultType.error); + expect(errorResult.data.message).toContain('suspend_process'); + expect(errorResult.data.message).toContain('process_pid'); + }); + + it('submits isolate action successfully', async () => { + const tool = getTool(); + const result = (await tool.handler( + { action: 'isolate', endpoint_id: 'endpoint-123' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results).toHaveLength(1); + expect(result.results[0].type).toBe(ToolResultType.other); + expect(result.results[0].data).toEqual( + expect.objectContaining({ + success: true, + action: 'isolate', + action_id: 'action-123', + endpoint_id: 'endpoint-123', + status: 'pending', + started_at: '2026-03-25T00:00:00.000Z', + is_completed: false, + }) + ); + + expect(mockEndpointAppContextService.getInternalResponseActionsClient).toHaveBeenCalledWith({ + spaceId: 'default', + username: 'agent-builder', + }); + + expect(mockResponseActionsClient.isolate).toHaveBeenCalledWith({ + endpoint_ids: ['endpoint-123'], + comment: 'Executed by AI SOC Agent', + }); + }); + + it('submits release action successfully', async () => { + const tool = getTool(); + const result = (await tool.handler( + { action: 'release', endpoint_id: 'endpoint-123' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results).toHaveLength(1); + expect(result.results[0].type).toBe(ToolResultType.other); + expect(result.results[0].data).toEqual( + expect.objectContaining({ + success: true, + action: 'release', + action_id: 'action-123', + endpoint_id: 'endpoint-123', + }) + ); + + 
expect(mockResponseActionsClient.release).toHaveBeenCalledWith({ + endpoint_ids: ['endpoint-123'], + comment: 'Executed by AI SOC Agent', + }); + }); + + it('submits kill_process action with PID successfully', async () => { + const tool = getTool(); + const result = (await tool.handler( + { + action: 'kill_process', + endpoint_id: 'endpoint-123', + parameters: { process_pid: 5678 }, + }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results).toHaveLength(1); + expect(result.results[0].type).toBe(ToolResultType.other); + expect(result.results[0].data).toEqual( + expect.objectContaining({ + success: true, + action: 'kill_process', + endpoint_id: 'endpoint-123', + process_pid: 5678, + status: 'pending', + }) + ); + + expect(mockResponseActionsClient.killProcess).toHaveBeenCalledWith({ + endpoint_ids: ['endpoint-123'], + comment: 'Executed by AI SOC Agent', + parameters: { pid: 5678 }, + }); + }); + + it('submits suspend_process action with PID successfully', async () => { + const tool = getTool(); + const result = (await tool.handler( + { + action: 'suspend_process', + endpoint_id: 'endpoint-123', + parameters: { process_pid: 9999 }, + }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results).toHaveLength(1); + expect(result.results[0].type).toBe(ToolResultType.other); + expect(result.results[0].data).toEqual( + expect.objectContaining({ + success: true, + action: 'suspend_process', + endpoint_id: 'endpoint-123', + process_pid: 9999, + }) + ); + + expect(mockResponseActionsClient.suspendProcess).toHaveBeenCalledWith({ + endpoint_ids: ['endpoint-123'], + comment: 'Executed by AI SOC Agent', + parameters: { pid: 9999 }, + }); + }); + + it('includes comment in the action request when provided', async () => { + const tool = getTool(); + const result = (await tool.handler( + { + action: 'isolate', + endpoint_id: 'endpoint-123', + 
parameters: { comment: 'Malware detected' }, + }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results[0].type).toBe(ToolResultType.other); + expect(result.results[0].data).toEqual( + expect.objectContaining({ + comment: 'Malware detected', + }) + ); + + expect(mockResponseActionsClient.isolate).toHaveBeenCalledWith({ + endpoint_ids: ['endpoint-123'], + comment: 'Malware detected', + }); + }); + + it('handles response actions client errors', async () => { + mockResponseActionsClient.isolate.mockRejectedValue(new Error('Endpoint not found')); + + const tool = getTool(); + const result = (await tool.handler( + { action: 'isolate', endpoint_id: 'endpoint-123' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results).toHaveLength(1); + const errorResult = result.results[0] as ErrorResult; + expect(errorResult.type).toBe(ToolResultType.error); + expect(errorResult.data.message).toContain('Endpoint not found'); + expect(mockLogger.error).toHaveBeenCalled(); + }); + + it('handles service initialization errors', async () => { + mockEndpointAppContextService.getInternalResponseActionsClient.mockImplementation(() => { + throw new Error('Service not started'); + }); + + const tool = getTool(); + const result = (await tool.handler( + { action: 'isolate', endpoint_id: 'endpoint-123' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results).toHaveLength(1); + const errorResult = result.results[0] as ErrorResult; + expect(errorResult.type).toBe(ToolResultType.error); + expect(errorResult.data.message).toContain('Service not started'); + expect(mockLogger.error).toHaveBeenCalled(); + }); + }); +}); diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/response_actions_tool.ts 
b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/response_actions_tool.ts new file mode 100644 index 0000000000000..b19c9e9ed68b9 --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/response_actions_tool.ts @@ -0,0 +1,164 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { z } from '@kbn/zod/v4'; +import { ToolType, ToolResultType } from '@kbn/agent-builder-common'; +import type { BuiltinToolDefinition } from '@kbn/agent-builder-server'; +import { getToolResultId } from '@kbn/agent-builder-server/tools'; +import type { Logger } from '@kbn/logging'; +import { getAgentBuilderResourceAvailability } from '../utils/get_agent_builder_resource_availability'; +import { securityTool } from './constants'; +import type { SecuritySolutionPluginCoreSetupDependencies } from '../../plugin_contract'; +import type { EndpointAppContextService } from '../../endpoint/endpoint_app_context_services'; + +const responseActionsSchema = z.object({ + action: z + .enum(['isolate', 'release', 'kill_process', 'suspend_process']) + .describe( + 'The response action to execute: isolate (isolate host from network), release (release host from isolation), kill_process (terminate a running process), suspend_process (suspend a running process)' + ), + endpoint_id: z + .string() + .min(1) + .max(255) + .regex(/^[a-zA-Z0-9-]+$/, 'Endpoint ID must contain only alphanumeric characters and hyphens') + .describe('The unique identifier of the endpoint agent to execute the action on'), + parameters: z + .object({ + process_pid: z + .number() + .optional() + .describe( + 'The PID of the process to kill or suspend. 
Required for kill_process and suspend_process actions' + ), + comment: z + .string() + .max(1000) + .optional() + .describe('An optional comment describing the reason for the response action'), + }) + .optional() + .describe('Optional parameters for the response action'), +}); + +export const SECURITY_RESPONSE_ACTIONS_TOOL_ID = securityTool('response_actions'); + +export const responseActionsTool = ( + core: SecuritySolutionPluginCoreSetupDependencies, + logger: Logger, + endpointAppContextService: EndpointAppContextService +): BuiltinToolDefinition => { + return { + id: SECURITY_RESPONSE_ACTIONS_TOOL_ID, + type: ToolType.builtin, + description: + 'Execute response actions on endpoints including host isolation, process termination, and process suspension. Requires the calling user to have endpoint response action privileges.', + schema: responseActionsSchema, + availability: { + cacheMode: 'space', + handler: async ({ request }) => { + return getAgentBuilderResourceAvailability({ core, request, logger }); + }, + }, + handler: async ({ action, endpoint_id: endpointId, parameters }, { spaceId }) => { + logger.debug( + `${SECURITY_RESPONSE_ACTIONS_TOOL_ID} tool called with action: ${action}, endpoint_id: ${endpointId}` + ); + + try { + // Validate that process actions include a PID + if ( + (action === 'kill_process' || action === 'suspend_process') && + parameters?.process_pid === undefined + ) { + return { + results: [ + { + tool_result_id: getToolResultId(), + type: ToolResultType.error, + data: { + message: `The ${action} action requires a process_pid parameter`, + }, + }, + ], + }; + } + + const responseActionsClient = endpointAppContextService.getInternalResponseActionsClient({ + spaceId, + username: 'agent-builder', + }); + + const baseRequestBody = { + endpoint_ids: [endpointId], + comment: parameters?.comment ?? 
'Executed by AI SOC Agent', + }; + + let result; + switch (action) { + case 'isolate': + result = await responseActionsClient.isolate(baseRequestBody); + break; + case 'release': + result = await responseActionsClient.release(baseRequestBody); + break; + case 'kill_process': + result = await responseActionsClient.killProcess({ + ...baseRequestBody, + parameters: { pid: parameters!.process_pid! }, + }); + break; + case 'suspend_process': + result = await responseActionsClient.suspendProcess({ + ...baseRequestBody, + parameters: { pid: parameters!.process_pid! }, + }); + break; + } + + logger.debug( + `Response action ${action} submitted for endpoint ${endpointId}, action id: ${result.id}` + ); + + return { + results: [ + { + tool_result_id: getToolResultId(), + type: ToolResultType.other, + data: { + success: true, + action, + action_id: result.id, + endpoint_id: endpointId, + status: result.status, + started_at: result.startedAt, + is_completed: result.isCompleted, + comment: parameters?.comment ?? null, + ...(parameters?.process_pid ? { process_pid: parameters.process_pid } : {}), + }, + }, + ], + }; + } catch (error) { + const errorMessage = error instanceof Error ? 
error.message : String(error); + logger.error(`Error in ${SECURITY_RESPONSE_ACTIONS_TOOL_ID} tool: ${errorMessage}`); + return { + results: [ + { + tool_result_id: getToolResultId(), + type: ToolResultType.error, + data: { + message: `Error executing response action: ${errorMessage}`, + }, + }, + ], + }; + } + }, + tags: ['security', 'response-actions', 'endpoint'], + }; +}; diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/threat_intel_enrich_tool.test.ts b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/threat_intel_enrich_tool.test.ts new file mode 100644 index 0000000000000..7a94766ebd229 --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/threat_intel_enrich_tool.test.ts @@ -0,0 +1,328 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { ToolResultType, type ErrorResult } from '@kbn/agent-builder-common'; +import type { ToolHandlerStandardReturn } from '@kbn/agent-builder-server/tools'; +import { createToolHandlerContext, createToolTestMocks } from '../__mocks__/test_helpers'; +import { threatIntelEnrichTool } from './threat_intel_enrich_tool'; + +describe('threatIntelEnrichTool', () => { + const { mockCore, mockLogger, mockEsClient, mockRequest } = createToolTestMocks(); + const tool = threatIntelEnrichTool(mockCore, mockLogger); + + beforeEach(() => { + jest.clearAllMocks(); + }); + + describe('schema', () => { + it('validates correct IP lookup', () => { + const validInput = { + ioc_type: 'ip', + ioc_value: '192.168.1.100', + }; + + const result = tool.schema.safeParse(validInput); + + expect(result.success).toBe(true); + }); + + it('validates correct domain lookup', () => { + const validInput = { + ioc_type: 'domain', + ioc_value: 'evil-domain.com', + }; + + const result = tool.schema.safeParse(validInput); + + expect(result.success).toBe(true); + }); + + it('validates correct hash lookup', () => { + const validInput = { + ioc_type: 'hash', + ioc_value: 'abc123def456abc123def456abc123de', + }; + + const result = tool.schema.safeParse(validInput); + + expect(result.success).toBe(true); + }); + + it('validates correct URL lookup', () => { + const validInput = { + ioc_type: 'url', + ioc_value: 'https://malicious-site.com/payload', + }; + + const result = tool.schema.safeParse(validInput); + + expect(result.success).toBe(true); + }); + + it('rejects unknown ioc_type', () => { + const invalidInput = { + ioc_type: 'email', + ioc_value: 'attacker@evil.com', + }; + + const result = tool.schema.safeParse(invalidInput); + + expect(result.success).toBe(false); + }); + + it('rejects missing ioc_value', () => { + const invalidInput = { + ioc_type: 'ip', + }; + + const result = tool.schema.safeParse(invalidInput); + + expect(result.success).toBe(false); + }); + + it('rejects empty ioc_value', 
() => { + const invalidInput = { + ioc_type: 'ip', + ioc_value: '', + }; + + const result = tool.schema.safeParse(invalidInput); + + expect(result.success).toBe(false); + }); + + it('accepts optional sources', () => { + const validInput = { + ioc_type: 'ip', + ioc_value: '1.2.3.4', + sources: ['AbuseCH', 'AlienVault OTX'], + }; + + const result = tool.schema.safeParse(validInput); + + expect(result.success).toBe(true); + }); + + it('validates without sources', () => { + const validInput = { + ioc_type: 'domain', + ioc_value: 'evil.com', + }; + + const result = tool.schema.safeParse(validInput); + + expect(result.success).toBe(true); + }); + }); + + describe('handler', () => { + it('returns matches when TI data is found for IP', async () => { + const mockTiHit = { + _id: 'ti-doc-1', + _index: '.ds-logs-ti_abusech-default', + _source: { + '@timestamp': '2024-01-15T10:00:00Z', + threat: { + indicator: { + type: 'ipv4-addr', + ip: '1.2.3.4', + provider: 'AbuseCH', + confidence: 'High', + description: 'Known C2 server', + first_seen: '2024-01-01T00:00:00Z', + last_seen: '2024-01-15T00:00:00Z', + }, + feed: { name: 'AbuseCH' }, + }, + }, + }; + + mockEsClient.asCurrentUser.search.mockResolvedValue({ + hits: { + hits: [mockTiHit], + total: { value: 1, relation: 'eq' }, + }, + } as never); + + const result = (await tool.handler( + { ioc_type: 'ip', ioc_value: '1.2.3.4' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results).toHaveLength(1); + expect(result.results[0].type).toBe(ToolResultType.other); + expect(result.results[0].data).toEqual( + expect.objectContaining({ + ioc_type: 'ip', + ioc_value: '1.2.3.4', + match_count: 1, + total_matches: 1, + }) + ); + expect(result.results[0].data.matches).toHaveLength(1); + expect(result.results[0].data.matches[0]).toEqual( + expect.objectContaining({ + index: '.ds-logs-ti_abusech-default', + }) + ); + }); + + it('returns empty result when no matches 
found', async () => { + mockEsClient.asCurrentUser.search.mockResolvedValue({ + hits: { + hits: [], + total: { value: 0, relation: 'eq' }, + }, + } as never); + + const result = (await tool.handler( + { ioc_type: 'ip', ioc_value: '10.0.0.1' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results).toHaveLength(1); + expect(result.results[0].type).toBe(ToolResultType.other); + expect(result.results[0].data).toEqual( + expect.objectContaining({ + ioc_type: 'ip', + ioc_value: '10.0.0.1', + match_count: 0, + matches: [], + message: expect.stringContaining('No threat intelligence found'), + }) + ); + }); + + it('builds correct query fields for IP IOC type', async () => { + mockEsClient.asCurrentUser.search.mockResolvedValue({ + hits: { hits: [], total: { value: 0, relation: 'eq' } }, + } as never); + + await tool.handler( + { ioc_type: 'ip', ioc_value: '1.2.3.4' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + ); + + const searchCall = mockEsClient.asCurrentUser.search.mock.calls[0][0] as Record< + string, + unknown + >; + const query = searchCall.query as { bool: { filter: Array> } }; + const shouldClause = query.bool.filter[0] as { + bool: { should: Array<{ term: Record }> }; + }; + const fields = shouldClause.bool.should.map((s) => Object.keys(s.term)[0]); + expect(fields).toEqual(['threat.indicator.ip']); + }); + + it('builds correct query fields for domain IOC type', async () => { + mockEsClient.asCurrentUser.search.mockResolvedValue({ + hits: { hits: [], total: { value: 0, relation: 'eq' } }, + } as never); + + await tool.handler( + { ioc_type: 'domain', ioc_value: 'evil.com' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + ); + + const searchCall = mockEsClient.asCurrentUser.search.mock.calls[0][0] as Record< + string, + unknown + >; + const query = searchCall.query as { bool: { filter: Array> } }; + const shouldClause = query.bool.filter[0] as { + 
bool: { should: Array<{ term: Record }> }; + }; + const fields = shouldClause.bool.should.map((s) => Object.keys(s.term)[0]); + expect(fields).toEqual(['threat.indicator.url.domain']); + }); + + it('builds correct query fields for hash IOC type', async () => { + mockEsClient.asCurrentUser.search.mockResolvedValue({ + hits: { hits: [], total: { value: 0, relation: 'eq' } }, + } as never); + + await tool.handler( + { ioc_type: 'hash', ioc_value: 'abc123' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + ); + + const searchCall = mockEsClient.asCurrentUser.search.mock.calls[0][0] as Record< + string, + unknown + >; + const query = searchCall.query as { bool: { filter: Array> } }; + const shouldClause = query.bool.filter[0] as { + bool: { should: Array<{ term: Record }> }; + }; + const fields = shouldClause.bool.should.map((s) => Object.keys(s.term)[0]); + expect(fields).toEqual([ + 'threat.indicator.file.hash.md5', + 'threat.indicator.file.hash.sha1', + 'threat.indicator.file.hash.sha256', + ]); + }); + + it('builds correct query fields for URL IOC type', async () => { + mockEsClient.asCurrentUser.search.mockResolvedValue({ + hits: { hits: [], total: { value: 0, relation: 'eq' } }, + } as never); + + await tool.handler( + { ioc_type: 'url', ioc_value: 'https://evil.com/payload' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + ); + + const searchCall = mockEsClient.asCurrentUser.search.mock.calls[0][0] as Record< + string, + unknown + >; + const query = searchCall.query as { bool: { filter: Array> } }; + const shouldClause = query.bool.filter[0] as { + bool: { should: Array<{ term: Record }> }; + }; + const fields = shouldClause.bool.should.map((s) => Object.keys(s.term)[0]); + expect(fields).toEqual(['threat.indicator.url.full']); + }); + + it('includes source filter when sources are provided', async () => { + mockEsClient.asCurrentUser.search.mockResolvedValue({ + hits: { hits: [], total: { value: 0, relation: 'eq' } }, + } 
as never); + + await tool.handler( + { ioc_type: 'ip', ioc_value: '1.2.3.4', sources: ['AbuseCH'] }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + ); + + const searchCall = mockEsClient.asCurrentUser.search.mock.calls[0][0] as Record< + string, + unknown + >; + const query = searchCall.query as { bool: { filter: Array> } }; + expect(query.bool.filter).toHaveLength(2); + expect(query.bool.filter[1]).toEqual({ + terms: { 'threat.indicator.provider': ['AbuseCH'] }, + }); + }); + + it('handles ES errors', async () => { + mockEsClient.asCurrentUser.search.mockRejectedValue(new Error('Search index not found')); + + const result = (await tool.handler( + { ioc_type: 'ip', ioc_value: '1.2.3.4' }, + createToolHandlerContext(mockRequest, mockEsClient, mockLogger) + )) as ToolHandlerStandardReturn; + + expect(result.results).toHaveLength(1); + const errorResult = result.results[0] as ErrorResult; + expect(errorResult.type).toBe(ToolResultType.error); + expect(errorResult.data.message).toContain('Search index not found'); + expect(mockLogger.error).toHaveBeenCalled(); + }); + }); +}); diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/threat_intel_enrich_tool.ts b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/threat_intel_enrich_tool.ts new file mode 100644 index 0000000000000..737d685bd0425 --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/threat_intel_enrich_tool.ts @@ -0,0 +1,199 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { z } from '@kbn/zod/v4'; +import { ToolType, ToolResultType } from '@kbn/agent-builder-common'; +import type { BuiltinToolDefinition } from '@kbn/agent-builder-server'; +import { getToolResultId } from '@kbn/agent-builder-server/tools'; +import type { Logger } from '@kbn/logging'; +import { getAgentBuilderResourceAvailability } from '../utils/get_agent_builder_resource_availability'; +import { securityTool } from './constants'; +import type { SecuritySolutionPluginCoreSetupDependencies } from '../../plugin_contract'; + +const iocTypeEnum = z.enum(['ip', 'domain', 'hash', 'url']); + +const threatIntelEnrichSchema = z.object({ + ioc_type: iocTypeEnum.describe( + 'The type of indicator of compromise: ip (IP address), domain (domain name), hash (file hash - MD5, SHA1, or SHA256), url (URL)' + ), + ioc_value: z + .string() + .min(1) + .max(2048) + .describe('The value of the indicator to look up (e.g., "1.2.3.4", "evil.com", "abc123...")'), + sources: z + .array(z.string()) + .optional() + .describe( + 'Optional list of specific threat intel source names to filter by (e.g., ["AbuseCH", "AlienVault OTX"])' + ), +}); + +export const SECURITY_THREAT_INTEL_ENRICH_TOOL_ID = securityTool('threat_intel_enrich'); + +/** Threat intel index patterns to search */ +const TI_INDEX_PATTERNS = ['.ds-logs-ti_*', 'logs-ti_*-*', 'filebeat-*']; + +/** + * Builds the appropriate ES query field path based on the IOC type + */ +const getIocFieldPaths = (iocType: z.infer): string[] => { + switch (iocType) { + case 'ip': + return ['threat.indicator.ip']; + case 'domain': + return ['threat.indicator.url.domain']; + case 'hash': + return [ + 'threat.indicator.file.hash.md5', + 'threat.indicator.file.hash.sha1', + 'threat.indicator.file.hash.sha256', + ]; + case 'url': + return ['threat.indicator.url.full']; + } +}; + +export const threatIntelEnrichTool = ( + core: SecuritySolutionPluginCoreSetupDependencies, + logger: Logger +): BuiltinToolDefinition => { + return { + id: 
SECURITY_THREAT_INTEL_ENRICH_TOOL_ID, + type: ToolType.builtin, + description: + 'Enrich indicators of compromise (IOCs) against configured threat intelligence sources. Queries TI indicator indices for matching threat intelligence with severity, source, and last-seen timestamp.', + schema: threatIntelEnrichSchema, + availability: { + cacheMode: 'space', + handler: async ({ request }) => { + return getAgentBuilderResourceAvailability({ core, request, logger }); + }, + }, + handler: async ({ ioc_type: iocType, ioc_value: iocValue, sources }, { esClient }) => { + logger.debug( + `${SECURITY_THREAT_INTEL_ENRICH_TOOL_ID} tool called with ioc_type: ${iocType}, ioc_value: ${iocValue}` + ); + + try { + const fieldPaths = getIocFieldPaths(iocType); + + // Build a should query to match the IOC value across all relevant fields + const shouldClauses = fieldPaths.map((field) => ({ + term: { [field]: iocValue }, + })); + + const filterClauses: Array> = [ + { + bool: { + should: shouldClauses, + minimum_should_match: 1, + }, + }, + ]; + + // Optionally filter by source names + if (sources && sources.length > 0) { + filterClauses.push({ + terms: { 'threat.indicator.provider': sources }, + }); + } + + const response = await esClient.asCurrentUser.search({ + index: TI_INDEX_PATTERNS.join(','), + ignore_unavailable: true, + allow_no_indices: true, + size: 20, + query: { + bool: { + filter: filterClauses, + }, + }, + sort: [{ '@timestamp': { order: 'desc' } }], + _source: [ + '@timestamp', + 'threat.indicator.type', + 'threat.indicator.ip', + 'threat.indicator.url.domain', + 'threat.indicator.url.full', + 'threat.indicator.file.hash.*', + 'threat.indicator.provider', + 'threat.indicator.confidence', + 'threat.indicator.description', + 'threat.indicator.first_seen', + 'threat.indicator.last_seen', + 'threat.indicator.marking.tlp', + 'threat.feed.name', + 'tags', + ], + }); + + const hits = response.hits.hits; + + if (hits.length === 0) { + return { + results: [ + { + tool_result_id: 
getToolResultId(), + type: ToolResultType.other, + data: { + ioc_type: iocType, + ioc_value: iocValue, + match_count: 0, + matches: [], + message: `No threat intelligence found for ${iocType}: ${iocValue}`, + }, + }, + ], + }; + } + + const matches = hits.map((hit) => { + const source = hit._source as Record; + return { + index: hit._index, + ...source, + }; + }); + + return { + results: [ + { + tool_result_id: getToolResultId(), + type: ToolResultType.other, + data: { + ioc_type: iocType, + ioc_value: iocValue, + match_count: hits.length, + total_matches: + typeof response.hits.total === 'number' + ? response.hits.total + : response.hits.total?.value ?? hits.length, + matches, + }, + }, + ], + }; + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error); + logger.error(`Error in ${SECURITY_THREAT_INTEL_ENRICH_TOOL_ID} tool: ${errorMessage}`); + return { + results: [ + { + tool_result_id: getToolResultId(), + type: ToolResultType.error, + data: { + message: `Error enriching threat intelligence: ${errorMessage}`, + }, + }, + ], + }; + } + }, + tags: ['security', 'threat-intelligence', 'enrichment'], + }; +}; diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/timeline_create_tool.test.ts b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/timeline_create_tool.test.ts new file mode 100644 index 0000000000000..5e953b16b67b7 --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/timeline_create_tool.test.ts @@ -0,0 +1,272 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { ToolResultType, type ErrorResult } from '@kbn/agent-builder-common'; +import type { ToolHandlerStandardReturn } from '@kbn/agent-builder-server/tools'; +import { createToolHandlerContext, createToolTestMocks } from '../__mocks__/test_helpers'; +import { timelineCreateTool } from './timeline_create_tool'; + +jest.mock('../../lib/timeline/saved_object/timelines', () => ({ + createTimeline: jest.fn(), +})); + +jest.mock('../../lib/timeline/saved_object/pinned_events', () => ({ + savePinnedEvents: jest.fn(), +})); + +import { createTimeline } from '../../lib/timeline/saved_object/timelines'; +import { savePinnedEvents } from '../../lib/timeline/saved_object/pinned_events'; + +const mockCreateTimeline = createTimeline as jest.MockedFunction; +const mockSavePinnedEvents = savePinnedEvents as jest.MockedFunction; + +describe('timelineCreateTool', () => { + const { mockCore, mockLogger, mockEsClient, mockRequest } = createToolTestMocks(); + const tool = timelineCreateTool(mockCore, mockLogger); + + beforeEach(() => { + jest.clearAllMocks(); + + mockCreateTimeline.mockResolvedValue({ + timeline: { + savedObjectId: 'timeline-123', + version: '1', + }, + } as ReturnType extends Promise ? T : never); + + mockSavePinnedEvents.mockResolvedValue( + undefined as unknown as ReturnType extends Promise + ? 
T + : never + ); + }); + + describe('schema', () => { + it('validates correct input with title and event_ids', () => { + const result = tool.schema.safeParse({ + title: 'Investigation Timeline', + event_ids: ['event-1', 'event-2'], + }); + expect(result.success).toBe(true); + }); + + it('accepts optional description', () => { + const result = tool.schema.safeParse({ + title: 'Timeline', + event_ids: ['event-1'], + description: 'Test description', + }); + expect(result.success).toBe(true); + }); + + it('accepts optional index_pattern', () => { + const result = tool.schema.safeParse({ + title: 'Timeline', + event_ids: ['event-1'], + index_pattern: 'custom-index-*', + }); + expect(result.success).toBe(true); + }); + + it('rejects empty title', () => { + const result = tool.schema.safeParse({ + title: '', + event_ids: ['event-1'], + }); + expect(result.success).toBe(false); + }); + + it('rejects empty event_ids array', () => { + const result = tool.schema.safeParse({ + title: 'Timeline', + event_ids: [], + }); + expect(result.success).toBe(false); + }); + + it('enforces max title length', () => { + const result = tool.schema.safeParse({ + title: 'a'.repeat(257), + event_ids: ['event-1'], + }); + expect(result.success).toBe(false); + }); + + it('enforces max event_ids count', () => { + const result = tool.schema.safeParse({ + title: 'Timeline', + event_ids: Array.from({ length: 101 }, (_, i) => `event-${i}`), + }); + expect(result.success).toBe(false); + }); + }); + + describe('handler', () => { + const mockSavedObjectsClient = { + create: jest.fn(), + get: jest.fn(), + find: jest.fn(), + update: jest.fn(), + delete: jest.fn(), + bulkCreate: jest.fn(), + }; + + const createContext = (overrides = {}) => + createToolHandlerContext(mockRequest, mockEsClient, mockLogger, { + savedObjectsClient: mockSavedObjectsClient as any, + ...overrides, + }); + + it('verifies events exist and creates timeline via Timeline API', async () => { + 
mockEsClient.asCurrentUser.search.mockResolvedValue({ + hits: { + hits: [{ _id: 'event-1' }, { _id: 'event-2' }], + }, + } as any); + + const result = (await tool.handler( + { title: 'Test Timeline', event_ids: ['event-1', 'event-2'] }, + createContext() + )) as ToolHandlerStandardReturn; + + expect(result.results[0].type).toBe(ToolResultType.other); + expect(result.results[0].data).toMatchObject({ + timeline_id: 'timeline-123', + title: 'Test Timeline', + pinned_events: 2, + }); + }); + + it('calls createTimeline with correct parameters', async () => { + mockEsClient.asCurrentUser.search.mockResolvedValue({ + hits: { hits: [{ _id: 'event-1' }] }, + } as any); + + await tool.handler( + { title: 'My Timeline', description: 'Test desc', event_ids: ['event-1'] }, + createContext() + ); + + expect(mockCreateTimeline).toHaveBeenCalledWith( + expect.objectContaining({ + timelineId: null, + timeline: expect.objectContaining({ + title: 'My Timeline', + description: 'Test desc', + }), + savedObjectsClient: mockSavedObjectsClient, + userInfo: null, + }) + ); + }); + + it('calls savePinnedEvents with found event IDs', async () => { + mockEsClient.asCurrentUser.search.mockResolvedValue({ + hits: { hits: [{ _id: 'event-1' }, { _id: 'event-2' }] }, + } as any); + + await tool.handler({ title: 'Timeline', event_ids: ['event-1', 'event-2'] }, createContext()); + + expect(mockSavePinnedEvents).toHaveBeenCalledWith( + expect.anything(), // frameworkRequest + 'timeline-123', + ['event-1', 'event-2'] + ); + }); + + it('returns timeline URL', async () => { + mockEsClient.asCurrentUser.search.mockResolvedValue({ + hits: { hits: [{ _id: 'event-1' }] }, + } as any); + + const result = (await tool.handler( + { title: 'Timeline', event_ids: ['event-1'] }, + createContext() + )) as ToolHandlerStandardReturn; + + expect(result.results[0].data).toMatchObject({ + url: expect.stringContaining("timeline=(id:'timeline-123'"), + }); + }); + + it('handles partial events — some found, some missing', 
async () => { + mockEsClient.asCurrentUser.search.mockResolvedValue({ + hits: { hits: [{ _id: 'event-1' }] }, + } as any); + + const result = (await tool.handler( + { title: 'Timeline', event_ids: ['event-1', 'event-missing'] }, + createContext() + )) as ToolHandlerStandardReturn; + + expect(result.results[0].type).toBe(ToolResultType.other); + expect(result.results[0].data).toMatchObject({ + pinned_events: 1, + missing_events: ['event-missing'], + }); + }); + + it('returns error when no events found', async () => { + mockEsClient.asCurrentUser.search.mockResolvedValue({ + hits: { hits: [] }, + } as any); + + const result = (await tool.handler( + { title: 'Timeline', event_ids: ['event-missing'] }, + createContext() + )) as ToolHandlerStandardReturn; + + const error = result.results[0] as ErrorResult; + expect(error.type).toBe(ToolResultType.error); + expect(error.data.message).toContain('None of the specified event IDs were found'); + }); + + it('uses custom index_pattern when provided', async () => { + mockEsClient.asCurrentUser.search.mockResolvedValue({ + hits: { hits: [{ _id: 'event-1' }] }, + } as any); + + await tool.handler( + { title: 'Timeline', event_ids: ['event-1'], index_pattern: 'custom-index-*' }, + createContext() + ); + + expect(mockEsClient.asCurrentUser.search).toHaveBeenCalledWith( + expect.objectContaining({ index: 'custom-index-*' }) + ); + }); + + it('handles createTimeline errors', async () => { + mockEsClient.asCurrentUser.search.mockResolvedValue({ + hits: { hits: [{ _id: 'event-1' }] }, + } as any); + mockCreateTimeline.mockRejectedValue(new Error('Timeline creation failed')); + + const result = (await tool.handler( + { title: 'Timeline', event_ids: ['event-1'] }, + createContext() + )) as ToolHandlerStandardReturn; + + const error = result.results[0] as ErrorResult; + expect(error.type).toBe(ToolResultType.error); + expect(error.data.message).toContain('Timeline creation failed'); + }); + + it('handles ES search errors', async () => { + 
mockEsClient.asCurrentUser.search.mockRejectedValue(new Error('ES unavailable')); + + const result = (await tool.handler( + { title: 'Timeline', event_ids: ['event-1'] }, + createContext() + )) as ToolHandlerStandardReturn; + + const error = result.results[0] as ErrorResult; + expect(error.type).toBe(ToolResultType.error); + expect(error.data.message).toContain('ES unavailable'); + }); + }); +}); diff --git a/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/timeline_create_tool.ts b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/timeline_create_tool.ts new file mode 100644 index 0000000000000..13dad67b19879 --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/agent_builder/tools/timeline_create_tool.ts @@ -0,0 +1,212 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import { z } from '@kbn/zod/v4'; +import { ToolType, ToolResultType } from '@kbn/agent-builder-common'; +import type { BuiltinToolDefinition } from '@kbn/agent-builder-server'; +import { getToolResultId } from '@kbn/agent-builder-server/tools'; +import type { Logger } from '@kbn/logging'; +import type { SavedTimeline } from '../../../common/api/timeline'; +import { TimelineStatusEnum, TimelineTypeEnum } from '../../../common/api/timeline'; +import { createTimeline } from '../../lib/timeline/saved_object/timelines'; +import { savePinnedEvents } from '../../lib/timeline/saved_object/pinned_events'; +import type { FrameworkRequest } from '../../lib/framework'; +import { getAgentBuilderResourceAvailability } from '../utils/get_agent_builder_resource_availability'; +import { DEFAULT_ALERTS_INDEX } from '../../../common/constants'; +import { securityTool } from './constants'; +import type { SecuritySolutionPluginCoreSetupDependencies } from '../../plugin_contract'; + +const timelineCreateSchema = z.object({ + title: z.string().min(1).max(256).describe('The title for the investigation timeline'), + description: z + .string() + .optional() + .describe('Optional description providing context for the timeline investigation'), + event_ids: z + .array(z.string().max(255)) + .min(1) + .max(100) + .describe('Array of event or alert IDs to pin to the timeline for investigation'), + index_pattern: z + .string() + .optional() + .describe( + 'Index pattern to search for events. Defaults to .alerts-security.alerts-* if not provided.' + ), +}); + +export const SECURITY_TIMELINE_CREATE_TOOL_ID = securityTool('timeline_create'); + +export const timelineCreateTool = ( + core: SecuritySolutionPluginCoreSetupDependencies, + logger: Logger +): BuiltinToolDefinition => { + return { + id: SECURITY_TIMELINE_CREATE_TOOL_ID, + type: ToolType.builtin, + description: + 'Create or update an investigation timeline from event data. 
Pins specified events to the timeline for investigation context.', + schema: timelineCreateSchema, + availability: { + cacheMode: 'space', + handler: async ({ request }) => { + return getAgentBuilderResourceAvailability({ core, request, logger }); + }, + }, + handler: async ( + { title, description, event_ids: eventIds, index_pattern: indexPattern }, + { esClient, spaceId, request, savedObjectsClient } + ) => { + logger.debug( + `${SECURITY_TIMELINE_CREATE_TOOL_ID} tool called with title: ${title}, eventIds: ${JSON.stringify( + eventIds + )}` + ); + + try { + const searchIndex = indexPattern ?? `${DEFAULT_ALERTS_INDEX}-${spaceId}`; + + // Verify that the specified events exist + const verifyResponse = await esClient.asCurrentUser.search({ + index: searchIndex, + ignore_unavailable: true, + allow_no_indices: true, + size: eventIds.length, + _source: false, + query: { + bool: { + filter: [{ terms: { _id: eventIds } }], + }, + }, + }); + + const foundIds = verifyResponse.hits.hits + .map((hit) => hit._id) + .filter((id): id is string => id !== undefined); + + const missingIds = eventIds.filter((id) => !foundIds.includes(id)); + + if (foundIds.length === 0) { + return { + results: [ + { + tool_result_id: getToolResultId(), + type: ToolResultType.error, + data: { + message: `None of the specified event IDs were found in index ${searchIndex}`, + }, + }, + ], + }; + } + + // Create the timeline using the Timeline API service + const timelineData: SavedTimeline = { + title, + description: description ?? 
'', + status: TimelineStatusEnum.active, + timelineType: TimelineTypeEnum.default, + templateTimelineId: null, + templateTimelineVersion: null, + columns: [ + { columnHeaderType: 'not-filtered', id: '@timestamp' }, + { columnHeaderType: 'not-filtered', id: 'message' }, + { columnHeaderType: 'not-filtered', id: 'event.category' }, + { columnHeaderType: 'not-filtered', id: 'event.action' }, + { columnHeaderType: 'not-filtered', id: 'host.name' }, + { columnHeaderType: 'not-filtered', id: 'source.ip' }, + { columnHeaderType: 'not-filtered', id: 'destination.ip' }, + { columnHeaderType: 'not-filtered', id: 'user.name' }, + ], + dataProviders: [], + dateRange: { + start: new Date(Date.now() - 24 * 60 * 60 * 1000).toISOString(), + end: new Date().toISOString(), + }, + excludedRowRendererIds: [], + favorite: [], + filters: [], + indexNames: [searchIndex], + kqlMode: 'filter', + kqlQuery: { filterQuery: null }, + sort: [{ columnId: '@timestamp', columnType: 'date', sortDirection: 'desc' }], + }; + + const timelineResponse = await createTimeline({ + timelineId: null, + timeline: timelineData, + savedObjectsClient, + userInfo: null, + }); + + const timelineId = timelineResponse.timeline.savedObjectId; + + // Pin events to the timeline using the Timeline API service. + // savePinnedEvents requires a FrameworkRequest, so we construct a + // compatible wrapper that provides the savedObjectsClient and user + // through the expected context.core resolution path. 
+ const frameworkRequest = { + body: request.body, + user: null, + context: { + core: Promise.resolve({ + savedObjects: { client: savedObjectsClient }, + }), + }, + } as unknown as FrameworkRequest; + + await savePinnedEvents(frameworkRequest, timelineId, foundIds); + + // Build the deep link URL for the timeline + const timelineUrl = `/app/security/timelines?timeline=(id:'${timelineId}',isOpen:!t)`; + + logger.debug( + `Successfully created timeline "${title}" with ID: ${timelineId}, pinned ${foundIds.length} events` + ); + + return { + results: [ + { + tool_result_id: getToolResultId(), + type: ToolResultType.other, + data: { + timeline_id: timelineId, + title, + description: description ?? '', + pinned_events: foundIds.length, + missing_events: missingIds.length > 0 ? missingIds : undefined, + url: timelineUrl, + message: `Timeline "${title}" created successfully with ${ + foundIds.length + } pinned events.${ + missingIds.length > 0 + ? ` ${missingIds.length} event(s) were not found and were skipped.` + : '' + }`, + }, + }, + ], + }; + } catch (error) { + const errorMessage = error instanceof Error ? 
error.message : String(error); + logger.error(`Error in ${SECURITY_TIMELINE_CREATE_TOOL_ID} tool: ${errorMessage}`); + return { + results: [ + { + tool_result_id: getToolResultId(), + type: ToolResultType.error, + data: { + message: `Error creating timeline: ${errorMessage}`, + }, + }, + ], + }; + } + }, + tags: ['security', 'timeline', 'investigation'], + }; +}; diff --git a/x-pack/solutions/security/plugins/security_solution/server/plugin.ts b/x-pack/solutions/security/plugins/security_solution/server/plugin.ts index c470bdac8e45e..464df486b14b1 100644 --- a/x-pack/solutions/security/plugins/security_solution/server/plugin.ts +++ b/x-pack/solutions/security/plugins/security_solution/server/plugin.ts @@ -20,10 +20,10 @@ import type { NewPackagePolicy, UpdatePackagePolicy } from '@kbn/fleet-plugin/co import { FLEET_ENDPOINT_PACKAGE } from '@kbn/fleet-plugin/common'; import { registerScriptsLibraryRoutes } from './endpoint/routes/scripts_library'; -import { registerAgents } from './agent_builder/agents'; import { registerAttachments } from './agent_builder/attachments/register_attachments'; import { registerTools } from './agent_builder/tools/register_tools'; import { registerSkills } from './agent_builder/skills/register_skills'; +import { socAlertTriggerDefinition } from '../common/workflows'; import { migrateEndpointDataToSupportSpaces } from './endpoint/migrations/space_awareness_migration'; import { SavedObjectsClientFactory } from './endpoint/services/saved_objects'; import { registerEntityStoreDataViewRefreshTask } from './lib/entity_analytics/entity_store/tasks/data_view_refresh/data_view_refresh_task'; @@ -256,15 +256,18 @@ export class Plugin implements ISecuritySolutionPlugin { const experimentalFeatures = this.config.experimentalFeatures; const endpointAppContextService = this.endpointAppContextService; - registerTools(agentBuilder, core, logger, experimentalFeatures).catch((error) => { + registerTools( + agentBuilder, + core, + logger, + 
experimentalFeatures, + endpointAppContextService + ).catch((error) => { this.logger.error(`Error registering security tools: ${error}`); }); registerAttachments(agentBuilder).catch((error) => { this.logger.error(`Error registering security attachments: ${error}`); }); - registerAgents(agentBuilder, core, logger).catch((error) => { - this.logger.error(`Error registering security agent: ${error}`); - }); registerSkills({ agentBuilder, experimentalFeatures, @@ -703,6 +706,10 @@ export class Plugin implements ISecuritySolutionPlugin { }, }); + if (experimentalFeatures.aiSocAgents && plugins.workflowsExtensions) { + plugins.workflowsExtensions.registerTriggerDefinition(socAlertTriggerDefinition); + } + return { setProductFeaturesConfigurator: productFeaturesService.setProductFeaturesConfigurator.bind(productFeaturesService), diff --git a/x-pack/solutions/security/plugins/security_solution/server/plugin_contract.ts b/x-pack/solutions/security/plugins/security_solution/server/plugin_contract.ts index c4132e077ff7b..cf700eb1ccfcf 100644 --- a/x-pack/solutions/security/plugins/security_solution/server/plugin_contract.ts +++ b/x-pack/solutions/security/plugins/security_solution/server/plugin_contract.ts @@ -51,6 +51,7 @@ import type { AgentBuilderPluginStart, } from '@kbn/agent-builder-plugin/server'; import type { LlmTasksPluginStart } from '@kbn/llm-tasks-plugin/server'; +import type { WorkflowsExtensionsServerPluginSetup } from '@kbn/workflows-extensions/server'; import type { ProductFeaturesService } from './lib/product_features_service/product_features_service'; import type { ExperimentalFeatures } from '../common'; @@ -76,6 +77,7 @@ export interface SecuritySolutionPluginSetupDependencies { kql: KqlServerPluginSetup; share?: SharePluginSetup; agentBuilder?: AgentBuilderPluginSetup; + workflowsExtensions?: WorkflowsExtensionsServerPluginSetup; } export interface SecuritySolutionPluginStartDependencies { diff --git 
a/x-pack/solutions/security/plugins/security_solution/server/workflows/index.ts b/x-pack/solutions/security/plugins/security_solution/server/workflows/index.ts new file mode 100644 index 0000000000000..c5c219f3e5dfa --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/workflows/index.ts @@ -0,0 +1,22 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +export { + INCIDENT_RESPONSE_PLAYBOOK, + FULL_INVESTIGATION_PLAYBOOK, + PROACTIVE_THREAT_HUNT_PLAYBOOK, + DETECTION_COVERAGE_AUDIT_PLAYBOOK, + SOC_PLAYBOOKS, +} from './playbooks'; + +export { + SOC_ALERT_TRIGGER_ID, + socAlertTriggerEventSchema, + socAlertTriggerDefinition, +} from '../../common/workflows'; + +export type { SocAlertTriggerEvent } from '../../common/workflows'; diff --git a/x-pack/solutions/security/plugins/security_solution/server/workflows/playbooks/index.ts b/x-pack/solutions/security/plugins/security_solution/server/workflows/playbooks/index.ts new file mode 100644 index 0000000000000..a0f6e356b4466 --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/workflows/playbooks/index.ts @@ -0,0 +1,507 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +/** + * Pre-built SOC workflow playbooks as YAML definitions. + * + * These can be imported via the Workflows Management UI. + * Each playbook orchestrates a sequence of AI agent steps to automate + * common Security Operations Center workflows. 
+ * + * Agent steps declare structured output schemas so downstream conditions + * reference typed JSON fields instead of fragile string matching (the terminal create_rules step is the one schema-less exception). + */ + +/** + * Incident Response playbook + * + * End-to-end pipeline: Triage -> Extract -> Investigate -> Respond -> Approval Gate -> Report. + * Triggers on high/critical severity alerts and routes through + * specialized agents for each phase of incident handling. + * Includes a confidence-gated approval step (currently a log-only console.log notification; it does not pause the workflow) before the final report. + */ +export const INCIDENT_RESPONSE_PLAYBOOK = `\ +name: Incident Response +description: End-to-end incident response pipeline - Triage, Investigate, Respond, Report +triggers: + - type: security.alertCreated + on: + condition: 'event.severity: "critical" OR event.severity: "high"' +steps: + - name: triage + type: ai.agent + with: + schema: + type: object + properties: + verdict: + type: string + enum: [true_positive, benign_true_positive, false_positive] + description: Classification of the alert + confidence: + type: number + description: Confidence score 0.0-1.0 + summary: + type: string + description: Brief summary of the triage analysis + key_evidence: + type: string + description: Key evidence supporting the verdict + recommended_action: + type: string + description: Suggested next action based on triage + required: [verdict, confidence, summary, key_evidence, recommended_action] + message: > + Use the alert-triage skill to triage the following security alert: + Alert ID: {{ context.event.alert_id }} + Rule: {{ context.event.rule_name }} + Severity: {{ context.event.severity }} + Risk Score: {{ context.event.risk_score }} + + Provide a structured verdict (true_positive, benign_true_positive, false_positive) + with confidence score, key evidence, and recommended next action. 
+ + - name: extract_triage + type: data.map + items: "\${{ steps.triage.output.structured_output }}" + with: + fields: + verdict: "\${{ item.verdict }}" + confidence: "\${{ item.confidence }}" + summary: "\${{ item.summary }}" + recommended_action: "\${{ item.recommended_action }}" + + - name: investigate + type: ai.agent + if: "steps.triage.output.structured_output.verdict == 'true_positive'" + with: + schema: + type: object + properties: + timeline: + type: string + description: Chronological timeline of the incident + affected_entities: + type: array + items: + type: string + description: List of affected entities (hosts, users, IPs) + attack_vector: + type: string + description: Identified attack vector or entry point + root_cause: + type: string + description: Root cause analysis + severity: + type: string + enum: [critical, high, medium, low] + description: Assessed severity after investigation + required: [timeline, affected_entities, attack_vector, root_cause, severity] + message: > + Use the investigation skill to investigate the alert that was triaged as a true positive: + Verdict: {{ steps.triage.output.structured_output.verdict }} + Confidence: {{ steps.triage.output.structured_output.confidence }} + Summary: {{ steps.triage.output.structured_output.summary }} + Key Evidence: {{ steps.triage.output.structured_output.key_evidence }} + + Build a complete timeline, identify affected entities, determine the attack vector, + analyze root cause, and assess severity. 
+ + - name: respond + type: ai.agent + if: "steps.investigate.output.structured_output.severity == 'critical' OR steps.investigate.output.structured_output.severity == 'high'" + with: + schema: + type: object + properties: + confidence: + type: number + description: Confidence score 0.0-1.0 in recommended actions + recommended_actions: + type: array + items: + type: string + description: Ordered list of recommended containment and remediation actions + blast_radius: + type: string + description: Assessment of the potential impact of response actions + rollback_procedures: + type: string + description: Steps to roll back response actions if needed + required: [confidence, recommended_actions, blast_radius, rollback_procedures] + message: > + Use the response-recommendation skill to recommend containment actions based on the investigation findings: + Severity: {{ steps.investigate.output.structured_output.severity }} + Attack Vector: {{ steps.investigate.output.structured_output.attack_vector }} + Root Cause: {{ steps.investigate.output.structured_output.root_cause }} + Affected Entities: {{ steps.investigate.output.structured_output.affected_entities }} + + Provide confidence-scored recommendations with blast radius assessment + and rollback procedures. + + - name: approval_gate + type: console.log + if: "steps.respond.output.structured_output.confidence < 0.70" + with: + message: > + HUMAN APPROVAL REQUIRED: Response actions recommended with confidence + {{ steps.respond.output.structured_output.confidence }}. + Actions: {{ steps.respond.output.structured_output.recommended_actions }} + Blast Radius: {{ steps.respond.output.structured_output.blast_radius }} + Please review and approve before execution. 
+ + - name: report + type: ai.agent + with: + schema: + type: object + properties: + report_markdown: + type: string + description: Full incident report in Markdown format + executive_summary: + type: string + description: Executive-level summary of the incident + case_id: + type: string + description: Generated case identifier for tracking + required: [report_markdown, executive_summary, case_id] + message: > + Use the incident-reporting skill to generate an incident report based on the completed workflow steps. + + Triage findings: + Verdict: {{ steps.triage.output.structured_output.verdict | default: "unknown" }} + Confidence: {{ steps.triage.output.structured_output.confidence | default: "N/A" }} + Summary: {{ steps.triage.output.structured_output.summary | default: "No triage summary available." }} + + Investigation findings (if investigation was performed): + Severity: {{ steps.investigate.output.structured_output.severity | default: "N/A" }} + Timeline: {{ steps.investigate.output.structured_output.timeline | default: "Investigation was not performed - alert was classified as false positive or benign." }} + Root Cause: {{ steps.investigate.output.structured_output.root_cause | default: "N/A" }} + Affected Entities: {{ steps.investigate.output.structured_output.affected_entities | default: "N/A" }} + + Response recommendations (if response was needed): + Actions: {{ steps.respond.output.structured_output.recommended_actions | default: "No response actions were recommended for this incident." }} + Confidence: {{ steps.respond.output.structured_output.confidence | default: "N/A" }} + Rollback: {{ steps.respond.output.structured_output.rollback_procedures | default: "N/A" }} + + Create both executive and technical summaries. If investigation or response + sections show default values, note that in the report and explain why those phases were skipped. 
+`; + +/** + * Full Investigation playbook + * + * Deep investigation pipeline: Investigate -> Correlate -> MITRE Map -> Report. + * Designed for manual invocation when an analyst needs a thorough + * investigation with cross-campaign correlation and ATT&CK mapping. NOTE(review): the investigate step interpolates {{ context.event.alert_id }}, but this playbook declares no trigger - confirm how context.event is populated when invoked manually. + */ +export const FULL_INVESTIGATION_PLAYBOOK = `\ +name: Full Investigation +description: Deep investigation pipeline - Investigate, Correlate, MITRE Map, Report +steps: + - name: investigate + type: ai.agent + with: + schema: + type: object + properties: + timeline: + type: string + description: Chronological timeline of the incident + affected_entities: + type: array + items: + type: string + description: List of affected entities + root_cause: + type: string + description: Root cause analysis + confidence: + type: number + description: Confidence score 0.0-1.0 in findings + required: [timeline, affected_entities, root_cause, confidence] + message: > + Use the investigation skill to conduct a deep investigation of the following finding: + {{ context.event.alert_id }} + + Build timeline, identify affected entities, and analyze root cause. + + - name: correlate + type: ai.agent + with: + schema: + type: object + properties: + campaigns: + type: array + items: + type: string + description: Identified related campaigns + related_findings: + type: string + description: Summary of related findings across entity dimensions + attack_chain: + type: string + description: Reconstructed attack chain from correlated events + required: [campaigns, related_findings, attack_chain] + message: > + Use the investigation skill to analyze the investigation findings for cross-campaign correlation: + Timeline: {{ steps.investigate.output.structured_output.timeline }} + Affected Entities: {{ steps.investigate.output.structured_output.affected_entities }} + Root Cause: {{ steps.investigate.output.structured_output.root_cause }} + + Look for related activity across entity dimensions and identify campaign patterns. 
+ + - name: mitre_analysis + type: ai.agent + with: + schema: + type: object + properties: + covered_techniques: + type: array + items: + type: string + description: MITRE ATT&CK techniques observed in this incident + gaps: + type: array + items: + type: string + description: Detection gaps identified from the attack chain + recommendations: + type: string + description: Recommendations for improving detection coverage + required: [covered_techniques, gaps, recommendations] + message: > + Use the mitre-coverage skill to map the correlated findings to MITRE ATT&CK: + Attack Chain: {{ steps.correlate.output.structured_output.attack_chain }} + Campaigns: {{ steps.correlate.output.structured_output.campaigns }} + Related Findings: {{ steps.correlate.output.structured_output.related_findings }} + + Identify coverage gaps and recommend detection improvements. + + - name: report + type: ai.agent + with: + schema: + type: object + properties: + report_markdown: + type: string + description: Full investigation report in Markdown format + required: [report_markdown] + message: > + Use the incident-reporting skill to generate a comprehensive investigation report: + - Investigation Timeline: {{ steps.investigate.output.structured_output.timeline }} + - Affected Entities: {{ steps.investigate.output.structured_output.affected_entities }} + - Root Cause: {{ steps.investigate.output.structured_output.root_cause }} + - Campaigns: {{ steps.correlate.output.structured_output.campaigns }} + - Attack Chain: {{ steps.correlate.output.structured_output.attack_chain }} + - MITRE Techniques: {{ steps.mitre_analysis.output.structured_output.covered_techniques }} + - Detection Gaps: {{ steps.mitre_analysis.output.structured_output.gaps }} + - Recommendations: {{ steps.mitre_analysis.output.structured_output.recommendations }} +`; + +/** + * Proactive Threat Hunt playbook + * + * Automated weekly hunting pipeline: Hunt -> Correlate -> Create Rules. 
+ * Scheduled to run every 7 days, scanning for unusual patterns + * and recommending new detection rules for uncovered techniques. NOTE(review): the terminal create_rules step declares no output schema, unlike every other agent step - confirm this is intentional. + */ +export const PROACTIVE_THREAT_HUNT_PLAYBOOK = `\ +name: Proactive Threat Hunt +description: Automated weekly threat hunting - Hunt, Correlate, Create Rules +triggers: + - type: schedule + on: + interval: 7d +steps: + - name: hunt + type: ai.agent + with: + schema: + type: object + properties: + findings: + type: array + items: + type: object + properties: + description: + type: string + severity: + type: string + enum: [critical, high, medium, low] + entities: + type: array + items: + type: string + description: List of findings from the threat hunt + severity: + type: string + enum: [critical, high, medium, low, none] + description: Overall severity of hunt findings + hunt_areas_searched: + type: array + items: + type: string + description: Areas that were searched during the hunt + required: [findings, severity, hunt_areas_searched] + message: > + Use the threat-hunting skill to conduct a proactive threat hunt across the environment. + Look for: + 1. Unusual authentication patterns in the last 7 days + 2. Suspicious process execution chains + 3. Anomalous network connections to rare external domains + 4. Living-off-the-land (LOTL) technique indicators + + Use the alerts and entity risk tools to identify high-risk entities. + Return structured findings with severity assessments. 
+ + - name: correlate + type: ai.agent + if: "steps.hunt.output.structured_output.findings | size > 0" + with: + schema: + type: object + properties: + patterns: + type: array + items: + type: object + properties: + pattern_name: + type: string + description: + type: string + confidence: + type: number + description: Identified patterns across hunt findings + campaign_assessment: + type: string + description: Assessment of whether findings constitute a coordinated campaign + required: [patterns, campaign_assessment] + message: > + Use the investigation skill to correlate the threat hunt findings across campaigns: + Findings: {{ steps.hunt.output.structured_output.findings }} + Overall Severity: {{ steps.hunt.output.structured_output.severity }} + Areas Searched: {{ steps.hunt.output.structured_output.hunt_areas_searched }} + + Identify patterns, group related findings, and assess if they constitute a campaign. + + - name: create_rules + type: ai.agent + if: "steps.correlate.output.structured_output.patterns | size > 0" + with: + message: > + Use the mitre-coverage skill to recommend new detection rules based on the hunt and correlation findings: + Patterns: {{ steps.correlate.output.structured_output.patterns }} + Campaign Assessment: {{ steps.correlate.output.structured_output.campaign_assessment }} + + Identify MITRE techniques that need coverage and create ES|QL detection rules. +`; + +/** + * Detection Coverage Audit playbook + * + * Monthly MITRE ATT&CK coverage audit: Analyze -> Generate Rules. + * Scheduled to run every 30 days, mapping existing detection rules + * to MITRE techniques and generating rules for the highest-priority gaps. 
+ */ +export const DETECTION_COVERAGE_AUDIT_PLAYBOOK = `\ +name: Detection Coverage Audit +description: Monthly MITRE ATT&CK coverage audit - Analyze, Generate Rules +triggers: + - type: schedule + on: + interval: 30d +steps: + - name: audit + type: ai.agent + with: + schema: + type: object + properties: + total_rules: + type: number + description: Total number of active detection rules + covered_techniques: + type: array + items: + type: string + description: MITRE ATT&CK techniques with existing detection coverage + uncovered_techniques: + type: array + items: + type: object + properties: + technique_id: + type: string + name: + type: string + priority: + type: string + enum: [critical, high, medium, low] + description: MITRE ATT&CK techniques lacking detection coverage + coverage_percentage: + type: number + description: Percentage of MITRE techniques covered (0-100) + required: [total_rules, covered_techniques, uncovered_techniques, coverage_percentage] + message: > + Use the mitre-coverage skill to conduct a comprehensive MITRE ATT&CK detection coverage audit: + 1. Query all active detection rules + 2. Map rules to MITRE techniques + 3. Identify coverage gaps + 4. Prioritize gaps by severity and prevalence + + Focus on critical and high-severity gaps first. 
+ + - name: generate_rules + type: ai.agent + if: "steps.audit.output.structured_output.uncovered_techniques | size > 0" + with: + schema: + type: object + properties: + created_rules: + type: array + items: + type: object + properties: + rule_name: + type: string + technique_id: + type: string + esql_query: + type: string + severity: + type: string + enum: [critical, high, medium, low] + description: Generated detection rules for uncovered techniques + rule_count: + type: number + description: Number of rules generated + required: [created_rules, rule_count] + message: > + Use the mitre-coverage skill to generate detection rules for the top priority gaps based on the coverage audit: + Coverage: {{ steps.audit.output.structured_output.coverage_percentage }}% + Total Rules: {{ steps.audit.output.structured_output.total_rules }} + Uncovered Techniques: {{ steps.audit.output.structured_output.uncovered_techniques }} + + Create ES|QL detection rules for the top 5 uncovered techniques, + prioritizing critical and high severity gaps. +`; + +/** All pre-built playbooks, keyed by stable snake_case names, for iteration and bulk import */ +export const SOC_PLAYBOOKS = [ + { name: 'incident_response', yaml: INCIDENT_RESPONSE_PLAYBOOK }, + { name: 'full_investigation', yaml: FULL_INVESTIGATION_PLAYBOOK }, + { name: 'proactive_threat_hunt', yaml: PROACTIVE_THREAT_HUNT_PLAYBOOK }, + { name: 'detection_coverage_audit', yaml: DETECTION_COVERAGE_AUDIT_PLAYBOOK }, +] as const; diff --git a/x-pack/solutions/security/plugins/security_solution/tsconfig.json b/x-pack/solutions/security/plugins/security_solution/tsconfig.json index 9162cb11b3b0d..a17aad2c39ec0 100644 --- a/x-pack/solutions/security/plugins/security_solution/tsconfig.json +++ b/x-pack/solutions/security/plugins/security_solution/tsconfig.json @@ -286,6 +286,8 @@ "@kbn/deeplinks-workflows", "@kbn/core-rendering-browser", "@kbn/anonymization-plugin", - "@kbn/anonymization-common" + "@kbn/anonymization-common", + "@kbn/evals", + "@kbn/workflows-extensions" ] }