Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
ab78be2
[Security Solution] Migrate Threat Hunting Agent to modular Agent Bui…
patrykkopycinski Mar 3, 2026
083e04e
Changes from node scripts/eslint_all_files --no-cache --fix
kibanamachine Mar 3, 2026
92b7b07
Merge remote-tracking branch 'upstream/main' into threat-hunting-skill
patrykkopycinski Mar 24, 2026
5ec98e6
Implement PR feedback, fix type errors, and improve security skills
patrykkopycinski Mar 25, 2026
8149580
Harden security tools: ES|QL injection guards, spaceId validation, an…
patrykkopycinski Mar 25, 2026
1a64405
Fix MaybePromise type error in alert_analysis inline tool tests
patrykkopycinski Mar 25, 2026
e8e980d
Add skill activation evaluator, restore cases tool, and cross-skill r…
patrykkopycinski Mar 26, 2026
71c79d8
Merge remote-tracking branch 'upstream/main' into threat-hunting-skill
patrykkopycinski Mar 26, 2026
250eeee
Fix broken tool count assertion and add cross-skill content tests
patrykkopycinski Mar 26, 2026
9bb0fd7
Extract detection-engineering skill to separate PR
patrykkopycinski Mar 26, 2026
d52fca4
Changes from node scripts/eslint_all_files --no-cache --fix
kibanamachine Mar 26, 2026
6d266e0
Remove manage_rule_exceptions tool (move to detection-engineering PR)
patrykkopycinski Mar 26, 2026
56db000
Remove manage_rule_exceptions from allow_lists.ts
patrykkopycinski Mar 26, 2026
e5b1021
Add threat-hunting to AGENT_BUILDER_BUILTIN_SKILLS allow list
patrykkopycinski Mar 27, 2026
a25fe92
Merge branch 'main' into threat-hunting-skill
patrykkopycinski Mar 27, 2026
c5a63f4
Use default agent when skills are enabled in Security
patrykkopycinski Mar 30, 2026
ebdfb35
Merge remote-tracking branch 'upstream/main' into threat-hunting-skill
patrykkopycinski Mar 30, 2026
23d28c5
Revert threat hunting agent description/instructions to match main
patrykkopycinski Mar 30, 2026
e40496b
Address PR feedback: reduce attack discovery limit, add optional enti…
patrykkopycinski Mar 31, 2026
adfbee0
Fix type errors with zod v4 optional array inference
patrykkopycinski Mar 31, 2026
a1ab76b
Merge branch 'main' into threat-hunting-skill
patrykkopycinski Apr 1, 2026
a257a4d
Merge branch 'main' into threat-hunting-skill
patrykkopycinski Apr 2, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ export const AGENT_BUILDER_BUILTIN_SKILLS = [
'automatic_troubleshooting',
'entity-analytics',
'alert-analysis',
'threat-hunting',

// O11Y
'observability.rca',
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,260 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { evaluate as base } from '../../src/evaluate';
import type { EvaluateDataset } from '../../src/evaluate_dataset';
import { createEvaluateDataset } from '../../src/evaluate_dataset';

// Extends the base `evaluate` test object with an `evaluateDataset` fixture.
// The fixture is registered with `scope: 'test'` and is built by passing the
// chat client, evaluators, executor client, trace ES client and logger through
// to `createEvaluateDataset`.
const evaluate = base.extend<{ evaluateDataset: EvaluateDataset }, {}>({
evaluateDataset: [
// NOTE(review): `use(...)` is neither awaited nor returned here — confirm the
// test runner tolerates a fixture callback that finishes before `use` settles.
({ chatClient, evaluators, executorClient, traceEsClient, log }, use) => {
use(
createEvaluateDataset({
chatClient,
evaluators,
executorClient,
traceEsClient,
log,
})
);
},
{ scope: 'test' },
],
});

// Threat-hunting suite: runs one dataset of five hunting-style questions through
// `evaluateDataset`. Every example is tagged with `expectedSkill: 'threat-hunting'`.
// NOTE(review): the metadata keys are presumably read by evaluators configured
// elsewhere (e.g. a skill-invocation check) — confirm against the evaluator setup.
evaluate.describe('Security Skills - Threat Hunting', () => {
evaluate(
'threat hunting queries activate the correct skill and tools',
async ({ evaluateDataset }) => {
await evaluateDataset({
dataset: {
name: 'agent builder: security-threat-hunting-skill',
description:
'Validates that threat hunting queries activate the threat-hunting skill and use ES|QL tools',
examples: [
// Lateral movement hunt over a 7-day window.
{
input: {
question:
'Hunt for lateral movement in my environment. Look for remote service creation and suspicious logon types over the last 7 days.',
},
output: {
expected:
'I will search for lateral movement indicators using ES|QL queries across endpoint and Windows event logs, looking for tools like psexec, wmic, and suspicious logon types 3 and 10.',
},
metadata: {
query_intent: 'Threat Hunting',
expectedSkill: 'threat-hunting',
},
},
// C2 beaconing hunt based on periodic outbound connections.
{
input: {
question:
'Are there any signs of C2 beaconing in our network logs? Check for processes making periodic outbound connections to external IPs.',
},
output: {
expected:
'I will analyze network connection logs for periodic patterns to external IP addresses, focusing on process-level connection counts and frequency to identify potential C2 beaconing behavior.',
},
metadata: {
query_intent: 'Threat Hunting',
expectedSkill: 'threat-hunting',
},
},
// Rare-process hunt: single host, fewer than 3 executions in a week.
{
input: {
question:
'Find statistically rare process executions across our endpoints. Which processes have only run on a single host with fewer than 3 executions in the past week?',
},
output: {
expected:
'I will query the endpoint process event logs using ES|QL to identify processes with a unique host count of 1 and execution count under 3, which are statistically rare and may indicate malicious activity.',
},
metadata: {
query_intent: 'Threat Hunting',
expectedSkill: 'threat-hunting',
},
},
// Brute-force hunt: source IPs with more than 10 failed logins in 24 hours.
{
input: {
question:
'Check for brute force login attempts in the last 24 hours. Show me source IPs with more than 10 failed authentication attempts.',
},
output: {
expected:
'I will search authentication event logs for failed login attempts, aggregated by source IP, to identify potential brute force attacks with more than 10 failures from the same source.',
},
metadata: {
query_intent: 'Threat Hunting',
expectedSkill: 'threat-hunting',
},
},
// Data-source discovery; the only example in this suite that also sets
// `expectedOnlyToolId` (to 'platform.core.list_indices').
{
input: {
question: 'What indices are available for security hunting in this environment?',
},
output: {
expected:
'I will list the available indices to identify security-relevant data sources for threat hunting, such as endpoint, network, and authentication logs.',
},
metadata: {
query_intent: 'Threat Hunting',
expectedSkill: 'threat-hunting',
expectedOnlyToolId: 'platform.core.list_indices',
},
},
],
},
});
}
);
});

// Alert-analysis suite: runs one dataset of four triage / intelligence / risk
// questions through `evaluateDataset`. Every example is tagged with
// `expectedSkill: 'alert-analysis'`; three of them also set `expectedOnlyToolId`.
evaluate.describe('Security Skills - Alert Analysis', () => {
evaluate(
'alert analysis queries activate the correct skill and tools',
async ({ evaluateDataset }) => {
await evaluateDataset({
dataset: {
name: 'agent builder: security-alert-analysis-skill',
description:
'Validates that alert triage queries activate the alert-analysis skill and appropriate security tools',
examples: [
// Single-alert triage on a named host; no tool restriction is set.
{
input: {
question:
'I have a critical severity alert for "Credential Access via LSASS Memory" on host WIN-SRV01. Help me triage this alert.',
},
output: {
expected:
'I will help triage this LSASS credential access alert by first fetching the alert details, then searching for related alerts on WIN-SRV01, checking threat intelligence in Security Labs, and assessing the host risk score.',
},
metadata: {
query_intent: 'Alert Triage',
expectedSkill: 'alert-analysis',
},
},
// Bulk alert prioritisation.
// NOTE(review): the expected answer mentions correlating with entity risk
// scores while `expectedOnlyToolId` names only 'security.alerts' — confirm
// these two expectations are compatible.
{
input: {
question:
'Show me all high and critical severity alerts from the last 24 hours and help me prioritize which ones to investigate first.',
},
output: {
expected:
'I will search for high and critical severity alerts from the past 24 hours, correlate them with entity risk scores, and prioritize based on severity, affected entity criticality, and MITRE ATT&CK technique.',
},
metadata: {
query_intent: 'Alert Triage',
expectedSkill: 'alert-analysis',
expectedOnlyToolId: 'security.alerts',
},
},
// Threat-intelligence lookup in Security Labs.
{
input: {
question:
'Search Security Labs for information about the Lazarus Group and their known attack techniques.',
},
output: {
expected:
'I will search Elastic Security Labs for threat intelligence on the Lazarus Group, including their known TTPs, malware families, and indicators of compromise.',
},
metadata: {
query_intent: 'Threat Intelligence',
expectedSkill: 'alert-analysis',
expectedOnlyToolId: 'security.security_labs_search',
},
},
// Entity risk score lookup for a single host.
{
input: {
question: 'What is the current risk score for host DC01? Has it changed recently?',
},
output: {
expected:
'I will look up the entity risk score for host DC01 to assess its current risk level and any recent changes in risk indicators.',
},
metadata: {
query_intent: 'Risk Assessment',
expectedSkill: 'alert-analysis',
expectedOnlyToolId: 'security.entity_risk_score',
},
},
],
},
});
}
);
});

// Cross-skill routing suite: a single-example dataset whose question combines an
// entity risk score lookup with an alert search. The example is tagged with
// `expectedSkill: 'alert-analysis'` and sets no tool restriction.
evaluate.describe('Security Skills - Cross-Skill Workflows', () => {
evaluate(
'queries spanning multiple security concerns activate the most relevant skill',
async ({ evaluateDataset }) => {
await evaluateDataset({
dataset: {
name: 'agent builder: security-cross-skill-routing',
description:
'Validates that ambiguous queries spanning multiple security domains are routed to the most relevant skill',
examples: [
// Risk score + recent alerts for one user account.
{
input: {
question:
'Check the entity risk score for user admin@corp.local and look for any alerts associated with this account in the last 48 hours.',
},
output: {
expected:
"I will retrieve the entity risk score for admin@corp.local and search for associated alerts to build a comprehensive picture of this account's risk profile.",
},
metadata: {
query_intent: 'Risk Assessment with Alerts',
expectedSkill: 'alert-analysis',
},
},
],
},
});
}
);
});

// Distractor (negative) suite: two non-security questions. Instead of
// `expectedSkill`, each example sets `shouldNotActivateSkill`.
// NOTE(review): each example names only one skill ('threat-hunting' for the
// dashboards question, 'alert-analysis' for the APM question), so the other
// security skill is presumably not checked for that example — confirm this is
// intended.
evaluate.describe('Security Skills - Distractor Queries', () => {
evaluate('non-security queries do not activate security skills', async ({ evaluateDataset }) => {
await evaluateDataset({
dataset: {
name: 'agent builder: security-skills-distractor',
description:
'Validates that non-security queries do NOT activate security skills (negative test)',
examples: [
// Platform question about dashboards.
{
input: {
question: 'Show me the available dashboards in Kibana.',
},
output: {
expected:
'I will search for available dashboards. This is a platform query, not a security-specific task.',
},
metadata: {
query_intent: 'Platform',
shouldNotActivateSkill: 'threat-hunting',
},
},
// Observability question about APM services.
{
input: {
question: 'What is the current status of my APM services?',
},
output: {
expected:
'I will check the status of your APM services. This is an observability query.',
},
metadata: {
query_intent: 'Observability',
shouldNotActivateSkill: 'alert-analysis',
},
},
],
},
});
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,65 @@ function configureExperiment({
log,
skillName: 'data-exploration',
}),
{
  // Code-based evaluator that queries the trace index to check whether the skill
  // named in the example metadata shows up in the run's trace.
  //
  // Metadata read through `getStringMeta`:
  //   - `expectedSkill`: score 1 when a matching span is found, 0 otherwise.
  //   - `shouldNotActivateSkill`: score 1 when no matching span is found, 0 otherwise.
  // `expectedSkill` wins when both are present. Examples carrying neither key
  // score 1. A null score is returned when the check cannot run: invalid skill
  // name, missing or invalid traceId, or a failed query.
  name: 'ExpectedSkillInvocation',
  kind: 'CODE' as const,
  evaluate: async ({ output, metadata }) => {
    const expectedSkill = getStringMeta(metadata, 'expectedSkill');
    const shouldNotActivate = getStringMeta(metadata, 'shouldNotActivateSkill');
    const skillName = expectedSkill ?? shouldNotActivate;

    if (!skillName) return { score: 1 };

    // Both the skill name and the trace id are interpolated into the ES|QL text
    // below, so both must pass the same character allow-list first.
    const safeIdentifier = /^[a-zA-Z0-9_-]+$/;
    if (!safeIdentifier.test(skillName)) {
      return { score: null, label: 'error', explanation: `Invalid skill name: ${skillName}` };
    }

    // Only accept a real string; anything else is treated as "no trace id".
    const rawTraceId = (output as Record<string, unknown>)?.traceId;
    const traceId = typeof rawTraceId === 'string' ? rawTraceId : undefined;
    if (!traceId) {
      return {
        score: null,
        label: 'unavailable',
        explanation: 'No traceId available for skill invocation check',
      };
    }
    if (!safeIdentifier.test(traceId)) {
      return { score: null, label: 'error', explanation: `Invalid traceId: ${traceId}` };
    }

    // Counts spans in this trace where the tool name is "filestore.read" and the
    // tool parameters match "*/<skillName>/SKILL.md*".
    const query = `FROM traces-*
| WHERE trace.id == "${traceId}"
| STATS skill_invoked = COUNT(
CASE(
attributes.gen_ai.tool.name == "filestore.read"
AND attributes.elastic.tool.parameters LIKE "*/${skillName}/SKILL.md*",
1,
NULL
)
)`;

    try {
      const response = (await traceEsClient.esql.query({ query })) as unknown as {
        values: number[][];
      };
      // A missing first row/cell is treated as a count of zero.
      const invoked = (response.values?.[0]?.[0] ?? 0) > 0;

      if (expectedSkill) {
        return {
          score: invoked ? 1 : 0,
          metadata: { expectedSkill, invoked },
        };
      }
      return {
        score: invoked ? 0 : 1,
        metadata: { shouldNotActivateSkill: shouldNotActivate, invoked },
      };
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      log.warning(`ExpectedSkillInvocation failed for trace ${traceId}: ${message}`);
      // Surface the failure reason in the result, like the other null-score branches.
      return {
        score: null,
        label: 'error',
        explanation: `Skill invocation query failed: ${message}`,
      };
    }
  },
},
]);

return { task, evaluators: selectedEvaluators };
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@ import type { AgentBuilderPluginStart } from '@kbn/agent-builder-plugin/public';
import { agentBuilderMocks } from '@kbn/agent-builder-plugin/public/mocks';
import { THREAT_HUNTING_AGENT_ID } from '../../../common/constants';

// Shared stub for `useUiSetting`; returns `false` unless a test overrides it.
// NOTE(review): the `mock` name prefix is presumably what lets the hoisted
// `jest.mock` factory below reference this variable — confirm before renaming.
const mockUseUiSetting = jest.fn().mockReturnValue(false);
// Keep the real '@kbn/kibana-react-plugin/public' exports and replace only
// `useUiSetting`, which forwards all arguments to the stub above.
jest.mock('@kbn/kibana-react-plugin/public', () => ({
...jest.requireActual('@kbn/kibana-react-plugin/public'),
useUiSetting: (...args: unknown[]) => mockUseUiSetting(...args),
}));

// Minimal chat handle exposing only a mocked `close`.
const mockChatRef = {
close: jest.fn(),
};
Expand Down Expand Up @@ -49,8 +55,10 @@ describe('useAgentBuilderAttachment', () => {
};

// Reset shared mock state before every test.
beforeEach(() => {
  // Clears recorded calls/results on every jest mock, which already covers
  // `mockOpenAgentBuilderChat` and `mockChatRef.close`; no per-mock `mockClear()` needed.
  jest.clearAllMocks();
  // Pin the clock so values derived from `Date.now()` are deterministic.
  jest.spyOn(Date, 'now').mockReturnValue(1234567890);
  // `clearAllMocks` keeps mock implementations, so restore the default explicitly:
  // one test switches this stub to return `true`.
  mockUseUiSetting.mockReturnValue(false);
});

afterEach(() => {
Expand Down Expand Up @@ -138,6 +146,22 @@ describe('useAgentBuilderAttachment', () => {
expect(mockOpenAgentBuilderChat).not.toHaveBeenCalled();
});

// With the UI-setting stub returning `true` ("skills enabled" per the test title),
// opening the flyout must call the chat opener once, with no `agentId` property.
it('does not pass agentId when skills are enabled', () => {
  mockUseUiSetting.mockReturnValue(true);

  const hook = renderHook(() => useAgentBuilderAttachment(defaultParams), {
    wrapper: createWrapper(mockAgentBuilderService),
  });

  act(() => {
    hook.result.current.openAgentBuilderFlyout();
  });

  expect(mockOpenAgentBuilderChat).toHaveBeenCalledTimes(1);
  // First argument of the first recorded call.
  const [firstCall] = mockOpenAgentBuilderChat.mock.calls;
  const [chatOptions] = firstCall;
  expect(chatOptions).not.toHaveProperty('agentId');
});

it('generates attachment ID with timestamp', async () => {
const { result } = renderHook(() => useAgentBuilderAttachment(defaultParams), {
wrapper: createWrapper(mockAgentBuilderService),
Expand Down
Loading
Loading