Skip to content

Commit

Permalink
[8.18] [Rule Migration] Resolve bug around ECS mapping node (#210608) (
Browse files Browse the repository at this point in the history
…#210860)

# Backport

This will backport the following commits from `main` to `8.18`:
- [[Rule Migration] Resolve bug around ECS mapping node
(#210608)](#210608)

<!--- Backport version: 9.4.3 -->

### Questions?
Please refer to the [Backport tool
documentation](https://github.com/sqren/backport)

<!--BACKPORT [{"author":{"name":"Marius
Iversen","email":"[email protected]"},"sourceCommit":{"committedDate":"2025-02-12T15:22:05Z","message":"[Rule
Migration] Resolve bug around ECS mapping node (#210608)\n\n##
Summary\n\nThis PR was initially to resolve more prompt improvements,
but it will\nbe split into multiple PR's as it also includes a bugfix
for ECS mapping\nnode logic, where ECS mapping node was not always part
of the\ntranslation flow.\n\nSome minor prompt improvements are also
included, an updated field\nmapping for RAG rules (adding the query
field) and filtering out metrics\nintegrations from the RAG for
now.\n\nAdded telemetry metadata parameters to createModel together
with\n`maxRetries` as
well.","sha":"c380edd84877e1c0445226a13ae99a58a2b40c86","branchLabelMapping":{"^v9.1.0$":"main","^v8.19.0$":"8.x","^v(\\d+).(\\d+).\\d+$":"$1.$2"}},"sourcePullRequest":{"labels":["release_note:skip","v9.0.0","Team:
SecuritySolution","backport:version","v8.18.0","v9.1.0","v8.19.0"],"title":"[Rule
Migration] Resolve bug around ECS mapping
node","number":210608,"url":"https://github.com/elastic/kibana/pull/210608","mergeCommit":{"message":"[Rule
Migration] Resolve bug around ECS mapping node (#210608)\n\n##
Summary\n\nThis PR was initially to resolve more prompt improvements,
but it will\nbe split into multiple PR's as it also includes a bugfix
for ECS mapping\nnode logic, where ECS mapping node was not always part
of the\ntranslation flow.\n\nSome minor prompt improvements are also
included, an updated field\nmapping for RAG rules (adding the query
field) and filtering out metrics\nintegrations from the RAG for
now.\n\nAdded telemetry metadata parameters to createModel together
with\n`maxRetries` as
well.","sha":"c380edd84877e1c0445226a13ae99a58a2b40c86"}},"sourceBranch":"main","suggestedTargetBranches":["9.0","8.18","8.x"],"targetPullRequestStates":[{"branch":"9.0","label":"v9.0.0","branchLabelMappingKey":"^v(\\d+).(\\d+).\\d+$","isSourceBranch":false,"state":"NOT_CREATED"},{"branch":"8.18","label":"v8.18.0","branchLabelMappingKey":"^v(\\d+).(\\d+).\\d+$","isSourceBranch":false,"state":"NOT_CREATED"},{"branch":"main","label":"v9.1.0","branchLabelMappingKey":"^v9.1.0$","isSourceBranch":true,"state":"MERGED","url":"https://github.com/elastic/kibana/pull/210608","number":210608,"mergeCommit":{"message":"[Rule
Migration] Resolve bug around ECS mapping node (#210608)\n\n##
Summary\n\nThis PR was initially to resolve more prompt improvements,
but it will\nbe split into multiple PR's as it also includes a bugfix
for ECS mapping\nnode logic, where ECS mapping node was not always part
of the\ntranslation flow.\n\nSome minor prompt improvements are also
included, an updated field\nmapping for RAG rules (adding the query
field) and filtering out metrics\nintegrations from the RAG for
now.\n\nAdded telemetry metadata parameters to createModel together
with\n`maxRetries` as
well.","sha":"c380edd84877e1c0445226a13ae99a58a2b40c86"}},{"branch":"8.x","label":"v8.19.0","branchLabelMappingKey":"^v8.19.0$","isSourceBranch":false,"state":"NOT_CREATED"}]}]
BACKPORT-->

Co-authored-by: Marius Iversen <[email protected]>
  • Loading branch information
kibanamachine and P1llus authored Feb 12, 2025
1 parent feab022 commit 1e12030
Show file tree
Hide file tree
Showing 19 changed files with 128 additions and 134 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
* 2.0.
*/

import type { InferenceClient } from '@kbn/inference-plugin/server';
import type {
ActionsClientChatOpenAI,
ActionsClientSimpleChatModel,
Expand All @@ -17,6 +16,7 @@ import fs from 'fs/promises';
import path from 'path';
import { getRuleMigrationAgent } from '../../server/lib/siem_migrations/rules/task/agent';
import type { RuleMigrationsRetriever } from '../../server/lib/siem_migrations/rules/task/retrievers';
import type { EsqlKnowledgeBase } from '../../server/lib/siem_migrations/rules/task/util/esql_knowledge_base';
import type { SiemMigrationTelemetryClient } from '../../server/lib/siem_migrations/rules/task/rule_migrations_telemetry_client';

interface Drawable {
Expand All @@ -27,8 +27,7 @@ const mockLlm = new FakeLLM({
response: JSON.stringify({}, null, 2),
}) as unknown as ActionsClientChatOpenAI | ActionsClientSimpleChatModel;

const inferenceClient = {} as InferenceClient;
const connectorId = 'draw_graphs';
const esqlKnowledgeBase = {} as EsqlKnowledgeBase;
const ruleMigrationsRetriever = {} as RuleMigrationsRetriever;

const createLlmInstance = () => {
Expand All @@ -40,9 +39,8 @@ async function getAgentGraph(logger: Logger): Promise<Drawable> {
const telemetryClient = {} as SiemMigrationTelemetryClient;
const graph = getRuleMigrationAgent({
model,
inferenceClient,
esqlKnowledgeBase,
ruleMigrationsRetriever,
connectorId,
logger,
telemetryClient,
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,19 @@ export class RuleMigrationsDataIntegrationsClient extends RuleMigrationsDataBase
id: pkg.name,
description: pkg?.description || '',
data_streams:
pkg.data_streams?.map((stream) => ({
dataset: stream.dataset,
index_pattern: `${stream.type}-${stream.dataset}-*`,
title: stream.title,
})) || [],
pkg.data_streams
?.filter((stream) => stream.type === 'logs')
.map((stream) => ({
dataset: stream.dataset,
index_pattern: `${stream.type}-${stream.dataset}-*`,
title: stream.title,
})) || [],
elser_embedding: [
pkg.title,
pkg.description,
...(pkg.data_streams?.map((stream) => stream.title) || []),
...(pkg.data_streams
?.filter((stream) => stream.type === 'logs')
.map((stream) => stream.title) || []),
].join(' - '),
}));
await this.esClient
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ import { RuleMigrationsDataBaseClient } from './rule_migrations_data_base_client

export type { RuleVersions };
export type PrebuildRuleVersionsMap = Map<string, RuleVersions>;
/* The minimum score required for a integration to be considered correct, might need to change this later */
/* The minimum score required for a prebuilt rule to be considered correct */
const MIN_SCORE = 40 as const;
/* The number of integrations the RAG will return, sorted by score */
/* The number of prebuilt rules the RAG will return, sorted by score */
const RETURNED_RULES = 5 as const;

/* BULK_MAX_SIZE defines the number to break down the bulk operations by.
Expand All @@ -31,12 +31,12 @@ export class RuleMigrationsDataPrebuiltRulesClient extends RuleMigrationsDataBas
return fetchRuleVersionsTriad({ ruleAssetsClient, ruleObjectsClient });
}

/** Indexes an array of integrations to be used with ELSER semantic search queries */
/** Indexes an array of prebuilt rules to be used with ELSER semantic search queries */
async populate(ruleVersionsMap: PrebuildRuleVersionsMap): Promise<void> {
const filteredRules: RuleMigrationPrebuiltRule[] = [];

ruleVersionsMap.forEach((ruleVersions) => {
const rule = ruleVersions.target || ruleVersions.current;
const rule = ruleVersions.target;
if (rule) {
const mitreAttackIds = rule?.threat?.flatMap(
({ technique }) => technique?.map(({ id }) => id) ?? []
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
* 2.0.
*/

import type { InferenceClient } from '@kbn/inference-plugin/server';
import type {
ActionsClientChatOpenAI,
ActionsClientSimpleChatModel,
Expand All @@ -14,25 +13,25 @@ import { loggerMock } from '@kbn/logging-mocks';
import { FakeLLM } from '@langchain/core/utils/testing';
import type { RuleMigrationsRetriever } from '../retrievers';
import type { SiemMigrationTelemetryClient } from '../rule_migrations_telemetry_client';
import type { EsqlKnowledgeBase } from '../util/esql_knowledge_base';
import { getRuleMigrationAgent } from './graph';

describe('getRuleMigrationAgent', () => {
const model = new FakeLLM({
response: JSON.stringify({}, null, 2),
}) as unknown as ActionsClientChatOpenAI | ActionsClientSimpleChatModel;
const telemetryClient = {} as SiemMigrationTelemetryClient;
const inferenceClient = {} as InferenceClient;
const connectorId = 'draw_graphs';
const esqlKnowledgeBase = {} as EsqlKnowledgeBase;

const ruleMigrationsRetriever = {} as RuleMigrationsRetriever;
const logger = loggerMock.create();

it('Ensures that the graph compiles', async () => {
try {
await getRuleMigrationAgent({
model,
inferenceClient,
esqlKnowledgeBase,
ruleMigrationsRetriever,
connectorId,
logger,
telemetryClient,
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,8 @@ import { getTranslateRuleGraph } from './sub_graphs/translate_rule';
import type { MigrateRuleGraphParams, MigrateRuleState } from './types';
export function getRuleMigrationAgent({
model,
inferenceClient,
esqlKnowledgeBase,
ruleMigrationsRetriever,
connectorId,
logger,
telemetryClient,
}: MigrateRuleGraphParams) {
Expand All @@ -27,9 +26,8 @@ export function getRuleMigrationAgent({
});
const translationSubGraph = getTranslateRuleGraph({
model,
inferenceClient,
esqlKnowledgeBase,
ruleMigrationsRetriever,
connectorId,
telemetryClient,
logger,
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,14 @@ export const getMatchPrebuiltRuleNode = ({
return {
name: rule.name,
description: rule.description,
query: rule.target?.type !== 'machine_learning' ? rule.target?.query : '',
};
});

const splunkRule = {
title: state.original_rule.title,
description: state.original_rule.description,
query: state.original_rule.query,
};

/*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,49 +15,47 @@ You will be provided with a Splunk Detection Rule name by the user, your goal is
Here are some context for you to reference for your task, read it carefully as you will get questions about it later:
<context>
<elastic_detection_rule_names>
<elastic_detection_rules>
{rules}
</elastic_detection_rule_names>
</elastic_detection_rules>
</context>
`,
],
[
'human',
`See the below description of the splunk rule, try to find a Elastic Prebuilt Rule with similar purpose.
`See the below description of the splunk rule, try to find a Elastic Prebuilt Rule with similar purpose. If the splunk rule covers a much more complex usecase than the prebuilt rule, it is not a match.
<splunk_rule>
{splunk_rule}
</splunk_rule>
<guidelines>
- Carefully analyze the Splunk Detection Rule data provided by the user.
- Match the Splunk rule to the most relevant Elastic Prebuilt Rules from the list provided above.
- If no related Elastic Prebuilt Rule is found, reply with an empty string.
- Match the Splunk rule to the most relevant Elastic Prebuilt Rules from the list provided above but only if the usecase is almost identical.
- If no related Elastic Prebuilt Rule is found, ensure the value of "match" in the response is an empty string.
- Provide a concise reasoning summary for your decision, explaining why the selected Prebuilt Rule is the best fit, or why no suitable match was found.
</guidelines>
<expected_output>
- Always reply with a JSON object with the key "match" and the value being the most relevant matched elastic detection rule name, and a "summary" entry with the reasons behind the match. Do not reply with anything else.
- Only reply with exact matches, if you are unsure or do not find a very confident match, always reply with an empty string value in the match key, do not guess or reply with anything else.
- Always reply with a JSON object with the field "match" and the value being the most relevant matched elastic detection rule name if any, else the value should be an emptry string, and a "summary" entry with the reasons behind the match. Do not reply with anything else.
- Only reply with exact matches, if you are unsure or do not find a very confident match, always reply with an empty string value in the match field, do not guess or reply with anything else.
- If the Splunk rule is a much more complex usecase with custom logic not covered by the prebuilt rules, reply with an empty string in the match field.
- If there is only one match, answer with the name of the rule in the "match" key. Do not reply with anything else.
- If there are multiple matches, answer with the most specific of them, for example: "Linux User Account Creation" is more specific than "User Account Creation".
- Finally, write a "summary" in markdown format with the reasoning behind the decision. Starting with "## Prebuilt Rule Matching Summary\n".
- Finally, write a "summary" in markdown format with the reasoning behind the decision. Starting with "## Prebuilt Rule Matching Summary" followed by a newline. Make sure the content is valid JSON by escaping any necessary special characters.
- Make sure the JSON object is formatted correctly and the values properly escaped.
</expected_output>
<example_response>
U: <splunk_rule>
Title: Linux Auditd Add User Account Type
Description: The following analytic detects the suspicious add user account type.
</splunk_rule>
A: Please find the match JSON object below:
A: Please find the resulting JSON response below:
\`\`\`json
{{
"match": "Linux User Account Creation",
"summary": "## Prebuilt Rule Matching Summary\\\nThe Splunk rule \"Linux Auditd Add User Account Type\" is matched with the Elastic rule \"Linux User Account Creation\" because both rules cover the same use case of detecting user account creation on Linux systems."
"summary": "## Prebuilt Rule Matching Summary
The Splunk rule \"Linux Auditd Add User Account Type\" is matched with the Elastic rule \"Linux User Account Creation\" because both rules cover the same use case of detecting user account creation on Linux systems."
}}
\`\`\`
</example_response>
`,
],
['ai', 'Please find the match JSON object below:'],
['ai', 'Please find the resulting JSON response below:'],
]);
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

import { END, START, StateGraph } from '@langchain/langgraph';
import { isEmpty } from 'lodash/fp';
import { RuleTranslationResult } from '../../../../../../../../common/siem_migrations/constants';
import { getEcsMappingNode } from './nodes/ecs_mapping';
import { getFixQueryErrorsNode } from './nodes/fix_query_errors';
import { getInlineQueryNode } from './nodes/inline_query';
Expand All @@ -23,27 +22,25 @@ const MAX_VALIDATION_ITERATIONS = 3;

export function getTranslateRuleGraph({
model,
inferenceClient,
connectorId,
esqlKnowledgeBase,
ruleMigrationsRetriever,
logger,
telemetryClient,
}: TranslateRuleGraphParams) {
const translateRuleNode = getTranslateRuleNode({
inferenceClient,
connectorId,
esqlKnowledgeBase,
logger,
});
const translationResultNode = getTranslationResultNode();
const inlineQueryNode = getInlineQueryNode({ model, ruleMigrationsRetriever });
const validationNode = getValidationNode({ logger });
const fixQueryErrorsNode = getFixQueryErrorsNode({ inferenceClient, connectorId, logger });
const fixQueryErrorsNode = getFixQueryErrorsNode({ esqlKnowledgeBase, logger });
const retrieveIntegrationsNode = getRetrieveIntegrationsNode({
model,
ruleMigrationsRetriever,
telemetryClient,
});
const ecsMappingNode = getEcsMappingNode({ inferenceClient, connectorId, logger });
const ecsMappingNode = getEcsMappingNode({ esqlKnowledgeBase, logger });

const translateRuleGraph = new StateGraph(translateRuleState)
// Nodes
Expand Down Expand Up @@ -86,14 +83,13 @@ const translatableRouter = (state: TranslateRuleState) => {
const validationRouter = (state: TranslateRuleState) => {
if (
state.validation_errors.iterations <= MAX_VALIDATION_ITERATIONS &&
state.translation_result === RuleTranslationResult.FULL
!isEmpty(state.validation_errors?.esql_errors)
) {
if (!isEmpty(state.validation_errors?.esql_errors)) {
return 'fixQueryErrors';
}
if (!state.translation_finalized) {
return 'ecsMapping';
}
return 'fixQueryErrors';
}
if (!state.includes_ecs_mapping) {
return 'ecsMapping';
}

return 'translationResult';
};
Original file line number Diff line number Diff line change
Expand Up @@ -6,26 +6,21 @@
*/

import type { Logger } from '@kbn/core/server';
import type { InferenceClient } from '@kbn/inference-plugin/server';
import { RuleTranslationResult } from '../../../../../../../../../../common/siem_migrations/constants';
import { getEsqlKnowledgeBase } from '../../../../../util/esql_knowledge_base_caller';
import type { EsqlKnowledgeBase } from '../../../../../util/esql_knowledge_base';
import type { GraphNode } from '../../types';
import { SIEM_RULE_MIGRATION_CIM_ECS_MAP } from './cim_ecs_map';
import { ESQL_TRANSLATE_ECS_MAPPING_PROMPT } from './prompts';
import { cleanMarkdown, generateAssistantComment } from '../../../../../util/comments';

interface GetEcsMappingNodeParams {
inferenceClient: InferenceClient;
connectorId: string;
esqlKnowledgeBase: EsqlKnowledgeBase;
logger: Logger;
}

export const getEcsMappingNode = ({
inferenceClient,
connectorId,
esqlKnowledgeBase,
logger,
}: GetEcsMappingNodeParams): GraphNode => {
const esqlKnowledgeBaseCaller = getEsqlKnowledgeBase({ inferenceClient, connectorId, logger });
return async (state) => {
const elasticRule = {
title: state.elastic_rule.title,
Expand All @@ -39,29 +34,21 @@ export const getEcsMappingNode = ({
elastic_rule: JSON.stringify(elasticRule, null, 2),
});

const response = await esqlKnowledgeBaseCaller(prompt);
const response = await esqlKnowledgeBase.translate(prompt);

const updatedQuery = response.match(/```esql\n([\s\S]*?)\n```/)?.[1] ?? '';
const ecsSummary = response.match(/## Field Mapping Summary[\s\S]*$/)?.[0] ?? '';

const translationResult = getTranslationResult(updatedQuery);

// We set includes_ecs_mapping to true to indicate that the ecs mapping has been applied.
// This is to ensure that the node only runs once
return {
response,
comments: [generateAssistantComment(cleanMarkdown(ecsSummary))],
translation_finalized: true,
translation_result: translationResult,
includes_ecs_mapping: true,
elastic_rule: {
...state.elastic_rule,
query: updatedQuery,
},
};
};
};

/**
 * Classifies an ESQL query string as a full or partial translation.
 *
 * @param esqlQuery - The query text to inspect.
 * @returns `RuleTranslationResult.PARTIAL` when the query contains a bracketed
 * `[macro:...]` or `[lookup:...]` segment, otherwise `RuleTranslationResult.FULL`.
 */
const getTranslationResult = (esqlQuery: string): RuleTranslationResult => {
  // A single non-global pattern test; equivalent to checking `match` for a non-null result.
  const hasMacroOrLookupTag = /\[(macro|lookup):[\s\S]*\]/.test(esqlQuery);
  return hasMacroOrLookupTag ? RuleTranslationResult.PARTIAL : RuleTranslationResult.FULL;
};
Original file line number Diff line number Diff line change
Expand Up @@ -6,30 +6,26 @@
*/

import type { Logger } from '@kbn/core/server';
import type { InferenceClient } from '@kbn/inference-plugin/server';
import { getEsqlKnowledgeBase } from '../../../../../util/esql_knowledge_base_caller';
import type { EsqlKnowledgeBase } from '../../../../../util/esql_knowledge_base';
import type { GraphNode } from '../../types';
import { RESOLVE_ESQL_ERRORS_TEMPLATE } from './prompts';

interface GetFixQueryErrorsNodeParams {
inferenceClient: InferenceClient;
connectorId: string;
esqlKnowledgeBase: EsqlKnowledgeBase;
logger: Logger;
}

export const getFixQueryErrorsNode = ({
inferenceClient,
connectorId,
esqlKnowledgeBase,
logger,
}: GetFixQueryErrorsNodeParams): GraphNode => {
const esqlKnowledgeBaseCaller = getEsqlKnowledgeBase({ inferenceClient, connectorId, logger });
return async (state) => {
const rule = state.elastic_rule;
const prompt = await RESOLVE_ESQL_ERRORS_TEMPLATE.format({
esql_errors: state.validation_errors.esql_errors,
esql_query: rule.query,
});
const response = await esqlKnowledgeBaseCaller(prompt);
const response = await esqlKnowledgeBase.translate(prompt);

const esqlQuery = response.match(/```esql\n([\s\S]*?)\n```/)?.[1] ?? '';
rule.query = esqlQuery;
Expand Down
Loading

0 comments on commit 1e12030

Please sign in to comment.