[8.18] [Rule Migration] Resolve bug around ECS mapping node (#210608) (#210860)

# Backport

This will backport the following commits from `main` to `8.18`:
- [[Rule Migration] Resolve bug around ECS mapping node (#210608)](#210608)

### Questions ?
Please refer to the [Backport tool documentation](https://github.com/sqren/backport)

Co-authored-by: Marius Iversen <[email protected]>
elastic · Feb 12, 2025 · 1e12030 · 1e12030
1 parent feab022
commit 1e12030
Show file tree

Hide file tree

Showing 19 changed files with 128 additions and 134 deletions.
diff --git a/...solutions/security/plugins/security_solution/scripts/siem_migration/draw_graphs_script.ts b/...solutions/security/plugins/security_solution/scripts/siem_migration/draw_graphs_script.ts
@@ -5,7 +5,6 @@
  * 2.0.
  */
 
-import type { InferenceClient } from '@kbn/inference-plugin/server';
 import type {
   ActionsClientChatOpenAI,
   ActionsClientSimpleChatModel,
@@ -17,6 +16,7 @@ import fs from 'fs/promises';
 import path from 'path';
 import { getRuleMigrationAgent } from '../../server/lib/siem_migrations/rules/task/agent';
 import type { RuleMigrationsRetriever } from '../../server/lib/siem_migrations/rules/task/retrievers';
+import type { EsqlKnowledgeBase } from '../../server/lib/siem_migrations/rules/task/util/esql_knowledge_base';
 import type { SiemMigrationTelemetryClient } from '../../server/lib/siem_migrations/rules/task/rule_migrations_telemetry_client';
 
 interface Drawable {
@@ -27,8 +27,7 @@ const mockLlm = new FakeLLM({
   response: JSON.stringify({}, null, 2),
 }) as unknown as ActionsClientChatOpenAI | ActionsClientSimpleChatModel;
 
-const inferenceClient = {} as InferenceClient;
-const connectorId = 'draw_graphs';
+const esqlKnowledgeBase = {} as EsqlKnowledgeBase;
 const ruleMigrationsRetriever = {} as RuleMigrationsRetriever;
 
 const createLlmInstance = () => {
@@ -40,9 +39,8 @@ async function getAgentGraph(logger: Logger): Promise<Drawable> {
   const telemetryClient = {} as SiemMigrationTelemetryClient;
   const graph = getRuleMigrationAgent({
     model,
-    inferenceClient,
+    esqlKnowledgeBase,
     ruleMigrationsRetriever,
-    connectorId,
     logger,
     telemetryClient,
   });

diff --git a/...olution/server/lib/siem_migrations/rules/data/rule_migrations_data_integrations_client.ts b/...olution/server/lib/siem_migrations/rules/data/rule_migrations_data_integrations_client.ts
@@ -32,15 +32,19 @@ export class RuleMigrationsDataIntegrationsClient extends RuleMigrationsDataBase
         id: pkg.name,
         description: pkg?.description || '',
         data_streams:
-          pkg.data_streams?.map((stream) => ({
-            dataset: stream.dataset,
-            index_pattern: `${stream.type}-${stream.dataset}-*`,
-            title: stream.title,
-          })) || [],
+          pkg.data_streams
+            ?.filter((stream) => stream.type === 'logs')
+            .map((stream) => ({
+              dataset: stream.dataset,
+              index_pattern: `${stream.type}-${stream.dataset}-*`,
+              title: stream.title,
+            })) || [],
         elser_embedding: [
           pkg.title,
           pkg.description,
-          ...(pkg.data_streams?.map((stream) => stream.title) || []),
+          ...(pkg.data_streams
+            ?.filter((stream) => stream.type === 'logs')
+            .map((stream) => stream.title) || []),
         ].join(' - '),
       }));
       await this.esClient

diff --git a/...ution/server/lib/siem_migrations/rules/data/rule_migrations_data_prebuilt_rules_client.ts b/...ution/server/lib/siem_migrations/rules/data/rule_migrations_data_prebuilt_rules_client.ts
@@ -14,9 +14,9 @@ import { RuleMigrationsDataBaseClient } from './rule_migrations_data_base_client
 
 export type { RuleVersions };
 export type PrebuildRuleVersionsMap = Map<string, RuleVersions>;
-/* The minimum score required for a integration to be considered correct, might need to change this later */
+/* The minimum score required for a prebuilt rule to be considered correct */
 const MIN_SCORE = 40 as const;
-/* The number of integrations the RAG will return, sorted by score */
+/* The number of prebuilt rules the RAG will return, sorted by score */
 const RETURNED_RULES = 5 as const;
 
 /* BULK_MAX_SIZE defines the number to break down the bulk operations by.
@@ -31,12 +31,12 @@ export class RuleMigrationsDataPrebuiltRulesClient extends RuleMigrationsDataBas
     return fetchRuleVersionsTriad({ ruleAssetsClient, ruleObjectsClient });
   }
 
-  /** Indexes an array of integrations to be used with ELSER semantic search queries */
+  /** Indexes an array of prebuilt rules to be used with ELSER semantic search queries */
   async populate(ruleVersionsMap: PrebuildRuleVersionsMap): Promise<void> {
     const filteredRules: RuleMigrationPrebuiltRule[] = [];
 
     ruleVersionsMap.forEach((ruleVersions) => {
-      const rule = ruleVersions.target || ruleVersions.current;
+      const rule = ruleVersions.target;
       if (rule) {
         const mitreAttackIds = rule?.threat?.flatMap(
           ({ technique }) => technique?.map(({ id }) => id) ?? []

diff --git a/...urity/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/graph.test.ts b/...urity/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/graph.test.ts
@@ -5,7 +5,6 @@
  * 2.0.
  */
 
-import type { InferenceClient } from '@kbn/inference-plugin/server';
 import type {
   ActionsClientChatOpenAI,
   ActionsClientSimpleChatModel,
@@ -14,25 +13,25 @@ import { loggerMock } from '@kbn/logging-mocks';
 import { FakeLLM } from '@langchain/core/utils/testing';
 import type { RuleMigrationsRetriever } from '../retrievers';
 import type { SiemMigrationTelemetryClient } from '../rule_migrations_telemetry_client';
+import type { EsqlKnowledgeBase } from '../util/esql_knowledge_base';
 import { getRuleMigrationAgent } from './graph';
 
 describe('getRuleMigrationAgent', () => {
   const model = new FakeLLM({
     response: JSON.stringify({}, null, 2),
   }) as unknown as ActionsClientChatOpenAI | ActionsClientSimpleChatModel;
   const telemetryClient = {} as SiemMigrationTelemetryClient;
-  const inferenceClient = {} as InferenceClient;
-  const connectorId = 'draw_graphs';
+  const esqlKnowledgeBase = {} as EsqlKnowledgeBase;
+
   const ruleMigrationsRetriever = {} as RuleMigrationsRetriever;
   const logger = loggerMock.create();
 
   it('Ensures that the graph compiles', async () => {
     try {
       await getRuleMigrationAgent({
         model,
-        inferenceClient,
+        esqlKnowledgeBase,
         ruleMigrationsRetriever,
-        connectorId,
         logger,
         telemetryClient,
       });

diff --git a/...s/security/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/graph.ts b/...s/security/plugins/security_solution/server/lib/siem_migrations/rules/task/agent/graph.ts
@@ -13,9 +13,8 @@ import { getTranslateRuleGraph } from './sub_graphs/translate_rule';
 import type { MigrateRuleGraphParams, MigrateRuleState } from './types';
 export function getRuleMigrationAgent({
   model,
-  inferenceClient,
+  esqlKnowledgeBase,
   ruleMigrationsRetriever,
-  connectorId,
   logger,
   telemetryClient,
 }: MigrateRuleGraphParams) {
@@ -27,9 +26,8 @@ export function getRuleMigrationAgent({
   });
   const translationSubGraph = getTranslateRuleGraph({
     model,
-    inferenceClient,
+    esqlKnowledgeBase,
     ruleMigrationsRetriever,
-    connectorId,
     telemetryClient,
     logger,
   });

diff --git a/...ver/lib/siem_migrations/rules/task/agent/nodes/match_prebuilt_rule/match_prebuilt_rule.ts b/...ver/lib/siem_migrations/rules/task/agent/nodes/match_prebuilt_rule/match_prebuilt_rule.ts
@@ -63,12 +63,14 @@ export const getMatchPrebuiltRuleNode = ({
       return {
         name: rule.name,
         description: rule.description,
+        query: rule.target?.type !== 'machine_learning' ? rule.target?.query : '',
       };
     });
 
     const splunkRule = {
       title: state.original_rule.title,
       description: state.original_rule.description,
+      query: state.original_rule.query,
     };
 
     /*

diff --git a/...solution/server/lib/siem_migrations/rules/task/agent/nodes/match_prebuilt_rule/prompts.ts b/...solution/server/lib/siem_migrations/rules/task/agent/nodes/match_prebuilt_rule/prompts.ts
@@ -15,49 +15,47 @@ You will be provided with a Splunk Detection Rule name by the user, your goal is
 Here are some context for you to reference for your task, read it carefully as you will get questions about it later:
 
 <context>
-<elastic_detection_rule_names>
+<elastic_detection_rules>
 {rules}
-</elastic_detection_rule_names>
+</elastic_detection_rules>
 </context>
 `,
   ],
   [
     'human',
-    `See the below description of the splunk rule, try to find a Elastic Prebuilt Rule with similar purpose.     
+    `See the below description of the splunk rule, try to find a Elastic Prebuilt Rule with similar purpose. If the splunk rule covers a much more complex usecase than the prebuilt rule, it is not a match.
 <splunk_rule>
 {splunk_rule}
 </splunk_rule>
 
 <guidelines>
 - Carefully analyze the Splunk Detection Rule data provided by the user.
-- Match the Splunk rule to the most relevant Elastic Prebuilt Rules from the list provided above.
-- If no related Elastic Prebuilt Rule is found, reply with an empty string.
+- Match the Splunk rule to the most relevant Elastic Prebuilt Rules from the list provided above but only if the usecase is almost identical.
+- If no related Elastic Prebuilt Rule is found, ensure the value of "match" in the response is an empty string.
 - Provide a concise reasoning summary for your decision, explaining why the selected Prebuilt Rule is the best fit, or why no suitable match was found.
 </guidelines>
 
 <expected_output>
-- Always reply with a JSON object with the key "match" and the value being the most relevant matched elastic detection rule name, and a "summary" entry with the reasons behind the match. Do not reply with anything else.
-- Only reply with exact matches, if you are unsure or do not find a very confident match, always reply with an empty string value in the match key, do not guess or reply with anything else.
+- Always reply with a JSON object with the field "match" and the value being the most relevant matched elastic detection rule name if any, else the value should be an emptry string, and a "summary" entry with the reasons behind the match. Do not reply with anything else.
+- Only reply with exact matches, if you are unsure or do not find a very confident match, always reply with an empty string value in the match field, do not guess or reply with anything else.
+- If the Splunk rule is a much more complex usecase with custom logic not covered by the prebuilt rules, reply with an empty string in the match field.
 - If there is only one match, answer with the name of the rule in the "match" key. Do not reply with anything else.
 - If there are multiple matches, answer with the most specific of them, for example: "Linux User Account Creation" is more specific than "User Account Creation".
-- Finally, write a "summary" in markdown format with the reasoning behind the decision. Starting with "## Prebuilt Rule Matching Summary\n".
+- Finally, write a "summary" in markdown format with the reasoning behind the decision. Starting with "## Prebuilt Rule Matching Summary" followed by a newline. Make sure the content is valid JSON by escaping any necessary special characters.
 - Make sure the JSON object is formatted correctly and the values properly escaped.
 </expected_output>
 
 <example_response>
-U: <splunk_rule>
-Title: Linux Auditd Add User Account Type
-Description: The following analytic detects the suspicious add user account type.
-</splunk_rule>
-A: Please find the match JSON object below:
+A: Please find the resulting JSON response below:
 \`\`\`json
 {{
   "match": "Linux User Account Creation",
-  "summary": "## Prebuilt Rule Matching Summary\\\nThe Splunk rule \"Linux Auditd Add User Account Type\" is matched with the Elastic rule \"Linux User Account Creation\" because both rules cover the same use case of detecting user account creation on Linux systems."
+  "summary": "## Prebuilt Rule Matching Summary
+The Splunk rule \"Linux Auditd Add User Account Type\" is matched with the Elastic rule \"Linux User Account Creation\" because both rules cover the same use case of detecting user account creation on Linux systems."
 }}
 \`\`\`
 </example_response>
 `,
   ],
-  ['ai', 'Please find the match JSON object below:'],
+  ['ai', 'Please find the resulting JSON response below:'],
 ]);
diff --git a/...y_solution/server/lib/siem_migrations/rules/task/agent/sub_graphs/translate_rule/graph.ts b/...y_solution/server/lib/siem_migrations/rules/task/agent/sub_graphs/translate_rule/graph.ts
@@ -7,7 +7,6 @@
 
 import { END, START, StateGraph } from '@langchain/langgraph';
 import { isEmpty } from 'lodash/fp';
-import { RuleTranslationResult } from '../../../../../../../../common/siem_migrations/constants';
 import { getEcsMappingNode } from './nodes/ecs_mapping';
 import { getFixQueryErrorsNode } from './nodes/fix_query_errors';
 import { getInlineQueryNode } from './nodes/inline_query';
@@ -23,27 +22,25 @@ const MAX_VALIDATION_ITERATIONS = 3;
 
 export function getTranslateRuleGraph({
   model,
-  inferenceClient,
-  connectorId,
+  esqlKnowledgeBase,
   ruleMigrationsRetriever,
   logger,
   telemetryClient,
 }: TranslateRuleGraphParams) {
   const translateRuleNode = getTranslateRuleNode({
-    inferenceClient,
-    connectorId,
+    esqlKnowledgeBase,
     logger,
   });
   const translationResultNode = getTranslationResultNode();
   const inlineQueryNode = getInlineQueryNode({ model, ruleMigrationsRetriever });
   const validationNode = getValidationNode({ logger });
-  const fixQueryErrorsNode = getFixQueryErrorsNode({ inferenceClient, connectorId, logger });
+  const fixQueryErrorsNode = getFixQueryErrorsNode({ esqlKnowledgeBase, logger });
   const retrieveIntegrationsNode = getRetrieveIntegrationsNode({
     model,
     ruleMigrationsRetriever,
     telemetryClient,
   });
-  const ecsMappingNode = getEcsMappingNode({ inferenceClient, connectorId, logger });
+  const ecsMappingNode = getEcsMappingNode({ esqlKnowledgeBase, logger });
 
   const translateRuleGraph = new StateGraph(translateRuleState)
     // Nodes
@@ -86,14 +83,13 @@ const translatableRouter = (state: TranslateRuleState) => {
 const validationRouter = (state: TranslateRuleState) => {
   if (
     state.validation_errors.iterations <= MAX_VALIDATION_ITERATIONS &&
-    state.translation_result === RuleTranslationResult.FULL
+    !isEmpty(state.validation_errors?.esql_errors)
   ) {
-    if (!isEmpty(state.validation_errors?.esql_errors)) {
-      return 'fixQueryErrors';
-    }
-    if (!state.translation_finalized) {
-      return 'ecsMapping';
-    }
+    return 'fixQueryErrors';
   }
+  if (!state.includes_ecs_mapping) {
+    return 'ecsMapping';
+  }
+
   return 'translationResult';
 };
diff --git a/...em_migrations/rules/task/agent/sub_graphs/translate_rule/nodes/ecs_mapping/ecs_mapping.ts b/...em_migrations/rules/task/agent/sub_graphs/translate_rule/nodes/ecs_mapping/ecs_mapping.ts
@@ -6,26 +6,21 @@
  */
 
 import type { Logger } from '@kbn/core/server';
-import type { InferenceClient } from '@kbn/inference-plugin/server';
-import { RuleTranslationResult } from '../../../../../../../../../../common/siem_migrations/constants';
-import { getEsqlKnowledgeBase } from '../../../../../util/esql_knowledge_base_caller';
+import type { EsqlKnowledgeBase } from '../../../../../util/esql_knowledge_base';
 import type { GraphNode } from '../../types';
 import { SIEM_RULE_MIGRATION_CIM_ECS_MAP } from './cim_ecs_map';
 import { ESQL_TRANSLATE_ECS_MAPPING_PROMPT } from './prompts';
 import { cleanMarkdown, generateAssistantComment } from '../../../../../util/comments';
 
 interface GetEcsMappingNodeParams {
-  inferenceClient: InferenceClient;
-  connectorId: string;
+  esqlKnowledgeBase: EsqlKnowledgeBase;
   logger: Logger;
 }
 
 export const getEcsMappingNode = ({
-  inferenceClient,
-  connectorId,
+  esqlKnowledgeBase,
   logger,
 }: GetEcsMappingNodeParams): GraphNode => {
-  const esqlKnowledgeBaseCaller = getEsqlKnowledgeBase({ inferenceClient, connectorId, logger });
   return async (state) => {
     const elasticRule = {
       title: state.elastic_rule.title,
@@ -39,29 +34,21 @@ export const getEcsMappingNode = ({
       elastic_rule: JSON.stringify(elasticRule, null, 2),
     });
 
-    const response = await esqlKnowledgeBaseCaller(prompt);
+    const response = await esqlKnowledgeBase.translate(prompt);
 
     const updatedQuery = response.match(/```esql\n([\s\S]*?)\n```/)?.[1] ?? '';
     const ecsSummary = response.match(/## Field Mapping Summary[\s\S]*$/)?.[0] ?? '';
 
-    const translationResult = getTranslationResult(updatedQuery);
-
+    // We set includes_ecs_mapping to true to indicate that the ecs mapping has been applied.
+    // This is to ensure that the node only runs once
     return {
       response,
       comments: [generateAssistantComment(cleanMarkdown(ecsSummary))],
-      translation_finalized: true,
-      translation_result: translationResult,
+      includes_ecs_mapping: true,
       elastic_rule: {
         ...state.elastic_rule,
         query: updatedQuery,
       },
     };
   };
 };
-
-const getTranslationResult = (esqlQuery: string): RuleTranslationResult => {
-  if (esqlQuery.match(/\[(macro|lookup):[\s\S]*\]/)) {
-    return RuleTranslationResult.PARTIAL;
-  }
-  return RuleTranslationResult.FULL;
-};
diff --git a/...ons/rules/task/agent/sub_graphs/translate_rule/nodes/fix_query_errors/fix_query_errors.ts b/...ons/rules/task/agent/sub_graphs/translate_rule/nodes/fix_query_errors/fix_query_errors.ts
@@ -6,30 +6,26 @@
  */
 
 import type { Logger } from '@kbn/core/server';
-import type { InferenceClient } from '@kbn/inference-plugin/server';
-import { getEsqlKnowledgeBase } from '../../../../../util/esql_knowledge_base_caller';
+import type { EsqlKnowledgeBase } from '../../../../../util/esql_knowledge_base';
 import type { GraphNode } from '../../types';
 import { RESOLVE_ESQL_ERRORS_TEMPLATE } from './prompts';
 
 interface GetFixQueryErrorsNodeParams {
-  inferenceClient: InferenceClient;
-  connectorId: string;
+  esqlKnowledgeBase: EsqlKnowledgeBase;
   logger: Logger;
 }
 
 export const getFixQueryErrorsNode = ({
-  inferenceClient,
-  connectorId,
+  esqlKnowledgeBase,
   logger,
 }: GetFixQueryErrorsNodeParams): GraphNode => {
-  const esqlKnowledgeBaseCaller = getEsqlKnowledgeBase({ inferenceClient, connectorId, logger });
   return async (state) => {
     const rule = state.elastic_rule;
     const prompt = await RESOLVE_ESQL_ERRORS_TEMPLATE.format({
       esql_errors: state.validation_errors.esql_errors,
       esql_query: rule.query,
     });
-    const response = await esqlKnowledgeBaseCaller(prompt);
+    const response = await esqlKnowledgeBase.translate(prompt);
 
     const esqlQuery = response.match(/```esql\n([\s\S]*?)\n```/)?.[1] ?? '';
     rule.query = esqlQuery;