diff --git a/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_types/esql/esql.ts b/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_types/esql/esql.ts index 1b3a3aeef80d2..28049a9af5f74 100644 --- a/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_types/esql/esql.ts +++ b/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_types/esql/esql.ts @@ -33,6 +33,7 @@ import type { RulePreviewLoggedRequest } from '../../../../../common/api/detecti import type { SecurityRuleServices, SecuritySharedParams, SignalSource } from '../types'; import { getDataTierFilter } from '../utils/get_data_tier_filter'; import { checkErrorDetails } from '../utils/check_error_details'; +import { logClusterShardFailuresEsql } from '../utils/log_cluster_shard_failures_esql'; import type { ExcludedDocument, EsqlState } from './types'; import { @@ -131,7 +132,12 @@ export const esqlExecutor = async ({ excludedDocumentIds: excludedDocuments.map(({ id }) => id), ruleExecutionTimeout, }); - const esqlQueryString = { drop_null_columns: true }; + + const esqlQueryString = { + drop_null_columns: true, + // allow_partial_results is true by default, but we need to set it to false for aggregating queries + allow_partial_results: !isRuleAggregating, + }; const hasLoggedRequestsReachedLimit = iteration >= 2; ruleExecutionLogger.debug(`ES|QL query request: ${JSON.stringify(esqlRequest)}`); @@ -151,6 +157,7 @@ export const esqlExecutor = async ({ loggedRequests: isLoggedRequestsEnabled ? loggedRequests : undefined, }); + logClusterShardFailuresEsql({ response, result }); const esqlSearchDuration = performance.now() - esqlSignalSearchStart; result.searchAfterTimes.push(makeFloatString(esqlSearchDuration)); diff --git a/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_types/esql/esql_request.ts b/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_types/esql/esql_request.ts index 564b018bac4dd..e844ffcc88fee 100644 --- a/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_types/esql/esql_request.ts +++ b/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_types/esql/esql_request.ts @@ -8,7 +8,10 @@ import { performance } from 'perf_hooks'; import type { ElasticsearchClient } from '@kbn/core/server'; import { getKbnServerError } from '@kbn/kibana-utils-plugin/server'; -import type { QueryDslQueryContainer } from '@elastic/elasticsearch/lib/api/types'; +import type { + QueryDslQueryContainer, + EsqlEsqlShardFailure, +} from '@elastic/elasticsearch/lib/api/types'; import type { IRuleExecutionLogForExecutors } from '../../rule_monitoring'; import type { RulePreviewLoggedRequest } from '../../../../../common/api/detection_engine/rule_preview/rule_preview.gen'; import { logEsqlRequest } from '../utils/logged_requests'; @@ -39,6 +42,13 @@ export type EsqlResultRow = Array; export interface EsqlTable { columns: EsqlResultColumn[]; values: EsqlResultRow[]; + _clusters?: { + details?: { + [key: string]: { + failures?: EsqlEsqlShardFailure[]; + }; + }; + }; } export const performEsqlRequest = async ({ diff --git a/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_types/translations.ts b/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_types/translations.ts index 10901d0f5fcfc..e1eac3f24ed95 100644 --- a/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_types/translations.ts +++ b/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_types/translations.ts @@ -63,6 +63,14 @@ export const EQL_SHARD_FAILURE_MESSAGE = ( }, }); +export const ESQL_SHARD_FAILURE_MESSAGE = (shardFailuresMessage: string) => + i18n.translate('xpack.securitySolution.detectionEngine.esqlRuleType.esqlShardFailures', { + defaultMessage: `The ES|QL event query was only executed on the available shards. The query failed to run successfully on the following shards: {shardFailures}`, + values: { + shardFailures: shardFailuresMessage, + }, + }); + export const FIND_THRESHOLD_BUCKETS_DESCRIPTION = (afterBucket?: string) => afterBucket ? i18n.translate( diff --git a/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_types/utils/log_cluster_shard_failures_esql.test.ts b/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_types/utils/log_cluster_shard_failures_esql.test.ts new file mode 100644 index 0000000000000..f4eb8d89229a4 --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_types/utils/log_cluster_shard_failures_esql.test.ts @@ -0,0 +1,127 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ +import type { EsqlEsqlShardFailure } from '@elastic/elasticsearch/lib/api/types'; +import type { EsqlTable } from '../esql/esql_request'; +import type { SearchAfterAndBulkCreateReturnType } from '../types'; +import { logClusterShardFailuresEsql } from './log_cluster_shard_failures_esql'; + +describe('logClusterShardFailuresEsql', () => { + let mockResult: SearchAfterAndBulkCreateReturnType; + + beforeEach(() => { + mockResult = { + warningMessages: [], + bulkCreateTimes: [], + createdSignalsCount: 0, + createdSignals: [], + errors: [], + searchAfterTimes: [], + success: true, + warning: false, + enrichmentTimes: [], + }; + }); + + it('should not add warning message when no shard failures exist', () => { + const response: EsqlTable = { + columns: [], + values: [], + _clusters: { + details: {}, + }, + }; + + logClusterShardFailuresEsql({ response, result: mockResult }); + expect(mockResult.warningMessages).toHaveLength(0); + }); + + it('should add warning message when shard failures exist in a single cluster', () => { + const shardFailure: EsqlEsqlShardFailure = { + reason: { type: 'test_failure', reason: 'test failure' }, + shard: '0', + index: 'test-index', + }; + + const response: EsqlTable = { + columns: [], + values: [], + _clusters: { + details: { + 'cluster-1': { + failures: [shardFailure], + }, + }, + }, + }; + + logClusterShardFailuresEsql({ response, result: mockResult }); + expect(mockResult.warningMessages).toHaveLength(1); + expect(mockResult.warningMessages[0]).toBe( + `The ES|QL event query was only executed on the available shards. The query failed to run successfully on the following shards: ${JSON.stringify( + [shardFailure] + )}` + ); + }); + + it('should add warning message when shard failures exist in multiple clusters', () => { + const shardFailure1: EsqlEsqlShardFailure = { + reason: { type: 'test_failure_1', reason: 'test failure 1' }, + shard: '0', + index: 'test-index-1', + }; + + const shardFailure2: EsqlEsqlShardFailure = { + reason: { type: 'test_failure_2', reason: 'test failure 2' }, + shard: '1', + index: 'test-index-2', + }; + + const response: EsqlTable = { + columns: [], + values: [], + _clusters: { + details: { + 'cluster-1': { + failures: [shardFailure1], + }, + 'cluster-2': { + failures: [shardFailure2], + }, + }, + }, + }; + + logClusterShardFailuresEsql({ response, result: mockResult }); + expect(mockResult.warningMessages).toHaveLength(1); + expect(mockResult.warningMessages[0]).toBe( + `The ES|QL event query was only executed on the available shards. The query failed to run successfully on the following shards: ${JSON.stringify( + [shardFailure1, shardFailure2] + )}` + ); + }); + + it('should handle undefined _clusters property', () => { + const response: EsqlTable = { + columns: [], + values: [], + }; + + logClusterShardFailuresEsql({ response, result: mockResult }); + expect(mockResult.warningMessages).toHaveLength(0); + }); + + it('should handle undefined details property', () => { + const response: EsqlTable = { + columns: [], + values: [], + _clusters: {}, + }; + + logClusterShardFailuresEsql({ response, result: mockResult }); + expect(mockResult.warningMessages).toHaveLength(0); + }); +}); diff --git a/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_types/utils/log_cluster_shard_failures_esql.ts b/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_types/utils/log_cluster_shard_failures_esql.ts new file mode 100644 index 0000000000000..6f2237f57033b --- /dev/null +++ b/x-pack/solutions/security/plugins/security_solution/server/lib/detection_engine/rule_types/utils/log_cluster_shard_failures_esql.ts @@ -0,0 +1,33 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ +import type { EsqlEsqlShardFailure } from '@elastic/elasticsearch/lib/api/types'; +import type { EsqlTable } from '../esql/esql_request'; +import * as i18n from '../translations'; +import type { SearchAfterAndBulkCreateReturnType } from '../types'; + +export const logClusterShardFailuresEsql = ({ + response, + result, +}: { + response: EsqlTable; + result: SearchAfterAndBulkCreateReturnType; +}) => { + const clusters = response?._clusters?.details ?? {}; + const shardFailures = Object.keys(clusters).reduce((acc, cluster) => { + const failures = clusters[cluster]?.failures ?? []; + + if (failures.length > 0) { + acc.push(...failures); + } + + return acc; + }, []); + + if (shardFailures.length > 0) { + result.warningMessages.push(i18n.ESQL_SHARD_FAILURE_MESSAGE(JSON.stringify(shardFailures))); + } +}; diff --git a/x-pack/test/security_solution_api_integration/test_suites/detections_response/detection_engine/rule_execution_logic/esql/trial_license_complete_tier/esql.ts b/x-pack/test/security_solution_api_integration/test_suites/detections_response/detection_engine/rule_execution_logic/esql/trial_license_complete_tier/esql.ts index 27ab94ad99be0..ab478af15462a 100644 --- a/x-pack/test/security_solution_api_integration/test_suites/detections_response/detection_engine/rule_execution_logic/esql/trial_license_complete_tier/esql.ts +++ b/x-pack/test/security_solution_api_integration/test_suites/detections_response/detection_engine/rule_execution_logic/esql/trial_license_complete_tier/esql.ts @@ -34,6 +34,8 @@ import { waitForBackfillExecuted, setAdvancedSettings, getOpenAlerts, + setBrokenRuntimeField, + unsetBrokenRuntimeField, } from '../../../../utils'; import { deleteAllRules, @@ -42,6 +44,7 @@ import { } from '../../../../../../../common/utils/security_solution'; import { deleteAllExceptions } from '../../../../../lists_and_exception_lists/utils'; import { FtrProviderContext } from '../../../../../../ftr_provider_context'; +import { EsArchivePathBuilder } from '../../../../../../es_archive_path_builder'; export default ({ getService }: FtrProviderContext) => { const supertest = getService('supertest'); @@ -2186,6 +2189,85 @@ export default ({ getService }: FtrProviderContext) => { }); }); + describe('shard failures', () => { + const config = getService('config'); + const isServerless = config.get('serverless'); + const dataPathBuilder = new EsArchivePathBuilder(isServerless); + const packetBeatPath = dataPathBuilder.getPath('packetbeat/default'); + + before(async () => { + await esArchiver.load(packetBeatPath); + await setBrokenRuntimeField({ es, index: 'packetbeat-*' }); + }); + + after(async () => { + await unsetBrokenRuntimeField({ es, index: 'packetbeat-*' }); + await esArchiver.unload(packetBeatPath); + }); + + it('should handle shard failures and include warning in logs for query that is not aggregating', async () => { + const doc1 = { agent: { name: 'test-1' } }; + await indexEnhancedDocuments({ + documents: [doc1], + id: uuidv4(), + }); + + const rule: EsqlRuleCreateProps = { + ...getCreateEsqlRulesSchemaMock('rule-1', true), + query: `from packetbeat-*, ecs_compliant METADATA _id | limit 101`, + from: 'now-100000h', + }; + + const { logs, previewId } = await previewRule({ + supertest, + rule, + }); + + const previewAlerts = await getPreviewAlerts({ es, previewId }); + + expect(logs).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + warnings: expect.arrayContaining([ + expect.stringContaining( + 'The ES|QL event query was only executed on the available shards. The query failed to run successfully on the following shards' + ), + ]), + }), + ]) + ); + + expect(previewAlerts?.length).toBeGreaterThan(0); + }); + + it('should handle shard failures and include errors in logs for query that is aggregating', async () => { + const rule: EsqlRuleCreateProps = { + ...getCreateEsqlRulesSchemaMock(), + query: `from packetbeat-* | stats _count=count(broken) by @timestamp`, + from: 'now-100000h', + }; + + const { logs, previewId } = await previewRule({ + supertest, + rule, + }); + + const previewAlerts = await getPreviewAlerts({ es, previewId }); + + expect(logs).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + errors: expect.arrayContaining([ + expect.stringContaining('No field found for [non_existing] in mapping'), + ]), + }), + ]) + ); + + expect(previewAlerts).toHaveLength(0); + }); + }); + describe('alerts on alerts', () => { let id: string; let ruleId: string;