Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import type { RulePreviewLoggedRequest } from '../../../../../common/api/detecti
import type { SecurityRuleServices, SecuritySharedParams, SignalSource } from '../types';
import { getDataTierFilter } from '../utils/get_data_tier_filter';
import { checkErrorDetails } from '../utils/check_error_details';
import { logClusterShardFailuresEsql } from '../utils/log_cluster_shard_failures_esql';
import type { ExcludedDocument, EsqlState } from './types';

import {
Expand Down Expand Up @@ -131,7 +132,12 @@ export const esqlExecutor = async ({
excludedDocumentIds: excludedDocuments.map(({ id }) => id),
ruleExecutionTimeout,
});
const esqlQueryString = { drop_null_columns: true };

const esqlQueryString = {
drop_null_columns: true,
// allow_partial_results is true by default, but we need to set it to false for aggregating queries
allow_partial_results: !isRuleAggregating,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add a comment here just stating allow_partial_results defaults to true? I like to have it documented in-line.

Ref: elastic/elasticsearch#122802

};
const hasLoggedRequestsReachedLimit = iteration >= 2;

ruleExecutionLogger.debug(`ES|QL query request: ${JSON.stringify(esqlRequest)}`);
Expand All @@ -151,6 +157,7 @@ export const esqlExecutor = async ({
loggedRequests: isLoggedRequestsEnabled ? loggedRequests : undefined,
});

logClusterShardFailuresEsql({ response, result });
const esqlSearchDuration = performance.now() - esqlSignalSearchStart;
result.searchAfterTimes.push(makeFloatString(esqlSearchDuration));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@
import { performance } from 'perf_hooks';
import type { ElasticsearchClient } from '@kbn/core/server';
import { getKbnServerError } from '@kbn/kibana-utils-plugin/server';
import type { QueryDslQueryContainer } from '@elastic/elasticsearch/lib/api/types';
import type {
QueryDslQueryContainer,
EsqlEsqlShardFailure,
} from '@elastic/elasticsearch/lib/api/types';
import type { IRuleExecutionLogForExecutors } from '../../rule_monitoring';
import type { RulePreviewLoggedRequest } from '../../../../../common/api/detection_engine/rule_preview/rule_preview.gen';
import { logEsqlRequest } from '../utils/logged_requests';
Expand Down Expand Up @@ -39,6 +42,13 @@ export type EsqlResultRow = Array<string | null>;
export interface EsqlTable {
columns: EsqlResultColumn[];
values: EsqlResultRow[];
_clusters?: {
details?: {
[key: string]: {
failures?: EsqlEsqlShardFailure[];
};
};
};
}

export const performEsqlRequest = async ({
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,14 @@ export const EQL_SHARD_FAILURE_MESSAGE = (
},
});

export const ESQL_SHARD_FAILURE_MESSAGE = (shardFailuresMessage: string) =>
i18n.translate('xpack.securitySolution.detectionEngine.esqlRuleType.esqlShardFailures', {
defaultMessage: `The ES|QL event query was only executed on the available shards. The query failed to run successfully on the following shards: {shardFailures}`,
values: {
shardFailures: shardFailuresMessage,
},
});

export const FIND_THRESHOLD_BUCKETS_DESCRIPTION = (afterBucket?: string) =>
afterBucket
? i18n.translate(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type { EsqlEsqlShardFailure } from '@elastic/elasticsearch/lib/api/types';
import type { EsqlTable } from '../esql/esql_request';
import type { SearchAfterAndBulkCreateReturnType } from '../types';
import { logClusterShardFailuresEsql } from './log_cluster_shard_failures_esql';

describe('logClusterShardFailuresEsql', () => {
let mockResult: SearchAfterAndBulkCreateReturnType;

beforeEach(() => {
mockResult = {
warningMessages: [],
bulkCreateTimes: [],
createdSignalsCount: 0,
createdSignals: [],
errors: [],
searchAfterTimes: [],
success: true,
warning: false,
enrichmentTimes: [],
};
});

it('should not add warning message when no shard failures exist', () => {
const response: EsqlTable = {
columns: [],
values: [],
_clusters: {
details: {},
},
};

logClusterShardFailuresEsql({ response, result: mockResult });
expect(mockResult.warningMessages).toHaveLength(0);
});

it('should add warning message when shard failures exist in a single cluster', () => {
const shardFailure: EsqlEsqlShardFailure = {
reason: { type: 'test_failure', reason: 'test failure' },
shard: '0',
index: 'test-index',
};

const response: EsqlTable = {
columns: [],
values: [],
_clusters: {
details: {
'cluster-1': {
failures: [shardFailure],
},
},
},
};

logClusterShardFailuresEsql({ response, result: mockResult });
expect(mockResult.warningMessages).toHaveLength(1);
expect(mockResult.warningMessages[0]).toBe(
`The ES|QL event query was only executed on the available shards. The query failed to run successfully on the following shards: ${JSON.stringify(
[shardFailure]
)}`
);
});

it('should add warning message when shard failures exist in multiple clusters', () => {
const shardFailure1: EsqlEsqlShardFailure = {
reason: { type: 'test_failure_1', reason: 'test failure 1' },
shard: '0',
index: 'test-index-1',
};

const shardFailure2: EsqlEsqlShardFailure = {
reason: { type: 'test_failure_2', reason: 'test failure 2' },
shard: '1',
index: 'test-index-2',
};

const response: EsqlTable = {
columns: [],
values: [],
_clusters: {
details: {
'cluster-1': {
failures: [shardFailure1],
},
'cluster-2': {
failures: [shardFailure2],
},
},
},
};

logClusterShardFailuresEsql({ response, result: mockResult });
expect(mockResult.warningMessages).toHaveLength(1);
expect(mockResult.warningMessages[0]).toBe(
`The ES|QL event query was only executed on the available shards. The query failed to run successfully on the following shards: ${JSON.stringify(
[shardFailure1, shardFailure2]
)}`
);
});

it('should handle undefined _clusters property', () => {
const response: EsqlTable = {
columns: [],
values: [],
};

logClusterShardFailuresEsql({ response, result: mockResult });
expect(mockResult.warningMessages).toHaveLength(0);
});

it('should handle undefined details property', () => {
const response: EsqlTable = {
columns: [],
values: [],
_clusters: {},
};

logClusterShardFailuresEsql({ response, result: mockResult });
expect(mockResult.warningMessages).toHaveLength(0);
});
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type { EsqlEsqlShardFailure } from '@elastic/elasticsearch/lib/api/types';
import type { EsqlTable } from '../esql/esql_request';
import * as i18n from '../translations';
import type { SearchAfterAndBulkCreateReturnType } from '../types';

export const logClusterShardFailuresEsql = ({
response,
result,
}: {
response: EsqlTable;
result: SearchAfterAndBulkCreateReturnType;
}) => {
const clusters = response?._clusters?.details ?? {};
const shardFailures = Object.keys(clusters).reduce<EsqlEsqlShardFailure[]>((acc, cluster) => {
const failures = clusters[cluster]?.failures ?? [];

if (failures.length > 0) {
acc.push(...failures);
}

return acc;
}, []);

if (shardFailures.length > 0) {
result.warningMessages.push(i18n.ESQL_SHARD_FAILURE_MESSAGE(JSON.stringify(shardFailures)));
}
};
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ import {
waitForBackfillExecuted,
setAdvancedSettings,
getOpenAlerts,
setBrokenRuntimeField,
unsetBrokenRuntimeField,
} from '../../../../utils';
import {
deleteAllRules,
Expand All @@ -42,6 +44,7 @@ import {
} from '../../../../../../../common/utils/security_solution';
import { deleteAllExceptions } from '../../../../../lists_and_exception_lists/utils';
import { FtrProviderContext } from '../../../../../../ftr_provider_context';
import { EsArchivePathBuilder } from '../../../../../../es_archive_path_builder';

export default ({ getService }: FtrProviderContext) => {
const supertest = getService('supertest');
Expand Down Expand Up @@ -2186,6 +2189,85 @@ export default ({ getService }: FtrProviderContext) => {
});
});

describe('shard failures', () => {
const config = getService('config');
const isServerless = config.get('serverless');
const dataPathBuilder = new EsArchivePathBuilder(isServerless);
const packetBeatPath = dataPathBuilder.getPath('packetbeat/default');

before(async () => {
await esArchiver.load(packetBeatPath);
await setBrokenRuntimeField({ es, index: 'packetbeat-*' });
});

after(async () => {
await unsetBrokenRuntimeField({ es, index: 'packetbeat-*' });
await esArchiver.unload(packetBeatPath);
});

it('should handle shard failures and include warning in logs for query that is not aggregating', async () => {
const doc1 = { agent: { name: 'test-1' } };
await indexEnhancedDocuments({
documents: [doc1],
id: uuidv4(),
});

const rule: EsqlRuleCreateProps = {
...getCreateEsqlRulesSchemaMock('rule-1', true),
query: `from packetbeat-*, ecs_compliant METADATA _id | limit 101`,
from: 'now-100000h',
};

const { logs, previewId } = await previewRule({
supertest,
rule,
});

const previewAlerts = await getPreviewAlerts({ es, previewId });

expect(logs).toEqual(
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we add assertion that rule still creates alerts from available shards?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added

expect.arrayContaining([
expect.objectContaining({
warnings: expect.arrayContaining([
expect.stringContaining(
'The ES|QL event query was only executed on the available shards. The query failed to run successfully on the following shards'
),
]),
}),
])
);

expect(previewAlerts?.length).toBeGreaterThan(0);
});

it('should handle shard failures and include errors in logs for query that is aggregating', async () => {
const rule: EsqlRuleCreateProps = {
...getCreateEsqlRulesSchemaMock(),
query: `from packetbeat-* | stats _count=count(broken) by @timestamp`,
from: 'now-100000h',
};

const { logs, previewId } = await previewRule({
supertest,
rule,
});

const previewAlerts = await getPreviewAlerts({ es, previewId });

expect(logs).toEqual(
expect.arrayContaining([
expect.objectContaining({
errors: expect.arrayContaining([
expect.stringContaining('No field found for [non_existing] in mapping'),
]),
}),
])
);

expect(previewAlerts).toHaveLength(0);
});
});

describe('alerts on alerts', () => {
let id: string;
let ruleId: string;
Expand Down