Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/settings/task-manager-settings.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ Task Manager runs background tasks by polling for work on an interval. You can
| `xpack.task_manager.max_workers`
| The maximum number of tasks that this Kibana instance will run simultaneously. Defaults to 10.
Starting in 8.0, it will not be possible to set the value greater than 100.

| `xpack.task_manager.monitored_stats_warn_delayed_task_start_in_seconds`
| The amount of seconds we allow a task to delay before printing a warning server log. Defaults to 60.
|===

[float]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,7 @@ kibana_vars=(
xpack.task_manager.monitored_aggregated_stats_refresh_rate
xpack.task_manager.monitored_stats_required_freshness
xpack.task_manager.monitored_stats_running_average_window
xpack.task_manager.monitored_stats_warn_delayed_task_start_in_seconds
xpack.task_manager.monitored_task_execution_thresholds
xpack.task_manager.poll_interval
xpack.task_manager.request_capacity
Expand Down
3 changes: 3 additions & 0 deletions x-pack/plugins/task_manager/server/config.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ describe('config validation', () => {
"monitored_aggregated_stats_refresh_rate": 60000,
"monitored_stats_required_freshness": 4000,
"monitored_stats_running_average_window": 50,
"monitored_stats_warn_delayed_task_start_in_seconds": 60,
"monitored_task_execution_thresholds": Object {
"custom": Object {},
"default": Object {
Expand Down Expand Up @@ -68,6 +69,7 @@ describe('config validation', () => {
"monitored_aggregated_stats_refresh_rate": 60000,
"monitored_stats_required_freshness": 4000,
"monitored_stats_running_average_window": 50,
"monitored_stats_warn_delayed_task_start_in_seconds": 60,
"monitored_task_execution_thresholds": Object {
"custom": Object {},
"default": Object {
Expand Down Expand Up @@ -103,6 +105,7 @@ describe('config validation', () => {
"monitored_aggregated_stats_refresh_rate": 60000,
"monitored_stats_required_freshness": 4000,
"monitored_stats_running_average_window": 50,
"monitored_stats_warn_delayed_task_start_in_seconds": 60,
"monitored_task_execution_thresholds": Object {
"custom": Object {
"alerting:always-fires": Object {
Expand Down
5 changes: 5 additions & 0 deletions x-pack/plugins/task_manager/server/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ export const DEFAULT_VERSION_CONFLICT_THRESHOLD = 80;
// Refresh aggregated monitored stats at a default rate of once a minute
export const DEFAULT_MONITORING_REFRESH_RATE = 60 * 1000;
export const DEFAULT_MONITORING_STATS_RUNNING_AVERGAE_WINDOW = 50;
export const DEFAULT_MONITORING_STATS_WARN_DELAYED_TASK_START_IN_SECONDS = 60;

export const taskExecutionFailureThresholdSchema = schema.object(
{
Expand Down Expand Up @@ -109,6 +110,10 @@ export const configSchema = schema.object(
defaultValue: {},
}),
}),
/* The amount of seconds we allow a task to delay before printing a warning server log */
monitored_stats_warn_delayed_task_start_in_seconds: schema.number({
defaultValue: DEFAULT_MONITORING_STATS_WARN_DELAYED_TASK_START_IN_SECONDS,
}),
},
{
validate: (config) => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ describe('managed configuration', () => {
version_conflict_threshold: 80,
max_poll_inactivity_cycles: 10,
monitored_aggregated_stats_refresh_rate: 60000,
monitored_stats_warn_delayed_task_start_in_seconds: 60,
monitored_stats_required_freshness: 4000,
monitored_stats_running_average_window: 50,
request_capacity: 1000,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

const createCalculateHealthStatusMock = () => {
return jest.fn();
};

export const calculateHealthStatusMock = {
create: createCalculateHealthStatusMock,
};
79 changes: 79 additions & 0 deletions x-pack/plugins/task_manager/server/lib/calculate_health_status.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { isString } from 'lodash';
import { JsonValue } from '@kbn/common-utils';
import { HealthStatus, RawMonitoringStats } from '../monitoring';
import { TaskManagerConfig } from '../config';

export function calculateHealthStatus(
summarizedStats: RawMonitoringStats,
config: TaskManagerConfig
): HealthStatus {
const now = Date.now();

// if "hot" health stats are any more stale than monitored_stats_required_freshness (pollInterval +1s buffer by default)
// consider the system unhealthy
const requiredHotStatsFreshness: number = config.monitored_stats_required_freshness;

// if "cold" health stats are any more stale than the configured refresh (+ a buffer), consider the system unhealthy
const requiredColdStatsFreshness: number = config.monitored_aggregated_stats_refresh_rate * 1.5;

/**
* If the monitored stats aren't fresh, return a red status
*/
const healthStatus =
hasStatus(summarizedStats.stats, HealthStatus.Error) ||
hasExpiredHotTimestamps(summarizedStats, now, requiredHotStatsFreshness) ||
hasExpiredColdTimestamps(summarizedStats, now, requiredColdStatsFreshness)
? HealthStatus.Error
: hasStatus(summarizedStats.stats, HealthStatus.Warning)
? HealthStatus.Warning
: HealthStatus.OK;
return healthStatus;
}

function hasStatus(stats: RawMonitoringStats['stats'], status: HealthStatus): boolean {
return Object.values(stats)
.map((stat) => stat?.status === status)
.includes(true);
}

/**
* If certain "hot" stats are not fresh, then the _health api will should return a Red status
* @param monitoringStats The monitored stats
* @param now The time to compare against
* @param requiredFreshness How fresh should these stats be
*/
function hasExpiredHotTimestamps(
monitoringStats: RawMonitoringStats,
now: number,
requiredFreshness: number
): boolean {
const diff =
now -
getOldestTimestamp(
monitoringStats.last_update,
monitoringStats.stats.runtime?.value.polling.last_successful_poll
);
return diff > requiredFreshness;
}

function hasExpiredColdTimestamps(
monitoringStats: RawMonitoringStats,
now: number,
requiredFreshness: number
): boolean {
return now - getOldestTimestamp(monitoringStats.stats.workload?.timestamp) > requiredFreshness;
}

function getOldestTimestamp(...timestamps: Array<JsonValue | undefined>): number {
const validTimestamps = timestamps
.map((timestamp) => (isString(timestamp) ? Date.parse(timestamp) : NaN))
.filter((timestamp) => !isNaN(timestamp));
return validTimestamps.length ? Math.min(...validTimestamps) : 0;
}
14 changes: 14 additions & 0 deletions x-pack/plugins/task_manager/server/lib/log_health_metrics.mock.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

const createLogHealthMetricsMock = () => {
return jest.fn();
};

export const logHealthMetricsMock = {
create: createLogHealthMetricsMock,
};
Loading