Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
05dd29e
add observability get_trace_change_points tool
arturoliduena Jan 5, 2026
6472b69
add trace change points analysis for avg latency
arturoliduena Jan 8, 2026
f029a16
Update x-pack/solutions/observability/plugins/observability_agent_bui…
arturoliduena Jan 9, 2026
199993c
add latency, throughput and failure_rate trace change points analysis
arturoliduena Jan 9, 2026
57483be
move getPreferredDocumentSource to utils
arturoliduena Jan 9, 2026
62957c4
add support for avg|p95|p99 latency agg type
arturoliduena Jan 9, 2026
bb94790
update latency aggregation handling
arturoliduena Jan 12, 2026
c6d7b6f
update get_trace_change_points tool test
arturoliduena Jan 12, 2026
ad6eaaa
update README and tool description
arturoliduena Jan 12, 2026
d960180
fix useDurationSummaryField logic
arturoliduena Jan 13, 2026
295d862
refactor after: Move helpers required for trace metrics to the apm-da…
arturoliduena Jan 14, 2026
751d5dd
Changes from node scripts/lint_ts_projects --fix
kibanamachine Jan 14, 2026
7f4520f
Changes from node scripts/regenerate_moon_projects.js --update
kibanamachine Jan 14, 2026
da284e1
Merge branch 'main' into obs-agent-418-trace-change-points-tool
arturoliduena Jan 14, 2026
5faa5c3
Merge branch 'main' into obs-agent-418-trace-change-points-tool
arturoliduena Jan 15, 2026
b8f2245
Changes from node scripts/eslint_all_files --no-cache --fix
kibanamachine Jan 15, 2026
2599523
fix circular dependency
arturoliduena Jan 15, 2026
186cfa7
Merge branch 'main' into obs-agent-418-trace-change-points-tool
arturoliduena Jan 15, 2026
ffd8127
Update x-pack/solutions/observability/plugins/observability_agent_bui…
arturoliduena Jan 16, 2026
ad0be75
remove duplicated code
arturoliduena Jan 16, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ export const AGENT_BUILDER_BUILTIN_TOOLS: string[] = [
`${internalNamespaces.observability}.get_log_change_points`,
`${internalNamespaces.observability}.get_metric_change_points`,
`${internalNamespaces.observability}.get_index_info`,
`${internalNamespaces.observability}.get_trace_change_points`,

// Dashboards
'platform.dashboard.create_dashboard',
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# get_trace_change_points

Detects change points (e.g., "spike", "dip", "trend_change", "step_change", "distribution_change", "non_stationary", "stationary", or "indeterminable") in trace metrics (latency, throughput, and failure rate). Results are grouped by the `groupBy` field (default: `service.name`); each group reports the change point detected for each of the three metrics, together with the underlying time series.

## Examples

### Basic time range

```
POST kbn://api/agent_builder/tools/_execute
{
"tool_id": "observability.get_trace_change_points",
"tool_params": {
"start": "now-1h",
"end": "now"
}
}
```

```
POST kbn://api/agent_builder/tools/_execute
{
"tool_id": "observability.get_trace_change_points",
"tool_params": {
"start": "now-1h",
"end": "now",
"latencyType": "p95"
}
}
```

```
POST kbn://api/agent_builder/tools/_execute
{
"tool_id": "observability.get_trace_change_points",
"tool_params": {
"start": "now-1h",
"end": "now",
"groupBy": "transaction.name"
}
}
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import type { CoreSetup, KibanaRequest, Logger } from '@kbn/core/server';
import { ApmDocumentType } from '@kbn/apm-data-access-plugin/common';
import type { ChangePointType } from '@kbn/es-types/src';
import type { AggregationsAggregationContainer } from '@elastic/elasticsearch/lib/api/types';
import { intervalToSeconds } from '@kbn/apm-data-access-plugin/common/utils/get_preferred_bucket_size_and_data_source';
import {
getOutcomeAggregation,
getDurationFieldForTransactions,
} from '@kbn/apm-data-access-plugin/server/utils';
import type {
ObservabilityAgentBuilderPluginSetupDependencies,
ObservabilityAgentBuilderPluginStart,
ObservabilityAgentBuilderPluginStartDependencies,
} from '../../types';
import { timeRangeFilter, kqlFilter as buildKqlFilter } from '../../utils/dsl_filters';
import { parseDatemath } from '../../utils/time';
import { buildApmResources } from '../../utils/build_apm_resources';
import { getPreferredDocumentSource } from '../../utils/get_preferred_document_source';
import type { ChangePointDetails } from '../../utils/get_change_points';

/**
 * Minimal shape of an aggregation bucket as consumed by this tool
 * (used both for the per-group buckets and for the time-series buckets).
 */
interface Bucket {
// Bucket key (a group value or a histogram key).
key: string | number;
// Presumably the formatted representation of `key` when Elasticsearch provides one — confirm.
key_as_string?: string;
// Number of documents that fell into the bucket.
doc_count: number;
}

/**
 * Result of a single `change_point` aggregation.
 */
interface ChangePointResult {
// Details of the detected change, keyed by change point type.
// NOTE(review): `Record` requires every `ChangePointType` key to be present; a
// change_point response presumably carries only the detected type — confirm.
type: Record<ChangePointType, ChangePointDetails>;
// Presumably the time-series bucket at which the change was detected, when there is one — confirm.
bucket?: Bucket;
}

/**
 * One bucket of the `groups` terms aggregation: the change point results for
 * latency, throughput and failure rate, plus the time series they were computed from.
 */
interface BucketChangePoints extends Bucket {
// Change point result for the `time_series>latency` metric.
changes_latency: ChangePointResult;
// Change point result for the `time_series>throughput` metric.
changes_throughput: ChangePointResult;
// Change point result for the `time_series>failure_rate` metric.
changes_failure_rate: ChangePointResult;
// Date histogram holding the per-interval metric values.
time_series: {
buckets: Array<
Bucket & {
// NOTE(review): `{ value }` matches a single-value metric such as `avg`; when a
// percentiles aggregation is used (p95/p99) the response shape is likely
// `{ values: ... }` instead — confirm.
latency: {
value: number | null;
};
throughput: {
value: number | null;
};
failure_rate: {
value: number | null;
};
}
>;
};
}

/** Supported latency aggregations: average, or the 95th / 99th percentile. */
type LatencyAggregationType = 'avg' | 'p99' | 'p95';

/** APM document types this tool narrows the preferred document source to before querying. */
type DocumentType =
| ApmDocumentType.ServiceTransactionMetric
| ApmDocumentType.TransactionMetric
| ApmDocumentType.TransactionEvent;

/**
 * Builds a `change_point` aggregation that reads its input series from `bucketsPath`.
 *
 * @param bucketsPath Path to the metric the change point detection runs on.
 * @returns The aggregation container holding the `change_point` definition.
 */
function getChangePointsAggs(bucketsPath: string) {
  // The cast is needed because the change_point aggregation is missing from the
  // elasticsearch@9.0.0 types: https://github.com/elastic/elasticsearch-specification/issues/3671
  return {
    change_point: { buckets_path: bucketsPath },
  } as AggregationsAggregationContainer;
}

/**
 * Builds the `latency` sub-aggregation for the requested aggregation type.
 *
 * @param latencyAggregationType `avg` yields an `avg` aggregation; `p95` yields a
 *   95th-percentile aggregation; anything else yields the 99th percentile.
 * @param field Duration field the aggregation is computed on.
 * @returns An object with a single `latency` aggregation, ready to be spread into `aggs`.
 */
function getLatencyAggregation(latencyAggregationType: LatencyAggregationType, field: string) {
  const percent = latencyAggregationType === 'p95' ? 95 : 99;
  const latency =
    latencyAggregationType === 'avg'
      ? { avg: { field } }
      : { percentiles: { field, percents: [percent] } };

  return { latency };
}

/**
 * Runs change point detection on trace metrics (latency, throughput and failure rate).
 *
 * The documents matching the time range and optional KQL filter are split into groups
 * by `groupBy`. For every group a date histogram is built, and a `change_point`
 * aggregation is run over each of the three metrics of that histogram.
 *
 * @param start Datemath expression for the start of the time range.
 * @param end Datemath expression for the end of the time range.
 * @param kqlFilter Optional KQL expression used to filter the documents.
 * @param groupBy Field used by the terms aggregation that splits the results into groups.
 * @param latencyType Latency aggregation to analyse (`avg`, `p95` or `p99`); defaults to `avg`.
 * @returns The buckets of the `groups` aggregation, or an empty array when the response has none.
 */
export async function getToolHandler({
  core,
  plugins,
  request,
  logger,
  start,
  end,
  kqlFilter,
  groupBy,
  latencyType = 'avg',
}: {
  core: CoreSetup<
    ObservabilityAgentBuilderPluginStartDependencies,
    ObservabilityAgentBuilderPluginStart
  >;
  plugins: ObservabilityAgentBuilderPluginSetupDependencies;
  request: KibanaRequest;
  logger: Logger;
  start: string;
  end: string;
  kqlFilter?: string;
  groupBy: string;
  latencyType: LatencyAggregationType | undefined;
}): Promise<BucketChangePoints[]> {
  const { apmEventClient, apmDataAccessServices } = await buildApmResources({
    core,
    plugins,
    request,
    logger,
  });

  const startMs = parseDatemath(start);
  const endMs = parseDatemath(end);
  const source = await getPreferredDocumentSource({
    apmDataAccessServices,
    start: startMs,
    end: endMs,
    groupBy,
    kqlFilter,
  });

  const { rollupInterval, hasDurationSummaryField } = source;
  const documentType = source.documentType as DocumentType;
  const isAvgLatency = latencyType === 'avg';
  // Percentile aggregations cannot be calculated on the transaction.duration.summary field.
  const useDurationSummaryField =
    hasDurationSummaryField && latencyType !== 'p95' && latencyType !== 'p99';
  const durationField = getDurationFieldForTransactions(documentType, useDurationSummaryField);
  const bucketSizeInSeconds = intervalToSeconds(rollupInterval);

  // `percentiles` is a multi-value metric, so a buckets_path pointing at it must select
  // the percentile explicitly (e.g. `latency[95]`); a bare `latency` path only resolves
  // for single-value metrics such as `avg`. The selected percentile mirrors the one
  // requested by getLatencyAggregation.
  const latencyBucketsPath = isAvgLatency
    ? 'time_series>latency'
    : `time_series>latency[${latencyType === 'p95' ? 95 : 99}]`;

  // The outcome aggregations are referenced directly for service transaction metrics
  // and through their document count (`>_count`) for the other document types.
  const failureRateBucketsPath =
    documentType === ApmDocumentType.ServiceTransactionMetric
      ? {
          successful_or_failed: 'successful_or_failed',
          success: 'successful',
        }
      : {
          successful_or_failed: 'successful_or_failed>_count',
          success: 'successful>_count',
        };

  // Share of transactions with a known outcome that failed; 0 when there are none.
  const calculateFailedTransactionRate =
    'params.successful_or_failed != null && params.successful_or_failed > 0 ? (params.successful_or_failed - params.success) / params.successful_or_failed : 0';

  const response = await apmEventClient.search('get_trace_change_points', {
    apm: {
      sources: [{ documentType, rollupInterval }],
    },
    size: 0,
    track_total_hits: false,
    query: {
      bool: {
        filter: [
          ...timeRangeFilter('@timestamp', {
            start: startMs,
            end: endMs,
          }),
          ...buildKqlFilter(kqlFilter),
        ],
      },
    },
    aggs: {
      groups: {
        terms: {
          field: groupBy,
        },
        aggs: {
          time_series: {
            date_histogram: {
              field: '@timestamp',
              fixed_interval: `${bucketSizeInSeconds}s`,
            },
            aggs: {
              ...getOutcomeAggregation(documentType),
              ...getLatencyAggregation(latencyType, durationField),
              failure_rate: {
                bucket_script: {
                  buckets_path: failureRateBucketsPath,
                  script: {
                    source: calculateFailedTransactionRate,
                  },
                },
              },
              throughput: {
                bucket_script: {
                  buckets_path: {
                    count: '_count',
                  },
                  script: {
                    // Bucket document count normalised to a per-minute rate.
                    source: 'params.count != null ? params.count / (params.bucketSize / 60.0) : 0',
                    params: {
                      bucketSize: bucketSizeInSeconds,
                    },
                  },
                },
              },
            },
          },
          changes_latency: getChangePointsAggs(latencyBucketsPath),
          changes_throughput: getChangePointsAggs('time_series>throughput'),
          changes_failure_rate: getChangePointsAggs('time_series>failure_rate'),
        },
      },
    },
  });

  return (response.aggregations?.groups?.buckets as BucketChangePoints[]) ?? [];
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
import { z } from '@kbn/zod';
import { ToolType } from '@kbn/agent-builder-common';
import { ToolResultType } from '@kbn/agent-builder-common/tools/tool_result';
import type { BuiltinToolDefinition } from '@kbn/agent-builder-server';
import type { CoreSetup, Logger } from '@kbn/core/server';
import type {
ObservabilityAgentBuilderPluginSetupDependencies,
ObservabilityAgentBuilderPluginStart,
ObservabilityAgentBuilderPluginStartDependencies,
} from '../../types';
import { timeRangeSchemaRequired } from '../../utils/tool_schemas';
import { getToolHandler } from './handler';

/** Identifier used as the `id` of the trace change points tool definition. */
export const OBSERVABILITY_GET_TRACE_CHANGE_POINTS_TOOL_ID =
'observability.get_trace_change_points';

/** Optional KQL expression narrowing the trace documents that are analysed. */
const kqlFilterParam = z
  .string()
  .describe(
    'Optional KQL query to filter the trace documents. Examples: trace.id:"abc123", service.name:"my-service"'
  )
  .optional();

/** Optional field used to split the results into groups. */
const groupByParam = z
  .string()
  .describe(
    `Field to group results by. Use only low-cardinality fields. Using many fields or high-cardinality fields can cause a large number of groups and severely impact performance. Common fields to group by include:
- Service level: 'service.name', 'service.environment', 'service.version'
- Transaction level: 'transaction.name', 'transaction.type'
- Infrastructure level: 'host.name', 'container.id', 'kubernetes.pod.name'
`
  )
  .optional();

/** Optional latency aggregation to run the change point analysis on. */
const latencyTypeParam = z
  .enum(['avg', 'p95', 'p99'])
  .describe('Aggregation type for latency change points analysis. default is avg.')
  .optional();

/**
 * Input schema of the trace change points tool: the required time range plus
 * the optional filter, grouping and latency aggregation parameters.
 */
const getTraceChangePointsSchema = z.object({
  ...timeRangeSchemaRequired,
  kqlFilter: kqlFilterParam,
  groupBy: groupByParam,
  latencyType: latencyTypeParam,
});

/**
 * Creates the builtin tool definition for trace change point detection.
 *
 * The tool's handler applies the defaults (`groupBy: 'service.name'`, `latencyType: 'avg'`),
 * delegates to `getToolHandler`, and wraps the outcome in a tool result. Failures are
 * logged and returned as an error result rather than thrown.
 *
 * @param core Kibana core setup contract, forwarded to the handler.
 * @param plugins Setup dependencies of the plugin, forwarded to the handler.
 * @param logger Logger used to report handler failures.
 * @returns The tool definition.
 */
export function createGetTraceChangePointsTool({
  core,
  plugins,
  logger,
}: {
  core: CoreSetup<
    ObservabilityAgentBuilderPluginStartDependencies,
    ObservabilityAgentBuilderPluginStart
  >;
  plugins: ObservabilityAgentBuilderPluginSetupDependencies;
  logger: Logger;
}) {
  const description = `Analyzes traces to detect statistically significant change points in latency, throughput, and failure rate across group (e.g., service, transaction, host).
Trace metrics:
- Latency: avg/p95/p99 response time.
- Throughput: requests per minute.
- Failure rate: percentage of failed transactions.

Supports optional KQL filtering

When to use:
- Detecting significant changes in trace behavior (spike, dip, step change, trend change, distribution change, stationary/non‑stationary, indeterminable) and identifying when they occur.
`;

  const toolDefinition: BuiltinToolDefinition<typeof getTraceChangePointsSchema> = {
    id: OBSERVABILITY_GET_TRACE_CHANGE_POINTS_TOOL_ID,
    type: ToolType.builtin,
    description,
    schema: getTraceChangePointsSchema,
    tags: ['observability', 'traces'],
    handler: async (toolParams, { request }) => {
      // Fall back to grouping by service and to average latency when not specified.
      const {
        start,
        end,
        kqlFilter,
        groupBy = 'service.name',
        latencyType = 'avg',
      } = toolParams;

      try {
        const changePoints = await getToolHandler({
          core,
          plugins,
          request,
          logger,
          start,
          end,
          kqlFilter,
          groupBy,
          latencyType,
        });

        return {
          results: [{ type: ToolResultType.other, data: { changePoints } }],
        };
      } catch (error) {
        const message = `Error getting trace change points: ${error.message}`;
        logger.error(message);
        logger.debug(error);

        // Report the failure as a tool result instead of rethrowing.
        return {
          results: [{ type: ToolResultType.error, data: { message, stack: error.stack } }],
        };
      }
    },
  };

  return toolDefinition;
}
Loading