diff --git a/src/platform/packages/shared/kbn-synthtrace-client/src/lib/apm/apm_fields.ts b/src/platform/packages/shared/kbn-synthtrace-client/src/lib/apm/apm_fields.ts index 2e989c1168abb..8f3bfd66d76f2 100644 --- a/src/platform/packages/shared/kbn-synthtrace-client/src/lib/apm/apm_fields.ts +++ b/src/platform/packages/shared/kbn-synthtrace-client/src/lib/apm/apm_fields.ts @@ -153,6 +153,7 @@ export type ApmFields = Fields<{ 'http.response.status_code': number; 'kubernetes.pod.name': string; 'kubernetes.pod.uid': string; + 'kubernetes.namespace': string; 'labels.name': string; 'labels.telemetry_auto_version': string; 'labels.lifecycle_state': string; @@ -213,6 +214,7 @@ export type ApmFields = Fields<{ 'url.original': string; 'url.domain': string; 'url.full': string; + 'url.path': string; }> & ApmApplicationMetricFields & ExperimentalFields; diff --git a/src/platform/packages/shared/kbn-synthtrace-client/src/lib/infra/host.ts b/src/platform/packages/shared/kbn-synthtrace-client/src/lib/infra/host.ts index 6a6d147f3c473..38d19eb4bd597 100644 --- a/src/platform/packages/shared/kbn-synthtrace-client/src/lib/infra/host.ts +++ b/src/platform/packages/shared/kbn-synthtrace-client/src/lib/infra/host.ts @@ -20,6 +20,8 @@ interface HostDocument extends Fields { 'host.name': string; 'metricset.name'?: string; 'event.module'?: string; + 'data_stream.dataset'?: string; + 'event.dataset'?: string; 'service.name'?: string; 'host.ip'?: string; 'host.os.name'?: string; diff --git a/src/platform/packages/shared/kbn-synthtrace/src/scenarios/agent_builder/index.ts b/src/platform/packages/shared/kbn-synthtrace/src/scenarios/agent_builder/index.ts index 33c1e234829d5..83eab35918763 100644 --- a/src/platform/packages/shared/kbn-synthtrace/src/scenarios/agent_builder/index.ts +++ b/src/platform/packages/shared/kbn-synthtrace/src/scenarios/agent_builder/index.ts @@ -14,7 +14,7 @@ export * from './tools/get_correlated_logs/correlated_logs'; export * from 
'./tools/get_downstream_dependencies/dependencies'; export * from './tools/get_alerts/alerts'; export * from './tools/get_alerts/apm_errors'; -export * from './tools/get_data_sources/data_sources'; export * from './tools/run_log_rate_analysis/log_rate_analysis_spike'; export * from './tools/get_anomaly_detection_jobs/anomalies'; export * from './tools/get_trace_metrics/trace_metrics'; +export * from './tools/get_index_info/field_discovery'; diff --git a/src/platform/packages/shared/kbn-synthtrace/src/scenarios/agent_builder/tools/get_data_sources/data_sources.ts b/src/platform/packages/shared/kbn-synthtrace/src/scenarios/agent_builder/tools/get_data_sources/data_sources.ts deleted file mode 100644 index ce9d6e30ed31e..0000000000000 --- a/src/platform/packages/shared/kbn-synthtrace/src/scenarios/agent_builder/tools/get_data_sources/data_sources.ts +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". - */ - -/** - * SCENARIO: Generated Data Sources - * - * Story: Generates minimal data across Logs and APM to verify `get_data_sources`. 
- * - * Data generated: - * - Logs: Simple log messages in `logs-web.access-default` - * - APM: Failed transactions with errors from `my-service` in production - * - * Validate via: - * - * ``` - * POST kbn:///api/agent_builder/tools/_execute - * { - * "tool_id": "observability.get_data_sources", - * "tool_params": {} - * } - * ``` - */ - -import type { ApmFields, LogDocument, Timerange } from '@kbn/synthtrace-client'; -import { log } from '@kbn/synthtrace-client'; -import { createCliScenario } from '../../../../lib/utils/create_scenario'; -import type { LogsSynthtraceEsClient } from '../../../../lib/logs/logs_synthtrace_es_client'; -import type { ApmSynthtraceEsClient } from '../../../../lib/apm/client/apm_synthtrace_es_client'; -import { withClient } from '../../../../lib/utils/with_client'; -import type { ScenarioReturnType } from '../../../../lib/utils/with_client'; -import { generateApmErrorData } from '../get_alerts/apm_errors'; - -export function generateSimpleLogsData({ - range, - logsEsClient, - message = 'simple log message', - dataset = 'web.access', -}: { - range: Timerange; - logsEsClient: LogsSynthtraceEsClient; - message?: string; - dataset?: string; -}): ScenarioReturnType { - const simpleLogs = range - .interval('1m') - .rate(1) - .generator((timestamp) => log.create().message(message).dataset(dataset).timestamp(timestamp)); - - return withClient(logsEsClient, simpleLogs); -} - -export function generateDataSourcesData({ - range, - logsEsClient, - apmEsClient, - serviceName = 'my-service', - environment = 'production', - language = 'go', -}: { - range: Timerange; - logsEsClient: LogsSynthtraceEsClient; - apmEsClient: ApmSynthtraceEsClient; - serviceName?: string; - environment?: string; - language?: string; -}): Array> { - // 1. Simple logs - const logs = generateSimpleLogsData({ range, logsEsClient }); - - // 2. 
APM error data (reuses generateApmErrorData from get_alerts) - const apmData = generateApmErrorData({ - range, - apmEsClient, - serviceName, - environment, - language, - }); - - return [logs, ...apmData]; -} - -export default createCliScenario(({ range, clients: { logsEsClient, apmEsClient } }) => - generateDataSourcesData({ range, logsEsClient, apmEsClient }) -); diff --git a/src/platform/packages/shared/kbn-synthtrace/src/scenarios/agent_builder/tools/get_index_info/field_discovery.ts b/src/platform/packages/shared/kbn-synthtrace/src/scenarios/agent_builder/tools/get_index_info/field_discovery.ts new file mode 100644 index 0000000000000..c1fe5d695f41f --- /dev/null +++ b/src/platform/packages/shared/kbn-synthtrace/src/scenarios/agent_builder/tools/get_index_info/field_discovery.ts @@ -0,0 +1,415 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +/** + * SCENARIO: Field Discovery Test Data + * + * Story: An SRE investigates a payment processing incident. The get_index_info tool + * helps them discover what data is available and drill down to specific fields. + * + * Drill-down demonstration: + * 1. get-index-patterns → See data streams: logs, metrics, traces + * 2. list-fields on metrics-system.cpu → Find system.cpu.* fields + * 3. get-field-values on host.name → See hosts: payment-host-01, order-host-02 + * 4. 
get-field-values on message (text) → See log message samples + * + * Services: + * - `payment-service` (production) - High latency issues + * - `order-service` (production) - Downstream dependency + * - `notification-service` (staging) - Healthy baseline + * + * Hosts: + * - `payment-host-01` (AWS, us-east-1): 85% CPU, 92% Memory - stressed + * - `order-host-02` (AWS, us-west-2): 45% CPU, 60% Memory - normal + * - `notification-host-03` (GCP, europe-west1): 25% CPU, 40% Memory - healthy + * + * Field types covered: + * - keyword: service.name, host.name, cloud.provider, log.level + * - numeric: system.cpu.total.norm.pct, transaction.duration.us + * - date: @timestamp + * - boolean: event.ingested (NOTE(review): ECS defines event.ingested as a date - confirm which boolean field this scenario actually produces) + * - text: message (log messages with varied content) + * + * Run: + * ``` + * node scripts/synthtrace \ + * src/platform/packages/shared/kbn-synthtrace/src/scenarios/agent_builder/tools/get_index_info/field_discovery.ts \ + * --from "now-15m" --to "now" --clean --workers=1 + * ``` + */ + +import type { ApmFields, InfraDocument, LogDocument, Timerange } from '@kbn/synthtrace-client'; +import { apm, generateShortId, infra, log } from '@kbn/synthtrace-client'; +import { createCliScenario } from '../../../../lib/utils/create_scenario'; +import { withClient, type ScenarioReturnType } from '../../../../lib/utils/with_client'; +import type { ApmSynthtraceEsClient } from '../../../../lib/apm/client/apm_synthtrace_es_client'; +import type { InfraSynthtraceEsClient } from '../../../../lib/infra/infra_synthtrace_es_client'; +import type { LogsSynthtraceEsClient } from '../../../../lib/logs/logs_synthtrace_es_client'; + +// ============================================================================= +// CONFIGURATION - Realistic incident scenario data +// ============================================================================= + +interface ServiceConfig { + name: string; + environment: string; + host: string; + agentName: 'nodejs' | 'java' | 'go'; + errorRate: number; // 0-1, 
probability of errors + avgLatencyMs: number; +} + +interface HostConfig { + name: string; + cpuUsage: number; + memoryUsage: number; + diskUsage: number; + cloudProvider: 'aws' | 'gcp'; + cloudRegion: string; + k8sNamespace: string; + k8sPodName: string; +} + +const SERVICES: ServiceConfig[] = [ + { + name: 'payment-service', + environment: 'production', + host: 'payment-host-01', + agentName: 'nodejs', + errorRate: 0.3, // High errors - incident + avgLatencyMs: 2500, + }, + { + name: 'order-service', + environment: 'production', + host: 'order-host-02', + agentName: 'java', + errorRate: 0.1, + avgLatencyMs: 150, + }, + { + name: 'notification-service', + environment: 'staging', + host: 'notification-host-03', + agentName: 'go', + errorRate: 0.02, + avgLatencyMs: 50, + }, +]; + +const HOSTS: HostConfig[] = [ + { + name: 'payment-host-01', + cpuUsage: 0.85, + memoryUsage: 0.92, + diskUsage: 0.78, + cloudProvider: 'aws', + cloudRegion: 'us-east-1', + k8sNamespace: 'production', + k8sPodName: 'payment-pod-abc123', + }, + { + name: 'order-host-02', + cpuUsage: 0.45, + memoryUsage: 0.6, + diskUsage: 0.35, + cloudProvider: 'aws', + cloudRegion: 'us-west-2', + k8sNamespace: 'production', + k8sPodName: 'order-pod-def456', + }, + { + name: 'notification-host-03', + cpuUsage: 0.25, + memoryUsage: 0.4, + diskUsage: 0.2, + cloudProvider: 'gcp', + cloudRegion: 'europe-west1', + k8sNamespace: 'staging', + k8sPodName: 'notification-pod-ghi789', + }, +]; + +// Diverse log messages to demonstrate text field sampling +const LOG_TEMPLATES = { + info: [ + 'Request processed successfully for user {userId} in {duration}ms', + 'Payment transaction {txId} completed for amount ${amount}', + 'Order {orderId} shipped to customer {customerId}', + 'Cache hit for key {cacheKey}, returning cached response', + 'Health check passed: database={dbStatus}, redis={redisStatus}', + ], + warn: [ + 'High latency detected: {endpoint} took {duration}ms (threshold: 500ms)', + 'Rate limit approaching for 
client {clientId}: {current}/{limit} requests', + 'Retry attempt {attempt}/3 for downstream service {service}', + 'Memory usage at {memPct}% - approaching threshold', + 'Connection pool exhausted, waiting for available connection', + ], + error: [ + 'Payment failed for transaction {txId}: {errorCode} - {errorMessage}', + 'Database connection timeout after {timeout}ms to {dbHost}', + 'External API error from {service}: HTTP {statusCode} - {responseBody}', + 'Authentication failed for user {userId}: invalid credentials', + 'Circuit breaker OPEN for {service} after {failures} consecutive failures', + ], +}; + +function generateLogMessage(level: 'info' | 'warn' | 'error'): string { + const templates = LOG_TEMPLATES[level]; + const template = templates[Math.floor(Math.random() * templates.length)]; + + // Replace placeholders with realistic values + return template + .replace('{userId}', `user-${Math.floor(Math.random() * 1000)}`) + .replace('{txId}', `tx-${generateShortId()}`) + .replace('{orderId}', `order-${Math.floor(Math.random() * 10000)}`) + .replace('{customerId}', `cust-${Math.floor(Math.random() * 500)}`) + .replace('{amount}', (Math.random() * 1000).toFixed(2)) + .replace('{duration}', String(Math.floor(Math.random() * 3000))) + .replace( + '{endpoint}', + ['/api/payment', '/api/orders', '/api/users'][Math.floor(Math.random() * 3)] + ) + .replace('{clientId}', `client-${Math.floor(Math.random() * 100)}`) + .replace('{current}', String(Math.floor(Math.random() * 900) + 100)) + .replace('{limit}', '1000') + .replace('{attempt}', String(Math.floor(Math.random() * 3) + 1)) + .replace( + '{service}', + ['payment-gateway', 'inventory-api', 'shipping-service'][Math.floor(Math.random() * 3)] + ) + .replace('{memPct}', String(Math.floor(Math.random() * 20) + 80)) + .replace( + '{cacheKey}', + `cache:${['user', 'product', 'session'][Math.floor(Math.random() * 3)]}:${generateShortId()}` + ) + .replace('{dbStatus}', 'healthy') + .replace('{redisStatus}', 'healthy') + 
.replace('{errorCode}', ['DECLINED', 'TIMEOUT', 'INVALID_CARD'][Math.floor(Math.random() * 3)]) + .replace('{errorMessage}', 'Transaction could not be processed') + .replace('{timeout}', String(Math.floor(Math.random() * 5000) + 5000)) + .replace('{dbHost}', 'db-primary.internal') + .replace('{statusCode}', ['500', '502', '503', '504'][Math.floor(Math.random() * 4)]) + .replace('{responseBody}', 'Service temporarily unavailable') + .replace('{failures}', String(Math.floor(Math.random() * 5) + 5)); +} + +// ============================================================================= +// GENERATORS - Reusable for API tests +// ============================================================================= + +export interface FieldDiscoveryDataParams { + range: Timerange; + infraEsClient: InfraSynthtraceEsClient; + apmEsClient: ApmSynthtraceEsClient; + logsEsClient: LogsSynthtraceEsClient; + hosts?: HostConfig[]; + services?: ServiceConfig[]; +} + +/** + * Generates comprehensive observability data for testing get_index_info. + * Exports reusable function for API integration tests. 
+ */ +export function generateFieldDiscoveryData({ + range, + infraEsClient, + apmEsClient, + logsEsClient, + hosts = HOSTS, + services = SERVICES, +}: FieldDiscoveryDataParams): Array> { + // ========================================================================= + // INFRASTRUCTURE METRICS (metrics-system.*) + // ========================================================================= + const infraData = range + .interval('30s') + .rate(1) + .generator((timestamp) => + hosts.flatMap((hostConfig) => { + const host = infra.host(hostConfig.name); + const totalMemory = 68_719_476_736; // 64GB + const usedMemory = Math.floor(totalMemory * hostConfig.memoryUsage); + + const baseOverrides = { + 'agent.id': 'synthtrace-field-discovery', + 'host.name': hostConfig.name, + 'host.hostname': hostConfig.name, + 'host.architecture': 'x86_64', + 'cloud.provider': hostConfig.cloudProvider, + 'cloud.region': hostConfig.cloudRegion, + 'kubernetes.namespace': hostConfig.k8sNamespace, + 'kubernetes.pod.name': hostConfig.k8sPodName, + }; + + return [ + host + .cpu({ 'system.cpu.total.norm.pct': hostConfig.cpuUsage }) + .overrides({ ...baseOverrides, 'data_stream.dataset': 'system.cpu' }) + .timestamp(timestamp), + host + .memory({ + 'system.memory.actual.free': totalMemory - usedMemory, + 'system.memory.actual.used.bytes': usedMemory, + 'system.memory.actual.used.pct': hostConfig.memoryUsage, + 'system.memory.total': totalMemory, + }) + .overrides({ ...baseOverrides, 'data_stream.dataset': 'system.memory' }) + .timestamp(timestamp), + host + .filesystem({ 'system.filesystem.used.pct': hostConfig.diskUsage }) + .overrides({ ...baseOverrides, 'data_stream.dataset': 'system.filesystem' }) + .timestamp(timestamp), + host + .load() + .overrides({ ...baseOverrides, 'data_stream.dataset': 'system.load' }) + .timestamp(timestamp), + host + .network() + .overrides({ ...baseOverrides, 'data_stream.dataset': 'system.network' }) + .timestamp(timestamp), + ]; + }) + ); + + // 
========================================================================= + // APM TRACES (traces-apm-*) + // ========================================================================= + const apmData = range + .interval('5s') + .rate(3) + .generator((timestamp) => + services.flatMap((serviceConfig) => { + const hostConfig = hosts.find((h) => h.name === serviceConfig.host)!; + const isError = Math.random() < serviceConfig.errorRate; + const latency = serviceConfig.avgLatencyMs * (0.5 + Math.random()); + + const instance = apm + .service({ + name: serviceConfig.name, + environment: serviceConfig.environment, + agentName: serviceConfig.agentName, + }) + .instance(`${serviceConfig.name}-instance`) + .defaults({ + 'host.name': hostConfig.name, + 'cloud.provider': hostConfig.cloudProvider, + 'cloud.region': hostConfig.cloudRegion, + 'kubernetes.namespace': hostConfig.k8sNamespace, + }); + + const endpoints = [ + 'GET /api/health', + 'POST /api/payment', + 'GET /api/orders', + 'POST /api/checkout', + ]; + const transactionName = endpoints[Math.floor(Math.random() * endpoints.length)]; + + const transaction = instance + .transaction({ transactionName }) + .timestamp(timestamp) + .duration(latency); + + if (isError) { + return [ + transaction.failure().defaults({ 'http.response.status_code': 500 }), + instance + .error({ + message: `Error in ${transactionName}: Connection timeout`, + type: 'TimeoutError', + }) + .timestamp(timestamp), + ]; + } + + return [ + transaction + .success() + .defaults({ 'http.response.status_code': 200 }) + .children( + instance + .span({ + spanName: 'SELECT * FROM orders', + spanType: 'db', + spanSubtype: 'postgresql', + }) + .duration(latency * 0.4) + .success() + .timestamp(timestamp) + ), + ]; + }) + ); + + // ========================================================================= + // LOGS (logs-generic-*) + // Includes diverse text messages for text field sampling tests + // 
========================================================================= + const logsData = range + .interval('3s') + .rate(2) + .generator((timestamp) => { + const serviceConfig = services[Math.floor(Math.random() * services.length)]; + const hostConfig = hosts.find((h) => h.name === serviceConfig.host)!; + + // Weight log levels based on service error rate + const rand = Math.random(); + const level: 'info' | 'warn' | 'error' = + rand < serviceConfig.errorRate + ? 'error' + : rand < serviceConfig.errorRate + 0.2 + ? 'warn' + : 'info'; + + const message = generateLogMessage(level); + + return log + .create() + .dataset('generic') + .namespace(serviceConfig.environment) + .message(message) + .logLevel(level) + .service(serviceConfig.name) + .hostName(hostConfig.name) + .containerId(`container-${hostConfig.name}`) + .defaults({ + 'service.environment': serviceConfig.environment, + 'cloud.provider': hostConfig.cloudProvider, + 'cloud.region': hostConfig.cloudRegion, + 'kubernetes.namespace': hostConfig.k8sNamespace, + 'kubernetes.pod.name': hostConfig.k8sPodName, + 'trace.id': generateShortId(), + 'agent.name': 'filebeat', + }) + .timestamp(timestamp); + }); + + return [ + withClient(infraEsClient, infraData), + withClient(apmEsClient, apmData), + withClient(logsEsClient, logsData), + ]; +} + +// ============================================================================= +// CLI SCENARIO +// ============================================================================= + +export default createCliScenario( + ({ range, clients: { infraEsClient, apmEsClient, logsEsClient } }) => { + return generateFieldDiscoveryData({ + range, + infraEsClient, + apmEsClient, + logsEsClient, + }); + } +); diff --git a/x-pack/platform/packages/shared/agent-builder/agent-builder-server/allow_lists.ts b/x-pack/platform/packages/shared/agent-builder/agent-builder-server/allow_lists.ts index b8984e012c037..0b8e7748509ef 100644 --- 
a/x-pack/platform/packages/shared/agent-builder/agent-builder-server/allow_lists.ts +++ b/x-pack/platform/packages/shared/agent-builder/agent-builder-server/allow_lists.ts @@ -17,7 +17,6 @@ export const AGENT_BUILDER_BUILTIN_TOOLS: string[] = [ ...Object.values(platformCoreTools), // Observability - `${internalNamespaces.observability}.get_data_sources`, `${internalNamespaces.observability}.get_anomaly_detection_jobs`, `${internalNamespaces.observability}.run_log_rate_analysis`, `${internalNamespaces.observability}.get_log_categories`, @@ -29,6 +28,7 @@ export const AGENT_BUILDER_BUILTIN_TOOLS: string[] = [ `${internalNamespaces.observability}.get_trace_metrics`, `${internalNamespaces.observability}.get_log_change_points`, `${internalNamespaces.observability}.get_metric_change_points`, + `${internalNamespaces.observability}.get_index_info`, // Dashboards 'platform.dashboard.create_dashboard', diff --git a/x-pack/solutions/observability/plugins/observability_agent_builder/server/agent/register_observability_agent.ts b/x-pack/solutions/observability/plugins/observability_agent_builder/server/agent/register_observability_agent.ts index 29a54bb34290f..e4679131a0dd1 100644 --- a/x-pack/solutions/observability/plugins/observability_agent_builder/server/agent/register_observability_agent.ts +++ b/x-pack/solutions/observability/plugins/observability_agent_builder/server/agent/register_observability_agent.ts @@ -6,12 +6,14 @@ */ import type { CoreSetup, Logger } from '@kbn/core/server'; +import dedent from 'dedent'; import type { ObservabilityAgentBuilderPluginSetupDependencies, ObservabilityAgentBuilderPluginStartDependencies, ObservabilityAgentBuilderPluginStart, } from '../types'; import { OBSERVABILITY_AGENT_TOOL_IDS } from '../tools/register_tools'; +import { OBSERVABILITY_GET_INDEX_INFO_TOOL_ID } from '../tools'; import { getAgentBuilderResourceAvailability } from '../utils/get_agent_builder_resource_availability'; export const OBSERVABILITY_AGENT_ID = 
'observability.agent'; @@ -40,14 +42,49 @@ export async function registerObservabilityAgent({ }, }, configuration: { - instructions: 'You are an observability specialist agent.\n', - tools: [ - { - tool_ids: OBSERVABILITY_AGENT_TOOL_IDS, - }, - ], + instructions: + dedent(`You are an observability specialist agent that helps Site Reliability Engineers (SREs) investigate incidents and understand system health. + + ${getInvestigationInstructions()} + ${getReasoningInstructions()} + ${getFieldDiscoveryInstructions()} + `), + tools: [{ tool_ids: OBSERVABILITY_AGENT_TOOL_IDS }], }, }); logger.debug('Successfully registered observability agent in agent-builder'); } + +function getInvestigationInstructions() { + return dedent(` + ### INVESTIGATION APPROACH + + Follow a progressive workflow - start broad, then narrow down: + 1. **Triage**: What's the severity? How many users/services affected? + 2. **Scope**: Which components are affected? What's the blast radius? + 3. **Timeline**: When did it start? What changed before symptoms appeared? + 4. **Correlation**: What error patterns exist? What's the sequence of events? + 5. **Root Cause**: Distinguish the SOURCE (where the problem started) from AFFECTED services (impacted downstream) + 6. **Verification**: Does your hypothesis explain ALL the symptoms? If not, dig deeper. + `); +} + +function getReasoningInstructions() { + return dedent(` + ### REASONING PRINCIPLES + + - **Be quantitative**: Quote specific metrics (error rate %, latency ms, throughput rpm). Avoid vague terms like "high" without numbers. + - **Correlation ≠ causation**: Look for temporal sequence (what happened FIRST) and causal mechanism. + - **Consider all layers**: Infrastructure (CPU, memory, disk) → Application (latency, throughput, failure rate) → Dependencies (databases, caches, external APIs). + - **Follow evidence**: Support hypotheses with data. Acknowledge uncertainty when evidence is inconclusive. 
+ `); +} + +function getFieldDiscoveryInstructions() { + return dedent(` + ### FIELD DISCOVERY + Before using field names in \`groupBy\`, \`kqlFilter\`, or \`aggregation.field\` parameters, call \`${OBSERVABILITY_GET_INDEX_INFO_TOOL_ID}\` first. + Clusters use different naming conventions (ECS vs OpenTelemetry) - discovering fields first prevents errors. + `); +} diff --git a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/AGENTS.md b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/AGENTS.md index 7fba444715249..f541e9affcabd 100644 --- a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/AGENTS.md +++ b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/AGENTS.md @@ -263,6 +263,26 @@ curl -X POST http://localhost:5601/api/agent_builder/tools/_execute \ }' ``` +### Chatting with the Observability Agent + +Test your tools end-to-end by chatting with the Observability agent: + +```bash +curl -X POST http://localhost:5601/api/agent_builder/converse \ + -u elastic:changeme \ + -H 'kbn-xsrf: true' \ + -H 'Content-Type: application/json' \ + -d '{ + "input": "What services are experiencing issues?", + "agent_id": "observability.agent" + }' +``` + +Notes: + +- To continue a conversation, include the `conversation_id` from the previous response +- For streaming responses (SSE), use the `/api/agent_builder/converse/async` endpoint + ### API Integration Tests The Kibana API test server: http://elastic:changeme@localhost:5620 diff --git a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_alerts/tool.ts b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_alerts/tool.ts index d2b68676b5ab4..fb77d986ea30f 100644 --- a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_alerts/tool.ts +++ 
b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_alerts/tool.ts @@ -106,13 +106,7 @@ Supports filtering by status (active/recovered) and KQL queries.`, }, }, handler: async (toolParams, { request }) => { - const { - start = DEFAULT_TIME_RANGE.start, - end = DEFAULT_TIME_RANGE.end, - kqlFilter, - includeRecovered, - fields, - } = toolParams; + const { start, end, kqlFilter, includeRecovered, fields } = toolParams; try { const { alerts, total } = await getToolHandler({ diff --git a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_anomaly_detection_jobs/tool.ts b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_anomaly_detection_jobs/tool.ts index ca09877d0c58f..bb5c7287ea2c0 100644 --- a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_anomaly_detection_jobs/tool.ts +++ b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_anomaly_detection_jobs/tool.ts @@ -95,8 +95,8 @@ When to use: const { jobIds, limit: jobsLimit = DEFAULT_JOBS_LIMIT, - start: rangeStart = DEFAULT_TIME_RANGE.start, - end: rangeEnd = DEFAULT_TIME_RANGE.end, + start: rangeStart, + end: rangeEnd, } = toolParams; const scopedEsClient = esClient.asCurrentUser; const mlClient = scopedEsClient.ml; diff --git a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_correlated_logs/handler.ts b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_correlated_logs/handler.ts index 0a392267996a1..7d10a69aa5394 100644 --- a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_correlated_logs/handler.ts +++ b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_correlated_logs/handler.ts @@ -13,7 +13,7 @@ import type { } from '../../types'; import { getLogsIndices } from '../../utils/get_logs_indices'; import { 
parseDatemath } from '../../utils/time'; -import { DEFAULT_CORRELATION_IDENTIFIER_FIELDS, DEFAULT_LOG_SOURCE_FIELDS } from './constants'; +import { DEFAULT_CORRELATION_IDENTIFIER_FIELDS } from './constants'; import { getAnchorLogs } from './fetch_anchor_logs/fetch_anchor_logs'; import { getCorrelatedLogsForAnchor } from './get_correlated_logs_for_anchor'; @@ -66,13 +66,13 @@ export async function getToolHandler({ start, end, kqlFilter, - errorLogsOnly = true, + errorLogsOnly, index, - correlationFields = DEFAULT_CORRELATION_IDENTIFIER_FIELDS, + correlationFields, logId, - logSourceFields = DEFAULT_LOG_SOURCE_FIELDS, - maxSequences = 10, - maxLogsPerSequence = 200, + logSourceFields, + maxSequences, + maxLogsPerSequence, }: { core: CoreSetup< ObservabilityAgentBuilderPluginStartDependencies, @@ -83,13 +83,13 @@ export async function getToolHandler({ start: string; end: string; kqlFilter?: string; - errorLogsOnly?: boolean; + errorLogsOnly: boolean; index?: string; - correlationFields?: string[]; + correlationFields: string[]; logId?: string; - logSourceFields?: string[]; - maxSequences?: number; - maxLogsPerSequence?: number; + logSourceFields: string[]; + maxSequences: number; + maxLogsPerSequence: number; }) { const logsIndices = index?.split(',') ?? 
(await getLogsIndices({ core, logger })); const startTime = parseDatemath(start); diff --git a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_correlated_logs/tool.ts b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_correlated_logs/tool.ts index 9b5a85bbfc15f..74f8544a70d44 100644 --- a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_correlated_logs/tool.ts +++ b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_correlated_logs/tool.ts @@ -108,8 +108,8 @@ Do NOT use for: }, handler: async (toolParams, { esClient }) => { const { - start = DEFAULT_TIME_RANGE.start, - end = DEFAULT_TIME_RANGE.end, + start, + end, kqlFilter, errorLogsOnly = true, index, diff --git a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_data_sources/README.md b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_data_sources/README.md deleted file mode 100644 index 5cb313b444326..0000000000000 --- a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_data_sources/README.md +++ /dev/null @@ -1,13 +0,0 @@ -# get_data_sources - -Retrieve information about where observability data (logs, metrics, traces, alerts) is stored in Elasticsearch. Use this tool to discover which indices or index patterns to query for different types of observability signals. 
- -## Example - -``` -POST kbn://api/agent_builder/tools/_execute -{ - "tool_id": "observability.get_data_sources", - "tool_params": {} -} -``` diff --git a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_data_sources/handler.ts b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_data_sources/handler.ts deleted file mode 100644 index ca7c2568927bc..0000000000000 --- a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_data_sources/handler.ts +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -import type { CoreSetup, Logger } from '@kbn/core/server'; -import type { - ObservabilityAgentBuilderPluginSetupDependencies, - ObservabilityAgentBuilderPluginStart, - ObservabilityAgentBuilderPluginStartDependencies, -} from '../../types'; -import { getObservabilityDataSources } from '../../utils/get_observability_data_sources'; - -export async function getToolHandler({ - core, - plugins, - logger, -}: { - core: CoreSetup< - ObservabilityAgentBuilderPluginStartDependencies, - ObservabilityAgentBuilderPluginStart - >; - plugins: ObservabilityAgentBuilderPluginSetupDependencies; - logger: Logger; -}) { - const { - apmIndexPatterns: apmIndices, - logIndexPatterns, - metricIndexPatterns, - alertsIndexPattern, - } = await getObservabilityDataSources({ core, plugins, logger }); - - return { - apm: { - indexPatterns: apmIndices, - }, - logs: { - indexPatterns: logIndexPatterns, - }, - metrics: { - indexPatterns: metricIndexPatterns, - }, - alerts: { - indexPattern: alertsIndexPattern, - }, - }; -} diff --git a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_data_sources/tool.ts 
b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_data_sources/tool.ts deleted file mode 100644 index 88f0ccf42663c..0000000000000 --- a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_data_sources/tool.ts +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -import { z } from '@kbn/zod'; -import { ToolType } from '@kbn/agent-builder-common'; -import { ToolResultType } from '@kbn/agent-builder-common/tools/tool_result'; -import type { BuiltinToolDefinition, StaticToolRegistration } from '@kbn/agent-builder-server'; -import type { CoreSetup, Logger } from '@kbn/core/server'; -import { getAgentBuilderResourceAvailability } from '../../utils/get_agent_builder_resource_availability'; -import type { - ObservabilityAgentBuilderPluginSetupDependencies, - ObservabilityAgentBuilderPluginStart, - ObservabilityAgentBuilderPluginStartDependencies, -} from '../../types'; -import { getToolHandler } from './handler'; - -export const OBSERVABILITY_GET_DATA_SOURCES_TOOL_ID = 'observability.get_data_sources'; - -const getDataSourcesSchema = z.object({}); - -export function createGetDataSourcesTool({ - core, - plugins, - logger, -}: { - core: CoreSetup< - ObservabilityAgentBuilderPluginStartDependencies, - ObservabilityAgentBuilderPluginStart - >; - plugins: ObservabilityAgentBuilderPluginSetupDependencies; - logger: Logger; -}): StaticToolRegistration { - const toolDefinition: BuiltinToolDefinition = { - id: OBSERVABILITY_GET_DATA_SOURCES_TOOL_ID, - type: ToolType.builtin, - description: - 'Lists the Elasticsearch indices and index patterns where observability data (logs, metrics, traces, alerts) is stored. 
Essential for determining the correct indices to target in subsequent queries.', - schema: getDataSourcesSchema, - tags: ['observability'], - availability: { - cacheMode: 'space', - handler: async ({ request }) => { - return getAgentBuilderResourceAvailability({ core, request, logger }); - }, - }, - handler: async () => { - try { - const data = await getToolHandler({ core, plugins, logger }); - - return { - results: [ - { - type: ToolResultType.other, - data, - }, - ], - }; - } catch (error) { - logger.error(`Error getting observability data sources: ${error.message}`); - logger.debug(error); - return { - results: [ - { - type: ToolResultType.error, - data: { - message: `Failed to retrieve observability data sources: ${error.message}`, - stack: error.stack, - }, - }, - ], - }; - } - }, - }; - - return toolDefinition; -} diff --git a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_downstream_dependencies/tool.ts b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_downstream_dependencies/tool.ts index 2d77203835187..128ff650a625a 100644 --- a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_downstream_dependencies/tool.ts +++ b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_downstream_dependencies/tool.ts @@ -65,12 +65,7 @@ When to use: }, }, handler: async (toolParams, context) => { - const { - serviceName, - serviceEnvironment, - start = DEFAULT_TIME_RANGE.start, - end = DEFAULT_TIME_RANGE.end, - } = toolParams; + const { serviceName, serviceEnvironment, start, end } = toolParams; const { request } = context; try { diff --git a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_hosts/tool.ts b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_hosts/tool.ts index b796af4eab5ec..163a49921463d 100644 --- 
a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_hosts/tool.ts +++ b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_hosts/tool.ts @@ -98,12 +98,7 @@ Returns host names, metrics (CPU percentage, memory usage, disk space, network r toolParams, { request } ): Promise> => { - const { - start = DEFAULT_TIME_RANGE.start, - end = DEFAULT_TIME_RANGE.end, - limit = DEFAULT_LIMIT, - kqlFilter, - } = toolParams; + const { start, end, limit = DEFAULT_LIMIT, kqlFilter } = toolParams; try { const { hosts, total } = await getToolHandler({ diff --git a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_index_info/README.md b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_index_info/README.md new file mode 100644 index 0000000000000..ca3d4b73f3c81 --- /dev/null +++ b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_index_info/README.md @@ -0,0 +1,104 @@ +# get_index_info Tool + +Discovers available index patterns, data streams, fields, and field values in the user's Elasticsearch cluster. + +## Operations + +### 1. `get-index-patterns` — Index Patterns and Data Streams + +``` +POST kbn://api/agent_builder/tools/_execute +{ + "tool_id": "observability.get_index_info", + "tool_params": { + "operation": "get-index-patterns" + } +} +``` + +Use data streams for targeted field discovery (e.g., `metrics-system.memory-*` for memory fields). + +### 2. 
`list-fields` — Fields in an Index + +Use a specific data stream for better results: + +``` +POST kbn://api/agent_builder/tools/_execute +{ + "tool_id": "observability.get_index_info", + "tool_params": { + "operation": "list-fields", + "index": "metrics-system.memory-*" + } +} +``` + +When `intent` is provided and more than 100 fields are found, an LLM filters them down to the relevant ones: + +``` +POST kbn://api/agent_builder/tools/_execute +{ + "tool_id": "observability.get_index_info", + "tool_params": { + "operation": "list-fields", + "index": "logs-*", + "intent": "high latency" + } +} +``` + +### 3. `get-field-values` — Field Values + +Single field: + +``` +POST kbn://api/agent_builder/tools/_execute +{ + "tool_id": "observability.get_index_info", + "tool_params": { + "operation": "get-field-values", + "index": "traces-apm*", + "fields": ["service.name"] + } +} +``` + +Wildcard patterns — discover values for all matching fields at once: + +``` +POST kbn://api/agent_builder/tools/_execute +{ + "tool_id": "observability.get_index_info", + "tool_params": { + "operation": "get-field-values", + "index": "traces-*", + "fields": ["attributes.app.*"] + } +} +``` + +With time range and KQL filter: + +``` +POST kbn://api/agent_builder/tools/_execute +{ + "tool_id": "observability.get_index_info", + "tool_params": { + "operation": "get-field-values", + "index": "logs-*", + "fields": ["host.name"], + "start": "now-1h", + "end": "now", + "kqlFilter": "service.name: checkout" + } +} +``` + +## Example Workflow + +``` +1. get-index-patterns → Discover data streams (e.g., metrics-system.memory-*) +2. list-fields → Get fields from a specific data stream +3. get-field-values → Get valid values for filtering +4.
Build kqlFilter → "host.name: discover-host-01" +``` diff --git a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_index_info/get_data_streams_handler.ts b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_index_info/get_data_streams_handler.ts new file mode 100644 index 0000000000000..71f1c00a0667d --- /dev/null +++ b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_index_info/get_data_streams_handler.ts @@ -0,0 +1,73 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import type { IScopedClusterClient, Logger } from '@kbn/core/server'; +import { sortBy } from 'lodash'; +import type { ObservabilityDataSources } from '../../utils/get_observability_data_sources'; + +/** Information about an individual data stream */ +export interface DataStreamInfo { + /** Full data stream name (e.g., "metrics-system.memory-default") */ + name: string; + /** Dataset extracted from name (e.g., "system.memory") */ + dataset: string; +} + +/** + * Extracts the dataset from a data stream name. + * Data stream names follow the pattern: {type}-{dataset}-{namespace} + * e.g., "metrics-system.memory-default" -> "system.memory" + */ +function extractDataset(name: string): string { + const parts = name.split('-'); + return parts.slice(1, -1).join('-'); +} + +/** + * Discovers observability data streams in the cluster. + * Returns a flat list of data streams with their datasets, sorted by name. + * + * Uses the configured observability index patterns (from getObservabilityDataSources) + * to ensure consistency and support for CCS (Cross-Cluster Search) if configured. 
+ * + * @example + * // Returns: + * [ + * { name: "logs-apm.error-default", dataset: "apm.error" }, + * { name: "metrics-system.cpu-default", dataset: "system.cpu" }, + * { name: "traces-apm-default", dataset: "apm" } + * ] + */ +export async function getDataStreamsHandler({ + esClient, + dataSources, + logger, +}: { + esClient: IScopedClusterClient; + dataSources: ObservabilityDataSources; + logger: Logger; +}): Promise { + try { + // Build pattern from configured observability index patterns (supports CCS) + const indexPatterns = [ + ...dataSources.logIndexPatterns, + ...dataSources.metricIndexPatterns, + dataSources.apmIndexPatterns.transaction, + dataSources.apmIndexPatterns.span, + ].join(','); + + const response = await esClient.asCurrentUser.indices.getDataStream({ name: indexPatterns }); + + return sortBy( + response.data_streams.map((ds) => ({ name: ds.name, dataset: extractDataset(ds.name) })), + 'name' + ); + } catch (error) { + logger.error(`Error retrieving data streams: ${error}`); + return []; + } +} diff --git a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_index_info/get_field_type.ts b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_index_info/get_field_type.ts new file mode 100644 index 0000000000000..8059980deaa70 --- /dev/null +++ b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_index_info/get_field_type.ts @@ -0,0 +1,16 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import type { FieldCapsFieldCapability } from '@elastic/elasticsearch/lib/api/types'; + +const EXCLUDED_FIELD_TYPES = ['unmapped', 'object', 'nested']; + +export function getFieldType( + fieldTypes: Record +): string | undefined { + return Object.keys(fieldTypes).find((type) => !EXCLUDED_FIELD_TYPES.includes(type)); +} diff --git a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_index_info/get_field_values_handler.ts b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_index_info/get_field_values_handler.ts new file mode 100644 index 0000000000000..b640acf5a916d --- /dev/null +++ b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_index_info/get_field_values_handler.ts @@ -0,0 +1,366 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import type { QueryDslQueryContainer } from '@elastic/elasticsearch/lib/api/types'; +import type { IScopedClusterClient } from '@kbn/core/server'; +import { groupBy, keyBy, mapValues } from 'lodash'; +import { getTypedSearch } from '../../utils/get_typed_search'; +import { timeRangeFilter, kqlFilter as toKqlFilter } from '../../utils/dsl_filters'; +import { parseDatemath } from '../../utils/time'; +import { getFieldType } from './get_field_type'; + +const MAX_KEYWORD_VALUES = 50; +const MAX_TEXT_SAMPLES = 5; +const MAX_CHARS_PER_SAMPLE = 500; + +const KEYWORD_TYPES = ['keyword', 'constant_keyword', 'ip']; +const NUMERIC_TYPES = [ + 'long', + 'integer', + 'short', + 'byte', + 'double', + 'float', + 'half_float', + 'scaled_float', + 'unsigned_long', +]; +const DATE_TYPES = ['date', 'date_nanos']; +const BOOLEAN_TYPES = ['boolean']; +const TEXT_TYPES = ['text', 'match_only_text']; + +interface KeywordFieldResult { + type: 'keyword'; + field: string; + values: string[]; + hasMoreValues: boolean; +} +interface NumericFieldResult { + type: 'numeric'; + field: string; + min: number; + max: number; +} +interface DateFieldResult { + type: 'date'; + field: string; + min: string; + max: string; +} +interface BooleanFieldResult { + type: 'boolean'; + field: string; +} +interface TextFieldResult { + type: 'text'; + field: string; + samples: string[]; +} +interface UnsupportedFieldResult { + type: 'unsupported'; + field: string; + fieldType: string; +} +interface FieldErrorResult { + type: 'error'; + field: string; + message: string; +} + +export interface FieldValuesRecordResult { + fields: Record< + string, + | KeywordFieldResult + | NumericFieldResult + | DateFieldResult + | BooleanFieldResult + | TextFieldResult + | UnsupportedFieldResult + | FieldErrorResult + >; +} + +/** Gets distinct values for a single keyword field via termsEnum with optional filtering */ +async function getKeywordFieldValues( + esClient: IScopedClusterClient, + index: string, + field: string, 
+ queryFilter?: QueryDslQueryContainer +): Promise { + try { + const { terms } = await esClient.asCurrentUser.termsEnum({ + index, + field, + size: MAX_KEYWORD_VALUES + 1, + index_filter: queryFilter, + }); + + return { + type: 'keyword', + field, + values: terms.slice(0, MAX_KEYWORD_VALUES), + hasMoreValues: terms.length > MAX_KEYWORD_VALUES, + }; + } catch (error) { + return { type: 'error', field, message: error.message }; + } +} + +/** Gets min/max for multiple numeric fields in a single request */ +async function getNumericFieldValuesBatch( + esClient: IScopedClusterClient, + index: string, + fields: string[], + queryFilter?: QueryDslQueryContainer +): Promise> { + if (fields.length === 0) return {}; + + const search = getTypedSearch(esClient.asCurrentUser); + const response = await search({ + index, + size: 0, + track_total_hits: false, + query: queryFilter, + aggs: Object.fromEntries(fields.map((field) => [field, { stats: { field } }])), + }); + + return keyBy( + fields.map((field): NumericFieldResult | FieldErrorResult => { + const stats = response.aggregations?.[field]; + if (stats?.min != null && stats?.max != null) { + return { type: 'numeric', field, min: stats.min, max: stats.max }; + } + return { type: 'error', field, message: 'No numeric values found' }; + }), + 'field' + ); +} + +/** Gets min/max for multiple date fields in a single request */ +async function getDateFieldValuesBatch( + esClient: IScopedClusterClient, + index: string, + fields: string[], + queryFilter?: QueryDslQueryContainer +): Promise> { + if (fields.length === 0) return {}; + + const aggs = Object.fromEntries( + fields.flatMap((field) => [ + [`${field}_min`, { min: { field } }], + [`${field}_max`, { max: { field } }], + ]) + ) as Record; + + const search = getTypedSearch(esClient.asCurrentUser); + const response = await search({ + index, + size: 0, + track_total_hits: false, + query: queryFilter, + aggs, + }); + + return keyBy( + fields.map((field): DateFieldResult | 
FieldErrorResult => { + const minAgg = response.aggregations?.[`${field}_min`]; + const maxAgg = response.aggregations?.[`${field}_max`]; + if (minAgg?.value_as_string && maxAgg?.value_as_string) { + return { type: 'date', field, min: minAgg.value_as_string, max: maxAgg.value_as_string }; + } + return { type: 'error', field, message: 'No date values found' }; + }), + 'field' + ); +} + +/** Gets sample values for multiple text fields in a single request */ +async function getTextFieldSampleValues( + esClient: IScopedClusterClient, + index: string, + fields: string[], + queryFilter?: QueryDslQueryContainer +): Promise> { + if (fields.length === 0) return {}; + + const response = await esClient.asCurrentUser.search({ + index, + size: MAX_TEXT_SAMPLES, + track_total_hits: false, + query: queryFilter, + _source: false, + fields, + }); + + return Object.fromEntries( + fields.map((field): [string, TextFieldResult | FieldErrorResult] => { + const samples = response.hits.hits + .flatMap((hit) => hit.fields?.[field] ?? []) + .filter((v): v is string => typeof v === 'string') + .slice(0, MAX_TEXT_SAMPLES) + .map((v) => (v.length > MAX_CHARS_PER_SAMPLE ? 
v.slice(0, MAX_CHARS_PER_SAMPLE) + '…' : v)); + + if (samples.length === 0) { + return [field, { type: 'error', field, message: 'No text values found' }]; + } + return [field, { type: 'text', field, samples }]; + }) + ); +} + +/** Converts a wildcard pattern to a regex */ +function wildcardToRegex(pattern: string): RegExp { + return new RegExp(`^${pattern.replace(/\./g, '\\.').replace(/\*/g, '.*')}$`); +} + +/** Determines the category for a field type */ +function getFieldCategory( + fieldType: string +): 'keyword' | 'numeric' | 'date' | 'boolean' | 'text' | 'unsupported' { + if (KEYWORD_TYPES.includes(fieldType)) return 'keyword'; + if (NUMERIC_TYPES.includes(fieldType)) return 'numeric'; + if (DATE_TYPES.includes(fieldType)) return 'date'; + if (BOOLEAN_TYPES.includes(fieldType)) return 'boolean'; + if (TEXT_TYPES.includes(fieldType)) return 'text'; + return 'unsupported'; +} + +interface ResolvedValidField { + field: string; + fieldType: string; + category: ReturnType; +} +interface ResolvedErrorField { + input: string; + error: string; +} +type ResolvedField = ResolvedValidField | ResolvedErrorField; + +/** Resolves an input (field name or wildcard) to concrete fields or an error */ +function resolveInputToConcreteFields( + input: string, + allFieldNames: string[], + fieldNameToTypeMap: Record +): ResolvedField[] { + const isWildcard = input.includes('*'); + const matchingFields = isWildcard + ? allFieldNames.filter((f) => wildcardToRegex(input).test(f) && fieldNameToTypeMap[f]) + : fieldNameToTypeMap[input] + ? [input] + : []; + + if (matchingFields.length === 0) { + return [ + { + input, + error: isWildcard ? `No fields match pattern "${input}"` : `Field "${input}" not found`, + }, + ]; + } + + return matchingFields.map((field) => ({ + field, + fieldType: fieldNameToTypeMap[field]!, + category: getFieldCategory(fieldNameToTypeMap[field]!), + })); +} + +/** + * Field value discovery - returns values/ranges for multiple fields. 
+ * Batches requests by field type to minimize ES calls. + * Supports wildcard patterns in field names (e.g., "attributes.*"). + */ +export async function getFieldValuesHandler({ + esClient, + index, + fields, + start, + end, + kqlFilter, +}: { + esClient: IScopedClusterClient; + index: string; + fields: string[]; + start: string; + end: string; + kqlFilter?: string; +}): Promise { + const queryFilter = { + bool: { + filter: [ + ...timeRangeFilter('@timestamp', { + start: parseDatemath(start), + end: parseDatemath(end, { roundUp: true }), + }), + ...toKqlFilter(kqlFilter), + ], + }, + }; + + // fieldCaps expands wildcards and returns field types + // Note: also returns parent object fields from passthrough types (filtered out below) + const capsResponse = await esClient.asCurrentUser.fieldCaps({ + index, + fields, + ignore_unavailable: true, + allow_no_indices: true, + index_filter: queryFilter, + }); + + // Map field names to their concrete types (undefined for object/nested/unmapped) + const fieldNameToTypeMap = mapValues(capsResponse.fields, getFieldType); + const allFieldNames = Object.keys(fieldNameToTypeMap); + + // Resolve all inputs to concrete fields or errors + const concreteFields = fields.flatMap((input) => + resolveInputToConcreteFields(input, allFieldNames, fieldNameToTypeMap) + ); + const errors = concreteFields.filter((r): r is ResolvedErrorField => 'error' in r); + const validFields = concreteFields.filter((r): r is ResolvedValidField => 'field' in r); + + // Group valid fields by category + const byCategory = groupBy(validFields, (r) => r.category); + const keywordFields = (byCategory.keyword ?? []).map((r) => r.field); + const numericFields = (byCategory.numeric ?? []).map((r) => r.field); + const dateFields = (byCategory.date ?? []).map((r) => r.field); + const booleanFields = (byCategory.boolean ?? []).map((r) => r.field); + const textFields = (byCategory.text ?? []).map((r) => r.field); + const unsupportedFields = byCategory.unsupported ?? 
[]; + + // Fetch values in parallel by type + const [keywordResults, numericResults, dateResults, textResults] = await Promise.all([ + Promise.all(keywordFields.map((f) => getKeywordFieldValues(esClient, index, f, queryFilter))), + getNumericFieldValuesBatch(esClient, index, numericFields, queryFilter), + getDateFieldValuesBatch(esClient, index, dateFields, queryFilter), + getTextFieldSampleValues(esClient, index, textFields, queryFilter), + ]); + + const errorResults = Object.fromEntries( + errors.map((e) => [e.input, { type: 'error' as const, field: e.input, message: e.error }]) + ); + const unsupportedResults = Object.fromEntries( + unsupportedFields.map((r) => [ + r.field, + { type: 'unsupported' as const, field: r.field, fieldType: r.fieldType }, + ]) + ); + const booleanResults = Object.fromEntries( + booleanFields.map((f) => [f, { type: 'boolean' as const, field: f }]) + ); + + return { + fields: { + ...errorResults, + ...unsupportedResults, + ...booleanResults, + ...keyBy(keywordResults, 'field'), + ...numericResults, + ...dateResults, + ...textResults, + }, + }; +} diff --git a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_index_info/get_index_fields_handler.ts b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_index_info/get_index_fields_handler.ts new file mode 100644 index 0000000000000..b066830ac4cf6 --- /dev/null +++ b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_index_info/get_index_fields_handler.ts @@ -0,0 +1,181 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +import type { IScopedClusterClient, Logger } from '@kbn/core/server'; +import type { ModelProvider } from '@kbn/agent-builder-server'; +import { compact, groupBy, mapValues, uniq } from 'lodash'; +import { selectRelevantFields } from './select_relevant_fields'; +import { getFieldType } from './get_field_type'; +import { timeRangeFilter, kqlFilter as toKqlFilter } from '../../utils/dsl_filters'; +import { parseDatemath } from '../../utils/time'; + +export interface IndexFieldsResult { + fieldsByType: Record; + message?: string; +} + +/** Threshold above which LLM filtering is applied */ +const MIN_FIELDS_FOR_INTENT_FILTERING = 100; + +/** Number of documents to sample for field discovery */ +const SAMPLE_SIZE = 1000; + +/** + * Extracts all field paths from a nested object. + * e.g., { a: { b: 1, c: 2 } } -> ['a.b', 'a.c'] + */ +function extractFieldPaths(obj: Record, prefix = ''): string[] { + return Object.entries(obj).flatMap(([key, value]) => { + const path = prefix ? `${prefix}.${key}` : key; + if (value !== null && typeof value === 'object' && !Array.isArray(value)) { + return extractFieldPaths(value as Record, path); + } + return [path]; + }); +} + +/** + * Fetches documents and extracts unique field names. + * Uses a simple search to get up to SAMPLE_SIZE documents. 
+ */ +async function getFieldNamesWithData( + esClient: IScopedClusterClient, + index: string, + start: string, + end: string, + kqlFilter: string | undefined, + logger: Logger +): Promise { + const response = await esClient.asCurrentUser.search({ + index, + size: SAMPLE_SIZE, + query: { + bool: { + filter: [ + ...timeRangeFilter('@timestamp', { + start: parseDatemath(start), + end: parseDatemath(end, { roundUp: true }), + }), + ...toKqlFilter(kqlFilter), + ], + }, + }, + _source: true, + ignore_unavailable: true, + allow_no_indices: true, + }); + + const docs = response.hits.hits; + logger.debug(`Sampled ${docs.length} documents from ${index}`); + + // Extract field paths from documents + const fieldPaths = docs + .map((hit) => hit._source) + .filter( + (source): source is Record => source != null && typeof source === 'object' + ) + .flatMap((source) => extractFieldPaths(source)); + + return uniq(fieldPaths); +} + +/** + * Gets field types for a list of field names using fieldCaps API. + * Returns array of { name, type } objects for fields that exist. + */ +async function getFieldsWithTypes( + esClient: IScopedClusterClient, + index: string, + fieldNames: string[] +) { + const capsResponse = await esClient.asCurrentUser.fieldCaps({ + index, + fields: fieldNames, + ignore_unavailable: true, + allow_no_indices: true, + filters: '-metadata', + }); + + return compact( + Object.entries(capsResponse.fields) + .filter(([fieldName]) => !fieldName.startsWith('_')) + .map(([fieldName, fieldTypes]) => { + const type = getFieldType(fieldTypes); + return type ? { name: fieldName, type } : undefined; + }) + ); +} + +/** + * Returns fields from a specific index pattern that have actual data, grouped by type. + * Samples up to 1000 documents to discover populated fields. + * When userIntentDescription is provided and field count exceeds threshold, uses LLM to filter. 
+ */ +export async function listFieldsHandler({ + esClient, + index, + intent, + start, + end, + kqlFilter, + modelProvider, + logger, +}: { + esClient: IScopedClusterClient; + index: string; + intent?: string; + start: string; + end: string; + kqlFilter?: string; + modelProvider: ModelProvider; + logger: Logger; +}): Promise { + try { + // Step 1: Sample documents to find fields with data + const fieldNamesWithData = await getFieldNamesWithData( + esClient, + index, + start, + end, + kqlFilter, + logger + ); + + if (fieldNamesWithData.length === 0) { + return { + fieldsByType: {}, + message: 'No documents found in index or no fields with data.', + }; + } + + // Step 2: Get field types for fields that have data + let fieldsWithTypes = await getFieldsWithTypes(esClient, index, fieldNamesWithData); + + // Step 3: Apply LLM filtering if needed + if (fieldsWithTypes.length > MIN_FIELDS_FOR_INTENT_FILTERING && intent) { + const { inferenceClient } = await modelProvider.getDefaultModel(); + fieldsWithTypes = await selectRelevantFields({ + intent, + candidateFields: fieldsWithTypes, + inferenceClient, + logger, + }); + } + + // Step 4: Group field names by type + const fieldsGroupedByType = groupBy(fieldsWithTypes, ({ type }) => type); + const fieldsByType = mapValues(fieldsGroupedByType, (fields) => fields.map((f) => f.name)); + + return { fieldsByType }; + } catch (error) { + logger.error(`Error getting index fields for "${index}": ${error.message}`); + return { + fieldsByType: {}, + message: `Failed to discover fields: ${error.message}`, + }; + } +} diff --git a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_index_info/get_index_overview_handler.ts b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_index_info/get_index_overview_handler.ts new file mode 100644 index 0000000000000..1a0685dc09f56 --- /dev/null +++ 
b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_index_info/get_index_overview_handler.ts @@ -0,0 +1,64 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import type { CoreSetup, IScopedClusterClient, Logger } from '@kbn/core/server'; +import type { + ObservabilityAgentBuilderPluginSetupDependencies, + ObservabilityAgentBuilderPluginStart, + ObservabilityAgentBuilderPluginStartDependencies, +} from '../../types'; +import { getObservabilityDataSources } from '../../utils/get_observability_data_sources'; +import { getDataStreamsHandler, type DataStreamInfo } from './get_data_streams_handler'; + +export interface IndexPatternsResult { + indexPatterns: { + apm: { transaction: string; span: string; error: string; metric: string }; + logs: string[]; + metrics: string[]; + alerts: string[]; + }; + /** Discovered data streams for targeted field discovery */ + dataStreams: DataStreamInfo[]; +} + +/** + * Returns observability index patterns and discovered data streams. + * The data streams help identify what specific datasets exist in the cluster, + * enabling more targeted field discovery (e.g., query metrics-system.memory-* for memory fields). 
+ */ +export async function getIndexPatternsHandler({ + core, + plugins, + esClient, + logger, +}: { + core: CoreSetup< + ObservabilityAgentBuilderPluginStartDependencies, + ObservabilityAgentBuilderPluginStart + >; + plugins: ObservabilityAgentBuilderPluginSetupDependencies; + esClient: IScopedClusterClient; + logger: Logger; +}): Promise { + const dataSources = await getObservabilityDataSources({ core, plugins, logger }); + const dataStreams = await getDataStreamsHandler({ esClient, dataSources, logger }); + + return { + indexPatterns: { + apm: { + transaction: dataSources.apmIndexPatterns.transaction, + span: dataSources.apmIndexPatterns.span, + error: dataSources.apmIndexPatterns.error, + metric: dataSources.apmIndexPatterns.metric, + }, + logs: dataSources.logIndexPatterns, + metrics: dataSources.metricIndexPatterns, + alerts: dataSources.alertsIndexPattern, + }, + dataStreams, + }; +} diff --git a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_index_info/index.ts b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_index_info/index.ts new file mode 100644 index 0000000000000..1ff1b2a986452 --- /dev/null +++ b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_index_info/index.ts @@ -0,0 +1,8 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +export { createGetIndexInfoTool, OBSERVABILITY_GET_INDEX_INFO_TOOL_ID } from './tool'; diff --git a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_index_info/select_relevant_fields.ts b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_index_info/select_relevant_fields.ts new file mode 100644 index 0000000000000..72e5072d7d372 --- /dev/null +++ b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_index_info/select_relevant_fields.ts @@ -0,0 +1,122 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { chunk } from 'lodash'; +import type { Logger } from '@kbn/core/server'; +import type { BoundInferenceClient } from '@kbn/inference-common'; +import dedent from 'dedent'; + +const SYSTEM_PROMPT = `You are a helpful AI assistant for Elastic Observability. +Your task is to select fields that would help investigate the user's query. +Return the field names that are relevant. + +Selection guidelines: +1. Select fields directly matching the query (e.g., "service.name" for service queries) +2. Select RELATED fields even if not explicitly mentioned: + - For memory issues: include container.*, kubernetes.*, system.memory.*, process.memory.* + - For CPU issues: include system.cpu.*, process.cpu.*, kubernetes.node.* + - For latency issues: include transaction.duration.*, span.duration.*, @timestamp + - For errors: include error.*, http.response.status_code, event.outcome, log.level + - For Kubernetes issues: include kubernetes.*, container.*, host.* +3. Always include identifying fields: service.name, host.name, @timestamp, trace.id when relevant +4. Prefer keyword fields for filtering, numeric for metrics, date for time ranges +5. 
When in doubt, include the field - it's better to have extra context than miss important data`; + +const MAX_CHUNKS = 5; +const MAX_FIELDS_PER_CHUNK = 250; +const MAX_RELEVANT_FIELDS = 100; + +interface FieldWithType { + name: string; + type: string; +} + +/** Formats fields for the LLM prompt */ +function formatFieldList(fields: FieldWithType[]): string { + return fields.map((f) => `${f.name} (${f.type})`).join('\n'); +} + +/** Calls LLM to select relevant fields from a chunk */ +async function selectFieldsFromChunk( + inferenceClient: BoundInferenceClient, + userIntentDescription: string, + fields: FieldWithType[] +): Promise<string[]> { + const response = await inferenceClient.output({ + id: 'select_relevant_fields', + system: SYSTEM_PROMPT, + input: dedent(`User intent: ${userIntentDescription} + Available fields (name and type): + ${formatFieldList(fields)} + + Select the field names that are most relevant to this investigation. + `), + schema: { + type: 'object', + properties: { + fieldNames: { + type: 'array', + items: { type: 'string' }, + description: 'Array of selected field names', + }, + }, + required: ['fieldNames'], + }, + }); + + // type guard to ensure the response is an array of strings + return Array.isArray(response.output?.fieldNames) + ? response.output.fieldNames.filter((v): v is string => typeof v === 'string') + : []; +} + +/** Matches selected field names back to field objects */ +function getFieldObjects( + selectedNames: string[], + availableFields: FieldWithType[] +): FieldWithType[] { + const fieldMap = new Map(availableFields.map((f) => [f.name, f])); + return selectedNames + .map((name) => fieldMap.get(name)) + .filter((f): f is FieldWithType => f !== undefined); +} + +/** + * Uses an LLM to filter a large list of fields down to those relevant to the user's intent.
+ */ +export async function selectRelevantFields({ + intent, + candidateFields, + inferenceClient, + logger, +}: { + intent: string; + candidateFields: FieldWithType[]; + inferenceClient: BoundInferenceClient; + logger: Logger; +}): Promise<FieldWithType[]> { + if (candidateFields.length === 0) { + return []; + } + + const chunks = chunk(candidateFields, MAX_FIELDS_PER_CHUNK).slice(0, MAX_CHUNKS); + const relevantFields: FieldWithType[] = []; + + for (const fieldsChunk of chunks) { + try { + const selectedFieldNames = await selectFieldsFromChunk(inferenceClient, intent, fieldsChunk); + const fields = getFieldObjects(selectedFieldNames, fieldsChunk); + relevantFields.push(...fields); + } catch (e) { + logger.debug(`Chunk selection failed: ${e?.message}`); + // On failure, include all fields from this chunk to avoid losing data + relevantFields.push(...fieldsChunk); + } + } + + return relevantFields.slice(0, MAX_RELEVANT_FIELDS); +} diff --git a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_index_info/tool.ts b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_index_info/tool.ts new file mode 100644 index 0000000000000..3538692ee29b8 --- /dev/null +++ b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_index_info/tool.ts @@ -0,0 +1,174 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0.
+ */ + +import { z } from '@kbn/zod'; +import { ToolType } from '@kbn/agent-builder-common'; +import { ToolResultType } from '@kbn/agent-builder-common/tools/tool_result'; +import type { BuiltinToolDefinition, StaticToolRegistration } from '@kbn/agent-builder-server'; +import type { CoreSetup, Logger } from '@kbn/core/server'; +import dedent from 'dedent'; +import { getAgentBuilderResourceAvailability } from '../../utils/get_agent_builder_resource_availability'; +import { timeRangeSchemaOptional } from '../../utils/tool_schemas'; +import type { + ObservabilityAgentBuilderPluginSetupDependencies, + ObservabilityAgentBuilderPluginStart, + ObservabilityAgentBuilderPluginStartDependencies, +} from '../../types'; +import { getIndexPatternsHandler } from './get_index_overview_handler'; +import { listFieldsHandler } from './get_index_fields_handler'; +import { getFieldValuesHandler } from './get_field_values_handler'; + +export const OBSERVABILITY_GET_INDEX_INFO_TOOL_ID = 'observability.get_index_info'; + +const getIndexInfoSchema = z.object({ + operation: z.enum(['get-index-patterns', 'list-fields', 'get-field-values']).describe( + dedent(`Operation to perform: + - "get-index-patterns": Get observability index patterns and discovered data streams + - "list-fields": List names of populated fields in an index (requires: index) + - "get-field-values": Get values for specific fields (requires: index, fields)`) + ), + index: z + .string() + .optional() + .describe( + 'Index pattern (e.g., "logs-*", "metrics-*"). Required for "list-fields" and "get-field-values".' + ), + fields: z + .array(z.string()) + .max(10) + .optional() + .describe( + 'Array of field names or wildcard patterns to get values for (e.g., ["host.name"], ["attributes.app.*"]). Required for "get-field-values".' 
+ ), + ...timeRangeSchemaOptional({ start: 'now-24h', end: 'now' }), + kqlFilter: z + .string() + .optional() + .describe('KQL filter to scope field discovery (e.g., "service.name: checkout").'), + intent: z + .string() + .optional() + .describe( + 'Investigation focus to filter relevant fields (e.g., "memory issues", "high latency").' + ), +}); + +export function createGetIndexInfoTool({ + core, + plugins, + logger, +}: { + core: CoreSetup< + ObservabilityAgentBuilderPluginStartDependencies, + ObservabilityAgentBuilderPluginStart + >; + plugins: ObservabilityAgentBuilderPluginSetupDependencies; + logger: Logger; +}): StaticToolRegistration { + const toolDefinition: BuiltinToolDefinition = { + id: OBSERVABILITY_GET_INDEX_INFO_TOOL_ID, + type: ToolType.builtin, + description: dedent(` + Discovers observability index patterns, fields, and field values in the user's Elasticsearch cluster. + + **When to use:** + - Before calling tools with "kqlFilter" param to discover valid fields and values + - To discover custom fields available beyond standard ECS or OTel fields + - To understand which fields have data in the index + - To understand the sample values and ranges for fields + + **When NOT to use:** + - When you already know the field names and values you need + + **Examples:** + - getIndexInfo(operation: "get-index-patterns" ) + - getIndexInfo(operation: "list-fields", index: "logs-*" ) + - getIndexInfo(operation: "get-field-values", index: "logs-*", fields: ["host.name"] ) + `), + schema: getIndexInfoSchema, + tags: ['observability', 'index', 'fields'], + availability: { + cacheMode: 'space', + handler: async ({ request }) => { + return getAgentBuilderResourceAvailability({ core, request, logger }); + }, + }, + handler: async (params, { esClient, modelProvider }) => { + try { + let result; + + switch (params.operation) { + case 'get-index-patterns': + result = await getIndexPatternsHandler({ core, plugins, esClient, logger }); + break; + + case 'list-fields': + if
(!params.index) { + return { + results: [ + { + type: ToolResultType.error, + data: { message: '"index" is required for operation "list-fields"' }, + }, + ], + }; + } + result = await listFieldsHandler({ + esClient, + index: params.index, + intent: params.intent, + start: params.start, + end: params.end, + kqlFilter: params.kqlFilter, + modelProvider, + logger, + }); + break; + + case 'get-field-values': + if (!params.index || !params.fields) { + return { + results: [ + { + type: ToolResultType.error, + data: { + message: '"index" and "fields" are required for operation "get-field-values"', + }, + }, + ], + }; + } + result = await getFieldValuesHandler({ + esClient, + index: params.index, + fields: params.fields, + start: params.start, + end: params.end, + kqlFilter: params.kqlFilter, + }); + break; + } + + return { + results: [{ type: ToolResultType.other, data: { ...result } }], + }; + } catch (error) { + logger.error(`Error getting index info: ${error.message}`); + return { + results: [ + { + type: ToolResultType.error, + data: { message: `Failed to get index info: ${error.message}` }, + }, + ], + }; + } + }, + }; + + return toolDefinition; +} diff --git a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_log_categories/tool.ts b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_log_categories/tool.ts index 8abf667e3a72d..6a2206cdd8f12 100644 --- a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_log_categories/tool.ts +++ b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_log_categories/tool.ts @@ -55,7 +55,8 @@ const getLogsSchema = z.object({ .optional() .describe( 'The field containing the log message. Use "message" for ECS logs or "body.text" for OpenTelemetry logs. Defaults to "message".' 
- ), + ) + .default('message'), }); export function createGetLogCategoriesTool({ @@ -101,14 +102,7 @@ Do NOT use for: }, }, handler: async (toolParams, { esClient }) => { - const { - index, - start = DEFAULT_TIME_RANGE.start, - end = DEFAULT_TIME_RANGE.end, - kqlFilter, - fields = [], - messageField = 'message', - } = toolParams; + const { index, start, end, kqlFilter, fields = [], messageField } = toolParams; try { const { highSeverityCategories, lowSeverityCategories } = await getToolHandler({ diff --git a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_services/tool.ts b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_services/tool.ts index f9dbb08eb793f..9fbe6297dae27 100644 --- a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_services/tool.ts +++ b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/get_services/tool.ts @@ -75,12 +75,7 @@ When to use: }, }, handler: async (toolParams, context) => { - const { - start = DEFAULT_TIME_RANGE.start, - end = DEFAULT_TIME_RANGE.end, - environment, - healthStatus, - } = toolParams; + const { start, end, environment, healthStatus } = toolParams; const { request, esClient } = context; try { diff --git a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/index.ts b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/index.ts index c663962a14b13..8f2e70e7cbc5c 100644 --- a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/index.ts +++ b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/index.ts @@ -5,8 +5,7 @@ * 2.0. 
*/ -export { OBSERVABILITY_GET_DATA_SOURCES_TOOL_ID } from './get_data_sources/tool'; -export { OBSERVABILITY_GET_ALERTS_TOOL_ID, defaultFields } from './get_alerts/tool'; +export { OBSERVABILITY_GET_ALERTS_TOOL_ID } from './get_alerts/tool'; export { OBSERVABILITY_GET_LOG_CATEGORIES_TOOL_ID } from './get_log_categories/tool'; export { OBSERVABILITY_RUN_LOG_RATE_ANALYSIS_TOOL_ID } from './run_log_rate_analysis/tool'; export { OBSERVABILITY_GET_ANOMALY_DETECTION_JOBS_TOOL_ID } from './get_anomaly_detection_jobs/tool'; @@ -15,3 +14,4 @@ export { OBSERVABILITY_GET_DOWNSTREAM_DEPENDENCIES_TOOL_ID } from './get_downstr export { OBSERVABILITY_GET_CORRELATED_LOGS_TOOL_ID } from './get_correlated_logs/tool'; export { OBSERVABILITY_GET_HOSTS_TOOL_ID } from './get_hosts/tool'; export { OBSERVABILITY_GET_TRACE_METRICS_TOOL_ID } from './get_trace_metrics/tool'; +export { OBSERVABILITY_GET_INDEX_INFO_TOOL_ID } from './get_index_info'; diff --git a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/register_tools.ts b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/register_tools.ts index 66205629a19b7..416f4a2a114a7 100644 --- a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/register_tools.ts +++ b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/register_tools.ts @@ -14,10 +14,6 @@ import type { ObservabilityAgentBuilderPluginStartDependencies, } from '../types'; import type { ObservabilityAgentBuilderDataRegistry } from '../data_registry/data_registry'; -import { - OBSERVABILITY_GET_DATA_SOURCES_TOOL_ID, - createGetDataSourcesTool, -} from './get_data_sources/tool'; import { OBSERVABILITY_RUN_LOG_RATE_ANALYSIS_TOOL_ID, createRunLogRateAnalysisTool, @@ -53,6 +49,7 @@ import { OBSERVABILITY_GET_METRIC_CHANGE_POINTS_TOOL_ID, createGetMetricChangePointsTool, } from './get_metric_change_points/tool'; +import { OBSERVABILITY_GET_INDEX_INFO_TOOL_ID, 
createGetIndexInfoTool } from './get_index_info'; const PLATFORM_TOOL_IDS = [ platformCoreTools.search, @@ -63,7 +60,6 @@ const PLATFORM_TOOL_IDS = [ ]; const OBSERVABILITY_TOOL_IDS = [ - OBSERVABILITY_GET_DATA_SOURCES_TOOL_ID, OBSERVABILITY_RUN_LOG_RATE_ANALYSIS_TOOL_ID, OBSERVABILITY_GET_ANOMALY_DETECTION_JOBS_TOOL_ID, OBSERVABILITY_GET_ALERTS_TOOL_ID, @@ -75,6 +71,7 @@ const OBSERVABILITY_TOOL_IDS = [ OBSERVABILITY_GET_TRACE_METRICS_TOOL_ID, OBSERVABILITY_GET_LOG_CHANGE_POINTS_TOOL_ID, OBSERVABILITY_GET_METRIC_CHANGE_POINTS_TOOL_ID, + OBSERVABILITY_GET_INDEX_INFO_TOOL_ID, ]; export const OBSERVABILITY_AGENT_TOOL_IDS = [...PLATFORM_TOOL_IDS, ...OBSERVABILITY_TOOL_IDS]; @@ -94,7 +91,6 @@ export async function registerTools({ logger: Logger; }) { const observabilityTools: StaticToolRegistration[] = [ - createGetDataSourcesTool({ core, plugins, logger }), createRunLogRateAnalysisTool({ core, logger }), createGetAnomalyDetectionJobsTool({ core, plugins, logger }), createGetAlertsTool({ core, logger }), @@ -106,6 +102,7 @@ export async function registerTools({ createGetTraceMetricsTool({ core, plugins, logger }), createGetLogChangePointsTool({ core, plugins, logger }), createGetMetricChangePointsTool({ core, plugins, logger }), + createGetIndexInfoTool({ core, plugins, logger }), ]; for (const tool of observabilityTools) { diff --git a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/run_log_rate_analysis/handler.ts b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/run_log_rate_analysis/handler.ts index 2c3a226054535..58b9b38a453e1 100644 --- a/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/run_log_rate_analysis/handler.ts +++ b/x-pack/solutions/observability/plugins/observability_agent_builder/server/tools/run_log_rate_analysis/handler.ts @@ -15,7 +15,7 @@ export async function getToolHandler({ esClient, logger, index, - timeFieldName = '@timestamp', + timeFieldName, 
baseline, deviation, searchQuery, @@ -23,7 +23,7 @@ export async function getToolHandler({ esClient: ElasticsearchClient; logger: Logger; index: string; - timeFieldName?: string; + timeFieldName: string; baseline: { start: string; end: string }; deviation: { start: string; end: string }; searchQuery?: Record; diff --git a/x-pack/solutions/observability/plugins/observability_agent_builder/server/utils/dsl_filters.ts b/x-pack/solutions/observability/plugins/observability_agent_builder/server/utils/dsl_filters.ts index 11e9fe3a148a9..301bc0e5bbf97 100644 --- a/x-pack/solutions/observability/plugins/observability_agent_builder/server/utils/dsl_filters.ts +++ b/x-pack/solutions/observability/plugins/observability_agent_builder/server/utils/dsl_filters.ts @@ -13,6 +13,10 @@ export function timeRangeFilter( timeField: string, { start, end }: { start: number; end: number } ): QueryDslQueryContainer[] { + if (!start || !end) { + return []; + } + return [ { range: { diff --git a/x-pack/solutions/observability/plugins/observability_agent_builder/server/utils/get_observability_data_sources.ts b/x-pack/solutions/observability/plugins/observability_agent_builder/server/utils/get_observability_data_sources.ts index b81435262c6b6..04723492d3781 100644 --- a/x-pack/solutions/observability/plugins/observability_agent_builder/server/utils/get_observability_data_sources.ts +++ b/x-pack/solutions/observability/plugins/observability_agent_builder/server/utils/get_observability_data_sources.ts @@ -16,6 +16,13 @@ import type { ObservabilityAgentBuilderPluginStartDependencies, } from '../types'; +export interface ObservabilityDataSources { + apmIndexPatterns: APMIndices; + logIndexPatterns: string[]; + metricIndexPatterns: string[]; + alertsIndexPattern: string[]; +} + export async function getObservabilityDataSources({ core, plugins, @@ -27,16 +34,11 @@ export async function getObservabilityDataSources({ >; plugins: ObservabilityAgentBuilderPluginSetupDependencies; logger: Logger; -}): 
Promise<{ - apmIndexPatterns: APMIndices; - logIndexPatterns: string[]; - metricIndexPatterns: string[]; - alertsIndexPattern: string[]; -}> { +}): Promise<ObservabilityDataSources> { const apmIndexPatterns = await getApmIndices({ core, plugins, logger }); const logIndexPatterns = await getLogsIndices({ core, logger }); const metricIndexPatterns = await getMetricsIndices({ core, plugins, logger }); - const alertsIndexPattern = ['alerts-observability-*']; + const alertsIndexPattern = ['.alerts-observability.*']; return { apmIndexPatterns, diff --git a/x-pack/solutions/observability/plugins/observability_agent_builder/server/utils/tool_schemas.ts b/x-pack/solutions/observability/plugins/observability_agent_builder/server/utils/tool_schemas.ts index f2a00e4e42479..02a39ef15c92d 100644 --- a/x-pack/solutions/observability/plugins/observability_agent_builder/server/utils/tool_schemas.ts +++ b/x-pack/solutions/observability/plugins/observability_agent_builder/server/utils/tool_schemas.ts @@ -25,8 +25,11 @@ export function timeRangeSchemaOptional(defaultTimeRange: { start: string; end: start: z .string() .describe(`${startDescription} Defaults to ${defaultTimeRange.start}.`) - .optional(), + .default(defaultTimeRange.start), - end: z.string().describe(`${endDescription} Defaults to ${defaultTimeRange.end}.`).optional(), + end: z + .string() + .describe(`${endDescription} Defaults to ${defaultTimeRange.end}.`) + .default(defaultTimeRange.end), }; } diff --git a/x-pack/solutions/observability/test/api_integration_deployment_agnostic/apis/observability_agent_builder/index.ts b/x-pack/solutions/observability/test/api_integration_deployment_agnostic/apis/observability_agent_builder/index.ts index fd75016ec724a..9cefde8dc0614 100644 --- a/x-pack/solutions/observability/test/api_integration_deployment_agnostic/apis/observability_agent_builder/index.ts +++ b/x-pack/solutions/observability/test/api_integration_deployment_agnostic/apis/observability_agent_builder/index.ts @@ -10,7 +10,6 @@ import type {
DeploymentAgnosticFtrProviderContext } from '../../ftr_provider_co export default function ({ loadTestFile }: DeploymentAgnosticFtrProviderContext) { describe('Observability Agent', function () { // tools - loadTestFile(require.resolve('./tools/get_data_sources.spec.ts')); loadTestFile(require.resolve('./tools/get_alerts.spec.ts')); loadTestFile(require.resolve('./tools/get_downstream_dependencies.spec.ts')); loadTestFile(require.resolve('./tools/get_services.spec.ts')); @@ -22,7 +21,7 @@ export default function ({ loadTestFile }: DeploymentAgnosticFtrProviderContext) loadTestFile(require.resolve('./tools/get_trace_metrics.spec.ts')); loadTestFile(require.resolve('./tools/get_log_change_points.spec.ts')); loadTestFile(require.resolve('./tools/get_metric_change_points.spec.ts')); - + loadTestFile(require.resolve('./tools/get_index_info.spec.ts')); // ai insights loadTestFile(require.resolve('./ai_insights/error.spec.ts')); loadTestFile(require.resolve('./ai_insights/alert.spec.ts')); diff --git a/x-pack/solutions/observability/test/api_integration_deployment_agnostic/apis/observability_agent_builder/tools/get_data_sources.spec.ts b/x-pack/solutions/observability/test/api_integration_deployment_agnostic/apis/observability_agent_builder/tools/get_data_sources.spec.ts deleted file mode 100644 index be93790712c75..0000000000000 --- a/x-pack/solutions/observability/test/api_integration_deployment_agnostic/apis/observability_agent_builder/tools/get_data_sources.spec.ts +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. 
- */ - -import expect from '@kbn/expect'; -import { timerange } from '@kbn/synthtrace-client'; -import type { ApmSynthtraceEsClient, LogsSynthtraceEsClient } from '@kbn/synthtrace'; -import { generateDataSourcesData, indexAll } from '@kbn/synthtrace'; -import type { OtherResult } from '@kbn/agent-builder-common'; -import { OBSERVABILITY_GET_DATA_SOURCES_TOOL_ID } from '@kbn/observability-agent-builder-plugin/server/tools'; -import type { DeploymentAgnosticFtrProviderContext } from '../../../ftr_provider_context'; -import { createAgentBuilderApiClient } from '../utils/agent_builder_client'; - -interface GetDataSourcesToolResult extends OtherResult { - data: { - apm: { - indexPatterns: { - transaction: string; - span: string; - error: string; - metric: string; - onboarding: string; - sourcemap: string; - }; - }; - logs: { - indexPatterns: string[]; - }; - metrics: { - indexPatterns: string[]; - }; - alerts: { - indexPattern: string[]; - }; - }; -} - -export default function ({ getService }: DeploymentAgnosticFtrProviderContext) { - const roleScopedSupertest = getService('roleScopedSupertest'); - const synthtrace = getService('synthtrace'); - - describe(`tool: ${OBSERVABILITY_GET_DATA_SOURCES_TOOL_ID}`, function () { - let agentBuilderApiClient: ReturnType; - let apmSynthtraceEsClient: ApmSynthtraceEsClient; - let logsSynthtraceEsClient: LogsSynthtraceEsClient; - - before(async () => { - const scoped = await roleScopedSupertest.getSupertestWithRoleScope('editor'); - agentBuilderApiClient = createAgentBuilderApiClient(scoped); - - apmSynthtraceEsClient = await synthtrace.createApmSynthtraceEsClient(); - logsSynthtraceEsClient = synthtrace.createLogsSynthtraceEsClient(); - - await apmSynthtraceEsClient.clean(); - await logsSynthtraceEsClient.clean(); - - const range = timerange('now-15m', 'now'); - - await indexAll( - generateDataSourcesData({ - range, - logsEsClient: logsSynthtraceEsClient, - apmEsClient: apmSynthtraceEsClient, - }) - ); - }); - - after(async () => { - 
await apmSynthtraceEsClient.clean(); - await logsSynthtraceEsClient.clean(); - }); - - describe('when fetching data sources', () => { - let resultData: GetDataSourcesToolResult['data']; - - before(async () => { - const results = await agentBuilderApiClient.executeTool({ - id: OBSERVABILITY_GET_DATA_SOURCES_TOOL_ID, - params: {}, - }); - - expect(results).to.have.length(1); - resultData = results[0].data; - }); - - it('returns the correct tool results structure', () => { - expect(resultData).to.have.property('apm'); - expect(resultData).to.have.property('logs'); - expect(resultData).to.have.property('metrics'); - expect(resultData).to.have.property('alerts'); - }); - - it('returns tool results with the relevant index patterns', () => { - const expectedIndexPatterns = { - apm: { - indexPatterns: { - transaction: 'traces-apm*,apm-*,traces-*.otel-*', - span: 'traces-apm*,apm-*,traces-*.otel-*', - error: 'logs-apm*,apm-*,logs-*.otel-*', - metric: 'metrics-apm*,apm-*,metrics-*.otel-*', - onboarding: 'apm-*', - sourcemap: 'apm-*', - }, - }, - logs: { - indexPatterns: ['logs-*-*', 'logs-*', 'filebeat-*'], - }, - metrics: { - indexPatterns: ['metrics-*', 'metricbeat-*'], - }, - alerts: { - indexPattern: ['alerts-observability-*'], - }, - }; - - expect(resultData).to.eql(expectedIndexPatterns); - }); - }); - }); -} diff --git a/x-pack/solutions/observability/test/api_integration_deployment_agnostic/apis/observability_agent_builder/tools/get_index_info.spec.ts b/x-pack/solutions/observability/test/api_integration_deployment_agnostic/apis/observability_agent_builder/tools/get_index_info.spec.ts new file mode 100644 index 0000000000000..6f12ff5c7642c --- /dev/null +++ b/x-pack/solutions/observability/test/api_integration_deployment_agnostic/apis/observability_agent_builder/tools/get_index_info.spec.ts @@ -0,0 +1,582 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import expect from '@kbn/expect'; +import { timerange } from '@kbn/synthtrace-client'; +import { + type ApmSynthtraceEsClient, + type InfraSynthtraceEsClient, + type LogsSynthtraceEsClient, + generateFieldDiscoveryData, + indexAll, +} from '@kbn/synthtrace'; +import { OBSERVABILITY_GET_INDEX_INFO_TOOL_ID } from '@kbn/observability-agent-builder-plugin/server/tools'; +import type { IndexPatternsResult } from '@kbn/observability-agent-builder-plugin/server/tools/get_index_info/get_index_overview_handler'; +import type { IndexFieldsResult } from '@kbn/observability-agent-builder-plugin/server/tools/get_index_info/get_index_fields_handler'; +import type { FieldValuesRecordResult } from '@kbn/observability-agent-builder-plugin/server/tools/get_index_info/get_field_values_handler'; +import type { DeploymentAgnosticFtrProviderContext } from '../../../ftr_provider_context'; +import { createAgentBuilderApiClient } from '../utils/agent_builder_client'; + +export default function ({ getService }: DeploymentAgnosticFtrProviderContext) { + const roleScopedSupertest = getService('roleScopedSupertest'); + const synthtrace = getService('synthtrace'); + + describe(`tool: ${OBSERVABILITY_GET_INDEX_INFO_TOOL_ID}`, function () { + let agentBuilderApiClient: ReturnType<typeof createAgentBuilderApiClient>; + let infraSynthtraceEsClient: InfraSynthtraceEsClient; + let apmSynthtraceEsClient: ApmSynthtraceEsClient; + let logsSynthtraceEsClient: LogsSynthtraceEsClient; + + before(async () => { + const scoped = await roleScopedSupertest.getSupertestWithRoleScope('editor'); + agentBuilderApiClient = createAgentBuilderApiClient(scoped); + + infraSynthtraceEsClient = synthtrace.createInfraSynthtraceEsClient(); + apmSynthtraceEsClient = await synthtrace.createApmSynthtraceEsClient(); + logsSynthtraceEsClient = synthtrace.createLogsSynthtraceEsClient(); + + await infraSynthtraceEsClient.clean(); +
await apmSynthtraceEsClient.clean(); + await logsSynthtraceEsClient.clean(); + + // Generate comprehensive test data using field_discovery scenario + await indexAll( + generateFieldDiscoveryData({ + range: timerange('now-15m', 'now'), + infraEsClient: infraSynthtraceEsClient, + apmEsClient: apmSynthtraceEsClient, + logsEsClient: logsSynthtraceEsClient, + hosts: [ + { + name: 'field-discovery-host-01', + cpuUsage: 0.65, + memoryUsage: 0.72, + diskUsage: 0.45, + cloudProvider: 'aws', + cloudRegion: 'us-east-1', + k8sNamespace: 'production', + k8sPodName: 'payment-pod-abc123', + }, + { + name: 'field-discovery-host-02', + cpuUsage: 0.35, + memoryUsage: 0.55, + diskUsage: 0.25, + cloudProvider: 'aws', + cloudRegion: 'us-west-2', + k8sNamespace: 'production', + k8sPodName: 'order-pod-def456', + }, + ], + services: [ + { + name: 'payment-service', + environment: 'production', + host: 'field-discovery-host-01', + agentName: 'nodejs', + errorRate: 0.1, + avgLatencyMs: 150, + }, + { + name: 'order-service', + environment: 'production', + host: 'field-discovery-host-02', + agentName: 'java', + errorRate: 0.05, + avgLatencyMs: 100, + }, + ], + }) + ); + }); + + after(async () => { + if (infraSynthtraceEsClient) await infraSynthtraceEsClient.clean(); + if (apmSynthtraceEsClient) await apmSynthtraceEsClient.clean(); + if (logsSynthtraceEsClient) await logsSynthtraceEsClient.clean(); + }); + + describe('operation: "get-index-patterns"', () => { + it('returns observability index patterns', async () => { + const results = await agentBuilderApiClient.executeTool({ + id: OBSERVABILITY_GET_INDEX_INFO_TOOL_ID, + params: { operation: 'get-index-patterns' }, + }); + const data = results[0].data as unknown as IndexPatternsResult; + + expect(data).to.have.property('indexPatterns'); + expect(data.indexPatterns.apm).to.have.property('transaction'); + expect(data.indexPatterns.apm).to.have.property('span'); + expect(data.indexPatterns.apm).to.have.property('error'); + 
expect(data.indexPatterns.apm).to.have.property('metric'); + expect(data.indexPatterns.logs).to.be.an('array'); + expect(data.indexPatterns.metrics).to.be.an('array'); + expect(data.indexPatterns.alerts).to.be.an('array'); + }); + + it('returns discovered data streams as a flat array', async () => { + const results = await agentBuilderApiClient.executeTool({ + id: OBSERVABILITY_GET_INDEX_INFO_TOOL_ID, + params: { operation: 'get-index-patterns' }, + }); + const data = results[0].data as unknown as IndexPatternsResult; + + expect(data).to.have.property('dataStreams'); + expect(data.dataStreams).to.be.an('array'); + expect(data.dataStreams.length).to.be.greaterThan(0); + }); + + it('returns data streams with name and dataset properties', async () => { + const results = await agentBuilderApiClient.executeTool({ + id: OBSERVABILITY_GET_INDEX_INFO_TOOL_ID, + params: { operation: 'get-index-patterns' }, + }); + const data = results[0].data as unknown as IndexPatternsResult; + + // Each data stream should have name and dataset + const firstStream = data.dataStreams[0]; + expect(firstStream).to.have.property('name'); + expect(firstStream).to.have.property('dataset'); + expect(firstStream.name).to.be.a('string'); + expect(firstStream.dataset).to.be.a('string'); + }); + + it('includes system metric data streams for targeted field discovery', async () => { + const results = await agentBuilderApiClient.executeTool({ + id: OBSERVABILITY_GET_INDEX_INFO_TOOL_ID, + params: { operation: 'get-index-patterns' }, + }); + const data = results[0].data as unknown as IndexPatternsResult; + + // Should include system metrics data streams + const datasets = data.dataStreams.map((ds) => ds.dataset); + expect(datasets).to.contain('system.cpu'); + expect(datasets).to.contain('system.memory'); + }); + + it('data streams can be used for targeted field discovery', async () => { + // First, get data streams + const indexPatternsResults = await agentBuilderApiClient.executeTool({ + id: 
OBSERVABILITY_GET_INDEX_INFO_TOOL_ID, + params: { operation: 'get-index-patterns' }, + }); + const indexPatternsData = indexPatternsResults[0].data as unknown as IndexPatternsResult; + + // Find the memory data stream + const memoryStream = indexPatternsData.dataStreams.find( + (ds) => ds.dataset === 'system.memory' + ); + expect(memoryStream).to.not.be(undefined); + + // Use the data stream name for targeted field discovery + const fieldsResults = await agentBuilderApiClient.executeTool({ + id: OBSERVABILITY_GET_INDEX_INFO_TOOL_ID, + params: { operation: 'list-fields', index: memoryStream!.name }, + }); + const fieldsData = fieldsResults[0].data as unknown as IndexFieldsResult; + + // Should find memory-specific fields + const allFields = Object.values(fieldsData.fieldsByType).flat(); + expect(allFields).to.contain('system.memory.total'); + expect(allFields).to.contain('system.memory.used.pct'); + }); + }); + + describe('operation: "list-fields"', () => { + it('returns fields grouped by type', async () => { + const results = await agentBuilderApiClient.executeTool({ + id: OBSERVABILITY_GET_INDEX_INFO_TOOL_ID, + params: { operation: 'list-fields', index: 'metrics-*' }, + }); + const data = results[0].data as unknown as IndexFieldsResult; + + expect(data).to.have.property('fieldsByType'); + expect(data.fieldsByType.keyword).to.contain('host.name'); + }); + + it('returns only concrete field types (excludes object, nested, unmapped)', async () => { + const results = await agentBuilderApiClient.executeTool({ + id: OBSERVABILITY_GET_INDEX_INFO_TOOL_ID, + params: { operation: 'list-fields', index: 'metrics-*' }, + }); + const data = results[0].data as unknown as IndexFieldsResult; + + const fieldTypes = Object.keys(data.fieldsByType); + // Should have concrete field types + expect(fieldTypes.length).to.be.greaterThan(0); + // Should NOT include non-concrete types that aren't useful for queries + const nonConcreteTypes = ['object', 'nested', 'unmapped', 'flattened']; + 
nonConcreteTypes.forEach((type) => { + expect(fieldTypes).to.not.contain(type); + }); + }); + + it('returns empty for non-existent index', async () => { + const results = await agentBuilderApiClient.executeTool({ + id: OBSERVABILITY_GET_INDEX_INFO_TOOL_ID, + params: { operation: 'list-fields', index: 'non-existent-index-12345' }, + }); + const data = results[0].data as unknown as IndexFieldsResult; + + expect(Object.keys(data.fieldsByType)).to.have.length(0); + }); + + it('returns error when index is missing', async () => { + const results = await agentBuilderApiClient.executeTool({ + id: OBSERVABILITY_GET_INDEX_INFO_TOOL_ID, + params: { operation: 'list-fields' }, + }); + + expect(results[0].type).to.be('error'); + expect((results[0].data as { message: string }).message).to.contain( + '"index" is required for operation "list-fields"' + ); + }); + + it('supports kqlFilter parameter', async () => { + const results = await agentBuilderApiClient.executeTool({ + id: OBSERVABILITY_GET_INDEX_INFO_TOOL_ID, + params: { + operation: 'list-fields', + index: 'metrics-*', + kqlFilter: 'host.name: field-discovery-host-01', + }, + }); + const data = results[0].data as unknown as IndexFieldsResult; + + expect(Object.keys(data.fieldsByType).length).to.be.greaterThan(0); + }); + + it('supports start and end time range parameters', async () => { + const results = await agentBuilderApiClient.executeTool({ + id: OBSERVABILITY_GET_INDEX_INFO_TOOL_ID, + params: { + operation: 'list-fields', + index: 'metrics-*', + start: 'now-10m', + end: 'now', + }, + }); + const data = results[0].data as unknown as IndexFieldsResult; + + expect(Object.keys(data.fieldsByType).length).to.be.greaterThan(0); + }); + }); + + describe('operation: "get-field-values"', () => { + it('returns keyword values for host.name', async () => { + const results = await agentBuilderApiClient.executeTool({ + id: OBSERVABILITY_GET_INDEX_INFO_TOOL_ID, + params: { operation: 'get-field-values', index: 'metrics-*', fields: 
['host.name'] }, + }); + const data = results[0].data as unknown as FieldValuesRecordResult; + + const hostResult = data.fields['host.name']; + expect(hostResult.type).to.be('keyword'); + if (hostResult.type === 'keyword') { + expect(hostResult.values).to.contain('field-discovery-host-01'); + expect(hostResult.values).to.contain('field-discovery-host-02'); + } + }); + + it('returns numeric min/max for numeric field', async () => { + const results = await agentBuilderApiClient.executeTool({ + id: OBSERVABILITY_GET_INDEX_INFO_TOOL_ID, + params: { + operation: 'get-field-values', + index: 'metrics-*', + fields: ['system.cpu.total.norm.pct'], + }, + }); + const data = results[0].data as unknown as FieldValuesRecordResult; + + const result = data.fields['system.cpu.total.norm.pct']; + expect(result.type).to.be('numeric'); + if (result.type === 'numeric') { + expect(result.min).to.be.a('number'); + expect(result.max).to.be.a('number'); + } + }); + + it('returns date min/max for date field', async () => { + const results = await agentBuilderApiClient.executeTool({ + id: OBSERVABILITY_GET_INDEX_INFO_TOOL_ID, + params: { operation: 'get-field-values', index: 'metrics-*', fields: ['@timestamp'] }, + }); + const data = results[0].data as unknown as FieldValuesRecordResult; + + const result = data.fields['@timestamp']; + expect(result.type).to.be('date'); + if (result.type !== 'date') { + throw new Error('Expected date field'); + } + + const minDate = new Date(result.min); + const maxDate = new Date(result.max); + + expect(minDate.getTime()).to.be.a('number'); + expect(maxDate.getTime()).to.be.a('number'); + expect(maxDate.getTime()).to.be.greaterThan(minDate.getTime()); + }); + + it('returns error for non-existent field', async () => { + const results = await agentBuilderApiClient.executeTool({ + id: OBSERVABILITY_GET_INDEX_INFO_TOOL_ID, + params: { + operation: 'get-field-values', + index: 'metrics-*', + fields: ['non.existent.field'], + }, + }); + const data = 
results[0].data as unknown as FieldValuesRecordResult; + + expect(data.fields['non.existent.field'].type).to.be('error'); + }); + + it('supports batch discovery for multiple fields', async () => { + const results = await agentBuilderApiClient.executeTool({ + id: OBSERVABILITY_GET_INDEX_INFO_TOOL_ID, + params: { + operation: 'get-field-values', + index: 'metrics-*', + fields: ['host.name', 'cloud.provider'], + }, + }); + const data = results[0].data as unknown as FieldValuesRecordResult; + + expect(data.fields['host.name'].type).to.be('keyword'); + expect(data.fields['cloud.provider'].type).to.be('keyword'); + }); + + it('returns error when index is missing', async () => { + const results = await agentBuilderApiClient.executeTool({ + id: OBSERVABILITY_GET_INDEX_INFO_TOOL_ID, + params: { operation: 'get-field-values', fields: ['host.name'] }, + }); + + expect(results[0].type).to.be('error'); + expect((results[0].data as { message: string }).message).to.contain( + '"index" and "fields" are required' + ); + }); + + it('returns error when fields is missing', async () => { + const results = await agentBuilderApiClient.executeTool({ + id: OBSERVABILITY_GET_INDEX_INFO_TOOL_ID, + params: { operation: 'get-field-values', index: 'metrics-*' }, + }); + + expect(results[0].type).to.be('error'); + expect((results[0].data as { message: string }).message).to.contain( + '"index" and "fields" are required' + ); + }); + + it('supports wildcard patterns in fields parameter', async () => { + const results = await agentBuilderApiClient.executeTool({ + id: OBSERVABILITY_GET_INDEX_INFO_TOOL_ID, + params: { + operation: 'get-field-values', + index: 'metrics-*', + fields: ['system.cpu.*'], + }, + }); + const data = results[0].data as unknown as FieldValuesRecordResult; + + const fieldNames = Object.keys(data.fields); + + // Should have expanded the wildcard to multiple fields + expect(fieldNames.length).to.be.greaterThan(1); + + // All returned fields should match the pattern + 
fieldNames.forEach((field) => { + expect(field.startsWith('system.cpu.')).to.be(true); + }); + + // Should include expected system.cpu fields + expect(fieldNames).to.contain('system.cpu.total.norm.pct'); + }); + + it('supports multiple wildcard patterns', async () => { + const results = await agentBuilderApiClient.executeTool({ + id: OBSERVABILITY_GET_INDEX_INFO_TOOL_ID, + params: { + operation: 'get-field-values', + index: 'metrics-*', + fields: ['system.cpu.*', 'cloud.*'], + }, + }); + const data = results[0].data as unknown as FieldValuesRecordResult; + + const fieldNames = Object.keys(data.fields); + + // Should have fields from both patterns + const cpuFields = fieldNames.filter((f) => f.startsWith('system.cpu.')); + const cloudFields = fieldNames.filter((f) => f.startsWith('cloud.')); + + expect(cpuFields.length).to.be.greaterThan(0); + expect(cloudFields.length).to.be.greaterThan(0); + }); + + it('returns error for wildcard pattern with no matches', async () => { + const results = await agentBuilderApiClient.executeTool({ + id: OBSERVABILITY_GET_INDEX_INFO_TOOL_ID, + params: { + operation: 'get-field-values', + index: 'metrics-*', + fields: ['nonexistent.pattern.*'], + }, + }); + const data = results[0].data as unknown as FieldValuesRecordResult; + + const result = data.fields['nonexistent.pattern.*']; + expect(result.type).to.be('error'); + if (result.type === 'error') { + expect(result.message).to.contain('No fields match pattern'); + } + }); + + it('does not return parent object fields when querying a specific field', async () => { + // This tests that fieldCaps parent objects (from passthrough types) are filtered out + // When querying "host.name", we should NOT get "host" as an error + const results = await agentBuilderApiClient.executeTool({ + id: OBSERVABILITY_GET_INDEX_INFO_TOOL_ID, + params: { + operation: 'get-field-values', + index: 'metrics-*', + fields: ['host.name'], + }, + }); + const data = results[0].data as unknown as 
FieldValuesRecordResult; + + const fieldNames = Object.keys(data.fields); + + // Should only return the requested field, not parent objects + expect(fieldNames).to.eql(['host.name']); + expect(data.fields['host.name'].type).to.be('keyword'); + + // Verify no parent object errors are present + expect(data.fields).to.not.have.property('host'); + }); + + it('handles mixed explicit fields and wildcard patterns', async () => { + const results = await agentBuilderApiClient.executeTool({ + id: OBSERVABILITY_GET_INDEX_INFO_TOOL_ID, + params: { + operation: 'get-field-values', + index: 'metrics-*', + fields: ['host.name', 'system.cpu.*'], + }, + }); + const data = results[0].data as unknown as FieldValuesRecordResult; + + const fieldNames = Object.keys(data.fields); + + // Should include explicit field + expect(fieldNames).to.contain('host.name'); + expect(data.fields['host.name'].type).to.be('keyword'); + + // Should include expanded wildcard fields + const cpuFields = fieldNames.filter((f) => f.startsWith('system.cpu.')); + expect(cpuFields.length).to.be.greaterThan(0); + }); + + it('returns text samples for text/match_only_text fields', async () => { + // The message field in logs is typically text or match_only_text type + // Use logs-generic* to target the scenario's log data specifically + const results = await agentBuilderApiClient.executeTool({ + id: OBSERVABILITY_GET_INDEX_INFO_TOOL_ID, + params: { + operation: 'get-field-values', + index: 'logs-generic*', + fields: ['message'], + }, + }); + const data = results[0].data as unknown as FieldValuesRecordResult; + + const messageResult = data.fields.message; + expect(messageResult.type).to.be('text'); + if (messageResult.type === 'text') { + expect(messageResult.samples).to.be.an('array'); + expect(messageResult.samples.length).to.be.greaterThan(0); + expect(messageResult.samples.length).to.be.lessThan(6); // Max 5 samples + // Each sample should be a non-empty string + messageResult.samples.forEach((sample) => { + 
expect(sample).to.be.a('string'); + expect(sample.length).to.be.greaterThan(0); + }); + } + }); + + it('returns IP values as keyword type', async () => { + // IP fields are treated as keyword type internally + const results = await agentBuilderApiClient.executeTool({ + id: OBSERVABILITY_GET_INDEX_INFO_TOOL_ID, + params: { + operation: 'get-field-values', + index: 'metrics-*', + fields: ['host.ip'], + }, + }); + const data = results[0].data as unknown as FieldValuesRecordResult; + + const result = data.fields['host.ip']; + expect(result.type).to.be('keyword'); + if (result.type === 'keyword') { + expect(result.values).to.be.an('array'); + expect(result.values.length).to.be.greaterThan(0); + } + }); + + it('applies kqlFilter to numeric field aggregations', async () => { + // Get CPU values for specific host only + const results = await agentBuilderApiClient.executeTool({ + id: OBSERVABILITY_GET_INDEX_INFO_TOOL_ID, + params: { + operation: 'get-field-values', + index: 'metrics-*', + fields: ['system.cpu.total.norm.pct'], + kqlFilter: 'host.name: field-discovery-host-01', + }, + }); + const data = results[0].data as unknown as FieldValuesRecordResult; + + const result = data.fields['system.cpu.total.norm.pct']; + expect(result.type).to.be('numeric'); + if (result.type === 'numeric') { + // Host-01 has cpuUsage: 0.65, so min/max should be around that value + expect(result.min).to.be.greaterThan(0.6); + expect(result.max).to.be.lessThan(0.7); + } + }); + + it('applies time range to date field aggregations', async () => { + const results = await agentBuilderApiClient.executeTool({ + id: OBSERVABILITY_GET_INDEX_INFO_TOOL_ID, + params: { + operation: 'get-field-values', + index: 'metrics-*', + fields: ['@timestamp'], + start: 'now-5m', + end: 'now', + }, + }); + const data = results[0].data as unknown as FieldValuesRecordResult; + + const result = data.fields['@timestamp']; + expect(result.type).to.be('date'); + if (result.type === 'date') { + const minDate = new 
Date(result.min); + const fiveMinutesAgo = Date.now() - 5 * 60 * 1000; + + // Min date should be within last 5 minutes + expect(minDate.getTime()).to.be.greaterThan(fiveMinutesAgo - 60000); // 1 min tolerance + } + }); + }); + }); +}