diff --git a/x-pack/solutions/observability/packages/kbn-evals-suite-observability-ai/evals/ai_insights/alert_insight.spec.ts b/x-pack/solutions/observability/packages/kbn-evals-suite-observability-ai/evals/ai_insights/alert_insight.spec.ts index c9442b5da89da..2d20f8bea191d 100644 --- a/x-pack/solutions/observability/packages/kbn-evals-suite-observability-ai/evals/ai_insights/alert_insight.spec.ts +++ b/x-pack/solutions/observability/packages/kbn-evals-suite-observability-ai/evals/ai_insights/alert_insight.spec.ts @@ -118,6 +118,7 @@ function createScenarioTest(scenario: AlertScenario) { esClient.deleteByQuery({ index: scenario.alertRule.alertsIndex, query: { match_all: {} }, + conflicts: 'proceed', refresh: true, }), ...(ruleId diff --git a/x-pack/solutions/observability/packages/kbn-evals-suite-observability-ai/evals/ai_insights/apm_error_insight.spec.ts b/x-pack/solutions/observability/packages/kbn-evals-suite-observability-ai/evals/ai_insights/apm_error_insight.spec.ts index ec311df1deeff..dd7d71836f407 100644 --- a/x-pack/solutions/observability/packages/kbn-evals-suite-observability-ai/evals/ai_insights/apm_error_insight.spec.ts +++ b/x-pack/solutions/observability/packages/kbn-evals-suite-observability-ai/evals/ai_insights/apm_error_insight.spec.ts @@ -36,9 +36,6 @@ function createScenarioTest(scenario: ApmErrorScenario) { let replayResult: LoadResult; evaluate.beforeAll(async ({ esClient, log }) => { - end = moment().toISOString(); - start = moment().subtract(15, 'minutes').toISOString(); - log.info(`Replaying scenario: ${scenario.id}`); replayResult = await replayObservabilityDataStreams( esClient, @@ -50,6 +47,9 @@ function createScenarioTest(scenario: ApmErrorScenario) { log.debug('Waiting to make sure all indices are refreshed'); await new Promise((resolve) => setTimeout(resolve, INDEX_REFRESH_WAIT_MS)); + end = moment().toISOString(); + start = moment().subtract(15, 'minutes').toISOString(); + log.info(`Querying for APM error: 
${scenario.errorQuery.errorMessage}`); const errorsResponse = await esClient.search({ index: 'logs-*', diff --git a/x-pack/solutions/observability/packages/kbn-evals-suite-observability-ai/evals/ai_insights/log_insight.spec.ts b/x-pack/solutions/observability/packages/kbn-evals-suite-observability-ai/evals/ai_insights/log_insight.spec.ts new file mode 100644 index 0000000000000..f7699af09adb0 --- /dev/null +++ b/x-pack/solutions/observability/packages/kbn-evals-suite-observability-ai/evals/ai_insights/log_insight.spec.ts @@ -0,0 +1,117 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import { tags } from '@kbn/scout'; +import type { LoadResult } from '@kbn/es-snapshot-loader'; +import type { LogInsightParams } from '../../src/clients/ai_insight_client'; +import { + replayObservabilityDataStreams, + cleanObservabilityDataStreams, +} from '../../src/data_generators/replay'; +import { getLogScenarios, type LogScenario } from '../../src/scenarios/log_scenarios'; +import { evaluate } from './evaluate_ai_insights'; + +const INDEX_REFRESH_WAIT_MS = 2500; + +const scenarios = getLogScenarios(); + +for (const scenario of scenarios) { + createScenarioTest(scenario); +} + +function createScenarioTest(scenario: LogScenario) { + evaluate.describe( + `Log AI Insights - ${scenario.id} (${scenario.snapshotName})`, + { tag: tags.serverless.observability.complete }, + () => { + let logDocId: string; + let logIndex: string; + let replayResult: LoadResult; + + evaluate.beforeAll(async ({ esClient, log }) => { + log.info(`Replaying scenario: ${scenario.id}`); + replayResult = await replayObservabilityDataStreams( + esClient, + log, + scenario.snapshotName, + scenario.gcs + ); + + log.debug('Waiting to make sure all indices are refreshed'); + await new Promise((resolve) => 
setTimeout(resolve, INDEX_REFRESH_WAIT_MS)); + + log.info( + `Querying for log: service=${scenario.logQuery.serviceName}, pattern="${scenario.logQuery.messagePattern}"` + ); + + const logResponse = await esClient.search({ + index: scenario.logQuery.index, + query: { + bool: { + filter: [{ term: { 'service.name': scenario.logQuery.serviceName } }], + should: [ + { match_phrase: { message: scenario.logQuery.messagePattern } }, + { match_phrase: { 'exception.message': scenario.logQuery.messagePattern } }, + ], + minimum_should_match: 1, + }, + }, + sort: [{ '@timestamp': 'desc' }], + size: 1, + _source: false, + }); + + const logDoc = logResponse.hits.hits[0]; + if (!logDoc) { + throw new Error( + `No log found for scenario ${scenario.id} (service: ${scenario.logQuery.serviceName}, pattern: "${scenario.logQuery.messagePattern}")` + ); + } + + if (!logDoc._id || !logDoc._index) { + throw new Error(`Log document missing _id or _index for scenario ${scenario.id}`); + } + logDocId = logDoc._id; + logIndex = logDoc._index; + log.info(`Found log document: ${logIndex}/${logDocId}`); + }); + + evaluate( + `Log AI insight correctness (${scenario.id}, ${scenario.snapshotName})`, + async ({ aiInsightClient, evaluateDataset }) => { + await evaluateDataset({ + getInsight: (params) => aiInsightClient.getLogInsight(params), + dataset: { + name: `ai insights: log analysis (${scenario.id}, ${scenario.snapshotName})`, + description: `Evaluates correctness of log AI insight summaries for ${scenario.id} (snapshot: ${scenario.snapshotName})`, + examples: [ + { + input: { + requestPayload: { + index: logIndex, + id: logDocId, + }, + question: + 'Analyze this log entry and provide a summary explaining what it means, identify where it originated, assess the root cause and impact, and recommend next steps.', + }, + output: { + expected: scenario.expectedOutput, + }, + }, + ], + }, + }); + } + ); + + evaluate.afterAll(async ({ esClient, log }) => { + log.debug('Cleaning up indices'); + await 
cleanObservabilityDataStreams(esClient, replayResult, log);
+      });
+    }
+  );
+}
diff --git a/x-pack/solutions/observability/packages/kbn-evals-suite-observability-ai/src/clients/ai_insight_client.ts b/x-pack/solutions/observability/packages/kbn-evals-suite-observability-ai/src/clients/ai_insight_client.ts
index 4ec92ff79a2d0..a01157dea4038 100644
--- a/x-pack/solutions/observability/packages/kbn-evals-suite-observability-ai/src/clients/ai_insight_client.ts
+++ b/x-pack/solutions/observability/packages/kbn-evals-suite-observability-ai/src/clients/ai_insight_client.ts
@@ -23,20 +23,121 @@ export interface ErrorInsightParams {
   environment?: string;
 }
 
+/** Request body of `getLogInsight`: the index and id of the target log document. */
+export interface LogInsightParams {
+  index: string;
+  id: string;
+}
+
+const EVENT_FIELD = 'event';
+const DATA_FIELD = 'data';
+
+interface SseEvent {
+  type: string;
+  data: Record<string, unknown>;
+}
+
+/**
+ * Returns the value of SSE field `name` carried by `line`, or `undefined` when `line` holds a
+ * different field. The single space after the colon is optional in the SSE format.
+ */
+function readSseField(line: string, name: string): string | undefined {
+  if (!line.startsWith(`${name}:`)) return undefined;
+
+  const value = line.slice(name.length + 1);
+  return value.startsWith(' ') ? value.slice(1) : value;
+}
+
+/** Returns `value` when it is a string, otherwise an empty string. */
+function asText(value: unknown): string {
+  return typeof value === 'string' ? value : '';
+}
+
+/**
+ * The AI insight endpoints return SSE (Server-Sent Events) streams.
+ * This parses the raw SSE text into the summary and context fields.
+ *
+ * Events are separated by blank lines; LF, CRLF and CR line endings are accepted. Only the
+ * `event` and `data` fields are read, and multiple `data` lines of one event are joined with
+ * `\n` before being parsed as JSON. Events that lack either field, or whose data is not a JSON
+ * object, are skipped.
+ *
+ * @param raw Response body; anything that is not a string is converted with `String()`.
+ * @returns `summary` is the content of the first `chatCompletionMessage` event, or the
+ * concatenated `chatCompletionChunk` contents when that is missing or empty. `context` comes
+ * from the first `context` event. Missing or non-string values become empty strings.
+ */
+function parseSseResponse(raw: unknown): AiInsightResponse {
+  const text = typeof raw === 'string' ? raw : String(raw);
+  const events: SseEvent[] = [];
+
+  // Normalize CRLF / CR to LF first: "\r\n\r\n" contains no "\n\n", so splitting the raw text
+  // would treat a CRLF stream as one single event.
+  for (const block of text.replace(/\r\n?/g, '\n').split(/\n{2,}/)) {
+    let type: string | undefined;
+    const dataLines: string[] = [];
+
+    for (const line of block.split('\n')) {
+      const field = line.trim();
+      const eventValue = readSseField(field, EVENT_FIELD);
+      const dataValue = readSseField(field, DATA_FIELD);
+
+      // Only the first `event` line names the event; every `data` line contributes.
+      if (eventValue !== undefined && type === undefined) type = eventValue.trim();
+      if (dataValue !== undefined) dataLines.push(dataValue);
+    }
+
+    if (type === undefined || dataLines.length === 0) continue;
+
+    try {
+      const data: unknown = JSON.parse(dataLines.join('\n'));
+      if (typeof data === 'object' && data !== null) {
+        events.push({ type, data: data as Record<string, unknown> });
+      }
+    } catch {
+      // Best effort: an event whose data is not valid JSON is skipped.
+    }
+  }
+
+  const contextEvent = events.find((e) => e.type === 'context');
+  const messageEvent = events.find((e) => e.type === 'chatCompletionMessage');
+
+  const context = asText(contextEvent?.data.context);
+  const summary =
+    asText(messageEvent?.data.content) ||
+    // No complete message was received: rebuild the summary from the streamed chunks.
+    events
+      .filter((e) => e.type === 'chatCompletionChunk')
+      .map((e) => asText(e.data.content))
+      .join('');
+
+  return { summary, context };
+}
+
 export class AiInsightClient {
   constructor(private readonly fetch: HttpHandler) {}
 
   async getAlertInsight(params: AlertInsightParams): Promise<AiInsightResponse> {
-    return this.fetch('/internal/observability_agent_builder/ai_insights/alert', {
+    const raw = await this.fetch('/internal/observability_agent_builder/ai_insights/alert', {
       method: 'POST',
       body: JSON.stringify(params),
-    }) as Promise<AiInsightResponse>;
+    });
+    return parseSseResponse(raw);
   }
 
   async getErrorInsight(params: ErrorInsightParams): Promise<AiInsightResponse> {
-    return this.fetch('/internal/observability_agent_builder/ai_insights/error', {
+    const raw = await this.fetch('/internal/observability_agent_builder/ai_insights/error', {
+      method: 'POST',
+      body: JSON.stringify(params),
+    });
+    return parseSseResponse(raw);
+  }
+
+  async getLogInsight(params: LogInsightParams): Promise<AiInsightResponse> {
+    const raw = await 
this.fetch('/internal/observability_agent_builder/ai_insights/log', { method: 'POST', body: JSON.stringify(params), - }) as Promise; + }); + return parseSseResponse(raw); } } diff --git a/x-pack/solutions/observability/packages/kbn-evals-suite-observability-ai/src/scenarios/alert_scenarios.ts b/x-pack/solutions/observability/packages/kbn-evals-suite-observability-ai/src/scenarios/alert_scenarios.ts index a6d4ed674e5f9..55b952fbf6cc0 100644 --- a/x-pack/solutions/observability/packages/kbn-evals-suite-observability-ai/src/scenarios/alert_scenarios.ts +++ b/x-pack/solutions/observability/packages/kbn-evals-suite-observability-ai/src/scenarios/alert_scenarios.ts @@ -6,9 +6,10 @@ */ import type { AlertScenario } from './types'; -import { PAYMENT_SERVICE_GCS } from './constants'; +import { PAYMENT_SERVICE_GCS, PAYMENT_UNREACHABLE_GCS } from './constants'; const PAYMENT_ERROR_COUNT_ALERT_SCENARIO_ID = 'payment-error-count-alert'; +const PAYMENT_UNREACHABLE_ALERT_SCENARIO_ID = 'payment-unreachable-alert'; const PAYMENT_ALERT_EXPECTED_OUTPUT = `- Summary: A single handled error was detected in the payment service, specifically related to an invalid token during a payment request. The error appears isolated, with no evidence of broader anomalies or downstream impact. @@ -26,6 +27,21 @@ const PAYMENT_ALERT_EXPECTED_OUTPUT = `- Summary: A single handled error was d - Validate that the error is properly handled and does not impact payment processing for valid tokens. - If no further errors occur, monitor for recurrence but no urgent action is required. If errors increase, investigate token validation logic and upstream authentication flows.`; +const PAYMENT_UNREACHABLE_ALERT_EXPECTED = `- Summary: An APM error count alert fired for the frontend service because the payment service is unreachable. The checkout flow fails with a gRPC Unavailable error ("name resolver error: produced zero addresses") when attempting to charge a card via the payment service. 
This is a connectivity or infrastructure failure, not an application code defect. + +- Assessment: The payment service is entirely unreachable from the checkout service — DNS or name resolution returns zero addresses for the payment endpoint. This causes all checkout attempts to fail, resulting in user-facing errors propagated through the frontend. The \`paymentUnreachable\` feature flag in flagd is the most likely cause if this is a test environment; otherwise, this indicates a real infrastructure issue (service down, DNS failure, network partition). + +- Related signals: + + - Errors: "failed to charge card: could not charge the card: rpc error: code = Unavailable desc = name resolver error: produced zero addresses" (apmErrors, last seen within alert window, Direct) — all checkout/payment flows fail. + - Anomalies: Payment service absent from traces (apmServiceSummary, alert window, Direct) — the payment service is not running or not reachable. + - Downstream: checkout and frontend-proxy report errors due to payment unavailability (apmServiceTopology, Indirect). +- Immediate actions: + + 1. Verify the payment service is running, healthy, and reachable from the checkout service's network. + 2. Check DNS resolution for the payment service endpoint from within the checkout service's environment. + 3. 
If using the \`paymentUnreachable\` feature flag, verify its state in flagd and disable it if unintentional.`; + export const ALERT_SCENARIOS: Record = { [PAYMENT_ERROR_COUNT_ALERT_SCENARIO_ID]: { id: PAYMENT_ERROR_COUNT_ALERT_SCENARIO_ID, @@ -56,6 +72,35 @@ export const ALERT_SCENARIOS: Record = { }, expectedOutput: PAYMENT_ALERT_EXPECTED_OUTPUT, }, + [PAYMENT_UNREACHABLE_ALERT_SCENARIO_ID]: { + id: PAYMENT_UNREACHABLE_ALERT_SCENARIO_ID, + description: 'APM error count alert for frontend when payment service is unreachable', + snapshotName: 'payment-unreachable', + gcs: PAYMENT_UNREACHABLE_GCS, + alertRule: { + ruleParams: { + consumer: 'apm', + enabled: true, + name: 'Error count threshold - payment unreachable', + rule_type_id: 'apm.error_rate', + tags: [], + params: { + threshold: 1, + windowSize: 5, + windowUnit: 'm', + serviceName: 'frontend', + environment: 'ENVIRONMENT_ALL', + groupBy: ['service.name', 'service.environment'], + }, + actions: [], + schedule: { + interval: '1m', + }, + }, + alertsIndex: '.alerts-observability.apm.alerts-default', + }, + expectedOutput: PAYMENT_UNREACHABLE_ALERT_EXPECTED, + }, }; export const getAlertScenarios = (): AlertScenario[] => Object.values(ALERT_SCENARIOS); diff --git a/x-pack/solutions/observability/packages/kbn-evals-suite-observability-ai/src/scenarios/constants.ts b/x-pack/solutions/observability/packages/kbn-evals-suite-observability-ai/src/scenarios/constants.ts index 6cf5122dd5906..eea5cf91ef9f5 100644 --- a/x-pack/solutions/observability/packages/kbn-evals-suite-observability-ai/src/scenarios/constants.ts +++ b/x-pack/solutions/observability/packages/kbn-evals-suite-observability-ai/src/scenarios/constants.ts @@ -13,3 +13,18 @@ export const PAYMENT_SERVICE_GCS: GcsConfig = { bucket: GCS_BUCKET, basePath: 'otel-demo/payment-service-failures', }; + +export const PAYMENT_UNREACHABLE_GCS: GcsConfig = { + bucket: GCS_BUCKET, + basePath: 'otel-demo/payment-unreachable', +}; + +export const PRODUCT_CATALOG_GCS: 
GcsConfig = { + bucket: GCS_BUCKET, + basePath: 'otel-demo/product-catalog', +}; + +export const AD_HIGH_CPU_GCS: GcsConfig = { + bucket: GCS_BUCKET, + basePath: 'otel-demo/ad-high-cpu', +}; diff --git a/x-pack/solutions/observability/packages/kbn-evals-suite-observability-ai/src/scenarios/error_scenarios.ts b/x-pack/solutions/observability/packages/kbn-evals-suite-observability-ai/src/scenarios/error_scenarios.ts index 44dc8eb84a5b4..bc56ff2d4a6cb 100644 --- a/x-pack/solutions/observability/packages/kbn-evals-suite-observability-ai/src/scenarios/error_scenarios.ts +++ b/x-pack/solutions/observability/packages/kbn-evals-suite-observability-ai/src/scenarios/error_scenarios.ts @@ -5,16 +5,12 @@ * 2.0. */ -import type { ApmErrorScenario, GcsConfig } from './types'; -import { GCS_BUCKET, PAYMENT_SERVICE_GCS } from './constants'; +import type { ApmErrorScenario } from './types'; +import { PAYMENT_SERVICE_GCS, PAYMENT_UNREACHABLE_GCS, PRODUCT_CATALOG_GCS } from './constants'; const PAYMENT_SERVICE_FAILURE_SCENARIO_ID = 'payment-service-failure'; const PAYMENT_UNREACHABLE_SCENARIO_ID = 'payment-unreachable'; - -const PAYMENT_UNREACHABLE_GCS: GcsConfig = { - bucket: GCS_BUCKET, - basePath: 'otel-demo/payment-unreachable', -}; +const PRODUCT_CATALOG_FAILURE_SCENARIO_ID = 'product-catalog-failure'; const PAYMENT_ERROR_EXPECTED_OUTPUT = `- Error summary: The payment service failed to process a charge request due to an "Invalid token" error, as indicated by the handled exception in the payment service and corroborated by error propagation through checkout and frontend services. @@ -62,6 +58,26 @@ const PAYMENT_UNREACHABLE_EXPECTED_OUTPUT = `- Error summary: - Why is the payment service unreachable (deployment, scaling, network partition)? 
- Are there recent changes to service discovery, configuration, or infrastructure that could have broken connectivity?`; +const PRODUCT_CATALOG_FAILURE_EXPECTED_OUTPUT = `- Error summary: + The frontend fails with "failed to prepare order: failed to get product #OLJCESPC7Z" because the \`product-catalog\` service returns a gRPC Internal error ("Product Catalog Fail Feature Flag Enabled") when retrieving that specific product. The root cause is the \`productCatalogFailure\` feature flag being enabled, which causes a deliberate fault injection in the product catalog service for product \`OLJCESPC7Z\`. + +- Failure pinpoint: + + - The error is observed in the \`frontend\` service when preparing an order. It propagates from \`checkout\`, which calls \`product-catalog\` to validate cart items. The \`product-catalog\` service's \`GetProduct\` RPC fails for product ID \`OLJCESPC7Z\` with gRPC status Internal and message "Error: Product Catalog Fail Feature Flag Enabled". + - The failure originates in the \`product-catalog\` service, which evaluates the \`productCatalogFailure\` feature flag via the flagd provider. When the flag is enabled, the service intentionally rejects requests for this specific product. The feature flag evaluation itself succeeds (flagd dependency is healthy). + - This is a deliberate fault injection, not a code defect or infrastructure failure. +- Impact: + + - Any request that requires fetching product \`OLJCESPC7Z\` (product detail pages, checkout with this item in cart, recommendations including this product) will fail while the feature flag remains enabled. + - Other products are unaffected; \`ListProducts\` and \`SearchProducts\` do not check this flag. + - Multiple services in the trace report errors: \`product-catalog\`, \`checkout\`, \`frontend\`, and \`frontend-proxy\`, indicating user-facing impact on orders containing this product. +- Immediate actions: + + 1. 
Disable the \`productCatalogFailure\` feature flag in the flagd configuration (\`demo.flagd.json\`) or set its \`defaultVariant\` to \`"off"\` to restore normal behavior. + 2. Verify the flag state via the flagd OFREP API or management interface to confirm it is currently enabled. + 3. Review recent changes to \`demo.flagd.json\` or flagd targeting rules to determine if the flag was enabled intentionally (e.g., chaos testing) or accidentally. + 4. Monitor the \`product-catalog\` service error rate after toggling the flag to confirm the errors stop.`; + export const APM_ERROR_SCENARIOS: Record = { [PAYMENT_SERVICE_FAILURE_SCENARIO_ID]: { id: PAYMENT_SERVICE_FAILURE_SCENARIO_ID, @@ -86,6 +102,18 @@ export const APM_ERROR_SCENARIOS: Record = { }, expectedOutput: PAYMENT_UNREACHABLE_EXPECTED_OUTPUT, }, + [PRODUCT_CATALOG_FAILURE_SCENARIO_ID]: { + id: PRODUCT_CATALOG_FAILURE_SCENARIO_ID, + description: + 'Product catalog service fails on product OLJCESPC7Z due to productCatalogFailure feature flag', + snapshotName: 'product-catalog', + gcs: PRODUCT_CATALOG_GCS, + errorQuery: { + errorMessage: 'failed to prepare order: failed to get product #"OLJCESPC7Z"', + serviceName: 'checkout', + }, + expectedOutput: PRODUCT_CATALOG_FAILURE_EXPECTED_OUTPUT, + }, }; export const getErrorScenarios = (): ApmErrorScenario[] => Object.values(APM_ERROR_SCENARIOS); diff --git a/x-pack/solutions/observability/packages/kbn-evals-suite-observability-ai/src/scenarios/log_scenarios.ts b/x-pack/solutions/observability/packages/kbn-evals-suite-observability-ai/src/scenarios/log_scenarios.ts new file mode 100644 index 0000000000000..28ccb5d51ce48 --- /dev/null +++ b/x-pack/solutions/observability/packages/kbn-evals-suite-observability-ai/src/scenarios/log_scenarios.ts @@ -0,0 +1,101 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +import type { LogScenario } from './types'; +import { PAYMENT_SERVICE_GCS, PAYMENT_UNREACHABLE_GCS, PRODUCT_CATALOG_GCS } from './constants'; + +const PAYMENT_UNREACHABLE_ERROR_LOG_ID = 'payment-unreachable-error-log'; +const PRODUCT_CATALOG_ERROR_LOG_ID = 'product-catalog-error-log'; +const INFO_LOG_ID = 'info-log'; + +const PAYMENT_UNREACHABLE_ERROR_LOG_EXPECTED = `- What happened: + A log entry indicates that the checkout or frontend service failed to charge a card because the payment service is unreachable. The gRPC connection fails with "name resolver error: produced zero addresses" and status Unavailable. + +- Where it originated: + The error is logged by the service making the outbound gRPC call to \`oteldemo.PaymentService/Charge\`. The payment service itself is not reachable at the network level. + +- Root cause analysis: + The payment service is unreachable — DNS or name resolution returns no addresses. This is a connectivity or infrastructure failure, not an application bug. The \`paymentUnreachable\` feature flag may be responsible. + +- Impact: + All payment and checkout operations fail. Users cannot complete purchases. + +- Next steps: + 1. Verify the payment service is running and healthy. + 2. Check DNS resolution and network connectivity between checkout and payment services. + 3. Check the \`paymentUnreachable\` feature flag state in flagd.`; + +const PRODUCT_CATALOG_ERROR_LOG_EXPECTED = `- What happened: + The checkout service logged an error when attempting to prepare an order, because it failed to retrieve product OLJCESPC7Z from the product catalog service. The product catalog service returned a gRPC Internal error indicating the \`productCatalogFailure\` feature flag is enabled. 
+ +- Where it originated: + The error originates in the \`product-catalog\` service's \`GetProduct\` handler, which checks the \`productCatalogFailure\` feature flag and intentionally rejects requests for product ID \`OLJCESPC7Z\` when the flag is enabled. + +- Root cause analysis: + This is a deliberate fault injection via the \`productCatalogFailure\` feature flag, not a code defect or infrastructure issue. The flagd dependency is healthy; the error is intentional. + +- Impact: + Any request involving product \`OLJCESPC7Z\` (product detail pages, checkout, recommendations) fails while the flag is enabled. Other products are unaffected. + +- Next steps: + 1. Disable the \`productCatalogFailure\` feature flag in the flagd configuration. + 2. Review recent changes to determine if the flag was enabled intentionally. + 3. Monitor the product-catalog service error rate after toggling the flag.`; + +const INFO_LOG_EXPECTED = `- What the log message means: + This is a normal operational log entry from the \`recommendation\` service. It indicates that the service received a \`ListRecommendations\` request with a set of product IDs and is processing the request. The log level is info, confirming there is no error or warning condition. + +- Source: + The log originates from the \`recommendation\` service's \`ListRecommendations\` gRPC handler, which returns product recommendations excluding the provided product IDs. + +- Context: + This is expected operational behavior. The recommendation service is functioning normally, processing requests as designed. 
No investigation or action is required.`;
+
+export const LOG_SCENARIOS: Record<string, LogScenario> = {
+  [PAYMENT_UNREACHABLE_ERROR_LOG_ID]: {
+    id: PAYMENT_UNREACHABLE_ERROR_LOG_ID,
+    description: 'Error log when payment service is unreachable',
+    snapshotName: 'payment-unreachable',
+    gcs: PAYMENT_UNREACHABLE_GCS,
+    logQuery: {
+      messagePattern:
+        '13 INTERNAL: failed to charge card: could not charge the card: rpc error: code = Unavailable desc = name resolver error: produced zero addresses',
+      serviceName: 'frontend',
+      index: 'logs-*',
+    },
+    expectedOutput: PAYMENT_UNREACHABLE_ERROR_LOG_EXPECTED,
+  },
+  [PRODUCT_CATALOG_ERROR_LOG_ID]: {
+    id: PRODUCT_CATALOG_ERROR_LOG_ID,
+    description: 'Error log when product catalog rejects product OLJCESPC7Z due to feature flag',
+    snapshotName: 'product-catalog',
+    gcs: PRODUCT_CATALOG_GCS,
+    logQuery: {
+      messagePattern: '13 INTERNAL: failed to prepare order: failed to get product #"OLJCESPC7Z"',
+      serviceName: 'frontend',
+      index: 'logs-*',
+    },
+    expectedOutput: PRODUCT_CATALOG_ERROR_LOG_EXPECTED,
+  },
+  [INFO_LOG_ID]: {
+    id: INFO_LOG_ID,
+    description: 'Info-level log entry from a healthy service',
+    snapshotName: 'payment-service-failures',
+    gcs: PAYMENT_SERVICE_GCS,
+    logQuery: {
+      messagePattern:
+        'Receive ListRecommendations for product ids:["9SIQT8TOJO", "0PUK6V6EV0", "HQTGWGPNH4", "1YMWWN1N4O", "6E92ZMYYFZ"]',
+      serviceName: 'recommendation',
+      index: 'logs-*',
+    },
+    expectedOutput: INFO_LOG_EXPECTED,
+  },
+};
+
+export const getLogScenarios = (): LogScenario[] => Object.values(LOG_SCENARIOS);
+
+export type { LogScenario } from './types';
diff --git a/x-pack/solutions/observability/packages/kbn-evals-suite-observability-ai/src/scenarios/types.ts b/x-pack/solutions/observability/packages/kbn-evals-suite-observability-ai/src/scenarios/types.ts
index dacb832072574..8e78d5f1601ef 100644
--- a/x-pack/solutions/observability/packages/kbn-evals-suite-observability-ai/src/scenarios/types.ts
+++ 
b/x-pack/solutions/observability/packages/kbn-evals-suite-observability-ai/src/scenarios/types.ts
@@ -7,6 +7,10 @@
 
 import type { CreateRuleRequestBody } from '@kbn/alerting-plugin/common/routes/rule/apis/create';
 import type { ErrorCountRuleParams } from '@kbn/response-ops-rule-params/error_count';
+import type { TransactionDurationRuleParams } from '@kbn/response-ops-rule-params/transaction_duration';
+
+/** Aggregation type accepted by the transaction duration rule params. */
+export type TransactionDurationAggregationType = TransactionDurationRuleParams['aggregationType'];
 
 export interface GcsConfig {
   bucket: string;
@@ -19,9 +23,12 @@ export interface ErrorQuery {
 }
 
 export type ApmErrorCountRuleCreateRequest = CreateRuleRequestBody<ErrorCountRuleParams>;
+/** Create-rule request body whose params are the transaction duration rule params. */
+export type ApmTransactionDurationRuleCreateRequest =
+  CreateRuleRequestBody<TransactionDurationRuleParams>;
 
 export interface AlertRuleConfig {
-  ruleParams: ApmErrorCountRuleCreateRequest;
+  ruleParams: ApmErrorCountRuleCreateRequest | ApmTransactionDurationRuleCreateRequest;
   alertsIndex: string;
 }
 
@@ -44,3 +51,18 @@ export interface ApmErrorIdSearchFields {
 export interface AlertScenario extends BaseScenario {
   alertRule: AlertRuleConfig;
 }
+
+/** Selects the log document a log insight scenario is evaluated against. */
+export interface LogQuery {
+  /** Phrase to find in the log's `message` or `exception.message`. */
+  messagePattern: string;
+  /** Value of `service.name` the log document must have. */
+  serviceName: string;
+  /** Index or index pattern to search. */
+  index: string;
+}
+
+/** Scenario for evaluating the log AI insight. */
+export interface LogScenario extends BaseScenario {
+  logQuery: LogQuery;
+}