From eb614193d46f1c9b32466e01947f964382b3393e Mon Sep 17 00:00:00 2001 From: Oliver Gupte Date: Thu, 29 Oct 2020 16:16:32 -0700 Subject: [PATCH] Closes #80629, with proper timeout messaging and docs for user to work around the scalability issue. (#82083) --- docs/settings/apm-settings.asciidoc | 6 +++ x-pack/plugins/apm/common/service_map.ts | 2 + .../components/app/ServiceMap/index.tsx | 18 ++++++- .../app/ServiceMap/timeout_prompt.tsx | 53 +++++++++++++++++++ .../plugins/apm/public/hooks/useFetcher.tsx | 2 +- .../lib/service_map/get_trace_sample_ids.ts | 46 +++++++++------- 6 files changed, 105 insertions(+), 22 deletions(-) create mode 100644 x-pack/plugins/apm/public/components/app/ServiceMap/timeout_prompt.tsx diff --git a/docs/settings/apm-settings.asciidoc b/docs/settings/apm-settings.asciidoc index f8eacc12eb8d6..eb5260f13fbfc 100644 --- a/docs/settings/apm-settings.asciidoc +++ b/docs/settings/apm-settings.asciidoc @@ -48,6 +48,12 @@ Changing these settings may disable features of the APM App. | `xpack.apm.enabled` | Set to `false` to disable the APM app. Defaults to `true`. +| `xpack.apm.serviceMapFingerprintBucketSize` + | Maximum number of unique transaction combinations sampled for generating the service map focused on a specific service. Defaults to `100`. + +| `xpack.apm.serviceMapFingerprintGlobalBucketSize` + | Maximum number of unique transaction combinations sampled for generating the global service map. Defaults to `100`. + | `xpack.apm.ui.enabled` {ess-icon} | Set to `false` to hide the APM app from the main menu. Defaults to `true`. 
diff --git a/x-pack/plugins/apm/common/service_map.ts b/x-pack/plugins/apm/common/service_map.ts index 02456f9b2050f..6edf56fb9a1ae 100644 --- a/x-pack/plugins/apm/common/service_map.ts +++ b/x-pack/plugins/apm/common/service_map.ts @@ -91,3 +91,5 @@ export function isSpanGroupingSupported(type?: string, subtype?: string) { nongroupedSubType === 'all' || nongroupedSubType === subtype ); } + +export const SERVICE_MAP_TIMEOUT_ERROR = 'ServiceMapTimeoutError'; diff --git a/x-pack/plugins/apm/public/components/app/ServiceMap/index.tsx b/x-pack/plugins/apm/public/components/app/ServiceMap/index.tsx index d167b6a9a0565..752f9b7fda243 100644 --- a/x-pack/plugins/apm/public/components/app/ServiceMap/index.tsx +++ b/x-pack/plugins/apm/public/components/app/ServiceMap/index.tsx @@ -10,6 +10,7 @@ import { useTrackPageview } from '../../../../../observability/public'; import { invalidLicenseMessage, isActivePlatinumLicense, + SERVICE_MAP_TIMEOUT_ERROR, } from '../../../../common/service_map'; import { FETCH_STATUS, useFetcher } from '../../../hooks/useFetcher'; import { useLicense } from '../../../hooks/useLicense'; @@ -22,6 +23,7 @@ import { Cytoscape } from './Cytoscape'; import { getCytoscapeDivStyle } from './cytoscape_options'; import { EmptyBanner } from './EmptyBanner'; import { EmptyPrompt } from './empty_prompt'; +import { TimeoutPrompt } from './timeout_prompt'; import { Popover } from './Popover'; import { useRefDimensions } from './useRefDimensions'; @@ -61,7 +63,7 @@ export function ServiceMap({ serviceName }: ServiceMapProps) { const license = useLicense(); const { urlParams } = useUrlParams(); - const { data = { elements: [] }, status } = useFetcher(() => { + const { data = { elements: [] }, status, error } = useFetcher(() => { // When we don't have a license or a valid license, don't make the request. 
if (!license || !isActivePlatinumLicense(license)) { return; @@ -109,6 +111,20 @@ export function ServiceMap({ serviceName }: ServiceMapProps) { ); } + if ( + status === FETCH_STATUS.FAILURE && + error && + 'body' in error && + error.body.statusCode === 500 && + error.body.message === SERVICE_MAP_TIMEOUT_ERROR + ) { + return ( + + + + ); + } + return (
+ {i18n.translate('xpack.apm.serviceMap.timeoutPromptTitle', { + defaultMessage: 'Service map timeout', + })} + + } + body={ +

+ {i18n.translate('xpack.apm.serviceMap.timeoutPromptDescription', { + defaultMessage: `Timed out while fetching data for service map. Limit the scope by selecting a smaller time range, or use configuration setting '{configName}' with a reduced value.`, + values: { + configName: isGlobalServiceMap + ? 'xpack.apm.serviceMapFingerprintGlobalBucketSize' + : 'xpack.apm.serviceMapFingerprintBucketSize', + }, + })} +

+ } + actions={} + /> + ); +} + +function ApmSettingsDocLink() { + return ( + + {i18n.translate('xpack.apm.serviceMap.timeoutPrompt.docsLink', { + defaultMessage: 'Learn more about APM settings in the docs', + })} + + ); +} diff --git a/x-pack/plugins/apm/public/hooks/useFetcher.tsx b/x-pack/plugins/apm/public/hooks/useFetcher.tsx index 5d65424844c5a..6add0e8a2b480 100644 --- a/x-pack/plugins/apm/public/hooks/useFetcher.tsx +++ b/x-pack/plugins/apm/public/hooks/useFetcher.tsx @@ -21,7 +21,7 @@ export enum FETCH_STATUS { export interface FetcherResult { data?: Data; status: FETCH_STATUS; - error?: Error; + error?: IHttpFetchError; } // fetcher functions can return undefined OR a promise. Previously we had a more simple type diff --git a/x-pack/plugins/apm/server/lib/service_map/get_trace_sample_ids.ts b/x-pack/plugins/apm/server/lib/service_map/get_trace_sample_ids.ts index dfc4e02c25a7f..524b9bfdc7891 100644 --- a/x-pack/plugins/apm/server/lib/service_map/get_trace_sample_ids.ts +++ b/x-pack/plugins/apm/server/lib/service_map/get_trace_sample_ids.ts @@ -4,6 +4,7 @@ * you may not use this file except in compliance with the Elastic License. 
*/ import { uniq, take, sortBy } from 'lodash'; +import Boom from 'boom'; import { ProcessorEvent } from '../../../common/processor_event'; import { Setup, SetupTimeRange } from '../helpers/setup_request'; import { rangeFilter } from '../../../common/utils/range_filter'; @@ -15,6 +16,7 @@ import { SPAN_DESTINATION_SERVICE_RESOURCE, } from '../../../common/elasticsearch_fieldnames'; import { getEnvironmentUiFilterES } from '../helpers/convert_ui_filters/get_environment_ui_filter_es'; +import { SERVICE_MAP_TIMEOUT_ERROR } from '../../../common/service_map'; const MAX_TRACES_TO_INSPECT = 1000; @@ -122,26 +124,30 @@ export async function getTraceSampleIds({ }, }; - const tracesSampleResponse = await apmEventClient.search(params); + try { + const tracesSampleResponse = await apmEventClient.search(params); + // make sure at least one trace per composite/connection bucket + // is queried + const traceIdsWithPriority = + tracesSampleResponse.aggregations?.connections.buckets.flatMap((bucket) => + bucket.sample.trace_ids.buckets.map((sampleDocBucket, index) => ({ + traceId: sampleDocBucket.key as string, + priority: index, + })) + ) || []; - // make sure at least one trace per composite/connection bucket - // is queried - const traceIdsWithPriority = - tracesSampleResponse.aggregations?.connections.buckets.flatMap((bucket) => - bucket.sample.trace_ids.buckets.map((sampleDocBucket, index) => ({ - traceId: sampleDocBucket.key as string, - priority: index, - })) - ) || []; + const traceIds = take( + uniq( + sortBy(traceIdsWithPriority, 'priority').map(({ traceId }) => traceId) + ), + MAX_TRACES_TO_INSPECT + ); - const traceIds = take( - uniq( - sortBy(traceIdsWithPriority, 'priority').map(({ traceId }) => traceId) - ), - MAX_TRACES_TO_INSPECT - ); - - return { - traceIds, - }; + return { traceIds }; + } catch (error) { + if ('displayName' in error && error.displayName === 'RequestTimeout') { + throw Boom.internal(SERVICE_MAP_TIMEOUT_ERROR); + } + throw error; + } }