diff --git a/.changes/73448090-7b5b-4c5a-875a-730efacacf9d.json b/.changes/73448090-7b5b-4c5a-875a-730efacacf9d.json new file mode 100644 index 000000000..d9a80fdd0 --- /dev/null +++ b/.changes/73448090-7b5b-4c5a-875a-730efacacf9d.json @@ -0,0 +1,8 @@ +{ + "id": "73448090-7b5b-4c5a-875a-730efacacf9d", + "type": "misc", + "description": "Tweak metrics to better support service-level benchmarks", + "issues": [ + "awslabs/aws-sdk-kotlin#968" + ] +} \ No newline at end of file diff --git a/runtime/observability/telemetry-api/common/src/aws/smithy/kotlin/runtime/telemetry/metrics/Histogram.kt b/runtime/observability/telemetry-api/common/src/aws/smithy/kotlin/runtime/telemetry/metrics/Histogram.kt index 8cfd5c555..2e23e7d49 100644 --- a/runtime/observability/telemetry-api/common/src/aws/smithy/kotlin/runtime/telemetry/metrics/Histogram.kt +++ b/runtime/observability/telemetry-api/common/src/aws/smithy/kotlin/runtime/telemetry/metrics/Histogram.kt @@ -39,8 +39,8 @@ public typealias DoubleHistogram = Histogram */ @InternalApi public fun DoubleHistogram.recordSeconds(value: Duration, attributes: Attributes = emptyAttributes(), context: Context? = null) { - val ms = value.inWholeMilliseconds.toDouble() - val sec = ms / 1000 + val ms = value.inWholeNanoseconds.toDouble() + val sec = ms / 1_000_000_000 record(sec, attributes, context) } diff --git a/runtime/protocol/http-client-engines/http-client-engine-okhttp/jvm/src/aws/smithy/kotlin/runtime/http/engine/okhttp/HttpEngineEventListener.kt b/runtime/protocol/http-client-engines/http-client-engine-okhttp/jvm/src/aws/smithy/kotlin/runtime/http/engine/okhttp/HttpEngineEventListener.kt index 5e7bb5ef4..bb75b814b 100644 --- a/runtime/protocol/http-client-engines/http-client-engine-okhttp/jvm/src/aws/smithy/kotlin/runtime/http/engine/okhttp/HttpEngineEventListener.kt +++ b/runtime/protocol/http-client-engines/http-client-engine-okhttp/jvm/src/aws/smithy/kotlin/runtime/http/engine/okhttp/HttpEngineEventListener.kt @@ -5,6 +5,7 @@ package aws.smithy.kotlin.runtime.http.engine.okhttp import aws.smithy.kotlin.runtime.ExperimentalApi +import aws.smithy.kotlin.runtime.http.engine.EngineAttributes import aws.smithy.kotlin.runtime.http.engine.internal.HttpClientMetrics import aws.smithy.kotlin.runtime.net.HostResolver import aws.smithy.kotlin.runtime.net.toHostAddress @@ -54,6 +55,8 @@ internal class HttpEngineEventListener( private var signaledConnectAcquireDuration = false private var dnsStartTime: TimeMark? = null + private var requestTimeEnd: TimeMark? = null + private inline fun trace(crossinline msg: MessageSupplier) { logger.trace { msg() } } @@ -158,14 +161,22 @@ internal class HttpEngineEventListener( override fun requestBodyStart(call: Call) = trace { "sending request body" } - override fun requestBodyEnd(call: Call, byteCount: Long) = + override fun requestBodyEnd(call: Call, byteCount: Long) { + requestTimeEnd = TimeSource.Monotonic.markNow() trace { "finished sending request body: bytesSent=$byteCount" } + } override fun requestFailed(call: Call, ioe: IOException) = trace(ioe) { "request failed" } override fun requestHeadersStart(call: Call) = trace { "sending request headers" } - override fun requestHeadersEnd(call: Call, request: Request) = trace { "finished sending request headers" } + override fun requestHeadersEnd(call: Call, request: Request) { + if (request.body == null) { + requestTimeEnd = TimeSource.Monotonic.markNow() + } + + trace { "finished sending request headers" } + } override fun responseBodyStart(call: Call) = trace { "response body available" } @@ -174,7 +185,13 @@ internal class HttpEngineEventListener( override fun responseFailed(call: Call, ioe: IOException) = trace(ioe) { "response failed" } - override fun responseHeadersStart(call: Call) = trace { "response headers start" } + override fun responseHeadersStart(call: Call) { + requestTimeEnd?.elapsedNow()?.let { ttfb -> + metrics.timeToFirstByteDuration.recordSeconds(ttfb) + call.request().tag()?.execContext?.set(EngineAttributes.TimeToFirstByte, ttfb) + } + trace { "response headers start" } + } override fun responseHeadersEnd(call: Call, response: Response) { val contentLength = response.body.contentLength() diff --git a/runtime/protocol/http-client/common/src/aws/smithy/kotlin/runtime/http/engine/EngineAttributes.kt b/runtime/protocol/http-client/common/src/aws/smithy/kotlin/runtime/http/engine/EngineAttributes.kt new file mode 100644 index 000000000..1f167e507 --- /dev/null +++ b/runtime/protocol/http-client/common/src/aws/smithy/kotlin/runtime/http/engine/EngineAttributes.kt @@ -0,0 +1,22 @@ +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +package aws.smithy.kotlin.runtime.http.engine + +import aws.smithy.kotlin.runtime.InternalApi +import aws.smithy.kotlin.runtime.util.AttributeKey +import kotlin.time.Duration + +/** + * Common attributes related to HTTP engines. + */ +@InternalApi +public object EngineAttributes { + /** + * The time between sending the request completely and receiving the first byte of the response. This effectively + * measures the time spent waiting on a response. + */ + public val TimeToFirstByte: AttributeKey = AttributeKey("TimeToFirstByte") +} diff --git a/runtime/protocol/http-client/common/src/aws/smithy/kotlin/runtime/http/engine/internal/HttpClientMetrics.kt b/runtime/protocol/http-client/common/src/aws/smithy/kotlin/runtime/http/engine/internal/HttpClientMetrics.kt index 0fa7d7cec..f518ba1c2 100644 --- a/runtime/protocol/http-client/common/src/aws/smithy/kotlin/runtime/http/engine/internal/HttpClientMetrics.kt +++ b/runtime/protocol/http-client/common/src/aws/smithy/kotlin/runtime/http/engine/internal/HttpClientMetrics.kt @@ -116,6 +116,12 @@ public class HttpClientMetrics( "The total number of bytes received by the HTTP client", ) + public val timeToFirstByteDuration: DoubleHistogram = meter.createDoubleHistogram( + "smithy.client.http.time_to_first_byte", + "s", + "The amount of time after a request has been sent spent waiting on a response from the remote server", + ) + /** * The maximum number of connections configured for the client */ diff --git a/runtime/protocol/http-client/common/src/aws/smithy/kotlin/runtime/http/interceptors/OperationTelemetryInterceptor.kt b/runtime/protocol/http-client/common/src/aws/smithy/kotlin/runtime/http/interceptors/OperationTelemetryInterceptor.kt index 579202c5a..6756d0880 100644 --- a/runtime/protocol/http-client/common/src/aws/smithy/kotlin/runtime/http/interceptors/OperationTelemetryInterceptor.kt +++ b/runtime/protocol/http-client/common/src/aws/smithy/kotlin/runtime/http/interceptors/OperationTelemetryInterceptor.kt @@ -9,13 +9,12 @@ import aws.smithy.kotlin.runtime.client.ProtocolRequestInterceptorContext import aws.smithy.kotlin.runtime.client.ProtocolResponseInterceptorContext import aws.smithy.kotlin.runtime.client.RequestInterceptorContext import aws.smithy.kotlin.runtime.client.ResponseInterceptorContext +import aws.smithy.kotlin.runtime.http.engine.EngineAttributes import aws.smithy.kotlin.runtime.http.operation.OperationMetrics import aws.smithy.kotlin.runtime.http.request.HttpRequest import aws.smithy.kotlin.runtime.http.response.HttpResponse import aws.smithy.kotlin.runtime.telemetry.metrics.recordSeconds -import aws.smithy.kotlin.runtime.util.attributesOf -import aws.smithy.kotlin.runtime.util.merge -import aws.smithy.kotlin.runtime.util.mutableAttributesOf +import aws.smithy.kotlin.runtime.util.* import kotlin.time.ExperimentalTime import kotlin.time.TimeMark import kotlin.time.TimeSource @@ -39,7 +38,7 @@ internal class OperationTelemetryInterceptor( private var callStart: TimeMark? = null private var serializeStart: TimeMark? = null private var deserializeStart: TimeMark? = null - private var transmitStart: TimeMark? = null + private var attemptStart: TimeMark? = null private var attempts = 0 private val perRpcAttributes = attributesOf { @@ -94,6 +93,13 @@ internal class OperationTelemetryInterceptor( if (attempts > 1) { metrics.rpcRetryCount.add(1L, perRpcAttributes, metrics.provider.contextManager.current()) } + + val attemptDuration = attemptStart?.elapsedNow() ?: return + metrics.rpcAttemptDuration.recordSeconds(attemptDuration, perRpcAttributes, metrics.provider.contextManager.current()) + + context.executionContext.takeOrNull(EngineAttributes.TimeToFirstByte)?.let { ttfb -> + metrics.rpcAttemptOverheadDuration.recordSeconds(attemptDuration - ttfb, perRpcAttributes) + } } override fun readBeforeDeserialization(context: ProtocolResponseInterceptorContext) { @@ -105,12 +111,7 @@ internal class OperationTelemetryInterceptor( metrics.deserializationDuration.recordSeconds(deserializeDuration, perRpcAttributes, metrics.provider.contextManager.current()) } - override fun readBeforeTransmit(context: ProtocolRequestInterceptorContext) { - transmitStart = timeSource.markNow() - } - - override fun readAfterTransmit(context: ProtocolResponseInterceptorContext) { - val serviceCallDuration = transmitStart?.elapsedNow() ?: return - metrics.rpcAttemptDuration.recordSeconds(serviceCallDuration, perRpcAttributes, metrics.provider.contextManager.current()) + override fun readBeforeAttempt(context: ProtocolRequestInterceptorContext) { + attemptStart = timeSource.markNow() } } diff --git a/runtime/protocol/http-client/common/src/aws/smithy/kotlin/runtime/http/operation/OperationMetrics.kt b/runtime/protocol/http-client/common/src/aws/smithy/kotlin/runtime/http/operation/OperationMetrics.kt index 269a42842..f7d899667 100644 --- a/runtime/protocol/http-client/common/src/aws/smithy/kotlin/runtime/http/operation/OperationMetrics.kt +++ b/runtime/protocol/http-client/common/src/aws/smithy/kotlin/runtime/http/operation/OperationMetrics.kt @@ -34,7 +34,8 @@ public class OperationMetrics( public val rpcRetryCount: MonotonicCounter = meter.createMonotonicCounter("smithy.client.retries", "{count}", "The number of retries for an operation") public val rpcRequestSize: LongHistogram = meter.createLongHistogram("smithy.client.request.size", "By", "Size of the serialized request message") public val rpcResponseSize: LongHistogram = meter.createLongHistogram("smithy.client.response.size", "By", "Size of the serialized response message") - public val rpcAttemptDuration: DoubleHistogram = meter.createDoubleHistogram("smithy.client.attempt_duration", "s", "The time it takes to connect to the service, send the request, and receive the HTTP status code and headers from the response for an operation") + public val rpcAttemptDuration: DoubleHistogram = meter.createDoubleHistogram("smithy.client.attempt_duration", "s", "The time it takes to connect to complete an entire call attempt, including identity resolution, endpoint resolution, signing, sending the request, and receiving the HTTP status code and headers from the response for an operation") + public val rpcAttemptOverheadDuration: DoubleHistogram = meter.createDoubleHistogram("smithy.client.attempt_overhead_duration", "s", "The time it takes to execute an attempt minus the time spent waiting for a response from the remote server") public val serializationDuration: DoubleHistogram = meter.createDoubleHistogram("smithy.client.serialization_duration", "s", "The time it takes to serialize a request message body") public val deserializationDuration: DoubleHistogram = meter.createDoubleHistogram("smithy.client.deserialization_duration", "s", "The time it takes to deserialize a response message body") public val resolveEndpointDuration: DoubleHistogram = meter.createDoubleHistogram("smithy.client.resolve_endpoint_duration", "s", "The time it takes to resolve an endpoint for a request")