-
Notifications
You must be signed in to change notification settings - Fork 234
fix: improve router.http.request.duration_milliseconds #2564
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
alepane21
wants to merge
27
commits into
main
from
ale/eng-8915-router-router_http_request_duration-and-other-request
Closed
Changes from all commits
Commits
Show all changes
27 commits
Select commit
Hold shift + click to select a range
24ec69c
refactor(router): improve latency metrics handling in engine loader h…
alepane21 befb1cb
Merge remote-tracking branch 'origin/main' into ale/eng-8915-router-r…
alepane21 38d3d54
fix: ignore different response order
alepane21 4e050ea
Merge branch 'main' into ale/eng-8915-router-router_http_request_dura…
alepane21 1dca661
Merge branch 'main' into ale/eng-8915-router-router_http_request_dura…
alepane21 caea193
chore: improve readibility
alepane21 f0cc218
Merge remote-tracking branch 'origin/ale/eng-8915-router-router_http_…
alepane21 8f4baa8
Merge branch 'main' into ale/eng-8915-router-router_http_request_dura…
alepane21 3928ae1
chore: replace Eq To Jsoneq to avoid flakiness
alepane21 6a30f1f
Merge remote-tracking branch 'origin/main' into ale/eng-8915-router-r…
alepane21 f162013
Merge remote-tracking branch 'origin/main' into ale/eng-8915-router-r…
alepane21 08efda8
fix: make test less flaky
alepane21 6823e5e
Merge remote-tracking branch 'origin/main' into ale/eng-8915-router-r…
alepane21 fc2df50
chore: undo changes not needed anymore
alepane21 3c901ce
chore: remove start var calculation
alepane21 3cd8de9
Merge branch 'main' into ale/eng-8915-router-router_http_request_dura…
alepane21 dc7fa25
fix: remove unused field
alepane21 c59e8d1
Merge remote-tracking branch 'origin/ale/eng-8915-router-router_http_…
alepane21 3965823
chore: remove unused context type
alepane21 317cb0b
Merge branch 'main' into ale/eng-8915-router-router_http_request_dura…
alepane21 d4266d8
Merge branch 'main' into ale/eng-8915-router-router_http_request_dura…
alepane21 c413eb3
Merge branch 'main' into ale/eng-8915-router-router_http_request_dura…
alepane21 d4f717d
Merge branch 'main' into ale/eng-8915-router-router_http_request_dura…
alepane21 b874625
Merge branch 'main' into ale/eng-8915-router-router_http_request_dura…
alepane21 60ab2ba
feat: use latency coming up from engine
alepane21 3fa19c4
chore: remove local engine
alepane21 f185aab
fix: remove wrong file
alepane21 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
133 changes: 133 additions & 0 deletions
133
router-tests/observability/prometheus_parallel_subgraph_metrics_test.go
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,133 @@ | ||
| package integration | ||
|
|
||
| import ( | ||
| "context" | ||
| "testing" | ||
| "time" | ||
|
|
||
| "github.com/prometheus/client_golang/prometheus" | ||
| io_prometheus_client "github.com/prometheus/client_model/go" | ||
| "github.com/stretchr/testify/require" | ||
| "github.com/wundergraph/cosmo/router-tests/testenv" | ||
| "go.opentelemetry.io/otel/sdk/metric" | ||
| "google.golang.org/grpc" | ||
| ) | ||
|
|
||
| func requireSubgraphDurationHistogram( | ||
| t *testing.T, | ||
| metricFamilies []*io_prometheus_client.MetricFamily, | ||
| subgraphName string, | ||
| ) *io_prometheus_client.Histogram { | ||
| t.Helper() | ||
|
|
||
| requestDuration := findMetricFamilyByName(metricFamilies, "router_http_request_duration_milliseconds") | ||
| require.NotNil(t, requestDuration) | ||
|
|
||
| durationMetrics := findMetricsByLabel(requestDuration, "wg_subgraph_name", subgraphName) | ||
| require.Len(t, durationMetrics, 1) | ||
|
|
||
| histogram := durationMetrics[0].GetHistogram() | ||
| require.NotNil(t, histogram) | ||
| require.EqualValues(t, 1, histogram.GetSampleCount()) | ||
|
|
||
| return histogram | ||
| } | ||
|
|
||
| func TestPrometheusParallelSubgraphRequestDurationMetrics(t *testing.T) { | ||
| t.Parallel() | ||
|
|
||
| const productsDelay = 1200 * time.Millisecond | ||
|
|
||
| metricReader := metric.NewManualReader() | ||
| promRegistry := prometheus.NewRegistry() | ||
|
|
||
| testenv.Run(t, &testenv.Config{ | ||
| MetricReader: metricReader, | ||
| PrometheusRegistry: promRegistry, | ||
| Subgraphs: testenv.SubgraphsConfig{ | ||
| Products: testenv.SubgraphConfig{ | ||
| Delay: productsDelay, | ||
| }, | ||
| }, | ||
| }, func(t *testing.T, xEnv *testenv.Environment) { | ||
| res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ | ||
| Query: `query TestParallelSubgraphDurations { | ||
| employee(id: 1) { | ||
| id | ||
| } | ||
| productTypes { | ||
| ... on Consultancy { | ||
| name | ||
| } | ||
| ... on Cosmo { | ||
| name | ||
| } | ||
| } | ||
| }`, | ||
| }) | ||
|
|
||
| require.Contains(t, res.Body, `"employee":{"id":1}`) | ||
| require.Contains(t, res.Body, `"productTypes"`) | ||
|
|
||
| metricFamilies, err := promRegistry.Gather() | ||
| require.NoError(t, err) | ||
|
|
||
| employeesDurationMs := requireSubgraphDurationHistogram(t, metricFamilies, "employees").GetSampleSum() | ||
| productsDurationMs := requireSubgraphDurationHistogram(t, metricFamilies, "products").GetSampleSum() | ||
|
|
||
| require.GreaterOrEqual(t, productsDurationMs, float64(productsDelay.Milliseconds())) | ||
| require.Less(t, employeesDurationMs, float64(productsDelay.Milliseconds())/2) | ||
| }) | ||
| } | ||
|
|
||
| func TestPrometheusParallelHTTPAndGRPCSubgraphRequestDurationMetrics(t *testing.T) { | ||
| t.Parallel() | ||
|
|
||
| const projectsDelay = 900 * time.Millisecond | ||
|
|
||
| metricReader := metric.NewManualReader() | ||
| promRegistry := prometheus.NewRegistry() | ||
|
|
||
| testenv.Run(t, &testenv.Config{ | ||
| RouterConfigJSONTemplate: testenv.ConfigWithGRPCJSONTemplate, | ||
| EnableGRPC: true, | ||
| MetricReader: metricReader, | ||
| PrometheusRegistry: promRegistry, | ||
| Subgraphs: testenv.SubgraphsConfig{ | ||
| Projects: testenv.SubgraphConfig{ | ||
| GRPCInterceptor: func( | ||
| ctx context.Context, | ||
| req any, | ||
| _ *grpc.UnaryServerInfo, | ||
| handler grpc.UnaryHandler, | ||
| ) (any, error) { | ||
| time.Sleep(projectsDelay) | ||
| return handler(ctx, req) | ||
| }, | ||
| }, | ||
| }, | ||
| }, func(t *testing.T, xEnv *testenv.Environment) { | ||
| res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ | ||
| Query: `query TestParallelHTTPAndGRPCSubgraphDurations { | ||
| employees { | ||
| id | ||
| } | ||
| projects { | ||
| id | ||
| } | ||
| }`, | ||
| }) | ||
|
|
||
| require.Contains(t, res.Body, `"employees":[`) | ||
| require.Contains(t, res.Body, `"projects":[`) | ||
|
|
||
| metricFamilies, err := promRegistry.Gather() | ||
| require.NoError(t, err) | ||
|
|
||
| employeesDurationMs := requireSubgraphDurationHistogram(t, metricFamilies, "employees").GetSampleSum() | ||
| projectsDurationMs := requireSubgraphDurationHistogram(t, metricFamilies, "projects").GetSampleSum() | ||
|
|
||
| require.GreaterOrEqual(t, projectsDurationMs, float64(projectsDelay.Milliseconds())) | ||
| require.Less(t, employeesDurationMs, float64(projectsDelay.Milliseconds())/2) | ||
| }) | ||
| } |
88 changes: 88 additions & 0 deletions
88
router-tests/observability/structured_logging_grpc_subgraph_latency_test.go
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,88 @@ | ||
| package integration | ||
|
|
||
| import ( | ||
| "context" | ||
| "testing" | ||
| "time" | ||
|
|
||
| "github.com/stretchr/testify/require" | ||
| "github.com/wundergraph/cosmo/router-tests/testenv" | ||
| "go.uber.org/zap/zapcore" | ||
| "go.uber.org/zap/zaptest/observer" | ||
| "google.golang.org/grpc" | ||
| ) | ||
|
|
||
| func requireSubgraphLogContext( | ||
| t *testing.T, | ||
| entries []observer.LoggedEntry, | ||
| subgraphName string, | ||
| ) map[string]interface{} { | ||
| t.Helper() | ||
|
|
||
| for _, entry := range entries { | ||
| contextMap := entry.ContextMap() | ||
| if contextMap["log_type"] == "client/subgraph" && contextMap["subgraph_name"] == subgraphName { | ||
| return contextMap | ||
| } | ||
| } | ||
|
|
||
| t.Fatalf("subgraph log for %q not found", subgraphName) | ||
| return nil | ||
| } | ||
|
|
||
| func TestStructuredLoggingGRPCSubgraphLatency(t *testing.T) { | ||
| t.Parallel() | ||
|
|
||
| const projectsDelay = 900 * time.Millisecond | ||
|
|
||
| testenv.Run(t, &testenv.Config{ | ||
| RouterConfigJSONTemplate: testenv.ConfigWithGRPCJSONTemplate, | ||
| EnableGRPC: true, | ||
| SubgraphAccessLogsEnabled: true, | ||
| LogObservation: testenv.LogObservationConfig{ | ||
| Enabled: true, | ||
| LogLevel: zapcore.InfoLevel, | ||
| }, | ||
| Subgraphs: testenv.SubgraphsConfig{ | ||
| Projects: testenv.SubgraphConfig{ | ||
| GRPCInterceptor: func( | ||
| ctx context.Context, | ||
| req any, | ||
| _ *grpc.UnaryServerInfo, | ||
| handler grpc.UnaryHandler, | ||
| ) (any, error) { | ||
| time.Sleep(projectsDelay) | ||
| return handler(ctx, req) | ||
| }, | ||
| }, | ||
| }, | ||
| }, func(t *testing.T, xEnv *testenv.Environment) { | ||
| res := xEnv.MakeGraphQLRequestOK(testenv.GraphQLRequest{ | ||
| Query: `query TestStructuredLoggingGRPCSubgraphLatency { | ||
| employees { | ||
| id | ||
| } | ||
| projects { | ||
| id | ||
| } | ||
| }`, | ||
| }) | ||
|
|
||
| require.Equal(t, res.Response.StatusCode, 200) | ||
|
|
||
| logEntries := xEnv.Observer().All() | ||
| employeeContext := requireSubgraphLogContext(t, logEntries, "employees") | ||
| projectContext := requireSubgraphLogContext(t, logEntries, "projects") | ||
|
|
||
| employeeLatency, ok := employeeContext["latency"].(time.Duration) | ||
| require.True(t, ok) | ||
| require.Greater(t, employeeLatency, time.Duration(0)) | ||
|
|
||
| projectLatency, ok := projectContext["latency"].(time.Duration) | ||
| require.True(t, ok) | ||
| require.Greater(t, projectLatency, time.Duration(0)) | ||
|
|
||
| // They should be different | ||
| require.NotEqual(t, projectLatency, employeeLatency) | ||
| }) | ||
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This measures the low level round trip, whereas the current measurement should measure this plus the engine hook logic that would add to the request duration.
However more importantly, would it work with GRPC data sources?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good catch! It would not work with gRPC data sources.