Skip to content

Commit 345dc85

Browse files
committed
Add -frontend.enabled-ruler-query-stats flag
Signed-off-by: SungJin1212 <[email protected]>
1 parent cf61041 commit 345dc85

File tree

4 files changed

+61
-21
lines changed

4 files changed

+61
-21
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
* [FEATURE] Query Frontend: Support a metadata federated query when `-tenant-federation.enabled=true`. #6461
2323
* [FEATURE] Query Frontend: Support an exemplar federated query when `-tenant-federation.enabled=true`. #6455
2424
* [FEATURE] Ingester/StoreGateway: Add support for cache regex query matchers via `-ingester.matchers-cache-max-items` and `-blocks-storage.bucket-store.matchers-cache-max-items`. #6477 #6491
25+
* [ENHANCEMENT] Query Frontend: Add a `-frontend.enabled-ruler-query-stats` flag to configure whether to report the query stats log for queries coming from the Ruler. #6504
2526
* [ENHANCEMENT] Query Frontend: Add more operation label values to the `cortex_query_frontend_queries_total` metric. #6519
2627
* [ENHANCEMENT] Query Frontend: Add a `source` label to query stat metrics. #6470
2728
* [ENHANCEMENT] Query Frontend: Add a flag `-tenant-federation.max-tenant` to limit the number of tenants for federated query. #6493

docs/configuration/config-file-reference.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4104,6 +4104,12 @@ The `query_frontend_config` configures the Cortex query-frontend.
41044104
# CLI flag: -frontend.query-stats-enabled
41054105
[query_stats_enabled: <boolean> | default = false]
41064106
4107+
# If enabled, report the query stats log for queries coming from the ruler to
4108+
# evaluate rules. It only takes effect when '-ruler.frontend-address' is
4109+
# configured.
4110+
# CLI flag: -frontend.enabled-ruler-query-stats
4111+
[enabled_ruler_query_stats_log: <boolean> | default = false]
4112+
41074113
# If a querier disconnects without sending notification about graceful shutdown,
41084114
# the query-frontend will keep the querier in the tenant's shard until the
41094115
# forget delay has passed. This feature is useful to reduce the blast radius

pkg/frontend/transport/handler.go

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -73,15 +73,17 @@ const (
7373

7474
// Config for a Handler.
7575
type HandlerConfig struct {
76-
LogQueriesLongerThan time.Duration `yaml:"log_queries_longer_than"`
77-
MaxBodySize int64 `yaml:"max_body_size"`
78-
QueryStatsEnabled bool `yaml:"query_stats_enabled"`
76+
LogQueriesLongerThan time.Duration `yaml:"log_queries_longer_than"`
77+
MaxBodySize int64 `yaml:"max_body_size"`
78+
QueryStatsEnabled bool `yaml:"query_stats_enabled"`
79+
EnabledRulerQueryStatsLog bool `yaml:"enabled_ruler_query_stats_log"`
7980
}
8081

8182
func (cfg *HandlerConfig) RegisterFlags(f *flag.FlagSet) {
8283
f.DurationVar(&cfg.LogQueriesLongerThan, "frontend.log-queries-longer-than", 0, "Log queries that are slower than the specified duration. Set to 0 to disable. Set to < 0 to enable on all queries.")
8384
f.Int64Var(&cfg.MaxBodySize, "frontend.max-body-size", 10*1024*1024, "Max body size for downstream prometheus.")
8485
f.BoolVar(&cfg.QueryStatsEnabled, "frontend.query-stats-enabled", false, "True to enable query statistics tracking. When enabled, a message with some statistics is logged for every query.")
86+
f.BoolVar(&cfg.EnabledRulerQueryStatsLog, "frontend.enabled-ruler-query-stats", false, "If enabled, report the query stats log for queries coming from the ruler to evaluate rules. It only takes effect when '-ruler.frontend-address' is configured.")
8587
}
8688

8789
// Handler accepts queries and forwards them to RoundTripper. It can log slow queries,
@@ -240,10 +242,11 @@ func (f *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
240242
r.Body = io.NopCloser(&buf)
241243
}
242244

245+
source := tripperware.GetSource(r.Header.Get("User-Agent"))
243246
// Log request
244247
if f.cfg.QueryStatsEnabled {
245248
queryString = f.parseRequestQueryString(r, buf)
246-
f.logQueryRequest(r, queryString)
249+
f.logQueryRequest(r, queryString, source)
247250
}
248251

249252
startTime := time.Now()
@@ -276,7 +279,6 @@ func (f *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
276279
}
277280
}
278281

279-
source := tripperware.GetSource(r.Header.Get("User-Agent"))
280282
f.reportQueryStats(r, source, userID, queryString, queryResponseTime, stats, err, statusCode, resp)
281283
}
282284

@@ -317,7 +319,7 @@ func formatGrafanaStatsFields(r *http.Request) []interface{} {
317319
}
318320

319321
// logQueryRequest logs query request before query execution.
320-
func (f *Handler) logQueryRequest(r *http.Request, queryString url.Values) {
322+
func (f *Handler) logQueryRequest(r *http.Request, queryString url.Values, source string) {
321323
logMessage := []interface{}{
322324
"msg", "query request",
323325
"component", "query-frontend",
@@ -328,9 +330,12 @@ func (f *Handler) logQueryRequest(r *http.Request, queryString url.Values) {
328330
if len(grafanaFields) > 0 {
329331
logMessage = append(logMessage, grafanaFields...)
330332
}
331-
logMessage = append(logMessage, formatQueryString(queryString)...)
332333

333-
level.Info(util_log.WithContext(r.Context(), f.log)).Log(logMessage...)
334+
shouldLog := source == tripperware.SourceAPI || (f.cfg.EnabledRulerQueryStatsLog && source == tripperware.SourceRuler)
335+
if shouldLog {
336+
logMessage = append(logMessage, formatQueryString(queryString)...)
337+
level.Info(util_log.WithContext(r.Context(), f.log)).Log(logMessage...)
338+
}
334339
}
335340

336341
// reportSlowQuery reports slow queries.
@@ -449,11 +454,15 @@ func (f *Handler) reportQueryStats(r *http.Request, source, userID string, query
449454
logMessage = append(logMessage, "error", s.Message())
450455
}
451456
}
452-
logMessage = append(logMessage, formatQueryString(queryString)...)
453-
if error != nil {
454-
level.Error(util_log.WithContext(r.Context(), f.log)).Log(logMessage...)
455-
} else {
456-
level.Info(util_log.WithContext(r.Context(), f.log)).Log(logMessage...)
457+
458+
shouldLog := source == tripperware.SourceAPI || (f.cfg.EnabledRulerQueryStatsLog && source == tripperware.SourceRuler)
459+
if shouldLog {
460+
logMessage = append(logMessage, formatQueryString(queryString)...)
461+
if error != nil {
462+
level.Error(util_log.WithContext(r.Context(), f.log)).Log(logMessage...)
463+
} else {
464+
level.Info(util_log.WithContext(r.Context(), f.log)).Log(logMessage...)
465+
}
457466
}
458467

459468
var reason string

pkg/frontend/transport/handler_test.go

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -417,28 +417,31 @@ func TestHandler_ServeHTTP(t *testing.T) {
417417
func TestReportQueryStatsFormat(t *testing.T) {
418418
outputBuf := bytes.NewBuffer(nil)
419419
logger := log.NewSyncLogger(log.NewLogfmtLogger(outputBuf))
420-
handler := NewHandler(HandlerConfig{QueryStatsEnabled: true}, tenantfederation.Config{}, http.DefaultTransport, logger, nil)
421420
userID := "fake"
422421
req, _ := http.NewRequest(http.MethodGet, "http://localhost:8080/prometheus/api/v1/query", nil)
423422
resp := &http.Response{ContentLength: 1000}
424423
responseTime := time.Second
425424
statusCode := http.StatusOK
426425

427426
type testCase struct {
428-
queryString url.Values
429-
queryStats *querier_stats.QueryStats
430-
header http.Header
431-
responseErr error
432-
expectedLog string
427+
queryString url.Values
428+
queryStats *querier_stats.QueryStats
429+
header http.Header
430+
responseErr error
431+
expectedLog string
432+
enabledRulerQueryStatsLog bool
433+
source string
433434
}
434435

435436
tests := map[string]testCase{
436437
"should not include query and header details if empty": {
437438
expectedLog: `level=info msg="query stats" component=query-frontend method=GET path=/prometheus/api/v1/query response_time=1s query_wall_time_seconds=0 response_series_count=0 fetched_series_count=0 fetched_chunks_count=0 fetched_samples_count=0 fetched_chunks_bytes=0 fetched_data_bytes=0 split_queries=0 status_code=200 response_size=1000`,
439+
source: tripperware.SourceAPI,
438440
},
439441
"should include query length and string at the end": {
440442
queryString: url.Values(map[string][]string{"query": {"up"}}),
441443
expectedLog: `level=info msg="query stats" component=query-frontend method=GET path=/prometheus/api/v1/query response_time=1s query_wall_time_seconds=0 response_series_count=0 fetched_series_count=0 fetched_chunks_count=0 fetched_samples_count=0 fetched_chunks_bytes=0 fetched_data_bytes=0 split_queries=0 status_code=200 response_size=1000 query_length=2 param_query=up`,
444+
source: tripperware.SourceAPI,
442445
},
443446
"should include query stats": {
444447
queryStats: &querier_stats.QueryStats{
@@ -455,14 +458,17 @@ func TestReportQueryStatsFormat(t *testing.T) {
455458
},
456459
},
457460
expectedLog: `level=info msg="query stats" component=query-frontend method=GET path=/prometheus/api/v1/query response_time=1s query_wall_time_seconds=3 response_series_count=100 fetched_series_count=100 fetched_chunks_count=200 fetched_samples_count=300 fetched_chunks_bytes=1024 fetched_data_bytes=2048 split_queries=10 status_code=200 response_size=1000 query_storage_wall_time_seconds=6000`,
461+
source: tripperware.SourceAPI,
458462
},
459463
"should include user agent": {
460464
header: http.Header{"User-Agent": []string{"Grafana"}},
461465
expectedLog: `level=info msg="query stats" component=query-frontend method=GET path=/prometheus/api/v1/query response_time=1s query_wall_time_seconds=0 response_series_count=0 fetched_series_count=0 fetched_chunks_count=0 fetched_samples_count=0 fetched_chunks_bytes=0 fetched_data_bytes=0 split_queries=0 status_code=200 response_size=1000 user_agent=Grafana`,
466+
source: tripperware.SourceAPI,
462467
},
463468
"should include response error": {
464469
responseErr: errors.New("foo_err"),
465470
expectedLog: `level=error msg="query stats" component=query-frontend method=GET path=/prometheus/api/v1/query response_time=1s query_wall_time_seconds=0 response_series_count=0 fetched_series_count=0 fetched_chunks_count=0 fetched_samples_count=0 fetched_chunks_bytes=0 fetched_data_bytes=0 split_queries=0 status_code=200 response_size=1000 error=foo_err`,
471+
source: tripperware.SourceAPI,
466472
},
467473
"should include query priority": {
468474
queryString: url.Values(map[string][]string{"query": {"up"}}),
@@ -471,6 +477,7 @@ func TestReportQueryStatsFormat(t *testing.T) {
471477
PriorityAssigned: true,
472478
},
473479
expectedLog: `level=info msg="query stats" component=query-frontend method=GET path=/prometheus/api/v1/query response_time=1s query_wall_time_seconds=0 response_series_count=0 fetched_series_count=0 fetched_chunks_count=0 fetched_samples_count=0 fetched_chunks_bytes=0 fetched_data_bytes=0 split_queries=0 status_code=200 response_size=1000 query_length=2 priority=99 param_query=up`,
480+
source: tripperware.SourceAPI,
474481
},
475482
"should include data fetch min and max time": {
476483
queryString: url.Values(map[string][]string{"query": {"up"}}),
@@ -479,6 +486,7 @@ func TestReportQueryStatsFormat(t *testing.T) {
479486
DataSelectMinTime: 1704067200000,
480487
},
481488
expectedLog: `level=info msg="query stats" component=query-frontend method=GET path=/prometheus/api/v1/query response_time=1s query_wall_time_seconds=0 response_series_count=0 fetched_series_count=0 fetched_chunks_count=0 fetched_samples_count=0 fetched_chunks_bytes=0 fetched_data_bytes=0 split_queries=0 status_code=200 response_size=1000 data_select_max_time=1704153600 data_select_min_time=1704067200 query_length=2 param_query=up`,
489+
source: tripperware.SourceAPI,
482490
},
483491
"should include query stats with store gateway stats": {
484492
queryStats: &querier_stats.QueryStats{
@@ -497,16 +505,32 @@ func TestReportQueryStatsFormat(t *testing.T) {
497505
},
498506
},
499507
expectedLog: `level=info msg="query stats" component=query-frontend method=GET path=/prometheus/api/v1/query response_time=1s query_wall_time_seconds=3 response_series_count=100 fetched_series_count=100 fetched_chunks_count=200 fetched_samples_count=300 fetched_chunks_bytes=1024 fetched_data_bytes=2048 split_queries=10 status_code=200 response_size=1000 store_gateway_touched_postings_count=20 store_gateway_touched_posting_bytes=200 query_storage_wall_time_seconds=6000`,
508+
source: tripperware.SourceAPI,
509+
},
510+
"should not report a log": {
511+
expectedLog: ``,
512+
source: tripperware.SourceRuler,
513+
enabledRulerQueryStatsLog: false,
514+
},
515+
"should report a log": {
516+
expectedLog: `level=info msg="query stats" component=query-frontend method=GET path=/prometheus/api/v1/query response_time=1s query_wall_time_seconds=0 response_series_count=0 fetched_series_count=0 fetched_chunks_count=0 fetched_samples_count=0 fetched_chunks_bytes=0 fetched_data_bytes=0 split_queries=0 status_code=200 response_size=1000`,
517+
source: tripperware.SourceRuler,
518+
enabledRulerQueryStatsLog: true,
500519
},
501520
}
502521

503522
for testName, testData := range tests {
504523
t.Run(testName, func(t *testing.T) {
524+
handler := NewHandler(HandlerConfig{QueryStatsEnabled: true, EnabledRulerQueryStatsLog: testData.enabledRulerQueryStatsLog}, tenantfederation.Config{}, http.DefaultTransport, logger, nil)
505525
req.Header = testData.header
506-
handler.reportQueryStats(req, tripperware.SourceAPI, userID, testData.queryString, responseTime, testData.queryStats, testData.responseErr, statusCode, resp)
526+
handler.reportQueryStats(req, testData.source, userID, testData.queryString, responseTime, testData.queryStats, testData.responseErr, statusCode, resp)
507527
data, err := io.ReadAll(outputBuf)
508528
require.NoError(t, err)
509-
require.Equal(t, testData.expectedLog+"\n", string(data))
529+
if testData.expectedLog == "" {
530+
require.Empty(t, string(data))
531+
} else {
532+
require.Equal(t, testData.expectedLog+"\n", string(data))
533+
}
510534
})
511535
}
512536
}

0 commit comments

Comments
 (0)