Skip to content

Commit 3665b72

Browse files
committed
Add -frontend.enabled-ruler-query-stats flag
Signed-off-by: SungJin1212 <[email protected]>
1 parent 5dd1072 commit 3665b72

File tree

4 files changed

+79
-22
lines changed

4 files changed

+79
-22
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
* [FEATURE] Query Frontend: Support a metadata federated query when `-tenant-federation.enabled=true`. #6461
2323
* [FEATURE] Query Frontend: Support an exemplar federated query when `-tenant-federation.enabled=true`. #6455
2424
* [FEATURE] Ingester/StoreGateway: Add support for cache regex query matchers via `-ingester.matchers-cache-max-items` and `-blocks-storage.bucket-store.matchers-cache-max-items`. #6477 #6491
25+
* [ENHANCEMENT] Query Frontend: Add a `-frontend.enabled-ruler-query-stats` flag to configure whether to report the query stats log for queries coming from the Ruler. #6501
2526
* [ENHANCEMENT] Query Frontend: Add a `source` label to query stat metrics. #6470
2627
* [ENHANCEMENT] Querier: Add a `-tenant-federation.max-concurrent` flag to configure the number of workers processing federated queries and add a `cortex_querier_federated_tenants_per_query` histogram to track the number of tenants per query. #6449
2728
* [ENHANCEMENT] Query Frontend: Add a number of series in the query response to the query stat log. #6423

docs/configuration/config-file-reference.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4094,6 +4094,12 @@ The `query_frontend_config` configures the Cortex query-frontend.
40944094
# CLI flag: -frontend.query-stats-enabled
40954095
[query_stats_enabled: <boolean> | default = false]
40964096
4097+
# If enabled, report the log of the query stats for queries coming from the
4098+
# ruler to evaluate rules. It only takes effect when '-ruler.frontend-address'
4099+
# is configured.
4100+
# CLI flag: -frontend.enabled-ruler-query-stats
4101+
[enabled_ruler_query_stats-log: <boolean> | default = false]
4102+
40974103
# If a querier disconnects without sending notification about graceful shutdown,
40984104
# the query-frontend will keep the querier in the tenant's shard until the
40994105
# forget delay has passed. This feature is useful to reduce the blast radius

pkg/frontend/transport/handler.go

Lines changed: 40 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -70,15 +70,17 @@ const (
7070

7171
// Config for a Handler.
7272
type HandlerConfig struct {
73-
LogQueriesLongerThan time.Duration `yaml:"log_queries_longer_than"`
74-
MaxBodySize int64 `yaml:"max_body_size"`
75-
QueryStatsEnabled bool `yaml:"query_stats_enabled"`
73+
LogQueriesLongerThan time.Duration `yaml:"log_queries_longer_than"`
74+
MaxBodySize int64 `yaml:"max_body_size"`
75+
QueryStatsEnabled bool `yaml:"query_stats_enabled"`
76+
EnabledRulerQueryStatsLog bool `yaml:"enabled_ruler_query_stats-log"`
7677
}
7778

7879
func (cfg *HandlerConfig) RegisterFlags(f *flag.FlagSet) {
7980
f.DurationVar(&cfg.LogQueriesLongerThan, "frontend.log-queries-longer-than", 0, "Log queries that are slower than the specified duration. Set to 0 to disable. Set to < 0 to enable on all queries.")
8081
f.Int64Var(&cfg.MaxBodySize, "frontend.max-body-size", 10*1024*1024, "Max body size for downstream prometheus.")
8182
f.BoolVar(&cfg.QueryStatsEnabled, "frontend.query-stats-enabled", false, "True to enable query statistics tracking. When enabled, a message with some statistics is logged for every query.")
83+
f.BoolVar(&cfg.EnabledRulerQueryStatsLog, "frontend.enabled-ruler-query-stats", false, "If enabled, report the log of the query stats for queries coming from the ruler to evaluate rules. It only takes effect when '-ruler.frontend-address' is configured.")
8284
}
8385

8486
// Handler accepts queries and forwards them to RoundTripper. It can log slow queries,
@@ -226,10 +228,11 @@ func (f *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
226228
r.Body = io.NopCloser(&buf)
227229
}
228230

231+
source := tripperware.GetSource(r.Header.Get("User-Agent"))
229232
// Log request
230233
if f.cfg.QueryStatsEnabled {
231234
queryString = f.parseRequestQueryString(r, buf)
232-
f.logQueryRequest(r, queryString)
235+
f.logQueryRequest(r, queryString, source)
233236
}
234237

235238
startTime := time.Now()
@@ -262,7 +265,6 @@ func (f *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
262265
}
263266
}
264267

265-
source := tripperware.GetSource(r.Header.Get("User-Agent"))
266268
f.reportQueryStats(r, source, userID, queryString, queryResponseTime, stats, err, statusCode, resp)
267269
}
268270

@@ -303,7 +305,7 @@ func formatGrafanaStatsFields(r *http.Request) []interface{} {
303305
}
304306

305307
// logQueryRequest logs query request before query execution.
306-
func (f *Handler) logQueryRequest(r *http.Request, queryString url.Values) {
308+
func (f *Handler) logQueryRequest(r *http.Request, queryString url.Values, source string) {
307309
logMessage := []interface{}{
308310
"msg", "query request",
309311
"component", "query-frontend",
@@ -314,9 +316,17 @@ func (f *Handler) logQueryRequest(r *http.Request, queryString url.Values) {
314316
if len(grafanaFields) > 0 {
315317
logMessage = append(logMessage, grafanaFields...)
316318
}
317-
logMessage = append(logMessage, formatQueryString(queryString)...)
318-
319-
level.Info(util_log.WithContext(r.Context(), f.log)).Log(logMessage...)
319+
switch source {
320+
case tripperware.SourceAPI:
321+
// Always report the log if the source is not the `ruler`.
322+
logMessage = append(logMessage, formatQueryString(queryString)...)
323+
level.Info(util_log.WithContext(r.Context(), f.log)).Log(logMessage...)
324+
case tripperware.SourceRuler:
325+
if f.cfg.EnabledRulerQueryStatsLog {
326+
logMessage = append(logMessage, formatQueryString(queryString)...)
327+
level.Info(util_log.WithContext(r.Context(), f.log)).Log(logMessage...)
328+
}
329+
}
320330
}
321331

322332
// reportSlowQuery reports slow queries.
@@ -435,11 +445,27 @@ func (f *Handler) reportQueryStats(r *http.Request, source, userID string, query
435445
logMessage = append(logMessage, "error", s.Message())
436446
}
437447
}
438-
logMessage = append(logMessage, formatQueryString(queryString)...)
439-
if error != nil {
440-
level.Error(util_log.WithContext(r.Context(), f.log)).Log(logMessage...)
441-
} else {
442-
level.Info(util_log.WithContext(r.Context(), f.log)).Log(logMessage...)
448+
449+
switch source {
450+
case tripperware.SourceAPI:
451+
// Always report the log if the source is not the `ruler`.
452+
logMessage = append(logMessage, formatQueryString(queryString)...)
453+
if error != nil {
454+
level.Error(util_log.WithContext(r.Context(), f.log)).Log(logMessage...)
455+
} else {
456+
level.Info(util_log.WithContext(r.Context(), f.log)).Log(logMessage...)
457+
}
458+
case tripperware.SourceRuler:
459+
if f.cfg.EnabledRulerQueryStatsLog {
460+
// report a log only if `-frontend.enabled-ruler-query-stats`
461+
// is enabled when queries come from the Ruler.
462+
logMessage = append(logMessage, formatQueryString(queryString)...)
463+
if error != nil {
464+
level.Error(util_log.WithContext(r.Context(), f.log)).Log(logMessage...)
465+
} else {
466+
level.Info(util_log.WithContext(r.Context(), f.log)).Log(logMessage...)
467+
}
468+
}
443469
}
444470

445471
var reason string

pkg/frontend/transport/handler_test.go

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -413,28 +413,31 @@ func TestHandler_ServeHTTP(t *testing.T) {
413413
func TestReportQueryStatsFormat(t *testing.T) {
414414
outputBuf := bytes.NewBuffer(nil)
415415
logger := log.NewSyncLogger(log.NewLogfmtLogger(outputBuf))
416-
handler := NewHandler(HandlerConfig{QueryStatsEnabled: true}, http.DefaultTransport, logger, nil)
417416
userID := "fake"
418417
req, _ := http.NewRequest(http.MethodGet, "http://localhost:8080/prometheus/api/v1/query", nil)
419418
resp := &http.Response{ContentLength: 1000}
420419
responseTime := time.Second
421420
statusCode := http.StatusOK
422421

423422
type testCase struct {
424-
queryString url.Values
425-
queryStats *querier_stats.QueryStats
426-
header http.Header
427-
responseErr error
428-
expectedLog string
423+
queryString url.Values
424+
queryStats *querier_stats.QueryStats
425+
header http.Header
426+
responseErr error
427+
expectedLog string
428+
enabledRulerQueryStatsLog bool
429+
source string
429430
}
430431

431432
tests := map[string]testCase{
432433
"should not include query and header details if empty": {
433434
expectedLog: `level=info msg="query stats" component=query-frontend method=GET path=/prometheus/api/v1/query response_time=1s query_wall_time_seconds=0 response_series_count=0 fetched_series_count=0 fetched_chunks_count=0 fetched_samples_count=0 fetched_chunks_bytes=0 fetched_data_bytes=0 split_queries=0 status_code=200 response_size=1000`,
435+
source: tripperware.SourceAPI,
434436
},
435437
"should include query length and string at the end": {
436438
queryString: url.Values(map[string][]string{"query": {"up"}}),
437439
expectedLog: `level=info msg="query stats" component=query-frontend method=GET path=/prometheus/api/v1/query response_time=1s query_wall_time_seconds=0 response_series_count=0 fetched_series_count=0 fetched_chunks_count=0 fetched_samples_count=0 fetched_chunks_bytes=0 fetched_data_bytes=0 split_queries=0 status_code=200 response_size=1000 query_length=2 param_query=up`,
440+
source: tripperware.SourceAPI,
438441
},
439442
"should include query stats": {
440443
queryStats: &querier_stats.QueryStats{
@@ -451,14 +454,17 @@ func TestReportQueryStatsFormat(t *testing.T) {
451454
},
452455
},
453456
expectedLog: `level=info msg="query stats" component=query-frontend method=GET path=/prometheus/api/v1/query response_time=1s query_wall_time_seconds=3 response_series_count=100 fetched_series_count=100 fetched_chunks_count=200 fetched_samples_count=300 fetched_chunks_bytes=1024 fetched_data_bytes=2048 split_queries=10 status_code=200 response_size=1000 query_storage_wall_time_seconds=6000`,
457+
source: tripperware.SourceAPI,
454458
},
455459
"should include user agent": {
456460
header: http.Header{"User-Agent": []string{"Grafana"}},
457461
expectedLog: `level=info msg="query stats" component=query-frontend method=GET path=/prometheus/api/v1/query response_time=1s query_wall_time_seconds=0 response_series_count=0 fetched_series_count=0 fetched_chunks_count=0 fetched_samples_count=0 fetched_chunks_bytes=0 fetched_data_bytes=0 split_queries=0 status_code=200 response_size=1000 user_agent=Grafana`,
462+
source: tripperware.SourceAPI,
458463
},
459464
"should include response error": {
460465
responseErr: errors.New("foo_err"),
461466
expectedLog: `level=error msg="query stats" component=query-frontend method=GET path=/prometheus/api/v1/query response_time=1s query_wall_time_seconds=0 response_series_count=0 fetched_series_count=0 fetched_chunks_count=0 fetched_samples_count=0 fetched_chunks_bytes=0 fetched_data_bytes=0 split_queries=0 status_code=200 response_size=1000 error=foo_err`,
467+
source: tripperware.SourceAPI,
462468
},
463469
"should include query priority": {
464470
queryString: url.Values(map[string][]string{"query": {"up"}}),
@@ -467,6 +473,7 @@ func TestReportQueryStatsFormat(t *testing.T) {
467473
PriorityAssigned: true,
468474
},
469475
expectedLog: `level=info msg="query stats" component=query-frontend method=GET path=/prometheus/api/v1/query response_time=1s query_wall_time_seconds=0 response_series_count=0 fetched_series_count=0 fetched_chunks_count=0 fetched_samples_count=0 fetched_chunks_bytes=0 fetched_data_bytes=0 split_queries=0 status_code=200 response_size=1000 query_length=2 priority=99 param_query=up`,
476+
source: tripperware.SourceAPI,
470477
},
471478
"should include data fetch min and max time": {
472479
queryString: url.Values(map[string][]string{"query": {"up"}}),
@@ -475,6 +482,7 @@ func TestReportQueryStatsFormat(t *testing.T) {
475482
DataSelectMinTime: 1704067200000,
476483
},
477484
expectedLog: `level=info msg="query stats" component=query-frontend method=GET path=/prometheus/api/v1/query response_time=1s query_wall_time_seconds=0 response_series_count=0 fetched_series_count=0 fetched_chunks_count=0 fetched_samples_count=0 fetched_chunks_bytes=0 fetched_data_bytes=0 split_queries=0 status_code=200 response_size=1000 data_select_max_time=1704153600 data_select_min_time=1704067200 query_length=2 param_query=up`,
485+
source: tripperware.SourceAPI,
478486
},
479487
"should include query stats with store gateway stats": {
480488
queryStats: &querier_stats.QueryStats{
@@ -493,16 +501,32 @@ func TestReportQueryStatsFormat(t *testing.T) {
493501
},
494502
},
495503
expectedLog: `level=info msg="query stats" component=query-frontend method=GET path=/prometheus/api/v1/query response_time=1s query_wall_time_seconds=3 response_series_count=100 fetched_series_count=100 fetched_chunks_count=200 fetched_samples_count=300 fetched_chunks_bytes=1024 fetched_data_bytes=2048 split_queries=10 status_code=200 response_size=1000 store_gateway_touched_postings_count=20 store_gateway_touched_posting_bytes=200 query_storage_wall_time_seconds=6000`,
504+
source: tripperware.SourceAPI,
505+
},
506+
"should not report a log": {
507+
expectedLog: ``,
508+
source: tripperware.SourceRuler,
509+
enabledRulerQueryStatsLog: false,
510+
},
511+
"should report a log": {
512+
expectedLog: `level=info msg="query stats" component=query-frontend method=GET path=/prometheus/api/v1/query response_time=1s query_wall_time_seconds=0 response_series_count=0 fetched_series_count=0 fetched_chunks_count=0 fetched_samples_count=0 fetched_chunks_bytes=0 fetched_data_bytes=0 split_queries=0 status_code=200 response_size=1000`,
513+
source: tripperware.SourceRuler,
514+
enabledRulerQueryStatsLog: true,
496515
},
497516
}
498517

499518
for testName, testData := range tests {
500519
t.Run(testName, func(t *testing.T) {
520+
handler := NewHandler(HandlerConfig{QueryStatsEnabled: true, EnabledRulerQueryStatsLog: testData.enabledRulerQueryStatsLog}, http.DefaultTransport, logger, nil)
501521
req.Header = testData.header
502-
handler.reportQueryStats(req, tripperware.SourceAPI, userID, testData.queryString, responseTime, testData.queryStats, testData.responseErr, statusCode, resp)
522+
handler.reportQueryStats(req, testData.source, userID, testData.queryString, responseTime, testData.queryStats, testData.responseErr, statusCode, resp)
503523
data, err := io.ReadAll(outputBuf)
504524
require.NoError(t, err)
505-
require.Equal(t, testData.expectedLog+"\n", string(data))
525+
if testData.expectedLog == "" {
526+
require.Empty(t, string(data))
527+
} else {
528+
require.Equal(t, testData.expectedLog+"\n", string(data))
529+
}
506530
})
507531
}
508532
}

0 commit comments

Comments
 (0)