diff --git a/docs/sources/operators-guide/mimir-runbooks/_index.md b/docs/sources/operators-guide/mimir-runbooks/_index.md index 49b6a23ed58..426a51b5d0c 100644 --- a/docs/sources/operators-guide/mimir-runbooks/_index.md +++ b/docs/sources/operators-guide/mimir-runbooks/_index.md @@ -1372,7 +1372,7 @@ How to **fix** it: - Increase the per-tenant limit by using the `-distributor.ingestion-rate-limit` (samples per second) and `-distributor.ingestion-burst-size` (number of samples) options (or `ingestion_rate` and `ingestion_burst_size` in the runtime configuration). The configurable burst represents how many samples, exemplars and metadata can temporarily exceed the limit, in case of short traffic peaks. The configured burst size must be greater or equal than the configured limit. -### err-mimir-too-many-ha-clusters +### err-mimir-tenant-too-many-ha-clusters This error occurs when a distributor rejects a write request because the number of [high-availability (HA) clusters]({{< relref "../configuring/configuring-high-availability-deduplication.md" >}}) has hit the configured limit for this tenant. diff --git a/pkg/distributor/distributor.go b/pkg/distributor/distributor.go index 2d6a573cdb9..1e26b84ad58 100644 --- a/pkg/distributor/distributor.go +++ b/pkg/distributor/distributor.go @@ -60,8 +60,8 @@ var ( errInvalidTenantShardSize = errors.New("invalid tenant shard size, the value must be greater or equal to zero") // Distributor instance limits errors. - errMaxInflightRequestsReached = errors.New(globalerror.DistributorMaxInflightPushRequests.MessageWithLimitConfig(maxInflightPushRequestsFlag, "the write request has been rejected because the distributor exceeded the allowed number of inflight push requests")) - errMaxIngestionRateReached = errors.New(globalerror.DistributorMaxIngestionRate.MessageWithLimitConfig(maxIngestionRateFlag, "the write request has been rejected because the distributor exceeded the ingestion rate limit")) + errMaxInflightRequestsReached = errors.New(globalerror.DistributorMaxInflightPushRequests.MessageWithLimitConfig("the write request has been rejected because the distributor exceeded the allowed number of inflight push requests", maxInflightPushRequestsFlag)) + errMaxIngestionRateReached = errors.New(globalerror.DistributorMaxIngestionRate.MessageWithLimitConfig("the write request has been rejected because the distributor exceeded the ingestion rate limit", maxIngestionRateFlag)) ) const ( diff --git a/pkg/distributor/ha_tracker.go b/pkg/distributor/ha_tracker.go index 129b2dfc581..7780290862e 100644 --- a/pkg/distributor/ha_tracker.go +++ b/pkg/distributor/ha_tracker.go @@ -531,8 +531,8 @@ type tooManyClustersError struct { func (e tooManyClustersError) Error() string { return globalerror.TooManyHAClusters.MessageWithLimitConfig( - validation.HATrackerMaxClustersFlag, - fmt.Sprintf("the write request has been rejected because the maximum number of high-availability (HA) clusters has been reached for this tenant (limit: %d)", e.limit)) + fmt.Sprintf("the write request has been rejected because the maximum number of high-availability (HA) clusters has been reached for this tenant (limit: %d)", e.limit), + validation.HATrackerMaxClustersFlag) } // Needed for errors.Is to work properly. diff --git a/pkg/ingester/instance_limits.go b/pkg/ingester/instance_limits.go index 142f7bf9618..36a32414aee 100644 --- a/pkg/ingester/instance_limits.go +++ b/pkg/ingester/instance_limits.go @@ -22,10 +22,10 @@ const ( var ( // We don't include values in the message to avoid leaking Mimir cluster configuration to users. - errMaxIngestionRateReached = errors.New(globalerror.IngesterMaxIngestionRate.MessageWithLimitConfig(maxIngestionRateFlag, "the write request has been rejected because the ingester exceeded the samples ingestion rate limit")) - errMaxTenantsReached = errors.New(globalerror.IngesterMaxTenants.MessageWithLimitConfig(maxInMemoryTenantsFlag, "the write request has been rejected because the ingester exceeded the allowed number of tenants")) - errMaxInMemorySeriesReached = errors.New(globalerror.IngesterMaxInMemorySeries.MessageWithLimitConfig(maxInMemorySeriesFlag, "the write request has been rejected because the ingester exceeded the allowed number of in-memory series")) - errMaxInflightRequestsReached = errors.New(globalerror.IngesterMaxInflightPushRequests.MessageWithLimitConfig(maxInflightPushRequestsFlag, "the write request has been rejected because the ingester exceeded the allowed number of inflight push requests")) + errMaxIngestionRateReached = errors.New(globalerror.IngesterMaxIngestionRate.MessageWithLimitConfig("the write request has been rejected because the ingester exceeded the samples ingestion rate limit", maxIngestionRateFlag)) + errMaxTenantsReached = errors.New(globalerror.IngesterMaxTenants.MessageWithLimitConfig("the write request has been rejected because the ingester exceeded the allowed number of tenants", maxInMemoryTenantsFlag)) + errMaxInMemorySeriesReached = errors.New(globalerror.IngesterMaxInMemorySeries.MessageWithLimitConfig("the write request has been rejected because the ingester exceeded the allowed number of in-memory series", maxInMemorySeriesFlag)) + errMaxInflightRequestsReached = errors.New(globalerror.IngesterMaxInflightPushRequests.MessageWithLimitConfig("the write request has been rejected because the ingester exceeded the allowed number of inflight push requests", maxInflightPushRequestsFlag)) ) // InstanceLimits describes limits used by ingester. Reaching any of these will result in Push method to return diff --git a/pkg/ingester/limiter.go b/pkg/ingester/limiter.go index 7e81dd0a278..7c013aea321 100644 --- a/pkg/ingester/limiter.go +++ b/pkg/ingester/limiter.go @@ -117,8 +117,8 @@ func (l *Limiter) formatMaxSeriesPerUserError(userID string) error { globalLimit := l.limits.MaxGlobalSeriesPerUser(userID) return errors.New(globalerror.MaxSeriesPerUser.MessageWithLimitConfig( - validation.MaxSeriesPerUserFlag, fmt.Sprintf("per-user series limit of %d exceeded", globalLimit), + validation.MaxSeriesPerUserFlag, )) } @@ -126,8 +126,8 @@ func (l *Limiter) formatMaxSeriesPerMetricError(userID string) error { globalLimit := l.limits.MaxGlobalSeriesPerMetric(userID) return errors.New(globalerror.MaxSeriesPerMetric.MessageWithLimitConfig( - validation.MaxSeriesPerMetricFlag, fmt.Sprintf("per-metric series limit of %d exceeded", globalLimit), + validation.MaxSeriesPerMetricFlag, )) } @@ -135,8 +135,8 @@ func (l *Limiter) formatMaxMetadataPerUserError(userID string) error { globalLimit := l.limits.MaxGlobalMetricsWithMetadataPerUser(userID) return errors.New(globalerror.MaxMetadataPerUser.MessageWithLimitConfig( - validation.MaxMetadataPerUserFlag, fmt.Sprintf("per-user metric metadata limit of %d exceeded", globalLimit), + validation.MaxMetadataPerUserFlag, )) } @@ -144,8 +144,8 @@ func (l *Limiter) formatMaxMetadataPerMetricError(userID string) error { globalLimit := l.limits.MaxGlobalMetadataPerMetric(userID) return errors.New(globalerror.MaxMetadataPerMetric.MessageWithLimitConfig( - validation.MaxMetadataPerMetricFlag, fmt.Sprintf("per-metric metadata limit of %d exceeded", globalLimit), + validation.MaxMetadataPerMetricFlag, )) } diff --git a/pkg/querier/blocks_store_queryable.go b/pkg/querier/blocks_store_queryable.go index 47859301986..e1b042635ed 100644 --- a/pkg/querier/blocks_store_queryable.go +++ b/pkg/querier/blocks_store_queryable.go @@ -65,8 +65,8 @@ const ( var ( maxChunksPerQueryLimitMsgFormat = globalerror.MaxChunksPerQuery.MessageWithLimitConfig( - validation.MaxChunksPerQueryFlag, "the query exceeded the maximum number of chunks fetched from store-gateways when querying '%s' (limit: %d)", + validation.MaxChunksPerQueryFlag, ) ) diff --git a/pkg/util/globalerror/errors.go b/pkg/util/globalerror/errors.go index f43765a5c58..ad1ab7d9038 100644 --- a/pkg/util/globalerror/errors.go +++ b/pkg/util/globalerror/errors.go @@ -51,7 +51,7 @@ const ( MaxQueryLength ID = "max-query-length" RequestRateLimited ID = "tenant-max-request-rate" IngestionRateLimited ID = "tenant-max-ingestion-rate" - TooManyHAClusters ID = "too-many-ha-clusters" + TooManyHAClusters ID = "tenant-too-many-ha-clusters" ) // Message returns the provided msg, appending the error id. @@ -59,13 +59,9 @@ func (id ID) Message(msg string) string { return fmt.Sprintf("%s (%s%s)", msg, errPrefix, id) } -// MessageWithLimitConfig return the provided msg, appending the error id and a suggestion on -// which configuration flag to use to change the limit. -func (id ID) MessageWithLimitConfig(flag, msg string) string { - return fmt.Sprintf("%s (%s%s). You can adjust the related per-tenant limit by configuring -%s, or by contacting your service administrator.", msg, errPrefix, id, flag) -} - -func (id ID) MessageWithLimitConfigs(msg, flag string, addFlags ...string) string { +// MessageWithLimitConfig returns the provided msg, appending the error id and a suggestion on +// which configuration flag(s) to use to change the limit. +func (id ID) MessageWithLimitConfig(msg, flag string, addFlags ...string) string { var sb strings.Builder sb.WriteString("-") sb.WriteString(flag) diff --git a/pkg/util/globalerror/errors_test.go b/pkg/util/globalerror/errors_test.go index 53b2a8439fc..1eda1c1533c 100644 --- a/pkg/util/globalerror/errors_test.go +++ b/pkg/util/globalerror/errors_test.go @@ -16,28 +16,21 @@ func TestID_Message(t *testing.T) { } func TestID_MessageWithLimitConfig(t *testing.T) { - assert.Equal( - t, - "an error (err-mimir-missing-metric-name). You can adjust the related per-tenant limit by configuring -my-flag, or by contacting your service administrator.", - MissingMetricName.MessageWithLimitConfig("my-flag", "an error")) -} - -func TestID_MessageWithLimitConfigs(t *testing.T) { for _, tc := range []struct { actual string expected string }{ { actual: "an error (err-mimir-missing-metric-name). You can adjust the related per-tenant limit by configuring -my-flag1, or by contacting your service administrator.", - expected: MissingMetricName.MessageWithLimitConfigs("an error", "my-flag1"), + expected: MissingMetricName.MessageWithLimitConfig("an error", "my-flag1"), }, { actual: "an error (err-mimir-missing-metric-name). You can adjust the related per-tenant limits by configuring -my-flag1 and -my-flag2, or by contacting your service administrator.", - expected: MissingMetricName.MessageWithLimitConfigs("an error", "my-flag1", "my-flag2"), + expected: MissingMetricName.MessageWithLimitConfig("an error", "my-flag1", "my-flag2"), }, { actual: "an error (err-mimir-missing-metric-name). You can adjust the related per-tenant limits by configuring -my-flag1, -my-flag2 and -my-flag3, or by contacting your service administrator.", - expected: MissingMetricName.MessageWithLimitConfigs("an error", "my-flag1", "my-flag2", "my-flag3"), + expected: MissingMetricName.MessageWithLimitConfig("an error", "my-flag1", "my-flag2", "my-flag3"), }, } { assert.Equal(t, tc.actual, tc.expected) diff --git a/pkg/util/limiter/query_limiter.go b/pkg/util/limiter/query_limiter.go index a5282b7ef05..2fdc92f8ba7 100644 --- a/pkg/util/limiter/query_limiter.go +++ b/pkg/util/limiter/query_limiter.go @@ -25,16 +25,16 @@ type queryLimiterCtxKey struct{} var ( ctxKey = &queryLimiterCtxKey{} MaxSeriesHitMsgFormat = globalerror.MaxSeriesPerQuery.MessageWithLimitConfig( - validation.MaxSeriesPerQueryFlag, "the query exceeded the maximum number of series (limit: %d series)", + validation.MaxSeriesPerQueryFlag, ) MaxChunkBytesHitMsgFormat = globalerror.MaxChunkBytesPerQuery.MessageWithLimitConfig( - validation.MaxChunkBytesPerQueryFlag, "the query exceeded the aggregated chunks size limit (limit: %d bytes)", + validation.MaxChunkBytesPerQueryFlag, ) MaxChunksPerQueryLimitMsgFormat = globalerror.MaxChunksPerQuery.MessageWithLimitConfig( - validation.MaxChunksPerQueryFlag, "the query exceeded the maximum number of chunks (limit: %d chunks)", + validation.MaxChunksPerQueryFlag, ) ) diff --git a/pkg/util/validation/errors.go b/pkg/util/validation/errors.go index fad63d40b89..43b06385c16 100644 --- a/pkg/util/validation/errors.go +++ b/pkg/util/validation/errors.go @@ -34,8 +34,8 @@ func (e genericValidationError) Error() string { } var labelNameTooLongMsgFormat = globalerror.SeriesLabelNameTooLong.MessageWithLimitConfig( - maxLabelNameLengthFlag, - "received a series whose label name length exceeds the limit, label: '%.200s' series: '%.200s'") + "received a series whose label name length exceeds the limit, label: '%.200s' series: '%.200s'", + maxLabelNameLengthFlag) func newLabelNameTooLongError(series []mimirpb.LabelAdapter, labelName string) ValidationError { return genericValidationError{ @@ -54,8 +54,8 @@ type labelValueTooLongError struct { func (e labelValueTooLongError) Error() string { return globalerror.SeriesLabelValueTooLong.MessageWithLimitConfig( - maxLabelValueLengthFlag, - fmt.Sprintf("received a series whose label value length exceeds the limit, value: '%.200s' (truncated) series: '%.200s'", e.labelValue, formatLabelSet(e.series))) + fmt.Sprintf("received a series whose label value length exceeds the limit, value: '%.200s' (truncated) series: '%.200s'", e.labelValue, formatLabelSet(e.series)), + maxLabelValueLengthFlag) } func newLabelValueTooLongError(series []mimirpb.LabelAdapter, labelValue string) ValidationError { @@ -112,8 +112,8 @@ func newTooManyLabelsError(series []mimirpb.LabelAdapter, limit int) ValidationE func (e tooManyLabelsError) Error() string { return globalerror.MaxLabelNamesPerSeries.MessageWithLimitConfig( - maxLabelNamesPerSeriesFlag, - fmt.Sprintf("received a series whose number of labels exceeds the limit (actual: %d, limit: %d) series: '%.200s'", len(e.series), e.limit, mimirpb.FromLabelAdaptersToMetric(e.series).String())) + fmt.Sprintf("received a series whose number of labels exceeds the limit (actual: %d, limit: %d) series: '%.200s'", len(e.series), e.limit, mimirpb.FromLabelAdaptersToMetric(e.series).String()), + maxLabelNamesPerSeriesFlag) } type noMetricNameError struct{} @@ -152,8 +152,8 @@ func (e sampleValidationError) Error() string { } var sampleTimestampTooNewMsgFormat = globalerror.SampleTooFarInFuture.MessageWithLimitConfig( - creationGracePeriodFlag, - "received a sample whose timestamp is too far in the future, timestamp: %d series: '%.200s'") + "received a sample whose timestamp is too far in the future, timestamp: %d series: '%.200s'", + creationGracePeriodFlag) func newSampleTimestampTooNewError(metricName string, timestamp int64) ValidationError { return sampleValidationError{ @@ -233,9 +233,9 @@ func (e metadataValidationError) Error() string { } var metadataMetricNameTooLongMsgFormat = globalerror.MetricMetadataMetricNameTooLong.MessageWithLimitConfig( - maxMetadataLengthFlag, // When formatting this error the "cause" will always be an empty string. - "received a metric metadata whose metric name length exceeds the limit, metric name: '%.200[2]s'") + "received a metric metadata whose metric name length exceeds the limit, metric name: '%.200[2]s'", + maxMetadataLengthFlag) func newMetadataMetricNameTooLongError(metadata *mimirpb.MetricMetadata) ValidationError { return metadataValidationError{ @@ -246,8 +246,8 @@ func newMetadataMetricNameTooLongError(metadata *mimirpb.MetricMetadata) Validat } var metadataHelpTooLongMsgFormat = globalerror.MetricMetadataHelpTooLong.MessageWithLimitConfig( - maxMetadataLengthFlag, - "received a metric metadata whose help description length exceeds the limit, help: '%.200s' metric name: '%.200s'") + "received a metric metadata whose help description length exceeds the limit, help: '%.200s' metric name: '%.200s'", + maxMetadataLengthFlag) func newMetadataHelpTooLongError(metadata *mimirpb.MetricMetadata) ValidationError { return metadataValidationError{ @@ -258,8 +258,8 @@ func newMetadataHelpTooLongError(metadata *mimirpb.MetricMetadata) ValidationErr } var metadataUnitTooLongMsgFormat = globalerror.MetricMetadataUnitTooLong.MessageWithLimitConfig( - maxMetadataLengthFlag, - "received a metric metadata whose unit name length exceeds the limit, unit: '%.200s' metric name: '%.200s'") + "received a metric metadata whose unit name length exceeds the limit, unit: '%.200s' metric name: '%.200s'", + maxMetadataLengthFlag) func newMetadataUnitTooLongError(metadata *mimirpb.MetricMetadata) ValidationError { return metadataValidationError{ @@ -271,18 +271,18 @@ func newMetadataUnitTooLongError(metadata *mimirpb.MetricMetadata) ValidationErr func NewMaxQueryLengthError(actualQueryLen, maxQueryLength time.Duration) LimitError { return LimitError(globalerror.MaxQueryLength.MessageWithLimitConfig( - maxQueryLengthFlag, - fmt.Sprintf("the query time range exceeds the limit (query length: %s, limit: %s)", actualQueryLen, maxQueryLength))) + fmt.Sprintf("the query time range exceeds the limit (query length: %s, limit: %s)", actualQueryLen, maxQueryLength), + maxQueryLengthFlag)) } func NewRequestRateLimitedError(limit float64, burst int) LimitError { - return LimitError(globalerror.RequestRateLimited.MessageWithLimitConfigs( + return LimitError(globalerror.RequestRateLimited.MessageWithLimitConfig( fmt.Sprintf("the request has been rejected because the tenant exceeded the request rate limit, set to %v req/s with a maximum allowed burst of %d", limit, burst), requestRateFlag, requestBurstSizeFlag)) } func NewIngestionRateLimitedError(limit float64, burst, numSamples, numExemplars, numMetadata int) LimitError { - return LimitError(globalerror.IngestionRateLimited.MessageWithLimitConfigs( + return LimitError(globalerror.IngestionRateLimited.MessageWithLimitConfig( fmt.Sprintf("the request has been rejected because the tenant exceeded the ingestion rate limit, set to %v items/s with a maximum allowed burst of %d, while adding %d samples, %d exemplars and %d metadata", limit, burst, numSamples, numExemplars, numMetadata), ingestionRateFlag, ingestionBurstSizeFlag)) } diff --git a/pkg/util/validation/validate.go b/pkg/util/validation/validate.go index 4bb915d19d8..14fb942b039 100644 --- a/pkg/util/validation/validate.go +++ b/pkg/util/validation/validate.go @@ -58,7 +58,7 @@ var ( ReasonRateLimited = "rate_limited" // same for request and ingestion which are separate errors, so not using metricReasonFromErrorID with global error // ReasonTooManyHAClusters is one of the reasons for discarding samples. - ReasonTooManyHAClusters = metricReasonFromErrorID(globalerror.TooManyHAClusters) + ReasonTooManyHAClusters = "too_many_ha_clusters" ) func metricReasonFromErrorID(id globalerror.ID) string {