diff --git a/CHANGELOG.md b/CHANGELOG.md index e4889940101..51f47b5135e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,9 @@ * [CHANGE] Querier: deprecated `-store.max-look-back-period`. You should use `-querier.max-query-lookback` instead. #3452 * [CHANGE] Blocks storage: increased `-blocks-storage.bucket-store.chunks-cache.attributes-ttl` default from `24h` to `168h` (1 week). #3528 * [CHANGE] Blocks storage: the config option `-blocks-storage.bucket-store.index-cache.postings-compression-enabled` has been deprecated and postings compression is always enabled. #3538 +* [CHANGE] Ruler: gRPC message size default limits on the Ruler-client side have changed: #3523 + - limit for outgoing gRPC messages has changed from 2147483647 to 16777216 bytes + - limit for incoming gRPC messages has changed from 4194304 to 104857600 bytes * [FEATURE] Distributor/Ingester: Provide ability to not overflow writes in the presence of a leaving or unhealthy ingester. This allows for more efficient ingester rolling restarts. #3305 * [ENHANCEMENT] API: Add GZIP HTTP compression to the API responses. Compression can be enabled via `-api.response-compression-enabled`. #3536 * [ENHANCEMENT] Added zone-awareness support on queries. When zone-awareness is enabled, queries will still succeed if all ingesters in a single zone will fail. #3414 @@ -35,6 +38,7 @@ * [ENHANCEMENT] Exported process metrics to monitor the number of memory map areas allocated. #3537 * - `process_memory_map_areas` * - `process_memory_map_areas_limit` +* [ENHANCEMENT] Ruler: Expose gRPC client options. #3523 * [BUGFIX] Blocks storage ingester: fixed some cases leading to a TSDB WAL corruption after a partial write to disk. #3423 * [BUGFIX] Blocks storage: Fix the race between ingestion and `/flush` call resulting in overlapping blocks. #3422 * [BUGFIX] Querier: fixed `-querier.max-query-into-future` which wasn't correctly enforced on range queries. 
#3452 diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md index 0c56e6aa9c3..09e662b0d3a 100644 --- a/docs/configuration/config-file-reference.md +++ b/docs/configuration/config-file-reference.md @@ -1057,6 +1057,49 @@ The `ruler_config` configures the Cortex ruler. [external_url: <url> | default = ] ruler_client: + # gRPC client max receive message size (bytes). + # CLI flag: -ruler.client.grpc-max-recv-msg-size + [max_recv_msg_size: <int> | default = 104857600] + + # gRPC client max send message size (bytes). + # CLI flag: -ruler.client.grpc-max-send-msg-size + [max_send_msg_size: <int> | default = 16777216] + + # Deprecated: Use gzip compression when sending messages. If true, overrides + # grpc-compression flag. + # CLI flag: -ruler.client.grpc-use-gzip-compression + [use_gzip_compression: <boolean> | default = false] + + # Use compression when sending messages. Supported values are: 'gzip', + # 'snappy' and '' (disable compression) + # CLI flag: -ruler.client.grpc-compression + [grpc_compression: <string> | default = ""] + + # Rate limit for gRPC client; 0 means disabled. + # CLI flag: -ruler.client.grpc-client-rate-limit + [rate_limit: <float> | default = 0] + + # Rate limit burst for gRPC client. + # CLI flag: -ruler.client.grpc-client-rate-limit-burst + [rate_limit_burst: <int> | default = 0] + + # Enable backoff and retry when we hit ratelimits. + # CLI flag: -ruler.client.backoff-on-ratelimits + [backoff_on_ratelimits: <boolean> | default = false] + + backoff_config: + # Minimum delay when backing off. + # CLI flag: -ruler.client.backoff-min-period + [min_period: <duration> | default = 100ms] + + # Maximum delay when backing off. + # CLI flag: -ruler.client.backoff-max-period + [max_period: <duration> | default = 10s] + + # Number of times to backoff and retry before failing. + # CLI flag: -ruler.client.backoff-retries + [max_retries: <int> | default = 10] + # Path to the client certificate file, which will be used for authenticating # with the server. 
Also requires the key path to be configured. # CLI flag: -ruler.client.tls-cert-path diff --git a/pkg/cortex/cortex.go b/pkg/cortex/cortex.go index f1a0e3bf9ec..f6bf144eb14 100644 --- a/pkg/cortex/cortex.go +++ b/pkg/cortex/cortex.go @@ -180,7 +180,7 @@ func (c *Config) Validate(log log.Logger) error { if err := c.ChunkStore.Validate(log); err != nil { return errors.Wrap(err, "invalid chunk store config") } - if err := c.Ruler.Validate(c.LimitsConfig); err != nil { + if err := c.Ruler.Validate(c.LimitsConfig, log); err != nil { return errors.Wrap(err, "invalid ruler config") } if err := c.BlocksStorage.Validate(); err != nil { diff --git a/pkg/ruler/ruler.go b/pkg/ruler/ruler.go index 3383617b891..6d8c6ab5bfc 100644 --- a/pkg/ruler/ruler.go +++ b/pkg/ruler/ruler.go @@ -33,8 +33,8 @@ import ( "github.com/cortexproject/cortex/pkg/tenant" "github.com/cortexproject/cortex/pkg/util" "github.com/cortexproject/cortex/pkg/util/flagext" + "github.com/cortexproject/cortex/pkg/util/grpcclient" "github.com/cortexproject/cortex/pkg/util/services" - "github.com/cortexproject/cortex/pkg/util/tls" "github.com/cortexproject/cortex/pkg/util/validation" ) @@ -63,8 +63,8 @@ const ( type Config struct { // This is used for template expansion in alerts; must be a valid URL. ExternalURL flagext.URLValue `yaml:"external_url"` - // TLS parameters for the GRPC Client - ClientTLSConfig tls.ClientConfig `yaml:"ruler_client"` + // GRPC Client configuration. + ClientTLSConfig grpcclient.ConfigWithTLS `yaml:"ruler_client"` // How frequently to evaluate rules by default. EvaluationInterval time.Duration `yaml:"evaluation_interval"` // Deprecated. Replaced with pkg/util/validation/Limits.RulerEvaluationDelay field. 
@@ -110,7 +110,7 @@ type Config struct { } // Validate config and returns error on failure -func (cfg *Config) Validate(limits validation.Limits) error { +func (cfg *Config) Validate(limits validation.Limits, log log.Logger) error { if !util.StringsContain(supportedShardingStrategies, cfg.ShardingStrategy) { return errInvalidShardingStrategy } @@ -122,6 +122,9 @@ func (cfg *Config) Validate(limits validation.Limits) error { if err := cfg.StoreConfig.Validate(); err != nil { return errors.Wrap(err, "invalid storage config") } + if err := cfg.ClientTLSConfig.Validate(log); err != nil { + return errors.Wrap(err, "invalid ruler gRPC client config") + } return nil } @@ -695,14 +698,14 @@ func (r *Ruler) getShardedRules(ctx context.Context) ([]*GroupStateDesc, error) return nil, fmt.Errorf("unable to inject user ID into grpc request, %v", err) } - rgs := []*GroupStateDesc{} + var rgs []*GroupStateDesc for _, rlr := range rulers.Ingesters { - dialOpts, err := r.cfg.ClientTLSConfig.GetGRPCDialOptions() + dialOpts, err := r.cfg.ClientTLSConfig.DialOption(nil, nil) if err != nil { return nil, err } - conn, err := grpc.Dial(rlr.Addr, dialOpts...) + conn, err := grpc.DialContext(ctx, rlr.Addr, dialOpts...) if err != nil { return nil, err }