Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
* [FEATURE] Query Frontend: Add dynamic interval size for query splitting. This is enabled by configuring experimental flags `querier.max-shards-per-query` and/or `querier.max-fetched-data-duration-per-query`. The split interval size is dynamically increased to maintain a number of shards and total duration fetched below the configured values. #6458
* [FEATURE] Querier/Ruler: Add `query_partial_data` and `rules_partial_data` limits to allow queries/rules to be evaluated with data from a single zone, if other zones are not available. #6526
* [FEATURE] Update Prometheus Alertmanager version to v0.28.0 and add new integrations: msteamsv2, jira, and rocketchat. #6590
* [FEATURE] Alertmanager: Add `keep_instance_in_the_ring_on_shutdown` and `tokens_file_path` configs for alertmanager ring. #6628
* [ENHANCEMENT] Alertmanager: Add new limits `-alertmanager.max-silences-count` and `-alertmanager.max-silences-size-bytes` for limiting silences per tenant. #6605
* [ENHANCEMENT] Update prometheus version to v3.1.0. #6583
* [ENHANCEMENT] Add `compactor.auto-forget-delay` for compactor to auto forget compactors after X minutes without heartbeat. #6533
Expand Down
9 changes: 9 additions & 0 deletions docs/configuration/config-file-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,11 @@ sharding_ring:
# CLI flag: -alertmanager.sharding-ring.zone-awareness-enabled
[zone_awareness_enabled: <boolean> | default = false]

# File path where tokens are stored. If empty, tokens are neither stored at
# shutdown nor restored at startup.
# CLI flag: -alertmanager.sharding-ring.tokens-file-path
[tokens_file_path: <string> | default = ""]

# How long to sleep when alertmanager is shutting down. Needs to be close to or
# larger than the KV Store information propagation delay.
# CLI flag: -alertmanager.sharding-ring.final-sleep
Expand All @@ -397,6 +402,10 @@ sharding_ring:
# CLI flag: -alertmanager.sharding-ring.wait-instance-state-timeout
[wait_instance_state_timeout: <duration> | default = 10m]

# Keep the instance in the ring on shutdown.
# CLI flag: -alertmanager.sharding-ring.keep-instance-in-the-ring-on-shutdown
[keep_instance_in_the_ring_on_shutdown: <boolean> | default = false]

# Name of network interface to read address from.
# CLI flag: -alertmanager.sharding-ring.instance-interface-names
[instance_interface_names: <list of string> | default = [eth0 en0]]
Expand Down
23 changes: 14 additions & 9 deletions pkg/alertmanager/alertmanager_ring.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,11 @@ type RingConfig struct {
HeartbeatTimeout time.Duration `yaml:"heartbeat_timeout"`
ReplicationFactor int `yaml:"replication_factor"`
ZoneAwarenessEnabled bool `yaml:"zone_awareness_enabled"`
TokensFilePath string `yaml:"tokens_file_path"`

FinalSleep time.Duration `yaml:"final_sleep"`
WaitInstanceStateTimeout time.Duration `yaml:"wait_instance_state_timeout"`
FinalSleep time.Duration `yaml:"final_sleep"`
WaitInstanceStateTimeout time.Duration `yaml:"wait_instance_state_timeout"`
KeepInstanceInTheRingOnShutdown bool `yaml:"keep_instance_in_the_ring_on_shutdown"`

// Instance details
InstanceID string `yaml:"instance_id" doc:"hidden"`
Expand Down Expand Up @@ -85,6 +87,7 @@ func (cfg *RingConfig) RegisterFlags(f *flag.FlagSet) {
f.DurationVar(&cfg.FinalSleep, rfprefix+"final-sleep", 0*time.Second, "The sleep seconds when alertmanager is shutting down. Need to be close to or larger than KV Store information propagation delay")
f.IntVar(&cfg.ReplicationFactor, rfprefix+"replication-factor", 3, "The replication factor to use when sharding the alertmanager.")
f.BoolVar(&cfg.ZoneAwarenessEnabled, rfprefix+"zone-awareness-enabled", false, "True to enable zone-awareness and replicate alerts across different availability zones.")
f.StringVar(&cfg.TokensFilePath, rfprefix+"tokens-file-path", "", "File path where tokens are stored. If empty, tokens are not stored at shutdown and restored at startup.")

// Instance flags
cfg.InstanceInterfaceNames = []string{"eth0", "en0"}
Expand All @@ -93,6 +96,7 @@ func (cfg *RingConfig) RegisterFlags(f *flag.FlagSet) {
f.IntVar(&cfg.InstancePort, rfprefix+"instance-port", 0, "Port to advertise in the ring (defaults to server.grpc-listen-port).")
f.StringVar(&cfg.InstanceID, rfprefix+"instance-id", hostname, "Instance ID to register in the ring.")
f.StringVar(&cfg.InstanceZone, rfprefix+"instance-availability-zone", "", "The availability zone where this instance is running. Required if zone-awareness is enabled.")
f.BoolVar(&cfg.KeepInstanceInTheRingOnShutdown, rfprefix+"keep-instance-in-the-ring-on-shutdown", false, "Keep instance in the ring on shut down.")

cfg.RingCheckPeriod = 5 * time.Second

Expand All @@ -111,13 +115,14 @@ func (cfg *RingConfig) ToLifecyclerConfig(logger log.Logger) (ring.BasicLifecycl
instancePort := ring.GetInstancePort(cfg.InstancePort, cfg.ListenPort)

return ring.BasicLifecyclerConfig{
ID: cfg.InstanceID,
Addr: fmt.Sprintf("%s:%d", instanceAddr, instancePort),
HeartbeatPeriod: cfg.HeartbeatPeriod,
TokensObservePeriod: 0,
Zone: cfg.InstanceZone,
NumTokens: RingNumTokens,
FinalSleep: cfg.FinalSleep,
ID: cfg.InstanceID,
Addr: fmt.Sprintf("%s:%d", instanceAddr, instancePort),
HeartbeatPeriod: cfg.HeartbeatPeriod,
TokensObservePeriod: 0,
Zone: cfg.InstanceZone,
NumTokens: RingNumTokens,
FinalSleep: cfg.FinalSleep,
KeepInstanceInTheRingOnShutdown: cfg.KeepInstanceInTheRingOnShutdown,
}, nil
}

Expand Down
1 change: 1 addition & 0 deletions pkg/alertmanager/multitenant.go
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,7 @@ func createMultitenantAlertmanager(cfg *MultitenantAlertmanagerConfig, fallbackC
delegate := ring.BasicLifecyclerDelegate(am)
delegate = ring.NewLeaveOnStoppingDelegate(delegate, am.logger)
delegate = ring.NewAutoForgetDelegate(am.cfg.ShardingRing.HeartbeatTimeout*ringAutoForgetUnhealthyPeriods, delegate, am.logger)
delegate = ring.NewTokensPersistencyDelegate(am.cfg.ShardingRing.TokensFilePath, ring.JOINING, delegate, am.logger)

am.ringLifecycler, err = ring.NewBasicLifecycler(lifecyclerCfg, RingNameForServer, RingKey, ringStore, delegate, am.logger, prometheus.WrapRegistererWithPrefix("cortex_", am.registry))
if err != nil {
Expand Down
Loading