Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
* [ENHANCEMENT] Alertmanager: Add receiver validations for msteamsv2 and rocketchat. #6606
* [ENHANCEMENT] Query Frontend: Add a `-frontend.enabled-ruler-query-stats` flag to configure whether to report the query stats log for queries coming from the Ruler. #6504
* [ENHANCEMENT] OTLP: Support otlp metadata ingestion. #6617
* [ENHANCEMENT] Alertmanager: Add `keep_instance_in_the_ring_on_shutdown` and `tokens_file_path` configs for alertmanager ring. #6628
* [BUGFIX] Ingester: Avoid error or early throttling when READONLY ingesters are present in the ring #6517
* [BUGFIX] Ingester: Fix labelset data race condition. #6573
* [BUGFIX] Compactor: Cleaner should not put deletion marker for blocks with no-compact marker. #6576
Expand Down
9 changes: 9 additions & 0 deletions docs/configuration/config-file-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,11 @@ sharding_ring:
# CLI flag: -alertmanager.sharding-ring.zone-awareness-enabled
[zone_awareness_enabled: <boolean> | default = false]

# File path where tokens are stored. If empty, tokens are neither stored at
# shutdown nor restored at startup.
# CLI flag: -alertmanager.sharding-ring.tokens-file-path
[tokens_file_path: <string> | default = ""]

# How long to sleep when the alertmanager is shutting down. Needs to be close
# to or larger than the KV store information propagation delay.
# CLI flag: -alertmanager.sharding-ring.final-sleep
Expand All @@ -397,6 +402,10 @@ sharding_ring:
# CLI flag: -alertmanager.sharding-ring.wait-instance-state-timeout
[wait_instance_state_timeout: <duration> | default = 10m]

# Keep instance in the ring on shut down.
# CLI flag: -alertmanager.sharding-ring.keep-instance-in-the-ring-on-shutdown
[keep_instance_in_the_ring_on_shutdown: <boolean> | default = false]

# Name of network interface to read address from.
# CLI flag: -alertmanager.sharding-ring.instance-interface-names
[instance_interface_names: <list of string> | default = [eth0 en0]]
Expand Down
23 changes: 14 additions & 9 deletions pkg/alertmanager/alertmanager_ring.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,11 @@ type RingConfig struct {
HeartbeatTimeout time.Duration `yaml:"heartbeat_timeout"`
ReplicationFactor int `yaml:"replication_factor"`
ZoneAwarenessEnabled bool `yaml:"zone_awareness_enabled"`
TokensFilePath string `yaml:"tokens_file_path"`

FinalSleep time.Duration `yaml:"final_sleep"`
WaitInstanceStateTimeout time.Duration `yaml:"wait_instance_state_timeout"`
FinalSleep time.Duration `yaml:"final_sleep"`
WaitInstanceStateTimeout time.Duration `yaml:"wait_instance_state_timeout"`
KeepInstanceInTheRingOnShutdown bool `yaml:"keep_instance_in_the_ring_on_shutdown"`

// Instance details
InstanceID string `yaml:"instance_id" doc:"hidden"`
Expand Down Expand Up @@ -85,6 +87,7 @@ func (cfg *RingConfig) RegisterFlags(f *flag.FlagSet) {
f.DurationVar(&cfg.FinalSleep, rfprefix+"final-sleep", 0*time.Second, "The sleep seconds when alertmanager is shutting down. Need to be close to or larger than KV Store information propagation delay")
f.IntVar(&cfg.ReplicationFactor, rfprefix+"replication-factor", 3, "The replication factor to use when sharding the alertmanager.")
f.BoolVar(&cfg.ZoneAwarenessEnabled, rfprefix+"zone-awareness-enabled", false, "True to enable zone-awareness and replicate alerts across different availability zones.")
f.StringVar(&cfg.TokensFilePath, rfprefix+"tokens-file-path", "", "File path where tokens are stored. If empty, tokens are not stored at shutdown and restored at startup.")

// Instance flags
cfg.InstanceInterfaceNames = []string{"eth0", "en0"}
Expand All @@ -93,6 +96,7 @@ func (cfg *RingConfig) RegisterFlags(f *flag.FlagSet) {
f.IntVar(&cfg.InstancePort, rfprefix+"instance-port", 0, "Port to advertise in the ring (defaults to server.grpc-listen-port).")
f.StringVar(&cfg.InstanceID, rfprefix+"instance-id", hostname, "Instance ID to register in the ring.")
f.StringVar(&cfg.InstanceZone, rfprefix+"instance-availability-zone", "", "The availability zone where this instance is running. Required if zone-awareness is enabled.")
f.BoolVar(&cfg.KeepInstanceInTheRingOnShutdown, rfprefix+"keep-instance-in-the-ring-on-shutdown", false, "Keep instance in the ring on shut down.")

cfg.RingCheckPeriod = 5 * time.Second

Expand All @@ -111,13 +115,14 @@ func (cfg *RingConfig) ToLifecyclerConfig(logger log.Logger) (ring.BasicLifecycl
instancePort := ring.GetInstancePort(cfg.InstancePort, cfg.ListenPort)

return ring.BasicLifecyclerConfig{
ID: cfg.InstanceID,
Addr: fmt.Sprintf("%s:%d", instanceAddr, instancePort),
HeartbeatPeriod: cfg.HeartbeatPeriod,
TokensObservePeriod: 0,
Zone: cfg.InstanceZone,
NumTokens: RingNumTokens,
FinalSleep: cfg.FinalSleep,
ID: cfg.InstanceID,
Addr: fmt.Sprintf("%s:%d", instanceAddr, instancePort),
HeartbeatPeriod: cfg.HeartbeatPeriod,
TokensObservePeriod: 0,
Zone: cfg.InstanceZone,
NumTokens: RingNumTokens,
FinalSleep: cfg.FinalSleep,
KeepInstanceInTheRingOnShutdown: cfg.KeepInstanceInTheRingOnShutdown,
}, nil
}

Expand Down
1 change: 1 addition & 0 deletions pkg/alertmanager/multitenant.go
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,7 @@ func createMultitenantAlertmanager(cfg *MultitenantAlertmanagerConfig, fallbackC
delegate := ring.BasicLifecyclerDelegate(am)
delegate = ring.NewLeaveOnStoppingDelegate(delegate, am.logger)
delegate = ring.NewAutoForgetDelegate(am.cfg.ShardingRing.HeartbeatTimeout*ringAutoForgetUnhealthyPeriods, delegate, am.logger)
delegate = ring.NewTokensPersistencyDelegate(am.cfg.ShardingRing.TokensFilePath, ring.JOINING, delegate, am.logger)

am.ringLifecycler, err = ring.NewBasicLifecycler(lifecyclerCfg, RingNameForServer, RingKey, ringStore, delegate, am.logger, prometheus.WrapRegistererWithPrefix("cortex_", am.registry))
if err != nil {
Expand Down
Loading