Skip to content

Commit 11e981c

Browse files
MichaHoffmann authored and saswatamcode committed
Sidecar: wait for prometheus on startup (thanos-io#7323)
Signed-off-by: Michael Hoffmann <[email protected]>
1 parent 8e0321b commit 11e981c

File tree

2 files changed

+63
-38
lines changed

2 files changed

+63
-38
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re
1212

1313
### Fixed
1414

15+
- [#7323](https://github.com/thanos-io/thanos/pull/7323) Sidecar: wait for prometheus on startup
16+
1517
### Added
1618

1719
### Changed

cmd/thanos/sidecar.go

Lines changed: 61 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -172,64 +172,87 @@ func runSidecar(
172172
Help: "Boolean indicator whether the sidecar can reach its Prometheus peer.",
173173
})
174174

175-
ctx, cancel := context.WithCancel(context.Background())
176-
g.Add(func() error {
177-
// Only check Prometheus's flags when upload is enabled.
178-
if uploads {
179-
// Check prometheus's flags to ensure same sidecar flags.
180-
if err := validatePrometheus(ctx, m.client, logger, conf.shipper.ignoreBlockSize, m); err != nil {
181-
return errors.Wrap(err, "validate Prometheus flags")
182-
}
183-
}
175+
ctx := context.Background()
176+
// Only check Prometheus's flags when upload is enabled.
177+
if uploads {
178+
// Check prometheus's flags to ensure same sidecar flags.
179+
// We retry infinitely until we have validated the Prometheus flags.
180+
err := runutil.Retry(conf.prometheus.getConfigInterval, ctx.Done(), func() error {
181+
iterCtx, iterCancel := context.WithTimeout(context.Background(), conf.prometheus.getConfigTimeout)
182+
defer iterCancel()
184183

185-
// We retry infinitely until we reach and fetch BuildVersion from our Prometheus.
186-
err := runutil.Retry(2*time.Second, ctx.Done(), func() error {
187-
if err := m.BuildVersion(ctx); err != nil {
184+
if err := validatePrometheus(iterCtx, m.client, logger, conf.shipper.ignoreBlockSize, m); err != nil {
188185
level.Warn(logger).Log(
189-
"msg", "failed to fetch prometheus version. Is Prometheus running? Retrying",
186+
"msg", "failed to validate prometheus flags. Is Prometheus running? Retrying",
190187
"err", err,
191188
)
192189
return err
193190
}
194191

195192
level.Info(logger).Log(
196-
"msg", "successfully loaded prometheus version",
193+
"msg", "successfully validated prometheus flags",
197194
)
198195
return nil
199196
})
200197
if err != nil {
201-
return errors.Wrap(err, "failed to get prometheus version")
198+
return errors.Wrap(err, "failed to validate prometheus flags")
202199
}
200+
}
203201

204-
// Blocking query of external labels before joining as a Source Peer into gossip.
205-
// We retry infinitely until we reach and fetch labels from our Prometheus.
206-
err = runutil.Retry(2*time.Second, ctx.Done(), func() error {
207-
if err := m.UpdateLabels(ctx); err != nil {
208-
level.Warn(logger).Log(
209-
"msg", "failed to fetch initial external labels. Is Prometheus running? Retrying",
210-
"err", err,
211-
)
212-
promUp.Set(0)
213-
statusProber.NotReady(err)
214-
return err
215-
}
202+
// We retry infinitely until we reach and fetch BuildVersion from our Prometheus.
203+
err := runutil.Retry(conf.prometheus.getConfigInterval, ctx.Done(), func() error {
204+
iterCtx, iterCancel := context.WithTimeout(context.Background(), conf.prometheus.getConfigTimeout)
205+
defer iterCancel()
216206

217-
level.Info(logger).Log(
218-
"msg", "successfully loaded prometheus external labels",
219-
"external_labels", m.Labels().String(),
207+
if err := m.BuildVersion(iterCtx); err != nil {
208+
level.Warn(logger).Log(
209+
"msg", "failed to fetch prometheus version. Is Prometheus running? Retrying",
210+
"err", err,
220211
)
221-
promUp.Set(1)
222-
statusProber.Ready()
223-
return nil
224-
})
225-
if err != nil {
226-
return errors.Wrap(err, "initial external labels query")
212+
return err
227213
}
228214

229-
if len(m.Labels()) == 0 {
230-
return errors.New("no external labels configured on Prometheus server, uniquely identifying external labels must be configured; see https://thanos.io/tip/thanos/storage.md#external-labels for details.")
215+
level.Info(logger).Log(
216+
"msg", "successfully loaded prometheus version",
217+
)
218+
return nil
219+
})
220+
if err != nil {
221+
return errors.Wrap(err, "failed to get prometheus version")
222+
}
223+
224+
// Blocking query of external labels before joining as a Source Peer into gossip.
225+
// We retry infinitely until we reach and fetch labels from our Prometheus.
226+
err = runutil.Retry(conf.prometheus.getConfigInterval, ctx.Done(), func() error {
227+
iterCtx, iterCancel := context.WithTimeout(context.Background(), conf.prometheus.getConfigTimeout)
228+
defer iterCancel()
229+
230+
if err := m.UpdateLabels(iterCtx); err != nil {
231+
level.Warn(logger).Log(
232+
"msg", "failed to fetch initial external labels. Is Prometheus running? Retrying",
233+
"err", err,
234+
)
235+
return err
231236
}
232237

238+
level.Info(logger).Log(
239+
"msg", "successfully loaded prometheus external labels",
240+
"external_labels", m.Labels().String(),
241+
)
242+
return nil
243+
})
244+
if err != nil {
245+
return errors.Wrap(err, "initial external labels query")
246+
}
247+
248+
if len(m.Labels()) == 0 {
249+
return errors.New("no external labels configured on Prometheus server, uniquely identifying external labels must be configured; see https://thanos.io/tip/thanos/storage.md#external-labels for details.")
250+
}
251+
promUp.Set(1)
252+
statusProber.Ready()
253+
254+
ctx, cancel := context.WithCancel(context.Background())
255+
g.Add(func() error {
233256
// Periodically query the Prometheus config. We use this as a heartbeat as well as for updating
234257
// the external labels we apply.
235258
return runutil.Repeat(conf.prometheus.getConfigInterval, ctx.Done(), func() error {

0 commit comments

Comments
 (0)