Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
## master / unreleased

* [CHANGE] Experimental Delete Series: Change target flag for purger from `data-purger` to `purger`. #2777
* [CHANGE] Experimental TSDB: The max concurrent queries against the long-term storage, configured via `-experimental.tsdb.bucket-store.max-concurrent`, is now a limit shared across all tenants and not a per-tenant limit anymore. The default value has changed from `20` to `100` and the following new metrics have been added: #2797
* `cortex_bucket_stores_gate_queries_concurrent_max`
* `cortex_bucket_stores_gate_queries_in_flight`
* `cortex_bucket_stores_gate_duration_seconds`
* [FEATURE] Introduced `ruler.for-outage-tolerance`, Max time to tolerate outage for restoring "for" state of alert. #2783
* [FEATURE] Introduced `ruler.for-grace-period`, Minimum duration between alert and restored "for" state. This is maintained only for alerts with configured "for" time greater than grace period. #2783
* [FEATURE] Introduced `ruler.for-resend-delay`, Minimum amount of time to wait before resending an alert to Alertmanager. #2783
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ require (
github.com/segmentio/fasthash v0.0.0-20180216231524-a72b379d632e
github.com/spf13/afero v1.2.2
github.com/stretchr/testify v1.5.1
github.com/thanos-io/thanos v0.12.3-0.20200618165043-6c513e5f5c5f
github.com/thanos-io/thanos v0.13.1-0.20200625180332-f078faed1b96
github.com/uber/jaeger-client-go v2.23.1+incompatible
github.com/weaveworks/common v0.0.0-20200512154658-384f10054ec5
go.etcd.io/bbolt v1.3.5-0.20200615073812-232d8fc87f50
Expand Down
6 changes: 3 additions & 3 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -960,7 +960,7 @@ github.com/prometheus/prometheus v0.0.0-20180315085919-58e2a31db8de/go.mod h1:oA
github.com/prometheus/prometheus v0.0.0-20190818123050-43acd0e2e93f/go.mod h1:rMTlmxGCvukf2KMu3fClMDKLLoJ5hl61MhcJ7xKakf0=
github.com/prometheus/prometheus v1.8.2-0.20200107122003-4708915ac6ef/go.mod h1:7U90zPoLkWjEIQcy/rweQla82OCTUzxVHE51G3OhJbI=
github.com/prometheus/prometheus v1.8.2-0.20200213233353-b90be6f32a33/go.mod h1:fkIPPkuZnkXyopYHmXPxf9rgiPkVgZCN8w9o8+UgBlY=
github.com/prometheus/prometheus v1.8.2-0.20200609165731-66dfb951c4ca/go.mod h1:CwaXafRa0mm72de2GQWtfQxjGytbSKIGivWxQvjpRZs=
github.com/prometheus/prometheus v1.8.2-0.20200619100132-74207c04655e/go.mod h1:QV6T0PPQi5UFmqcLBJw3JiyIR8r1O7KEv9qlVw4VV40=
github.com/prometheus/prometheus v1.8.2-0.20200622142935-153f859b7499 h1:q+yGm39CmSV1S7oxCz36nlvx9ugRoEodwuHusgJw+iU=
github.com/prometheus/prometheus v1.8.2-0.20200622142935-153f859b7499/go.mod h1:QV6T0PPQi5UFmqcLBJw3JiyIR8r1O7KEv9qlVw4VV40=
github.com/rafaeljusto/redigomock v0.0.0-20190202135759-257e089e14a1 h1:+kGqA4dNN5hn7WwvKdzHl0rdN5AEkbNZd0VjRltAiZg=
Expand Down Expand Up @@ -1046,8 +1046,8 @@ github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81P
github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H4=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/thanos-io/thanos v0.8.1-0.20200109203923-552ffa4c1a0d/go.mod h1:usT/TxtJQ7DzinTt+G9kinDQmRS5sxwu0unVKZ9vdcw=
github.com/thanos-io/thanos v0.12.3-0.20200618165043-6c513e5f5c5f h1:UnMVEOejh6tWKUag5tuC0WjKfKmGwJ2+ky0MV4KM52I=
github.com/thanos-io/thanos v0.12.3-0.20200618165043-6c513e5f5c5f/go.mod h1:CPqrM/ibNtlraee0to4dSRiTs+KLI1c3agMS2lmJpz0=
github.com/thanos-io/thanos v0.13.1-0.20200625180332-f078faed1b96 h1:McsluZ8fXVwGbdXsZ20uZNGukmPycDU9m6df64S2bqQ=
github.com/thanos-io/thanos v0.13.1-0.20200625180332-f078faed1b96/go.mod h1:VuNcGvUE0u57S1XXqYhf0dQzUO3wUnw2B5IKsju+1z4=
github.com/tidwall/pretty v0.0.0-20180105212114-65a9db5fad51/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk=
github.com/tidwall/pretty v1.0.0 h1:HsD+QiTn7sK6flMKIvNmpqz1qrpP3Ps6jOKIKMooyg4=
github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk=
Expand Down
5 changes: 4 additions & 1 deletion pkg/ingester/ingester_v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -898,7 +898,10 @@ func (i *Ingester) createTSDB(userID string) (*userTSDB, error) {
tsdbPromReg,
udir,
cortex_tsdb.NewUserBucketClient(userID, i.TSDBState.bucket),
func() labels.Labels { return l }, metadata.ReceiveSource)
func() labels.Labels { return l },
metadata.ReceiveSource,
true, // Allow out of order uploads. It's fine in Cortex's context.
)
}

i.TSDBState.tsdbMetrics.setRegistryForUser(userID, tsdbPromReg)
Expand Down
2 changes: 1 addition & 1 deletion pkg/storage/tsdb/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ func (cfg *BucketStoreConfig) RegisterFlags(f *flag.FlagSet) {
f.DurationVar(&cfg.SyncInterval, "experimental.tsdb.bucket-store.sync-interval", 5*time.Minute, "How frequently scan the bucket to look for changes (new blocks shipped by ingesters and blocks removed by retention or compaction). 0 disables it.")
f.Uint64Var(&cfg.MaxChunkPoolBytes, "experimental.tsdb.bucket-store.max-chunk-pool-bytes", uint64(2*units.Gibibyte), "Max size - in bytes - of a per-tenant chunk pool, used to reduce memory allocations.")
f.Uint64Var(&cfg.MaxSampleCount, "experimental.tsdb.bucket-store.max-sample-count", 0, "Max number of samples per query when loading series from the long-term storage. 0 disables the limit.")
f.IntVar(&cfg.MaxConcurrent, "experimental.tsdb.bucket-store.max-concurrent", 20, "Max number of concurrent queries to execute against the long-term storage on a per-tenant basis.")
f.IntVar(&cfg.MaxConcurrent, "experimental.tsdb.bucket-store.max-concurrent", 100, "Max number of concurrent queries to execute against the long-term storage. The limit is shared across all tenants.")
f.IntVar(&cfg.TenantSyncConcurrency, "experimental.tsdb.bucket-store.tenant-sync-concurrency", 10, "Maximum number of concurrent tenants synching blocks.")
f.IntVar(&cfg.BlockSyncConcurrency, "experimental.tsdb.bucket-store.block-sync-concurrency", 20, "Maximum number of concurrent blocks synching per tenant.")
f.IntVar(&cfg.MetaSyncConcurrency, "experimental.tsdb.bucket-store.meta-sync-concurrency", 20, "Number of Go routines to use when syncing block meta files from object storage per tenant.")
Expand Down
6 changes: 0 additions & 6 deletions pkg/storegateway/bucket_store_metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,6 @@ func populateMockedBucketStoreMetrics(base float64) *prometheus.Registry {
m.chunkSizeBytes.Observe(30 * base)

m.queriesDropped.Add(31 * base)
m.queriesLimit.Add(32 * base)

m.seriesRefetches.Add(33 * base)

Expand Down Expand Up @@ -273,7 +272,6 @@ type mockedBucketStoreMetrics struct {
resultSeriesCount prometheus.Summary
chunkSizeBytes prometheus.Histogram
queriesDropped prometheus.Counter
queriesLimit prometheus.Gauge

cachedPostingsCompressions *prometheus.CounterVec
cachedPostingsCompressionErrors *prometheus.CounterVec
Expand Down Expand Up @@ -355,10 +353,6 @@ func newMockedBucketStoreMetrics(reg prometheus.Registerer) *mockedBucketStoreMe
Name: "thanos_bucket_store_queries_dropped_total",
Help: "Number of queries that were dropped due to the sample limit.",
})
m.queriesLimit = promauto.With(reg).NewGauge(prometheus.GaugeOpts{
Name: "thanos_bucket_store_queries_concurrent_max",
Help: "Number of maximum concurrent queries.",
})
m.seriesRefetches = promauto.With(reg).NewCounter(prometheus.CounterOpts{
Name: "thanos_bucket_store_series_refetches_total",
Help: fmt.Sprintf("Total number of cases where %v bytes was not enough was to fetch series from index, resulting in refetch.", 64*1024),
Expand Down
17 changes: 15 additions & 2 deletions pkg/storegateway/bucket_stores.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"github.com/thanos-io/thanos/pkg/block"
thanos_metadata "github.com/thanos-io/thanos/pkg/block/metadata"
"github.com/thanos-io/thanos/pkg/extprom"
"github.com/thanos-io/thanos/pkg/gate"
"github.com/thanos-io/thanos/pkg/objstore"
"github.com/thanos-io/thanos/pkg/store"
storecache "github.com/thanos-io/thanos/pkg/store/cache"
Expand All @@ -43,6 +44,9 @@ type BucketStores struct {
// Index cache shared across all tenants.
indexCache storecache.IndexCache

// Gate used to limit query concurrency across all tenants.
queryGate gate.Gate

// Keeps a bucket store for each tenant.
storesMu sync.RWMutex
stores map[string]*store.BucketStore
Expand All @@ -59,6 +63,14 @@ func NewBucketStores(cfg tsdb.Config, filters []block.MetadataFilter, bucketClie
return nil, errors.Wrapf(err, "create caching bucket")
}

// The number of concurrent queries against the tenants BucketStores are limited.
queryGateReg := extprom.WrapRegistererWithPrefix("cortex_bucket_stores_", reg)
queryGate := gate.NewKeeper(queryGateReg).NewGate(cfg.BucketStore.MaxConcurrent)
promauto.With(reg).NewGauge(prometheus.GaugeOpts{
Name: "cortex_bucket_stores_gate_queries_concurrent_max",
Help: "Number of maximum concurrent queries allowed.",
}).Set(float64(cfg.BucketStore.MaxConcurrent))

u := &BucketStores{
logger: logger,
cfg: cfg,
Expand All @@ -68,6 +80,7 @@ func NewBucketStores(cfg tsdb.Config, filters []block.MetadataFilter, bucketClie
logLevel: logLevel,
bucketStoreMetrics: NewBucketStoreMetrics(),
metaFetcherMetrics: NewMetadataFetcherMetrics(),
queryGate: queryGate,
syncTimes: promauto.With(reg).NewHistogram(prometheus.HistogramOpts{
Name: "cortex_bucket_stores_blocks_sync_seconds",
Help: "The total time it takes to perform a sync stores",
Expand Down Expand Up @@ -268,9 +281,9 @@ func (u *BucketStores) getOrCreateStore(userID string) (*store.BucketStore, erro
fetcher,
filepath.Join(u.cfg.BucketStore.SyncDir, userID),
u.indexCache,
uint64(u.cfg.BucketStore.MaxChunkPoolBytes),
u.queryGate,
u.cfg.BucketStore.MaxChunkPoolBytes,
u.cfg.BucketStore.MaxSampleCount,
u.cfg.BucketStore.MaxConcurrent,
u.logLevel.String() == "debug", // Turn on debug logging, if the log level is set to debug
u.cfg.BucketStore.BlockSyncConcurrency,
nil, // Do not limit timerange.
Expand Down
32 changes: 30 additions & 2 deletions pkg/storegateway/bucket_stores_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,21 @@ func TestBucketStores_InitialSync(t *testing.T) {
# HELP cortex_bucket_store_block_load_failures_total Total number of failed remote block loading attempts.
# TYPE cortex_bucket_store_block_load_failures_total counter
cortex_bucket_store_block_load_failures_total 0
`), "cortex_bucket_store_blocks_loaded", "cortex_bucket_store_block_loads_total", "cortex_bucket_store_block_load_failures_total"))

# HELP cortex_bucket_stores_gate_queries_concurrent_max Number of maximum concurrent queries allowed.
# TYPE cortex_bucket_stores_gate_queries_concurrent_max gauge
cortex_bucket_stores_gate_queries_concurrent_max 100

# HELP cortex_bucket_stores_gate_queries_in_flight Number of queries that are currently in flight.
# TYPE cortex_bucket_stores_gate_queries_in_flight gauge
cortex_bucket_stores_gate_queries_in_flight 0
`),
"cortex_bucket_store_blocks_loaded",
"cortex_bucket_store_block_loads_total",
"cortex_bucket_store_block_load_failures_total",
"cortex_bucket_stores_gate_queries_concurrent_max",
"cortex_bucket_stores_gate_queries_in_flight",
))

assert.Greater(t, testutil.ToFloat64(stores.syncLastSuccess), float64(0))
}
Expand Down Expand Up @@ -145,7 +159,21 @@ func TestBucketStores_SyncBlocks(t *testing.T) {
# HELP cortex_bucket_store_block_load_failures_total Total number of failed remote block loading attempts.
# TYPE cortex_bucket_store_block_load_failures_total counter
cortex_bucket_store_block_load_failures_total 0
`), "cortex_bucket_store_blocks_loaded", "cortex_bucket_store_block_loads_total", "cortex_bucket_store_block_load_failures_total"))

# HELP cortex_bucket_stores_gate_queries_concurrent_max Number of maximum concurrent queries allowed.
# TYPE cortex_bucket_stores_gate_queries_concurrent_max gauge
cortex_bucket_stores_gate_queries_concurrent_max 100

# HELP cortex_bucket_stores_gate_queries_in_flight Number of queries that are currently in flight.
# TYPE cortex_bucket_stores_gate_queries_in_flight gauge
cortex_bucket_stores_gate_queries_in_flight 0
`),
"cortex_bucket_store_blocks_loaded",
"cortex_bucket_store_block_loads_total",
"cortex_bucket_store_block_load_failures_total",
"cortex_bucket_stores_gate_queries_concurrent_max",
"cortex_bucket_stores_gate_queries_in_flight",
))

assert.Greater(t, testutil.ToFloat64(stores.syncLastSuccess), float64(0))
}
Expand Down
Loading