From 46b08d8dc22ddb30ee47063dd31ca005c62147ff Mon Sep 17 00:00:00 2001 From: Marco Pracucci Date: Fri, 3 Apr 2020 10:39:42 +0200 Subject: [PATCH 1/2] Fixed chunk data corruption when querying back series using the blocks storage Signed-off-by: Marco Pracucci --- CHANGELOG.md | 1 + .../blocks_bucket_store_inmemory_server.go | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3de5d04d46f..13836d5df47 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ * [ENHANCEMENT] Experimental TSDB: Added `cortex_querier_blocks_meta_synced`, which reflects current state of synced blocks over all tenants. #2392 * [ENHANCEMENT] Added `cortex_distributor_latest_seen_sample_timestamp_seconds` metric to see how far behind Prometheus servers are in sending data. #2371 * [ENHANCEMENT] FIFO cache to support eviction based on memory usage. The `-.fifocache.size` CLI flag has been renamed to `-.fifocache.max-size-items` as well as its YAML config option `size` renamed to `max_size_items`. Added `-.fifocache.max-size-bytes` CLI flag and YAML config option `max_size_bytes` to specify memory limit of the cache. #2319 +* [BUGFIX] Experimental TSDB: fixed chunk data corruption when querying back series using the experimental blocks storage. ## 1.0.0 / 2020-04-02 diff --git a/pkg/querier/blocks_bucket_store_inmemory_server.go b/pkg/querier/blocks_bucket_store_inmemory_server.go index cb4862739bc..8300b12982c 100644 --- a/pkg/querier/blocks_bucket_store_inmemory_server.go +++ b/pkg/querier/blocks_bucket_store_inmemory_server.go @@ -30,8 +30,22 @@ func (s *bucketStoreSeriesServer) Send(r *storepb.SeriesResponse) error { s.Warnings = append(s.Warnings, errors.New(r.GetWarning())) } - if r.GetSeries() != nil { - s.SeriesSet = append(s.SeriesSet, r.GetSeries()) + if recvSeries := r.GetSeries(); recvSeries != nil { + // Thanos uses a pool for the chunks and may use other pools in the future. + // Given we need to retain the reference after the pooled slices are recycled, + // we need to do a copy here. We prefer to stay on the safest side at this stage + // so we do a marshal+unmarshal to copy the whole series. + recvSeriesData, err := recvSeries.Marshal() + if err != nil { + return errors.Wrap(err, "marshal received series") + } + + copiedSeries := &storepb.Series{} + if err = copiedSeries.Unmarshal(recvSeriesData); err != nil { + return errors.Wrap(err, "unmarshal received series") + } + + s.SeriesSet = append(s.SeriesSet, copiedSeries) } return nil From 0d9cebafe93f01dff9897b32f8e4913d0e6062bf Mon Sep 17 00:00:00 2001 From: Marco Pracucci Date: Fri, 3 Apr 2020 10:44:40 +0200 Subject: [PATCH 2/2] Added PR number to CHANGELOG Signed-off-by: Marco Pracucci --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 13836d5df47..c326dd20162 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ * [ENHANCEMENT] Experimental TSDB: Added `cortex_querier_blocks_meta_synced`, which reflects current state of synced blocks over all tenants. #2392 * [ENHANCEMENT] Added `cortex_distributor_latest_seen_sample_timestamp_seconds` metric to see how far behind Prometheus servers are in sending data. #2371 * [ENHANCEMENT] FIFO cache to support eviction based on memory usage. The `-.fifocache.size` CLI flag has been renamed to `-.fifocache.max-size-items` as well as its YAML config option `size` renamed to `max_size_items`. Added `-.fifocache.max-size-bytes` CLI flag and YAML config option `max_size_bytes` to specify memory limit of the cache. #2319 -* [BUGFIX] Experimental TSDB: fixed chunk data corruption when querying back series using the experimental blocks storage. +* [BUGFIX] Experimental TSDB: fixed chunk data corruption when querying back series using the experimental blocks storage. #2400 ## 1.0.0 / 2020-04-02