From fea1a9347da8083b7b403f347804f96c1ba1680c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20S=CC=8Ctibrany=CC=81?= Date: Sat, 28 Nov 2020 21:49:37 +0100 Subject: [PATCH 1/3] Normalize labels from chunks. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Peter Štibraný --- tools/blocksconvert/builder/builder.go | 83 ++++++++++++++++++++- tools/blocksconvert/builder/builder_test.go | 62 +++++++++++++++ 2 files changed, 143 insertions(+), 2 deletions(-) create mode 100644 tools/blocksconvert/builder/builder_test.go diff --git a/tools/blocksconvert/builder/builder.go b/tools/blocksconvert/builder/builder.go index 51af1fbb139..c104668d50d 100644 --- a/tools/blocksconvert/builder/builder.go +++ b/tools/blocksconvert/builder/builder.go @@ -6,6 +6,7 @@ import ( "io/ioutil" "os" "path/filepath" + "sort" "strings" "time" @@ -360,10 +361,18 @@ func fetchAndBuildSingleSeries(ctx context.Context, fetcher *Fetcher, chunksIds return nil, nil, nil } - m := cs[0].Metric + m, err := normalizeLabels(cs[0].Metric) + if err != nil { + return nil, nil, errors.Errorf("chunk has invalid metrics: %v", cs[0].Metric.String()) + } + // Verify that all chunks belong to the same series. for _, c := range cs { - if !labels.Equal(m, c.Metric) { + nm, err := normalizeLabels(c.Metric) + if err != nil { + return nil, nil, errors.Errorf("chunk has invalid metrics: %v", c.Metric.String()) + } + if !labels.Equal(m, nm) { return nil, nil, errors.Errorf("chunks for multiple metrics: %v, %v", m.String(), c.Metric.String()) } } @@ -371,6 +380,76 @@ func fetchAndBuildSingleSeries(ctx context.Context, fetcher *Fetcher, chunksIds return m, cs, nil } +// Labels are already sorted, but there may be duplicate label names. +// This method verifies sortedness, and removes duplicate label names (if they have the same value). +func normalizeLabels(lbls labels.Labels) (labels.Labels, error) { + err := checkLabels(lbls) + if err == errLabelsNotSorted { + sort.Sort(lbls) + err = checkLabels(lbls) + } + + if err == errDuplicateLabelsSameValue { + lbls = removeDuplicateLabels(lbls) + err = checkLabels(lbls) + } + + return lbls, err +} + +var ( + errLabelsNotSorted = errors.New("labels not sorted") + errDuplicateLabelsSameValue = errors.New("duplicate labels, same value") + errDuplicateLabelsDifferentValue = errors.New("duplicate labels, different values") +) + +// Returns one of errLabelsNotSorted, errDuplicateLabelsSameValue, errDuplicateLabelsDifferentValue, +// or nil, if labels are fine. +func checkLabels(lbls labels.Labels) error { + prevName, prevValue := "", "" + + uniqueLabels := true + for _, l := range lbls { + switch { + case l.Name < prevName: + return errLabelsNotSorted + case l.Name == prevName: + if l.Value != prevValue { + return errDuplicateLabelsDifferentValue + } + + uniqueLabels = false + } + + prevName = l.Name + prevValue = l.Value + } + + if !uniqueLabels { + return errDuplicateLabelsSameValue + } + + return nil +} + +func removeDuplicateLabels(lbls labels.Labels) labels.Labels { + prevName, prevValue := "", "" + + for ix := 0; ix < len(lbls); { + l := lbls[ix] + if l.Name == prevName && l.Value == prevValue { + lbls = append(lbls[:ix], lbls[ix+1:]...) + continue + } + + prevName = l.Name + prevValue = l.Value + ix++ + } + + return lbls +} + // Finds storage configuration for given day, and builds a client. func (b *Builder) createChunkClientForDay(dayStart time.Time) (chunk.Client, error) { for ix, s := range b.schemaConfig.Configs { diff --git a/tools/blocksconvert/builder/builder_test.go b/tools/blocksconvert/builder/builder_test.go new file mode 100644 index 00000000000..a83d97d5f75 --- /dev/null +++ b/tools/blocksconvert/builder/builder_test.go @@ -0,0 +1,62 @@ +package builder + +import ( + "testing" + + "github.com/prometheus/prometheus/pkg/labels" + "github.com/stretchr/testify/assert" +) + +func TestNormalizeLabels(t *testing.T) { + for name, tc := range map[string]struct { + input labels.Labels + + expectedOutput labels.Labels + expectedError error + }{ + "good labels": { + input: fromStrings("__name__", "hello", "label1", "world"), + expectedOutput: fromStrings("__name__", "hello", "label1", "world"), + expectedError: nil, + }, + "not sorted": { + input: fromStrings("label1", "world", "__name__", "hello"), + expectedOutput: fromStrings("__name__", "hello", "label1", "world"), + expectedError: nil, + }, + "duplicate with same value": { + input: fromStrings("__name__", "hello", "label1", "world", "label1", "world"), + expectedOutput: fromStrings("__name__", "hello", "label1", "world"), + expectedError: nil, + }, + "not sorted, duplicate with same value": { + input: fromStrings("label1", "world", "__name__", "hello", "label1", "world"), + expectedOutput: fromStrings("__name__", "hello", "label1", "world"), + expectedError: nil, + }, + "duplicate with different value": { + input: fromStrings("label1", "world1", "__name__", "hello", "label1", "world2"), + expectedOutput: fromStrings("__name__", "hello", "label1", "world1", "label1", "world2"), // sorted + expectedError: errDuplicateLabelsDifferentValue, + }, + } { + t.Run(name, func(t *testing.T) { + out, err := normalizeLabels(tc.input) + + assert.Equal(t, tc.expectedOutput, out) + assert.Equal(t, tc.expectedError, err) + }) + } +} + +// Similar to labels.FromStrings, but doesn't do sorting. +func fromStrings(ss ...string) labels.Labels { + if len(ss)%2 != 0 { + panic("invalid number of strings") + } + var res labels.Labels + for i := 0; i < len(ss); i += 2 { + res = append(res, labels.Label{Name: ss[i], Value: ss[i+1]}) + } + return res +} From 641bd5b232e4569bd538719d855d82a674c16a36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20S=CC=8Ctibrany=CC=81?= Date: Sat, 28 Nov 2020 21:55:29 +0100 Subject: [PATCH 2/3] CHANGELOG.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Peter Štibraný --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e4889940101..7809a00154b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,6 +45,7 @@ * [ENHANCEMENT] Scheduler: ability to ignore users based on regexp, using `-scheduler.ignore-users-regex` flag. #3477 * [ENHANCEMENT] Builder: Parallelize reading chunks in the final stage of building block. #3470 +* [ENHANCEMENT] Builder: remove duplicate label names from chunk. #3547 ## 1.5.0 / 2020-11-09 From 7b7d9abeb0f1a16a74d17d4746cfd42e0e7de6dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20S=CC=8Ctibrany=CC=81?= Date: Sat, 28 Nov 2020 21:56:35 +0100 Subject: [PATCH 3/3] Include original error. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Peter Štibraný --- tools/blocksconvert/builder/builder.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/blocksconvert/builder/builder.go b/tools/blocksconvert/builder/builder.go index c104668d50d..05ffa4c419b 100644 --- a/tools/blocksconvert/builder/builder.go +++ b/tools/blocksconvert/builder/builder.go @@ -363,14 +363,14 @@ func fetchAndBuildSingleSeries(ctx context.Context, fetcher *Fetcher, chunksIds m, err := normalizeLabels(cs[0].Metric) if err != nil { - return nil, nil, errors.Errorf("chunk has invalid metrics: %v", cs[0].Metric.String()) + return nil, nil, errors.Wrapf(err, "chunk has invalid metrics: %v", cs[0].Metric.String()) } // Verify that all chunks belong to the same series. for _, c := range cs { nm, err := normalizeLabels(c.Metric) if err != nil { - return nil, nil, errors.Errorf("chunk has invalid metrics: %v", c.Metric.String()) + return nil, nil, errors.Wrapf(err, "chunk has invalid metrics: %v", c.Metric.String()) } if !labels.Equal(m, nm) { return nil, nil, errors.Errorf("chunks for multiple metrics: %v, %v", m.String(), c.Metric.String())