From 741f9b20bcd9c43eb8cd184b809fcc0e26ab3a3e Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Mon, 26 Oct 2020 09:21:55 +0000 Subject: [PATCH 01/42] Add unit tests Signed-off-by: Michel Hollands --- pkg/ring/replication_set_test.go | 103 +++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) diff --git a/pkg/ring/replication_set_test.go b/pkg/ring/replication_set_test.go index db11459cf10..2e11351de58 100644 --- a/pkg/ring/replication_set_test.go +++ b/pkg/ring/replication_set_test.go @@ -1,7 +1,10 @@ package ring import ( + "context" + "errors" "testing" + "time" "github.com/stretchr/testify/assert" ) @@ -33,3 +36,103 @@ func TestReplicationSet_GetAddresses(t *testing.T) { }) } } + +// Return a function that fails starting from failAfter times +func failingFunctionAfter(failAfter int, delay time.Duration) func(context.Context, *IngesterDesc) (interface{}, error) { + count := 0 + return func(context.Context, *IngesterDesc) (interface{}, error) { + count++ + time.Sleep(delay) + if count > failAfter { + return nil, errors.New("Dummy error") + } + return 1, nil + } +} + +func TestReplicationSet_Do(t *testing.T) { + tests := []struct { + name string + ingesters []IngesterDesc + maxErrors int + f func(context.Context, *IngesterDesc) (interface{}, error) + delay time.Duration + closeContextDelay time.Duration + want []interface{} + wantErr bool + expectedError error + }{ + { + name: "no errors no delay", + ingesters: []IngesterDesc{ + {}, + }, + f: func(c context.Context, id *IngesterDesc) (interface{}, error) { + return 1, nil + }, + want: []interface{}{1}, + }, + { + name: "1 error, no errors expected", + ingesters: []IngesterDesc{{}}, + f: func(c context.Context, id *IngesterDesc) (interface{}, error) { + return nil, errors.New("Dummy error") + }, + want: nil, + wantErr: true, + }, + { + name: "3 ingesters, last call fails", + ingesters: []IngesterDesc{{}, {}, {}}, + f: failingFunctionAfter(2, 10*time.Millisecond), + want: nil, + wantErr: true, + }, + { + name: "5 ingesters, with delay, last 3 calls fail", + ingesters: []IngesterDesc{{}, {}, {}, {}, {}}, + maxErrors: 1, + f: failingFunctionAfter(2, 10*time.Millisecond), + delay: 100 * time.Millisecond, + want: nil, + wantErr: true, + }, + { + name: "3 ingesters, context fails", + ingesters: []IngesterDesc{{}, {}, {}}, + maxErrors: 1, + f: failingFunctionAfter(0, 200*time.Millisecond), + closeContextDelay: 100 * time.Millisecond, + want: nil, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + r := ReplicationSet{ + Ingesters: tt.ingesters, + MaxErrors: tt.maxErrors, + } + ctx := context.Background() + if tt.closeContextDelay > 0 { + var cancel context.CancelFunc + ctx, cancel = context.WithCancel(ctx) + go func() { + time.AfterFunc(tt.closeContextDelay, func() { + cancel() + }) + }() + } + got, err := r.Do(ctx, tt.delay, tt.f) + if tt.wantErr { + assert.Error(t, err) + if tt.expectedError != nil { + assert.Equal(t, tt.expectedError, err) + } + } else { + assert.NoError(t, err) + } + assert.Equal(t, tt.want, got) + }) + } +} From 91917fda654fd8882a324bc41e2f110fb1ccf574 Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Mon, 26 Oct 2020 10:45:26 +0000 Subject: [PATCH 02/42] Add lock to avoid race condition in test Signed-off-by: Michel Hollands --- pkg/ring/replication_set_test.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pkg/ring/replication_set_test.go b/pkg/ring/replication_set_test.go index 2e11351de58..e76a0c2e700 100644 --- a/pkg/ring/replication_set_test.go +++ 
b/pkg/ring/replication_set_test.go @@ -3,6 +3,7 @@ package ring import ( "context" "errors" + "sync" "testing" "time" @@ -39,10 +40,15 @@ func TestReplicationSet_GetAddresses(t *testing.T) { // Return a function that fails starting from failAfter times func failingFunctionAfter(failAfter int, delay time.Duration) func(context.Context, *IngesterDesc) (interface{}, error) { + var mutex = &sync.RWMutex{} count := 0 return func(context.Context, *IngesterDesc) (interface{}, error) { + mutex.Lock() count++ + mutex.Unlock() time.Sleep(delay) + mutex.RLock() + defer mutex.RUnlock() if count > failAfter { return nil, errors.New("Dummy error") } From ba3d8605699b841c34dfd0fa484c970609ed3c2d Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Wed, 28 Oct 2020 15:23:24 +0000 Subject: [PATCH 03/42] Add zone aware to GetAll Signed-off-by: Michel Hollands --- pkg/ring/replication_set.go | 53 +++++++++-- pkg/ring/replication_set_test.go | 145 ++++++++++++++++++++++++++++--- pkg/ring/ring.go | 30 ++++++- pkg/ring/ring_test.go | 104 ++++++++++++++++++++++ 4 files changed, 308 insertions(+), 24 deletions(-) diff --git a/pkg/ring/replication_set.go b/pkg/ring/replication_set.go index 3b15541f3ad..bd495a711ba 100644 --- a/pkg/ring/replication_set.go +++ b/pkg/ring/replication_set.go @@ -2,22 +2,32 @@ package ring import ( "context" + "errors" + "fmt" "sort" "time" ) +var errorTooManyZoneFailures = errors.New("Too many zones failed") + // ReplicationSet describes the ingesters to talk to for a given key, and how // many errors to tolerate. type ReplicationSet struct { - Ingesters []IngesterDesc - MaxErrors int + Ingesters []IngesterDesc + MaxErrors int + MaxUnavailableZones int +} + +type ingesterError struct { + err error + ing *IngesterDesc } // Do function f in parallel for all replicas in the set, erroring is we exceed // MaxErrors and returning early otherwise. 
func (r ReplicationSet) Do(ctx context.Context, delay time.Duration, f func(context.Context, *IngesterDesc) (interface{}, error)) ([]interface{}, error) { var ( - errs = make(chan error, len(r.Ingesters)) + errs = make(chan ingesterError, len(r.Ingesters)) resultsChan = make(chan interface{}, len(r.Ingesters)) minSuccess = len(r.Ingesters) - r.MaxErrors forceStart = make(chan struct{}, r.MaxErrors) @@ -38,9 +48,14 @@ func (r ReplicationSet) Do(ctx context.Context, delay time.Duration, f func(cont case <-after.C: } } + fmt.Printf("About to run for addr %v and zone %v\n", ing.Addr, ing.Zone) result, err := f(ctx, ing) + fmt.Printf("Addr %v and zone %v result %v\n", ing.Addr, ing.Zone, err) if err != nil { - errs <- err + errs <- ingesterError{ + err: err, + ing: ing, + } } else { resultsChan <- result } @@ -48,17 +63,37 @@ func (r ReplicationSet) Do(ctx context.Context, delay time.Duration, f func(cont } var ( - numErrs int - numSuccess int - results = make([]interface{}, 0, len(r.Ingesters)) + numErrs int + numSuccess int + results = make([]interface{}, 0, len(r.Ingesters)) + zoneFailureCount = make(map[string]int) ) + fmt.Printf("minSuccess %v\n", minSuccess) for numSuccess < minSuccess { + fmt.Printf("numSuccess %v\n", numSuccess) select { case err := <-errs: numErrs++ - if numErrs > r.MaxErrors { - return nil, err + + if r.MaxUnavailableZones > 0 { + // Non zone aware path + fmt.Printf("Incrementing count for Zone %v\n", err.ing.Zone) + zoneFailureCount[err.ing.Zone]++ + fmt.Printf("Count map %v\n", zoneFailureCount) + + if len(zoneFailureCount) > r.MaxUnavailableZones { + return nil, errorTooManyZoneFailures + } + } else { + // Non zone aware path + // Errors per zone : only RF-1 zone can be failing + // if failingZones > r.MaxUnavailableZones + + if numErrs > r.MaxErrors { + return nil, err.err + } } + // force one of the delayed requests to start forceStart <- struct{}{} diff --git a/pkg/ring/replication_set_test.go b/pkg/ring/replication_set_test.go index e76a0c2e700..356499ef4ec 100644 --- a/pkg/ring/replication_set_test.go +++ b/pkg/ring/replication_set_test.go @@ -56,17 +56,29 @@ func failingFunctionAfter(failAfter int, delay time.Duration) func(context.Conte } } +func failingFunctionForZones(zones ...string) func(context.Context, *IngesterDesc) (interface{}, error) { + return func(ctx context.Context, ing *IngesterDesc) (interface{}, error) { + for _, zone := range zones { + if ing.Zone == zone { + return nil, errors.New("Failed") + } + } + return 1, nil + } +} + func TestReplicationSet_Do(t *testing.T) { tests := []struct { - name string - ingesters []IngesterDesc - maxErrors int - f func(context.Context, *IngesterDesc) (interface{}, error) - delay time.Duration - closeContextDelay time.Duration - want []interface{} - wantErr bool - expectedError error + name string + ingesters []IngesterDesc + maxErrors int + maxUnavailableZones int + f func(context.Context, *IngesterDesc) (interface{}, error) + delay time.Duration + closeContextDelay time.Duration + want []interface{} + wantErr bool + expectedError error }{ { name: "no errors no delay", @@ -112,12 +124,123 @@ func TestReplicationSet_Do(t *testing.T) { want: nil, wantErr: true, }, + { + name: "3 ingesters, 3 zones, no failures", + ingesters: []IngesterDesc{{ + Zone: "zone1", + }, { + Zone: "zone2", + }, { + Zone: "zone3", + }}, + f: func(c context.Context, id *IngesterDesc) (interface{}, error) { + return 1, nil + }, + want: []interface{}{1, 1, 1}, + }, + { + name: "3 ingesters, 3 zones, 1 zone fails", + ingesters: 
[]IngesterDesc{{ + Zone: "zone1", + }, { + Zone: "zone2", + }, { + Zone: "zone3", + }}, + f: failingFunctionForZones("zone1"), + maxUnavailableZones: 1, // (nr of zones) / 2 + maxErrors: 1, // (nr of ingesters / nr of zones) * maxUnavailableZones + want: []interface{}{1, 1}, + }, + { + name: "3 ingesters, 3 zones, 2 zones fail", + ingesters: []IngesterDesc{{ + Zone: "zone1", + }, { + Zone: "zone2", + }, { + Zone: "zone3", + }}, + f: failingFunctionForZones("zone1", "zone2"), + maxUnavailableZones: 1, + maxErrors: 1, + wantErr: true, + expectedError: errorTooManyZoneFailures, + }, + { + name: "6 ingesters, 3 zones, 1 zone fails", + ingesters: []IngesterDesc{{ + Zone: "zone1", + }, { + Zone: "zone1", + }, { + Zone: "zone2", + }, { + Zone: "zone2", + }, { + Zone: "zone3", + }, { + Zone: "zone3", + }}, + f: failingFunctionForZones("zone1"), + maxUnavailableZones: 1, + maxErrors: 2, + want: []interface{}{1, 1, 1, 1}, + }, + { + name: "5 ingesters, 5 zones, 3 zones fails", + ingesters: []IngesterDesc{{ + Zone: "zone1", + }, { + Zone: "zone2", + }, { + Zone: "zone3", + }, { + Zone: "zone4", + }, { + Zone: "zone5", + }}, + f: failingFunctionForZones("zone1", "zone2", "zone3"), + maxUnavailableZones: 2, + maxErrors: 2, + wantErr: true, + expectedError: errorTooManyZoneFailures, + }, + { + name: "10 ingesters, 5 zones, 2 failures in zone 1", + ingesters: []IngesterDesc{{ + Zone: "zone1", + }, { + Zone: "zone1", + }, { + Zone: "zone2", + }, { + Zone: "zone2", + }, { + Zone: "zone3", + }, { + Zone: "zone3", + }, { + Zone: "zone4", + }, { + Zone: "zone4", + }, { + Zone: "zone5", + }, { + Zone: "zone5", + }}, + f: failingFunctionForZones("zone1"), + maxUnavailableZones: 2, + maxErrors: 4, + want: []interface{}{1, 1, 1, 1, 1, 1}, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { r := ReplicationSet{ - Ingesters: tt.ingesters, - MaxErrors: tt.maxErrors, + Ingesters: tt.ingesters, + MaxErrors: tt.maxErrors, + MaxUnavailableZones: tt.maxUnavailableZones, } ctx := context.Background() if tt.closeContextDelay > 0 { diff --git a/pkg/ring/ring.go b/pkg/ring/ring.go index 6d2492c94ef..1e08105d0d2 100644 --- a/pkg/ring/ring.go +++ b/pkg/ring/ring.go @@ -363,16 +363,37 @@ func (r *Ring) GetReplicationSetForOperation(op Operation) (ReplicationSet, erro return ReplicationSet{}, ErrEmptyRing } + var maxUnavailableZones int + var numRequired int + ingesters := make([]IngesterDesc, 0, len(r.ringDesc.Ingesters)) + if r.cfg.ZoneAwarenessEnabled { + zones := make(map[string]bool) + + for _, ingester := range r.ringDesc.Ingesters { + zones[ingester.Zone] = true + ingesters = append(ingesters, ingester) + } + maxUnavailableZones = len(zones) / 2 + // Assume ingesters are evenly spread over zones + // The maximum number of failures is all the ingesters in the maximum number of failing zones + maxErrors := (len(ingesters) / len(zones)) * maxUnavailableZones + + return ReplicationSet{ + Ingesters: ingesters, + MaxErrors: maxErrors, + MaxUnavailableZones: maxUnavailableZones, + }, nil + } + // Calculate the number of required ingesters; // ensure we always require at least RF-1 when RF=3. 
- numRequired := len(r.ringDesc.Ingesters) + numRequired = len(r.ringDesc.Ingesters) if numRequired < r.cfg.ReplicationFactor { numRequired = r.cfg.ReplicationFactor } maxUnavailable := r.cfg.ReplicationFactor / 2 numRequired -= maxUnavailable - ingesters := make([]IngesterDesc, 0, len(r.ringDesc.Ingesters)) for _, ingester := range r.ringDesc.Ingesters { if r.IsHealthy(&ingester, op) { ingesters = append(ingesters, ingester) @@ -384,8 +405,9 @@ func (r *Ring) GetReplicationSetForOperation(op Operation) (ReplicationSet, erro } return ReplicationSet{ - Ingesters: ingesters, - MaxErrors: len(ingesters) - numRequired, + Ingesters: ingesters, + MaxErrors: len(ingesters) - numRequired, + MaxUnavailableZones: maxUnavailableZones, }, nil } diff --git a/pkg/ring/ring_test.go b/pkg/ring/ring_test.go index 189fac4b904..6ab72a060d7 100644 --- a/pkg/ring/ring_test.go +++ b/pkg/ring/ring_test.go @@ -418,6 +418,110 @@ func TestRing_GetReplicationSetForOperation(t *testing.T) { } } +func TestRing_GetAll_ZoneAware(t *testing.T) { + tests := map[string]struct { + ringInstances map[string]IngesterDesc + expectedError error + expectedMaxErrors int + expectedMaxUnavailableZones int + }{ + "empty ring": { + ringInstances: nil, + expectedError: ErrEmptyRing, + }, + "single zone": { + ringInstances: map[string]IngesterDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + }, + expectedMaxErrors: 0, + expectedMaxUnavailableZones: 0, + }, + "single zone, shard size < num instances": { + ringInstances: map[string]IngesterDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + }, + expectedMaxErrors: 0, + expectedMaxUnavailableZones: 0, + }, + "three zones, one instance per zone": { + ringInstances: map[string]IngesterDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + }, + expectedMaxErrors: 1, + expectedMaxUnavailableZones: 1, + }, + "three zones, two instances per zone": { + ringInstances: map[string]IngesterDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-4": {Addr: "127.0.0.4", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-5": {Addr: "127.0.0.5", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + "instance-6": {Addr: "127.0.0.6", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + }, + expectedMaxErrors: 2, + expectedMaxUnavailableZones: 1, + }, + "five zones, two instances per zone except for one zone which has three": { + ringInstances: map[string]IngesterDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-4": {Addr: "127.0.0.4", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-5": 
{Addr: "127.0.0.5", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + "instance-6": {Addr: "127.0.0.6", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + "instance-7": {Addr: "127.0.0.7", Zone: "zone-d", Tokens: GenerateTokens(128, nil)}, + "instance-8": {Addr: "127.0.0.6", Zone: "zone-d", Tokens: GenerateTokens(128, nil)}, + "instance-9": {Addr: "127.0.0.9", Zone: "zone-e", Tokens: GenerateTokens(128, nil)}, + "instance-10": {Addr: "127.0.0.10", Zone: "zone-e", Tokens: GenerateTokens(128, nil)}, + "instance-11": {Addr: "127.0.0.11", Zone: "zone-e", Tokens: GenerateTokens(128, nil)}, + }, + expectedMaxErrors: 4, + expectedMaxUnavailableZones: 2, + }, + } + + for testName, testData := range tests { + t.Run(testName, func(t *testing.T) { + // Init the ring. + ringDesc := &Desc{Ingesters: testData.ringInstances} + for id, instance := range ringDesc.Ingesters { + instance.Timestamp = time.Now().Unix() + instance.State = ACTIVE + ringDesc.Ingesters[id] = instance + } + + ring := Ring{ + cfg: Config{ + HeartbeatTimeout: time.Hour, + ZoneAwarenessEnabled: true, + }, + ringDesc: ringDesc, + ringTokens: ringDesc.getTokens(), + ringTokensByZone: ringDesc.getTokensByZone(), + ringZones: getZones(ringDesc.getTokensByZone()), + strategy: &DefaultReplicationStrategy{}, + } + + // Check the replication set has the correct settings + replicationSet, err := ring.GetAll(Read) + if testData.expectedError == nil { + require.NoError(t, err) + } else { + require.EqualError(t, err, ErrEmptyRing.Error()) + } + + assert.Equal(t, testData.expectedMaxErrors, replicationSet.MaxErrors) + assert.Equal(t, testData.expectedMaxUnavailableZones, replicationSet.MaxUnavailableZones) + }) + } +} + func TestRing_ShuffleShard(t *testing.T) { tests := map[string]struct { ringInstances map[string]IngesterDesc From 90d901327c46e2f3f75e32de9f27983afa74107a Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Wed, 28 Oct 2020 15:33:04 +0000 Subject: [PATCH 04/42] Remove debug print statements Signed-off-by: Michel Hollands --- pkg/ring/replication_set.go | 7 ------- 1 file changed, 7 deletions(-) diff --git a/pkg/ring/replication_set.go b/pkg/ring/replication_set.go index bd495a711ba..6014b163240 100644 --- a/pkg/ring/replication_set.go +++ b/pkg/ring/replication_set.go @@ -3,7 +3,6 @@ package ring import ( "context" "errors" - "fmt" "sort" "time" ) @@ -48,9 +47,7 @@ func (r ReplicationSet) Do(ctx context.Context, delay time.Duration, f func(cont case <-after.C: } } - fmt.Printf("About to run for addr %v and zone %v\n", ing.Addr, ing.Zone) result, err := f(ctx, ing) - fmt.Printf("Addr %v and zone %v result %v\n", ing.Addr, ing.Zone, err) if err != nil { errs <- ingesterError{ err: err, @@ -68,18 +65,14 @@ func (r ReplicationSet) Do(ctx context.Context, delay time.Duration, f func(cont results = make([]interface{}, 0, len(r.Ingesters)) zoneFailureCount = make(map[string]int) ) - fmt.Printf("minSuccess %v\n", minSuccess) for numSuccess < minSuccess { - fmt.Printf("numSuccess %v\n", numSuccess) select { case err := <-errs: numErrs++ if r.MaxUnavailableZones > 0 { // Non zone aware path - fmt.Printf("Incrementing count for Zone %v\n", err.ing.Zone) zoneFailureCount[err.ing.Zone]++ - fmt.Printf("Count map %v\n", zoneFailureCount) if len(zoneFailureCount) > r.MaxUnavailableZones { return nil, errorTooManyZoneFailures From 440d0fcb137a88140fbb247542364162b01bcd94 Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Wed, 28 Oct 2020 15:36:14 +0000 Subject: [PATCH 05/42] Add changelog entry Signed-off-by: Michel Hollands --- 
CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2be5d52031b..f8dff18faa3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## master / unreleased * [CHANGE] Querier: deprecated `-store.max-look-back-period`. You should use `-querier.max-query-lookback` instead. #3452 +* [FEATURE] Add support for zone aware reads. #3414 * [ENHANCEMENT] Blocks storage ingester: exported more TSDB-related metrics. #3412 - `cortex_ingester_tsdb_wal_corruptions_total` - `cortex_ingester_tsdb_head_truncations_failed_total` From 4362f5d88b3ed68f7f88fc3cd26232109fcf4802 Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Wed, 28 Oct 2020 15:37:57 +0000 Subject: [PATCH 06/42] Fix comment Signed-off-by: Michel Hollands --- pkg/ring/replication_set.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/ring/replication_set.go b/pkg/ring/replication_set.go index 6014b163240..ed7d1e71e34 100644 --- a/pkg/ring/replication_set.go +++ b/pkg/ring/replication_set.go @@ -71,7 +71,7 @@ func (r ReplicationSet) Do(ctx context.Context, delay time.Duration, f func(cont numErrs++ if r.MaxUnavailableZones > 0 { - // Non zone aware path + // Zone aware path zoneFailureCount[err.ing.Zone]++ if len(zoneFailureCount) > r.MaxUnavailableZones { From d9ab63a4c2b44afcde8efee0bcc520288b3622ab Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Mon, 2 Nov 2020 11:14:45 +0000 Subject: [PATCH 07/42] Address replication set review comments Signed-off-by: Michel Hollands --- pkg/ring/replication_set.go | 28 ++++---- pkg/ring/replication_set_test.go | 106 ++++++++++++++++--------------- 2 files changed, 65 insertions(+), 69 deletions(-) diff --git a/pkg/ring/replication_set.go b/pkg/ring/replication_set.go index ed7d1e71e34..1897dcb4e04 100644 --- a/pkg/ring/replication_set.go +++ b/pkg/ring/replication_set.go @@ -17,16 +17,16 @@ type ReplicationSet struct { MaxUnavailableZones int } -type ingesterError struct { - err error - ing *IngesterDesc -} - // Do function f in parallel for all replicas in the set, erroring is we exceed // MaxErrors and returning early otherwise. 
func (r ReplicationSet) Do(ctx context.Context, delay time.Duration, f func(context.Context, *IngesterDesc) (interface{}, error)) ([]interface{}, error) { + type instanceError struct { + err error + instance *IngesterDesc + } + var ( - errs = make(chan ingesterError, len(r.Ingesters)) + errs = make(chan instanceError, len(r.Ingesters)) resultsChan = make(chan interface{}, len(r.Ingesters)) minSuccess = len(r.Ingesters) - r.MaxErrors forceStart = make(chan struct{}, r.MaxErrors) @@ -49,9 +49,9 @@ func (r ReplicationSet) Do(ctx context.Context, delay time.Duration, f func(cont } result, err := f(ctx, ing) if err != nil { - errs <- ingesterError{ - err: err, - ing: ing, + errs <- instanceError{ + err: err, + instance: ing, } } else { resultsChan <- result @@ -68,20 +68,14 @@ func (r ReplicationSet) Do(ctx context.Context, delay time.Duration, f func(cont for numSuccess < minSuccess { select { case err := <-errs: - numErrs++ - if r.MaxUnavailableZones > 0 { - // Zone aware path - zoneFailureCount[err.ing.Zone]++ + zoneFailureCount[err.instance.Zone]++ if len(zoneFailureCount) > r.MaxUnavailableZones { return nil, errorTooManyZoneFailures } } else { - // Non zone aware path - // Errors per zone : only RF-1 zone can be failing - // if failingZones > r.MaxUnavailableZones - + numErrs++ if numErrs > r.MaxErrors { return nil, err.err } diff --git a/pkg/ring/replication_set_test.go b/pkg/ring/replication_set_test.go index 356499ef4ec..87a3529b6e0 100644 --- a/pkg/ring/replication_set_test.go +++ b/pkg/ring/replication_set_test.go @@ -38,6 +38,11 @@ func TestReplicationSet_GetAddresses(t *testing.T) { } } +var ( + errFailure = errors.New("failed") + errZoneFailure = errors.New("zone failed") +) + // Return a function that fails starting from failAfter times func failingFunctionAfter(failAfter int, delay time.Duration) func(context.Context, *IngesterDesc) (interface{}, error) { var mutex = &sync.RWMutex{} @@ -50,7 +55,7 @@ func failingFunctionAfter(failAfter int, delay time.Duration) func(context.Conte mutex.RLock() defer mutex.RUnlock() if count > failAfter { - return nil, errors.New("Dummy error") + return nil, errFailure } return 1, nil } @@ -60,7 +65,7 @@ func failingFunctionForZones(zones ...string) func(context.Context, *IngesterDes return func(ctx context.Context, ing *IngesterDesc) (interface{}, error) { for _, zone := range zones { if ing.Zone == zone { - return nil, errors.New("Failed") + return nil, errZoneFailure } } return 1, nil @@ -70,19 +75,18 @@ func failingFunctionForZones(zones ...string) func(context.Context, *IngesterDes func TestReplicationSet_Do(t *testing.T) { tests := []struct { name string - ingesters []IngesterDesc + instances []IngesterDesc maxErrors int maxUnavailableZones int f func(context.Context, *IngesterDesc) (interface{}, error) delay time.Duration - closeContextDelay time.Duration + cancelContextDelay time.Duration want []interface{} - wantErr bool expectedError error }{ { name: "no errors no delay", - ingesters: []IngesterDesc{ + instances: []IngesterDesc{ {}, }, f: func(c context.Context, id *IngesterDesc) (interface{}, error) { @@ -92,41 +96,44 @@ func TestReplicationSet_Do(t *testing.T) { }, { name: "1 error, no errors expected", - ingesters: []IngesterDesc{{}}, + instances: []IngesterDesc{{}}, f: func(c context.Context, id *IngesterDesc) (interface{}, error) { - return nil, errors.New("Dummy error") + return nil, errFailure }, - want: nil, - wantErr: true, + want: nil, + expectedError: errFailure, }, { - name: "3 ingesters, last call fails", - ingesters: 
[]IngesterDesc{{}, {}, {}}, - f: failingFunctionAfter(2, 10*time.Millisecond), - want: nil, - wantErr: true, + name: "3 instances, last call fails", + instances: []IngesterDesc{{}, {}, {}}, + f: failingFunctionAfter(2, 10*time.Millisecond), + want: nil, + expectedError: errFailure, }, { - name: "5 ingesters, with delay, last 3 calls fail", - ingesters: []IngesterDesc{{}, {}, {}, {}, {}}, - maxErrors: 1, - f: failingFunctionAfter(2, 10*time.Millisecond), - delay: 100 * time.Millisecond, - want: nil, - wantErr: true, + name: "5 instances, with delay, last 3 calls fail", + instances: []IngesterDesc{{}, {}, {}, {}, {}}, + maxErrors: 1, + f: failingFunctionAfter(2, 10*time.Millisecond), + delay: 100 * time.Millisecond, + want: nil, + expectedError: errFailure, }, { - name: "3 ingesters, context fails", - ingesters: []IngesterDesc{{}, {}, {}}, - maxErrors: 1, - f: failingFunctionAfter(0, 200*time.Millisecond), - closeContextDelay: 100 * time.Millisecond, - want: nil, - wantErr: true, + name: "3 instances, context cancelled", + instances: []IngesterDesc{{}, {}, {}}, + maxErrors: 1, + f: func(c context.Context, id *IngesterDesc) (interface{}, error) { + time.Sleep(300 * time.Millisecond) + return 1, nil + }, + cancelContextDelay: 100 * time.Millisecond, + want: nil, + expectedError: context.Canceled, }, { - name: "3 ingesters, 3 zones, no failures", - ingesters: []IngesterDesc{{ + name: "3 instances, 3 zones, no failures", + instances: []IngesterDesc{{ Zone: "zone1", }, { Zone: "zone2", @@ -139,8 +146,8 @@ func TestReplicationSet_Do(t *testing.T) { want: []interface{}{1, 1, 1}, }, { - name: "3 ingesters, 3 zones, 1 zone fails", - ingesters: []IngesterDesc{{ + name: "3 instances, 3 zones, 1 zone fails", + instances: []IngesterDesc{{ Zone: "zone1", }, { Zone: "zone2", @@ -149,12 +156,12 @@ func TestReplicationSet_Do(t *testing.T) { }}, f: failingFunctionForZones("zone1"), maxUnavailableZones: 1, // (nr of zones) / 2 - maxErrors: 1, // (nr of ingesters / nr of zones) * maxUnavailableZones + maxErrors: 1, // (nr of instances / nr of zones) * maxUnavailableZones want: []interface{}{1, 1}, }, { - name: "3 ingesters, 3 zones, 2 zones fail", - ingesters: []IngesterDesc{{ + name: "3 instances, 3 zones, 2 zones fail", + instances: []IngesterDesc{{ Zone: "zone1", }, { Zone: "zone2", @@ -164,12 +171,11 @@ func TestReplicationSet_Do(t *testing.T) { f: failingFunctionForZones("zone1", "zone2"), maxUnavailableZones: 1, maxErrors: 1, - wantErr: true, expectedError: errorTooManyZoneFailures, }, { - name: "6 ingesters, 3 zones, 1 zone fails", - ingesters: []IngesterDesc{{ + name: "6 instances, 3 zones, 1 zone fails", + instances: []IngesterDesc{{ Zone: "zone1", }, { Zone: "zone1", @@ -188,8 +194,8 @@ func TestReplicationSet_Do(t *testing.T) { want: []interface{}{1, 1, 1, 1}, }, { - name: "5 ingesters, 5 zones, 3 zones fails", - ingesters: []IngesterDesc{{ + name: "5 instances, 5 zones, 3 zones fails", + instances: []IngesterDesc{{ Zone: "zone1", }, { Zone: "zone2", @@ -203,12 +209,11 @@ func TestReplicationSet_Do(t *testing.T) { f: failingFunctionForZones("zone1", "zone2", "zone3"), maxUnavailableZones: 2, maxErrors: 2, - wantErr: true, expectedError: errorTooManyZoneFailures, }, { - name: "10 ingesters, 5 zones, 2 failures in zone 1", - ingesters: []IngesterDesc{{ + name: "10 instances, 5 zones, 2 failures in zone 1", + instances: []IngesterDesc{{ Zone: "zone1", }, { Zone: "zone1", @@ -238,26 +243,23 @@ func TestReplicationSet_Do(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { r := 
ReplicationSet{ - Ingesters: tt.ingesters, + Ingesters: tt.instances, MaxErrors: tt.maxErrors, MaxUnavailableZones: tt.maxUnavailableZones, } ctx := context.Background() - if tt.closeContextDelay > 0 { + if tt.cancelContextDelay > 0 { var cancel context.CancelFunc ctx, cancel = context.WithCancel(ctx) go func() { - time.AfterFunc(tt.closeContextDelay, func() { + time.AfterFunc(tt.cancelContextDelay, func() { cancel() }) }() } got, err := r.Do(ctx, tt.delay, tt.f) - if tt.wantErr { - assert.Error(t, err) - if tt.expectedError != nil { - assert.Equal(t, tt.expectedError, err) - } + if tt.expectedError != nil { + assert.Equal(t, tt.expectedError, err) } else { assert.NoError(t, err) } From f752ce3ba793748ee15646a7aba16e1ee6df679f Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Mon, 2 Nov 2020 11:18:28 +0000 Subject: [PATCH 08/42] Reword and change changelong entry to enhancement Signed-off-by: Michel Hollands --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f8dff18faa3..d38b1ead1d3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ ## master / unreleased * [CHANGE] Querier: deprecated `-store.max-look-back-period`. You should use `-querier.max-query-lookback` instead. #3452 -* [FEATURE] Add support for zone aware reads. #3414 +* [ENHANCEMENT] Added zone-awareness support on queries. When zone-awareness is enabled, queries will still succeed if all ingesters in a single zone will fail. #3414 * [ENHANCEMENT] Blocks storage ingester: exported more TSDB-related metrics. #3412 - `cortex_ingester_tsdb_wal_corruptions_total` - `cortex_ingester_tsdb_head_truncations_failed_total` From ff8ebccec61cb8e8a36bcbcb490845f5295a05de Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Mon, 2 Nov 2020 17:39:42 +0000 Subject: [PATCH 09/42] Address review comments in ring code Signed-off-by: Michel Hollands --- pkg/ring/ring.go | 54 ++++++++++++++++----------- pkg/ring/ring_test.go | 87 +++++++++++++++++++++++++++++++++++++------ 2 files changed, 108 insertions(+), 33 deletions(-) diff --git a/pkg/ring/ring.go b/pkg/ring/ring.go index 1e08105d0d2..3e2112eb37a 100644 --- a/pkg/ring/ring.go +++ b/pkg/ring/ring.go @@ -363,43 +363,53 @@ func (r *Ring) GetReplicationSetForOperation(op Operation) (ReplicationSet, erro return ReplicationSet{}, ErrEmptyRing } - var maxUnavailableZones int - var numRequired int - ingesters := make([]IngesterDesc, 0, len(r.ringDesc.Ingesters)) - if r.cfg.ZoneAwarenessEnabled { - zones := make(map[string]bool) - - for _, ingester := range r.ringDesc.Ingesters { - zones[ingester.Zone] = true - ingesters = append(ingesters, ingester) - } - maxUnavailableZones = len(zones) / 2 - // Assume ingesters are evenly spread over zones - // The maximum number of failures is all the ingesters in the maximum number of failing zones - maxErrors := (len(ingesters) / len(zones)) * maxUnavailableZones - - return ReplicationSet{ - Ingesters: ingesters, - MaxErrors: maxErrors, - MaxUnavailableZones: maxUnavailableZones, - }, nil - } + maxUnavailableZones := len(r.ringZones) / 2 // Calculate the number of required ingesters; // ensure we always require at least RF-1 when RF=3. 
- numRequired = len(r.ringDesc.Ingesters) + numRequired := len(r.ringDesc.Ingesters) if numRequired < r.cfg.ReplicationFactor { numRequired = r.cfg.ReplicationFactor } maxUnavailable := r.cfg.ReplicationFactor / 2 numRequired -= maxUnavailable + ingesters := make([]IngesterDesc, 0, len(r.ringDesc.Ingesters)) + zoneFailures := make(map[string]int) for _, ingester := range r.ringDesc.Ingesters { if r.IsHealthy(&ingester, op) { ingesters = append(ingesters, ingester) + + } else { + zoneFailures[ingester.Zone]++ } } + if r.cfg.ZoneAwarenessEnabled { + filteredInstances := make([]IngesterDesc, 0, len(r.ringDesc.Ingesters)) + if len(zoneFailures) > 0 { + for _, ingester := range ingesters { + _, present := zoneFailures[ingester.Zone] + if !present { + filteredInstances = append(filteredInstances, ingester) + } + } + } + + if len(filteredInstances) == 0 { + return ReplicationSet{ + Ingesters: ingesters, + MaxErrors: len(ingesters) - numRequired, + MaxUnavailableZones: maxUnavailableZones, + }, nil + } + return ReplicationSet{ + Ingesters: filteredInstances, + MaxErrors: 0, + MaxUnavailableZones: 0, + }, nil + } + if len(ingesters) < numRequired { return ReplicationSet{}, ErrTooManyFailedIngesters } diff --git a/pkg/ring/ring_test.go b/pkg/ring/ring_test.go index 6ab72a060d7..6a89309acc1 100644 --- a/pkg/ring/ring_test.go +++ b/pkg/ring/ring_test.go @@ -421,6 +421,9 @@ func TestRing_GetReplicationSetForOperation(t *testing.T) { func TestRing_GetAll_ZoneAware(t *testing.T) { tests := map[string]struct { ringInstances map[string]IngesterDesc + unhealthyInstances []string + expectedAddresses []string + replicationFactor int expectedError error expectedMaxErrors int expectedMaxUnavailableZones int @@ -434,26 +437,33 @@ func TestRing_GetAll_ZoneAware(t *testing.T) { "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, }, + expectedAddresses: []string{"127.0.0.1", "127.0.0.2"}, + replicationFactor: 1, expectedMaxErrors: 0, expectedMaxUnavailableZones: 0, }, - "single zone, shard size < num instances": { + "three zones, one instance per zone": { ringInstances: map[string]IngesterDesc{ "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, - "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, - "instance-3": {Addr: "127.0.0.3", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, }, - expectedMaxErrors: 0, - expectedMaxUnavailableZones: 0, + expectedAddresses: []string{"127.0.0.1", "127.0.0.2", "127.0.0.3"}, + replicationFactor: 3, + expectedMaxErrors: 1, + expectedMaxUnavailableZones: 1, }, - "three zones, one instance per zone": { + "three zones, one instance per zone, one instance unhealthy": { ringInstances: map[string]IngesterDesc{ "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, "instance-2": {Addr: "127.0.0.2", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, "instance-3": {Addr: "127.0.0.3", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, }, - expectedMaxErrors: 1, - expectedMaxUnavailableZones: 1, + expectedAddresses: []string{"127.0.0.2", "127.0.0.3"}, + unhealthyInstances: []string{"instance-1"}, + replicationFactor: 3, + expectedMaxErrors: 0, + expectedMaxUnavailableZones: 0, }, "three zones, two instances 
per zone": { ringInstances: map[string]IngesterDesc{ @@ -464,9 +474,26 @@ func TestRing_GetAll_ZoneAware(t *testing.T) { "instance-5": {Addr: "127.0.0.5", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, "instance-6": {Addr: "127.0.0.6", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, }, - expectedMaxErrors: 2, + expectedAddresses: []string{"127.0.0.1", "127.0.0.2", "127.0.0.3", "127.0.0.4", "127.0.0.5", "127.0.0.6"}, + replicationFactor: 3, + expectedMaxErrors: 1, expectedMaxUnavailableZones: 1, }, + "three zones, two instances per zone, two instances unhealthy in same zone": { + ringInstances: map[string]IngesterDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-4": {Addr: "127.0.0.4", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-5": {Addr: "127.0.0.5", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + "instance-6": {Addr: "127.0.0.6", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + }, + expectedAddresses: []string{"127.0.0.1", "127.0.0.2", "127.0.0.5", "127.0.0.6"}, + unhealthyInstances: []string{"instance-3", "instance-4"}, + replicationFactor: 3, + expectedMaxErrors: 0, + expectedMaxUnavailableZones: 0, + }, "five zones, two instances per zone except for one zone which has three": { ringInstances: map[string]IngesterDesc{ "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, @@ -476,14 +503,37 @@ func TestRing_GetAll_ZoneAware(t *testing.T) { "instance-5": {Addr: "127.0.0.5", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, "instance-6": {Addr: "127.0.0.6", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, "instance-7": {Addr: "127.0.0.7", Zone: "zone-d", Tokens: GenerateTokens(128, nil)}, - "instance-8": {Addr: "127.0.0.6", Zone: "zone-d", Tokens: GenerateTokens(128, nil)}, + "instance-8": {Addr: "127.0.0.8", Zone: "zone-d", Tokens: GenerateTokens(128, nil)}, "instance-9": {Addr: "127.0.0.9", Zone: "zone-e", Tokens: GenerateTokens(128, nil)}, "instance-10": {Addr: "127.0.0.10", Zone: "zone-e", Tokens: GenerateTokens(128, nil)}, "instance-11": {Addr: "127.0.0.11", Zone: "zone-e", Tokens: GenerateTokens(128, nil)}, }, - expectedMaxErrors: 4, + expectedAddresses: []string{"127.0.0.1", "127.0.0.2", "127.0.0.3", "127.0.0.4", "127.0.0.5", + "127.0.0.6", "127.0.0.7", "127.0.0.8", "127.0.0.9", "127.0.0.10", "127.0.0.11"}, + replicationFactor: 5, + expectedMaxErrors: 2, expectedMaxUnavailableZones: 2, }, + "five zones, two instances per zone except for one zone which has three, 2 unhealthy nodes in separate zones": { + ringInstances: map[string]IngesterDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-4": {Addr: "127.0.0.4", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-5": {Addr: "127.0.0.5", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + "instance-6": {Addr: "127.0.0.6", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + "instance-7": {Addr: "127.0.0.7", Zone: "zone-d", Tokens: GenerateTokens(128, nil)}, + "instance-8": {Addr: "127.0.0.8", Zone: "zone-d", Tokens: GenerateTokens(128, nil)}, + "instance-9": {Addr: "127.0.0.9", Zone: "zone-e", Tokens: 
GenerateTokens(128, nil)}, + "instance-10": {Addr: "127.0.0.10", Zone: "zone-e", Tokens: GenerateTokens(128, nil)}, + "instance-11": {Addr: "127.0.0.11", Zone: "zone-e", Tokens: GenerateTokens(128, nil)}, + }, + expectedAddresses: []string{"127.0.0.1", "127.0.0.2", "127.0.0.7", "127.0.0.8", "127.0.0.9", "127.0.0.10", "127.0.0.11"}, + unhealthyInstances: []string{"instance-3", "instance-5"}, + replicationFactor: 5, + expectedMaxErrors: 0, + expectedMaxUnavailableZones: 0, + }, } for testName, testData := range tests { @@ -493,6 +543,11 @@ func TestRing_GetAll_ZoneAware(t *testing.T) { for id, instance := range ringDesc.Ingesters { instance.Timestamp = time.Now().Unix() instance.State = ACTIVE + for _, instanceName := range testData.unhealthyInstances { + if instanceName == id { + instance.State = JOINING + } + } ringDesc.Ingesters[id] = instance } @@ -500,6 +555,7 @@ func TestRing_GetAll_ZoneAware(t *testing.T) { cfg: Config{ HeartbeatTimeout: time.Hour, ZoneAwarenessEnabled: true, + ReplicationFactor: testData.replicationFactor, }, ringDesc: ringDesc, ringTokens: ringDesc.getTokens(), @@ -518,6 +574,15 @@ func TestRing_GetAll_ZoneAware(t *testing.T) { assert.Equal(t, testData.expectedMaxErrors, replicationSet.MaxErrors) assert.Equal(t, testData.expectedMaxUnavailableZones, replicationSet.MaxUnavailableZones) + + returnAddresses := []string{} + for _, instance := range replicationSet.Ingesters { + returnAddresses = append(returnAddresses, instance.Addr) + } + for _, addr := range testData.expectedAddresses { + assert.Contains(t, returnAddresses, addr) + } + assert.Equal(t, len(testData.expectedAddresses), len(replicationSet.Ingesters)) }) } } From 6e217ef74011e363cacbc0ea31a528a1825e30cf Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Tue, 3 Nov 2020 09:16:52 +0000 Subject: [PATCH 10/42] Do not return early and add more test cases Signed-off-by: Michel Hollands --- pkg/ring/ring.go | 40 ++++++++++++++++++++++------------------ pkg/ring/ring_test.go | 40 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 61 insertions(+), 19 deletions(-) diff --git a/pkg/ring/ring.go b/pkg/ring/ring.go index 3e2112eb37a..7200dd3e1c8 100644 --- a/pkg/ring/ring.go +++ b/pkg/ring/ring.go @@ -101,8 +101,12 @@ var ( // not registered within the ring. ErrInstanceNotFound = errors.New("instance not found in the ring") +<<<<<<< HEAD // ErrTooManyFailedIngesters is the error returned when there are too many failed ingesters for a // specific operation. +======= + // ErrTooManyFailedIngesters is the error returned when not enough ingesters are available +>>>>>>> Do not return early and add more test cases ErrTooManyFailedIngesters = errors.New("too many failed ingesters") ) @@ -332,6 +336,16 @@ func (r *Ring) Get(key uint32, op Operation, buf []IngesterDesc) (ReplicationSet }, nil } +func calculateRequiredInstances(nrInstances, replicationFactor int) int { + numRequired := nrInstances + if numRequired < replicationFactor { + numRequired = replicationFactor + } + maxUnavailable := replicationFactor / 2 + numRequired -= maxUnavailable + return numRequired +} + // GetAllHealthy implements ReadRing. func (r *Ring) GetAllHealthy(op Operation) (ReplicationSet, error) { r.mtx.RLock() @@ -367,12 +381,7 @@ func (r *Ring) GetReplicationSetForOperation(op Operation) (ReplicationSet, erro // Calculate the number of required ingesters; // ensure we always require at least RF-1 when RF=3. 
- numRequired := len(r.ringDesc.Ingesters) - if numRequired < r.cfg.ReplicationFactor { - numRequired = r.cfg.ReplicationFactor - } - maxUnavailable := r.cfg.ReplicationFactor / 2 - numRequired -= maxUnavailable + numRequired := calculateRequiredInstances(len(r.ringDesc.Ingesters), r.cfg.ReplicationFactor) ingesters := make([]IngesterDesc, 0, len(r.ringDesc.Ingesters)) zoneFailures := make(map[string]int) @@ -384,6 +393,7 @@ func (r *Ring) GetReplicationSetForOperation(op Operation) (ReplicationSet, erro zoneFailures[ingester.Zone]++ } } + maxErrors := len(ingesters) - numRequired if r.cfg.ZoneAwarenessEnabled { filteredInstances := make([]IngesterDesc, 0, len(r.ringDesc.Ingesters)) @@ -396,18 +406,12 @@ func (r *Ring) GetReplicationSetForOperation(op Operation) (ReplicationSet, erro } } - if len(filteredInstances) == 0 { - return ReplicationSet{ - Ingesters: ingesters, - MaxErrors: len(ingesters) - numRequired, - MaxUnavailableZones: maxUnavailableZones, - }, nil + if len(filteredInstances) != 0 { + ingesters = filteredInstances + maxUnavailableZones = 0 + numRequired = calculateRequiredInstances(len(ingesters), r.cfg.ReplicationFactor) + maxErrors = 0 } - return ReplicationSet{ - Ingesters: filteredInstances, - MaxErrors: 0, - MaxUnavailableZones: 0, - }, nil } if len(ingesters) < numRequired { @@ -416,7 +420,7 @@ func (r *Ring) GetReplicationSetForOperation(op Operation) (ReplicationSet, erro return ReplicationSet{ Ingesters: ingesters, - MaxErrors: len(ingesters) - numRequired, + MaxErrors: maxErrors, MaxUnavailableZones: maxUnavailableZones, }, nil } diff --git a/pkg/ring/ring_test.go b/pkg/ring/ring_test.go index 6a89309acc1..e73391cecd0 100644 --- a/pkg/ring/ring_test.go +++ b/pkg/ring/ring_test.go @@ -465,6 +465,26 @@ func TestRing_GetAll_ZoneAware(t *testing.T) { expectedMaxErrors: 0, expectedMaxUnavailableZones: 0, }, + "three zones, one instance per zone, two instances unhealthy in separate zones": { + ringInstances: map[string]IngesterDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + }, + unhealthyInstances: []string{"instance-1", "instance-2"}, + replicationFactor: 3, + expectedError: ErrTooManyFailedIngesters, + }, + "three zones, one instance per zone, all instances unhealthy": { + ringInstances: map[string]IngesterDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + }, + unhealthyInstances: []string{"instance-1", "instance-2", "instance-3"}, + replicationFactor: 3, + expectedError: ErrTooManyFailedIngesters, + }, "three zones, two instances per zone": { ringInstances: map[string]IngesterDesc{ "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, @@ -494,6 +514,24 @@ func TestRing_GetAll_ZoneAware(t *testing.T) { expectedMaxErrors: 0, expectedMaxUnavailableZones: 0, }, + "three zones, three instances per zone, two instances unhealthy in same zone": { + ringInstances: map[string]IngesterDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", Zone: "zone-a", 
Tokens: GenerateTokens(128, nil)}, + "instance-4": {Addr: "127.0.0.4", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-5": {Addr: "127.0.0.5", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-6": {Addr: "127.0.0.6", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-7": {Addr: "127.0.0.7", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + "instance-8": {Addr: "127.0.0.8", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + "instance-9": {Addr: "127.0.0.9", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + }, + expectedAddresses: []string{"127.0.0.1", "127.0.0.2", "127.0.0.3", "127.0.0.7", "127.0.0.8", "127.0.0.9"}, + unhealthyInstances: []string{"instance-4", "instance-6"}, + replicationFactor: 3, + expectedMaxErrors: 0, + expectedMaxUnavailableZones: 0, + }, "five zones, two instances per zone except for one zone which has three": { ringInstances: map[string]IngesterDesc{ "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, @@ -569,7 +607,7 @@ func TestRing_GetAll_ZoneAware(t *testing.T) { if testData.expectedError == nil { require.NoError(t, err) } else { - require.EqualError(t, err, ErrEmptyRing.Error()) + require.Equal(t, testData.expectedError, err) } assert.Equal(t, testData.expectedMaxErrors, replicationSet.MaxErrors) From 9cf6a925b94e7423cf8000a9f805d312dbfd1911 Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Tue, 3 Nov 2020 09:19:45 +0000 Subject: [PATCH 11/42] Rename ingesters to instances Signed-off-by: Michel Hollands --- pkg/ring/ring.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pkg/ring/ring.go b/pkg/ring/ring.go index 7200dd3e1c8..f3e15278fe2 100644 --- a/pkg/ring/ring.go +++ b/pkg/ring/ring.go @@ -383,22 +383,22 @@ func (r *Ring) GetReplicationSetForOperation(op Operation) (ReplicationSet, erro // ensure we always require at least RF-1 when RF=3. 
numRequired := calculateRequiredInstances(len(r.ringDesc.Ingesters), r.cfg.ReplicationFactor) - ingesters := make([]IngesterDesc, 0, len(r.ringDesc.Ingesters)) + instances := make([]IngesterDesc, 0, len(r.ringDesc.Ingesters)) zoneFailures := make(map[string]int) for _, ingester := range r.ringDesc.Ingesters { if r.IsHealthy(&ingester, op) { - ingesters = append(ingesters, ingester) + instances = append(instances, ingester) } else { zoneFailures[ingester.Zone]++ } } - maxErrors := len(ingesters) - numRequired + maxErrors := len(instances) - numRequired if r.cfg.ZoneAwarenessEnabled { filteredInstances := make([]IngesterDesc, 0, len(r.ringDesc.Ingesters)) if len(zoneFailures) > 0 { - for _, ingester := range ingesters { + for _, ingester := range instances { _, present := zoneFailures[ingester.Zone] if !present { filteredInstances = append(filteredInstances, ingester) @@ -407,19 +407,19 @@ func (r *Ring) GetReplicationSetForOperation(op Operation) (ReplicationSet, erro } if len(filteredInstances) != 0 { - ingesters = filteredInstances + instances = filteredInstances maxUnavailableZones = 0 - numRequired = calculateRequiredInstances(len(ingesters), r.cfg.ReplicationFactor) + numRequired = calculateRequiredInstances(len(instances), r.cfg.ReplicationFactor) maxErrors = 0 } } - if len(ingesters) < numRequired { + if len(instances) < numRequired { return ReplicationSet{}, ErrTooManyFailedIngesters } return ReplicationSet{ - Ingesters: ingesters, + Ingesters: instances, MaxErrors: maxErrors, MaxUnavailableZones: maxUnavailableZones, }, nil From 4b8e908a4720ac99cb00d414af8d9b18e69cca21 Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Tue, 3 Nov 2020 09:39:11 +0000 Subject: [PATCH 12/42] Add one more test Signed-off-by: Michel Hollands --- pkg/ring/ring_test.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pkg/ring/ring_test.go b/pkg/ring/ring_test.go index e73391cecd0..c5f29b70274 100644 --- a/pkg/ring/ring_test.go +++ b/pkg/ring/ring_test.go @@ -572,6 +572,18 @@ func TestRing_GetAll_ZoneAware(t *testing.T) { expectedMaxErrors: 0, expectedMaxUnavailableZones: 0, }, + "five zones, one instances per zone, three unhealthy instances": { + ringInstances: map[string]IngesterDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + "instance-4": {Addr: "127.0.0.4", Zone: "zone-d", Tokens: GenerateTokens(128, nil)}, + "instance-5": {Addr: "127.0.0.5", Zone: "zone-e", Tokens: GenerateTokens(128, nil)}, + }, + unhealthyInstances: []string{"instance-2", "instance-4", "instance-5"}, + replicationFactor: 5, + expectedError: ErrTooManyFailedIngesters, + }, } for testName, testData := range tests { From c4d560de168840721f1ce477264313e1cef845ef Mon Sep 17 00:00:00 2001 From: Michel Hollands <42814411+MichelHollands@users.noreply.github.com> Date: Wed, 4 Nov 2020 08:07:55 +0000 Subject: [PATCH 13/42] Update pkg/ring/replication_set.go Co-authored-by: Marco Pracucci Signed-off-by: Michel Hollands --- pkg/ring/replication_set.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/ring/replication_set.go b/pkg/ring/replication_set.go index 1897dcb4e04..72460a4ea2e 100644 --- a/pkg/ring/replication_set.go +++ b/pkg/ring/replication_set.go @@ -7,7 +7,7 @@ import ( "time" ) -var errorTooManyZoneFailures = errors.New("Too many zones failed") +var errorTooManyZoneFailures = 
errors.New("too many zones failed") // ReplicationSet describes the ingesters to talk to for a given key, and how // many errors to tolerate. From 3531844078c8ada1ebbd2c84784eddeb6f9f23db Mon Sep 17 00:00:00 2001 From: Michel Hollands <42814411+MichelHollands@users.noreply.github.com> Date: Wed, 4 Nov 2020 08:07:55 +0000 Subject: [PATCH 14/42] Update pkg/ring/replication_set.go : add sign off Signed-off-by: Michel Hollands --- pkg/ring/ring.go | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/pkg/ring/ring.go b/pkg/ring/ring.go index f3e15278fe2..027e2f794b8 100644 --- a/pkg/ring/ring.go +++ b/pkg/ring/ring.go @@ -377,40 +377,46 @@ func (r *Ring) GetReplicationSetForOperation(op Operation) (ReplicationSet, erro return ReplicationSet{}, ErrEmptyRing } - maxUnavailableZones := len(r.ringZones) / 2 - // Calculate the number of required ingesters; // ensure we always require at least RF-1 when RF=3. - numRequired := calculateRequiredInstances(len(r.ringDesc.Ingesters), r.cfg.ReplicationFactor) + numRequired := len(r.ringDesc.Ingesters) + if numRequired < r.cfg.ReplicationFactor { + numRequired = r.cfg.ReplicationFactor + } + maxUnavailable := r.cfg.ReplicationFactor / 2 + numRequired -= maxUnavailable + + maxUnavailableZones := len(r.ringZones) - maxUnavailable - 1 + if maxUnavailableZones < 0 { + maxUnavailableZones = 0 + } instances := make([]IngesterDesc, 0, len(r.ringDesc.Ingesters)) zoneFailures := make(map[string]int) for _, ingester := range r.ringDesc.Ingesters { if r.IsHealthy(&ingester, op) { instances = append(instances, ingester) - } else { zoneFailures[ingester.Zone]++ } } - maxErrors := len(instances) - numRequired - if r.cfg.ZoneAwarenessEnabled { + if r.cfg.ZoneAwarenessEnabled && len(zoneFailures) > 0 { + if len(zoneFailures) > maxUnavailableZones { + return ReplicationSet{}, ErrTooManyFailedIngesters + } + filteredInstances := make([]IngesterDesc, 0, len(r.ringDesc.Ingesters)) - if len(zoneFailures) > 0 { - for _, ingester := range instances { - _, present := zoneFailures[ingester.Zone] - if !present { - filteredInstances = append(filteredInstances, ingester) - } + for _, ingester := range instances { + if _, ok := zoneFailures[ingester.Zone]; !ok { + filteredInstances = append(filteredInstances, ingester) } } if len(filteredInstances) != 0 { instances = filteredInstances maxUnavailableZones = 0 - numRequired = calculateRequiredInstances(len(instances), r.cfg.ReplicationFactor) - maxErrors = 0 + numRequired = len(instances) } } @@ -418,6 +424,7 @@ func (r *Ring) GetReplicationSetForOperation(op Operation) (ReplicationSet, erro return ReplicationSet{}, ErrTooManyFailedIngesters } + maxErrors := len(instances) - numRequired return ReplicationSet{ Ingesters: instances, MaxErrors: maxErrors, From 6edb2b40c30d9de9ce881a2656b7b573f3727f38 Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Wed, 4 Nov 2020 11:29:40 +0000 Subject: [PATCH 15/42] Add integration test Signed-off-by: Michel Hollands --- integration/e2e/service.go | 2 +- integration/zone_aware_test.go | 124 +++++++++++++++++++++++++++++++++ 2 files changed, 125 insertions(+), 1 deletion(-) create mode 100644 integration/zone_aware_test.go diff --git a/integration/e2e/service.go b/integration/e2e/service.go index c9931c0b443..d59171f2e17 100644 --- a/integration/e2e/service.go +++ b/integration/e2e/service.go @@ -168,7 +168,7 @@ func (s *ConcreteService) Kill() error { logger.Log("Killing", s.name) - if out, err := RunCommandAndGetOutput("docker", "stop", "--time=0", 
s.containerName()); err != nil { + if out, err := RunCommandAndGetOutput("docker", "kill", s.containerName()); err != nil { logger.Log(string(out)) return err } diff --git a/integration/zone_aware_test.go b/integration/zone_aware_test.go new file mode 100644 index 00000000000..0f4fbe2247a --- /dev/null +++ b/integration/zone_aware_test.go @@ -0,0 +1,124 @@ +package integration + +import ( + "fmt" + "testing" + "time" + + "github.com/cortexproject/cortex/integration/e2e" + e2edb "github.com/cortexproject/cortex/integration/e2e/db" + "github.com/cortexproject/cortex/integration/e2ecortex" + "github.com/prometheus/common/model" + "github.com/prometheus/prometheus/pkg/labels" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestZoneAwareReadPath(t *testing.T) { + const numSeriesToPush = 100 + + s, err := e2e.NewScenario(networkName) + require.NoError(t, err) + defer s.Close() + + flags := BlocksStorageFlags() + + // Start dependencies. + consul := e2edb.NewConsul() + minio := e2edb.NewMinio(9000, flags["-blocks-storage.s3.bucket-name"]) + require.NoError(t, s.StartAndWaitReady(consul, minio)) + + // Start Cortex components. + zoneFlags := func(zone string) map[string]string { + zoneflags := mergeFlags(flags, map[string]string{ + "-ingester.availability-zone": zone, + "-distributor.replication-factor": "3", + "-distributor.zone-awareness-enabled": "true", + }) + return zoneflags + } + + zoneAwarenessEnabledFlags := mergeFlags(flags, map[string]string{ + "-distributor.zone-awareness-enabled": "true", + "-distributor.replication-factor": "3", + }) + + ingester1 := e2ecortex.NewIngesterWithConfigFile("ingester-1", consul.NetworkHTTPEndpoint(), "", zoneFlags("zone-a"), "") + ingester2 := e2ecortex.NewIngesterWithConfigFile("ingester-2", consul.NetworkHTTPEndpoint(), "", zoneFlags("zone-a"), "") + ingester3 := e2ecortex.NewIngesterWithConfigFile("ingester-3", consul.NetworkHTTPEndpoint(), "", zoneFlags("zone-b"), "") + ingester4 := e2ecortex.NewIngesterWithConfigFile("ingester-4", consul.NetworkHTTPEndpoint(), "", zoneFlags("zone-b"), "") + ingester5 := e2ecortex.NewIngesterWithConfigFile("ingester-5", consul.NetworkHTTPEndpoint(), "", zoneFlags("zone-c"), "") + ingester6 := e2ecortex.NewIngesterWithConfigFile("ingester-6", consul.NetworkHTTPEndpoint(), "", zoneFlags("zone-c"), "") + require.NoError(t, s.StartAndWaitReady(ingester1, ingester2, ingester3, ingester4, ingester5, ingester6)) + + distributor := e2ecortex.NewDistributor("distributor", consul.NetworkHTTPEndpoint(), zoneAwarenessEnabledFlags, "") + querier := e2ecortex.NewQuerier("querier", consul.NetworkHTTPEndpoint(), zoneAwarenessEnabledFlags, "") + require.NoError(t, s.StartAndWaitReady(distributor, querier)) + + // Wait until distributor and queriers have updated the ring. 
+ require.NoError(t, distributor.WaitSumMetricsWithOptions(e2e.Equals(6), []string{"cortex_ring_members"}, e2e.WithLabelMatchers( + labels.MustNewMatcher(labels.MatchEqual, "name", "ingester"), + labels.MustNewMatcher(labels.MatchEqual, "state", "ACTIVE")))) + + require.NoError(t, querier.WaitSumMetricsWithOptions(e2e.Equals(6), []string{"cortex_ring_members"}, e2e.WithLabelMatchers( + labels.MustNewMatcher(labels.MatchEqual, "name", "ingester"), + labels.MustNewMatcher(labels.MatchEqual, "state", "ACTIVE")))) + + client, err := e2ecortex.NewClient(distributor.HTTPEndpoint(), querier.HTTPEndpoint(), "", "", userID) + require.NoError(t, err) + + // Push 100 series + now := time.Now() + expectedVectors := map[string]model.Vector{} + + for i := 1; i <= numSeriesToPush; i++ { + metricName := fmt.Sprintf("series_%d", i) + series, expectedVector := generateSeries(metricName, now) + res, err := client.Push(series) + require.NoError(t, err) + require.Equal(t, 200, res.StatusCode) + + expectedVectors[metricName] = expectedVector + } + + // Query back 100 series => all good + for metricName, expectedVector := range expectedVectors { + result, err := client.Query(metricName, now) + require.NoError(t, err) + require.Equal(t, model.ValVector, result.Type()) + assert.Equal(t, expectedVector, result.(model.Vector)) + } + + // SIGKILL 1 ingester in 1 zone + require.NoError(t, ingester5.Kill()) + + // Query back 100 series => all good + for metricName, expectedVector := range expectedVectors { + result, err := client.Query(metricName, now) + require.NoError(t, err) + require.Equal(t, model.ValVector, result.Type()) + assert.Equal(t, expectedVector, result.(model.Vector)) + } + + // SIGKILL 1 more ingester in the same zone + require.NoError(t, ingester6.Kill()) + + // Query back 100 series => all good + for metricName, expectedVector := range expectedVectors { + result, err := client.Query(metricName, now) + require.NoError(t, err) + require.Equal(t, model.ValVector, result.Type()) + assert.Equal(t, expectedVector, result.(model.Vector)) + } + + // SIGKILL 1 more ingester in a different zone + require.NoError(t, ingester1.Kill()) + + // Query back 100 series => fail + for i := 1; i <= numSeriesToPush; i++ { + metricName := fmt.Sprintf("series_%d", i) + result, _, err := client.QueryRaw(metricName) + require.NoError(t, err) + require.Equal(t, 500, result.StatusCode) + } +} From e04db17211cdf9a39f656204f3783031538a10b2 Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Wed, 4 Nov 2020 11:45:35 +0000 Subject: [PATCH 16/42] Fix imports as per goimports Signed-off-by: Michel Hollands --- integration/zone_aware_test.go | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/integration/zone_aware_test.go b/integration/zone_aware_test.go index 0f4fbe2247a..bd51b5a6339 100644 --- a/integration/zone_aware_test.go +++ b/integration/zone_aware_test.go @@ -5,13 +5,14 @@ import ( "testing" "time" - "github.com/cortexproject/cortex/integration/e2e" - e2edb "github.com/cortexproject/cortex/integration/e2e/db" - "github.com/cortexproject/cortex/integration/e2ecortex" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/pkg/labels" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + + "github.com/cortexproject/cortex/integration/e2e" + e2edb "github.com/cortexproject/cortex/integration/e2e/db" + "github.com/cortexproject/cortex/integration/e2ecortex" ) func TestZoneAwareReadPath(t *testing.T) { From fa1c3b553e0ec719409c463e57a6844d3bf73658 Mon Sep 17 00:00:00 
2001 From: Michel Hollands Date: Wed, 4 Nov 2020 14:52:06 +0000 Subject: [PATCH 17/42] Address review comments and add extra tests Signed-off-by: Michel Hollands --- pkg/ring/ring.go | 18 +++++++----------- pkg/ring/ring_test.go | 20 ++++++++++++++++++++ 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/pkg/ring/ring.go b/pkg/ring/ring.go index 027e2f794b8..652b8527db8 100644 --- a/pkg/ring/ring.go +++ b/pkg/ring/ring.go @@ -385,11 +385,7 @@ func (r *Ring) GetReplicationSetForOperation(op Operation) (ReplicationSet, erro } maxUnavailable := r.cfg.ReplicationFactor / 2 numRequired -= maxUnavailable - - maxUnavailableZones := len(r.ringZones) - maxUnavailable - 1 - if maxUnavailableZones < 0 { - maxUnavailableZones = 0 - } + maxUnavailableZones := maxUnavailable instances := make([]IngesterDesc, 0, len(r.ringDesc.Ingesters)) zoneFailures := make(map[string]int) @@ -401,11 +397,13 @@ func (r *Ring) GetReplicationSetForOperation(op Operation) (ReplicationSet, erro } } - if r.cfg.ZoneAwarenessEnabled && len(zoneFailures) > 0 { + if r.cfg.ZoneAwarenessEnabled && len(zoneFailures) > 0 && maxUnavailableZones > 0 { if len(zoneFailures) > maxUnavailableZones { return ReplicationSet{}, ErrTooManyFailedIngesters } + // Given that we can tolerate a zone failure, we can skip querying + // all instances in that zone. filteredInstances := make([]IngesterDesc, 0, len(r.ringDesc.Ingesters)) for _, ingester := range instances { if _, ok := zoneFailures[ingester.Zone]; !ok { @@ -413,11 +411,9 @@ func (r *Ring) GetReplicationSetForOperation(op Operation) (ReplicationSet, erro } } - if len(filteredInstances) != 0 { - instances = filteredInstances - maxUnavailableZones = 0 - numRequired = len(instances) - } + instances = filteredInstances + maxUnavailableZones = 0 + numRequired = len(instances) } if len(instances) < numRequired { diff --git a/pkg/ring/ring_test.go b/pkg/ring/ring_test.go index c5f29b70274..d6a01af9c60 100644 --- a/pkg/ring/ring_test.go +++ b/pkg/ring/ring_test.go @@ -442,6 +442,26 @@ func TestRing_GetAll_ZoneAware(t *testing.T) { expectedMaxErrors: 0, expectedMaxUnavailableZones: 0, }, + "single zone, one unhealthy instance": { + ringInstances: map[string]IngesterDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + }, + unhealthyInstances: []string{"instance-2"}, + replicationFactor: 1, + expectedError: ErrTooManyFailedIngesters, + }, + "three zones, replication factor one, one unhealthy instance": { + ringInstances: map[string]IngesterDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + }, + unhealthyInstances: []string{"instance-3"}, + replicationFactor: 1, + expectedError: ErrTooManyFailedIngesters, + }, "three zones, one instance per zone": { ringInstances: map[string]IngesterDesc{ "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, From c0d7be01a900c7dd05a52f4b30e8250d440947ee Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Wed, 4 Nov 2020 17:09:20 +0000 Subject: [PATCH 18/42] Fix rebase Signed-off-by: Michel Hollands --- pkg/ring/ring.go | 14 -------------- 1 file changed, 14 deletions(-) diff --git 
a/pkg/ring/ring.go b/pkg/ring/ring.go index 652b8527db8..7fa07c9c8ba 100644 --- a/pkg/ring/ring.go +++ b/pkg/ring/ring.go @@ -101,12 +101,8 @@ var ( // not registered within the ring. ErrInstanceNotFound = errors.New("instance not found in the ring") -<<<<<<< HEAD // ErrTooManyFailedIngesters is the error returned when there are too many failed ingesters for a // specific operation. -======= - // ErrTooManyFailedIngesters is the error returned when not enough ingesters are available ->>>>>>> Do not return early and add more test cases ErrTooManyFailedIngesters = errors.New("too many failed ingesters") ) @@ -336,16 +332,6 @@ func (r *Ring) Get(key uint32, op Operation, buf []IngesterDesc) (ReplicationSet }, nil } -func calculateRequiredInstances(nrInstances, replicationFactor int) int { - numRequired := nrInstances - if numRequired < replicationFactor { - numRequired = replicationFactor - } - maxUnavailable := replicationFactor / 2 - numRequired -= maxUnavailable - return numRequired -} - // GetAllHealthy implements ReadRing. func (r *Ring) GetAllHealthy(op Operation) (ReplicationSet, error) { r.mtx.RLock() From 9ffa08d37dd9ce233897b37993ba2fd2f0ef6bc9 Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Wed, 4 Nov 2020 17:16:02 +0000 Subject: [PATCH 19/42] Fix rebase in test Signed-off-by: Michel Hollands --- pkg/ring/ring_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/ring/ring_test.go b/pkg/ring/ring_test.go index d6a01af9c60..09456c780b9 100644 --- a/pkg/ring/ring_test.go +++ b/pkg/ring/ring_test.go @@ -635,7 +635,7 @@ func TestRing_GetAll_ZoneAware(t *testing.T) { } // Check the replication set has the correct settings - replicationSet, err := ring.GetAll(Read) + replicationSet, err := ring.GetReplicationSetForOperation(Read) if testData.expectedError == nil { require.NoError(t, err) } else { From e10842fe76c679982ec47886d8dc0c49af960af6 Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Thu, 5 Nov 2020 09:10:04 +0000 Subject: [PATCH 20/42] Add lock around mockIngester call Signed-off-by: Michel Hollands --- pkg/distributor/distributor_test.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pkg/distributor/distributor_test.go b/pkg/distributor/distributor_test.go index e003b87f148..1409d447992 100644 --- a/pkg/distributor/distributor_test.go +++ b/pkg/distributor/distributor_test.go @@ -1487,6 +1487,9 @@ func (i *mockIngester) MetricsMetadata(ctx context.Context, req *client.MetricsM } func (i *mockIngester) trackCall(name string) { + i.Lock() + defer i.Unlock() + if i.calls == nil { i.calls = map[string]int{} } From 4385980c48579b1b1a1f3beff9fb2e01054ec5db Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Thu, 5 Nov 2020 09:12:42 +0000 Subject: [PATCH 21/42] Add lock around mockIngester call at correct place Signed-off-by: Michel Hollands --- pkg/distributor/distributor_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/distributor/distributor_test.go b/pkg/distributor/distributor_test.go index 1409d447992..7826f37e538 100644 --- a/pkg/distributor/distributor_test.go +++ b/pkg/distributor/distributor_test.go @@ -1283,6 +1283,9 @@ func (i *mockIngester) series() map[uint32]*client.PreallocTimeseries { } func (i *mockIngester) Check(ctx context.Context, in *grpc_health_v1.HealthCheckRequest, opts ...grpc.CallOption) (*grpc_health_v1.HealthCheckResponse, error) { + i.Lock() + defer i.Unlock() + i.trackCall("Check") return &grpc_health_v1.HealthCheckResponse{}, nil @@ -1487,9 +1490,6 @@ func (i *mockIngester) 
MetricsMetadata(ctx context.Context, req *client.MetricsM } func (i *mockIngester) trackCall(name string) { - i.Lock() - defer i.Unlock() - if i.calls == nil { i.calls = map[string]int{} } From be94fff19d97d0c04d382e692f031384b55a2806 Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Thu, 5 Nov 2020 11:08:34 +0000 Subject: [PATCH 22/42] Handle nr of zones > replication factor Signed-off-by: Michel Hollands --- pkg/ring/ring.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pkg/ring/ring.go b/pkg/ring/ring.go index 7fa07c9c8ba..6f3f5165679 100644 --- a/pkg/ring/ring.go +++ b/pkg/ring/ring.go @@ -371,7 +371,11 @@ func (r *Ring) GetReplicationSetForOperation(op Operation) (ReplicationSet, erro } maxUnavailable := r.cfg.ReplicationFactor / 2 numRequired -= maxUnavailable - maxUnavailableZones := maxUnavailable + numRequiredZones := len(r.ringZones) + if len(r.ringZones) > r.cfg.ReplicationFactor { + numRequiredZones = r.cfg.ReplicationFactor + } + maxUnavailableZones := util.Max(0, numRequiredZones-maxUnavailable-1) instances := make([]IngesterDesc, 0, len(r.ringDesc.Ingesters)) zoneFailures := make(map[string]int) From 7f08749ad8d938f73d948f85105f8a47019ddca9 Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Thu, 5 Nov 2020 11:42:31 +0000 Subject: [PATCH 23/42] Use util.Min instead of if statement Signed-off-by: Michel Hollands --- pkg/ring/ring.go | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pkg/ring/ring.go b/pkg/ring/ring.go index 6f3f5165679..23ad191721b 100644 --- a/pkg/ring/ring.go +++ b/pkg/ring/ring.go @@ -371,10 +371,7 @@ func (r *Ring) GetReplicationSetForOperation(op Operation) (ReplicationSet, erro } maxUnavailable := r.cfg.ReplicationFactor / 2 numRequired -= maxUnavailable - numRequiredZones := len(r.ringZones) - if len(r.ringZones) > r.cfg.ReplicationFactor { - numRequiredZones = r.cfg.ReplicationFactor - } + numRequiredZones := util.Min(len(r.ringZones), r.cfg.ReplicationFactor) maxUnavailableZones := util.Max(0, numRequiredZones-maxUnavailable-1) instances := make([]IngesterDesc, 0, len(r.ringDesc.Ingesters)) From 847dd718356c13e906907b6e2dcb0c1eacae2f49 Mon Sep 17 00:00:00 2001 From: Michel Hollands <42814411+MichelHollands@users.noreply.github.com> Date: Thu, 5 Nov 2020 15:38:07 +0000 Subject: [PATCH 24/42] Update pkg/ring/replication_set_test.go MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Peter Štibraný Signed-off-by: Michel Hollands --- pkg/ring/replication_set_test.go | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/pkg/ring/replication_set_test.go b/pkg/ring/replication_set_test.go index 87a3529b6e0..e4c7b4dbfde 100644 --- a/pkg/ring/replication_set_test.go +++ b/pkg/ring/replication_set_test.go @@ -46,7 +46,16 @@ var ( // Return a function that fails starting from failAfter times func failingFunctionAfter(failAfter int, delay time.Duration) func(context.Context, *IngesterDesc) (interface{}, error) { var mutex = &sync.RWMutex{} - count := 0 +func failingFunctionAfter(failAfter int32, delay time.Duration) func(context.Context, *IngesterDesc) (interface{}, error) { + count := atomic.NewInt32(0) + return func(context.Context, *IngesterDesc) (interface{}, error) { + time.Sleep(delay) + if count.Inc() > failAfter { + return nil, errFailure + } + return 1, nil + } +} return func(context.Context, *IngesterDesc) (interface{}, error) { mutex.Lock() count++ From 78864507ecc031ea8d911c02e40294a2a34cb2b6 Mon Sep 17 00:00:00 2001 From: 
Michel Hollands Date: Thu, 5 Nov 2020 15:45:58 +0000 Subject: [PATCH 25/42] Use atomic and sets Signed-off-by: Michel Hollands --- pkg/ring/replication_set.go | 4 ++-- pkg/ring/replication_set_test.go | 17 +---------------- pkg/ring/ring.go | 4 ++-- 3 files changed, 5 insertions(+), 20 deletions(-) diff --git a/pkg/ring/replication_set.go b/pkg/ring/replication_set.go index 72460a4ea2e..81fb1b65080 100644 --- a/pkg/ring/replication_set.go +++ b/pkg/ring/replication_set.go @@ -63,13 +63,13 @@ func (r ReplicationSet) Do(ctx context.Context, delay time.Duration, f func(cont numErrs int numSuccess int results = make([]interface{}, 0, len(r.Ingesters)) - zoneFailureCount = make(map[string]int) + zoneFailureCount = make(map[string]struct{}) ) for numSuccess < minSuccess { select { case err := <-errs: if r.MaxUnavailableZones > 0 { - zoneFailureCount[err.instance.Zone]++ + zoneFailureCount[err.instance.Zone] = struct{}{} if len(zoneFailureCount) > r.MaxUnavailableZones { return nil, errorTooManyZoneFailures diff --git a/pkg/ring/replication_set_test.go b/pkg/ring/replication_set_test.go index e4c7b4dbfde..a1eeef00730 100644 --- a/pkg/ring/replication_set_test.go +++ b/pkg/ring/replication_set_test.go @@ -3,11 +3,11 @@ package ring import ( "context" "errors" - "sync" "testing" "time" "github.com/stretchr/testify/assert" + "go.uber.org/atomic" ) func TestReplicationSet_GetAddresses(t *testing.T) { @@ -44,8 +44,6 @@ var ( ) // Return a function that fails starting from failAfter times -func failingFunctionAfter(failAfter int, delay time.Duration) func(context.Context, *IngesterDesc) (interface{}, error) { - var mutex = &sync.RWMutex{} func failingFunctionAfter(failAfter int32, delay time.Duration) func(context.Context, *IngesterDesc) (interface{}, error) { count := atomic.NewInt32(0) return func(context.Context, *IngesterDesc) (interface{}, error) { @@ -56,19 +54,6 @@ func failingFunctionAfter(failAfter int32, delay time.Duration) func(context.Con return 1, nil } } - return func(context.Context, *IngesterDesc) (interface{}, error) { - mutex.Lock() - count++ - mutex.Unlock() - time.Sleep(delay) - mutex.RLock() - defer mutex.RUnlock() - if count > failAfter { - return nil, errFailure - } - return 1, nil - } -} func failingFunctionForZones(zones ...string) func(context.Context, *IngesterDesc) (interface{}, error) { return func(ctx context.Context, ing *IngesterDesc) (interface{}, error) { diff --git a/pkg/ring/ring.go b/pkg/ring/ring.go index 23ad191721b..e1972e6c23f 100644 --- a/pkg/ring/ring.go +++ b/pkg/ring/ring.go @@ -375,12 +375,12 @@ func (r *Ring) GetReplicationSetForOperation(op Operation) (ReplicationSet, erro maxUnavailableZones := util.Max(0, numRequiredZones-maxUnavailable-1) instances := make([]IngesterDesc, 0, len(r.ringDesc.Ingesters)) - zoneFailures := make(map[string]int) + zoneFailures := make(map[string]struct{}) for _, ingester := range r.ringDesc.Ingesters { if r.IsHealthy(&ingester, op) { instances = append(instances, ingester) } else { - zoneFailures[ingester.Zone]++ + zoneFailures[ingester.Zone] = struct{}{} } } From ccdb9087cdd3e73e8b76e43799aaa071fd45cde5 Mon Sep 17 00:00:00 2001 From: Marco Pracucci Date: Fri, 6 Nov 2020 11:54:03 +0100 Subject: [PATCH 26/42] Fixed integration test and ReplicationSet.Do() Signed-off-by: Marco Pracucci --- integration/e2ecortex/client.go | 1 - integration/zone_aware_test.go | 47 ++++++------- pkg/ring/replication_set.go | 60 ++++++++-------- pkg/ring/replication_set_test.go | 4 +- pkg/ring/replication_set_tracker.go | 104 
++++++++++++++++++++++++++++ pkg/ring/ring.go | 1 + 6 files changed, 159 insertions(+), 58 deletions(-) create mode 100644 pkg/ring/replication_set_tracker.go diff --git a/integration/e2ecortex/client.go b/integration/e2ecortex/client.go index 1fd57beec5c..3650e8e50c2 100644 --- a/integration/e2ecortex/client.go +++ b/integration/e2ecortex/client.go @@ -153,7 +153,6 @@ func (c *Client) QueryRaw(query string) (*http.Response, []byte, error) { } func (c *Client) query(addr string) (*http.Response, []byte, error) { - ctx, cancel := context.WithTimeout(context.Background(), c.timeout) defer cancel() diff --git a/integration/zone_aware_test.go b/integration/zone_aware_test.go index bd51b5a6339..f11b777427c 100644 --- a/integration/zone_aware_test.go +++ b/integration/zone_aware_test.go @@ -23,6 +23,9 @@ func TestZoneAwareReadPath(t *testing.T) { defer s.Close() flags := BlocksStorageFlags() + flags["-distributor.shard-by-all-labels"] = "true" + flags["-distributor.replication-factor"] = "3" + flags["-distributor.zone-awareness-enabled"] = "true" // Start dependencies. consul := e2edb.NewConsul() @@ -30,33 +33,25 @@ func TestZoneAwareReadPath(t *testing.T) { require.NoError(t, s.StartAndWaitReady(consul, minio)) // Start Cortex components. - zoneFlags := func(zone string) map[string]string { - zoneflags := mergeFlags(flags, map[string]string{ - "-ingester.availability-zone": zone, - "-distributor.replication-factor": "3", - "-distributor.zone-awareness-enabled": "true", + ingesterFlags := func(zone string) map[string]string { + return mergeFlags(flags, map[string]string{ + "-ingester.availability-zone": zone, }) - return zoneflags } - zoneAwarenessEnabledFlags := mergeFlags(flags, map[string]string{ - "-distributor.zone-awareness-enabled": "true", - "-distributor.replication-factor": "3", - }) - - ingester1 := e2ecortex.NewIngesterWithConfigFile("ingester-1", consul.NetworkHTTPEndpoint(), "", zoneFlags("zone-a"), "") - ingester2 := e2ecortex.NewIngesterWithConfigFile("ingester-2", consul.NetworkHTTPEndpoint(), "", zoneFlags("zone-a"), "") - ingester3 := e2ecortex.NewIngesterWithConfigFile("ingester-3", consul.NetworkHTTPEndpoint(), "", zoneFlags("zone-b"), "") - ingester4 := e2ecortex.NewIngesterWithConfigFile("ingester-4", consul.NetworkHTTPEndpoint(), "", zoneFlags("zone-b"), "") - ingester5 := e2ecortex.NewIngesterWithConfigFile("ingester-5", consul.NetworkHTTPEndpoint(), "", zoneFlags("zone-c"), "") - ingester6 := e2ecortex.NewIngesterWithConfigFile("ingester-6", consul.NetworkHTTPEndpoint(), "", zoneFlags("zone-c"), "") + ingester1 := e2ecortex.NewIngesterWithConfigFile("ingester-1", consul.NetworkHTTPEndpoint(), "", ingesterFlags("zone-a"), "") + ingester2 := e2ecortex.NewIngesterWithConfigFile("ingester-2", consul.NetworkHTTPEndpoint(), "", ingesterFlags("zone-a"), "") + ingester3 := e2ecortex.NewIngesterWithConfigFile("ingester-3", consul.NetworkHTTPEndpoint(), "", ingesterFlags("zone-b"), "") + ingester4 := e2ecortex.NewIngesterWithConfigFile("ingester-4", consul.NetworkHTTPEndpoint(), "", ingesterFlags("zone-b"), "") + ingester5 := e2ecortex.NewIngesterWithConfigFile("ingester-5", consul.NetworkHTTPEndpoint(), "", ingesterFlags("zone-c"), "") + ingester6 := e2ecortex.NewIngesterWithConfigFile("ingester-6", consul.NetworkHTTPEndpoint(), "", ingesterFlags("zone-c"), "") require.NoError(t, s.StartAndWaitReady(ingester1, ingester2, ingester3, ingester4, ingester5, ingester6)) - distributor := e2ecortex.NewDistributor("distributor", consul.NetworkHTTPEndpoint(), zoneAwarenessEnabledFlags, 
"") - querier := e2ecortex.NewQuerier("querier", consul.NetworkHTTPEndpoint(), zoneAwarenessEnabledFlags, "") + distributor := e2ecortex.NewDistributor("distributor", consul.NetworkHTTPEndpoint(), flags, "") + querier := e2ecortex.NewQuerier("querier", consul.NetworkHTTPEndpoint(), flags, "") require.NoError(t, s.StartAndWaitReady(distributor, querier)) - // Wait until distributor and queriers have updated the ring. + // Wait until distributor and querier have updated the ring. require.NoError(t, distributor.WaitSumMetricsWithOptions(e2e.Equals(6), []string{"cortex_ring_members"}, e2e.WithLabelMatchers( labels.MustNewMatcher(labels.MatchEqual, "name", "ingester"), labels.MustNewMatcher(labels.MatchEqual, "state", "ACTIVE")))) @@ -115,11 +110,9 @@ func TestZoneAwareReadPath(t *testing.T) { // SIGKILL 1 more ingester in a different zone require.NoError(t, ingester1.Kill()) - // Query back 100 series => fail - for i := 1; i <= numSeriesToPush; i++ { - metricName := fmt.Sprintf("series_%d", i) - result, _, err := client.QueryRaw(metricName) - require.NoError(t, err) - require.Equal(t, 500, result.StatusCode) - } + // Query back any series => fail + metricName := "series_1" + result, _, err := client.QueryRaw(metricName) + require.NoError(t, err) + require.Equal(t, 500, result.StatusCode) } diff --git a/pkg/ring/replication_set.go b/pkg/ring/replication_set.go index 81fb1b65080..ca1776e4b53 100644 --- a/pkg/ring/replication_set.go +++ b/pkg/ring/replication_set.go @@ -2,13 +2,10 @@ package ring import ( "context" - "errors" "sort" "time" ) -var errorTooManyZoneFailures = errors.New("too many zones failed") - // ReplicationSet describes the ingesters to talk to for a given key, and how // many errors to tolerate. type ReplicationSet struct { @@ -20,24 +17,37 @@ type ReplicationSet struct { // Do function f in parallel for all replicas in the set, erroring is we exceed // MaxErrors and returning early otherwise. func (r ReplicationSet) Do(ctx context.Context, delay time.Duration, f func(context.Context, *IngesterDesc) (interface{}, error)) ([]interface{}, error) { + type instanceResult struct { + res interface{} + instance *IngesterDesc + } + type instanceError struct { err error instance *IngesterDesc } + // Initialise the result tracker, which is use to keep track of successes and failures. + var tracker replicationSetResultTracker + if r.MaxUnavailableZones > 0 { + tracker = newZoneAwareResultTracker(r.Ingesters, r.MaxUnavailableZones) + } else { + tracker = newDefaultResultTracker(r.Ingesters, r.MaxErrors) + } + var ( errs = make(chan instanceError, len(r.Ingesters)) - resultsChan = make(chan interface{}, len(r.Ingesters)) - minSuccess = len(r.Ingesters) - r.MaxErrors + resultsChan = make(chan instanceResult, len(r.Ingesters)) forceStart = make(chan struct{}, r.MaxErrors) ) ctx, cancel := context.WithCancel(ctx) defer cancel() + // Spawn a goroutine for each instance. for i := range r.Ingesters { go func(i int, ing *IngesterDesc) { - // wait to send extra requests - if i >= minSuccess && delay > 0 { + // Wait to send extra requests. Works only when zone-awareness is disabled. 
+ if delay > 0 && r.MaxUnavailableZones == 0 && i >= len(r.Ingesters)-r.MaxErrors { after := time.NewTimer(delay) defer after.Stop() select { @@ -54,39 +64,33 @@ func (r ReplicationSet) Do(ctx context.Context, delay time.Duration, f func(cont instance: ing, } } else { - resultsChan <- result + resultsChan <- instanceResult{ + res: result, + instance: ing, + } } }(i, &r.Ingesters[i]) } - var ( - numErrs int - numSuccess int - results = make([]interface{}, 0, len(r.Ingesters)) - zoneFailureCount = make(map[string]struct{}) - ) - for numSuccess < minSuccess { + results := make([]interface{}, 0, len(r.Ingesters)) + + for !tracker.succeeded() { select { case err := <-errs: - if r.MaxUnavailableZones > 0 { - zoneFailureCount[err.instance.Zone] = struct{}{} + tracker.done(err.instance, err.err) - if len(zoneFailureCount) > r.MaxUnavailableZones { - return nil, errorTooManyZoneFailures - } - } else { - numErrs++ - if numErrs > r.MaxErrors { - return nil, err.err - } + if tracker.failed() { + return nil, err.err } // force one of the delayed requests to start - forceStart <- struct{}{} + if delay > 0 && r.MaxUnavailableZones == 0 { + forceStart <- struct{}{} + } case result := <-resultsChan: - numSuccess++ - results = append(results, result) + tracker.done(result.instance, nil) + results = append(results, result.res) case <-ctx.Done(): return nil, ctx.Err() diff --git a/pkg/ring/replication_set_test.go b/pkg/ring/replication_set_test.go index a1eeef00730..2b1becd2c16 100644 --- a/pkg/ring/replication_set_test.go +++ b/pkg/ring/replication_set_test.go @@ -165,7 +165,7 @@ func TestReplicationSet_Do(t *testing.T) { f: failingFunctionForZones("zone1", "zone2"), maxUnavailableZones: 1, maxErrors: 1, - expectedError: errorTooManyZoneFailures, + expectedError: errZoneFailure, }, { name: "6 instances, 3 zones, 1 zone fails", @@ -203,7 +203,7 @@ func TestReplicationSet_Do(t *testing.T) { f: failingFunctionForZones("zone1", "zone2", "zone3"), maxUnavailableZones: 2, maxErrors: 2, - expectedError: errorTooManyZoneFailures, + expectedError: errZoneFailure, }, { name: "10 instances, 5 zones, 2 failures in zone 1", diff --git a/pkg/ring/replication_set_tracker.go b/pkg/ring/replication_set_tracker.go new file mode 100644 index 00000000000..2c1c63fa2cc --- /dev/null +++ b/pkg/ring/replication_set_tracker.go @@ -0,0 +1,104 @@ +package ring + +import "github.com/cortexproject/cortex/pkg/util" + +type replicationSetResultTracker interface { + // TODO doc + done(instance *IngesterDesc, err error) + + // TODO doc + succeeded() bool + + // TODO doc + failed() bool +} + +type defaultResultTracker struct { + minSucceeded int + numSucceeded int + numErrors int + maxErrors int +} + +func newDefaultResultTracker(instances []IngesterDesc, maxErrors int) *defaultResultTracker { + return &defaultResultTracker{ + minSucceeded: len(instances) - maxErrors, + numSucceeded: 0, + numErrors: 0, + maxErrors: maxErrors, + } +} + +func (t *defaultResultTracker) done(instance *IngesterDesc, err error) { + if err == nil { + t.numSucceeded++ + } else { + t.numErrors++ + } +} + +func (t *defaultResultTracker) succeeded() bool { + return t.numSucceeded >= t.minSucceeded +} + +func (t *defaultResultTracker) failed() bool { + return t.numErrors > t.maxErrors +} + +type zoneAwareResultTracker struct { + waitingByZone map[string]int + failuresByZone map[string]int + maxUnavailableZones int +} + +func newZoneAwareResultTracker(instances []IngesterDesc, maxUnavailableZones int) *zoneAwareResultTracker { + t := &zoneAwareResultTracker{ + 
waitingByZone: make(map[string]int), + failuresByZone: make(map[string]int), + maxUnavailableZones: maxUnavailableZones, + } + + for _, instance := range instances { + t.waitingByZone[instance.Zone]++ + t.failuresByZone[instance.Zone] = 0 + } + + return t +} + +func (t *zoneAwareResultTracker) done(instance *IngesterDesc, err error) { + t.waitingByZone[instance.Zone]-- + + if err != nil { + t.failuresByZone[instance.Zone]++ + } +} + +func (t *zoneAwareResultTracker) succeeded() bool { + minSucceededZones := util.Max(0, len(t.waitingByZone)-t.maxUnavailableZones) + actualSucceededZones := 0 + + // The execution succeeded once we successfully received a successful result + // from "all zones - max unavailable zones". + for zone, numWaiting := range t.waitingByZone { + if numWaiting == 0 && t.failuresByZone[zone] == 0 { + actualSucceededZones++ + } + } + + return actualSucceededZones >= minSucceededZones +} + +func (t *zoneAwareResultTracker) failed() bool { + failedZones := 0 + + // The execution failed if the number of zones, for which we have tracker at least 1 + // failure, exceeds the max unavailable zones. + for _, numFailures := range t.failuresByZone { + if numFailures > 0 { + failedZones++ + } + } + + return failedZones > t.maxUnavailableZones +} diff --git a/pkg/ring/ring.go b/pkg/ring/ring.go index e1972e6c23f..e023ace87b9 100644 --- a/pkg/ring/ring.go +++ b/pkg/ring/ring.go @@ -398,6 +398,7 @@ func (r *Ring) GetReplicationSetForOperation(op Operation) (ReplicationSet, erro } } + // TODO when this logic is triggered, we must guarantee that maxErrors is 0 instances = filteredInstances maxUnavailableZones = 0 numRequired = len(instances) From 1259ed9825a5cdcf0c4b9ca9cacc698385e9d125 Mon Sep 17 00:00:00 2001 From: Marco Pracucci Date: Fri, 6 Nov 2020 13:08:21 +0100 Subject: [PATCH 27/42] Added tracker unit tests Signed-off-by: Marco Pracucci --- pkg/ring/replication_set_tracker.go | 18 +- pkg/ring/replication_set_tracker_test.go | 266 +++++++++++++++++++++++ 2 files changed, 275 insertions(+), 9 deletions(-) create mode 100644 pkg/ring/replication_set_tracker_test.go diff --git a/pkg/ring/replication_set_tracker.go b/pkg/ring/replication_set_tracker.go index 2c1c63fa2cc..310cb2b86cb 100644 --- a/pkg/ring/replication_set_tracker.go +++ b/pkg/ring/replication_set_tracker.go @@ -1,15 +1,14 @@ package ring -import "github.com/cortexproject/cortex/pkg/util" - type replicationSetResultTracker interface { - // TODO doc + // Signals an instance has done the execution, either successful (no error) + // or failed (with error). done(instance *IngesterDesc, err error) - // TODO doc + // Returns true if the minimum number of successful results have been received. succeeded() bool - // TODO doc + // Returns true if the maximum number of failed executions have been reached. 
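+	// (For the default tracker this means more than maxErrors failed calls; for the
+	// zone-aware tracker, failures recorded in more than maxUnavailableZones zones.)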
failed() bool } @@ -29,7 +28,7 @@ func newDefaultResultTracker(instances []IngesterDesc, maxErrors int) *defaultRe } } -func (t *defaultResultTracker) done(instance *IngesterDesc, err error) { +func (t *defaultResultTracker) done(_ *IngesterDesc, err error) { if err == nil { t.numSucceeded++ } else { @@ -48,6 +47,7 @@ func (t *defaultResultTracker) failed() bool { type zoneAwareResultTracker struct { waitingByZone map[string]int failuresByZone map[string]int + minSuccessfulZones int maxUnavailableZones int } @@ -62,6 +62,7 @@ func newZoneAwareResultTracker(instances []IngesterDesc, maxUnavailableZones int t.waitingByZone[instance.Zone]++ t.failuresByZone[instance.Zone] = 0 } + t.minSuccessfulZones = len(t.waitingByZone) - maxUnavailableZones return t } @@ -75,7 +76,6 @@ func (t *zoneAwareResultTracker) done(instance *IngesterDesc, err error) { } func (t *zoneAwareResultTracker) succeeded() bool { - minSucceededZones := util.Max(0, len(t.waitingByZone)-t.maxUnavailableZones) actualSucceededZones := 0 // The execution succeeded once we successfully received a successful result @@ -86,13 +86,13 @@ func (t *zoneAwareResultTracker) succeeded() bool { } } - return actualSucceededZones >= minSucceededZones + return actualSucceededZones >= t.minSuccessfulZones } func (t *zoneAwareResultTracker) failed() bool { failedZones := 0 - // The execution failed if the number of zones, for which we have tracker at least 1 + // The execution failed if the number of zones, for which we have tracked at least 1 // failure, exceeds the max unavailable zones. for _, numFailures := range t.failuresByZone { if numFailures > 0 { diff --git a/pkg/ring/replication_set_tracker_test.go b/pkg/ring/replication_set_tracker_test.go new file mode 100644 index 00000000000..8c87a21f592 --- /dev/null +++ b/pkg/ring/replication_set_tracker_test.go @@ -0,0 +1,266 @@ +package ring + +import ( + "errors" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestDefaultResultTracker(t *testing.T) { + instance1 := IngesterDesc{Addr: "127.0.0.1"} + instance2 := IngesterDesc{Addr: "127.0.0.2"} + instance3 := IngesterDesc{Addr: "127.0.0.3"} + instance4 := IngesterDesc{Addr: "127.0.0.4"} + + tests := map[string]struct { + instances []IngesterDesc + maxErrors int + run func(t *testing.T, tracker *defaultResultTracker) + }{ + "should succeed on no instances to track": { + instances: nil, + maxErrors: 0, + run: func(t *testing.T, tracker *defaultResultTracker) { + assert.True(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + }, + }, + "should succeed once all instances succeed on max errors = 0": { + instances: []IngesterDesc{instance1, instance2, instance3, instance4}, + maxErrors: 0, + run: func(t *testing.T, tracker *defaultResultTracker) { + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + tracker.done(&instance1, nil) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + tracker.done(&instance2, nil) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + tracker.done(&instance3, nil) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + tracker.done(&instance4, nil) + assert.True(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + }, + }, + "should fail on 1st failing instance on max errors = 0": { + instances: []IngesterDesc{instance1, instance2, instance3, instance4}, + maxErrors: 0, + run: func(t *testing.T, tracker *defaultResultTracker) { + assert.False(t, tracker.succeeded()) + assert.False(t, 
tracker.failed()) + + tracker.done(&instance1, nil) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + tracker.done(&instance2, errors.New("test")) + assert.False(t, tracker.succeeded()) + assert.True(t, tracker.failed()) + }, + }, + "should fail on 2nd failing instance on max errors = 1": { + instances: []IngesterDesc{instance1, instance2, instance3, instance4}, + maxErrors: 1, + run: func(t *testing.T, tracker *defaultResultTracker) { + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + tracker.done(&instance1, nil) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + tracker.done(&instance2, errors.New("test")) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + tracker.done(&instance3, errors.New("test")) + assert.False(t, tracker.succeeded()) + assert.True(t, tracker.failed()) + }, + }, + "should fail on 3rd failing instance on max errors = 2": { + instances: []IngesterDesc{instance1, instance2, instance3, instance4}, + maxErrors: 2, + run: func(t *testing.T, tracker *defaultResultTracker) { + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + tracker.done(&instance1, nil) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + tracker.done(&instance2, errors.New("test")) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + tracker.done(&instance3, errors.New("test")) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + tracker.done(&instance4, errors.New("test")) + assert.False(t, tracker.succeeded()) + assert.True(t, tracker.failed()) + }, + }, + } + + for testName, testCase := range tests { + t.Run(testName, func(t *testing.T) { + testCase.run(t, newDefaultResultTracker(testCase.instances, testCase.maxErrors)) + }) + } +} + +func TestZoneAwareResultTracker(t *testing.T) { + instance1 := IngesterDesc{Addr: "127.0.0.1", Zone: "zone-a"} + instance2 := IngesterDesc{Addr: "127.0.0.2", Zone: "zone-a"} + instance3 := IngesterDesc{Addr: "127.0.0.3", Zone: "zone-b"} + instance4 := IngesterDesc{Addr: "127.0.0.4", Zone: "zone-b"} + instance5 := IngesterDesc{Addr: "127.0.0.5", Zone: "zone-c"} + instance6 := IngesterDesc{Addr: "127.0.0.6", Zone: "zone-c"} + + tests := map[string]struct { + instances []IngesterDesc + maxUnavailableZones int + run func(t *testing.T, tracker *zoneAwareResultTracker) + }{ + "should succeed on no instances to track": { + instances: nil, + maxUnavailableZones: 0, + run: func(t *testing.T, tracker *zoneAwareResultTracker) { + assert.True(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + }, + }, + "should succeed once all instances succeed on max unavailable zones = 0": { + instances: []IngesterDesc{instance1, instance2, instance3}, + maxUnavailableZones: 0, + run: func(t *testing.T, tracker *zoneAwareResultTracker) { + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + tracker.done(&instance1, nil) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + tracker.done(&instance2, nil) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + tracker.done(&instance3, nil) + assert.True(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + }, + }, + "should fail on 1st failing instance on max unavailable zones = 0": { + instances: []IngesterDesc{instance1, instance2, instance3, instance4, instance5, instance6}, + maxUnavailableZones: 0, + run: func(t *testing.T, tracker 
*zoneAwareResultTracker) { + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + tracker.done(&instance1, nil) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + tracker.done(&instance2, errors.New("test")) + assert.False(t, tracker.succeeded()) + assert.True(t, tracker.failed()) + }, + }, + "should succeed on 2 failing instances within the same zone on max unavailable zones = 1": { + instances: []IngesterDesc{instance1, instance2, instance3, instance4, instance5, instance6}, + maxUnavailableZones: 1, + run: func(t *testing.T, tracker *zoneAwareResultTracker) { + // Track failing instances. + for _, instance := range []IngesterDesc{instance1, instance2} { + tracker.done(&instance, errors.New("test")) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + } + + // Track successful instances. + for _, instance := range []IngesterDesc{instance3, instance4, instance5} { + tracker.done(&instance, nil) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + } + + tracker.done(&instance6, nil) + assert.True(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + }, + }, + "should succeed as soon as the response has been successfully received from 'all zones - 1' on max unavailable zones = 1": { + instances: []IngesterDesc{instance1, instance2, instance3, instance4, instance5, instance6}, + maxUnavailableZones: 1, + run: func(t *testing.T, tracker *zoneAwareResultTracker) { + // Track successful instances. + for _, instance := range []IngesterDesc{instance1, instance2, instance3} { + tracker.done(&instance, nil) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + } + + tracker.done(&instance4, nil) + assert.True(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + }, + }, + "should succeed on failing instances within 2 zones on max unavailable zones = 2": { + instances: []IngesterDesc{instance1, instance2, instance3, instance4, instance5, instance6}, + maxUnavailableZones: 2, + run: func(t *testing.T, tracker *zoneAwareResultTracker) { + // Track failing instances. + for _, instance := range []IngesterDesc{instance1, instance2, instance3, instance4} { + tracker.done(&instance, errors.New("test")) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + } + + // Track successful instances. 
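+				// Zone-c is the only zone left without a failure; with maxUnavailableZones=2
+				// the tracker can only succeed once both zone-c instances report success.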
+ tracker.done(&instance5, nil) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + tracker.done(&instance6, nil) + assert.True(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + }, + }, + "should succeed as soon as the response has been successfully received from 'all zones - 2' on max unavailable zones = 2": { + instances: []IngesterDesc{instance1, instance2, instance3, instance4, instance5, instance6}, + maxUnavailableZones: 2, + run: func(t *testing.T, tracker *zoneAwareResultTracker) { + // Zone-a + tracker.done(&instance1, nil) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + // Zone-b + tracker.done(&instance3, nil) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + // Zone-a + tracker.done(&instance1, nil) + assert.True(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + }, + }, + } + + for testName, testCase := range tests { + t.Run(testName, func(t *testing.T) { + testCase.run(t, newZoneAwareResultTracker(testCase.instances, testCase.maxUnavailableZones)) + }) + } +} From 3facb351c4b6b5da1815dc7183fd6faba8663dc5 Mon Sep 17 00:00:00 2001 From: Marco Pracucci Date: Fri, 6 Nov 2020 13:22:22 +0100 Subject: [PATCH 28/42] Fixed TestReplicationSet_Do Signed-off-by: Marco Pracucci --- pkg/ring/replication_set_test.go | 46 ++++++++++++++++---------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/pkg/ring/replication_set_test.go b/pkg/ring/replication_set_test.go index 2b1becd2c16..b268ff3eb03 100644 --- a/pkg/ring/replication_set_test.go +++ b/pkg/ring/replication_set_test.go @@ -7,6 +7,7 @@ import ( "time" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "go.uber.org/atomic" ) @@ -55,7 +56,7 @@ func failingFunctionAfter(failAfter int32, delay time.Duration) func(context.Con } } -func failingFunctionForZones(zones ...string) func(context.Context, *IngesterDesc) (interface{}, error) { +func failingFunctionOnZones(zones ...string) func(context.Context, *IngesterDesc) (interface{}, error) { return func(ctx context.Context, ing *IngesterDesc) (interface{}, error) { for _, zone := range zones { if ing.Zone == zone { @@ -79,7 +80,7 @@ func TestReplicationSet_Do(t *testing.T) { expectedError error }{ { - name: "no errors no delay", + name: "max errors = 0, no errors no delay", instances: []IngesterDesc{ {}, }, @@ -89,7 +90,7 @@ func TestReplicationSet_Do(t *testing.T) { want: []interface{}{1}, }, { - name: "1 error, no errors expected", + name: "max errors = 0, should fail on 1 error out of 1 instance", instances: []IngesterDesc{{}}, f: func(c context.Context, id *IngesterDesc) (interface{}, error) { return nil, errFailure @@ -98,14 +99,14 @@ func TestReplicationSet_Do(t *testing.T) { expectedError: errFailure, }, { - name: "3 instances, last call fails", + name: "max errors = 0, should fail on 1 error out of 3 instances (last call fails)", instances: []IngesterDesc{{}, {}, {}}, f: failingFunctionAfter(2, 10*time.Millisecond), want: nil, expectedError: errFailure, }, { - name: "5 instances, with delay, last 3 calls fail", + name: "max errors = 1, should fail on 3 errors out of 5 instances (last calls fail)", instances: []IngesterDesc{{}, {}, {}, {}, {}}, maxErrors: 1, f: failingFunctionAfter(2, 10*time.Millisecond), @@ -114,7 +115,7 @@ func TestReplicationSet_Do(t *testing.T) { expectedError: errFailure, }, { - name: "3 instances, context cancelled", + name: "max errors = 1, should handle context canceled", instances: []IngesterDesc{{}, {}, 
{}}, maxErrors: 1, f: func(c context.Context, id *IngesterDesc) (interface{}, error) { @@ -126,7 +127,7 @@ func TestReplicationSet_Do(t *testing.T) { expectedError: context.Canceled, }, { - name: "3 instances, 3 zones, no failures", + name: "max errors = 0, should succeed on all successful instances", instances: []IngesterDesc{{ Zone: "zone1", }, { @@ -140,7 +141,7 @@ func TestReplicationSet_Do(t *testing.T) { want: []interface{}{1, 1, 1}, }, { - name: "3 instances, 3 zones, 1 zone fails", + name: "max unavailable zones = 1, should succeed on instances failing in 1 out of 3 zones (3 instances)", instances: []IngesterDesc{{ Zone: "zone1", }, { @@ -148,13 +149,12 @@ func TestReplicationSet_Do(t *testing.T) { }, { Zone: "zone3", }}, - f: failingFunctionForZones("zone1"), - maxUnavailableZones: 1, // (nr of zones) / 2 - maxErrors: 1, // (nr of instances / nr of zones) * maxUnavailableZones + f: failingFunctionOnZones("zone1"), + maxUnavailableZones: 1, want: []interface{}{1, 1}, }, { - name: "3 instances, 3 zones, 2 zones fail", + name: "max unavailable zones = 1, should fail on instances failing in 2 out of 3 zones (3 instances)", instances: []IngesterDesc{{ Zone: "zone1", }, { @@ -162,13 +162,12 @@ func TestReplicationSet_Do(t *testing.T) { }, { Zone: "zone3", }}, - f: failingFunctionForZones("zone1", "zone2"), + f: failingFunctionOnZones("zone1", "zone2"), maxUnavailableZones: 1, - maxErrors: 1, expectedError: errZoneFailure, }, { - name: "6 instances, 3 zones, 1 zone fails", + name: "max unavailable zones = 1, should succeed on instances failing in 1 out of 3 zones (6 instances)", instances: []IngesterDesc{{ Zone: "zone1", }, { @@ -182,13 +181,12 @@ func TestReplicationSet_Do(t *testing.T) { }, { Zone: "zone3", }}, - f: failingFunctionForZones("zone1"), + f: failingFunctionOnZones("zone1"), maxUnavailableZones: 1, - maxErrors: 2, want: []interface{}{1, 1, 1, 1}, }, { - name: "5 instances, 5 zones, 3 zones fails", + name: "max unavailable zones = 2, should fail on instances failing in 3 out of 5 zones (5 instances)", instances: []IngesterDesc{{ Zone: "zone1", }, { @@ -200,13 +198,12 @@ func TestReplicationSet_Do(t *testing.T) { }, { Zone: "zone5", }}, - f: failingFunctionForZones("zone1", "zone2", "zone3"), + f: failingFunctionOnZones("zone1", "zone2", "zone3"), maxUnavailableZones: 2, - maxErrors: 2, expectedError: errZoneFailure, }, { - name: "10 instances, 5 zones, 2 failures in zone 1", + name: "max unavailable zones = 2, should succeed on instances failing in 1 out of 5 zones (10 instances)", instances: []IngesterDesc{{ Zone: "zone1", }, { @@ -228,14 +225,17 @@ func TestReplicationSet_Do(t *testing.T) { }, { Zone: "zone5", }}, - f: failingFunctionForZones("zone1"), + f: failingFunctionOnZones("zone1"), maxUnavailableZones: 2, - maxErrors: 4, want: []interface{}{1, 1, 1, 1, 1, 1}, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { + // Ensure the test case has been correctly setup (max errors and max unavailable zones are + // mutually exclusive). 
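+			// ReplicationSet.Do() picks the zone-aware tracker whenever MaxUnavailableZones > 0
+			// and then ignores MaxErrors, so a case setting both would be misleading.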
+ require.False(t, tt.maxErrors > 0 && tt.maxUnavailableZones > 0) + r := ReplicationSet{ Ingesters: tt.instances, MaxErrors: tt.maxErrors, From c788f175f4f1aa381f7f2d5accada6b319af9fef Mon Sep 17 00:00:00 2001 From: Marco Pracucci Date: Fri, 6 Nov 2020 13:25:05 +0100 Subject: [PATCH 29/42] Commented ReplicationSet max errors and max unavailable zones Signed-off-by: Marco Pracucci --- pkg/ring/replication_set.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pkg/ring/replication_set.go b/pkg/ring/replication_set.go index ca1776e4b53..cd6a33baa6a 100644 --- a/pkg/ring/replication_set.go +++ b/pkg/ring/replication_set.go @@ -9,8 +9,14 @@ import ( // ReplicationSet describes the ingesters to talk to for a given key, and how // many errors to tolerate. type ReplicationSet struct { - Ingesters []IngesterDesc - MaxErrors int + Ingesters []IngesterDesc + + // Maximum number of tolerated failing instances. Max errors and max unavailable zones are + // mutually exclusive. + MaxErrors int + + // Maximum number of different zones in which instances can fail. Max unavailable zones and + // max errors are mutually exclusive. MaxUnavailableZones int } From 833eb89fda743e9b9508414196f8cf43edbef136 Mon Sep 17 00:00:00 2001 From: Marco Pracucci Date: Fri, 6 Nov 2020 14:53:17 +0100 Subject: [PATCH 30/42] Fixed GetReplicationSetForOperation() logic and improved unit tests Signed-off-by: Marco Pracucci --- pkg/ring/ring.go | 69 +++++++++++++++++---------- pkg/ring/ring_test.go | 108 ++++++++++++++++++++++++++++++++++-------- 2 files changed, 132 insertions(+), 45 deletions(-) diff --git a/pkg/ring/ring.go b/pkg/ring/ring.go index e023ace87b9..1730906db16 100644 --- a/pkg/ring/ring.go +++ b/pkg/ring/ring.go @@ -363,17 +363,7 @@ func (r *Ring) GetReplicationSetForOperation(op Operation) (ReplicationSet, erro return ReplicationSet{}, ErrEmptyRing } - // Calculate the number of required ingesters; - // ensure we always require at least RF-1 when RF=3. - numRequired := len(r.ringDesc.Ingesters) - if numRequired < r.cfg.ReplicationFactor { - numRequired = r.cfg.ReplicationFactor - } - maxUnavailable := r.cfg.ReplicationFactor / 2 - numRequired -= maxUnavailable - numRequiredZones := util.Min(len(r.ringZones), r.cfg.ReplicationFactor) - maxUnavailableZones := util.Max(0, numRequiredZones-maxUnavailable-1) - + // Build the initial replication set, excluding unhealthy instances. instances := make([]IngesterDesc, 0, len(r.ringDesc.Ingesters)) zoneFailures := make(map[string]struct{}) for _, ingester := range r.ringDesc.Ingesters { @@ -384,31 +374,58 @@ func (r *Ring) GetReplicationSetForOperation(op Operation) (ReplicationSet, erro } } - if r.cfg.ZoneAwarenessEnabled && len(zoneFailures) > 0 && maxUnavailableZones > 0 { + // Max errors and max unavailable zones are mutually exclusive. We initialise both + // to 0 and then we update them whether zone-awareness is enabled or not. + maxErrors := 0 + maxUnavailableZones := 0 + + if r.cfg.ZoneAwarenessEnabled { + // Given data is replicated to RF different zones, we can tolerate a number of + // RF/2 failing zones. However, we need to protect from the case the ring currently + // contains instances in a number of zones < RF. 
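+		// Worked example (values assumed for illustration): with RF=3 and instances in
+		// 3 zones, numReplicatedZones=3, minSuccessZones=2, maxUnavailableZones=1.
+		// With RF=3 but only 2 zones in the ring, numReplicatedZones=2, minSuccessZones=2,
+		// maxUnavailableZones=0, so a single zone containing an unhealthy instance already
+		// makes the operation fail.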
+ numReplicatedZones := util.Min(len(r.ringZones), r.cfg.ReplicationFactor) + minSuccessZones := (numReplicatedZones / 2) + 1 + maxUnavailableZones = numReplicatedZones - minSuccessZones + if len(zoneFailures) > maxUnavailableZones { return ReplicationSet{}, ErrTooManyFailedIngesters } - // Given that we can tolerate a zone failure, we can skip querying - // all instances in that zone. - filteredInstances := make([]IngesterDesc, 0, len(r.ringDesc.Ingesters)) - for _, ingester := range instances { - if _, ok := zoneFailures[ingester.Zone]; !ok { - filteredInstances = append(filteredInstances, ingester) + if len(zoneFailures) > 0 { + // We remove all instances (even healthy ones) from zones with at least + // 1 failing ingester. Due to how replication works when zone-awareness is + // enabled (data is replicated to RF different zones), there's no benefit in + // querying healthy instances from "failing zones". + filteredInstances := make([]IngesterDesc, 0, len(r.ringDesc.Ingesters)) + for _, ingester := range instances { + if _, ok := zoneFailures[ingester.Zone]; !ok { + filteredInstances = append(filteredInstances, ingester) + } } + + instances = filteredInstances } - // TODO when this logic is triggered, we must guarantee that maxErrors is 0 - instances = filteredInstances - maxUnavailableZones = 0 - numRequired = len(instances) - } + // Since we removed all instances from zones containing at least 1 failing + // instance, we have to decrease the max unavailable zones accordingly. + maxUnavailableZones -= len(zoneFailures) + } else { + // Calculate the number of required ingesters; + // ensure we always require at least RF-1 when RF=3. + numRequired := len(r.ringDesc.Ingesters) + if numRequired < r.cfg.ReplicationFactor { + numRequired = r.cfg.ReplicationFactor + } + maxUnavailable := r.cfg.ReplicationFactor / 2 + numRequired -= maxUnavailable + + if len(instances) < numRequired { + return ReplicationSet{}, ErrTooManyFailedIngesters + } - if len(instances) < numRequired { - return ReplicationSet{}, ErrTooManyFailedIngesters + maxErrors = len(instances) - numRequired } - maxErrors := len(instances) - numRequired return ReplicationSet{ Ingesters: instances, MaxErrors: maxErrors, diff --git a/pkg/ring/ring_test.go b/pkg/ring/ring_test.go index 09456c780b9..ebea41b32fb 100644 --- a/pkg/ring/ring_test.go +++ b/pkg/ring/ring_test.go @@ -418,7 +418,7 @@ func TestRing_GetReplicationSetForOperation(t *testing.T) { } } -func TestRing_GetAll_ZoneAware(t *testing.T) { +func TestRing_GetReplicationSetForOperation_WithZoneAwarenessEnabled(t *testing.T) { tests := map[string]struct { ringInstances map[string]IngesterDesc unhealthyInstances []string @@ -432,7 +432,7 @@ func TestRing_GetAll_ZoneAware(t *testing.T) { ringInstances: nil, expectedError: ErrEmptyRing, }, - "single zone": { + "RF=1, 1 zone": { ringInstances: map[string]IngesterDesc{ "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, @@ -442,7 +442,7 @@ func TestRing_GetAll_ZoneAware(t *testing.T) { expectedMaxErrors: 0, expectedMaxUnavailableZones: 0, }, - "single zone, one unhealthy instance": { + "RF=1, 1 zone, one unhealthy instance": { ringInstances: map[string]IngesterDesc{ "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, @@ -452,7 +452,7 @@ func TestRing_GetAll_ZoneAware(t *testing.T) { 
replicationFactor: 1, expectedError: ErrTooManyFailedIngesters, }, - "three zones, replication factor one, one unhealthy instance": { + "RF=1, 3 zones, one unhealthy instance": { ringInstances: map[string]IngesterDesc{ "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, "instance-2": {Addr: "127.0.0.2", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, @@ -462,7 +462,7 @@ func TestRing_GetAll_ZoneAware(t *testing.T) { replicationFactor: 1, expectedError: ErrTooManyFailedIngesters, }, - "three zones, one instance per zone": { + "RF=3, 3 zones, one instance per zone": { ringInstances: map[string]IngesterDesc{ "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, "instance-2": {Addr: "127.0.0.2", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, @@ -470,10 +470,10 @@ func TestRing_GetAll_ZoneAware(t *testing.T) { }, expectedAddresses: []string{"127.0.0.1", "127.0.0.2", "127.0.0.3"}, replicationFactor: 3, - expectedMaxErrors: 1, + expectedMaxErrors: 0, expectedMaxUnavailableZones: 1, }, - "three zones, one instance per zone, one instance unhealthy": { + "RF=3, 3 zones, one instance per zone, one instance unhealthy": { ringInstances: map[string]IngesterDesc{ "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, "instance-2": {Addr: "127.0.0.2", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, @@ -485,7 +485,7 @@ func TestRing_GetAll_ZoneAware(t *testing.T) { expectedMaxErrors: 0, expectedMaxUnavailableZones: 0, }, - "three zones, one instance per zone, two instances unhealthy in separate zones": { + "RF=3, 3 zones, one instance per zone, two instances unhealthy in separate zones": { ringInstances: map[string]IngesterDesc{ "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, "instance-2": {Addr: "127.0.0.2", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, @@ -495,7 +495,7 @@ func TestRing_GetAll_ZoneAware(t *testing.T) { replicationFactor: 3, expectedError: ErrTooManyFailedIngesters, }, - "three zones, one instance per zone, all instances unhealthy": { + "RF=3, 3 zones, one instance per zone, all instances unhealthy": { ringInstances: map[string]IngesterDesc{ "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, "instance-2": {Addr: "127.0.0.2", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, @@ -505,7 +505,7 @@ func TestRing_GetAll_ZoneAware(t *testing.T) { replicationFactor: 3, expectedError: ErrTooManyFailedIngesters, }, - "three zones, two instances per zone": { + "RF=3, 3 zones, two instances per zone": { ringInstances: map[string]IngesterDesc{ "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, @@ -516,10 +516,10 @@ func TestRing_GetAll_ZoneAware(t *testing.T) { }, expectedAddresses: []string{"127.0.0.1", "127.0.0.2", "127.0.0.3", "127.0.0.4", "127.0.0.5", "127.0.0.6"}, replicationFactor: 3, - expectedMaxErrors: 1, + expectedMaxErrors: 0, expectedMaxUnavailableZones: 1, }, - "three zones, two instances per zone, two instances unhealthy in same zone": { + "RF=3, 3 zones, two instances per zone, two instances unhealthy in same zone": { ringInstances: map[string]IngesterDesc{ "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, @@ -534,7 +534,7 @@ func TestRing_GetAll_ZoneAware(t *testing.T) { 
expectedMaxErrors: 0, expectedMaxUnavailableZones: 0, }, - "three zones, three instances per zone, two instances unhealthy in same zone": { + "RF=3, 3 zones, three instances per zone, two instances unhealthy in same zone": { ringInstances: map[string]IngesterDesc{ "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, @@ -552,7 +552,53 @@ func TestRing_GetAll_ZoneAware(t *testing.T) { expectedMaxErrors: 0, expectedMaxUnavailableZones: 0, }, - "five zones, two instances per zone except for one zone which has three": { + "RF=3, only 2 zones, two instances per zone": { + ringInstances: map[string]IngesterDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-4": {Addr: "127.0.0.4", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + }, + expectedAddresses: []string{"127.0.0.1", "127.0.0.2", "127.0.0.3", "127.0.0.4"}, + unhealthyInstances: []string{}, + replicationFactor: 3, + expectedMaxErrors: 0, + expectedMaxUnavailableZones: 0, + }, + "RF=3, only 2 zones, two instances per zone, one instance unhealthy": { + ringInstances: map[string]IngesterDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-4": {Addr: "127.0.0.4", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + }, + expectedAddresses: []string{}, + unhealthyInstances: []string{"instance-4"}, + replicationFactor: 3, + expectedError: ErrTooManyFailedIngesters, + }, + "RF=3, only 1 zone, two instances per zone": { + ringInstances: map[string]IngesterDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + }, + expectedAddresses: []string{"127.0.0.1", "127.0.0.2"}, + unhealthyInstances: []string{}, + replicationFactor: 3, + expectedMaxErrors: 0, + expectedMaxUnavailableZones: 0, + }, + "RF=3, only 1 zone, two instances per zone, one instance unhealthy": { + ringInstances: map[string]IngesterDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + }, + expectedAddresses: []string{}, + unhealthyInstances: []string{"instance-2"}, + replicationFactor: 3, + expectedError: ErrTooManyFailedIngesters, + }, + "RF=5, 5 zones, two instances per zone except for one zone which has three": { ringInstances: map[string]IngesterDesc{ "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, @@ -569,10 +615,30 @@ func TestRing_GetAll_ZoneAware(t *testing.T) { expectedAddresses: []string{"127.0.0.1", "127.0.0.2", "127.0.0.3", "127.0.0.4", "127.0.0.5", "127.0.0.6", "127.0.0.7", "127.0.0.8", "127.0.0.9", "127.0.0.10", "127.0.0.11"}, replicationFactor: 5, - expectedMaxErrors: 2, + expectedMaxErrors: 0, expectedMaxUnavailableZones: 2, }, - "five zones, two instances per zone except for one zone which has three, 2 unhealthy nodes in separate zones": 
{ + "RF=5, 5 zones, two instances per zone except for one zone which has three, 2 unhealthy nodes in same zones": { + ringInstances: map[string]IngesterDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-4": {Addr: "127.0.0.4", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-5": {Addr: "127.0.0.5", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + "instance-6": {Addr: "127.0.0.6", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + "instance-7": {Addr: "127.0.0.7", Zone: "zone-d", Tokens: GenerateTokens(128, nil)}, + "instance-8": {Addr: "127.0.0.8", Zone: "zone-d", Tokens: GenerateTokens(128, nil)}, + "instance-9": {Addr: "127.0.0.9", Zone: "zone-e", Tokens: GenerateTokens(128, nil)}, + "instance-10": {Addr: "127.0.0.10", Zone: "zone-e", Tokens: GenerateTokens(128, nil)}, + "instance-11": {Addr: "127.0.0.11", Zone: "zone-e", Tokens: GenerateTokens(128, nil)}, + }, + expectedAddresses: []string{"127.0.0.1", "127.0.0.2", "127.0.0.5", "127.0.0.6", "127.0.0.7", "127.0.0.8", "127.0.0.9", "127.0.0.10", "127.0.0.11"}, + unhealthyInstances: []string{"instance-3", "instance-4"}, + replicationFactor: 5, + expectedMaxErrors: 0, + expectedMaxUnavailableZones: 1, + }, + "RF=5, 5 zones, two instances per zone except for one zone which has three, 2 unhealthy nodes in separate zones": { ringInstances: map[string]IngesterDesc{ "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, @@ -592,7 +658,7 @@ func TestRing_GetAll_ZoneAware(t *testing.T) { expectedMaxErrors: 0, expectedMaxUnavailableZones: 0, }, - "five zones, one instances per zone, three unhealthy instances": { + "RF=5, 5 zones, one instances per zone, three unhealthy instances": { ringInstances: map[string]IngesterDesc{ "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, "instance-2": {Addr: "127.0.0.2", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, @@ -608,6 +674,10 @@ func TestRing_GetAll_ZoneAware(t *testing.T) { for testName, testData := range tests { t.Run(testName, func(t *testing.T) { + // Ensure the test case has been correctly setup (max errors and max unavailable zones are + // mutually exclusive). + require.False(t, testData.expectedMaxErrors > 0 && testData.expectedMaxUnavailableZones > 0) + // Init the ring. 
ringDesc := &Desc{Ingesters: testData.ringInstances} for id, instance := range ringDesc.Ingesters { @@ -615,7 +685,7 @@ func TestRing_GetAll_ZoneAware(t *testing.T) { instance.State = ACTIVE for _, instanceName := range testData.unhealthyInstances { if instanceName == id { - instance.State = JOINING + instance.Timestamp = time.Now().Add(-time.Hour).Unix() } } ringDesc.Ingesters[id] = instance @@ -623,7 +693,7 @@ func TestRing_GetAll_ZoneAware(t *testing.T) { ring := Ring{ cfg: Config{ - HeartbeatTimeout: time.Hour, + HeartbeatTimeout: time.Minute, ZoneAwarenessEnabled: true, ReplicationFactor: testData.replicationFactor, }, From 3351321817e766a0fbcb178c5b09e52af8477842 Mon Sep 17 00:00:00 2001 From: Marco Pracucci Date: Fri, 6 Nov 2020 15:17:39 +0100 Subject: [PATCH 31/42] Improved tests Signed-off-by: Marco Pracucci --- integration/zone_aware_test.go | 44 ++++++++++++++++++++++++++-------- pkg/ring/ring_test.go | 1 + 2 files changed, 35 insertions(+), 10 deletions(-) diff --git a/integration/zone_aware_test.go b/integration/zone_aware_test.go index f11b777427c..5253bc34599 100644 --- a/integration/zone_aware_test.go +++ b/integration/zone_aware_test.go @@ -15,9 +15,7 @@ import ( "github.com/cortexproject/cortex/integration/e2ecortex" ) -func TestZoneAwareReadPath(t *testing.T) { - const numSeriesToPush = 100 - +func TestZoneAwareReplication(t *testing.T) { s, err := e2e.NewScenario(networkName) require.NoError(t, err) defer s.Close() @@ -63,11 +61,12 @@ func TestZoneAwareReadPath(t *testing.T) { client, err := e2ecortex.NewClient(distributor.HTTPEndpoint(), querier.HTTPEndpoint(), "", "", userID) require.NoError(t, err) - // Push 100 series + // Push some series now := time.Now() + numSeries := 100 expectedVectors := map[string]model.Vector{} - for i := 1; i <= numSeriesToPush; i++ { + for i := 1; i <= numSeries; i++ { metricName := fmt.Sprintf("series_%d", i) series, expectedVector := generateSeries(metricName, now) res, err := client.Push(series) @@ -77,7 +76,7 @@ func TestZoneAwareReadPath(t *testing.T) { expectedVectors[metricName] = expectedVector } - // Query back 100 series => all good + // Query back series => all good for metricName, expectedVector := range expectedVectors { result, err := client.Query(metricName, now) require.NoError(t, err) @@ -88,7 +87,17 @@ func TestZoneAwareReadPath(t *testing.T) { // SIGKILL 1 ingester in 1 zone require.NoError(t, ingester5.Kill()) - // Query back 100 series => all good + // Push 1 more series => all good + numSeries++ + metricName := fmt.Sprintf("series_%d", numSeries) + series, expectedVector := generateSeries(metricName, now) + res, err := client.Push(series) + require.NoError(t, err) + require.Equal(t, 200, res.StatusCode) + + expectedVectors[metricName] = expectedVector + + // Query back series => all good for metricName, expectedVector := range expectedVectors { result, err := client.Query(metricName, now) require.NoError(t, err) @@ -99,7 +108,17 @@ func TestZoneAwareReadPath(t *testing.T) { // SIGKILL 1 more ingester in the same zone require.NoError(t, ingester6.Kill()) - // Query back 100 series => all good + // Push 1 more series => all good + numSeries++ + metricName = fmt.Sprintf("series_%d", numSeries) + series, expectedVector = generateSeries(metricName, now) + res, err = client.Push(series) + require.NoError(t, err) + require.Equal(t, 200, res.StatusCode) + + expectedVectors[metricName] = expectedVector + + // Query back series => all good for metricName, expectedVector := range expectedVectors { result, err := 
client.Query(metricName, now) require.NoError(t, err) @@ -110,9 +129,14 @@ func TestZoneAwareReadPath(t *testing.T) { // SIGKILL 1 more ingester in a different zone require.NoError(t, ingester1.Kill()) + // Push 1 more series => fail + series, _ = generateSeries("series_last", now) + res, err = client.Push(series) + require.NoError(t, err) + require.Equal(t, 500, res.StatusCode) + // Query back any series => fail - metricName := "series_1" - result, _, err := client.QueryRaw(metricName) + result, _, err := client.QueryRaw("series_1") require.NoError(t, err) require.Equal(t, 500, result.StatusCode) } diff --git a/pkg/ring/ring_test.go b/pkg/ring/ring_test.go index ebea41b32fb..173143bc88c 100644 --- a/pkg/ring/ring_test.go +++ b/pkg/ring/ring_test.go @@ -1503,6 +1503,7 @@ func TestRing_ShuffleShardWithLookback_CorrectnessWithFuzzy(t *testing.T) { cfg: Config{ HeartbeatTimeout: time.Hour, ZoneAwarenessEnabled: true, + ReplicationFactor: 3, }, ringDesc: ringDesc, ringTokens: ringDesc.getTokens(), From a87411b52945470cb8974b2c0aba2aa50c39c057 Mon Sep 17 00:00:00 2001 From: Marco Pracucci Date: Fri, 6 Nov 2020 15:59:54 +0100 Subject: [PATCH 32/42] Fixed tests flakyness Signed-off-by: Marco Pracucci --- integration/e2e/service.go | 5 +++++ integration/zone_aware_test.go | 28 ++++++++++++++++++---------- pkg/ring/replication_set_test.go | 4 ++-- 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/integration/e2e/service.go b/integration/e2e/service.go index d59171f2e17..ffb8f211481 100644 --- a/integration/e2e/service.go +++ b/integration/e2e/service.go @@ -172,6 +172,11 @@ func (s *ConcreteService) Kill() error { logger.Log(string(out)) return err } + + // Wait until the container actually stopped. However, this could fail if + // the container already exited, so we just ignore the error. 
+ _, _ = RunCommandAndGetOutput("docker", "wait", s.containerName()) + s.usedNetworkName = "" return nil diff --git a/integration/zone_aware_test.go b/integration/zone_aware_test.go index 5253bc34599..ec36e720acb 100644 --- a/integration/zone_aware_test.go +++ b/integration/zone_aware_test.go @@ -1,10 +1,12 @@ package integration import ( + "context" "fmt" "testing" "time" + "github.com/pkg/errors" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/pkg/labels" "github.com/stretchr/testify/assert" @@ -84,8 +86,8 @@ func TestZoneAwareReplication(t *testing.T) { assert.Equal(t, expectedVector, result.(model.Vector)) } - // SIGKILL 1 ingester in 1 zone - require.NoError(t, ingester5.Kill()) + // SIGKILL 1 ingester in 1st zone + require.NoError(t, ingester1.Kill()) // Push 1 more series => all good numSeries++ @@ -105,8 +107,8 @@ func TestZoneAwareReplication(t *testing.T) { assert.Equal(t, expectedVector, result.(model.Vector)) } - // SIGKILL 1 more ingester in the same zone - require.NoError(t, ingester6.Kill()) + // SIGKILL 1 more ingester in the 1st zone (all ingesters in 1st zone have been killed) + require.NoError(t, ingester2.Kill()) // Push 1 more series => all good numSeries++ @@ -126,8 +128,18 @@ func TestZoneAwareReplication(t *testing.T) { assert.Equal(t, expectedVector, result.(model.Vector)) } - // SIGKILL 1 more ingester in a different zone - require.NoError(t, ingester1.Kill()) + // SIGKILL 1 ingester in the 2nd zone + require.NoError(t, ingester3.Kill()) + + // Query back any series => fail (either because of a timeout or 500) + result, _, err := client.QueryRaw("series_1") + if !errors.Is(err, context.DeadlineExceeded) { + require.NoError(t, err) + require.Equal(t, 500, result.StatusCode) + } + + // SIGKILL 1 more ingester in the 2nd zone (all ingesters in 2nd zone have been killed) + require.NoError(t, ingester4.Kill()) // Push 1 more series => fail series, _ = generateSeries("series_last", now) @@ -135,8 +147,4 @@ func TestZoneAwareReplication(t *testing.T) { require.NoError(t, err) require.Equal(t, 500, res.StatusCode) - // Query back any series => fail - result, _, err := client.QueryRaw("series_1") - require.NoError(t, err) - require.Equal(t, 500, result.StatusCode) } diff --git a/pkg/ring/replication_set_test.go b/pkg/ring/replication_set_test.go index b268ff3eb03..f131e753530 100644 --- a/pkg/ring/replication_set_test.go +++ b/pkg/ring/replication_set_test.go @@ -203,7 +203,7 @@ func TestReplicationSet_Do(t *testing.T) { expectedError: errZoneFailure, }, { - name: "max unavailable zones = 2, should succeed on instances failing in 1 out of 5 zones (10 instances)", + name: "max unavailable zones = 2, should succeed on instances failing in 2 out of 5 zones (10 instances)", instances: []IngesterDesc{{ Zone: "zone1", }, { @@ -225,7 +225,7 @@ func TestReplicationSet_Do(t *testing.T) { }, { Zone: "zone5", }}, - f: failingFunctionOnZones("zone1"), + f: failingFunctionOnZones("zone1", "zone5"), maxUnavailableZones: 2, want: []interface{}{1, 1, 1, 1, 1, 1}, }, From 99021014439c72b398522054b983d6e32b672803 Mon Sep 17 00:00:00 2001 From: Marco Pracucci Date: Fri, 6 Nov 2020 17:31:22 +0100 Subject: [PATCH 33/42] Fixed test Signed-off-by: Marco Pracucci --- pkg/ring/replication_set_tracker_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/ring/replication_set_tracker_test.go b/pkg/ring/replication_set_tracker_test.go index 8c87a21f592..cb981c80571 100644 --- a/pkg/ring/replication_set_tracker_test.go +++ 
b/pkg/ring/replication_set_tracker_test.go @@ -251,7 +251,7 @@ func TestZoneAwareResultTracker(t *testing.T) { assert.False(t, tracker.failed()) // Zone-a - tracker.done(&instance1, nil) + tracker.done(&instance2, nil) assert.True(t, tracker.succeeded()) assert.False(t, tracker.failed()) }, From c7d39e904b6a6a4c1ff635bc68c46eee4d32b544 Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Mon, 9 Nov 2020 08:42:27 +0000 Subject: [PATCH 34/42] Update documentation Signed-off-by: Michel Hollands --- docs/guides/zone-replication.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/guides/zone-replication.md b/docs/guides/zone-replication.md index c64be0c1f57..c8e17e36094 100644 --- a/docs/guides/zone-replication.md +++ b/docs/guides/zone-replication.md @@ -26,6 +26,8 @@ The Cortex time-series replication is used to hold multiple (typically 3) replic 2. Rollout ingesters to apply the configured zone 3. Enable time-series zone-aware replication via the `-distributor.zone-awareness-enabled` CLI flag (or its respective YAML config option). Please be aware this configuration option should be set to distributors, queriers and rulers. +The `-distributor.shard-by-all-labels` setting has an impact on zone replication. When set to `true` there will be more series stored that will be spread out over more instances. When an instance goes down fewer series will be impacted. + ## Store-gateways: blocks replication The Cortex [store-gateway](../blocks-storage/store-gateway.md) (used only when Cortex is running with the [blocks storage](../blocks-storage/_index.md)) supports blocks sharding, used to horizontally scale blocks in a large cluster without hitting any vertical scalability limit. From 75a5dc657df5a38b0bbdd353dbd2cfd5345acce1 Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Mon, 9 Nov 2020 08:52:00 +0000 Subject: [PATCH 35/42] Add note about reads from zone aware clusters Signed-off-by: Michel Hollands --- docs/guides/zone-replication.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/guides/zone-replication.md b/docs/guides/zone-replication.md index c8e17e36094..504db7b6457 100644 --- a/docs/guides/zone-replication.md +++ b/docs/guides/zone-replication.md @@ -11,6 +11,8 @@ It is completely possible that all the replicas for the given data are held with For this reason, Cortex optionally supports zone-aware replication. When zone-aware replication is **enabled**, replicas for the given data are guaranteed to span across different availability zones. This requires Cortex cluster to run at least in a number of zones equal to the configured replication factor. +Reads from a zone-aware replication enabled Cortex Cluster can withstand zone failures as long as there are more than replication factor / 2 zones available without any failing instances. 
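To make the quorum arithmetic above concrete, here is a minimal, self-contained Go sketch (not part of the patch series; the `maxUnavailableZones` helper is an illustrative name, not an API from the Cortex codebase):

```go
package main

import "fmt"

// maxUnavailableZones returns how many zones can fail while reads still
// succeed: data is replicated to at most RF distinct zones, and a majority
// of those zones must remain fully healthy.
func maxUnavailableZones(replicationFactor, ringZones int) int {
	replicatedZones := replicationFactor
	if ringZones < replicatedZones {
		replicatedZones = ringZones
	}
	minSuccessfulZones := replicatedZones/2 + 1
	return replicatedZones - minSuccessfulZones
}

func main() {
	fmt.Println(maxUnavailableZones(3, 3)) // 1: RF=3 across 3 zones tolerates one failed zone
	fmt.Println(maxUnavailableZones(5, 5)) // 2: RF=5 across 5 zones tolerates two failed zones
	fmt.Println(maxUnavailableZones(3, 1)) // 0: a single-zone cluster cannot survive a zone outage
}
```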
+ The Cortex services supporting **zone-aware replication** are: - **[Distributors and Ingesters](#distributors-and-ingesters-time-series-replication)** From 264729434265beb591c3dbfe18759d101a3165cd Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Mon, 9 Nov 2020 09:28:57 +0000 Subject: [PATCH 36/42] Remove extra space Signed-off-by: Michel Hollands --- docs/guides/zone-replication.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guides/zone-replication.md b/docs/guides/zone-replication.md index 504db7b6457..1c4fbbe7240 100644 --- a/docs/guides/zone-replication.md +++ b/docs/guides/zone-replication.md @@ -11,7 +11,7 @@ It is completely possible that all the replicas for the given data are held with For this reason, Cortex optionally supports zone-aware replication. When zone-aware replication is **enabled**, replicas for the given data are guaranteed to span across different availability zones. This requires Cortex cluster to run at least in a number of zones equal to the configured replication factor. -Reads from a zone-aware replication enabled Cortex Cluster can withstand zone failures as long as there are more than replication factor / 2 zones available without any failing instances. +Reads from a zone-aware replication enabled Cortex Cluster can withstand zone failures as long as there are more than replication factor / 2 zones available without any failing instances. The Cortex services supporting **zone-aware replication** are: From ee08cab72b2513ad4a11c9b6d057f667743cf127 Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Mon, 9 Nov 2020 09:45:09 +0000 Subject: [PATCH 37/42] Address some of Peter's review comment Signed-off-by: Michel Hollands --- pkg/ring/replication_set.go | 48 +++++++++++------------------ pkg/ring/replication_set_tracker.go | 20 ++++-------- pkg/ring/ring.go | 18 +++++------ 3 files changed, 33 insertions(+), 53 deletions(-) diff --git a/pkg/ring/replication_set.go b/pkg/ring/replication_set.go index cd6a33baa6a..adc619e85cb 100644 --- a/pkg/ring/replication_set.go +++ b/pkg/ring/replication_set.go @@ -25,10 +25,6 @@ type ReplicationSet struct { func (r ReplicationSet) Do(ctx context.Context, delay time.Duration, f func(context.Context, *IngesterDesc) (interface{}, error)) ([]interface{}, error) { type instanceResult struct { res interface{} - instance *IngesterDesc - } - - type instanceError struct { err error instance *IngesterDesc } @@ -42,9 +38,8 @@ func (r ReplicationSet) Do(ctx context.Context, delay time.Duration, f func(cont } var ( - errs = make(chan instanceError, len(r.Ingesters)) - resultsChan = make(chan instanceResult, len(r.Ingesters)) - forceStart = make(chan struct{}, r.MaxErrors) + ch = make(chan instanceResult, len(r.Ingesters)) + forceStart = make(chan struct{}, r.MaxErrors) ) ctx, cancel := context.WithCancel(ctx) defer cancel() @@ -64,16 +59,10 @@ func (r ReplicationSet) Do(ctx context.Context, delay time.Duration, f func(cont } } result, err := f(ctx, ing) - if err != nil { - errs <- instanceError{ - err: err, - instance: ing, - } - } else { - resultsChan <- instanceResult{ - res: result, - instance: ing, - } + ch <- instanceResult{ + res: result, + err: err, + instance: ing, } }(i, &r.Ingesters[i]) } @@ -82,22 +71,21 @@ func (r ReplicationSet) Do(ctx context.Context, delay time.Duration, f func(cont for !tracker.succeeded() { select { - case err := <-errs: - tracker.done(err.instance, err.err) - - if tracker.failed() { - return nil, err.err - } + case res := <-ch: + tracker.done(res.instance, res.err) + if 
res.err != nil { + if tracker.failed() { + return nil, res.err + } - // force one of the delayed requests to start - if delay > 0 && r.MaxUnavailableZones == 0 { - forceStart <- struct{}{} + // force one of the delayed requests to start + if delay > 0 && r.MaxUnavailableZones == 0 { + forceStart <- struct{}{} + } + } else { + results = append(results, res.res) } - case result := <-resultsChan: - tracker.done(result.instance, nil) - results = append(results, result.res) - case <-ctx.Done(): return nil, ctx.Err() } diff --git a/pkg/ring/replication_set_tracker.go b/pkg/ring/replication_set_tracker.go index 310cb2b86cb..09f12e3cebb 100644 --- a/pkg/ring/replication_set_tracker.go +++ b/pkg/ring/replication_set_tracker.go @@ -44,6 +44,8 @@ func (t *defaultResultTracker) failed() bool { return t.numErrors > t.maxErrors } +// zoneAwareResultTracker tracks the results per zone. +// All instances in a zone must succeed in order for the zone to succeed. type zoneAwareResultTracker struct { waitingByZone map[string]int failuresByZone map[string]int @@ -60,7 +62,6 @@ func newZoneAwareResultTracker(instances []IngesterDesc, maxUnavailableZones int for _, instance := range instances { t.waitingByZone[instance.Zone]++ - t.failuresByZone[instance.Zone] = 0 } t.minSuccessfulZones = len(t.waitingByZone) - maxUnavailableZones @@ -76,29 +77,20 @@ func (t *zoneAwareResultTracker) done(instance *IngesterDesc, err error) { } func (t *zoneAwareResultTracker) succeeded() bool { - actualSucceededZones := 0 + successfulZones := 0 // The execution succeeded once we successfully received a successful result // from "all zones - max unavailable zones". for zone, numWaiting := range t.waitingByZone { if numWaiting == 0 && t.failuresByZone[zone] == 0 { - actualSucceededZones++ + successfulZones++ } } - return actualSucceededZones >= t.minSuccessfulZones + return successfulZones >= t.minSuccessfulZones } func (t *zoneAwareResultTracker) failed() bool { - failedZones := 0 - - // The execution failed if the number of zones, for which we have tracked at least 1 - // failure, exceeds the max unavailable zones. - for _, numFailures := range t.failuresByZone { - if numFailures > 0 { - failedZones++ - } - } - + failedZones := len(t.failuresByZone) return failedZones > t.maxUnavailableZones } diff --git a/pkg/ring/ring.go b/pkg/ring/ring.go index 1730906db16..6e651a98aae 100644 --- a/pkg/ring/ring.go +++ b/pkg/ring/ring.go @@ -364,11 +364,11 @@ func (r *Ring) GetReplicationSetForOperation(op Operation) (ReplicationSet, erro } // Build the initial replication set, excluding unhealthy instances. - instances := make([]IngesterDesc, 0, len(r.ringDesc.Ingesters)) + healthyInstances := make([]IngesterDesc, 0, len(r.ringDesc.Ingesters)) zoneFailures := make(map[string]struct{}) for _, ingester := range r.ringDesc.Ingesters { if r.IsHealthy(&ingester, op) { - instances = append(instances, ingester) + healthyInstances = append(healthyInstances, ingester) } else { zoneFailures[ingester.Zone] = struct{}{} } @@ -397,13 +397,13 @@ func (r *Ring) GetReplicationSetForOperation(op Operation) (ReplicationSet, erro // enabled (data is replicated to RF different zones), there's no benefit in // querying healthy instances from "failing zones". 
filteredInstances := make([]IngesterDesc, 0, len(r.ringDesc.Ingesters)) - for _, ingester := range instances { + for _, ingester := range healthyInstances { if _, ok := zoneFailures[ingester.Zone]; !ok { filteredInstances = append(filteredInstances, ingester) } } - instances = filteredInstances + healthyInstances = filteredInstances } // Since we removed all instances from zones containing at least 1 failing @@ -416,18 +416,18 @@ func (r *Ring) GetReplicationSetForOperation(op Operation) (ReplicationSet, erro if numRequired < r.cfg.ReplicationFactor { numRequired = r.cfg.ReplicationFactor } - maxUnavailable := r.cfg.ReplicationFactor / 2 - numRequired -= maxUnavailable + // We can tolerate this many failures + numRequired -= r.cfg.ReplicationFactor / 2 - if len(instances) < numRequired { + if len(healthyInstances) < numRequired { return ReplicationSet{}, ErrTooManyFailedIngesters } - maxErrors = len(instances) - numRequired + maxErrors = len(healthyInstances) - numRequired } return ReplicationSet{ - Ingesters: instances, + Ingesters: healthyInstances, MaxErrors: maxErrors, MaxUnavailableZones: maxUnavailableZones, }, nil From dd18b80bf540eee1261fe181ba7fca6da648647c Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Mon, 9 Nov 2020 14:30:36 +0000 Subject: [PATCH 38/42] Add special case for rf=2 Signed-off-by: Michel Hollands --- pkg/ring/ring.go | 3 +++ pkg/ring/ring_test.go | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/pkg/ring/ring.go b/pkg/ring/ring.go index 6e651a98aae..3754dad6bf9 100644 --- a/pkg/ring/ring.go +++ b/pkg/ring/ring.go @@ -385,6 +385,9 @@ func (r *Ring) GetReplicationSetForOperation(op Operation) (ReplicationSet, erro // contains instances in a number of zones < RF. numReplicatedZones := util.Min(len(r.ringZones), r.cfg.ReplicationFactor) minSuccessZones := (numReplicatedZones / 2) + 1 + if r.cfg.ReplicationFactor == 2 && len(r.ringZones) == 2 { + minSuccessZones = 1 + } maxUnavailableZones = numReplicatedZones - minSuccessZones if len(zoneFailures) > maxUnavailableZones { diff --git a/pkg/ring/ring_test.go b/pkg/ring/ring_test.go index 173143bc88c..895fae0389e 100644 --- a/pkg/ring/ring_test.go +++ b/pkg/ring/ring_test.go @@ -462,6 +462,25 @@ func TestRing_GetReplicationSetForOperation_WithZoneAwarenessEnabled(t *testing. 
replicationFactor: 1, expectedError: ErrTooManyFailedIngesters, }, + "RF=2, 2 zones": { + ringInstances: map[string]IngesterDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + }, + expectedAddresses: []string{"127.0.0.1", "127.0.0.2"}, + replicationFactor: 2, + expectedMaxUnavailableZones: 1, + }, + "RF=2, 2 zones, one unhealthy instance": { + ringInstances: map[string]IngesterDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + }, + expectedAddresses: []string{"127.0.0.1"}, + unhealthyInstances: []string{"instance-2"}, + replicationFactor: 2, + expectedMaxUnavailableZones: 0, + }, "RF=3, 3 zones, one instance per zone": { ringInstances: map[string]IngesterDesc{ "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, From 97ea59b554f51b703cec9ef7394bd4da9131186b Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Mon, 9 Nov 2020 17:34:56 +0000 Subject: [PATCH 39/42] Address review comments Signed-off-by: Michel Hollands --- pkg/ring/replication_set_test.go | 92 ++++++-------------------------- pkg/ring/ring.go | 3 -- pkg/ring/ring_test.go | 9 ++-- 3 files changed, 19 insertions(+), 85 deletions(-) diff --git a/pkg/ring/replication_set_test.go b/pkg/ring/replication_set_test.go index f131e753530..d8c853fe303 100644 --- a/pkg/ring/replication_set_test.go +++ b/pkg/ring/replication_set_test.go @@ -127,104 +127,44 @@ func TestReplicationSet_Do(t *testing.T) { expectedError: context.Canceled, }, { - name: "max errors = 0, should succeed on all successful instances", - instances: []IngesterDesc{{ - Zone: "zone1", - }, { - Zone: "zone2", - }, { - Zone: "zone3", - }}, + name: "max errors = 0, should succeed on all successful instances", + instances: []IngesterDesc{{Zone: "zone1"}, {Zone: "zone2"}, {Zone: "zone3"}}, f: func(c context.Context, id *IngesterDesc) (interface{}, error) { return 1, nil }, want: []interface{}{1, 1, 1}, }, { - name: "max unavailable zones = 1, should succeed on instances failing in 1 out of 3 zones (3 instances)", - instances: []IngesterDesc{{ - Zone: "zone1", - }, { - Zone: "zone2", - }, { - Zone: "zone3", - }}, + name: "max unavailable zones = 1, should succeed on instances failing in 1 out of 3 zones (3 instances)", + instances: []IngesterDesc{{Zone: "zone1"}, {Zone: "zone2"}, {Zone: "zone3"}}, f: failingFunctionOnZones("zone1"), maxUnavailableZones: 1, want: []interface{}{1, 1}, }, { - name: "max unavailable zones = 1, should fail on instances failing in 2 out of 3 zones (3 instances)", - instances: []IngesterDesc{{ - Zone: "zone1", - }, { - Zone: "zone2", - }, { - Zone: "zone3", - }}, + name: "max unavailable zones = 1, should fail on instances failing in 2 out of 3 zones (3 instances)", + instances: []IngesterDesc{{Zone: "zone1"}, {Zone: "zone2"}, {Zone: "zone3"}}, f: failingFunctionOnZones("zone1", "zone2"), maxUnavailableZones: 1, expectedError: errZoneFailure, }, { - name: "max unavailable zones = 1, should succeed on instances failing in 1 out of 3 zones (6 instances)", - instances: []IngesterDesc{{ - Zone: "zone1", - }, { - Zone: "zone1", - }, { - Zone: "zone2", - }, { - Zone: "zone2", - }, { - Zone: "zone3", - }, { - Zone: "zone3", - }}, + name: "max unavailable zones = 1, should succeed on instances failing in 1 out of 3 zones (6 instances)", + instances: []IngesterDesc{{Zone: 
"zone1"}, {Zone: "zone1"}, {Zone: "zone2"}, {Zone: "zone2"}, {Zone: "zone3"}, {Zone: "zone3"}}, f: failingFunctionOnZones("zone1"), maxUnavailableZones: 1, want: []interface{}{1, 1, 1, 1}, }, { - name: "max unavailable zones = 2, should fail on instances failing in 3 out of 5 zones (5 instances)", - instances: []IngesterDesc{{ - Zone: "zone1", - }, { - Zone: "zone2", - }, { - Zone: "zone3", - }, { - Zone: "zone4", - }, { - Zone: "zone5", - }}, + name: "max unavailable zones = 2, should fail on instances failing in 3 out of 5 zones (5 instances)", + instances: []IngesterDesc{{Zone: "zone1"}, {Zone: "zone2"}, {Zone: "zone3"}, {Zone: "zone4"}, {Zone: "zone5"}}, f: failingFunctionOnZones("zone1", "zone2", "zone3"), maxUnavailableZones: 2, expectedError: errZoneFailure, }, { - name: "max unavailable zones = 2, should succeed on instances failing in 2 out of 5 zones (10 instances)", - instances: []IngesterDesc{{ - Zone: "zone1", - }, { - Zone: "zone1", - }, { - Zone: "zone2", - }, { - Zone: "zone2", - }, { - Zone: "zone3", - }, { - Zone: "zone3", - }, { - Zone: "zone4", - }, { - Zone: "zone4", - }, { - Zone: "zone5", - }, { - Zone: "zone5", - }}, + name: "max unavailable zones = 2, should succeed on instances failing in 2 out of 5 zones (10 instances)", + instances: []IngesterDesc{{Zone: "zone1"}, {Zone: "zone1"}, {Zone: "zone2"}, {Zone: "zone2"}, {Zone: "zone3"}, {Zone: "zone3"}, {Zone: "zone4"}, {Zone: "zone4"}, {Zone: "zone5"}, {Zone: "zone5"}}, f: failingFunctionOnZones("zone1", "zone5"), maxUnavailableZones: 2, want: []interface{}{1, 1, 1, 1, 1, 1}, @@ -245,11 +185,9 @@ func TestReplicationSet_Do(t *testing.T) { if tt.cancelContextDelay > 0 { var cancel context.CancelFunc ctx, cancel = context.WithCancel(ctx) - go func() { - time.AfterFunc(tt.cancelContextDelay, func() { - cancel() - }) - }() + time.AfterFunc(tt.cancelContextDelay, func() { + cancel() + }) } got, err := r.Do(ctx, tt.delay, tt.f) if tt.expectedError != nil { diff --git a/pkg/ring/ring.go b/pkg/ring/ring.go index 3754dad6bf9..6e651a98aae 100644 --- a/pkg/ring/ring.go +++ b/pkg/ring/ring.go @@ -385,9 +385,6 @@ func (r *Ring) GetReplicationSetForOperation(op Operation) (ReplicationSet, erro // contains instances in a number of zones < RF. numReplicatedZones := util.Min(len(r.ringZones), r.cfg.ReplicationFactor) minSuccessZones := (numReplicatedZones / 2) + 1 - if r.cfg.ReplicationFactor == 2 && len(r.ringZones) == 2 { - minSuccessZones = 1 - } maxUnavailableZones = numReplicatedZones - minSuccessZones if len(zoneFailures) > maxUnavailableZones { diff --git a/pkg/ring/ring_test.go b/pkg/ring/ring_test.go index 895fae0389e..1a70cc6ec55 100644 --- a/pkg/ring/ring_test.go +++ b/pkg/ring/ring_test.go @@ -469,17 +469,16 @@ func TestRing_GetReplicationSetForOperation_WithZoneAwarenessEnabled(t *testing. 
}, expectedAddresses: []string{"127.0.0.1", "127.0.0.2"}, replicationFactor: 2, - expectedMaxUnavailableZones: 1, + expectedMaxUnavailableZones: 0, }, "RF=2, 2 zones, one unhealthy instance": { ringInstances: map[string]IngesterDesc{ "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, "instance-2": {Addr: "127.0.0.2", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, }, - expectedAddresses: []string{"127.0.0.1"}, - unhealthyInstances: []string{"instance-2"}, - replicationFactor: 2, - expectedMaxUnavailableZones: 0, + unhealthyInstances: []string{"instance-2"}, + replicationFactor: 2, + expectedError: ErrTooManyFailedIngesters, }, "RF=3, 3 zones, one instance per zone": { ringInstances: map[string]IngesterDesc{ From 474168f7f59b04aa7518e50ecab573b7a1594c59 Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Mon, 9 Nov 2020 17:48:08 +0000 Subject: [PATCH 40/42] Fix comment Signed-off-by: Michel Hollands --- pkg/ring/ring.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pkg/ring/ring.go b/pkg/ring/ring.go index 6e651a98aae..d170f8d06a0 100644 --- a/pkg/ring/ring.go +++ b/pkg/ring/ring.go @@ -395,7 +395,8 @@ func (r *Ring) GetReplicationSetForOperation(op Operation) (ReplicationSet, erro // We remove all instances (even healthy ones) from zones with at least // 1 failing ingester. Due to how replication works when zone-awareness is // enabled (data is replicated to RF different zones), there's no benefit in - // querying healthy instances from "failing zones". + // querying healthy instances from "failing zones". A zone is considered + // failed if there is single error. filteredInstances := make([]IngesterDesc, 0, len(r.ringDesc.Ingesters)) for _, ingester := range healthyInstances { if _, ok := zoneFailures[ingester.Zone]; !ok { From 1e50276b7e903201c6af8059a31889bd88aaf457 Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Tue, 10 Nov 2020 13:50:25 +0000 Subject: [PATCH 41/42] Update docs with review comments Signed-off-by: Michel Hollands --- docs/guides/zone-replication.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/guides/zone-replication.md b/docs/guides/zone-replication.md index 1c4fbbe7240..56abf3ac7e4 100644 --- a/docs/guides/zone-replication.md +++ b/docs/guides/zone-replication.md @@ -11,7 +11,7 @@ It is completely possible that all the replicas for the given data are held with For this reason, Cortex optionally supports zone-aware replication. When zone-aware replication is **enabled**, replicas for the given data are guaranteed to span across different availability zones. This requires Cortex cluster to run at least in a number of zones equal to the configured replication factor. -Reads from a zone-aware replication enabled Cortex Cluster can withstand zone failures as long as there are more than replication factor / 2 zones available without any failing instances. +Reads from a zone-aware replication enabled Cortex Cluster can withstand zone failures as long as there are no more than `floor(replication factor / 2)` zones with failing instances. The Cortex services supporting **zone-aware replication** are: @@ -28,7 +28,9 @@ The Cortex time-series replication is used to hold multiple (typically 3) replic 2. Rollout ingesters to apply the configured zone 3. Enable time-series zone-aware replication via the `-distributor.zone-awareness-enabled` CLI flag (or its respective YAML config option). Please be aware this configuration option should be set to distributors, queriers and rulers. 
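For example, the three steps above could look as follows on the command line (a hedged sketch: `-distributor.zone-awareness-enabled` is taken from this guide, while the `-ingester.availability-zone` flag name and the `-target` values are assumptions about the Cortex binary and may differ in your version):

```sh
# 1. Configure the zone each ingester runs in (flag name assumed).
cortex -target=ingester    -ingester.availability-zone=zone-a ...
# 2. Roll out the ingesters so the configured zones take effect.
# 3. Enable zone-aware replication on distributors, queriers and rulers.
cortex -target=distributor -distributor.zone-awareness-enabled=true ...
cortex -target=querier     -distributor.zone-awareness-enabled=true ...
cortex -target=ruler       -distributor.zone-awareness-enabled=true ...
```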
-The `-distributor.shard-by-all-labels` setting has an impact on zone replication. When set to `true` there will be more series stored that will be spread out over more instances. When an instance goes down fewer series will be impacted. +The `-distributor.shard-by-all-labels` setting has an impact on read availability. When enabled, a metric is sharded across all ingesters and querier needs to fetch series from all ingesters while, when disabled, a metric is sharded only across `` ingesters. + +In the event of a large outage impacting ingesters in more than 1 zone, when `-distributor.shard-by-all-labels=true` all queries will fail, while when disabled some queries may still succeed if the ingesters holding the required metric are not impacted by the outage. ## Store-gateways: blocks replication From af5ff2a272d3e091afbcba9e6562a4d3becc240e Mon Sep 17 00:00:00 2001 From: Michel Hollands Date: Tue, 10 Nov 2020 13:56:22 +0000 Subject: [PATCH 42/42] Set maxUnavailableZones and change tests Signed-off-by: Michel Hollands --- pkg/ring/ring.go | 2 +- pkg/ring/ring_test.go | 18 ++++++++---------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/pkg/ring/ring.go b/pkg/ring/ring.go index d170f8d06a0..a0d8723e2c1 100644 --- a/pkg/ring/ring.go +++ b/pkg/ring/ring.go @@ -385,7 +385,7 @@ func (r *Ring) GetReplicationSetForOperation(op Operation) (ReplicationSet, erro // contains instances in a number of zones < RF. numReplicatedZones := util.Min(len(r.ringZones), r.cfg.ReplicationFactor) minSuccessZones := (numReplicatedZones / 2) + 1 - maxUnavailableZones = numReplicatedZones - minSuccessZones + maxUnavailableZones = minSuccessZones - 1 if len(zoneFailures) > maxUnavailableZones { return ReplicationSet{}, ErrTooManyFailedIngesters diff --git a/pkg/ring/ring_test.go b/pkg/ring/ring_test.go index 1a70cc6ec55..7b36347d078 100644 --- a/pkg/ring/ring_test.go +++ b/pkg/ring/ring_test.go @@ -469,16 +469,16 @@ func TestRing_GetReplicationSetForOperation_WithZoneAwarenessEnabled(t *testing. }, expectedAddresses: []string{"127.0.0.1", "127.0.0.2"}, replicationFactor: 2, - expectedMaxUnavailableZones: 0, + expectedMaxUnavailableZones: 1, }, "RF=2, 2 zones, one unhealthy instance": { ringInstances: map[string]IngesterDesc{ "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, "instance-2": {Addr: "127.0.0.2", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, }, + expectedAddresses: []string{"127.0.0.1"}, unhealthyInstances: []string{"instance-2"}, replicationFactor: 2, - expectedError: ErrTooManyFailedIngesters, }, "RF=3, 3 zones, one instance per zone": { ringInstances: map[string]IngesterDesc{ @@ -578,10 +578,9 @@ func TestRing_GetReplicationSetForOperation_WithZoneAwarenessEnabled(t *testing. "instance-4": {Addr: "127.0.0.4", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, }, expectedAddresses: []string{"127.0.0.1", "127.0.0.2", "127.0.0.3", "127.0.0.4"}, - unhealthyInstances: []string{}, replicationFactor: 3, expectedMaxErrors: 0, - expectedMaxUnavailableZones: 0, + expectedMaxUnavailableZones: 1, }, "RF=3, only 2 zones, two instances per zone, one instance unhealthy": { ringInstances: map[string]IngesterDesc{ @@ -590,10 +589,11 @@ func TestRing_GetReplicationSetForOperation_WithZoneAwarenessEnabled(t *testing. 
"instance-3": {Addr: "127.0.0.3", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, "instance-4": {Addr: "127.0.0.4", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, }, - expectedAddresses: []string{}, - unhealthyInstances: []string{"instance-4"}, - replicationFactor: 3, - expectedError: ErrTooManyFailedIngesters, + expectedAddresses: []string{"127.0.0.1", "127.0.0.2"}, + unhealthyInstances: []string{"instance-4"}, + replicationFactor: 3, + expectedMaxErrors: 0, + expectedMaxUnavailableZones: 0, }, "RF=3, only 1 zone, two instances per zone": { ringInstances: map[string]IngesterDesc{ @@ -601,7 +601,6 @@ func TestRing_GetReplicationSetForOperation_WithZoneAwarenessEnabled(t *testing. "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, }, expectedAddresses: []string{"127.0.0.1", "127.0.0.2"}, - unhealthyInstances: []string{}, replicationFactor: 3, expectedMaxErrors: 0, expectedMaxUnavailableZones: 0, @@ -611,7 +610,6 @@ func TestRing_GetReplicationSetForOperation_WithZoneAwarenessEnabled(t *testing. "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, }, - expectedAddresses: []string{}, unhealthyInstances: []string{"instance-2"}, replicationFactor: 3, expectedError: ErrTooManyFailedIngesters,