diff --git a/swarm/network/kademlia.go b/swarm/network/kademlia.go index f9b38fc48d..b59735e6dc 100644 --- a/swarm/network/kademlia.go +++ b/swarm/network/kademlia.go @@ -57,6 +57,7 @@ type KadParams struct { MaxProxDisplay int // number of rows the table shows NeighbourhoodSize int // nearest neighbour core minimum cardinality MinBinSize int // minimum number of peers in a row + HealthBinSize int // minimum number of peers per bin MaxBinSize int // maximum number of peers in a row before pruning RetryInterval int64 // initial interval before a peer is first redialed RetryExponent int // exponent to multiply retry intervals with @@ -71,6 +72,7 @@ func NewKadParams() *KadParams { MaxProxDisplay: 16, NeighbourhoodSize: 2, MinBinSize: 2, + HealthBinSize: 1, MaxBinSize: 4, RetryInterval: 4200000000, // 4.2 sec MaxRetries: 42, @@ -715,17 +717,16 @@ func (k *Kademlia) knowNeighbours(addrs [][]byte) (got bool, n int, missing [][] // then we don't know all our neighbors // (which sadly is all too common in modern society) var gots int - var culprits [][]byte for _, p := range addrs { pk := common.Bytes2Hex(p) if pm[pk] { gots++ } else { log.Trace(fmt.Sprintf("%08x: known nearest neighbour %s not found", k.base, pk)) - culprits = append(culprits, p) + missing = append(missing, p) } } - return gots == len(addrs), gots, culprits + return gots == len(addrs), gots, missing } // connectedNeighbours tests if all neighbours in the peerpot @@ -750,18 +751,49 @@ func (k *Kademlia) connectedNeighbours(peers [][]byte) (got bool, n int, missing // iterate through nearest neighbors in the peerpot map // if we can't find the neighbor in the map we created above // then we don't know all our neighbors - var gots int - var culprits [][]byte + var connects int for _, p := range peers { pk := common.Bytes2Hex(p) if pm[pk] { - gots++ + connects++ } else { log.Trace(fmt.Sprintf("%08x: ExpNN: %s not found", k.base, pk)) - culprits = append(culprits, p) + missing = append(missing, p) + } + } + 
return connects == len(peers), connects, missing +} + +// getWeakBins checks whether the node is connected to a health minimum of peers it knows about in bins that are shallower than depth +// it returns an array of bin proximity orders for which this is not the case +// TODO move to separate testing tools file +func (k *Kademlia) getWeakBins() (missing []int) { + pk := make(map[int]int) + pc := make(map[int]int) + + // create a map with all bins that have known peers + // in order deepest to shallowest compared to the kademlia base address + depth := depthForPot(k.conns, k.NeighbourhoodSize, k.base) + k.eachAddr(nil, 255, func(_ *BzzAddr, po int) bool { + pk[po]++ + return true + }) + k.eachConn(nil, 255, func(_ *Peer, po int) bool { + pc[po]++ + return true + }) + + for po, v := range pk { + if pc[po] == v { + continue + } else if po >= depth { + missing = append(missing, po) + } else if pc[po] < k.HealthBinSize { + missing = append(missing, po) } + } - return gots == len(peers), gots, culprits + return missing } // Health state of the Kademlia @@ -774,10 +806,21 @@ type Health struct { CountConnectNN int // amount of neighbours connected to MissingConnectNN [][]byte // which neighbours we should have been connected to but we're not Saturated bool // whether we are connected to all the peers we would have liked to + Robust bool // whether we are connected to a minimum of peers in all the bins we have known peers in Hive string } -// Healthy reports the health state of the kademlia connectivity +// IsHealthyStrict returns the strict interpretation of `Healthy` given a `Health` struct +// definition of strict health: all conditions must be true: +// - we know at least one peer +// - we know all neighbors +// - we are connected to all known neighbors +// - it is robust (we are connected to a minimum of peers in all the bins we have known peers in) +func (h *Health) IsHealthyStrict() bool { + return h.KnowNN && h.ConnectNN && h.CountKnowNN > 0 && h.Robust +} + +//
GetHealthInfo reports the health state of the kademlia connectivity // // The PeerPot argument provides an all-knowing view of the network // The resulting Health object is a result of comparisons between @@ -785,25 +828,31 @@ type Health struct { // what SHOULD it have been when we take all we know about the network into consideration. // // used for testing only -func (k *Kademlia) Healthy(pp *PeerPot) *Health { +func (k *Kademlia) GetHealthInfo(pp *PeerPot) *Health { k.lock.RLock() defer k.lock.RUnlock() if len(pp.NNSet) < k.NeighbourhoodSize { log.Warn("peerpot NNSet < NeighbourhoodSize") } - gotnn, countgotnn, culpritsgotnn := k.connectedNeighbours(pp.NNSet) - knownn, countknownn, culpritsknownn := k.knowNeighbours(pp.NNSet) + connectNN, countConnectNN, missingConnectNN := k.connectedNeighbours(pp.NNSet) + knownNN, countKnownNN, missingKnownNN := k.knowNeighbours(pp.NNSet) depth := depthForPot(k.conns, k.NeighbourhoodSize, k.base) - saturated := k.saturation() < depth - log.Trace(fmt.Sprintf("%08x: healthy: knowNNs: %v, gotNNs: %v, saturated: %v\n", k.base, knownn, gotnn, saturated)) + isSaturated := k.saturation() < depth + + weakBins := k.getWeakBins() + isRobust := len(weakBins) == 0 + + log.Trace(fmt.Sprintf("%08x: healthy: knowNNs: %v, gotNNs: %v, isSaturated: %v, isRobust:%v\n", k.base, knownNN, connectNN, isSaturated, isRobust)) + return &Health{ - KnowNN: knownn, - CountKnowNN: countknownn, - MissingKnowNN: culpritsknownn, - ConnectNN: gotnn, - CountConnectNN: countgotnn, - MissingConnectNN: culpritsgotnn, - Saturated: saturated, + KnowNN: knownNN, + CountKnowNN: countKnownNN, + MissingKnowNN: missingKnownNN, + ConnectNN: connectNN, + CountConnectNN: countConnectNN, + MissingConnectNN: missingConnectNN, + Saturated: isSaturated, + Robust: isRobust, Hive: k.string(), } } diff --git a/swarm/network/kademlia_test.go b/swarm/network/kademlia_test.go index 8a724756b6..d59d687cb6 100644 --- a/swarm/network/kademlia_test.go +++ 
b/swarm/network/kademlia_test.go @@ -1,4 +1,4 @@ -// Copyright 2018 The go-ethereum Authors +// Copyright 2017 The go-ethereum Authors // This file is part of the go-ethereum library. // // The go-ethereum library is free software: you can redistribute it and/or modify @@ -168,10 +168,7 @@ func TestNeighbourhoodDepth(t *testing.T) { testNum++ } -// TestHealthStrict tests the simplest definition of health -// Which means whether we are connected to all neighbors we know of func TestHealthStrict(t *testing.T) { - // base address is all zeros // no peers // unhealthy (and lonely) @@ -199,9 +196,9 @@ func TestHealthStrict(t *testing.T) { tk.checkHealth(true, false) // know three peers, connected to the two deepest - // healthy + // unhealthy (not robust) tk.Register("00000000") - tk.checkHealth(true, false) + tk.checkHealth(false, false) // know three peers, connected to all three // healthy @@ -229,7 +226,7 @@ func TestHealthStrict(t *testing.T) { tk.checkHealth(true, false) } -func (tk *testKademlia) checkHealth(expectHealthy bool, expectSaturation bool) { +func (tk *testKademlia) checkHealth(expectHealthy bool, expectSaturated bool) { tk.t.Helper() kid := common.Bytes2Hex(tk.BaseAddr()) addrs := [][]byte{tk.BaseAddr()} @@ -239,18 +236,100 @@ func (tk *testKademlia) checkHealth(expectHealthy bool, expectSaturation bool) { }) pp := NewPeerPotMap(tk.NeighbourhoodSize, addrs) - healthParams := tk.Healthy(pp[kid]) + healthParams := tk.GetHealthInfo(pp[kid]) - // definition of health, all conditions but be true: - // - we at least know one peer - // - we know all neighbors - // - we are connected to all known neighbors - health := healthParams.KnowNN && healthParams.ConnectNN && healthParams.CountKnowNN > 0 + health := healthParams.IsHealthyStrict() if expectHealthy != health { tk.t.Fatalf("expected kademlia health %v, is %v\n%v", expectHealthy, health, tk.String()) } } +func TestIsRobust(t *testing.T) { + tk := newTestKademlia(t, "11111111") + isRobust(t, tk, true) + + 
// know one peer but not connected + // not robust and not healthy + tk.Register("11100000") + log.Trace(tk.String()) + isRobust(t, tk, false) + + // know one peer and connected + // healthy and robust + tk.On("11100000") + isRobust(t, tk, true) + + // know two peers, only one connected + // not healthy, not robust + tk.Register("11111100") + log.Trace(tk.String()) + isRobust(t, tk, false) + + // know two peers and connected to both + // healthy and robust + tk.On("11111100") + isRobust(t, tk, true) + + // know three peers, connected to the two deepest + // healthy but not robust + tk.Register("00000000") + log.Trace(tk.String()) + isRobust(t, tk, false) + + // know three peers, connected to all three + // healthy and robust + tk.On("00000000") + isRobust(t, tk, true) + + // add another peer in the zero-bin + // still healthy and robust + tk.Register("00000000") + log.Trace(tk.String()) + isRobust(t, tk, true) + + // add peers until depth + // healthy but not robust + tk.Register("10000000") + tk.Register("11000000") + log.Trace(tk.String()) + isRobust(t, tk, false) + + // connect to the first of the two newly registered peers + // still healthy, still not robust + tk.On("10000000") + log.Trace(tk.String()) + isRobust(t, tk, false) + + // connect to the second newly registered peer as well + // healthy and robust + tk.On("11000000") + log.Trace(tk.String()) + isRobust(t, tk, true) +} + +// retrieves the health object based on the current connectivity of the given kademlia +func getHealth(k *Kademlia) *Health { + kid := common.Bytes2Hex(k.BaseAddr()) + addrs := [][]byte{k.BaseAddr()} + k.EachAddr(nil, 255, func(addr *BzzAddr, po int) bool { + addrs = append(addrs, addr.Address()) + return true + }) + pp := NewPeerPotMap(k.NeighbourhoodSize, addrs) + return k.GetHealthInfo(pp[kid]) +} + +// evaluates healthiness by taking the robustness of connections into account +// additional conditions for healthiness +// - IF we know of peers in bins shallower than depth, connected to at least HealthBinSize
of them +func isRobust(t *testing.T, k *testKademlia, expectIsRobust bool) { + t.Helper() + healthParams := getHealth(k.Kademlia) + if expectIsRobust != healthParams.Robust { + t.Fatalf("expected kademlia potency %v, is %v\n%v", expectIsRobust, healthParams.Robust, k.String()) + } +} + func (tk *testKademlia) checkSuggestPeer(expAddr string, expDepth int, expChanged bool) { tk.t.Helper() addr, depth, changed := tk.SuggestPeer() diff --git a/swarm/network/simulation/kademlia.go b/swarm/network/simulation/kademlia.go index 6d8d0e0a2c..b7ef024a68 100644 --- a/swarm/network/simulation/kademlia.go +++ b/swarm/network/simulation/kademlia.go @@ -64,13 +64,13 @@ func (s *Simulation) WaitTillHealthy(ctx context.Context) (ill map[enode.ID]*net addr := common.Bytes2Hex(k.BaseAddr()) pp := ppmap[addr] //call Healthy RPC - h := k.Healthy(pp) + h := k.GetHealthInfo(pp) //print info log.Debug(k.String()) log.Debug("kademlia", "connectNN", h.ConnectNN, "knowNN", h.KnowNN) log.Debug("kademlia", "health", h.ConnectNN && h.KnowNN, "addr", hex.EncodeToString(k.BaseAddr()), "node", id) log.Debug("kademlia", "ill condition", !h.ConnectNN, "addr", hex.EncodeToString(k.BaseAddr()), "node", id) - if !h.ConnectNN { + if !h.IsHealthyStrict() { ill[id] = k } } diff --git a/swarm/network/simulations/discovery/discovery_test.go b/swarm/network/simulations/discovery/discovery_test.go index e695bc4ac0..737ffcf3e3 100644 --- a/swarm/network/simulations/discovery/discovery_test.go +++ b/swarm/network/simulations/discovery/discovery_test.go @@ -267,11 +267,11 @@ func discoverySimulation(nodes, conns int, adapter adapters.NodeAdapter) (*simul } healthy := &network.Health{} - if err := client.Call(&healthy, "hive_healthy", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil { + if err := client.Call(&healthy, "hive_getHealthInfo", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil { return false, fmt.Errorf("error getting node health: %s", err) } - log.Debug(fmt.Sprintf("node %4s healthy: connected nearest 
neighbours: %v, know nearest neighbours: %v,\n\n%v", id, healthy.ConnectNN, healthy.KnowNN, healthy.Hive)) - return healthy.KnowNN && healthy.ConnectNN, nil + log.Debug(fmt.Sprintf("node %4s healthy: connected nearest neighbours: %v, know nearest neighbours: %v, is robust: %v, \n\n%v", id, healthy.ConnectNN, healthy.KnowNN, healthy.Robust, healthy.Hive)) + return healthy.IsHealthyStrict(), nil } // 64 nodes ~ 1min @@ -352,7 +352,7 @@ func discoveryPersistenceSimulation(nodes, conns int, adapter adapters.NodeAdapt healthy := &network.Health{} addr := id.String() ppmap := network.NewPeerPotMap(network.NewKadParams().NeighbourhoodSize, addrs) - if err := client.Call(&healthy, "hive_healthy", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil { + if err := client.Call(&healthy, "hive_getHealthInfo", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil { return fmt.Errorf("error getting node health: %s", err) } @@ -425,12 +425,12 @@ func discoveryPersistenceSimulation(nodes, conns int, adapter adapters.NodeAdapt healthy := &network.Health{} ppmap := network.NewPeerPotMap(network.NewKadParams().NeighbourhoodSize, addrs) - if err := client.Call(&healthy, "hive_healthy", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil { + if err := client.Call(&healthy, "hive_getHealthInfo", ppmap[common.Bytes2Hex(id.Bytes())]); err != nil { return false, fmt.Errorf("error getting node health: %s", err) } log.Info(fmt.Sprintf("node %4s healthy: got nearest neighbours: %v, know nearest neighbours: %v", id, healthy.ConnectNN, healthy.KnowNN)) - return healthy.KnowNN && healthy.ConnectNN, nil + return healthy.IsHealthyStrict(), nil } // 64 nodes ~ 1min