-
Notifications
You must be signed in to change notification settings - Fork 474
/
Copy pathbgp_peers.go
431 lines (388 loc) · 14.6 KB
/
bgp_peers.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
package routing
import (
"context"
"errors"
"fmt"
"net"
"strconv"
"strings"
"time"
"github.com/cloudnativelabs/kube-router/v2/pkg/metrics"
"github.com/cloudnativelabs/kube-router/v2/pkg/options"
"github.com/cloudnativelabs/kube-router/v2/pkg/utils"
gobgpapi "github.com/osrg/gobgp/v3/api"
gobgp "github.com/osrg/gobgp/v3/pkg/server"
v1core "k8s.io/api/core/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/klog/v2"
)
// syncInternalPeers refreshes the peer relationship with the rest of the nodes in the cluster
// (iBGP peers). Node add/remove events should ensure peer relationship with only currently
// active nodes. In case we miss any events from the API server, this method, which is called
// periodically, ensures that the peer relationship with removed nodes is deleted.
//
// The method holds nrc.mu for its whole duration. For every node returned by the node lister
// that passes the filters below, it records the node's primary IP in nrc.activeNodes and asks
// the BGP server to add it as a peer. Afterwards every entry of nrc.activeNodes that was not
// seen in this pass is removed from the BGP server and from nrc.activeNodes.
//
// A node is skipped (not peered with) when:
//   - a KRNode cannot be built from it,
//   - it is this node itself (same primary IP),
//   - we are a route-reflector client and the node lacks the rr-server annotation,
//   - full mesh is disabled and the node's ASN annotation is missing, invalid or different,
//   - its primary IP uses a different IP family than our primary IP.
func (nrc *NetworkRoutingController) syncInternalPeers() {
	nrc.mu.Lock()
	defer nrc.mu.Unlock()

	start := time.Now()
	defer func() {
		endTime := time.Since(start)
		if nrc.MetricsEnabled {
			metrics.ControllerBGPInternalPeersSyncTime.Observe(endTime.Seconds())
		}
		klog.V(2).Infof("Syncing BGP peers for the node took %v", endTime)
	}()

	// get the current list of the nodes from API server
	nodes := nrc.nodeLister.List()

	if nrc.MetricsEnabled {
		metrics.ControllerBPGpeers.Set(float64(len(nodes)))
	}

	// establish peer and add Pod CIDRs with current set of nodes; the set is keyed by the
	// primary IP string so that the stale-peer sweep below is a constant-time lookup per entry
	currentNodes := make(map[string]struct{}, len(nodes))
	for _, obj := range nodes {
		node := obj.(*v1core.Node)
		targetNode, err := utils.NewRemoteKRNode(node)
		if err != nil {
			klog.Errorf("failed to create KRNode from node object: %v", err)
			continue
		}

		targetIP := targetNode.GetPrimaryNodeIP()
		targetIPStr := targetIP.String()

		// skip self
		if targetIP.Equal(nrc.krNode.GetPrimaryNodeIP()) {
			continue
		}

		// we are rr-client peer only with rr-server
		if nrc.bgpRRClient {
			if _, ok := node.Annotations[rrServerAnnotation]; !ok {
				continue
			}
		}

		// if node full mesh is not requested then just peer with nodes with same ASN
		// (run iBGP among same ASN peers)
		if !nrc.bgpFullMeshMode {
			nodeasn, ok := node.Annotations[nodeASNAnnotation]
			if !ok {
				klog.Infof("Not peering with the Node %s as ASN number of the node is unknown.",
					targetIPStr)
				continue
			}

			asnNo, err := strconv.ParseUint(nodeasn, 0, asnMaxBitSize)
			if err != nil {
				klog.Infof("Not peering with the Node %s as ASN number of the node is invalid.",
					targetIPStr)
				continue
			}

			// if the nodes ASN number is different from ASN number of current node skip peering
			if nrc.nodeAsnNumber != uint32(asnNo) {
				klog.Infof("Not peering with the Node %s as ASN number of the node is different.",
					targetIPStr)
				continue
			}
		}

		// only peer when both primary IPs belong to the same IP family
		targetNodeIsIPv4 := targetIP.To4() != nil
		sourceNodeIsIPv4 := nrc.krNode.GetPrimaryNodeIP().To4() != nil
		if targetNodeIsIPv4 != sourceNodeIsIPv4 {
			klog.Warningf("Not peering with Node %s as it's primary IP (%s) uses a different protocol than "+
				"our primary IP (%s)", node.Name, targetIP,
				nrc.krNode.GetPrimaryNodeIP())
			continue
		}

		currentNodes[targetIPStr] = struct{}{}
		nrc.activeNodes[targetIPStr] = true

		// explicitly set neighbors.transport.config.local-address with primaryIP which is configured
		// as their neighbor address at the remote peers.
		// this prevents the controller from initiating connection to its peers with a different IP address
		// when multiple L3 interfaces are active.
		n := &gobgpapi.Peer{
			Conf: &gobgpapi.PeerConf{
				NeighborAddress: targetIPStr,
				PeerAsn:         nrc.nodeAsnNumber,
			},
			Transport: &gobgpapi.Transport{
				LocalAddress: nrc.krNode.GetPrimaryNodeIP().String(),
				RemotePort:   nrc.bgpPort,
			},
		}

		if nrc.bgpGracefulRestart {
			n.GracefulRestart = &gobgpapi.GracefulRestart{
				Enabled:         true,
				RestartTime:     uint32(nrc.bgpGracefulRestartTime.Seconds()),
				DeferralTime:    uint32(nrc.bgpGracefulRestartDeferralTime.Seconds()),
				LocalRestarting: true,
			}

			// enable multi-protocol graceful restart for each address family the target node supports
			if targetNode.IsIPv4Capable() {
				afiSafi := gobgpapi.AfiSafi{
					Config: &gobgpapi.AfiSafiConfig{
						Family:  &gobgpapi.Family{Afi: gobgpapi.Family_AFI_IP, Safi: gobgpapi.Family_SAFI_UNICAST},
						Enabled: true,
					},
					MpGracefulRestart: &gobgpapi.MpGracefulRestart{
						Config: &gobgpapi.MpGracefulRestartConfig{
							Enabled: true,
						},
						State: &gobgpapi.MpGracefulRestartState{},
					},
				}
				n.AfiSafis = append(n.AfiSafis, &afiSafi)
			}
			if targetNode.IsIPv6Capable() {
				afiSafi := gobgpapi.AfiSafi{
					Config: &gobgpapi.AfiSafiConfig{
						Family:  &gobgpapi.Family{Afi: gobgpapi.Family_AFI_IP6, Safi: gobgpapi.Family_SAFI_UNICAST},
						Enabled: true,
					},
					MpGracefulRestart: &gobgpapi.MpGracefulRestart{
						Config: &gobgpapi.MpGracefulRestartConfig{
							Enabled: true,
						},
						State: &gobgpapi.MpGracefulRestartState{},
					},
				}
				n.AfiSafis = append(n.AfiSafis, &afiSafi)
			}
		}

		// we are rr-server peer with other rr-client with reflection enabled
		if nrc.bgpRRServer {
			if _, ok := node.Annotations[rrClientAnnotation]; ok {
				// add rr options with clusterId
				n.RouteReflector = &gobgpapi.RouteReflector{
					RouteReflectorClient:    true,
					RouteReflectorClusterId: fmt.Sprint(nrc.bgpClusterID),
				}
			}
		}

		// TODO: check if a node is already added as neighbor in a better way than add and catch error
		if err := nrc.bgpServer.AddPeer(context.Background(), &gobgpapi.AddPeerRequest{
			Peer: n,
		}); err != nil {
			// an already-existing peer is the expected outcome on every re-sync, so stay quiet about it
			if !strings.Contains(err.Error(), "can't overwrite the existing peer") {
				klog.Errorf("Failed to add node %s as peer due to %s", targetIP, err)
			}
		}
	}

	// delete the neighbor for every previously active node that was not seen in this pass.
	// Removing the entry currently being visited while ranging over a map is safe in Go.
	for ip := range nrc.activeNodes {
		if _, stillActive := currentNodes[ip]; stillActive {
			continue
		}
		if err := nrc.bgpServer.DeletePeer(context.Background(), &gobgpapi.DeletePeerRequest{Address: ip}); err != nil {
			klog.Errorf("Failed to remove node %s as peer due to %s", ip, err)
		}
		delete(nrc.activeNodes, ip)
	}
}
// connectToExternalBGPPeers adds all the configured eBGP peers (global or node specific) as neighbours.
//
// Parameters:
//   - server: BGP server the peers are added to.
//   - peerNeighbors: peer definitions; each one is mutated in place (graceful restart, address
//     families and multihop settings) before being added.
//   - bgpGracefulRestart: when true, graceful restart is enabled on every peer using
//     bgpGracefulRestartTime and bgpGracefulRestartDeferralTime (truncated to whole seconds).
//   - peerMultihopTTL: when greater than 1, eBGP multihop is enabled with this TTL.
//
// Peers whose neighbor address or local address cannot be parsed, or whose neighbor address and
// local address belong to different IP families, are logged and skipped. The first failure to add
// a peer aborts the loop and is returned as an error; peers after it are not attempted.
func (nrc *NetworkRoutingController) connectToExternalBGPPeers(server *gobgp.BgpServer, peerNeighbors []*gobgpapi.Peer,
	bgpGracefulRestart bool, bgpGracefulRestartDeferralTime time.Duration, bgpGracefulRestartTime time.Duration,
	peerMultihopTTL uint8) error {
	for _, n := range peerNeighbors {
		neighborIPStr := n.Conf.NeighborAddress
		neighborIP := net.ParseIP(neighborIPStr)
		if neighborIP == nil {
			klog.Errorf("unable to parse CIDR of global peer (%s), not peering with this peer",
				neighborIPStr)
			continue
		}

		peeringAddressForNeighbor := net.ParseIP(n.Transport.LocalAddress)
		if peeringAddressForNeighbor == nil {
			klog.Errorf("unable to parse our local address for peer (%s), not peering with this peer (%s)",
				n.Transport.LocalAddress, neighborIPStr)
			// Without this the nil address would be treated as "not IPv4" below, and an IPv6
			// neighbor would still be peered with using an unparsable local address.
			continue
		}

		neighborIsIPv4 := neighborIP.To4() != nil
		peeringAddressIsIPv4 := peeringAddressForNeighbor.To4() != nil
		if neighborIsIPv4 != peeringAddressIsIPv4 {
			klog.Warningf("Not peering with configured peer as it's primary IP (%s) uses a different "+
				"protocol than our configured local-address (%s). Its possible that this can be resolved by setting "+
				"the local address appropriately", neighborIP, peeringAddressForNeighbor)
			continue
		}

		if bgpGracefulRestart {
			n.GracefulRestart = &gobgpapi.GracefulRestart{
				Enabled:         true,
				RestartTime:     uint32(bgpGracefulRestartTime.Seconds()),
				DeferralTime:    uint32(bgpGracefulRestartDeferralTime.Seconds()),
				LocalRestarting: true,
			}

			// The IPv4 branch replaces any address families already set on the peer; the IPv6
			// branch appends to whatever is present at that point.
			if nrc.krNode.IsIPv4Capable() {
				n.AfiSafis = []*gobgpapi.AfiSafi{
					{
						Config: &gobgpapi.AfiSafiConfig{
							Family:  &gobgpapi.Family{Afi: gobgpapi.Family_AFI_IP, Safi: gobgpapi.Family_SAFI_UNICAST},
							Enabled: true,
						},
						MpGracefulRestart: &gobgpapi.MpGracefulRestart{
							Config: &gobgpapi.MpGracefulRestartConfig{
								Enabled: true,
							},
						},
					},
				}
			}
			if nrc.krNode.IsIPv6Capable() {
				afiSafi := gobgpapi.AfiSafi{
					Config: &gobgpapi.AfiSafiConfig{
						Family:  &gobgpapi.Family{Afi: gobgpapi.Family_AFI_IP6, Safi: gobgpapi.Family_SAFI_UNICAST},
						Enabled: true,
					},
					MpGracefulRestart: &gobgpapi.MpGracefulRestart{
						Config: &gobgpapi.MpGracefulRestartConfig{
							Enabled: true,
						},
					},
				}
				n.AfiSafis = append(n.AfiSafis, &afiSafi)
			}
		}

		if peerMultihopTTL > 1 {
			n.EbgpMultihop = &gobgpapi.EbgpMultihop{
				Enabled:     true,
				MultihopTtl: uint32(peerMultihopTTL),
			}
		}

		err := server.AddPeer(context.Background(), &gobgpapi.AddPeerRequest{Peer: n})
		if err != nil {
			// %w keeps the message text unchanged while letting callers unwrap the cause
			return fmt.Errorf("error peering with peer router "+
				"%q due to: %w", n.Conf.NeighborAddress, err)
		}
		klog.V(2).Infof("Successfully configured %s in ASN %v as BGP peer to the node",
			n.Conf.NeighborAddress, n.Conf.PeerAsn)
	}
	return nil
}
// newGlobalPeers validates the global peer configuration and builds one neighbor config per
// entry of ips.
//
// asns must have exactly one entry per IP. ports, passwords and localips are optional: each must
// either be empty or have exactly one entry per IP. When ports is empty the default BGP port is
// used; when localips is empty, or an entry is blank, localAddress is used as the local address
// of that peer. holdtime is converted to an unsigned integer and set as the hold time of each peer.
//
// An error is returned when the slice lengths are inconsistent or when an ASN falls outside the
// accepted ranges; in that case no peers are returned.
func newGlobalPeers(ips []net.IP, ports []uint32, asns []uint32, passwords []string, localips []string,
	holdtime float64, localAddress string) ([]*gobgpapi.Peer, error) {
	peerCount := len(ips)

	// Validations
	switch {
	case peerCount != len(asns):
		return nil, errors.New("invalid peer router config, the number of IPs and ASN numbers must be equal")
	case len(passwords) != 0 && peerCount != len(passwords):
		return nil, errors.New("invalid peer router config. The number of passwords should either be zero, or " +
			"one per peer router. Use blank items if a router doesn't expect a password. Example: \"pass,,pass\" " +
			"OR [\"pass\",\"\",\"pass\"]")
	case len(ports) != 0 && peerCount != len(ports):
		return nil, fmt.Errorf("invalid peer router config. The number of ports should either be zero, or "+
			"one per peer router. If blank items are used, it will default to standard BGP port, %s. "+
			"Example: \"port,,port\" OR [\"port\",\"\",\"port\"]", strconv.Itoa(options.DefaultBgpPort))
	case len(localips) != 0 && peerCount != len(localips):
		return nil, fmt.Errorf("invalid peer router config. The number of localIPs should either be zero, or "+
			"one per peer router. If blank items are used, it will default to nodeIP, %s. "+
			"Example: \"10.1.1.1,,10.1.1.2\" OR [\"10.1.1.1\",\"\",\"10.1.1.2\"]", localAddress)
	}

	peers := make([]*gobgpapi.Peer, 0, peerCount)
	for idx, ip := range ips {
		asn := asns[idx]

		// an ASN is accepted only when it lies inside one of these inclusive ranges
		asnAllowed := (asn >= 1 && asn <= 23455) ||
			(asn >= 23457 && asn <= 63999) ||
			(asn >= 64512 && asn <= 65534) ||
			(asn >= 131072 && asn <= 4199999999) ||
			(asn >= 4200000000 && asn <= 4294967294)
		if !asnAllowed {
			return nil, fmt.Errorf("reserved ASN number \"%d\" for global BGP peer",
				asn)
		}

		// resolve the per-peer overrides first, falling back to the defaults
		var remotePort uint32 = options.DefaultBgpPort
		if len(ports) != 0 {
			remotePort = ports[idx]
		}

		var authPassword string
		if len(passwords) != 0 {
			authPassword = passwords[idx]
		}

		// localAddress is the fallback; a non-blank entry in localips overrides it for this peer only
		bindAddress := localAddress
		if len(localips) != 0 && localips[idx] != "" {
			bindAddress = localips[idx]
		}

		// explicitly set neighbors.transport.config.local-address, which is configured as the
		// neighbor address at the remote peers. This prevents the controller from initiating
		// connections to its peers from a different IP address when multiple L3 interfaces are active.
		peers = append(peers, &gobgpapi.Peer{
			Conf: &gobgpapi.PeerConf{
				NeighborAddress: ip.String(),
				PeerAsn:         asn,
				AuthPassword:    authPassword,
			},
			Timers: &gobgpapi.Timers{Config: &gobgpapi.TimersConfig{HoldTime: uint64(holdtime)}},
			Transport: &gobgpapi.Transport{
				LocalAddress: bindAddress,
				RemotePort:   remotePort,
			},
		})
	}

	return peers, nil
}
// newNodeEventHandler builds the event handler for node watch events. Node additions and
// deletions are forwarded to OnNodeUpdate; node updates are ignored.
func (nrc *NetworkRoutingController) newNodeEventHandler() cache.ResourceEventHandler {
	// onAdd logs the new node's primary IP and triggers a peer refresh. If a KRNode cannot be
	// built from the object the event is dropped after logging the error.
	onAdd := func(obj interface{}) {
		addedNode, err := utils.NewRemoteKRNode(obj.(*v1core.Node))
		if err != nil {
			klog.Errorf("failed to create KRNode from node object: %v", err)
			return
		}

		klog.V(2).Infof("Received node %s added update from watch API so peer with new node",
			addedNode.GetPrimaryNodeIP())
		nrc.OnNodeUpdate(obj)
	}

	// onDelete accepts either the node itself or a tombstone wrapping it, logs the removal and
	// triggers a peer refresh.
	onDelete := func(obj interface{}) {
		var node *v1core.Node
		switch typed := obj.(type) {
		case *v1core.Node:
			node = typed
		case cache.DeletedFinalStateUnknown:
			wrapped, isNode := typed.Obj.(*v1core.Node)
			if !isNode {
				klog.Errorf("unexpected object type: %v", obj)
				return
			}
			node = wrapped
		default:
			klog.Errorf("unexpected object type: %v", obj)
			return
		}

		// In this case even if we can't get the NodeIP that's alright as the node is being removed anyway and
		// future node lister operations that happen in OnNodeUpdate won't be affected as the node won't be returned
		if removedNode, err := utils.NewRemoteKRNode(node); err == nil && removedNode != nil {
			klog.Infof("Received node %s removed update from watch API, so remove node from peer",
				removedNode.GetPrimaryNodeIP())
		} else {
			klog.Infof("Received node (IP unavailable) removed update from watch API, so remove node " +
				"from peer")
		}

		nrc.OnNodeUpdate(obj)
	}

	return cache.ResourceEventHandlerFuncs{
		AddFunc: onAdd,
		UpdateFunc: func(oldObj, newObj interface{}) {
			// we are only interested in node add/delete, so skip update
		},
		DeleteFunc: onDelete,
	}
}
// OnNodeUpdate handles updates from the Node watcher, which calls this method whenever a new
// node is added or an old node is deleted, so that we peer up with new nodes and drop peering
// with old ones. The argument is unused. Nothing happens until the BGP server has been started.
func (nrc *NetworkRoutingController) OnNodeUpdate(_ interface{}) {
	if !nrc.bgpServerStarted {
		return
	}

	// update export policies so that NeighborSet gets updated with new set of nodes;
	// a failure here is logged and does not stop the remaining steps
	if err := nrc.AddPolicies(); err != nil {
		klog.Errorf("Error adding BGP policies: %s", err.Error())
	}

	if nrc.bgpEnableInternal {
		nrc.syncInternalPeers()
	}

	// skip if the first round of disableSourceDestinationCheck() is not done yet; this prevents
	// every node add update from trying to perform disableSourceDestinationCheck
	srcDstCheckReady := nrc.disableSrcDstCheck && nrc.initSrcDstCheckDone && nrc.ec2IamAuthorized
	if srcDstCheckReady {
		nrc.disableSourceDestinationCheck()
	}
}