From 76890008162e5072cbd0d7e259482f04d039398a Mon Sep 17 00:00:00 2001
From: "bingshen.wbs"
Date: Fri, 13 Sep 2024 16:18:10 +0800
Subject: [PATCH] set erdma resource quota based on EniQuantity to avoid too many normal multiip pod quota consume by ERI

Signed-off-by: bingshen.wbs
---
 daemon/builder.go                       | 10 +--
 daemon/config.go                        |  4 +-
 pkg/aliyun/client/limit.go              | 11 +++-
 pkg/aliyun/client/limit_test.go         | 86 +++++++++++++++++++++++++
 pkg/controller/node/node.go             |  2 +-
 plugin/datapath/policy_router_linux.go  |  6 +-
 plugin/driver/utils/utils_linux.go      | 50 ++++++++++++++
 plugin/driver/utils/utils_linux_test.go |  6 ++
 8 files changed, 165 insertions(+), 10 deletions(-)

diff --git a/daemon/builder.go b/daemon/builder.go
index 744dbd01..c7e6d8b1 100644
--- a/daemon/builder.go
+++ b/daemon/builder.go
@@ -275,7 +275,7 @@ func (b *NetworkServiceBuilder) setupENIManager() error {
 		return err
 	}
 	realRdmaCount := b.limit.ERDMARes()
-	if b.config.EnableERDMA && len(attached) >= b.limit.Adapters-1-b.limit.ERdmaAdapters {
+	if b.config.EnableERDMA && len(attached) >= b.limit.Adapters-1-b.limit.ERDMARes() {
 		attachedERdma := lo.Filter(attached, func(ni *daemon.ENI, idx int) bool { return ni.ERdma })
 		if len(attachedERdma) <= 0 {
 			// turn off only when no one use it
@@ -359,8 +359,8 @@ func (b *NetworkServiceBuilder) setupENIManager() error {
 		}
 		normalENINeeded := poolConfig.MaxENI - normalENICount
 		if b.config.EnableERDMA {
-			normalENINeeded = poolConfig.MaxENI - b.limit.ERdmaAdapters - normalENICount
-			for i := 0; i < b.limit.ERdmaAdapters-erdmaENICount; i++ {
+			normalENINeeded = poolConfig.MaxENI - b.limit.ERDMARes() - normalENICount
+			for i := 0; i < b.limit.ERDMARes()-erdmaENICount; i++ {
 				eniList = append(eniList, eni.NewLocal(nil, "erdma", factory, poolConfig))
 			}
 		}
@@ -389,8 +389,8 @@ func (b *NetworkServiceBuilder) setupENIManager() error {
 		}
 		normalENINeeded := poolConfig.MaxENI - normalENICount
 		if b.config.EnableERDMA {
-			normalENINeeded = poolConfig.MaxENI - b.limit.ERdmaAdapters - normalENICount
-			for i := 0; i < b.limit.ERdmaAdapters-erdmaENICount; i++ {
+			normalENINeeded = poolConfig.MaxENI - b.limit.ERDMARes() - normalENICount
+			for i := 0; i < b.limit.ERDMARes()-erdmaENICount; i++ {
 				eniList = append(eniList, eni.NewLocal(nil, "erdma", factory, poolConfig))
 			}
 		}
diff --git a/daemon/config.go b/daemon/config.go
index 18ccb947..8d987ff4 100644
--- a/daemon/config.go
+++ b/daemon/config.go
@@ -123,7 +123,7 @@ func getPoolConfig(cfg *daemon.Config, daemonMode string, limit *client.Limits)
 		poolConfig.MaxIPPerENI = 1
 
 		if cfg.EnableERDMA {
-			poolConfig.ERdmaCapacity = limit.ERdmaAdapters
+			poolConfig.ERdmaCapacity = limit.ERDMARes()
 		}
 	case daemon.ModeENIMultiIP:
 		maxENI = limit.Adapters
@@ -161,7 +161,7 @@ func getPoolConfig(cfg *daemon.Config, daemonMode string, limit *client.Limits)
 		poolConfig.MaxIPPerENI = ipPerENI
 
 		if cfg.EnableERDMA {
-			poolConfig.ERdmaCapacity = limit.ERdmaAdapters * limit.IPv4PerAdapter
+			poolConfig.ERdmaCapacity = limit.ERDMARes() * limit.IPv4PerAdapter
 		}
 	}
 
diff --git a/pkg/aliyun/client/limit.go b/pkg/aliyun/client/limit.go
index 54f45540..d101227b 100644
--- a/pkg/aliyun/client/limit.go
+++ b/pkg/aliyun/client/limit.go
@@ -66,7 +66,16 @@ func (l *Limits) MultiIPPod() int {
 }
 
 func (l *Limits) ERDMARes() int {
-	return l.ERdmaAdapters
+	if l.ERdmaAdapters <= 0 || l.Adapters <= 2 {
+		return 0
+	}
+	// limit adapters
+	if l.Adapters >= 8 {
+		// for multi physical network card instance
+		return min(2, l.ERdmaAdapters)
+	}
+	// limit normal ecs eri to 1, to avoid too many normal multiip pod quota consume
+	return min(1, l.ERdmaAdapters)
 }
 
 func (l *Limits) ExclusiveENIPod() int {
diff --git a/pkg/aliyun/client/limit_test.go b/pkg/aliyun/client/limit_test.go
index 6360af99..8cb023ed 100644
--- a/pkg/aliyun/client/limit_test.go
+++ b/pkg/aliyun/client/limit_test.go
@@ -72,6 +72,92 @@ func TestGetInstanceType(t *testing.T) {
 	}
 }
 
+func TestGetERIRes(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    *ecs.InstanceType
+		expected int
+	}{
+		{
+			name: "not support instance type",
+			input: &ecs.InstanceType{
+				EniQuantity:                 2,
+				EniPrivateIpAddressQuantity: 5,
+				EniIpv6AddressQuantity:      10,
+				EniTotalQuantity:            6,
+				EriQuantity:                 0,
+				InstanceBandwidthRx:         1000,
+				InstanceBandwidthTx:         500,
+				EniTrunkSupported:           true,
+			},
+			expected: 0,
+		},
+		{
+			name: "Small instance type",
+			input: &ecs.InstanceType{
+				EniQuantity:                 2,
+				EniPrivateIpAddressQuantity: 5,
+				EniIpv6AddressQuantity:      10,
+				EniTotalQuantity:            6,
+				EriQuantity:                 2,
+				InstanceBandwidthRx:         1000,
+				InstanceBandwidthTx:         500,
+				EniTrunkSupported:           true,
+			},
+			expected: 0,
+		},
+		{
+			name: "Basic instance type",
+			input: &ecs.InstanceType{
+				EniQuantity:                 4,
+				EniPrivateIpAddressQuantity: 5,
+				EniIpv6AddressQuantity:      10,
+				EniTotalQuantity:            6,
+				EriQuantity:                 2,
+				InstanceBandwidthRx:         1000,
+				InstanceBandwidthTx:         500,
+				EniTrunkSupported:           true,
+			},
+			expected: 1,
+		},
+		{
+			name: "giant instance type only one eri",
+			input: &ecs.InstanceType{
+				EniQuantity:                 8,
+				EniPrivateIpAddressQuantity: 5,
+				EniIpv6AddressQuantity:      10,
+				EniTotalQuantity:            10,
+				EriQuantity:                 1,
+				InstanceBandwidthRx:         1000,
+				InstanceBandwidthTx:         500,
+				EniTrunkSupported:           true,
+			},
+			expected: 1,
+		},
+		{
+			name: "giant instance type",
+			input: &ecs.InstanceType{
+				EniQuantity:                 8,
+				EniPrivateIpAddressQuantity: 5,
+				EniIpv6AddressQuantity:      10,
+				EniTotalQuantity:            10,
+				EriQuantity:                 4,
+				InstanceBandwidthRx:         1000,
+				InstanceBandwidthTx:         500,
+				EniTrunkSupported:           true,
+			},
+			expected: 2,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			actual := getInstanceType(tt.input)
+			assert.Equal(t, tt.expected, actual.ERDMARes())
+		})
+	}
+}
+
 func TestECSLimitProvider_GetLimitFromAnno(t *testing.T) {
 
 	type args struct {
diff --git a/pkg/controller/node/node.go b/pkg/controller/node/node.go
index 977898d2..97346f3e 100644
--- a/pkg/controller/node/node.go
+++ b/pkg/controller/node/node.go
@@ -144,7 +144,7 @@ func (r *ReconcileNode) createOrUpdate(ctx context.Context, k8sNode *corev1.Node
 			InstanceBandwidthTx: limit.InstanceBandwidthTx,
 			InstanceBandwidthRx: limit.InstanceBandwidthRx,
 			Adapters:            limit.Adapters,
-			EriQuantity:         limit.ERdmaAdapters,
+			EriQuantity:         limit.ERDMARes(),
 			TotalAdapters:       limit.TotalAdapters,
 			IPv6PerAdapter:      limit.IPv6PerAdapter,
 			MemberAdapterLimit:  limit.MemberAdapterLimit,
diff --git a/plugin/datapath/policy_router_linux.go b/plugin/datapath/policy_router_linux.go
index 96f156c2..6bd22a9d 100644
--- a/plugin/datapath/policy_router_linux.go
+++ b/plugin/datapath/policy_router_linux.go
@@ -351,7 +351,11 @@ func (d *PolicyRoute) Setup(cfg *types.SetupConfig, netNS ns.NetNS) error {
 	}
 
 	if cfg.ERDMA {
-		err = smc.ConfigSMCForDevice("erdma_0", cfg.ContainerIfName, netNS)
+		rdmaDev, err := utils.GetERdmaFromLink(eni)
+		if err != nil {
+			return fmt.Errorf("error get erdma device: %w", err)
+		}
+		err = smc.ConfigSMCForDevice(rdmaDev.Attrs.Name, cfg.ContainerIfName, netNS)
 		if err != nil {
 			return fmt.Errorf("error setup pnet config for pod: %w", err)
 		}
diff --git a/plugin/driver/utils/utils_linux.go b/plugin/driver/utils/utils_linux.go
index 97ea58f9..7030a7f2 100644
--- a/plugin/driver/utils/utils_linux.go
+++ b/plugin/driver/utils/utils_linux.go
@@ -5,6 +5,8 @@ import (
 	"fmt"
 	"net"
 	"os"
+	"strconv"
+	"strings"
 
 	terwayIP "github.com/AliyunContainerService/terway/pkg/ip"
 	terwaySysctl "github.com/AliyunContainerService/terway/pkg/sysctl"
@@ -844,3 +846,51 @@ func CleanIPRules() (err error) {
 
 	return nil
 }
+
+// GetERdmaFromLink returns the RDMA link whose node GUID maps to the hardware
+// address of link. It returns an error when the RDMA links cannot be listed,
+// a GUID cannot be parsed, link has no hardware address, or nothing matches.
+func GetERdmaFromLink(link netlink.Link) (*netlink.RdmaLink, error) {
+	rdmaLinks, err := netlink.RdmaLinkList()
+	if err != nil {
+		return nil, fmt.Errorf("error list rdma links, %v", err)
+	}
+	hwAddr := link.Attrs().HardwareAddr
+	if len(hwAddr) == 0 {
+		return nil, fmt.Errorf("link %s has no hardware address", link.Attrs().Name)
+	}
+	// The address derived from an erdma guid has bit 0x2 of its first byte
+	// flipped. Flip it once on a copy: doing it in place would corrupt the
+	// link attrs and toggle the bit back on every other loop iteration.
+	wantHwAddr := append(net.HardwareAddr(nil), hwAddr...)
+	wantHwAddr[0] ^= 0x2
+	for _, rl := range rdmaLinks {
+		rdmaHwAddr, err := parseERdmaLinkHwAddr(rl.Attrs.NodeGuid)
+		if err != nil {
+			return nil, err
+		}
+		if rdmaHwAddr.String() == wantHwAddr.String() {
+			return rl, nil
+		}
+	}
+	return nil, fmt.Errorf("cannot found rdma link for %s", link.Attrs().Name)
+}
+
+// parseERdmaLinkHwAddr converts a colon-separated 8-byte RDMA node GUID into a
+// 6-byte hardware address: the bytes are reversed and the two middle bytes
+// (indexes 3 and 4 after reversal) are dropped.
+func parseERdmaLinkHwAddr(guid string) (net.HardwareAddr, error) {
+	hwAddrSlice := make([]byte, 8)
+	guidSlice := strings.Split(guid, ":")
+	if len(guidSlice) != 8 {
+		return nil, fmt.Errorf("invalid rdma guid: %s", guid)
+	}
+	for i, s := range guidSlice {
+		sint, err := strconv.ParseUint(s, 16, 8)
+		if err != nil {
+			return nil, fmt.Errorf("invalid rdma guid: %s, err: %v", guid, err)
+		}
+		hwAddrSlice[7-i] = uint8(sint)
+	}
+	return append(hwAddrSlice[0:3], hwAddrSlice[5:8]...), nil
+}
diff --git a/plugin/driver/utils/utils_linux_test.go b/plugin/driver/utils/utils_linux_test.go
index 8b9bb57d..c1f1e503 100644
--- a/plugin/driver/utils/utils_linux_test.go
+++ b/plugin/driver/utils/utils_linux_test.go
@@ -293,3 +293,9 @@ var _ = Describe("Test TC filter", func() {
 		Expect(err).NotTo(HaveOccurred())
 	})
 })
+
+func TestParseERdmaLinkHwAddress(t *testing.T) {
+	hwaddr, err := parseERdmaLinkHwAddr("0d:d3:04:fe:ff:3e:16:02")
+	assert.NoError(t, err)
+	assert.Equal(t, "02:16:3e:04:d3:0d", hwaddr.String())
+}