Skip to content

Commit

Permalink
Merge pull request #693 from BSWANG/erdma_19
Browse files Browse the repository at this point in the history
  • Loading branch information
l1b0k authored Sep 13, 2024
2 parents 325d07e + 7689000 commit a61c695
Show file tree
Hide file tree
Showing 8 changed files with 153 additions and 10 deletions.
10 changes: 5 additions & 5 deletions daemon/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ func (b *NetworkServiceBuilder) setupENIManager() error {
return err
}
realRdmaCount := b.limit.ERDMARes()
if b.config.EnableERDMA && len(attached) >= b.limit.Adapters-1-b.limit.ERdmaAdapters {
if b.config.EnableERDMA && len(attached) >= b.limit.Adapters-1-b.limit.ERDMARes() {
attachedERdma := lo.Filter(attached, func(ni *daemon.ENI, idx int) bool { return ni.ERdma })
if len(attachedERdma) <= 0 {
// turn off only when no one use it
Expand Down Expand Up @@ -359,8 +359,8 @@ func (b *NetworkServiceBuilder) setupENIManager() error {
}
normalENINeeded := poolConfig.MaxENI - normalENICount
if b.config.EnableERDMA {
normalENINeeded = poolConfig.MaxENI - b.limit.ERdmaAdapters - normalENICount
for i := 0; i < b.limit.ERdmaAdapters-erdmaENICount; i++ {
normalENINeeded = poolConfig.MaxENI - b.limit.ERDMARes() - normalENICount
for i := 0; i < b.limit.ERDMARes()-erdmaENICount; i++ {
eniList = append(eniList, eni.NewLocal(nil, "erdma", factory, poolConfig))
}
}
Expand Down Expand Up @@ -389,8 +389,8 @@ func (b *NetworkServiceBuilder) setupENIManager() error {
}
normalENINeeded := poolConfig.MaxENI - normalENICount
if b.config.EnableERDMA {
normalENINeeded = poolConfig.MaxENI - b.limit.ERdmaAdapters - normalENICount
for i := 0; i < b.limit.ERdmaAdapters-erdmaENICount; i++ {
normalENINeeded = poolConfig.MaxENI - b.limit.ERDMARes() - normalENICount
for i := 0; i < b.limit.ERDMARes()-erdmaENICount; i++ {
eniList = append(eniList, eni.NewLocal(nil, "erdma", factory, poolConfig))
}
}
Expand Down
4 changes: 2 additions & 2 deletions daemon/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ func getPoolConfig(cfg *daemon.Config, daemonMode string, limit *client.Limits)

poolConfig.MaxIPPerENI = 1
if cfg.EnableERDMA {
poolConfig.ERdmaCapacity = limit.ERdmaAdapters
poolConfig.ERdmaCapacity = limit.ERDMARes()
}
case daemon.ModeENIMultiIP:
maxENI = limit.Adapters
Expand Down Expand Up @@ -161,7 +161,7 @@ func getPoolConfig(cfg *daemon.Config, daemonMode string, limit *client.Limits)
poolConfig.MaxIPPerENI = ipPerENI

if cfg.EnableERDMA {
poolConfig.ERdmaCapacity = limit.ERdmaAdapters * limit.IPv4PerAdapter
poolConfig.ERdmaCapacity = limit.ERDMARes() * limit.IPv4PerAdapter
}
}

Expand Down
11 changes: 10 additions & 1 deletion pkg/aliyun/client/limit.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,16 @@ func (l *Limits) MultiIPPod() int {
}

// ERDMARes returns the number of ERDMA adapters (ERIs) that should be used on
// this instance.
//
// It returns 0 when the instance reports no ERDMA adapters or has two or fewer
// adapters. Otherwise the reported ERdmaAdapters value is capped at 2 for
// instances with eight or more adapters, and at 1 for every other instance.
func (l *Limits) ERDMARes() int {
	if l.ERdmaAdapters <= 0 || l.Adapters <= 2 {
		return 0
	}

	// Limit normal ECS instances to a single ERI so that ERDMA does not
	// consume too much of the quota available to normal multi-IP pods.
	maxERI := 1
	if l.Adapters >= 8 {
		// Instances with multiple physical network cards may use two ERIs.
		maxERI = 2
	}
	return min(maxERI, l.ERdmaAdapters)
}

func (l *Limits) ExclusiveENIPod() int {
Expand Down
86 changes: 86 additions & 0 deletions pkg/aliyun/client/limit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,92 @@ func TestGetInstanceType(t *testing.T) {
}
}

// TestGetERIRes checks the ERDMARes value of the limits produced by
// getInstanceType for instance types with different ENI and ERI quantities.
func TestGetERIRes(t *testing.T) {
	cases := []struct {
		desc         string
		instanceType *ecs.InstanceType
		want         int
	}{
		// No ERI reported at all.
		{
			desc: "not support instance type",
			instanceType: &ecs.InstanceType{
				EniQuantity:                 2,
				EniPrivateIpAddressQuantity: 5,
				EniIpv6AddressQuantity:      10,
				EniTotalQuantity:            6,
				EriQuantity:                 0,
				InstanceBandwidthRx:         1000,
				InstanceBandwidthTx:         500,
				EniTrunkSupported:           true,
			},
			want: 0,
		},
		// ERIs reported, but only two ENIs.
		{
			desc: "Small instance type",
			instanceType: &ecs.InstanceType{
				EniQuantity:                 2,
				EniPrivateIpAddressQuantity: 5,
				EniIpv6AddressQuantity:      10,
				EniTotalQuantity:            6,
				EriQuantity:                 2,
				InstanceBandwidthRx:         1000,
				InstanceBandwidthTx:         500,
				EniTrunkSupported:           true,
			},
			want: 0,
		},
		// Four ENIs and two ERIs: one ERI expected.
		{
			desc: "Basic instance type",
			instanceType: &ecs.InstanceType{
				EniQuantity:                 4,
				EniPrivateIpAddressQuantity: 5,
				EniIpv6AddressQuantity:      10,
				EniTotalQuantity:            6,
				EriQuantity:                 2,
				InstanceBandwidthRx:         1000,
				InstanceBandwidthTx:         500,
				EniTrunkSupported:           true,
			},
			want: 1,
		},
		// Eight ENIs but a single ERI: one ERI expected.
		{
			desc: "giant instance type only one eri",
			instanceType: &ecs.InstanceType{
				EniQuantity:                 8,
				EniPrivateIpAddressQuantity: 5,
				EniIpv6AddressQuantity:      10,
				EniTotalQuantity:            10,
				EriQuantity:                 1,
				InstanceBandwidthRx:         1000,
				InstanceBandwidthTx:         500,
				EniTrunkSupported:           true,
			},
			want: 1,
		},
		// Eight ENIs and four ERIs: two ERIs expected.
		{
			desc: "giant instance type",
			instanceType: &ecs.InstanceType{
				EniQuantity:                 8,
				EniPrivateIpAddressQuantity: 5,
				EniIpv6AddressQuantity:      10,
				EniTotalQuantity:            10,
				EriQuantity:                 4,
				InstanceBandwidthRx:         1000,
				InstanceBandwidthTx:         500,
				EniTrunkSupported:           true,
			},
			want: 2,
		},
	}

	for _, tc := range cases {
		t.Run(tc.desc, func(t *testing.T) {
			limits := getInstanceType(tc.instanceType)
			assert.Equal(t, tc.want, limits.ERDMARes())
		})
	}
}

func TestECSLimitProvider_GetLimitFromAnno(t *testing.T) {

type args struct {
Expand Down
2 changes: 1 addition & 1 deletion pkg/controller/node/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ func (r *ReconcileNode) createOrUpdate(ctx context.Context, k8sNode *corev1.Node
InstanceBandwidthTx: limit.InstanceBandwidthTx,
InstanceBandwidthRx: limit.InstanceBandwidthRx,
Adapters: limit.Adapters,
EriQuantity: limit.ERdmaAdapters,
EriQuantity: limit.ERDMARes(),
TotalAdapters: limit.TotalAdapters,
IPv6PerAdapter: limit.IPv6PerAdapter,
MemberAdapterLimit: limit.MemberAdapterLimit,
Expand Down
6 changes: 5 additions & 1 deletion plugin/datapath/policy_router_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,11 @@ func (d *PolicyRoute) Setup(cfg *types.SetupConfig, netNS ns.NetNS) error {
}

if cfg.ERDMA {
err = smc.ConfigSMCForDevice("erdma_0", cfg.ContainerIfName, netNS)
rdmaDev, err := utils.GetERdmaFromLink(eni)
if err != nil {
return fmt.Errorf("error get erdma device: %w", err)
}
err = smc.ConfigSMCForDevice(rdmaDev.Attrs.Name, cfg.ContainerIfName, netNS)
if err != nil {
return fmt.Errorf("error setup pnet config for pod: %w", err)
}
Expand Down
38 changes: 38 additions & 0 deletions plugin/driver/utils/utils_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import (
"fmt"
"net"
"os"
"strconv"
"strings"

terwayIP "github.com/AliyunContainerService/terway/pkg/ip"
terwaySysctl "github.com/AliyunContainerService/terway/pkg/sysctl"
Expand Down Expand Up @@ -844,3 +846,39 @@ func CleanIPRules() (err error) {

return nil
}

// GetERdmaFromLink returns the RDMA link whose node GUID matches the hardware
// address of link.
//
// The hardware address is extracted from each RDMA link's node GUID with
// parseERdmaLinkHwAddr and compared against the link's hardware address with
// 0x2 XORed into its first byte. An error is returned when the RDMA links
// cannot be listed, when a node GUID cannot be parsed, when link has no
// hardware address, or when no RDMA link matches.
func GetERdmaFromLink(link netlink.Link) (*netlink.RdmaLink, error) {
	rdmaLinks, err := netlink.RdmaLinkList()
	if err != nil {
		return nil, fmt.Errorf("error list rdma links, %v", err)
	}

	attrs := link.Attrs()
	if len(attrs.HardwareAddr) == 0 {
		return nil, fmt.Errorf("cannot found rdma link for %s: link has no hardware address", attrs.Name)
	}

	// Work on a private copy: HardwareAddr is a slice, so flipping the bit in
	// place would corrupt the caller's link attributes, and doing it inside the
	// loop would toggle the bit back on every second iteration.
	wantAddr := make(net.HardwareAddr, len(attrs.HardwareAddr))
	copy(wantAddr, attrs.HardwareAddr)
	// erdma guid first byte is ^= 0x2
	wantAddr[0] ^= 0x2
	want := wantAddr.String()

	for _, rl := range rdmaLinks {
		rdmaHwAddr, err := parseERdmaLinkHwAddr(rl.Attrs.NodeGuid)
		if err != nil {
			return nil, err
		}
		if rdmaHwAddr.String() == want {
			return rl, nil
		}
	}
	return nil, fmt.Errorf("cannot found rdma link for %s", attrs.Name)
}

// parseERdmaLinkHwAddr extracts a 6-byte hardware address from an RDMA node
// GUID.
//
// guid must consist of exactly eight colon-separated hexadecimal bytes. The
// bytes are taken in reverse order and the two middle bytes of the reversed
// sequence are dropped, leaving the first three and the last three bytes. An
// error is returned when guid does not have eight parts or when any part is
// not a valid 8-bit hexadecimal number.
func parseERdmaLinkHwAddr(guid string) (net.HardwareAddr, error) {
	const guidLen = 8

	parts := strings.Split(guid, ":")
	if len(parts) != guidLen {
		return nil, fmt.Errorf("invalid rdma guid: %s", guid)
	}

	// Store the GUID bytes back to front.
	var reversed [guidLen]byte
	for idx, part := range parts {
		b, err := strconv.ParseUint(part, 16, 8)
		if err != nil {
			return nil, fmt.Errorf("invalid rdma guid: %s, err: %v", guid, err)
		}
		reversed[guidLen-1-idx] = byte(b)
	}

	// Keep bytes 0-2 and 5-7; bytes 3 and 4 are not part of the address.
	hwAddr := make(net.HardwareAddr, 0, guidLen)
	hwAddr = append(hwAddr, reversed[:3]...)
	hwAddr = append(hwAddr, reversed[5:]...)
	return hwAddr, nil
}
6 changes: 6 additions & 0 deletions plugin/driver/utils/utils_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -293,3 +293,9 @@ var _ = Describe("Test TC filter", func() {
Expect(err).NotTo(HaveOccurred())
})
})

// TestParseERdmaLinkHwAddress verifies that a sample RDMA node GUID is
// converted into the expected hardware address string.
func TestParseERdmaLinkHwAddress(t *testing.T) {
	const guid = "0d:d3:04:fe:ff:3e:16:02"

	parsed, parseErr := parseERdmaLinkHwAddr(guid)

	assert.NoError(t, parseErr)
	assert.Equal(t, "02:16:3e:04:d3:0d", parsed.String())
}

0 comments on commit a61c695

Please sign in to comment.