From bd70e8b17738db67720e77b11db6079b42225e1d Mon Sep 17 00:00:00 2001 From: menglingwei Date: Wed, 28 Aug 2024 15:59:04 +0800 Subject: [PATCH 1/5] add 401 http code --- policy/policyinit.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/policy/policyinit.sh b/policy/policyinit.sh index 4d5fdaba..1faa5f0b 100755 --- a/policy/policyinit.sh +++ b/policy/policyinit.sh @@ -9,7 +9,7 @@ if [ "$DATASTORE_TYPE" = "kubernetes" ]; then exit 1 fi return_code="$(curl -k -o /dev/null -I -L -s -w "%{http_code}" https://"${KUBERNETES_SERVICE_HOST}":"${KUBERNETES_SERVICE_PORT:-443}")" - if [ "$return_code" -ne 403 ]&&[ "$return_code" -ne 200 ]&&[ "$return_code" -ne 201 ];then + if [ "$return_code" -ne 401 ]&&[ "$return_code" -ne 403 ]&&[ "$return_code" -ne 200 ]&&[ "$return_code" -ne 201 ];then echo "can not access kubernetes service, exiting" exit 1 fi @@ -173,4 +173,4 @@ fi else # shellcheck disable=SC2016 exec socat TCP-LISTEN:9099,bind=127.0.0.1,fork,reuseaddr system:'sleep 2;kill -9 $SOCAT_PID 2>/dev/null' - fi \ No newline at end of file + fi From 910dc3d7c685ecb75fcc5301d7947527f7647f8b Mon Sep 17 00:00:00 2001 From: l1b0k Date: Fri, 30 Aug 2024 12:24:25 +0800 Subject: [PATCH 2/5] daemon: add a wait check for mac in ecs metadata metadata sync is slow, have to wait it found Signed-off-by: l1b0k --- pkg/factory/aliyun/aliyun.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pkg/factory/aliyun/aliyun.go b/pkg/factory/aliyun/aliyun.go index 06acf3ed..c1cd0cd1 100644 --- a/pkg/factory/aliyun/aliyun.go +++ b/pkg/factory/aliyun/aliyun.go @@ -191,6 +191,19 @@ func (a *Aliyun) CreateNetworkInterface(ipv4, ipv6 int, eniType string) (*daemon return r, nil, nil, err } + // wait mac + err = wait.PollUntilContextTimeout(ctx, metadataPollInterval, metadataWaitTimeout, true, func(ctx context.Context) (bool, error) { + macs, err := metadata.GetENIsMAC() + if err != nil { + klog.Errorf("metadata: error get mac: %v", err) + return false, nil + } + return sets.NewString(macs...).Has(r.MAC), nil + }) + if err != nil { + return r, nil, nil, err + } + prefix, err := metadata.GetVSwitchCIDR(eni.MacAddress) if err != nil { return r, nil, nil, err From f1adbec2baa844b0f1faab77d636d8f748b7e392 Mon Sep 17 00:00:00 2001 From: l1b0k Date: Thu, 29 Aug 2024 22:29:27 +0800 Subject: [PATCH 3/5] daemon add the orphan ip warning default 0s Warning ResourceInvalid node/cn-hangzhou.172.16.1.196 orphan ip found on ecs metadata, ip: 172.16.64.4, restart terway to resync data Signed-off-by: l1b0k --- pkg/eni/local.go | 38 +++++++++++++++++++++++++++++++++++++- pkg/eni/local_test.go | 29 +++++++++++++++++++++++++++++ pkg/eni/types.go | 2 +- 3 files changed, 67 insertions(+), 2 deletions(-) diff --git a/pkg/eni/local.go b/pkg/eni/local.go index 582e08ed..605e5c7e 100644 --- a/pkg/eni/local.go +++ b/pkg/eni/local.go @@ -13,6 +13,7 @@ import ( "golang.org/x/time/rate" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/cache" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/apimachinery/pkg/util/wait" logf "sigs.k8s.io/controller-runtime/pkg/log" @@ -27,6 +28,8 @@ import ( "github.com/AliyunContainerService/terway/pkg/metric" ) +const defaultSyncPeriod = 1 * time.Minute + var _ NetworkInterface = &Local{} var _ Usage = &Local{} var _ ReportStatus = &Trunk{} @@ -178,7 +181,7 @@ func (l *Local) Run(ctx context.Context, podResources []daemon.PodResources, wg go l.notify(ctx) - go wait.JitterUntil(l.sync, 1*time.Minute, 1.0, true, ctx.Done()) + go wait.JitterUntil(l.sync, defaultSyncPeriod, 1.0, true, ctx.Done()) return nil } @@ -371,6 +374,7 @@ func (l *Local) sync() { syncIPLocked(l.ipv4, ipv4) syncIPLocked(l.ipv6, ipv6) + report() l.cond.Broadcast() } @@ -1038,8 +1042,40 @@ func syncIPLocked(lo Set, remote []netip.Addr) { } } } + orphanIP(lo, s) +} + +func orphanIP(lo Set, remote sets.Set[netip.Addr]) { + for key := range remote { + if _, ok := lo[key]; !ok { + + prev, ok := invalidIPCache.Get(key) + if !ok { + invalidIPCache.Add(key, 1, 5*defaultSyncPeriod) + } else { + invalidIPCache.Add(key, prev.(int)+1, 5*defaultSyncPeriod) + } + } else { + invalidIPCache.Remove(key) + } + } +} + +func report() { + for _, key := range invalidIPCache.Keys() { + count, ok := invalidIPCache.Get(key) + if !ok { + continue + } + if count.(int) > 1 { + _ = tracing.RecordNodeEvent(corev1.EventTypeWarning, string(types.ErrResourceInvalid), fmt.Sprintf("orphan ip found on ecs metadata, ip: %s", key)) + logf.Log.Info("orphan ip found on ecs metadata", "ip", key) + } + } } +var invalidIPCache = cache.NewLRUExpireCache(100) + func parseResourceID(id string) (string, string, error) { parts := strings.SplitN(id, ".", 2) if len(parts) < 2 { diff --git a/pkg/eni/local_test.go b/pkg/eni/local_test.go index e3a47202..c0d52a3d 100644 --- a/pkg/eni/local_test.go +++ b/pkg/eni/local_test.go @@ -10,6 +10,8 @@ import ( "github.com/stretchr/testify/assert" "golang.org/x/time/rate" + "k8s.io/apimachinery/pkg/util/cache" + "k8s.io/apimachinery/pkg/util/sets" "github.com/AliyunContainerService/terway/pkg/factory" "github.com/AliyunContainerService/terway/types" @@ -309,3 +311,30 @@ func Test_parseResourceID(t *testing.T) { }) } } + +func Test_orphanIP(t *testing.T) { + invalidIPCache = cache.NewLRUExpireCache(100) + + lo1 := map[netip.Addr]*IP{ + netip.MustParseAddr("127.0.0.1"): { + ip: netip.MustParseAddr("127.0.0.1"), + }, + } + + remote1 := sets.Set[netip.Addr]{ + netip.MustParseAddr("127.0.0.1"): {}, + netip.MustParseAddr("127.0.0.2"): {}, + } + + orphanIP(lo1, remote1) + + v, _ := invalidIPCache.Get(netip.MustParseAddr("127.0.0.1")) + assert.Equal(t, nil, v) + + v, _ = invalidIPCache.Get(netip.MustParseAddr("127.0.0.2")) + assert.Equal(t, 1, v) + + orphanIP(lo1, remote1) + v, _ = invalidIPCache.Get(netip.MustParseAddr("127.0.0.2")) + assert.Equal(t, 2, v) +} diff --git a/pkg/eni/types.go b/pkg/eni/types.go index 82c762a7..c9142c63 100644 --- a/pkg/eni/types.go +++ b/pkg/eni/types.go @@ -94,7 +94,7 @@ func (ip *IP) Allocatable() bool { return ip.Valid() && !ip.InUse() } -type Set map[any]*IP +type Set map[netip.Addr]*IP func (s Set) Idles() []*IP { var result []*IP From d63680232b4c7e5efa498c92295588bb63a2edcf Mon Sep 17 00:00:00 2001 From: l1b0k Date: Thu, 12 Sep 2024 17:16:30 +0800 Subject: [PATCH 4/5] cni: retry get mac on new eni Signed-off-by: l1b0k --- plugin/terway/cni.go | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/plugin/terway/cni.go b/plugin/terway/cni.go index 1a329992..e32c3f62 100644 --- a/plugin/terway/cni.go +++ b/plugin/terway/cni.go @@ -2,6 +2,7 @@ package main import ( "context" + "errors" "fmt" "net" "runtime" @@ -9,6 +10,8 @@ import ( "google.golang.org/grpc/backoff" "google.golang.org/grpc/credentials/insecure" + "k8s.io/apimachinery/pkg/util/wait" + "k8s.io/client-go/util/retry" "github.com/AliyunContainerService/terway/pkg/link" "github.com/AliyunContainerService/terway/plugin/datapath" @@ -295,7 +298,17 @@ func parseSetupConf(args *skel.CmdArgs, alloc *rpc.NetConf, conf *types.CNIConf, if alloc.GetENIInfo() != nil { mac := alloc.GetENIInfo().GetMAC() if mac != "" { - deviceID, err = link.GetDeviceNumber(mac) + err = retry.OnError(wait.Backoff{ + Steps: 10, + Duration: 1 * time.Second, + Factor: 1.0, + Jitter: 0, + }, func(err error) bool { + return errors.Is(err, link.ErrNotFound) + }, func() error { + deviceID, err = link.GetDeviceNumber(mac) + return err + }) if err != nil { return nil, err } From 38eeb31cee0462c461e2a4a7e7a911b9b744222d Mon Sep 17 00:00:00 2001 From: l1b0k Date: Fri, 13 Sep 2024 11:03:19 +0800 Subject: [PATCH 5/5] fix vpc endpoint Signed-off-by: l1b0k --- pkg/aliyun/credential/aliyun_client_mgr.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/aliyun/credential/aliyun_client_mgr.go b/pkg/aliyun/credential/aliyun_client_mgr.go index 966d62aa..15b6bc38 100644 --- a/pkg/aliyun/credential/aliyun_client_mgr.go +++ b/pkg/aliyun/credential/aliyun_client_mgr.go @@ -181,7 +181,7 @@ func (c *ClientMgr) refreshToken() (bool, error) { if err != nil { return false, err } - c.ecs.SetEndpointRules(c.ecs.EndpointMap, "regional", "public") + c.ecs.SetEndpointRules(c.ecs.EndpointMap, "regional", "vpc") if c.ecsDomainOverride != "" { c.ecs.Domain = c.ecsDomainOverride @@ -191,7 +191,7 @@ func (c *ClientMgr) refreshToken() (bool, error) { if err != nil { return false, err } - c.vpc.SetEndpointRules(c.vpc.EndpointMap, "regional", "public") + c.vpc.SetEndpointRules(c.vpc.EndpointMap, "regional", "vpc") if c.vpcDomainOverride != "" { c.vpc.Domain = c.vpcDomainOverride @@ -201,7 +201,7 @@ func (c *ClientMgr) refreshToken() (bool, error) { if err != nil { return false, err } - c.eflo.SetEndpointRules(c.eflo.EndpointMap, "regional", "public") + c.eflo.SetEndpointRules(c.eflo.EndpointMap, "regional", "vpc") if c.efloDomainOverride != "" { c.eflo.Domain = c.efloDomainOverride