diff --git a/contrib/kind.sh b/contrib/kind.sh index 2bac61928b..2d5e04c7c1 100755 --- a/contrib/kind.sh +++ b/contrib/kind.sh @@ -149,6 +149,7 @@ usage() { echo "-ric | --run-in-container Configure the script to be run from a docker container, allowing it to still communicate with the kind controlplane" echo "-ehp | --egress-ip-healthcheck-port TCP port used for gRPC session by egress IP node check. DEFAULT: 9107 (Use "0" for legacy dial to port 9)." echo "-is | --ipsec Enable IPsec encryption (spawns ovn-ipsec pods)" + echo "-sm | --scale-metrics Enable scale metrics" echo "--isolated Deploy with an isolated environment (no default gateway)" echo "--delete Delete current cluster" echo "--deploy Deploy ovn kubernetes without restarting kind" @@ -295,6 +296,8 @@ parse_args() { fi OVN_EGRESSIP_HEALTHCHECK_PORT=$1 ;; + -sm | --scale-metrics ) OVN_METRICS_SCALE_ENABLE=true + ;; --isolated ) OVN_ISOLATED=true ;; -mne | --multi-network-enable ) shift @@ -361,6 +364,7 @@ print_params() { echo "OVN_EX_GW_NETWORK_INTERFACE = $OVN_EX_GW_NETWORK_INTERFACE" echo "OVN_EGRESSIP_HEALTHCHECK_PORT = $OVN_EGRESSIP_HEALTHCHECK_PORT" echo "OVN_DEPLOY_PODS = $OVN_DEPLOY_PODS" + echo "OVN_METRICS_SCALE_ENABLE = $OVN_METRICS_SCALE_ENABLE" echo "OVN_ISOLATED = $OVN_ISOLATED" echo "ENABLE_MULTI_NET = $ENABLE_MULTI_NET" echo "OVN_SEPARATE_CLUSTER_MANAGER = $OVN_SEPARATE_CLUSTER_MANAGER" @@ -510,6 +514,7 @@ set_default_params() { OVN_EGRESSIP_HEALTHCHECK_PORT=${OVN_EGRESSIP_HEALTHCHECK_PORT:-9107} OCI_BIN=${KIND_EXPERIMENTAL_PROVIDER:-docker} OVN_DEPLOY_PODS=${OVN_DEPLOY_PODS:-"ovnkube-master ovnkube-node"} + OVN_METRICS_SCALE_ENABLE=${OVN_METRICS_SCALE_ENABLE:-false} OVN_ISOLATED=${OVN_ISOLATED:-false} OVN_GATEWAY_OPTS="" if [ "$OVN_ISOLATED" == true ]; then @@ -721,7 +726,8 @@ create_ovn_kube_manifests() { --v4-join-subnet="${JOIN_SUBNET_IPV4}" \ --v6-join-subnet="${JOIN_SUBNET_IPV6}" \ --ex-gw-network-interface="${OVN_EX_GW_NETWORK_INTERFACE}" \ - 
--multi-network-enable=${ENABLE_MULTI_NET} + --multi-network-enable="${ENABLE_MULTI_NET}" \ + --ovnkube-metrics-scale-enable="${OVN_METRICS_SCALE_ENABLE}" popd } diff --git a/dist/images/daemonset.sh b/dist/images/daemonset.sh index d1b248b5c6..507def4d7d 100755 --- a/dist/images/daemonset.sh +++ b/dist/images/daemonset.sh @@ -77,6 +77,7 @@ OVN_HOST_NETWORK_NAMESPACE="" OVN_EX_GW_NETWORK_INTERFACE="" OVNKUBE_NODE_MGMT_PORT_NETDEV="" OVNKUBE_CONFIG_DURATION_ENABLE= +OVNKUBE_METRICS_SCALE_ENABLE= # IN_UPGRADE is true only if called by upgrade-ovn.sh during the upgrade test, # it will render only the parts in ovn-setup.yaml related to RBAC permissions. IN_UPGRADE= @@ -263,6 +264,9 @@ while [ "$1" != "" ]; do --ovnkube-config-duration-enable) OVNKUBE_CONFIG_DURATION_ENABLE=$VALUE ;; + --ovnkube-metrics-scale-enable) + OVNKUBE_METRICS_SCALE_ENABLE=$VALUE + ;; --in-upgrade) IN_UPGRADE=true ;; @@ -405,6 +409,8 @@ ovnkube_node_mgmt_port_netdev=${OVNKUBE_NODE_MGMT_PORT_NETDEV} echo "ovnkube_node_mgmt_port_netdev: ${ovnkube_node_mgmt_port_netdev}" ovnkube_config_duration_enable=${OVNKUBE_CONFIG_DURATION_ENABLE} echo "ovnkube_config_duration_enable: ${ovnkube_config_duration_enable}" +ovnkube_metrics_scale_enable=${OVNKUBE_METRICS_SCALE_ENABLE} +echo "ovnkube_metrics_scale_enable: ${ovnkube_metrics_scale_enable}" ovn_image=${image} \ ovn_image_pull_policy=${image_pull_policy} \ @@ -486,6 +492,7 @@ ovn_image=${image} \ ovnkube_logfile_maxbackups=${ovnkube_logfile_maxbackups} \ ovnkube_logfile_maxage=${ovnkube_logfile_maxage} \ ovnkube_config_duration_enable=${ovnkube_config_duration_enable} \ + ovnkube_metrics_scale_enable=${ovnkube_metrics_scale_enable} \ ovn_acl_logging_rate_limit=${ovn_acl_logging_rate_limit} \ ovn_hybrid_overlay_net_cidr=${ovn_hybrid_overlay_net_cidr} \ ovn_hybrid_overlay_enable=${ovn_hybrid_overlay_enable} \ diff --git a/dist/images/ovnkube.sh b/dist/images/ovnkube.sh index e84db01e64..bf9dec1088 100755 --- a/dist/images/ovnkube.sh +++ 
b/dist/images/ovnkube.sh @@ -235,6 +235,7 @@ ovnkube_node_mode=${OVNKUBE_NODE_MODE:-"full"} # OVNKUBE_NODE_MGMT_PORT_NETDEV - is the net device to be used for management port ovnkube_node_mgmt_port_netdev=${OVNKUBE_NODE_MGMT_PORT_NETDEV:-} ovnkube_config_duration_enable=${OVNKUBE_CONFIG_DURATION_ENABLE:-false} +ovnkube_metrics_scale_enable=${OVNKUBE_METRICS_SCALE_ENABLE:-false} # OVN_ENCAP_IP - encap IP to be used for OVN traffic on the node ovn_encap_ip=${OVN_ENCAP_IP:-} @@ -993,6 +994,12 @@ ovn-master() { fi echo "ovnkube_config_duration_enable_flag: ${ovnkube_config_duration_enable_flag}" + ovnkube_metrics_scale_enable_flag= + if [[ ${ovnkube_metrics_scale_enable} == "true" ]]; then + ovnkube_metrics_scale_enable_flag="--metrics-enable-scale" + fi + echo "ovnkube_metrics_scale_enable_flag: ${ovnkube_metrics_scale_enable_flag}" + echo "=============== ovn-master ========== MASTER ONLY" /usr/bin/ovnkube \ --init-master ${K8S_NODE} \ @@ -1019,6 +1026,7 @@ ovn-master() { ${egressfirewall_enabled_flag} \ ${egressqos_enabled_flag} \ ${ovnkube_config_duration_enable_flag} \ + ${ovnkube_metrics_scale_enable_flag} \ ${multi_network_enabled_flag} \ --metrics-bind-address ${ovnkube_master_metrics_bind_address} \ --host-network-namespace ${ovn_host_network_namespace} & diff --git a/dist/templates/ovnkube-master.yaml.j2 b/dist/templates/ovnkube-master.yaml.j2 index 5bf07a84e3..1cdd381e57 100644 --- a/dist/templates/ovnkube-master.yaml.j2 +++ b/dist/templates/ovnkube-master.yaml.j2 @@ -164,6 +164,8 @@ spec: value: "{{ ovnkube_logfile_maxage }}" - name: OVNKUBE_CONFIG_DURATION_ENABLE value: "{{ ovnkube_config_duration_enable }}" + - name: OVNKUBE_METRICS_SCALE_ENABLE + value: "{{ ovnkube_metrics_scale_enable }}" - name: OVN_NET_CIDR valueFrom: configMapKeyRef: diff --git a/go-controller/pkg/cni/cni.go b/go-controller/pkg/cni/cni.go index 706d398d36..57ec7e116a 100644 --- a/go-controller/pkg/cni/cni.go +++ b/go-controller/pkg/cni/cni.go @@ -2,10 +2,12 @@ package cni import ( 
"fmt" + "net" + + kapi "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" "k8s.io/klog/v2" utilnet "k8s.io/utils/net" - "net" current "github.com/containernetworking/cni/pkg/types/100" "github.com/k8snetworkplumbingwg/govdpa/pkg/kvdpa" @@ -80,11 +82,17 @@ func (pr *PodRequest) String() string { // and the best we can do is use the given UID for the duration of the request. // But if the existing UID is valid and does not match the given UID then the // sandbox request is for a different pod instance and should be terminated. -func (pr *PodRequest) checkOrUpdatePodUID(podUID string) error { - if pr.PodUID == "" { - // Runtime didn't pass UID, use the one we got from the pod object - pr.PodUID = podUID - } else if podUID != pr.PodUID { +// Static pod UID is a hash of the pod itself that does not match +// the UID of the mirror pod kubelet creates on the API server. +// We will use the UID of the mirror. +// The hash is annotated in the mirror pod (kubernetes.io/config.hash) +// and we could match against it, but let's avoid that for now as it is not +// a published standard. 
+func (pr *PodRequest) checkOrUpdatePodUID(pod *kapi.Pod) error { + if pr.PodUID == "" || IsStaticPod(pod) { + // Runtime didn't pass UID, or the pod is a static pod, use the one we got from the pod object + pr.PodUID = string(pod.UID) + } else if string(pod.UID) != pr.PodUID { // Exit early if the pod was deleted and recreated already return fmt.Errorf("pod deleted before sandbox %v operation began", pr.Command) } @@ -142,12 +150,12 @@ func (pr *PodRequest) cmdAdd(kubeAuth *KubeAPIAuth, clientset *ClientSet, useOVS } // Get the IP address and MAC address of the pod // for DPU, ensure connection-details is present - podUID, annotations, podNADAnnotation, err := GetPodAnnotations(pr.ctx, clientset, namespace, podName, + pod, annotations, podNADAnnotation, err := GetPodWithAnnotations(pr.ctx, clientset, namespace, podName, pr.nadName, annotCondFn) if err != nil { return nil, fmt.Errorf("failed to get pod annotation: %v", err) } - if err := pr.checkOrUpdatePodUID(podUID); err != nil { + if err = pr.checkOrUpdatePodUID(pod); err != nil { return nil, err } podInterfaceInfo, err := PodAnnotation2PodInfo(annotations, podNADAnnotation, useOVSExternalIDs, pr.PodUID, vfNetdevName, diff --git a/go-controller/pkg/cni/helper_linux.go b/go-controller/pkg/cni/helper_linux.go index 8d375a707e..1c80b61ef0 100644 --- a/go-controller/pkg/cni/helper_linux.go +++ b/go-controller/pkg/cni/helper_linux.go @@ -355,7 +355,6 @@ func ConfigureOVS(ctx context.Context, namespace, podName, hostIfaceName string, ifaceID = util.GetSecondaryNetworkIfaceId(namespace, podName, ifInfo.NADName) } initialPodUID := ifInfo.PodUID - ipStrs := make([]string, len(ifInfo.IPs)) for i, ip := range ifInfo.IPs { ipStrs[i] = ip.String() diff --git a/go-controller/pkg/cni/utils.go b/go-controller/pkg/cni/utils.go index 3fc455ceb1..06c3c8b1e8 100644 --- a/go-controller/pkg/cni/utils.go +++ b/go-controller/pkg/cni/utils.go @@ -60,8 +60,8 @@ func (c *ClientSet) getPod(namespace, name string) (*kapi.Pod, error) { } // 
GetPodAnnotations obtains the pod UID and annotation from the cache or apiserver -func GetPodAnnotations(ctx context.Context, getter PodInfoGetter, - namespace, name, nadName string, annotCond podAnnotWaitCond) (string, map[string]string, *util.PodAnnotation, error) { +func GetPodWithAnnotations(ctx context.Context, getter PodInfoGetter, + namespace, name, nadName string, annotCond podAnnotWaitCond) (*kapi.Pod, map[string]string, *util.PodAnnotation, error) { var notFoundCount uint for { @@ -71,23 +71,23 @@ func GetPodAnnotations(ctx context.Context, getter PodInfoGetter, if ctx.Err() == context.Canceled { detail = "canceled while" } - return "", nil, nil, fmt.Errorf("%s waiting for annotations: %w", detail, ctx.Err()) + return nil, nil, nil, fmt.Errorf("%s waiting for annotations: %w", detail, ctx.Err()) default: pod, err := getter.getPod(namespace, name) if err != nil { if !apierrors.IsNotFound(err) { - return "", nil, nil, fmt.Errorf("failed to get pod for annotations: %v", err) + return nil, nil, nil, fmt.Errorf("failed to get pod for annotations: %v", err) } // Allow up to 1 second for pod to be found notFoundCount++ if notFoundCount >= 5 { - return "", nil, nil, fmt.Errorf("timed out waiting for pod after 1s: %v", err) + return nil, nil, nil, fmt.Errorf("timed out waiting for pod after 1s: %v", err) } // drop through to try again } else if pod != nil { podNADAnnotation, ready := annotCond(pod.Annotations, nadName) if ready { - return string(pod.UID), pod.Annotations, podNADAnnotation, nil + return pod, pod.Annotations, podNADAnnotation, nil } } @@ -133,3 +133,28 @@ func PodAnnotation2PodInfo(podAnnotation map[string]string, podNADAnnotation *ut } return podInterfaceInfo, nil } + +//START taken from https://github.com/kubernetes/kubernetes/blob/master/pkg/kubelet/types/pod_update.go +const ( + ConfigSourceAnnotationKey = "kubernetes.io/config.source" + // ApiserverSource identifies updates from Kubernetes API Server. 
+ ApiserverSource = "api" +) + +// GetPodSource returns the source of the pod based on the annotation. +func GetPodSource(pod *kapi.Pod) (string, error) { + if pod.Annotations != nil { + if source, ok := pod.Annotations[ConfigSourceAnnotationKey]; ok { + return source, nil + } + } + return "", fmt.Errorf("cannot get source of pod %q", pod.UID) +} + +// IsStaticPod returns true if the pod is a static pod. +func IsStaticPod(pod *kapi.Pod) bool { + source, err := GetPodSource(pod) + return err == nil && source != ApiserverSource +} + +//END taken from https://github.com/kubernetes/kubernetes/blob/master/pkg/kubelet/types/pod_update.go diff --git a/go-controller/pkg/cni/utils_test.go b/go-controller/pkg/cni/utils_test.go index 2875aa443e..aa40fa922d 100644 --- a/go-controller/pkg/cni/utils_test.go +++ b/go-controller/pkg/cni/utils_test.go @@ -3,10 +3,11 @@ package cni import ( "context" "fmt" + "time" + . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" "github.com/stretchr/testify/mock" - "time" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" ovntypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" @@ -113,7 +114,7 @@ var _ = Describe("CNI Utils tests", func() { }) }) - Context("GetPodAnnotations", func() { + Context("GetPodWithAnnotations", func() { var podNamespaceLister mocks.PodNamespaceLister var pod *v1.Pod @@ -143,10 +144,10 @@ var _ = Describe("CNI Utils tests", func() { clientset := newFakeClientSet(pod, &podNamespaceLister) podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(pod, nil) - uid, annot, _, err := GetPodAnnotations(ctx, clientset, namespace, podName, ovntypes.DefaultNetworkName, cond) + returnedPod, annot, _, err := GetPodWithAnnotations(ctx, clientset, namespace, podName, ovntypes.DefaultNetworkName, cond) Expect(err).ToNot(HaveOccurred()) Expect(annot).To(Equal(podAnnot)) - Expect(uid).To(Equal(string(pod.UID))) + Expect(string(returnedPod.UID)).To(Equal(string(pod.UID))) }) It("Returns with Error if 
context is canceled", func() { @@ -164,7 +165,7 @@ var _ = Describe("CNI Utils tests", func() { clientset := newFakeClientSet(pod, &podNamespaceLister) podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(pod, nil) - _, _, _, err := GetPodAnnotations(ctx, clientset, namespace, podName, ovntypes.DefaultNetworkName, cond) + _, _, _, err := GetPodWithAnnotations(ctx, clientset, namespace, podName, ovntypes.DefaultNetworkName, cond) Expect(err).To(HaveOccurred()) }) @@ -184,7 +185,7 @@ var _ = Describe("CNI Utils tests", func() { clientset := newFakeClientSet(pod, &podNamespaceLister) podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(pod, nil) - _, _, _, err := GetPodAnnotations(ctx, clientset, namespace, podName, ovntypes.DefaultNetworkName, cond) + _, _, _, err := GetPodWithAnnotations(ctx, clientset, namespace, podName, ovntypes.DefaultNetworkName, cond) Expect(err).ToNot(HaveOccurred()) }) @@ -199,7 +200,7 @@ var _ = Describe("CNI Utils tests", func() { clientset := newFakeClientSet(pod, &podNamespaceLister) podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(nil, fmt.Errorf("failed to list pods")) - _, _, _, err := GetPodAnnotations(ctx, clientset, namespace, podName, ovntypes.DefaultNetworkName, cond) + _, _, _, err := GetPodWithAnnotations(ctx, clientset, namespace, podName, ovntypes.DefaultNetworkName, cond) Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring("failed to list pods")) }) @@ -220,7 +221,7 @@ var _ = Describe("CNI Utils tests", func() { clientset := newFakeClientSet(pod, &podNamespaceLister) podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(nil, errors.NewNotFound(v1.Resource("pod"), name)) - _, _, _, err := GetPodAnnotations(ctx, clientset, namespace, podName, ovntypes.DefaultNetworkName, cond) + _, _, _, err := GetPodWithAnnotations(ctx, clientset, namespace, podName, ovntypes.DefaultNetworkName, cond) Expect(err).ToNot(HaveOccurred()) }) @@ -235,7 +236,7 @@ var _ = 
Describe("CNI Utils tests", func() { clientset := newFakeClientSet(nil, &podNamespaceLister) podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(nil, errors.NewNotFound(v1.Resource("pod"), name)) - _, _, _, err := GetPodAnnotations(ctx, clientset, namespace, podName, ovntypes.DefaultNetworkName, cond) + _, _, _, err := GetPodWithAnnotations(ctx, clientset, namespace, podName, ovntypes.DefaultNetworkName, cond) Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring("timed out waiting for pod after 1s")) }) diff --git a/go-controller/pkg/config/config.go b/go-controller/pkg/config/config.go index b936eea583..aa9d217bb8 100644 --- a/go-controller/pkg/config/config.go +++ b/go-controller/pkg/config/config.go @@ -333,8 +333,8 @@ type MetricsConfig struct { NodeServerCert string `gcfg:"node-server-cert"` // EnableConfigDuration holds the boolean flag to enable OVN-Kubernetes master to monitor OVN-Kubernetes master // configuration duration and optionally, its application to all nodes - EnableConfigDuration bool `gcfg:"enable-config-duration"` - EnableEIPScaleMetrics bool `gcfg:"enable-eip-scale-metrics"` + EnableConfigDuration bool `gcfg:"enable-config-duration"` + EnableScaleMetrics bool `gcfg:"enable-scale-metrics"` } // OVNKubernetesFeatureConfig holds OVN-Kubernetes feature enhancement config file parameters and command-line overrides @@ -1041,9 +1041,9 @@ var MetricsFlags = []cli.Flag{ Destination: &cliConfig.Metrics.EnableConfigDuration, }, &cli.BoolFlag{ - Name: "metrics-enable-eip-scale", - Usage: "Enables metrics related to Egress IP scaling", - Destination: &cliConfig.Metrics.EnableEIPScaleMetrics, + Name: "metrics-enable-scale", + Usage: "Enables metrics related to scaling", + Destination: &cliConfig.Metrics.EnableScaleMetrics, }, } diff --git a/go-controller/pkg/config/config_test.go b/go-controller/pkg/config/config_test.go index 57bfb8a148..747d84ac16 100644 --- a/go-controller/pkg/config/config_test.go +++ 
b/go-controller/pkg/config/config_test.go @@ -158,7 +158,7 @@ enable-pprof=true node-server-privkey=/path/to/node-metrics-private.key node-server-cert=/path/to/node-metrics.crt enable-config-duration=true -enable-eip-scale-metrics=true +enable-scale-metrics=true [logging] loglevel=5 @@ -585,7 +585,7 @@ var _ = Describe("Config Operations", func() { gomega.Expect(Metrics.NodeServerPrivKey).To(gomega.Equal("/path/to/node-metrics-private.key")) gomega.Expect(Metrics.NodeServerCert).To(gomega.Equal("/path/to/node-metrics.crt")) gomega.Expect(Metrics.EnableConfigDuration).To(gomega.Equal(true)) - gomega.Expect(Metrics.EnableEIPScaleMetrics).To(gomega.Equal(true)) + gomega.Expect(Metrics.EnableScaleMetrics).To(gomega.Equal(true)) gomega.Expect(OvnNorth.Scheme).To(gomega.Equal(OvnDBSchemeSSL)) gomega.Expect(OvnNorth.PrivKey).To(gomega.Equal("/path/to/nb-client-private.key")) @@ -673,7 +673,7 @@ var _ = Describe("Config Operations", func() { gomega.Expect(Metrics.NodeServerPrivKey).To(gomega.Equal("/tls/nodeprivkey")) gomega.Expect(Metrics.NodeServerCert).To(gomega.Equal("/tls/nodecert")) gomega.Expect(Metrics.EnableConfigDuration).To(gomega.Equal(true)) - gomega.Expect(Metrics.EnableEIPScaleMetrics).To(gomega.Equal(true)) + gomega.Expect(Metrics.EnableScaleMetrics).To(gomega.Equal(true)) gomega.Expect(OvnNorth.Scheme).To(gomega.Equal(OvnDBSchemeSSL)) gomega.Expect(OvnNorth.PrivKey).To(gomega.Equal("/client/privkey")) diff --git a/go-controller/pkg/libovsdbops/acl.go b/go-controller/pkg/libovsdbops/acl.go index 8a8b2de7a5..ce32fd78d5 100644 --- a/go-controller/pkg/libovsdbops/acl.go +++ b/go-controller/pkg/libovsdbops/acl.go @@ -102,6 +102,11 @@ func CreateOrUpdateACLsOps(nbClient libovsdbclient.Client, ops []libovsdb.Operat for i := range acls { // can't use i in the predicate, for loop replaces it in-memory acl := acls[i] + // ensure names are truncated (let's cover our bases from snippets that don't call BuildACL and call this directly) + if acl.Name != nil { + // node 
ACLs won't have names set + *acl.Name = fmt.Sprintf("%.63s", *acl.Name) + } opModel := operationModel{ Model: acl, ModelPredicate: func(item *nbdb.ACL) bool { return isEquivalentACL(item, acl) }, diff --git a/go-controller/pkg/metrics/master.go b/go-controller/pkg/metrics/master.go index 1013cbb46e..3fdf506e80 100644 --- a/go-controller/pkg/metrics/master.go +++ b/go-controller/pkg/metrics/master.go @@ -145,6 +145,17 @@ var MetricMasterReadyDuration = prometheus.NewGauge(prometheus.GaugeOpts{ Help: "The duration for the master to get to ready state", }) +// MetricMasterSyncDuration is the time taken to complete initial Watch for different resource. +// Resource name is in the label. +var MetricMasterSyncDuration = prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: MetricOvnkubeNamespace, + Subsystem: MetricOvnkubeSubsystemMaster, + Name: "sync_duration_seconds", + Help: "The duration to sync and setup all handlers for a given resource"}, + []string{ + "resource_name", + }) + // MetricMasterLeader identifies whether this instance of ovnkube-master is a leader or not var MetricMasterLeader = prometheus.NewGauge(prometheus.GaugeOpts{ Namespace: MetricOvnkubeNamespace, @@ -190,6 +201,66 @@ var metricEgressIPRebalanceCount = prometheus.NewCounter(prometheus.CounterOpts{ Help: "The total number of times assigned egress IP(s) needed to be moved to a different node"}, ) +var metricNetpolEventLatency = prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: MetricOvnkubeNamespace, + Subsystem: MetricOvnkubeSubsystemMaster, + Name: "network_policy_event_latency_seconds", + Help: "The latency of full network policy event handling (create, delete)", + Buckets: prometheus.ExponentialBuckets(.004, 2, 15)}, + []string{ + "event", + }) + +var metricNetpolLocalPodEventLatency = prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: MetricOvnkubeNamespace, + Subsystem: MetricOvnkubeSubsystemMaster, + Name: "network_policy_local_pod_event_latency_seconds", 
+ Help: "The latency of local pod events handling (add, delete)", + Buckets: prometheus.ExponentialBuckets(.002, 2, 15)}, + []string{ + "event", + }) + +var metricNetpolPeerPodEventLatency = prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: MetricOvnkubeNamespace, + Subsystem: MetricOvnkubeSubsystemMaster, + Name: "network_policy_peer_pod_event_latency_seconds", + Help: "The latency of peer pod events handling (add, delete)", + Buckets: prometheus.ExponentialBuckets(.002, 2, 15)}, + []string{ + "event", + }) + +var metricNetpolPeerNamespaceEventLatency = prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: MetricOvnkubeNamespace, + Subsystem: MetricOvnkubeSubsystemMaster, + Name: "network_policy_peer_namespace_event_latency_seconds", + Help: "The latency of peer namespace events handling (add, delete)", + Buckets: prometheus.ExponentialBuckets(.002, 2, 15)}, + []string{ + "event", + }) + +var metricNetpolPeerNamespaceAndPodEventLatency = prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: MetricOvnkubeNamespace, + Subsystem: MetricOvnkubeSubsystemMaster, + Name: "network_policy_peer_namespace_and_pod_event_latency_seconds", + Help: "The latency of peer namespace and pod events handling (add, delete)", + Buckets: prometheus.ExponentialBuckets(.002, 2, 15)}, + []string{ + "event", + }) + +var metricPodEventLatency = prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: MetricOvnkubeNamespace, + Subsystem: MetricOvnkubeSubsystemMaster, + Name: "pod_event_latency_seconds", + Help: "The latency of pod events handling (add, update, delete)", + Buckets: prometheus.ExponentialBuckets(.002, 2, 15)}, + []string{ + "event", + }) + var metricEgressFirewallRuleCount = prometheus.NewGauge(prometheus.GaugeOpts{ Namespace: MetricOvnkubeNamespace, Subsystem: MetricOvnkubeSubsystemMaster, @@ -288,6 +359,7 @@ const ( func RegisterMasterBase() { prometheus.MustRegister(MetricMasterLeader) prometheus.MustRegister(MetricMasterReadyDuration)
+ prometheus.MustRegister(MetricMasterSyncDuration) prometheus.MustRegister(prometheus.NewGaugeFunc( prometheus.GaugeOpts{ Namespace: MetricOvnkubeNamespace, @@ -341,9 +413,16 @@ func RegisterMasterPerformance(nbClient libovsdbclient.Client) { func RegisterMasterFunctional() { // No need to unregister because process exits when leadership is lost. prometheus.MustRegister(metricEgressIPCount) - if config.Metrics.EnableEIPScaleMetrics { + if config.Metrics.EnableScaleMetrics { + klog.Infof("Scale metrics are enabled") prometheus.MustRegister(metricEgressIPAssignLatency) prometheus.MustRegister(metricEgressIPUnassignLatency) + prometheus.MustRegister(metricNetpolEventLatency) + prometheus.MustRegister(metricNetpolLocalPodEventLatency) + prometheus.MustRegister(metricNetpolPeerPodEventLatency) + prometheus.MustRegister(metricNetpolPeerNamespaceEventLatency) + prometheus.MustRegister(metricNetpolPeerNamespaceAndPodEventLatency) + prometheus.MustRegister(metricPodEventLatency) } prometheus.MustRegister(metricEgressIPNodeUnreacheableCount) prometheus.MustRegister(metricEgressIPRebalanceCount) @@ -448,6 +527,30 @@ func RecordEgressIPRebalance(count int) { metricEgressIPRebalanceCount.Add(float64(count)) } +func RecordNetpolEvent(eventName string, duration time.Duration) { + metricNetpolEventLatency.WithLabelValues(eventName).Observe(duration.Seconds()) +} + +func RecordNetpolLocalPodEvent(eventName string, duration time.Duration) { + metricNetpolLocalPodEventLatency.WithLabelValues(eventName).Observe(duration.Seconds()) +} + +func RecordNetpolPeerPodEvent(eventName string, duration time.Duration) { + metricNetpolPeerPodEventLatency.WithLabelValues(eventName).Observe(duration.Seconds()) +} + +func RecordNetpolPeerNamespaceEvent(eventName string, duration time.Duration) { + metricNetpolPeerNamespaceEventLatency.WithLabelValues(eventName).Observe(duration.Seconds()) +} + +func RecordNetpolPeerNamespaceAndPodEvent(eventName string, duration time.Duration) { + 
metricNetpolPeerNamespaceAndPodEventLatency.WithLabelValues(eventName).Observe(duration.Seconds()) +} + +func RecordPodEvent(eventName string, duration time.Duration) { + metricPodEventLatency.WithLabelValues(eventName).Observe(duration.Seconds()) +} + // UpdateEgressFirewallRuleCount records the number of Egress firewall rules. func UpdateEgressFirewallRuleCount(count float64) { metricEgressFirewallRuleCount.Add(count) diff --git a/go-controller/pkg/node/base_node_network_controller_dpu_test.go b/go-controller/pkg/node/base_node_network_controller_dpu_test.go index b7a511288f..d6d34be39a 100644 --- a/go-controller/pkg/node/base_node_network_controller_dpu_test.go +++ b/go-controller/pkg/node/base_node_network_controller_dpu_test.go @@ -175,7 +175,7 @@ var _ = Describe("Node DPU tests", func() { It("Fails if dpu.connection-details Pod annotation is not present", func() { pod.Annotations = map[string]string{} - podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(pod, nil) + podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(&pod, nil) err := dnnc.addRepPort(&pod, vfRep, ifInfo, clientset) Expect(err).To(HaveOccurred()) Expect(err.Error()).To(ContainSubstring("failed to get dpu annotation")) @@ -198,7 +198,7 @@ var _ = Describe("Node DPU tests", func() { Cmd: genOVSDelPortCmd("pf0vf9"), }) - podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(pod, nil) + podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(&pod, nil) // call addRepPort() err := dnnc.addRepPort(&pod, vfRep, ifInfo, clientset) @@ -226,7 +226,7 @@ var _ = Describe("Node DPU tests", func() { execMock.AddFakeCmd(&ovntest.ExpectedCmd{ Cmd: genOVSDelPortCmd("pf0vf9"), }) - podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(pod, nil) + podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(&pod, nil) // call addRepPort() err := dnnc.addRepPort(&pod, vfRep, ifInfo, clientset) @@ -282,7 +282,7 @@ var _ = Describe("Node DPU 
tests", func() { Cmd: genOVSDelPortCmd("pf0vf9"), }) - podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(pod, nil) + podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(&pod, nil) err := dnnc.addRepPort(&pod, vfRep, ifInfo, clientset) Expect(err).To(HaveOccurred()) @@ -298,7 +298,7 @@ var _ = Describe("Node DPU tests", func() { Cmd: genOVSDelPortCmd("pf0vf9"), }) - podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(pod, nil) + podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(&pod, nil) err := dnnc.addRepPort(&pod, vfRep, ifInfo, clientset) Expect(err).To(HaveOccurred()) @@ -315,7 +315,7 @@ var _ = Describe("Node DPU tests", func() { Cmd: genOVSDelPortCmd("pf0vf9"), }) - podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(pod, nil) + podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(&pod, nil) err := dnnc.addRepPort(&pod, vfRep, ifInfo, clientset) Expect(err).To(HaveOccurred()) @@ -337,7 +337,7 @@ var _ = Describe("Node DPU tests", func() { Expect(err).ToNot(HaveOccurred()) kubeMock.On("UpdatePod", cpod).Return(nil) - podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(pod, nil) + podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(&pod, nil) err = dnnc.addRepPort(&pod, vfRep, ifInfo, clientset) Expect(err).ToNot(HaveOccurred()) @@ -363,7 +363,7 @@ var _ = Describe("Node DPU tests", func() { Cmd: genOVSDelPortCmd("pf0vf9"), }) - podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(pod, nil) + podNamespaceLister.On("Get", mock.AnythingOfType("string")).Return(&pod, nil) err = dnnc.addRepPort(&pod, vfRep, ifInfo, clientset) Expect(err).To(HaveOccurred()) diff --git a/go-controller/pkg/ovn/base_network_controller.go b/go-controller/pkg/ovn/base_network_controller.go index e29e451361..4793d45002 100644 --- a/go-controller/pkg/ovn/base_network_controller.go +++ b/go-controller/pkg/ovn/base_network_controller.go @@ -658,5 +658,5 @@ 
func (bnc *BaseNetworkController) recordNodeErrorEvent(node *kapi.Node, nodeErr } func (bnc *BaseNetworkController) doesNetworkRequireIPAM() bool { - return !(bnc.TopologyType() == types.Layer2Topology && len(bnc.Subnets()) == 0) + return !((bnc.TopologyType() == types.Layer2Topology || bnc.TopologyType() == types.LocalnetTopology) && len(bnc.Subnets()) == 0) } diff --git a/go-controller/pkg/ovn/default_network_controller.go b/go-controller/pkg/ovn/default_network_controller.go index 9e05d7d6d6..dfd8413ea6 100644 --- a/go-controller/pkg/ovn/default_network_controller.go +++ b/go-controller/pkg/ovn/default_network_controller.go @@ -348,35 +348,39 @@ func (oc *DefaultNetworkController) Run(ctx context.Context) error { // Sync external gateway routes. External gateway may be set in namespaces // or via pods. So execute an individual sync method at startup - oc.cleanExGwECMPRoutes() + WithSyncDurationMetricNoError("external gateway routes", oc.cleanExGwECMPRoutes) // WatchNamespaces() should be started first because it has no other // dependencies, and WatchNodes() depends on it - if err := oc.WatchNamespaces(); err != nil { + if err := WithSyncDurationMetric("namespace", oc.WatchNamespaces); err != nil { return err } // WatchNodes must be started next because it creates the node switch // which most other watches depend on. 
// https://github.com/ovn-org/ovn-kubernetes/pull/859 - if err := oc.WatchNodes(); err != nil { + if err := WithSyncDurationMetric("node", oc.WatchNodes); err != nil { return err } + startSvc := time.Now() // Start service watch factory and sync services oc.svcFactory.Start(oc.stopChan) // Services should be started after nodes to prevent LB churn - if err := oc.StartServiceController(oc.wg, true); err != nil { + err := oc.StartServiceController(oc.wg, true) + endSvc := time.Since(startSvc) + metrics.MetricMasterSyncDuration.WithLabelValues("service").Set(endSvc.Seconds()) + if err != nil { return err } - if err := oc.WatchPods(); err != nil { + if err := WithSyncDurationMetric("pod", oc.WatchPods); err != nil { return err } // WatchNetworkPolicy depends on WatchPods and WatchNamespaces - if err := oc.WatchNetworkPolicy(); err != nil { + if err := WithSyncDurationMetric("network policy", oc.WatchNetworkPolicy); err != nil { return err } @@ -391,20 +395,20 @@ func (oc *DefaultNetworkController) Run(ctx context.Context) error { // risk performing a bunch of modifications on the EgressIP objects when // we restart and then have these handlers act on stale data when they // sync. 
- if err := oc.WatchEgressIPNamespaces(); err != nil { + if err := WithSyncDurationMetric("egress ip namespace", oc.WatchEgressIPNamespaces); err != nil { return err } - if err := oc.WatchEgressIPPods(); err != nil { + if err := WithSyncDurationMetric("egress ip pod", oc.WatchEgressIPPods); err != nil { return err } - if err := oc.WatchEgressNodes(); err != nil { + if err := WithSyncDurationMetric("egress node", oc.WatchEgressNodes); err != nil { return err } - if err := oc.WatchEgressIP(); err != nil { + if err := WithSyncDurationMetric("egress ip", oc.WatchEgressIP); err != nil { return err } if util.PlatformTypeIsEgressIPCloudProvider() { - if err := oc.WatchCloudPrivateIPConfig(); err != nil { + if err := WithSyncDurationMetric("cloud private ip config", oc.WatchCloudPrivateIPConfig); err != nil { return err } } @@ -423,7 +427,7 @@ func (oc *DefaultNetworkController) Run(ctx context.Context) error { return err } oc.egressFirewallDNS.Run(egressFirewallDNSDefaultDuration) - err = oc.WatchEgressFirewall() + err = WithSyncDurationMetric("egress firewall", oc.WatchEgressFirewall) if err != nil { return err } @@ -451,7 +455,9 @@ func (oc *DefaultNetworkController) Run(ctx context.Context) error { oc.egressSvcController.Run(1) }() - klog.Infof("Completing all the Watchers took %v", time.Since(start)) + end := time.Since(start) + klog.Infof("Completing all the Watchers took %v", end) + metrics.MetricMasterSyncDuration.WithLabelValues("all watchers").Set(end.Seconds()) if config.Kubernetes.OVNEmptyLbEvents { klog.Infof("Starting unidling controllers") @@ -481,6 +487,24 @@ func (oc *DefaultNetworkController) Run(ctx context.Context) error { return nil } +func WithSyncDurationMetric(resourceName string, f func() error) error { + start := time.Now() + defer func() { + end := time.Since(start) + metrics.MetricMasterSyncDuration.WithLabelValues(resourceName).Set(end.Seconds()) + }() + return f() +} + +func WithSyncDurationMetricNoError(resourceName string, f func()) { + 
start := time.Now() + defer func() { + end := time.Since(start) + metrics.MetricMasterSyncDuration.WithLabelValues(resourceName).Set(end.Seconds()) + }() + f() +} + type defaultNetworkControllerEventHandler struct { baseHandler baseNetworkControllerEventHandler watchFactory *factory.WatchFactory diff --git a/go-controller/pkg/ovn/egressfirewall.go b/go-controller/pkg/ovn/egressfirewall.go index 3c001861b6..9fd532e360 100644 --- a/go-controller/pkg/ovn/egressfirewall.go +++ b/go-controller/pkg/ovn/egressfirewall.go @@ -28,7 +28,8 @@ const ( egressFirewallAppliedCorrectly = "EgressFirewall Rules applied" egressFirewallAddError = "EgressFirewall Rules not correctly added" // egressFirewallACLExtIdKey external ID key for egress firewall ACLs - egressFirewallACLExtIdKey = "egressFirewall" + egressFirewallACLExtIdKey = "egressFirewall" + egressFirewallACLPriorityKey = "priority" ) type egressFirewall struct { @@ -403,7 +404,10 @@ func (oc *DefaultNetworkController) createEgressFirewallRules(priority int, matc aclLogging, // since egressFirewall has direction to-lport, set type to ingress lportIngress, - map[string]string{egressFirewallACLExtIdKey: externalID}, + map[string]string{ + egressFirewallACLExtIdKey: externalID, + egressFirewallACLPriorityKey: fmt.Sprintf("%d", priority), + }, ) ops, err := libovsdbops.CreateOrUpdateACLsOps(oc.nbClient, nil, egressFirewallACL) if err != nil { diff --git a/go-controller/pkg/ovn/egressfirewall_test.go b/go-controller/pkg/ovn/egressfirewall_test.go index 503eac974a..f61cd04ed7 100644 --- a/go-controller/pkg/ovn/egressfirewall_test.go +++ b/go-controller/pkg/ovn/egressfirewall_test.go @@ -202,8 +202,26 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations", func() { // Both ACLs will be removed from the node switch nodeSwitch.ACLs = nil + // keepACL will be re-created with the new external ID + asHash, _ := getNsAddrSetHashNames(namespace1.Name) + newKeepACL := libovsdbops.BuildACL( + buildEgressFwAclName(namespace1.Name, 
t.EgressFirewallStartPriority), + nbdb.ACLDirectionToLport, + t.EgressFirewallStartPriority, + "(ip4.dst == 1.2.3.4/23) && ip4.src == $"+asHash, + nbdb.ACLActionAllow, + t.OvnACLLoggingMeter, + "", + false, + map[string]string{ + egressFirewallACLExtIdKey: "namespace1", + egressFirewallACLPriorityKey: fmt.Sprintf("%d", t.EgressFirewallStartPriority), + }, + nil, + ) + newKeepACL.UUID = "newKeepACL-UUID" // keepACL will be added to the clusterPortGroup - clusterPortGroup.ACLs = []string{keepACL.UUID} + clusterPortGroup.ACLs = []string{newKeepACL.UUID} // Direction of both ACLs will be converted to keepACL.Direction = nbdb.ACLDirectionToLport @@ -211,10 +229,6 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations", func() { keepACL.Name = &newName // check severity was reset from default to nil keepACL.Severity = nil - // subnet exclusion will be deleted - asHash, _ := getNsAddrSetHashNames(namespace1.Name) - keepACL.Match = "(ip4.dst == 1.2.3.4/23) && ip4.src == $" + asHash - // purgeACL ACL will be deleted when test server starts deleting dereferenced ACLs // for now we need to update its fields, since it is present in the db purgeACL.Direction = nbdb.ACLDirectionToLport @@ -225,6 +239,7 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations", func() { expectedDatabaseState := []libovsdb.TestData{ otherACL, purgeACL, + newKeepACL, keepACL, nodeSwitch, joinSwitch, @@ -290,7 +305,10 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations", func() { t.OvnACLLoggingMeter, "", false, - map[string]string{egressFirewallACLExtIdKey: "namespace1"}, + map[string]string{ + egressFirewallACLExtIdKey: "namespace1", + egressFirewallACLPriorityKey: fmt.Sprintf("%d", t.EgressFirewallStartPriority), + }, nil, ) ipv4ACL.UUID = "ipv4ACL-UUID" @@ -358,7 +376,10 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations", func() { t.OvnACLLoggingMeter, "", false, - map[string]string{egressFirewallACLExtIdKey: "namespace1"}, + map[string]string{ + egressFirewallACLExtIdKey: 
"namespace1", + egressFirewallACLPriorityKey: fmt.Sprintf("%d", t.EgressFirewallStartPriority), + }, nil, ) ipv6ACL.UUID = "ipv6ACL-UUID" @@ -435,7 +456,10 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations", func() { t.OvnACLLoggingMeter, "", false, - map[string]string{egressFirewallACLExtIdKey: "namespace1"}, + map[string]string{ + egressFirewallACLExtIdKey: "namespace1", + egressFirewallACLPriorityKey: fmt.Sprintf("%d", t.EgressFirewallStartPriority), + }, nil, ) udpACL.UUID = "udpACL-UUID" @@ -510,7 +534,10 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations", func() { t.OvnACLLoggingMeter, "", false, - map[string]string{egressFirewallACLExtIdKey: "namespace1"}, + map[string]string{ + egressFirewallACLExtIdKey: "namespace1", + egressFirewallACLPriorityKey: fmt.Sprintf("%d", t.EgressFirewallStartPriority), + }, nil, ) ipv4ACL.UUID = "ipv4ACL-UUID" @@ -593,7 +620,10 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations", func() { t.OvnACLLoggingMeter, "", false, - map[string]string{egressFirewallACLExtIdKey: "namespace1"}, + map[string]string{ + egressFirewallACLExtIdKey: "namespace1", + egressFirewallACLPriorityKey: fmt.Sprintf("%d", t.EgressFirewallStartPriority), + }, nil, ) ipv4ACL.UUID = "ipv4ACL-UUID" @@ -819,7 +849,10 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations", func() { t.OvnACLLoggingMeter, "", false, - map[string]string{"egressFirewall": "namespace1"}, + map[string]string{ + egressFirewallACLExtIdKey: "namespace1", + egressFirewallACLPriorityKey: fmt.Sprintf("%d", t.EgressFirewallStartPriority), + }, nil, ) ipv4ACL.UUID = "ipv4ACL-UUID" @@ -898,7 +931,10 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations", func() { t.OvnACLLoggingMeter, "", false, - map[string]string{egressFirewallACLExtIdKey: "namespace1"}, + map[string]string{ + egressFirewallACLExtIdKey: "namespace1", + egressFirewallACLPriorityKey: fmt.Sprintf("%d", t.EgressFirewallStartPriority), + }, nil, ) ipv4ACL.UUID = "ipv4ACL-UUID" @@ -1006,7 +1042,10 @@ 
var _ = ginkgo.Describe("OVN EgressFirewall Operations", func() { t.OvnACLLoggingMeter, "", false, - map[string]string{egressFirewallACLExtIdKey: "namespace1"}, + map[string]string{ + egressFirewallACLExtIdKey: "namespace1", + egressFirewallACLPriorityKey: fmt.Sprintf("%d", t.EgressFirewallStartPriority), + }, nil, ) ipv4ACL.UUID = "ipv4ACL-UUID" @@ -1109,7 +1148,10 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations", func() { t.OvnACLLoggingMeter, "", false, - map[string]string{egressFirewallACLExtIdKey: "namespace1"}, + map[string]string{ + egressFirewallACLExtIdKey: "namespace1", + egressFirewallACLPriorityKey: fmt.Sprintf("%d", t.EgressFirewallStartPriority), + }, nil, ) ipv4ACL.UUID = "ipv4ACL-UUID" @@ -1197,7 +1239,10 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations", func() { t.OvnACLLoggingMeter, "", false, - map[string]string{"egressFirewall": namespace1.Name}, + map[string]string{ + egressFirewallACLExtIdKey: "namespace1", + egressFirewallACLPriorityKey: fmt.Sprintf("%d", t.EgressFirewallStartPriority), + }, nil, ) ipv4ACL.UUID = "ipv4ACL-UUID" @@ -1267,7 +1312,10 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations", func() { t.OvnACLLoggingMeter, "", false, - map[string]string{egressFirewallACLExtIdKey: "namespace1"}, + map[string]string{ + egressFirewallACLExtIdKey: "namespace1", + egressFirewallACLPriorityKey: fmt.Sprintf("%d", t.EgressFirewallStartPriority), + }, nil, ) acl.UUID = "acl-UUID" @@ -1282,6 +1330,103 @@ var _ = ginkgo.Describe("OVN EgressFirewall Operations", func() { err := app.Run([]string{app.Name}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) }) + ginkgo.It(fmt.Sprintf("correctly creates an egressfirewall for namespace name > 43 symbols, gateway mode %s", gwMode), func() { + app.Action = func(ctx *cli.Context) error { + // 52 characters namespace + namespace1 := *newNamespace("abcdefghigklmnopqrstuvwxyzabcdefghigklmnopqrstuvwxyz") + egressFirewall := newEgressFirewallObject("default", namespace1.Name, 
[]egressfirewallapi.EgressFirewallRule{ + { + Type: "Allow", + To: egressfirewallapi.EgressFirewallDestination{ + CIDRSelector: "1.2.3.5/23", + }, + }, + { + Type: "Allow", + To: egressfirewallapi.EgressFirewallDestination{ + CIDRSelector: "2.2.3.5/23", + }, + }, + }) + + fakeOVN.startWithDBSetup(dbSetup, + &egressfirewallapi.EgressFirewallList{ + Items: []egressfirewallapi.EgressFirewall{ + *egressFirewall, + }, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{ + namespace1, + }, + }) + + err := fakeOVN.controller.WatchNamespaces() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + err = fakeOVN.controller.WatchEgressFirewall() + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + asHash, _ := getNsAddrSetHashNames(namespace1.Name) + ipv4ACL1 := libovsdbops.BuildACL( + buildEgressFwAclName(namespace1.Name, t.EgressFirewallStartPriority), + nbdb.ACLDirectionToLport, + t.EgressFirewallStartPriority, + "(ip4.dst == 1.2.3.5/23) && ip4.src == $"+asHash, + nbdb.ACLActionAllow, + t.OvnACLLoggingMeter, + "", + false, + map[string]string{ + egressFirewallACLExtIdKey: namespace1.Name, + egressFirewallACLPriorityKey: fmt.Sprintf("%d", t.EgressFirewallStartPriority), + }, + nil, + ) + ipv4ACL1.UUID = "ipv4ACL1-UUID" + ipv4ACL2 := libovsdbops.BuildACL( + buildEgressFwAclName(namespace1.Name, t.EgressFirewallStartPriority-1), + nbdb.ACLDirectionToLport, + t.EgressFirewallStartPriority-1, + "(ip4.dst == 2.2.3.5/23) && ip4.src == $"+asHash, + nbdb.ACLActionAllow, + t.OvnACLLoggingMeter, + "", + false, + map[string]string{ + egressFirewallACLExtIdKey: namespace1.Name, + egressFirewallACLPriorityKey: fmt.Sprintf("%d", t.EgressFirewallStartPriority-1), + }, + nil, + ) + ipv4ACL2.UUID = "ipv4ACL2-UUID" + + // new ACL will be added to the port group + clusterPortGroup.ACLs = []string{ipv4ACL1.UUID, ipv4ACL2.UUID} + expectedDatabaseState := append(initialData, ipv4ACL1, ipv4ACL2) + + gomega.Eventually(fakeOVN.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + + // NOTE: 
syncEgressFirewall is not calling libovsdbops.BuildACL and directly calls CreateOrUpdateACLs + // This part of test ensures syncEgressFirewall code path is tested well and that we truncate the ACL names correctly + err = fakeOVN.controller.syncEgressFirewall([]interface{}{*egressFirewall}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(fakeOVN.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + + err = fakeOVN.fakeClient.EgressFirewallClient.K8sV1().EgressFirewalls(egressFirewall.Namespace).Delete(context.TODO(), egressFirewall.Name, *metav1.NewDeleteOptions(0)) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // ACL should be removed from the port group egfw is deleted + clusterPortGroup.ACLs = []string{} + // this ACL will be deleted when test server starts deleting dereferenced ACLs + expectedDatabaseState = append(initialData, ipv4ACL1, ipv4ACL2) + gomega.Eventually(fakeOVN.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState)) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) } }) }) diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go index 00c61062f6..2d778703f5 100644 --- a/go-controller/pkg/ovn/egressip.go +++ b/go-controller/pkg/ovn/egressip.go @@ -2273,7 +2273,7 @@ func (oc *DefaultNetworkController) addStandByEgressIPAssignment(podKey string, // (routing pod traffic to the egress node) and NAT objects on the egress node // (SNAT-ing to the egress IP). 
func (e *egressIPController) addPodEgressIPAssignment(egressIPName string, status egressipv1.EgressIPStatusItem, pod *kapi.Pod, podIPs []*net.IPNet) (err error) { - if config.Metrics.EnableEIPScaleMetrics { + if config.Metrics.EnableScaleMetrics { start := time.Now() defer func() { if err != nil { @@ -2306,7 +2306,7 @@ func (e *egressIPController) addPodEgressIPAssignment(egressIPName string, statu // deletePodEgressIPAssignment deletes the OVN programmed egress IP // configuration mentioned for addPodEgressIPAssignment. func (e *egressIPController) deletePodEgressIPAssignment(egressIPName string, status egressipv1.EgressIPStatusItem, pod *kapi.Pod, podIPs []*net.IPNet) (err error) { - if config.Metrics.EnableEIPScaleMetrics { + if config.Metrics.EnableScaleMetrics { start := time.Now() defer func() { if err != nil { diff --git a/go-controller/pkg/ovn/master.go b/go-controller/pkg/ovn/master.go index 248fed6b18..b37833f6a3 100644 --- a/go-controller/pkg/ovn/master.go +++ b/go-controller/pkg/ovn/master.go @@ -14,6 +14,7 @@ import ( "k8s.io/klog/v2" utilnet "k8s.io/utils/net" + libovsdbclient "github.com/ovn-org/libovsdb/client" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdbops" @@ -118,23 +119,43 @@ func (oc *DefaultNetworkController) SetupMaster(existingNodeNames []string) erro } oc.defaultCOPPUUID = *(logicalRouter.Copp) - // Create a cluster-wide port group that all logical switch ports are part of - pg := libovsdbops.BuildPortGroup(types.ClusterPortGroupName, types.ClusterPortGroupName, nil, nil) - err = libovsdbops.CreateOrUpdatePortGroups(oc.nbClient, pg) - if err != nil { - klog.Errorf("Failed to create cluster port group: %v", err) + pg := &nbdb.PortGroup{ + Name: types.ClusterPortGroupName, + } + pg, err = libovsdbops.GetPortGroup(oc.nbClient, pg) + if err != nil && err != libovsdbclient.ErrNotFound { return err } + if pg == 
nil { + // we didn't find an existing clusterPG, let's create a new empty PG (fresh cluster install) + // Create a cluster-wide port group that all logical switch ports are part of + pg := libovsdbops.BuildPortGroup(types.ClusterPortGroupName, types.ClusterPortGroupName, nil, nil) + err = libovsdbops.CreateOrUpdatePortGroups(oc.nbClient, pg) + if err != nil { + klog.Errorf("Failed to create cluster port group: %v", err) + return err + } + } - // Create a cluster-wide port group with all node-to-cluster router - // logical switch ports. Currently the only user is multicast but it might - // be used for other features in the future. - pg = libovsdbops.BuildPortGroup(types.ClusterRtrPortGroupName, types.ClusterRtrPortGroupName, nil, nil) - err = libovsdbops.CreateOrUpdatePortGroups(oc.nbClient, pg) - if err != nil { - klog.Errorf("Failed to create cluster port group: %v", err) + pg = &nbdb.PortGroup{ + Name: types.ClusterRtrPortGroupName, + } + pg, err = libovsdbops.GetPortGroup(oc.nbClient, pg) + if err != nil && err != libovsdbclient.ErrNotFound { return err } + if pg == nil { + // we didn't find an existing clusterRtrPG, let's create a new empty PG (fresh cluster install) + // Create a cluster-wide port group with all node-to-cluster router + // logical switch ports. Currently the only user is multicast but it might + // be used for other features in the future. + pg = libovsdbops.BuildPortGroup(types.ClusterRtrPortGroupName, types.ClusterRtrPortGroupName, nil, nil) + err = libovsdbops.CreateOrUpdatePortGroups(oc.nbClient, pg) + if err != nil { + klog.Errorf("Failed to create cluster port group: %v", err) + return err + } + } // If supported, enable IGMP relay on the router to forward multicast // traffic between nodes. 
diff --git a/go-controller/pkg/ovn/ovn.go b/go-controller/pkg/ovn/ovn.go index ce6cccdfdf..1c2e2a5fde 100644 --- a/go-controller/pkg/ovn/ovn.go +++ b/go-controller/pkg/ovn/ovn.go @@ -13,6 +13,7 @@ import ( libovsdbclient "github.com/ovn-org/libovsdb/client" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/metrics" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" addressset "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/address_set" egresssvc "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn/controller/egress_services" @@ -128,6 +129,17 @@ func (oc *DefaultNetworkController) ensurePod(oldPod, pod *kapi.Pod, addPort boo if !util.PodScheduled(pod) { return nil } + if config.Metrics.EnableScaleMetrics { + start := time.Now() + defer func() { + duration := time.Since(start) + eventName := "add" + if !addPort { + eventName = "update" + } + metrics.RecordPodEvent(eventName, duration) + }() + } if oldPod != nil && (exGatewayAnnotationsChanged(oldPod, pod) || networkStatusAnnotationsChanged(oldPod, pod)) { // No matter if a pod is ovn networked, or host networked, we still need to check for exgw @@ -157,6 +169,13 @@ func (oc *DefaultNetworkController) ensurePod(oldPod, pod *kapi.Pod, addPort boo // removePod tried to tear down a pod. It returns nil on success and error on failure; // failure indicates the pod tear down should be retried later. 
func (oc *DefaultNetworkController) removePod(pod *kapi.Pod, portInfo *lpInfo) error { + if config.Metrics.EnableScaleMetrics { + start := time.Now() + defer func() { + duration := time.Since(start) + metrics.RecordPodEvent("delete", duration) + }() + } if util.PodWantsHostNetwork(pod) { if err := oc.deletePodExternalGW(pod); err != nil { return fmt.Errorf("unable to delete external gateway routes for pod %s: %w", diff --git a/go-controller/pkg/ovn/policy.go b/go-controller/pkg/ovn/policy.go index 863715381b..30e1463949 100644 --- a/go-controller/pkg/ovn/policy.go +++ b/go-controller/pkg/ovn/policy.go @@ -5,11 +5,14 @@ import ( "net" "strings" "sync" + "time" libovsdbclient "github.com/ovn-org/libovsdb/client" ovsdb "github.com/ovn-org/libovsdb/ovsdb" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdbops" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/metrics" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" @@ -820,6 +823,13 @@ func (oc *DefaultNetworkController) denyPGDeletePorts(np *networkPolicy, portNam // handleLocalPodSelectorAddFunc adds a new pod to an existing NetworkPolicy, should be retriable. 
func (oc *DefaultNetworkController) handleLocalPodSelectorAddFunc(np *networkPolicy, objs ...interface{}) error { + if config.Metrics.EnableScaleMetrics { + start := time.Now() + defer func() { + duration := time.Since(start) + metrics.RecordNetpolLocalPodEvent("add", duration) + }() + } np.RLock() defer np.RUnlock() if np.deleted { @@ -864,6 +874,13 @@ func (oc *DefaultNetworkController) handleLocalPodSelectorAddFunc(np *networkPol // handleLocalPodSelectorDelFunc handles delete event for local pod, should be retriable func (oc *DefaultNetworkController) handleLocalPodSelectorDelFunc(np *networkPolicy, objs ...interface{}) error { + if config.Metrics.EnableScaleMetrics { + start := time.Now() + defer func() { + duration := time.Since(start) + metrics.RecordNetpolLocalPodEvent("delete", duration) + }() + } np.RLock() defer np.RUnlock() if np.deleted { @@ -1172,7 +1189,13 @@ func (oc *DefaultNetworkController) createNetworkPolicy(policy *knet.NetworkPoli // if addNetworkPolicy fails, create or delete operation can be retried func (oc *DefaultNetworkController) addNetworkPolicy(policy *knet.NetworkPolicy) error { klog.Infof("Adding network policy %s", getPolicyKey(policy)) - + if config.Metrics.EnableScaleMetrics { + start := time.Now() + defer func() { + duration := time.Since(start) + metrics.RecordNetpolEvent("add", duration) + }() + } // To not hold nsLock for the whole process on network policy creation, we do the following: // 1. save required namespace information to use for netpol create // 2. 
create network policy without ns Lock @@ -1277,7 +1300,13 @@ func (oc *DefaultNetworkController) buildNetworkPolicyACLs(np *networkPolicy, ac func (oc *DefaultNetworkController) deleteNetworkPolicy(policy *knet.NetworkPolicy) error { npKey := getPolicyKey(policy) klog.Infof("Deleting network policy %s", npKey) - + if config.Metrics.EnableScaleMetrics { + start := time.Now() + defer func() { + duration := time.Since(start) + metrics.RecordNetpolEvent("delete", duration) + }() + } // First lock and update namespace nsInfo, nsUnlock := oc.getNamespaceLocked(policy.Namespace, false) if nsInfo != nil { @@ -1372,6 +1401,13 @@ func (oc *DefaultNetworkController) cleanupNetworkPolicy(np *networkPolicy) erro // selected as a peer by a NetworkPolicy's ingress/egress section to that // ingress/egress address set func (oc *DefaultNetworkController) handlePeerPodSelectorAddUpdate(np *networkPolicy, gp *gressPolicy, objs ...interface{}) error { + if config.Metrics.EnableScaleMetrics { + start := time.Now() + defer func() { + duration := time.Since(start) + metrics.RecordNetpolPeerPodEvent("add", duration) + }() + } np.RLock() defer np.RUnlock() if np.deleted { @@ -1394,6 +1430,13 @@ func (oc *DefaultNetworkController) handlePeerPodSelectorAddUpdate(np *networkPo // matches a NetworkPolicy ingress/egress section's selectors from that // ingress/egress address set func (oc *DefaultNetworkController) handlePeerPodSelectorDelete(np *networkPolicy, gp *gressPolicy, podSelector labels.Selector, obj interface{}) error { + if config.Metrics.EnableScaleMetrics { + start := time.Now() + defer func() { + duration := time.Since(start) + metrics.RecordNetpolPeerPodEvent("delete", duration) + }() + } np.RLock() defer np.RUnlock() if np.deleted { @@ -1479,6 +1522,13 @@ func (oc *DefaultNetworkController) addPeerPodHandler(podSelector *metav1.LabelS func (oc *DefaultNetworkController) handlePeerNamespaceAndPodAdd(np *networkPolicy, gp *gressPolicy, podSelector labels.Selector, obj interface{}) 
error { + if config.Metrics.EnableScaleMetrics { + start := time.Now() + defer func() { + duration := time.Since(start) + metrics.RecordNetpolPeerNamespaceAndPodEvent("add", duration) + }() + } namespace := obj.(*kapi.Namespace) np.RLock() locked := true @@ -1528,6 +1578,13 @@ func (oc *DefaultNetworkController) handlePeerNamespaceAndPodAdd(np *networkPoli } func (oc *DefaultNetworkController) handlePeerNamespaceAndPodDel(np *networkPolicy, gp *gressPolicy, obj interface{}) error { + if config.Metrics.EnableScaleMetrics { + start := time.Now() + defer func() { + duration := time.Since(start) + metrics.RecordNetpolPeerNamespaceAndPodEvent("delete", duration) + }() + } np.RLock() defer np.RUnlock() if np.deleted { @@ -1593,6 +1650,13 @@ func (oc *DefaultNetworkController) addPeerNamespaceAndPodHandler(namespaceSelec } func (oc *DefaultNetworkController) handlePeerNamespaceSelectorAdd(np *networkPolicy, gp *gressPolicy, objs ...interface{}) error { + if config.Metrics.EnableScaleMetrics { + start := time.Now() + defer func() { + duration := time.Since(start) + metrics.RecordNetpolPeerNamespaceEvent("add", duration) + }() + } np.RLock() if np.deleted { np.RUnlock() @@ -1623,6 +1687,13 @@ func (oc *DefaultNetworkController) handlePeerNamespaceSelectorAdd(np *networkPo } func (oc *DefaultNetworkController) handlePeerNamespaceSelectorDel(np *networkPolicy, gp *gressPolicy, objs ...interface{}) error { + if config.Metrics.EnableScaleMetrics { + start := time.Now() + defer func() { + duration := time.Since(start) + metrics.RecordNetpolPeerNamespaceEvent("delete", duration) + }() + } np.RLock() if np.deleted { np.RUnlock() diff --git a/test/e2e/multihoming.go b/test/e2e/multihoming.go index 682f09055b..9af8ce159f 100644 --- a/test/e2e/multihoming.go +++ b/test/e2e/multihoming.go @@ -25,13 +25,16 @@ import ( var _ = Describe("Multi Homing", func() { const ( - podName = "tinypod" - secondaryNetworkCIDR = "10.128.0.0/16" - secondaryNetworkName = "tenant-blue" - 
secondaryFlatL2IgnoreCIDR = "10.128.0.0/29" - secondaryFlatL2NetworkCIDR = "10.128.0.0/24" - netPrefixLengthPerNode = 24 - secondaryIPv6CIDR = "10:100:200::0/64" + podName = "tinypod" + secondaryNetworkCIDR = "10.128.0.0/16" + secondaryNetworkName = "tenant-blue" + secondaryFlatL2IgnoreCIDR = "10.128.0.0/29" + secondaryFlatL2NetworkCIDR = "10.128.0.0/24" + secondaryLocalnetIgnoreCIDR = "60.128.0.0/29" + secondaryLocalnetNetworkCIDR = "60.128.0.0/24" + netPrefixLengthPerNode = 24 + localnetVLANID = 10 + secondaryIPv6CIDR = "10:100:200::0/64" ) f := wrappedTestFramework("multi-homing") @@ -81,7 +84,7 @@ var _ = Describe("Multi Homing", func() { if netConfig.excludeCIDRs != nil { podIP, err := podIPForAttachment(cs, pod.GetNamespace(), pod.GetName(), secondaryNetworkName, 0) Expect(err).NotTo(HaveOccurred()) - Expect(inRange(secondaryNetworkCIDR, podIP)).To(Succeed()) + Expect(inRange(popCIDRsPerNodePrefix(netConfig.cidr, netPrefixLengthPerNode), podIP)).To(Succeed()) for _, excludedRange := range netConfig.excludeCIDRs { Expect(inRange(excludedRange, podIP)).To( MatchError(fmt.Errorf("ip [%s] is NOT in range %s", podIP, excludedRange))) @@ -160,6 +163,71 @@ var _ = Describe("Multi Homing", func() { name: podName, }, ), + table.Entry( + "when attaching to an localnet - switched - network", + networkAttachmentConfig{ + cidr: secondaryLocalnetNetworkCIDR, + name: secondaryNetworkName, + topology: "localnet", + vlanID: localnetVLANID, + }, + podConfiguration{ + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, + name: podName, + }, + ), + table.Entry( + "when attaching to an Localnet - switched - network featuring `excludeCIDR`s", + networkAttachmentConfig{ + cidr: secondaryLocalnetNetworkCIDR, + name: secondaryNetworkName, + topology: "localnet", + excludeCIDRs: []string{secondaryLocalnetIgnoreCIDR}, + vlanID: localnetVLANID, + }, + podConfiguration{ + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, + name: 
podName, + }, + ), + table.Entry( + "when attaching to an localnet - switched - network without IPAM", + networkAttachmentConfig{ + name: secondaryNetworkName, + topology: "localnet", + vlanID: localnetVLANID, + }, + podConfiguration{ + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, + name: podName, + }, + ), + table.Entry( + "when attaching to an localnet - switched - network with an IPv6 subnet", + networkAttachmentConfig{ + cidr: secondaryIPv6CIDR, + name: secondaryNetworkName, + topology: "localnet", + vlanID: localnetVLANID, + }, + podConfiguration{ + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, + name: podName, + }, + ), + table.Entry( + "when attaching to an L2 - switched - network with a dual stack configuration", + networkAttachmentConfig{ + cidr: strings.Join([]string{secondaryLocalnetNetworkCIDR, secondaryIPv6CIDR}, ","), + name: secondaryNetworkName, + topology: "localnet", + vlanID: localnetVLANID, + }, + podConfiguration{ + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, + name: podName, + }, + ), ) }) @@ -395,6 +463,107 @@ var _ = Describe("Multi Homing", func() { nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, }, ), + table.Entry( + "can communicate over an Localnet secondary network when the pods are scheduled on the same node", + networkAttachmentConfig{ + name: secondaryNetworkName, + topology: "localnet", + cidr: secondaryLocalnetNetworkCIDR, + }, + podConfiguration{ + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, + name: clientPodName, + nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, + }, + podConfiguration{ + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, + name: podName, + containerCmd: httpServerContainerCmd(port), + nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, + }, + ), + table.Entry( + "can communicate over an Localnet 
secondary network without IPAM when the pods are scheduled on the same node", + networkAttachmentConfig{ + name: secondaryNetworkName, + topology: "localnet", + }, + podConfiguration{ + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, + name: clientPodName, + nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, + isPrivileged: true, + }, + podConfiguration{ + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, + name: podName, + containerCmd: httpServerContainerCmd(port), + nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, + isPrivileged: true, + }, + ), + table.Entry( + "can communicate over an localnet secondary network without IPAM when the pods are scheduled on the same node, with static IPs configured via network selection elements", + networkAttachmentConfig{ + name: secondaryNetworkName, + topology: "localnet", + }, + podConfiguration{ + attachments: []nadapi.NetworkSelectionElement{{ + Name: secondaryNetworkName, + IPRequest: []string{clientIP}, + }}, + name: clientPodName, + nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, + }, + podConfiguration{ + attachments: []nadapi.NetworkSelectionElement{{ + Name: secondaryNetworkName, + IPRequest: []string{staticServerIP}, + }}, + name: podName, + containerCmd: httpServerContainerCmd(port), + nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, + }, + ), + table.Entry( + "can communicate over an localnet secondary network with an IPv6 subnet when pods are scheduled on the same node", + networkAttachmentConfig{ + name: secondaryNetworkName, + topology: "localnet", + cidr: secondaryIPv6CIDR, + }, + podConfiguration{ + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, + name: clientPodName, + nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, + }, + podConfiguration{ + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, + name: 
podName, + containerCmd: httpServerContainerCmd(port), + nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, + }, + ), + table.Entry( + "can communicate over an localnet secondary network with a dual stack configuration when pods are scheduled on the same node", + networkAttachmentConfig{ + name: secondaryNetworkName, + topology: "localnet", + cidr: strings.Join([]string{secondaryLocalnetNetworkCIDR, secondaryIPv6CIDR}, ","), + }, + podConfiguration{ + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, + name: clientPodName, + nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, + }, + podConfiguration{ + attachments: []nadapi.NetworkSelectionElement{{Name: secondaryNetworkName}}, + name: podName, + containerCmd: httpServerContainerCmd(port), + nodeSelector: map[string]string{nodeHostnameKey: workerTwoNodeName}, + }, + ), ) }) @@ -482,6 +651,7 @@ type networkAttachmentConfig struct { name string topology string networkName string + vlanID int } func (nac networkAttachmentConfig) attachmentName() string { @@ -507,7 +677,8 @@ func generateNAD(config networkAttachmentConfig) *nadapi.NetworkAttachmentDefini "subnets": %q, "excludeSubnets": %q, "mtu": 1300, - "netAttachDefName": %q + "netAttachDefName": %q, + "vlanID": %d } `, config.attachmentName(), @@ -515,6 +686,7 @@ func generateNAD(config networkAttachmentConfig) *nadapi.NetworkAttachmentDefini config.cidr, strings.Join(config.excludeCIDRs, ","), namespacedName(config.namespace, config.name), + config.vlanID, ) return generateNetAttachDef(config.namespace, config.name, nadSpec) } diff --git a/test/e2e/static_pods.go b/test/e2e/static_pods.go new file mode 100644 index 0000000000..767a28e000 --- /dev/null +++ b/test/e2e/static_pods.go @@ -0,0 +1,113 @@ +package e2e + +import ( + "fmt" + "io/ioutil" + "os" + "time" + + "github.com/onsi/ginkgo" + "github.com/onsi/gomega" + v1 "k8s.io/api/core/v1" + clientset "k8s.io/client-go/kubernetes" + 
"k8s.io/kubernetes/test/e2e/framework" + e2enode "k8s.io/kubernetes/test/e2e/framework/node" + e2epod "k8s.io/kubernetes/test/e2e/framework/pod" +) + +// pulled from https://github.com/kubernetes/kubernetes/blob/v1.26.2/test/e2e/framework/pod/wait.go#L468 +// had to modify function due to restart policy on static pods being set to always, which caused function to fail +func waitForPodRunningInNamespaceTimeout(c clientset.Interface, podName, namespace string, timeout time.Duration) error { + return e2epod.WaitForPodCondition(c, namespace, podName, fmt.Sprintf("%s", v1.PodRunning), timeout, func(pod *v1.Pod) (bool, error) { + switch pod.Status.Phase { + case v1.PodRunning: + ginkgo.By("Saw pod running") + return true, nil + default: + return false, nil + } + }) +} + +func createStaticPod(f *framework.Framework, nodeName string, podYaml string) { + //create file + var podFile = "static-pod.yaml" + if err := ioutil.WriteFile(podFile, []byte(podYaml), 0644); err != nil { + framework.Failf("Unable to write static-pod.yaml to disk: %v", err) + } + defer func() { + if err := os.Remove(podFile); err != nil { + framework.Logf("Unable to remove the static-pod.yaml from disk: %v", err) + } + }() + var dst = fmt.Sprintf("%s:/etc/kubernetes/manifests/%s", nodeName, podFile) + cmd := []string{"docker", "cp", podFile, dst} + framework.Logf("Running command %v", cmd) + _, err := runCommand(cmd...) + if err != nil { + framework.Failf("failed to copy pod file to node %s", nodeName) + } + +} + +func removeStaticPodFile(nodeName string, podFile string) { + cmd := []string{"docker", "exec", nodeName, "bash", "-c", "rm /etc/kubernetes/manifests/static-pod.yaml"} + framework.Logf("Running command %v", cmd) + _, err := runCommand(cmd...) 
+ if err != nil { + framework.Failf("failed to remove pod file from node %s", nodeName) + } + +} + +//This test does the following +//Applies a static-pod.yaml file to a nodes /etc/kubernetes/manifest dir +//Expects the static pod to succeed +var _ = ginkgo.Describe("Creating a static pod on a node", func() { + + const ( + podName string = "static-pod" + podFile string = "static-pod.yaml" + agnhostImage string = "k8s.gcr.io/e2e-test-images/agnhost:2.26" + ) + + f := wrappedTestFramework("staticpods") + + var cs clientset.Interface + + ginkgo.BeforeEach(func() { + cs = f.ClientSet + }) + + ginkgo.It("Should successfully create then remove a static pod", func() { + nodes, err := e2enode.GetBoundedReadySchedulableNodes(cs, 3) + framework.ExpectNoError(err) + if len(nodes.Items) < 1 { + framework.Failf("Test requires 1 Ready node, but there are none") + } + nodeName := nodes.Items[0].Name + podName := fmt.Sprintf("static-pod-%s", nodeName) + + ginkgo.By("copying a pod.yaml file into the /etc/kubernetes/manifests dir of a node") + framework.Logf("creating %s on node %s", podName, nodeName) + var staticPodYaml = fmt.Sprintf(`apiVersion: v1 +kind: Pod +metadata: + name: static-pod + namespace: %s +spec: + containers: + - name: web + image: %s + command: ["/bin/bash", "-c", "trap : TERM INT; sleep infinity & wait"] +`, f.Namespace.Name, agnhostImage) + createStaticPod(f, nodeName, staticPodYaml) + err = waitForPodRunningInNamespaceTimeout(f.ClientSet, podName, f.Namespace.Name, time.Second*30) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + ginkgo.By("Removing the pod file from the nodes /etc/kubernetes/manifests") + framework.Logf("Removing %s from %s", podName, nodeName) + removeStaticPodFile(nodeName, podFile) + err = e2epod.WaitForPodNotFoundInNamespace(f.ClientSet, podName, f.Namespace.Name, time.Second*30) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + }) +})