From 901989c925606482a5e7b0698ac4e4c10125c35d Mon Sep 17 00:00:00 2001 From: Nadia Pinaeva Date: Wed, 18 Dec 2024 21:34:17 +0100 Subject: [PATCH 01/51] (C)UDN CRD: update CEL validations. - remove subnets filed validation from Layer3 config, as it is required for Layer3 (not for Layer2 though) and length validation is already built into subnets field. - fix mtu validation when mtu field is not present - add `isCIDR() &&` or `!isCIDR ||` checks in front of all cidr() usages that makes sure given expressions will not fire an error in case of the wrong cidr. We have separate isCIDR() validations on every cidr field. - simplify `hostSubnet < 32` check for v4 networks - add MaxLenght limitation for CIDR to improve CEL cost estimation Signed-off-by: Nadia Pinaeva --- ...ovn.org_clusteruserdefinednetworks.yaml.j2 | 53 +++++++++++++++++-- .../k8s.ovn.org_userdefinednetworks.yaml.j2 | 52 ++++++++++++++++-- .../pkg/crd/userdefinednetwork/v1/shared.go | 28 ++++------ 3 files changed, 106 insertions(+), 27 deletions(-) diff --git a/dist/templates/k8s.ovn.org_clusteruserdefinednetworks.yaml.j2 b/dist/templates/k8s.ovn.org_clusteruserdefinednetworks.yaml.j2 index cad81a2c28..faca8b6073 100644 --- a/dist/templates/k8s.ovn.org_clusteruserdefinednetworks.yaml.j2 +++ b/dist/templates/k8s.ovn.org_clusteruserdefinednetworks.yaml.j2 @@ -114,9 +114,17 @@ spec: When omitted, the platform will choose a reasonable default which is subject to change over time. items: type: string + x-kubernetes-validations: + - message: CIDR is invalid + rule: isCIDR(self) maxItems: 2 minItems: 1 type: array + x-kubernetes-validations: + - message: When 2 CIDRs are set, they must be from different + IP families + rule: size(self) != 2 || !isCIDR(self[0]) || !isCIDR(self[1]) + || cidr(self[0]).ip().family() != cidr(self[1]).ip().family() mtu: description: |- MTU is the maximum transmission unit for a network. @@ -145,22 +153,33 @@ spec: and users must configure IP addresses for the pods. As a consequence, Port security only prevents MAC spoofing. items: type: string + x-kubernetes-validations: + - message: CIDR is invalid + rule: isCIDR(self) maxItems: 2 minItems: 1 type: array + x-kubernetes-validations: + - message: When 2 CIDRs are set, they must be from different + IP families + rule: size(self) != 2 || !isCIDR(self[0]) || !isCIDR(self[1]) + || cidr(self[0]).ip().family() != cidr(self[1]).ip().family() required: - role type: object x-kubernetes-validations: - message: Subnets is required for Primary Layer2 topology - rule: self.role != 'Primary' || has(self.subnets) && size(self.subnets) - > 0 + rule: self.role != 'Primary' || has(self.subnets) - message: JoinSubnets is only supported for Primary network rule: '!has(self.joinSubnets) || has(self.role) && self.role == ''Primary''' - message: IPAMLifecycle is only supported when subnets are set rule: '!has(self.ipamLifecycle) || has(self.subnets) && size(self.subnets) > 0' + - message: MTU should be greater than or equal to 1280 when IPv6 + subent is used + rule: '!has(self.subnets) || !has(self.mtu) || !self.subnets.exists_one(i, + isCIDR(i) && cidr(i).ip().family() == 6) || self.mtu >= 1280' layer3: description: Layer3 is the Layer3 topology configuration. properties: @@ -174,9 +193,17 @@ spec: When omitted, the platform will choose a reasonable default which is subject to change over time. 
items: type: string + x-kubernetes-validations: + - message: CIDR is invalid + rule: isCIDR(self) maxItems: 2 minItems: 1 type: array + x-kubernetes-validations: + - message: When 2 CIDRs are set, they must be from different + IP families + rule: size(self) != 2 || !isCIDR(self[0]) || !isCIDR(self[1]) + || cidr(self[0]).ip().family() != cidr(self[1]).ip().family() mtu: description: |- MTU is the maximum transmission unit for a network. @@ -209,6 +236,9 @@ spec: description: CIDR specifies L3Subnet, which is split into smaller subnets for every node. type: string + x-kubernetes-validations: + - message: CIDR is invalid + rule: isCIDR(self) hostSubnet: description: |- HostSubnet specifies the subnet size for every node. @@ -221,19 +251,34 @@ spec: required: - cidr type: object + x-kubernetes-validations: + - message: HostSubnet must be smaller than CIDR subnet + rule: '!has(self.hostSubnet) || !isCIDR(self.cidr) || + self.hostSubnet > cidr(self.cidr).prefixLength()' + - message: HostSubnet must < 32 for ipv4 CIDR + rule: '!has(self.hostSubnet) || !isCIDR(self.cidr) || + (cidr(self.cidr).ip().family() == 4 && self.hostSubnet + < 32)' maxItems: 2 minItems: 1 type: array + x-kubernetes-validations: + - message: When 2 CIDRs are set, they must be from different + IP families + rule: size(self) != 2 || !isCIDR(self[0].cidr) || !isCIDR(self[1].cidr) + || cidr(self[0].cidr).ip().family() != cidr(self[1].cidr).ip().family() required: - role - subnets type: object x-kubernetes-validations: - - message: Subnets is required for Layer3 topology - rule: has(self.subnets) && size(self.subnets) > 0 - message: JoinSubnets is only supported for Primary network rule: '!has(self.joinSubnets) || has(self.role) && self.role == ''Primary''' + - message: MTU should be greater than or equal to 1280 when IPv6 + subent is used + rule: '!has(self.subnets) || !has(self.mtu) || !self.subnets.exists_one(i, + cidr(i.cidr).ip().family() == 6) || self.mtu >= 1280' topology: description: |- Topology describes network configuration. diff --git a/dist/templates/k8s.ovn.org_userdefinednetworks.yaml.j2 b/dist/templates/k8s.ovn.org_userdefinednetworks.yaml.j2 index 49856ca206..fda6c5978b 100644 --- a/dist/templates/k8s.ovn.org_userdefinednetworks.yaml.j2 +++ b/dist/templates/k8s.ovn.org_userdefinednetworks.yaml.j2 @@ -62,9 +62,17 @@ spec: When omitted, the platform will choose a reasonable default which is subject to change over time. items: type: string + x-kubernetes-validations: + - message: CIDR is invalid + rule: isCIDR(self) maxItems: 2 minItems: 1 type: array + x-kubernetes-validations: + - message: When 2 CIDRs are set, they must be from different IP + families + rule: size(self) != 2 || !isCIDR(self[0]) || !isCIDR(self[1]) + || cidr(self[0]).ip().family() != cidr(self[1]).ip().family() mtu: description: |- MTU is the maximum transmission unit for a network. @@ -93,22 +101,33 @@ spec: and users must configure IP addresses for the pods. As a consequence, Port security only prevents MAC spoofing. 
items: type: string + x-kubernetes-validations: + - message: CIDR is invalid + rule: isCIDR(self) maxItems: 2 minItems: 1 type: array + x-kubernetes-validations: + - message: When 2 CIDRs are set, they must be from different IP + families + rule: size(self) != 2 || !isCIDR(self[0]) || !isCIDR(self[1]) + || cidr(self[0]).ip().family() != cidr(self[1]).ip().family() required: - role type: object x-kubernetes-validations: - message: Subnets is required for Primary Layer2 topology - rule: self.role != 'Primary' || has(self.subnets) && size(self.subnets) - > 0 + rule: self.role != 'Primary' || has(self.subnets) - message: JoinSubnets is only supported for Primary network rule: '!has(self.joinSubnets) || has(self.role) && self.role == ''Primary''' - message: IPAMLifecycle is only supported when subnets are set rule: '!has(self.ipamLifecycle) || has(self.subnets) && size(self.subnets) > 0' + - message: MTU should be greater than or equal to 1280 when IPv6 subent + is used + rule: '!has(self.subnets) || !has(self.mtu) || !self.subnets.exists_one(i, + isCIDR(i) && cidr(i).ip().family() == 6) || self.mtu >= 1280' layer3: description: Layer3 is the Layer3 topology configuration. properties: @@ -122,9 +141,17 @@ spec: When omitted, the platform will choose a reasonable default which is subject to change over time. items: type: string + x-kubernetes-validations: + - message: CIDR is invalid + rule: isCIDR(self) maxItems: 2 minItems: 1 type: array + x-kubernetes-validations: + - message: When 2 CIDRs are set, they must be from different IP + families + rule: size(self) != 2 || !isCIDR(self[0]) || !isCIDR(self[1]) + || cidr(self[0]).ip().family() != cidr(self[1]).ip().family() mtu: description: |- MTU is the maximum transmission unit for a network. @@ -157,6 +184,9 @@ spec: description: CIDR specifies L3Subnet, which is split into smaller subnets for every node. type: string + x-kubernetes-validations: + - message: CIDR is invalid + rule: isCIDR(self) hostSubnet: description: |- HostSubnet specifies the subnet size for every node. @@ -169,19 +199,33 @@ spec: required: - cidr type: object + x-kubernetes-validations: + - message: HostSubnet must be smaller than CIDR subnet + rule: '!has(self.hostSubnet) || !isCIDR(self.cidr) || self.hostSubnet + > cidr(self.cidr).prefixLength()' + - message: HostSubnet must < 32 for ipv4 CIDR + rule: '!has(self.hostSubnet) || !isCIDR(self.cidr) || (cidr(self.cidr).ip().family() + == 4 && self.hostSubnet < 32)' maxItems: 2 minItems: 1 type: array + x-kubernetes-validations: + - message: When 2 CIDRs are set, they must be from different IP + families + rule: size(self) != 2 || !isCIDR(self[0].cidr) || !isCIDR(self[1].cidr) + || cidr(self[0].cidr).ip().family() != cidr(self[1].cidr).ip().family() required: - role - subnets type: object x-kubernetes-validations: - - message: Subnets is required for Layer3 topology - rule: has(self.subnets) && size(self.subnets) > 0 - message: JoinSubnets is only supported for Primary network rule: '!has(self.joinSubnets) || has(self.role) && self.role == ''Primary''' + - message: MTU should be greater than or equal to 1280 when IPv6 subent + is used + rule: '!has(self.subnets) || !has(self.mtu) || !self.subnets.exists_one(i, + cidr(i.cidr).ip().family() == 6) || self.mtu >= 1280' topology: description: |- Topology describes network configuration. 
diff --git a/go-controller/pkg/crd/userdefinednetwork/v1/shared.go b/go-controller/pkg/crd/userdefinednetwork/v1/shared.go index 56092975d5..f25e2df159 100644 --- a/go-controller/pkg/crd/userdefinednetwork/v1/shared.go +++ b/go-controller/pkg/crd/userdefinednetwork/v1/shared.go @@ -24,10 +24,8 @@ const ( NetworkTopologyLayer3 NetworkTopology = "Layer3" ) -// +kubebuilder:validation:XValidation:rule="has(self.subnets) && size(self.subnets) > 0", message="Subnets is required for Layer3 topology" // +kubebuilder:validation:XValidation:rule="!has(self.joinSubnets) || has(self.role) && self.role == 'Primary'", message="JoinSubnets is only supported for Primary network" -// + TODO This validation does not work and needs to be fixed -// + kubebuilder:validation:XValidation:rule="!has(self.subnets) || !self.subnets.exists_one(i, cidr(i.cidr).ip().family() == 6) || self.mtu >= 1280", message="MTU should be greater than or equal to 1280 when IPv6 subent is used" +// +kubebuilder:validation:XValidation:rule="!has(self.subnets) || !has(self.mtu) || !self.subnets.exists_one(i, isCIDR(i.cidr) && cidr(i.cidr).ip().family() == 6) || self.mtu >= 1280", message="MTU should be greater than or equal to 1280 when IPv6 subent is used" type Layer3Config struct { // Role describes the network role in the pod. // @@ -56,9 +54,7 @@ type Layer3Config struct { // +kubebuilder:validation:MinItems=1 // +kubebuilder:validation:MaxItems=2 // +required - // + --- - // + TODO: Add the following validations when available (kube v1.31). - // + kubebuilder:validation:XValidation:rule="size(self) != 2 || isCIDR(self[0].cidr) && isCIDR(self[1].cidr) && cidr(self[0].cidr).ip().family() != cidr(self[1].cidr).ip().family()", message="When 2 CIDRs are set, they must be from different IP families" + // +kubebuilder:validation:XValidation:rule="size(self) != 2 || !isCIDR(self[0].cidr) || !isCIDR(self[1].cidr) || cidr(self[0].cidr).ip().family() != cidr(self[1].cidr).ip().family()", message="When 2 CIDRs are set, they must be from different IP families" Subnets []Layer3Subnet `json:"subnets,omitempty"` // JoinSubnets are used inside the OVN network topology. @@ -72,10 +68,8 @@ type Layer3Config struct { JoinSubnets DualStackCIDRs `json:"joinSubnets,omitempty"` } -// + --- -// + TODO: Add the following validations when available (kube v1.31). -// + kubebuilder:validation:XValidation:rule="!has(self.hostSubnet) || (isCIDR(self.cidr) && self.hostSubnet > cidr(self.cidr).prefixLength())", message="HostSubnet must be smaller than CIDR subnet" -// + kubebuilder:validation:XValidation:rule="!has(self.hostSubnet) || (isCIDR(self.cidr) && (cidr(self.cidr).ip().family() == 6 || self.hostSubnet < 32))", message="HostSubnet must < 32 for ipv4 CIDR" +// +kubebuilder:validation:XValidation:rule="!has(self.hostSubnet) || !isCIDR(self.cidr) || self.hostSubnet > cidr(self.cidr).prefixLength()", message="HostSubnet must be smaller than CIDR subnet" +// +kubebuilder:validation:XValidation:rule="!has(self.hostSubnet) || !isCIDR(self.cidr) || (cidr(self.cidr).ip().family() == 4 && self.hostSubnet < 32)", message="HostSubnet must < 32 for ipv4 CIDR" type Layer3Subnet struct { // CIDR specifies L3Subnet, which is split into smaller subnets for every node. 
 //
@@ -92,11 +86,10 @@ type Layer3Subnet struct {
 	HostSubnet int32 `json:"hostSubnet,omitempty"`
 }
 
-// +kubebuilder:validation:XValidation:rule="self.role != 'Primary' || has(self.subnets) && size(self.subnets) > 0", message="Subnets is required for Primary Layer2 topology"
+// +kubebuilder:validation:XValidation:rule="self.role != 'Primary' || has(self.subnets)", message="Subnets is required for Primary Layer2 topology"
 // +kubebuilder:validation:XValidation:rule="!has(self.joinSubnets) || has(self.role) && self.role == 'Primary'", message="JoinSubnets is only supported for Primary network"
 // +kubebuilder:validation:XValidation:rule="!has(self.ipamLifecycle) || has(self.subnets) && size(self.subnets) > 0", message="IPAMLifecycle is only supported when subnets are set"
-// + TODO This validation does not work and needs to be fixed
-// + kubebuilder:validation:XValidation:rule="!has(self.subnets) || !self.subnets.exists_one(i, cidr(i).ip().family() == 6) || self.mtu >= 1280", message="MTU should be greater than or equal to 1280 when IPv6 subent is used"
+// +kubebuilder:validation:XValidation:rule="!has(self.subnets) || !has(self.mtu) || !self.subnets.exists_one(i, isCIDR(i) && cidr(i).ip().family() == 6) || self.mtu >= 1280", message="MTU should be greater than or equal to 1280 when IPv6 subent is used"
 type Layer2Config struct {
 	// Role describes the network role in the pod.
 	//
@@ -158,14 +151,11 @@ type NetworkIPAMLifecycle string
 
 const IPAMLifecyclePersistent NetworkIPAMLifecycle = "Persistent"
 
-// + ---
-// + TODO: Add the following validations when available (kube v1.31).
-// + kubebuilder:validation:XValidation:rule="isCIDR(self)", message="CIDR is invalid"
+// +kubebuilder:validation:XValidation:rule="isCIDR(self)", message="CIDR is invalid"
+// +kubebuilder:validation:MaxLength=43
 type CIDR string
 
 // +kubebuilder:validation:MinItems=1
 // +kubebuilder:validation:MaxItems=2
-// + ---
-// + TODO: Add the following validations when available (kube v1.31).
-// + kubebuilder:validation:XValidation:rule="size(self) != 2 || isCIDR(self[0]) && isCIDR(self[1]) && cidr(self[0]).ip().family() != cidr(self[1]).ip().family()", message="When 2 CIDRs are set, they must be from different IP families"
+// +kubebuilder:validation:XValidation:rule="size(self) != 2 || !isCIDR(self[0]) || !isCIDR(self[1]) || cidr(self[0]).ip().family() != cidr(self[1]).ip().family()", message="When 2 CIDRs are set, they must be from different IP families"
 type DualStackCIDRs []CIDR

From d940cab5826e5c5415797fb17dc12799aec9a85b Mon Sep 17 00:00:00 2001
From: Martin Kennelly
Date: Sun, 12 Jan 2025 06:07:36 +0000
Subject: [PATCH 02/51] UDN EIP IP: change scope from link to global

When making a call to netlink to retrieve a link's addresses, netlink
returns the addresses grouped at least by address scope - host, link
and global. Therefore, if the EIP address assigned to the ext bridge to
support EIP for UDN has link scope and OVN Kube restarts, it may select
the EIP address as the node's primary IP address.
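
For illustration only (not part of this change): a minimal sketch of how a
caller of the vishvananda/netlink package, which this code already uses, might
skip non-global addresses when picking a primary-IP candidate, so a link- or
host-scoped EIP address on the bridge is never selected. The bridge name
"breth0" is a placeholder.

    package main

    import (
        "fmt"

        "github.com/vishvananda/netlink"
    )

    func main() {
        link, err := netlink.LinkByName("breth0") // placeholder external bridge name
        if err != nil {
            panic(err)
        }
        addrs, err := netlink.AddrList(link, netlink.FAMILY_V4)
        if err != nil {
            panic(err)
        }
        for _, addr := range addrs {
            // Only globally scoped addresses are primary-IP candidates; a
            // link- or host-scoped address (e.g. an EIP) is skipped.
            if addr.Scope != int(netlink.SCOPE_UNIVERSE) {
                continue
            }
            fmt.Printf("primary IP candidate: %s\n", addr.IPNet)
        }
    }
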
Signed-off-by: Martin Kennelly --- go-controller/pkg/node/gateway_egressip.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go-controller/pkg/node/gateway_egressip.go b/go-controller/pkg/node/gateway_egressip.go index 08c7d7f60c..15b4937b72 100644 --- a/go-controller/pkg/node/gateway_egressip.go +++ b/go-controller/pkg/node/gateway_egressip.go @@ -518,7 +518,7 @@ func getEIPBridgeNetlinkAddress(ip net.IP, ifindex int) netlink.Addr { return netlink.Addr{ IPNet: &net.IPNet{IP: ip, Mask: util.GetIPFullMask(ip)}, Flags: getEIPNetlinkAddressFlag(ip), - Scope: int(netlink.SCOPE_LINK), + Scope: int(netlink.SCOPE_UNIVERSE), ValidLft: getEIPNetlinkAddressValidLft(ip), LinkIndex: ifindex, } From 6c5f5d99657cf6b5f30335516acdbb85f704816f Mon Sep 17 00:00:00 2001 From: nithyar Date: Mon, 30 Sep 2024 15:45:32 -0700 Subject: [PATCH 03/51] Update OVS bridge flows for supporting gateway VLANs Signed-off-by: nithyar --- dist/images/ovnkube.sh | 16 ++++ go-controller/pkg/node/gateway_shared_intf.go | 95 ++++++++++++------- 2 files changed, 79 insertions(+), 32 deletions(-) diff --git a/dist/images/ovnkube.sh b/dist/images/ovnkube.sh index 8ba6205695..09323f37b0 100755 --- a/dist/images/ovnkube.sh +++ b/dist/images/ovnkube.sh @@ -1906,6 +1906,14 @@ ovnkube-controller-with-node() { fi fi + if [[ ${ovnkube_node_mode} != "dpu-host" && ! ${ovn_gateway_opts} =~ "gateway-vlanid" ]]; then + # get the gateway vlanid + gw_vlanid=$(ovs-vsctl --if-exists get Open_vSwitch . external_ids:ovn-gw-vlanid | tr -d \") + if [[ -n ${gw_vlanid} ]]; then + ovn_gateway_opts+="--gateway-vlanid=${gw_vlanid}" + fi + fi + ovnkube_node_mgmt_port_netdev_flag= if [[ ${ovnkube_node_mgmt_port_netdev} != "" ]]; then ovnkube_node_mgmt_port_netdev_flag="--ovnkube-node-mgmt-port-netdev=${ovnkube_node_mgmt_port_netdev}" @@ -2570,6 +2578,14 @@ ovn-node() { fi + if [[ ${ovnkube_node_mode} != "dpu-host" && ! ${ovn_gateway_opts} =~ "gateway-vlanid" ]]; then + # get the gateway vlanid + gw_vlanid=$(ovs-vsctl --if-exists get Open_vSwitch . 
external_ids:ovn-gw-vlanid | tr -d \") + if [[ -n ${gw_vlanid} ]]; then + ovn_gateway_opts+="--gateway-vlanid=${gw_vlanid}" + fi + fi + local ovn_node_ssl_opts="" if [[ ${ovnkube_node_mode} != "dpu-host" ]]; then [[ "yes" == ${OVN_SSL_ENABLE} ]] && { diff --git a/go-controller/pkg/node/gateway_shared_intf.go b/go-controller/pkg/node/gateway_shared_intf.go index 657f1875d2..037a9ef459 100644 --- a/go-controller/pkg/node/gateway_shared_intf.go +++ b/go-controller/pkg/node/gateway_shared_intf.go @@ -562,9 +562,18 @@ func (npw *nodePortWatcher) generateARPBypassFlow(ofPorts []string, ofPortPatch, } arpPortsFiltered = append(arpPortsFiltered, port) } - arpFlow = fmt.Sprintf("cookie=%s, priority=110, in_port=%s, %s, %s=%s, "+ - "actions=output:%s", - cookie, npw.ofportPhys, addrResProto, addrResDst, ipAddr, strings.Join(arpPortsFiltered, ",")) + + // If vlan tagged traffic is received from physical interface, it has to be untagged before sending to access ports + if config.Gateway.VLANID != 0 { + match_vlan := fmt.Sprintf("dl_vlan=%d,", config.Gateway.VLANID) + arpFlow = fmt.Sprintf("cookie=%s, priority=110, in_port=%s, %s, %s, %s=%s, "+ + "actions=strip_vlan,output:%s", + cookie, npw.ofportPhys, match_vlan, addrResProto, addrResDst, ipAddr, strings.Join(arpPortsFiltered, ",")) + } else { + arpFlow = fmt.Sprintf("cookie=%s, priority=110, in_port=%s, %s, %s=%s, "+ + "actions=output:%s", + cookie, npw.ofportPhys, addrResProto, addrResDst, ipAddr, strings.Join(arpPortsFiltered, ",")) + } } return arpFlow @@ -1410,6 +1419,15 @@ func flowsForDefaultBridge(bridge *bridgeConfiguration, extraIPs []net.IP) ([]st // 14 bytes of overhead for ethernet header (does not include VLAN) maxPktLength := getMaxFrameLength() + strip_vlan := "" + mod_vlan_id := "" + match_vlan := "" + if config.Gateway.VLANID != 0 { + strip_vlan = "strip_vlan," + match_vlan = fmt.Sprintf("dl_vlan=%d,", config.Gateway.VLANID) + mod_vlan_id = fmt.Sprintf("mod_vlan_vid:%d,", config.Gateway.VLANID) + } + if config.IPv4Mode { // table0, Geneve packets coming from external. Skip conntrack and go directly to host // if dest mac is the shared mac send directly to host. 
@@ -1632,32 +1650,34 @@ func flowsForDefaultBridge(bridge *bridgeConfiguration, extraIPs []net.IP) ([]st if config.IPv4Mode { // table 1, established and related connections in zone 64000 with ct_mark ctMarkHost go to host dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=100, table=1, ip, ct_state=+trk+est, ct_mark=%s, "+ - "actions=output:%s", - defaultOpenFlowCookie, ctMarkHost, ofPortHost)) + fmt.Sprintf("cookie=%s, priority=100, table=1, %s ip, ct_state=+trk+est, ct_mark=%s, "+ + "actions=%soutput:%s", + defaultOpenFlowCookie, match_vlan, ctMarkHost, strip_vlan, ofPortHost)) dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=100, table=1, ip, ct_state=+trk+rel, ct_mark=%s, "+ - "actions=output:%s", - defaultOpenFlowCookie, ctMarkHost, ofPortHost)) + fmt.Sprintf("cookie=%s, priority=100, table=1, %s ip, ct_state=+trk+rel, ct_mark=%s, "+ + "actions=%soutput:%s", + defaultOpenFlowCookie, match_vlan, ctMarkHost, strip_vlan, ofPortHost)) + } if config.IPv6Mode { // table 1, established and related connections in zone 64000 with ct_mark ctMarkHost go to host dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=100, table=1, ip6, ct_state=+trk+est, ct_mark=%s, "+ - "actions=output:%s", - defaultOpenFlowCookie, ctMarkHost, ofPortHost)) + fmt.Sprintf("cookie=%s, priority=100, table=1, %s ip6, ct_state=+trk+est, ct_mark=%s, "+ + "actions=%soutput:%s", + defaultOpenFlowCookie, match_vlan, ctMarkHost, strip_vlan, ofPortHost)) dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=100, table=1, ip6, ct_state=+trk+rel, ct_mark=%s, "+ - "actions=output:%s", - defaultOpenFlowCookie, ctMarkHost, ofPortHost)) + fmt.Sprintf("cookie=%s, priority=100, table=1, %s ip6, ct_state=+trk+rel, ct_mark=%s, "+ + "actions=%soutput:%s", + defaultOpenFlowCookie, match_vlan, ctMarkHost, strip_vlan, ofPortHost)) + } // table 1, we check to see if this dest mac is the shared mac, if so send to host dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=10, table=1, dl_dst=%s, actions=output:%s", - defaultOpenFlowCookie, bridgeMacAddress, ofPortHost)) + fmt.Sprintf("cookie=%s, priority=10, table=1, %s dl_dst=%s, actions=%soutput:%s", + defaultOpenFlowCookie, match_vlan, bridgeMacAddress, strip_vlan, ofPortHost)) } defaultNetConfig := bridge.netConfig[types.DefaultNetworkName] @@ -1665,8 +1685,8 @@ func flowsForDefaultBridge(bridge *bridgeConfiguration, extraIPs []net.IP) ([]st // table 2, dispatch from Host -> OVN dftFlows = append(dftFlows, fmt.Sprintf("cookie=%s, priority=100, table=2, "+ - "actions=set_field:%s->eth_dst,output:%s", defaultOpenFlowCookie, - bridgeMacAddress, defaultNetConfig.ofPortPatch)) + "actions=set_field:%s->eth_dst,%soutput:%s", defaultOpenFlowCookie, + bridgeMacAddress, mod_vlan_id, defaultNetConfig.ofPortPatch)) // table 2, priority 200, dispatch from UDN -> Host -> OVN. These packets have // already been SNATed to the UDN's masq IP or have been marked with the UDN's packet mark. 
@@ -1709,9 +1729,9 @@ func flowsForDefaultBridge(bridge *bridgeConfiguration, extraIPs []net.IP) ([]st // table 3, dispatch from OVN -> Host dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, table=3, "+ - "actions=move:NXM_OF_ETH_DST[]->NXM_OF_ETH_SRC[],set_field:%s->eth_dst,output:%s", - defaultOpenFlowCookie, bridgeMacAddress, ofPortHost)) + fmt.Sprintf("cookie=%s, table=3, %s "+ + "actions=move:NXM_OF_ETH_DST[]->NXM_OF_ETH_SRC[],set_field:%s->eth_dst,%soutput:%s", + defaultOpenFlowCookie, match_vlan, bridgeMacAddress, strip_vlan, ofPortHost)) // table 4, hairpinned pkts that need to go from OVN -> Host // We need to SNAT and masquerade OVN GR IP, send to table 3 for dispatch to Host @@ -1753,15 +1773,25 @@ func commonFlows(subnets []*net.IPNet, bridge *bridgeConfiguration, isPodNetwork var dftFlows []string + strip_vlan := "" + match_vlan := "" + mod_vlan_id := "" + if config.Gateway.VLANID != 0 { + strip_vlan = "strip_vlan," + match_vlan = fmt.Sprintf("dl_vlan=%d,", config.Gateway.VLANID) + mod_vlan_id = fmt.Sprintf("mod_vlan_vid:%d,", config.Gateway.VLANID) + } + if ofPortPhys != "" { // table 0, we check to see if this dest mac is the shared mac, if so flood to all ports - actions := "output:" + ofPortHost + actions := "" for _, netConfig := range bridge.patchedNetConfigs() { - actions += ",output:" + netConfig.ofPortPatch + actions += "output:" + netConfig.ofPortPatch + "," } + actions += strip_vlan + "output:" + ofPortHost dftFlows = append(dftFlows, - fmt.Sprintf("cookie=%s, priority=10, table=0, in_port=%s, dl_dst=%s, actions=%s", - defaultOpenFlowCookie, ofPortPhys, bridgeMacAddress, actions)) + fmt.Sprintf("cookie=%s, priority=10, table=0, in_port=%s, %s dl_dst=%s, actions=%s", + defaultOpenFlowCookie, ofPortPhys, match_vlan, bridgeMacAddress, actions)) } // table 0, check packets coming from OVN have the correct mac address. Low priority flows that are a catch all @@ -1830,8 +1860,8 @@ func commonFlows(subnets []*net.IPNet, bridge *bridgeConfiguration, isPodNetwork // so that reverse direction goes back to the host. dftFlows = append(dftFlows, fmt.Sprintf("cookie=%s, priority=100, in_port=%s, ip, "+ - "actions=ct(commit, zone=%d, exec(set_field:%s->ct_mark)), output:%s", - defaultOpenFlowCookie, ofPortHost, config.Default.ConntrackZone, ctMarkHost, ofPortPhys)) + "actions=ct(commit, zone=%d, exec(set_field:%s->ct_mark)), %soutput:%s", + defaultOpenFlowCookie, ofPortHost, config.Default.ConntrackZone, ctMarkHost, mod_vlan_id, ofPortPhys)) } if config.Gateway.Mode == config.GatewayModeLocal { for _, netConfig := range bridge.patchedNetConfigs() { @@ -1921,8 +1951,9 @@ func commonFlows(subnets []*net.IPNet, bridge *bridgeConfiguration, isPodNetwork // so that reverse direction goes back to the host. 
 		dftFlows = append(dftFlows,
 			fmt.Sprintf("cookie=%s, priority=100, in_port=%s, ipv6, "+
-				"actions=ct(commit, zone=%d, exec(set_field:%s->ct_mark)), output:%s",
-				defaultOpenFlowCookie, ofPortHost, config.Default.ConntrackZone, ctMarkHost, ofPortPhys))
+				"actions=ct(commit, zone=%d, exec(set_field:%s->ct_mark)), %soutput:%s",
+				defaultOpenFlowCookie, ofPortHost, config.Default.ConntrackZone, ctMarkHost, mod_vlan_id, ofPortPhys))
+
 	}
 	if config.Gateway.Mode == config.GatewayModeLocal {
 		for _, netConfig := range bridge.patchedNetConfigs() {
@@ -2020,8 +2051,8 @@ func commonFlows(subnets []*net.IPNet, bridge *bridgeConfiguration, isPodNetwork
 	// table 1, we check to see if this dest mac is the shared mac, if so send to host
 	dftFlows = append(dftFlows,
-		fmt.Sprintf("cookie=%s, priority=10, table=1, dl_dst=%s, actions=output:%s",
-			defaultOpenFlowCookie, bridgeMacAddress, ofPortHost))
+		fmt.Sprintf("cookie=%s, priority=10, table=1, %s dl_dst=%s, actions=%soutput:%s",
+			defaultOpenFlowCookie, match_vlan, bridgeMacAddress, strip_vlan, ofPortHost))
 
 	if config.IPv6Mode {
 		// REMOVEME(trozet) when https://bugzilla.kernel.org/show_bug.cgi?id=11797 is resolved

From d31b047506db6ba5c6016aba14e6c0802b292599 Mon Sep 17 00:00:00 2001
From: Tim Rozet
Date: Fri, 13 Dec 2024 18:20:12 -0500
Subject: [PATCH 04/51] Require namespace label for primary UDN

The k8s.ovn.org/primary-user-defined-network label is now required on a
namespace at namespace creation time in order to use a primary UDN. The
following conditions are true:

1. If the namespace is missing the label and a pod is created, the pod
   attaches to the default network.
2. If the namespace is missing the label and a primary UDN or CUDN is
   created that matches that namespace, the UDN/CUDN will report error
   status and the NAD will not be generated.
3. If the namespace is missing the label and a primary UDN/CUDN exists,
   a pod in the namespace will be created and attached to the default
   network.
4. If the namespace has the label and a primary UDN/CUDN does not
   exist, pod creation in the namespace will fail until the UDN/CUDN is
   created.

Also includes some fixes to unit tests that were brought to light by
this PR. For example, the layer 2 multi-network tests were adding
invalid annotations for node-subnets, etc.
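
For illustration only (not part of this change): a minimal client-go sketch of
creating a namespace that is eligible for a primary UDN, assuming the label
key matches the one quoted in this patch's validation messages
(k8s.ovn.org/primary-user-defined-network); the namespace name and the
in-cluster client configuration are placeholders.

    package main

    import (
        "context"

        corev1 "k8s.io/api/core/v1"
        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
        "k8s.io/client-go/kubernetes"
        "k8s.io/client-go/rest"
    )

    func main() {
        cfg, err := rest.InClusterConfig()
        if err != nil {
            panic(err)
        }
        client := kubernetes.NewForConfigOrDie(cfg)
        ns := &corev1.Namespace{
            ObjectMeta: metav1.ObjectMeta{
                Name: "blue", // placeholder namespace name
                // The label must be present at creation time; an empty value
                // is sufficient, as in the tests added by this patch.
                Labels: map[string]string{"k8s.ovn.org/primary-user-defined-network": ""},
            },
        }
        if _, err := client.CoreV1().Namespaces().Create(context.TODO(), ns, metav1.CreateOptions{}); err != nil {
            panic(err)
        }
    }
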
Signed-off-by: Tim Rozet --- .../endpointslice_mirror_controller_test.go | 2 + .../userdefinednetwork/controller_helper.go | 15 ++ .../userdefinednetwork/controller_test.go | 222 +++++++++++++----- .../pkg/networkmanager/nad_controller.go | 17 +- .../pkg/networkmanager/nad_controller_test.go | 31 +++ go-controller/pkg/ovn/egressip_test.go | 4 +- go-controller/pkg/ovn/egressip_udn_l2_test.go | 18 +- go-controller/pkg/ovn/egressip_udn_l3_test.go | 20 +- go-controller/pkg/ovn/multicast_test.go | 6 + go-controller/pkg/ovn/namespace_test.go | 18 ++ go-controller/pkg/ovn/ovn_test.go | 68 +++++- ...econdary_layer2_network_controller_test.go | 52 ++-- ...econdary_layer3_network_controller_test.go | 17 +- go-controller/pkg/types/const.go | 2 + go-controller/pkg/util/util.go | 15 ++ 15 files changed, 397 insertions(+), 110 deletions(-) diff --git a/go-controller/pkg/clustermanager/endpointslicemirror/endpointslice_mirror_controller_test.go b/go-controller/pkg/clustermanager/endpointslicemirror/endpointslice_mirror_controller_test.go index 78c8a30a90..56365e13ba 100644 --- a/go-controller/pkg/clustermanager/endpointslicemirror/endpointslice_mirror_controller_test.go +++ b/go-controller/pkg/clustermanager/endpointslicemirror/endpointslice_mirror_controller_test.go @@ -78,6 +78,7 @@ var _ = ginkgo.Describe("Cluster manager EndpointSlice mirror controller", func( ginkgo.It("should delete stale mirrored EndpointSlices and create missing ones", func() { app.Action = func(ctx *cli.Context) error { namespaceT := *util.NewNamespace("testns") + namespaceT.Labels[types.RequiredUDNNamespaceLabel] = "" pod := v1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pod", @@ -282,6 +283,7 @@ var _ = ginkgo.Describe("Cluster manager EndpointSlice mirror controller", func( ginkgo.It("should update/delete mirrored EndpointSlices in namespaces that use user defined networks as primary ", func() { app.Action = func(ctx *cli.Context) error { namespaceT := *util.NewNamespace("testns") + namespaceT.Labels[types.RequiredUDNNamespaceLabel] = "" pod := v1.Pod{ ObjectMeta: metav1.ObjectMeta{ diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/controller_helper.go b/go-controller/pkg/clustermanager/userdefinednetwork/controller_helper.go index 2fb784675b..bc2f58f0ce 100644 --- a/go-controller/pkg/clustermanager/userdefinednetwork/controller_helper.go +++ b/go-controller/pkg/clustermanager/userdefinednetwork/controller_helper.go @@ -15,10 +15,25 @@ import ( netv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/clustermanager/userdefinednetwork/template" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" utiludn "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util/udn" ) func (c *Controller) updateNAD(obj client.Object, namespace string) (*netv1.NetworkAttachmentDefinition, error) { + if utiludn.IsPrimaryNetwork(template.GetSpec(obj)) { + // check if required UDN label is on namespace + ns, err := c.namespaceInformer.Lister().Get(namespace) + if err != nil { + return nil, fmt.Errorf("failed to get namespace %q: %w", namespace, err) + } + + if _, exists := ns.Labels[types.RequiredUDNNamespaceLabel]; !exists { + // No Required label set on namespace while trying to render NAD for primary network on this namespace + return nil, util.NewInvalidPrimaryNetworkError(namespace) + } + } + desiredNAD, err := c.renderNadFn(obj, namespace) if err != nil { 
return nil, fmt.Errorf("failed to generate NetworkAttachmentDefinition: %w", err) diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/controller_test.go b/go-controller/pkg/clustermanager/userdefinednetwork/controller_test.go index 7d2dc34979..28ff8116ac 100644 --- a/go-controller/pkg/clustermanager/userdefinednetwork/controller_test.go +++ b/go-controller/pkg/clustermanager/userdefinednetwork/controller_test.go @@ -30,6 +30,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/clustermanager/userdefinednetwork/template" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory" + ovntypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" ) @@ -74,9 +75,9 @@ var _ = Describe("User Defined Network Controller", func() { }) Context("reconcile UDN CR", func() { It("should create NAD successfully", func() { - udn := testUDN() + udn := testPrimaryUDN() expectedNAD := testNAD() - c = newTestController(renderNadStub(expectedNAD), udn) + c = newTestController(renderNadStub(expectedNAD), udn, testNamespace("test")) Expect(c.Run()).To(Succeed()) Eventually(func() []metav1.Condition { @@ -96,10 +97,53 @@ var _ = Describe("User Defined Network Controller", func() { Expect(nad).To(Equal(expectedNAD)) }) + It("should fail when required namespace label is missing for primary network", func() { + udn := testPrimaryUDN() + expectedNAD := testNAD() + c = newTestController(renderNadStub(expectedNAD), udn, invalidTestNamespace("test")) + Expect(c.Run()).To(Succeed()) + + Eventually(func() []metav1.Condition { + udn, err := cs.UserDefinedNetworkClient.K8sV1().UserDefinedNetworks(udn.Namespace).Get(context.Background(), udn.Name, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + return normalizeConditions(udn.Status.Conditions) + }).Should(Equal([]metav1.Condition{{ + Type: "NetworkCreated", + Status: "False", + Reason: "SyncError", + Message: "invalid primary network state for namespace \"test\": a valid primary user defined network or network attachment definition custom resource, and required namespace label \"k8s.ovn.org/primary-user-defined-network\" must both be present", + }})) + + _, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(udn.Namespace).Get(context.Background(), udn.Name, metav1.GetOptions{}) + Expect(kerrors.IsNotFound(err)).To(BeTrue()) + }) + + It("should NOT fail when required namespace label is missing for secondary network", func() { + udn := testSecondaryUDN() + expectedNAD := testNAD() + c = newTestController(renderNadStub(expectedNAD), udn, invalidTestNamespace("test")) + Expect(c.Run()).To(Succeed()) + + Eventually(func() []metav1.Condition { + udn, err := cs.UserDefinedNetworkClient.K8sV1().UserDefinedNetworks(udn.Namespace).Get(context.Background(), udn.Name, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + return normalizeConditions(udn.Status.Conditions) + }).Should(Equal([]metav1.Condition{{ + Type: "NetworkCreated", + Status: "True", + Reason: "NetworkAttachmentDefinitionCreated", + Message: "NetworkAttachmentDefinition has been created", + }})) + + nad, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(udn.Namespace).Get(context.Background(), udn.Name, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + Expect(nad).To(Equal(expectedNAD)) + }) + It("should fail when NAD render fail", func() { - udn := testUDN() + udn := testPrimaryUDN() 
renderErr := errors.New("render NAD fails") - c = newTestController(failRenderNadStub(renderErr), udn) + c = newTestController(failRenderNadStub(renderErr), udn, testNamespace("test")) Expect(c.Run()).To(Succeed()) Eventually(func() []metav1.Condition { @@ -117,8 +161,8 @@ var _ = Describe("User Defined Network Controller", func() { Expect(kerrors.IsNotFound(err)).To(BeTrue()) }) It("should fail when NAD create fail", func() { - udn := testUDN() - c = newTestController(noopRenderNadStub(), udn) + udn := testPrimaryUDN() + c = newTestController(noopRenderNadStub(), udn, testNamespace("test")) expectedError := errors.New("create NAD error") cs.NetworkAttchDefClient.(*netv1fakeclientset.Clientset).PrependReactor("create", "network-attachment-definitions", func(action testing.Action) (handled bool, ret runtime.Object, err error) { @@ -143,10 +187,10 @@ var _ = Describe("User Defined Network Controller", func() { }) It("should fail when foreign NAD exist", func() { - udn := testUDN() + udn := testPrimaryUDN() foreignNad := testNAD() foreignNad.ObjectMeta.OwnerReferences = nil - c = newTestController(noopRenderNadStub(), udn, foreignNad) + c = newTestController(noopRenderNadStub(), udn, foreignNad, testNamespace("test")) Expect(c.Run()).To(Succeed()) Eventually(func() []metav1.Condition { @@ -161,9 +205,9 @@ var _ = Describe("User Defined Network Controller", func() { }})) }) It("should reconcile mutated NAD", func() { - udn := testUDN() + udn := testPrimaryUDN() expectedNAD := testNAD() - c = newTestController(renderNadStub(expectedNAD), udn) + c = newTestController(renderNadStub(expectedNAD), udn, testNamespace("test")) Expect(c.Run()).To(Succeed()) Eventually(func() []metav1.Condition { @@ -189,9 +233,9 @@ var _ = Describe("User Defined Network Controller", func() { }).Should(Equal(expectedNAD)) }) It("should fail when update mutated NAD fails", func() { - udn := testUDN() + udn := testPrimaryUDN() expectedNAD := testNAD() - c = newTestController(renderNadStub(expectedNAD), udn) + c = newTestController(renderNadStub(expectedNAD), udn, testNamespace("test")) expectedErr := errors.New("update error") cs.NetworkAttchDefClient.(*netv1fakeclientset.Clientset).PrependReactor("update", "network-attachment-definitions", func(action testing.Action) (bool, runtime.Object, error) { @@ -243,12 +287,12 @@ var _ = Describe("User Defined Network Controller", func() { }) It("given primary UDN, should fail when primary NAD already exist", func() { - primaryUDN := testUDN() + primaryUDN := testPrimaryUDN() primaryUDN.Spec.Topology = udnv1.NetworkTopologyLayer2 primaryUDN.Spec.Layer2 = &udnv1.Layer2Config{Role: udnv1.NetworkRolePrimary} primaryNAD := primaryNetNAD() - c = newTestController(noopRenderNadStub(), primaryUDN, primaryNAD) + c = newTestController(noopRenderNadStub(), primaryUDN, primaryNAD, testNamespace("test")) Expect(c.Run()).To(Succeed()) Eventually(func() []metav1.Condition { @@ -263,14 +307,14 @@ var _ = Describe("User Defined Network Controller", func() { }})) }) It("given primary UDN, should fail when unmarshal primary NAD fails", func() { - primaryUDN := testUDN() + primaryUDN := testPrimaryUDN() primaryUDN.Spec.Topology = udnv1.NetworkTopologyLayer3 primaryUDN.Spec.Layer3 = &udnv1.Layer3Config{Role: udnv1.NetworkRolePrimary} primaryNAD := primaryNetNAD() primaryNAD.Name = "another-primary-net" primaryNAD.Spec.Config = "!@#$" - c = newTestController(noopRenderNadStub(), primaryUDN, primaryNAD) + c = newTestController(noopRenderNadStub(), primaryUDN, primaryNAD, testNamespace("test")) 
Expect(c.Run()).To(Succeed()) Eventually(func() []metav1.Condition { @@ -286,9 +330,9 @@ var _ = Describe("User Defined Network Controller", func() { }) It("should add finalizer to UDN", func() { - udn := testUDN() + udn := testPrimaryUDN() udn.Finalizers = nil - c = newTestController(noopRenderNadStub(), udn) + c = newTestController(noopRenderNadStub(), udn, testNamespace("test")) Expect(c.Run()).To(Succeed()) Eventually(func() []string { @@ -298,9 +342,9 @@ var _ = Describe("User Defined Network Controller", func() { }).Should(Equal([]string{"k8s.ovn.org/user-defined-network-protection"})) }) It("should fail when add finalizer to UDN fails", func() { - udn := testUDN() + udn := testPrimaryUDN() udn.Finalizers = nil - c = newTestController(noopRenderNadStub(), udn) + c = newTestController(noopRenderNadStub(), udn, testNamespace("test")) expectedErr := errors.New("update UDN error") cs.UserDefinedNetworkClient.(*udnfakeclient.Clientset).PrependReactor("update", "userdefinednetworks", func(action testing.Action) (handled bool, obj runtime.Object, err error) { @@ -324,14 +368,14 @@ var _ = Describe("User Defined Network Controller", func() { It("when UDN is being deleted, NAD exist, 2 pods using UDN, should delete NAD once no pod uses the network", func() { var err error nad := testNAD() - udn := testUDN() + udn := testPrimaryUDN() udn.SetDeletionTimestamp(&metav1.Time{Time: time.Now()}) testOVNPodAnnot := map[string]string{util.OvnPodAnnotationName: `{"default": {"role":"primary"}, "test/test": {"role": "secondary"}}`} pod1 := &corev1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod-1", Namespace: udn.Namespace, Annotations: testOVNPodAnnot}} pod2 := &corev1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod-2", Namespace: udn.Namespace, Annotations: testOVNPodAnnot}} - c = newTestController(renderNadStub(nad), udn, nad, pod1, pod2) + c = newTestController(renderNadStub(nad), udn, nad, pod1, pod2, testNamespace("test")) // user short interval to make the controller re-enqueue requests c.networkInUseRequeueInterval = 50 * time.Millisecond Expect(c.Run()).To(Succeed()) @@ -414,10 +458,9 @@ var _ = Describe("User Defined Network Controller", func() { } By("create test namespaces with tests label") connectedNsNames = []string{"green", "yellow"} - testLabelEmea := map[string]string{testLabelKey: testLabelValue} for _, nsName := range connectedNsNames { ns := testNamespace(nsName) - ns.Labels = testLabelEmea + ns.Labels[testLabelKey] = testLabelValue testObjs = append(testObjs, ns) } By("create CUDN selecting namespaces with test label") @@ -426,7 +469,13 @@ var _ = Describe("User Defined Network Controller", func() { Key: testLabelKey, Operator: metav1.LabelSelectorOpIn, Values: []string{testLabelValue}, - }}}} + }}}, + Network: udnv1.NetworkSpec{ + Topology: udnv1.NetworkTopologyLayer3, + Layer3: &udnv1.Layer3Config{ + Role: udnv1.NetworkRolePrimary, + }, + }} testObjs = append(testObjs, cudn) By("start test controller") @@ -474,10 +523,10 @@ var _ = Describe("User Defined Network Controller", func() { It("when CR selector has selection added, should create NAD in matching namespaces", func() { By("create test new namespaces with new selection label") newNsLabelValue := "us" - newNsLabel := map[string]string{testLabelKey: newNsLabelValue} newNsNames := []string{"black", "gray"} for _, nsName := range newNsNames { - ns := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: nsName, Labels: newNsLabel}} + ns := testNamespace(nsName) + ns.Labels[testLabelKey] = newNsLabelValue _, err := 
cs.KubeClient.CoreV1().Namespaces().Create(context.Background(), ns, metav1.CreateOptions{}) Expect(err).NotTo(HaveOccurred()) } @@ -611,7 +660,7 @@ var _ = Describe("User Defined Network Controller", func() { newNsNames := []string{"black", "gray"} for _, nsName := range newNsNames { ns := testNamespace(nsName) - ns.Labels = map[string]string{testLabelKey: testLabelValue} + ns.Labels[testLabelKey] = testLabelValue _, err := cs.KubeClient.CoreV1().Namespaces().Create(context.Background(), ns, metav1.CreateOptions{}) Expect(err).NotTo(HaveOccurred()) } @@ -633,13 +682,47 @@ var _ = Describe("User Defined Network Controller", func() { } }) + It("when new namespace is created without required UDN label, it should not create NAD", func() { + By("create new namespaces with test label") + newNsNames := []string{"black", "gray"} + for _, nsName := range newNsNames { + ns := invalidTestNamespace(nsName) + ns.Labels[testLabelKey] = testLabelValue + _, err := cs.KubeClient.CoreV1().Namespaces().Create(context.Background(), ns, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + } + + Eventually(func() []metav1.Condition { + cudn, err := cs.UserDefinedNetworkClient.K8sV1().ClusterUserDefinedNetworks().Get(context.Background(), cudnName, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + return normalizeConditions(cudn.Status.Conditions) + }).Should(Equal([]metav1.Condition{{ + Type: "NetworkCreated", + Status: "False", + Reason: "NetworkAttachmentDefinitionSyncError", + Message: "invalid primary network state for namespace \"black\": a valid primary user defined network or network attachment definition " + + "custom resource, and required namespace label \"k8s.ovn.org/primary-user-defined-network\" must both be present\ninvalid primary " + + "network state for namespace \"gray\": a valid primary user defined network or network attachment definition custom resource, and " + + "required namespace label \"k8s.ovn.org/primary-user-defined-network\" must both be present", + }}), "status should report NAD failed in existing and new test namespaces") + for _, nsName := range newNsNames { + nads, err := cs.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(nsName).List(context.Background(), metav1.ListOptions{}) + Expect(err).NotTo(HaveOccurred()) + Expect(len(nads.Items)).To(Equal(0)) + } + }) + It("when existing namespace is labeled with matching label, should create NAD in newly labeled matching namespaces", func() { By("add test label to tests disconnected namespaces") for _, nsName := range disconnectedNsNames { - p := fmt.Sprintf(`[{"op": "add", "path": "./metadata/labels", "value": {%q: %q}}]`, testLabelKey, testLabelValue) + p := fmt.Sprintf(`[{"op": "add", "path": "./metadata/labels/%s", "value": %q}]`, testLabelKey, testLabelValue) ns, err := cs.KubeClient.CoreV1().Namespaces().Patch(context.Background(), nsName, types.JSONPatchType, []byte(p), metav1.PatchOptions{}) Expect(err).NotTo(HaveOccurred()) - Expect(ns.Labels).To(Equal(map[string]string{testLabelKey: testLabelValue})) + Expect(ns.Labels).To(Equal(map[string]string{ + testLabelKey: testLabelValue, + "kubernetes.io/metadata.name": nsName, + ovntypes.RequiredUDNNamespaceLabel: "", + })) } Eventually(func() []metav1.Condition { @@ -698,17 +781,16 @@ var _ = Describe("User Defined Network Controller", func() { It("when started, CR exist, stale NADs exist, should deleted stale NADs", func() { var testObjs []runtime.Object staleNADsNsNames := []string{"red", "blue"} - staleLabel := map[string]string{"test.io": 
"stale"} for _, nsName := range staleNADsNsNames { ns := testNamespace(nsName) - ns.SetLabels(staleLabel) + ns.Labels["test.io"] = "stale" testObjs = append(testObjs, ns) } connectedNsNames := []string{"green", "yellow"} connectedLabel := map[string]string{"test.io": "connected"} for _, nsName := range connectedNsNames { ns := testNamespace(nsName) - ns.SetLabels(connectedLabel) + ns.Labels["test.io"] = "connected" testObjs = append(testObjs, ns) } cudn := testClusterUDN("test") @@ -745,10 +827,10 @@ var _ = Describe("User Defined Network Controller", func() { Context("UserDefinedNetwork object sync", func() { It("should fail when NAD owner-reference is malformed", func() { - udn := testUDN() + udn := testPrimaryUDN() mutatedNAD := testNAD() mutatedNAD.ObjectMeta.OwnerReferences = []metav1.OwnerReference{{Kind: "DifferentKind"}} - c := newTestController(noopRenderNadStub(), udn, mutatedNAD) + c := newTestController(noopRenderNadStub(), udn, mutatedNAD, testNamespace("test")) _, err := c.syncUserDefinedNetwork(udn) Expect(err).To(Equal(errors.New("foreign NetworkAttachmentDefinition with the desired name already exist [test/test]"))) @@ -758,7 +840,7 @@ var _ = Describe("User Defined Network Controller", func() { udn := testsUDNWithDeletionTimestamp(time.Now()) unmanagedNAD := testNAD() unmanagedNAD.OwnerReferences[0].UID = "99" - c := newTestController(noopRenderNadStub(), udn, unmanagedNAD) + c := newTestController(noopRenderNadStub(), udn, unmanagedNAD, testNamespace("test")) _, err := c.syncUserDefinedNetwork(udn) Expect(err).ToNot(HaveOccurred()) @@ -773,7 +855,7 @@ var _ = Describe("User Defined Network Controller", func() { It("when UDN is being deleted, and NAD exist, should delete NAD", func() { udn := testsUDNWithDeletionTimestamp(time.Now()) nad := testNAD() - c := newTestController(noopRenderNadStub(), udn, nad) + c := newTestController(noopRenderNadStub(), udn, nad, testNamespace("test")) _, err := c.syncUserDefinedNetwork(udn) Expect(err).ToNot(HaveOccurred()) @@ -785,7 +867,7 @@ var _ = Describe("User Defined Network Controller", func() { It("when UDN is being deleted, and NAD exist, should fail when remove NAD finalizer fails", func() { udn := testsUDNWithDeletionTimestamp(time.Now()) nad := testNAD() - c := newTestController(noopRenderNadStub(), udn, nad) + c := newTestController(noopRenderNadStub(), udn, nad, testNamespace("test")) expectedErr := errors.New("update NAD error") cs.NetworkAttchDefClient.(*netv1fakeclientset.Clientset).PrependReactor("update", "network-attachment-definitions", func(action testing.Action) (bool, runtime.Object, error) { @@ -800,7 +882,7 @@ var _ = Describe("User Defined Network Controller", func() { udn := testsUDNWithDeletionTimestamp(time.Now()) nad := testNAD() nad.Finalizers = nil - c := newTestController(noopRenderNadStub(), udn, nad) + c := newTestController(noopRenderNadStub(), udn, nad, testNamespace("test")) _, err := c.syncUserDefinedNetwork(udn) Expect(err).ToNot(HaveOccurred()) @@ -808,7 +890,7 @@ var _ = Describe("User Defined Network Controller", func() { }) It("when UDN is being deleted, and NAD not exist, should remove finalizer from UDN", func() { udn := testsUDNWithDeletionTimestamp(time.Now()) - c := newTestController(noopRenderNadStub(), udn) + c := newTestController(noopRenderNadStub(), udn, testNamespace("test")) _, err := c.syncUserDefinedNetwork(udn) Expect(err).ToNot(HaveOccurred()) @@ -818,7 +900,7 @@ var _ = Describe("User Defined Network Controller", func() { udn := testsUDNWithDeletionTimestamp(time.Now()) nad := 
testNAD() nad.Finalizers = nil - c := newTestController(noopRenderNadStub(), udn, nad) + c := newTestController(noopRenderNadStub(), udn, nad, testNamespace("test")) expectedErr := errors.New("update UDN error") cs.UserDefinedNetworkClient.(*udnfakeclient.Clientset).PrependReactor("update", "userdefinednetworks", func(action testing.Action) (bool, runtime.Object, error) { @@ -842,7 +924,7 @@ var _ = Describe("User Defined Network Controller", func() { }, }, } - c := newTestController(renderNadStub(nad), udn, nad, pod) + c := newTestController(renderNadStub(nad), udn, nad, pod, testNamespace("test")) _, err := c.syncUserDefinedNetwork(udn) Expect(err).ToNot(HaveOccurred()) @@ -909,7 +991,7 @@ var _ = Describe("User Defined Network Controller", func() { Context("UserDefinedNetwork status update", func() { DescribeTable("should update status, when", func(nad *netv1.NetworkAttachmentDefinition, syncErr error, expectedStatus *udnv1.UserDefinedNetworkStatus) { - udn := testUDN() + udn := testPrimaryUDN() c := newTestController(noopRenderNadStub(), udn) Expect(c.updateUserDefinedNetworkStatus(udn, nad, syncErr)).To(Succeed(), "should update status successfully") @@ -961,7 +1043,7 @@ var _ = Describe("User Defined Network Controller", func() { ) It("should update status according to sync errors", func() { - udn := testUDN() + udn := testPrimaryUDN() c := newTestController(noopRenderNadStub(), udn) nad := testNAD() @@ -1004,7 +1086,7 @@ var _ = Describe("User Defined Network Controller", func() { return true, nil, expectedError }) - udn := testUDN() + udn := testPrimaryUDN() nad := testNAD() Expect(c.updateUserDefinedNetworkStatus(udn, nad, nil)).To(MatchError(expectedError)) }) @@ -1294,7 +1376,24 @@ func normalizeConditions(conditions []metav1.Condition) []metav1.Condition { return conditions } -func testUDN() *udnv1.UserDefinedNetwork { +func testPrimaryUDN() *udnv1.UserDefinedNetwork { + return &udnv1.UserDefinedNetwork{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test", + Namespace: "test", + UID: "1", + Finalizers: []string{"k8s.ovn.org/user-defined-network-protection"}, + }, + Spec: udnv1.UserDefinedNetworkSpec{ + Topology: udnv1.NetworkTopologyLayer3, + Layer3: &udnv1.Layer3Config{ + Role: udnv1.NetworkRolePrimary, + }, + }, + } +} + +func testSecondaryUDN() *udnv1.UserDefinedNetwork { return &udnv1.UserDefinedNetwork{ ObjectMeta: metav1.ObjectMeta{ Name: "test", @@ -1302,11 +1401,17 @@ func testUDN() *udnv1.UserDefinedNetwork { UID: "1", Finalizers: []string{"k8s.ovn.org/user-defined-network-protection"}, }, + Spec: udnv1.UserDefinedNetworkSpec{ + Topology: udnv1.NetworkTopologyLayer3, + Layer3: &udnv1.Layer3Config{ + Role: udnv1.NetworkRoleSecondary, + }, + }, } } func testsUDNWithDeletionTimestamp(ts time.Time) *udnv1.UserDefinedNetwork { - udn := testUDN() + udn := testPrimaryUDN() deletionTimestamp := metav1.NewTime(ts) udn.DeletionTimestamp = &deletionTimestamp return udn @@ -1334,6 +1439,18 @@ func testNAD() *netv1.NetworkAttachmentDefinition { } } +func invalidTestNamespace(name string) *corev1.Namespace { + return &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Labels: map[string]string{ + "kubernetes.io/metadata.name": name, + }, + }, + Spec: corev1.NamespaceSpec{}, + } +} + func primaryNetNAD() *netv1.NetworkAttachmentDefinition { return &netv1.NetworkAttachmentDefinition{ ObjectMeta: metav1.ObjectMeta{ @@ -1353,14 +1470,9 @@ func testNADWithDeletionTimestamp(ts time.Time) *netv1.NetworkAttachmentDefiniti } func testNamespace(name string) *corev1.Namespace { - 
return &corev1.Namespace{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Labels: map[string]string{ - "kubernetes.io/metadata.name": name, - }, - }, - } + ns := invalidTestNamespace(name) + ns.ObjectMeta.Labels[ovntypes.RequiredUDNNamespaceLabel] = "" + return ns } func testClusterUDN(name string, targetNamespaces ...string) *udnv1.ClusterUserDefinedNetwork { diff --git a/go-controller/pkg/networkmanager/nad_controller.go b/go-controller/pkg/networkmanager/nad_controller.go index f4b0c3dc68..9c60719c00 100644 --- a/go-controller/pkg/networkmanager/nad_controller.go +++ b/go-controller/pkg/networkmanager/nad_controller.go @@ -413,11 +413,23 @@ func (c *nadController) GetActiveNetworkForNamespace(namespace string) (util.Net if !util.IsNetworkSegmentationSupportEnabled() { return &util.DefaultNetInfo{}, nil } + + // check if required UDN label is on namespace + ns, err := c.namespaceLister.Get(namespace) + if err != nil { + return nil, fmt.Errorf("failed to get namespace %q: %w", namespace, err) + } + if _, exists := ns.Labels[types.RequiredUDNNamespaceLabel]; !exists { + // UDN required label not set on namespace, assume default network + return &util.DefaultNetInfo{}, nil + } + c.RLock() defer c.RUnlock() primaryNAD := c.primaryNADs[namespace] if primaryNAD != "" { - // we have a primary NAD, get the network + // we have a primary NAD, no need to check for NS UDN annotation because NAD would not have existed otherwise + // get the network netName := c.nads[primaryNAD] if netName == "" { // this should never happen where we have a nad keyed in the primaryNADs @@ -465,7 +477,8 @@ func (c *nadController) GetActiveNetworkForNamespace(namespace string) (util.Net } } - return &util.DefaultNetInfo{}, nil + // namespace has required UDN label, but no UDN was found + return nil, util.NewInvalidPrimaryNetworkError(namespace) } func (c *nadController) GetNetwork(name string) util.NetInfo { diff --git a/go-controller/pkg/networkmanager/nad_controller_test.go b/go-controller/pkg/networkmanager/nad_controller_test.go index 306c827669..a683ef4347 100644 --- a/go-controller/pkg/networkmanager/nad_controller_test.go +++ b/go-controller/pkg/networkmanager/nad_controller_test.go @@ -13,8 +13,10 @@ import ( cnitypes "github.com/containernetworking/cni/pkg/types" nettypes "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + kapiv1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/tools/cache" @@ -112,6 +114,23 @@ func (tcm *testControllerManager) Reconcile(name string, old, new util.NetInfo) return nil } +type fakeNamespaceLister struct{} + +func (f *fakeNamespaceLister) List(selector labels.Selector) (ret []*kapiv1.Namespace, err error) { + return nil, nil +} + +// Get retrieves the Namespace from the index for a given name. +// Objects returned here must be treated as read-only. 
+func (f *fakeNamespaceLister) Get(name string) (*kapiv1.Namespace, error) { + return &kapiv1.Namespace{ + ObjectMeta: v1.ObjectMeta{ + Name: name, + Labels: map[string]string{types.RequiredUDNNamespaceLabel: ""}, + }, + }, nil +} + func TestNADController(t *testing.T) { networkAPrimary := &ovncnitypes.NetConf{ Topology: types.Layer2Topology, @@ -470,6 +489,7 @@ func TestNADController(t *testing.T) { networkController: newNetworkController("", "", "", tcm, nil), networkIDAllocator: id.NewIDAllocator("NetworkIDs", MaxNetworks), nadClient: fakeClient.NetworkAttchDefClient, + namespaceLister: &fakeNamespaceLister{}, } err = nadController.networkIDAllocator.ReserveID(types.DefaultNetworkName, DefaultNetworkID) g.Expect(err).ToNot(gomega.HaveOccurred()) @@ -654,6 +674,17 @@ func TestSyncAll(t *testing.T) { expectedNetworks := map[string]util.NetInfo{} expectedPrimaryNetworks := map[string]util.NetInfo{} + for _, namespace := range []string{"test", "test2"} { + _, err = fakeClient.KubeClient.CoreV1().Namespaces().Create(context.TODO(), + &kapiv1.Namespace{ + ObjectMeta: v1.ObjectMeta{ + Name: namespace, + Labels: map[string]string{types.RequiredUDNNamespaceLabel: ""}, + }, + }, v1.CreateOptions{}, + ) + } + g.Expect(err).ToNot(gomega.HaveOccurred()) for _, testNAD := range tt.testNADs { namespace, name, err := cache.SplitMetaNamespaceKey(testNAD.name) g.Expect(err).ToNot(gomega.HaveOccurred()) diff --git a/go-controller/pkg/ovn/egressip_test.go b/go-controller/pkg/ovn/egressip_test.go index daeedd83e1..4ecd65bfeb 100644 --- a/go-controller/pkg/ovn/egressip_test.go +++ b/go-controller/pkg/ovn/egressip_test.go @@ -2106,8 +2106,8 @@ var _ = ginkgo.Describe("OVN master EgressIP Operations cluster default network" "k8s.ovn.org/egress-assignable": "", } node2 := nodes[1] - egressNamespace := newNamespace(eipNamespace) - egressNamespace2 := newNamespace(eipNamespace2) + egressNamespace := newUDNNamespace(eipNamespace) + egressNamespace2 := newUDNNamespace(eipNamespace2) egressPod1Node1 := *newPodWithLabels(eipNamespace, podName, node1Name, podV4IP, egressPodLabel) egressPod2Node1 := *newPodWithLabels(eipNamespace2, podName, node1Name, podV4IP2, egressPodLabel) egressPod3Node2 := *newPodWithLabels(eipNamespace, podName2, node2Name, podV4IP3, egressPodLabel) diff --git a/go-controller/pkg/ovn/egressip_udn_l2_test.go b/go-controller/pkg/ovn/egressip_udn_l2_test.go index 0e8d5fa402..4454a5d25a 100644 --- a/go-controller/pkg/ovn/egressip_udn_l2_test.go +++ b/go-controller/pkg/ovn/egressip_udn_l2_test.go @@ -131,7 +131,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol _, node1UDNSubnet, _ := net.ParseCIDR(v4Node1Net1) nadName := util.GetNADName(eipNamespace2, nadName1) egressCDNNamespace := newNamespaceWithLabels(eipNamespace, egressPodLabel) - egressUDNNamespace := newNamespaceWithLabels(eipNamespace2, egressPodLabel) + egressUDNNamespace := newUDNNamespaceWithLabels(eipNamespace2, egressPodLabel) egressPodCDNLocal := *newPodWithLabels(eipNamespace, podName, node1Name, podV4IP, egressPodLabel) egressPodUDNLocal := *newPodWithLabels(eipNamespace2, podName2, node1Name, v4Pod1IPNode1Net1, egressPodLabel) egressPodCDNRemote := *newPodWithLabels(eipNamespace, podName3, node2Name, podV4IP2, egressPodLabel) @@ -498,7 +498,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol _, node1UDNSubnet, _ := net.ParseCIDR(v4Node1Net1) nadName := util.GetNADName(eipNamespace2, nadName1) egressCDNNamespace := newNamespaceWithLabels(eipNamespace, 
egressPodLabel) - egressUDNNamespace := newNamespaceWithLabels(eipNamespace2, egressPodLabel) + egressUDNNamespace := newUDNNamespaceWithLabels(eipNamespace2, egressPodLabel) egressPodCDNLocal := *newPodWithLabels(eipNamespace, podName, node1Name, podV4IP, egressPodLabel) egressPodUDNLocal := *newPodWithLabels(eipNamespace2, podName2, node1Name, v4Pod1IPNode1Net1, egressPodLabel) egressPodCDNRemote := *newPodWithLabels(eipNamespace, podName3, node2Name, podV4IP2, egressPodLabel) @@ -1005,7 +1005,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol _, node1UDNSubnet, _ := net.ParseCIDR(v4Node1Net1) nadName := util.GetNADName(eipNamespace2, nadName1) egressCDNNamespace := newNamespaceWithLabels(eipNamespace, egressPodLabel) - egressUDNNamespace := newNamespaceWithLabels(eipNamespace2, egressPodLabel) + egressUDNNamespace := newUDNNamespaceWithLabels(eipNamespace2, egressPodLabel) egressPodCDNLocal := *newPodWithLabels(eipNamespace, podName, node1Name, podV4IP, egressPodLabel) egressPodUDNLocal := *newPodWithLabels(eipNamespace2, podName2, node1Name, v4Pod1IPNode1Net1, egressPodLabel) egressPodCDNRemote := *newPodWithLabels(eipNamespace, podName3, node2Name, podV4IP2, egressPodLabel) @@ -1489,7 +1489,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol _, node1CDNSubnet, _ := net.ParseCIDR(v4Node1Subnet) _, node1UDNSubnet, _ := net.ParseCIDR(v4Node1Net1) egressCDNNamespace := newNamespaceWithLabels(eipNamespace, nil) - egressUDNNamespace := newNamespaceWithLabels(eipNamespace2, nil) + egressUDNNamespace := newUDNNamespaceWithLabels(eipNamespace2, nil) egressPodCDN := *newPodWithLabels(eipNamespace, podName, node1Name, podV4IP, egressPodLabel) egressPodUDN := *newPodWithLabels(eipNamespace2, podName2, node1Name, podV4IP2, egressPodLabel) @@ -1680,7 +1680,11 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol egressCDNNamespace.Labels = egressPodLabel _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Namespaces().Update(context.Background(), egressCDNNamespace, metav1.UpdateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - egressUDNNamespace.Labels = egressPodLabel + egressUDNNamespace.Labels = map[string]string{} + for k, v := range egressPodLabel { + egressUDNNamespace.Labels[k] = v + } + egressUDNNamespace.Labels[ovntypes.RequiredUDNNamespaceLabel] = "" _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Namespaces().Update(context.Background(), egressUDNNamespace, metav1.UpdateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) egressSVCServedPodsASv4, _ := buildEgressServiceAddressSets(nil) @@ -1849,7 +1853,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol _, node1CDNSubnet, _ := net.ParseCIDR(v4Node1Subnet) _, node1UDNSubnet, _ := net.ParseCIDR(v4Node1Net1) egressCDNNamespace := newNamespaceWithLabels(eipNamespace, egressPodLabel) - egressUDNNamespace := newNamespaceWithLabels(eipNamespace2, egressPodLabel) + egressUDNNamespace := newUDNNamespaceWithLabels(eipNamespace2, egressPodLabel) egressPodCDN := *newPodWithLabels(eipNamespace, podName, node1Name, podV4IP, egressPodLabel) egressPodUDN := *newPodWithLabels(eipNamespace2, podName2, node1Name, podV4IP2, egressPodLabel) @@ -2200,7 +2204,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol _, node1UDNSubnet, _ := net.ParseCIDR(v4Node1Net1) nadName := util.GetNADName(eipNamespace2, nadName1) egressCDNNamespace := newNamespaceWithLabels(eipNamespace, 
egressPodLabel) - egressUDNNamespace := newNamespaceWithLabels(eipNamespace2, egressPodLabel) + egressUDNNamespace := newUDNNamespaceWithLabels(eipNamespace2, egressPodLabel) egressPodCDNLocal := *newPodWithLabels(eipNamespace, podName, node1Name, podV4IP, nil) egressPodUDNLocal := *newPodWithLabels(eipNamespace2, podName2, node1Name, v4Pod1IPNode1Net1, nil) egressPodCDNRemote := *newPodWithLabels(eipNamespace, podName3, node2Name, podV4IP2, egressPodLabel) diff --git a/go-controller/pkg/ovn/egressip_udn_l3_test.go b/go-controller/pkg/ovn/egressip_udn_l3_test.go index 34dec4d824..552e6a98b9 100644 --- a/go-controller/pkg/ovn/egressip_udn_l3_test.go +++ b/go-controller/pkg/ovn/egressip_udn_l3_test.go @@ -125,7 +125,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol _, node1UDNSubnet, _ := net.ParseCIDR(v4Node1Net1) nadName := util.GetNADName(eipNamespace2, nadName1) egressCDNNamespace := newNamespaceWithLabels(eipNamespace, egressPodLabel) - egressUDNNamespace := newNamespaceWithLabels(eipNamespace2, egressPodLabel) + egressUDNNamespace := newUDNNamespaceWithLabels(eipNamespace2, egressPodLabel) egressPodCDNLocal := *newPodWithLabels(eipNamespace, podName, node1Name, podV4IP, egressPodLabel) egressPodUDNLocal := *newPodWithLabels(eipNamespace2, podName2, node1Name, v4Pod1IPNode1Net1, egressPodLabel) egressPodCDNRemote := *newPodWithLabels(eipNamespace, podName3, node2Name, podV4IP2, egressPodLabel) @@ -499,7 +499,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol _, node1UDNSubnet, _ := net.ParseCIDR(v4Node1Net1) nadName := util.GetNADName(eipNamespace2, nadName1) egressCDNNamespace := newNamespaceWithLabels(eipNamespace, egressPodLabel) - egressUDNNamespace := newNamespaceWithLabels(eipNamespace2, egressPodLabel) + egressUDNNamespace := newUDNNamespaceWithLabels(eipNamespace2, egressPodLabel) egressPodCDNLocal := *newPodWithLabels(eipNamespace, podName, node1Name, podV4IP, egressPodLabel) egressPodUDNLocal := *newPodWithLabels(eipNamespace2, podName2, node1Name, v4Pod1IPNode1Net1, egressPodLabel) egressPodCDNRemote := *newPodWithLabels(eipNamespace, podName3, node2Name, podV4IP2, egressPodLabel) @@ -1019,7 +1019,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol _, node1UDNSubnet, _ := net.ParseCIDR(v4Node1Net1) nadName := util.GetNADName(eipNamespace2, nadName1) egressCDNNamespace := newNamespaceWithLabels(eipNamespace, egressPodLabel) - egressUDNNamespace := newNamespaceWithLabels(eipNamespace2, egressPodLabel) + egressUDNNamespace := newUDNNamespaceWithLabels(eipNamespace2, egressPodLabel) egressPodCDNLocal := *newPodWithLabels(eipNamespace, podName, node1Name, podV4IP, egressPodLabel) egressPodUDNLocal := *newPodWithLabels(eipNamespace2, podName2, node1Name, v4Pod1IPNode1Net1, egressPodLabel) egressPodCDNRemote := *newPodWithLabels(eipNamespace, podName3, node2Name, podV4IP2, egressPodLabel) @@ -1514,7 +1514,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol _, node1CDNSubnet, _ := net.ParseCIDR(v4Node1Subnet) _, node1UDNSubnet, _ := net.ParseCIDR(v4Node1Net1) egressCDNNamespace := newNamespaceWithLabels(eipNamespace, nil) - egressUDNNamespace := newNamespaceWithLabels(eipNamespace2, nil) + egressUDNNamespace := newUDNNamespaceWithLabels(eipNamespace2, nil) egressPodCDN := *newPodWithLabels(eipNamespace, podName, node1Name, podV4IP, egressPodLabel) egressPodUDN := *newPodWithLabels(eipNamespace2, podName2, node1Name, podV4IP2, egressPodLabel) @@ 
-1698,10 +1698,14 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol nUDN.IP = iUDN secConInfo.bnc.logicalPortCache.add(&egressPodUDN, "", util.GetNADName(nad.Namespace, nad.Name), "", nil, []*net.IPNet{nUDN}) ginkgo.By("update namespaces with label so its now selected by EgressIP") - egressCDNNamespace.Labels = egressPodLabel + for k, v := range egressPodLabel { + egressCDNNamespace.Labels[k] = v + } _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Namespaces().Update(context.Background(), egressCDNNamespace, metav1.UpdateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) - egressUDNNamespace.Labels = egressPodLabel + for k, v := range egressPodLabel { + egressUDNNamespace.Labels[k] = v + } _, err = fakeOvn.fakeClient.KubeClient.CoreV1().Namespaces().Update(context.Background(), egressUDNNamespace, metav1.UpdateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) egressSVCServedPodsASv4, _ := buildEgressServiceAddressSets(nil) @@ -1878,7 +1882,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol _, node1CDNSubnet, _ := net.ParseCIDR(v4Node1Subnet) _, node1UDNSubnet, _ := net.ParseCIDR(v4Node1Net1) egressCDNNamespace := newNamespaceWithLabels(eipNamespace, egressPodLabel) - egressUDNNamespace := newNamespaceWithLabels(eipNamespace2, egressPodLabel) + egressUDNNamespace := newUDNNamespaceWithLabels(eipNamespace2, egressPodLabel) egressPodCDN := *newPodWithLabels(eipNamespace, podName, node1Name, podV4IP, egressPodLabel) egressPodUDN := *newPodWithLabels(eipNamespace2, podName2, node1Name, podV4IP2, egressPodLabel) @@ -2235,7 +2239,7 @@ var _ = ginkgo.Describe("EgressIP Operations for user defined network with topol _, node1UDNSubnet, _ := net.ParseCIDR(v4Node1Net1) nadName := util.GetNADName(eipNamespace2, nadName1) egressCDNNamespace := newNamespaceWithLabels(eipNamespace, egressPodLabel) - egressUDNNamespace := newNamespaceWithLabels(eipNamespace2, egressPodLabel) + egressUDNNamespace := newUDNNamespaceWithLabels(eipNamespace2, egressPodLabel) egressPodCDNLocal := *newPodWithLabels(eipNamespace, podName, node1Name, podV4IP, nil) egressPodUDNLocal := *newPodWithLabels(eipNamespace2, podName2, node1Name, v4Pod1IPNode1Net1, nil) egressPodCDNRemote := *newPodWithLabels(eipNamespace, podName3, node2Name, podV4IP2, egressPodLabel) diff --git a/go-controller/pkg/ovn/multicast_test.go b/go-controller/pkg/ovn/multicast_test.go index 7735146ba5..0dd2903137 100644 --- a/go-controller/pkg/ovn/multicast_test.go +++ b/go-controller/pkg/ovn/multicast_test.go @@ -672,6 +672,9 @@ var _ = Describe("OVN Multicast with IP Address Family", func() { netInfo := getNetInfoFromNAD(nad) node := newNodeWithNad(nad, networkName, networkID) namespace1 := *newNamespace(namespaceName1) + if nad != nil { + namespace1 = *newUDNNamespace(namespaceName1) + } pods, tPods, tPodIPs := createTestPods(nodeName, namespaceName1, useIPv4, useIPv6) objs := []runtime.Object{ @@ -820,6 +823,9 @@ var _ = Describe("OVN Multicast with IP Address Family", func() { netInfo := getNetInfoFromNAD(nad) namespace1 := *newNamespace(namespaceName1) + if nad != nil { + namespace1 = *newUDNNamespace(namespaceName1) + } node := newNodeWithNad(nad, networkName, networkID) _, tPods, tPodIPs := createTestPods(nodeName, namespaceName1, useIPv4, useIPv6) diff --git a/go-controller/pkg/ovn/namespace_test.go b/go-controller/pkg/ovn/namespace_test.go index 31e380159b..c527d1b010 100644 --- a/go-controller/pkg/ovn/namespace_test.go +++ b/go-controller/pkg/ovn/namespace_test.go @@ 
-51,6 +51,16 @@ func newNamespaceMeta(namespace string, additionalLabels map[string]string) meta } } +func newUDNNamespaceWithLabels(namespace string, additionalLabels map[string]string) *v1.Namespace { + n := &v1.Namespace{ + ObjectMeta: newNamespaceMeta(namespace, additionalLabels), + Spec: v1.NamespaceSpec{}, + Status: v1.NamespaceStatus{}, + } + n.Labels[ovntypes.RequiredUDNNamespaceLabel] = "" + return n +} + func newNamespaceWithLabels(namespace string, additionalLabels map[string]string) *v1.Namespace { return &v1.Namespace{ ObjectMeta: newNamespaceMeta(namespace, additionalLabels), @@ -67,6 +77,14 @@ func newNamespace(namespace string) *v1.Namespace { } } +func newUDNNamespace(namespace string) *v1.Namespace { + return &v1.Namespace{ + ObjectMeta: newNamespaceMeta(namespace, map[string]string{ovntypes.RequiredUDNNamespaceLabel: ""}), + Spec: v1.NamespaceSpec{}, + Status: v1.NamespaceStatus{}, + } +} + func getDefaultNetNsAddrSetHashNames(ns string) (string, string) { return getNsAddrSetHashNames(DefaultNetworkControllerName, ns) } diff --git a/go-controller/pkg/ovn/ovn_test.go b/go-controller/pkg/ovn/ovn_test.go index 276ebe1e15..cdcbe5896f 100644 --- a/go-controller/pkg/ovn/ovn_test.go +++ b/go-controller/pkg/ovn/ovn_test.go @@ -92,7 +92,8 @@ type FakeOVN struct { portCache *PortCache // information map of all secondary network controllers - secondaryControllers map[string]secondaryControllerInfo + secondaryControllers map[string]secondaryControllerInfo + fullSecondaryL2Controllers map[string]*SecondaryLayer2NetworkController } // NOTE: the FakeAddressSetFactory is no longer needed and should no longer be used. starting to phase out FakeAddressSetFactory @@ -108,7 +109,8 @@ func NewFakeOVN(useFakeAddressSet bool) *FakeOVN { egressSVCWg: &sync.WaitGroup{}, anpWg: &sync.WaitGroup{}, - secondaryControllers: map[string]secondaryControllerInfo{}, + secondaryControllers: map[string]secondaryControllerInfo{}, + fullSecondaryL2Controllers: map[string]*SecondaryLayer2NetworkController{}, } } @@ -280,6 +282,67 @@ func (o *FakeOVN) init(nadList []nettypes.NetworkAttachmentDefinition) { } +// creates the global entities that should remain after a UDN created and removed +func generateUDNPostInitDB(testData []libovsdbtest.TestData, netName string) []libovsdbtest.TestData { + testData = append(testData, &nbdb.MeterBand{ + UUID: "25-pktps-rate-limiter-UUID", + Action: types.MeterAction, + Rate: int(25), + }) + meters := map[string]string{ + OVNARPRateLimiter: getMeterNameForProtocol(OVNARPRateLimiter), + OVNARPResolveRateLimiter: getMeterNameForProtocol(OVNARPResolveRateLimiter), + OVNBFDRateLimiter: getMeterNameForProtocol(OVNBFDRateLimiter), + OVNControllerEventsRateLimiter: getMeterNameForProtocol(OVNControllerEventsRateLimiter), + OVNICMPV4ErrorsRateLimiter: getMeterNameForProtocol(OVNICMPV4ErrorsRateLimiter), + OVNICMPV6ErrorsRateLimiter: getMeterNameForProtocol(OVNICMPV6ErrorsRateLimiter), + OVNRejectRateLimiter: getMeterNameForProtocol(OVNRejectRateLimiter), + OVNTCPRSTRateLimiter: getMeterNameForProtocol(OVNTCPRSTRateLimiter), + OVNServiceMonitorLimiter: getMeterNameForProtocol(OVNServiceMonitorLimiter), + } + fairness := true + for _, v := range meters { + testData = append(testData, &nbdb.Meter{ + UUID: v + "-UUID", + Bands: []string{"25-pktps-rate-limiter-UUID"}, + Name: v, + Unit: types.PacketsPerSecond, + Fair: &fairness, + }) + } + + copp := &nbdb.Copp{ + UUID: "copp-UUID", + Name: "ovnkube-default", + Meters: meters, + } + testData = append(testData, copp) + + clusterLBGroupName 
:= types.ClusterLBGroupName + clusterSwitchLBGroupName := types.ClusterSwitchLBGroupName + clusterRouterLBGroupName := types.ClusterRouterLBGroupName + if len(netName) > 0 { + clusterLBGroupName = fmt.Sprintf("%s_%s", netName, clusterLBGroupName) + clusterSwitchLBGroupName = fmt.Sprintf("%s_%s", netName, clusterSwitchLBGroupName) + clusterRouterLBGroupName = fmt.Sprintf("%s_%s", netName, clusterRouterLBGroupName) + } + + testData = append(testData, + &nbdb.LoadBalancerGroup{ + Name: clusterLBGroupName, + UUID: clusterLBGroupName + "-UUID", + }, + &nbdb.LoadBalancerGroup{ + Name: clusterSwitchLBGroupName, + UUID: clusterSwitchLBGroupName + "-UUID", + }, + &nbdb.LoadBalancerGroup{ + Name: clusterRouterLBGroupName, + UUID: clusterRouterLBGroupName + "-UUID", + }) + return testData +} + func setupClusterController(clusterController *DefaultNetworkController, setupCOPP bool) { var err error clusterController.SCTPSupport = true @@ -510,6 +573,7 @@ func (o *FakeOVN) NewSecondaryNetworkController(netattachdef *nettypes.NetworkAt l2Controller.addressSetFactory = asf } secondaryController = &l2Controller.BaseSecondaryNetworkController + o.fullSecondaryL2Controllers[netName] = l2Controller case types.LocalnetTopology: localnetController := NewSecondaryLocalnetNetworkController(cnci, nInfo, o.networkManager.Interface()) if o.asf != nil { // use fake asf only when enabled diff --git a/go-controller/pkg/ovn/secondary_layer2_network_controller_test.go b/go-controller/pkg/ovn/secondary_layer2_network_controller_test.go index 427bef1faa..1709874b0c 100644 --- a/go-controller/pkg/ovn/secondary_layer2_network_controller_test.go +++ b/go-controller/pkg/ovn/secondary_layer2_network_controller_test.go @@ -51,7 +51,7 @@ type liveMigrationInfo struct { targetPodInfo liveMigrationPodInfo } -var _ = Describe("OVN Multi-Homed pod operations for layer2 network", func() { +var _ = Describe("OVN Multi-Homed pod operations for layer 2 network", func() { var ( app *cli.App fakeOvn *FakeOVN @@ -89,7 +89,7 @@ var _ = Describe("OVN Multi-Homed pod operations for layer2 network", func() { const nodeIPv4CIDR = "192.168.126.202/24" By(fmt.Sprintf("Creating a node named %q, with IP: %s", nodeName, nodeIPv4CIDR)) - testNode, err := newNodeWithSecondaryNets(nodeName, nodeIPv4CIDR, netInfo) + testNode, err := newNodeWithSecondaryNets(nodeName, nodeIPv4CIDR) Expect(err).NotTo(HaveOccurred()) Expect(setupFakeOvnForLayer2Topology(fakeOvn, initialDB, netInfo, testNode, podInfo, pod)).To(Succeed()) @@ -196,7 +196,7 @@ var _ = Describe("OVN Multi-Homed pod operations for layer2 network", func() { const nodeIPv4CIDR = "192.168.126.202/24" By(fmt.Sprintf("Creating a node named %q, with IP: %s", nodeName, nodeIPv4CIDR)) - testNode, err := newNodeWithSecondaryNets(nodeName, nodeIPv4CIDR, netInfo) + testNode, err := newNodeWithSecondaryNets(nodeName, nodeIPv4CIDR) Expect(err).NotTo(HaveOccurred()) Expect(setupFakeOvnForLayer2Topology(fakeOvn, initialDB, netInfo, testNode, sourcePodInfo, sourcePod)).To(Succeed()) @@ -328,7 +328,7 @@ var _ = Describe("OVN Multi-Homed pod operations for layer2 network", func() { ) DescribeTable( - "the gateway is properly cleaned up", + "secondary network controller DB entities are properly cleaned up", func(netInfo secondaryNetInfo, testConfig testConfiguration) { podInfo := dummyTestPod(ns, netInfo) if testConfig.configToOverride != nil { @@ -336,6 +336,7 @@ var _ = Describe("OVN Multi-Homed pod operations for layer2 network", func() { if testConfig.gatewayConfig != nil { config.Gateway.DisableSNATMultipleGWs 
= testConfig.gatewayConfig.DisableSNATMultipleGWs } + config.OVNKubernetesFeature.EnableMultiNetwork = true } app.Action = func(ctx *cli.Context) error { netConf := netInfo.netconf() @@ -355,7 +356,7 @@ var _ = Describe("OVN Multi-Homed pod operations for layer2 network", func() { nad.Annotations = map[string]string{ovntypes.OvnNetworkIDAnnotation: secondaryNetworkID} const nodeIPv4CIDR = "192.168.126.202/24" - testNode, err := newNodeWithSecondaryNets(nodeName, nodeIPv4CIDR, netInfo) + testNode, err := newNodeWithSecondaryNets(nodeName, nodeIPv4CIDR) Expect(err).NotTo(HaveOccurred()) gwConfig, err := util.ParseNodeL3GatewayAnnotation(testNode) @@ -363,7 +364,9 @@ var _ = Describe("OVN Multi-Homed pod operations for layer2 network", func() { Expect(gwConfig.NextHops).NotTo(BeEmpty()) nbZone := &nbdb.NBGlobal{Name: ovntypes.OvnDefaultZone, UUID: ovntypes.OvnDefaultZone} + n := newNamespace(ns) if netInfo.isPrimary { + n = newUDNNamespace(ns) gwConfig, err := util.ParseNodeL3GatewayAnnotation(testNode) Expect(err).NotTo(HaveOccurred()) initialDB.NBData = append( @@ -376,7 +379,7 @@ var _ = Describe("OVN Multi-Homed pod operations for layer2 network", func() { initialDB, &v1.NamespaceList{ Items: []v1.Namespace{ - *newNamespace(ns), + *n, }, }, &v1.NodeList{ @@ -407,11 +410,16 @@ var _ = Describe("OVN Multi-Homed pod operations for layer2 network", func() { Expect(ok).To(BeFalse()) } - Expect(fakeOvn.controller.WatchNamespaces()).To(Succeed()) - Expect(fakeOvn.controller.WatchPods()).To(Succeed()) + Expect(fakeOvn.networkManager.Start()).To(Succeed()) + defer fakeOvn.networkManager.Stop() secondaryNetController, ok := fakeOvn.secondaryControllers[secondaryNetworkName] Expect(ok).To(BeTrue()) + fullSecondaryController, ok := fakeOvn.fullSecondaryL2Controllers[secondaryNetworkName] + Expect(ok).To(BeTrue()) + err = fullSecondaryController.Init() + Expect(err).NotTo(HaveOccurred()) + secondaryNetController.bnc.ovnClusterLRPToJoinIfAddrs = dummyJoinIPs() podInfo.populateSecondaryNetworkLogicalSwitchCache(fakeOvn, secondaryNetController) Expect(secondaryNetController.bnc.WatchNodes()).To(Succeed()) @@ -420,17 +428,10 @@ var _ = Describe("OVN Multi-Homed pod operations for layer2 network", func() { Expect(fakeOvn.fakeClient.KubeClient.CoreV1().Pods(pod.Namespace).Delete(context.Background(), pod.Name, metav1.DeleteOptions{})).To(Succeed()) Expect(fakeOvn.fakeClient.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(nad.Namespace).Delete(context.Background(), nad.Name, metav1.DeleteOptions{})).To(Succeed()) - // we must access the layer2 controller to be able to issue its cleanup function (to remove the GW related stuff). 
- Expect( - newSecondaryLayer2NetworkController( - &secondaryNetController.bnc.CommonNetworkControllerInfo, - networkConfig, - nodeName, - fakeNetworkManager, - nil, - NewPortCache(ctx.Done()), - ).Cleanup()).To(Succeed()) - Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData([]libovsdbtest.TestData{nbZone})) + err = fullSecondaryController.Cleanup() + Expect(err).NotTo(HaveOccurred()) + Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(generateUDNPostInitDB([]libovsdbtest.TestData{nbZone}, + fullSecondaryController.BaseSecondaryNetworkController.GetNetworkName()))) return nil } @@ -454,15 +455,6 @@ var _ = Describe("OVN Multi-Homed pod operations for layer2 network", func() { }) -func dummyLocalnetWithSecondaryUserDefinedNetwork(subnets string) secondaryNetInfo { - return secondaryNetInfo{ - netName: secondaryNetworkName, - nadName: namespacedName(ns, nadName), - topology: ovntypes.LocalnetTopology, - clustersubnets: subnets, - } -} - func dummySecondaryLayer2UserDefinedNetwork(subnets string) secondaryNetInfo { return secondaryNetInfo{ netName: secondaryNetworkName, @@ -716,7 +708,9 @@ func setupFakeOvnForLayer2Topology(fakeOvn *FakeOVN, initialDB libovsdbtest.Test By("setting up the OVN DB without any entities in it") Expect(netInfo.setupOVNDependencies(&initialDB)).To(Succeed()) + n := newNamespace(ns) if netInfo.isPrimary { + n = newUDNNamespace(ns) networkConfig, err := util.NewNetInfo(netInfo.netconf()) Expect(err).NotTo(HaveOccurred()) @@ -734,7 +728,7 @@ func setupFakeOvnForLayer2Topology(fakeOvn *FakeOVN, initialDB libovsdbtest.Test initialDB, &v1.NamespaceList{ Items: []v1.Namespace{ - *newNamespace(ns), + *n, }, }, &v1.NodeList{Items: []v1.Node{*testNode}}, diff --git a/go-controller/pkg/ovn/secondary_layer3_network_controller_test.go b/go-controller/pkg/ovn/secondary_layer3_network_controller_test.go index c587ab9105..02671eaf54 100644 --- a/go-controller/pkg/ovn/secondary_layer3_network_controller_test.go +++ b/go-controller/pkg/ovn/secondary_layer3_network_controller_test.go @@ -59,7 +59,7 @@ type testConfiguration struct { expectationOptions []option } -var _ = Describe("OVN Multi-Homed pod operations", func() { +var _ = Describe("OVN Multi-Homed pod operations for layer 3 network", func() { var ( app *cli.App fakeOvn *FakeOVN @@ -115,7 +115,9 @@ var _ = Describe("OVN Multi-Homed pod operations", func() { Expect(err).NotTo(HaveOccurred()) nad.Annotations = map[string]string{types.OvnNetworkIDAnnotation: secondaryNetworkID} Expect(netInfo.setupOVNDependencies(&initialDB)).To(Succeed()) + n := newNamespace(ns) if netInfo.isPrimary { + n = newUDNNamespace(ns) networkConfig, err := util.NewNetInfo(netInfo.netconf()) Expect(err).NotTo(HaveOccurred()) initialDB.NBData = append( @@ -144,7 +146,7 @@ var _ = Describe("OVN Multi-Homed pod operations", func() { initialDB, &v1.NamespaceList{ Items: []v1.Namespace{ - *newNamespace(ns), + *n, }, }, &v1.NodeList{ @@ -357,7 +359,7 @@ var _ = Describe("OVN Multi-Homed pod operations", func() { initialDB, &v1.NamespaceList{ Items: []v1.Namespace{ - *newNamespace(ns), + *newUDNNamespace(ns), }, }, &v1.NodeList{ @@ -621,6 +623,11 @@ func newNodeWithSecondaryNets(nodeName string, nodeIPv4CIDR string, netInfos ... 
nodeSubnetInfo = append(nodeSubnetInfo, info.String()) } + parsedNodeSubnets := fmt.Sprintf("{\"default\":\"%s\"}", v4Node1Subnet) + if len(nodeSubnetInfo) > 0 { + parsedNodeSubnets = fmt.Sprintf("{\"default\":\"%s\", %s}", v4Node1Subnet, strings.Join(nodeSubnetInfo, ",")) + } + nodeIP, nodeCIDR, err := net.ParseCIDR(nodeIPv4CIDR) if err != nil { return nil, err @@ -633,13 +640,13 @@ func newNodeWithSecondaryNets(nodeName string, nodeIPv4CIDR string, netInfos ... Name: nodeName, Annotations: map[string]string{ "k8s.ovn.org/node-primary-ifaddr": fmt.Sprintf("{\"ipv4\": \"%s\", \"ipv6\": \"%s\"}", nodeIPv4CIDR, ""), - "k8s.ovn.org/node-subnets": fmt.Sprintf("{\"default\":\"%s\", %s}", v4Node1Subnet, strings.Join(nodeSubnetInfo, ",")), + "k8s.ovn.org/node-subnets": parsedNodeSubnets, util.OVNNodeHostCIDRs: fmt.Sprintf("[\"%s\"]", nodeIPv4CIDR), "k8s.ovn.org/zone-name": "global", "k8s.ovn.org/l3-gateway-config": fmt.Sprintf("{\"default\":{\"mode\":\"shared\",\"bridge-id\":\"breth0\",\"interface-id\":\"breth0_ovn-worker\",\"mac-address\":%q,\"ip-addresses\":[%[2]q],\"ip-address\":%[2]q,\"next-hops\":[%[3]q],\"next-hop\":%[3]q,\"node-port-enable\":\"true\",\"vlan-id\":\"0\"}}", util.IPAddrToHWAddr(nodeIP), nodeCIDR, nextHopIP), util.OvnNodeChassisID: "abdcef", "k8s.ovn.org/network-ids": fmt.Sprintf("{\"default\":\"0\",\"isolatednet\":\"%s\"}", secondaryNetworkID), - util.OVNNodeGRLRPAddrs: fmt.Sprintf("{\"isolatednet\":{\"ipv4\":%q}}", gwRouterJoinIPAddress()), + util.OVNNodeGRLRPAddrs: fmt.Sprintf("{\"default\":{\"ipv4\":\"100.64.0.2/16\"},\"isolatednet\":{\"ipv4\":%q}}", gwRouterJoinIPAddress()), "k8s.ovn.org/udn-layer2-node-gateway-router-lrp-tunnel-ids": "{\"isolatednet\":\"25\"}", }, Labels: map[string]string{ diff --git a/go-controller/pkg/types/const.go b/go-controller/pkg/types/const.go index 9c6afae5ba..94891a58bd 100644 --- a/go-controller/pkg/types/const.go +++ b/go-controller/pkg/types/const.go @@ -215,6 +215,8 @@ const ( LoadBalancerOwnerExternalID = OvnK8sPrefix + "/" + "owner" // key for UDN enabled services routes UDNEnabledServiceExternalID = OvnK8sPrefix + "/" + "udn-enabled-default-service" + // RequiredUDNNamespaceLabel is the required namespace label for enabling primary UDNs + RequiredUDNNamespaceLabel = "k8s.ovn.org/primary-user-defined-network" // different secondary network topology type defined in CNI netconf Layer3Topology = "layer3" diff --git a/go-controller/pkg/util/util.go b/go-controller/pkg/util/util.go index c7eff45fed..58645192c9 100644 --- a/go-controller/pkg/util/util.go +++ b/go-controller/pkg/util/util.go @@ -386,6 +386,21 @@ func NewUnprocessedActiveNetworkError(namespace, udnName string) *UnprocessedAct return &UnprocessedActiveNetworkError{namespace: namespace, udnName: udnName} } +type InvalidPrimaryNetworkError struct { + namespace string +} + +func (m *InvalidPrimaryNetworkError) Error() string { + return fmt.Sprintf("invalid primary network state for namespace %q: "+ + "a valid primary user defined network or network attachment definition custom resource, "+ + "and required namespace label %q must both be present", + m.namespace, types.RequiredUDNNamespaceLabel) +} + +func NewInvalidPrimaryNetworkError(namespace string) *InvalidPrimaryNetworkError { + return &InvalidPrimaryNetworkError{namespace: namespace} +} + func GetUserDefinedNetworkRole(isPrimary bool) string { networkRole := types.NetworkRoleSecondary if isPrimary { From 7bb83ce1c69236129d1176ef23f033e6717f2968 Mon Sep 17 00:00:00 2001 From: Patryk Diak Date: Thu, 12 Dec 2024 18:38:59 +0100 
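For illustration, not part of the patch: with the RequiredUDNNamespaceLabel constant above in place, a namespace that should be served by a primary user defined network has to be created with the label already set, because the ValidatingAdmissionPolicy added in the next patch rejects adding or removing it on update. The namespace name below is made up.

apiVersion: v1
kind: Namespace
metadata:
  name: tenant-blue
  labels:
    # must be present at creation time; only the key is checked, the value is ignored
    k8s.ovn.org/primary-user-defined-network: ""

A later attempt to drop it, for example `kubectl label namespace tenant-blue k8s.ovn.org/primary-user-defined-network-`, would then be denied by the policy binding.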
Subject: [PATCH 05/51] Disable adding/removing the UDN namespace label Signed-off-by: Patryk Diak --- dist/templates/rbac-ovnkube-master.yaml.j2 | 32 ++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/dist/templates/rbac-ovnkube-master.yaml.j2 b/dist/templates/rbac-ovnkube-master.yaml.j2 index b4c5f64816..ab4c98fd89 100644 --- a/dist/templates/rbac-ovnkube-master.yaml.j2 +++ b/dist/templates/rbac-ovnkube-master.yaml.j2 @@ -156,3 +156,35 @@ rules: - apiGroups: [""] resources: ["configmaps"] verbs: ["create", "patch", "update"] + +--- +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingAdmissionPolicy +metadata: + name: user-defined-networks-namespace-label +spec: + matchConstraints: + resourceRules: + - apiGroups: [""] + apiVersions: ["v1"] + operations: ["UPDATE"] + resources: ["namespaces"] + failurePolicy: Fail + validations: + - expression: "('k8s.ovn.org/primary-user-defined-network' in oldObject.metadata.labels) == ('k8s.ovn.org/primary-user-defined-network' in object.metadata.labels)" + message: "The 'k8s.ovn.org/primary-user-defined-network' label cannot be added/removed after the namespace was created" + +--- +apiVersion: admissionregistration.k8s.io/v1 +kind: ValidatingAdmissionPolicyBinding +metadata: + name: user-defined-networks-namespace-label-binding +spec: + policyName: user-defined-networks-namespace-label + validationActions: [Deny] + matchResources: + resourceRules: + - apiGroups: [""] + apiVersions: ["v1"] + operations: ["UPDATE"] + resources: ["namespaces"] From 0b71fe6cf2cb1842f19c34101f0e161d03f58b44 Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Mon, 16 Dec 2024 22:15:35 -0500 Subject: [PATCH 06/51] Update E2Es to for required UDN label Signed-off-by: Tim Rozet --- test/e2e/e2e.go | 11 +- test/e2e/egressip.go | 65 ++++++------ test/e2e/network_segmentation.go | 100 +++++++++++++----- ...work_segmentation_endpointslices_mirror.go | 38 ++++--- test/e2e/network_segmentation_policy.go | 12 ++- test/e2e/network_segmentation_services.go | 7 ++ 6 files changed, 159 insertions(+), 74 deletions(-) diff --git a/test/e2e/e2e.go b/test/e2e/e2e.go index 3dc483db31..1330aa274e 100644 --- a/test/e2e/e2e.go +++ b/test/e2e/e2e.go @@ -469,8 +469,15 @@ func deleteClusterExternalContainer(containerName string) { }, 5).Should(gomega.HaveLen(0)) } -func updateNamespace(f *framework.Framework, namespace *v1.Namespace) { - _, err := f.ClientSet.CoreV1().Namespaces().Update(context.Background(), namespace, metav1.UpdateOptions{}) +// updatesNamespace labels while preserving the required UDN label +func updateNamespaceLabels(f *framework.Framework, namespace *v1.Namespace, labels map[string]string) { + // should never be nil + n := *namespace + n.Labels = labels + if _, ok := namespace.Labels[RequiredUDNNamespaceLabel]; ok { + n.Labels[RequiredUDNNamespaceLabel] = "" + } + _, err := f.ClientSet.CoreV1().Namespaces().Update(context.Background(), &n, metav1.UpdateOptions{}) framework.ExpectNoError(err, fmt.Sprintf("unable to update namespace: %s, err: %v", namespace.Name, err)) } func getNamespace(f *framework.Framework, name string) *v1.Namespace { diff --git a/test/e2e/egressip.go b/test/e2e/egressip.go index 5e54eaacd5..7d9c5b6856 100644 --- a/test/e2e/egressip.go +++ b/test/e2e/egressip.go @@ -771,10 +771,10 @@ var _ = ginkgo.DescribeTableSubtree("e2e egress IP validation", func(netConfigPa usedEgressNodeAvailabilityHandler.Enable(egress2Node.name) podNamespace := f.Namespace - podNamespace.Labels = map[string]string{ + labels := map[string]string{ "name": 
f.Namespace.Name, } - updateNamespace(f, podNamespace) + updateNamespaceLabels(f, f.Namespace, labels) ginkgo.By("1. Create an EgressIP object with two egress IPs defined") // Assign the egress IP without conflicting with any node IP, @@ -1003,10 +1003,10 @@ spec: framework.Logf("Created pod %s on node %s", hostNetPod.name, egress2Node.name) podNamespace := f.Namespace - podNamespace.Labels = map[string]string{ + labels := map[string]string{ "name": f.Namespace.Name, } - updateNamespace(f, podNamespace) + updateNamespaceLabels(f, podNamespace, labels) ginkgo.By("3. Create an EgressIP object with one egress IP defined") // Assign the egress IP without conflicting with any node IP, @@ -1143,10 +1143,10 @@ spec: e2enode.ExpectNodeHasLabel(context.TODO(), f.ClientSet, egress1Node.name, "k8s.ovn.org/egress-assignable", "dummy") podNamespace := f.Namespace - podNamespace.Labels = map[string]string{ + labels := map[string]string{ "name": f.Namespace.Name, } - updateNamespace(f, podNamespace) + updateNamespaceLabels(f, podNamespace, labels) ginkgo.By("1. Create an EgressIP object with one egress IP defined") // Assign the egress IP without conflicting with any node IP, @@ -1256,13 +1256,14 @@ spec: e2enode.ExpectNodeHasLabel(context.TODO(), f.ClientSet, egress1Node.name, "k8s.ovn.org/egress-assignable", "dummy") podNamespace := f.Namespace - podNamespace.Labels = map[string]string{ + labels := map[string]string{ "name": f.Namespace.Name, } - updateNamespace(f, podNamespace) + updateNamespaceLabels(f, podNamespace, labels) ginkgo.By("1. Create one pod matching the EgressIP: running on node2 (pod2Node, egress1Node)") - createGenericPodWithLabel(f, pod1Name, pod2Node.name, f.Namespace.Name, command, podEgressLabel) + _, err := createGenericPodWithLabel(f, pod1Name, pod2Node.name, f.Namespace.Name, command, podEgressLabel) + framework.ExpectNoError(err, "Step 1. Create one pod matching the EgressIP: running on node2 (pod2Node, egress1Node), failed, err: %v", err) srcPodIP, err := getPodIPWithRetry(f.ClientSet, isIPv6TestRun, podNamespace.Name, pod1Name) framework.ExpectNoError(err, "Step 1. Create one pod matching the EgressIP: running on node2 (pod2Node, egress1Node), failed, err: %v", err) framework.Logf("Created pod %s on node %s", pod1Name, pod2Node.name) @@ -1592,10 +1593,10 @@ spec: egressIP1[len(egressIP1)-2]++ podNamespace := f.Namespace - podNamespace.Labels = map[string]string{ + labels := map[string]string{ "name": f.Namespace.Name, } - updateNamespace(f, podNamespace) + updateNamespaceLabels(f, podNamespace, labels) var egressIPConfig = fmt.Sprintf(`apiVersion: k8s.ovn.org/v1 kind: EgressIP @@ -1775,10 +1776,10 @@ spec: e2enode.AddOrUpdateLabelOnNode(f.ClientSet, egress1Node.name, "k8s.ovn.org/egress-assignable", "dummy") podNamespace := f.Namespace - podNamespace.Labels = map[string]string{ + labels := map[string]string{ "name": f.Namespace.Name, } - updateNamespace(f, podNamespace) + updateNamespaceLabels(f, podNamespace, labels) ginkgo.By("1. 
Create an EgressIP object with one egress IP defined") // Assign the egress IP without conflicting with any node IP, @@ -1919,10 +1920,10 @@ spec: usedEgressNodeAvailabilityHandler.Enable(egress1Node.name) podNamespace := f.Namespace - podNamespace.Labels = map[string]string{ + labels := map[string]string{ "name": f.Namespace.Name, } - updateNamespace(f, podNamespace) + updateNamespaceLabels(f, podNamespace, labels) ginkgo.By("Creating an EgressIP object with one egress IPs defined") // Assign the egress IP without conflicting with any node IP, @@ -2093,10 +2094,10 @@ spec: defer egressNodeAvailabilityHandler.Restore(egress1Node.name) defer egressNodeAvailabilityHandler.Restore(egress2Node.name) podNamespace := f.Namespace - podNamespace.Labels = map[string]string{ + labels := map[string]string{ "name": f.Namespace.Name, } - updateNamespace(f, podNamespace) + updateNamespaceLabels(f, podNamespace, labels) ginkgo.By("1. Create an EgressIP object with two egress IPs - both hosted by the same secondary host network") egressIPConfig := fmt.Sprintf(`apiVersion: k8s.ovn.org/v1 @@ -2330,10 +2331,10 @@ spec: defer egressNodeAvailabilityHandler.Restore(egress1Node.name) defer egressNodeAvailabilityHandler.Restore(egress2Node.name) podNamespace := f.Namespace - podNamespace.Labels = map[string]string{ + labels := map[string]string{ "name": f.Namespace.Name, } - updateNamespace(f, podNamespace) + updateNamespaceLabels(f, podNamespace, labels) ginkgo.By("1. Create an EgressIP object with two egress IPs - one hosted by an OVN network and one by a secondary host network") // Assign the egress IP without conflicting with any node IP, @@ -2588,10 +2589,10 @@ spec: egressNodeAvailabilityHandler.Enable(egress1Node.name) defer egressNodeAvailabilityHandler.Restore(egress1Node.name) podNamespace := f.Namespace - podNamespace.Labels = map[string]string{ + labels := map[string]string{ "name": f.Namespace.Name, } - updateNamespace(f, podNamespace) + updateNamespaceLabels(f, podNamespace, labels) ginkgo.By("1. Create two EgressIP objects with one egress IP each - hosted by a secondary host network") egressIPConfig := fmt.Sprintf(`apiVersion: k8s.ovn.org/v1 @@ -2765,10 +2766,10 @@ spec: egressNodeAvailabilityHandler.Enable(egress1Node.name) defer egressNodeAvailabilityHandler.Restore(egress1Node.name) podNamespace := f.Namespace - podNamespace.Labels = map[string]string{ + labels := map[string]string{ "name": f.Namespace.Name, } - updateNamespace(f, podNamespace) + updateNamespaceLabels(f, podNamespace, labels) ginkgo.By("2. Create one EgressIP object with one egress IP hosted by a secondary host network") egressIPConfig := fmt.Sprintf(`apiVersion: k8s.ovn.org/v1 @@ -2818,7 +2819,8 @@ spec: } ginkgo.By(fmt.Sprintf("Building another namespace api object, basename %s", f.BaseName)) otherNetworkNamespace, err := f.CreateNamespace(context.Background(), f.BaseName, map[string]string{ - "e2e-framework": f.BaseName, + "e2e-framework": f.BaseName, + RequiredUDNNamespaceLabel: "", }) gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) @@ -2842,11 +2844,9 @@ spec: "wants": "egress", } pod1Namespace := f.Namespace - pod1Namespace.Labels = selectedByEIPLabels - updateNamespace(f, pod1Namespace) + updateNamespaceLabels(f, pod1Namespace, selectedByEIPLabels) pod2OtherNetworkNamespace := otherNetworkNamespace.Name - otherNetworkNamespace.Labels = selectedByEIPLabels - updateNamespace(f, otherNetworkNamespace) + updateNamespaceLabels(f, otherNetworkNamespace, selectedByEIPLabels) ginkgo.By("3. 
Create an EgressIP object with one egress IP defined") // Assign the egress IP without conflicting with any node IP, @@ -2930,7 +2930,8 @@ spec: } ginkgo.By(fmt.Sprintf("Building a namespace api object, basename %s", f.BaseName)) otherNetworkNamespace, err := f.CreateNamespace(context.Background(), f.BaseName, map[string]string{ - "e2e-framework": f.BaseName, + "e2e-framework": f.BaseName, + RequiredUDNNamespaceLabel: "", }) gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) isOtherNetworkIPv6 := utilnet.IsIPv6CIDRString(otherNetworkAttachParms.cidr) @@ -2964,11 +2965,9 @@ spec: "wants": "egress", } pod1Namespace := f.Namespace - pod1Namespace.Labels = selectedByEIPLabels - updateNamespace(f, pod1Namespace) + updateNamespaceLabels(f, pod1Namespace, selectedByEIPLabels) pod2OtherNetworkNamespace := otherNetworkNamespace.Name - otherNetworkNamespace.Labels = selectedByEIPLabels - updateNamespace(f, otherNetworkNamespace) + updateNamespaceLabels(f, otherNetworkNamespace, selectedByEIPLabels) ginkgo.By("3. Create an EgressIP object with one egress IP defined") // Assign the egress IP without conflicting with any node IP, diff --git a/test/e2e/network_segmentation.go b/test/e2e/network_segmentation.go index edc837d410..8935b699db 100644 --- a/test/e2e/network_segmentation.go +++ b/test/e2e/network_segmentation.go @@ -33,9 +33,12 @@ import ( ) const openDefaultPortsAnnotation = "k8s.ovn.org/open-default-ports" +const RequiredUDNNamespaceLabel = "k8s.ovn.org/primary-user-defined-network" var _ = Describe("Network Segmentation", func() { f := wrappedTestFramework("network-segmentation") + // disable automatic namespace creation, we need to add the required UDN label + f.SkipNamespaceCreation = true var ( cs clientset.Interface @@ -60,6 +63,12 @@ var _ = Describe("Network Segmentation", func() { var err error nadClient, err = nadclient.NewForConfig(f.ClientConfig()) Expect(err).NotTo(HaveOccurred()) + namespace, err := f.CreateNamespace(context.TODO(), f.BaseName, map[string]string{ + "e2e-framework": f.BaseName, + RequiredUDNNamespaceLabel: "", + }) + f.Namespace = namespace + Expect(err).NotTo(HaveOccurred()) }) Context("a user defined primary network", func() { @@ -434,7 +443,7 @@ var _ = Describe("Network Segmentation", func() { kapi, err := cs.CoreV1().Services("default").Get(context.Background(), "kubernetes", metav1.GetOptions{}) Expect(err).NotTo(HaveOccurred()) for _, kapiIP := range kapi.Spec.ClusterIPs { - By("checking the UDN pod can't reach kapi service on IP " + kapiIP) + By("checking the UDN pod can't reach kapi service on IP " + kapiIP + "via eth0") Consistently(func() bool { _, err := e2ekubectl.RunKubectl( udnPodConfig.namespace, @@ -510,7 +519,8 @@ var _ = Describe("Network Segmentation", func() { By("Creating namespace " + namespace) _, err := cs.CoreV1().Namespaces().Create(context.Background(), &v1.Namespace{ ObjectMeta: metav1.ObjectMeta{ - Name: namespace, + Name: namespace, + Labels: map[string]string{RequiredUDNNamespaceLabel: ""}, }, }, metav1.CreateOptions{}) Expect(err).NotTo(HaveOccurred()) @@ -770,32 +780,46 @@ var _ = Describe("Network Segmentation", func() { userDefinedNetworkResource = "userdefinednetwork" ) + var ( + defaultNetNamespace *v1.Namespace + ) + Context("for L2 secondary network", func() { BeforeEach(func() { + // default cluster network namespace, for use when only testing secondary UDNs/NADs + defaultNetNamespace = &v1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: f.Namespace.Name + "-default", + }, + } + 
f.AddNamespacesToDelete(defaultNetNamespace) + _, err := cs.CoreV1().Namespaces().Create(context.Background(), defaultNetNamespace, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + By("create tests UserDefinedNetwork") - cleanup, err := createManifest(f.Namespace.Name, newL2SecondaryUDNManifest(testUdnName)) + cleanup, err := createManifest(defaultNetNamespace.Name, newL2SecondaryUDNManifest(testUdnName)) DeferCleanup(cleanup) Expect(err).NotTo(HaveOccurred()) - Expect(waitForUserDefinedNetworkReady(f.Namespace.Name, testUdnName, 5*time.Second)).To(Succeed()) + Expect(waitForUserDefinedNetworkReady(defaultNetNamespace.Name, testUdnName, 5*time.Second)).To(Succeed()) }) It("should create NetworkAttachmentDefinition according to spec", func() { - udnUidRaw, err := e2ekubectl.RunKubectl(f.Namespace.Name, "get", userDefinedNetworkResource, testUdnName, "-o", "jsonpath='{.metadata.uid}'") + udnUidRaw, err := e2ekubectl.RunKubectl(defaultNetNamespace.Name, "get", userDefinedNetworkResource, testUdnName, "-o", "jsonpath='{.metadata.uid}'") Expect(err).NotTo(HaveOccurred(), "should get the UserDefinedNetwork UID") testUdnUID := strings.Trim(udnUidRaw, "'") By("verify a NetworkAttachmentDefinition is created according to spec") - assertL2SecondaryNetAttachDefManifest(nadClient, f.Namespace.Name, testUdnName, testUdnUID) + assertL2SecondaryNetAttachDefManifest(nadClient, defaultNetNamespace.Name, testUdnName, testUdnUID) }) It("should delete NetworkAttachmentDefinition when UserDefinedNetwork is deleted", func() { By("delete UserDefinedNetwork") - _, err := e2ekubectl.RunKubectl(f.Namespace.Name, "delete", userDefinedNetworkResource, testUdnName) + _, err := e2ekubectl.RunKubectl(defaultNetNamespace.Name, "delete", userDefinedNetworkResource, testUdnName) Expect(err).NotTo(HaveOccurred()) By("verify a NetworkAttachmentDefinition has been deleted") Eventually(func() bool { - _, err := nadClient.NetworkAttachmentDefinitions(f.Namespace.Name).Get(context.Background(), testUdnName, metav1.GetOptions{}) + _, err := nadClient.NetworkAttachmentDefinitions(defaultNetNamespace.Name).Get(context.Background(), testUdnName, metav1.GetOptions{}) return err != nil && kerrors.IsNotFound(err) }, time.Second*3, time.Second*1).Should(BeTrue(), "NetworkAttachmentDefinition should be deleted following UserDefinedNetwork deletion") @@ -813,16 +837,16 @@ var _ = Describe("Network Segmentation", func() { BeforeEach(func() { By("create pod") networkAttachments := []nadapi.NetworkSelectionElement{ - {Name: testUdnName, Namespace: f.Namespace.Name}, + {Name: testUdnName, Namespace: defaultNetNamespace.Name}, } cfg := podConfig(testPodName, withNetworkAttachment(networkAttachments)) - cfg.namespace = f.Namespace.Name - runUDNPod(cs, f.Namespace.Name, *cfg, nil) + cfg.namespace = defaultNetNamespace.Name + runUDNPod(cs, defaultNetNamespace.Name, *cfg, nil) }) It("cannot be deleted when being used", func() { By("verify UserDefinedNetwork cannot be deleted") - cmd := e2ekubectl.NewKubectlCommand(f.Namespace.Name, "delete", userDefinedNetworkResource, testUdnName) + cmd := e2ekubectl.NewKubectlCommand(defaultNetNamespace.Name, "delete", userDefinedNetworkResource, testUdnName) cmd.WithTimeout(time.NewTimer(deleteNetworkTimeout).C) _, err := cmd.Exec() Expect(err).To(HaveOccurred(), @@ -832,29 +856,29 @@ var _ = Describe("Network Segmentation", func() { Eventually(func() error { ctx, cancel := context.WithTimeout(context.Background(), deleteNetworkTimeout) defer cancel() - _ = 
nadClient.NetworkAttachmentDefinitions(f.Namespace.Name).Delete(ctx, testUdnName, metav1.DeleteOptions{}) - _, err := nadClient.NetworkAttachmentDefinitions(f.Namespace.Name).Get(ctx, testUdnName, metav1.GetOptions{}) + _ = nadClient.NetworkAttachmentDefinitions(defaultNetNamespace.Name).Delete(ctx, testUdnName, metav1.DeleteOptions{}) + _, err := nadClient.NetworkAttachmentDefinitions(defaultNetNamespace.Name).Get(ctx, testUdnName, metav1.GetOptions{}) return err }).ShouldNot(HaveOccurred(), "should fail to delete UserDefinedNetwork associated NetworkAttachmentDefinition when used") By("verify UserDefinedNetwork status reports consuming pod") - assertUDNStatusReportsConsumers(f.Namespace.Name, testUdnName, testPodName) + assertUDNStatusReportsConsumers(defaultNetNamespace.Name, testUdnName, testPodName) By("delete test pod") - err = cs.CoreV1().Pods(f.Namespace.Name).Delete(context.Background(), testPodName, metav1.DeleteOptions{}) + err = cs.CoreV1().Pods(defaultNetNamespace.Name).Delete(context.Background(), testPodName, metav1.DeleteOptions{}) Expect(err).ToNot(HaveOccurred()) By("verify UserDefinedNetwork has been deleted") Eventually(func() error { - _, err := e2ekubectl.RunKubectl(f.Namespace.Name, "get", userDefinedNetworkResource, testUdnName) + _, err := e2ekubectl.RunKubectl(defaultNetNamespace.Name, "get", userDefinedNetworkResource, testUdnName) return err }, udnInUseDeleteTimeout, deleteNetworkInterval).Should(HaveOccurred(), "UserDefinedNetwork should be deleted following test pod deletion") By("verify UserDefinedNetwork associated NetworkAttachmentDefinition has been deleted") Eventually(func() bool { - _, err := nadClient.NetworkAttachmentDefinitions(f.Namespace.Name).Get(context.Background(), testUdnName, metav1.GetOptions{}) + _, err := nadClient.NetworkAttachmentDefinitions(defaultNetNamespace.Name).Get(context.Background(), testUdnName, metav1.GetOptions{}) return err != nil && kerrors.IsNotFound(err) }, deleteNetworkTimeout, deleteNetworkInterval).Should(BeTrue(), "NetworkAttachmentDefinition should be deleted following UserDefinedNetwork deletion") @@ -966,6 +990,7 @@ spec: const clusterUserDefinedNetworkResource = "clusteruserdefinednetwork" var testTenantNamespaces []string + var defaultNetNamespace *v1.Namespace BeforeEach(func() { testTenantNamespaces = []string{ @@ -975,13 +1000,28 @@ spec: By("Creating test tenants namespaces") for _, nsName := range testTenantNamespaces { - _, err := cs.CoreV1().Namespaces().Create(context.Background(), &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: nsName}}, metav1.CreateOptions{}) + _, err := cs.CoreV1().Namespaces().Create(context.Background(), &v1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: nsName, + Labels: map[string]string{RequiredUDNNamespaceLabel: ""}, + }}, metav1.CreateOptions{}) Expect(err).NotTo(HaveOccurred()) DeferCleanup(func() error { err := cs.CoreV1().Namespaces().Delete(context.Background(), nsName, metav1.DeleteOptions{}) return err }) } + // default cluster network namespace, for use when only testing secondary UDNs/NADs + defaultNetNamespace = &v1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: f.Namespace.Name + "-default", + }, + } + f.AddNamespacesToDelete(defaultNetNamespace) + _, err := cs.CoreV1().Namespaces().Create(context.Background(), defaultNetNamespace, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + testTenantNamespaces = append(testTenantNamespaces, defaultNetNamespace.Name) + }) var testClusterUdnName string @@ -1042,7 +1082,11 @@ spec: 
assertClusterUDNStatusReportsActiveNamespaces(testClusterUdnName, testTenantNamespaces...) By("create the new target namespace") - _, err = cs.CoreV1().Namespaces().Create(context.Background(), &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: testNewNs}}, metav1.CreateOptions{}) + _, err = cs.CoreV1().Namespaces().Create(context.Background(), &v1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: testNewNs, + Labels: map[string]string{RequiredUDNNamespaceLabel: ""}, + }}, metav1.CreateOptions{}) Expect(err).NotTo(HaveOccurred()) DeferCleanup(func() error { err := cs.CoreV1().Namespaces().Delete(context.Background(), testNewNs, metav1.DeleteOptions{}) @@ -1053,7 +1097,7 @@ spec: assertClusterUDNStatusReportsActiveNamespaces(testClusterUdnName, expectedActiveNamespaces...) udnUidRaw, err := e2ekubectl.RunKubectl("", "get", clusterUserDefinedNetworkResource, testClusterUdnName, "-o", "jsonpath='{.metadata.uid}'") - Expect(err).NotTo(HaveOccurred(), "should get the ClsuterUserDefinedNetwork UID") + Expect(err).NotTo(HaveOccurred(), "should get the ClusterUserDefinedNetwork UID") testUdnUID := strings.Trim(udnUidRaw, "'") By("verify a NAD exist in new namespace according to spec") @@ -1065,7 +1109,11 @@ spec: testNewNs := f.Namespace.Name + "green" By("create new namespace") - _, err := cs.CoreV1().Namespaces().Create(context.Background(), &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: testNewNs}}, metav1.CreateOptions{}) + _, err := cs.CoreV1().Namespaces().Create(context.Background(), &v1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: testNewNs, + Labels: map[string]string{RequiredUDNNamespaceLabel: ""}, + }}, metav1.CreateOptions{}) Expect(err).NotTo(HaveOccurred()) DeferCleanup(func() error { err := cs.CoreV1().Namespaces().Delete(context.Background(), testNewNs, metav1.DeleteOptions{}) @@ -1121,7 +1169,7 @@ spec: ) BeforeEach(func() { - inUseNetTestTenantNamespace = testTenantNamespaces[0] + inUseNetTestTenantNamespace = defaultNetNamespace.Name By("create pod in one of the test tenant namespaces") networkAttachments := []nadapi.NetworkSelectionElement{ @@ -1184,7 +1232,11 @@ spec: } By("Creating test tenants namespaces") for _, nsName := range testTenantNamespaces { - _, err := cs.CoreV1().Namespaces().Create(context.Background(), &v1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: nsName}}, metav1.CreateOptions{}) + _, err := cs.CoreV1().Namespaces().Create(context.Background(), &v1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: nsName, + Labels: map[string]string{RequiredUDNNamespaceLabel: ""}, + }}, metav1.CreateOptions{}) Expect(err).NotTo(HaveOccurred()) DeferCleanup(func() error { err := cs.CoreV1().Namespaces().Delete(context.Background(), nsName, metav1.DeleteOptions{}) diff --git a/test/e2e/network_segmentation_endpointslices_mirror.go b/test/e2e/network_segmentation_endpointslices_mirror.go index 2985d99415..517117158e 100644 --- a/test/e2e/network_segmentation_endpointslices_mirror.go +++ b/test/e2e/network_segmentation_endpointslices_mirror.go @@ -24,6 +24,7 @@ import ( var _ = Describe("Network Segmentation EndpointSlices mirroring", func() { f := wrappedTestFramework("endpointslices-mirror") + f.SkipNamespaceCreation = true Context("a user defined primary network", func() { const ( userDefinedNetworkIPv4Subnet = "10.128.0.0/16" @@ -38,8 +39,12 @@ var _ = Describe("Network Segmentation EndpointSlices mirroring", func() { BeforeEach(func() { cs = f.ClientSet - - var err error + namespace, err := f.CreateNamespace(context.TODO(), f.BaseName, map[string]string{ + 
"e2e-framework": f.BaseName, + RequiredUDNNamespaceLabel: "", + }) + f.Namespace = namespace + Expect(err).NotTo(HaveOccurred()) nadClient, err = nadclient.NewForConfig(f.ClientConfig()) Expect(err).NotTo(HaveOccurred()) }) @@ -174,17 +179,26 @@ var _ = Describe("Network Segmentation EndpointSlices mirroring", func() { func( netConfig networkAttachmentConfigParams, ) { + By("creating default net namespace") + defaultNetNamespace := &v1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: f.Namespace.Name + "-default", + }, + } + f.AddNamespacesToDelete(defaultNetNamespace) + _, err := cs.CoreV1().Namespaces().Create(context.Background(), defaultNetNamespace, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) By("creating the network") - netConfig.namespace = f.Namespace.Name + netConfig.namespace = defaultNetNamespace.Name Expect(createNetworkFn(netConfig)).To(Succeed()) replicas := int32(3) By("creating the deployment") deployment := e2edeployment.NewDeployment("test-deployment", replicas, map[string]string{"app": "test"}, "agnhost", agnhostImage, appsv1.RollingUpdateDeploymentStrategyType) - deployment.Namespace = f.Namespace.Name + deployment.Namespace = defaultNetNamespace.Name deployment.Spec.Template.Spec.Containers[0].Command = e2epod.GenerateScriptCmd("/agnhost netexec --http-port 80") - _, err := cs.AppsV1().Deployments(f.Namespace.Name).Create(context.Background(), deployment, metav1.CreateOptions{}) + _, err = cs.AppsV1().Deployments(defaultNetNamespace.Name).Create(context.Background(), deployment, metav1.CreateOptions{}) framework.ExpectNoError(err, "Failed creating the deployment %v", err) err = e2edeployment.WaitForDeploymentComplete(cs, deployment) framework.ExpectNoError(err, "Failed starting the deployment %v", err) @@ -193,12 +207,12 @@ var _ = Describe("Network Segmentation EndpointSlices mirroring", func() { svc := e2eservice.CreateServiceSpec("test-service", "", false, map[string]string{"app": "test"}) familyPolicy := v1.IPFamilyPolicyPreferDualStack svc.Spec.IPFamilyPolicy = &familyPolicy - _, err = cs.CoreV1().Services(f.Namespace.Name).Create(context.Background(), svc, metav1.CreateOptions{}) + _, err = cs.CoreV1().Services(defaultNetNamespace.Name).Create(context.Background(), svc, metav1.CreateOptions{}) framework.ExpectNoError(err, "Failed creating service %v", err) By("asserting the mirrored EndpointSlice does not exist") Eventually(func() error { - esList, err := cs.DiscoveryV1().EndpointSlices(f.Namespace.Name).List(context.TODO(), metav1.ListOptions{LabelSelector: fmt.Sprintf("%s=%s", "k8s.ovn.org/service-name", svc.Name)}) + esList, err := cs.DiscoveryV1().EndpointSlices(defaultNetNamespace.Name).List(context.TODO(), metav1.ListOptions{LabelSelector: fmt.Sprintf("%s=%s", "k8s.ovn.org/service-name", svc.Name)}) if err != nil { return err } @@ -210,7 +224,7 @@ var _ = Describe("Network Segmentation EndpointSlices mirroring", func() { }, 2*time.Minute, 6*time.Second).ShouldNot(HaveOccurred()) }, Entry( - "L2 dualstack primary UDN", + "L2 secondary UDN", networkAttachmentConfigParams{ name: nadName, topology: "layer2", @@ -219,7 +233,7 @@ var _ = Describe("Network Segmentation EndpointSlices mirroring", func() { }, ), Entry( - "L3 dualstack primary UDN", + "L3 secondary UDN", networkAttachmentConfigParams{ name: nadName, topology: "layer3", @@ -232,14 +246,14 @@ var _ = Describe("Network Segmentation EndpointSlices mirroring", func() { Entry("NetworkAttachmentDefinitions", func(c networkAttachmentConfigParams) error { netConfig := 
newNetworkAttachmentConfig(c) nad := generateNAD(netConfig) - _, err := nadClient.NetworkAttachmentDefinitions(f.Namespace.Name).Create(context.Background(), nad, metav1.CreateOptions{}) + _, err := nadClient.NetworkAttachmentDefinitions(fmt.Sprintf("%s-default", f.Namespace.Name)).Create(context.Background(), nad, metav1.CreateOptions{}) return err }), Entry("UserDefinedNetwork", func(c networkAttachmentConfigParams) error { udnManifest := generateUserDefinedNetworkManifest(&c) - cleanup, err := createManifest(f.Namespace.Name, udnManifest) + cleanup, err := createManifest(fmt.Sprintf("%s-default", f.Namespace.Name), udnManifest) DeferCleanup(cleanup) - Expect(waitForUserDefinedNetworkReady(f.Namespace.Name, c.name, 5*time.Second)).To(Succeed()) + Expect(waitForUserDefinedNetworkReady(fmt.Sprintf("%s-default", f.Namespace.Name), c.name, 5*time.Second)).To(Succeed()) return err }), ) diff --git a/test/e2e/network_segmentation_policy.go b/test/e2e/network_segmentation_policy.go index 7b1c48882f..df7f6f1e86 100644 --- a/test/e2e/network_segmentation_policy.go +++ b/test/e2e/network_segmentation_policy.go @@ -20,6 +20,7 @@ import ( var _ = ginkgo.Describe("Network Segmentation: Network Policies", func() { f := wrappedTestFramework("network-segmentation") + f.SkipNamespaceCreation = true ginkgo.Context("on a user defined primary network", func() { const ( @@ -44,8 +45,12 @@ var _ = ginkgo.Describe("Network Segmentation: Network Policies", func() { ginkgo.BeforeEach(func() { cs = f.ClientSet - - var err error + namespace, err := f.CreateNamespace(context.TODO(), f.BaseName, map[string]string{ + "e2e-framework": f.BaseName, + RequiredUDNNamespaceLabel: "", + }) + f.Namespace = namespace + gomega.Expect(err).NotTo(gomega.HaveOccurred()) nadClient, err = nadclient.NewForConfig(f.ClientConfig()) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -55,7 +60,8 @@ var _ = ginkgo.Describe("Network Segmentation: Network Policies", func() { ginkgo.By("Creating namespace " + namespace) ns, err := cs.CoreV1().Namespaces().Create(context.Background(), &v1.Namespace{ ObjectMeta: metav1.ObjectMeta{ - Name: namespace, + Name: namespace, + Labels: map[string]string{RequiredUDNNamespaceLabel: ""}, }, }, metav1.CreateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) diff --git a/test/e2e/network_segmentation_services.go b/test/e2e/network_segmentation_services.go index 3eee561a6e..f5f9c9b260 100644 --- a/test/e2e/network_segmentation_services.go +++ b/test/e2e/network_segmentation_services.go @@ -28,6 +28,7 @@ import ( var _ = Describe("Network Segmentation: services", func() { f := wrappedTestFramework("udn-services") + f.SkipNamespaceCreation = true Context("on a user defined primary network", func() { const ( @@ -50,6 +51,12 @@ var _ = Describe("Network Segmentation: services", func() { var err error nadClient, err = nadclient.NewForConfig(f.ClientConfig()) Expect(err).NotTo(HaveOccurred()) + namespace, err := f.CreateNamespace(context.TODO(), f.BaseName, map[string]string{ + "e2e-framework": f.BaseName, + RequiredUDNNamespaceLabel: "", + }) + f.Namespace = namespace + Expect(err).NotTo(HaveOccurred()) }) DescribeTable( From 41dfd98d22f7d065111b6b85a78655cf21427866 Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Thu, 19 Dec 2024 10:57:31 -0500 Subject: [PATCH 07/51] Fix multicast net seg tests ip family Signed-off-by: Tim Rozet --- test/e2e/network_segmentation.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/e2e/network_segmentation.go b/test/e2e/network_segmentation.go 
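For illustration, not part of the diffs: the e2e changes above all follow the same shape, summarized in this fragment (the same package and dot-imports as the surrounding test files are assumed, and the suite name is made up): disable the framework's automatic namespace creation and create the namespace manually so the UDN label exists from the start.

var _ = Describe("example UDN suite", func() {
	f := wrappedTestFramework("example-udn")
	// the framework must not create the namespace for us, since the label
	// cannot be added after the namespace exists
	f.SkipNamespaceCreation = true

	BeforeEach(func() {
		ns, err := f.CreateNamespace(context.TODO(), f.BaseName, map[string]string{
			"e2e-framework":           f.BaseName,
			RequiredUDNNamespaceLabel: "",
		})
		Expect(err).NotTo(HaveOccurred())
		f.Namespace = ns
	})
})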
index 8935b699db..589727dccb 100644 --- a/test/e2e/network_segmentation.go +++ b/test/e2e/network_segmentation.go @@ -736,13 +736,13 @@ var _ = Describe("Network Segmentation", func() { ginkgo.Entry("with primary layer3 UDN", networkAttachmentConfigParams{ name: nadName, topology: "layer3", - cidr: fmt.Sprintf("%s,%s", userDefinedNetworkIPv4Subnet, userDefinedNetworkIPv6Subnet), + cidr: correctCIDRFamily(userDefinedNetworkIPv4Subnet, userDefinedNetworkIPv6Subnet), role: "primary", }), ginkgo.Entry("with primary layer2 UDN", networkAttachmentConfigParams{ name: nadName, topology: "layer2", - cidr: fmt.Sprintf("%s,%s", userDefinedNetworkIPv4Subnet, userDefinedNetworkIPv6Subnet), + cidr: correctCIDRFamily(userDefinedNetworkIPv4Subnet, userDefinedNetworkIPv6Subnet), role: "primary", }), ) @@ -761,13 +761,13 @@ var _ = Describe("Network Segmentation", func() { ginkgo.Entry("with primary layer3 UDN", networkAttachmentConfigParams{ name: nadName, topology: "layer3", - cidr: fmt.Sprintf("%s,%s", userDefinedNetworkIPv4Subnet, userDefinedNetworkIPv6Subnet), + cidr: correctCIDRFamily(userDefinedNetworkIPv4Subnet, userDefinedNetworkIPv6Subnet), role: "primary", }), ginkgo.Entry("with primary layer2 UDN", networkAttachmentConfigParams{ name: nadName, topology: "layer2", - cidr: fmt.Sprintf("%s,%s", userDefinedNetworkIPv4Subnet, userDefinedNetworkIPv6Subnet), + cidr: correctCIDRFamily(userDefinedNetworkIPv4Subnet, userDefinedNetworkIPv6Subnet), role: "primary", }), ) From 253025a4cf060be6f86321a731ff9c8b42fa616d Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Fri, 20 Dec 2024 17:47:55 -0500 Subject: [PATCH 08/51] Fix egress IP tests Was using ipv6 on ipv4 cluster. Signed-off-by: Tim Rozet --- test/e2e/e2e.go | 4 +++- test/e2e/egressip.go | 55 +++++++++++++++++++++++++++----------------- 2 files changed, 37 insertions(+), 22 deletions(-) diff --git a/test/e2e/e2e.go b/test/e2e/e2e.go index 1330aa274e..f044b3bca3 100644 --- a/test/e2e/e2e.go +++ b/test/e2e/e2e.go @@ -473,7 +473,9 @@ func deleteClusterExternalContainer(containerName string) { func updateNamespaceLabels(f *framework.Framework, namespace *v1.Namespace, labels map[string]string) { // should never be nil n := *namespace - n.Labels = labels + for k, v := range labels { + n.Labels[k] = v + } if _, ok := namespace.Labels[RequiredUDNNamespaceLabel]; ok { n.Labels[RequiredUDNNamespaceLabel] = "" } diff --git a/test/e2e/egressip.go b/test/e2e/egressip.go index 7d9c5b6856..8bfb549705 100644 --- a/test/e2e/egressip.go +++ b/test/e2e/egressip.go @@ -616,6 +616,7 @@ var _ = ginkgo.DescribeTableSubtree("e2e egress IP validation", func(netConfigPa } f := wrappedTestFramework(egressIPName) + f.SkipNamespaceCreation = true // Determine what mode the CI is running in and get relevant endpoint information for the tests ginkgo.BeforeEach(func() { @@ -633,6 +634,17 @@ var _ = ginkgo.DescribeTableSubtree("e2e egress IP validation", func(netConfigPa if len(ips) == 0 { framework.Failf("expect at least one IP address") } + + labels := map[string]string{ + "e2e-framework": f.BaseName, + } + if !isClusterDefaultNetwork(netConfigParams) { + labels[RequiredUDNNamespaceLabel] = "" + } + namespace, err := f.CreateNamespace(context.TODO(), f.BaseName, labels) + f.Namespace = namespace + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + isIPv6TestRun = utilnet.IsIPv6String(ips[0]) egress1Node = node{ name: nodes.Items[1].Name, @@ -2928,19 +2940,21 @@ spec: if !isNetworkSegmentationEnabled() { ginkgo.Skip("network segmentation is disabled") } - 
ginkgo.By(fmt.Sprintf("Building a namespace api object, basename %s", f.BaseName)) - otherNetworkNamespace, err := f.CreateNamespace(context.Background(), f.BaseName, map[string]string{ - "e2e-framework": f.BaseName, - RequiredUDNNamespaceLabel: "", - }) + var otherNetworkNamespace *corev1.Namespace + var err error gomega.Expect(err).ShouldNot(gomega.HaveOccurred()) isOtherNetworkIPv6 := utilnet.IsIPv6CIDRString(otherNetworkAttachParms.cidr) // The EgressIP IP must match both networks IP family if isOtherNetworkIPv6 != isIPv6TestRun { ginkgo.Skip(fmt.Sprintf("Test run IP family (is IPv6: %v) doesn't match other networks IP family (is IPv6: %v)", isIPv6TestRun, isOtherNetworkIPv6)) } - // is the test namespace a CDN? If so create the UDN + // is the test namespace a CDN? If so create the UDN namespace if isClusterDefaultNetwork(netConfigParams) { + ginkgo.By(fmt.Sprintf("Building other namespace api object for Primary UDN, basename %s", f.BaseName)) + otherNetworkNamespace, err = f.CreateNamespace(context.Background(), f.BaseName, map[string]string{ + RequiredUDNNamespaceLabel: "", + "e2e-framework": f.BaseName, + }) ginkgo.By(fmt.Sprintf("namespace is connected to CDN, create a namespace with %s primary UDN", otherNetworkAttachParms.topology)) // create primary UDN nadClient, err := nadclient.NewForConfig(f.ClientConfig()) @@ -2954,6 +2968,10 @@ spec: ) gomega.Expect(err).NotTo(gomega.HaveOccurred()) } else { + ginkgo.By(fmt.Sprintf("Building other namespace api object for CDN, basename %s", f.BaseName)) + otherNetworkNamespace, err = f.CreateNamespace(context.Background(), f.BaseName, map[string]string{ + "e2e-framework": f.BaseName, + }) // if network is L3 or L2 UDN, then other network is CDN } egressNodeAvailabilityHandler := egressNodeAvailabilityHandlerViaLabel{f} @@ -2965,8 +2983,14 @@ spec: "wants": "egress", } pod1Namespace := f.Namespace + _, isUDNRequired := pod1Namespace.Labels[RequiredUDNNamespaceLabel] + ginkgo.By(fmt.Sprintf("Updating namespace label for base namespace: %s, with required UDN label: %t", + pod1Namespace.Name, isUDNRequired)) updateNamespaceLabels(f, pod1Namespace, selectedByEIPLabels) pod2OtherNetworkNamespace := otherNetworkNamespace.Name + _, isUDNRequired = otherNetworkNamespace.Labels[RequiredUDNNamespaceLabel] + ginkgo.By(fmt.Sprintf("Updating namespace label for other namespace: %s, with required UDN label: %t", + otherNetworkNamespace.Name, isUDNRequired)) updateNamespaceLabels(f, otherNetworkNamespace, selectedByEIPLabels) ginkgo.By("3. Create an EgressIP object with one egress IP defined") @@ -3028,27 +3052,16 @@ spec: err = wait.PollImmediate(retryInterval, retryTimeout, targetExternalContainerAndTest(targetNode, pod2Name, pod2OtherNetworkNamespace, true, []string{egressIP1.String()})) framework.ExpectNoError(err, "Step 7. 
Check connectivity from pod connected to a different network and verify that the srcIP is the expected nodeIP, failed: %v", err) }, - ginkgo.Entry("IPv4 L3 Primary UDN", networkAttachmentConfigParams{ + ginkgo.Entry("L3 Primary UDN", networkAttachmentConfigParams{ name: "l3primary", topology: types.Layer3Topology, - cidr: "30.10.0.0/16", + cidr: correctCIDRFamily("30.10.0.0/16", "2014:100:200::0/60"), role: "primary", }), - ginkgo.Entry("IPv6 L3 Primary UDN", networkAttachmentConfigParams{ - name: "l3primary", - topology: types.Layer3Topology, - cidr: "2014:100:200::0/60", - }), - ginkgo.Entry("IPv4 L2 Primary UDN", networkAttachmentConfigParams{ - name: "l2primary", - topology: types.Layer2Topology, - cidr: "10.10.0.0/16", - role: "primary", - }), - ginkgo.Entry("IPv6 L2 Primary UDN", networkAttachmentConfigParams{ + ginkgo.Entry("L2 Primary UDN", networkAttachmentConfigParams{ name: "l2primary", topology: types.Layer2Topology, - cidr: "2014:100:200::0/60", + cidr: correctCIDRFamily("10.10.0.0/16", "2014:100:200::0/60"), role: "primary", }), ) From 0dd81cf4df9aacabc90c0a866d247666f36d68d7 Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Wed, 8 Jan 2025 14:26:00 -0500 Subject: [PATCH 09/51] Fix egress IP delete path EgressIP was depending on getActiveNetworkFromNamespace to work, or would fail to remove egressIP status. Signed-off-by: Tim Rozet --- go-controller/pkg/ovn/egressip.go | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go index 53f20e2edd..6cd35e4232 100644 --- a/go-controller/pkg/ovn/egressip.go +++ b/go-controller/pkg/ovn/egressip.go @@ -720,6 +720,7 @@ func (e *EgressIPController) addPodEgressIPAssignmentsWithLock(ni util.NetInfo, // pod w.r.t to each status. This is mainly done to avoid a lot of duplicated // work on ovnkube-master restarts when all egress IP handlers will most likely // match and perform the setup for the same pod and status multiple times over. +// requires holding the podAssignmentMutex lock func (e *EgressIPController) addPodEgressIPAssignments(ni util.NetInfo, name string, statusAssignments []egressipv1.EgressIPStatusItem, mark util.EgressIPMark, pod *kapi.Pod) error { podKey := getPodKey(pod) // If pod is already in succeeded or failed state, return it without proceeding further. @@ -865,27 +866,27 @@ func (e *EgressIPController) deleteEgressIPAssignments(name string, statusesToRe continue } podNamespace, podName := getPodNamespaceAndNameFromKey(podKey) - ni, err := e.networkManager.GetActiveNetworkForNamespace(podNamespace) - if err != nil { - return fmt.Errorf("failed to get active network for namespace %s", podNamespace) - } cachedNetwork := e.getNetworkFromPodAssignment(podKey) - err = e.nodeZoneState.DoWithLock(statusToRemove.Node, func(key string) error { + if cachedNetwork == nil { + panic(fmt.Sprintf("cached network is missing for egress IP pod assignment: %q. 
This should never happen!", podKey)) + } + + err := e.nodeZoneState.DoWithLock(statusToRemove.Node, func(key string) error { // this statusToRemove was managing at least one pod, hence let's tear down the setup for this status - if _, ok := processedNetworks[ni.GetNetworkName()]; !ok { + if _, ok := processedNetworks[cachedNetwork.GetNetworkName()]; !ok { klog.V(2).Infof("Deleting pod egress IP status: %v for EgressIP: %s", statusToRemove, name) - if err := e.deleteEgressIPStatusSetup(ni, name, statusToRemove); err != nil { - return fmt.Errorf("failed to delete EgressIP %s status setup for network %s: %v", name, ni.GetNetworkName(), err) + if err := e.deleteEgressIPStatusSetup(cachedNetwork, name, statusToRemove); err != nil { + return fmt.Errorf("failed to delete EgressIP %s status setup for network %s: %v", name, cachedNetwork.GetNetworkName(), err) } - if cachedNetwork != nil && util.AreNetworksCompatible(cachedNetwork, ni) { + if cachedNetwork != nil { if err := e.deleteEgressIPStatusSetup(cachedNetwork, name, statusToRemove); err != nil { klog.Errorf("Failed to delete EgressIP %s status setup for network %s: %v", name, cachedNetwork.GetNetworkName(), err) } } } - processedNetworks[ni.GetNetworkName()] = struct{}{} + processedNetworks[cachedNetwork.GetNetworkName()] = struct{}{} // this pod was managed by statusToRemove.EgressIP; we need to try and add its SNAT back towards nodeIP - if err := e.addExternalGWPodSNAT(ni, podNamespace, podName, statusToRemove); err != nil { + if err := e.addExternalGWPodSNAT(cachedNetwork, podNamespace, podName, statusToRemove); err != nil { return err } podStatus.egressStatuses.delete(statusToRemove) @@ -902,14 +903,14 @@ func (e *EgressIPController) deleteEgressIPAssignments(name string, statusesToRe // delete the podIP from the global egressIP address set since its no longer managed by egressIPs // NOTE(tssurya): There is no way to infer if pod was local to this zone or not, // so we try to nuke the IP from address-set anyways - it will be a no-op for remote pods - if err := e.deletePodIPsFromAddressSet(ni.GetNetworkName(), e.controllerName, podStatus.podIPs...); err != nil { + if err := e.deletePodIPsFromAddressSet(cachedNetwork.GetNetworkName(), e.controllerName, podStatus.podIPs...); err != nil { return fmt.Errorf("cannot delete egressPodIPs for the pod %s from the address set: err: %v", podKey, err) } delete(e.podAssignment, podKey) } else if len(podStatus.egressStatuses.statusMap) == 0 && len(podStatus.standbyEgressIPNames) > 0 { klog.V(2).Infof("Pod %s has standby egress IP %+v", podKey, podStatus.standbyEgressIPNames.UnsortedList()) podStatus.egressIPName = "" // we have deleted the current egressIP that was managing the pod - if err := e.addStandByEgressIPAssignment(ni, podKey, podStatus); err != nil { + if err := e.addStandByEgressIPAssignment(cachedNetwork, podKey, podStatus); err != nil { klog.Errorf("Adding standby egressIPs for pod %s with status %v failed: %v", podKey, podStatus, err) // We are not returning the error on purpose, this will be best effort without any retries because // retrying deleteEgressIPAssignments for original EIP because addStandByEgressIPAssignment failed is useless. 
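The idea behind the fix above can be sketched independently of the ovn-kubernetes types: teardown should rely only on state recorded when the egress IP was configured, instead of re-resolving the namespace's active network, which can fail once the namespace or its UDN is already gone. A minimal, self-contained sketch of that pattern follows; the names (netCache, add, remove) are hypothetical and not part of the code base:

package main

import (
	"fmt"
	"sync"
)

// netCache plays the role of the podAssignment cache: it records, at setup
// time, which network a pod was configured on, so that teardown never needs
// a live namespace lookup.
type netCache struct {
	mu    sync.Mutex
	byPod map[string]string // pod key -> network name
}

func (c *netCache) add(podKey, network string) {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.byPod[podKey] = network
}

// remove tears down using only the cached network; it does not consult the
// (possibly already deleted) namespace or UDN objects.
func (c *netCache) remove(podKey string) error {
	c.mu.Lock()
	defer c.mu.Unlock()
	network, ok := c.byPod[podKey]
	if !ok {
		return fmt.Errorf("no cached network for pod %q", podKey)
	}
	fmt.Printf("removing egress IP configuration for pod %s on network %s\n", podKey, network)
	delete(c.byPod, podKey)
	return nil
}

func main() {
	c := &netCache{byPod: map[string]string{}}
	c.add("ns1/pod1", "l3primary")
	if err := c.remove("ns1/pod1"); err != nil {
		fmt.Println("teardown failed:", err)
	}
}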
From a9b3e44505d6c1301fe0b6448f7fd075d6097973 Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Wed, 8 Jan 2025 15:09:34 -0500 Subject: [PATCH 10/51] Update kubevirt e2es Signed-off-by: Tim Rozet --- test/e2e/kubevirt.go | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/test/e2e/kubevirt.go b/test/e2e/kubevirt.go index 7d9e052957..326d5bf791 100644 --- a/test/e2e/kubevirt.go +++ b/test/e2e/kubevirt.go @@ -116,6 +116,9 @@ var _ = Describe("Kubevirt Virtual Machines", func() { } ) + // disable automatic namespace creation, we need to add the required UDN label + fr.SkipNamespaceCreation = true + type liveMigrationTestData struct { mode kubevirtv1.MigrationMode numberOfVMs int @@ -971,7 +974,6 @@ passwd: } ) BeforeEach(func() { - namespace = fr.Namespace.Name // So we can use it at AfterEach, since fr.ClientSet is nil there clientSet = fr.ClientSet @@ -983,6 +985,11 @@ passwd: Context("with default pod network", func() { BeforeEach(func() { + ns, err := fr.CreateNamespace(context.TODO(), fr.BaseName, map[string]string{ + "e2e-framework": fr.BaseName, + }) + fr.Namespace = ns + namespace = fr.Namespace.Name workerNodeList, err := fr.ClientSet.CoreV1().Nodes().List(context.Background(), metav1.ListOptions{LabelSelector: labels.FormatLabels(map[string]string{"node-role.kubernetes.io/worker": ""})}) Expect(err).ToNot(HaveOccurred()) nodesByOVNZone := map[string][]corev1.Node{} @@ -1272,6 +1279,17 @@ passwd: role string } DescribeTable("should keep ip", func(td testData) { + l := map[string]string{ + "e2e-framework": fr.BaseName, + } + if td.role == "primary" { + l[RequiredUDNNamespaceLabel] = "" + } + ns, err := fr.CreateNamespace(context.TODO(), fr.BaseName, l) + Expect(err).NotTo(HaveOccurred()) + fr.Namespace = ns + namespace = fr.Namespace.Name + netConfig := newNetworkAttachmentConfig( networkAttachmentConfigParams{ namespace: namespace, @@ -1457,6 +1475,13 @@ passwd: } ) BeforeEach(func() { + ns, err := fr.CreateNamespace(context.TODO(), fr.BaseName, map[string]string{ + "e2e-framework": fr.BaseName, + RequiredUDNNamespaceLabel: "", + }) + fr.Namespace = ns + namespace = fr.Namespace.Name + netConfig := newNetworkAttachmentConfig( networkAttachmentConfigParams{ namespace: namespace, @@ -1497,7 +1522,7 @@ passwd: Should(Succeed()) By("Reconfigure primary UDN interface to use dhcp/nd for ipv4 and ipv6") - _, err := virtLauncherCommand(kubevirt.GenerateAddressDiscoveryConfigurationCommand("ovn-udn1")) + _, err = virtLauncherCommand(kubevirt.GenerateAddressDiscoveryConfigurationCommand("ovn-udn1")) Expect(err).ToNot(HaveOccurred()) }) From ef21acb51b2182636e71308f9c8d18f6523340be Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Mon, 13 Jan 2025 14:10:50 -0500 Subject: [PATCH 11/51] Remove per network load balancer groups on cleanup Signed-off-by: Tim Rozet --- go-controller/pkg/libovsdb/ops/lbgroup.go | 30 ++++++++++++++++++- go-controller/pkg/ovn/ovn_test.go | 22 -------------- .../secondary_layer2_network_controller.go | 10 +++++++ .../secondary_layer3_network_controller.go | 10 +++++++ 4 files changed, 49 insertions(+), 23 deletions(-) diff --git a/go-controller/pkg/libovsdb/ops/lbgroup.go b/go-controller/pkg/libovsdb/ops/lbgroup.go index 854c8f2b2d..71517cb9c0 100644 --- a/go-controller/pkg/libovsdb/ops/lbgroup.go +++ b/go-controller/pkg/libovsdb/ops/lbgroup.go @@ -2,7 +2,6 @@ package ops import ( "context" - libovsdbclient "github.com/ovn-org/libovsdb/client" "github.com/ovn-org/libovsdb/ovsdb" libovsdb "github.com/ovn-org/libovsdb/ovsdb" @@ -30,6 
+29,35 @@ func CreateOrUpdateLoadBalancerGroupOps(nbClient libovsdbclient.Client, ops []ov return ops, nil } +// DeleteLoadBalancerGroupsOps DeleteLoadBalncerGroupOps creates the operations for deleting load balancer groups +func DeleteLoadBalancerGroupsOps(nbClient libovsdbclient.Client, ops []libovsdb.Operation, groups ...*nbdb.LoadBalancerGroup) ([]ovsdb.Operation, error) { + opModels := make([]operationModel, 0, len(groups)) + for i := range groups { + // can't use i in the predicate, for loop replaces it in-memory + lb := groups[i] + opModel := operationModel{ + Model: lb, + ErrNotFound: false, + BulkOp: false, + } + opModels = append(opModels, opModel) + } + + modelClient := newModelClient(nbClient) + return modelClient.DeleteOps(ops, opModels...) +} + +// DeleteLoadBalancerGroups deletes the provided load balancer groups +func DeleteLoadBalancerGroups(nbClient libovsdbclient.Client, groups []*nbdb.LoadBalancerGroup) error { + ops, err := DeleteLoadBalancerGroupsOps(nbClient, nil, groups...) + if err != nil { + return err + } + + _, err = TransactAndCheck(nbClient, ops) + return err +} + // AddLoadBalancersToGroupOps adds the provided load balancers to the provided // group and returns the corresponding ops func AddLoadBalancersToGroupOps(nbClient libovsdbclient.Client, ops []libovsdb.Operation, group *nbdb.LoadBalancerGroup, lbs ...*nbdb.LoadBalancer) ([]libovsdb.Operation, error) { diff --git a/go-controller/pkg/ovn/ovn_test.go b/go-controller/pkg/ovn/ovn_test.go index cdcbe5896f..302eefc2d8 100644 --- a/go-controller/pkg/ovn/ovn_test.go +++ b/go-controller/pkg/ovn/ovn_test.go @@ -318,28 +318,6 @@ func generateUDNPostInitDB(testData []libovsdbtest.TestData, netName string) []l } testData = append(testData, copp) - clusterLBGroupName := types.ClusterLBGroupName - clusterSwitchLBGroupName := types.ClusterSwitchLBGroupName - clusterRouterLBGroupName := types.ClusterRouterLBGroupName - if len(netName) > 0 { - clusterLBGroupName = fmt.Sprintf("%s_%s", netName, clusterLBGroupName) - clusterSwitchLBGroupName = fmt.Sprintf("%s_%s", netName, clusterSwitchLBGroupName) - clusterRouterLBGroupName = fmt.Sprintf("%s_%s", netName, clusterRouterLBGroupName) - } - - testData = append(testData, - &nbdb.LoadBalancerGroup{ - Name: clusterLBGroupName, - UUID: clusterLBGroupName + "-UUID", - }, - &nbdb.LoadBalancerGroup{ - Name: clusterSwitchLBGroupName, - UUID: clusterSwitchLBGroupName + "-UUID", - }, - &nbdb.LoadBalancerGroup{ - Name: clusterRouterLBGroupName, - UUID: clusterRouterLBGroupName + "-UUID", - }) return testData } diff --git a/go-controller/pkg/ovn/secondary_layer2_network_controller.go b/go-controller/pkg/ovn/secondary_layer2_network_controller.go index 179c86beef..e4b1edc71e 100644 --- a/go-controller/pkg/ovn/secondary_layer2_network_controller.go +++ b/go-controller/pkg/ovn/secondary_layer2_network_controller.go @@ -414,6 +414,16 @@ func (oc *SecondaryLayer2NetworkController) Cleanup() error { } return true }) + + // remove load balancer groups + lbGroups := make([]*nbdb.LoadBalancerGroup, 0, 3) + for _, lbGroupUUID := range []string{oc.switchLoadBalancerGroupUUID, oc.clusterLoadBalancerGroupUUID, oc.routerLoadBalancerGroupUUID} { + lbGroups = append(lbGroups, &nbdb.LoadBalancerGroup{UUID: lbGroupUUID}) + } + if err := libovsdbops.DeleteLoadBalancerGroups(oc.nbClient, lbGroups); err != nil { + klog.Errorf("Failed to delete load balancer groups on network: %q, error: %v", oc.GetNetworkName(), err) + } + return nil } diff --git 
a/go-controller/pkg/ovn/secondary_layer3_network_controller.go b/go-controller/pkg/ovn/secondary_layer3_network_controller.go index 441011511f..50161d9cd2 100644 --- a/go-controller/pkg/ovn/secondary_layer3_network_controller.go +++ b/go-controller/pkg/ovn/secondary_layer3_network_controller.go @@ -532,6 +532,16 @@ func (oc *SecondaryLayer3NetworkController) Cleanup() error { return fmt.Errorf("failed to delete interconnect transit switch of network %s: %v", netName, err) } } + + // remove load balancer groups + lbGroups := make([]*nbdb.LoadBalancerGroup, 0, 3) + for _, lbGroupUUID := range []string{oc.switchLoadBalancerGroupUUID, oc.clusterLoadBalancerGroupUUID, oc.routerLoadBalancerGroupUUID} { + lbGroups = append(lbGroups, &nbdb.LoadBalancerGroup{UUID: lbGroupUUID}) + } + if err := libovsdbops.DeleteLoadBalancerGroups(oc.nbClient, lbGroups); err != nil { + klog.Errorf("Failed to delete load balancer groups on network: %q, error: %v", oc.GetNetworkName(), err) + } + return nil } From a084679e6a5a18bb4eb11e1e7c7e101320b0da8a Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Mon, 13 Jan 2025 16:12:14 -0500 Subject: [PATCH 12/51] Adds e2e test to for wiring pod with missing UDN label Test ensures that a pod will still come up when a UDN exists, but the UDN required label is missing on the namespace. The pod will be wired to the default cluster network. Signed-off-by: Tim Rozet --- test/e2e/network_segmentation.go | 44 ++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/test/e2e/network_segmentation.go b/test/e2e/network_segmentation.go index 589727dccb..e75ba02cd9 100644 --- a/test/e2e/network_segmentation.go +++ b/test/e2e/network_segmentation.go @@ -34,6 +34,7 @@ import ( const openDefaultPortsAnnotation = "k8s.ovn.org/open-default-ports" const RequiredUDNNamespaceLabel = "k8s.ovn.org/primary-user-defined-network" +const OvnPodAnnotationName = "k8s.ovn.org/pod-networks" var _ = Describe("Network Segmentation", func() { f := wrappedTestFramework("network-segmentation") @@ -784,6 +785,37 @@ var _ = Describe("Network Segmentation", func() { defaultNetNamespace *v1.Namespace ) + Context("for primary UDN without required namespace label", func() { + BeforeEach(func() { + // default cluster network namespace, for use when doing negative testing for UDNs/NADs + defaultNetNamespace = &v1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: f.Namespace.Name + "-default", + }, + } + f.AddNamespacesToDelete(defaultNetNamespace) + _, err := cs.CoreV1().Namespaces().Create(context.Background(), defaultNetNamespace, metav1.CreateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + By("create tests UserDefinedNetwork") + cleanup, err := createManifest(defaultNetNamespace.Name, newPrimaryUserDefinedNetworkManifest(testUdnName)) + DeferCleanup(cleanup) + Expect(err).NotTo(HaveOccurred()) + Expect(waitForUserDefinedNetworkReady(defaultNetNamespace.Name, testUdnName, 5*time.Second)).To(Not(Succeed())) + }) + + It("should be able to create pod and it will attach to the cluster default network", func() { + podConfig := *podConfig("some-pod") + podConfig.namespace = defaultNetNamespace.Name + pod := runUDNPod(cs, defaultNetNamespace.Name, podConfig, nil) + ovnPodAnnotation, err := unmarshalPodAnnotationAllNetworks(pod.Annotations) + Expect(err).NotTo(HaveOccurred()) + Expect(len(ovnPodAnnotation)).To(BeNumerically("==", 1)) + Expect(ovnPodAnnotation).To(HaveKey("default")) + }) + + }) + Context("for L2 secondary network", func() { BeforeEach(func() { // default cluster network namespace, 
for use when only testing secondary UDNs/NADs @@ -2006,3 +2038,15 @@ func expectedNumberOfRoutes(netConfig networkAttachmentConfigParams) int { } return 3 //only one family, each has 3 routes } + +func unmarshalPodAnnotationAllNetworks(annotations map[string]string) (map[string]podAnnotation, error) { + podNetworks := make(map[string]podAnnotation) + ovnAnnotation, ok := annotations[OvnPodAnnotationName] + if ok { + if err := json.Unmarshal([]byte(ovnAnnotation), &podNetworks); err != nil { + return nil, fmt.Errorf("failed to unmarshal ovn pod annotation %q: %v", + ovnAnnotation, err) + } + } + return podNetworks, nil +} From b8c3d7882714e4fa79875a784806c5ef99ed5b8f Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Mon, 13 Jan 2025 16:18:10 -0500 Subject: [PATCH 13/51] Adds E2E to verify UDN label cannot be added/removed later Signed-off-by: Tim Rozet --- test/e2e/network_segmentation.go | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/test/e2e/network_segmentation.go b/test/e2e/network_segmentation.go index e75ba02cd9..c3d3b31a77 100644 --- a/test/e2e/network_segmentation.go +++ b/test/e2e/network_segmentation.go @@ -814,6 +814,24 @@ var _ = Describe("Network Segmentation", func() { Expect(ovnPodAnnotation).To(HaveKey("default")) }) + It("should not be able to update the namespace and add the UDN label", func() { + defaultNetNamespace.Labels = map[string]string{ + RequiredUDNNamespaceLabel: "", + } + _, err := cs.CoreV1().Namespaces().Update(context.TODO(), defaultNetNamespace, metav1.UpdateOptions{}) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("The 'k8s.ovn.org/primary-user-defined-network' label cannot be added/removed after the namespace was created")) + }) + + It("should not be able to update the namespace and remove the UDN label", func() { + udnNamespace, err := cs.CoreV1().Namespaces().Get(context.TODO(), f.Namespace.Name, metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + udnNamespace.Labels = map[string]string{} + _, err = cs.CoreV1().Namespaces().Update(context.TODO(), udnNamespace, metav1.UpdateOptions{}) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("The 'k8s.ovn.org/primary-user-defined-network' label cannot be added/removed after the namespace was created")) + }) + }) Context("for L2 secondary network", func() { From 10a2af040542abf340623f924d1111165d2d33f4 Mon Sep 17 00:00:00 2001 From: Nadia Pinaeva Date: Tue, 22 Oct 2024 10:18:50 +0200 Subject: [PATCH 14/51] Add GetInterfaceUDNs to convert interface name to (C)UDN ns+name. Signed-off-by: Nadia Pinaeva --- .../observability-lib/ovsdb/interface.go | 903 ++++++++++++++++++ .../observability-lib/ovsdb/observ_model.go | 1 + .../sampledecoder/db_client.go | 1 + .../sampledecoder/sample_decoder.go | 46 + .../template/net-attach-def-template.go | 2 + 5 files changed, 953 insertions(+) create mode 100644 go-controller/observability-lib/ovsdb/interface.go diff --git a/go-controller/observability-lib/ovsdb/interface.go b/go-controller/observability-lib/ovsdb/interface.go new file mode 100644 index 0000000000..e9f350995c --- /dev/null +++ b/go-controller/observability-lib/ovsdb/interface.go @@ -0,0 +1,903 @@ +// Code generated by "libovsdb.modelgen" +// DO NOT EDIT. 
+ +package ovsdb + +import "github.com/ovn-org/libovsdb/model" + +const InterfaceTable = "Interface" + +type ( + InterfaceAdminState = string + InterfaceCFMRemoteOpstate = string + InterfaceDuplex = string + InterfaceLinkState = string +) + +var ( + InterfaceAdminStateUp InterfaceAdminState = "up" + InterfaceAdminStateDown InterfaceAdminState = "down" + InterfaceCFMRemoteOpstateUp InterfaceCFMRemoteOpstate = "up" + InterfaceCFMRemoteOpstateDown InterfaceCFMRemoteOpstate = "down" + InterfaceDuplexHalf InterfaceDuplex = "half" + InterfaceDuplexFull InterfaceDuplex = "full" + InterfaceLinkStateUp InterfaceLinkState = "up" + InterfaceLinkStateDown InterfaceLinkState = "down" +) + +// Interface defines an object in Interface table +type Interface struct { + UUID string `ovsdb:"_uuid"` + AdminState *InterfaceAdminState `ovsdb:"admin_state"` + BFD map[string]string `ovsdb:"bfd"` + BFDStatus map[string]string `ovsdb:"bfd_status"` + CFMFault *bool `ovsdb:"cfm_fault"` + CFMFaultStatus []string `ovsdb:"cfm_fault_status"` + CFMFlapCount *int `ovsdb:"cfm_flap_count"` + CFMHealth *int `ovsdb:"cfm_health"` + CFMMpid *int `ovsdb:"cfm_mpid"` + CFMRemoteMpids []int `ovsdb:"cfm_remote_mpids"` + CFMRemoteOpstate *InterfaceCFMRemoteOpstate `ovsdb:"cfm_remote_opstate"` + Duplex *InterfaceDuplex `ovsdb:"duplex"` + Error *string `ovsdb:"error"` + ExternalIDs map[string]string `ovsdb:"external_ids"` + Ifindex *int `ovsdb:"ifindex"` + IngressPolicingBurst int `ovsdb:"ingress_policing_burst"` + IngressPolicingKpktsBurst int `ovsdb:"ingress_policing_kpkts_burst"` + IngressPolicingKpktsRate int `ovsdb:"ingress_policing_kpkts_rate"` + IngressPolicingRate int `ovsdb:"ingress_policing_rate"` + LACPCurrent *bool `ovsdb:"lacp_current"` + LinkResets *int `ovsdb:"link_resets"` + LinkSpeed *int `ovsdb:"link_speed"` + LinkState *InterfaceLinkState `ovsdb:"link_state"` + LLDP map[string]string `ovsdb:"lldp"` + MAC *string `ovsdb:"mac"` + MACInUse *string `ovsdb:"mac_in_use"` + MTU *int `ovsdb:"mtu"` + MTURequest *int `ovsdb:"mtu_request"` + Name string `ovsdb:"name"` + Ofport *int `ovsdb:"ofport"` + OfportRequest *int `ovsdb:"ofport_request"` + Options map[string]string `ovsdb:"options"` + OtherConfig map[string]string `ovsdb:"other_config"` + Statistics map[string]int `ovsdb:"statistics"` + Status map[string]string `ovsdb:"status"` + Type string `ovsdb:"type"` +} + +func (a *Interface) GetUUID() string { + return a.UUID +} + +func (a *Interface) GetAdminState() *InterfaceAdminState { + return a.AdminState +} + +func copyInterfaceAdminState(a *InterfaceAdminState) *InterfaceAdminState { + if a == nil { + return nil + } + b := *a + return &b +} + +func equalInterfaceAdminState(a, b *InterfaceAdminState) bool { + if (a == nil) != (b == nil) { + return false + } + if a == b { + return true + } + return *a == *b +} + +func (a *Interface) GetBFD() map[string]string { + return a.BFD +} + +func copyInterfaceBFD(a map[string]string) map[string]string { + if a == nil { + return nil + } + b := make(map[string]string, len(a)) + for k, v := range a { + b[k] = v + } + return b +} + +func equalInterfaceBFD(a, b map[string]string) bool { + if (a == nil) != (b == nil) { + return false + } + if len(a) != len(b) { + return false + } + for k, v := range a { + if w, ok := b[k]; !ok || v != w { + return false + } + } + return true +} + +func (a *Interface) GetBFDStatus() map[string]string { + return a.BFDStatus +} + +func copyInterfaceBFDStatus(a map[string]string) map[string]string { + if a == nil { + return nil + } + b := make(map[string]string, 
len(a)) + for k, v := range a { + b[k] = v + } + return b +} + +func equalInterfaceBFDStatus(a, b map[string]string) bool { + if (a == nil) != (b == nil) { + return false + } + if len(a) != len(b) { + return false + } + for k, v := range a { + if w, ok := b[k]; !ok || v != w { + return false + } + } + return true +} + +func (a *Interface) GetCFMFault() *bool { + return a.CFMFault +} + +func copyInterfaceCFMFault(a *bool) *bool { + if a == nil { + return nil + } + b := *a + return &b +} + +func equalInterfaceCFMFault(a, b *bool) bool { + if (a == nil) != (b == nil) { + return false + } + if a == b { + return true + } + return *a == *b +} + +func (a *Interface) GetCFMFaultStatus() []string { + return a.CFMFaultStatus +} + +func copyInterfaceCFMFaultStatus(a []string) []string { + if a == nil { + return nil + } + b := make([]string, len(a)) + copy(b, a) + return b +} + +func equalInterfaceCFMFaultStatus(a, b []string) bool { + if (a == nil) != (b == nil) { + return false + } + if len(a) != len(b) { + return false + } + for i, v := range a { + if b[i] != v { + return false + } + } + return true +} + +func (a *Interface) GetCFMFlapCount() *int { + return a.CFMFlapCount +} + +func copyInterfaceCFMFlapCount(a *int) *int { + if a == nil { + return nil + } + b := *a + return &b +} + +func equalInterfaceCFMFlapCount(a, b *int) bool { + if (a == nil) != (b == nil) { + return false + } + if a == b { + return true + } + return *a == *b +} + +func (a *Interface) GetCFMHealth() *int { + return a.CFMHealth +} + +func copyInterfaceCFMHealth(a *int) *int { + if a == nil { + return nil + } + b := *a + return &b +} + +func equalInterfaceCFMHealth(a, b *int) bool { + if (a == nil) != (b == nil) { + return false + } + if a == b { + return true + } + return *a == *b +} + +func (a *Interface) GetCFMMpid() *int { + return a.CFMMpid +} + +func copyInterfaceCFMMpid(a *int) *int { + if a == nil { + return nil + } + b := *a + return &b +} + +func equalInterfaceCFMMpid(a, b *int) bool { + if (a == nil) != (b == nil) { + return false + } + if a == b { + return true + } + return *a == *b +} + +func (a *Interface) GetCFMRemoteMpids() []int { + return a.CFMRemoteMpids +} + +func copyInterfaceCFMRemoteMpids(a []int) []int { + if a == nil { + return nil + } + b := make([]int, len(a)) + copy(b, a) + return b +} + +func equalInterfaceCFMRemoteMpids(a, b []int) bool { + if (a == nil) != (b == nil) { + return false + } + if len(a) != len(b) { + return false + } + for i, v := range a { + if b[i] != v { + return false + } + } + return true +} + +func (a *Interface) GetCFMRemoteOpstate() *InterfaceCFMRemoteOpstate { + return a.CFMRemoteOpstate +} + +func copyInterfaceCFMRemoteOpstate(a *InterfaceCFMRemoteOpstate) *InterfaceCFMRemoteOpstate { + if a == nil { + return nil + } + b := *a + return &b +} + +func equalInterfaceCFMRemoteOpstate(a, b *InterfaceCFMRemoteOpstate) bool { + if (a == nil) != (b == nil) { + return false + } + if a == b { + return true + } + return *a == *b +} + +func (a *Interface) GetDuplex() *InterfaceDuplex { + return a.Duplex +} + +func copyInterfaceDuplex(a *InterfaceDuplex) *InterfaceDuplex { + if a == nil { + return nil + } + b := *a + return &b +} + +func equalInterfaceDuplex(a, b *InterfaceDuplex) bool { + if (a == nil) != (b == nil) { + return false + } + if a == b { + return true + } + return *a == *b +} + +func (a *Interface) GetError() *string { + return a.Error +} + +func copyInterfaceError(a *string) *string { + if a == nil { + return nil + } + b := *a + return &b +} + +func equalInterfaceError(a, 
b *string) bool { + if (a == nil) != (b == nil) { + return false + } + if a == b { + return true + } + return *a == *b +} + +func (a *Interface) GetExternalIDs() map[string]string { + return a.ExternalIDs +} + +func copyInterfaceExternalIDs(a map[string]string) map[string]string { + if a == nil { + return nil + } + b := make(map[string]string, len(a)) + for k, v := range a { + b[k] = v + } + return b +} + +func equalInterfaceExternalIDs(a, b map[string]string) bool { + if (a == nil) != (b == nil) { + return false + } + if len(a) != len(b) { + return false + } + for k, v := range a { + if w, ok := b[k]; !ok || v != w { + return false + } + } + return true +} + +func (a *Interface) GetIfindex() *int { + return a.Ifindex +} + +func copyInterfaceIfindex(a *int) *int { + if a == nil { + return nil + } + b := *a + return &b +} + +func equalInterfaceIfindex(a, b *int) bool { + if (a == nil) != (b == nil) { + return false + } + if a == b { + return true + } + return *a == *b +} + +func (a *Interface) GetIngressPolicingBurst() int { + return a.IngressPolicingBurst +} + +func (a *Interface) GetIngressPolicingKpktsBurst() int { + return a.IngressPolicingKpktsBurst +} + +func (a *Interface) GetIngressPolicingKpktsRate() int { + return a.IngressPolicingKpktsRate +} + +func (a *Interface) GetIngressPolicingRate() int { + return a.IngressPolicingRate +} + +func (a *Interface) GetLACPCurrent() *bool { + return a.LACPCurrent +} + +func copyInterfaceLACPCurrent(a *bool) *bool { + if a == nil { + return nil + } + b := *a + return &b +} + +func equalInterfaceLACPCurrent(a, b *bool) bool { + if (a == nil) != (b == nil) { + return false + } + if a == b { + return true + } + return *a == *b +} + +func (a *Interface) GetLinkResets() *int { + return a.LinkResets +} + +func copyInterfaceLinkResets(a *int) *int { + if a == nil { + return nil + } + b := *a + return &b +} + +func equalInterfaceLinkResets(a, b *int) bool { + if (a == nil) != (b == nil) { + return false + } + if a == b { + return true + } + return *a == *b +} + +func (a *Interface) GetLinkSpeed() *int { + return a.LinkSpeed +} + +func copyInterfaceLinkSpeed(a *int) *int { + if a == nil { + return nil + } + b := *a + return &b +} + +func equalInterfaceLinkSpeed(a, b *int) bool { + if (a == nil) != (b == nil) { + return false + } + if a == b { + return true + } + return *a == *b +} + +func (a *Interface) GetLinkState() *InterfaceLinkState { + return a.LinkState +} + +func copyInterfaceLinkState(a *InterfaceLinkState) *InterfaceLinkState { + if a == nil { + return nil + } + b := *a + return &b +} + +func equalInterfaceLinkState(a, b *InterfaceLinkState) bool { + if (a == nil) != (b == nil) { + return false + } + if a == b { + return true + } + return *a == *b +} + +func (a *Interface) GetLLDP() map[string]string { + return a.LLDP +} + +func copyInterfaceLLDP(a map[string]string) map[string]string { + if a == nil { + return nil + } + b := make(map[string]string, len(a)) + for k, v := range a { + b[k] = v + } + return b +} + +func equalInterfaceLLDP(a, b map[string]string) bool { + if (a == nil) != (b == nil) { + return false + } + if len(a) != len(b) { + return false + } + for k, v := range a { + if w, ok := b[k]; !ok || v != w { + return false + } + } + return true +} + +func (a *Interface) GetMAC() *string { + return a.MAC +} + +func copyInterfaceMAC(a *string) *string { + if a == nil { + return nil + } + b := *a + return &b +} + +func equalInterfaceMAC(a, b *string) bool { + if (a == nil) != (b == nil) { + return false + } + if a == b { + return true + } 
+ return *a == *b +} + +func (a *Interface) GetMACInUse() *string { + return a.MACInUse +} + +func copyInterfaceMACInUse(a *string) *string { + if a == nil { + return nil + } + b := *a + return &b +} + +func equalInterfaceMACInUse(a, b *string) bool { + if (a == nil) != (b == nil) { + return false + } + if a == b { + return true + } + return *a == *b +} + +func (a *Interface) GetMTU() *int { + return a.MTU +} + +func copyInterfaceMTU(a *int) *int { + if a == nil { + return nil + } + b := *a + return &b +} + +func equalInterfaceMTU(a, b *int) bool { + if (a == nil) != (b == nil) { + return false + } + if a == b { + return true + } + return *a == *b +} + +func (a *Interface) GetMTURequest() *int { + return a.MTURequest +} + +func copyInterfaceMTURequest(a *int) *int { + if a == nil { + return nil + } + b := *a + return &b +} + +func equalInterfaceMTURequest(a, b *int) bool { + if (a == nil) != (b == nil) { + return false + } + if a == b { + return true + } + return *a == *b +} + +func (a *Interface) GetName() string { + return a.Name +} + +func (a *Interface) GetOfport() *int { + return a.Ofport +} + +func copyInterfaceOfport(a *int) *int { + if a == nil { + return nil + } + b := *a + return &b +} + +func equalInterfaceOfport(a, b *int) bool { + if (a == nil) != (b == nil) { + return false + } + if a == b { + return true + } + return *a == *b +} + +func (a *Interface) GetOfportRequest() *int { + return a.OfportRequest +} + +func copyInterfaceOfportRequest(a *int) *int { + if a == nil { + return nil + } + b := *a + return &b +} + +func equalInterfaceOfportRequest(a, b *int) bool { + if (a == nil) != (b == nil) { + return false + } + if a == b { + return true + } + return *a == *b +} + +func (a *Interface) GetOptions() map[string]string { + return a.Options +} + +func copyInterfaceOptions(a map[string]string) map[string]string { + if a == nil { + return nil + } + b := make(map[string]string, len(a)) + for k, v := range a { + b[k] = v + } + return b +} + +func equalInterfaceOptions(a, b map[string]string) bool { + if (a == nil) != (b == nil) { + return false + } + if len(a) != len(b) { + return false + } + for k, v := range a { + if w, ok := b[k]; !ok || v != w { + return false + } + } + return true +} + +func (a *Interface) GetOtherConfig() map[string]string { + return a.OtherConfig +} + +func copyInterfaceOtherConfig(a map[string]string) map[string]string { + if a == nil { + return nil + } + b := make(map[string]string, len(a)) + for k, v := range a { + b[k] = v + } + return b +} + +func equalInterfaceOtherConfig(a, b map[string]string) bool { + if (a == nil) != (b == nil) { + return false + } + if len(a) != len(b) { + return false + } + for k, v := range a { + if w, ok := b[k]; !ok || v != w { + return false + } + } + return true +} + +func (a *Interface) GetStatistics() map[string]int { + return a.Statistics +} + +func copyInterfaceStatistics(a map[string]int) map[string]int { + if a == nil { + return nil + } + b := make(map[string]int, len(a)) + for k, v := range a { + b[k] = v + } + return b +} + +func equalInterfaceStatistics(a, b map[string]int) bool { + if (a == nil) != (b == nil) { + return false + } + if len(a) != len(b) { + return false + } + for k, v := range a { + if w, ok := b[k]; !ok || v != w { + return false + } + } + return true +} + +func (a *Interface) GetStatus() map[string]string { + return a.Status +} + +func copyInterfaceStatus(a map[string]string) map[string]string { + if a == nil { + return nil + } + b := make(map[string]string, len(a)) + for k, v := range a { + b[k] 
= v + } + return b +} + +func equalInterfaceStatus(a, b map[string]string) bool { + if (a == nil) != (b == nil) { + return false + } + if len(a) != len(b) { + return false + } + for k, v := range a { + if w, ok := b[k]; !ok || v != w { + return false + } + } + return true +} + +func (a *Interface) GetType() string { + return a.Type +} + +func (a *Interface) DeepCopyInto(b *Interface) { + *b = *a + b.AdminState = copyInterfaceAdminState(a.AdminState) + b.BFD = copyInterfaceBFD(a.BFD) + b.BFDStatus = copyInterfaceBFDStatus(a.BFDStatus) + b.CFMFault = copyInterfaceCFMFault(a.CFMFault) + b.CFMFaultStatus = copyInterfaceCFMFaultStatus(a.CFMFaultStatus) + b.CFMFlapCount = copyInterfaceCFMFlapCount(a.CFMFlapCount) + b.CFMHealth = copyInterfaceCFMHealth(a.CFMHealth) + b.CFMMpid = copyInterfaceCFMMpid(a.CFMMpid) + b.CFMRemoteMpids = copyInterfaceCFMRemoteMpids(a.CFMRemoteMpids) + b.CFMRemoteOpstate = copyInterfaceCFMRemoteOpstate(a.CFMRemoteOpstate) + b.Duplex = copyInterfaceDuplex(a.Duplex) + b.Error = copyInterfaceError(a.Error) + b.ExternalIDs = copyInterfaceExternalIDs(a.ExternalIDs) + b.Ifindex = copyInterfaceIfindex(a.Ifindex) + b.LACPCurrent = copyInterfaceLACPCurrent(a.LACPCurrent) + b.LinkResets = copyInterfaceLinkResets(a.LinkResets) + b.LinkSpeed = copyInterfaceLinkSpeed(a.LinkSpeed) + b.LinkState = copyInterfaceLinkState(a.LinkState) + b.LLDP = copyInterfaceLLDP(a.LLDP) + b.MAC = copyInterfaceMAC(a.MAC) + b.MACInUse = copyInterfaceMACInUse(a.MACInUse) + b.MTU = copyInterfaceMTU(a.MTU) + b.MTURequest = copyInterfaceMTURequest(a.MTURequest) + b.Ofport = copyInterfaceOfport(a.Ofport) + b.OfportRequest = copyInterfaceOfportRequest(a.OfportRequest) + b.Options = copyInterfaceOptions(a.Options) + b.OtherConfig = copyInterfaceOtherConfig(a.OtherConfig) + b.Statistics = copyInterfaceStatistics(a.Statistics) + b.Status = copyInterfaceStatus(a.Status) +} + +func (a *Interface) DeepCopy() *Interface { + b := new(Interface) + a.DeepCopyInto(b) + return b +} + +func (a *Interface) CloneModelInto(b model.Model) { + c := b.(*Interface) + a.DeepCopyInto(c) +} + +func (a *Interface) CloneModel() model.Model { + return a.DeepCopy() +} + +func (a *Interface) Equals(b *Interface) bool { + return a.UUID == b.UUID && + equalInterfaceAdminState(a.AdminState, b.AdminState) && + equalInterfaceBFD(a.BFD, b.BFD) && + equalInterfaceBFDStatus(a.BFDStatus, b.BFDStatus) && + equalInterfaceCFMFault(a.CFMFault, b.CFMFault) && + equalInterfaceCFMFaultStatus(a.CFMFaultStatus, b.CFMFaultStatus) && + equalInterfaceCFMFlapCount(a.CFMFlapCount, b.CFMFlapCount) && + equalInterfaceCFMHealth(a.CFMHealth, b.CFMHealth) && + equalInterfaceCFMMpid(a.CFMMpid, b.CFMMpid) && + equalInterfaceCFMRemoteMpids(a.CFMRemoteMpids, b.CFMRemoteMpids) && + equalInterfaceCFMRemoteOpstate(a.CFMRemoteOpstate, b.CFMRemoteOpstate) && + equalInterfaceDuplex(a.Duplex, b.Duplex) && + equalInterfaceError(a.Error, b.Error) && + equalInterfaceExternalIDs(a.ExternalIDs, b.ExternalIDs) && + equalInterfaceIfindex(a.Ifindex, b.Ifindex) && + a.IngressPolicingBurst == b.IngressPolicingBurst && + a.IngressPolicingKpktsBurst == b.IngressPolicingKpktsBurst && + a.IngressPolicingKpktsRate == b.IngressPolicingKpktsRate && + a.IngressPolicingRate == b.IngressPolicingRate && + equalInterfaceLACPCurrent(a.LACPCurrent, b.LACPCurrent) && + equalInterfaceLinkResets(a.LinkResets, b.LinkResets) && + equalInterfaceLinkSpeed(a.LinkSpeed, b.LinkSpeed) && + equalInterfaceLinkState(a.LinkState, b.LinkState) && + equalInterfaceLLDP(a.LLDP, b.LLDP) && + equalInterfaceMAC(a.MAC, 
b.MAC) && + equalInterfaceMACInUse(a.MACInUse, b.MACInUse) && + equalInterfaceMTU(a.MTU, b.MTU) && + equalInterfaceMTURequest(a.MTURequest, b.MTURequest) && + a.Name == b.Name && + equalInterfaceOfport(a.Ofport, b.Ofport) && + equalInterfaceOfportRequest(a.OfportRequest, b.OfportRequest) && + equalInterfaceOptions(a.Options, b.Options) && + equalInterfaceOtherConfig(a.OtherConfig, b.OtherConfig) && + equalInterfaceStatistics(a.Statistics, b.Statistics) && + equalInterfaceStatus(a.Status, b.Status) && + a.Type == b.Type +} + +func (a *Interface) EqualsModel(b model.Model) bool { + c := b.(*Interface) + return a.Equals(c) +} + +var _ model.CloneableModel = &Interface{} +var _ model.ComparableModel = &Interface{} diff --git a/go-controller/observability-lib/ovsdb/observ_model.go b/go-controller/observability-lib/ovsdb/observ_model.go index 7ba2329e34..22547a3f8c 100644 --- a/go-controller/observability-lib/ovsdb/observ_model.go +++ b/go-controller/observability-lib/ovsdb/observ_model.go @@ -7,5 +7,6 @@ func ObservDatabaseModel() (model.ClientDBModel, error) { return model.NewClientDBModel("Open_vSwitch", map[string]model.Model{ "Bridge": &Bridge{}, "Flow_Sample_Collector_Set": &FlowSampleCollectorSet{}, + "Interface": &Interface{}, }) } diff --git a/go-controller/observability-lib/sampledecoder/db_client.go b/go-controller/observability-lib/sampledecoder/db_client.go index 5ff1587a6f..8b58e3608e 100644 --- a/go-controller/observability-lib/sampledecoder/db_client.go +++ b/go-controller/observability-lib/sampledecoder/db_client.go @@ -65,6 +65,7 @@ func NewOVSDBClientWithConfig(ctx context.Context, cfg dbConfig) (client.Client, c.NewMonitor( client.WithTable(&ovsdb.FlowSampleCollectorSet{}), client.WithTable(&ovsdb.Bridge{}), + client.WithTable(&ovsdb.Interface{}), ), ) if err != nil { diff --git a/go-controller/observability-lib/sampledecoder/sample_decoder.go b/go-controller/observability-lib/sampledecoder/sample_decoder.go index d691fd9cca..0642e795b8 100644 --- a/go-controller/observability-lib/sampledecoder/sample_decoder.go +++ b/go-controller/observability-lib/sampledecoder/sample_decoder.go @@ -291,3 +291,49 @@ func (d *SampleDecoder) DeleteCollector(collectorID int) error { fmt.Println("res: ", res) return err } + +// This is a copy of the ParseNetworkName function from go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template.go +// We need to copy it to optimize dependencies of observability-lib. +func ParseNetworkName(networkName string) (udnNamespace, udnName string) { + parts := strings.Split(networkName, ".") + if len(parts) == 2 { + return parts[0], parts[1] + } + return "", "" +} + +func networkNameToUDNNamespacedName(networkName string) string { + namespace, name := ParseNetworkName(networkName) + if name == "" { + return "" + } + namespacedName := name + if namespace != "" { + namespacedName = namespace + "/" + name + } + return namespacedName +} + +// GetInterfaceUDNs returns a map of all pod interface names to their corresponding (C)UDN namespaced names. +// default network or NAD that is not created by (C)UDN is represented by an empty string. +// UDN namespace+name are joined by "/", CUDN will just have a name. 
+func (d *SampleDecoder) GetInterfaceUDNs() (map[string]string, error) { + res := map[string]string{} + ifaces := []*ovsdb.Interface{} + err := d.ovsdbClient.List(context.Background(), &ifaces) + if err != nil { + return nil, fmt.Errorf("failed listing interfaces: %w", err) + } + for _, iface := range ifaces { + if iface.ExternalIDs["iface-id-ver"] == "" || iface.ExternalIDs["iface-id"] == "" { + // not a pod interface + continue + } + if iface.ExternalIDs["k8s.ovn.org/network"] == "" { + res[iface.Name] = "" + continue + } + res[iface.Name] = networkNameToUDNNamespacedName(iface.ExternalIDs["k8s.ovn.org/network"]) + } + return res, nil +} diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template.go b/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template.go index 6dbc473338..422b9339e5 100644 --- a/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template.go +++ b/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template.go @@ -34,6 +34,8 @@ type SpecGetter interface { GetLayer2() *userdefinednetworkv1.Layer2Config } +// This function has a copy in go-controller/observability-lib/sampledecoder/sample_decoder.go +// Please update together with this function. func ParseNetworkName(networkName string) (udnNamespace, udnName string) { parts := strings.Split(networkName, ".") if len(parts) == 2 { From da702b8bc5470c7624c09fa0ffe3b1259a5b430b Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Tue, 14 Jan 2025 16:03:50 -0500 Subject: [PATCH 15/51] Fix multicast tests for ipv6 with UDN Fixes test "should be able to send multicast UDP traffic between nodes" which was failing in IPv6 lane due to bugs with an older iperf version. Updates the test case to bind iperf to the right interface (eth0 or ovn-udn1) depending on the test. Test "should be able to receive multicast IGMP query" is skipped on IPv6. I tried to fix it, but it doesn't seem to work. I left some notes there so someone can follow up later to fix the test and unskip it. 
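For clarity, the interface-scoped multicast addressing the updated tests rely on can be reproduced with a small standalone snippet; the group address and interface names below mirror the values used in the tests and are otherwise illustrative:

package main

import "fmt"

func main() {
	mcastGroup := "224.3.3.3" // multicast group joined by the listeners
	iface := "ovn-udn1"       // primary UDN interface in the pod (eth0 on the default network)

	// "%%" escapes the percent sign, so the rendered commands scope the
	// group to one interface, e.g. "iperf -c 224.3.3.3%ovn-udn1 ...".
	sender := fmt.Sprintf("iperf -c %s%%%s -u -T 3 -t 3000 -i 5", mcastGroup, iface)
	listener := fmt.Sprintf("iperf -s -B %s%%%s -u -t 180 -i 5", mcastGroup, iface)

	fmt.Println(sender)
	fmt.Println(listener)
}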
Signed-off-by: Tim Rozet --- test/e2e/e2e.go | 3 +++ test/e2e/multicast.go | 33 ++++++++++++++++++-------------- test/e2e/network_segmentation.go | 2 +- test/e2e/util.go | 21 ++++++++++++++++++++ 4 files changed, 44 insertions(+), 15 deletions(-) diff --git a/test/e2e/e2e.go b/test/e2e/e2e.go index f044b3bca3..6cda27fc81 100644 --- a/test/e2e/e2e.go +++ b/test/e2e/e2e.go @@ -44,10 +44,13 @@ const ( retryTimeout = 40 * time.Second // polling timeout rolloutTimeout = 10 * time.Minute agnhostImage = "registry.k8s.io/e2e-test-images/agnhost:2.26" + agnhostImageNew = "registry.k8s.io/e2e-test-images/agnhost:2.53" iperf3Image = "quay.io/sronanrh/iperf" redirectIP = "123.123.123.123" redirectPort = "13337" exContainerName = "tcp-continuous-client" + defaultPodInterface = "eth0" + udnPodInterface = "ovn-udn1" ) type podCondition = func(pod *v1.Pod) (bool, error) diff --git a/test/e2e/multicast.go b/test/e2e/multicast.go index 2e7c2ce73c..f90cf37b5f 100644 --- a/test/e2e/multicast.go +++ b/test/e2e/multicast.go @@ -58,7 +58,7 @@ var _ = ginkgo.Describe("Multicast", func() { }) ginkgo.It("should be able to send multicast UDP traffic between nodes", func() { - testMulticastUDPTraffic(fr, clientNodeInfo, serverNodeInfo) + testMulticastUDPTraffic(fr, clientNodeInfo, serverNodeInfo, defaultPodInterface) }) ginkgo.It("should be able to receive multicast IGMP query", func() { testMulticastIGMPQuery(fr, clientNodeInfo, serverNodeInfo) @@ -66,7 +66,7 @@ var _ = ginkgo.Describe("Multicast", func() { }) }) -func testMulticastUDPTraffic(fr *framework.Framework, clientNodeInfo, serverNodeInfo nodeInfo) { +func testMulticastUDPTraffic(fr *framework.Framework, clientNodeInfo, serverNodeInfo nodeInfo, iface string) { ginkgo.GinkgoHelper() const ( mcastSource = "pod-client" @@ -91,48 +91,48 @@ func testMulticastUDPTraffic(fr *framework.Framework, clientNodeInfo, serverNode // Start the multicast source (iperf client is the sender in multicast) ginkgo.By("creating a pod as a multicast source in node " + clientNodeInfo.name) // multicast group (-c 224.3.3.3), UDP (-u), TTL (-T 3), during (-t 3000) seconds, report every (-i 5) seconds - iperf := fmt.Sprintf("iperf -c %s -u -T 3 -t 3000 -i 5", mcastGroup) + iperf := fmt.Sprintf("iperf -c %s%%%s -u -T 3 -t 3000 -i 5", mcastGroup, iface) if IsIPv6Cluster(fr.ClientSet) { iperf = iperf + " -V" } cmd := []string{"/bin/sh", "-c", iperf} - clientPod := newAgnhostPod(fr.Namespace.Name, mcastSource, cmd...) + clientPod := newLatestAgnhostPod(fr.Namespace.Name, mcastSource, cmd...) clientPod.Spec.NodeName = clientNodeInfo.name e2epod.NewPodClient(fr).CreateSync(context.TODO(), clientPod) // Start a multicast listener on the same groups and verify it received the traffic (iperf server is the multicast listener) // join multicast group (-B 224.3.3.3), UDP (-u), during (-t 30) seconds, report every (-i 1) seconds ginkgo.By("creating first multicast listener pod in node " + serverNodeInfo.name) - iperf = fmt.Sprintf("iperf -s -B %s -u -t 180 -i 5", mcastGroup) + iperf = fmt.Sprintf("iperf -s -B %s%%%s -u -t 180 -i 5", mcastGroup, iface) if IsIPv6Cluster(fr.ClientSet) { iperf = iperf + " -V" } cmd = []string{"/bin/sh", "-c", iperf} - mcastServerPod1 := newAgnhostPod(fr.Namespace.Name, mcastServer1, cmd...) + mcastServerPod1 := newLatestAgnhostPod(fr.Namespace.Name, mcastServer1, cmd...) 
mcastServerPod1.Spec.NodeName = serverNodeInfo.name e2epod.NewPodClient(fr).CreateSync(context.TODO(), mcastServerPod1) // Start a multicast listener on on other group and verify it does not receive the traffic (iperf server is the multicast listener) // join multicast group (-B 224.4.4.4), UDP (-u), during (-t 30) seconds, report every (-i 1) seconds ginkgo.By("creating second multicast listener pod in node " + serverNodeInfo.name) - iperf = fmt.Sprintf("iperf -s -B %s -u -t 180 -i 5", mcastGroupBad) + iperf = fmt.Sprintf("iperf -s -B %s%%%s -u -t 180 -i 5", mcastGroupBad, iface) if IsIPv6Cluster(fr.ClientSet) { iperf = iperf + " -V" } cmd = []string{"/bin/sh", "-c", iperf} - mcastServerPod2 := newAgnhostPod(fr.Namespace.Name, mcastServer2, cmd...) + mcastServerPod2 := newLatestAgnhostPod(fr.Namespace.Name, mcastServer2, cmd...) mcastServerPod2.Spec.NodeName = serverNodeInfo.name e2epod.NewPodClient(fr).CreateSync(context.TODO(), mcastServerPod2) // Start a multicast listener on the same groups and verify it received the traffic (iperf server is the multicast listener) // join multicast group (-B 224.3.3.3), UDP (-u), during (-t 30) seconds, report every (-i 1) seconds ginkgo.By("creating first multicast listener pod in node " + clientNodeInfo.name) - iperf = fmt.Sprintf("iperf -s -B %s -u -t 180 -i 5", mcastGroup) + iperf = fmt.Sprintf("iperf -s -B %s%%%s -u -t 180 -i 5", mcastGroup, iface) if IsIPv6Cluster(fr.ClientSet) { iperf = iperf + " -V" } cmd = []string{"/bin/sh", "-c", iperf} - mcastServerPod3 := newAgnhostPod(fr.Namespace.Name, mcastServer3, cmd...) + mcastServerPod3 := newLatestAgnhostPod(fr.Namespace.Name, mcastServer3, cmd...) mcastServerPod3.Spec.NodeName = clientNodeInfo.name e2epod.NewPodClient(fr).CreateSync(context.TODO(), mcastServerPod3) @@ -174,21 +174,26 @@ func testMulticastIGMPQuery(f *framework.Framework, clientNodeInfo, serverNodeIn fmt.Sprintf("iperf -c %s -u -T 2 -t 3000 -i 5", mcastGroup)} ) - // Create a multicast source pod + // FIXME(trozet): the tcpdump filter is not correct for ipv6, it should be + // 'icmp6 and (ip6[40] == 0x8a or ip6[40] == 0x8b or ip6[40] == 0x8c or ip6[40] == 0x8d)' + // additionally this function needs to be modified like testMulticastUDPTraffic to specify correct pod interfaces if IsIPv6Cluster(f.ClientSet) { // Multicast group (-c ff3e::4321:1234), UDP (-u), TTL (-T 2), during (-t 3000) seconds, report every (-i 5) seconds, -V (Set the domain to IPv6) multicastSourceCommand = []string{"bash", "-c", fmt.Sprintf("iperf -c %s -u -T 2 -t 3000 -i 5 -V", mcastV6Group)} } + // Create a multicast source pod ginkgo.By("creating a multicast source pod in node " + clientNodeInfo.name) - createGenericPod(f, multicastSourcePod, clientNodeInfo.name, f.Namespace.Name, multicastSourceCommand) + _, err := createGenericPod(f, multicastSourcePod, clientNodeInfo.name, f.Namespace.Name, multicastSourceCommand) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) // Create a multicast listener pod ginkgo.By("creating a multicast listener pod in node " + serverNodeInfo.name) - createGenericPod(f, multicastListenerPod, serverNodeInfo.name, f.Namespace.Name, tcpDumpCommand) + _, err = createGenericPod(f, multicastListenerPod, serverNodeInfo.name, f.Namespace.Name, tcpDumpCommand) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) // Wait for tcpdump on listener pod to be ready - err := wait.PollUntilContextTimeout(context.Background(), retryInterval, retryTimeout, true /*immediate*/, func(context.Context) (bool, error) { + err = 
wait.PollUntilContextTimeout(context.Background(), retryInterval, retryTimeout, true /*immediate*/, func(context.Context) (bool, error) { kubectlOut, err := e2ekubectl.RunKubectl(f.Namespace.Name, "exec", multicastListenerPod, "--", "/bin/bash", "-c", "ls") if err != nil { framework.Failf("failed to retrieve multicast IGMP query: " + err.Error()) diff --git a/test/e2e/network_segmentation.go b/test/e2e/network_segmentation.go index c3d3b31a77..f432b9112f 100644 --- a/test/e2e/network_segmentation.go +++ b/test/e2e/network_segmentation.go @@ -732,7 +732,7 @@ var _ = Describe("Network Segmentation", func() { metav1.CreateOptions{}, ) framework.ExpectNoError(err) - testMulticastUDPTraffic(f, clientNodeInfo, serverNodeInfo) + testMulticastUDPTraffic(f, clientNodeInfo, serverNodeInfo, udnPodInterface) }, ginkgo.Entry("with primary layer3 UDN", networkAttachmentConfigParams{ name: nadName, diff --git a/test/e2e/util.go b/test/e2e/util.go index 1dcce54a68..18349b5382 100644 --- a/test/e2e/util.go +++ b/test/e2e/util.go @@ -125,6 +125,27 @@ func newAgnhostPod(namespace, name string, command ...string) *v1.Pod { } } +// newLatestAgnhostPod returns a pod that uses the newer agnhost image. The image's binary supports various subcommands +// that behave the same, no matter the underlying OS. +func newLatestAgnhostPod(namespace, name string, command ...string) *v1.Pod { + return &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: name, + Image: agnhostImageNew, + Command: command, + }, + }, + RestartPolicy: v1.RestartPolicyNever, + }, + } +} + // newAgnhostPod returns a pod that uses the agnhost image. The image's binary supports various subcommands // that behave the same, no matter the underlying OS. func newAgnhostPodOnNode(name, nodeName string, labels map[string]string, command ...string) *v1.Pod { From 299db52cb2cc203e1dc9bdf9c6f9865434e7a93d Mon Sep 17 00:00:00 2001 From: Nadia Pinaeva Date: Thu, 19 Dec 2024 11:40:16 +0100 Subject: [PATCH 16/51] (C)UDN CRD: add IPAM section and IPAM.Mode field. IPAM options are only available for Layer2 network for now. IPAM.Lifecycle is only supported when IPAM.Mode is Enabled (previously expressed with non-empty subnets). IPAM.Mode=Disabled is only supported for secondary network. Update CEL: require or omit subnets based on the IPAM.Mode instead of network role. The only controller change needed is the location of IPAMLifecycle. Empty subnets means turn off IPAM in NAD config, and CEL ensures that subnets will be empty if and only if IPAM.Mode is Disabled. We do have a duplicate check for some CEL validations in the code, they are covered by unit tests. Fix "should fail to render NAD" test, as it used to always return error because of the empty target namespace instead of expected one. 
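To make the new API surface concrete, a sketch of a manifest using the ipam section is shown below. Field names and allowed values follow the CRD templates updated in this patch; the apiVersion, metadata and comments are illustrative assumptions rather than values taken from the tests:

apiVersion: k8s.ovn.org/v1
kind: UserDefinedNetwork
metadata:
  name: l2-no-ipam        # example name
  namespace: example-ns   # example namespace
spec:
  topology: Layer2
  layer2:
    role: Secondary       # ipam.mode Disabled is only allowed for Secondary networks
    ipam:
      mode: Disabled      # OVN only assigns MAC addresses; users manage pod IPs themselves
    # subnets must be omitted when ipam.mode is Disabled; with the default
    # mode Enabled, subnets is required and ipam.lifecycle: Persistent may
    # additionally be requested.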
Signed-off-by: Nadia Pinaeva --- ...ovn.org_clusteruserdefinednetworks.yaml.j2 | 69 +++++++++--- .../k8s.ovn.org_userdefinednetworks.yaml.j2 | 68 +++++++++--- .../userdefinednetwork/controller_test.go | 6 +- .../template/net-attach-def-template.go | 25 ++++- .../template/net-attach-def-template_test.go | 103 +++++++++++++----- .../userdefinednetwork/v1/ipamconfig.go | 51 +++++++++ .../userdefinednetwork/v1/layer2config.go | 18 +-- .../v1/apis/applyconfiguration/utils.go | 2 + .../pkg/crd/userdefinednetwork/v1/shared.go | 42 +++++-- .../v1/zz_generated.deepcopy.go | 21 ++++ 10 files changed, 325 insertions(+), 80 deletions(-) create mode 100644 go-controller/pkg/crd/userdefinednetwork/v1/apis/applyconfiguration/userdefinednetwork/v1/ipamconfig.go diff --git a/dist/templates/k8s.ovn.org_clusteruserdefinednetworks.yaml.j2 b/dist/templates/k8s.ovn.org_clusteruserdefinednetworks.yaml.j2 index faca8b6073..b9c910ee70 100644 --- a/dist/templates/k8s.ovn.org_clusteruserdefinednetworks.yaml.j2 +++ b/dist/templates/k8s.ovn.org_clusteruserdefinednetworks.yaml.j2 @@ -94,16 +94,42 @@ spec: layer2: description: Layer2 is the Layer2 topology configuration. properties: - ipamLifecycle: - description: |- - IPAMLifecycle controls IP addresses management lifecycle. + ipam: + description: IPAM section contains IPAM-related configuration + for the network. + minProperties: 1 + properties: + lifecycle: + description: |- + Lifecycle controls IP addresses management lifecycle. - The only allowed value is Persistent. When set, OVN Kubernetes assigned IP addresses will be persisted in an - `ipamclaims.k8s.cni.cncf.io` object. These IP addresses will be reused by other pods if requested. - Only supported when "subnets" are set. - enum: - - Persistent - type: string + The only allowed value is Persistent. When set, OVN Kubernetes assigned IP addresses will be persisted in an + `ipamclaims.k8s.cni.cncf.io` object. These IP addresses will be reused by other pods if requested. + Only supported when mode is `Enabled`. + enum: + - Persistent + type: string + mode: + description: |- + Mode controls how much of the IP configuration will be managed by OVN. + `Enabled` means OVN-Kubernetes will apply IP configuration to the SDN infrastructure and it will also assign IPs + from the selected subnet to the individual pods. + `Disabled` means OVN-Kubernetes will only assign MAC addresses and provide layer 2 communication, letting users + configure IP addresses for the pods. + `Disabled` is only available for Secondary networks. + By disabling IPAM, any Kubernetes features that rely on selecting pods by IP will no longer function + (such as network policy, services, etc). Additionally, IP port security will also be disabled for interfaces attached to this network. + Defaults to `Enabled`. + enum: + - Enabled + - Disabled + type: string + type: object + x-kubernetes-validations: + - message: lifecycle Persistent is only supported when ipam.mode + is Enabled + rule: '!has(self.lifecycle) || self.lifecycle != ''Persistent'' + || !has(self.mode) || self.mode == ''Enabled''' joinSubnets: description: |- JoinSubnets are used inside the OVN network topology. @@ -113,6 +139,7 @@ spec: It is not recommended to set this field without explicit need and understanding of the OVN network topology. When omitted, the platform will choose a reasonable default which is subject to change over time. 
items: + maxLength: 43 type: string x-kubernetes-validations: - message: CIDR is invalid @@ -149,9 +176,9 @@ spec: Dual-stack clusters may set 2 subnets (one for each IP family), otherwise only 1 subnet is allowed. The format should match standard CIDR notation (for example, "10.128.0.0/16"). - This field may be omitted. In that case the logical switch implementing the network only provides layer 2 communication, - and users must configure IP addresses for the pods. As a consequence, Port security only prevents MAC spoofing. + This field must be omitted if `ipam.mode` is `Disabled`. items: + maxLength: 43 type: string x-kubernetes-validations: - message: CIDR is invalid @@ -168,14 +195,19 @@ spec: - role type: object x-kubernetes-validations: - - message: Subnets is required for Primary Layer2 topology - rule: self.role != 'Primary' || has(self.subnets) + - message: Subnets is required with ipam.mode is Enabled or unset + rule: has(self.ipam) && has(self.ipam.mode) && self.ipam.mode + != 'Enabled' || has(self.subnets) + - message: Subnets must be unset when ipam.mode is Disabled + rule: '!has(self.ipam) || !has(self.ipam.mode) || self.ipam.mode + != ''Disabled'' || !has(self.subnets)' + - message: Disabled ipam.mode is only supported for Secondary + network + rule: '!has(self.ipam) || !has(self.ipam.mode) || self.ipam.mode + != ''Disabled'' || self.role == ''Secondary''' - message: JoinSubnets is only supported for Primary network rule: '!has(self.joinSubnets) || has(self.role) && self.role == ''Primary''' - - message: IPAMLifecycle is only supported when subnets are set - rule: '!has(self.ipamLifecycle) || has(self.subnets) && size(self.subnets) - > 0' - message: MTU should be greater than or equal to 1280 when IPv6 subent is used rule: '!has(self.subnets) || !has(self.mtu) || !self.subnets.exists_one(i, @@ -192,6 +224,7 @@ spec: It is not recommended to set this field without explicit need and understanding of the OVN network topology. When omitted, the platform will choose a reasonable default which is subject to change over time. items: + maxLength: 43 type: string x-kubernetes-validations: - message: CIDR is invalid @@ -235,6 +268,7 @@ spec: cidr: description: CIDR specifies L3Subnet, which is split into smaller subnets for every node. + maxLength: 43 type: string x-kubernetes-validations: - message: CIDR is invalid @@ -278,7 +312,8 @@ spec: - message: MTU should be greater than or equal to 1280 when IPv6 subent is used rule: '!has(self.subnets) || !has(self.mtu) || !self.subnets.exists_one(i, - cidr(i.cidr).ip().family() == 6) || self.mtu >= 1280' + isCIDR(i.cidr) && cidr(i.cidr).ip().family() == 6) || self.mtu + >= 1280' topology: description: |- Topology describes network configuration. diff --git a/dist/templates/k8s.ovn.org_userdefinednetworks.yaml.j2 b/dist/templates/k8s.ovn.org_userdefinednetworks.yaml.j2 index fda6c5978b..4a6d45eacd 100644 --- a/dist/templates/k8s.ovn.org_userdefinednetworks.yaml.j2 +++ b/dist/templates/k8s.ovn.org_userdefinednetworks.yaml.j2 @@ -42,16 +42,42 @@ spec: layer2: description: Layer2 is the Layer2 topology configuration. properties: - ipamLifecycle: - description: |- - IPAMLifecycle controls IP addresses management lifecycle. + ipam: + description: IPAM section contains IPAM-related configuration + for the network. + minProperties: 1 + properties: + lifecycle: + description: |- + Lifecycle controls IP addresses management lifecycle. - The only allowed value is Persistent. 
When set, OVN Kubernetes assigned IP addresses will be persisted in an - `ipamclaims.k8s.cni.cncf.io` object. These IP addresses will be reused by other pods if requested. - Only supported when "subnets" are set. - enum: - - Persistent - type: string + The only allowed value is Persistent. When set, OVN Kubernetes assigned IP addresses will be persisted in an + `ipamclaims.k8s.cni.cncf.io` object. These IP addresses will be reused by other pods if requested. + Only supported when mode is `Enabled`. + enum: + - Persistent + type: string + mode: + description: |- + Mode controls how much of the IP configuration will be managed by OVN. + `Enabled` means OVN-Kubernetes will apply IP configuration to the SDN infrastructure and it will also assign IPs + from the selected subnet to the individual pods. + `Disabled` means OVN-Kubernetes will only assign MAC addresses and provide layer 2 communication, letting users + configure IP addresses for the pods. + `Disabled` is only available for Secondary networks. + By disabling IPAM, any Kubernetes features that rely on selecting pods by IP will no longer function + (such as network policy, services, etc). Additionally, IP port security will also be disabled for interfaces attached to this network. + Defaults to `Enabled`. + enum: + - Enabled + - Disabled + type: string + type: object + x-kubernetes-validations: + - message: lifecycle Persistent is only supported when ipam.mode + is Enabled + rule: '!has(self.lifecycle) || self.lifecycle != ''Persistent'' + || !has(self.mode) || self.mode == ''Enabled''' joinSubnets: description: |- JoinSubnets are used inside the OVN network topology. @@ -61,6 +87,7 @@ spec: It is not recommended to set this field without explicit need and understanding of the OVN network topology. When omitted, the platform will choose a reasonable default which is subject to change over time. items: + maxLength: 43 type: string x-kubernetes-validations: - message: CIDR is invalid @@ -97,9 +124,9 @@ spec: Dual-stack clusters may set 2 subnets (one for each IP family), otherwise only 1 subnet is allowed. The format should match standard CIDR notation (for example, "10.128.0.0/16"). - This field may be omitted. In that case the logical switch implementing the network only provides layer 2 communication, - and users must configure IP addresses for the pods. As a consequence, Port security only prevents MAC spoofing. + This field must be omitted if `ipam.mode` is `Disabled`. 
items: + maxLength: 43 type: string x-kubernetes-validations: - message: CIDR is invalid @@ -116,14 +143,18 @@ spec: - role type: object x-kubernetes-validations: - - message: Subnets is required for Primary Layer2 topology - rule: self.role != 'Primary' || has(self.subnets) + - message: Subnets is required with ipam.mode is Enabled or unset + rule: has(self.ipam) && has(self.ipam.mode) && self.ipam.mode != + 'Enabled' || has(self.subnets) + - message: Subnets must be unset when ipam.mode is Disabled + rule: '!has(self.ipam) || !has(self.ipam.mode) || self.ipam.mode + != ''Disabled'' || !has(self.subnets)' + - message: Disabled ipam.mode is only supported for Secondary network + rule: '!has(self.ipam) || !has(self.ipam.mode) || self.ipam.mode + != ''Disabled'' || self.role == ''Secondary''' - message: JoinSubnets is only supported for Primary network rule: '!has(self.joinSubnets) || has(self.role) && self.role == ''Primary''' - - message: IPAMLifecycle is only supported when subnets are set - rule: '!has(self.ipamLifecycle) || has(self.subnets) && size(self.subnets) - > 0' - message: MTU should be greater than or equal to 1280 when IPv6 subent is used rule: '!has(self.subnets) || !has(self.mtu) || !self.subnets.exists_one(i, @@ -140,6 +171,7 @@ spec: It is not recommended to set this field without explicit need and understanding of the OVN network topology. When omitted, the platform will choose a reasonable default which is subject to change over time. items: + maxLength: 43 type: string x-kubernetes-validations: - message: CIDR is invalid @@ -183,6 +215,7 @@ spec: cidr: description: CIDR specifies L3Subnet, which is split into smaller subnets for every node. + maxLength: 43 type: string x-kubernetes-validations: - message: CIDR is invalid @@ -225,7 +258,8 @@ spec: - message: MTU should be greater than or equal to 1280 when IPv6 subent is used rule: '!has(self.subnets) || !has(self.mtu) || !self.subnets.exists_one(i, - cidr(i.cidr).ip().family() == 6) || self.mtu >= 1280' + isCIDR(i.cidr) && cidr(i.cidr).ip().family() == 6) || self.mtu + >= 1280' topology: description: |- Topology describes network configuration. diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/controller_test.go b/go-controller/pkg/clustermanager/userdefinednetwork/controller_test.go index bd3fb29d75..8aeb7e2f0e 100644 --- a/go-controller/pkg/clustermanager/userdefinednetwork/controller_test.go +++ b/go-controller/pkg/clustermanager/userdefinednetwork/controller_test.go @@ -363,7 +363,9 @@ var _ = Describe("User Defined Network Controller", func() { objs = append(objs, testNamespace(nsName)) } cudn := testClusterUDN("test", testNamespaces...) - cudn.Spec.Network = udnv1.NetworkSpec{Topology: udnv1.NetworkTopologyLayer2, Layer2: &udnv1.Layer2Config{}} + cudn.Spec.Network = udnv1.NetworkSpec{Topology: udnv1.NetworkTopologyLayer2, Layer2: &udnv1.Layer2Config{ + Subnets: udnv1.DualStackCIDRs{"10.10.10.0/24"}, + }} objs = append(objs, cudn) c = newTestController(template.RenderNetAttachDefManifest, objs...) @@ -374,7 +376,7 @@ var _ = Describe("User Defined Network Controller", func() { nad := testClusterUdnNAD(cudn.Name, nsName) networkName := "cluster.udn." 
+ cudn.Name nadName := nsName + "/" + cudn.Name - nad.Spec.Config = `{"cniVersion":"1.0.0","name":"` + networkName + `","netAttachDefName":"` + nadName + `","role":"","topology":"layer2","type":"ovn-k8s-cni-overlay"}` + nad.Spec.Config = `{"cniVersion":"1.0.0","name":"` + networkName + `","netAttachDefName":"` + nadName + `","role":"","subnets":"10.10.10.0/24","topology":"layer2","type":"ovn-k8s-cni-overlay"}` expectedNsNADs[nsName] = nad } diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template.go b/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template.go index 6dbc473338..508feed01a 100644 --- a/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template.go +++ b/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template.go @@ -132,9 +132,18 @@ func renderCNINetworkConfig(networkName, nadName string, spec SpecGetter) (map[s netConfSpec.JoinSubnet = cidrString(renderJoinSubnets(cfg.Role, cfg.JoinSubnets)) case userdefinednetworkv1.NetworkTopologyLayer2: cfg := spec.GetLayer2() + if err := validateIPAM(cfg.IPAM); err != nil { + return nil, err + } netConfSpec.Role = strings.ToLower(string(cfg.Role)) netConfSpec.MTU = int(cfg.MTU) - netConfSpec.AllowPersistentIPs = cfg.IPAMLifecycle == userdefinednetworkv1.IPAMLifecyclePersistent + netConfSpec.AllowPersistentIPs = cfg.IPAM != nil && cfg.IPAM.Lifecycle == userdefinednetworkv1.IPAMLifecyclePersistent + if ipamEnabled(cfg.IPAM) && len(cfg.Subnets) == 0 { + return nil, fmt.Errorf("subnets is required with ipam.mode is Enabled or unset") + } + if !ipamEnabled(cfg.IPAM) && len(cfg.Subnets) > 0 { + return nil, fmt.Errorf("subnets must be unset when ipam.mode is Disabled") + } netConfSpec.Subnets = cidrString(cfg.Subnets) netConfSpec.JoinSubnet = cidrString(renderJoinSubnets(cfg.Role, cfg.JoinSubnets)) } @@ -176,6 +185,20 @@ func renderCNINetworkConfig(networkName, nadName string, spec SpecGetter) (map[s return cniNetConf, nil } +func ipamEnabled(ipam *userdefinednetworkv1.IPAMConfig) bool { + return ipam == nil || ipam.Mode == "" || ipam.Mode == userdefinednetworkv1.IPAMEnabled +} + +func validateIPAM(ipam *userdefinednetworkv1.IPAMConfig) error { + if ipam == nil { + return nil + } + if ipam.Lifecycle == userdefinednetworkv1.IPAMLifecyclePersistent && !ipamEnabled(ipam) { + return fmt.Errorf("lifecycle Persistent is only supported when ipam.mode is Enabled") + } + return nil +} + func renderJoinSubnets(role userdefinednetworkv1.NetworkRole, joinSubnetes []userdefinednetworkv1.CIDR) []userdefinednetworkv1.CIDR { if role != userdefinednetworkv1.NetworkRolePrimary { return nil diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template_test.go b/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template_test.go index 29679a6cee..6fca45a378 100644 --- a/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template_test.go +++ b/go-controller/pkg/clustermanager/userdefinednetwork/template/net-attach-def-template_test.go @@ -172,7 +172,7 @@ var _ = Describe("NetAttachDefTemplate", func() { DescribeTable("should fail to render NAD, given", func(obj client.Object) { - _, err := RenderNetAttachDefManifest(obj, "") + _, err := RenderNetAttachDefManifest(obj, "test") Expect(err).To(HaveOccurred()) }, Entry("UDN, invalid topology: topology layer2 & layer3 config", @@ -183,6 +183,43 @@ var _ = Describe("NetAttachDefTemplate", func() { 
&udnv1.UserDefinedNetwork{Spec: udnv1.UserDefinedNetworkSpec{ Topology: udnv1.NetworkTopologyLayer3, Layer2: &udnv1.Layer2Config{}}}, ), + Entry("UDN, invalid IPAM config: IPAM lifecycle & disabled ipam mode", + &udnv1.UserDefinedNetwork{Spec: udnv1.UserDefinedNetworkSpec{ + Topology: udnv1.NetworkTopologyLayer2, + Layer2: &udnv1.Layer2Config{ + Role: udnv1.NetworkRoleSecondary, + Subnets: udnv1.DualStackCIDRs{"192.168.100.0/16"}, + IPAM: &udnv1.IPAMConfig{ + Lifecycle: udnv1.IPAMLifecyclePersistent, + Mode: udnv1.IPAMDisabled, + }, + }, + }}, + ), + Entry("UDN, invalid IPAM config: IPAM enabled & no subnet", + &udnv1.UserDefinedNetwork{Spec: udnv1.UserDefinedNetworkSpec{ + Topology: udnv1.NetworkTopologyLayer2, + Layer2: &udnv1.Layer2Config{ + Role: udnv1.NetworkRoleSecondary, + Subnets: udnv1.DualStackCIDRs{}, + IPAM: &udnv1.IPAMConfig{ + Mode: udnv1.IPAMEnabled, + }, + }, + }}, + ), + Entry("UDN, invalid IPAM config: IPAM disabled & subnet", + &udnv1.UserDefinedNetwork{Spec: udnv1.UserDefinedNetworkSpec{ + Topology: udnv1.NetworkTopologyLayer2, + Layer2: &udnv1.Layer2Config{ + Role: udnv1.NetworkRoleSecondary, + Subnets: udnv1.DualStackCIDRs{"192.168.100.0/16"}, + IPAM: &udnv1.IPAMConfig{ + Mode: udnv1.IPAMDisabled, + }, + }, + }}, + ), Entry("CUDN, invalid topology: topology layer2 & layer3 config", &udnv1.ClusterUserDefinedNetwork{Spec: udnv1.ClusterUserDefinedNetworkSpec{Network: udnv1.NetworkSpec{ Topology: udnv1.NetworkTopologyLayer2, Layer3: &udnv1.Layer3Config{}}}}, @@ -273,10 +310,12 @@ var _ = Describe("NetAttachDefTemplate", func() { udnv1.UserDefinedNetworkSpec{ Topology: udnv1.NetworkTopologyLayer2, Layer2: &udnv1.Layer2Config{ - Role: udnv1.NetworkRolePrimary, - Subnets: udnv1.DualStackCIDRs{"192.168.100.0/24", "2001:dbb::/64"}, - MTU: 1500, - IPAMLifecycle: udnv1.IPAMLifecyclePersistent, + Role: udnv1.NetworkRolePrimary, + Subnets: udnv1.DualStackCIDRs{"192.168.100.0/24", "2001:dbb::/64"}, + MTU: 1500, + IPAM: &udnv1.IPAMConfig{ + Lifecycle: udnv1.IPAMLifecyclePersistent, + }, }, }, `{ @@ -296,11 +335,13 @@ var _ = Describe("NetAttachDefTemplate", func() { udnv1.UserDefinedNetworkSpec{ Topology: udnv1.NetworkTopologyLayer2, Layer2: &udnv1.Layer2Config{ - Role: udnv1.NetworkRolePrimary, - Subnets: udnv1.DualStackCIDRs{"192.168.100.0/24", "2001:dbb::/64"}, - JoinSubnets: udnv1.DualStackCIDRs{"100.62.0.0/24", "fd92::/64"}, - MTU: 1500, - IPAMLifecycle: udnv1.IPAMLifecyclePersistent, + Role: udnv1.NetworkRolePrimary, + Subnets: udnv1.DualStackCIDRs{"192.168.100.0/24", "2001:dbb::/64"}, + JoinSubnets: udnv1.DualStackCIDRs{"100.62.0.0/24", "fd92::/64"}, + MTU: 1500, + IPAM: &udnv1.IPAMConfig{ + Lifecycle: udnv1.IPAMLifecyclePersistent, + }, }, }, `{ @@ -320,10 +361,12 @@ var _ = Describe("NetAttachDefTemplate", func() { udnv1.UserDefinedNetworkSpec{ Topology: udnv1.NetworkTopologyLayer2, Layer2: &udnv1.Layer2Config{ - Role: udnv1.NetworkRoleSecondary, - Subnets: udnv1.DualStackCIDRs{"192.168.100.0/24", "2001:dbb::/64"}, - MTU: 1500, - IPAMLifecycle: udnv1.IPAMLifecyclePersistent, + Role: udnv1.NetworkRoleSecondary, + Subnets: udnv1.DualStackCIDRs{"192.168.100.0/24", "2001:dbb::/64"}, + MTU: 1500, + IPAM: &udnv1.IPAMConfig{ + Lifecycle: udnv1.IPAMLifecyclePersistent, + }, }, }, `{ @@ -400,10 +443,12 @@ var _ = Describe("NetAttachDefTemplate", func() { udnv1.NetworkSpec{ Topology: udnv1.NetworkTopologyLayer2, Layer2: &udnv1.Layer2Config{ - Role: udnv1.NetworkRolePrimary, - Subnets: udnv1.DualStackCIDRs{"192.168.100.0/24", "2001:dbb::/64"}, - MTU: 1500, - IPAMLifecycle: 
udnv1.IPAMLifecyclePersistent, + Role: udnv1.NetworkRolePrimary, + Subnets: udnv1.DualStackCIDRs{"192.168.100.0/24", "2001:dbb::/64"}, + MTU: 1500, + IPAM: &udnv1.IPAMConfig{ + Lifecycle: udnv1.IPAMLifecyclePersistent, + }, }, }, `{ @@ -423,11 +468,13 @@ var _ = Describe("NetAttachDefTemplate", func() { udnv1.NetworkSpec{ Topology: udnv1.NetworkTopologyLayer2, Layer2: &udnv1.Layer2Config{ - Role: udnv1.NetworkRolePrimary, - Subnets: udnv1.DualStackCIDRs{"192.168.100.0/24", "2001:dbb::/64"}, - JoinSubnets: udnv1.DualStackCIDRs{"100.62.0.0/24", "fd92::/64"}, - MTU: 1500, - IPAMLifecycle: udnv1.IPAMLifecyclePersistent, + Role: udnv1.NetworkRolePrimary, + Subnets: udnv1.DualStackCIDRs{"192.168.100.0/24", "2001:dbb::/64"}, + JoinSubnets: udnv1.DualStackCIDRs{"100.62.0.0/24", "fd92::/64"}, + MTU: 1500, + IPAM: &udnv1.IPAMConfig{ + Lifecycle: udnv1.IPAMLifecyclePersistent, + }, }, }, `{ @@ -447,10 +494,12 @@ var _ = Describe("NetAttachDefTemplate", func() { udnv1.NetworkSpec{ Topology: udnv1.NetworkTopologyLayer2, Layer2: &udnv1.Layer2Config{ - Role: udnv1.NetworkRoleSecondary, - Subnets: udnv1.DualStackCIDRs{"192.168.100.0/24", "2001:dbb::/64"}, - MTU: 1500, - IPAMLifecycle: udnv1.IPAMLifecyclePersistent, + Role: udnv1.NetworkRoleSecondary, + Subnets: udnv1.DualStackCIDRs{"192.168.100.0/24", "2001:dbb::/64"}, + MTU: 1500, + IPAM: &udnv1.IPAMConfig{ + Lifecycle: udnv1.IPAMLifecyclePersistent, + }, }, }, `{ diff --git a/go-controller/pkg/crd/userdefinednetwork/v1/apis/applyconfiguration/userdefinednetwork/v1/ipamconfig.go b/go-controller/pkg/crd/userdefinednetwork/v1/apis/applyconfiguration/userdefinednetwork/v1/ipamconfig.go new file mode 100644 index 0000000000..0672f6bc45 --- /dev/null +++ b/go-controller/pkg/crd/userdefinednetwork/v1/apis/applyconfiguration/userdefinednetwork/v1/ipamconfig.go @@ -0,0 +1,51 @@ +/* + + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +// Code generated by applyconfiguration-gen. DO NOT EDIT. + +package v1 + +import ( + v1 "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/crd/userdefinednetwork/v1" +) + +// IPAMConfigApplyConfiguration represents a declarative configuration of the IPAMConfig type for use +// with apply. +type IPAMConfigApplyConfiguration struct { + Mode *v1.IPAMMode `json:"mode,omitempty"` + Lifecycle *v1.NetworkIPAMLifecycle `json:"lifecycle,omitempty"` +} + +// IPAMConfigApplyConfiguration constructs a declarative configuration of the IPAMConfig type for use with +// apply. +func IPAMConfig() *IPAMConfigApplyConfiguration { + return &IPAMConfigApplyConfiguration{} +} + +// WithMode sets the Mode field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Mode field is set to the value of the last call. 
+func (b *IPAMConfigApplyConfiguration) WithMode(value v1.IPAMMode) *IPAMConfigApplyConfiguration { + b.Mode = &value + return b +} + +// WithLifecycle sets the Lifecycle field in the declarative configuration to the given value +// and returns the receiver, so that objects can be built by chaining "With" function invocations. +// If called multiple times, the Lifecycle field is set to the value of the last call. +func (b *IPAMConfigApplyConfiguration) WithLifecycle(value v1.NetworkIPAMLifecycle) *IPAMConfigApplyConfiguration { + b.Lifecycle = &value + return b +} diff --git a/go-controller/pkg/crd/userdefinednetwork/v1/apis/applyconfiguration/userdefinednetwork/v1/layer2config.go b/go-controller/pkg/crd/userdefinednetwork/v1/apis/applyconfiguration/userdefinednetwork/v1/layer2config.go index 4b4e68d353..324e4364c1 100644 --- a/go-controller/pkg/crd/userdefinednetwork/v1/apis/applyconfiguration/userdefinednetwork/v1/layer2config.go +++ b/go-controller/pkg/crd/userdefinednetwork/v1/apis/applyconfiguration/userdefinednetwork/v1/layer2config.go @@ -24,11 +24,11 @@ import ( // Layer2ConfigApplyConfiguration represents a declarative configuration of the Layer2Config type for use // with apply. type Layer2ConfigApplyConfiguration struct { - Role *v1.NetworkRole `json:"role,omitempty"` - MTU *int32 `json:"mtu,omitempty"` - Subnets *v1.DualStackCIDRs `json:"subnets,omitempty"` - JoinSubnets *v1.DualStackCIDRs `json:"joinSubnets,omitempty"` - IPAMLifecycle *v1.NetworkIPAMLifecycle `json:"ipamLifecycle,omitempty"` + Role *v1.NetworkRole `json:"role,omitempty"` + MTU *int32 `json:"mtu,omitempty"` + Subnets *v1.DualStackCIDRs `json:"subnets,omitempty"` + JoinSubnets *v1.DualStackCIDRs `json:"joinSubnets,omitempty"` + IPAM *IPAMConfigApplyConfiguration `json:"ipam,omitempty"` } // Layer2ConfigApplyConfiguration constructs a declarative configuration of the Layer2Config type for use with @@ -69,10 +69,10 @@ func (b *Layer2ConfigApplyConfiguration) WithJoinSubnets(value v1.DualStackCIDRs return b } -// WithIPAMLifecycle sets the IPAMLifecycle field in the declarative configuration to the given value +// WithIPAM sets the IPAM field in the declarative configuration to the given value // and returns the receiver, so that objects can be built by chaining "With" function invocations. -// If called multiple times, the IPAMLifecycle field is set to the value of the last call. -func (b *Layer2ConfigApplyConfiguration) WithIPAMLifecycle(value v1.NetworkIPAMLifecycle) *Layer2ConfigApplyConfiguration { - b.IPAMLifecycle = &value +// If called multiple times, the IPAM field is set to the value of the last call. 
+func (b *Layer2ConfigApplyConfiguration) WithIPAM(value *IPAMConfigApplyConfiguration) *Layer2ConfigApplyConfiguration { + b.IPAM = value return b } diff --git a/go-controller/pkg/crd/userdefinednetwork/v1/apis/applyconfiguration/utils.go b/go-controller/pkg/crd/userdefinednetwork/v1/apis/applyconfiguration/utils.go index c60ceb338e..83c79739ff 100644 --- a/go-controller/pkg/crd/userdefinednetwork/v1/apis/applyconfiguration/utils.go +++ b/go-controller/pkg/crd/userdefinednetwork/v1/apis/applyconfiguration/utils.go @@ -37,6 +37,8 @@ func ForKind(kind schema.GroupVersionKind) interface{} { return &userdefinednetworkv1.ClusterUserDefinedNetworkSpecApplyConfiguration{} case v1.SchemeGroupVersion.WithKind("ClusterUserDefinedNetworkStatus"): return &userdefinednetworkv1.ClusterUserDefinedNetworkStatusApplyConfiguration{} + case v1.SchemeGroupVersion.WithKind("IPAMConfig"): + return &userdefinednetworkv1.IPAMConfigApplyConfiguration{} case v1.SchemeGroupVersion.WithKind("Layer2Config"): return &userdefinednetworkv1.Layer2ConfigApplyConfiguration{} case v1.SchemeGroupVersion.WithKind("Layer3Config"): diff --git a/go-controller/pkg/crd/userdefinednetwork/v1/shared.go b/go-controller/pkg/crd/userdefinednetwork/v1/shared.go index f25e2df159..31cab34cce 100644 --- a/go-controller/pkg/crd/userdefinednetwork/v1/shared.go +++ b/go-controller/pkg/crd/userdefinednetwork/v1/shared.go @@ -86,9 +86,10 @@ type Layer3Subnet struct { HostSubnet int32 `json:"hostSubnet,omitempty"` } -// +kubebuilder:validation:XValidation:rule="self.role != 'Primary' || has(self.subnets)", message="Subnets is required for Primary Layer2 topology" +// +kubebuilder:validation:XValidation:rule="has(self.ipam) && has(self.ipam.mode) && self.ipam.mode != 'Enabled' || has(self.subnets)", message="Subnets is required with ipam.mode is Enabled or unset" +// +kubebuilder:validation:XValidation:rule="!has(self.ipam) || !has(self.ipam.mode) || self.ipam.mode != 'Disabled' || !has(self.subnets)", message="Subnets must be unset when ipam.mode is Disabled" +// +kubebuilder:validation:XValidation:rule="!has(self.ipam) || !has(self.ipam.mode) || self.ipam.mode != 'Disabled' || self.role == 'Secondary'", message="Disabled ipam.mode is only supported for Secondary network" // +kubebuilder:validation:XValidation:rule="!has(self.joinSubnets) || has(self.role) && self.role == 'Primary'", message="JoinSubnets is only supported for Primary network" -// +kubebuilder:validation:XValidation:rule="!has(self.ipamLifecycle) || has(self.subnets) && size(self.subnets) > 0", message="IPAMLifecycle is only supported when subnets are set" // +kubebuilder:validation:XValidation:rule="!has(self.subnets) || !has(self.mtu) || !self.subnets.exists_one(i, isCIDR(i) && cidr(i).ip().family() == 6) || self.mtu >= 1280", message="MTU should be greater than or equal to 1280 when IPv6 subent is used" type Layer2Config struct { // Role describes the network role in the pod. @@ -112,8 +113,7 @@ type Layer2Config struct { // Dual-stack clusters may set 2 subnets (one for each IP family), otherwise only 1 subnet is allowed. // // The format should match standard CIDR notation (for example, "10.128.0.0/16"). - // This field may be omitted. In that case the logical switch implementing the network only provides layer 2 communication, - // and users must configure IP addresses for the pods. As a consequence, Port security only prevents MAC spoofing. + // This field must be omitted if `ipam.mode` is `Disabled`. 
// // +optional Subnets DualStackCIDRs `json:"subnets,omitempty"` @@ -128,16 +128,44 @@ type Layer2Config struct { // +optional JoinSubnets DualStackCIDRs `json:"joinSubnets,omitempty"` - // IPAMLifecycle controls IP addresses management lifecycle. + // IPAM section contains IPAM-related configuration for the network. + // +optional + IPAM *IPAMConfig `json:"ipam,omitempty"` +} + +// +kubebuilder:validation:XValidation:rule="!has(self.lifecycle) || self.lifecycle != 'Persistent' || !has(self.mode) || self.mode == 'Enabled'", message="lifecycle Persistent is only supported when ipam.mode is Enabled" +// +kubebuilder:validation:MinProperties=1 +type IPAMConfig struct { + // Mode controls how much of the IP configuration will be managed by OVN. + // `Enabled` means OVN-Kubernetes will apply IP configuration to the SDN infrastructure and it will also assign IPs + // from the selected subnet to the individual pods. + // `Disabled` means OVN-Kubernetes will only assign MAC addresses and provide layer 2 communication, letting users + // configure IP addresses for the pods. + // `Disabled` is only available for Secondary networks. + // By disabling IPAM, any Kubernetes features that rely on selecting pods by IP will no longer function + // (such as network policy, services, etc). Additionally, IP port security will also be disabled for interfaces attached to this network. + // Defaults to `Enabled`. + // +optional + Mode IPAMMode `json:"mode,omitempty"` + + // Lifecycle controls IP addresses management lifecycle. // // The only allowed value is Persistent. When set, OVN Kubernetes assigned IP addresses will be persisted in an // `ipamclaims.k8s.cni.cncf.io` object. These IP addresses will be reused by other pods if requested. - // Only supported when "subnets" are set. + // Only supported when mode is `Enabled`. // // +optional - IPAMLifecycle NetworkIPAMLifecycle `json:"ipamLifecycle,omitempty"` + Lifecycle NetworkIPAMLifecycle `json:"lifecycle,omitempty"` } +// +kubebuilder:validation:Enum=Enabled;Disabled +type IPAMMode string + +const ( + IPAMEnabled IPAMMode = "Enabled" + IPAMDisabled IPAMMode = "Disabled" +) + // +kubebuilder:validation:Enum=Primary;Secondary type NetworkRole string diff --git a/go-controller/pkg/crd/userdefinednetwork/v1/zz_generated.deepcopy.go b/go-controller/pkg/crd/userdefinednetwork/v1/zz_generated.deepcopy.go index cc8b6a3341..355bc6aaee 100644 --- a/go-controller/pkg/crd/userdefinednetwork/v1/zz_generated.deepcopy.go +++ b/go-controller/pkg/crd/userdefinednetwork/v1/zz_generated.deepcopy.go @@ -147,6 +147,22 @@ func (in DualStackCIDRs) DeepCopy() DualStackCIDRs { return *out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *IPAMConfig) DeepCopyInto(out *IPAMConfig) { + *out = *in + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new IPAMConfig. +func (in *IPAMConfig) DeepCopy() *IPAMConfig { + if in == nil { + return nil + } + out := new(IPAMConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *Layer2Config) DeepCopyInto(out *Layer2Config) { *out = *in @@ -160,6 +176,11 @@ func (in *Layer2Config) DeepCopyInto(out *Layer2Config) { *out = make(DualStackCIDRs, len(*in)) copy(*out, *in) } + if in.IPAM != nil { + in, out := &in.IPAM, &out.IPAM + *out = new(IPAMConfig) + **out = **in + } return } From f31b70f113374120aa10f6ac02aee63bc158e3f7 Mon Sep 17 00:00:00 2001 From: Or Mergi Date: Thu, 16 Jan 2025 13:36:21 +0200 Subject: [PATCH 17/51] e2e,udn crd: Use the right IP family according to env Signed-off-by: Or Mergi --- test/e2e/network_segmentation.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e/network_segmentation.go b/test/e2e/network_segmentation.go index ede9dddd81..1ec3952340 100644 --- a/test/e2e/network_segmentation.go +++ b/test/e2e/network_segmentation.go @@ -940,7 +940,7 @@ spec: topology: "layer3", name: primaryNadName, networkName: primaryNadName, - cidr: "10.10.100.0/24", + cidr: correctCIDRFamily(userDefinedNetworkIPv4Subnet, userDefinedNetworkIPv6Subnet), })) _, err := nadClient.NetworkAttachmentDefinitions(f.Namespace.Name).Create(context.Background(), primaryNetNad, metav1.CreateOptions{}) Expect(err).NotTo(HaveOccurred()) From 8bbeac797272a85910e56d3d497554572bb9cabc Mon Sep 17 00:00:00 2001 From: Patryk Diak Date: Fri, 17 Jan 2025 14:38:44 +0100 Subject: [PATCH 18/51] Fix pods cleanup in ClusterUserDefinedNetwork test netConfig pointer was being modified in a loop so pods were removed from only one namespace. Signed-off-by: Patryk Diak --- test/e2e/network_segmentation.go | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/test/e2e/network_segmentation.go b/test/e2e/network_segmentation.go index bb294a1dc5..4f34a18c8f 100644 --- a/test/e2e/network_segmentation.go +++ b/test/e2e/network_segmentation.go @@ -500,11 +500,6 @@ var _ = Describe("Network Segmentation", func() { namespaceRed := f.Namespace.Name + "-" + red namespaceBlue := f.Namespace.Name + "-" + blue - netConfig := &networkAttachmentConfigParams{ - topology: topology, - cidr: correctCIDRFamily(userDefinedv4Subnet, userDefinedv6Subnet), - role: "primary", - } for _, namespace := range []string{namespaceRed, namespaceBlue} { By("Creating namespace " + namespace) _, err := cs.CoreV1().Namespaces().Create(context.Background(), &v1.Namespace{ @@ -514,14 +509,21 @@ var _ = Describe("Network Segmentation", func() { }, metav1.CreateOptions{}) Expect(err).NotTo(HaveOccurred()) defer func() { + By("Removing namespace " + namespace) Expect(cs.CoreV1().Namespaces().Delete(context.Background(), namespace, metav1.DeleteOptions{})).To(Succeed()) }() } networkNamespaceMap := map[string]string{namespaceRed: red, namespaceBlue: blue} for namespace, network := range networkNamespaceMap { By("creating the network " + network + " in namespace " + namespace) - netConfig.namespace = namespace - netConfig.name = network + + netConfig := &networkAttachmentConfigParams{ + topology: topology, + cidr: correctCIDRFamily(userDefinedv4Subnet, userDefinedv6Subnet), + role: "primary", + namespace: namespace, + name: network, + } Expect(createNetworkFn(netConfig)).To(Succeed()) // update the name because createNetworkFn may mutate the netConfig.name @@ -673,7 +675,7 @@ var _ = Describe("Network Segmentation", func() { cleanup, err := createManifest("", cudnManifest) DeferCleanup(func() { cleanup() - By("delete pods in test namespace to unblock CUDN CR & associate NAD deletion") + By(fmt.Sprintf("delete pods in %s namespace to unblock CUDN CR & associate NAD 
deletion", c.namespace)) Expect(cs.CoreV1().Pods(c.namespace).DeleteCollection(context.Background(), metav1.DeleteOptions{}, metav1.ListOptions{})).To(Succeed()) _, err := e2ekubectl.RunKubectl("", "delete", "clusteruserdefinednetwork", cudnName, "--wait", fmt.Sprintf("--timeout=%ds", 120)) Expect(err).NotTo(HaveOccurred()) From a3c44c2483d9f9a7cc336ce05d10143e34f08bd9 Mon Sep 17 00:00:00 2001 From: nithyar Date: Thu, 16 Jan 2025 20:03:26 -0800 Subject: [PATCH 19/51] Add missing host-cidrs annotation for DPU Host Signed-off-by: nithyar --- .../pkg/node/node_ip_handler_linux.go | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/go-controller/pkg/node/node_ip_handler_linux.go b/go-controller/pkg/node/node_ip_handler_linux.go index eb5c368eff..085d332d71 100644 --- a/go-controller/pkg/node/node_ip_handler_linux.go +++ b/go-controller/pkg/node/node_ip_handler_linux.go @@ -62,7 +62,34 @@ func newAddressManagerInternal(nodeName string, k kube.Interface, mgmtConfig *ma syncPeriod: 30 * time.Second, } mgr.nodeAnnotator = kube.NewNodeAnnotator(k, nodeName) - mgr.sync() + if config.OvnKubeNode.Mode == types.NodeModeDPU { + var ifAddrs []*net.IPNet + + // update k8s.ovn.org/host-cidrs + node, err := watchFactory.GetNode(nodeName) + if err != nil { + klog.Errorf("Failed to get node %s: %v", nodeName, err) + return nil + } + if useNetlink { + // get updated interface IP addresses for the gateway bridge + ifAddrs, err = gwBridge.updateInterfaceIPAddresses(node) + if err != nil { + klog.Errorf("Failed to obtain interface IP addresses for node %s: %v", nodeName, err) + return nil + } + } + if err = mgr.updateHostCIDRs(node, ifAddrs); err != nil { + klog.Errorf("Failed to update host-cidrs annotations on node %s: %v", nodeName, err) + return nil + } + if err = mgr.nodeAnnotator.Run(); err != nil { + klog.Errorf("Failed to set host-cidrs annotations on node %s: %v", nodeName, err) + return nil + } + } else { + mgr.sync() + } return mgr } From 4be55de68509e8c68c676473f2441314b25bac61 Mon Sep 17 00:00:00 2001 From: Martin Kennelly Date: Sat, 18 Jan 2025 09:49:21 +0000 Subject: [PATCH 20/51] EIP: reduce log spam from errors that arent really errors due to race for data Within some funcs for EIP, we depend on OVN constructs (address sets usually) that are created async. We are cluttering up the logs with spam when its not really an error that said constructs haven't been (yet) created. 
Signed-off-by: Martin Kennelly --- go-controller/pkg/ovn/egressip.go | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go index 6cd35e4232..d9767386a4 100644 --- a/go-controller/pkg/ovn/egressip.go +++ b/go-controller/pkg/ovn/egressip.go @@ -3169,8 +3169,9 @@ func createDefaultReRouteQoSRuleOps(nbClient libovsdbclient.Client, addressSetFa Direction: nbdb.QoSDirectionFromLport, } if isIPv4Mode { + // if address set hash name is empty, the address set has yet to be created if ipv4EgressIPServedPodsAS == "" { - return nil, nil, fmt.Errorf("failed to fetch IPv4 address set %s hash names", EgressIPServedPodsAddrSetName) + return nil, nil, types.NewSuppressedError(fmt.Errorf("failed to fetch IPv4 address set %s hash names", EgressIPServedPodsAddrSetName)) } qosV4Rule := qosRule qosV4Rule.Match = fmt.Sprintf(`ip4.src == $%s && ct.trk && ct.rpl`, ipv4EgressIPServedPodsAS) @@ -3182,8 +3183,9 @@ func createDefaultReRouteQoSRuleOps(nbClient libovsdbclient.Client, addressSetFa qoses = append(qoses, &qosV4Rule) } if isIPv6Mode { + // if address set hash name is empty, the address set has yet to be created if ipv6EgressIPServedPodsAS == "" { - return nil, nil, fmt.Errorf("failed to fetch IPv6 address set %s hash names", EgressIPServedPodsAddrSetName) + return nil, nil, types.NewSuppressedError(fmt.Errorf("failed to fetch IPv6 address set %s hash names", EgressIPServedPodsAddrSetName)) } qosV6Rule := qosRule qosV6Rule.Match = fmt.Sprintf(`ip6.src == $%s && ct.trk && ct.rpl`, ipv6EgressIPServedPodsAS) @@ -3386,15 +3388,17 @@ func ensureDefaultNoRerouteNodePolicies(nbClient libovsdbclient.Client, addressS var matchV4, matchV6 string // construct the policy match if len(v4NodeAddrs) > 0 { + // if address set hash name is empty, the address set has yet to be created if ipv4EgressIPServedPodsAS == "" || ipv4EgressServiceServedPodsAS == "" || ipv4ClusterNodeIPAS == "" { - return fmt.Errorf("address set name(s) %s not found %q %q %q", as.GetName(), ipv4EgressServiceServedPodsAS, ipv4EgressServiceServedPodsAS, ipv4ClusterNodeIPAS) + return types.NewSuppressedError(fmt.Errorf("address set name(s) %s not found %q %q %q", as.GetName(), ipv4EgressServiceServedPodsAS, ipv4EgressServiceServedPodsAS, ipv4ClusterNodeIPAS)) } matchV4 = fmt.Sprintf(`(ip4.src == $%s || ip4.src == $%s) && ip4.dst == $%s`, ipv4EgressIPServedPodsAS, ipv4EgressServiceServedPodsAS, ipv4ClusterNodeIPAS) } if len(v6NodeAddrs) > 0 { + // if address set hash name is empty, the address set has yet to be created if ipv6EgressIPServedPodsAS == "" || ipv6EgressServiceServedPodsAS == "" || ipv6ClusterNodeIPAS == "" { - return fmt.Errorf("address set hash name(s) %s not found", as.GetName()) + return types.NewSuppressedError(fmt.Errorf("address set hash name(s) %s not found", as.GetName())) } matchV6 = fmt.Sprintf(`(ip6.src == $%s || ip6.src == $%s) && ip6.dst == $%s`, ipv6EgressIPServedPodsAS, ipv6EgressServiceServedPodsAS, ipv6ClusterNodeIPAS) @@ -3600,14 +3604,17 @@ func ensureDefaultNoRerouteUDNEnabledSvcPolicies(nbClient libovsdbclient.Client, if err != nil { return fmt.Errorf("failed to retrieve UDN enabled service address set from NB DB: %v", err) } - + // if address set hash name is empty, the address set has yet to be created + if (v4 && ipv4UDNEnabledSvcAS == "") || (v6 && ipv6UDNEnabledSvcAS == "") { + return types.NewSuppressedError(fmt.Errorf("failed to retrieve UDN enabled service address set")) + } var matchV4, matchV6 string // 
construct the policy match - if v4 && ipv4UDNEnabledSvcAS != "" { + if v4 { matchV4 = fmt.Sprintf(`(ip4.src == $%s || ip4.src == $%s) && ip4.dst == $%s`, ipv4EgressIPServedPodsAS, ipv4EgressServiceServedPodsAS, ipv4UDNEnabledSvcAS) } - if v6 && ipv6UDNEnabledSvcAS != "" { + if v6 { if ipv6EgressIPServedPodsAS == "" || ipv6EgressServiceServedPodsAS == "" || ipv6UDNEnabledSvcAS == "" { return fmt.Errorf("address set hash name(s) %s not found", as.GetName()) } From 87de9f821fb395f3b4b9b4cc8381781e0f05b504 Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Sat, 18 Jan 2025 14:39:48 -0500 Subject: [PATCH 21/51] Suppress layer 2 missing pod anno errors For layer 2 UDNs it is expected that ovnkube-controller will process the pod with the annotation missing while waiting for cluster manager to allocate it. Suppress the error in that case. Signed-off-by: Tim Rozet --- go-controller/pkg/ovn/base_network_controller_pods.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/go-controller/pkg/ovn/base_network_controller_pods.go b/go-controller/pkg/ovn/base_network_controller_pods.go index 37be0ed77e..6cc65f7e37 100644 --- a/go-controller/pkg/ovn/base_network_controller_pods.go +++ b/go-controller/pkg/ovn/base_network_controller_pods.go @@ -931,8 +931,9 @@ func (bnc *BaseNetworkController) allocatePodAnnotationForSecondaryNetwork(pod * if !bnc.allocatesPodAnnotation() { podAnnotation, _ := util.UnmarshalPodAnnotation(pod.Annotations, nadName) if !util.IsValidPodAnnotation(podAnnotation) { - return nil, false, fmt.Errorf("failed to get PodAnnotation for %s/%s/%s, cluster manager might have not allocated it yet", - nadName, pod.Namespace, pod.Name) + return nil, false, ovntypes.NewSuppressedError(fmt.Errorf( + "failed to get PodAnnotation for %s/%s/%s, cluster manager might have not allocated it yet", + nadName, pod.Namespace, pod.Name)) } return podAnnotation, false, nil From e8a0e68bf93fdb92bb8faf6922207ea0e1998914 Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Sat, 18 Jan 2025 15:01:47 -0500 Subject: [PATCH 22/51] Fix missing error wrap on IC remote node add Without this the error for missing annotations will not be suppressed and then false positive errors will be reported. Signed-off-by: Tim Rozet --- go-controller/pkg/ovn/secondary_layer3_network_controller.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go-controller/pkg/ovn/secondary_layer3_network_controller.go b/go-controller/pkg/ovn/secondary_layer3_network_controller.go index 50161d9cd2..f34953905b 100644 --- a/go-controller/pkg/ovn/secondary_layer3_network_controller.go +++ b/go-controller/pkg/ovn/secondary_layer3_network_controller.go @@ -798,7 +798,7 @@ func (oc *SecondaryLayer3NetworkController) addUpdateRemoteNodeEvent(node *kapi. var err error if syncZoneIc && config.OVNKubernetesFeature.EnableInterconnect { if err = oc.zoneICHandler.AddRemoteZoneNode(node); err != nil { - err = fmt.Errorf("failed to add the remote zone node [%s] to the zone interconnect handler, err : %v", node.Name, err) + err = fmt.Errorf("failed to add the remote zone node [%s] to the zone interconnect handler, err : %w", node.Name, err) oc.syncZoneICFailed.Store(node.Name, true) } else { oc.syncZoneICFailed.Delete(node.Name) From 0058b32630ff76037eb8a8ae8b12c777f2691d44 Mon Sep 17 00:00:00 2001 From: Dumitru Ceara Date: Wed, 11 Dec 2024 15:39:55 +0100 Subject: [PATCH 23/51] e2e/external_gateways: Update test to avoid accidental matching of conntrack entries. 
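Conntrack keys flows on the 5-tuple, so reusing source port 50000 after the gateway MAC change would match the entry installed before migration; a fresh source port yields a brand-new flow. The sketch below (illustrative addresses and ports only, not repo code) makes that explicit.

package main

import "fmt"

type fiveTuple struct {
	proto            string
	srcIP, dstIP     string
	srcPort, dstPort int
}

func main() {
	// Pretend conntrack table: one entry created before the gateway migration.
	conntrack := map[fiveTuple]string{}
	pre := fiveTuple{"udp", "10.244.0.5", "172.18.0.10", 50000, 80}
	conntrack[pre] = "reply path pinned to the pre-migration gateway"

	post := pre
	post.srcPort = 50001 // the only change the test needs to avoid the stale entry
	if _, hit := conntrack[post]; !hit {
		fmt.Println("new flow: no stale conntrack entry is matched")
	}
}
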
With the OVN bump to 24.09.1-10 a bug is fixed in OVN due to which ecmp-symmetric-reply wasn't honored for "single path ECMP" routes. The "Should validate TCP/UDP connectivity even after MAC change (gateway migration) for egress" e2e test happened to use a "single path ECMP" route and the OVN bug fix made it fail (because now conntrack entries correctly get created even if the route has a single path). The purpose of the test wasn't related to ECMP symmetric reply behavior necessarily but more related to ARP/ND updates on gateway migration. Therefore it's safe to change the test so that the packet that's sent after gateway migration uses a different TCP/UDP source port. Like that it won't match any existing conntrack entries. Signed-off-by: Dumitru Ceara --- test/e2e/external_gateways.go | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/test/e2e/external_gateways.go b/test/e2e/external_gateways.go index c6057b47e8..c618552e64 100644 --- a/test/e2e/external_gateways.go +++ b/test/e2e/external_gateways.go @@ -1465,6 +1465,13 @@ var _ = ginkgo.Describe("External Gateway", func() { ginkgo.DescribeTable("Should validate TCP/UDP connectivity even after MAC change (gateway migration) for egress", func(protocol string, addresses *gatewayTestIPs, destPort, destPortOnPod int) { + ncCmd := func(sourcePort int, target string) []string { + if protocol == "tcp" { + return []string {"exec", srcPingPodName, "--", "bash", "-c", fmt.Sprintf("echo | nc -p %d -s %s -w 1 %s %d", sourcePort, addresses.srcPodIP, target, destPort)} + } else { + return []string {"exec", srcPingPodName, "--", "bash", "-c", fmt.Sprintf("echo | nc -p %d -s %s -w 1 -u %s %d", sourcePort, addresses.srcPodIP, target, destPort)} + } + } if addresses.srcPodIP == "" || addresses.nodeIP == "" { skipper.Skipf("Skipping as pod ip / node ip are not set pod ip %s node ip %s", addresses.srcPodIP, addresses.nodeIP) } @@ -1504,13 +1511,8 @@ var _ = ginkgo.Describe("External Gateway", func() { ginkgo.By("Checking if one of the external gateways are reachable via Egress") target := addresses.targetIPs[0] sourcePort := 50000 - args := []string{"exec", srcPingPodName, "--"} - if protocol == "tcp" { - args = append(args, "bash", "-c", fmt.Sprintf("echo | nc -p %d -s %s -w 1 %s %d", sourcePort, addresses.srcPodIP, target, destPort)) - } else { - args = append(args, "bash", "-c", fmt.Sprintf("echo | nc -p %d -s %s -w 1 -u %s %d", sourcePort, addresses.srcPodIP, target, destPort)) - } - res, err := e2ekubectl.RunKubectl(f.Namespace.Name, args...) + + res, err := e2ekubectl.RunKubectl(f.Namespace.Name, ncCmd(sourcePort, target)...) framework.ExpectNoError(err, "failed to reach %s (%s)", target, protocol) hostname := strings.TrimSuffix(res, "\n") var gateway string @@ -1528,7 +1530,7 @@ var _ = ginkgo.Describe("External Gateway", func() { tcpDumpSync := sync.WaitGroup{} tcpDumpSync.Add(1) go checkReceivedPacketsOnContainer(gateway, srcPingPodName, anyLink, []string{protocol, "and", "port", strconv.Itoa(sourcePort)}, &tcpDumpSync) - res, err = e2ekubectl.RunKubectl(f.Namespace.Name, args...) + res, err = e2ekubectl.RunKubectl(f.Namespace.Name, ncCmd(sourcePort, target)...) 
framework.ExpectNoError(err, "failed to reach %s (%s)", target, protocol) hostname2 := strings.TrimSuffix(res, "\n") gomega.Expect(hostname).To(gomega.Equal(hostname2)) @@ -1561,6 +1563,10 @@ var _ = ginkgo.Describe("External Gateway", func() { time.Sleep(1 * time.Second) ginkgo.By("Post-Migration: Sending Egress traffic and verify it is received") + // We don't want traffic to hit the already existing conntrack entry (created for source port 50000) + // so we use a fresh source port. + sourcePort = 50001 + tcpDumpSync = sync.WaitGroup{} tcpDumpSync.Add(1) go checkReceivedPacketsOnContainer(gateway, srcPingPodName, gwLink, []string{protocol, "and", "ether", "host", newDummyMac, "and", "port", strconv.Itoa(sourcePort)}, &tcpDumpSync) @@ -1568,7 +1574,7 @@ var _ = ginkgo.Describe("External Gateway", func() { // SKB_DROP_REASON_NEIGH_FAILED after changing the MAC address. Something breaks with ARP // on the gateway container. Therefore, ignore the reply from gateway, as we only care about the egress // packet arriving with correct MAC address. - _, _ = e2ekubectl.RunKubectl(f.Namespace.Name, args...) + _, _ = e2ekubectl.RunKubectl(f.Namespace.Name, ncCmd(sourcePort, target)...) tcpDumpSync.Wait() checkAPBExternalRouteStatus(defaultPolicyName) From 73492d99f8de43177eb1afe6965e4cba80f62fc0 Mon Sep 17 00:00:00 2001 From: Dumitru Ceara Date: Wed, 11 Dec 2024 10:06:43 +0100 Subject: [PATCH 24/51] Dockerfile.fedora: Bump OVN to ovn24.09-24.09.1-10.fc41 This picks up the following relevant bug fixes: https://issues.redhat.com/browse/FDP-906 "ovn-controller: lib/ovsdb-idl.c:3596: assertion row->new_datum != NULL failed in ovsdb_idl_txn_write__()" 6448f5e364 pinctrl: Skip non-local mac bindings in run_buffered_binding(). ea35347320 pinctrl: Skip deleted mac bindings in run_buffered_binding(). 33a6ae53f4 pinctrl: Use correct map size in pinctrl_handle_put_fdb(). 8eaa7d5991 controller: Fix "use after free" issue in statctrl_run(). 8579859f51 mac-cache: Properly handle deletion of SB mac_bindings. https://issues.redhat.com/browse/FDP-752 "ovn-northd IPAM incorrectly reports duplicate IP when part of excluded_ips" 2a24b03f7f ipam: Do not report error for static assigned IPs. https://issues.redhat.com/browse/FDP-786 "When an ECMP symmetric route is removed, northd removes all logical flows from SBDB for ECMP" 7b00627433 northd: Respect --ecmp-symmetric-reply for single routes. 
Signed-off-by: Dumitru Ceara --- dist/images/Dockerfile.fedora | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dist/images/Dockerfile.fedora b/dist/images/Dockerfile.fedora index 858f6f9d88..b1634d61e8 100644 --- a/dist/images/Dockerfile.fedora +++ b/dist/images/Dockerfile.fedora @@ -15,7 +15,7 @@ USER root ENV PYTHONDONTWRITEBYTECODE yes -ARG ovnver=ovn-24.09.0-33.fc41 +ARG ovnver=ovn-24.09.1-10.fc41 # Automatically populated when using docker buildx ARG TARGETPLATFORM ARG BUILDPLATFORM From 9743febaa6a6b97005e1e6a90b6f584a15b80321 Mon Sep 17 00:00:00 2001 From: Patryk Diak Date: Mon, 20 Jan 2025 18:26:51 +0100 Subject: [PATCH 25/51] Refactor and fix UDN/CUDN readiness checks Signed-off-by: Patryk Diak --- test/e2e/network_segmentation.go | 245 +++++++++++++----- ...work_segmentation_endpointslices_mirror.go | 4 +- 2 files changed, 178 insertions(+), 71 deletions(-) diff --git a/test/e2e/network_segmentation.go b/test/e2e/network_segmentation.go index c7ede35642..0a6986cc8a 100644 --- a/test/e2e/network_segmentation.go +++ b/test/e2e/network_segmentation.go @@ -6,21 +6,24 @@ import ( "fmt" "net" "os" + "reflect" "strings" "time" + nadapi "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + nadclient "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/clientset/versioned/typed/k8s.cni.cncf.io/v1" "github.com/onsi/ginkgo/v2" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" - - nadapi "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" - nadclient "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/clientset/versioned/typed/k8s.cni.cncf.io/v1" v1 "k8s.io/api/core/v1" kerrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/apimachinery/pkg/util/rand" + "k8s.io/client-go/dynamic" clientset "k8s.io/client-go/kubernetes" "k8s.io/kubectl/pkg/util/podutils" "k8s.io/kubernetes/test/e2e/framework" @@ -676,7 +679,7 @@ var _ = Describe("Network Segmentation", func() { udnManifest := generateUserDefinedNetworkManifest(c) cleanup, err := createManifest(c.namespace, udnManifest) DeferCleanup(cleanup) - Expect(waitForUserDefinedNetworkReady(c.namespace, c.name, 5*time.Second)).To(Succeed()) + Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, c.namespace, c.name), 5*time.Second, time.Second).Should(Succeed()) return err }), Entry("ClusterUserDefinedNetwork", func(c *networkAttachmentConfigParams) error { @@ -691,7 +694,7 @@ var _ = Describe("Network Segmentation", func() { _, err := e2ekubectl.RunKubectl("", "delete", "clusteruserdefinednetwork", cudnName, "--wait", fmt.Sprintf("--timeout=%ds", 120)) Expect(err).NotTo(HaveOccurred()) }) - Expect(waitForClusterUserDefinedNetworkReady(c.name, 5*time.Second)).To(Succeed()) + Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, c.name), 5*time.Second, time.Second).Should(Succeed()) return err }), ) @@ -802,7 +805,7 @@ var _ = Describe("Network Segmentation", func() { cleanup, err := createManifest(defaultNetNamespace.Name, newPrimaryUserDefinedNetworkManifest(testUdnName)) DeferCleanup(cleanup) Expect(err).NotTo(HaveOccurred()) - Expect(waitForUserDefinedNetworkReady(defaultNetNamespace.Name, testUdnName, 5*time.Second)).To(Not(Succeed())) + 
Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, defaultNetNamespace.Name, testUdnName), 5*time.Second).Should(Not(Succeed())) }) It("should be able to create pod and it will attach to the cluster default network", func() { @@ -851,7 +854,7 @@ var _ = Describe("Network Segmentation", func() { cleanup, err := createManifest(defaultNetNamespace.Name, newL2SecondaryUDNManifest(testUdnName)) DeferCleanup(cleanup) Expect(err).NotTo(HaveOccurred()) - Expect(waitForUserDefinedNetworkReady(defaultNetNamespace.Name, testUdnName, 5*time.Second)).To(Succeed()) + Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, defaultNetNamespace.Name, testUdnName), 5*time.Second, time.Second).Should(Succeed()) }) It("should create NetworkAttachmentDefinition according to spec", func() { @@ -914,7 +917,8 @@ var _ = Describe("Network Segmentation", func() { "should fail to delete UserDefinedNetwork associated NetworkAttachmentDefinition when used") By("verify UserDefinedNetwork status reports consuming pod") - assertUDNStatusReportsConsumers(defaultNetNamespace.Name, testUdnName, testPodName) + err = validateUDNStatusReportsConsumers(f.DynamicClient, defaultNetNamespace.Name, testUdnName, testPodName) + Expect(err).ToNot(HaveOccurred()) By("delete test pod") err = cs.CoreV1().Pods(defaultNetNamespace.Name).Delete(context.Background(), testPodName, metav1.DeleteOptions{}) @@ -961,7 +965,7 @@ spec: cleanup, err := createManifest(f.Namespace.Name, udnManifest) defer cleanup() Expect(err).NotTo(HaveOccurred()) - Expect(waitForUserDefinedNetworkReady(f.Namespace.Name, testUdnName, 5*time.Second)).To(Succeed()) + Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, f.Namespace.Name, testUdnName), 5*time.Second, time.Second).Should(Succeed()) conditionsJSON, err := e2ekubectl.RunKubectl(f.Namespace.Name, "get", "userdefinednetwork", testUdnName, "-o", "jsonpath={.status.conditions}") Expect(err).NotTo(HaveOccurred()) @@ -1094,11 +1098,13 @@ spec: return nil }) Expect(err).NotTo(HaveOccurred()) - Expect(waitForClusterUserDefinedNetworkReady(testClusterUdnName, 5*time.Second)).To(Succeed()) + Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, testClusterUdnName), 5*time.Second, time.Second).Should(Succeed()) }) It("should create NAD according to spec in each target namespace and report active namespaces", func() { - assertClusterUDNStatusReportsActiveNamespaces(testClusterUdnName, testTenantNamespaces...) + Eventually( + validateClusterUDNStatusReportsActiveNamespacesFunc(f.DynamicClient, testClusterUdnName, testTenantNamespaces...), + 1*time.Minute, 3*time.Second).Should(Succeed()) udnUidRaw, err := e2ekubectl.RunKubectl("", "get", clusterUserDefinedNetworkResource, testClusterUdnName, "-o", "jsonpath='{.metadata.uid}'") Expect(err).NotTo(HaveOccurred(), "should get the ClsuterUserDefinedNetwork UID") @@ -1132,8 +1138,9 @@ spec: patch := fmt.Sprintf(`[{"op": "add", "path": "./spec/namespaceSelector/matchExpressions/0/values/-", "value": "%s"}]`, testNewNs) _, err := e2ekubectl.RunKubectl("", "patch", clusterUserDefinedNetworkResource, testClusterUdnName, "--type=json", "-p="+patch) Expect(err).NotTo(HaveOccurred()) - Expect(waitForClusterUserDefinedNetworkReady(testClusterUdnName, 5*time.Second)).To(Succeed()) - assertClusterUDNStatusReportsActiveNamespaces(testClusterUdnName, testTenantNamespaces...) 
+ Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, testClusterUdnName), 5*time.Second, time.Second).Should(Succeed()) + err = validateClusterUDNStatusReportsActiveNamespacesFunc(f.DynamicClient, testClusterUdnName, testTenantNamespaces...)() + Expect(err).NotTo(HaveOccurred()) By("create the new target namespace") _, err = cs.CoreV1().Namespaces().Create(context.Background(), &v1.Namespace{ @@ -1148,7 +1155,9 @@ spec: }) expectedActiveNamespaces := append(testTenantNamespaces, testNewNs) - assertClusterUDNStatusReportsActiveNamespaces(testClusterUdnName, expectedActiveNamespaces...) + Eventually( + validateClusterUDNStatusReportsActiveNamespacesFunc(f.DynamicClient, testClusterUdnName, expectedActiveNamespaces...), + 1*time.Minute, 3*time.Second).Should(Succeed()) udnUidRaw, err := e2ekubectl.RunKubectl("", "get", clusterUserDefinedNetworkResource, testClusterUdnName, "-o", "jsonpath='{.metadata.uid}'") Expect(err).NotTo(HaveOccurred(), "should get the ClusterUserDefinedNetwork UID") @@ -1181,8 +1190,9 @@ spec: By("verify status reports the new added namespace as active") expectedActiveNs := append(testTenantNamespaces, testNewNs) - assertClusterUDNStatusReportsActiveNamespaces(testClusterUdnName, expectedActiveNs...) - + Eventually( + validateClusterUDNStatusReportsActiveNamespacesFunc(f.DynamicClient, testClusterUdnName, expectedActiveNs...), + 1*time.Minute, 3*time.Second).Should(Succeed()) By("verify a NAD is created in new target namespace according to spec") udnUidRaw, err := e2ekubectl.RunKubectl("", "get", clusterUserDefinedNetworkResource, testClusterUdnName, "-o", "jsonpath='{.metadata.uid}'") Expect(err).NotTo(HaveOccurred(), "should get the ClusterUserDefinedNetwork UID") @@ -1199,7 +1209,9 @@ spec: By("verify status reports remained target namespaces only as active") expectedActiveNs := []string{activeTenantNs} - assertClusterUDNStatusReportsActiveNamespaces(testClusterUdnName, expectedActiveNs...) 
+ Eventually( + validateClusterUDNStatusReportsActiveNamespacesFunc(f.DynamicClient, testClusterUdnName, expectedActiveNs...), + 1*time.Minute, 3*time.Second).Should(Succeed()) removedTenantNs := testTenantNamespaces[0] By("verify managed NAD not exist in removed target namespace") @@ -1252,9 +1264,8 @@ spec: "should fail to delete UserDefinedNetwork associated NetworkAttachmentDefinition when used") By("verify CR status reports consuming pod") - conditionsJSON, err := e2ekubectl.RunKubectl("", "get", clusterUserDefinedNetworkResource, testClusterUdnName, "-o", "jsonpath='{.status.conditions}'") + err = validateClusterUDNStatusReportConsumers(f.DynamicClient, testClusterUdnName, inUseNetTestTenantNamespace, testPodName) Expect(err).NotTo(HaveOccurred()) - assertClusterUDNStatusReportConsumers(conditionsJSON, testClusterUdnName, inUseNetTestTenantNamespace, testPodName) By("delete test pod") err = cs.CoreV1().Pods(inUseNetTestTenantNamespace).Delete(context.Background(), testPodName, metav1.DeleteOptions{}) @@ -1429,7 +1440,7 @@ spec: udnManifest := generateUserDefinedNetworkManifest(c) cleanup, err := createManifest(f.Namespace.Name, udnManifest) DeferCleanup(cleanup) - Expect(waitForUserDefinedNetworkReady(f.Namespace.Name, c.name, 5*time.Second)).To(Succeed()) + Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, f.Namespace.Name, c.name), 5*time.Second, time.Second).Should(Succeed()) return err }), Entry("ClusterUserDefinedNetwork", func(c *networkAttachmentConfigParams) error { @@ -1443,7 +1454,7 @@ spec: _, err := e2ekubectl.RunKubectl("", "delete", "clusteruserdefinednetwork", c.name, "--wait", fmt.Sprintf("--timeout=%ds", 120)) Expect(err).NotTo(HaveOccurred()) }) - Expect(waitForClusterUserDefinedNetworkReady(c.name, 5*time.Second)).To(Succeed()) + Eventually(clusterUserDefinedNetworkReadyFunc(f.DynamicClient, c.name), 5*time.Second, time.Second).Should(Succeed()) return err }), ) @@ -1462,7 +1473,7 @@ spec: cleanup, err := createManifest(f.Namespace.Name, newPrimaryUserDefinedNetworkManifest(testUdnName)) DeferCleanup(cleanup) Expect(err).NotTo(HaveOccurred()) - Expect(waitForUserDefinedNetworkReady(f.Namespace.Name, testUdnName, 5*time.Second)).To(Succeed()) + Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, f.Namespace.Name, testUdnName), 5*time.Second, time.Second).Should(Succeed()) By("create UDN pod") cfg := podConfig(testPodName, withCommand(func() []string { return httpServerContainerCmd(port) @@ -1603,7 +1614,7 @@ spec: cleanup, err := createManifest(netConfig.namespace, udnManifest) Expect(err).ShouldNot(HaveOccurred(), "creating manifest must succeed") DeferCleanup(cleanup) - Expect(waitForUserDefinedNetworkReady(netConfig.namespace, netConfig.name, 5*time.Second)).To(Succeed()) + Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, netConfig.namespace, netConfig.name), 5*time.Second, time.Second).Should(Succeed()) nodes, err := e2enode.GetBoundedReadySchedulableNodes(context.Background(), f.ClientSet, 2) Expect(err).ShouldNot(HaveOccurred(), "test requires at least two schedulable nodes") Expect(len(nodes.Items)).Should(BeNumerically(">=", 2), "test requires >= 2 Ready nodes") @@ -1744,14 +1755,50 @@ func generateLayer3Subnets(cidrs string) []string { return subnets } -func waitForUserDefinedNetworkReady(namespace, name string, timeout time.Duration) error { - _, err := e2ekubectl.RunKubectl(namespace, "wait", "userdefinednetwork", name, "--for", "condition=NetworkCreated=True", "--timeout", timeout.String()) - return err +// userDefinedNetworkReadyFunc 
returns a function that checks for the NetworkCreated condition in the provided udn +func userDefinedNetworkReadyFunc(client dynamic.Interface, namespace, name string) func() error { + return func() error { + udn, err := client.Resource(udnGVR).Namespace(namespace).Get(context.Background(), name, metav1.GetOptions{}, "status") + if err != nil { + return err + } + conditions, err := getConditions(udn) + if err != nil { + return err + } + if len(conditions) == 0 { + return fmt.Errorf("no conditions found in: %v", udn) + } + for _, condition := range conditions { + if condition.Type == "NetworkCreated" && condition.Status == metav1.ConditionTrue { + return nil + } + } + return fmt.Errorf("no NetworkCreated condition found in: %v", udn) + } } -func waitForClusterUserDefinedNetworkReady(name string, timeout time.Duration) error { - _, err := e2ekubectl.RunKubectl("", "wait", "clusteruserdefinednetwork", name, "--for", "condition=NetworkCreated=True", "--timeout", timeout.String()) - return err +// userDefinedNetworkReadyFunc returns a function that checks for the NetworkCreated condition in the provided cluster udn +func clusterUserDefinedNetworkReadyFunc(client dynamic.Interface, name string) func() error { + return func() error { + cUDN, err := client.Resource(clusterUDNGVR).Get(context.Background(), name, metav1.GetOptions{}, "status") + if err != nil { + return err + } + conditions, err := getConditions(cUDN) + if err != nil { + return err + } + if len(conditions) == 0 { + return fmt.Errorf("no conditions found in: %v", cUDN) + } + for _, condition := range conditions { + if condition.Type == "NetworkCreated" && condition.Status == metav1.ConditionTrue { + return nil + } + } + return fmt.Errorf("no NetworkCreated condition found in: %v", cUDN) + } } func createManifest(namespace, manifest string) (func(), error) { @@ -1798,34 +1845,37 @@ func assertL2SecondaryNetAttachDefManifest(nadClient nadclient.K8sCniCncfIoV1Int }`)) } -func assertUDNStatusReportsConsumers(udnNamesapce, udnName, expectedPodName string) { - conditionsRaw, err := e2ekubectl.RunKubectl(udnNamesapce, "get", "userdefinednetwork", udnName, "-o", "jsonpath='{.status.conditions}'") - Expect(err).NotTo(HaveOccurred()) - conditionsRaw = strings.ReplaceAll(conditionsRaw, `\`, ``) - conditionsRaw = strings.ReplaceAll(conditionsRaw, `'`, ``) - var conditions []metav1.Condition - Expect(json.Unmarshal([]byte(conditionsRaw), &conditions)).To(Succeed()) +func validateUDNStatusReportsConsumers(client dynamic.Interface, udnNamesapce, udnName, expectedPodName string) error { + udn, err := client.Resource(udnGVR).Namespace(udnNamesapce).Get(context.Background(), udnName, metav1.GetOptions{}) + if err != nil { + return err + } + conditions, err := getConditions(udn) + if err != nil { + return err + } conditions = normalizeConditions(conditions) expectedMsg := fmt.Sprintf("failed to delete NetworkAttachmentDefinition [%[1]s/%[2]s]: network in use by the following pods: [%[1]s/%[3]s]", udnNamesapce, udnName, expectedPodName) - found := false + expectedCondition := metav1.Condition{ + Type: "NetworkCreated", + Status: "False", + Reason: "SyncError", + Message: expectedMsg, + } for _, condition := range conditions { - if found, _ = Equal(metav1.Condition{ - Type: "NetworkCreated", - Status: "False", - Reason: "SyncError", - Message: expectedMsg, - }).Match(condition); found { - break + if condition == expectedCondition { + return nil } } - Expect(found).To(BeTrue(), "expected condition not found in %v", conditions) + return 
fmt.Errorf("expected condition %v not found in %v", expectedCondition, conditions) } func normalizeConditions(conditions []metav1.Condition) []metav1.Condition { for i := range conditions { t := metav1.NewTime(time.Time{}) conditions[i].LastTransitionTime = t + conditions[i].ObservedGeneration = 0 } return conditions } @@ -1860,43 +1910,100 @@ func assertClusterNADManifest(nadClient nadclient.K8sCniCncfIoV1Interface, names }`)) } -func assertClusterUDNStatusReportsActiveNamespaces(cudnName string, expectedActiveNsNames ...string) { - conditionsRaw, err := e2ekubectl.RunKubectl("", "get", "clusteruserdefinednetwork", cudnName, "-o", "jsonpath='{.status.conditions}'") - ExpectWithOffset(1, err).NotTo(HaveOccurred()) - conditionsRaw = strings.ReplaceAll(conditionsRaw, `\`, ``) - conditionsRaw = strings.ReplaceAll(conditionsRaw, `'`, ``) - var conditions []metav1.Condition - ExpectWithOffset(1, json.Unmarshal([]byte(conditionsRaw), &conditions)).To(Succeed()) +var clusterUDNGVR = schema.GroupVersionResource{ + Group: "k8s.ovn.org", + Version: "v1", + Resource: "clusteruserdefinednetworks", +} - c := conditions[0] - // equality matcher cannot be used since condition message namespaces order is inconsistent - ExpectWithOffset(1, c.Type).Should(Equal("NetworkCreated")) - ExpectWithOffset(1, c.Status).Should(Equal(metav1.ConditionTrue)) - ExpectWithOffset(1, c.Reason).Should(Equal("NetworkAttachmentDefinitionCreated")) +var udnGVR = schema.GroupVersionResource{ + Group: "k8s.ovn.org", + Version: "v1", + Resource: "userdefinednetworks", +} + +// getConditions extracts metav1 conditions from .status.conditions of an unstructured object +func getConditions(uns *unstructured.Unstructured) ([]metav1.Condition, error) { + var conditions []metav1.Condition + conditionsRaw, found, err := unstructured.NestedFieldNoCopy(uns.Object, "status", "conditions") + if err != nil { + return nil, fmt.Errorf("failed getting conditions in %s: %v", uns.GetName(), err) + } + if !found { + return nil, fmt.Errorf("conditions not found in %v", uns) + } - ExpectWithOffset(1, c.Message).To(ContainSubstring("NetworkAttachmentDefinition has been created in following namespaces:")) - for _, ns := range expectedActiveNsNames { - Expect(c.Message).To(ContainSubstring(ns)) + conditionsJSON, err := json.Marshal(conditionsRaw) + if err != nil { + return nil, err } + if err := json.Unmarshal(conditionsJSON, &conditions); err != nil { + return nil, err + } + + return conditions, nil } -func assertClusterUDNStatusReportConsumers(conditionsJSON, udnName, udnNamespace, expectedPodName string) { - conditionsJSON = strings.ReplaceAll(conditionsJSON, `\`, ``) - conditionsJSON = strings.ReplaceAll(conditionsJSON, `'`, ``) +func validateClusterUDNStatusReportsActiveNamespacesFunc(client dynamic.Interface, cUDNName string, expectedActiveNsNames ...string) func() error { + return func() error { + cUDN, err := client.Resource(clusterUDNGVR).Get(context.Background(), cUDNName, metav1.GetOptions{}) + if err != nil { + return err + } + conditions, err := getConditions(cUDN) + if err != nil { + return err + } + if len(conditions) == 0 { + return fmt.Errorf("expected at least one condition in %v", cUDN) + } - var conditions []metav1.Condition - ExpectWithOffset(1, json.Unmarshal([]byte(conditionsJSON), &conditions)).To(Succeed()) + c := conditions[0] + if c.Type != "NetworkCreated" { + return fmt.Errorf("expected NetworkCreated type in %v", c) + } + if c.Status != metav1.ConditionTrue { + return fmt.Errorf("expected True status in %v", c) + } + if 
c.Reason != "NetworkAttachmentDefinitionCreated" { + return fmt.Errorf("expected NetworkAttachmentDefinitionCreated reason in %v", c) + } + if !strings.Contains(c.Message, "NetworkAttachmentDefinition has been created in following namespaces:") { + return fmt.Errorf("expected \"NetworkAttachmentDefinition has been created in following namespaces:\" in %s", c.Message) + } + + for _, ns := range expectedActiveNsNames { + if !strings.Contains(c.Message, ns) { + return fmt.Errorf("expected to find %q namespace in %s", ns, c.Message) + } + } + return nil + } +} + +func validateClusterUDNStatusReportConsumers(client dynamic.Interface, cUDNName, udnNamespace, expectedPodName string) error { + cUDN, err := client.Resource(clusterUDNGVR).Get(context.Background(), cUDNName, metav1.GetOptions{}) + if err != nil { + return err + } + conditions, err := getConditions(cUDN) + if err != nil { + return err + } conditions = normalizeConditions(conditions) expectedMsg := fmt.Sprintf("failed to delete NetworkAttachmentDefinition [%[1]s/%[2]s]: network in use by the following pods: [%[1]s/%[3]s]", - udnNamespace, udnName, expectedPodName) - ExpectWithOffset(1, conditions).To(Equal([]metav1.Condition{ + udnNamespace, cUDNName, expectedPodName) + expectedConditions := []metav1.Condition{ { Type: "NetworkCreated", Status: "False", Reason: "NetworkAttachmentDefinitionSyncError", Message: expectedMsg, - }, - })) + }} + if !reflect.DeepEqual(conditions, expectedConditions) { + return fmt.Errorf("expected conditions: %v, got: %v", expectedConditions, conditions) + } + return nil } func newClusterUDNManifest(name string, targetNamespaces ...string) string { diff --git a/test/e2e/network_segmentation_endpointslices_mirror.go b/test/e2e/network_segmentation_endpointslices_mirror.go index 517117158e..ae00d5fdec 100644 --- a/test/e2e/network_segmentation_endpointslices_mirror.go +++ b/test/e2e/network_segmentation_endpointslices_mirror.go @@ -167,7 +167,7 @@ var _ = Describe("Network Segmentation EndpointSlices mirroring", func() { udnManifest := generateUserDefinedNetworkManifest(&c) cleanup, err := createManifest(f.Namespace.Name, udnManifest) DeferCleanup(cleanup) - Expect(waitForUserDefinedNetworkReady(f.Namespace.Name, c.name, 5*time.Second)).To(Succeed()) + Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, f.Namespace.Name, c.name), 5*time.Second, time.Second).Should(Succeed()) return err }), ) @@ -253,7 +253,7 @@ var _ = Describe("Network Segmentation EndpointSlices mirroring", func() { udnManifest := generateUserDefinedNetworkManifest(&c) cleanup, err := createManifest(fmt.Sprintf("%s-default", f.Namespace.Name), udnManifest) DeferCleanup(cleanup) - Expect(waitForUserDefinedNetworkReady(fmt.Sprintf("%s-default", f.Namespace.Name), c.name, 5*time.Second)).To(Succeed()) + Eventually(userDefinedNetworkReadyFunc(f.DynamicClient, fmt.Sprintf("%s-default", f.Namespace.Name), c.name), 5*time.Second, time.Second).Should(Succeed()) return err }), ) From 344d4dca340a3734cba2129fc347804b0f038eda Mon Sep 17 00:00:00 2001 From: Surya Seetharaman Date: Wed, 15 Jan 2025 16:08:57 +0100 Subject: [PATCH 26/51] Remove per-pod-SNAT for UDNs disable-SNAT-Multiple-GWs means we use per pod SNATs instead of subnet SNAT at the GR. This option is only relevant to default network as of this PR. 
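For illustration only, a minimal sketch of what the two modes program on a UDN gateway router (the router name, masquerade IP, and addresses below are invented for this example and are not taken from this patch):

```sh
# Per-pod SNAT: one NAT entry per pod IP on the gateway router.
ovn-nbctl lr-nat-add GR_mynet_node1 snat 169.254.169.13 10.128.1.5
ovn-nbctl lr-nat-add GR_mynet_node1 snat 169.254.169.13 10.128.1.6

# Subnet SNAT: a single NAT entry covering the whole network subnet.
ovn-nbctl lr-nat-add GR_mynet_node1 snat 169.254.169.13 10.128.0.0/16
```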
We are removing the support for per pod SNAT due to the following reasons: 1) VM live migration on L2 UDNs needs the older SNAT for the pod to be present on the older node, which is hard to keep around, else some traffic will leave with the pod IP from the old node immediately after live migration has finished. 2) lots of SNATs cannot be good for scale, a SNAT per network subnet sounds better. 3) ICNI is the only feature that uses per pod SNAT, and ICNI is not supported on UDNs, so we don't need it there anyway. Signed-off-by: Surya Seetharaman --- go-controller/pkg/config/config.go | 1 + .../ovn/base_network_controller_secondary.go | 76 ------------------- go-controller/pkg/ovn/gateway.go | 2 +- ...econdary_layer2_network_controller_test.go | 18 +---- ...econdary_layer3_network_controller_test.go | 15 +--- 5 files changed, 9 insertions(+), 103 deletions(-) diff --git a/go-controller/pkg/config/config.go b/go-controller/pkg/config/config.go index 1f601153fe..0f6d2fa381 100644 --- a/go-controller/pkg/config/config.go +++ b/go-controller/pkg/config/config.go @@ -460,6 +460,7 @@ type GatewayConfig struct { // NodeportEnable sets whether to provide Kubernetes NodePort service or not NodeportEnable bool `gcfg:"nodeport"` // DisableSNATMultipleGws sets whether to disable SNAT of egress traffic in namespaces annotated with routing-external-gws + // only applicable to the default network not for UDNs DisableSNATMultipleGWs bool `gcfg:"disable-snat-multiple-gws"` // V4JoinSubnet to be used in the cluster V4JoinSubnet string `gcfg:"v4-join-subnet"` diff --git a/go-controller/pkg/ovn/base_network_controller_secondary.go b/go-controller/pkg/ovn/base_network_controller_secondary.go index 1d0d617a93..57ebe23ad2 100644 --- a/go-controller/pkg/ovn/base_network_controller_secondary.go +++ b/go-controller/pkg/ovn/base_network_controller_secondary.go @@ -381,18 +381,6 @@ func (bsnc *BaseSecondaryNetworkController) addLogicalPortToNetworkForNAD(pod *c ops = append(ops, addOps...) } - if util.IsNetworkSegmentationSupportEnabled() && bsnc.IsPrimaryNetwork() && config.Gateway.DisableSNATMultipleGWs { - // we need to add per-pod SNATs for UDN networks - snatOps, err := bsnc.addPerPodSNATOps(pod, podAnnotation.IPs) - if err != nil { - return fmt.Errorf("failed to construct SNAT for pod %s/%s which is part of network %s, err: %v", - pod.Namespace, pod.Name, bsnc.GetNetworkName(), err) - } - if snatOps != nil { - ops = append(ops, snatOps...)
- } - } - recordOps, txOkCallBack, _, err := bsnc.AddConfigDurationRecord("pod", pod.Namespace, pod.Name) if err != nil { klog.Errorf("Config duration recorder: %v", err) @@ -426,30 +414,6 @@ func (bsnc *BaseSecondaryNetworkController) addLogicalPortToNetworkForNAD(pod *c return nil } -// addPerPodSNATOps returns the ops that will add the SNAT towards masqueradeIP for this given pod -func (bsnc *BaseSecondaryNetworkController) addPerPodSNATOps(pod *corev1.Pod, podIPs []*net.IPNet) ([]ovsdb.Operation, error) { - if !bsnc.isPodScheduledinLocalZone(pod) { - // nothing to do if its a remote zone pod - return nil, nil - } - // we need to add per-pod SNATs for UDN networks - networkID, err := bsnc.getNetworkID() - if err != nil { - return nil, fmt.Errorf("failed to get networkID for network %q: %v", bsnc.GetNetworkName(), err) - } - masqIPs, err := udn.GetUDNGatewayMasqueradeIPs(networkID) - if err != nil { - return nil, fmt.Errorf("failed to get masquerade IPs, network %s (%d): %v", bsnc.GetNetworkName(), networkID, err) - } - - ops, err := addOrUpdatePodSNATOps(bsnc.nbClient, bsnc.GetNetworkScopedGWRouterName(pod.Spec.NodeName), masqIPs, podIPs, bsnc.GetNetworkScopedClusterSubnetSNATMatch(pod.Spec.NodeName), nil) - if err != nil { - return nil, fmt.Errorf("failed to construct SNAT pods for pod %s/%s which is part of network %s, err: %v", - pod.Namespace, pod.Name, bsnc.GetNetworkName(), err) - } - return ops, nil -} - // removePodForSecondaryNetwork tried to tear down a pod. It returns nil on success and error on failure; // failure indicates the pod tear down should be retried later. func (bsnc *BaseSecondaryNetworkController) removePodForSecondaryNetwork(pod *corev1.Pod, portInfoMap map[string]*lpInfo) error { @@ -514,15 +478,6 @@ func (bsnc *BaseSecondaryNetworkController) removePodForSecondaryNetwork(pod *co return err } - // Cleanup the SNAT entries before checking whether this controller handled the IP allocation - if util.IsNetworkSegmentationSupportEnabled() && bsnc.IsPrimaryNetwork() && config.Gateway.DisableSNATMultipleGWs { - // we need to delete per-pod SNATs for UDN networks - if err := bsnc.delPerPodSNAT(pod, nadName); err != nil { - return fmt.Errorf("failed to delete SNAT for pod %s/%s which is part of network %s, err: %v", - pod.Namespace, pod.Name, bsnc.GetNetworkName(), err) - } - } - // do not release IP address if this controller does not handle IP allocation if !bsnc.allocatesPodAnnotation() { continue @@ -615,37 +570,6 @@ func (bsnc *BaseSecondaryNetworkController) hasIPAMClaim(pod *corev1.Pod, nadNam return hasIPAMClaim, nil } -// delPerPodSNAT will delete the SNAT towards masqueradeIP for this given pod -func (bsnc *BaseSecondaryNetworkController) delPerPodSNAT(pod *corev1.Pod, nadName string) error { - if !bsnc.isPodScheduledinLocalZone(pod) { - // nothing to do if its a remote zone pod - return nil - } - // we need to add per-pod SNATs for UDN networks - networkID, err := bsnc.getNetworkID() - if err != nil { - return fmt.Errorf("failed to get networkID for network %q: %v", bsnc.GetNetworkName(), err) - } - masqIPs, err := udn.GetUDNGatewayMasqueradeIPs(networkID) - if err != nil { - return fmt.Errorf("failed to get masquerade IPs, network %s (%d): %v", bsnc.GetNetworkName(), networkID, err) - } - podNetAnnotation, err := util.UnmarshalPodAnnotation(pod.Annotations, nadName) - if err != nil { - return fmt.Errorf("failed to fetch annotations for pod %s/%s in network %s; err: %v", pod.Namespace, pod.Name, bsnc.GetNetworkName(), err) - } - ops, err := 
deletePodSNATOps(bsnc.nbClient, nil, bsnc.GetNetworkScopedGWRouterName(pod.Spec.NodeName), masqIPs, podNetAnnotation.IPs, bsnc.GetNetworkScopedClusterSubnetSNATMatch(pod.Spec.NodeName)) - if err != nil { - return fmt.Errorf("failed to construct SNAT pods for pod %s/%s which is part of network %s, err: %v", - pod.Namespace, pod.Name, bsnc.GetNetworkName(), err) - } - if _, err = libovsdbops.TransactAndCheck(bsnc.nbClient, ops); err != nil { - return fmt.Errorf("failed to delete SNAT rule for pod %s/%s in network %s on gateway router %s: %w", - pod.Namespace, pod.Name, bsnc.GetNetworkName(), bsnc.GetNetworkScopedGWRouterName(pod.Spec.NodeName), err) - } - return nil -} - func (bsnc *BaseSecondaryNetworkController) syncPodsForSecondaryNetwork(pods []interface{}) error { annotatedLocalPods := map[*corev1.Pod]map[string]*util.PodAnnotation{} // get the list of logical switch ports (equivalent to pods). Reserve all existing Pod IPs to diff --git a/go-controller/pkg/ovn/gateway.go b/go-controller/pkg/ovn/gateway.go index 43a2fe0181..5af52e533e 100644 --- a/go-controller/pkg/ovn/gateway.go +++ b/go-controller/pkg/ovn/gateway.go @@ -787,7 +787,7 @@ func (gw *GatewayManager) GatewayInit( nats := make([]*nbdb.NAT, 0, len(clusterIPSubnet)) var nat *nbdb.NAT - if !config.Gateway.DisableSNATMultipleGWs && !gw.isRoutingAdvertised(nodeName) { + if (!config.Gateway.DisableSNATMultipleGWs || gw.netInfo.IsPrimaryNetwork()) && !gw.isRoutingAdvertised(nodeName) { // Default SNAT rules. DisableSNATMultipleGWs=false in LGW (traffic egresses via mp0) always. // We are not checking for gateway mode to be shared explicitly to reduce topology differences. for _, entry := range clusterIPSubnet { diff --git a/go-controller/pkg/ovn/secondary_layer2_network_controller_test.go b/go-controller/pkg/ovn/secondary_layer2_network_controller_test.go index 1709874b0c..57a41a58d9 100644 --- a/go-controller/pkg/ovn/secondary_layer2_network_controller_test.go +++ b/go-controller/pkg/ovn/secondary_layer2_network_controller_test.go @@ -552,11 +552,7 @@ func expectedLayer2EgressEntities(netInfo util.NetInfo, gwConfig util.L3GatewayC masqSNAT := newMasqueradeManagementNATEntry(masqSNATUUID1, netInfo) var nat []string - if config.Gateway.DisableSNATMultipleGWs { - nat = append(nat, nat1, nat3, perPodSNAT, masqSNATUUID1) - } else { - nat = append(nat, nat1, nat2, nat3, masqSNATUUID1) - } + nat = append(nat, nat1, nat2, nat3, masqSNATUUID1) gr := &nbdb.LogicalRouter{ Name: gwRouterName, UUID: gwRouterName + "-UUID", @@ -593,15 +589,9 @@ func expectedLayer2EgressEntities(netInfo util.NetInfo, gwConfig util.L3GatewayC } expectedEntities = append(expectedEntities, expectedExternalSwitchAndLSPs(netInfo, gwConfig, nodeName)...) 
- if config.Gateway.DisableSNATMultipleGWs { - expectedEntities = append(expectedEntities, newNATEntry(nat1, dummyMasqueradeIP().IP.String(), gwRouterJoinIPAddress().IP.String(), standardNonDefaultNetworkExtIDs(netInfo), "")) - expectedEntities = append(expectedEntities, newNATEntry(nat3, dummyMasqueradeIP().IP.String(), layer2SubnetGWAddr().IP.String(), standardNonDefaultNetworkExtIDs(netInfo), "")) - expectedEntities = append(expectedEntities, newNATEntry(perPodSNAT, dummyMasqueradeIP().IP.String(), dummyL2TestPodAdditionalNetworkIP(), nil, fmt.Sprintf("outport == %q", gwRouterToExtSwitchPortName))) - } else { - expectedEntities = append(expectedEntities, newNATEntry(nat1, dummyMasqueradeIP().IP.String(), gwRouterJoinIPAddress().IP.String(), standardNonDefaultNetworkExtIDs(netInfo), "")) - expectedEntities = append(expectedEntities, newNATEntry(nat2, dummyMasqueradeIP().IP.String(), layer2Subnet().String(), standardNonDefaultNetworkExtIDs(netInfo), fmt.Sprintf("outport == %q", gwRouterToExtSwitchPortName))) - expectedEntities = append(expectedEntities, newNATEntry(nat3, dummyMasqueradeIP().IP.String(), layer2SubnetGWAddr().IP.String(), standardNonDefaultNetworkExtIDs(netInfo), "")) - } + expectedEntities = append(expectedEntities, newNATEntry(nat1, dummyMasqueradeIP().IP.String(), gwRouterJoinIPAddress().IP.String(), standardNonDefaultNetworkExtIDs(netInfo), "")) + expectedEntities = append(expectedEntities, newNATEntry(nat2, dummyMasqueradeIP().IP.String(), layer2Subnet().String(), standardNonDefaultNetworkExtIDs(netInfo), fmt.Sprintf("outport == %q", gwRouterToExtSwitchPortName))) + expectedEntities = append(expectedEntities, newNATEntry(nat3, dummyMasqueradeIP().IP.String(), layer2SubnetGWAddr().IP.String(), standardNonDefaultNetworkExtIDs(netInfo), "")) return expectedEntities } diff --git a/go-controller/pkg/ovn/secondary_layer3_network_controller_test.go b/go-controller/pkg/ovn/secondary_layer3_network_controller_test.go index 02671eaf54..cbebc5ee9c 100644 --- a/go-controller/pkg/ovn/secondary_layer3_network_controller_test.go +++ b/go-controller/pkg/ovn/secondary_layer3_network_controller_test.go @@ -724,11 +724,7 @@ func expectedGWRouterPlusNATAndStaticRoutes( nextHopMasqIP := nextHopMasqueradeIP().String() masqSubnet := config.Gateway.V4MasqueradeSubnet var nat []string - if config.Gateway.DisableSNATMultipleGWs { - nat = append(nat, nat1, perPodSNAT) - } else { - nat = append(nat, nat1, nat2) - } + nat = append(nat, nat1, nat2) expectedEntities := []libovsdbtest.TestData{ &nbdb.LogicalRouter{ Name: gwRouterName, @@ -743,13 +739,8 @@ func expectedGWRouterPlusNATAndStaticRoutes( expectedGRStaticRoute(staticRoute2, ipv4DefaultRoute, nextHopIP, nil, &staticRouteOutputPort, netInfo), expectedGRStaticRoute(staticRoute3, masqSubnet, nextHopMasqIP, nil, &staticRouteOutputPort, netInfo), } - if config.Gateway.DisableSNATMultipleGWs { - expectedEntities = append(expectedEntities, newNATEntry(nat1, dummyMasqueradeIP().IP.String(), gwRouterJoinIPAddress().IP.String(), standardNonDefaultNetworkExtIDs(netInfo), "")) - expectedEntities = append(expectedEntities, newNATEntry(perPodSNAT, dummyMasqueradeIP().IP.String(), dummyTestPodAdditionalNetworkIP(), nil, "")) - } else { - expectedEntities = append(expectedEntities, newNATEntry(nat1, dummyMasqueradeIP().IP.String(), gwRouterJoinIPAddress().IP.String(), standardNonDefaultNetworkExtIDs(netInfo), "")) - expectedEntities = append(expectedEntities, newNATEntry(nat2, dummyMasqueradeIP().IP.String(), netInfo.Subnets()[0].CIDR.String(), 
standardNonDefaultNetworkExtIDs(netInfo), "")) - } + expectedEntities = append(expectedEntities, newNATEntry(nat1, dummyMasqueradeIP().IP.String(), gwRouterJoinIPAddress().IP.String(), standardNonDefaultNetworkExtIDs(netInfo), "")) + expectedEntities = append(expectedEntities, newNATEntry(nat2, dummyMasqueradeIP().IP.String(), netInfo.Subnets()[0].CIDR.String(), standardNonDefaultNetworkExtIDs(netInfo), "")) return expectedEntities } From 089009ce060d31f3d2ede8a9f575be0b974219b7 Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Mon, 20 Jan 2025 14:24:22 -0500 Subject: [PATCH 27/51] Skip session affinity conformance test Test has been flaking and timing out jobs. Already fixed in k8s upstream. See #4965 Signed-off-by: Tim Rozet --- test/scripts/e2e-kind.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/scripts/e2e-kind.sh b/test/scripts/e2e-kind.sh index d355d3a7f1..501dd60dfd 100755 --- a/test/scripts/e2e-kind.sh +++ b/test/scripts/e2e-kind.sh @@ -39,6 +39,9 @@ service.kubernetes.io/headless # TO BE FIXED BY https://github.com/kubernetes/kubernetes/pull/95351 should resolve connection reset issue #74839 +# TO BE FIXED BY https://github.com/kubernetes/kubernetes/pull/129049 +Services should be able to switch session affinity for NodePort service + # api flakes sig-api-machinery From 561af6ef2bf9f6e972afd7f5903fb637770ec516 Mon Sep 17 00:00:00 2001 From: Patryk Diak Date: Mon, 13 Jan 2025 20:42:19 +0100 Subject: [PATCH 28/51] Use annotations in mirrored EndpointSlices for potentially long values Label values have a length limit of 63 characters. Use annotations for 'k8s.ovn.org/source-endpointslice' and 'k8s.ovn.org/endpointslice-network'. Signed-off-by: Patryk Diak --- .../mirrored-endpointslices.md | 10 +- .../endpointslice_mirror_controller.go | 36 ++-- .../endpointslice_mirror_controller_test.go | 166 +++++++++++++----- go-controller/pkg/testing/kube.go | 10 +- go-controller/pkg/types/const.go | 8 +- go-controller/pkg/util/util.go | 50 +++++- go-controller/pkg/util/util_unit_test.go | 16 +- 7 files changed, 215 insertions(+), 81 deletions(-) diff --git a/docs/features/multiple-networks/mirrored-endpointslices.md b/docs/features/multiple-networks/mirrored-endpointslices.md index 2c8e438bd3..39f8615779 100644 --- a/docs/features/multiple-networks/mirrored-endpointslices.md +++ b/docs/features/multiple-networks/mirrored-endpointslices.md @@ -17,9 +17,12 @@ The EndpointSlices mirror controller uses a separate set of labels: - `endpointslice.kubernetes.io/managed-by:endpointslice-mirror-controller.k8s.ovn.org` - Indicates that the EndpointSlice is managed by the mirror controller. - `k8s.ovn.org/service-name:` - The service that this mirrored EndpointSlice belongs to, used by the user-defined network service controller. Note that the label key is different from the default EndpointSlice. +- `k8s.ovn.org/source-endpointslice-version:` - The last reconciled resource version from the default EndpointSlice. + +and annotations (Label values have a length limit of 63 characters): - `k8s.ovn.org/endpointslice-network:` - The user-defined network that the IP addresses in the mirrored EndpointSlice belong to. - `k8s.ovn.org/source-endpointslice:` - The name of the default EndpointSlice that was the source of the mirrored EndpointSlice. -- `k8s.ovn.org/source-endpointslice-version:` - The last reconciled resource version from the default EndpointSlice. 
+ ### Example @@ -99,10 +102,11 @@ metadata: generateName: l3-network-sample-deployment- labels: endpointslice.kubernetes.io/managed-by: endpointslice-mirror-controller.k8s.ovn.org - k8s.ovn.org/endpointslice-network: l3-network k8s.ovn.org/service-name: sample-deployment - k8s.ovn.org/source-endpointslice: sample-deployment-rkk4n k8s.ovn.org/source-endpointslice-version: "31533" + annotations: + k8s.ovn.org/endpointslice-network: l3-network + k8s.ovn.org/source-endpointslice: sample-deployment-rkk4n namespace: nad-l3 resourceVersion: "31535" addressType: IPv4 diff --git a/go-controller/pkg/clustermanager/endpointslicemirror/endpointslice_mirror_controller.go b/go-controller/pkg/clustermanager/endpointslicemirror/endpointslice_mirror_controller.go index 0e077449be..efa335a307 100644 --- a/go-controller/pkg/clustermanager/endpointslicemirror/endpointslice_mirror_controller.go +++ b/go-controller/pkg/clustermanager/endpointslicemirror/endpointslice_mirror_controller.go @@ -25,6 +25,7 @@ import ( "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/networkmanager" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util" + utilerrors "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util/errors" ) const maxRetries = 10 @@ -48,12 +49,12 @@ type Controller struct { } // getDefaultEndpointSliceKey returns the key for the default EndpointSlice associated with the given EndpointSlice. -// For mirrored EndpointSlices it returns the key based on the value of the "k8s.ovn.org/source-endpointslice" label. +// For mirrored EndpointSlices it returns the key based on the value of the "k8s.ovn.org/source-endpointslice" annotation. // For default EndpointSlices it returns the / key. // For other EndpointSlices it returns an empty value. 
func (c *Controller) getDefaultEndpointSliceKey(endpointSlice *v1.EndpointSlice) string { if c.isManagedByController(endpointSlice) { - defaultEndpointSliceName, found := endpointSlice.Labels[types.LabelSourceEndpointSlice] + defaultEndpointSliceName, found := endpointSlice.Annotations[types.SourceEndpointSliceAnnotation] if !found { utilruntime.HandleError(fmt.Errorf("couldn't determine the source EndpointSlice for %s", cache.MetaObjectToName(endpointSlice))) return "" @@ -255,11 +256,6 @@ func (c *Controller) syncDefaultEndpointSlice(ctx context.Context, key string) e return nil } - mirrorEndpointSliceSelector := labels.Set(map[string]string{ - types.LabelSourceEndpointSlice: name, - v1.LabelManagedBy: c.name, - }).AsSelectorPreValidated() - klog.Infof("Processing %s/%s EndpointSlice in %q primary network", namespace, name, namespacePrimaryNetwork.GetNetworkName()) defaultEndpointSlice, err := c.endpointSliceLister.EndpointSlices(namespace).Get(name) @@ -269,7 +265,7 @@ func (c *Controller) syncDefaultEndpointSlice(ctx context.Context, key string) e var mirroredEndpointSlice *v1.EndpointSlice - slices, err := c.endpointSliceLister.EndpointSlices(namespace).List(mirrorEndpointSliceSelector) + slices, err := util.GetMirroredEndpointSlices(c.name, name, namespace, c.endpointSliceLister) if err != nil { return err } @@ -279,16 +275,23 @@ func (c *Controller) syncDefaultEndpointSlice(ctx context.Context, key string) e } if len(slices) > 1 { klog.Errorf("Found %d mirrored EndpointSlices for %s/%s, removing all of them", len(slices), namespace, name) - if err := c.kubeClient.DiscoveryV1().EndpointSlices(namespace).DeleteCollection(ctx, metav1.DeleteOptions{}, metav1.ListOptions{LabelSelector: mirrorEndpointSliceSelector.String()}); err != nil { - return err + var errorList []error + for _, endpointSlice := range slices { + if err := c.kubeClient.DiscoveryV1().EndpointSlices(namespace).Delete(ctx, endpointSlice.Name, metav1.DeleteOptions{}); err != nil { + errorList = append(errorList, err) + } } + if len(errorList) != 0 { + return utilerrors.Join(errorList...) 
+ } + // return an error so there is a retry that will recreate the correct mirrored EndpointSlice return fmt.Errorf("found and removed %d mirrored EndpointSlices for %s/%s", len(slices), namespace, name) } if defaultEndpointSlice == nil { if mirroredEndpointSlice != nil { - klog.Infof("The default EndpointSlice %s/%s no longer exists, removing the mirrored one: %s", namespace, mirroredEndpointSlice.Labels[types.LabelSourceEndpointSlice], cache.MetaObjectToName(mirroredEndpointSlice)) + klog.Infof("The default EndpointSlice %s/%s no longer exists, removing the mirrored one: %s", namespace, mirroredEndpointSlice.Annotations[types.SourceEndpointSliceAnnotation], cache.MetaObjectToName(mirroredEndpointSlice)) return c.kubeClient.DiscoveryV1().EndpointSlices(namespace).Delete(ctx, mirroredEndpointSlice.Name, metav1.DeleteOptions{}) } klog.Infof("The default EndpointSlice %s/%s no longer exists", namespace, name) @@ -306,7 +309,7 @@ func (c *Controller) syncDefaultEndpointSlice(ctx context.Context, key string) e if mirroredEndpointSlice != nil { // nothing to do if we already reconciled this exact EndpointSlice - if mirroredResourceVersion, ok := mirroredEndpointSlice.Labels[types.LabelSourceEndpointSliceVersion]; ok { + if mirroredResourceVersion, ok := mirroredEndpointSlice.Annotations[types.LabelSourceEndpointSliceVersion]; ok { if mirroredResourceVersion == defaultEndpointSlice.ResourceVersion { return nil } @@ -401,15 +404,18 @@ func (c *Controller) mirrorEndpointSlice(mirroredEndpointSlice, defaultEndpointS if currentMirror.Labels == nil { currentMirror.Labels = map[string]string{} } + if currentMirror.Annotations == nil { + currentMirror.Annotations = make(map[string]string) + } currentMirror.AddressType = defaultEndpointSlice.AddressType currentMirror.Ports = defaultEndpointSlice.Ports // set the custom labels, generateName and reset the endpoints currentMirror.Labels[v1.LabelManagedBy] = c.name - currentMirror.Labels[types.LabelSourceEndpointSlice] = defaultEndpointSlice.Name - currentMirror.Labels[types.LabelSourceEndpointSliceVersion] = defaultEndpointSlice.ResourceVersion - currentMirror.Labels[types.LabelUserDefinedEndpointSliceNetwork] = network.GetNetworkName() currentMirror.Labels[types.LabelUserDefinedServiceName] = defaultEndpointSlice.Labels[v1.LabelServiceName] + currentMirror.Annotations[types.SourceEndpointSliceAnnotation] = defaultEndpointSlice.Name + currentMirror.Annotations[types.LabelSourceEndpointSliceVersion] = defaultEndpointSlice.ResourceVersion + currentMirror.Annotations[types.UserDefinedNetworkEndpointSliceAnnotation] = network.GetNetworkName() // Set the GenerateName only for new objects if len(currentMirror.Name) == 0 { diff --git a/go-controller/pkg/clustermanager/endpointslicemirror/endpointslice_mirror_controller_test.go b/go-controller/pkg/clustermanager/endpointslicemirror/endpointslice_mirror_controller_test.go index 56365e13ba..fad1be1e80 100644 --- a/go-controller/pkg/clustermanager/endpointslicemirror/endpointslice_mirror_controller_test.go +++ b/go-controller/pkg/clustermanager/endpointslicemirror/endpointslice_mirror_controller_test.go @@ -3,6 +3,7 @@ package endpointslicemirror import ( "context" "fmt" + "strings" "time" "github.com/onsi/ginkgo/v2" @@ -109,7 +110,7 @@ var _ = ginkgo.Describe("Cluster manager EndpointSlice mirror controller", func( }, } staleEndpointSlice := testing.MirrorEndpointSlice(&defaultEndpointSlice, "l3-network", false) - staleEndpointSlice.Labels[types.LabelSourceEndpointSlice] = "non-existing-endpointslice" + 
staleEndpointSlice.Annotations[types.SourceEndpointSliceAnnotation] = "non-existing-endpointslice" objs := []runtime.Object{ &v1.PodList{ @@ -140,7 +141,7 @@ var _ = ginkgo.Describe("Cluster manager EndpointSlice mirror controller", func( metav1.CreateOptions{}) gomega.Expect(err).ToNot(gomega.HaveOccurred()) - var mirroredEndpointSlices *discovery.EndpointSliceList + var mirroredEndpointSlices []*discovery.EndpointSlice gomega.Eventually(func() error { // defaultEndpointSlice should exist _, err := fakeClient.KubeClient.DiscoveryV1().EndpointSlices(namespaceT.Name).Get(context.TODO(), defaultEndpointSlice.Name, metav1.GetOptions{}) @@ -158,26 +159,21 @@ var _ = ginkgo.Describe("Cluster manager EndpointSlice mirror controller", func( } // new mirrored EndpointSlice should get created - mirrorEndpointSliceSelector := labels.Set(map[string]string{ - types.LabelSourceEndpointSlice: defaultEndpointSlice.Name, - discovery.LabelManagedBy: types.EndpointSliceMirrorControllerName, - }).AsSelectorPreValidated() - - mirroredEndpointSlices, err = fakeClient.KubeClient.DiscoveryV1().EndpointSlices(namespaceT.Name).List(context.TODO(), metav1.ListOptions{LabelSelector: mirrorEndpointSliceSelector.String()}) + mirroredEndpointSlices, err = util.GetMirroredEndpointSlices(types.EndpointSliceMirrorControllerName, defaultEndpointSlice.Name, namespaceT.Name, controller.endpointSliceLister) if err != nil { return err } - if len(mirroredEndpointSlices.Items) == 0 { + if len(mirroredEndpointSlices) == 0 { return fmt.Errorf("expected one mirrored EndpointSlices") } return nil }).WithTimeout(5 * time.Second).ShouldNot(gomega.HaveOccurred()) - gomega.Expect(mirroredEndpointSlices.Items[0].Endpoints).To(gomega.HaveLen(1)) - gomega.Expect(mirroredEndpointSlices.Items[0].Endpoints[0].Addresses).To(gomega.HaveLen(1)) + gomega.Expect(mirroredEndpointSlices[0].Endpoints).To(gomega.HaveLen(1)) + gomega.Expect(mirroredEndpointSlices[0].Endpoints[0].Addresses).To(gomega.HaveLen(1)) // check if the Address is set to the primary IP - gomega.Expect(mirroredEndpointSlices.Items[0].Endpoints[0].Addresses[0]).To(gomega.BeEquivalentTo("10.132.2.4")) + gomega.Expect(mirroredEndpointSlices[0].Endpoints[0].Addresses[0]).To(gomega.BeEquivalentTo("10.132.2.4")) return nil } @@ -344,7 +340,7 @@ var _ = ginkgo.Describe("Cluster manager EndpointSlice mirror controller", func( metav1.CreateOptions{}) gomega.Expect(err).ToNot(gomega.HaveOccurred()) - var mirroredEndpointSlices *discovery.EndpointSliceList + var mirroredEndpointSlices []*discovery.EndpointSlice gomega.Eventually(func() error { // nad should exist _, err := fakeClient.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(namespaceT.Name).Get(context.TODO(), "l3-network", metav1.GetOptions{}) @@ -359,25 +355,20 @@ var _ = ginkgo.Describe("Cluster manager EndpointSlice mirror controller", func( } // mirrored EndpointSlices should exist - mirrorEndpointSliceSelector := labels.Set(map[string]string{ - types.LabelSourceEndpointSlice: defaultEndpointSlice.Name, - discovery.LabelManagedBy: types.EndpointSliceMirrorControllerName, - }).AsSelectorPreValidated() - - mirroredEndpointSlices, err = fakeClient.KubeClient.DiscoveryV1().EndpointSlices(namespaceT.Name).List(context.TODO(), metav1.ListOptions{LabelSelector: mirrorEndpointSliceSelector.String()}) + mirroredEndpointSlices, err = util.GetMirroredEndpointSlices(types.EndpointSliceMirrorControllerName, defaultEndpointSlice.Name, namespaceT.Name, controller.endpointSliceLister) if err != nil { return err } - if 
len(mirroredEndpointSlices.Items) != 1 { + if len(mirroredEndpointSlices) != 1 { return fmt.Errorf("expected one mirrored EndpointSlice") } - if len(mirroredEndpointSlices.Items[0].Endpoints) != 1 { + if len(mirroredEndpointSlices[0].Endpoints) != 1 { return fmt.Errorf("expected one Endpoint") } return nil }).WithTimeout(5 * time.Second).ShouldNot(gomega.HaveOccurred()) - gomega.Expect(mirroredEndpointSlices.Items[0].Endpoints[0].Addresses).To(gomega.HaveLen(1)) - gomega.Expect(mirroredEndpointSlices.Items[0].Endpoints[0].Addresses).To(gomega.BeEquivalentTo([]string{"10.132.2.4"})) + gomega.Expect(mirroredEndpointSlices[0].Endpoints[0].Addresses).To(gomega.HaveLen(1)) + gomega.Expect(mirroredEndpointSlices[0].Endpoints[0].Addresses).To(gomega.BeEquivalentTo([]string{"10.132.2.4"})) ginkgo.By("when the EndpointSlice changes the mirrored one gets updated") newPod := v1.Pod{ @@ -416,43 +407,33 @@ var _ = ginkgo.Describe("Cluster manager EndpointSlice mirror controller", func( return err } - mirrorEndpointSliceSelector := labels.Set(map[string]string{ - types.LabelSourceEndpointSlice: defaultEndpointSlice.Name, - discovery.LabelManagedBy: types.EndpointSliceMirrorControllerName, - }).AsSelectorPreValidated() - - mirroredEndpointSlices, err = fakeClient.KubeClient.DiscoveryV1().EndpointSlices(namespaceT.Name).List(context.TODO(), metav1.ListOptions{LabelSelector: mirrorEndpointSliceSelector.String()}) + mirroredEndpointSlices, err = util.GetMirroredEndpointSlices(types.EndpointSliceMirrorControllerName, defaultEndpointSlice.Name, namespaceT.Name, controller.endpointSliceLister) if err != nil { return err } - if len(mirroredEndpointSlices.Items) != 1 { + if len(mirroredEndpointSlices) != 1 { return fmt.Errorf("expected one mirrored EndpointSlice") } - if len(mirroredEndpointSlices.Items[0].Endpoints) != 2 { - return fmt.Errorf("expected two addresses, got: %d", len(mirroredEndpointSlices.Items[0].Endpoints)) + if len(mirroredEndpointSlices[0].Endpoints) != 2 { + return fmt.Errorf("expected two addresses, got: %d", len(mirroredEndpointSlices[0].Endpoints)) } return nil }).WithTimeout(5 * time.Second).ShouldNot(gomega.HaveOccurred()) - gomega.Expect(mirroredEndpointSlices.Items[0].Endpoints[0].Addresses[0]).To(gomega.BeEquivalentTo("10.132.2.4")) - gomega.Expect(mirroredEndpointSlices.Items[0].Endpoints[1].Addresses[0]).To(gomega.BeEquivalentTo("10.132.2.5")) + gomega.Expect(mirroredEndpointSlices[0].Endpoints[0].Addresses[0]).To(gomega.BeEquivalentTo("10.132.2.4")) + gomega.Expect(mirroredEndpointSlices[0].Endpoints[1].Addresses[0]).To(gomega.BeEquivalentTo("10.132.2.5")) ginkgo.By("when the default EndpointSlice is removed the mirrored one follows") err = fakeClient.KubeClient.DiscoveryV1().EndpointSlices(newPod.Namespace).Delete(context.TODO(), defaultEndpointSlice.Name, metav1.DeleteOptions{}) gomega.Expect(err).ToNot(gomega.HaveOccurred()) gomega.Eventually(func() error { - mirrorEndpointSliceSelector := labels.Set(map[string]string{ - types.LabelSourceEndpointSlice: defaultEndpointSlice.Name, - discovery.LabelManagedBy: types.EndpointSliceMirrorControllerName, - }).AsSelectorPreValidated() - - mirroredEndpointSlices, err = fakeClient.KubeClient.DiscoveryV1().EndpointSlices(namespaceT.Name).List(context.TODO(), metav1.ListOptions{LabelSelector: mirrorEndpointSliceSelector.String()}) + mirroredEndpointSlices, err = util.GetMirroredEndpointSlices(types.EndpointSliceMirrorControllerName, defaultEndpointSlice.Name, namespaceT.Name, controller.endpointSliceLister) if err != nil { return err } - if 
len(mirroredEndpointSlices.Items) != 0 { + if len(mirroredEndpointSlices) != 0 { return fmt.Errorf("expected no mirrored EndpointSlices") } return nil @@ -464,5 +445,108 @@ var _ = ginkgo.Describe("Cluster manager EndpointSlice mirror controller", func( gomega.Expect(err).ToNot(gomega.HaveOccurred()) }) + ginkgo.It("should create mirrored EndpointSlices for long endpointslice and network names", func() { + app.Action = func(ctx *cli.Context) error { + namespaceT := *util.NewNamespace("testns") + namespaceT.Labels[types.RequiredUDNNamespaceLabel] = "" + + pod := v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: namespaceT.Name, + Annotations: map[string]string{util.OvnPodAnnotationName: `{"default":{"mac_address":"0a:58:0a:f4:02:03","ip_address":"10.244.2.3/24","role":"infrastructure-locked"},"testns/l3-network":{"mac_address":"0a:58:0a:84:02:04","ip_address":"10.132.2.4/24","role":"primary"}}`}, + }, + Status: v1.PodStatus{Phase: v1.PodRunning}, + } + longName := strings.Repeat("a", 253) + + defaultEndpointSlice := discovery.EndpointSlice{ + ObjectMeta: metav1.ObjectMeta{ + Name: longName, + Namespace: namespaceT.Name, + Labels: map[string]string{ + discovery.LabelServiceName: "svc2", + discovery.LabelManagedBy: types.EndpointSliceDefaultControllerName, + }, + ResourceVersion: "1", + }, + Endpoints: []discovery.Endpoint{ + { + Addresses: []string{"10.244.2.3"}, + TargetRef: &v1.ObjectReference{ + Kind: "Pod", + Namespace: namespaceT.Name, + Name: pod.Name, + }, + }, + }, + } + // make sure that really long network names work too + longNetName := "network" + longName + mirroredEndpointSlice := testing.MirrorEndpointSlice(&defaultEndpointSlice, longNetName, false) + objs := []runtime.Object{ + &v1.PodList{ + Items: []v1.Pod{ + pod, + }, + }, + &v1.NamespaceList{ + Items: []v1.Namespace{ + namespaceT, + }, + }, + &discovery.EndpointSliceList{ + Items: []discovery.EndpointSlice{ + defaultEndpointSlice, + *mirroredEndpointSlice, + }, + }, + } + + start(objs...) 
+ + nad := testing.GenerateNAD("l3-network", "l3-network", namespaceT.Name, types.Layer3Topology, "10.132.2.0/16/24", types.NetworkRolePrimary) + _, err := fakeClient.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(namespaceT.Name).Create( + context.TODO(), + nad, + metav1.CreateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + var mirroredEndpointSlices []*discovery.EndpointSlice + gomega.Eventually(func() error { + // nad should exist + _, err := fakeClient.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(namespaceT.Name).Get(context.TODO(), "l3-network", metav1.GetOptions{}) + if err != nil { + return err + } + + // defaultEndpointSlice should exist + _, err = fakeClient.KubeClient.DiscoveryV1().EndpointSlices(namespaceT.Name).Get(context.TODO(), defaultEndpointSlice.Name, metav1.GetOptions{}) + if err != nil { + return err + } + + // mirrored EndpointSlices should exist + mirroredEndpointSlices, err = util.GetMirroredEndpointSlices(types.EndpointSliceMirrorControllerName, defaultEndpointSlice.Name, namespaceT.Name, controller.endpointSliceLister) + if err != nil { + return err + } + if len(mirroredEndpointSlices) != 1 { + return fmt.Errorf("expected one mirrored EndpointSlice") + } + if len(mirroredEndpointSlices[0].Endpoints) != 1 { + return fmt.Errorf("expected one Endpoint") + } + return nil + }).WithTimeout(5 * time.Second).ShouldNot(gomega.HaveOccurred()) + gomega.Expect(mirroredEndpointSlices[0].Endpoints[0].Addresses).To(gomega.HaveLen(1)) + gomega.Expect(mirroredEndpointSlices[0].Endpoints[0].Addresses).To(gomega.BeEquivalentTo([]string{"10.132.2.4"})) + + return nil + } + + err := app.Run([]string{app.Name}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + }) }) }) diff --git a/go-controller/pkg/testing/kube.go b/go-controller/pkg/testing/kube.go index 2dbf5d7fc4..d47564d61a 100644 --- a/go-controller/pkg/testing/kube.go +++ b/go-controller/pkg/testing/kube.go @@ -1,9 +1,10 @@ package testing import ( - "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" discovery "k8s.io/api/discovery/v1" "k8s.io/utils/ptr" + + "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types" ) // USED ONLY FOR TESTING @@ -55,9 +56,12 @@ func MirrorEndpointSlice(defaultEndpointSlice *discovery.EndpointSlice, network mirror := defaultEndpointSlice.DeepCopy() mirror.Name = defaultEndpointSlice.Name + "-mirrored" mirror.Labels[discovery.LabelManagedBy] = types.EndpointSliceMirrorControllerName - mirror.Labels[types.LabelSourceEndpointSlice] = defaultEndpointSlice.Name - mirror.Labels[types.LabelUserDefinedEndpointSliceNetwork] = network mirror.Labels[types.LabelUserDefinedServiceName] = defaultEndpointSlice.Labels[discovery.LabelServiceName] + if mirror.Annotations == nil { + mirror.Annotations = make(map[string]string) + } + mirror.Annotations[types.SourceEndpointSliceAnnotation] = defaultEndpointSlice.Name + mirror.Annotations[types.UserDefinedNetworkEndpointSliceAnnotation] = network if !keepEndpoints { mirror.Endpoints = nil diff --git a/go-controller/pkg/types/const.go b/go-controller/pkg/types/const.go index 94891a58bd..cb87130bd6 100644 --- a/go-controller/pkg/types/const.go +++ b/go-controller/pkg/types/const.go @@ -123,14 +123,14 @@ const ( EndpointSliceMirrorControllerName = "endpointslice-mirror-controller.k8s.ovn.org" // EndpointSliceDefaultControllerName default kubernetes EndpointSlice controller name (used as a value for the "endpointslice.kubernetes.io/managed-by" label) EndpointSliceDefaultControllerName = 
"endpointslice-controller.k8s.io" - // LabelSourceEndpointSlice label key used in mirrored EndpointSlice + // SourceEndpointSliceAnnotation key used in mirrored EndpointSlice // that has the value of the default EndpointSlice name - LabelSourceEndpointSlice = "k8s.ovn.org/source-endpointslice" + SourceEndpointSliceAnnotation = "k8s.ovn.org/source-endpointslice" // LabelSourceEndpointSliceVersion label key used in mirrored EndpointSlice // that has the value of the last known default EndpointSlice ResourceVersion LabelSourceEndpointSliceVersion = "k8s.ovn.org/source-endpointslice-version" - // LabelUserDefinedEndpointSliceNetwork label key used in mirrored EndpointSlices that contains the current primary user defined network name - LabelUserDefinedEndpointSliceNetwork = "k8s.ovn.org/endpointslice-network" + // UserDefinedNetworkEndpointSliceAnnotation key used in mirrored EndpointSlices that contains the current primary user defined network name + UserDefinedNetworkEndpointSliceAnnotation = "k8s.ovn.org/endpointslice-network" // LabelUserDefinedServiceName label key used in mirrored EndpointSlices that contains the service name matching the EndpointSlice LabelUserDefinedServiceName = "k8s.ovn.org/service-name" diff --git a/go-controller/pkg/util/util.go b/go-controller/pkg/util/util.go index 58645192c9..37b85afb8c 100644 --- a/go-controller/pkg/util/util.go +++ b/go-controller/pkg/util/util.go @@ -12,6 +12,7 @@ import ( "time" "golang.org/x/exp/constraints" + "k8s.io/apimachinery/pkg/labels" k8stypes "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/cache" @@ -521,10 +522,10 @@ func IsDefaultEndpointSlice(endpointSlice *discoveryv1.EndpointSlice) bool { } // IsEndpointSliceForNetwork checks if the provided EndpointSlice is meant for the given network -// if types.LabelUserDefinedEndpointSliceNetwork is set it compares it to the network name, +// if types.UserDefinedNetworkEndpointSliceAnnotation is set it compares it to the network name, // otherwise it returns true if the network is the default func IsEndpointSliceForNetwork(endpointSlice *discoveryv1.EndpointSlice, network NetInfo) bool { - if endpointSliceNetwork, ok := endpointSlice.Labels[types.LabelUserDefinedEndpointSliceNetwork]; ok { + if endpointSliceNetwork, ok := endpointSlice.Annotations[types.UserDefinedNetworkEndpointSliceAnnotation]; ok { return endpointSliceNetwork == network.GetNetworkName() } return network.IsDefault() @@ -597,13 +598,24 @@ func GetServiceEndpointSlices(namespace, svcName, network string, endpointSliceL selector = metav1.LabelSelector{MatchLabels: map[string]string{ discovery.LabelServiceName: svcName, }} - } else { - selector = metav1.LabelSelector{MatchLabels: map[string]string{ - types.LabelUserDefinedServiceName: svcName, - types.LabelUserDefinedEndpointSliceNetwork: network, - }} + return GetEndpointSlicesBySelector(namespace, selector, endpointSliceLister) + } + + selector = metav1.LabelSelector{MatchLabels: map[string]string{ + types.LabelUserDefinedServiceName: svcName, + }} + endpointSlices, err := GetEndpointSlicesBySelector(namespace, selector, endpointSliceLister) + if err != nil { + return nil, fmt.Errorf("failed to list endpoint slices for service %s/%s: %w", namespace, svcName, err) + } + networkEndpointSlices := make([]*discovery.EndpointSlice, 0, len(endpointSlices)) + for _, endpointSlice := range endpointSlices { + if endpointSlice.Annotations[types.UserDefinedNetworkEndpointSliceAnnotation] == network { + networkEndpointSlices = append(networkEndpointSlices, endpointSlice) + } } 
- return GetEndpointSlicesBySelector(namespace, selector, endpointSliceLister) + + return networkEndpointSlices, nil } // IsUDNEnabledService checks whether the provided namespaced name key is a UDN enabled service specified in config.Default.UDNAllowedDefaultServices @@ -621,7 +633,7 @@ func IsUDNEnabledService(key string) bool { func ServiceFromEndpointSlice(eps *discovery.EndpointSlice, netInfo NetInfo) (*k8stypes.NamespacedName, error) { labelKey := discovery.LabelServiceName if netInfo.IsPrimaryNetwork() { - if eps.Labels[types.LabelUserDefinedEndpointSliceNetwork] != netInfo.GetNetworkName() { + if eps.Annotations[types.UserDefinedNetworkEndpointSliceAnnotation] != netInfo.GetNetworkName() { return nil, fmt.Errorf("endpointslice %s/%s does not belong to %s network", eps.Namespace, eps.Name, netInfo.GetNetworkName()) } labelKey = types.LabelUserDefinedServiceName @@ -638,3 +650,23 @@ func ServiceFromEndpointSlice(eps *discovery.EndpointSlice, netInfo NetInfo) (*k return &k8stypes.NamespacedName{Namespace: eps.Namespace, Name: svcName}, nil } + +// GetMirroredEndpointSlices retrieves all EndpointSlices in the given namespace that are managed +// by the controller and are mirrored from the sourceName EndpointSlice. +func GetMirroredEndpointSlices(controller, sourceName, namespace string, endpointSliceLister discoverylisters.EndpointSliceLister) (ret []*discovery.EndpointSlice, err error) { + mirrorEndpointSliceSelector := labels.Set(map[string]string{ + discovery.LabelManagedBy: controller, + }).AsSelectorPreValidated() + allMirroredEndpointSlices, err := endpointSliceLister.EndpointSlices(namespace).List(mirrorEndpointSliceSelector) + if err != nil { + return nil, err + } + + var mirroredEndpointSlices []*discovery.EndpointSlice + for _, endpointSlice := range allMirroredEndpointSlices { + if val, exists := endpointSlice.Annotations[types.SourceEndpointSliceAnnotation]; exists && val == sourceName { + mirroredEndpointSlices = append(mirroredEndpointSlices, endpointSlice) + } + } + return mirroredEndpointSlices, nil +} diff --git a/go-controller/pkg/util/util_unit_test.go b/go-controller/pkg/util/util_unit_test.go index 658c945a34..e19e8a237c 100644 --- a/go-controller/pkg/util/util_unit_test.go +++ b/go-controller/pkg/util/util_unit_test.go @@ -289,9 +289,11 @@ func TestServiceFromEndpointSlice(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Namespace: "test-namespace", Name: "test-eps", + Annotations: map[string]string{ + types.UserDefinedNetworkEndpointSliceAnnotation: "primary-network", + }, Labels: map[string]string{ - types.LabelUserDefinedEndpointSliceNetwork: "primary-network", - types.LabelUserDefinedServiceName: "test-service", + types.LabelUserDefinedServiceName: "test-service", }, }, }, @@ -310,9 +312,11 @@ func TestServiceFromEndpointSlice(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Namespace: "test-namespace", Name: "test-eps", + Annotations: map[string]string{ + types.UserDefinedNetworkEndpointSliceAnnotation: "wrong-network", + }, Labels: map[string]string{ - types.LabelUserDefinedEndpointSliceNetwork: "wrong-network", - types.LabelUserDefinedServiceName: "test-service", + types.LabelUserDefinedServiceName: "test-service", }, }, }, @@ -328,8 +332,8 @@ func TestServiceFromEndpointSlice(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Namespace: "test-namespace", Name: "test-eps", - Labels: map[string]string{ - types.LabelUserDefinedEndpointSliceNetwork: "primary-network", + Annotations: map[string]string{ + types.UserDefinedNetworkEndpointSliceAnnotation: 
"primary-network", }, }, }, From 46c34a2c02f5af0c2199a1cd076134fe89f97eec Mon Sep 17 00:00:00 2001 From: Enrique Llorente Date: Tue, 21 Jan 2025 09:35:15 +0100 Subject: [PATCH 29/51] e2e, pudn: Use port to check overlapping isolation Instead of deleting the pods from one network to check isolation on the other towards those IPs, use different http port per network to check isolation Signed-off-by: Enrique Llorente --- test/e2e/network_segmentation.go | 80 ++++++++++++-------------------- 1 file changed, 29 insertions(+), 51 deletions(-) diff --git a/test/e2e/network_segmentation.go b/test/e2e/network_segmentation.go index 0a6986cc8a..4fbb64f302 100644 --- a/test/e2e/network_segmentation.go +++ b/test/e2e/network_segmentation.go @@ -15,6 +15,7 @@ import ( "github.com/onsi/ginkgo/v2" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + v1 "k8s.io/api/core/v1" kerrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -548,14 +549,21 @@ var _ = Describe("Network Segmentation", func() { blue = networkNamespaceMap[namespaceBlue] pods := []*v1.Pod{} - redIPs := []string{} - blueIPs := []string{} + podIPs := []string{} + redIPs := map[string]bool{} + blueIPs := map[string]bool{} + bluePort := uint16(9091) + redPort := uint16(9092) for namespace, network := range networkNamespaceMap { for i := range numberOfPods { + httpServerPort := redPort + if network != red { + httpServerPort = bluePort + } podConfig := *podConfig( fmt.Sprintf("%s-pod-%d", network, i), withCommand(func() []string { - return httpServerContainerCmd(port) + return httpServerContainerCmd(httpServerPort) }), ) podConfig.namespace = namespace @@ -578,10 +586,11 @@ var _ = Describe("Network Segmentation", func() { 0, ) Expect(err).NotTo(HaveOccurred()) + podIPs = append(podIPs, podIP) if network == red { - redIPs = append(redIPs, podIP) + redIPs[podIP] = true } else { - blueIPs = append(blueIPs, podIP) + blueIPs[podIP] = true } } } @@ -589,11 +598,16 @@ var _ = Describe("Network Segmentation", func() { By("ensuring pods only communicate with pods in their network") for _, pod := range pods { isRedPod := strings.Contains(pod.Name, red) - ips := redIPs + expectedHostname := red if !isRedPod { - ips = blueIPs + expectedHostname = blue } - for _, ip := range ips { + for _, ip := range podIPs { + isRedIP := redIPs[ip] + httpServerPort := redPort + if !isRedIP { + httpServerPort = bluePort + } result, err := e2ekubectl.RunKubectl( pod.Namespace, "exec", @@ -602,54 +616,18 @@ var _ = Describe("Network Segmentation", func() { "curl", "--connect-timeout", "2", - net.JoinHostPort(ip, fmt.Sprintf("%d", port)+"/hostname"), + net.JoinHostPort(ip, fmt.Sprintf("%d", httpServerPort)+"/hostname"), ) - Expect(err).NotTo(HaveOccurred()) - if isRedPod { - Expect(strings.Contains(result, red)).To(BeTrue()) + + sameNetwork := isRedPod == redIPs[ip] + if !sameNetwork { + Expect(err).To(HaveOccurred(), "should isolate from different networks") } else { - Expect(strings.Contains(result, blue)).To(BeTrue()) + Expect(err).NotTo(HaveOccurred()) + Expect(strings.Contains(result, expectedHostname)).To(BeTrue()) } } } - - By("Deleting pods in network blue except " + fmt.Sprintf("%s-pod-%d", blue, numberOfPods-1)) - for i := range numberOfPods - 1 { - err := cs.CoreV1().Pods(namespaceBlue).Delete( - context.Background(), - fmt.Sprintf("%s-pod-%d", blue, i), - metav1.DeleteOptions{}, - ) - Expect(err).NotTo(HaveOccurred()) - } - - podIP, err := podIPsForUserDefinedPrimaryNetwork( - cs, - namespaceBlue, - 
fmt.Sprintf("%s-pod-%d", blue, numberOfPods-1), - namespacedName(namespaceBlue, blue), - 0, - ) - Expect(err).NotTo(HaveOccurred()) - - By("Remaining blue pod cannot communicate with red networks overlapping CIDR") - for _, ip := range redIPs { - if podIP == ip { - //don't try with your own IP - continue - } - _, err := e2ekubectl.RunKubectl( - namespaceBlue, - "exec", - fmt.Sprintf("%s-pod-%d", blue, numberOfPods-1), - "--", - "curl", - "--connect-timeout", - "2", - net.JoinHostPort(ip, fmt.Sprintf("%d", port)), - ) - Expect(err).To(MatchError(ContainSubstring("exit code 28"))) - } }, // can completely fill the L2 topology because it does not depend on the size of the clusters hostsubnet Entry( From 0a06cfe1652b6b9e12de7d6348794e994f050052 Mon Sep 17 00:00:00 2001 From: Nadia Pinaeva Date: Tue, 21 Jan 2025 15:29:33 +0100 Subject: [PATCH 30/51] docs: update (C)UDN CRD reference. This commit changes are result of the following command: crd-ref-docs --source-path ./go-controller/pkg/crd/userdefinednetwork --config=crd-docs-config.yaml --renderer=markdown --output-path=./docs/api-reference/userdefinednetwork-api-spec.md Signed-off-by: Nadia Pinaeva --- .../userdefinednetwork-api-spec.md | 68 +++++++++++++++---- 1 file changed, 53 insertions(+), 15 deletions(-) diff --git a/docs/api-reference/userdefinednetwork-api-spec.md b/docs/api-reference/userdefinednetwork-api-spec.md index 0ccdda7f06..b5f843aa72 100644 --- a/docs/api-reference/userdefinednetwork-api-spec.md +++ b/docs/api-reference/userdefinednetwork-api-spec.md @@ -22,7 +22,8 @@ _Underlying type:_ _string_ - +_Validation:_ +- MaxLength: 43 _Appears in:_ - [DualStackCIDRs](#dualstackcidrs) @@ -109,6 +110,7 @@ _Underlying type:_ _[CIDR](#cidr)_ _Validation:_ - MaxItems: 2 +- MaxLength: 43 - MinItems: 1 _Appears in:_ @@ -117,6 +119,42 @@ _Appears in:_ +#### IPAMConfig + + + + + +_Validation:_ +- MinProperties: 1 + +_Appears in:_ +- [Layer2Config](#layer2config) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `mode` _[IPAMMode](#ipammode)_ | Mode controls how much of the IP configuration will be managed by OVN.
`Enabled` means OVN-Kubernetes will apply IP configuration to the SDN infrastructure and it will also assign IPs
from the selected subnet to the individual pods.
`Disabled` means OVN-Kubernetes will only assign MAC addresses and provide layer 2 communication, letting users
configure IP addresses for the pods.
`Disabled` is only available for Secondary networks.
By disabling IPAM, any Kubernetes features that rely on selecting pods by IP will no longer function
(such as network policy, services, etc). Additionally, IP port security will also be disabled for interfaces attached to this network.
Defaults to `Enabled`. | | Enum: [Enabled Disabled]
| +| `lifecycle` _[NetworkIPAMLifecycle](#networkipamlifecycle)_ | Lifecycle controls IP addresses management lifecycle.
The only allowed value is Persistent. When set, OVN Kubernetes assigned IP addresses will be persisted in an
`ipamclaims.k8s.cni.cncf.io` object. These IP addresses will be reused by other pods if requested.
Only supported when mode is `Enabled`. | | Enum: [Persistent]
| + + +#### IPAMMode + +_Underlying type:_ _string_ + + + +_Validation:_ +- Enum: [Enabled Disabled] + +_Appears in:_ +- [IPAMConfig](#ipamconfig) + +| Field | Description | +| --- | --- | +| `Enabled` | | +| `Disabled` | | + + #### Layer2Config @@ -131,11 +169,11 @@ _Appears in:_ | Field | Description | Default | Validation | | --- | --- | --- | --- | -| `role` _[NetworkRole](#networkrole)_ | Role describes the network role in the pod.

Allowed value is "Secondary".
Secondary network is only assigned to pods that use `k8s.v1.cni.cncf.io/networks` annotation to select given network. | | Enum: [Primary Secondary]
Required: \{\}
| -| `mtu` _integer_ | MTU is the maximum transmission unit for a network.
MTU is optional, if not provided, the globally configured value in OVN-Kubernetes (defaults to 1400) is used for the network. | | Maximum: 65536
Minimum: 0
| -| `subnets` _[DualStackCIDRs](#dualstackcidrs)_ | Subnets are used for the pod network across the cluster.
Dual-stack clusters may set 2 subnets (one for each IP family), otherwise only 1 subnet is allowed.

The format should match standard CIDR notation (for example, "10.128.0.0/16").
This field may be omitted. In that case the logical switch implementing the network only provides layer 2 communication,
and users must configure IP addresses for the pods. As a consequence, Port security only prevents MAC spoofing. | | MaxItems: 2
MinItems: 1
| -| `joinSubnets` _[DualStackCIDRs](#dualstackcidrs)_ | JoinSubnets are used inside the OVN network topology.

Dual-stack clusters may set 2 subnets (one for each IP family), otherwise only 1 subnet is allowed.
This field is only allowed for "Primary" network.
It is not recommended to set this field without explicit need and understanding of the OVN network topology.
When omitted, the platform will choose a reasonable default which is subject to change over time. | | MaxItems: 2
MinItems: 1
| -| `ipamLifecycle` _[NetworkIPAMLifecycle](#networkipamlifecycle)_ | IPAMLifecycle controls IP addresses management lifecycle.

The only allowed value is Persistent. When set, OVN Kubernetes assigned IP addresses will be persisted in an
`ipamclaims.k8s.cni.cncf.io` object. These IP addresses will be reused by other pods if requested.
Only supported when "subnets" are set. | | Enum: [Persistent]
| +| `role` _[NetworkRole](#networkrole)_ | Role describes the network role in the pod.
Allowed value is "Secondary".
Secondary network is only assigned to pods that use `k8s.v1.cni.cncf.io/networks` annotation to select given network. | | Enum: [Primary Secondary]
Required: \{\}
| +| `mtu` _integer_ | MTU is the maximum transmission unit for a network.
MTU is optional, if not provided, the globally configured value in OVN-Kubernetes (defaults to 1400) is used for the network. | | Maximum: 65536
Minimum: 576
| +| `subnets` _[DualStackCIDRs](#dualstackcidrs)_ | Subnets are used for the pod network across the cluster.
Dual-stack clusters may set 2 subnets (one for each IP family), otherwise only 1 subnet is allowed.
The format should match standard CIDR notation (for example, "10.128.0.0/16").
This field must be omitted if `ipam.mode` is `Disabled`. | | MaxItems: 2
MaxLength: 43
MinItems: 1
| +| `joinSubnets` _[DualStackCIDRs](#dualstackcidrs)_ | JoinSubnets are used inside the OVN network topology.
Dual-stack clusters may set 2 subnets (one for each IP family), otherwise only 1 subnet is allowed.
This field is only allowed for "Primary" network.
It is not recommended to set this field without explicit need and understanding of the OVN network topology.
When omitted, the platform will choose a reasonable default which is subject to change over time. | | MaxItems: 2
MaxLength: 43
MinItems: 1
| +| `ipam` _[IPAMConfig](#ipamconfig)_ | IPAM section contains IPAM-related configuration for the network. | | MinProperties: 1
| #### Layer3Config @@ -152,10 +190,10 @@ _Appears in:_ | Field | Description | Default | Validation | | --- | --- | --- | --- | -| `role` _[NetworkRole](#networkrole)_ | Role describes the network role in the pod.

Allowed values are "Primary" and "Secondary".
Primary network is automatically assigned to every pod created in the same namespace.
Secondary network is only assigned to pods that use `k8s.v1.cni.cncf.io/networks` annotation to select given network. | | Enum: [Primary Secondary]
Required: \{\}
| -| `mtu` _integer_ | MTU is the maximum transmission unit for a network.

MTU is optional, if not provided, the globally configured value in OVN-Kubernetes (defaults to 1400) is used for the network. | | Maximum: 65536
Minimum: 0
| -| `subnets` _[Layer3Subnet](#layer3subnet) array_ | Subnets are used for the pod network across the cluster.

Dual-stack clusters may set 2 subnets (one for each IP family), otherwise only 1 subnet is allowed.
Given subnet is split into smaller subnets for every node. | | MaxItems: 2
MinItems: 1
| -| `joinSubnets` _[DualStackCIDRs](#dualstackcidrs)_ | JoinSubnets are used inside the OVN network topology.

Dual-stack clusters may set 2 subnets (one for each IP family), otherwise only 1 subnet is allowed.
This field is only allowed for "Primary" network.
It is not recommended to set this field without explicit need and understanding of the OVN network topology.
When omitted, the platform will choose a reasonable default which is subject to change over time. | | MaxItems: 2
MinItems: 1
| +| `role` _[NetworkRole](#networkrole)_ | Role describes the network role in the pod.
Allowed values are "Primary" and "Secondary".
Primary network is automatically assigned to every pod created in the same namespace.
Secondary network is only assigned to pods that use `k8s.v1.cni.cncf.io/networks` annotation to select given network. | | Enum: [Primary Secondary]
Required: \{\}
| +| `mtu` _integer_ | MTU is the maximum transmission unit for a network.
MTU is optional, if not provided, the globally configured value in OVN-Kubernetes (defaults to 1400) is used for the network. | | Maximum: 65536
Minimum: 576
| +| `subnets` _[Layer3Subnet](#layer3subnet) array_ | Subnets are used for the pod network across the cluster.
Dual-stack clusters may set 2 subnets (one for each IP family), otherwise only 1 subnet is allowed.
Given subnet is split into smaller subnets for every node. | | MaxItems: 2
MinItems: 1
| +| `joinSubnets` _[DualStackCIDRs](#dualstackcidrs)_ | JoinSubnets are used inside the OVN network topology.
Dual-stack clusters may set 2 subnets (one for each IP family), otherwise only 1 subnet is allowed.
This field is only allowed for "Primary" network.
It is not recommended to set this field without explicit need and understanding of the OVN network topology.
When omitted, the platform will choose a reasonable default which is subject to change over time. | | MaxItems: 2
MaxLength: 43
MinItems: 1
| #### Layer3Subnet @@ -171,8 +209,8 @@ _Appears in:_ | Field | Description | Default | Validation | | --- | --- | --- | --- | -| `cidr` _[CIDR](#cidr)_ | CIDR specifies L3Subnet, which is split into smaller subnets for every node. | | | -| `hostSubnet` _integer_ | HostSubnet specifies the subnet size for every node.

When not set, it will be assigned automatically. | | Maximum: 127
Minimum: 1
| +| `cidr` _[CIDR](#cidr)_ | CIDR specifies L3Subnet, which is split into smaller subnets for every node. | | MaxLength: 43
| +| `hostSubnet` _integer_ | HostSubnet specifies the subnet size for every node.
When not set, it will be assigned automatically. | | Maximum: 127
Minimum: 1
| #### NetworkIPAMLifecycle @@ -185,7 +223,7 @@ _Validation:_ - Enum: [Persistent] _Appears in:_ -- [Layer2Config](#layer2config) +- [IPAMConfig](#ipamconfig) | Field | Description | | --- | --- | @@ -224,7 +262,7 @@ _Appears in:_ | Field | Description | Default | Validation | | --- | --- | --- | --- | -| `topology` _[NetworkTopology](#networktopology)_ | Topology describes network configuration.

Allowed values are "Layer3", "Layer2".
Layer3 topology creates a layer 2 segment per node, each with a different subnet. Layer 3 routing is used to interconnect node subnets.
Layer2 topology creates one logical switch shared by all nodes. | | Enum: [Layer2 Layer3]
Required: \{\}
| +| `topology` _[NetworkTopology](#networktopology)_ | Topology describes network configuration.
Allowed values are "Layer3", "Layer2".
Layer3 topology creates a layer 2 segment per node, each with a different subnet. Layer 3 routing is used to interconnect node subnets.
Layer2 topology creates one logical switch shared by all nodes. | | Enum: [Layer2 Layer3]
Required: \{\}
| | `layer3` _[Layer3Config](#layer3config)_ | Layer3 is the Layer3 topology configuration. | | | | `layer2` _[Layer2Config](#layer2config)_ | Layer2 is the Layer2 topology configuration. | | | @@ -299,7 +337,7 @@ _Appears in:_ | Field | Description | Default | Validation | | --- | --- | --- | --- | -| `topology` _[NetworkTopology](#networktopology)_ | Topology describes network configuration.

Allowed values are "Layer3", "Layer2".
Layer3 topology creates a layer 2 segment per node, each with a different subnet. Layer 3 routing is used to interconnect node subnets.
Layer2 topology creates one logical switch shared by all nodes. | | Enum: [Layer2 Layer3]
Required: \{\}
| +| `topology` _[NetworkTopology](#networktopology)_ | Topology describes network configuration.
Allowed values are "Layer3", "Layer2".
Layer3 topology creates a layer 2 segment per node, each with a different subnet. Layer 3 routing is used to interconnect node subnets.
Layer2 topology creates one logical switch shared by all nodes. | | Enum: [Layer2 Layer3]
Required: \{\}
| | `layer3` _[Layer3Config](#layer3config)_ | Layer3 is the Layer3 topology configuration. | | | | `layer2` _[Layer2Config](#layer2config)_ | Layer2 is the Layer2 topology configuration. | | | From 16ef5d8ca09ea27074c1b4e9d9956a29ca6a9f7f Mon Sep 17 00:00:00 2001 From: Patryk Diak Date: Tue, 21 Jan 2025 15:31:24 +0100 Subject: [PATCH 31/51] Ignore removed namespaces in clustermanager When a namespace is removed it shouldn't be considered as an error during reconciliation. Signed-off-by: Patryk Diak --- .../clustermanager/userdefinednetwork/controller.go | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/go-controller/pkg/clustermanager/userdefinednetwork/controller.go b/go-controller/pkg/clustermanager/userdefinednetwork/controller.go index bc90b1ac0e..36df2d41d6 100644 --- a/go-controller/pkg/clustermanager/userdefinednetwork/controller.go +++ b/go-controller/pkg/clustermanager/userdefinednetwork/controller.go @@ -253,15 +253,19 @@ func (c *Controller) ReconcileNetAttachDef(key string) error { // ReconcileNamespace enqueue relevant Cluster UDN CR requests following namespace events. func (c *Controller) ReconcileNamespace(key string) error { - c.namespaceTrackerLock.RLock() - defer c.namespaceTrackerLock.RUnlock() - namespace, err := c.namespaceInformer.Lister().Get(key) if err != nil { - return fmt.Errorf("failed to get namespace %q from cahce: %w", key, err) + // Ignore removed namespaces + if kerrors.IsNotFound(err) { + return nil + } + return fmt.Errorf("failed to get namespace %q from cache: %w", key, err) } namespaceLabels := labels.Set(namespace.Labels) + c.namespaceTrackerLock.RLock() + defer c.namespaceTrackerLock.RUnlock() + for cudnName, affectedNamespaces := range c.namespaceTracker { affectedNamespace := affectedNamespaces.Has(key) From 0306d4cee091c8da2f7bcdf825467e678c636d42 Mon Sep 17 00:00:00 2001 From: Patryk Diak Date: Wed, 22 Jan 2025 14:12:59 +0100 Subject: [PATCH 32/51] Do not rely on the active network while removing the EndpointSlice Do not rely on the active network while removing the EndpointSlice from the nodeport watcher as there is no guarantee that the network is still there. 
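To make the intent of the change concrete, here is a small, self-contained Go sketch (illustrative only, not part of the patch). It shows the idea of resolving the owning network purely from the mirrored EndpointSlice annotation, with a fallback to the default network, so nothing depends on the namespace's active network still existing. The helper name and the constant values are stand-ins; the annotation key mirrors the value defined earlier in this series and the default network name is an assumption.

```go
package main

import (
	"fmt"

	discovery "k8s.io/api/discovery/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// Assumed constants; the canonical ones live in the ovn-kubernetes types package.
const (
	userDefinedNetworkEndpointSliceAnnotation = "k8s.ovn.org/endpointslice-network"
	defaultNetworkName                        = "default"
)

// networkNameForEndpointSlice derives the owning network from the EndpointSlice
// annotation alone, falling back to the default network when it is absent.
func networkNameForEndpointSlice(epSlice *discovery.EndpointSlice) string {
	if name, ok := epSlice.Annotations[userDefinedNetworkEndpointSliceAnnotation]; ok {
		return name
	}
	return defaultNetworkName
}

func main() {
	eps := &discovery.EndpointSlice{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "svc-abc12",
			Namespace: "blue",
			Annotations: map[string]string{
				userDefinedNetworkEndpointSliceAnnotation: "tenantblue",
			},
		},
	}
	fmt.Println(networkNameForEndpointSlice(eps)) // prints: tenantblue
}
```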
Signed-off-by: Patryk Diak --- go-controller/pkg/node/gateway_shared_intf.go | 19 +++++++++---------- go-controller/pkg/util/util.go | 10 +++++----- go-controller/pkg/util/util_unit_test.go | 2 +- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/go-controller/pkg/node/gateway_shared_intf.go b/go-controller/pkg/node/gateway_shared_intf.go index 037a9ef459..0d3060f2c1 100644 --- a/go-controller/pkg/node/gateway_shared_intf.go +++ b/go-controller/pkg/node/gateway_shared_intf.go @@ -1091,7 +1091,7 @@ func (npw *nodePortWatcher) AddEndpointSlice(epSlice *discovery.EndpointSlice) e return nil } - svcNamespacedName, err := util.ServiceFromEndpointSlice(epSlice, netInfo) + svcNamespacedName, err := util.ServiceFromEndpointSlice(epSlice, netInfo.GetNetworkName()) if err != nil || svcNamespacedName == nil { return err } @@ -1158,21 +1158,20 @@ func (npw *nodePortWatcher) DeleteEndpointSlice(epSlice *discovery.EndpointSlice var errors []error var hasLocalHostNetworkEp = false - netInfo, err := npw.networkManager.GetActiveNetworkForNamespace(epSlice.Namespace) - if err != nil { - return fmt.Errorf("error getting active network for endpointslice %s in namespace %s: %w", epSlice.Name, epSlice.Namespace, err) - } - if util.IsNetworkSegmentationSupportEnabled() && !util.IsEndpointSliceForNetwork(epSlice, netInfo) { - return nil + networkName := types.DefaultNetworkName + if util.IsNetworkSegmentationSupportEnabled() { + if netName, ok := epSlice.Annotations[types.UserDefinedNetworkEndpointSliceAnnotation]; ok { + networkName = netName + } } klog.V(5).Infof("Deleting endpointslice %s in namespace %s", epSlice.Name, epSlice.Namespace) // remove rules for endpoints and add back normal ones - namespacedName, err := util.ServiceFromEndpointSlice(epSlice, netInfo) + namespacedName, err := util.ServiceFromEndpointSlice(epSlice, networkName) if err != nil || namespacedName == nil { return err } - epSlices, err := npw.watchFactory.GetServiceEndpointSlices(namespacedName.Namespace, namespacedName.Name, netInfo.GetNetworkName()) + epSlices, err := npw.watchFactory.GetServiceEndpointSlices(namespacedName.Namespace, namespacedName.Name, networkName) if err != nil { if !kerrors.IsNotFound(err) { return fmt.Errorf("error retrieving all endpointslices for service %s/%s during endpointslice delete on %s: %w", @@ -1233,7 +1232,7 @@ func (npw *nodePortWatcher) UpdateEndpointSlice(oldEpSlice, newEpSlice *discover return nil } - namespacedName, err := util.ServiceFromEndpointSlice(newEpSlice, netInfo) + namespacedName, err := util.ServiceFromEndpointSlice(newEpSlice, netInfo.GetNetworkName()) if err != nil || namespacedName == nil { return err } diff --git a/go-controller/pkg/util/util.go b/go-controller/pkg/util/util.go index 37b85afb8c..c2b81de30a 100644 --- a/go-controller/pkg/util/util.go +++ b/go-controller/pkg/util/util.go @@ -630,11 +630,11 @@ func IsUDNEnabledService(key string) bool { // ServiceFromEndpointSlice returns the namespaced name of the service that corresponds to the given endpointSlice // in the given network. If the service label is missing the returned namespaced name and the error are nil. 
-func ServiceFromEndpointSlice(eps *discovery.EndpointSlice, netInfo NetInfo) (*k8stypes.NamespacedName, error) { +func ServiceFromEndpointSlice(eps *discovery.EndpointSlice, netName string) (*k8stypes.NamespacedName, error) { labelKey := discovery.LabelServiceName - if netInfo.IsPrimaryNetwork() { - if eps.Annotations[types.UserDefinedNetworkEndpointSliceAnnotation] != netInfo.GetNetworkName() { - return nil, fmt.Errorf("endpointslice %s/%s does not belong to %s network", eps.Namespace, eps.Name, netInfo.GetNetworkName()) + if netName != types.DefaultNetworkName { + if eps.Annotations[types.UserDefinedNetworkEndpointSliceAnnotation] != netName { + return nil, fmt.Errorf("endpointslice %s/%s does not belong to %s network", eps.Namespace, eps.Name, netName) } labelKey = types.LabelUserDefinedServiceName } @@ -645,7 +645,7 @@ func ServiceFromEndpointSlice(eps *discovery.EndpointSlice, netInfo NetInfo) (*k if svcName == "" { return nil, fmt.Errorf("endpointslice %s/%s has empty svcName for label %s in network %s", - eps.Namespace, eps.Name, labelKey, netInfo.GetNetworkName()) + eps.Namespace, eps.Name, labelKey, netName) } return &k8stypes.NamespacedName{Namespace: eps.Namespace, Name: svcName}, nil diff --git a/go-controller/pkg/util/util_unit_test.go b/go-controller/pkg/util/util_unit_test.go index e19e8a237c..ca1eac7a83 100644 --- a/go-controller/pkg/util/util_unit_test.go +++ b/go-controller/pkg/util/util_unit_test.go @@ -376,7 +376,7 @@ func TestServiceFromEndpointSlice(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got, err := ServiceFromEndpointSlice(tt.args.eps, tt.args.netInfo) + got, err := ServiceFromEndpointSlice(tt.args.eps, tt.args.netInfo.GetNetworkName()) if !tt.wantErr(t, err, fmt.Sprintf("ServiceFromEndpointSlice(%v, %v)", tt.args.eps, tt.args.netInfo)) { return } From c5b834d4d3edfa798c062d39ff5e5f3372d2d9ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?= Date: Tue, 21 Jan 2025 13:22:24 +0000 Subject: [PATCH 33/51] EIP node: skip OVN managed interfaces that don't serve as secondary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On ovnkube-node rollout, the node EIP controller was removing routes from route tables associated to UDN VRFs potentially causing either connectivity issues for the UDN or even preventing the UDN controller to start. The node EIP controller should only be handling routes of routing tables associated to secondary interfaces. 
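As a rough illustration (not part of the patch), the sketch below captures the filtering rule the fix introduces: links whose name carries the OVN management-interface prefix are skipped by the node EgressIP repair loop, so UDN management ports and their routing tables are left alone. The prefix value used here is an assumption; the canonical constant is types.K8sMgmtIntfNamePrefix.

```go
package main

import (
	"fmt"
	"strings"
)

// Assumed prefix for OVN-managed management interfaces (e.g. ovn-k8s-mp0).
const k8sMgmtIntfNamePrefix = "ovn-k8s-mp"

// isEgressIPCandidateLink reports whether a link should be considered by the
// node EgressIP repair loop: OVN-managed management interfaces are excluded.
func isEgressIPCandidateLink(linkName string) bool {
	return !strings.HasPrefix(linkName, k8sMgmtIntfNamePrefix)
}

func main() {
	for _, name := range []string{"eth1", "ovn-k8s-mp0", "ovn-k8s-mp3"} {
		fmt.Printf("%s -> consider for EgressIP repair: %v\n", name, isEgressIPCandidateLink(name))
	}
}
```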
Signed-off-by: Jaime Caamaño Ruiz
---
 go-controller/pkg/node/controllers/egressip/egressip.go | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/go-controller/pkg/node/controllers/egressip/egressip.go b/go-controller/pkg/node/controllers/egressip/egressip.go
index 60767d674c..881e985f64 100644
--- a/go-controller/pkg/node/controllers/egressip/egressip.go
+++ b/go-controller/pkg/node/controllers/egressip/egressip.go
@@ -23,6 +23,7 @@ import (
 	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/linkmanager"
 	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/routemanager"
 	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/syncmap"
+	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types"
 	"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util"
 	utilerrors "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util/errors"

@@ -905,6 +906,10 @@ func (c *Controller) repairNode() error {
 	for _, link := range links {
 		link := link
 		linkName := link.Attrs().Name
+		// skip OVN managed interfaces that don't serve as secondary interfaces
+		if strings.HasPrefix(linkName, types.K8sMgmtIntfNamePrefix) {
+			continue
+		}
 		linkIdx := link.Attrs().Index
 		addresses, err := util.GetFilteredInterfaceAddrs(link, c.v4, c.v6)
 		if err != nil {

From e65cc4e73b88c81c613c4b593ea179a69e14e700 Mon Sep 17 00:00:00 2001
From: Tim Rozet
Date: Wed, 22 Jan 2025 12:35:24 -0500
Subject: [PATCH 34/51] Unit tests: set klog level explicitly

We use PrepareTestConfig to set the logging level to 5, but that was
not always taking effect for klog. As a result, some unit tests print
only libovsdb logs and default-level klog lines, and we miss a lot of
the debug logs needed to investigate unit test failures.

Signed-off-by: Tim Rozet
---
 go-controller/pkg/config/config.go | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/go-controller/pkg/config/config.go b/go-controller/pkg/config/config.go
index 0f6d2fa381..2759f071f2 100644
--- a/go-controller/pkg/config/config.go
+++ b/go-controller/pkg/config/config.go
@@ -662,6 +662,12 @@ func PrepareTestConfig() error {
 		return err
 	}

+	// set klog level here as some tests will not call InitConfig
+	var level klog.Level
+	if err := level.Set(strconv.Itoa(Logging.Level)); err != nil {
+		return fmt.Errorf("failed to set klog log level %v", err)
+	}
+
 	// Don't pick up defaults from the environment
 	os.Unsetenv("KUBECONFIG")
 	os.Unsetenv("K8S_CACERT")

From f0723b3078536b78b4467ed829e3abd71ba696db Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jaime=20Caama=C3=B1o=20Ruiz?=
Date: Wed, 22 Jan 2025 17:48:23 +0000
Subject: [PATCH 35/51] Set network ID on netInfo even when already annotated
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On cluster manager restart there is no need to update the annotation of
existing networks, but we were also not setting the annotated ID on
netInfo. This failed to ensure the network and in turn made the
controller believe the network was stale, cleaning up the annotated ID
on the node, which prevented the network from ever starting and caused
a crashloop.
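A toy sketch of the corrected flow follows (illustrative only; mutableNetInfo and syncNetworkID are hypothetical stand-ins, not the real util types): the ID parsed from an already-present annotation must still be copied onto the in-memory network, even when the annotation itself needs no update.

```go
package main

import (
	"fmt"
	"strconv"
)

// mutableNetInfo is a toy stand-in for the mutable network info used by the
// controller; only the piece needed to illustrate the fix is modelled.
type mutableNetInfo struct {
	name string
	id   int
}

func (n *mutableNetInfo) SetNetworkID(id int) { n.id = id }

// syncNetworkID always propagates the ID parsed from the existing annotation
// onto the in-memory network. Skipping this step when no annotation update is
// needed is exactly what made the network look stale.
func syncNetworkID(annotated string, net *mutableNetInfo) error {
	id, err := strconv.Atoi(annotated)
	if err != nil {
		return fmt.Errorf("invalid annotated network ID %q: %v", annotated, err)
	}
	net.SetNetworkID(id)
	return nil
}

func main() {
	net := &mutableNetInfo{name: "tenantblue", id: -1}
	if err := syncNetworkID("3", net); err != nil {
		panic(err)
	}
	fmt.Printf("network %s has ID %d\n", net.name, net.id)
}
```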
Signed-off-by: Jaime Caamaño Ruiz --- .../pkg/networkmanager/nad_controller.go | 1 + .../pkg/networkmanager/nad_controller_test.go | 41 +++++++++++++++++-- .../networkmanager/network_controller_test.go | 6 +-- go-controller/pkg/util/multi_network.go | 2 + 4 files changed, 44 insertions(+), 6 deletions(-) diff --git a/go-controller/pkg/networkmanager/nad_controller.go b/go-controller/pkg/networkmanager/nad_controller.go index 9c60719c00..2a5d26edc3 100644 --- a/go-controller/pkg/networkmanager/nad_controller.go +++ b/go-controller/pkg/networkmanager/nad_controller.go @@ -620,6 +620,7 @@ func (c *nadController) handleNetworkID(old util.NetInfo, new util.MutableNetInf delete(annotations, types.OvnNetworkIDAnnotation) } if len(annotations) == 0 { + new.SetNetworkID(id) return nil } diff --git a/go-controller/pkg/networkmanager/nad_controller_test.go b/go-controller/pkg/networkmanager/nad_controller_test.go index a683ef4347..33ff13a797 100644 --- a/go-controller/pkg/networkmanager/nad_controller_test.go +++ b/go-controller/pkg/networkmanager/nad_controller_test.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "fmt" + "strconv" "strings" "sync" "testing" @@ -618,8 +619,9 @@ func TestSyncAll(t *testing.T) { MTU: 1400, } type TestNAD struct { - name string - netconf *ovncnitypes.NetConf + name string + netconf *ovncnitypes.NetConf + networkID string } tests := []struct { name string @@ -644,6 +646,16 @@ func TestSyncAll(t *testing.T) { }, syncAllError: ErrNetworkControllerTopologyNotManaged, }, + { + name: "nad already annotated with network ID", + testNADs: []TestNAD{ + { + name: "test/nad1", + netconf: network_A, + networkID: "1", + }, + }, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -689,7 +701,11 @@ func TestSyncAll(t *testing.T) { namespace, name, err := cache.SplitMetaNamespaceKey(testNAD.name) g.Expect(err).ToNot(gomega.HaveOccurred()) testNAD.netconf.NADName = testNAD.name - nad, err := buildNAD(name, namespace, testNAD.netconf) + nadAnnotations := map[string]string{ + types.OvnNetworkNameAnnotation: testNAD.netconf.Name, + types.OvnNetworkIDAnnotation: testNAD.networkID, + } + nad, err := buildNADWithAnnotations(name, namespace, testNAD.netconf, nadAnnotations) g.Expect(err).ToNot(gomega.HaveOccurred()) _, err = fakeClient.NetworkAttchDefClient.K8sCniCncfIoV1().NetworkAttachmentDefinitions(namespace).Create( context.Background(), @@ -700,6 +716,13 @@ func TestSyncAll(t *testing.T) { netInfo := expectedNetworks[testNAD.netconf.Name] if netInfo == nil { netInfo, err = util.NewNetInfo(testNAD.netconf) + mutableNetInfo := util.NewMutableNetInfo(netInfo) + if testNAD.networkID != "" { + id, err := strconv.Atoi(testNAD.networkID) + g.Expect(err).ToNot(gomega.HaveOccurred()) + mutableNetInfo.SetNetworkID(id) + netInfo = mutableNetInfo + } g.Expect(err).ToNot(gomega.HaveOccurred()) expectedNetworks[testNAD.netconf.Name] = netInfo if netInfo.IsPrimaryNetwork() && !netInfo.IsDefault() { @@ -726,6 +749,9 @@ func TestSyncAll(t *testing.T) { for name, network := range expectedNetworks { g.Expect(actualNetworks).To(gomega.HaveKey(name)) g.Expect(util.AreNetworksCompatible(actualNetworks[name], network)).To(gomega.BeTrue()) + if network.GetNetworkID() != util.InvalidID { + g.Expect(actualNetworks[name].GetNetworkID()).To(gomega.Equal(network.GetNetworkID())) + } } actualPrimaryNetwork, err := controller.Interface().GetActiveNetworkForNamespace("test") @@ -753,3 +779,12 @@ func buildNAD(name, namespace string, network *ovncnitypes.NetConf) (*nettypes.N } return nad, nil 
} + +func buildNADWithAnnotations(name, namespace string, network *ovncnitypes.NetConf, annotations map[string]string) (*nettypes.NetworkAttachmentDefinition, error) { + nad, err := buildNAD(name, namespace, network) + if err != nil { + return nil, err + } + nad.Annotations = annotations + return nad, nil +} diff --git a/go-controller/pkg/networkmanager/network_controller_test.go b/go-controller/pkg/networkmanager/network_controller_test.go index a477c2192c..c753bca0db 100644 --- a/go-controller/pkg/networkmanager/network_controller_test.go +++ b/go-controller/pkg/networkmanager/network_controller_test.go @@ -184,11 +184,11 @@ func TestSetAdvertisements(t *testing.T) { namespace, name, err := cache.SplitMetaNamespaceKey(testNADName) g.Expect(err).ToNot(gomega.HaveOccurred()) - nad, err := buildNAD(name, namespace, tt.network) - g.Expect(err).ToNot(gomega.HaveOccurred()) - nad.Annotations = map[string]string{ + nadAnnotations := map[string]string{ types.OvnRouteAdvertisementsKey: "[\"" + tt.ra.Name + "\"]", } + nad, err := buildNADWithAnnotations(name, namespace, tt.network, nadAnnotations) + g.Expect(err).ToNot(gomega.HaveOccurred()) _, err = fakeClient.KubeClient.CoreV1().Nodes().Create(context.Background(), &tt.node, v1.CreateOptions{}) g.Expect(err).ToNot(gomega.HaveOccurred()) diff --git a/go-controller/pkg/util/multi_network.go b/go-controller/pkg/util/multi_network.go index a2ab9dc42e..eefe71aeaf 100644 --- a/go-controller/pkg/util/multi_network.go +++ b/go-controller/pkg/util/multi_network.go @@ -286,6 +286,8 @@ func (l *mutableNetInfo) copyFrom(r *mutableNetInfo) { } func (nInfo *mutableNetInfo) GetNetworkID() int { + nInfo.RLock() + defer nInfo.RUnlock() return nInfo.id } From f21f8fb5c42a2e030a3d480587b558077d5d0a69 Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Wed, 22 Jan 2025 15:22:52 -0500 Subject: [PATCH 36/51] Fixes SNAT removal with egress IP When a single ovnkube-controller manages the NBDB for multiple nodes, such as without IC or IC with multiple nodes per zone, the predicate search for NATs to remove would accidentally remove egress IP SNATs that were not part of the status being removed. For example in the test: EgressIP UPDATE should update OVN on EgressIP .spec.egressips change interconnect disabled; non-ic - single zone setup The test will: 1. create node 1 and 2, with egress ip .101 and .102, respectively 2. change the egress ip on node 1 to be .103 The test would flake because the SNAT for .102 would be missing for node 2. The reason this happens is because when the egress IP status is updated for .103 to be added on node1: 1. the code will call e.deletePreviousNetworkPodEgressIPAssignments 2. the code will then call e.addPodEgressIPAssignments When the delete is called, it passes the status to remove any stale pod configuration for this status. However, it was searching for any SNAT for the egress IP name, which would in turn remove the SNAT for .102 as well. This fixes it by passing a predicate to ensure the external IP in the SNAT matches the egress IP status. 
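The following standalone sketch (not part of the patch; nat is a trimmed stand-in for nbdb.NAT) illustrates why the extra predicate matters: only NATs whose ExternalIP matches the egress IP of the status being removed are selected for deletion, so node 2's SNAT for .102 survives when .103 replaces .101 on node 1.

```go
package main

import "fmt"

// nat models only the field the new predicate inspects.
type nat struct {
	Name       string
	ExternalIP string
}

// natsToDelete keeps a NAT only when its ExternalIP matches the egress IP of
// the status being removed; matching on the EgressIP name alone (the old
// behavior) would also select SNATs belonging to other statuses.
func natsToDelete(all []nat, statusEgressIP string) []nat {
	filterByIP := func(n nat) bool { return n.ExternalIP == statusEgressIP }
	var out []nat
	for _, n := range all {
		if filterByIP(n) {
			out = append(out, n)
		}
	}
	return out
}

func main() {
	nats := []nat{
		{Name: "egressip-node1", ExternalIP: "10.0.0.103"},
		{Name: "egressip-node2", ExternalIP: "10.0.0.102"},
	}
	// Removing the status for .103 must leave node2's SNAT for .102 untouched.
	fmt.Println(natsToDelete(nats, "10.0.0.103"))
}
```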
Signed-off-by: Tim Rozet
---
 go-controller/pkg/ovn/egressip.go | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go
index d9767386a4..184589ef17 100644
--- a/go-controller/pkg/ovn/egressip.go
+++ b/go-controller/pkg/ovn/egressip.go
@@ -3533,8 +3533,11 @@ func (e *EgressIPController) createNATRuleOps(ni util.NetInfo, ops []ovsdb.Opera
 func (e *EgressIPController) deleteNATRuleOps(ni util.NetInfo, ops []ovsdb.Operation, status egressipv1.EgressIPStatusItem, egressIPName, podNamespace, podName string) ([]ovsdb.Operation, error) {
 	var err error
-	pV4 := libovsdbops.GetPredicate[*nbdb.NAT](getEgressIPNATDbIDs(egressIPName, podNamespace, podName, IPFamilyValueV4, e.controllerName), nil)
-	pV6 := libovsdbops.GetPredicate[*nbdb.NAT](getEgressIPNATDbIDs(egressIPName, podNamespace, podName, IPFamilyValueV6, e.controllerName), nil)
+	filterByIP := func(n *nbdb.NAT) bool {
+		return n.ExternalIP == status.EgressIP
+	}
+	pV4 := libovsdbops.GetPredicate[*nbdb.NAT](getEgressIPNATDbIDs(egressIPName, podNamespace, podName, IPFamilyValueV4, e.controllerName), filterByIP)
+	pV6 := libovsdbops.GetPredicate[*nbdb.NAT](getEgressIPNATDbIDs(egressIPName, podNamespace, podName, IPFamilyValueV6, e.controllerName), filterByIP)
 	router := &nbdb.LogicalRouter{
 		Name: ni.GetNetworkScopedGWRouterName(status.Node),
 	}

From bbc56776f4db7a9afaf2d894988e44e561698b32 Mon Sep 17 00:00:00 2001
From: Periyasamy Palanisamy
Date: Wed, 22 Jan 2025 18:45:46 +0100
Subject: [PATCH 37/51] Fix race between NAD and NetPol deletions

The NAD can be deleted even before the NetPol delete event is handled,
in which case the NetPol deletion never happens and an error is thrown
on every retry. This change skips that primary network error and
proceeds with deleting the netpol object.

Signed-off-by: Periyasamy Palanisamy
---
 go-controller/pkg/ovn/base_network_controller.go | 9 ++++++---
 go-controller/pkg/util/util.go | 5 +++++
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/go-controller/pkg/ovn/base_network_controller.go b/go-controller/pkg/ovn/base_network_controller.go
index 38f7d1b834..d38dc3fb1a 100644
--- a/go-controller/pkg/ovn/base_network_controller.go
+++ b/go-controller/pkg/ovn/base_network_controller.go
@@ -1005,10 +1005,13 @@ func (bnc *BaseNetworkController) DeleteResourceCommon(objType reflect.Type, obj
 			return fmt.Errorf("could not cast obj of type %T to *knet.NetworkPolicy", obj)
 		}
 		netinfo, err := bnc.networkManager.GetActiveNetworkForNamespace(knp.Namespace)
-		if err != nil {
-			return fmt.Errorf("could not get active network for namespace %s: %v", knp.Namespace, err)
+		// The InvalidPrimaryNetworkError error is thrown when UDN is not found because
+		// it has been already deleted, so just proceed with deleting NetworkPolicy in
+		// such a scenario as well.
+ if err != nil && !util.IsInvalidPrimaryNetworkError(err) { + return fmt.Errorf("could not get active network for namespace %s: %w", knp.Namespace, err) } - if bnc.GetNetworkName() != netinfo.GetNetworkName() { + if err == nil && bnc.GetNetworkName() != netinfo.GetNetworkName() { return nil } return bnc.deleteNetworkPolicy(knp) diff --git a/go-controller/pkg/util/util.go b/go-controller/pkg/util/util.go index c2b81de30a..b0867704d3 100644 --- a/go-controller/pkg/util/util.go +++ b/go-controller/pkg/util/util.go @@ -402,6 +402,11 @@ func NewInvalidPrimaryNetworkError(namespace string) *InvalidPrimaryNetworkError return &InvalidPrimaryNetworkError{namespace: namespace} } +func IsInvalidPrimaryNetworkError(err error) bool { + var invalidPrimaryNetworkError *InvalidPrimaryNetworkError + return errors.As(err, &invalidPrimaryNetworkError) +} + func GetUserDefinedNetworkRole(isPrimary bool) string { networkRole := types.NetworkRoleSecondary if isPrimary { From ab83afb42d69d719442c3c7fad96942d331d4244 Mon Sep 17 00:00:00 2001 From: Nadia Pinaeva Date: Thu, 23 Jan 2025 10:35:21 +0100 Subject: [PATCH 38/51] UDN isolation: trigger update on open-default-ports annotation change. Signed-off-by: Nadia Pinaeva --- go-controller/pkg/node/udn_isolation.go | 3 ++- go-controller/pkg/node/udn_isolation_test.go | 21 ++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/go-controller/pkg/node/udn_isolation.go b/go-controller/pkg/node/udn_isolation.go index 83ee56fbca..998108f82f 100644 --- a/go-controller/pkg/node/udn_isolation.go +++ b/go-controller/pkg/node/udn_isolation.go @@ -408,7 +408,8 @@ func podNeedsUpdate(oldObj, newObj *v1.Pod) bool { } // react to pod IP changes return !reflect.DeepEqual(oldObj.Status, newObj.Status) || - oldObj.Annotations[util.OvnPodAnnotationName] != newObj.Annotations[util.OvnPodAnnotationName] + oldObj.Annotations[util.OvnPodAnnotationName] != newObj.Annotations[util.OvnPodAnnotationName] || + oldObj.Annotations[util.UDNOpenPortsAnnotationName] != newObj.Annotations[util.UDNOpenPortsAnnotationName] } func (m *UDNHostIsolationManager) reconcilePod(key string) error { diff --git a/go-controller/pkg/node/udn_isolation_test.go b/go-controller/pkg/node/udn_isolation_test.go index ffb6f5fd4b..bbd717d293 100644 --- a/go-controller/pkg/node/udn_isolation_test.go +++ b/go-controller/pkg/node/udn_isolation_test.go @@ -449,6 +449,27 @@ add rule inet ovn-kubernetes udn-isolation ip6 daddr @udn-pod-default-ips-v6 dro }).Should(Succeed()) }) + It("on pod update", func() { + start( + newPodWithIPs(nadNamespace, "pod1", true, []string{"1.1.1.1", "2014:100:200::1"})) + err := nodenft.MatchNFTRules(getExpectedDumpWithOpenPorts([]string{"1.1.1.1"}, []string{"2014:100:200::1"}, nil), nft.Dump()) + Expect(err).NotTo(HaveOccurred()) + pod, err := fakeClient.KubeClient.CoreV1().Pods(nadNamespace).Get(context.TODO(), + "pod1", metav1.GetOptions{}) + Expect(err).NotTo(HaveOccurred()) + pod.Annotations[util.UDNOpenPortsAnnotationName] = getOpenPortAnnotation([]util.OpenPort{{Protocol: "tcp", Port: intRef(80)}})[util.UDNOpenPortsAnnotationName] + _, err = fakeClient.KubeClient.CoreV1().Pods(nadNamespace).Update(context.TODO(), + pod, metav1.UpdateOptions{}) + Expect(err).NotTo(HaveOccurred()) + + Eventually(func() error { + return nodenft.MatchNFTRules(getExpectedDumpWithOpenPorts([]string{"1.1.1.1"}, []string{"2014:100:200::1"}, map[string][]*util.OpenPort{ + "1.1.1.1": {{Protocol: "tcp", Port: intRef(80)}}, + "2014:100:200::1": {{Protocol: "tcp", Port: intRef(80)}}, + }), 
nft.Dump()) + }).Should(Succeed()) + }) + It("on pod delete", func() { start( newPodWithIPs(nadNamespace, "pod1", true, []string{"1.1.1.1", "2014:100:200::1"}, util.OpenPort{Protocol: "tcp", Port: intRef(80)}), From 5530ab8ade9a911818751bad8b60ca87fffe5738 Mon Sep 17 00:00:00 2001 From: Tim Rozet Date: Fri, 6 Dec 2024 09:28:56 -0500 Subject: [PATCH 39/51] Always use queued informer Increases async performance of informer cache being able to always queue events and not blocking while performing ADD/UPDATE/DELETE operation. Signed-off-by: Tim Rozet --- go-controller/pkg/factory/factory.go | 126 +++++++++++++++++++-------- go-controller/pkg/factory/handler.go | 103 ++-------------------- 2 files changed, 93 insertions(+), 136 deletions(-) diff --git a/go-controller/pkg/factory/factory.go b/go-controller/pkg/factory/factory.go index 845dff4a24..8716a2f2d1 100644 --- a/go-controller/pkg/factory/factory.go +++ b/go-controller/pkg/factory/factory.go @@ -147,6 +147,8 @@ const ( // namespace, node, and pod handlers defaultNumEventQueues uint32 = 15 + // rest of handlers + minNumEventQueues = 1 // default priorities for various handlers (also the highest priority) defaultHandlerPriority int = 0 @@ -220,7 +222,8 @@ func NewMasterWatchFactory(ovnClientset *util.OVNMasterClientset) (*WatchFactory } wf.cpipcFactory = ocpcloudnetworkinformerfactory.NewSharedInformerFactory(ovnClientset.CloudNetworkClient, resyncInterval) if util.PlatformTypeIsEgressIPCloudProvider() { - wf.informers[CloudPrivateIPConfigType], err = newInformer(CloudPrivateIPConfigType, wf.cpipcFactory.Cloud().V1().CloudPrivateIPConfigs().Informer()) + wf.informers[CloudPrivateIPConfigType], err = newQueuedInformer(CloudPrivateIPConfigType, + wf.cpipcFactory.Cloud().V1().CloudPrivateIPConfigs().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err } @@ -336,11 +339,13 @@ func NewOVNKubeControllerWatchFactory(ovnClientset *util.OVNKubeControllerClient if err != nil { return nil, err } - wf.informers[ServiceType], err = newInformer(ServiceType, wf.iFactory.Core().V1().Services().Informer()) + wf.informers[ServiceType], err = newQueuedInformer(ServiceType, wf.iFactory.Core().V1().Services().Informer(), + wf.stopChan, minNumEventQueues) if err != nil { return nil, err } - wf.informers[PolicyType], err = newInformer(PolicyType, wf.iFactory.Networking().V1().NetworkPolicies().Informer()) + wf.informers[PolicyType], err = newQueuedInformer(PolicyType, wf.iFactory.Networking().V1().NetworkPolicies().Informer(), + wf.stopChan, minNumEventQueues) if err != nil { return nil, err } @@ -354,28 +359,33 @@ func NewOVNKubeControllerWatchFactory(ovnClientset *util.OVNKubeControllerClient if err != nil { return nil, err } - wf.informers[EndpointSliceType], err = newInformer(EndpointSliceType, wf.iFactory.Discovery().V1().EndpointSlices().Informer()) + wf.informers[EndpointSliceType], err = newQueuedInformer(EndpointSliceType, wf.iFactory.Discovery().V1().EndpointSlices().Informer(), + wf.stopChan, minNumEventQueues) if err != nil { return nil, err } if config.OVNKubernetesFeature.EnableAdminNetworkPolicy { - wf.informers[AdminNetworkPolicyType], err = newInformer(AdminNetworkPolicyType, wf.anpFactory.Policy().V1alpha1().AdminNetworkPolicies().Informer()) + wf.informers[AdminNetworkPolicyType], err = newQueuedInformer(AdminNetworkPolicyType, + wf.anpFactory.Policy().V1alpha1().AdminNetworkPolicies().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err } - wf.informers[BaselineAdminNetworkPolicyType], err = 
newInformer(BaselineAdminNetworkPolicyType, wf.anpFactory.Policy().V1alpha1().BaselineAdminNetworkPolicies().Informer()) + wf.informers[BaselineAdminNetworkPolicyType], err = newQueuedInformer(BaselineAdminNetworkPolicyType, + wf.anpFactory.Policy().V1alpha1().BaselineAdminNetworkPolicies().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err } } if config.OVNKubernetesFeature.EnableEgressIP { - wf.informers[EgressIPType], err = newInformer(EgressIPType, wf.eipFactory.K8s().V1().EgressIPs().Informer()) + wf.informers[EgressIPType], err = newQueuedInformer(EgressIPType, wf.eipFactory.K8s().V1().EgressIPs().Informer(), wf.stopChan, + minNumEventQueues) if err != nil { return nil, err } } if config.OVNKubernetesFeature.EnableEgressFirewall { - wf.informers[EgressFirewallType], err = newInformer(EgressFirewallType, wf.efFactory.K8s().V1().EgressFirewalls().Informer()) + wf.informers[EgressFirewallType], err = newQueuedInformer(EgressFirewallType, wf.efFactory.K8s().V1().EgressFirewalls().Informer(), + wf.stopChan, minNumEventQueues) if err != nil { return nil, err } @@ -386,13 +396,15 @@ func NewOVNKubeControllerWatchFactory(ovnClientset *util.OVNKubeControllerClient } } if config.OVNKubernetesFeature.EnableEgressQoS { - wf.informers[EgressQoSType], err = newInformer(EgressQoSType, wf.egressQoSFactory.K8s().V1().EgressQoSes().Informer()) + wf.informers[EgressQoSType], err = newQueuedInformer(EgressQoSType, wf.egressQoSFactory.K8s().V1().EgressQoSes().Informer(), + wf.stopChan, minNumEventQueues) if err != nil { return nil, err } } if config.OVNKubernetesFeature.EnableEgressService { - wf.informers[EgressServiceType], err = newInformer(EgressServiceType, wf.egressServiceFactory.K8s().V1().EgressServices().Informer()) + wf.informers[EgressServiceType], err = newQueuedInformer(EgressServiceType, + wf.egressServiceFactory.K8s().V1().EgressServices().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err } @@ -400,14 +412,16 @@ func NewOVNKubeControllerWatchFactory(ovnClientset *util.OVNKubeControllerClient if config.OVNKubernetesFeature.EnableMultiNetwork { wf.nadFactory = nadinformerfactory.NewSharedInformerFactory(ovnClientset.NetworkAttchDefClient, resyncInterval) - wf.informers[NetworkAttachmentDefinitionType], err = newInformer(NetworkAttachmentDefinitionType, wf.nadFactory.K8sCniCncfIo().V1().NetworkAttachmentDefinitions().Informer()) + wf.informers[NetworkAttachmentDefinitionType], err = newQueuedInformer(NetworkAttachmentDefinitionType, + wf.nadFactory.K8sCniCncfIo().V1().NetworkAttachmentDefinitions().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err } if config.OVNKubernetesFeature.EnablePersistentIPs && !config.OVNKubernetesFeature.EnableInterconnect { wf.ipamClaimsFactory = ipamclaimsfactory.NewSharedInformerFactory(ovnClientset.IPAMClaimsClient, resyncInterval) - wf.informers[IPAMClaimsType], err = newInformer(IPAMClaimsType, wf.ipamClaimsFactory.K8s().V1alpha1().IPAMClaims().Informer()) + wf.informers[IPAMClaimsType], err = newQueuedInformer(IPAMClaimsType, + wf.ipamClaimsFactory.K8s().V1alpha1().IPAMClaims().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err } @@ -416,19 +430,22 @@ func NewOVNKubeControllerWatchFactory(ovnClientset *util.OVNKubeControllerClient if util.IsNetworkSegmentationSupportEnabled() { wf.udnFactory = userdefinednetworkapiinformerfactory.NewSharedInformerFactory(ovnClientset.UserDefinedNetworkClient, resyncInterval) - wf.informers[UserDefinedNetworkType], err = 
newInformer(UserDefinedNetworkType, wf.udnFactory.K8s().V1().UserDefinedNetworks().Informer()) + wf.informers[UserDefinedNetworkType], err = newQueuedInformer(UserDefinedNetworkType, + wf.udnFactory.K8s().V1().UserDefinedNetworks().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err } - wf.informers[ClusterUserDefinedNetworkType], err = newInformer(ClusterUserDefinedNetworkType, wf.udnFactory.K8s().V1().ClusterUserDefinedNetworks().Informer()) + wf.informers[ClusterUserDefinedNetworkType], err = newQueuedInformer(ClusterUserDefinedNetworkType, + wf.udnFactory.K8s().V1().ClusterUserDefinedNetworks().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err } } if util.IsMultiNetworkPoliciesSupportEnabled() { - wf.informers[MultiNetworkPolicyType], err = newInformer(MultiNetworkPolicyType, wf.mnpFactory.K8sCniCncfIo().V1beta1().MultiNetworkPolicies().Informer()) + wf.informers[MultiNetworkPolicyType], err = newQueuedInformer(MultiNetworkPolicyType, + wf.mnpFactory.K8sCniCncfIo().V1beta1().MultiNetworkPolicies().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err } @@ -708,7 +725,8 @@ func NewNodeWatchFactory(ovnClientset *util.OVNNodeClientset, nodeName string) ( getEndpointSliceSelector()) }) - wf.informers[NamespaceType], err = newInformer(NamespaceType, wf.iFactory.Core().V1().Namespaces().Informer()) + wf.informers[NamespaceType], err = newQueuedInformer(NamespaceType, wf.iFactory.Core().V1().Namespaces().Informer(), + wf.stopChan, minNumEventQueues) if err != nil { return nil, err } @@ -717,32 +735,35 @@ func NewNodeWatchFactory(ovnClientset *util.OVNNodeClientset, nodeName string) ( if err != nil { return nil, err } - wf.informers[ServiceType], err = newInformer( + wf.informers[ServiceType], err = newQueuedInformer( ServiceType, - wf.iFactory.Core().V1().Services().Informer()) + wf.iFactory.Core().V1().Services().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err } - wf.informers[EndpointSliceType], err = newInformer( + wf.informers[EndpointSliceType], err = newQueuedInformer( EndpointSliceType, - wf.iFactory.Discovery().V1().EndpointSlices().Informer()) + wf.iFactory.Discovery().V1().EndpointSlices().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err } - wf.informers[NodeType], err = newInformer(NodeType, wf.iFactory.Core().V1().Nodes().Informer()) + wf.informers[NodeType], err = newQueuedInformer(NodeType, wf.iFactory.Core().V1().Nodes().Informer(), wf.stopChan, + minNumEventQueues) if err != nil { return nil, err } if config.OVNKubernetesFeature.EnableEgressService { - wf.informers[EgressServiceType], err = newInformer(EgressServiceType, wf.egressServiceFactory.K8s().V1().EgressServices().Informer()) + wf.informers[EgressServiceType], err = newQueuedInformer(EgressServiceType, + wf.egressServiceFactory.K8s().V1().EgressServices().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err } } if config.OVNKubernetesFeature.EnableEgressIP { - wf.informers[EgressIPType], err = newInformer(EgressIPType, wf.eipFactory.K8s().V1().EgressIPs().Informer()) + wf.informers[EgressIPType], err = newQueuedInformer(EgressIPType, wf.eipFactory.K8s().V1().EgressIPs().Informer(), + wf.stopChan, minNumEventQueues) if err != nil { return nil, err } @@ -764,7 +785,8 @@ func NewNodeWatchFactory(ovnClientset *util.OVNNodeClientset, nodeName string) ( // needs the NAD factory whenever the UDN feature is used. 
if config.OVNKubernetesFeature.EnableMultiNetwork && (config.OVNKubernetesFeature.EnableNetworkSegmentation || config.OvnKubeNode.Mode == types.NodeModeDPU) { wf.nadFactory = nadinformerfactory.NewSharedInformerFactory(ovnClientset.NetworkAttchDefClient, resyncInterval) - wf.informers[NetworkAttachmentDefinitionType], err = newInformer(NetworkAttachmentDefinitionType, wf.nadFactory.K8sCniCncfIo().V1().NetworkAttachmentDefinitions().Informer()) + wf.informers[NetworkAttachmentDefinitionType], err = newQueuedInformer(NetworkAttachmentDefinitionType, + wf.nadFactory.K8sCniCncfIo().V1().NetworkAttachmentDefinitions().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err } @@ -772,12 +794,14 @@ func NewNodeWatchFactory(ovnClientset *util.OVNNodeClientset, nodeName string) ( if util.IsNetworkSegmentationSupportEnabled() { wf.udnFactory = userdefinednetworkapiinformerfactory.NewSharedInformerFactory(ovnClientset.UserDefinedNetworkClient, resyncInterval) - wf.informers[UserDefinedNetworkType], err = newInformer(UserDefinedNetworkType, wf.udnFactory.K8s().V1().UserDefinedNetworks().Informer()) + wf.informers[UserDefinedNetworkType], err = newQueuedInformer(UserDefinedNetworkType, wf.udnFactory.K8s().V1().UserDefinedNetworks().Informer(), + wf.stopChan, minNumEventQueues) if err != nil { return nil, err } - wf.informers[ClusterUserDefinedNetworkType], err = newInformer(ClusterUserDefinedNetworkType, wf.udnFactory.K8s().V1().ClusterUserDefinedNetworks().Informer()) + wf.informers[ClusterUserDefinedNetworkType], err = newQueuedInformer(ClusterUserDefinedNetworkType, + wf.udnFactory.K8s().V1().ClusterUserDefinedNetworks().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err } @@ -854,37 +878,42 @@ func NewClusterManagerWatchFactory(ovnClientset *util.OVNClusterManagerClientset var err error // Create our informer-wrapper informer (and underlying shared informer) for types we need - wf.informers[ServiceType], err = newInformer(ServiceType, wf.iFactory.Core().V1().Services().Informer()) + wf.informers[ServiceType], err = newQueuedInformer(ServiceType, wf.iFactory.Core().V1().Services().Informer(), + wf.stopChan, minNumEventQueues) if err != nil { return nil, err } - wf.informers[EndpointSliceType], err = newInformer( + wf.informers[EndpointSliceType], err = newQueuedInformer( EndpointSliceType, - wf.iFactory.Discovery().V1().EndpointSlices().Informer()) + wf.iFactory.Discovery().V1().EndpointSlices().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err } - wf.informers[NodeType], err = newInformer(NodeType, wf.iFactory.Core().V1().Nodes().Informer()) + wf.informers[NodeType], err = newQueuedInformer(NodeType, wf.iFactory.Core().V1().Nodes().Informer(), + wf.stopChan, minNumEventQueues) if err != nil { return nil, err } if config.OVNKubernetesFeature.EnableEgressIP { - wf.informers[EgressIPType], err = newInformer(EgressIPType, wf.eipFactory.K8s().V1().EgressIPs().Informer()) + wf.informers[EgressIPType], err = newQueuedInformer(EgressIPType, wf.eipFactory.K8s().V1().EgressIPs().Informer(), + wf.stopChan, minNumEventQueues) if err != nil { return nil, err } } if util.PlatformTypeIsEgressIPCloudProvider() { - wf.informers[CloudPrivateIPConfigType], err = newInformer(CloudPrivateIPConfigType, wf.cpipcFactory.Cloud().V1().CloudPrivateIPConfigs().Informer()) + wf.informers[CloudPrivateIPConfigType], err = newQueuedInformer(CloudPrivateIPConfigType, + wf.cpipcFactory.Cloud().V1().CloudPrivateIPConfigs().Informer(), wf.stopChan, 
minNumEventQueues) if err != nil { return nil, err } } if config.OVNKubernetesFeature.EnableEgressService { - wf.informers[EgressServiceType], err = newInformer(EgressServiceType, wf.egressServiceFactory.K8s().V1().EgressServices().Informer()) + wf.informers[EgressServiceType], err = newQueuedInformer(EgressServiceType, + wf.egressServiceFactory.K8s().V1().EgressServices().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err } @@ -892,20 +921,23 @@ func NewClusterManagerWatchFactory(ovnClientset *util.OVNClusterManagerClientset if config.OVNKubernetesFeature.EnableMultiNetwork { wf.nadFactory = nadinformerfactory.NewSharedInformerFactory(ovnClientset.NetworkAttchDefClient, resyncInterval) - wf.informers[NetworkAttachmentDefinitionType], err = newInformer(NetworkAttachmentDefinitionType, wf.nadFactory.K8sCniCncfIo().V1().NetworkAttachmentDefinitions().Informer()) + wf.informers[NetworkAttachmentDefinitionType], err = newQueuedInformer(NetworkAttachmentDefinitionType, + wf.nadFactory.K8sCniCncfIo().V1().NetworkAttachmentDefinitions().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err } if config.OVNKubernetesFeature.EnableInterconnect { - wf.informers[PodType], err = newQueuedInformer(PodType, wf.iFactory.Core().V1().Pods().Informer(), wf.stopChan, defaultNumEventQueues) + wf.informers[PodType], err = newQueuedInformer(PodType, wf.iFactory.Core().V1().Pods().Informer(), + wf.stopChan, defaultNumEventQueues) if err != nil { return nil, err } if config.OVNKubernetesFeature.EnablePersistentIPs { wf.ipamClaimsFactory = ipamclaimsfactory.NewSharedInformerFactory(ovnClientset.IPAMClaimsClient, resyncInterval) - wf.informers[IPAMClaimsType], err = newInformer(IPAMClaimsType, wf.ipamClaimsFactory.K8s().V1alpha1().IPAMClaims().Informer()) + wf.informers[IPAMClaimsType], err = newQueuedInformer(IPAMClaimsType, + wf.ipamClaimsFactory.K8s().V1alpha1().IPAMClaims().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err } @@ -930,11 +962,13 @@ func NewClusterManagerWatchFactory(ovnClientset *util.OVNClusterManagerClientset if util.IsNetworkSegmentationSupportEnabled() { wf.udnFactory = userdefinednetworkapiinformerfactory.NewSharedInformerFactory(ovnClientset.UserDefinedNetworkClient, resyncInterval) - wf.informers[UserDefinedNetworkType], err = newInformer(UserDefinedNetworkType, wf.udnFactory.K8s().V1().UserDefinedNetworks().Informer()) + wf.informers[UserDefinedNetworkType], err = newQueuedInformer(UserDefinedNetworkType, + wf.udnFactory.K8s().V1().UserDefinedNetworks().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err } - wf.informers[ClusterUserDefinedNetworkType], err = newInformer(ClusterUserDefinedNetworkType, wf.udnFactory.K8s().V1().ClusterUserDefinedNetworks().Informer()) + wf.informers[ClusterUserDefinedNetworkType], err = newQueuedInformer(ClusterUserDefinedNetworkType, + wf.udnFactory.K8s().V1().ClusterUserDefinedNetworks().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err } @@ -1028,6 +1062,22 @@ func getObjectMeta(objType reflect.Type, obj interface{}) (*metav1.ObjectMeta, e if persistentips, ok := obj.(*ipamclaimsapi.IPAMClaim); ok { return &persistentips.ObjectMeta, nil } + case EgressQoSType: + if egressQoS, ok := obj.(*egressqosapi.EgressQoS); ok { + return &egressQoS.ObjectMeta, nil + } + case EgressServiceType: + if egressService, ok := obj.(*egressserviceapi.EgressService); ok { + return &egressService.ObjectMeta, nil + } + case UserDefinedNetworkType: + if udn, 
ok := obj.(*userdefinednetworkapi.UserDefinedNetwork); ok { + return &udn.ObjectMeta, nil + } + case ClusterUserDefinedNetworkType: + if cudn, ok := obj.(*userdefinednetworkapi.ClusterUserDefinedNetwork); ok { + return &cudn.ObjectMeta, nil + } } return nil, fmt.Errorf("cannot get ObjectMeta from type %v", objType) diff --git a/go-controller/pkg/factory/handler.go b/go-controller/pkg/factory/handler.go index bcc5551f0b..7667b5c0b8 100644 --- a/go-controller/pkg/factory/handler.go +++ b/go-controller/pkg/factory/handler.go @@ -137,40 +137,6 @@ func (i *informer) forEachQueuedHandlerReversed(f func(h *Handler)) { } } -func (i *informer) forEachHandler(obj interface{}, f func(h *Handler)) { - i.RLock() - defer i.RUnlock() - - objType := reflect.TypeOf(obj) - if objType != i.oType { - klog.Errorf("Object type %v did not match expected %v", objType, i.oType) - return - } - - for priority := 0; priority <= minHandlerPriority; priority++ { // loop over priority higest to lowest - for _, handler := range i.handlers[priority] { - f(handler) - } - } -} - -func (i *informer) forEachHandlerReversed(obj interface{}, f func(h *Handler)) { - i.RLock() - defer i.RUnlock() - - objType := reflect.TypeOf(obj) - if objType != i.oType { - klog.Errorf("Object type %v did not match expected %v", objType, i.oType) - return - } - - for priority := minHandlerPriority; priority >= 0; priority-- { // loop over priority lowest to highest - for _, handler := range i.handlers[priority] { - f(handler) - } - } -} - func (i *informer) addHandler(id uint64, priority int, filterFunc func(obj interface{}) bool, funcs cache.ResourceEventHandler, existingItems []interface{}) *Handler { handler := &Handler{ cache.FilteringResourceEventHandler{ @@ -272,6 +238,9 @@ func (qm *queueMap) shutdown() { func (qm *queueMap) getNewQueueNum() uint32 { var j, startIdx, queueIdx uint32 numEventQueues := uint32(len(qm.queues)) + if numEventQueues == 1 { + return 0 + } startIdx = uint32(cryptorand.Intn(int64(numEventQueues - 1))) queueIdx = startIdx lowestNum := len(qm.queues[startIdx]) @@ -388,7 +357,7 @@ func ensureObjectOnDelete(obj interface{}, expectedType reflect.Type) (interface return obj, nil } -func (i *informer) newFederatedQueuedHandler(numEventQueues uint32) cache.ResourceEventHandlerFuncs { +func (i *informer) newFederatedQueuedHandler() cache.ResourceEventHandlerFuncs { name := i.oType.Elem().Name() return cache.ResourceEventHandlerFuncs{ AddFunc: func(obj interface{}) { @@ -438,50 +407,6 @@ func (i *informer) newFederatedQueuedHandler(numEventQueues uint32) cache.Resour } } -func (i *informer) newFederatedHandler() cache.ResourceEventHandlerFuncs { - name := i.oType.Elem().Name() - return cache.ResourceEventHandlerFuncs{ - AddFunc: func(obj interface{}) { - metrics.MetricResourceUpdateCount.WithLabelValues(name, "add").Inc() - start := time.Now() - i.forEachHandler(obj, func(h *Handler) { - h.OnAdd(obj, false) - }) - metrics.MetricResourceAddLatency.Observe(time.Since(start).Seconds()) - }, - UpdateFunc: func(oldObj, newObj interface{}) { - metrics.MetricResourceUpdateCount.WithLabelValues(name, "update").Inc() - start := time.Now() - i.forEachHandler(newObj, func(h *Handler) { - old := oldObj.(metav1.Object) - new := newObj.(metav1.Object) - if old.GetUID() != new.GetUID() { - // This occurs not so often, so log this occurance. 
- klog.Infof("Object %s/%s is replaced, invoking delete followed by add handler", new.GetNamespace(), new.GetName()) - h.OnDelete(oldObj) - h.OnAdd(newObj, false) - } else { - h.OnUpdate(oldObj, newObj) - } - }) - metrics.MetricResourceUpdateLatency.Observe(time.Since(start).Seconds()) - }, - DeleteFunc: func(obj interface{}) { - realObj, err := ensureObjectOnDelete(obj, i.oType) - if err != nil { - klog.Errorf(err.Error()) - return - } - metrics.MetricResourceUpdateCount.WithLabelValues(name, "delete").Inc() - start := time.Now() - i.forEachHandlerReversed(realObj, func(h *Handler) { - h.OnDelete(realObj) - }) - metrics.MetricResourceDeleteLatency.Observe(time.Since(start).Seconds()) - }, - } -} - func (i *informer) removeAllHandlers() { i.Lock() defer i.Unlock() @@ -557,24 +482,6 @@ func newBaseInformer(oType reflect.Type, sharedInformer cache.SharedIndexInforme }, nil } -func newInformer(oType reflect.Type, sharedInformer cache.SharedIndexInformer) (*informer, error) { - i, err := newBaseInformer(oType, sharedInformer) - if err != nil { - return nil, err - } - i.initialAddFunc = func(h *Handler, items []interface{}) { - for _, item := range items { - h.OnAdd(item, false) - } - } - _, err = i.inf.AddEventHandler(i.newFederatedHandler()) - if err != nil { - return nil, err - } - return i, nil - -} - func newQueuedInformer(oType reflect.Type, sharedInformer cache.SharedIndexInformer, stopChan chan struct{}, numEventQueues uint32) (*informer, error) { i, err := newBaseInformer(oType, sharedInformer) @@ -606,7 +513,7 @@ func newQueuedInformer(oType reflect.Type, sharedInformer cache.SharedIndexInfor addsWg.Wait() } - _, err = i.inf.AddEventHandler(i.newFederatedQueuedHandler(numEventQueues)) + _, err = i.inf.AddEventHandler(i.newFederatedQueuedHandler()) if err != nil { return nil, err } From 2b8be2722aafd2c18572e4b30843cc7c17c24215 Mon Sep 17 00:00:00 2001 From: Patryk Diak Date: Wed, 15 Jan 2025 11:03:58 +0100 Subject: [PATCH 40/51] factory: Use 15 event queues per informer for pod, node and namespace. 
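For context, a minimal self-contained sketch (illustrative only, not code from this patch) of what "event queues per informer" means here: incoming events are fanned out over a fixed number of buffered channels with one worker per channel, and the queueMap keeps all events for a given object on the same queue so per-object ordering is preserved. Raising the pod, node and namespace informers to 15 queues (defaultNumEventQueues) simply widens this fan-out.

    package main

    import (
        "fmt"
        "hash/fnv"
        "sync"
    )

    type event struct{ key, op string }

    func main() {
        const numEventQueues = 15 // e.g. defaultNumEventQueues after this patch
        queues := make([]chan event, numEventQueues)
        var wg sync.WaitGroup
        for i := range queues {
            queues[i] = make(chan event, 10) // buffered per-queue channel
            wg.Add(1)
            go func(q <-chan event) { // one worker drains each queue
                defer wg.Done()
                for e := range q {
                    fmt.Printf("handled %s %s\n", e.op, e.key)
                }
            }(queues[i])
        }
        // A hash stands in for the real queue picker; the point is that all
        // events for one object land on one queue, preserving their order.
        pick := func(key string) int {
            h := fnv.New32a()
            h.Write([]byte(key))
            return int(h.Sum32() % numEventQueues)
        }
        for _, e := range []event{{"ns/pod-a", "add"}, {"ns/pod-b", "add"}, {"ns/pod-a", "update"}} {
            queues[pick(e.key)] <- e
        }
        for i := range queues {
            close(queues[i])
        }
        wg.Wait()
    }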
Signed-off-by: Patryk Diak Co-authored-by: Dumitru Ceara Signed-off-by: Dumitru Ceara --- go-controller/pkg/factory/factory.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/go-controller/pkg/factory/factory.go b/go-controller/pkg/factory/factory.go index 8716a2f2d1..b014278132 100644 --- a/go-controller/pkg/factory/factory.go +++ b/go-controller/pkg/factory/factory.go @@ -726,7 +726,7 @@ func NewNodeWatchFactory(ovnClientset *util.OVNNodeClientset, nodeName string) ( }) wf.informers[NamespaceType], err = newQueuedInformer(NamespaceType, wf.iFactory.Core().V1().Namespaces().Informer(), - wf.stopChan, minNumEventQueues) + wf.stopChan, defaultNumEventQueues) if err != nil { return nil, err } @@ -749,7 +749,7 @@ func NewNodeWatchFactory(ovnClientset *util.OVNNodeClientset, nodeName string) ( } wf.informers[NodeType], err = newQueuedInformer(NodeType, wf.iFactory.Core().V1().Nodes().Informer(), wf.stopChan, - minNumEventQueues) + defaultNumEventQueues) if err != nil { return nil, err } @@ -892,7 +892,7 @@ func NewClusterManagerWatchFactory(ovnClientset *util.OVNClusterManagerClientset } wf.informers[NodeType], err = newQueuedInformer(NodeType, wf.iFactory.Core().V1().Nodes().Informer(), - wf.stopChan, minNumEventQueues) + wf.stopChan, defaultNumEventQueues) if err != nil { return nil, err } From fe17136cfd3a85160376ae8e7860f4e0febe5162 Mon Sep 17 00:00:00 2001 From: Dumitru Ceara Date: Mon, 2 Dec 2024 22:25:49 +0100 Subject: [PATCH 41/51] factory: Reduce contention on informer locks. Add a pool of Event handlers instead of a single (federated) event handler per informer. Ensure a controller always gets registered with the same event handler. Set the pool size to 201 (200 for secondary controllers and one, index 0, reserved for the default network). Always use pool entry with index 0 for the default network controller. 
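To make the scheme concrete, here is a minimal sketch (illustrative only; names and sizes simplified from the real change) of how a shallow watch-factory clone picks its pool entry, with entry 0 reserved for the default network controller:

    package main

    import (
        "fmt"
        "math/rand/v2"
    )

    // internalInformerPoolSize mirrors the 201-entry handler pool described
    // above: entry 0 is reserved for the default network, the remaining 200
    // entries are shared by secondary controllers.
    const internalInformerPoolSize = 201

    type watchFactory struct {
        // internalInformerIndex selects which pooled event handler this
        // factory (or clone) registers its handlers with.
        internalInformerIndex int
    }

    // shallowClone binds the clone to a random non-zero pool entry so that
    // secondary controllers spread their handlers across the pool instead
    // of all contending on one informer lock.
    func (wf *watchFactory) shallowClone() *watchFactory {
        return &watchFactory{internalInformerIndex: rand.IntN(internalInformerPoolSize-1) + 1}
    }

    func main() {
        defaultWF := &watchFactory{internalInformerIndex: 0} // default network stays on entry 0
        for i := 0; i < 3; i++ {
            fmt.Printf("secondary controller %d -> pool entry %d\n", i, defaultWF.shallowClone().internalInformerIndex)
        }
    }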
Signed-off-by: Dumitru Ceara --- .../pkg/clustermanager/clustermanager.go | 1 + .../secondary_network_cluster_manager.go | 2 +- .../controllermanager/controller_manager.go | 14 +- .../node_controller_manager.go | 10 +- go-controller/pkg/factory/factory.go | 55 ++++++-- go-controller/pkg/factory/handler.go | 133 +++++++++++------- 6 files changed, 147 insertions(+), 68 deletions(-) diff --git a/go-controller/pkg/clustermanager/clustermanager.go b/go-controller/pkg/clustermanager/clustermanager.go index 09d83d11fa..a979cb3420 100644 --- a/go-controller/pkg/clustermanager/clustermanager.go +++ b/go-controller/pkg/clustermanager/clustermanager.go @@ -67,6 +67,7 @@ type ClusterManager struct { func NewClusterManager(ovnClient *util.OVNClusterManagerClientset, wf *factory.WatchFactory, identity string, wg *sync.WaitGroup, recorder record.EventRecorder) (*ClusterManager, error) { + wf = wf.ShallowClone() defaultNetClusterController := newDefaultNetworkClusterController(&util.DefaultNetInfo{}, ovnClient, wf, recorder) zoneClusterController, err := newZoneClusterController(ovnClient, wf) diff --git a/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go b/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go index 37022b77e8..6144063603 100644 --- a/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go +++ b/go-controller/pkg/clustermanager/secondary_network_cluster_manager.go @@ -64,7 +64,7 @@ func (sncm *secondaryNetworkClusterManager) NewNetworkController(nInfo util.NetI sncc := newNetworkClusterController( nInfo, sncm.ovnClient, - sncm.watchFactory, + sncm.watchFactory.ShallowClone(), sncm.recorder, sncm.networkManager, sncm.errorReporter, diff --git a/go-controller/pkg/controllermanager/controller_manager.go b/go-controller/pkg/controllermanager/controller_manager.go index be156d2eef..97620f5d32 100644 --- a/go-controller/pkg/controllermanager/controller_manager.go +++ b/go-controller/pkg/controllermanager/controller_manager.go @@ -65,7 +65,9 @@ type ControllerManager struct { } func (cm *ControllerManager) NewNetworkController(nInfo util.NetInfo) (networkmanager.NetworkController, error) { - cnci, err := cm.newCommonNetworkControllerInfo() + // Pass a shallow clone of the watch factory, this allows multiplexing + // informers for secondary networks. + cnci, err := cm.newCommonNetworkControllerInfo(cm.watchFactory.ShallowClone()) if err != nil { return nil, fmt.Errorf("failed to create network controller info %w", err) } @@ -83,7 +85,9 @@ func (cm *ControllerManager) NewNetworkController(nInfo util.NetInfo) (networkma // newDummyNetworkController creates a dummy network controller used to clean up specific network func (cm *ControllerManager) newDummyNetworkController(topoType, netName string) (networkmanager.NetworkController, error) { - cnci, err := cm.newCommonNetworkControllerInfo() + // Pass a shallow clone of the watch factory, this allows multiplexing + // informers for secondary networks. 
+ cnci, err := cm.newCommonNetworkControllerInfo(cm.watchFactory.ShallowClone()) if err != nil { return nil, fmt.Errorf("failed to create network controller info %w", err) } @@ -301,14 +305,14 @@ func (cm *ControllerManager) createACLLoggingMeter() error { } // newCommonNetworkControllerInfo creates and returns the common networkController info -func (cm *ControllerManager) newCommonNetworkControllerInfo() (*ovn.CommonNetworkControllerInfo, error) { - return ovn.NewCommonNetworkControllerInfo(cm.client, cm.kube, cm.watchFactory, cm.recorder, cm.nbClient, +func (cm *ControllerManager) newCommonNetworkControllerInfo(wf *factory.WatchFactory) (*ovn.CommonNetworkControllerInfo, error) { + return ovn.NewCommonNetworkControllerInfo(cm.client, cm.kube, wf, cm.recorder, cm.nbClient, cm.sbClient, cm.podRecorder, cm.SCTPSupport, cm.multicastSupport, cm.svcTemplateSupport) } // initDefaultNetworkController creates the controller for default network func (cm *ControllerManager) initDefaultNetworkController(observManager *observability.Manager) error { - cnci, err := cm.newCommonNetworkControllerInfo() + cnci, err := cm.newCommonNetworkControllerInfo(cm.watchFactory) if err != nil { return fmt.Errorf("failed to create common network controller info: %w", err) } diff --git a/go-controller/pkg/controllermanager/node_controller_manager.go b/go-controller/pkg/controllermanager/node_controller_manager.go index 11bb6c5891..daaaf5bf89 100644 --- a/go-controller/pkg/controllermanager/node_controller_manager.go +++ b/go-controller/pkg/controllermanager/node_controller_manager.go @@ -53,7 +53,9 @@ func (ncm *NodeControllerManager) NewNetworkController(nInfo util.NetInfo) (netw topoType := nInfo.TopologyType() switch topoType { case ovntypes.Layer3Topology, ovntypes.Layer2Topology, ovntypes.LocalnetTopology: - return node.NewSecondaryNodeNetworkController(ncm.newCommonNetworkControllerInfo(), + // Pass a shallow clone of the watch factory, this allows multiplexing + // informers for secondary networks. 
+ return node.NewSecondaryNodeNetworkController(ncm.newCommonNetworkControllerInfo(ncm.watchFactory.(*factory.WatchFactory).ShallowClone()), nInfo, ncm.vrfManager, ncm.ruleManager, ncm.defaultNodeNetworkController.Gateway) } return nil, fmt.Errorf("topology type %s not supported", topoType) @@ -79,8 +81,8 @@ func (ncm *NodeControllerManager) CleanupStaleNetworks(validNetworks ...util.Net } // newCommonNetworkControllerInfo creates and returns the base node network controller info -func (ncm *NodeControllerManager) newCommonNetworkControllerInfo() *node.CommonNodeNetworkControllerInfo { - return node.NewCommonNodeNetworkControllerInfo(ncm.ovnNodeClient.KubeClient, ncm.ovnNodeClient.AdminPolicyRouteClient, ncm.watchFactory, ncm.recorder, ncm.name, ncm.routeManager) +func (ncm *NodeControllerManager) newCommonNetworkControllerInfo(wf factory.NodeWatchFactory) *node.CommonNodeNetworkControllerInfo { + return node.NewCommonNodeNetworkControllerInfo(ncm.ovnNodeClient.KubeClient, ncm.ovnNodeClient.AdminPolicyRouteClient, wf, ncm.recorder, ncm.name, ncm.routeManager) } // isNetworkManagerRequiredForNode checks if network manager should be started @@ -127,7 +129,7 @@ func NewNodeControllerManager(ovnClient *util.OVNClientset, wf factory.NodeWatch // initDefaultNodeNetworkController creates the controller for default network func (ncm *NodeControllerManager) initDefaultNodeNetworkController() error { - defaultNodeNetworkController, err := node.NewDefaultNodeNetworkController(ncm.newCommonNetworkControllerInfo(), ncm.networkManager.Interface()) + defaultNodeNetworkController, err := node.NewDefaultNodeNetworkController(ncm.newCommonNetworkControllerInfo(ncm.watchFactory), ncm.networkManager.Interface()) if err != nil { return err } diff --git a/go-controller/pkg/factory/factory.go b/go-controller/pkg/factory/factory.go index b014278132..e75939f6b9 100644 --- a/go-controller/pkg/factory/factory.go +++ b/go-controller/pkg/factory/factory.go @@ -3,6 +3,7 @@ package factory import ( "context" "fmt" + "math/rand/v2" "reflect" "sync/atomic" "time" @@ -105,11 +106,15 @@ import ( "k8s.io/klog/v2" ) -// WatchFactory initializes and manages common kube watches -type WatchFactory struct { +type handlerCounter struct { // Must be first member in the struct due to Golang ARM/x86 32-bit // requirements with atomic accesses - handlerCounter uint64 + counter uint64 +} + +// WatchFactory initializes and manages common kube watches +type WatchFactory struct { + handlerCounter *handlerCounter iFactory informerfactory.SharedInformerFactory anpFactory anpinformerfactory.SharedInformerFactory @@ -129,10 +134,39 @@ type WatchFactory struct { informers map[reflect.Type]*informer stopChan chan struct{} + + // Shallow watch factory clones potentially use different internal + // informers (to allow multiplexing and load sharing). 
+ internalInformerIndex int +} + +func (wf *WatchFactory) ShallowClone() *WatchFactory { + return &WatchFactory{ + handlerCounter: wf.handlerCounter, + iFactory: wf.iFactory, + anpFactory: wf.anpFactory, + eipFactory: wf.eipFactory, + efFactory: wf.efFactory, + dnsFactory: wf.dnsFactory, + cpipcFactory: wf.cpipcFactory, + egressQoSFactory: wf.egressQoSFactory, + mnpFactory: wf.mnpFactory, + egressServiceFactory: wf.egressServiceFactory, + apbRouteFactory: wf.apbRouteFactory, + ipamClaimsFactory: wf.ipamClaimsFactory, + nadFactory: wf.nadFactory, + udnFactory: wf.udnFactory, + informers: wf.informers, + stopChan: wf.stopChan, + + // Choose a random internalInformer to use for this clone of the + // factory. Reserve index 0 for default network handlers. + internalInformerIndex: rand.IntN(internalInformerPoolSize-1) + 1, + } } // WatchFactory implements the ObjectCacheInterface interface. -var _ ObjectCacheInterface = &WatchFactory{} +var _ ObjectCacheInterface = &WatchFactory{handlerCounter: &handlerCounter{}} const ( // resync time is 0, none of the resources being watched in ovn-kubernetes have @@ -258,6 +292,7 @@ func NewOVNKubeControllerWatchFactory(ovnClientset *util.OVNKubeControllerClient // the downside of making it tight (like 10 minutes) is needless spinning on all resources // However, AddEventHandlerWithResyncPeriod can specify a per handler resync period wf := &WatchFactory{ + handlerCounter: &handlerCounter{}, iFactory: informerfactory.NewSharedInformerFactoryWithOptions(ovnClientset.KubeClient, resyncInterval, informerfactory.WithTransform(informerObjectTrim)), anpFactory: anpinformerfactory.NewSharedInformerFactory(ovnClientset.ANPClient, resyncInterval), eipFactory: egressipinformerfactory.NewSharedInformerFactory(ovnClientset.EgressIPClient, resyncInterval), @@ -654,6 +689,7 @@ func (wf *WatchFactory) Stop() { // of the localPodSelector or figure out how to deal with selecting all pods everywhere. func NewNodeWatchFactory(ovnClientset *util.OVNNodeClientset, nodeName string) (*WatchFactory, error) { wf := &WatchFactory{ + handlerCounter: &handlerCounter{}, iFactory: informerfactory.NewSharedInformerFactoryWithOptions(ovnClientset.KubeClient, resyncInterval, informerfactory.WithTransform(informerObjectTrim)), egressServiceFactory: egressserviceinformerfactory.NewSharedInformerFactory(ovnClientset.EgressServiceClient, resyncInterval), eipFactory: egressipinformerfactory.NewSharedInformerFactory(ovnClientset.EgressIPClient, resyncInterval), @@ -815,6 +851,7 @@ func NewNodeWatchFactory(ovnClientset *util.OVNNodeClientset, nodeName string) ( // mode process. 
func NewClusterManagerWatchFactory(ovnClientset *util.OVNClusterManagerClientset) (*WatchFactory, error) { wf := &WatchFactory{ + handlerCounter: &handlerCounter{}, iFactory: informerfactory.NewSharedInformerFactoryWithOptions(ovnClientset.KubeClient, resyncInterval, informerfactory.WithTransform(informerObjectTrim)), efFactory: egressfirewallinformerfactory.NewSharedInformerFactory(ovnClientset.EgressFirewallClient, resyncInterval), eipFactory: egressipinformerfactory.NewSharedInformerFactory(ovnClientset.EgressIPClient, resyncInterval), @@ -1218,8 +1255,10 @@ func (wf *WatchFactory) addHandler(objType reflect.Type, namespace string, sel l return true } - inf.Lock() - defer inf.Unlock() + intInf := inf.internalInformers[wf.internalInformerIndex] + + intInf.Lock() + defer intInf.Unlock() items := make([]interface{}, 0) for _, obj := range inf.inf.GetStore().List() { @@ -1243,8 +1282,8 @@ func (wf *WatchFactory) addHandler(objType reflect.Type, namespace string, sel l } } - handlerID := atomic.AddUint64(&wf.handlerCounter, 1) - handler := inf.addHandler(handlerID, priority, filterFunc, funcs, items) + handlerID := atomic.AddUint64(&wf.handlerCounter.counter, 1) + handler := inf.addHandler(wf.internalInformerIndex, handlerID, priority, filterFunc, funcs, items) klog.V(5).Infof("Added %v event handler %d", objType, handler.id) return handler, nil } diff --git a/go-controller/pkg/factory/handler.go b/go-controller/pkg/factory/handler.go index 7667b5c0b8..29eaa3cca4 100644 --- a/go-controller/pkg/factory/handler.go +++ b/go-controller/pkg/factory/handler.go @@ -33,6 +33,12 @@ import ( "k8s.io/klog/v2" ) +// Use a pool of internal informers to allow multiplexing of events +// between multiple internal informers. This reduces lock contention +// when adding/removing event handlers by distributing them between +// internal informers. +const internalInformerPoolSize int = 201 + // Handler represents an event handler and is private to the factory module type Handler struct { base cache.FilteringResourceEventHandler @@ -48,6 +54,10 @@ type Handler struct { // example: a handler with priority 0 will process the received event first // before a handler with priority 1. priority int + + // indicates which informer.internalInformers index to use + // clients are distributed between internal informers + internalInformerIndex int } func (h *Handler) OnAdd(obj interface{}, isInInitialList bool) { @@ -95,8 +105,15 @@ type queueMapEntry struct { refcount int32 } -type informer struct { +type internalInformer struct { sync.RWMutex + oType reflect.Type + handlers map[int]map[uint64]*Handler + // queueMap handles distributing events across a queued handler's queues + queueMap *queueMap +} + +type informer struct { oType reflect.Type inf cache.SharedIndexInformer // keyed by priority - used to track the handler's priority of being invoked. @@ -104,40 +121,37 @@ type informer struct { // before a handler with priority 1, 0 being the higest priority. // NOTE: we can have multiple handlers with the same priority hence the value // is a map of handlers keyed by its unique id. 
- handlers map[int]map[uint64]*Handler - lister listerInterface + lister listerInterface // initialAddFunc will be called to deliver the initial list of objects // when a handler is added initialAddFunc initialAddFn shutdownWg sync.WaitGroup - // queueMap handles distributing events across a queued handler's queues - queueMap *queueMap + internalInformers []*internalInformer } -func (i *informer) forEachQueuedHandler(f func(h *Handler)) { - i.RLock() - defer i.RUnlock() - +func (inf *internalInformer) forEachQueuedHandler(f func(h *Handler)) { + inf.RLock() + defer inf.RUnlock() for priority := 0; priority <= minHandlerPriority; priority++ { // loop over priority higest to lowest - for _, handler := range i.handlers[priority] { + for _, handler := range inf.handlers[priority] { f(handler) } } } -func (i *informer) forEachQueuedHandlerReversed(f func(h *Handler)) { - i.RLock() - defer i.RUnlock() +func (inf *internalInformer) forEachQueuedHandlerReversed(f func(h *Handler)) { + inf.RLock() + defer inf.RUnlock() for priority := minHandlerPriority; priority >= 0; priority-- { // loop over priority lowest to highest - for _, handler := range i.handlers[priority] { + for _, handler := range inf.handlers[priority] { f(handler) } } } -func (i *informer) addHandler(id uint64, priority int, filterFunc func(obj interface{}) bool, funcs cache.ResourceEventHandler, existingItems []interface{}) *Handler { +func (i *informer) addHandler(internalInformerIndex int, id uint64, priority int, filterFunc func(obj interface{}) bool, funcs cache.ResourceEventHandler, existingItems []interface{}) *Handler { handler := &Handler{ cache.FilteringResourceEventHandler{ FilterFunc: filterFunc, @@ -146,6 +160,7 @@ func (i *informer) addHandler(id uint64, priority int, filterFunc func(obj inter id, handlerAlive, priority, + internalInformerIndex, } // Send existing items to the handler's add function; informers usually @@ -153,11 +168,13 @@ func (i *informer) addHandler(id uint64, priority int, filterFunc func(obj inter // we must emulate that here i.initialAddFunc(handler, existingItems) - _, ok := i.handlers[priority] + intInf := i.internalInformers[internalInformerIndex] + + _, ok := intInf.handlers[priority] if !ok { - i.handlers[priority] = make(map[uint64]*Handler) + intInf.handlers[priority] = make(map[uint64]*Handler) } - i.handlers[priority][id] = handler + intInf.handlers[priority][id] = handler return handler } @@ -171,16 +188,18 @@ func (i *informer) removeHandler(handler *Handler) { klog.V(5).Infof("Sending %v event handler %d for removal", i.oType, handler.id) go func() { - i.Lock() - defer i.Unlock() + intInf := i.internalInformers[handler.internalInformerIndex] + + intInf.Lock() + defer intInf.Unlock() removed := 0 - for priority := range i.handlers { // loop over priority - if _, ok := i.handlers[priority]; !ok { + for priority := range intInf.handlers { // loop over priority + if _, ok := intInf.handlers[priority]; !ok { continue // protection against nil map as value } - if _, ok := i.handlers[priority][handler.id]; ok { + if _, ok := intInf.handlers[priority][handler.id]; ok { // Remove the handler - delete(i.handlers[priority], handler.id) + delete(intInf.handlers[priority], handler.id) removed = 1 klog.V(5).Infof("Removed %v event handler %d", i.oType, handler.id) } @@ -357,24 +376,25 @@ func ensureObjectOnDelete(obj interface{}, expectedType reflect.Type) (interface return obj, nil } -func (i *informer) newFederatedQueuedHandler() cache.ResourceEventHandlerFuncs { +func (i *informer) 
newFederatedQueuedHandler(internalInformerIndex int) cache.ResourceEventHandlerFuncs { name := i.oType.Elem().Name() + intInf := i.internalInformers[internalInformerIndex] return cache.ResourceEventHandlerFuncs{ AddFunc: func(obj interface{}) { - i.queueMap.enqueueEvent(nil, obj, i.oType, false, func(e *event) { + intInf.queueMap.enqueueEvent(nil, obj, i.oType, false, func(e *event) { metrics.MetricResourceUpdateCount.WithLabelValues(name, "add").Inc() start := time.Now() - i.forEachQueuedHandler(func(h *Handler) { + intInf.forEachQueuedHandler(func(h *Handler) { h.OnAdd(e.obj, false) }) metrics.MetricResourceAddLatency.Observe(time.Since(start).Seconds()) }) }, UpdateFunc: func(oldObj, newObj interface{}) { - i.queueMap.enqueueEvent(oldObj, newObj, i.oType, false, func(e *event) { + intInf.queueMap.enqueueEvent(oldObj, newObj, i.oType, false, func(e *event) { metrics.MetricResourceUpdateCount.WithLabelValues(name, "update").Inc() start := time.Now() - i.forEachQueuedHandler(func(h *Handler) { + intInf.forEachQueuedHandler(func(h *Handler) { old := oldObj.(metav1.Object) new := newObj.(metav1.Object) if old.GetUID() != new.GetUID() { @@ -395,10 +415,10 @@ func (i *informer) newFederatedQueuedHandler() cache.ResourceEventHandlerFuncs { klog.Errorf(err.Error()) return } - i.queueMap.enqueueEvent(nil, realObj, i.oType, true, func(e *event) { + intInf.queueMap.enqueueEvent(nil, realObj, i.oType, true, func(e *event) { metrics.MetricResourceUpdateCount.WithLabelValues(name, "delete").Inc() start := time.Now() - i.forEachQueuedHandlerReversed(func(h *Handler) { + intInf.forEachQueuedHandlerReversed(func(h *Handler) { h.OnDelete(e.obj) }) metrics.MetricResourceDeleteLatency.Observe(time.Since(start).Seconds()) @@ -407,12 +427,14 @@ func (i *informer) newFederatedQueuedHandler() cache.ResourceEventHandlerFuncs { } } -func (i *informer) removeAllHandlers() { - i.Lock() - defer i.Unlock() - for _, handlers := range i.handlers { - for _, handler := range handlers { - i.removeHandler(handler) +func (inf *informer) removeAllHandlers() { + for _, intInf := range inf.internalInformers { + intInf.Lock() + defer intInf.Unlock() + for _, handlers := range intInf.handlers { + for _, handler := range handlers { + inf.removeHandler(handler) + } } } } @@ -474,24 +496,30 @@ func newBaseInformer(oType reflect.Type, sharedInformer cache.SharedIndexInforme return nil, err } + internalInformers := make([]*internalInformer, 0, internalInformerPoolSize) + for i := 0; i < internalInformerPoolSize; i++ { + internalInformers = append(internalInformers, &internalInformer{ + oType: oType, + handlers: make(map[int]map[uint64]*Handler), + }) + } + return &informer{ - oType: oType, - inf: sharedInformer, - lister: lister, - handlers: make(map[int]map[uint64]*Handler), + oType: oType, + inf: sharedInformer, + lister: lister, + internalInformers: internalInformers, }, nil } func newQueuedInformer(oType reflect.Type, sharedInformer cache.SharedIndexInformer, stopChan chan struct{}, numEventQueues uint32) (*informer, error) { - i, err := newBaseInformer(oType, sharedInformer) + informer, err := newBaseInformer(oType, sharedInformer) if err != nil { return nil, err } - i.queueMap = newQueueMap(numEventQueues, &i.shutdownWg, stopChan) - i.queueMap.start() - i.initialAddFunc = func(h *Handler, items []interface{}) { + informer.initialAddFunc = func(h *Handler, items []interface{}) { // Make a handler-specific channel array across which the // initial add events will be distributed. 
When a new handler // is added, only that handler should receive events for all @@ -503,7 +531,7 @@ func newQueuedInformer(oType reflect.Type, sharedInformer cache.SharedIndexInfor // Distribute the existing items into the handler-specific // channel array. for _, obj := range items { - addsMap.enqueueEvent(nil, obj, i.oType, false, func(e *event) { + addsMap.enqueueEvent(nil, obj, informer.oType, false, func(e *event) { h.OnAdd(e.obj, false) }) } @@ -513,11 +541,16 @@ func newQueuedInformer(oType reflect.Type, sharedInformer cache.SharedIndexInfor addsWg.Wait() } - _, err = i.inf.AddEventHandler(i.newFederatedQueuedHandler()) - if err != nil { - return nil, err + for i := 0; i < internalInformerPoolSize; i++ { + informer.internalInformers[i].queueMap = newQueueMap(numEventQueues, &informer.shutdownWg, stopChan) + informer.internalInformers[i].queueMap.start() + + _, err = informer.inf.AddEventHandler(informer.newFederatedQueuedHandler(i)) + if err != nil { + return nil, err + } } - return i, nil + return informer, nil } From 6dda0b511d26964033f9591c01247be7e49666e8 Mon Sep 17 00:00:00 2001 From: Dumitru Ceara Date: Mon, 23 Dec 2024 12:12:07 +0100 Subject: [PATCH 42/51] factory: Bump the event queue size to 1K. Signed-off-by: Dumitru Ceara --- go-controller/pkg/factory/factory.go | 118 ++++++++++++++++----------- go-controller/pkg/factory/handler.go | 11 +-- 2 files changed, 78 insertions(+), 51 deletions(-) diff --git a/go-controller/pkg/factory/factory.go b/go-controller/pkg/factory/factory.go index e75939f6b9..ac34de4007 100644 --- a/go-controller/pkg/factory/factory.go +++ b/go-controller/pkg/factory/factory.go @@ -188,6 +188,10 @@ const ( defaultHandlerPriority int = 0 // lowest priority among various handlers (See GetHandlerPriority for more information) minHandlerPriority int = 4 + + // Use a larger queue for incoming events to avoid bottlenecks + // due to handlers being slow. 
+ eventQueueSize uint32 = 1000 ) // types for dynamic handlers created when adding a network policy @@ -256,7 +260,7 @@ func NewMasterWatchFactory(ovnClientset *util.OVNMasterClientset) (*WatchFactory } wf.cpipcFactory = ocpcloudnetworkinformerfactory.NewSharedInformerFactory(ovnClientset.CloudNetworkClient, resyncInterval) if util.PlatformTypeIsEgressIPCloudProvider() { - wf.informers[CloudPrivateIPConfigType], err = newQueuedInformer(CloudPrivateIPConfigType, + wf.informers[CloudPrivateIPConfigType], err = newQueuedInformer(eventQueueSize, CloudPrivateIPConfigType, wf.cpipcFactory.Cloud().V1().CloudPrivateIPConfigs().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err @@ -369,57 +373,57 @@ func NewOVNKubeControllerWatchFactory(ovnClientset *util.OVNKubeControllerClient var err error // Create our informer-wrapper informer (and underlying shared informer) for types we need - wf.informers[PodType], err = newQueuedInformer(PodType, wf.iFactory.Core().V1().Pods().Informer(), wf.stopChan, + wf.informers[PodType], err = newQueuedInformer(eventQueueSize, PodType, wf.iFactory.Core().V1().Pods().Informer(), wf.stopChan, defaultNumEventQueues) if err != nil { return nil, err } - wf.informers[ServiceType], err = newQueuedInformer(ServiceType, wf.iFactory.Core().V1().Services().Informer(), + wf.informers[ServiceType], err = newQueuedInformer(eventQueueSize, ServiceType, wf.iFactory.Core().V1().Services().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err } - wf.informers[PolicyType], err = newQueuedInformer(PolicyType, wf.iFactory.Networking().V1().NetworkPolicies().Informer(), + wf.informers[PolicyType], err = newQueuedInformer(eventQueueSize, PolicyType, wf.iFactory.Networking().V1().NetworkPolicies().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err } - wf.informers[NamespaceType], err = newQueuedInformer(NamespaceType, wf.iFactory.Core().V1().Namespaces().Informer(), + wf.informers[NamespaceType], err = newQueuedInformer(eventQueueSize, NamespaceType, wf.iFactory.Core().V1().Namespaces().Informer(), wf.stopChan, defaultNumEventQueues) if err != nil { return nil, err } - wf.informers[NodeType], err = newQueuedInformer(NodeType, wf.iFactory.Core().V1().Nodes().Informer(), wf.stopChan, + wf.informers[NodeType], err = newQueuedInformer(eventQueueSize, NodeType, wf.iFactory.Core().V1().Nodes().Informer(), wf.stopChan, defaultNumEventQueues) if err != nil { return nil, err } - wf.informers[EndpointSliceType], err = newQueuedInformer(EndpointSliceType, wf.iFactory.Discovery().V1().EndpointSlices().Informer(), + wf.informers[EndpointSliceType], err = newQueuedInformer(eventQueueSize, EndpointSliceType, wf.iFactory.Discovery().V1().EndpointSlices().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err } if config.OVNKubernetesFeature.EnableAdminNetworkPolicy { - wf.informers[AdminNetworkPolicyType], err = newQueuedInformer(AdminNetworkPolicyType, + wf.informers[AdminNetworkPolicyType], err = newQueuedInformer(eventQueueSize, AdminNetworkPolicyType, wf.anpFactory.Policy().V1alpha1().AdminNetworkPolicies().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err } - wf.informers[BaselineAdminNetworkPolicyType], err = newQueuedInformer(BaselineAdminNetworkPolicyType, + wf.informers[BaselineAdminNetworkPolicyType], err = newQueuedInformer(eventQueueSize, BaselineAdminNetworkPolicyType, wf.anpFactory.Policy().V1alpha1().BaselineAdminNetworkPolicies().Informer(), wf.stopChan, minNumEventQueues) 
if err != nil { return nil, err } } if config.OVNKubernetesFeature.EnableEgressIP { - wf.informers[EgressIPType], err = newQueuedInformer(EgressIPType, wf.eipFactory.K8s().V1().EgressIPs().Informer(), wf.stopChan, + wf.informers[EgressIPType], err = newQueuedInformer(eventQueueSize, EgressIPType, wf.eipFactory.K8s().V1().EgressIPs().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err } } if config.OVNKubernetesFeature.EnableEgressFirewall { - wf.informers[EgressFirewallType], err = newQueuedInformer(EgressFirewallType, wf.efFactory.K8s().V1().EgressFirewalls().Informer(), + wf.informers[EgressFirewallType], err = newQueuedInformer(eventQueueSize, EgressFirewallType, wf.efFactory.K8s().V1().EgressFirewalls().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err @@ -431,14 +435,14 @@ func NewOVNKubeControllerWatchFactory(ovnClientset *util.OVNKubeControllerClient } } if config.OVNKubernetesFeature.EnableEgressQoS { - wf.informers[EgressQoSType], err = newQueuedInformer(EgressQoSType, wf.egressQoSFactory.K8s().V1().EgressQoSes().Informer(), + wf.informers[EgressQoSType], err = newQueuedInformer(eventQueueSize, EgressQoSType, wf.egressQoSFactory.K8s().V1().EgressQoSes().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err } } if config.OVNKubernetesFeature.EnableEgressService { - wf.informers[EgressServiceType], err = newQueuedInformer(EgressServiceType, + wf.informers[EgressServiceType], err = newQueuedInformer(eventQueueSize, EgressServiceType, wf.egressServiceFactory.K8s().V1().EgressServices().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err @@ -447,7 +451,7 @@ func NewOVNKubeControllerWatchFactory(ovnClientset *util.OVNKubeControllerClient if config.OVNKubernetesFeature.EnableMultiNetwork { wf.nadFactory = nadinformerfactory.NewSharedInformerFactory(ovnClientset.NetworkAttchDefClient, resyncInterval) - wf.informers[NetworkAttachmentDefinitionType], err = newQueuedInformer(NetworkAttachmentDefinitionType, + wf.informers[NetworkAttachmentDefinitionType], err = newQueuedInformer(eventQueueSize, NetworkAttachmentDefinitionType, wf.nadFactory.K8sCniCncfIo().V1().NetworkAttachmentDefinitions().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err @@ -455,7 +459,7 @@ func NewOVNKubeControllerWatchFactory(ovnClientset *util.OVNKubeControllerClient if config.OVNKubernetesFeature.EnablePersistentIPs && !config.OVNKubernetesFeature.EnableInterconnect { wf.ipamClaimsFactory = ipamclaimsfactory.NewSharedInformerFactory(ovnClientset.IPAMClaimsClient, resyncInterval) - wf.informers[IPAMClaimsType], err = newQueuedInformer(IPAMClaimsType, + wf.informers[IPAMClaimsType], err = newQueuedInformer(eventQueueSize, IPAMClaimsType, wf.ipamClaimsFactory.K8s().V1alpha1().IPAMClaims().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err @@ -465,13 +469,13 @@ func NewOVNKubeControllerWatchFactory(ovnClientset *util.OVNKubeControllerClient if util.IsNetworkSegmentationSupportEnabled() { wf.udnFactory = userdefinednetworkapiinformerfactory.NewSharedInformerFactory(ovnClientset.UserDefinedNetworkClient, resyncInterval) - wf.informers[UserDefinedNetworkType], err = newQueuedInformer(UserDefinedNetworkType, + wf.informers[UserDefinedNetworkType], err = newQueuedInformer(eventQueueSize, UserDefinedNetworkType, wf.udnFactory.K8s().V1().UserDefinedNetworks().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err } - wf.informers[ClusterUserDefinedNetworkType], err 
= newQueuedInformer(ClusterUserDefinedNetworkType, + wf.informers[ClusterUserDefinedNetworkType], err = newQueuedInformer(eventQueueSize, ClusterUserDefinedNetworkType, wf.udnFactory.K8s().V1().ClusterUserDefinedNetworks().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err @@ -479,7 +483,7 @@ func NewOVNKubeControllerWatchFactory(ovnClientset *util.OVNKubeControllerClient } if util.IsMultiNetworkPoliciesSupportEnabled() { - wf.informers[MultiNetworkPolicyType], err = newQueuedInformer(MultiNetworkPolicyType, + wf.informers[MultiNetworkPolicyType], err = newQueuedInformer(eventQueueSize, MultiNetworkPolicyType, wf.mnpFactory.K8sCniCncfIo().V1beta1().MultiNetworkPolicies().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err @@ -715,7 +719,7 @@ func NewNodeWatchFactory(ovnClientset *util.OVNNodeClientset, nodeName string) ( } var err error - wf.informers[PodType], err = newQueuedInformer(PodType, wf.iFactory.Core().V1().Pods().Informer(), wf.stopChan, + wf.informers[PodType], err = newQueuedInformer(eventQueueSize, PodType, wf.iFactory.Core().V1().Pods().Informer(), wf.stopChan, defaultNumEventQueues) if err != nil { return nil, err @@ -761,44 +765,46 @@ func NewNodeWatchFactory(ovnClientset *util.OVNNodeClientset, nodeName string) ( getEndpointSliceSelector()) }) - wf.informers[NamespaceType], err = newQueuedInformer(NamespaceType, wf.iFactory.Core().V1().Namespaces().Informer(), + wf.informers[NamespaceType], err = newQueuedInformer(eventQueueSize, NamespaceType, wf.iFactory.Core().V1().Namespaces().Informer(), wf.stopChan, defaultNumEventQueues) if err != nil { return nil, err } - wf.informers[PodType], err = newQueuedInformer(PodType, wf.iFactory.Core().V1().Pods().Informer(), wf.stopChan, + wf.informers[PodType], err = newQueuedInformer(eventQueueSize, PodType, wf.iFactory.Core().V1().Pods().Informer(), wf.stopChan, defaultNumEventQueues) if err != nil { return nil, err } wf.informers[ServiceType], err = newQueuedInformer( + eventQueueSize, ServiceType, wf.iFactory.Core().V1().Services().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err } wf.informers[EndpointSliceType], err = newQueuedInformer( + eventQueueSize, EndpointSliceType, wf.iFactory.Discovery().V1().EndpointSlices().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err } - wf.informers[NodeType], err = newQueuedInformer(NodeType, wf.iFactory.Core().V1().Nodes().Informer(), wf.stopChan, + wf.informers[NodeType], err = newQueuedInformer(eventQueueSize, NodeType, wf.iFactory.Core().V1().Nodes().Informer(), wf.stopChan, defaultNumEventQueues) if err != nil { return nil, err } if config.OVNKubernetesFeature.EnableEgressService { - wf.informers[EgressServiceType], err = newQueuedInformer(EgressServiceType, + wf.informers[EgressServiceType], err = newQueuedInformer(eventQueueSize, EgressServiceType, wf.egressServiceFactory.K8s().V1().EgressServices().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err } } if config.OVNKubernetesFeature.EnableEgressIP { - wf.informers[EgressIPType], err = newQueuedInformer(EgressIPType, wf.eipFactory.K8s().V1().EgressIPs().Informer(), + wf.informers[EgressIPType], err = newQueuedInformer(eventQueueSize, EgressIPType, wf.eipFactory.K8s().V1().EgressIPs().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err @@ -821,8 +827,9 @@ func NewNodeWatchFactory(ovnClientset *util.OVNNodeClientset, nodeName string) ( // needs the NAD factory whenever the UDN feature is 
used. if config.OVNKubernetesFeature.EnableMultiNetwork && (config.OVNKubernetesFeature.EnableNetworkSegmentation || config.OvnKubeNode.Mode == types.NodeModeDPU) { wf.nadFactory = nadinformerfactory.NewSharedInformerFactory(ovnClientset.NetworkAttchDefClient, resyncInterval) - wf.informers[NetworkAttachmentDefinitionType], err = newQueuedInformer(NetworkAttachmentDefinitionType, - wf.nadFactory.K8sCniCncfIo().V1().NetworkAttachmentDefinitions().Informer(), wf.stopChan, minNumEventQueues) + wf.informers[NetworkAttachmentDefinitionType], err = newQueuedInformer(eventQueueSize, + NetworkAttachmentDefinitionType, wf.nadFactory.K8sCniCncfIo().V1().NetworkAttachmentDefinitions().Informer(), + wf.stopChan, minNumEventQueues) if err != nil { return nil, err } @@ -830,14 +837,16 @@ func NewNodeWatchFactory(ovnClientset *util.OVNNodeClientset, nodeName string) ( if util.IsNetworkSegmentationSupportEnabled() { wf.udnFactory = userdefinednetworkapiinformerfactory.NewSharedInformerFactory(ovnClientset.UserDefinedNetworkClient, resyncInterval) - wf.informers[UserDefinedNetworkType], err = newQueuedInformer(UserDefinedNetworkType, wf.udnFactory.K8s().V1().UserDefinedNetworks().Informer(), + wf.informers[UserDefinedNetworkType], err = newQueuedInformer(eventQueueSize, + UserDefinedNetworkType, wf.udnFactory.K8s().V1().UserDefinedNetworks().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err } - wf.informers[ClusterUserDefinedNetworkType], err = newQueuedInformer(ClusterUserDefinedNetworkType, - wf.udnFactory.K8s().V1().ClusterUserDefinedNetworks().Informer(), wf.stopChan, minNumEventQueues) + wf.informers[ClusterUserDefinedNetworkType], err = newQueuedInformer(eventQueueSize, + ClusterUserDefinedNetworkType, wf.udnFactory.K8s().V1().ClusterUserDefinedNetworks().Informer(), + wf.stopChan, minNumEventQueues) if err != nil { return nil, err } @@ -915,42 +924,50 @@ func NewClusterManagerWatchFactory(ovnClientset *util.OVNClusterManagerClientset var err error // Create our informer-wrapper informer (and underlying shared informer) for types we need - wf.informers[ServiceType], err = newQueuedInformer(ServiceType, wf.iFactory.Core().V1().Services().Informer(), - wf.stopChan, minNumEventQueues) + wf.informers[ServiceType], err = newQueuedInformer(eventQueueSize, ServiceType, + wf.iFactory.Core().V1().Services().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err } wf.informers[EndpointSliceType], err = newQueuedInformer( + eventQueueSize, EndpointSliceType, wf.iFactory.Discovery().V1().EndpointSlices().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err } - wf.informers[NodeType], err = newQueuedInformer(NodeType, wf.iFactory.Core().V1().Nodes().Informer(), + wf.informers[NodeType], err = newQueuedInformer(eventQueueSize, + NodeType, wf.iFactory.Core().V1().Nodes().Informer(), wf.stopChan, defaultNumEventQueues) if err != nil { return nil, err } if config.OVNKubernetesFeature.EnableEgressIP { - wf.informers[EgressIPType], err = newQueuedInformer(EgressIPType, wf.eipFactory.K8s().V1().EgressIPs().Informer(), + wf.informers[EgressIPType], err = newQueuedInformer(eventQueueSize, + EgressIPType, + wf.eipFactory.K8s().V1().EgressIPs().Informer(), wf.stopChan, minNumEventQueues) if err != nil { return nil, err } } if util.PlatformTypeIsEgressIPCloudProvider() { - wf.informers[CloudPrivateIPConfigType], err = newQueuedInformer(CloudPrivateIPConfigType, - wf.cpipcFactory.Cloud().V1().CloudPrivateIPConfigs().Informer(), wf.stopChan, 
minNumEventQueues) + wf.informers[CloudPrivateIPConfigType], err = newQueuedInformer(eventQueueSize, + CloudPrivateIPConfigType, + wf.cpipcFactory.Cloud().V1().CloudPrivateIPConfigs().Informer(), + wf.stopChan, minNumEventQueues) if err != nil { return nil, err } } if config.OVNKubernetesFeature.EnableEgressService { - wf.informers[EgressServiceType], err = newQueuedInformer(EgressServiceType, - wf.egressServiceFactory.K8s().V1().EgressServices().Informer(), wf.stopChan, minNumEventQueues) + wf.informers[EgressServiceType], err = newQueuedInformer(eventQueueSize, + EgressServiceType, + wf.egressServiceFactory.K8s().V1().EgressServices().Informer(), + wf.stopChan, minNumEventQueues) if err != nil { return nil, err } @@ -958,14 +975,17 @@ func NewClusterManagerWatchFactory(ovnClientset *util.OVNClusterManagerClientset if config.OVNKubernetesFeature.EnableMultiNetwork { wf.nadFactory = nadinformerfactory.NewSharedInformerFactory(ovnClientset.NetworkAttchDefClient, resyncInterval) - wf.informers[NetworkAttachmentDefinitionType], err = newQueuedInformer(NetworkAttachmentDefinitionType, - wf.nadFactory.K8sCniCncfIo().V1().NetworkAttachmentDefinitions().Informer(), wf.stopChan, minNumEventQueues) + wf.informers[NetworkAttachmentDefinitionType], err = newQueuedInformer(eventQueueSize, + NetworkAttachmentDefinitionType, + wf.nadFactory.K8sCniCncfIo().V1().NetworkAttachmentDefinitions().Informer(), + wf.stopChan, minNumEventQueues) if err != nil { return nil, err } if config.OVNKubernetesFeature.EnableInterconnect { - wf.informers[PodType], err = newQueuedInformer(PodType, wf.iFactory.Core().V1().Pods().Informer(), + wf.informers[PodType], err = newQueuedInformer(eventQueueSize, + PodType, wf.iFactory.Core().V1().Pods().Informer(), wf.stopChan, defaultNumEventQueues) if err != nil { return nil, err @@ -973,8 +993,10 @@ func NewClusterManagerWatchFactory(ovnClientset *util.OVNClusterManagerClientset if config.OVNKubernetesFeature.EnablePersistentIPs { wf.ipamClaimsFactory = ipamclaimsfactory.NewSharedInformerFactory(ovnClientset.IPAMClaimsClient, resyncInterval) - wf.informers[IPAMClaimsType], err = newQueuedInformer(IPAMClaimsType, - wf.ipamClaimsFactory.K8s().V1alpha1().IPAMClaims().Informer(), wf.stopChan, minNumEventQueues) + wf.informers[IPAMClaimsType], err = newQueuedInformer(eventQueueSize, + IPAMClaimsType, + wf.ipamClaimsFactory.K8s().V1alpha1().IPAMClaims().Informer(), + wf.stopChan, minNumEventQueues) if err != nil { return nil, err } @@ -999,13 +1021,17 @@ func NewClusterManagerWatchFactory(ovnClientset *util.OVNClusterManagerClientset if util.IsNetworkSegmentationSupportEnabled() { wf.udnFactory = userdefinednetworkapiinformerfactory.NewSharedInformerFactory(ovnClientset.UserDefinedNetworkClient, resyncInterval) - wf.informers[UserDefinedNetworkType], err = newQueuedInformer(UserDefinedNetworkType, - wf.udnFactory.K8s().V1().UserDefinedNetworks().Informer(), wf.stopChan, minNumEventQueues) + wf.informers[UserDefinedNetworkType], err = newQueuedInformer(eventQueueSize, + UserDefinedNetworkType, + wf.udnFactory.K8s().V1().UserDefinedNetworks().Informer(), + wf.stopChan, minNumEventQueues) if err != nil { return nil, err } - wf.informers[ClusterUserDefinedNetworkType], err = newQueuedInformer(ClusterUserDefinedNetworkType, - wf.udnFactory.K8s().V1().ClusterUserDefinedNetworks().Informer(), wf.stopChan, minNumEventQueues) + wf.informers[ClusterUserDefinedNetworkType], err = newQueuedInformer(eventQueueSize, + ClusterUserDefinedNetworkType, + 
wf.udnFactory.K8s().V1().ClusterUserDefinedNetworks().Informer(), + wf.stopChan, minNumEventQueues) if err != nil { return nil, err } diff --git a/go-controller/pkg/factory/handler.go b/go-controller/pkg/factory/handler.go index 29eaa3cca4..eb0a1867ba 100644 --- a/go-controller/pkg/factory/handler.go +++ b/go-controller/pkg/factory/handler.go @@ -210,7 +210,7 @@ func (i *informer) removeHandler(handler *Handler) { }() } -func newQueueMap(numEventQueues uint32, wg *sync.WaitGroup, stopChan chan struct{}) *queueMap { +func newQueueMap(qSize uint32, numEventQueues uint32, wg *sync.WaitGroup, stopChan chan struct{}) *queueMap { qm := &queueMap{ entries: make(map[ktypes.NamespacedName]*queueMapEntry), queues: make([]chan *event, numEventQueues), @@ -218,7 +218,7 @@ func newQueueMap(numEventQueues uint32, wg *sync.WaitGroup, stopChan chan struct stopChan: stopChan, } for j := 0; j < int(numEventQueues); j++ { - qm.queues[j] = make(chan *event, 10) + qm.queues[j] = make(chan *event, qSize) } return qm } @@ -512,7 +512,7 @@ func newBaseInformer(oType reflect.Type, sharedInformer cache.SharedIndexInforme }, nil } -func newQueuedInformer(oType reflect.Type, sharedInformer cache.SharedIndexInformer, +func newQueuedInformer(queueSize uint32, oType reflect.Type, sharedInformer cache.SharedIndexInformer, stopChan chan struct{}, numEventQueues uint32) (*informer, error) { informer, err := newBaseInformer(oType, sharedInformer) if err != nil { @@ -525,7 +525,8 @@ func newQueuedInformer(oType reflect.Type, sharedInformer cache.SharedIndexInfor // is added, only that handler should receive events for all // existing objects. addsWg := &sync.WaitGroup{} - addsMap := newQueueMap(numEventQueues, addsWg, stopChan) + + addsMap := newQueueMap(queueSize, numEventQueues, addsWg, stopChan) addsMap.start() // Distribute the existing items into the handler-specific @@ -542,7 +543,7 @@ func newQueuedInformer(oType reflect.Type, sharedInformer cache.SharedIndexInfor } for i := 0; i < internalInformerPoolSize; i++ { - informer.internalInformers[i].queueMap = newQueueMap(numEventQueues, &informer.shutdownWg, stopChan) + informer.internalInformers[i].queueMap = newQueueMap(queueSize, numEventQueues, &informer.shutdownWg, stopChan) informer.internalInformers[i].queueMap.start() _, err = informer.inf.AddEventHandler(informer.newFederatedQueuedHandler(i)) From f952cee7fc03f623fa1bbb2ab8380f29c5ff5b7a Mon Sep 17 00:00:00 2001 From: Dumitru Ceara Date: Thu, 23 Jan 2025 13:41:45 +0100 Subject: [PATCH 43/51] factory: Reduce event queue size for unit tests. The unit tests run with race detection enabled and on constrained environments (e.g., default GitHub runners) and run out of resources when using such large event queues. This change doesn't affect e2e tests in any way. Those will use default event queue sizes (1K) and informer pool size (201) in order to test what gets deployed on actual clusters. 
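As a rough, illustrative back-of-envelope (numbers taken from this series; the point is scale, not exact memory use): with a 201-entry handler pool, 15 queues per pooled handler for pod/node/namespace informers, and a 1K buffer per queue, a single such informer pre-allocates thousands of buffered channels, which adds up quickly under the race detector:

    package main

    import "fmt"

    func main() {
        const (
            poolSize      = 201  // internal informer pool size (see earlier patch in this series)
            queuesPerPool = 15   // defaultNumEventQueues for pod, node and namespace informers
            queueSize     = 1000 // default event queue size introduced above
        )
        channels := poolSize * queuesPerPool
        slots := channels * queueSize
        fmt.Printf("one pod/node/namespace informer: %d buffered channels, %d queued-event slots\n",
            channels, slots) // 3015 channels with room for ~3 million queued events
    }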
Signed-off-by: Dumitru Ceara --- go-controller/pkg/factory/factory.go | 7 +++++++ go-controller/pkg/ovn/ovn_test.go | 5 +++++ 2 files changed, 12 insertions(+) diff --git a/go-controller/pkg/factory/factory.go b/go-controller/pkg/factory/factory.go index ac34de4007..6de5c2bec9 100644 --- a/go-controller/pkg/factory/factory.go +++ b/go-controller/pkg/factory/factory.go @@ -188,12 +188,19 @@ const ( defaultHandlerPriority int = 0 // lowest priority among various handlers (See GetHandlerPriority for more information) minHandlerPriority int = 4 +) +var ( // Use a larger queue for incoming events to avoid bottlenecks // due to handlers being slow. eventQueueSize uint32 = 1000 ) +// Override default event queue configuration. Used only for tests. +func SetEventQueueSize(newEventQueueSize uint32) { + eventQueueSize = newEventQueueSize +} + // types for dynamic handlers created when adding a network policy type addressSetNamespaceAndPodSelector struct{} type peerNamespaceSelector struct{} diff --git a/go-controller/pkg/ovn/ovn_test.go b/go-controller/pkg/ovn/ovn_test.go index 302eefc2d8..1cec615909 100644 --- a/go-controller/pkg/ovn/ovn_test.go +++ b/go-controller/pkg/ovn/ovn_test.go @@ -202,6 +202,11 @@ func (o *FakeOVN) shutdown() { func (o *FakeOVN) init(nadList []nettypes.NetworkAttachmentDefinition) { var err error + // Use shorter event queues for unit tests (reduce to 10 from the default) + // to avoid running out of resources in constrained CI environments + // (e.g., on GitHub). + factory.SetEventQueueSize(10) + o.watcher, err = factory.NewMasterWatchFactory(o.fakeClient) gomega.Expect(err).NotTo(gomega.HaveOccurred()) From ad85acc8644be30b9f2c104142273299998dba1b Mon Sep 17 00:00:00 2001 From: Dumitru Ceara Date: Tue, 7 Jan 2025 15:01:25 +0100 Subject: [PATCH 44/51] ovn_test: Properly shutdown the watch factory. Signed-off-by: Dumitru Ceara --- go-controller/pkg/node/ovn_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/go-controller/pkg/node/ovn_test.go b/go-controller/pkg/node/ovn_test.go index 92115f6bf0..74d1a72fc7 100644 --- a/go-controller/pkg/node/ovn_test.go +++ b/go-controller/pkg/node/ovn_test.go @@ -73,6 +73,7 @@ func (o *FakeOVNNode) restart() { func (o *FakeOVNNode) shutdown() { close(o.stopChan) o.wg.Wait() + o.watcher.Shutdown() } func (o *FakeOVNNode) init() { From 199d61d44a7db792d5ab9bb7cd54652c8dc6668a Mon Sep 17 00:00:00 2001 From: Dumitru Ceara Date: Tue, 7 Jan 2025 13:24:47 +0100 Subject: [PATCH 45/51] gateway_localnet_linux_test: Serialize access to openflow manager flow table. 
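In other words (a minimal illustrative sketch, simplified from the change below): the tests used to read ofm.flowCache[key] directly, presumably while the openflow manager could be writing the cache from another goroutine; routing the test reads through a getter that takes the same mutex as the writers removes the race.

    package main

    import (
        "fmt"
        "sync"
    )

    type openflowManager struct {
        flowMutex sync.Mutex
        flowCache map[string][]string
    }

    // Writers already hold flowMutex when touching flowCache.
    func (c *openflowManager) updateFlowCacheEntry(key string, flows []string) {
        c.flowMutex.Lock()
        defer c.flowMutex.Unlock()
        c.flowCache[key] = flows
    }

    // getFlowsByKey gives tests a read path that takes the same lock.
    func (c *openflowManager) getFlowsByKey(key string) []string {
        c.flowMutex.Lock()
        defer c.flowMutex.Unlock()
        return c.flowCache[key]
    }

    func main() {
        ofm := &openflowManager{flowCache: map[string][]string{}}
        ofm.updateFlowCacheEntry("NodePort_namespace1_service1_tcp_31111", []string{"table=0, ..."})
        fmt.Println(ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111"))
    }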
Signed-off-by: Dumitru Ceara --- .../pkg/node/gateway_localnet_linux_test.go | 60 +++++++++---------- go-controller/pkg/node/openflow_manager.go | 6 ++ 2 files changed, 36 insertions(+), 30 deletions(-) diff --git a/go-controller/pkg/node/gateway_localnet_linux_test.go b/go-controller/pkg/node/gateway_localnet_linux_test.go index 41a9e1c3a7..92908beaca 100644 --- a/go-controller/pkg/node/gateway_localnet_linux_test.go +++ b/go-controller/pkg/node/gateway_localnet_linux_test.go @@ -683,7 +683,7 @@ var _ = Describe("Node Operations", func() { err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) Expect(err).NotTo(HaveOccurred()) - flows := fNPW.ofm.flowCache["NodePort_namespace1_service1_tcp_31111"] + flows := fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") Expect(flows).To(BeNil()) return nil @@ -887,11 +887,11 @@ var _ = Describe("Node Operations", func() { err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) Expect(err).NotTo(HaveOccurred()) - flows := fNPW.ofm.flowCache["NodePort_namespace1_service1_tcp_31111"] + flows := fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") Expect(flows).To(BeNil()) - flows = fNPW.ofm.flowCache["Ingress_namespace1_service1_5.5.5.5_8080"] + flows = fNPW.ofm.getFlowsByKey("Ingress_namespace1_service1_5.5.5.5_8080") Expect(flows).To(Equal(expectedLBIngressFlows)) - flows = fNPW.ofm.flowCache["External_namespace1_service1_1.1.1.1_8080"] + flows = fNPW.ofm.getFlowsByKey("External_namespace1_service1_1.1.1.1_8080") Expect(flows).To(Equal(expectedLBExternalIPFlows)) return nil @@ -1024,8 +1024,8 @@ var _ = Describe("Node Operations", func() { err := nodenft.MatchNFTRules(expectedNFT, nft.Dump()) Expect(err).NotTo(HaveOccurred()) - Expect(fNPW.ofm.flowCache["Ingress_namespace1_service1_5.5.5.5_80"]).To(Equal(expectedLBIngressFlows)) - Expect(fNPW.ofm.flowCache["External_namespace1_service1_1.1.1.1_80"]).To(Equal(expectedLBExternalIPFlows)) + Expect(fNPW.ofm.getFlowsByKey("Ingress_namespace1_service1_5.5.5.5_80")).To(Equal(expectedLBIngressFlows)) + Expect(fNPW.ofm.getFlowsByKey("External_namespace1_service1_1.1.1.1_80")).To(Equal(expectedLBExternalIPFlows)) return nil } Expect(app.Run([]string{app.Name})).To(Succeed()) @@ -1129,11 +1129,11 @@ var _ = Describe("Node Operations", func() { expectedNFT := getBaseNFTRules(fakeMgmtPortConfig.ifName) err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) - flows := fNPW.ofm.flowCache["NodePort_namespace1_service1_tcp_31111"] + flows := fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") Expect(flows).To(BeNil()) - flows = fNPW.ofm.flowCache["Ingress_namespace1_service1_5.5.5.5_8080"] + flows = fNPW.ofm.getFlowsByKey("Ingress_namespace1_service1_5.5.5.5_8080") Expect(flows).To(Equal(expectedLBIngressFlows)) - flows = fNPW.ofm.flowCache["External_namespace1_service1_1.1.1.1_8080"] + flows = fNPW.ofm.getFlowsByKey("External_namespace1_service1_1.1.1.1_8080") Expect(flows).To(Equal(expectedLBExternalIPFlows)) return nil @@ -1257,11 +1257,11 @@ var _ = Describe("Node Operations", func() { err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) Expect(err).NotTo(HaveOccurred()) - flows := fNPW.ofm.flowCache["NodePort_namespace1_service1_tcp_31111"] + flows := fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") Expect(flows).To(Equal(expectedNodePortFlows)) - flows = fNPW.ofm.flowCache["Ingress_namespace1_service1_5.5.5.5_8080"] + flows = fNPW.ofm.getFlowsByKey("Ingress_namespace1_service1_5.5.5.5_8080") Expect(flows).To(Equal(expectedLBIngressFlows)) - flows = 
fNPW.ofm.flowCache["External_namespace1_service1_1.1.1.1_8080"] + flows = fNPW.ofm.getFlowsByKey("External_namespace1_service1_1.1.1.1_8080") Expect(flows).To(Equal(expectedLBExternalIPFlows)) return nil @@ -1816,13 +1816,13 @@ var _ = Describe("Node Operations", func() { } Expect(err).NotTo(HaveOccurred()) - flows := fNPW.ofm.flowCache["NodePort_namespace1_service1_tcp_31111"] + flows := fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") Expect(flows).To(BeNil()) - flows = fNPW.ofm.flowCache["Ingress_namespace1_service1_5.5.5.5_8080"] + flows = fNPW.ofm.getFlowsByKey("Ingress_namespace1_service1_5.5.5.5_8080") Expect(flows).To(Equal(expectedLBIngressFlows)) - flows = fNPW.ofm.flowCache["External_namespace1_service1_1.1.1.1_8080"] + flows = fNPW.ofm.getFlowsByKey("External_namespace1_service1_1.1.1.1_8080") Expect(flows).To(Equal(expectedLBExternalIPFlows1)) - flows = fNPW.ofm.flowCache["External_namespace1_service1_1.1.1.2_8080"] + flows = fNPW.ofm.getFlowsByKey("External_namespace1_service1_1.1.1.2_8080") Expect(flows).To(Equal(expectedLBExternalIPFlows2)) addConntrackMocks(netlinkMock, []ctFilterDesc{ @@ -1834,13 +1834,13 @@ var _ = Describe("Node Operations", func() { }) err = fNPW.DeleteService(&service) Expect(err).NotTo(HaveOccurred()) - flows = fNPW.ofm.flowCache["NodePort_namespace1_service1_tcp_31111"] + flows = fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") Expect(flows).To(BeNil()) - flows = fNPW.ofm.flowCache["Ingress_namespace1_service1_5.5.5.5_8080"] + flows = fNPW.ofm.getFlowsByKey("Ingress_namespace1_service1_5.5.5.5_8080") Expect(flows).To(BeNil()) - flows = fNPW.ofm.flowCache["External_namespace1_service1_1.1.1.1_8080"] + flows = fNPW.ofm.getFlowsByKey("External_namespace1_service1_1.1.1.1_8080") Expect(flows).To(BeNil()) - flows = fNPW.ofm.flowCache["External_namespace1_service1_1.1.1.2_8080"] + flows = fNPW.ofm.getFlowsByKey("External_namespace1_service1_1.1.1.2_8080") Expect(flows).To(BeNil()) return nil @@ -2153,7 +2153,7 @@ var _ = Describe("Node Operations", func() { err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) Expect(err).NotTo(HaveOccurred()) - flows := fNPW.ofm.flowCache["NodePort_namespace1_service1_tcp_31111"] + flows := fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") Expect(flows).To(BeNil()) addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 8080}, {"192.168.18.15", 31111}}) @@ -2193,7 +2193,7 @@ var _ = Describe("Node Operations", func() { expectedNFT = getBaseNFTRules(fakeMgmtPortConfig.ifName) err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) - flows = fNPW.ofm.flowCache["NodePort_namespace1_service1_tcp_31111"] + flows = fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") Expect(flows).To(BeNil()) return nil @@ -2293,7 +2293,7 @@ var _ = Describe("Node Operations", func() { err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) Expect(err).NotTo(HaveOccurred()) - flows := fNPW.ofm.flowCache["NodePort_namespace1_service1_tcp_31111"] + flows := fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") Expect(flows).To(Equal(expectedFlows)) addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 8080}, {"192.168.18.15", 31111}}) @@ -2333,7 +2333,7 @@ var _ = Describe("Node Operations", func() { expectedNFT = getBaseNFTRules(fakeMgmtPortConfig.ifName) err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) - flows = fNPW.ofm.flowCache["NodePort_namespace1_service1_tcp_31111"] + flows = fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") 
Expect(flows).To(BeNil()) return nil @@ -2437,7 +2437,7 @@ var _ = Describe("Node Operations", func() { expectedNFT := getBaseNFTRules(fakeMgmtPortConfig.ifName) err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) - flows := fNPW.ofm.flowCache["NodePort_namespace1_service1_tcp_31111"] + flows := fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") Expect(flows).To(Equal(expectedFlows)) addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 8080}, {"192.168.18.15", 31111}}) @@ -2477,7 +2477,7 @@ var _ = Describe("Node Operations", func() { expectedNFT = getBaseNFTRules(fakeMgmtPortConfig.ifName) err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) - flows = fNPW.ofm.flowCache["NodePort_namespace1_service1_tcp_31111"] + flows = fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") Expect(flows).To(BeNil()) return nil @@ -2578,7 +2578,7 @@ var _ = Describe("Node Operations", func() { err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) Expect(err).NotTo(HaveOccurred()) - flows := fNPW.ofm.flowCache["NodePort_namespace1_service1_tcp_31111"] + flows := fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") Expect(flows).To(Equal(expectedFlows)) addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 8080}, {"192.168.18.15", 31111}}) @@ -2618,7 +2618,7 @@ var _ = Describe("Node Operations", func() { expectedNFT = getBaseNFTRules(fakeMgmtPortConfig.ifName) err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) - flows = fNPW.ofm.flowCache["NodePort_namespace1_service1_tcp_31111"] + flows = fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") Expect(flows).To(BeNil()) return nil @@ -2721,7 +2721,7 @@ var _ = Describe("Node Operations", func() { expectedNFT := getBaseNFTRules(fakeMgmtPortConfig.ifName) err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) - flows := fNPW.ofm.flowCache["NodePort_namespace1_service1_tcp_31111"] + flows := fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") Expect(flows).To(Equal(expectedFlows)) addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 8080}, {"192.168.18.15", 31111}}) @@ -2761,7 +2761,7 @@ var _ = Describe("Node Operations", func() { expectedNFT = getBaseNFTRules(fakeMgmtPortConfig.ifName) err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) - flows = fNPW.ofm.flowCache["NodePort_namespace1_service1_tcp_31111"] + flows = fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") Expect(flows).To(BeNil()) return nil diff --git a/go-controller/pkg/node/openflow_manager.go b/go-controller/pkg/node/openflow_manager.go index 91afdd09a1..c99f491a04 100644 --- a/go-controller/pkg/node/openflow_manager.go +++ b/go-controller/pkg/node/openflow_manager.go @@ -90,6 +90,12 @@ func (c *openflowManager) deleteFlowsByKey(key string) { delete(c.flowCache, key) } +func (c *openflowManager) getFlowsByKey(key string) []string { + c.flowMutex.Lock() + defer c.flowMutex.Unlock() + return c.flowCache[key] +} + func (c *openflowManager) updateExBridgeFlowCacheEntry(key string, flows []string) { c.exGWFlowMutex.Lock() defer c.exGWFlowMutex.Unlock() From 3bf5109275759c0abf795ffb23e52771ff4bdeda Mon Sep 17 00:00:00 2001 From: Dumitru Ceara Date: Thu, 9 Jan 2025 11:07:05 +0100 Subject: [PATCH 46/51] node_ip_handler_linux_test: Avoid DATA race due to concurrent config access. The IP Manager was started in a goroutine (in background) before each test spec. The IP Manager reads global config values. However, some of the test specs change the global config. This was causing a data race. 
Avoid the race by explicitly starting the IP Manager _AFTER_ all configuration changes have happened. Signed-off-by: Dumitru Ceara --- .../pkg/node/node_ip_handler_linux_test.go | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/go-controller/pkg/node/node_ip_handler_linux_test.go b/go-controller/pkg/node/node_ip_handler_linux_test.go index 087def035b..1203193dfc 100644 --- a/go-controller/pkg/node/node_ip_handler_linux_test.go +++ b/go-controller/pkg/node/node_ip_handler_linux_test.go @@ -199,12 +199,6 @@ var _ = Describe("Node IP Handler tests", func() { config.IPv6Mode = true tc = configureKubeOVNContextWithNs(nodeName) tc.ipManager.syncPeriod = 10 * time.Millisecond - tc.doneWg.Add(1) - go tc.ns.Do(func(netNS ns.NetNS) error { - tc.ipManager.runInternal(tc.stopCh, tc.ipManager.getNetlinkAddrSubFunc(tc.stopCh)) - tc.doneWg.Done() - return nil - }) }) AfterEach(func() { @@ -217,6 +211,7 @@ var _ = Describe("Node IP Handler tests", func() { Context("valid addresses", func() { ovntest.OnSupportedPlatformsIt("allows keepalived VIP", func() { + runIpManagerRoutine(tc) Expect(tc.ns.Do(func(netNS ns.NetNS) error { link, err := netlink.LinkByName(dummyBrName) if err != nil { @@ -236,6 +231,7 @@ var _ = Describe("Node IP Handler tests", func() { }) ovntest.OnSupportedPlatformsIt("allows unique local address", func() { + runIpManagerRoutine(tc) Expect(tc.ns.Do(func(netNS ns.NetNS) error { link, err := netlink.LinkByName(dummyBrName) if err != nil { @@ -255,6 +251,7 @@ var _ = Describe("Node IP Handler tests", func() { }) ovntest.OnSupportedPlatformsIt("allow secondary IP", func() { + runIpManagerRoutine(tc) primaryIPNet := ovntest.MustParseIPNet(dummyAdditionalIPv4CIDR) // create an additional IP which resides within the primary subnet aka secondary IP secondaryIP := make(net.IP, len(primaryIPNet.IP)) @@ -288,6 +285,7 @@ var _ = Describe("Node IP Handler tests", func() { config.Gateway.MasqueradeIPs.V4OVNMasqueradeIP = ovntest.MustParseIP(dummyMasqIPv4) config.Gateway.MasqueradeIPs.V6OVNMasqueradeIP = ovntest.MustParseIP(dummyMasqIPv6) + runIpManagerRoutine(tc) Expect(tc.ns.Do(func(netNS ns.NetNS) error { link, err := netlink.LinkByName(dummyBrName) if err != nil { @@ -310,6 +308,8 @@ var _ = Describe("Node IP Handler tests", func() { ovntest.OnSupportedPlatformsIt("doesn't allow OVN management port IPs", func() { config.OVNKubernetesFeature.EnableMultiNetwork = true config.OVNKubernetesFeature.EnableNetworkSegmentation = true + + runIpManagerRoutine(tc) Expect(tc.ns.Do(func(netNS ns.NetNS) error { mpLink := ovntest.AddLink(fmt.Sprintf("%s1234", ovntypes.K8sMgmtIntfNamePrefix)) return netlink.AddrAdd(mpLink, &netlink.Addr{LinkIndex: mpLink.Attrs().Index, Scope: unix.RT_SCOPE_UNIVERSE, @@ -322,6 +322,18 @@ var _ = Describe("Node IP Handler tests", func() { }) }) +// The runIpManagerRoutine reads from the config (e.g., +// IsNetworkSegmentationSupportEnabled()) so it must be called explicitly +// from each test spec _AFTER_ all custom config changes happened. 
+func runIpManagerRoutine(tc *testCtx) { + tc.doneWg.Add(1) + go tc.ns.Do(func(netNS ns.NetNS) error { + tc.ipManager.runInternal(tc.stopCh, tc.ipManager.getNetlinkAddrSubFunc(tc.stopCh)) + tc.doneWg.Done() + return nil + }) +} + func configureKubeOVNContextWithNs(nodeName string) *testCtx { testNs, err := testutils.NewNS() Expect(err).NotTo(HaveOccurred()) From 29ec1066cb94f80663ff216a700338f466336f51 Mon Sep 17 00:00:00 2001 From: Dumitru Ceara Date: Wed, 15 Jan 2025 11:55:07 +0100 Subject: [PATCH 47/51] subnet_allocator: Take lock when reading subnetAllocatorRanges. Otherwise there's a data race because subnets can be allocated while we read the current usage/count. Signed-off-by: Dumitru Ceara --- go-controller/pkg/clustermanager/node/subnet_allocator.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/go-controller/pkg/clustermanager/node/subnet_allocator.go b/go-controller/pkg/clustermanager/node/subnet_allocator.go index 744b55d62d..42221d2e9a 100644 --- a/go-controller/pkg/clustermanager/node/subnet_allocator.go +++ b/go-controller/pkg/clustermanager/node/subnet_allocator.go @@ -44,6 +44,9 @@ func NewSubnetAllocator() SubnetAllocator { // Usage returns the number of used/allocated v4 and v6 subnets func (sna *BaseSubnetAllocator) Usage() (uint64, uint64) { + sna.Lock() + defer sna.Unlock() + var v4used, v6used uint64 for _, snr := range sna.v4ranges { v4used = v4used + snr.usage() @@ -56,6 +59,9 @@ func (sna *BaseSubnetAllocator) Usage() (uint64, uint64) { // Count returns the number of available (both used and unused) v4 and v6 subnets func (sna *BaseSubnetAllocator) Count() (uint64, uint64) { + sna.Lock() + defer sna.Unlock() + var v4count, v6count uint64 for _, snr := range sna.v4ranges { v4count = v4count + snr.count() From a1427dc88e66ff58eb6a78fc9bc70f7a054aba83 Mon Sep 17 00:00:00 2001 From: Dumitru Ceara Date: Wed, 22 Jan 2025 11:43:09 +0100 Subject: [PATCH 48/51] gateway_localnet_linux_test.go: Fix various test issues. - tests should not call AddService()/DelService() directly, that should happen through kube calls; this means we need to check for eventual consistency (the object event is processed at some point after the kube call). This is not necessary though for objects that are already in the initial state of the API. Those are processed synchronously before the test object handler finishes registration. - for deletion bump eventual consistency timeout to 2 seconds (from the default of 1 second) to try to reduce the number of flakes we might see in CI on constrained systems (e.g., GitHub actions). - a bunch of the tests called "match()" functions but never checked their return values. - There's an inherent race when adding an event handler to a federated queue informer which may cause initial Add events to be processed twice. That's because the shared index informer enqueues the Add events in a random way to the internal informers meaning that on startup an internal informer might or might not have already processed the Add event before an event handler is registered. In practice that's not a real problem because event handlers must be able to process multiple Add events for the same object. For unit tests, however, that's more problematic because unit tests often expect a fixed pre-defined set of operations (e.g., ovs-ofctl calls) to happen. Adapt the tests that are racy and relax checks to avoid failures when the race happens. 
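For example, the deletion specs now follow roughly this shape (a sketch only, assuming the Gomega "Eventually" helper and the fake client/iptables/OpenFlow helpers already used by these tests):

    // Trigger the change through the kube client, then poll instead of
    // asserting immediately: the service event is processed asynchronously
    // by the handler registered on the watch factory.
    Expect(fakeOvnNode.fakeClient.KubeClient.CoreV1().Services(service.Namespace).Delete(
        context.Background(), service.Name, metav1.DeleteOptions{})).To(Succeed())

    // Poll the fake iptables state until it converges (2s instead of the
    // default 1s timeout to reduce flakes on constrained CI systems).
    Eventually(func() error {
        f4 := iptV4.(*util.FakeIPTables)
        return f4.MatchState(expectedTables, nil)
    }, "2s").Should(Succeed())

    // Flow cache lookups are polled the same way once the service is gone.
    Eventually(func() []string {
        return fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111")
    }, "2s").Should(BeNil())

Objects already present in the initial API state do not need this polling: they are processed synchronously before handler registration finishes, so those assertions can stay immediate.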
Signed-off-by: Dumitru Ceara --- .../pkg/node/gateway_localnet_linux_test.go | 464 +++++++++--------- go-controller/pkg/testing/exec.go | 21 + 2 files changed, 261 insertions(+), 224 deletions(-) diff --git a/go-controller/pkg/node/gateway_localnet_linux_test.go b/go-controller/pkg/node/gateway_localnet_linux_test.go index 92908beaca..2c4583b716 100644 --- a/go-controller/pkg/node/gateway_localnet_linux_test.go +++ b/go-controller/pkg/node/gateway_localnet_linux_test.go @@ -113,16 +113,16 @@ func startNodePortWatcher(n *nodePortWatcher, fakeClient *util.OVNNodeClientset, _, err := n.watchFactory.AddServiceHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(obj interface{}) { svc := obj.(*kapi.Service) - n.AddService(svc) + Expect(n.AddService(svc)).To(Succeed()) }, UpdateFunc: func(old, new interface{}) { oldSvc := old.(*kapi.Service) newSvc := new.(*kapi.Service) - n.UpdateService(oldSvc, newSvc) + Expect(n.UpdateService(oldSvc, newSvc)).To(Succeed()) }, DeleteFunc: func(obj interface{}) { svc := obj.(*kapi.Service) - n.DeleteService(svc) + Expect(n.DeleteService(svc)).To(Succeed()) }, }, n.SyncServices) @@ -268,6 +268,8 @@ var _ = Describe("Node Operations", func() { fNPW *nodePortWatcher fakeMgmtPortConfig managementPortConfig netlinkMock *mocks.NetLinkOps + + nInitialFakeCommands int ) origNetlinkInst := util.GetNetLinkOps() @@ -286,6 +288,7 @@ var _ = Describe("Node Operations", func() { fakeOvnNode.fakeExec.AddFakeCmd(&ovntest.ExpectedCmd{ Cmd: "ovs-vsctl --timeout=15 --no-heading --data=bare --format=csv --columns name list interface", }) + nInitialFakeCommands = 1 iptV4, iptV6 = util.SetFakeIPTablesHelpers() nft = nodenft.SetFakeNFTablesHelper() @@ -318,14 +321,15 @@ var _ = Describe("Node Operations", func() { Context("on startup", func() { It("removes stale iptables/nftables rules while keeping remaining intact", func() { app.Action = func(ctx *cli.Context) error { + // Depending on the order of informer event processing the initial + // Service might be "added" once or twice. Take that into account. 
+ minNFakeCommands := nInitialFakeCommands + 2 + fakeOvnNode.fakeExec.AddRepeatedFakeCmd(&ovntest.ExpectedCmd{ + Cmd: "ovs-ofctl show ", + }, 3) + externalIP := "1.1.1.1" externalIPPort := int32(8032) - for i := 0; i < 2; i++ { - fakeOvnNode.fakeExec.AddFakeCmd(&ovntest.ExpectedCmd{ - Cmd: "ovs-ofctl show ", - }) - } - service := *newService("service1", "namespace1", "10.129.0.2", []v1.ServicePort{ { @@ -399,7 +403,9 @@ var _ = Describe("Node Operations", func() { fNPW.watchFactory = fakeOvnNode.watcher Expect(startNodePortWatcher(fNPW, fakeOvnNode.fakeClient, &fakeMgmtPortConfig)).To(Succeed()) - Expect(fakeOvnNode.fakeExec.CalledMatchesExpected()).To(BeTrue(), fExec.ErrorDesc) + Eventually(func() bool { + return fakeOvnNode.fakeExec.CalledMatchesExpectedAtLeastN(minNFakeCommands) + }, "2s").Should(BeTrue(), fExec.ErrorDesc) Expect(setupManagementPortNFTables(&fakeMgmtPortConfig)).To(Succeed()) expectedTables = map[string]util.FakeTable{ @@ -435,10 +441,7 @@ var _ = Describe("Node Operations", func() { Expect(err).NotTo(HaveOccurred()) expectedNFT = getBaseNFTRules(fakeMgmtPortConfig.ifName) - err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) - Expect(err).NotTo(HaveOccurred()) - - return nil + return nodenft.MatchNFTRules(expectedNFT, nft.Dump()) } err := app.Run([]string{app.Name}) Expect(err).NotTo(HaveOccurred()) @@ -475,8 +478,6 @@ var _ = Describe("Node Operations", func() { fNPW.watchFactory = fakeOvnNode.watcher Expect(startNodePortWatcher(fNPW, fakeOvnNode.fakeClient, &fakeMgmtPortConfig)).To(Succeed()) - err := fNPW.AddService(&service) - Expect(err).NotTo(HaveOccurred()) expectedTables := map[string]util.FakeTable{ "nat": { @@ -507,13 +508,11 @@ var _ = Describe("Node Operations", func() { } f4 := iptV4.(*util.FakeIPTables) - err = f4.MatchState(expectedTables, nil) + err := f4.MatchState(expectedTables, nil) Expect(err).NotTo(HaveOccurred()) expectedNFT := getBaseNFTRules(fakeMgmtPortConfig.ifName) - err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) - - return nil + return nodenft.MatchNFTRules(expectedNFT, nft.Dump()) } err := app.Run([]string{app.Name}) Expect(err).NotTo(HaveOccurred()) @@ -552,8 +551,6 @@ var _ = Describe("Node Operations", func() { fNPW.watchFactory = fakeOvnNode.watcher Expect(startNodePortWatcher(fNPW, fakeOvnNode.fakeClient, &fakeMgmtPortConfig)).To(Succeed()) - err := fNPW.AddService(&service) - Expect(err).NotTo(HaveOccurred()) Expect(fakeOvnNode.fakeExec.CalledMatchesExpected()).To(BeTrue(), fExec.ErrorDesc) expectedTables := map[string]util.FakeTable{ @@ -585,13 +582,11 @@ var _ = Describe("Node Operations", func() { } f4 := iptV4.(*util.FakeIPTables) - err = f4.MatchState(expectedTables, nil) + err := f4.MatchState(expectedTables, nil) Expect(err).NotTo(HaveOccurred()) expectedNFT := getBaseNFTRules(fakeMgmtPortConfig.ifName) - err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) - - return nil + return nodenft.MatchNFTRules(expectedNFT, nft.Dump()) } err := app.Run([]string{app.Name}) Expect(err).NotTo(HaveOccurred()) @@ -641,8 +636,6 @@ var _ = Describe("Node Operations", func() { fNPW.watchFactory = fakeOvnNode.watcher Expect(startNodePortWatcher(fNPW, fakeOvnNode.fakeClient, &fakeMgmtPortConfig)).To(Succeed()) - err := fNPW.AddService(&service) - Expect(err).NotTo(HaveOccurred()) expectedTables := map[string]util.FakeTable{ "nat": { @@ -675,7 +668,7 @@ var _ = Describe("Node Operations", func() { } f4 := iptV4.(*util.FakeIPTables) - err = f4.MatchState(expectedTables, nil) + err := f4.MatchState(expectedTables, nil) 
Expect(err).NotTo(HaveOccurred()) expectedNFT := getBaseNFTRules(fakeMgmtPortConfig.ifName) @@ -694,12 +687,14 @@ var _ = Describe("Node Operations", func() { It("inits iptables rules with LoadBalancer", func() { app.Action = func(ctx *cli.Context) error { + // Depending on the order of informer event processing the initial + // Service might be "added" once or twice. Take that into account. + minNFakeCommands := nInitialFakeCommands + 2 + fakeOvnNode.fakeExec.AddRepeatedFakeCmd(&ovntest.ExpectedCmd{ + Cmd: "ovs-ofctl show ", + }, 3) + externalIP := "1.1.1.1" - for i := 0; i < 3; i++ { - fakeOvnNode.fakeExec.AddFakeCmd(&ovntest.ExpectedCmd{ - Cmd: "ovs-ofctl show ", - }) - } service := *newService("service1", "namespace1", "10.129.0.2", []v1.ServicePort{ { @@ -736,9 +731,9 @@ var _ = Describe("Node Operations", func() { fNPW.watchFactory = fakeOvnNode.watcher Expect(startNodePortWatcher(fNPW, fakeOvnNode.fakeClient, &fakeMgmtPortConfig)).To(Succeed()) - err := fNPW.AddService(&service) - Expect(err).NotTo(HaveOccurred()) - Expect(fakeOvnNode.fakeExec.CalledMatchesExpected()).To(BeTrue(), fExec.ErrorDesc) + Eventually(func() bool { + return fakeOvnNode.fakeExec.CalledMatchesExpectedAtLeastN(minNFakeCommands) + }, "2s").Should(BeTrue(), fExec.ErrorDesc) expectedTables := map[string]util.FakeTable{ "nat": { @@ -772,13 +767,11 @@ var _ = Describe("Node Operations", func() { } f4 := iptV4.(*util.FakeIPTables) - err = f4.MatchState(expectedTables, nil) + err := f4.MatchState(expectedTables, nil) Expect(err).NotTo(HaveOccurred()) expectedNFT := getBaseNFTRules(fakeMgmtPortConfig.ifName) - err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) - - return nil + return nodenft.MatchNFTRules(expectedNFT, nft.Dump()) } err := app.Run([]string{app.Name}) Expect(err).NotTo(HaveOccurred()) @@ -834,8 +827,6 @@ var _ = Describe("Node Operations", func() { fNPW.watchFactory = fakeOvnNode.watcher Expect(startNodePortWatcher(fNPW, fakeOvnNode.fakeClient, &fakeMgmtPortConfig)).To(Succeed()) - err := fNPW.AddService(&service) - Expect(err).NotTo(HaveOccurred()) expectedTables := map[string]util.FakeTable{ "nat": { @@ -879,7 +870,7 @@ var _ = Describe("Node Operations", func() { } f4 := iptV4.(*util.FakeIPTables) - err = f4.MatchState(expectedTables, nil) + err := f4.MatchState(expectedTables, nil) Expect(err).NotTo(HaveOccurred()) expectedNFT := getBaseNFTRules(fakeMgmtPortConfig.ifName) @@ -973,7 +964,6 @@ var _ = Describe("Node Operations", func() { fNPW.watchFactory = fakeOvnNode.watcher Expect(startNodePortWatcher(fNPW, fakeOvnNode.fakeClient, &fakeMgmtPortConfig)).To(Succeed()) - fNPW.AddService(&service) expectedTables := map[string]util.FakeTable{ "nat": { @@ -1081,8 +1071,6 @@ var _ = Describe("Node Operations", func() { fNPW.watchFactory = fakeOvnNode.watcher Expect(startNodePortWatcher(fNPW, fakeOvnNode.fakeClient, &fakeMgmtPortConfig)).To(Succeed()) - err := fNPW.AddService(&service) - Expect(err).NotTo(HaveOccurred()) expectedTables := map[string]util.FakeTable{ "nat": { @@ -1123,7 +1111,7 @@ var _ = Describe("Node Operations", func() { } f4 := iptV4.(*util.FakeIPTables) - err = f4.MatchState(expectedTables, nil) + err := f4.MatchState(expectedTables, nil) Expect(err).NotTo(HaveOccurred()) expectedNFT := getBaseNFTRules(fakeMgmtPortConfig.ifName) @@ -1192,8 +1180,6 @@ var _ = Describe("Node Operations", func() { fNPW.watchFactory = fakeOvnNode.watcher Expect(startNodePortWatcher(fNPW, fakeOvnNode.fakeClient, &fakeMgmtPortConfig)).To(Succeed()) - err := fNPW.AddService(&service) - 
Expect(err).NotTo(HaveOccurred()) expectedTables := map[string]util.FakeTable{ "nat": { @@ -1249,7 +1235,7 @@ var _ = Describe("Node Operations", func() { } f4 := iptV4.(*util.FakeIPTables) - err = f4.MatchState(expectedTables, nil) + err := f4.MatchState(expectedTables, nil) Expect(err).NotTo(HaveOccurred()) expectedNFT := getBaseNFTRules(fakeMgmtPortConfig.ifName) @@ -1307,8 +1293,6 @@ var _ = Describe("Node Operations", func() { fNPW.watchFactory = fakeOvnNode.watcher Expect(startNodePortWatcher(fNPW, fakeOvnNode.fakeClient, &fakeMgmtPortConfig)).To(Succeed()) - err := fNPW.AddService(&service) - Expect(err).NotTo(HaveOccurred()) Expect(fakeOvnNode.fakeExec.CalledMatchesExpected()).To(BeTrue(), fExec.ErrorDesc) expectedTables4 := map[string]util.FakeTable{ @@ -1340,7 +1324,7 @@ var _ = Describe("Node Operations", func() { } f4 := iptV4.(*util.FakeIPTables) - err = f4.MatchState(expectedTables4, nil) + err := f4.MatchState(expectedTables4, nil) Expect(err).NotTo(HaveOccurred()) expectedTables6 := map[string]util.FakeTable{ @@ -1368,16 +1352,18 @@ var _ = Describe("Node Operations", func() { It("inits iptables rules for ExternalIP with DualStack", func() { app.Action = func(ctx *cli.Context) error { + // Depending on the order of informer event processing the initial + // Service might be "added" once or twice. Take that into account. + minNFakeCommands := nInitialFakeCommands + 2 + fakeOvnNode.fakeExec.AddRepeatedFakeCmd(&ovntest.ExpectedCmd{ + Cmd: "ovs-ofctl show ", + }, 3) + externalIPv4 := "10.10.10.1" externalIPv6 := "fd00:96:1::1" clusterIPv4 := "10.129.0.2" clusterIPv6 := "fd00:10:96::10" fNPW.gatewayIPv6 = v6localnetGatewayIP - for i := 0; i < 3; i++ { - fakeOvnNode.fakeExec.AddFakeCmd(&ovntest.ExpectedCmd{ - Cmd: "ovs-ofctl show ", - }) - } service := *newService("service1", "namespace1", clusterIPv4, []v1.ServicePort{ @@ -1402,9 +1388,9 @@ var _ = Describe("Node Operations", func() { fNPW.watchFactory = fakeOvnNode.watcher Expect(startNodePortWatcher(fNPW, fakeOvnNode.fakeClient, &fakeMgmtPortConfig)).To(Succeed()) - err := fNPW.AddService(&service) - Expect(err).NotTo(HaveOccurred()) - Expect(fakeOvnNode.fakeExec.CalledMatchesExpected()).To(BeTrue(), fExec.ErrorDesc) + Eventually(func() bool { + return fakeOvnNode.fakeExec.CalledMatchesExpectedAtLeastN(minNFakeCommands) + }, "2s").Should(BeTrue(), fExec.ErrorDesc) expectedTables4 := map[string]util.FakeTable{ "nat": { @@ -1435,7 +1421,7 @@ var _ = Describe("Node Operations", func() { } f4 := iptV4.(*util.FakeIPTables) - err = f4.MatchState(expectedTables4, nil) + err := f4.MatchState(expectedTables4, nil) Expect(err).NotTo(HaveOccurred()) expectedTables6 := map[string]util.FakeTable{ @@ -1453,9 +1439,7 @@ var _ = Describe("Node Operations", func() { Expect(err).NotTo(HaveOccurred()) expectedNFT := getBaseNFTRules(fakeMgmtPortConfig.ifName) - err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) - - return nil + return nodenft.MatchNFTRules(expectedNFT, nft.Dump()) } err := app.Run([]string{app.Name}) Expect(err).NotTo(HaveOccurred()) @@ -1465,12 +1449,14 @@ var _ = Describe("Node Operations", func() { Context("on delete", func() { It("deletes iptables rules with ExternalIP", func() { app.Action = func(ctx *cli.Context) error { + // Depending on the order of informer event processing the initial + // Service might be "added" once or twice. Take that into account. 
+ minNFakeCommands := nInitialFakeCommands + 2 + fakeOvnNode.fakeExec.AddRepeatedFakeCmd(&ovntest.ExpectedCmd{ + Cmd: "ovs-ofctl show ", + }, 3) + externalIP := "1.1.1.1" - for i := 0; i < 2; i++ { - fakeOvnNode.fakeExec.AddFakeCmd(&ovntest.ExpectedCmd{ - Cmd: "ovs-ofctl show ", - }) - } service := *newService("service1", "namespace1", "10.129.0.2", []v1.ServicePort{ { @@ -1496,9 +1482,11 @@ var _ = Describe("Node Operations", func() { Expect(startNodePortWatcher(fNPW, fakeOvnNode.fakeClient, &fakeMgmtPortConfig)).To(Succeed()) addConntrackMocks(netlinkMock, []ctFilterDesc{{"1.1.1.1", 8032}, {"10.129.0.2", 8032}}) - err := fNPW.DeleteService(&service) - Expect(err).NotTo(HaveOccurred()) - Expect(fakeOvnNode.fakeExec.CalledMatchesExpected()).To(BeTrue(), fExec.ErrorDesc) + Expect(fakeOvnNode.fakeClient.KubeClient.CoreV1().Services(service.Namespace).Delete( + context.Background(), service.Name, metav1.DeleteOptions{})).To(Succeed()) + Eventually(func() bool { + return fakeOvnNode.fakeExec.CalledMatchesExpectedAtLeastN(minNFakeCommands) + }, "2s").Should(BeTrue(), fExec.ErrorDesc) expectedTables := map[string]util.FakeTable{ "nat": { @@ -1526,21 +1514,25 @@ var _ = Describe("Node Operations", func() { }, } - f4 := iptV4.(*util.FakeIPTables) - err = f4.MatchState(expectedTables, nil) - Expect(err).NotTo(HaveOccurred()) + Eventually(func() error { + f4 := iptV4.(*util.FakeIPTables) + return f4.MatchState(expectedTables, nil) + }, "2s").Should(Succeed()) expectedTables = map[string]util.FakeTable{ "nat": {}, "filter": {}, "mangle": {}, } - f6 := iptV6.(*util.FakeIPTables) - err = f6.MatchState(expectedTables, nil) - Expect(err).NotTo(HaveOccurred()) + Eventually(func() error { + f6 := iptV6.(*util.FakeIPTables) + return f6.MatchState(expectedTables, nil) + }, "2s").Should(Succeed()) - expectedNFT := getBaseNFTRules(fakeMgmtPortConfig.ifName) - err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) + Eventually(func() error { + expectedNFT := getBaseNFTRules(fakeMgmtPortConfig.ifName) + return nodenft.MatchNFTRules(expectedNFT, nft.Dump()) + }, "2s").Should(Succeed()) return nil } @@ -1577,9 +1569,9 @@ var _ = Describe("Node Operations", func() { Expect(startNodePortWatcher(fNPW, fakeOvnNode.fakeClient, &fakeMgmtPortConfig)).To(Succeed()) addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 0}, {"192.168.18.15", 31111}}) - err := fNPW.DeleteService(&service) - Expect(err).NotTo(HaveOccurred()) - Expect(fakeOvnNode.fakeExec.CalledMatchesExpected()).To(BeTrue(), fExec.ErrorDesc) + Expect(fakeOvnNode.fakeClient.KubeClient.CoreV1().Services(service.Namespace).Delete( + context.Background(), service.Name, metav1.DeleteOptions{})).To(Succeed()) + Eventually(fakeOvnNode.fakeExec.CalledMatchesExpected, "2s").Should(BeTrue(), fExec.ErrorDesc) expectedTables := map[string]util.FakeTable{ "nat": { @@ -1607,9 +1599,10 @@ var _ = Describe("Node Operations", func() { }, } - f4 := iptV4.(*util.FakeIPTables) - err = f4.MatchState(expectedTables, nil) - Expect(err).NotTo(HaveOccurred()) + Eventually(func() error { + f4 := iptV4.(*util.FakeIPTables) + return f4.MatchState(expectedTables, nil) + }, "2s").Should(Succeed()) expectedTables = map[string]util.FakeTable{ "nat": {}, @@ -1617,12 +1610,15 @@ var _ = Describe("Node Operations", func() { "mangle": {}, } - f6 := iptV6.(*util.FakeIPTables) - err = f6.MatchState(expectedTables, nil) - Expect(err).NotTo(HaveOccurred()) + Eventually(func() error { + f6 := iptV6.(*util.FakeIPTables) + return f6.MatchState(expectedTables, nil) + }, "2s").Should(Succeed()) - 
expectedNFT := getBaseNFTRules(fakeMgmtPortConfig.ifName) - err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) + Eventually(func() error { + expectedNFT := getBaseNFTRules(fakeMgmtPortConfig.ifName) + return nodenft.MatchNFTRules(expectedNFT, nft.Dump()) + }, "2s").Should(Succeed()) return nil } @@ -1634,12 +1630,14 @@ var _ = Describe("Node Operations", func() { Context("on add and delete", func() { It("manages iptables rules with ExternalIP", func() { app.Action = func(ctx *cli.Context) error { + // Depending on the order of informer event processing the initial + // Service might be "added" once or twice. Take that into account. + minNFakeCommands := nInitialFakeCommands + 2 + fakeOvnNode.fakeExec.AddRepeatedFakeCmd(&ovntest.ExpectedCmd{ + Cmd: "ovs-ofctl show ", + }, 3) + externalIP := "10.10.10.1" - for i := 0; i < 3; i++ { - fakeOvnNode.fakeExec.AddFakeCmd(&ovntest.ExpectedCmd{ - Cmd: "ovs-ofctl show ", - }) - } externalIPPort := int32(8034) service := *newService("service1", "namespace1", "10.129.0.2", []v1.ServicePort{ @@ -1662,9 +1660,9 @@ var _ = Describe("Node Operations", func() { ) fNPW.watchFactory = fakeOvnNode.watcher Expect(startNodePortWatcher(fNPW, fakeOvnNode.fakeClient, &fakeMgmtPortConfig)).To(Succeed()) - err := fNPW.AddService(&service) - Expect(err).NotTo(HaveOccurred()) - Expect(fakeOvnNode.fakeExec.CalledMatchesExpected()).To(BeTrue(), fExec.ErrorDesc) + Eventually(func() bool { + return fakeOvnNode.fakeExec.CalledMatchesExpectedAtLeastN(minNFakeCommands) + }, "2s").Should(BeTrue(), fExec.ErrorDesc) expectedTables := map[string]util.FakeTable{ "nat": { @@ -1696,16 +1694,19 @@ var _ = Describe("Node Operations", func() { }, } - f4 := iptV4.(*util.FakeIPTables) - err = f4.MatchState(expectedTables, nil) - Expect(err).NotTo(HaveOccurred()) + Eventually(func() error { + f4 := iptV4.(*util.FakeIPTables) + return f4.MatchState(expectedTables, nil) + }).Should(Succeed()) - expectedNFT := getBaseNFTRules(fakeMgmtPortConfig.ifName) - err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) + Eventually(func() error { + expectedNFT := getBaseNFTRules(fakeMgmtPortConfig.ifName) + return nodenft.MatchNFTRules(expectedNFT, nft.Dump()) + }).Should(Succeed()) addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.10.10.1", 8034}, {"10.129.0.2", 8034}}) - err = fNPW.DeleteService(&service) - Expect(err).NotTo(HaveOccurred()) + Expect(fakeOvnNode.fakeClient.KubeClient.CoreV1().Services(service.Namespace).Delete( + context.Background(), service.Name, metav1.DeleteOptions{})).To(Succeed()) expectedTables = map[string]util.FakeTable{ "nat": { @@ -1733,18 +1734,20 @@ var _ = Describe("Node Operations", func() { }, } - f4 = iptV4.(*util.FakeIPTables) - err = f4.MatchState(expectedTables, nil) - Expect(err).NotTo(HaveOccurred()) + Eventually(func() error { + f4 := iptV4.(*util.FakeIPTables) + return f4.MatchState(expectedTables, nil) + }, "2s").Should(Succeed()) - expectedNFT = getBaseNFTRules(fakeMgmtPortConfig.ifName) - err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) + Eventually(func() error { + expectedNFT := getBaseNFTRules(fakeMgmtPortConfig.ifName) + return nodenft.MatchNFTRules(expectedNFT, nft.Dump()) + }, "2s").Should(Succeed()) return nil } err := app.Run([]string{app.Name}) Expect(err).NotTo(HaveOccurred()) - }) It("check openflows for LoadBalancer and external ip are correctly added and removed where ETP=local, LGW mode", func() { @@ -1802,8 +1805,6 @@ var _ = Describe("Node Operations", func() { fNPW.watchFactory = fakeOvnNode.watcher Expect(startNodePortWatcher(fNPW, 
fakeOvnNode.fakeClient, &fakeMgmtPortConfig)).To(Succeed()) - err := fNPW.AddService(&service) - Expect(err).NotTo(HaveOccurred()) expectedLBIngressFlows := []string{ "cookie=0x10c6b89e483ea111, priority=110, in_port=eth0, arp, arp_op=1, arp_tpa=5.5.5.5, actions=output:LOCAL", @@ -1815,15 +1816,18 @@ var _ = Describe("Node Operations", func() { "cookie=0x77df6d2c74c0a658, priority=110, in_port=eth0, arp, arp_op=1, arp_tpa=1.1.1.2, actions=output:LOCAL", } - Expect(err).NotTo(HaveOccurred()) - flows := fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") - Expect(flows).To(BeNil()) - flows = fNPW.ofm.getFlowsByKey("Ingress_namespace1_service1_5.5.5.5_8080") - Expect(flows).To(Equal(expectedLBIngressFlows)) - flows = fNPW.ofm.getFlowsByKey("External_namespace1_service1_1.1.1.1_8080") - Expect(flows).To(Equal(expectedLBExternalIPFlows1)) - flows = fNPW.ofm.getFlowsByKey("External_namespace1_service1_1.1.1.2_8080") - Expect(flows).To(Equal(expectedLBExternalIPFlows2)) + Eventually(func() []string { + return fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") + }).Should(BeNil()) + Eventually(func() []string { + return fNPW.ofm.getFlowsByKey("Ingress_namespace1_service1_5.5.5.5_8080") + }).Should(Equal(expectedLBIngressFlows)) + Eventually(func() []string { + return fNPW.ofm.getFlowsByKey("External_namespace1_service1_1.1.1.1_8080") + }).Should(Equal(expectedLBExternalIPFlows1)) + Eventually(func() []string { + return fNPW.ofm.getFlowsByKey("External_namespace1_service1_1.1.1.2_8080") + }).Should(Equal(expectedLBExternalIPFlows2)) addConntrackMocks(netlinkMock, []ctFilterDesc{ {"1.1.1.1", 8080}, @@ -1832,16 +1836,22 @@ var _ = Describe("Node Operations", func() { {"192.168.18.15", 31111}, {"10.129.0.2", 8080}, }) - err = fNPW.DeleteService(&service) - Expect(err).NotTo(HaveOccurred()) - flows = fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") - Expect(flows).To(BeNil()) - flows = fNPW.ofm.getFlowsByKey("Ingress_namespace1_service1_5.5.5.5_8080") - Expect(flows).To(BeNil()) - flows = fNPW.ofm.getFlowsByKey("External_namespace1_service1_1.1.1.1_8080") - Expect(flows).To(BeNil()) - flows = fNPW.ofm.getFlowsByKey("External_namespace1_service1_1.1.1.2_8080") - Expect(flows).To(BeNil()) + + Expect(fakeOvnNode.fakeClient.KubeClient.CoreV1().Services(service.Namespace).Delete( + context.Background(), service.Name, metav1.DeleteOptions{})).To(Succeed()) + + Eventually(func() []string { + return fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") + }, "2s").Should(BeNil()) + Eventually(func() []string { + return fNPW.ofm.getFlowsByKey("Ingress_namespace1_service1_5.5.5.5_8080") + }, "2s").Should(BeNil()) + Eventually(func() []string { + return fNPW.ofm.getFlowsByKey("External_namespace1_service1_1.1.1.1_8080") + }, "2s").Should(BeNil()) + Eventually(func() []string { + return fNPW.ofm.getFlowsByKey("External_namespace1_service1_1.1.1.2_8080") + }, "2s").Should(BeNil()) return nil } @@ -1986,9 +1996,7 @@ var _ = Describe("Node Operations", func() { ) fNPW.watchFactory = fakeOvnNode.watcher Expect(startNodePortWatcher(fNPW, fakeOvnNode.fakeClient, &fakeMgmtPortConfig)).To(Succeed()) - err := fNPW.AddService(&service) - Expect(err).NotTo(HaveOccurred()) - Expect(fakeOvnNode.fakeExec.CalledMatchesExpected()).To(BeTrue(), fExec.ErrorDesc) + Eventually(fakeOvnNode.fakeExec.CalledMatchesExpected).Should(BeTrue(), fExec.ErrorDesc) expectedTables := map[string]util.FakeTable{ "nat": { @@ -2018,16 +2026,19 @@ var _ = Describe("Node Operations", func() { }, } - 
f4 := iptV4.(*util.FakeIPTables) - err = f4.MatchState(expectedTables, nil) - Expect(err).NotTo(HaveOccurred()) + Eventually(func() error { + f4 := iptV4.(*util.FakeIPTables) + return f4.MatchState(expectedTables, nil) + }).Should(Succeed()) - expectedNFT := getBaseNFTRules(fakeMgmtPortConfig.ifName) - err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) + Eventually(func() error { + expectedNFT := getBaseNFTRules(fakeMgmtPortConfig.ifName) + return nodenft.MatchNFTRules(expectedNFT, nft.Dump()) + }).Should(Succeed()) addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 8080}, {"192.168.18.15", 38034}}) - err = fNPW.DeleteService(&service) - Expect(err).NotTo(HaveOccurred()) + Expect(fakeOvnNode.fakeClient.KubeClient.CoreV1().Services(service.Namespace).Delete( + context.Background(), service.Name, metav1.DeleteOptions{})).To(Succeed()) expectedTables = map[string]util.FakeTable{ "nat": { @@ -2055,12 +2066,15 @@ var _ = Describe("Node Operations", func() { }, } - f4 = iptV4.(*util.FakeIPTables) - err = f4.MatchState(expectedTables, nil) - Expect(err).NotTo(HaveOccurred()) + Eventually(func() error { + f4 := iptV4.(*util.FakeIPTables) + return f4.MatchState(expectedTables, nil) + }, "2s").Should(Succeed()) - expectedNFT = getBaseNFTRules(fakeMgmtPortConfig.ifName) - err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) + Eventually(func() error { + expectedNFT := getBaseNFTRules(fakeMgmtPortConfig.ifName) + return nodenft.MatchNFTRules(expectedNFT, nft.Dump()) + }, "2s").Should(Succeed()) return nil } @@ -2112,8 +2126,6 @@ var _ = Describe("Node Operations", func() { fNPW.watchFactory = fakeOvnNode.watcher Expect(startNodePortWatcher(fNPW, fakeOvnNode.fakeClient, &fakeMgmtPortConfig)).To(Succeed()) - err := fNPW.AddService(&service) - Expect(err).NotTo(HaveOccurred()) expectedTables := map[string]util.FakeTable{ "nat": { @@ -2146,19 +2158,18 @@ var _ = Describe("Node Operations", func() { } f4 := iptV4.(*util.FakeIPTables) - err = f4.MatchState(expectedTables, nil) + Expect(f4.MatchState(expectedTables, nil)).To(Succeed()) expectedNFT := getBaseNFTRules(fakeMgmtPortConfig.ifName) expectedNFT += fmt.Sprintf("add element inet ovn-kubernetes mgmtport-no-snat-nodeports { tcp . 
%v }\n", service.Spec.Ports[0].NodePort) - err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) - Expect(err).NotTo(HaveOccurred()) + Expect(nodenft.MatchNFTRules(expectedNFT, nft.Dump())).To(Succeed()) flows := fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") Expect(flows).To(BeNil()) addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 8080}, {"192.168.18.15", 31111}}) - err = fNPW.DeleteService(&service) - Expect(err).NotTo(HaveOccurred()) + Expect(fakeOvnNode.fakeClient.KubeClient.CoreV1().Services(service.Namespace).Delete( + context.Background(), service.Name, metav1.DeleteOptions{})).To(Succeed()) expectedTables = map[string]util.FakeTable{ "nat": { @@ -2186,15 +2197,19 @@ var _ = Describe("Node Operations", func() { }, } - f4 = iptV4.(*util.FakeIPTables) - err = f4.MatchState(expectedTables, nil) - Expect(err).NotTo(HaveOccurred()) + Eventually(func() error { + f4 := iptV4.(*util.FakeIPTables) + return f4.MatchState(expectedTables, nil) + }, "2s").Should(Succeed()) - expectedNFT = getBaseNFTRules(fakeMgmtPortConfig.ifName) - err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) + Eventually(func() error { + expectedNFT = getBaseNFTRules(fakeMgmtPortConfig.ifName) + return nodenft.MatchNFTRules(expectedNFT, nft.Dump()) + }, "2s").Should(Succeed()) - flows = fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") - Expect(flows).To(BeNil()) + Eventually(func() []string { + return fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") + }, "2s").Should(BeNil()) return nil } @@ -2247,8 +2262,6 @@ var _ = Describe("Node Operations", func() { fNPW.watchFactory = fakeOvnNode.watcher Expect(startNodePortWatcher(fNPW, fakeOvnNode.fakeClient, &fakeMgmtPortConfig)).To(Succeed()) - err := fNPW.AddService(&service) - Expect(err).NotTo(HaveOccurred()) expectedTables := map[string]util.FakeTable{ "nat": { @@ -2285,20 +2298,18 @@ var _ = Describe("Node Operations", func() { } f4 := iptV4.(*util.FakeIPTables) - err = f4.MatchState(expectedTables, nil) - Expect(err).NotTo(HaveOccurred()) + Expect(f4.MatchState(expectedTables, nil)).To(Succeed()) expectedNFT := getBaseNFTRules(fakeMgmtPortConfig.ifName) expectedNFT += fmt.Sprintf("add element inet ovn-kubernetes mgmtport-no-snat-nodeports { tcp . 
%v }\n", service.Spec.Ports[0].NodePort) - err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) - Expect(err).NotTo(HaveOccurred()) + Expect(nodenft.MatchNFTRules(expectedNFT, nft.Dump())).To(Succeed()) flows := fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") Expect(flows).To(Equal(expectedFlows)) addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 8080}, {"192.168.18.15", 31111}}) - err = fNPW.DeleteService(&service) - Expect(err).NotTo(HaveOccurred()) + Expect(fakeOvnNode.fakeClient.KubeClient.CoreV1().Services(service.Namespace).Delete( + context.Background(), service.Name, metav1.DeleteOptions{})).To(Succeed()) expectedTables = map[string]util.FakeTable{ "nat": { @@ -2326,15 +2337,19 @@ var _ = Describe("Node Operations", func() { }, } - f4 = iptV4.(*util.FakeIPTables) - err = f4.MatchState(expectedTables, nil) - Expect(err).NotTo(HaveOccurred()) + Eventually(func() error { + f4 = iptV4.(*util.FakeIPTables) + return f4.MatchState(expectedTables, nil) + }, "2s").Should(Succeed()) - expectedNFT = getBaseNFTRules(fakeMgmtPortConfig.ifName) - err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) + Eventually(func() error { + expectedNFT = getBaseNFTRules(fakeMgmtPortConfig.ifName) + return nodenft.MatchNFTRules(expectedNFT, nft.Dump()) + }, "2s").Should(Succeed()) - flows = fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") - Expect(flows).To(BeNil()) + Eventually(func() []string { + return fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") + }, "2s").Should(BeNil()) return nil } @@ -2393,8 +2408,6 @@ var _ = Describe("Node Operations", func() { // to ensure the endpoint is local-host-networked res := fNPW.nodeIPManager.cidrs.Has(fmt.Sprintf("%s/32", ep1.Addresses[0])) Expect(res).To(BeTrue()) - err := fNPW.AddService(&service) - Expect(err).NotTo(HaveOccurred()) expectedTables := map[string]util.FakeTable{ "nat": { @@ -2431,18 +2444,17 @@ var _ = Describe("Node Operations", func() { } f4 := iptV4.(*util.FakeIPTables) - err = f4.MatchState(expectedTables, nil) - Expect(err).NotTo(HaveOccurred()) + Expect(f4.MatchState(expectedTables, nil)).To(Succeed()) expectedNFT := getBaseNFTRules(fakeMgmtPortConfig.ifName) - err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) + Expect(nodenft.MatchNFTRules(expectedNFT, nft.Dump())).To(Succeed()) flows := fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") Expect(flows).To(Equal(expectedFlows)) addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 8080}, {"192.168.18.15", 31111}}) - err = fNPW.DeleteService(&service) - Expect(err).NotTo(HaveOccurred()) + Expect(fakeOvnNode.fakeClient.KubeClient.CoreV1().Services(service.Namespace).Delete( + context.Background(), service.Name, metav1.DeleteOptions{})).To(Succeed()) expectedTables = map[string]util.FakeTable{ "nat": { @@ -2470,15 +2482,19 @@ var _ = Describe("Node Operations", func() { }, } - f4 = iptV4.(*util.FakeIPTables) - err = f4.MatchState(expectedTables, nil) - Expect(err).NotTo(HaveOccurred()) + Eventually(func() error { + f4 = iptV4.(*util.FakeIPTables) + return f4.MatchState(expectedTables, nil) + }, "2s").Should(Succeed()) - expectedNFT = getBaseNFTRules(fakeMgmtPortConfig.ifName) - err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) + Eventually(func() error { + expectedNFT = getBaseNFTRules(fakeMgmtPortConfig.ifName) + return nodenft.MatchNFTRules(expectedNFT, nft.Dump()) + }, "2s").Should(Succeed()) - flows = fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") - Expect(flows).To(BeNil()) + 
Eventually(func() []string { + return fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") + }, "2s").Should(BeNil()) return nil } @@ -2530,8 +2546,6 @@ var _ = Describe("Node Operations", func() { fNPW.watchFactory = fakeOvnNode.watcher Expect(startNodePortWatcher(fNPW, fakeOvnNode.fakeClient, &fakeMgmtPortConfig)).To(Succeed()) - err := fNPW.AddService(&service) - Expect(err).NotTo(HaveOccurred()) expectedTables := map[string]util.FakeTable{ "nat": { @@ -2570,20 +2584,18 @@ var _ = Describe("Node Operations", func() { } f4 := iptV4.(*util.FakeIPTables) - err = f4.MatchState(expectedTables, nil) - Expect(err).NotTo(HaveOccurred()) + Expect(f4.MatchState(expectedTables, nil)).To(Succeed()) expectedNFT := getBaseNFTRules(fakeMgmtPortConfig.ifName) expectedNFT += fmt.Sprintf("add element inet ovn-kubernetes mgmtport-no-snat-nodeports { tcp . %v }\n", service.Spec.Ports[0].NodePort) - err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) - Expect(err).NotTo(HaveOccurred()) + Expect(nodenft.MatchNFTRules(expectedNFT, nft.Dump())).To(Succeed()) flows := fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") Expect(flows).To(Equal(expectedFlows)) addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 8080}, {"192.168.18.15", 31111}}) - err = fNPW.DeleteService(&service) - Expect(err).NotTo(HaveOccurred()) + Expect(fakeOvnNode.fakeClient.KubeClient.CoreV1().Services(service.Namespace).Delete( + context.Background(), service.Name, metav1.DeleteOptions{})).To(Succeed()) expectedTables = map[string]util.FakeTable{ "nat": { @@ -2611,15 +2623,19 @@ var _ = Describe("Node Operations", func() { }, } - f4 = iptV4.(*util.FakeIPTables) - err = f4.MatchState(expectedTables, nil) - Expect(err).NotTo(HaveOccurred()) + Eventually(func() error { + f4 = iptV4.(*util.FakeIPTables) + return f4.MatchState(expectedTables, nil) + }, "2s").Should(Succeed()) - expectedNFT = getBaseNFTRules(fakeMgmtPortConfig.ifName) - err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) + Eventually(func() error { + expectedNFT = getBaseNFTRules(fakeMgmtPortConfig.ifName) + return nodenft.MatchNFTRules(expectedNFT, nft.Dump()) + }, "2s").Should(Succeed()) - flows = fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") - Expect(flows).To(BeNil()) + Eventually(func() []string { + return fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") + }, "2s").Should(BeNil()) return nil } @@ -2676,8 +2692,6 @@ var _ = Describe("Node Operations", func() { // to ensure the endpoint is local-host-networked res := fNPW.nodeIPManager.cidrs.Has(fmt.Sprintf("%s/32", endpointSlice.Endpoints[0].Addresses[0])) Expect(res).To(BeTrue()) - err := fNPW.AddService(&service) - Expect(err).NotTo(HaveOccurred()) expectedTables := map[string]util.FakeTable{ "nat": { "PREROUTING": []string{ @@ -2715,18 +2729,16 @@ var _ = Describe("Node Operations", func() { } f4 := iptV4.(*util.FakeIPTables) - err = f4.MatchState(expectedTables, nil) - Expect(err).NotTo(HaveOccurred()) + Expect(f4.MatchState(expectedTables, nil)).To(Succeed()) expectedNFT := getBaseNFTRules(fakeMgmtPortConfig.ifName) - err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) + Expect(nodenft.MatchNFTRules(expectedNFT, nft.Dump())).To(Succeed()) - flows := fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") - Expect(flows).To(Equal(expectedFlows)) + Expect(fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111")).To(Equal(expectedFlows)) addConntrackMocks(netlinkMock, []ctFilterDesc{{"10.129.0.2", 8080}, {"192.168.18.15", 31111}}) - 
err = fNPW.DeleteService(&service) - Expect(err).NotTo(HaveOccurred()) + Expect(fakeOvnNode.fakeClient.KubeClient.CoreV1().Services(service.Namespace).Delete( + context.Background(), service.Name, metav1.DeleteOptions{})).To(Succeed()) expectedTables = map[string]util.FakeTable{ "nat": { @@ -2754,15 +2766,19 @@ var _ = Describe("Node Operations", func() { }, } - f4 = iptV4.(*util.FakeIPTables) - err = f4.MatchState(expectedTables, nil) - Expect(err).NotTo(HaveOccurred()) + Eventually(func() error { + f4 := iptV4.(*util.FakeIPTables) + return f4.MatchState(expectedTables, nil) + }, "2s").Should(Succeed()) - expectedNFT = getBaseNFTRules(fakeMgmtPortConfig.ifName) - err = nodenft.MatchNFTRules(expectedNFT, nft.Dump()) + Eventually(func() error { + expectedNFT := getBaseNFTRules(fakeMgmtPortConfig.ifName) + return nodenft.MatchNFTRules(expectedNFT, nft.Dump()) + }, "2s").Should(Succeed()) - flows = fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") - Expect(flows).To(BeNil()) + Eventually(func() []string { + return fNPW.ofm.getFlowsByKey("NodePort_namespace1_service1_tcp_31111") + }, "2s").Should(BeNil()) return nil } diff --git a/go-controller/pkg/testing/exec.go b/go-controller/pkg/testing/exec.go index aa426fb0cc..1d9dc30615 100644 --- a/go-controller/pkg/testing/exec.go +++ b/go-controller/pkg/testing/exec.go @@ -127,6 +127,18 @@ func (f *FakeExec) CalledMatchesExpected() bool { return len(f.executedCommands) == len(f.expectedCommands) } +// CalledMatchesExpectedAtLeastN returns true if the number of commands the code under +// test called is at least 'minNumberOfMatches' and less than or equal to the number of +// expected commands in the FakeExec's list +func (f *FakeExec) CalledMatchesExpectedAtLeastN(minNumberOfMatches int) bool { + f.mu.Lock() + defer f.mu.Unlock() + if f.receivedUnexpected { + return false + } + return len(f.executedCommands) >= minNumberOfMatches && len(f.executedCommands) <= len(f.expectedCommands) +} + // ExpectedCmd contains properties that the testcase expects a called command // to have as well as the output that the fake command should return type ExpectedCmd struct { @@ -298,6 +310,15 @@ func (f *FakeExec) AddFakeCmd(expected *ExpectedCmd) { f.expectedCommands = append(f.expectedCommands, expected) } +// AddRepeatedFakeCmd takes the ExpectedCmd and appends its runner function to +// a fake command action list of the FakeExec repeatCount times +func (f *FakeExec) AddRepeatedFakeCmd(expected *ExpectedCmd, repeatCount int) { + for i := 0; i < repeatCount; i++ { + cmdCopy := *expected + f.AddFakeCmd(&cmdCopy) + } +} + // AddFakeCmdsNoOutputNoError appends a list of commands to the expected // command set. The command cannot return any output or error. 
func (f *FakeExec) AddFakeCmdsNoOutputNoError(commands []string) { From 988cb71e400b70e2eafb6eb8e25bfd27893cdab5 Mon Sep 17 00:00:00 2001 From: Patryk Diak Date: Thu, 23 Jan 2025 19:08:35 +0100 Subject: [PATCH 49/51] Get active network from annotations on pod deletion Signed-off-by: Patryk Diak --- .../pkg/clustermanager/pod/allocator.go | 30 +++++++++++++++++-- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/go-controller/pkg/clustermanager/pod/allocator.go b/go-controller/pkg/clustermanager/pod/allocator.go index 01af49f594..40dab69191 100644 --- a/go-controller/pkg/clustermanager/pod/allocator.go +++ b/go-controller/pkg/clustermanager/pod/allocator.go @@ -204,9 +204,33 @@ func (a *PodAllocator) reconcile(old, new *corev1.Pod, releaseFromAllocator bool return nil } - activeNetwork, err := a.getActiveNetworkForPod(pod) - if err != nil { - return fmt.Errorf("failed looking for an active network: %w", err) + var activeNetwork util.NetInfo + var err error + + if new != nil { + activeNetwork, err = a.getActiveNetworkForPod(pod) + if err != nil { + return fmt.Errorf("failed looking for an active network: %w", err) + } + } else if a.netInfo.IsPrimaryNetwork() { + // During pod deletion, the UDN might already be removed. To handle this, check if the activeNetwork + // is managed by the current allocator using pod annotations. If not, exit as there is nothing to do. + // There is no need to handle non-primary networks as they are already present in the network-selection + // annotations handled in GetPodNADToNetworkMappingWithActiveNetwork. + podNetworks, err := util.UnmarshalPodAnnotationAllNetworks(pod.Annotations) + if err != nil { + return err + } + for nadName := range podNetworks { + if a.netInfo.HasNAD(nadName) { + activeNetwork = a.netInfo + break + } + } + if activeNetwork == nil { + klog.V(5).Infof("Skipping pod cleanup for %s/%s on network %s", pod.Namespace, pod.Name, a.netInfo.GetNetworkName()) + return nil + } } onNetwork, networkMap, err := util.GetPodNADToNetworkMappingWithActiveNetwork(pod, a.netInfo, activeNetwork) From e82f43db9ad081eb8c2acbdab9cebcc14da0227a Mon Sep 17 00:00:00 2001 From: Patryk Diak Date: Fri, 24 Jan 2025 17:01:49 +0100 Subject: [PATCH 50/51] Cleanup networkpolicy when a namespace was already removed Signed-off-by: Patryk Diak --- go-controller/pkg/ovn/base_network_controller.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/go-controller/pkg/ovn/base_network_controller.go b/go-controller/pkg/ovn/base_network_controller.go index d38dc3fb1a..8a02b52b43 100644 --- a/go-controller/pkg/ovn/base_network_controller.go +++ b/go-controller/pkg/ovn/base_network_controller.go @@ -10,6 +10,7 @@ import ( libovsdbclient "github.com/ovn-org/libovsdb/client" "github.com/ovn-org/libovsdb/ovsdb" + kapierrors "k8s.io/apimachinery/pkg/api/errors" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/allocator/pod" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config" @@ -1005,10 +1006,9 @@ func (bnc *BaseNetworkController) DeleteResourceCommon(objType reflect.Type, obj return fmt.Errorf("could not cast obj of type %T to *knet.NetworkPolicy", obj) } netinfo, err := bnc.networkManager.GetActiveNetworkForNamespace(knp.Namespace) - // The InvalidPrimaryNetworkError error is thrown when UDN is not found because - // it has been already deleted, so just proceed with deleting NetworkPolicy in - // such a scenario as well. 
- if err != nil && !util.IsInvalidPrimaryNetworkError(err) { + // The InvalidPrimaryNetworkError is returned when the UDN is not found because it has already been deleted, + // while the NotFound error occurs when the namespace no longer exists. In both cases, proceed with deleting the NetworkPolicy. + if err != nil && !util.IsInvalidPrimaryNetworkError(err) && !kapierrors.IsNotFound(err) { return fmt.Errorf("could not get active network for namespace %s: %w", knp.Namespace, err) } if err == nil && bnc.GetNetworkName() != netinfo.GetNetworkName() { From d108b05b6dfd9b3b49169247ef31e35022222874 Mon Sep 17 00:00:00 2001 From: Patryk Diak Date: Fri, 24 Jan 2025 22:02:12 +0100 Subject: [PATCH 51/51] Revert "hard code disable-udn-host-isolation to "true"" This reverts commit 1243011ac02038e3bc254e670d50554ed4c0cf88. --- go-controller/pkg/config/config.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go-controller/pkg/config/config.go b/go-controller/pkg/config/config.go index f119133736..2759f071f2 100644 --- a/go-controller/pkg/config/config.go +++ b/go-controller/pkg/config/config.go @@ -1073,7 +1073,7 @@ var OVNK8sFeatureFlags = []cli.Flag{ Name: "disable-udn-host-isolation", Usage: "Configure to disable UDN host isolation with ovn-kubernetes.", Destination: &cliConfig.OVNKubernetesFeature.DisableUDNHostIsolation, - Value: true, + Value: OVNKubernetesFeature.DisableUDNHostIsolation, }, &cli.BoolFlag{ Name: "enable-network-segmentation",