diff --git a/contrib/kind.sh b/contrib/kind.sh index c59c81f3a7..4225380300 100755 --- a/contrib/kind.sh +++ b/contrib/kind.sh @@ -853,7 +853,8 @@ create_ovn_kube_manifests() { --multi-network-enable="${ENABLE_MULTI_NET}" \ --ovnkube-metrics-scale-enable="${OVN_METRICS_SCALE_ENABLE}" \ --compact-mode="${OVN_COMPACT_MODE}" \ - --enable-interconnect="${OVN_ENABLE_INTERCONNECT}" + --enable-interconnect="${OVN_ENABLE_INTERCONNECT}" \ + --enable-multi-external-gateway=true popd } diff --git a/dist/images/daemonset.sh b/dist/images/daemonset.sh index f83b73b524..129474ad22 100755 --- a/dist/images/daemonset.sh +++ b/dist/images/daemonset.sh @@ -300,6 +300,9 @@ while [ "$1" != "" ]; do --enable-interconnect) OVN_ENABLE_INTERCONNECT=$VALUE ;; + --enable-multi-external-gateway) + OVN_ENABLE_MULTI_EXTERNAL_GATEWAY=$VALUE + ;; *) echo "WARNING: unknown parameter \"$PARAM\"" exit 1 @@ -456,6 +459,8 @@ ovnkube_compact_mode_enable=${COMPACT_MODE:-"false"} echo "ovnkube_compact_mode_enable: ${ovnkube_compact_mode_enable}" ovn_enable_interconnect=${OVN_ENABLE_INTERCONNECT} echo "ovn_enable_interconnect: ${ovn_enable_interconnect}" +ovn_enable_multi_external_gateway=${OVN_ENABLE_MULTI_EXTERNAL_GATEWAY} +echo "ovn_enable_multi_external_gateway: ${ovn_enable_multi_external_gateway}" ovn_image=${ovnkube_image} \ ovnkube_compact_mode_enable=${ovnkube_compact_mode_enable} \ @@ -498,6 +503,7 @@ ovn_image=${ovnkube_image} \ ovn_disable_ovn_iface_id_ver=${ovn_disable_ovn_iface_id_ver} \ ovnkube_node_mgmt_port_netdev=${ovnkube_node_mgmt_port_netdev} \ ovn_enable_interconnect=${ovn_enable_interconnect} \ + ovn_enable_multi_external_gateway=${ovn_enable_multi_external_gateway} \ ovnkube_app_name=ovnkube-node \ j2 ../templates/ovnkube-node.yaml.j2 -o ${output_dir}/ovnkube-node.yaml @@ -572,6 +578,7 @@ ovn_image=${ovnkube_image} \ ovn_stateless_netpol_enable=${ovn_netpol_acl_enable} \ ovnkube_compact_mode_enable=${ovnkube_compact_mode_enable} \ ovn_unprivileged_mode=${ovn_unprivileged_mode} \ + ovn_enable_multi_external_gateway=${ovn_enable_multi_external_gateway} \ j2 ../templates/ovnkube-master.yaml.j2 -o ${output_dir}/ovnkube-master.yaml ovn_image=${ovnkube_image} \ @@ -603,6 +610,7 @@ ovn_image=${ovnkube_image} \ ovn_gateway_mode=${ovn_gateway_mode} \ ovn_ex_gw_networking_interface=${ovn_ex_gw_networking_interface} \ ovn_enable_interconnect=${ovn_enable_interconnect} \ + ovn_enable_multi_external_gateway=${ovn_enable_multi_external_gateway} \ j2 ../templates/ovnkube-control-plane.yaml.j2 -o ${output_dir}/ovnkube-control-plane.yaml ovn_image=${image} \ @@ -683,6 +691,7 @@ ovn_image=${ovnkube_image} \ ovn_empty_lb_events=${ovn_empty_lb_events} \ ovn_loglevel_nb=${ovn_loglevel_nb} ovn_loglevel_sb=${ovn_loglevel_sb} \ ovn_enable_interconnect=${ovn_enable_interconnect} \ + ovn_enable_multi_external_gateway=${ovn_enable_multi_external_gateway} \ j2 ../templates/ovnkube-single-node-zone.yaml.j2 -o ${output_dir}/ovnkube-single-node-zone.yaml ovn_image=${ovnkube_image} \ @@ -734,6 +743,7 @@ ovn_image=${ovnkube_image} \ ovn_empty_lb_events=${ovn_empty_lb_events} \ ovn_loglevel_nb=${ovn_loglevel_nb} ovn_loglevel_sb=${ovn_loglevel_sb} \ ovn_enable_interconnect=${ovn_enable_interconnect} \ + ovn_enable_multi_external_gateway=${ovn_enable_multi_external_gateway} \ j2 ../templates/ovnkube-zone-controller.yaml.j2 -o ${output_dir}/ovnkube-zone-controller.yaml ovn_image=${image} \ diff --git a/dist/images/ovnkube.sh b/dist/images/ovnkube.sh index 17494f1094..3f20330d7d 100755 --- a/dist/images/ovnkube.sh +++ b/dist/images/ovnkube.sh @@ -84,6 +84,7 @@ fi # OVN_ENCAP_IP - encap IP to be used for OVN traffic on the node. mandatory in case ovnkube-node-mode=="dpu" # OVN_HOST_NETWORK_NAMESPACE - namespace to classify host network traffic for applying network policies # OVN_DISABLE_FORWARDING - disable forwarding on OVNK controlled interfaces +# OVN_ENABLE_MULTI_EXTERNAL_GATEWAY - enable multi external gateway for ovn-kubernetes # The argument to the command is the operation to be performed # ovn-master ovn-controller ovn-node display display_env ovn_debug @@ -239,6 +240,8 @@ ovn_ipfix_cache_active_timeout=${OVN_IPFIX_CACHE_ACTIVE_TIMEOUT:-} \ ovn_stateless_netpol_enable=${OVN_STATELESS_NETPOL_ENABLE:-false} #OVN_ENABLE_INTERCONNECT - enable interconnect with multiple zones ovn_enable_interconnect=${OVN_ENABLE_INTERCONNECT:-false} +#OVN_ENABLE_MULTI_EXTERNAL_GATEWAY - enable multi external gateway +ovn_enable_multi_external_gateway=${OVN_ENABLE_MULTI_EXTERNAL_GATEWAY:-false} # OVNKUBE_NODE_MODE - is the mode which ovnkube node operates ovnkube_node_mode=${OVNKUBE_NODE_MODE:-"full"} @@ -1125,6 +1128,12 @@ ovn-master() { fi echo "ovn_stateless_netpol_enable_flag: ${ovn_stateless_netpol_enable_flag}" + ovnkube_enable_multi_external_gateway_flag= + if [[ ${ovn_enable_multi_external_gateway} == "true" ]]; then + ovnkube_enable_multi_external_gateway_flag="--enable-multi-external-gateway" + fi + echo "ovnkube_enable_multi_external_gateway_flag=${ovnkube_enable_multi_external_gateway_flag}" + init_node_flags= if [[ ${ovnkube_compact_mode_enable} == "true" ]]; then init_node_flags="--init-node ${K8S_NODE} --nodeport" @@ -1165,6 +1174,7 @@ ovn-master() { ${ovnkube_metrics_scale_enable_flag} \ ${multi_network_enabled_flag} \ ${ovn_stateless_netpol_enable_flag} \ + ${ovnkube_enable_multi_external_gateway_flag} \ --metrics-bind-address ${ovnkube_master_metrics_bind_address} \ --host-network-namespace ${ovn_host_network_namespace} & @@ -1332,6 +1342,12 @@ ovnkube-controller() { fi echo "ovnkube_enable_interconnect_flag: ${ovnkube_enable_interconnect_flag}" + ovnkube_enable_multi_external_gateway_flag= + if [[ ${ovn_enable_multi_external_gateway} == "true" ]]; then + ovnkube_enable_multi_external_gateway_flag="--enable-multi-external-gateway" + fi + echo "ovnkube_enable_multi_external_gateway_flag=${ovnkube_enable_multi_external_gateway_flag}" + echo "=============== ovnkube-controller ========== MASTER ONLY" /usr/bin/ovnkube \ --init-ovnkube-controller ${K8S_NODE} \ @@ -1361,6 +1377,7 @@ ovnkube-controller() { ${ovnkube_config_duration_enable_flag} \ ${multi_network_enabled_flag} \ ${ovnkube_enable_interconnect_flag} \ + ${ovnkube_enable_multi_external_gateway_flag} \ --zone ${ovn_zone} \ --metrics-bind-address ${ovnkube_master_metrics_bind_address} \ --host-network-namespace ${ovn_host_network_namespace} & @@ -1445,6 +1462,12 @@ ovn-cluster-manager() { fi echo "ovnkube_enable_interconnect_flag: ${ovnkube_enable_interconnect_flag}" + ovnkube_enable_multi_external_gateway_flag= + if [[ ${ovn_enable_multi_external_gateway} == "true" ]]; then + ovnkube_enable_multi_external_gateway_flag="--enable-multi-external-gateway" + fi + echo "ovnkube_enable_multi_external_gateway_flag=${ovnkube_enable_multi_external_gateway_flag}" + echo "=============== ovn-cluster-manager ========== MASTER ONLY" /usr/bin/ovnkube \ --init-cluster-manager ${K8S_NODE} \ @@ -1465,6 +1488,7 @@ ovn-cluster-manager() { ${multi_network_enabled_flag} \ ${egressservice_enabled_flag} \ ${ovnkube_enable_interconnect_flag} \ + ${ovnkube_enable_multi_external_gateway_flag} \ --metrics-bind-address ${ovnkube_cluster_manager_metrics_bind_address} \ --host-network-namespace ${ovn_host_network_namespace} & @@ -1726,6 +1750,12 @@ ovn-node() { ovn_zone=$(get_node_zone) echo "ovnkube-node's configured zone is ${ovn_zone}" + ovnkube_enable_multi_external_gateway_flag= + if [[ ${ovn_enable_multi_external_gateway} == "true" ]]; then + ovnkube_enable_multi_external_gateway_flag="--enable-multi-external-gateway" + fi + echo "ovnkube_enable_multi_external_gateway_flag=${ovnkube_enable_multi_external_gateway_flag}" + if [[ $ovn_nbdb != "local" ]]; then ovn_dbs="--nb-address=${ovn_nbdb}" fi @@ -1776,6 +1806,7 @@ ovn-node() { --metrics-bind-address ${ovnkube_node_metrics_bind_address} \ ${ovnkube_node_mode_flag} \ ${egress_interface} \ + ${ovnkube_enable_multi_external_gateway_flag} \ ${ovnkube_enable_interconnect_flag} \ --zone ${ovn_zone} \ --host-network-namespace ${ovn_host_network_namespace} \ diff --git a/dist/templates/ovnkube-control-plane.yaml.j2 b/dist/templates/ovnkube-control-plane.yaml.j2 index 74d664f8ce..eeac6c6d79 100644 --- a/dist/templates/ovnkube-control-plane.yaml.j2 +++ b/dist/templates/ovnkube-control-plane.yaml.j2 @@ -160,6 +160,8 @@ spec: key: host_network_namespace - name: OVN_ENABLE_INTERCONNECT value: "{{ ovn_enable_interconnect }}" + - name: OVN_ENABLE_MULTI_EXTERNAL_GATEWAY + value: "{{ ovn_enable_multi_external_gateway }}" # end of container volumes: diff --git a/dist/templates/ovnkube-master.yaml.j2 b/dist/templates/ovnkube-master.yaml.j2 index 01301b5cf5..23652d7d04 100644 --- a/dist/templates/ovnkube-master.yaml.j2 +++ b/dist/templates/ovnkube-master.yaml.j2 @@ -279,6 +279,8 @@ spec: value: "{{ ovn_acl_logging_rate_limit }}" - name: OVN_STATELESS_NETPOL_ENABLE value: "{{ ovn_stateless_netpol_enable }}" + - name: OVN_ENABLE_MULTI_EXTERNAL_GATEWAY + value: "{{ ovn_enable_multi_external_gateway }}" - name: OVN_HOST_NETWORK_NAMESPACE valueFrom: configMapKeyRef: diff --git a/dist/templates/ovnkube-node.yaml.j2 b/dist/templates/ovnkube-node.yaml.j2 index 09c01d5c24..6b15a2fff4 100644 --- a/dist/templates/ovnkube-node.yaml.j2 +++ b/dist/templates/ovnkube-node.yaml.j2 @@ -219,6 +219,8 @@ spec: value: "{{ ovn_multi_network_enable }}" - name: OVN_ENABLE_INTERCONNECT value: "{{ ovn_enable_interconnect }}" + - name: OVN_ENABLE_MULTI_EXTERNAL_GATEWAY + value: "{{ ovn_enable_multi_external_gateway }}" {% endif -%} {% if ovnkube_app_name=="ovnkube-node-dpu-host" -%} - name: OVNKUBE_NODE_MODE diff --git a/dist/templates/ovnkube-single-node-zone.yaml.j2 b/dist/templates/ovnkube-single-node-zone.yaml.j2 index 74054c0960..0ac705a1c6 100644 --- a/dist/templates/ovnkube-single-node-zone.yaml.j2 +++ b/dist/templates/ovnkube-single-node-zone.yaml.j2 @@ -340,6 +340,8 @@ spec: value: "{{ ovn_acl_logging_rate_limit }}" - name: OVN_ENABLE_INTERCONNECT value: "{{ ovn_enable_interconnect }}" + - name: OVN_ENABLE_MULTI_EXTERNAL_GATEWAY + value: "{{ ovn_enable_multi_external_gateway }}" - name: OVN_HOST_NETWORK_NAMESPACE valueFrom: configMapKeyRef: @@ -524,6 +526,8 @@ spec: value: "local" - name: OVN_ENABLE_INTERCONNECT value: "{{ ovn_enable_interconnect }}" + - name: OVN_ENABLE_MULTI_EXTERNAL_GATEWAY + value: "{{ ovn_enable_multi_external_gateway }}" readinessProbe: exec: diff --git a/dist/templates/ovnkube-zone-controller.yaml.j2 b/dist/templates/ovnkube-zone-controller.yaml.j2 index c2a20825b4..536904f91d 100644 --- a/dist/templates/ovnkube-zone-controller.yaml.j2 +++ b/dist/templates/ovnkube-zone-controller.yaml.j2 @@ -356,6 +356,8 @@ spec: value: "{{ ovn_acl_logging_rate_limit }}" - name: OVN_ENABLE_INTERCONNECT value: "{{ ovn_enable_interconnect }}" + - name: OVN_ENABLE_MULTI_EXTERNAL_GATEWAY + value: "{{ ovn_enable_multi_external_gateway }}" - name: OVN_HOST_NETWORK_NAMESPACE valueFrom: configMapKeyRef: diff --git a/docs/INSTALL.OPENSHIFT.md b/docs/INSTALL.OPENSHIFT.md index 03823036ac..b7e73690f0 100644 --- a/docs/INSTALL.OPENSHIFT.md +++ b/docs/INSTALL.OPENSHIFT.md @@ -2,7 +2,7 @@ This section describes how an OVN overlay network is setup on Openshift 3.10 and later. It explains the various components and how they come together to establish the OVN overlay network. -People that are interested in understatnding how the ovn cni plugin is installed will find this useful. +People that are interested in understanding how the ovn cni plugin is installed will find this useful. As of OCP-3.10, the Openshift overlay network is managed using daemonsets. The ovs/ovn components are built into a Docker image that has all of the needed packages. Daemonsets are deployed on the nodes @@ -22,7 +22,7 @@ nodes to specify where the daemonsets run. Both daemonsets use the same docker image. Configuration is passed from the daemonset to the image using environment variables and mounting directories from the host. The environment -variables at set from a configmap that is created during installation. The image includes +variables are set from a configmap that is created during installation. The image includes an control script that is the entrypoint for the container, ovnkube and the openvswitch and ovn rpms. @@ -133,7 +133,7 @@ copies files into them. In particular it copies in the entrypoint script, ovnku # docker tag ovn-kube netdev22:5000/ovn-kube:latest # docker push netdev22:5000/ovn-kube:latest ``` -The image is taged and pushed to a docker repository. This example uses the +The image is tagged and pushed to a docker repository. This example uses the local development docker repo, netdev22:5000 NOTE: At present go_controller is built and the resultant files are copied @@ -146,6 +146,7 @@ from openvswitch-ovn-kubernetes rpm. There are three sources of configuration information in the daemonsets: 1. /etc/origin/node/ files + These files are installed by the Openshift install and are in the node daemonset which mounts them on the host. The ovn deamonsets mount them in the pod. diff --git a/go-controller/go.mod b/go-controller/go.mod index 28ba442181..2a3da46d0b 100644 --- a/go-controller/go.mod +++ b/go-controller/go.mod @@ -114,5 +114,4 @@ require ( replace ( github.com/Microsoft/hcsshim => github.com/Microsoft/hcsshim v0.8.20 github.com/gogo/protobuf => github.com/gogo/protobuf v1.3.2 - github.com/vishvananda/netlink => github.com/jcaamano/netlink v1.1.1-0.20220831114501-3a761ed61db6 ) diff --git a/go-controller/go.sum b/go-controller/go.sum index 946b1e2d23..579d0c0669 100644 --- a/go-controller/go.sum +++ b/go-controller/go.sum @@ -461,8 +461,6 @@ github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANyt github.com/j-keck/arping v0.0.0-20160618110441-2cf9dc699c56/go.mod h1:ymszkNOg6tORTn+6F6j+Jc8TOr5osrynvN6ivFWZ2GA= github.com/j-keck/arping v1.0.2 h1:hlLhuXgQkzIJTZuhMigvG/CuSkaspeaD9hRDk2zuiMI= github.com/j-keck/arping v1.0.2/go.mod h1:aJbELhR92bSk7tp79AWM/ftfc90EfEi2bQJrbBFOsPw= -github.com/jcaamano/netlink v1.1.1-0.20220831114501-3a761ed61db6 h1:YZME7iAeCpRSGCd0TROrXuo4Tm/T/J15/42PVSaItkI= -github.com/jcaamano/netlink v1.1.1-0.20220831114501-3a761ed61db6/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho= github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI= github.com/jmespath/go-jmespath v0.0.0-20160202185014-0b12d6b521d8/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= github.com/jmespath/go-jmespath v0.0.0-20160803190731-bd40a432e4c7/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k= @@ -744,7 +742,13 @@ github.com/urfave/cli v1.22.1/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtX github.com/urfave/cli v1.22.2/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= github.com/urfave/cli/v2 v2.2.0 h1:JTTnM6wKzdA0Jqodd966MVj4vWbbquZykeX1sKbe2C4= github.com/urfave/cli/v2 v2.2.0/go.mod h1:SE9GqnLQmjVa0iPEY0f1w3ygNIYcIJ0OKPMoW2caLfQ= +github.com/vishvananda/netlink v0.0.0-20181108222139-023a6dafdcdf/go.mod h1:+SR5DhBJrl6ZM7CoCKvpw5BKroDKQ+PJqOg65H/2ktk= +github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE= +github.com/vishvananda/netlink v1.1.1-0.20201029203352-d40f9887b852/go.mod h1:twkDnbuQxJYemMlGd4JFIcuhgX83tXhKS2B/PRMpOho= +github.com/vishvananda/netlink v1.2.1-beta.2.0.20230420174744-55c8b9515a01 h1:F9xjJm4IH8VjcqG4ujciOF+GIM4mjPkHhWLLzOghPtM= +github.com/vishvananda/netlink v1.2.1-beta.2.0.20230420174744-55c8b9515a01/go.mod h1:cAAsePK2e15YDAMJNyOpGYEWNe4sIghTY7gpz4cX/Ik= github.com/vishvananda/netns v0.0.0-20180720170159-13995c7128cc/go.mod h1:ZjcWmFBXmLKZu9Nxj3WKYEafiSqer2rnvPr0en9UNpI= +github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU= github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0= github.com/vishvananda/netns v0.0.4 h1:Oeaw1EM2JMxD51g9uhtC0D7erkIjgmj8+JZc26m1YX8= github.com/vishvananda/netns v0.0.4/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM= @@ -929,6 +933,7 @@ golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20190522044717-8097e1b27ff5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190602015325-4c4f7f33c9ed/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190606203320-7fc4e5ec1444/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190616124812-15dcb6c0061f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -995,6 +1000,7 @@ golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220804214406-8e32c043e418/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU= diff --git a/go-controller/pkg/config/config.go b/go-controller/pkg/config/config.go index d570a3e51f..e4b5730ed4 100644 --- a/go-controller/pkg/config/config.go +++ b/go-controller/pkg/config/config.go @@ -359,6 +359,7 @@ type OVNKubernetesFeatureConfig struct { EnableMultiNetworkPolicy bool `gcfg:"enable-multi-networkpolicy"` EnableStatelessNetPol bool `gcfg:"enable-stateless-netpol"` EnableInterconnect bool `gcfg:"enable-interconnect"` + EnableMultiExternalGateway bool `gcfg:"enable-multi-external-gateway"` } // GatewayMode holds the node gateway mode @@ -971,6 +972,12 @@ var OVNK8sFeatureFlags = []cli.Flag{ Destination: &cliConfig.OVNKubernetesFeature.EnableEgressService, Value: OVNKubernetesFeature.EnableEgressService, }, + &cli.BoolFlag{ + Name: "enable-multi-external-gateway", + Usage: "Configure to use AdminPolicyBasedExternalRoute CRD feature with ovn-kubernetes.", + Destination: &cliConfig.OVNKubernetesFeature.EnableMultiExternalGateway, + Value: OVNKubernetesFeature.EnableMultiExternalGateway, + }, } // K8sFlags capture Kubernetes-related options diff --git a/go-controller/pkg/config/config_test.go b/go-controller/pkg/config/config_test.go index 1b635cfd23..ecf904f3bf 100644 --- a/go-controller/pkg/config/config_test.go +++ b/go-controller/pkg/config/config_test.go @@ -219,6 +219,7 @@ egressip-node-healthcheck-port=1234 enable-multi-network=false enable-multi-networkpolicy=false enable-interconnect=false +enable-multi-external-gateway=false ` var newData string @@ -317,6 +318,7 @@ var _ = Describe("Config Operations", func() { gomega.Expect(OVNKubernetesFeature.EnableMultiNetwork).To(gomega.BeFalse()) gomega.Expect(OVNKubernetesFeature.EnableMultiNetworkPolicy).To(gomega.BeFalse()) gomega.Expect(OVNKubernetesFeature.EnableInterconnect).To(gomega.BeFalse()) + gomega.Expect(OVNKubernetesFeature.EnableMultiExternalGateway).To(gomega.BeFalse()) for _, a := range []OvnAuthConfig{OvnNorth, OvnSouth} { gomega.Expect(a.Scheme).To(gomega.Equal(OvnDBSchemeUnix)) @@ -555,6 +557,7 @@ var _ = Describe("Config Operations", func() { "enable-multi-network=true", "enable-multi-networkpolicy=true", "enable-interconnect=true", + "enable-multi-external-gateway=true", "zone=foo", ) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -634,6 +637,7 @@ var _ = Describe("Config Operations", func() { gomega.Expect(OVNKubernetesFeature.EgressIPNodeHealthCheckPort).To(gomega.Equal(1234)) gomega.Expect(OVNKubernetesFeature.EnableMultiNetwork).To(gomega.BeTrue()) gomega.Expect(OVNKubernetesFeature.EnableInterconnect).To(gomega.BeTrue()) + gomega.Expect(OVNKubernetesFeature.EnableMultiExternalGateway).To(gomega.BeTrue()) gomega.Expect(HybridOverlay.ClusterSubnets).To(gomega.Equal([]CIDRNetworkEntry{ {ovntest.MustParseIPNet("11.132.0.0/14"), 23}, })) @@ -724,6 +728,7 @@ var _ = Describe("Config Operations", func() { gomega.Expect(OVNKubernetesFeature.EnableMultiNetwork).To(gomega.BeTrue()) gomega.Expect(OVNKubernetesFeature.EnableMultiNetworkPolicy).To(gomega.BeTrue()) gomega.Expect(OVNKubernetesFeature.EnableInterconnect).To(gomega.BeTrue()) + gomega.Expect(OVNKubernetesFeature.EnableMultiExternalGateway).To(gomega.BeTrue()) gomega.Expect(HybridOverlay.ClusterSubnets).To(gomega.Equal([]CIDRNetworkEntry{ {ovntest.MustParseIPNet("11.132.0.0/14"), 23}, })) @@ -786,6 +791,7 @@ var _ = Describe("Config Operations", func() { "-enable-multi-network=true", "-enable-multi-networkpolicy=true", "-enable-interconnect=true", + "-enable-multi-external-gateway=true", "-healthz-bind-address=0.0.0.0:4321", "-zone=bar", } diff --git a/go-controller/pkg/node/default_node_network_controller.go b/go-controller/pkg/node/default_node_network_controller.go index b17b4ebf8d..f17e6599b9 100644 --- a/go-controller/pkg/node/default_node_network_controller.go +++ b/go-controller/pkg/node/default_node_network_controller.go @@ -936,11 +936,13 @@ func (nc *DefaultNodeNetworkController) Start(ctx context.Context) error { c.Run(1) }() } - nc.wg.Add(1) - go func() { - defer nc.wg.Done() - nc.apbExternalRouteNodeController.Run(1) - }() + if config.OVNKubernetesFeature.EnableMultiExternalGateway { + nc.wg.Add(1) + go func() { + defer nc.wg.Done() + nc.apbExternalRouteNodeController.Run(1) + }() + } nc.wg.Add(1) go func() { diff --git a/go-controller/pkg/ovn/default_network_controller.go b/go-controller/pkg/ovn/default_network_controller.go index 541e762208..275296f5f5 100644 --- a/go-controller/pkg/ovn/default_network_controller.go +++ b/go-controller/pkg/ovn/default_network_controller.go @@ -461,12 +461,6 @@ func (oc *DefaultNetworkController) Run(ctx context.Context) error { if err := WithSyncDurationMetric("egress ip", oc.WatchEgressIP); err != nil { return err } - if config.OVNKubernetesFeature.EgressIPReachabiltyTotalTimeout == 0 { - klog.V(2).Infof("EgressIP node reachability check disabled") - } else if config.OVNKubernetesFeature.EgressIPNodeHealthCheckPort != 0 { - klog.Infof("EgressIP node reachability enabled and using gRPC port %d", - config.OVNKubernetesFeature.EgressIPNodeHealthCheckPort) - } } if config.OVNKubernetesFeature.EnableEgressFirewall { @@ -514,11 +508,13 @@ func (oc *DefaultNetworkController) Run(ctx context.Context) error { }() } - oc.wg.Add(1) - go func() { - defer oc.wg.Done() - oc.apbExternalRouteController.Run(1) - }() + if config.OVNKubernetesFeature.EnableMultiExternalGateway { + oc.wg.Add(1) + go func() { + defer oc.wg.Done() + oc.apbExternalRouteController.Run(1) + }() + } end := time.Since(start) klog.Infof("Completing all the Watchers took %v", end) diff --git a/go-controller/pkg/ovn/egressfirewall.go b/go-controller/pkg/ovn/egressfirewall.go index 2fc8d9bb91..9f67919e23 100644 --- a/go-controller/pkg/ovn/egressfirewall.go +++ b/go-controller/pkg/ovn/egressfirewall.go @@ -285,13 +285,19 @@ func (oc *DefaultNetworkController) deleteEgressFirewall(egressFirewallObj *egre return nil } -func (oc *DefaultNetworkController) updateEgressFirewallStatusWithRetry(egressfirewall *egressfirewallapi.EgressFirewall) error { +func (oc *DefaultNetworkController) updateEgressFirewallStatusWithRetry(egressFirewall *egressfirewallapi.EgressFirewall) error { retryErr := retry.RetryOnConflict(retry.DefaultRetry, func() error { - return oc.kube.UpdateEgressFirewall(egressfirewall) + ef, err := oc.watchFactory.GetEgressFirewall(egressFirewall.Namespace, egressFirewall.Name) + if err != nil { + return err + } + c := ef.DeepCopy() + c.Status.Status = egressFirewall.Status.Status + return oc.kube.UpdateEgressFirewall(c) }) if retryErr != nil { return fmt.Errorf("error in updating status on EgressFirewall %s/%s: %v", - egressfirewall.Namespace, egressfirewall.Name, retryErr) + egressFirewall.Namespace, egressFirewall.Name, retryErr) } return nil } diff --git a/go-controller/pkg/ovn/egressip.go b/go-controller/pkg/ovn/egressip.go index 2b5345d40d..26641b31c7 100644 --- a/go-controller/pkg/ovn/egressip.go +++ b/go-controller/pkg/ovn/egressip.go @@ -327,6 +327,12 @@ func (oc *DefaultNetworkController) reconcileEgressIPPod(old, new *v1.Pod) (err oldPod = old namespace, err = oc.watchFactory.GetNamespace(oldPod.Namespace) if err != nil { + // when the whole namespace gets removed, we can ignore the NotFound error here + // any potential configuration will get removed in reconcileEgressIPNamespace + if new == nil && apierrors.IsNotFound(err) { + klog.V(5).Infof("Namespace %s no longer exists for the deleted pod: %s", oldPod.Namespace, oldPod.Name) + return nil + } return err } } diff --git a/go-controller/pkg/ovn/external_ids_syncer/acl/acl_sync.go b/go-controller/pkg/ovn/external_ids_syncer/acl/acl_sync.go index 95053869ed..ede4295e09 100644 --- a/go-controller/pkg/ovn/external_ids_syncer/acl/acl_sync.go +++ b/go-controller/pkg/ovn/external_ids_syncer/acl/acl_sync.go @@ -91,7 +91,7 @@ func (syncer *aclSyncer) SyncACLs(existingNodes *v1.NodeList) error { if err != nil { return fmt.Errorf("failed to update stale default deny netpol ACLs: %w", err) } - klog.Infof("Found %d stale default deny netpol ACLs", len(gressPolicyACLs)) + klog.Infof("Found %d stale default deny netpol ACLs", len(defaultDenyACLs)) updatedACLs = append(updatedACLs, defaultDenyACLs...) egressFirewallACLs := syncer.updateStaleEgressFirewallACLs(legacyACLs) @@ -363,7 +363,10 @@ func (syncer *aclSyncer) updateStaleDefaultDenyNetpolACLs(legacyACLs []*nbdb.ACL deleteOps []libovsdb.Operation, err error) { for _, acl := range legacyACLs { // sync default Deny policies - if acl.ExternalIDs[defaultDenyPolicyTypeACLExtIdKey] == "" { + // defaultDenyPolicyTypeACLExtIdKey ExternalID was used by default deny and multicast acls, + // but multicast acls have specific DefaultMcast priority, filter them out. + if acl.ExternalIDs[defaultDenyPolicyTypeACLExtIdKey] == "" || acl.Priority == types.DefaultMcastDenyPriority || + acl.Priority == types.DefaultMcastAllowPriority { // not default deny policy continue } diff --git a/go-controller/vendor/github.com/vishvananda/netlink/.travis.yml b/go-controller/vendor/github.com/vishvananda/netlink/.travis.yml deleted file mode 100644 index 80219c69d6..0000000000 --- a/go-controller/vendor/github.com/vishvananda/netlink/.travis.yml +++ /dev/null @@ -1,20 +0,0 @@ -language: go -go: - - "1.12.x" - - "1.13.x" - - "1.14.x" -before_script: - # make sure we keep path in tact when we sudo - - sudo sed -i -e 's/^Defaults\tsecure_path.*$//' /etc/sudoers - # modprobe ip_gre or else the first gre device can't be deleted - - sudo modprobe ip_gre - # modprobe nf_conntrack for the conntrack testing - - sudo modprobe nf_conntrack - - sudo modprobe nf_conntrack_netlink - - sudo modprobe nf_conntrack_ipv4 - - sudo modprobe nf_conntrack_ipv6 - - sudo modprobe sch_hfsc - - sudo modprobe sch_sfq -install: - - go get -v -t ./... -go_import_path: github.com/vishvananda/netlink diff --git a/go-controller/vendor/github.com/vishvananda/netlink/README.md b/go-controller/vendor/github.com/vishvananda/netlink/README.md index e6ade71c95..0128bc67d5 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/README.md +++ b/go-controller/vendor/github.com/vishvananda/netlink/README.md @@ -1,6 +1,6 @@ # netlink - netlink library for go # -[![Build Status](https://travis-ci.com/vishvananda/netlink.png?branch=master)](https://travis-ci.com/vishvananda/netlink) [![GoDoc](https://godoc.org/github.com/vishvananda/netlink?status.svg)](https://godoc.org/github.com/vishvananda/netlink) +![Build Status](https://github.com/vishvananda/netlink/actions/workflows/main.yml/badge.svg) [![GoDoc](https://godoc.org/github.com/vishvananda/netlink?status.svg)](https://godoc.org/github.com/vishvananda/netlink) The netlink package provides a simple netlink library for go. Netlink is the interface a user-space program in linux uses to communicate with diff --git a/go-controller/vendor/github.com/vishvananda/netlink/addr_linux.go b/go-controller/vendor/github.com/vishvananda/netlink/addr_linux.go index 6504dbcbd0..72862ce1f4 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/addr_linux.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/addr_linux.go @@ -176,7 +176,7 @@ func AddrList(link Link, family int) ([]Addr, error) { // The list can be filtered by link and ip family. func (h *Handle) AddrList(link Link, family int) ([]Addr, error) { req := h.newNetlinkRequest(unix.RTM_GETADDR, unix.NLM_F_DUMP) - msg := nl.NewIfInfomsg(family) + msg := nl.NewIfAddrmsg(family) req.AddData(msg) msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWADDR) @@ -268,7 +268,7 @@ func parseAddr(m []byte) (addr Addr, family int, err error) { // But obviously, as there are IPv6 PtP addresses, too, // IFA_LOCAL should also be handled for IPv6. if local != nil { - if family == FAMILY_V4 && local.IP.Equal(dst.IP) { + if family == FAMILY_V4 && dst != nil && local.IP.Equal(dst.IP) { addr.IPNet = dst } else { addr.IPNet = local @@ -296,13 +296,13 @@ type AddrUpdate struct { // AddrSubscribe takes a chan down which notifications will be sent // when addresses change. Close the 'done' chan to stop subscription. func AddrSubscribe(ch chan<- AddrUpdate, done <-chan struct{}) error { - return addrSubscribeAt(netns.None(), netns.None(), ch, done, nil, false, 0) + return addrSubscribeAt(netns.None(), netns.None(), ch, done, nil, false, 0, nil) } // AddrSubscribeAt works like AddrSubscribe plus it allows the caller // to choose the network namespace in which to subscribe (ns). func AddrSubscribeAt(ns netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}) error { - return addrSubscribeAt(ns, netns.None(), ch, done, nil, false, 0) + return addrSubscribeAt(ns, netns.None(), ch, done, nil, false, 0, nil) } // AddrSubscribeOptions contains a set of options to use with @@ -312,6 +312,7 @@ type AddrSubscribeOptions struct { ErrorCallback func(error) ListExisting bool ReceiveBufferSize int + ReceiveTimeout *unix.Timeval } // AddrSubscribeWithOptions work like AddrSubscribe but enable to @@ -322,14 +323,20 @@ func AddrSubscribeWithOptions(ch chan<- AddrUpdate, done <-chan struct{}, option none := netns.None() options.Namespace = &none } - return addrSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting, options.ReceiveBufferSize) + return addrSubscribeAt(*options.Namespace, netns.None(), ch, done, options.ErrorCallback, options.ListExisting, options.ReceiveBufferSize, options.ReceiveTimeout) } -func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}, cberr func(error), listExisting bool, rcvbuf int) error { +func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-chan struct{}, cberr func(error), listExisting bool, rcvbuf int, rcvTimeout *unix.Timeval) error { s, err := nl.SubscribeAt(newNs, curNs, unix.NETLINK_ROUTE, unix.RTNLGRP_IPV4_IFADDR, unix.RTNLGRP_IPV6_IFADDR) if err != nil { return err } + if rcvTimeout != nil { + if err := s.SetReceiveTimeout(rcvTimeout); err != nil { + return err + } + } + if done != nil { go func() { <-done @@ -357,7 +364,8 @@ func addrSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- AddrUpdate, done <-c msgs, from, err := s.Receive() if err != nil { if cberr != nil { - cberr(err) + cberr(fmt.Errorf("Receive failed: %v", + err)) } return } diff --git a/go-controller/vendor/github.com/vishvananda/netlink/bpf_linux.go b/go-controller/vendor/github.com/vishvananda/netlink/bpf_linux.go index 6631626bfc..96befbfe0c 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/bpf_linux.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/bpf_linux.go @@ -16,6 +16,30 @@ const ( BPF_PROG_TYPE_SCHED_ACT BPF_PROG_TYPE_TRACEPOINT BPF_PROG_TYPE_XDP + BPF_PROG_TYPE_PERF_EVENT + BPF_PROG_TYPE_CGROUP_SKB + BPF_PROG_TYPE_CGROUP_SOCK + BPF_PROG_TYPE_LWT_IN + BPF_PROG_TYPE_LWT_OUT + BPF_PROG_TYPE_LWT_XMIT + BPF_PROG_TYPE_SOCK_OPS + BPF_PROG_TYPE_SK_SKB + BPF_PROG_TYPE_CGROUP_DEVICE + BPF_PROG_TYPE_SK_MSG + BPF_PROG_TYPE_RAW_TRACEPOINT + BPF_PROG_TYPE_CGROUP_SOCK_ADDR + BPF_PROG_TYPE_LWT_SEG6LOCAL + BPF_PROG_TYPE_LIRC_MODE2 + BPF_PROG_TYPE_SK_REUSEPORT + BPF_PROG_TYPE_FLOW_DISSECTOR + BPF_PROG_TYPE_CGROUP_SYSCTL + BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE + BPF_PROG_TYPE_CGROUP_SOCKOPT + BPF_PROG_TYPE_TRACING + BPF_PROG_TYPE_STRUCT_OPS + BPF_PROG_TYPE_EXT + BPF_PROG_TYPE_LSM + BPF_PROG_TYPE_SK_LOOKUP ) type BPFAttr struct { diff --git a/go-controller/vendor/github.com/vishvananda/netlink/chain.go b/go-controller/vendor/github.com/vishvananda/netlink/chain.go new file mode 100644 index 0000000000..1d1c144e95 --- /dev/null +++ b/go-controller/vendor/github.com/vishvananda/netlink/chain.go @@ -0,0 +1,22 @@ +package netlink + +import ( + "fmt" +) + +// Chain contains the attributes of a Chain +type Chain struct { + Parent uint32 + Chain uint32 +} + +func (c Chain) String() string { + return fmt.Sprintf("{Parent: %d, Chain: %d}", c.Parent, c.Chain) +} + +func NewChain(parent uint32, chain uint32) Chain { + return Chain{ + Parent: parent, + Chain: chain, + } +} diff --git a/go-controller/vendor/github.com/vishvananda/netlink/chain_linux.go b/go-controller/vendor/github.com/vishvananda/netlink/chain_linux.go new file mode 100644 index 0000000000..d9f441613c --- /dev/null +++ b/go-controller/vendor/github.com/vishvananda/netlink/chain_linux.go @@ -0,0 +1,112 @@ +package netlink + +import ( + "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" +) + +// ChainDel will delete a chain from the system. +func ChainDel(link Link, chain Chain) error { + // Equivalent to: `tc chain del $chain` + return pkgHandle.ChainDel(link, chain) +} + +// ChainDel will delete a chain from the system. +// Equivalent to: `tc chain del $chain` +func (h *Handle) ChainDel(link Link, chain Chain) error { + return h.chainModify(unix.RTM_DELCHAIN, 0, link, chain) +} + +// ChainAdd will add a chain to the system. +// Equivalent to: `tc chain add` +func ChainAdd(link Link, chain Chain) error { + return pkgHandle.ChainAdd(link, chain) +} + +// ChainAdd will add a chain to the system. +// Equivalent to: `tc chain add` +func (h *Handle) ChainAdd(link Link, chain Chain) error { + return h.chainModify( + unix.RTM_NEWCHAIN, + unix.NLM_F_CREATE|unix.NLM_F_EXCL, + link, + chain) +} + +func (h *Handle) chainModify(cmd, flags int, link Link, chain Chain) error { + req := h.newNetlinkRequest(cmd, flags|unix.NLM_F_ACK) + index := int32(0) + if link != nil { + base := link.Attrs() + h.ensureIndex(base) + index = int32(base.Index) + } + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Ifindex: index, + Parent: chain.Parent, + } + req.AddData(msg) + req.AddData(nl.NewRtAttr(nl.TCA_CHAIN, nl.Uint32Attr(chain.Chain))) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// ChainList gets a list of chains in the system. +// Equivalent to: `tc chain list`. +// The list can be filtered by link. +func ChainList(link Link, parent uint32) ([]Chain, error) { + return pkgHandle.ChainList(link, parent) +} + +// ChainList gets a list of chains in the system. +// Equivalent to: `tc chain list`. +// The list can be filtered by link. +func (h *Handle) ChainList(link Link, parent uint32) ([]Chain, error) { + req := h.newNetlinkRequest(unix.RTM_GETCHAIN, unix.NLM_F_DUMP) + index := int32(0) + if link != nil { + base := link.Attrs() + h.ensureIndex(base) + index = int32(base.Index) + } + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Ifindex: index, + Parent: parent, + } + req.AddData(msg) + + msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWCHAIN) + if err != nil { + return nil, err + } + + var res []Chain + for _, m := range msgs { + msg := nl.DeserializeTcMsg(m) + + attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + if err != nil { + return nil, err + } + + // skip chains from other interfaces + if link != nil && msg.Ifindex != index { + continue + } + + var chain Chain + for _, attr := range attrs { + switch attr.Attr.Type { + case nl.TCA_CHAIN: + chain.Chain = native.Uint32(attr.Value) + chain.Parent = parent + } + } + res = append(res, chain) + } + + return res, nil +} diff --git a/go-controller/vendor/github.com/vishvananda/netlink/class_linux.go b/go-controller/vendor/github.com/vishvananda/netlink/class_linux.go index fe8963f4fe..6f542ba4e7 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/class_linux.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/class_linux.go @@ -191,9 +191,9 @@ func classPayload(req *nl.NetlinkRequest, class Class) error { opt.Fsc.Set(fm1/8, fd, fm2/8) um1, ud, um2 := hfsc.Usc.Attrs() opt.Usc.Set(um1/8, ud, um2/8) - nl.NewRtAttrChild(options, nl.TCA_HFSC_RSC, nl.SerializeHfscCurve(&opt.Rsc)) - nl.NewRtAttrChild(options, nl.TCA_HFSC_FSC, nl.SerializeHfscCurve(&opt.Fsc)) - nl.NewRtAttrChild(options, nl.TCA_HFSC_USC, nl.SerializeHfscCurve(&opt.Usc)) + options.AddRtAttr(nl.TCA_HFSC_RSC, nl.SerializeHfscCurve(&opt.Rsc)) + options.AddRtAttr(nl.TCA_HFSC_FSC, nl.SerializeHfscCurve(&opt.Fsc)) + options.AddRtAttr(nl.TCA_HFSC_USC, nl.SerializeHfscCurve(&opt.Usc)) } req.AddData(options) return nil diff --git a/go-controller/vendor/github.com/vishvananda/netlink/conntrack_linux.go b/go-controller/vendor/github.com/vishvananda/netlink/conntrack_linux.go index 79d9a9585a..67b92e2297 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/conntrack_linux.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/conntrack_linux.go @@ -6,6 +6,7 @@ import ( "errors" "fmt" "net" + "time" "github.com/vishvananda/netlink/nl" "golang.org/x/sys/unix" @@ -145,20 +146,28 @@ type ConntrackFlow struct { Forward ipTuple Reverse ipTuple Mark uint32 + TimeStart uint64 + TimeStop uint64 + TimeOut uint32 Labels []byte } func (s *ConntrackFlow) String() string { // conntrack cmd output: // udp 17 src=127.0.0.1 dst=127.0.0.1 sport=4001 dport=1234 packets=5 bytes=532 [UNREPLIED] src=127.0.0.1 dst=127.0.0.1 sport=1234 dport=4001 packets=10 bytes=1078 mark=0 labels=0x00000000050012ac4202010000000000 - res := fmt.Sprintf("%s\t%d src=%s dst=%s sport=%d dport=%d packets=%d bytes=%d\tsrc=%s dst=%s sport=%d dport=%d packets=%d bytes=%d mark=%d", + // start=2019-07-26 01:26:21.557800506 +0000 UTC stop=1970-01-01 00:00:00 +0000 UTC timeout=30(sec) + start := time.Unix(0, int64(s.TimeStart)) + stop := time.Unix(0, int64(s.TimeStop)) + timeout := int32(s.TimeOut) + res := fmt.Sprintf("%s\t%d src=%s dst=%s sport=%d dport=%d packets=%d bytes=%d\tsrc=%s dst=%s sport=%d dport=%d packets=%d bytes=%d mark=0x%x ", nl.L4ProtoMap[s.Forward.Protocol], s.Forward.Protocol, s.Forward.SrcIP.String(), s.Forward.DstIP.String(), s.Forward.SrcPort, s.Forward.DstPort, s.Forward.Packets, s.Forward.Bytes, s.Reverse.SrcIP.String(), s.Reverse.DstIP.String(), s.Reverse.SrcPort, s.Reverse.DstPort, s.Reverse.Packets, s.Reverse.Bytes, s.Mark) if len(s.Labels) > 0 { - res += fmt.Sprintf(" labels=0x%x", s.Labels) + res += fmt.Sprintf("labels=0x%x ", s.Labels) } + res += fmt.Sprintf("start=%v stop=%v timeout=%d(sec)", start, stop, timeout) return res } @@ -179,25 +188,43 @@ func parseIpTuple(reader *bytes.Reader, tpl *ipTuple) uint8 { tpl.DstIP = v } } - // Skip the next 4 bytes nl.NLA_F_NESTED|nl.CTA_TUPLE_PROTO - reader.Seek(4, seekCurrent) - _, t, _, v := parseNfAttrTLV(reader) + // Get total length of nested protocol-specific info. + _, _, protoInfoTotalLen := parseNfAttrTL(reader) + _, t, l, v := parseNfAttrTLV(reader) + // Track the number of bytes read. + protoInfoBytesRead := uint16(nl.SizeofNfattr) + l if t == nl.CTA_PROTO_NUM { tpl.Protocol = uint8(v[0]) } - // Skip some padding 3 bytes + // We only parse TCP & UDP headers. Skip the others. + if tpl.Protocol != 6 && tpl.Protocol != 17 { + // skip the rest + bytesRemaining := protoInfoTotalLen - protoInfoBytesRead + reader.Seek(int64(bytesRemaining), seekCurrent) + return tpl.Protocol + } + // Skip 3 bytes of padding reader.Seek(3, seekCurrent) + protoInfoBytesRead += 3 for i := 0; i < 2; i++ { _, t, _ := parseNfAttrTL(reader) + protoInfoBytesRead += uint16(nl.SizeofNfattr) switch t { case nl.CTA_PROTO_SRC_PORT: parseBERaw16(reader, &tpl.SrcPort) + protoInfoBytesRead += 2 case nl.CTA_PROTO_DST_PORT: parseBERaw16(reader, &tpl.DstPort) + protoInfoBytesRead += 2 } - // Skip some padding 2 byte + // Skip 2 bytes of padding reader.Seek(2, seekCurrent) + protoInfoBytesRead += 2 } + // Skip any remaining/unknown parts of the message + bytesRemaining := protoInfoTotalLen - protoInfoBytesRead + reader.Seek(int64(bytesRemaining), seekCurrent) + return tpl.Protocol } @@ -216,10 +243,14 @@ func parseNfAttrTL(r *bytes.Reader) (isNested bool, attrType, len uint16) { binary.Read(r, nl.NativeEndian(), &attrType) isNested = (attrType & nl.NLA_F_NESTED) == nl.NLA_F_NESTED attrType = attrType & (nl.NLA_F_NESTED - 1) - return isNested, attrType, len } +func skipNfAttrValue(r *bytes.Reader, len uint16) { + len = (len + nl.NLA_ALIGNTO - 1) & ^(nl.NLA_ALIGNTO - 1) + r.Seek(int64(len), seekCurrent) +} + func parseBERaw16(r *bytes.Reader, v *uint16) { binary.Read(r, binary.BigEndian, v) } @@ -246,6 +277,36 @@ func parseByteAndPacketCounters(r *bytes.Reader) (bytes, packets uint64) { return } +// when the flow is alive, only the timestamp_start is returned in structure +func parseTimeStamp(r *bytes.Reader, readSize uint16) (tstart, tstop uint64) { + var numTimeStamps int + oneItem := nl.SizeofNfattr + 8 // 4 bytes attr header + 8 bytes timestamp + if readSize == uint16(oneItem) { + numTimeStamps = 1 + } else if readSize == 2*uint16(oneItem) { + numTimeStamps = 2 + } else { + return + } + for i := 0; i < numTimeStamps; i++ { + switch _, t, _ := parseNfAttrTL(r); t { + case nl.CTA_TIMESTAMP_START: + parseBERaw64(r, &tstart) + case nl.CTA_TIMESTAMP_STOP: + parseBERaw64(r, &tstop) + default: + return + } + } + return + +} + +func parseTimeOut(r *bytes.Reader) (ttimeout uint32) { + parseBERaw32(r, &ttimeout) + return +} + func parseConnectionMark(r *bytes.Reader) (mark uint32) { parseBERaw32(r, &mark) return @@ -277,20 +338,26 @@ func parseRawData(data []byte) *ConntrackFlow { if nested, t, l := parseNfAttrTL(reader); nested { switch t { case nl.CTA_TUPLE_ORIG: - if nested, t, _ = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP { + if nested, t, l = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP { parseIpTuple(reader, &s.Forward) } case nl.CTA_TUPLE_REPLY: - if nested, t, _ = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP { + if nested, t, l = parseNfAttrTL(reader); nested && t == nl.CTA_TUPLE_IP { parseIpTuple(reader, &s.Reverse) } else { // Header not recognized skip it - reader.Seek(int64(l), seekCurrent) + skipNfAttrValue(reader, l) } case nl.CTA_COUNTERS_ORIG: s.Forward.Bytes, s.Forward.Packets = parseByteAndPacketCounters(reader) case nl.CTA_COUNTERS_REPLY: s.Reverse.Bytes, s.Reverse.Packets = parseByteAndPacketCounters(reader) + case nl.CTA_TIMESTAMP: + s.TimeStart, s.TimeStop = parseTimeStamp(reader, l) + case nl.CTA_PROTOINFO: + skipNfAttrValue(reader, l) + default: + skipNfAttrValue(reader, l) } } else { switch t { @@ -298,6 +365,12 @@ func parseRawData(data []byte) *ConntrackFlow { s.Mark = parseConnectionMark(reader) case nl.CTA_LABELS: s.Labels = parseConnectionLabels(reader) + case nl.CTA_TIMEOUT: + s.TimeOut = parseTimeOut(reader) + case nl.CTA_STATUS, nl.CTA_USE, nl.CTA_ID: + skipNfAttrValue(reader, l) + default: + skipNfAttrValue(reader, l) } } } diff --git a/go-controller/vendor/github.com/vishvananda/netlink/devlink_linux.go b/go-controller/vendor/github.com/vishvananda/netlink/devlink_linux.go index d83693c06a..358b232c6c 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/devlink_linux.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/devlink_linux.go @@ -2,10 +2,12 @@ package netlink import ( "fmt" - "github.com/vishvananda/netlink/nl" - "golang.org/x/sys/unix" "net" + "strings" "syscall" + + "github.com/vishvananda/netlink/nl" + "golang.org/x/sys/unix" ) // DevlinkDevEswitchAttr represents device's eswitch attributes @@ -64,6 +66,24 @@ type DevLinkPortAddAttrs struct { ControllerValid bool } +// DevlinkDeviceInfo represents devlink info +type DevlinkDeviceInfo struct { + Driver string + SerialNumber string + BoardID string + FwApp string + FwAppBoundleID string + FwAppName string + FwBoundleID string + FwMgmt string + FwMgmtAPI string + FwMgmtBuild string + FwNetlist string + FwNetlistBuild string + FwPsidAPI string + FwUndi string +} + func parseDevLinkDeviceList(msgs [][]byte) ([]*DevlinkDevice, error) { devices := make([]*DevlinkDevice, 0, len(msgs)) for _, m := range msgs { @@ -163,12 +183,12 @@ func (h *Handle) getEswitchAttrs(family *GenlFamily, dev *DevlinkDevice) { req := h.newNetlinkRequest(int(family.ID), unix.NLM_F_REQUEST|unix.NLM_F_ACK) req.AddData(msg) - b := make([]byte, len(dev.BusName)) + b := make([]byte, len(dev.BusName)+1) copy(b, dev.BusName) data := nl.NewRtAttr(nl.DEVLINK_ATTR_BUS_NAME, b) req.AddData(data) - b = make([]byte, len(dev.DeviceName)) + b = make([]byte, len(dev.DeviceName)+1) copy(b, dev.DeviceName) data = nl.NewRtAttr(nl.DEVLINK_ATTR_DEV_NAME, b) req.AddData(data) @@ -492,11 +512,11 @@ func (h *Handle) DevlinkPortFnSet(Bus string, Device string, PortIndex uint32, F fnAttr := nl.NewRtAttr(nl.DEVLINK_ATTR_PORT_FUNCTION|unix.NLA_F_NESTED, nil) - if FnAttrs.HwAddrValid == true { + if FnAttrs.HwAddrValid { fnAttr.AddRtAttr(nl.DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, []byte(FnAttrs.FnAttrs.HwAddr)) } - if FnAttrs.StateValid == true { + if FnAttrs.StateValid { fnAttr.AddRtAttr(nl.DEVLINK_PORT_FN_ATTR_STATE, nl.Uint8Attr(FnAttrs.FnAttrs.State)) } req.AddData(fnAttr) @@ -510,3 +530,199 @@ func (h *Handle) DevlinkPortFnSet(Bus string, Device string, PortIndex uint32, F func DevlinkPortFnSet(Bus string, Device string, PortIndex uint32, FnAttrs DevlinkPortFnSetAttrs) error { return pkgHandle.DevlinkPortFnSet(Bus, Device, PortIndex, FnAttrs) } + +// devlinkInfoGetter is function that is responsible for getting devlink info message +// this is introduced for test purpose +type devlinkInfoGetter func(bus, device string) ([]byte, error) + +// DevlinkGetDeviceInfoByName returns devlink info for selected device, +// otherwise returns an error code. +// Equivalent to: `devlink dev info $dev` +func (h *Handle) DevlinkGetDeviceInfoByName(Bus string, Device string, getInfoMsg devlinkInfoGetter) (*DevlinkDeviceInfo, error) { + info, err := h.DevlinkGetDeviceInfoByNameAsMap(Bus, Device, getInfoMsg) + if err != nil { + return nil, err + } + + return parseInfoData(info), nil +} + +// DevlinkGetDeviceInfoByName returns devlink info for selected device, +// otherwise returns an error code. +// Equivalent to: `devlink dev info $dev` +func DevlinkGetDeviceInfoByName(Bus string, Device string) (*DevlinkDeviceInfo, error) { + return pkgHandle.DevlinkGetDeviceInfoByName(Bus, Device, pkgHandle.getDevlinkInfoMsg) +} + +// DevlinkGetDeviceInfoByNameAsMap returns devlink info for selected device as a map, +// otherwise returns an error code. +// Equivalent to: `devlink dev info $dev` +func (h *Handle) DevlinkGetDeviceInfoByNameAsMap(Bus string, Device string, getInfoMsg devlinkInfoGetter) (map[string]string, error) { + response, err := getInfoMsg(Bus, Device) + if err != nil { + return nil, err + } + + info, err := parseInfoMsg(response) + if err != nil { + return nil, err + } + + return info, nil +} + +// DevlinkGetDeviceInfoByNameAsMap returns devlink info for selected device as a map, +// otherwise returns an error code. +// Equivalent to: `devlink dev info $dev` +func DevlinkGetDeviceInfoByNameAsMap(Bus string, Device string) (map[string]string, error) { + return pkgHandle.DevlinkGetDeviceInfoByNameAsMap(Bus, Device, pkgHandle.getDevlinkInfoMsg) +} + +// GetDevlinkInfo returns devlink info for target device, +// otherwise returns an error code. +func (d *DevlinkDevice) GetDevlinkInfo() (*DevlinkDeviceInfo, error) { + return pkgHandle.DevlinkGetDeviceInfoByName(d.BusName, d.DeviceName, pkgHandle.getDevlinkInfoMsg) +} + +// GetDevlinkInfoAsMap returns devlink info for target device as a map, +// otherwise returns an error code. +func (d *DevlinkDevice) GetDevlinkInfoAsMap() (map[string]string, error) { + return pkgHandle.DevlinkGetDeviceInfoByNameAsMap(d.BusName, d.DeviceName, pkgHandle.getDevlinkInfoMsg) +} + +func (h *Handle) getDevlinkInfoMsg(bus, device string) ([]byte, error) { + _, req, err := h.createCmdReq(nl.DEVLINK_CMD_INFO_GET, bus, device) + if err != nil { + return nil, err + } + + response, err := req.Execute(unix.NETLINK_GENERIC, 0) + if err != nil { + return nil, err + } + + if len(response) < 1 { + return nil, fmt.Errorf("getDevlinkInfoMsg: message too short") + } + + return response[0], nil +} + +func parseInfoMsg(msg []byte) (map[string]string, error) { + if len(msg) < nl.SizeofGenlmsg { + return nil, fmt.Errorf("parseInfoMsg: message too short") + } + + info := make(map[string]string) + err := collectInfoData(msg[nl.SizeofGenlmsg:], info) + + if err != nil { + return nil, err + } + + return info, nil +} + +func collectInfoData(msg []byte, data map[string]string) error { + attrs, err := nl.ParseRouteAttr(msg) + if err != nil { + return err + } + + for _, attr := range attrs { + switch attr.Attr.Type { + case nl.DEVLINK_ATTR_INFO_DRIVER_NAME: + data["driver"] = parseInfoValue(attr.Value) + case nl.DEVLINK_ATTR_INFO_SERIAL_NUMBER: + data["serialNumber"] = parseInfoValue(attr.Value) + case nl.DEVLINK_ATTR_INFO_VERSION_RUNNING, nl.DEVLINK_ATTR_INFO_VERSION_FIXED, + nl.DEVLINK_ATTR_INFO_VERSION_STORED: + key, value, err := getNestedInfoData(attr.Value) + if err != nil { + return err + } + data[key] = value + } + } + + if len(data) == 0 { + return fmt.Errorf("collectInfoData: could not read attributes") + } + + return nil +} + +func getNestedInfoData(msg []byte) (string, string, error) { + nestedAttrs, err := nl.ParseRouteAttr(msg) + + var key, value string + + if err != nil { + return "", "", err + } + + if len(nestedAttrs) != 2 { + return "", "", fmt.Errorf("getNestedInfoData: too few attributes in nested structure") + } + + for _, nestedAttr := range nestedAttrs { + switch nestedAttr.Attr.Type { + case nl.DEVLINK_ATTR_INFO_VERSION_NAME: + key = parseInfoValue(nestedAttr.Value) + case nl.DEVLINK_ATTR_INFO_VERSION_VALUE: + value = parseInfoValue(nestedAttr.Value) + } + } + + if key == "" { + return "", "", fmt.Errorf("getNestedInfoData: key not found") + } + + if value == "" { + return "", "", fmt.Errorf("getNestedInfoData: value not found") + } + + return key, value, nil +} + +func parseInfoData(data map[string]string) *DevlinkDeviceInfo { + info := new(DevlinkDeviceInfo) + for key, value := range data { + switch key { + case "driver": + info.Driver = value + case "serialNumber": + info.SerialNumber = value + case "board.id": + info.BoardID = value + case "fw.app": + info.FwApp = value + case "fw.app.bundle_id": + info.FwAppBoundleID = value + case "fw.app.name": + info.FwAppName = value + case "fw.bundle_id": + info.FwBoundleID = value + case "fw.mgmt": + info.FwMgmt = value + case "fw.mgmt.api": + info.FwMgmtAPI = value + case "fw.mgmt.build": + info.FwMgmtBuild = value + case "fw.netlist": + info.FwNetlist = value + case "fw.netlist.build": + info.FwNetlistBuild = value + case "fw.psid.api": + info.FwPsidAPI = value + case "fw.undi": + info.FwUndi = value + } + } + return info +} + +func parseInfoValue(value []byte) string { + v := strings.ReplaceAll(string(value), "\x00", "") + return strings.TrimSpace(v) +} diff --git a/go-controller/vendor/github.com/vishvananda/netlink/filter.go b/go-controller/vendor/github.com/vishvananda/netlink/filter.go index 2dc34b9959..fe41813359 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/filter.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/filter.go @@ -19,6 +19,7 @@ type FilterAttrs struct { Parent uint32 Priority uint16 // lower is higher priority Protocol uint16 // unix.ETH_P_* + Chain *uint32 } func (q FilterAttrs) String() string { @@ -27,6 +28,11 @@ func (q FilterAttrs) String() string { type TcAct int32 +const ( + TC_ACT_EXT_SHIFT = 28 + TC_ACT_EXT_VAL_MASK = (1 << TC_ACT_EXT_SHIFT) - 1 +) + const ( TC_ACT_UNSPEC TcAct = -1 TC_ACT_OK TcAct = 0 @@ -40,6 +46,22 @@ const ( TC_ACT_JUMP TcAct = 0x10000000 ) +func getTcActExt(local int32) int32 { + return local << TC_ACT_EXT_SHIFT +} + +func getTcActGotoChain() TcAct { + return TcAct(getTcActExt(2)) +} + +func getTcActExtOpcode(combined int32) int32 { + return combined & (^TC_ACT_EXT_VAL_MASK) +} + +func TcActExtCmp(combined int32, opcode int32) bool { + return getTcActExtOpcode(combined) == opcode +} + func (a TcAct) String() string { switch a { case TC_ACT_UNSPEC: @@ -63,6 +85,9 @@ func (a TcAct) String() string { case TC_ACT_JUMP: return "jump" } + if TcActExtCmp(int32(a), int32(getTcActGotoChain())) { + return "goto" + } return fmt.Sprintf("0x%x", int32(a)) } @@ -112,6 +137,7 @@ type Action interface { type GenericAction struct { ActionAttrs + Chain int32 } func (action *GenericAction) Type() string { @@ -157,6 +183,39 @@ func NewConnmarkAction() *ConnmarkAction { } } +type CsumUpdateFlags uint32 + +const ( + TCA_CSUM_UPDATE_FLAG_IPV4HDR CsumUpdateFlags = 1 + TCA_CSUM_UPDATE_FLAG_ICMP CsumUpdateFlags = 2 + TCA_CSUM_UPDATE_FLAG_IGMP CsumUpdateFlags = 4 + TCA_CSUM_UPDATE_FLAG_TCP CsumUpdateFlags = 8 + TCA_CSUM_UPDATE_FLAG_UDP CsumUpdateFlags = 16 + TCA_CSUM_UPDATE_FLAG_UDPLITE CsumUpdateFlags = 32 + TCA_CSUM_UPDATE_FLAG_SCTP CsumUpdateFlags = 64 +) + +type CsumAction struct { + ActionAttrs + UpdateFlags CsumUpdateFlags +} + +func (action *CsumAction) Type() string { + return "csum" +} + +func (action *CsumAction) Attrs() *ActionAttrs { + return &action.ActionAttrs +} + +func NewCsumAction() *CsumAction { + return &CsumAction{ + ActionAttrs: ActionAttrs{ + Action: TC_ACT_PIPE, + }, + } +} + type MirredAct uint8 func (a MirredAct) String() string { @@ -242,6 +301,7 @@ type SkbEditAction struct { PType *uint16 Priority *uint32 Mark *uint32 + Mask *uint32 } func (action *SkbEditAction) Type() string { @@ -260,6 +320,40 @@ func NewSkbEditAction() *SkbEditAction { } } +type PoliceAction struct { + ActionAttrs + Rate uint32 // in byte per second + Burst uint32 // in byte + RCellLog int + Mtu uint32 + Mpu uint16 // in byte + PeakRate uint32 // in byte per second + PCellLog int + AvRate uint32 // in byte per second + Overhead uint16 + LinkLayer int + ExceedAction TcPolAct + NotExceedAction TcPolAct +} + +func (action *PoliceAction) Type() string { + return "police" +} + +func (action *PoliceAction) Attrs() *ActionAttrs { + return &action.ActionAttrs +} + +func NewPoliceAction() *PoliceAction { + return &PoliceAction{ + RCellLog: -1, + PCellLog: -1, + LinkLayer: 1, // ETHERNET + ExceedAction: TC_POLICE_RECLASSIFY, + NotExceedAction: TC_POLICE_OK, + } +} + // MatchAll filters match all packets type MatchAll struct { FilterAttrs @@ -275,20 +369,21 @@ func (filter *MatchAll) Type() string { return "matchall" } -type FilterFwAttrs struct { - ClassId uint32 - InDev string - Mask uint32 - Index uint32 - Buffer uint32 - Mtu uint32 - Mpu uint16 - Rate uint32 - AvRate uint32 - PeakRate uint32 - Action TcPolAct - Overhead uint16 - LinkLayer int +type FwFilter struct { + FilterAttrs + ClassId uint32 + InDev string + Mask uint32 + Police *PoliceAction + Actions []Action +} + +func (filter *FwFilter) Attrs() *FilterAttrs { + return &filter.FilterAttrs +} + +func (filter *FwFilter) Type() string { + return "fw" } type BpfFilter struct { diff --git a/go-controller/vendor/github.com/vishvananda/netlink/filter_linux.go b/go-controller/vendor/github.com/vishvananda/netlink/filter_linux.go index 5514531dd8..fbd434aa93 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/filter_linux.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/filter_linux.go @@ -6,6 +6,7 @@ import ( "encoding/hex" "errors" "fmt" + "net" "syscall" "github.com/vishvananda/netlink/nl" @@ -50,74 +51,185 @@ func (filter *U32) Type() string { return "u32" } -// Fw filter filters on firewall marks -// NOTE: this is in filter_linux because it refers to nl.TcPolice which -// is defined in nl/tc_linux.go -type Fw struct { +type Flower struct { FilterAttrs - ClassId uint32 - // TODO remove nl type from interface - Police nl.TcPolice - InDev string - // TODO Action - Mask uint32 - AvRate uint32 - Rtab [256]uint32 - Ptab [256]uint32 -} - -func NewFw(attrs FilterAttrs, fattrs FilterFwAttrs) (*Fw, error) { - var rtab [256]uint32 - var ptab [256]uint32 - rcellLog := -1 - pcellLog := -1 - avrate := fattrs.AvRate / 8 - police := nl.TcPolice{} - police.Rate.Rate = fattrs.Rate / 8 - police.PeakRate.Rate = fattrs.PeakRate / 8 - buffer := fattrs.Buffer - linklayer := nl.LINKLAYER_ETHERNET + DestIP net.IP + DestIPMask net.IPMask + SrcIP net.IP + SrcIPMask net.IPMask + EthType uint16 + EncDestIP net.IP + EncDestIPMask net.IPMask + EncSrcIP net.IP + EncSrcIPMask net.IPMask + EncDestPort uint16 + EncKeyId uint32 + SkipHw bool + SkipSw bool + IPProto *nl.IPProto + DestPort uint16 + SrcPort uint16 - if fattrs.LinkLayer != nl.LINKLAYER_UNSPEC { - linklayer = fattrs.LinkLayer - } + Actions []Action +} - police.Action = int32(fattrs.Action) - if police.Rate.Rate != 0 { - police.Rate.Mpu = fattrs.Mpu - police.Rate.Overhead = fattrs.Overhead - if CalcRtable(&police.Rate, rtab[:], rcellLog, fattrs.Mtu, linklayer) < 0 { - return nil, errors.New("TBF: failed to calculate rate table") - } - police.Burst = Xmittime(uint64(police.Rate.Rate), uint32(buffer)) +func (filter *Flower) Attrs() *FilterAttrs { + return &filter.FilterAttrs +} + +func (filter *Flower) Type() string { + return "flower" +} + +func (filter *Flower) encodeIP(parent *nl.RtAttr, ip net.IP, mask net.IPMask, v4Type, v6Type int, v4MaskType, v6MaskType int) { + ipType := v4Type + maskType := v4MaskType + + encodeMask := mask + if mask == nil { + encodeMask = net.CIDRMask(32, 32) } - police.Mtu = fattrs.Mtu - if police.PeakRate.Rate != 0 { - police.PeakRate.Mpu = fattrs.Mpu - police.PeakRate.Overhead = fattrs.Overhead - if CalcRtable(&police.PeakRate, ptab[:], pcellLog, fattrs.Mtu, linklayer) < 0 { - return nil, errors.New("POLICE: failed to calculate peak rate table") + v4IP := ip.To4() + if v4IP == nil { + ipType = v6Type + maskType = v6MaskType + if mask == nil { + encodeMask = net.CIDRMask(128, 128) } + } else { + ip = v4IP } - return &Fw{ - FilterAttrs: attrs, - ClassId: fattrs.ClassId, - InDev: fattrs.InDev, - Mask: fattrs.Mask, - Police: police, - AvRate: avrate, - Rtab: rtab, - Ptab: ptab, - }, nil + parent.AddRtAttr(ipType, ip) + parent.AddRtAttr(maskType, encodeMask) } -func (filter *Fw) Attrs() *FilterAttrs { - return &filter.FilterAttrs +func (filter *Flower) encode(parent *nl.RtAttr) error { + if filter.EthType != 0 { + parent.AddRtAttr(nl.TCA_FLOWER_KEY_ETH_TYPE, htons(filter.EthType)) + } + if filter.SrcIP != nil { + filter.encodeIP(parent, filter.SrcIP, filter.SrcIPMask, + nl.TCA_FLOWER_KEY_IPV4_SRC, nl.TCA_FLOWER_KEY_IPV6_SRC, + nl.TCA_FLOWER_KEY_IPV4_SRC_MASK, nl.TCA_FLOWER_KEY_IPV6_SRC_MASK) + } + if filter.DestIP != nil { + filter.encodeIP(parent, filter.DestIP, filter.DestIPMask, + nl.TCA_FLOWER_KEY_IPV4_DST, nl.TCA_FLOWER_KEY_IPV6_DST, + nl.TCA_FLOWER_KEY_IPV4_DST_MASK, nl.TCA_FLOWER_KEY_IPV6_DST_MASK) + } + if filter.EncSrcIP != nil { + filter.encodeIP(parent, filter.EncSrcIP, filter.EncSrcIPMask, + nl.TCA_FLOWER_KEY_ENC_IPV4_SRC, nl.TCA_FLOWER_KEY_ENC_IPV6_SRC, + nl.TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK, nl.TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK) + } + if filter.EncDestIP != nil { + filter.encodeIP(parent, filter.EncDestIP, filter.EncSrcIPMask, + nl.TCA_FLOWER_KEY_ENC_IPV4_DST, nl.TCA_FLOWER_KEY_ENC_IPV6_DST, + nl.TCA_FLOWER_KEY_ENC_IPV4_DST_MASK, nl.TCA_FLOWER_KEY_ENC_IPV6_DST_MASK) + } + if filter.EncDestPort != 0 { + parent.AddRtAttr(nl.TCA_FLOWER_KEY_ENC_UDP_DST_PORT, htons(filter.EncDestPort)) + } + if filter.EncKeyId != 0 { + parent.AddRtAttr(nl.TCA_FLOWER_KEY_ENC_KEY_ID, htonl(filter.EncKeyId)) + } + if filter.IPProto != nil { + ipproto := *filter.IPProto + parent.AddRtAttr(nl.TCA_FLOWER_KEY_IP_PROTO, ipproto.Serialize()) + if filter.SrcPort != 0 { + switch ipproto { + case nl.IPPROTO_TCP: + parent.AddRtAttr(nl.TCA_FLOWER_KEY_TCP_SRC, htons(filter.SrcPort)) + case nl.IPPROTO_UDP: + parent.AddRtAttr(nl.TCA_FLOWER_KEY_UDP_SRC, htons(filter.SrcPort)) + case nl.IPPROTO_SCTP: + parent.AddRtAttr(nl.TCA_FLOWER_KEY_SCTP_SRC, htons(filter.SrcPort)) + } + } + if filter.DestPort != 0 { + switch ipproto { + case nl.IPPROTO_TCP: + parent.AddRtAttr(nl.TCA_FLOWER_KEY_TCP_DST, htons(filter.DestPort)) + case nl.IPPROTO_UDP: + parent.AddRtAttr(nl.TCA_FLOWER_KEY_UDP_DST, htons(filter.DestPort)) + case nl.IPPROTO_SCTP: + parent.AddRtAttr(nl.TCA_FLOWER_KEY_SCTP_DST, htons(filter.DestPort)) + } + } + } + + var flags uint32 = 0 + if filter.SkipHw { + flags |= nl.TCA_CLS_FLAGS_SKIP_HW + } + if filter.SkipSw { + flags |= nl.TCA_CLS_FLAGS_SKIP_SW + } + parent.AddRtAttr(nl.TCA_FLOWER_FLAGS, htonl(flags)) + + actionsAttr := parent.AddRtAttr(nl.TCA_FLOWER_ACT, nil) + if err := EncodeActions(actionsAttr, filter.Actions); err != nil { + return err + } + return nil } -func (filter *Fw) Type() string { - return "fw" +func (filter *Flower) decode(data []syscall.NetlinkRouteAttr) error { + for _, datum := range data { + switch datum.Attr.Type { + case nl.TCA_FLOWER_KEY_ETH_TYPE: + filter.EthType = ntohs(datum.Value) + case nl.TCA_FLOWER_KEY_IPV4_SRC, nl.TCA_FLOWER_KEY_IPV6_SRC: + filter.SrcIP = datum.Value + case nl.TCA_FLOWER_KEY_IPV4_SRC_MASK, nl.TCA_FLOWER_KEY_IPV6_SRC_MASK: + filter.SrcIPMask = datum.Value + case nl.TCA_FLOWER_KEY_IPV4_DST, nl.TCA_FLOWER_KEY_IPV6_DST: + filter.DestIP = datum.Value + case nl.TCA_FLOWER_KEY_IPV4_DST_MASK, nl.TCA_FLOWER_KEY_IPV6_DST_MASK: + filter.DestIPMask = datum.Value + case nl.TCA_FLOWER_KEY_ENC_IPV4_SRC, nl.TCA_FLOWER_KEY_ENC_IPV6_SRC: + filter.EncSrcIP = datum.Value + case nl.TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK, nl.TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK: + filter.EncSrcIPMask = datum.Value + case nl.TCA_FLOWER_KEY_ENC_IPV4_DST, nl.TCA_FLOWER_KEY_ENC_IPV6_DST: + filter.EncDestIP = datum.Value + case nl.TCA_FLOWER_KEY_ENC_IPV4_DST_MASK, nl.TCA_FLOWER_KEY_ENC_IPV6_DST_MASK: + filter.EncDestIPMask = datum.Value + case nl.TCA_FLOWER_KEY_ENC_UDP_DST_PORT: + filter.EncDestPort = ntohs(datum.Value) + case nl.TCA_FLOWER_KEY_ENC_KEY_ID: + filter.EncKeyId = ntohl(datum.Value) + case nl.TCA_FLOWER_KEY_IP_PROTO: + val := new(nl.IPProto) + *val = nl.IPProto(datum.Value[0]) + filter.IPProto = val + case nl.TCA_FLOWER_KEY_TCP_SRC, nl.TCA_FLOWER_KEY_UDP_SRC, nl.TCA_FLOWER_KEY_SCTP_SRC: + filter.SrcPort = ntohs(datum.Value) + case nl.TCA_FLOWER_KEY_TCP_DST, nl.TCA_FLOWER_KEY_UDP_DST, nl.TCA_FLOWER_KEY_SCTP_DST: + filter.DestPort = ntohs(datum.Value) + case nl.TCA_FLOWER_ACT: + tables, err := nl.ParseRouteAttr(datum.Value) + if err != nil { + return err + } + filter.Actions, err = parseActions(tables) + if err != nil { + return err + } + case nl.TCA_FLOWER_FLAGS: + attr := nl.DeserializeUint32Bitfield(datum.Value) + skipSw := attr.Value & nl.TCA_CLS_FLAGS_SKIP_HW + skipHw := attr.Value & nl.TCA_CLS_FLAGS_SKIP_SW + if skipSw != 0 { + filter.SkipSw = true + } + if skipHw != 0 { + filter.SkipHw = true + } + } + } + return nil } // FilterDel will delete a filter from the system. @@ -129,19 +241,7 @@ func FilterDel(filter Filter) error { // FilterDel will delete a filter from the system. // Equivalent to: `tc filter del $filter` func (h *Handle) FilterDel(filter Filter) error { - req := h.newNetlinkRequest(unix.RTM_DELTFILTER, unix.NLM_F_ACK) - base := filter.Attrs() - msg := &nl.TcMsg{ - Family: nl.FAMILY_ALL, - Ifindex: int32(base.LinkIndex), - Handle: base.Handle, - Parent: base.Parent, - Info: MakeHandle(base.Priority, nl.Swap16(base.Protocol)), - } - req.AddData(msg) - - _, err := req.Execute(unix.NETLINK_ROUTE, 0) - return err + return h.filterModify(filter, unix.RTM_DELTFILTER, 0) } // FilterAdd will add a filter to the system. @@ -153,7 +253,7 @@ func FilterAdd(filter Filter) error { // FilterAdd will add a filter to the system. // Equivalent to: `tc filter add $filter` func (h *Handle) FilterAdd(filter Filter) error { - return h.filterModify(filter, unix.NLM_F_CREATE|unix.NLM_F_EXCL) + return h.filterModify(filter, unix.RTM_NEWTFILTER, unix.NLM_F_CREATE|unix.NLM_F_EXCL) } // FilterReplace will replace a filter. @@ -165,11 +265,11 @@ func FilterReplace(filter Filter) error { // FilterReplace will replace a filter. // Equivalent to: `tc filter replace $filter` func (h *Handle) FilterReplace(filter Filter) error { - return h.filterModify(filter, unix.NLM_F_CREATE) + return h.filterModify(filter, unix.RTM_NEWTFILTER, unix.NLM_F_CREATE) } -func (h *Handle) filterModify(filter Filter, flags int) error { - req := h.newNetlinkRequest(unix.RTM_NEWTFILTER, flags|unix.NLM_F_ACK) +func (h *Handle) filterModify(filter Filter, proto, flags int) error { + req := h.newNetlinkRequest(proto, flags|unix.NLM_F_ACK) base := filter.Attrs() msg := &nl.TcMsg{ Family: nl.FAMILY_ALL, @@ -179,6 +279,9 @@ func (h *Handle) filterModify(filter Filter, flags int) error { Info: MakeHandle(base.Priority, nl.Swap16(base.Protocol)), } req.AddData(msg) + if filter.Attrs().Chain != nil { + req.AddData(nl.NewRtAttr(nl.TCA_CHAIN, nl.Uint32Attr(*filter.Attrs().Chain))) + } req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(filter.Type()))) options := nl.NewRtAttr(nl.TCA_OPTIONS, nil) @@ -236,7 +339,7 @@ func (h *Handle) filterModify(filter Filter, flags int) error { if err := EncodeActions(actionsAttr, filter.Actions); err != nil { return err } - case *Fw: + case *FwFilter: if filter.Mask != 0 { b := make([]byte, 4) native.PutUint32(b, filter.Mask) @@ -245,17 +348,10 @@ func (h *Handle) filterModify(filter Filter, flags int) error { if filter.InDev != "" { options.AddRtAttr(nl.TCA_FW_INDEV, nl.ZeroTerminated(filter.InDev)) } - if (filter.Police != nl.TcPolice{}) { - + if filter.Police != nil { police := options.AddRtAttr(nl.TCA_FW_POLICE, nil) - police.AddRtAttr(nl.TCA_POLICE_TBF, filter.Police.Serialize()) - if (filter.Police.Rate != nl.TcRateSpec{}) { - payload := SerializeRtab(filter.Rtab) - police.AddRtAttr(nl.TCA_POLICE_RATE, payload) - } - if (filter.Police.PeakRate != nl.TcRateSpec{}) { - payload := SerializeRtab(filter.Ptab) - police.AddRtAttr(nl.TCA_POLICE_PEAKRATE, payload) + if err := encodePolice(police, filter.Police); err != nil { + return err } } if filter.ClassId != 0 { @@ -263,6 +359,10 @@ func (h *Handle) filterModify(filter Filter, flags int) error { native.PutUint32(b, filter.ClassId) options.AddRtAttr(nl.TCA_FW_CLASSID, b) } + actionsAttr := options.AddRtAttr(nl.TCA_FW_ACT, nil) + if err := EncodeActions(actionsAttr, filter.Actions); err != nil { + return err + } case *BpfFilter: var bpfFlags uint32 if filter.ClassId != 0 { @@ -286,8 +386,11 @@ func (h *Handle) filterModify(filter Filter, flags int) error { if filter.ClassId != 0 { options.AddRtAttr(nl.TCA_MATCHALL_CLASSID, nl.Uint32Attr(filter.ClassId)) } + case *Flower: + if err := filter.encode(options); err != nil { + return err + } } - req.AddData(options) _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err @@ -349,11 +452,13 @@ func (h *Handle) FilterList(link Link, parent uint32) ([]Filter, error) { case "u32": filter = &U32{} case "fw": - filter = &Fw{} + filter = &FwFilter{} case "bpf": filter = &BpfFilter{} case "matchall": filter = &MatchAll{} + case "flower": + filter = &Flower{} default: filter = &GenericFilter{FilterType: filterType} } @@ -383,9 +488,18 @@ func (h *Handle) FilterList(link Link, parent uint32) ([]Filter, error) { if err != nil { return nil, err } + case "flower": + detailed, err = parseFlowerData(filter, data) + if err != nil { + return nil, err + } default: detailed = true } + case nl.TCA_CHAIN: + val := new(uint32) + *val = native.Uint32(attr.Value) + base.Chain = val } } // only return the detailed version of the filter @@ -414,6 +528,53 @@ func toAttrs(tcgen *nl.TcGen, attrs *ActionAttrs) { attrs.Bindcnt = int(tcgen.Bindcnt) } +func encodePolice(attr *nl.RtAttr, action *PoliceAction) error { + var rtab [256]uint32 + var ptab [256]uint32 + police := nl.TcPolice{} + police.Index = uint32(action.Attrs().Index) + police.Bindcnt = int32(action.Attrs().Bindcnt) + police.Capab = uint32(action.Attrs().Capab) + police.Refcnt = int32(action.Attrs().Refcnt) + police.Rate.Rate = action.Rate + police.PeakRate.Rate = action.PeakRate + police.Action = int32(action.ExceedAction) + + if police.Rate.Rate != 0 { + police.Rate.Mpu = action.Mpu + police.Rate.Overhead = action.Overhead + if CalcRtable(&police.Rate, rtab[:], action.RCellLog, action.Mtu, action.LinkLayer) < 0 { + return errors.New("TBF: failed to calculate rate table") + } + police.Burst = Xmittime(uint64(police.Rate.Rate), action.Burst) + } + + police.Mtu = action.Mtu + if police.PeakRate.Rate != 0 { + police.PeakRate.Mpu = action.Mpu + police.PeakRate.Overhead = action.Overhead + if CalcRtable(&police.PeakRate, ptab[:], action.PCellLog, action.Mtu, action.LinkLayer) < 0 { + return errors.New("POLICE: failed to calculate peak rate table") + } + } + + attr.AddRtAttr(nl.TCA_POLICE_TBF, police.Serialize()) + if police.Rate.Rate != 0 { + attr.AddRtAttr(nl.TCA_POLICE_RATE, SerializeRtab(rtab)) + } + if police.PeakRate.Rate != 0 { + attr.AddRtAttr(nl.TCA_POLICE_PEAKRATE, SerializeRtab(ptab)) + } + if action.AvRate != 0 { + attr.AddRtAttr(nl.TCA_POLICE_AVRATE, nl.Uint32Attr(action.AvRate)) + } + if action.NotExceedAction != 0 { + attr.AddRtAttr(nl.TCA_POLICE_RESULT, nl.Uint32Attr(uint32(action.NotExceedAction))) + } + + return nil +} + func EncodeActions(attr *nl.RtAttr, actions []Action) error { tabIndex := int(nl.TCA_ACT_TAB) @@ -421,6 +582,14 @@ func EncodeActions(attr *nl.RtAttr, actions []Action) error { switch action := action.(type) { default: return fmt.Errorf("unknown action type %s", action.Type()) + case *PoliceAction: + table := attr.AddRtAttr(tabIndex, nil) + tabIndex++ + table.AddRtAttr(nl.TCA_ACT_KIND, nl.ZeroTerminated("police")) + aopts := table.AddRtAttr(nl.TCA_ACT_OPTIONS, nil) + if err := encodePolice(aopts, action); err != nil { + return err + } case *MirredAction: table := attr.AddRtAttr(tabIndex, nil) tabIndex++ @@ -482,6 +651,9 @@ func EncodeActions(attr *nl.RtAttr, actions []Action) error { if action.Mark != nil { aopts.AddRtAttr(nl.TCA_SKBEDIT_MARK, nl.Uint32Attr(*action.Mark)) } + if action.Mask != nil { + aopts.AddRtAttr(nl.TCA_SKBEDIT_MASK, nl.Uint32Attr(*action.Mask)) + } case *ConnmarkAction: table := attr.AddRtAttr(tabIndex, nil) tabIndex++ @@ -492,6 +664,16 @@ func EncodeActions(attr *nl.RtAttr, actions []Action) error { } toTcGen(action.Attrs(), &connmark.TcGen) aopts.AddRtAttr(nl.TCA_CONNMARK_PARMS, connmark.Serialize()) + case *CsumAction: + table := attr.AddRtAttr(tabIndex, nil) + tabIndex++ + table.AddRtAttr(nl.TCA_ACT_KIND, nl.ZeroTerminated("csum")) + aopts := table.AddRtAttr(nl.TCA_ACT_OPTIONS, nil) + csum := nl.TcCsum{ + UpdateFlags: uint32(action.UpdateFlags), + } + toTcGen(action.Attrs(), &csum.TcGen) + aopts.AddRtAttr(nl.TCA_CSUM_PARMS, csum.Serialize()) case *BpfAction: table := attr.AddRtAttr(tabIndex, nil) tabIndex++ @@ -515,6 +697,29 @@ func EncodeActions(attr *nl.RtAttr, actions []Action) error { return nil } +func parsePolice(data syscall.NetlinkRouteAttr, police *PoliceAction) { + switch data.Attr.Type { + case nl.TCA_POLICE_RESULT: + police.NotExceedAction = TcPolAct(native.Uint32(data.Value[0:4])) + case nl.TCA_POLICE_AVRATE: + police.AvRate = native.Uint32(data.Value[0:4]) + case nl.TCA_POLICE_TBF: + p := *nl.DeserializeTcPolice(data.Value) + police.ActionAttrs = ActionAttrs{} + police.Attrs().Index = int(p.Index) + police.Attrs().Bindcnt = int(p.Bindcnt) + police.Attrs().Capab = int(p.Capab) + police.Attrs().Refcnt = int(p.Refcnt) + police.ExceedAction = TcPolAct(p.Action) + police.Rate = p.Rate.Rate + police.PeakRate = p.PeakRate.Rate + police.Burst = Xmitsize(uint64(p.Rate.Rate), p.Burst) + police.Mtu = p.Mtu + police.LinkLayer = int(p.Rate.Linklayer) & nl.TC_LINKLAYER_MASK + police.Overhead = p.Rate.Overhead + } +} + func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) { var actions []Action for _, table := range tables { @@ -537,12 +742,16 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) { action = &BpfAction{} case "connmark": action = &ConnmarkAction{} + case "csum": + action = &CsumAction{} case "gact": action = &GenericAction{} case "tunnel_key": action = &TunnelKeyAction{} case "skbedit": action = &SkbEditAction{} + case "police": + action = &PoliceAction{} default: break nextattr } @@ -587,6 +796,9 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) { case nl.TCA_SKBEDIT_MARK: mark := native.Uint32(adatum.Value[0:4]) action.(*SkbEditAction).Mark = &mark + case nl.TCA_SKBEDIT_MASK: + mask := native.Uint32(adatum.Value[0:4]) + action.(*SkbEditAction).Mask = &mask case nl.TCA_SKBEDIT_PRIORITY: priority := native.Uint32(adatum.Value[0:4]) action.(*SkbEditAction).Priority = &priority @@ -615,12 +827,25 @@ func parseActions(tables []syscall.NetlinkRouteAttr) ([]Action, error) { toAttrs(&connmark.TcGen, action.Attrs()) action.(*ConnmarkAction).Zone = connmark.Zone } + case "csum": + switch adatum.Attr.Type { + case nl.TCA_CSUM_PARMS: + csum := *nl.DeserializeTcCsum(adatum.Value) + action.(*CsumAction).ActionAttrs = ActionAttrs{} + toAttrs(&csum.TcGen, action.Attrs()) + action.(*CsumAction).UpdateFlags = CsumUpdateFlags(csum.UpdateFlags) + } case "gact": switch adatum.Attr.Type { case nl.TCA_GACT_PARMS: gen := *nl.DeserializeTcGen(adatum.Value) toAttrs(&gen, action.Attrs()) + if action.Attrs().Action.String() == "goto" { + action.(*GenericAction).Chain = TC_ACT_EXT_VAL_MASK & gen.Action + } } + case "police": + parsePolice(adatum, action.(*PoliceAction)) } } } @@ -676,7 +901,7 @@ func parseU32Data(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) } func parseFwData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) { - fw := filter.(*Fw) + fw := filter.(*FwFilter) detailed := true for _, datum := range data { switch datum.Attr.Type { @@ -687,16 +912,20 @@ func parseFwData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) { case nl.TCA_FW_INDEV: fw.InDev = string(datum.Value[:len(datum.Value)-1]) case nl.TCA_FW_POLICE: + var police PoliceAction adata, _ := nl.ParseRouteAttr(datum.Value) for _, aattr := range adata { - switch aattr.Attr.Type { - case nl.TCA_POLICE_TBF: - fw.Police = *nl.DeserializeTcPolice(aattr.Value) - case nl.TCA_POLICE_RATE: - fw.Rtab = DeserializeRtab(aattr.Value) - case nl.TCA_POLICE_PEAKRATE: - fw.Ptab = DeserializeRtab(aattr.Value) - } + parsePolice(aattr, &police) + } + fw.Police = &police + case nl.TCA_FW_ACT: + tables, err := nl.ParseRouteAttr(datum.Value) + if err != nil { + return detailed, err + } + fw.Actions, err = parseActions(tables) + if err != nil { + return detailed, err } } } @@ -722,7 +951,7 @@ func parseBpfData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) case nl.TCA_BPF_ID: bpf.Id = int(native.Uint32(datum.Value[0:4])) case nl.TCA_BPF_TAG: - bpf.Tag = hex.EncodeToString(datum.Value[:len(datum.Value)-1]) + bpf.Tag = hex.EncodeToString(datum.Value) } } return detailed, nil @@ -749,6 +978,10 @@ func parseMatchAllData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, er return detailed, nil } +func parseFlowerData(filter Filter, data []syscall.NetlinkRouteAttr) (bool, error) { + return true, filter.(*Flower).decode(data) +} + func AlignToAtm(size uint) uint { var linksize, cells int cells = int(size / nl.ATM_CELL_PAYLOAD) diff --git a/go-controller/vendor/github.com/vishvananda/netlink/handle_linux.go b/go-controller/vendor/github.com/vishvananda/netlink/handle_linux.go index 65356679d7..4cb0116878 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/handle_linux.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/handle_linux.go @@ -15,7 +15,7 @@ var pkgHandle = &Handle{} // Handle is an handle for the netlink requests on a // specific network namespace. All the requests on the // same netlink family share the same netlink socket, -// which gets released when the handle is deleted. +// which gets released when the handle is Close'd. type Handle struct { sockets map[int]*nl.SocketHandle lookupByDump bool @@ -107,6 +107,21 @@ func (h *Handle) GetSocketReceiveBufferSize() ([]int, error) { return results, nil } +// SetStrictCheck sets the strict check socket option for each socket in the netlink handle. Returns early if any set operation fails +func (h *Handle) SetStrictCheck(state bool) error { + for _, sh := range h.sockets { + var stateInt int = 0 + if state { + stateInt = 1 + } + err := unix.SetsockoptInt(sh.Socket.GetFd(), unix.SOL_NETLINK, unix.NETLINK_GET_STRICT_CHK, stateInt) + if err != nil { + return err + } + } + return nil +} + // NewHandleAt returns a netlink handle on the network namespace // specified by ns. If ns=netns.None(), current network namespace // will be assumed @@ -136,14 +151,22 @@ func newHandle(newNs, curNs netns.NsHandle, nlFamilies ...int) (*Handle, error) return h, nil } -// Delete releases the resources allocated to this handle -func (h *Handle) Delete() { +// Close releases the resources allocated to this handle +func (h *Handle) Close() { for _, sh := range h.sockets { sh.Close() } h.sockets = nil } +// Delete releases the resources allocated to this handle +// +// Deprecated: use Close instead which is in line with typical resource release +// patterns for files and other resources. +func (h *Handle) Delete() { + h.Close() +} + func (h *Handle) newNetlinkRequest(proto, flags int) *nl.NetlinkRequest { // Do this so that package API still use nl package variable nextSeqNr if h.sockets == nil { diff --git a/go-controller/vendor/github.com/vishvananda/netlink/handle_unspecified.go b/go-controller/vendor/github.com/vishvananda/netlink/handle_unspecified.go index 3a6db8137b..693ca0a5d0 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/handle_unspecified.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/handle_unspecified.go @@ -23,6 +23,8 @@ func NewHandleAtFrom(newNs, curNs netns.NsHandle) (*Handle, error) { return nil, ErrNotImplemented } +func (h *Handle) Close() {} + func (h *Handle) Delete() {} func (h *Handle) SupportsNetlinkFamily(nlFamily int) bool { @@ -161,6 +163,14 @@ func (h *Handle) LinkSetGroup(link Link, group int) error { return ErrNotImplemented } +func (h *Handle) LinkSetGSOMaxSize(link Link, maxSize int) error { + return ErrNotImplemented +} + +func (h *Handle) LinkSetGROMaxSize(link Link, maxSize int) error { + return ErrNotImplemented +} + func (h *Handle) setProtinfoAttr(link Link, mode bool, attr int) error { return ErrNotImplemented } diff --git a/go-controller/vendor/github.com/vishvananda/netlink/ipset_linux.go b/go-controller/vendor/github.com/vishvananda/netlink/ipset_linux.go index 2adc2440aa..f4c05229fa 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/ipset_linux.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/ipset_linux.go @@ -1,6 +1,7 @@ package netlink import ( + "encoding/binary" "log" "net" "syscall" @@ -11,12 +12,19 @@ import ( // IPSetEntry is used for adding, updating, retreiving and deleting entries type IPSetEntry struct { - Comment string - MAC net.HardwareAddr - IP net.IP - Timeout *uint32 - Packets *uint64 - Bytes *uint64 + Comment string + MAC net.HardwareAddr + IP net.IP + CIDR uint8 + Timeout *uint32 + Packets *uint64 + Bytes *uint64 + Protocol *uint8 + Port *uint16 + IP2 net.IP + CIDR2 uint8 + IFace string + Mark *uint32 Replace bool // replace existing entry } @@ -32,6 +40,12 @@ type IPSetResult struct { SetName string TypeName string Comment string + MarkMask uint32 + + IPFrom net.IP + IPTo net.IP + PortFrom uint16 + PortTo uint16 HashSize uint32 NumEntries uint32 @@ -52,6 +66,14 @@ type IpsetCreateOptions struct { Counters bool Comments bool Skbinfo bool + + Family uint8 + Revision uint8 + IPFrom net.IP + IPTo net.IP + PortFrom uint16 + PortTo uint16 + MaxElements uint32 } // IpsetProtocol returns the ipset protocol version from the kernel @@ -74,6 +96,11 @@ func IpsetFlush(setname string) error { return pkgHandle.IpsetFlush(setname) } +// IpsetSwap swaps two ipsets. +func IpsetSwap(setname, othersetname string) error { + return pkgHandle.IpsetSwap(setname, othersetname) +} + // IpsetList dumps an specific ipset. func IpsetList(setname string) (*IPSetResult, error) { return pkgHandle.IpsetList(setname) @@ -86,12 +113,17 @@ func IpsetListAll() ([]IPSetResult, error) { // IpsetAdd adds an entry to an existing ipset. func IpsetAdd(setname string, entry *IPSetEntry) error { - return pkgHandle.ipsetAddDel(nl.IPSET_CMD_ADD, setname, entry) + return pkgHandle.IpsetAdd(setname, entry) } // IpsetDel deletes an entry from an existing ipset. func IpsetDel(setname string, entry *IPSetEntry) error { - return pkgHandle.ipsetAddDel(nl.IPSET_CMD_DEL, setname, entry) + return pkgHandle.IpsetDel(setname, entry) +} + +// IpsetTest tests whether an entry is in a set or not. +func IpsetTest(setname string, entry *IPSetEntry) (bool, error) { + return pkgHandle.IpsetTest(setname, entry) } func (h *Handle) IpsetProtocol() (protocol uint8, minVersion uint8, err error) { @@ -114,11 +146,37 @@ func (h *Handle) IpsetCreate(setname, typename string, options IpsetCreateOption req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_SETNAME, nl.ZeroTerminated(setname))) req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_TYPENAME, nl.ZeroTerminated(typename))) - req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_REVISION, nl.Uint8Attr(0))) - req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_FAMILY, nl.Uint8Attr(2))) // 2 == inet + + revision := options.Revision + if revision == 0 { + revision = getIpsetDefaultWithTypeName(typename) + } + req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_REVISION, nl.Uint8Attr(revision))) data := nl.NewRtAttr(nl.IPSET_ATTR_DATA|int(nl.NLA_F_NESTED), nil) + var family uint8 + switch typename { + case "hash:mac": + case "bitmap:port": + buf := make([]byte, 4) + binary.BigEndian.PutUint16(buf, options.PortFrom) + binary.BigEndian.PutUint16(buf[2:], options.PortTo) + data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_PORT_FROM|int(nl.NLA_F_NET_BYTEORDER), buf[:2])) + data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_PORT_TO|int(nl.NLA_F_NET_BYTEORDER), buf[2:])) + default: + family = options.Family + if family == 0 { + family = unix.AF_INET + } + } + + req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_FAMILY, nl.Uint8Attr(family))) + + if options.MaxElements != 0 { + data.AddChild(&nl.Uint32Attribute{Type: nl.IPSET_ATTR_MAXELEM | nl.NLA_F_NET_BYTEORDER, Value: options.MaxElements}) + } + if timeout := options.Timeout; timeout != nil { data.AddChild(&nl.Uint32Attribute{Type: nl.IPSET_ATTR_TIMEOUT | nl.NLA_F_NET_BYTEORDER, Value: *timeout}) } @@ -158,6 +216,14 @@ func (h *Handle) IpsetFlush(setname string) error { return err } +func (h *Handle) IpsetSwap(setname, othersetname string) error { + req := h.newIpsetRequest(nl.IPSET_CMD_SWAP) + req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_SETNAME, nl.ZeroTerminated(setname))) + req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_TYPENAME, nl.ZeroTerminated(othersetname))) + _, err := ipsetExecute(req) + return err +} + func (h *Handle) IpsetList(name string) (*IPSetResult, error) { req := h.newIpsetRequest(nl.IPSET_CMD_LIST) req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_SETNAME, nl.ZeroTerminated(name))) @@ -187,39 +253,133 @@ func (h *Handle) IpsetListAll() ([]IPSetResult, error) { return result, nil } -func (h *Handle) ipsetAddDel(nlCmd int, setname string, entry *IPSetEntry) error { - req := h.newIpsetRequest(nlCmd) - req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_SETNAME, nl.ZeroTerminated(setname))) +// IpsetAdd adds an entry to an existing ipset. +func (h *Handle) IpsetAdd(setname string, entry *IPSetEntry) error { + return h.ipsetAddDel(nl.IPSET_CMD_ADD, setname, entry) +} - if entry.Comment != "" { - req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_COMMENT, nl.ZeroTerminated(entry.Comment))) +// IpsetDel deletes an entry from an existing ipset. +func (h *Handle) IpsetDel(setname string, entry *IPSetEntry) error { + return h.ipsetAddDel(nl.IPSET_CMD_DEL, setname, entry) +} + +func encodeIP(ip net.IP) (*nl.RtAttr, error) { + typ := int(nl.NLA_F_NET_BYTEORDER) + if ip4 := ip.To4(); ip4 != nil { + typ |= nl.IPSET_ATTR_IPADDR_IPV4 + ip = ip4 + } else { + typ |= nl.IPSET_ATTR_IPADDR_IPV6 } + return nl.NewRtAttr(typ, ip), nil +} + +func buildEntryData(entry *IPSetEntry) (*nl.RtAttr, error) { data := nl.NewRtAttr(nl.IPSET_ATTR_DATA|int(nl.NLA_F_NESTED), nil) - if !entry.Replace { - req.Flags |= unix.NLM_F_EXCL + if entry.Comment != "" { + data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_COMMENT, nl.ZeroTerminated(entry.Comment))) } if entry.Timeout != nil { data.AddChild(&nl.Uint32Attribute{Type: nl.IPSET_ATTR_TIMEOUT | nl.NLA_F_NET_BYTEORDER, Value: *entry.Timeout}) } - if entry.MAC != nil { - nestedData := nl.NewRtAttr(nl.IPSET_ATTR_ETHER|int(nl.NLA_F_NET_BYTEORDER), entry.MAC) - data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_ETHER|int(nl.NLA_F_NESTED), nestedData.Serialize())) - } + if entry.IP != nil { - nestedData := nl.NewRtAttr(nl.IPSET_ATTR_IP|int(nl.NLA_F_NET_BYTEORDER), entry.IP) + nestedData, err := encodeIP(entry.IP) + if err != nil { + return nil, err + } data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_IP|int(nl.NLA_F_NESTED), nestedData.Serialize())) } + if entry.MAC != nil { + data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_ETHER, entry.MAC)) + } + + if entry.CIDR != 0 { + data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_CIDR, nl.Uint8Attr(entry.CIDR))) + } + + if entry.IP2 != nil { + nestedData, err := encodeIP(entry.IP2) + if err != nil { + return nil, err + } + data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_IP2|int(nl.NLA_F_NESTED), nestedData.Serialize())) + } + + if entry.CIDR2 != 0 { + data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_CIDR2, nl.Uint8Attr(entry.CIDR2))) + } + + if entry.Port != nil { + if entry.Protocol == nil { + // use tcp protocol as default + val := uint8(unix.IPPROTO_TCP) + entry.Protocol = &val + } + data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_PROTO, nl.Uint8Attr(*entry.Protocol))) + buf := make([]byte, 2) + binary.BigEndian.PutUint16(buf, *entry.Port) + data.AddChild(nl.NewRtAttr(int(nl.IPSET_ATTR_PORT|nl.NLA_F_NET_BYTEORDER), buf)) + } + + if entry.IFace != "" { + data.AddChild(nl.NewRtAttr(nl.IPSET_ATTR_IFACE, nl.ZeroTerminated(entry.IFace))) + } + + if entry.Mark != nil { + data.AddChild(&nl.Uint32Attribute{Type: nl.IPSET_ATTR_MARK | nl.NLA_F_NET_BYTEORDER, Value: *entry.Mark}) + } + return data, nil +} + +func (h *Handle) ipsetAddDel(nlCmd int, setname string, entry *IPSetEntry) error { + req := h.newIpsetRequest(nlCmd) + req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_SETNAME, nl.ZeroTerminated(setname))) + + if !entry.Replace { + req.Flags |= unix.NLM_F_EXCL + } + + data, err := buildEntryData(entry) + if err != nil { + return err + } data.AddChild(&nl.Uint32Attribute{Type: nl.IPSET_ATTR_LINENO | nl.NLA_F_NET_BYTEORDER, Value: 0}) req.AddData(data) - _, err := ipsetExecute(req) + _, err = ipsetExecute(req) return err } +func (h *Handle) IpsetTest(setname string, entry *IPSetEntry) (bool, error) { + req := h.newIpsetRequest(nl.IPSET_CMD_TEST) + req.AddData(nl.NewRtAttr(nl.IPSET_ATTR_SETNAME, nl.ZeroTerminated(setname))) + + if !entry.Replace { + req.Flags |= unix.NLM_F_EXCL + } + + data, err := buildEntryData(entry) + if err != nil { + return false, err + } + req.AddData(data) + + _, err = ipsetExecute(req) + if err != nil { + if err == nl.IPSetError(nl.IPSET_ERR_EXIST) { + // not exist + return false, nil + } + return false, err + } + return true, nil +} + func (h *Handle) newIpsetRequest(cmd int) *nl.NetlinkRequest { req := h.newNetlinkRequest(cmd|(unix.NFNL_SUBSYS_IPSET<<8), nl.GetIpsetFlags(cmd)) @@ -235,6 +395,17 @@ func (h *Handle) newIpsetRequest(cmd int) *nl.NetlinkRequest { return req } +func getIpsetDefaultWithTypeName(typename string) uint8 { + switch typename { + case "hash:ip,port", + "hash:ip,port,ip", + "hash:ip,port,net", + "hash:net,port": + return 1 + } + return 0 +} + func ipsetExecute(req *nl.NetlinkRequest) (msgs [][]byte, err error) { msgs, err = req.Execute(unix.NETLINK_NETFILTER, 0) @@ -278,6 +449,8 @@ func (result *IPSetResult) unserialize(msg []byte) { result.parseAttrADT(attr.Value) case nl.IPSET_ATTR_PROTOCOL_MIN: result.ProtocolMinVersion = attr.Value[0] + case nl.IPSET_ATTR_MARKMASK: + result.MarkMask = attr.Uint32() default: log.Printf("unknown ipset attribute from kernel: %+v %v", attr, attr.Type&nl.NLA_TYPE_MASK) } @@ -307,12 +480,31 @@ func (result *IPSetResult) parseAttrData(data []byte) { switch nested.Type { case nl.IPSET_ATTR_IP | nl.NLA_F_NET_BYTEORDER: result.Entries = append(result.Entries, IPSetEntry{IP: nested.Value}) + case nl.IPSET_ATTR_IP: + result.IPFrom = nested.Value + default: + log.Printf("unknown nested ipset data attribute from kernel: %+v %v", nested, nested.Type&nl.NLA_TYPE_MASK) } } + case nl.IPSET_ATTR_IP_TO | nl.NLA_F_NESTED: + for nested := range nl.ParseAttributes(attr.Value) { + switch nested.Type { + case nl.IPSET_ATTR_IP: + result.IPTo = nested.Value + default: + log.Printf("unknown nested ipset data attribute from kernel: %+v %v", nested, nested.Type&nl.NLA_TYPE_MASK) + } + } + case nl.IPSET_ATTR_PORT_FROM | nl.NLA_F_NET_BYTEORDER: + result.PortFrom = networkOrder.Uint16(attr.Value) + case nl.IPSET_ATTR_PORT_TO | nl.NLA_F_NET_BYTEORDER: + result.PortTo = networkOrder.Uint16(attr.Value) case nl.IPSET_ATTR_CADT_LINENO | nl.NLA_F_NET_BYTEORDER: result.LineNo = attr.Uint32() case nl.IPSET_ATTR_COMMENT: result.Comment = nl.BytesToString(attr.Value) + case nl.IPSET_ATTR_MARKMASK: + result.MarkMask = attr.Uint32() default: log.Printf("unknown ipset data attribute from kernel: %+v %v", attr, attr.Type&nl.NLA_TYPE_MASK) } @@ -351,12 +543,36 @@ func parseIPSetEntry(data []byte) (entry IPSetEntry) { case nl.IPSET_ATTR_IP | nl.NLA_F_NESTED: for attr := range nl.ParseAttributes(attr.Value) { switch attr.Type { - case nl.IPSET_ATTR_IP: + case nl.IPSET_ATTR_IPADDR_IPV4, nl.IPSET_ATTR_IPADDR_IPV6: entry.IP = net.IP(attr.Value) default: log.Printf("unknown nested ADT attribute from kernel: %+v", attr) } } + case nl.IPSET_ATTR_IP2 | nl.NLA_F_NESTED: + for attr := range nl.ParseAttributes(attr.Value) { + switch attr.Type { + case nl.IPSET_ATTR_IPADDR_IPV4, nl.IPSET_ATTR_IPADDR_IPV6: + entry.IP2 = net.IP(attr.Value) + default: + log.Printf("unknown nested ADT attribute from kernel: %+v", attr) + } + } + case nl.IPSET_ATTR_CIDR: + entry.CIDR = attr.Value[0] + case nl.IPSET_ATTR_CIDR2: + entry.CIDR2 = attr.Value[0] + case nl.IPSET_ATTR_PORT | nl.NLA_F_NET_BYTEORDER: + val := networkOrder.Uint16(attr.Value) + entry.Port = &val + case nl.IPSET_ATTR_PROTO: + val := attr.Value[0] + entry.Protocol = &val + case nl.IPSET_ATTR_IFACE: + entry.IFace = nl.BytesToString(attr.Value) + case nl.IPSET_ATTR_MARK | nl.NLA_F_NET_BYTEORDER: + val := attr.Uint32() + entry.Mark = &val default: log.Printf("unknown ADT attribute from kernel: %+v", attr) } diff --git a/go-controller/vendor/github.com/vishvananda/netlink/link.go b/go-controller/vendor/github.com/vishvananda/netlink/link.go index 2e002d6951..4963e11a94 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/link.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/link.go @@ -36,15 +36,18 @@ type LinkAttrs struct { Statistics *LinkStatistics Promisc int Allmulti int + Multi int Xdp *LinkXdp EncapType string Protinfo *Protinfo OperState LinkOperState + PhysSwitchID int NetNsID int NumTxQueues int NumRxQueues int GSOMaxSize uint32 GSOMaxSegs uint32 + GROMaxSize uint32 Vfs []VfInfo // virtual functions available on link Group uint32 Slave LinkSlave @@ -263,6 +266,7 @@ type Bridge struct { AgeingTime *uint32 HelloTime *uint32 VlanFiltering *bool + VlanDefaultPVID *uint16 } func (bridge *Bridge) Attrs() *LinkAttrs { @@ -1036,6 +1040,7 @@ type Iptun struct { EncapType uint16 EncapFlags uint16 FlowBased bool + Proto uint8 } func (iptun *Iptun) Attrs() *LinkAttrs { @@ -1071,6 +1076,37 @@ func (ip6tnl *Ip6tnl) Type() string { return "ip6tnl" } +// from https://elixir.bootlin.com/linux/v5.15.4/source/include/uapi/linux/if_tunnel.h#L84 +type TunnelEncapType uint16 + +const ( + None TunnelEncapType = iota + FOU + GUE +) + +// from https://elixir.bootlin.com/linux/v5.15.4/source/include/uapi/linux/if_tunnel.h#L91 +type TunnelEncapFlag uint16 + +const ( + CSum TunnelEncapFlag = 1 << 0 + CSum6 = 1 << 1 + RemCSum = 1 << 2 +) + +// from https://elixir.bootlin.com/linux/latest/source/include/uapi/linux/ip6_tunnel.h#L12 +type IP6TunnelFlag uint16 + +const ( + IP6_TNL_F_IGN_ENCAP_LIMIT IP6TunnelFlag = 1 // don't add encapsulation limit if one isn't present in inner packet + IP6_TNL_F_USE_ORIG_TCLASS = 2 // copy the traffic class field from the inner packet + IP6_TNL_F_USE_ORIG_FLOWLABEL = 4 // copy the flowlabel from the inner packet + IP6_TNL_F_MIP6_DEV = 8 // being used for Mobile IPv6 + IP6_TNL_F_RCV_DSCP_COPY = 10 // copy DSCP from the outer packet + IP6_TNL_F_USE_ORIG_FWMARK = 20 // copy fwmark from inner packet + IP6_TNL_F_ALLOW_LOCAL_REMOTE = 40 // allow remote endpoint on the local node +) + type Sittun struct { LinkAttrs Link uint32 @@ -1131,6 +1167,7 @@ type Gretun struct { EncapFlags uint16 EncapSport uint16 EncapDport uint16 + FlowBased bool } func (gretun *Gretun) Attrs() *LinkAttrs { @@ -1174,6 +1211,7 @@ func (gtp *GTP) Type() string { } // Virtual XFRM Interfaces +// // Named "xfrmi" to prevent confusion with XFRM objects type Xfrmi struct { LinkAttrs @@ -1282,11 +1320,27 @@ func (ipoib *IPoIB) Type() string { return "ipoib" } +type BareUDP struct { + LinkAttrs + Port uint16 + EtherType uint16 + SrcPortMin uint16 + MultiProto bool +} + +func (bareudp *BareUDP) Attrs() *LinkAttrs { + return &bareudp.LinkAttrs +} + +func (bareudp *BareUDP) Type() string { + return "bareudp" +} + // iproute2 supported devices; // vlan | veth | vcan | dummy | ifb | macvlan | macvtap | // bridge | bond | ipoib | ip6tnl | ipip | sit | vxlan | // gre | gretap | ip6gre | ip6gretap | vti | vti6 | nlmon | -// bond_slave | ipvlan | xfrm +// bond_slave | ipvlan | xfrm | bareudp // LinkNotFoundError wraps the various not found errors when // getting/reading links. This is intended for better error diff --git a/go-controller/vendor/github.com/vishvananda/netlink/link_linux.go b/go-controller/vendor/github.com/vishvananda/netlink/link_linux.go index caf30c057e..524a93bb00 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/link_linux.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/link_linux.go @@ -55,8 +55,6 @@ const ( VF_LINK_STATE_DISABLE uint32 = 2 ) -var lookupByDump = false - var macvlanModes = [...]uint32{ 0, nl.MACVLAN_MODE_PRIVATE, @@ -182,6 +180,51 @@ func (h *Handle) LinkSetAllmulticastOff(link Link) error { return err } +// LinkSetMulticastOn enables the reception of multicast packets for the link device. +// Equivalent to: `ip link set $link multicast on` +func LinkSetMulticastOn(link Link) error { + return pkgHandle.LinkSetMulticastOn(link) +} + +// LinkSetMulticastOn enables the reception of multicast packets for the link device. +// Equivalent to: `ip link set $link multicast on` +func (h *Handle) LinkSetMulticastOn(link Link) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_NEWLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Change = unix.IFF_MULTICAST + msg.Flags = unix.IFF_MULTICAST + msg.Index = int32(base.Index) + req.AddData(msg) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetAllmulticastOff disables the reception of multicast packets for the link device. +// Equivalent to: `ip link set $link multicast off` +func LinkSetMulticastOff(link Link) error { + return pkgHandle.LinkSetMulticastOff(link) +} + +// LinkSetAllmulticastOff disables the reception of multicast packets for the link device. +// Equivalent to: `ip link set $link multicast off` +func (h *Handle) LinkSetMulticastOff(link Link) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_NEWLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Change = unix.IFF_MULTICAST + msg.Index = int32(base.Index) + req.AddData(msg) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + func MacvlanMACAddrAdd(link Link, addr net.HardwareAddr) error { return pkgHandle.MacvlanMACAddrAdd(link, addr) } @@ -302,6 +345,16 @@ func (h *Handle) BridgeSetVlanFiltering(link Link, on bool) error { return h.linkModify(bridge, unix.NLM_F_ACK) } +func BridgeSetVlanDefaultPVID(link Link, pvid uint16) error { + return pkgHandle.BridgeSetVlanDefaultPVID(link, pvid) +} + +func (h *Handle) BridgeSetVlanDefaultPVID(link Link, pvid uint16) error { + bridge := link.(*Bridge) + bridge.VlanDefaultPVID = &pvid + return h.linkModify(bridge, unix.NLM_F_ACK) +} + func SetPromiscOn(link Link) error { return pkgHandle.SetPromiscOn(link) } @@ -546,13 +599,13 @@ func (h *Handle) LinkSetVfVlanQos(link Link, vf, vlan, qos int) error { req.AddData(msg) data := nl.NewRtAttr(unix.IFLA_VFINFO_LIST, nil) - info := nl.NewRtAttrChild(data, nl.IFLA_VF_INFO, nil) + info := data.AddRtAttr(nl.IFLA_VF_INFO, nil) vfmsg := nl.VfVlan{ Vf: uint32(vf), Vlan: uint32(vlan), Qos: uint32(qos), } - nl.NewRtAttrChild(info, nl.IFLA_VF_VLAN, vfmsg.Serialize()) + info.AddRtAttr(nl.IFLA_VF_VLAN, vfmsg.Serialize()) req.AddData(data) _, err := req.Execute(unix.NETLINK_ROUTE, 0) @@ -903,6 +956,60 @@ func LinkSetXdpFdWithFlags(link Link, fd, flags int) error { return err } +// LinkSetGSOMaxSize sets the GSO maximum size of the link device. +// Equivalent to: `ip link set $link gso_max_size $maxSize` +func LinkSetGSOMaxSize(link Link, maxSize int) error { + return pkgHandle.LinkSetGSOMaxSize(link, maxSize) +} + +// LinkSetGSOMaxSize sets the GSO maximum size of the link device. +// Equivalent to: `ip link set $link gso_max_size $maxSize` +func (h *Handle) LinkSetGSOMaxSize(link Link, maxSize int) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + b := make([]byte, 4) + native.PutUint32(b, uint32(maxSize)) + + data := nl.NewRtAttr(unix.IFLA_GSO_MAX_SIZE, b) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + +// LinkSetGROMaxSize sets the GRO maximum size of the link device. +// Equivalent to: `ip link set $link gro_max_size $maxSize` +func LinkSetGROMaxSize(link Link, maxSize int) error { + return pkgHandle.LinkSetGROMaxSize(link, maxSize) +} + +// LinkSetGROMaxSize sets the GRO maximum size of the link device. +// Equivalent to: `ip link set $link gro_max_size $maxSize` +func (h *Handle) LinkSetGROMaxSize(link Link, maxSize int) error { + base := link.Attrs() + h.ensureIndex(base) + req := h.newNetlinkRequest(unix.RTM_SETLINK, unix.NLM_F_ACK) + + msg := nl.NewIfInfomsg(unix.AF_UNSPEC) + msg.Index = int32(base.Index) + req.AddData(msg) + + b := make([]byte, 4) + native.PutUint32(b, uint32(maxSize)) + + data := nl.NewRtAttr(unix.IFLA_GRO_MAX_SIZE, b) + req.AddData(data) + + _, err := req.Execute(unix.NETLINK_ROUTE, 0) + return err +} + func boolAttr(val bool) []byte { var v uint8 if val { @@ -1103,6 +1210,10 @@ func (h *Handle) LinkAdd(link Link) error { return h.linkModify(link, unix.NLM_F_CREATE|unix.NLM_F_EXCL|unix.NLM_F_ACK) } +func LinkModify(link Link) error { + return pkgHandle.LinkModify(link) +} + func (h *Handle) LinkModify(link Link) error { return h.linkModify(link, unix.NLM_F_REQUEST|unix.NLM_F_ACK) } @@ -1217,9 +1328,26 @@ func (h *Handle) linkModify(link Link, flags int) error { } + control := func(file *os.File, f func(fd uintptr)) error { + name := file.Name() + conn, err := file.SyscallConn() + if err != nil { + return fmt.Errorf("SyscallConn() failed on %s: %v", name, err) + } + if err := conn.Control(f); err != nil { + return fmt.Errorf("Failed to get file descriptor for %s: %v", name, err) + } + return nil + } + // only persist interface if NonPersist is NOT set if !tuntap.NonPersist { - _, _, errno := unix.Syscall(unix.SYS_IOCTL, fds[0].Fd(), uintptr(unix.TUNSETPERSIST), 1) + var errno syscall.Errno + if err := control(fds[0], func(fd uintptr) { + _, _, errno = unix.Syscall(unix.SYS_IOCTL, fd, uintptr(unix.TUNSETPERSIST), 1) + }); err != nil { + return err + } if errno != 0 { cleanupFds(fds) return fmt.Errorf("Tuntap IOCTL TUNSETPERSIST failed, errno %v", errno) @@ -1236,7 +1364,10 @@ func (h *Handle) linkModify(link Link, flags int) error { // un-persist (e.g. allow the interface to be removed) the tuntap // should not hurt if not set prior, condition might be not needed if !tuntap.NonPersist { - _, _, _ = unix.Syscall(unix.SYS_IOCTL, fds[0].Fd(), uintptr(unix.TUNSETPERSIST), 0) + // ignore error + _ = control(fds[0], func(fd uintptr) { + _, _, _ = unix.Syscall(unix.SYS_IOCTL, fd, uintptr(unix.TUNSETPERSIST), 0) + }) } cleanupFds(fds) return err @@ -1334,6 +1465,11 @@ func (h *Handle) linkModify(link Link, flags int) error { req.AddData(gsoAttr) } + if base.GROMaxSize > 0 { + groAttr := nl.NewRtAttr(unix.IFLA_GRO_MAX_SIZE, nl.Uint32Attr(base.GROMaxSize)) + req.AddData(groAttr) + } + if base.Group > 0 { groupAttr := nl.NewRtAttr(unix.IFLA_GROUP, nl.Uint32Attr(base.Group)) req.AddData(groupAttr) @@ -1446,6 +1582,8 @@ func (h *Handle) linkModify(link Link, flags int) error { addXfrmiAttrs(link, linkInfo) case *IPoIB: addIPoIBAttrs(link, linkInfo) + case *BareUDP: + addBareUDPAttrs(link, linkInfo) } req.AddData(linkInfo) @@ -1642,12 +1780,17 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) { base.RawFlags = msg.Flags base.Flags = linkFlags(msg.Flags) base.EncapType = msg.EncapType() + base.NetNsID = -1 if msg.Flags&unix.IFF_PROMISC != 0 { base.Promisc = 1 } if msg.Flags&unix.IFF_ALLMULTI != 0 { base.Allmulti = 1 } + if msg.Flags&unix.IFF_MULTICAST != 0 { + base.Multi = 1 + } + var ( link Link stats32 *LinkStatistics32 @@ -1722,6 +1865,8 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) { link = &IPoIB{} case "can": link = &Can{} + case "bareudp": + link = &BareUDP{} default: link = &GenericLink{LinkType: linkType} } @@ -1777,7 +1922,10 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) { parseIPoIBData(link, data) case "can": parseCanData(link, data) + case "bareudp": + parseBareUDPData(link, data) } + case nl.IFLA_INFO_SLAVE_KIND: slaveType = string(info.Value[:len(info.Value)-1]) switch slaveType { @@ -1854,12 +2002,16 @@ func LinkDeserialize(hdr *unix.NlMsghdr, m []byte) (Link, error) { } case unix.IFLA_OPERSTATE: base.OperState = LinkOperState(uint8(attr.Value[0])) + case unix.IFLA_PHYS_SWITCH_ID: + base.PhysSwitchID = int(native.Uint32(attr.Value[0:4])) case unix.IFLA_LINK_NETNSID: base.NetNsID = int(native.Uint32(attr.Value[0:4])) case unix.IFLA_GSO_MAX_SIZE: base.GSOMaxSize = native.Uint32(attr.Value[0:4]) case unix.IFLA_GSO_MAX_SEGS: base.GSOMaxSegs = native.Uint32(attr.Value[0:4]) + case unix.IFLA_GRO_MAX_SIZE: + base.GROMaxSize = native.Uint32(attr.Value[0:4]) case unix.IFLA_VFINFO_LIST: data, err := nl.ParseRouteAttr(attr.Value) if err != nil { @@ -2042,7 +2194,8 @@ func linkSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- LinkUpdate, done <-c msgs, from, err := s.Receive() if err != nil { if cberr != nil { - cberr(err) + cberr(fmt.Errorf("Receive failed: %v", + err)) } return } @@ -2062,9 +2215,10 @@ func linkSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- LinkUpdate, done <-c continue } if cberr != nil { - cberr(syscall.Errno(-error)) + cberr(fmt.Errorf("error message: %v", + syscall.Errno(-error))) } - return + continue } ifmsg := nl.DeserializeIfInfomsg(m.Data) header := unix.NlMsghdr(m.Header) @@ -2073,7 +2227,7 @@ func linkSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- LinkUpdate, done <-c if cberr != nil { cberr(err) } - return + continue } ch <- LinkUpdate{IfInfomsg: *ifmsg, Header: header, Link: link} } @@ -2570,7 +2724,7 @@ func addGretapAttrs(gretap *Gretap, linkInfo *nl.RtAttr) { if gretap.FlowBased { // In flow based mode, no other attributes need to be configured - data.AddRtAttr(nl.IFLA_GRE_COLLECT_METADATA, boolAttr(gretap.FlowBased)) + data.AddRtAttr(nl.IFLA_GRE_COLLECT_METADATA, []byte{}) return } @@ -2653,6 +2807,12 @@ func parseGretapData(link Link, data []syscall.NetlinkRouteAttr) { func addGretunAttrs(gre *Gretun, linkInfo *nl.RtAttr) { data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + if gre.FlowBased { + // In flow based mode, no other attributes need to be configured + data.AddRtAttr(nl.IFLA_GRE_COLLECT_METADATA, []byte{}) + return + } + if ip := gre.Local; ip != nil { if ip.To4() != nil { ip = ip.To4() @@ -2723,6 +2883,8 @@ func parseGretunData(link Link, data []syscall.NetlinkRouteAttr) { gre.EncapSport = ntohs(datum.Value[0:2]) case nl.IFLA_GRE_ENCAP_DPORT: gre.EncapDport = ntohs(datum.Value[0:2]) + case nl.IFLA_GRE_COLLECT_METADATA: + gre.FlowBased = true } } } @@ -2791,6 +2953,7 @@ func addIptunAttrs(iptun *Iptun, linkInfo *nl.RtAttr) { data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_FLAGS, nl.Uint16Attr(iptun.EncapFlags)) data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_SPORT, htons(iptun.EncapSport)) data.AddRtAttr(nl.IFLA_IPTUN_ENCAP_DPORT, htons(iptun.EncapDport)) + data.AddRtAttr(nl.IFLA_IPTUN_PROTO, nl.Uint8Attr(iptun.Proto)) } func parseIptunData(link Link, data []syscall.NetlinkRouteAttr) { @@ -2821,6 +2984,8 @@ func parseIptunData(link Link, data []syscall.NetlinkRouteAttr) { iptun.EncapFlags = native.Uint16(datum.Value[0:2]) case nl.IFLA_IPTUN_COLLECT_METADATA: iptun.FlowBased = true + case nl.IFLA_IPTUN_PROTO: + iptun.Proto = datum.Value[0] } } } @@ -3029,6 +3194,9 @@ func addBridgeAttrs(bridge *Bridge, linkInfo *nl.RtAttr) { if bridge.VlanFiltering != nil { data.AddRtAttr(nl.IFLA_BR_VLAN_FILTERING, boolToByte(*bridge.VlanFiltering)) } + if bridge.VlanDefaultPVID != nil { + data.AddRtAttr(nl.IFLA_BR_VLAN_DEFAULT_PVID, nl.Uint16Attr(*bridge.VlanDefaultPVID)) + } } func parseBridgeData(bridge Link, data []syscall.NetlinkRouteAttr) { @@ -3047,6 +3215,9 @@ func parseBridgeData(bridge Link, data []syscall.NetlinkRouteAttr) { case nl.IFLA_BR_VLAN_FILTERING: vlanFiltering := datum.Value[0] == 1 br.VlanFiltering = &vlanFiltering + case nl.IFLA_BR_VLAN_DEFAULT_PVID: + vlanDefaultPVID := native.Uint16(datum.Value[0:2]) + br.VlanDefaultPVID = &vlanDefaultPVID } } } @@ -3143,8 +3314,9 @@ func parseVfInfo(data []syscall.NetlinkRouteAttr, id int) VfInfo { func addXfrmiAttrs(xfrmi *Xfrmi, linkInfo *nl.RtAttr) { data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) data.AddRtAttr(nl.IFLA_XFRM_LINK, nl.Uint32Attr(uint32(xfrmi.ParentIndex))) - data.AddRtAttr(nl.IFLA_XFRM_IF_ID, nl.Uint32Attr(xfrmi.Ifid)) - + if xfrmi.Ifid != 0 { + data.AddRtAttr(nl.IFLA_XFRM_IF_ID, nl.Uint32Attr(xfrmi.Ifid)) + } } func parseXfrmiData(link Link, data []syscall.NetlinkRouteAttr) { @@ -3347,3 +3519,32 @@ func addIPoIBAttrs(ipoib *IPoIB, linkInfo *nl.RtAttr) { data.AddRtAttr(nl.IFLA_IPOIB_MODE, nl.Uint16Attr(uint16(ipoib.Mode))) data.AddRtAttr(nl.IFLA_IPOIB_UMCAST, nl.Uint16Attr(uint16(ipoib.Umcast))) } + +func addBareUDPAttrs(bareudp *BareUDP, linkInfo *nl.RtAttr) { + data := linkInfo.AddRtAttr(nl.IFLA_INFO_DATA, nil) + + data.AddRtAttr(nl.IFLA_BAREUDP_PORT, nl.Uint16Attr(nl.Swap16(bareudp.Port))) + data.AddRtAttr(nl.IFLA_BAREUDP_ETHERTYPE, nl.Uint16Attr(nl.Swap16(bareudp.EtherType))) + if bareudp.SrcPortMin != 0 { + data.AddRtAttr(nl.IFLA_BAREUDP_SRCPORT_MIN, nl.Uint16Attr(bareudp.SrcPortMin)) + } + if bareudp.MultiProto { + data.AddRtAttr(nl.IFLA_BAREUDP_MULTIPROTO_MODE, []byte{}) + } +} + +func parseBareUDPData(link Link, data []syscall.NetlinkRouteAttr) { + bareudp := link.(*BareUDP) + for _, attr := range data { + switch attr.Attr.Type { + case nl.IFLA_BAREUDP_PORT: + bareudp.Port = binary.BigEndian.Uint16(attr.Value) + case nl.IFLA_BAREUDP_ETHERTYPE: + bareudp.EtherType = binary.BigEndian.Uint16(attr.Value) + case nl.IFLA_BAREUDP_SRCPORT_MIN: + bareudp.SrcPortMin = native.Uint16(attr.Value) + case nl.IFLA_BAREUDP_MULTIPROTO_MODE: + bareudp.MultiProto = true + } + } +} diff --git a/go-controller/vendor/github.com/vishvananda/netlink/neigh.go b/go-controller/vendor/github.com/vishvananda/netlink/neigh.go index 379e5655f7..32d722e885 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/neigh.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/neigh.go @@ -12,6 +12,7 @@ type Neigh struct { State int Type int Flags int + FlagsExt int IP net.IP HardwareAddr net.HardwareAddr LLIPAddr net.IP //Used in the case of NHRP diff --git a/go-controller/vendor/github.com/vishvananda/netlink/neigh_linux.go b/go-controller/vendor/github.com/vishvananda/netlink/neigh_linux.go index f2c70ba964..4bf63037a5 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/neigh_linux.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/neigh_linux.go @@ -24,7 +24,11 @@ const ( NDA_MASTER NDA_LINK_NETNSID NDA_SRC_VNI - NDA_MAX = NDA_SRC_VNI + NDA_PROTOCOL + NDA_NH_ID + NDA_FDB_EXT_ATTRS + NDA_FLAGS_EXT + NDA_MAX = NDA_FLAGS_EXT ) // Neighbor Cache Entry States. @@ -42,11 +46,19 @@ const ( // Neighbor Flags const ( - NTF_USE = 0x01 - NTF_SELF = 0x02 - NTF_MASTER = 0x04 - NTF_PROXY = 0x08 - NTF_ROUTER = 0x80 + NTF_USE = 0x01 + NTF_SELF = 0x02 + NTF_MASTER = 0x04 + NTF_PROXY = 0x08 + NTF_EXT_LEARNED = 0x10 + NTF_OFFLOADED = 0x20 + NTF_STICKY = 0x40 + NTF_ROUTER = 0x80 +) + +// Extended Neighbor Flags +const ( + NTF_EXT_MANAGED = 0x00000001 ) // Ndmsg is for adding, removing or receiving information about a neighbor table entry @@ -162,11 +174,16 @@ func neighHandle(neigh *Neigh, req *nl.NetlinkRequest) error { if neigh.LLIPAddr != nil { llIPData := nl.NewRtAttr(NDA_LLADDR, neigh.LLIPAddr.To4()) req.AddData(llIPData) - } else if neigh.Flags != NTF_PROXY || neigh.HardwareAddr != nil { + } else if neigh.HardwareAddr != nil { hwData := nl.NewRtAttr(NDA_LLADDR, []byte(neigh.HardwareAddr)) req.AddData(hwData) } + if neigh.FlagsExt != 0 { + flagsExtData := nl.NewRtAttr(NDA_FLAGS_EXT, nl.Uint32Attr(uint32(neigh.FlagsExt))) + req.AddData(flagsExtData) + } + if neigh.Vlan != 0 { vlanData := nl.NewRtAttr(NDA_VLAN, nl.Uint16Attr(uint16(neigh.Vlan))) req.AddData(vlanData) @@ -305,6 +322,8 @@ func NeighDeserialize(m []byte) (*Neigh, error) { } else { neigh.HardwareAddr = net.HardwareAddr(attr.Value) } + case NDA_FLAGS_EXT: + neigh.FlagsExt = int(native.Uint32(attr.Value[0:4])) case NDA_VLAN: neigh.Vlan = int(native.Uint16(attr.Value[0:2])) case NDA_VNI: @@ -351,10 +370,9 @@ func NeighSubscribeWithOptions(ch chan<- NeighUpdate, done <-chan struct{}, opti func neighSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- NeighUpdate, done <-chan struct{}, cberr func(error), listExisting bool) error { s, err := nl.SubscribeAt(newNs, curNs, unix.NETLINK_ROUTE, unix.RTNLGRP_NEIGH) makeRequest := func(family int) error { - req := pkgHandle.newNetlinkRequest(unix.RTM_GETNEIGH, - unix.NLM_F_DUMP) - infmsg := nl.NewIfInfomsg(family) - req.AddData(infmsg) + req := pkgHandle.newNetlinkRequest(unix.RTM_GETNEIGH, unix.NLM_F_DUMP) + ndmsg := &Ndmsg{Family: uint8(family)} + req.AddData(ndmsg) if err := s.Send(req); err != nil { return err } @@ -408,12 +426,12 @@ func neighSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- NeighUpdate, done < continue } if m.Header.Type == unix.NLMSG_ERROR { - error := int32(native.Uint32(m.Data[0:4])) - if error == 0 { + nError := int32(native.Uint32(m.Data[0:4])) + if nError == 0 { continue } if cberr != nil { - cberr(syscall.Errno(-error)) + cberr(syscall.Errno(-nError)) } return } diff --git a/go-controller/vendor/github.com/vishvananda/netlink/netlink_unspecified.go b/go-controller/vendor/github.com/vishvananda/netlink/netlink_unspecified.go index 71436f25cc..c641949052 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/netlink_unspecified.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/netlink_unspecified.go @@ -124,6 +124,14 @@ func LinkSetTxQLen(link Link, qlen int) error { return ErrNotImplemented } +func LinkSetGSOMaxSize(link Link, maxSize int) error { + return ErrNotImplemented +} + +func LinkSetGROMaxSize(link Link, maxSize int) error { + return ErrNotImplemented +} + func LinkAdd(link Link) error { return ErrNotImplemented } @@ -180,14 +188,30 @@ func RouteAdd(route *Route) error { return ErrNotImplemented } +func RouteAppend(route *Route) error { + return ErrNotImplemented +} + func RouteDel(route *Route) error { return ErrNotImplemented } +func RouteGet(destination net.IP) ([]Route, error) { + return nil, ErrNotImplemented +} + func RouteList(link Link, family int) ([]Route, error) { return nil, ErrNotImplemented } +func RouteListFiltered(family int, filter *Route, filterMask uint64) ([]Route, error) { + return nil, ErrNotImplemented +} + +func RouteReplace(route *Route) error { + return ErrNotImplemented +} + func XfrmPolicyAdd(policy *XfrmPolicy) error { return ErrNotImplemented } @@ -200,6 +224,10 @@ func XfrmPolicyList(family int) ([]XfrmPolicy, error) { return nil, ErrNotImplemented } +func XfrmPolicyGet(policy *XfrmPolicy) (*XfrmPolicy, error) { + return nil, ErrNotImplemented +} + func XfrmStateAdd(policy *XfrmState) error { return ErrNotImplemented } diff --git a/go-controller/vendor/github.com/vishvananda/netlink/netns_linux.go b/go-controller/vendor/github.com/vishvananda/netlink/netns_linux.go index 77cf6f4690..2eb29c7ce0 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/netns_linux.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/netns_linux.go @@ -87,7 +87,7 @@ func (h *Handle) getNetNsId(attrType int, val uint32) (int, error) { rtgen := nl.NewRtGenMsg() req.AddData(rtgen) - b := make([]byte, 4, 4) + b := make([]byte, 4) native.PutUint32(b, val) attr := nl.NewRtAttr(attrType, b) req.AddData(attr) @@ -126,12 +126,12 @@ func (h *Handle) setNetNsId(attrType int, val uint32, newnsid uint32) error { rtgen := nl.NewRtGenMsg() req.AddData(rtgen) - b := make([]byte, 4, 4) + b := make([]byte, 4) native.PutUint32(b, val) attr := nl.NewRtAttr(attrType, b) req.AddData(attr) - b1 := make([]byte, 4, 4) + b1 := make([]byte, 4) native.PutUint32(b1, newnsid) attr1 := nl.NewRtAttr(NETNSA_NSID, b1) req.AddData(attr1) diff --git a/go-controller/vendor/github.com/vishvananda/netlink/nl/conntrack_linux.go b/go-controller/vendor/github.com/vishvananda/netlink/nl/conntrack_linux.go index 0e0ff55b29..8659cd1302 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/nl/conntrack_linux.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/nl/conntrack_linux.go @@ -44,6 +44,7 @@ const ( NLA_F_NESTED uint16 = (1 << 15) // #define NLA_F_NESTED (1 << 15) NLA_F_NET_BYTEORDER uint16 = (1 << 14) // #define NLA_F_NESTED (1 << 14) NLA_TYPE_MASK = ^(NLA_F_NESTED | NLA_F_NET_BYTEORDER) + NLA_ALIGNTO uint16 = 4 // #define NLA_ALIGNTO 4 ) // enum ctattr_type { diff --git a/go-controller/vendor/github.com/vishvananda/netlink/nl/devlink_linux.go b/go-controller/vendor/github.com/vishvananda/netlink/nl/devlink_linux.go index 2410922d10..2995da492f 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/nl/devlink_linux.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/nl/devlink_linux.go @@ -16,6 +16,7 @@ const ( DEVLINK_CMD_PORT_DEL = 8 DEVLINK_CMD_ESWITCH_GET = 29 DEVLINK_CMD_ESWITCH_SET = 30 + DEVLINK_CMD_INFO_GET = 51 ) const ( @@ -30,6 +31,13 @@ const ( DEVLINK_ATTR_ESWITCH_INLINE_MODE = 26 DEVLINK_ATTR_ESWITCH_ENCAP_MODE = 62 DEVLINK_ATTR_PORT_FLAVOUR = 77 + DEVLINK_ATTR_INFO_DRIVER_NAME = 98 + DEVLINK_ATTR_INFO_SERIAL_NUMBER = 99 + DEVLINK_ATTR_INFO_VERSION_FIXED = 100 + DEVLINK_ATTR_INFO_VERSION_RUNNING = 101 + DEVLINK_ATTR_INFO_VERSION_STORED = 102 + DEVLINK_ATTR_INFO_VERSION_NAME = 103 + DEVLINK_ATTR_INFO_VERSION_VALUE = 104 DEVLINK_ATTR_PORT_PCI_PF_NUMBER = 127 DEVLINK_ATTR_PORT_FUNCTION = 145 DEVLINK_ATTR_PORT_CONTROLLER_NUMBER = 150 diff --git a/go-controller/vendor/github.com/vishvananda/netlink/nl/ipset_linux.go b/go-controller/vendor/github.com/vishvananda/netlink/nl/ipset_linux.go index a60b4b09d9..89dd009df1 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/nl/ipset_linux.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/nl/ipset_linux.go @@ -88,6 +88,11 @@ const ( SET_ATTR_CREATE_MAX ) +const ( + IPSET_ATTR_IPADDR_IPV4 = 1 + IPSET_ATTR_IPADDR_IPV6 = 2 +) + /* ADT specific attributes */ const ( IPSET_ATTR_ETHER = IPSET_ATTR_CADT_MAX + iota + 1 diff --git a/go-controller/vendor/github.com/vishvananda/netlink/nl/link_linux.go b/go-controller/vendor/github.com/vishvananda/netlink/nl/link_linux.go index c72cc436e6..e10edbc09d 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/nl/link_linux.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/nl/link_linux.go @@ -709,3 +709,12 @@ const ( IFLA_CAN_BITRATE_MAX IFLA_CAN_MAX = IFLA_CAN_BITRATE_MAX ) + +const ( + IFLA_BAREUDP_UNSPEC = iota + IFLA_BAREUDP_PORT + IFLA_BAREUDP_ETHERTYPE + IFLA_BAREUDP_SRCPORT_MIN + IFLA_BAREUDP_MULTIPROTO_MODE + IFLA_BAREUDP_MAX = IFLA_BAREUDP_MULTIPROTO_MODE +) diff --git a/go-controller/vendor/github.com/vishvananda/netlink/nl/lwt_linux.go b/go-controller/vendor/github.com/vishvananda/netlink/nl/lwt_linux.go new file mode 100644 index 0000000000..bafd593c4f --- /dev/null +++ b/go-controller/vendor/github.com/vishvananda/netlink/nl/lwt_linux.go @@ -0,0 +1,29 @@ +package nl + +const ( + LWT_BPF_PROG_UNSPEC = iota + LWT_BPF_PROG_FD + LWT_BPF_PROG_NAME + __LWT_BPF_PROG_MAX +) + +const ( + LWT_BPF_PROG_MAX = __LWT_BPF_PROG_MAX - 1 +) + +const ( + LWT_BPF_UNSPEC = iota + LWT_BPF_IN + LWT_BPF_OUT + LWT_BPF_XMIT + LWT_BPF_XMIT_HEADROOM + __LWT_BPF_MAX +) + +const ( + LWT_BPF_MAX = __LWT_BPF_MAX - 1 +) + +const ( + LWT_BPF_MAX_HEADROOM = 256 +) diff --git a/go-controller/vendor/github.com/vishvananda/netlink/nl/nl_linux.go b/go-controller/vendor/github.com/vishvananda/netlink/nl/nl_linux.go index dcd4b94695..d3e5ed3925 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/nl/nl_linux.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/nl/nl_linux.go @@ -27,7 +27,8 @@ const ( // tc rules or filters, or other more memory requiring data. RECEIVE_BUFFER_SIZE = 65536 // Kernel netlink pid - PidKernel uint32 = 0 + PidKernel uint32 = 0 + SizeofCnMsgOp = 0x18 ) // SupportedNlFamilies contains the list of netlink families this netlink package supports @@ -38,6 +39,9 @@ var nextSeqNr uint32 // Default netlink socket timeout, 60s var SocketTimeoutTv = unix.Timeval{Sec: 60, Usec: 0} +// ErrorMessageReporting is the default error message reporting configuration for the new netlink sockets +var EnableErrorMessageReporting bool = false + // GetIPFamily returns the family type of a net.IP. func GetIPFamily(ip net.IP) int { if len(ip) <= net.IPv4len { @@ -80,11 +84,69 @@ func Swap32(i uint32) uint32 { return (i&0xff000000)>>24 | (i&0xff0000)>>8 | (i&0xff00)<<8 | (i&0xff)<<24 } +const ( + NLMSGERR_ATTR_UNUSED = 0 + NLMSGERR_ATTR_MSG = 1 + NLMSGERR_ATTR_OFFS = 2 + NLMSGERR_ATTR_COOKIE = 3 + NLMSGERR_ATTR_POLICY = 4 +) + type NetlinkRequestData interface { Len() int Serialize() []byte } +const ( + PROC_CN_MCAST_LISTEN = 1 + PROC_CN_MCAST_IGNORE +) + +type CbID struct { + Idx uint32 + Val uint32 +} + +type CnMsg struct { + ID CbID + Seq uint32 + Ack uint32 + Length uint16 + Flags uint16 +} + +type CnMsgOp struct { + CnMsg + // here we differ from the C header + Op uint32 +} + +func NewCnMsg(idx, val, op uint32) *CnMsgOp { + var cm CnMsgOp + + cm.ID.Idx = idx + cm.ID.Val = val + + cm.Ack = 0 + cm.Seq = 1 + cm.Length = uint16(binary.Size(op)) + cm.Op = op + + return &cm +} + +func (msg *CnMsgOp) Serialize() []byte { + return (*(*[SizeofCnMsgOp]byte)(unsafe.Pointer(msg)))[:] +} + +func DeserializeCnMsgOp(b []byte) *CnMsgOp { + return (*CnMsgOp)(unsafe.Pointer(&b[0:SizeofCnMsgOp][0])) +} + +func (msg *CnMsgOp) Len() int { + return SizeofCnMsgOp +} + // IfInfomsg is related to links, but it is used for list requests as well type IfInfomsg struct { unix.IfInfomsg @@ -252,6 +314,12 @@ func (msg *IfInfomsg) EncapType() string { return fmt.Sprintf("unknown%d", msg.Type) } +// Round the length of a netlink message up to align it properly. +// Taken from syscall/netlink_linux.go by The Go Authors under BSD-style license. +func nlmAlignOf(msglen int) int { + return (msglen + syscall.NLMSG_ALIGNTO - 1) & ^(syscall.NLMSG_ALIGNTO - 1) +} + func rtaAlignOf(attrlen int) int { return (attrlen + unix.RTA_ALIGNTO - 1) & ^(unix.RTA_ALIGNTO - 1) } @@ -262,6 +330,19 @@ func NewIfInfomsgChild(parent *RtAttr, family int) *IfInfomsg { return msg } +type Uint32Bitfield struct { + Value uint32 + Selector uint32 +} + +func (a *Uint32Bitfield) Serialize() []byte { + return (*(*[SizeofUint32Bitfield]byte)(unsafe.Pointer(a)))[:] +} + +func DeserializeUint32Bitfield(data []byte) *Uint32Bitfield { + return (*Uint32Bitfield)(unsafe.Pointer(&data[0:SizeofUint32Bitfield][0])) +} + type Uint32Attribute struct { Type uint16 Value uint32 @@ -436,6 +517,11 @@ func (req *NetlinkRequest) Execute(sockType int, resType uint16) ([][]byte, erro if err := s.SetReceiveTimeout(&SocketTimeoutTv); err != nil { return nil, err } + if EnableErrorMessageReporting { + if err := s.SetExtAck(true); err != nil { + return nil, err + } + } defer s.Close() } else { @@ -475,11 +561,37 @@ done: } if m.Header.Type == unix.NLMSG_DONE || m.Header.Type == unix.NLMSG_ERROR { native := NativeEndian() - error := int32(native.Uint32(m.Data[0:4])) - if error == 0 { + errno := int32(native.Uint32(m.Data[0:4])) + if errno == 0 { break done } - return nil, syscall.Errno(-error) + var err error + err = syscall.Errno(-errno) + + unreadData := m.Data[4:] + if m.Header.Flags|unix.NLM_F_ACK_TLVS != 0 && len(unreadData) > syscall.SizeofNlMsghdr { + // Skip the echoed request message. + echoReqH := (*syscall.NlMsghdr)(unsafe.Pointer(&unreadData[0])) + unreadData = unreadData[nlmAlignOf(int(echoReqH.Len)):] + + // Annotate `err` using nlmsgerr attributes. + for len(unreadData) >= syscall.SizeofRtAttr { + attr := (*syscall.RtAttr)(unsafe.Pointer(&unreadData[0])) + attrData := unreadData[syscall.SizeofRtAttr:attr.Len] + + switch attr.Type { + case NLMSGERR_ATTR_MSG: + err = fmt.Errorf("%w: %s", err, string(attrData)) + + default: + // TODO: handle other NLMSGERR_ATTR types + } + + unreadData = unreadData[rtaAlignOf(int(attr.Len)):] + } + } + + return nil, err } if resType != 0 && m.Header.Type != resType { continue @@ -694,6 +806,16 @@ func (s *NetlinkSocket) SetReceiveTimeout(timeout *unix.Timeval) error { return unix.SetsockoptTimeval(int(s.fd), unix.SOL_SOCKET, unix.SO_RCVTIMEO, timeout) } +// SetExtAck requests error messages to be reported on the socket +func (s *NetlinkSocket) SetExtAck(enable bool) error { + var enableN int + if enable { + enableN = 1 + } + + return unix.SetsockoptInt(int(s.fd), unix.SOL_NETLINK, unix.NETLINK_EXT_ACK, enableN) +} + func (s *NetlinkSocket) GetPid() (uint32, error) { fd := int(atomic.LoadInt32(&s.fd)) lsa, err := unix.Getsockname(fd) diff --git a/go-controller/vendor/github.com/vishvananda/netlink/nl/rdma_link_linux.go b/go-controller/vendor/github.com/vishvananda/netlink/nl/rdma_link_linux.go index 1224b747de..ce43ee1550 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/nl/rdma_link_linux.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/nl/rdma_link_linux.go @@ -11,6 +11,8 @@ const ( const ( RDMA_NLDEV_CMD_GET = 1 RDMA_NLDEV_CMD_SET = 2 + RDMA_NLDEV_CMD_NEWLINK = 3 + RDMA_NLDEV_CMD_DELLINK = 4 RDMA_NLDEV_CMD_SYS_GET = 6 RDMA_NLDEV_CMD_SYS_SET = 7 ) @@ -30,6 +32,8 @@ const ( RDMA_NLDEV_ATTR_PORT_STATE = 12 RDMA_NLDEV_ATTR_PORT_PHYS_STATE = 13 RDMA_NLDEV_ATTR_DEV_NODE_TYPE = 14 + RDMA_NLDEV_ATTR_NDEV_NAME = 51 + RDMA_NLDEV_ATTR_LINK_TYPE = 65 RDMA_NLDEV_SYS_ATTR_NETNS_MODE = 66 RDMA_NLDEV_NET_NS_FD = 68 ) diff --git a/go-controller/vendor/github.com/vishvananda/netlink/nl/route_linux.go b/go-controller/vendor/github.com/vishvananda/netlink/nl/route_linux.go index 03c1900ffa..c26f3bf91a 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/nl/route_linux.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/nl/route_linux.go @@ -48,7 +48,9 @@ type RtNexthop struct { } func DeserializeRtNexthop(b []byte) *RtNexthop { - return (*RtNexthop)(unsafe.Pointer(&b[0:unix.SizeofRtNexthop][0])) + return &RtNexthop{ + RtNexthop: *((*unix.RtNexthop)(unsafe.Pointer(&b[0:unix.SizeofRtNexthop][0]))), + } } func (msg *RtNexthop) Len() int { diff --git a/go-controller/vendor/github.com/vishvananda/netlink/nl/seg6_linux.go b/go-controller/vendor/github.com/vishvananda/netlink/nl/seg6_linux.go index 5774cbb15a..fe88285f20 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/nl/seg6_linux.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/nl/seg6_linux.go @@ -23,7 +23,7 @@ func (s1 *IPv6SrHdr) Equal(s2 IPv6SrHdr) bool { return false } for i := range s1.Segments { - if s1.Segments[i].Equal(s2.Segments[i]) != true { + if !s1.Segments[i].Equal(s2.Segments[i]) { return false } } @@ -89,7 +89,7 @@ func DecodeSEG6Encap(buf []byte) (int, []net.IP, error) { } buf = buf[12:] if len(buf)%16 != 0 { - err := fmt.Errorf("DecodeSEG6Encap: error parsing Segment List (buf len: %d)\n", len(buf)) + err := fmt.Errorf("DecodeSEG6Encap: error parsing Segment List (buf len: %d)", len(buf)) return mode, nil, err } for len(buf) > 0 { diff --git a/go-controller/vendor/github.com/vishvananda/netlink/nl/syscall.go b/go-controller/vendor/github.com/vishvananda/netlink/nl/syscall.go index 4a01e6e595..bdf6ba6395 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/nl/syscall.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/nl/syscall.go @@ -1,6 +1,6 @@ package nl -// syscall package lack of rule atributes type. +// syscall package lack of rule attributes type. // Thus there are defined below const ( FRA_UNSPEC = iota diff --git a/go-controller/vendor/github.com/vishvananda/netlink/nl/tc_linux.go b/go-controller/vendor/github.com/vishvananda/netlink/nl/tc_linux.go index c24d53eb79..d8b60c2ed7 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/nl/tc_linux.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/nl/tc_linux.go @@ -2,7 +2,10 @@ package nl import ( "encoding/binary" + "fmt" "unsafe" + + "golang.org/x/sys/unix" ) // LinkLayer @@ -42,7 +45,14 @@ const ( TCA_FCNT TCA_STATS2 TCA_STAB - TCA_MAX = TCA_STAB + TCA_PAD + TCA_DUMP_INVISIBLE + TCA_CHAIN + TCA_HW_OFFLOAD + TCA_INGRESS_BLOCK + TCA_EGRESS_BLOCK + TCA_DUMP_FLAGS + TCA_MAX = TCA_DUMP_FLAGS ) const ( @@ -56,6 +66,12 @@ const ( TCA_ACT_OPTIONS TCA_ACT_INDEX TCA_ACT_STATS + TCA_ACT_PAD + TCA_ACT_COOKIE + TCA_ACT_FLAGS + TCA_ACT_HW_STATS + TCA_ACT_USED_HW_STATS + TCA_ACT_IN_HW_COUNT TCA_ACT_MAX ) @@ -88,8 +104,9 @@ const ( SizeofTcHtbGlob = 0x14 SizeofTcU32Key = 0x10 SizeofTcU32Sel = 0x10 // without keys - SizeofTcGen = 0x14 + SizeofTcGen = 0x16 SizeofTcConnmark = SizeofTcGen + 0x04 + SizeofTcCsum = SizeofTcGen + 0x04 SizeofTcMirred = SizeofTcGen + 0x08 SizeofTcTunnelKey = SizeofTcGen + 0x04 SizeofTcSkbEdit = SizeofTcGen @@ -97,6 +114,7 @@ const ( SizeofTcSfqQopt = 0x0b SizeofTcSfqRedStats = 0x18 SizeofTcSfqQoptV1 = SizeofTcSfqQopt + SizeofTcSfqRedStats + 0x1c + SizeofUint32Bitfield = 0x8 ) // struct tcmsg { @@ -694,6 +712,36 @@ func (x *TcConnmark) Serialize() []byte { return (*(*[SizeofTcConnmark]byte)(unsafe.Pointer(x)))[:] } +const ( + TCA_CSUM_UNSPEC = iota + TCA_CSUM_PARMS + TCA_CSUM_TM + TCA_CSUM_PAD + TCA_CSUM_MAX = TCA_CSUM_PAD +) + +// struct tc_csum { +// tc_gen; +// __u32 update_flags; +// } + +type TcCsum struct { + TcGen + UpdateFlags uint32 +} + +func (msg *TcCsum) Len() int { + return SizeofTcCsum +} + +func DeserializeTcCsum(b []byte) *TcCsum { + return (*TcCsum)(unsafe.Pointer(&b[0:SizeofTcCsum][0])) +} + +func (x *TcCsum) Serialize() []byte { + return (*(*[SizeofTcCsum]byte)(unsafe.Pointer(x)))[:] +} + const ( TCA_ACT_MIRRED = 8 ) @@ -773,7 +821,8 @@ const ( TCA_SKBEDIT_MARK TCA_SKBEDIT_PAD TCA_SKBEDIT_PTYPE - TCA_SKBEDIT_MAX = TCA_SKBEDIT_MARK + TCA_SKBEDIT_MASK + TCA_SKBEDIT_MAX ) type TcSkbEdit struct { @@ -860,6 +909,10 @@ const ( TCA_FQ_FLOW_REFILL_DELAY // flow credit refill delay in usec TCA_FQ_ORPHAN_MASK // mask applied to orphaned skb hashes TCA_FQ_LOW_RATE_THRESHOLD // per packet delay under this rate + TCA_FQ_CE_THRESHOLD // DCTCP-like CE-marking threshold + TCA_FQ_TIMER_SLACK // timer slack + TCA_FQ_HORIZON // time horizon in us + TCA_FQ_HORIZON_DROP // drop packets beyond horizon, or cap their EDT ) const ( @@ -882,6 +935,114 @@ const ( TCA_HFSC_USC ) +const ( + TCA_FLOWER_UNSPEC = iota + TCA_FLOWER_CLASSID + TCA_FLOWER_INDEV + TCA_FLOWER_ACT + TCA_FLOWER_KEY_ETH_DST /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_DST_MASK /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_SRC /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_SRC_MASK /* ETH_ALEN */ + TCA_FLOWER_KEY_ETH_TYPE /* be16 */ + TCA_FLOWER_KEY_IP_PROTO /* u8 */ + TCA_FLOWER_KEY_IPV4_SRC /* be32 */ + TCA_FLOWER_KEY_IPV4_SRC_MASK /* be32 */ + TCA_FLOWER_KEY_IPV4_DST /* be32 */ + TCA_FLOWER_KEY_IPV4_DST_MASK /* be32 */ + TCA_FLOWER_KEY_IPV6_SRC /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_SRC_MASK /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_DST /* struct in6_addr */ + TCA_FLOWER_KEY_IPV6_DST_MASK /* struct in6_addr */ + TCA_FLOWER_KEY_TCP_SRC /* be16 */ + TCA_FLOWER_KEY_TCP_DST /* be16 */ + TCA_FLOWER_KEY_UDP_SRC /* be16 */ + TCA_FLOWER_KEY_UDP_DST /* be16 */ + + TCA_FLOWER_FLAGS + TCA_FLOWER_KEY_VLAN_ID /* be16 */ + TCA_FLOWER_KEY_VLAN_PRIO /* u8 */ + TCA_FLOWER_KEY_VLAN_ETH_TYPE /* be16 */ + + TCA_FLOWER_KEY_ENC_KEY_ID /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_SRC /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_DST /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_DST_MASK /* be32 */ + TCA_FLOWER_KEY_ENC_IPV6_SRC /* struct in6_addr */ + TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK /* struct in6_addr */ + TCA_FLOWER_KEY_ENC_IPV6_DST /* struct in6_addr */ + TCA_FLOWER_KEY_ENC_IPV6_DST_MASK /* struct in6_addr */ + + TCA_FLOWER_KEY_TCP_SRC_MASK /* be16 */ + TCA_FLOWER_KEY_TCP_DST_MASK /* be16 */ + TCA_FLOWER_KEY_UDP_SRC_MASK /* be16 */ + TCA_FLOWER_KEY_UDP_DST_MASK /* be16 */ + TCA_FLOWER_KEY_SCTP_SRC_MASK /* be16 */ + TCA_FLOWER_KEY_SCTP_DST_MASK /* be16 */ + + TCA_FLOWER_KEY_SCTP_SRC /* be16 */ + TCA_FLOWER_KEY_SCTP_DST /* be16 */ + + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_DST_PORT /* be16 */ + TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK /* be16 */ + + TCA_FLOWER_KEY_FLAGS /* be32 */ + TCA_FLOWER_KEY_FLAGS_MASK /* be32 */ + + TCA_FLOWER_KEY_ICMPV4_CODE /* u8 */ + TCA_FLOWER_KEY_ICMPV4_CODE_MASK /* u8 */ + TCA_FLOWER_KEY_ICMPV4_TYPE /* u8 */ + TCA_FLOWER_KEY_ICMPV4_TYPE_MASK /* u8 */ + TCA_FLOWER_KEY_ICMPV6_CODE /* u8 */ + TCA_FLOWER_KEY_ICMPV6_CODE_MASK /* u8 */ + TCA_FLOWER_KEY_ICMPV6_TYPE /* u8 */ + TCA_FLOWER_KEY_ICMPV6_TYPE_MASK /* u8 */ + + TCA_FLOWER_KEY_ARP_SIP /* be32 */ + TCA_FLOWER_KEY_ARP_SIP_MASK /* be32 */ + TCA_FLOWER_KEY_ARP_TIP /* be32 */ + TCA_FLOWER_KEY_ARP_TIP_MASK /* be32 */ + TCA_FLOWER_KEY_ARP_OP /* u8 */ + TCA_FLOWER_KEY_ARP_OP_MASK /* u8 */ + TCA_FLOWER_KEY_ARP_SHA /* ETH_ALEN */ + TCA_FLOWER_KEY_ARP_SHA_MASK /* ETH_ALEN */ + TCA_FLOWER_KEY_ARP_THA /* ETH_ALEN */ + TCA_FLOWER_KEY_ARP_THA_MASK /* ETH_ALEN */ + + TCA_FLOWER_KEY_MPLS_TTL /* u8 - 8 bits */ + TCA_FLOWER_KEY_MPLS_BOS /* u8 - 1 bit */ + TCA_FLOWER_KEY_MPLS_TC /* u8 - 3 bits */ + TCA_FLOWER_KEY_MPLS_LABEL /* be32 - 20 bits */ + + TCA_FLOWER_KEY_TCP_FLAGS /* be16 */ + TCA_FLOWER_KEY_TCP_FLAGS_MASK /* be16 */ + + TCA_FLOWER_KEY_IP_TOS /* u8 */ + TCA_FLOWER_KEY_IP_TOS_MASK /* u8 */ + TCA_FLOWER_KEY_IP_TTL /* u8 */ + TCA_FLOWER_KEY_IP_TTL_MASK /* u8 */ + + TCA_FLOWER_KEY_CVLAN_ID /* be16 */ + TCA_FLOWER_KEY_CVLAN_PRIO /* u8 */ + TCA_FLOWER_KEY_CVLAN_ETH_TYPE /* be16 */ + + TCA_FLOWER_KEY_ENC_IP_TOS /* u8 */ + TCA_FLOWER_KEY_ENC_IP_TOS_MASK /* u8 */ + TCA_FLOWER_KEY_ENC_IP_TTL /* u8 */ + TCA_FLOWER_KEY_ENC_IP_TTL_MASK /* u8 */ + + TCA_FLOWER_KEY_ENC_OPTS + TCA_FLOWER_KEY_ENC_OPTS_MASK + + __TCA_FLOWER_MAX +) + +const TCA_CLS_FLAGS_SKIP_HW = 1 << 0 /* don't offload filter to HW */ +const TCA_CLS_FLAGS_SKIP_SW = 1 << 1 /* don't use filter in SW */ + // struct tc_sfq_qopt { // unsigned quantum; /* Bytes per round allocated to flow */ // int perturb_period; /* Period of hash perturbation */ @@ -981,3 +1142,36 @@ func DeserializeTcSfqQoptV1(b []byte) *TcSfqQoptV1 { func (x *TcSfqQoptV1) Serialize() []byte { return (*(*[SizeofTcSfqQoptV1]byte)(unsafe.Pointer(x)))[:] } + +// IPProto represents Flower ip_proto attribute +type IPProto uint8 + +const ( + IPPROTO_TCP IPProto = unix.IPPROTO_TCP + IPPROTO_UDP IPProto = unix.IPPROTO_UDP + IPPROTO_SCTP IPProto = unix.IPPROTO_SCTP + IPPROTO_ICMP IPProto = unix.IPPROTO_ICMP + IPPROTO_ICMPV6 IPProto = unix.IPPROTO_ICMPV6 +) + +func (i IPProto) Serialize() []byte { + arr := make([]byte, 1) + arr[0] = byte(i) + return arr +} + +func (i IPProto) String() string { + switch i { + case IPPROTO_TCP: + return "tcp" + case IPPROTO_UDP: + return "udp" + case IPPROTO_SCTP: + return "sctp" + case IPPROTO_ICMP: + return "icmp" + case IPPROTO_ICMPV6: + return "icmpv6" + } + return fmt.Sprintf("%d", i) +} diff --git a/go-controller/vendor/github.com/vishvananda/netlink/nl/xfrm_linux.go b/go-controller/vendor/github.com/vishvananda/netlink/nl/xfrm_linux.go index dce9073f7b..cdb318ba55 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/nl/xfrm_linux.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/nl/xfrm_linux.go @@ -131,7 +131,15 @@ func (x *XfrmAddress) ToIP() net.IP { return ip } -func (x *XfrmAddress) ToIPNet(prefixlen uint8) *net.IPNet { +// family is only used when x and prefixlen are both 0 +func (x *XfrmAddress) ToIPNet(prefixlen uint8, family uint16) *net.IPNet { + empty := [SizeofXfrmAddress]byte{} + if bytes.Equal(x[:], empty[:]) && prefixlen == 0 { + if family == FAMILY_V6 { + return &net.IPNet{IP: net.ParseIP("::"), Mask: net.CIDRMask(int(prefixlen), 128)} + } + return &net.IPNet{IP: net.ParseIP("0.0.0.0"), Mask: net.CIDRMask(int(prefixlen), 32)} + } ip := x.ToIP() if GetIPFamily(ip) == FAMILY_V4 { return &net.IPNet{IP: ip, Mask: net.CIDRMask(int(prefixlen), 32)} diff --git a/go-controller/vendor/github.com/vishvananda/netlink/nl/xfrm_state_linux.go b/go-controller/vendor/github.com/vishvananda/netlink/nl/xfrm_state_linux.go index 43a947f229..e8920b9a69 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/nl/xfrm_state_linux.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/nl/xfrm_state_linux.go @@ -15,6 +15,7 @@ const ( SizeofXfrmEncapTmpl = 0x18 SizeofXfrmUsersaFlush = 0x1 SizeofXfrmReplayStateEsn = 0x18 + SizeofXfrmReplayState = 0x0c ) const ( @@ -28,6 +29,11 @@ const ( XFRM_STATE_ESN = 128 ) +const ( + XFRM_SA_XFLAG_DONT_ENCAP_DSCP = 1 + XFRM_SA_XFLAG_OSEQ_MAY_WRAP = 2 +) + // struct xfrm_usersa_id { // xfrm_address_t daddr; // __be32 spi; @@ -103,6 +109,7 @@ func (msg *XfrmStats) Serialize() []byte { // }; // // #define XFRM_SA_XFLAG_DONT_ENCAP_DSCP 1 +// #define XFRM_SA_XFLAG_OSEQ_MAY_WRAP 2 // type XfrmUsersaInfo struct { @@ -332,3 +339,23 @@ func (msg *XfrmReplayStateEsn) Serialize() []byte { // We deliberately do not pass Bmp, as it gets set by the kernel. return (*(*[SizeofXfrmReplayStateEsn]byte)(unsafe.Pointer(msg)))[:] } + +// struct xfrm_replay_state { +// __u32 oseq; +// __u32 seq; +// __u32 bitmap; +// }; + +type XfrmReplayState struct { + OSeq uint32 + Seq uint32 + BitMap uint32 +} + +func DeserializeXfrmReplayState(b []byte) *XfrmReplayState { + return (*XfrmReplayState)(unsafe.Pointer(&b[0:SizeofXfrmReplayState][0])) +} + +func (msg *XfrmReplayState) Serialize() []byte { + return (*(*[SizeofXfrmReplayState]byte)(unsafe.Pointer(msg)))[:] +} diff --git a/go-controller/vendor/github.com/vishvananda/netlink/proc_event_linux.go b/go-controller/vendor/github.com/vishvananda/netlink/proc_event_linux.go new file mode 100644 index 0000000000..53bc59a6ec --- /dev/null +++ b/go-controller/vendor/github.com/vishvananda/netlink/proc_event_linux.go @@ -0,0 +1,217 @@ +package netlink + +import ( + "bytes" + "encoding/binary" + "fmt" + "os" + "syscall" + + "github.com/vishvananda/netlink/nl" + "github.com/vishvananda/netns" + "golang.org/x/sys/unix" +) + +const CN_IDX_PROC = 0x1 + +const ( + PROC_EVENT_NONE = 0x00000000 + PROC_EVENT_FORK = 0x00000001 + PROC_EVENT_EXEC = 0x00000002 + PROC_EVENT_UID = 0x00000004 + PROC_EVENT_GID = 0x00000040 + PROC_EVENT_SID = 0x00000080 + PROC_EVENT_PTRACE = 0x00000100 + PROC_EVENT_COMM = 0x00000200 + PROC_EVENT_COREDUMP = 0x40000000 + PROC_EVENT_EXIT = 0x80000000 +) + +const ( + CN_VAL_PROC = 0x1 + PROC_CN_MCAST_LISTEN = 0x1 +) + +type ProcEventMsg interface { + Pid() uint32 + Tgid() uint32 +} + +type ProcEventHeader struct { + What uint32 + CPU uint32 + Timestamp uint64 +} + +type ProcEvent struct { + ProcEventHeader + Msg ProcEventMsg +} + +func (pe *ProcEvent) setHeader(h ProcEventHeader) { + pe.What = h.What + pe.CPU = h.CPU + pe.Timestamp = h.Timestamp +} + +type ExitProcEvent struct { + ProcessPid uint32 + ProcessTgid uint32 + ExitCode uint32 + ExitSignal uint32 + ParentPid uint32 + ParentTgid uint32 +} + +type ExitProcEvent2 struct { + ProcessPid uint32 + ProcessTgid uint32 + ExitCode uint32 + ExitSignal uint32 + ParentPid uint32 + ParentTgid uint32 +} + +func (e *ExitProcEvent) Pid() uint32 { + return e.ProcessPid +} + +func (e *ExitProcEvent) Tgid() uint32 { + return e.ProcessTgid +} + +type ExecProcEvent struct { + ProcessPid uint32 + ProcessTgid uint32 +} + +func (e *ExecProcEvent) Pid() uint32 { + return e.ProcessPid +} + +func (e *ExecProcEvent) Tgid() uint32 { + return e.ProcessTgid +} + +type ForkProcEvent struct { + ParentPid uint32 + ParentTgid uint32 + ChildPid uint32 + ChildTgid uint32 +} + +func (e *ForkProcEvent) Pid() uint32 { + return e.ParentPid +} + +func (e *ForkProcEvent) Tgid() uint32 { + return e.ParentTgid +} + +type CommProcEvent struct { + ProcessPid uint32 + ProcessTgid uint32 + Comm [16]byte +} + +func (e *CommProcEvent) Pid() uint32 { + return e.ProcessPid +} + +func (e *CommProcEvent) Tgid() uint32 { + return e.ProcessTgid +} + +func ProcEventMonitor(ch chan<- ProcEvent, done <-chan struct{}, errorChan chan<- error) error { + h, err := NewHandle() + if err != nil { + return err + } + defer h.Delete() + + s, err := nl.SubscribeAt(netns.None(), netns.None(), unix.NETLINK_CONNECTOR, CN_IDX_PROC) + if err != nil { + return err + } + + var nlmsg nl.NetlinkRequest + + nlmsg.Pid = uint32(os.Getpid()) + nlmsg.Type = unix.NLMSG_DONE + nlmsg.Len = uint32(unix.SizeofNlMsghdr) + + cm := nl.NewCnMsg(CN_IDX_PROC, CN_VAL_PROC, PROC_CN_MCAST_LISTEN) + nlmsg.AddData(cm) + + s.Send(&nlmsg) + + if done != nil { + go func() { + <-done + s.Close() + }() + } + + go func() { + defer close(ch) + for { + msgs, from, err := s.Receive() + if err != nil { + errorChan <- err + return + } + if from.Pid != nl.PidKernel { + errorChan <- fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel) + return + } + + for _, m := range msgs { + e := parseNetlinkMessage(m) + if e != nil { + ch <- *e + } + } + + } + }() + + return nil +} + +func parseNetlinkMessage(m syscall.NetlinkMessage) *ProcEvent { + if m.Header.Type == unix.NLMSG_DONE { + buf := bytes.NewBuffer(m.Data) + msg := &nl.CnMsg{} + hdr := &ProcEventHeader{} + binary.Read(buf, nl.NativeEndian(), msg) + binary.Read(buf, nl.NativeEndian(), hdr) + + pe := &ProcEvent{} + pe.setHeader(*hdr) + switch hdr.What { + case PROC_EVENT_EXIT: + event := &ExitProcEvent{} + binary.Read(buf, nl.NativeEndian(), event) + pe.Msg = event + return pe + case PROC_EVENT_FORK: + event := &ForkProcEvent{} + binary.Read(buf, nl.NativeEndian(), event) + pe.Msg = event + return pe + case PROC_EVENT_EXEC: + event := &ExecProcEvent{} + binary.Read(buf, nl.NativeEndian(), event) + pe.Msg = event + return pe + case PROC_EVENT_COMM: + event := &CommProcEvent{} + binary.Read(buf, nl.NativeEndian(), event) + pe.Msg = event + return pe + } + return nil + } + + return nil +} diff --git a/go-controller/vendor/github.com/vishvananda/netlink/qdisc.go b/go-controller/vendor/github.com/vishvananda/netlink/qdisc.go index f594c9c212..00e93b458e 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/qdisc.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/qdisc.go @@ -17,6 +17,12 @@ const ( HANDLE_MIN_EGRESS = 0xFFFFFFF3 ) +const ( + HORIZON_DROP_POLICY_CAP = 0 + HORIZON_DROP_POLICY_DROP = 1 + HORIZON_DROP_POLICY_DEFAULT = 255 +) + type Qdisc interface { Attrs() *QdiscAttrs Type() string @@ -26,10 +32,11 @@ type Qdisc interface { // has a handle, a parent and a refcnt. The root qdisc of a device should // have parent == HANDLE_ROOT. type QdiscAttrs struct { - LinkIndex int - Handle uint32 - Parent uint32 - Refcnt uint32 // read only + LinkIndex int + Handle uint32 + Parent uint32 + Refcnt uint32 // read only + IngressBlock *uint32 } func (q QdiscAttrs) String() string { @@ -278,22 +285,25 @@ type Fq struct { FlowDefaultRate uint32 FlowMaxRate uint32 // called BucketsLog under the hood - Buckets uint32 - FlowRefillDelay uint32 - LowRateThreshold uint32 + Buckets uint32 + FlowRefillDelay uint32 + LowRateThreshold uint32 + Horizon uint32 + HorizonDropPolicy uint8 } func (fq *Fq) String() string { return fmt.Sprintf( - "{PacketLimit: %v, FlowPacketLimit: %v, Quantum: %v, InitialQuantum: %v, Pacing: %v, FlowDefaultRate: %v, FlowMaxRate: %v, Buckets: %v, FlowRefillDelay: %v, LowRateThreshold: %v}", - fq.PacketLimit, fq.FlowPacketLimit, fq.Quantum, fq.InitialQuantum, fq.Pacing, fq.FlowDefaultRate, fq.FlowMaxRate, fq.Buckets, fq.FlowRefillDelay, fq.LowRateThreshold, + "{PacketLimit: %v, FlowPacketLimit: %v, Quantum: %v, InitialQuantum: %v, Pacing: %v, FlowDefaultRate: %v, FlowMaxRate: %v, Buckets: %v, FlowRefillDelay: %v, LowRateThreshold: %v, Horizon: %v, HorizonDropPolicy: %v}", + fq.PacketLimit, fq.FlowPacketLimit, fq.Quantum, fq.InitialQuantum, fq.Pacing, fq.FlowDefaultRate, fq.FlowMaxRate, fq.Buckets, fq.FlowRefillDelay, fq.LowRateThreshold, fq.Horizon, fq.HorizonDropPolicy, ) } func NewFq(attrs QdiscAttrs) *Fq { return &Fq{ - QdiscAttrs: attrs, - Pacing: 1, + QdiscAttrs: attrs, + Pacing: 1, + HorizonDropPolicy: HORIZON_DROP_POLICY_DEFAULT, } } diff --git a/go-controller/vendor/github.com/vishvananda/netlink/qdisc_linux.go b/go-controller/vendor/github.com/vishvananda/netlink/qdisc_linux.go index 9fc4b3d241..e477bcd6c9 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/qdisc_linux.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/qdisc_linux.go @@ -159,6 +159,9 @@ func (h *Handle) qdiscModify(cmd, flags int, qdisc Qdisc) error { func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error { req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(qdisc.Type()))) + if qdisc.Attrs().IngressBlock != nil { + req.AddData(nl.NewRtAttr(nl.TCA_INGRESS_BLOCK, nl.Uint32Attr(*qdisc.Attrs().IngressBlock))) + } options := nl.NewRtAttr(nl.TCA_OPTIONS, nil) @@ -286,6 +289,12 @@ func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error { if qdisc.FlowDefaultRate > 0 { options.AddRtAttr(nl.TCA_FQ_FLOW_DEFAULT_RATE, nl.Uint32Attr((uint32(qdisc.FlowDefaultRate)))) } + if qdisc.Horizon > 0 { + options.AddRtAttr(nl.TCA_FQ_HORIZON, nl.Uint32Attr(qdisc.Horizon)) + } + if qdisc.HorizonDropPolicy != HORIZON_DROP_POLICY_DEFAULT { + options.AddRtAttr(nl.TCA_FQ_HORIZON_DROP, nl.Uint8Attr(qdisc.HorizonDropPolicy)) + } case *Sfq: opt := nl.TcSfqQoptV1{} opt.TcSfqQopt.Quantum = qdisc.Quantum @@ -442,6 +451,10 @@ func (h *Handle) QdiscList(link Link) ([]Qdisc, error) { // no options for ingress } + case nl.TCA_INGRESS_BLOCK: + ingressBlock := new(uint32) + *ingressBlock = native.Uint32(attr.Value) + base.IngressBlock = ingressBlock } } *qdisc.Attrs() = base @@ -546,6 +559,11 @@ func parseFqData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { fq.FlowMaxRate = native.Uint32(datum.Value) case nl.TCA_FQ_FLOW_DEFAULT_RATE: fq.FlowDefaultRate = native.Uint32(datum.Value) + case nl.TCA_FQ_HORIZON: + fq.Horizon = native.Uint32(datum.Value) + case nl.TCA_FQ_HORIZON_DROP: + fq.HorizonDropPolicy = datum.Value[0] + } } return nil @@ -706,3 +724,7 @@ func Xmittime(rate uint64, size uint32) uint32 { // https://git.kernel.org/pub/scm/network/iproute2/iproute2.git/tree/tc/tc_core.c#n62 return time2Tick(uint32(TIME_UNITS_PER_SEC * (float64(size) / float64(rate)))) } + +func Xmitsize(rate uint64, ticks uint32) uint32 { + return uint32((float64(rate) * float64(tick2Time(ticks))) / TIME_UNITS_PER_SEC) +} diff --git a/go-controller/vendor/github.com/vishvananda/netlink/rdma_link_linux.go b/go-controller/vendor/github.com/vishvananda/netlink/rdma_link_linux.go index ff014ca4cc..036399db6b 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/rdma_link_linux.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/rdma_link_linux.go @@ -278,3 +278,54 @@ func (h *Handle) RdmaLinkSetNsFd(link *RdmaLink, fd uint32) error { return execRdmaSetLink(req) } + +// RdmaLinkDel deletes an rdma link +// +// Similar to: rdma link delete NAME +// REF: https://man7.org/linux/man-pages/man8/rdma-link.8.html +func RdmaLinkDel(name string) error { + return pkgHandle.RdmaLinkDel(name) +} + +// RdmaLinkDel deletes an rdma link. +func (h *Handle) RdmaLinkDel(name string) error { + link, err := h.RdmaLinkByName(name) + if err != nil { + return err + } + + proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_DELLINK) + req := h.newNetlinkRequest(proto, unix.NLM_F_ACK) + + b := make([]byte, 4) + native.PutUint32(b, link.Attrs.Index) + req.AddData(nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_DEV_INDEX, b)) + + _, err = req.Execute(unix.NETLINK_RDMA, 0) + return err +} + +// RdmaLinkAdd adds an rdma link for the specified type to the network device. +// Similar to: rdma link add NAME type TYPE netdev NETDEV +// NAME - specifies the new name of the rdma link to add +// TYPE - specifies which rdma type to use. Link types: +// rxe - Soft RoCE driver +// siw - Soft iWARP driver +// NETDEV - specifies the network device to which the link is bound +// +// REF: https://man7.org/linux/man-pages/man8/rdma-link.8.html +func RdmaLinkAdd(linkName, linkType, netdev string) error { + return pkgHandle.RdmaLinkAdd(linkName, linkType, netdev) +} + +// RdmaLinkAdd adds an rdma link for the specified type to the network device. +func (h *Handle) RdmaLinkAdd(linkName string, linkType string, netdev string) error { + proto := getProtoField(nl.RDMA_NL_NLDEV, nl.RDMA_NLDEV_CMD_NEWLINK) + req := h.newNetlinkRequest(proto, unix.NLM_F_ACK) + + req.AddData(nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_DEV_NAME, nl.ZeroTerminated(linkName))) + req.AddData(nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_LINK_TYPE, nl.ZeroTerminated(linkType))) + req.AddData(nl.NewRtAttr(nl.RDMA_NLDEV_ATTR_NDEV_NAME, nl.ZeroTerminated(netdev))) + _, err := req.Execute(unix.NETLINK_RDMA, 0) + return err +} diff --git a/go-controller/vendor/github.com/vishvananda/netlink/route.go b/go-controller/vendor/github.com/vishvananda/netlink/route.go index 845f418080..79cc218ec8 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/route.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/route.go @@ -11,6 +11,24 @@ type Scope uint8 type NextHopFlag int +const ( + RT_FILTER_PROTOCOL uint64 = 1 << (1 + iota) + RT_FILTER_SCOPE + RT_FILTER_TYPE + RT_FILTER_TOS + RT_FILTER_IIF + RT_FILTER_OIF + RT_FILTER_DST + RT_FILTER_SRC + RT_FILTER_GW + RT_FILTER_TABLE + RT_FILTER_HOPLIMIT + RT_FILTER_PRIORITY + RT_FILTER_MARK + RT_FILTER_MASK + RT_FILTER_REALM +) + type Destination interface { Family() int Decode([]byte) error @@ -41,6 +59,7 @@ type Route struct { MultiPath []*NexthopInfo Protocol RouteProtocol Priority int + Family int Table int Type int Tos int @@ -49,6 +68,7 @@ type Route struct { NewDst Destination Encap Encap Via Destination + Realm int MTU int Window int Rtt int @@ -94,6 +114,7 @@ func (r Route) String() string { } elems = append(elems, fmt.Sprintf("Flags: %s", r.ListFlags())) elems = append(elems, fmt.Sprintf("Table: %d", r.Table)) + elems = append(elems, fmt.Sprintf("Realm: %d", r.Realm)) return fmt.Sprintf("{%s}", strings.Join(elems, " ")) } @@ -107,6 +128,7 @@ func (r Route) Equal(x Route) bool { nexthopInfoSlice(r.MultiPath).Equal(x.MultiPath) && r.Protocol == x.Protocol && r.Priority == x.Priority && + r.Realm == x.Realm && r.Table == x.Table && r.Type == x.Type && r.Tos == x.Tos && diff --git a/go-controller/vendor/github.com/vishvananda/netlink/route_linux.go b/go-controller/vendor/github.com/vishvananda/netlink/route_linux.go index 2846f3c783..62bb2d468a 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/route_linux.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/route_linux.go @@ -41,23 +41,6 @@ func (s Scope) String() string { } } -const ( - RT_FILTER_PROTOCOL uint64 = 1 << (1 + iota) - RT_FILTER_SCOPE - RT_FILTER_TYPE - RT_FILTER_TOS - RT_FILTER_IIF - RT_FILTER_OIF - RT_FILTER_DST - RT_FILTER_SRC - RT_FILTER_GW - RT_FILTER_TABLE - RT_FILTER_HOPLIMIT - RT_FILTER_PRIORITY - RT_FILTER_MARK - RT_FILTER_MASK -) - const ( FLAG_ONLINK NextHopFlag = unix.RTNH_F_ONLINK FLAG_PERVASIVE NextHopFlag = unix.RTNH_F_PERVASIVE @@ -249,7 +232,7 @@ func (e *SEG6Encap) String() string { segs := make([]string, 0, len(e.Segments)) // append segment backwards (from n to 0) since seg#0 is the last segment. for i := len(e.Segments); i > 0; i-- { - segs = append(segs, fmt.Sprintf("%s", e.Segments[i-1])) + segs = append(segs, e.Segments[i-1].String()) } str := fmt.Sprintf("mode %s segs %d [ %s ]", nl.SEG6EncapModeString(e.Mode), len(e.Segments), strings.Join(segs, " ")) @@ -419,7 +402,7 @@ func (e *SEG6LocalEncap) String() string { segs := make([]string, 0, len(e.Segments)) //append segment backwards (from n to 0) since seg#0 is the last segment. for i := len(e.Segments); i > 0; i-- { - segs = append(segs, fmt.Sprintf("%s", e.Segments[i-1])) + segs = append(segs, e.Segments[i-1].String()) } strs = append(strs, fmt.Sprintf("segs %d [ %s ]", len(e.Segments), strings.Join(segs, " "))) } @@ -460,6 +443,152 @@ func (e *SEG6LocalEncap) Equal(x Encap) bool { return true } +// Encap BPF definitions +type bpfObj struct { + progFd int + progName string +} +type BpfEncap struct { + progs [nl.LWT_BPF_MAX]bpfObj + headroom int +} + +// SetProg adds a bpf function to the route via netlink RTA_ENCAP. The fd must be a bpf +// program loaded with bpf(type=BPF_PROG_TYPE_LWT_*) matching the direction the program should +// be applied to (LWT_BPF_IN, LWT_BPF_OUT, LWT_BPF_XMIT). +func (e *BpfEncap) SetProg(mode, progFd int, progName string) error { + if progFd <= 0 { + return fmt.Errorf("lwt bpf SetProg: invalid fd") + } + if mode <= nl.LWT_BPF_UNSPEC || mode >= nl.LWT_BPF_XMIT_HEADROOM { + return fmt.Errorf("lwt bpf SetProg:invalid mode") + } + e.progs[mode].progFd = progFd + e.progs[mode].progName = fmt.Sprintf("%s[fd:%d]", progName, progFd) + return nil +} + +// SetXmitHeadroom sets the xmit headroom (LWT_BPF_MAX_HEADROOM) via netlink RTA_ENCAP. +// maximum headroom is LWT_BPF_MAX_HEADROOM +func (e *BpfEncap) SetXmitHeadroom(headroom int) error { + if headroom > nl.LWT_BPF_MAX_HEADROOM || headroom < 0 { + return fmt.Errorf("invalid headroom size. range is 0 - %d", nl.LWT_BPF_MAX_HEADROOM) + } + e.headroom = headroom + return nil +} + +func (e *BpfEncap) Type() int { + return nl.LWTUNNEL_ENCAP_BPF +} +func (e *BpfEncap) Decode(buf []byte) error { + if len(buf) < 4 { + return fmt.Errorf("lwt bpf decode: lack of bytes") + } + native := nl.NativeEndian() + attrs, err := nl.ParseRouteAttr(buf) + if err != nil { + return fmt.Errorf("lwt bpf decode: failed parsing attribute. err: %v", err) + } + for _, attr := range attrs { + if int(attr.Attr.Type) < 1 { + // nl.LWT_BPF_UNSPEC + continue + } + if int(attr.Attr.Type) > nl.LWT_BPF_MAX { + return fmt.Errorf("lwt bpf decode: received unknown attribute type: %d", attr.Attr.Type) + } + switch int(attr.Attr.Type) { + case nl.LWT_BPF_MAX_HEADROOM: + e.headroom = int(native.Uint32(attr.Value)) + default: + bpfO := bpfObj{} + parsedAttrs, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return fmt.Errorf("lwt bpf decode: failed parsing route attribute") + } + for _, parsedAttr := range parsedAttrs { + switch int(parsedAttr.Attr.Type) { + case nl.LWT_BPF_PROG_FD: + bpfO.progFd = int(native.Uint32(parsedAttr.Value)) + case nl.LWT_BPF_PROG_NAME: + bpfO.progName = string(parsedAttr.Value) + default: + return fmt.Errorf("lwt bpf decode: received unknown attribute: type: %d, len: %d", parsedAttr.Attr.Type, parsedAttr.Attr.Len) + } + } + e.progs[attr.Attr.Type] = bpfO + } + } + return nil +} + +func (e *BpfEncap) Encode() ([]byte, error) { + buf := make([]byte, 0) + native = nl.NativeEndian() + for index, attr := range e.progs { + nlMsg := nl.NewRtAttr(index, []byte{}) + if attr.progFd != 0 { + nlMsg.AddRtAttr(nl.LWT_BPF_PROG_FD, nl.Uint32Attr(uint32(attr.progFd))) + } + if attr.progName != "" { + nlMsg.AddRtAttr(nl.LWT_BPF_PROG_NAME, nl.ZeroTerminated(attr.progName)) + } + if nlMsg.Len() > 4 { + buf = append(buf, nlMsg.Serialize()...) + } + } + if len(buf) <= 4 { + return nil, fmt.Errorf("lwt bpf encode: bpf obj definitions returned empty buffer") + } + if e.headroom > 0 { + hRoom := nl.NewRtAttr(nl.LWT_BPF_XMIT_HEADROOM, nl.Uint32Attr(uint32(e.headroom))) + buf = append(buf, hRoom.Serialize()...) + } + return buf, nil +} + +func (e *BpfEncap) String() string { + progs := make([]string, 0) + for index, obj := range e.progs { + empty := bpfObj{} + switch index { + case nl.LWT_BPF_IN: + if obj != empty { + progs = append(progs, fmt.Sprintf("in: %s", obj.progName)) + } + case nl.LWT_BPF_OUT: + if obj != empty { + progs = append(progs, fmt.Sprintf("out: %s", obj.progName)) + } + case nl.LWT_BPF_XMIT: + if obj != empty { + progs = append(progs, fmt.Sprintf("xmit: %s", obj.progName)) + } + } + } + if e.headroom > 0 { + progs = append(progs, fmt.Sprintf("xmit headroom: %d", e.headroom)) + } + return strings.Join(progs, " ") +} + +func (e *BpfEncap) Equal(x Encap) bool { + o, ok := x.(*BpfEncap) + if !ok { + return false + } + if e.headroom != o.headroom { + return false + } + for i := range o.progs { + if o.progs[i] != e.progs[i] { + return false + } + } + return true +} + type Via struct { AddrFamily int Addr net.IP @@ -526,7 +655,8 @@ func RouteAdd(route *Route) error { func (h *Handle) RouteAdd(route *Route) error { flags := unix.NLM_F_CREATE | unix.NLM_F_EXCL | unix.NLM_F_ACK req := h.newNetlinkRequest(unix.RTM_NEWROUTE, flags) - return h.routeHandle(route, req, nl.NewRtMsg()) + _, err := h.routeHandle(route, req, nl.NewRtMsg()) + return err } // RouteAppend will append a route to the system. @@ -540,7 +670,8 @@ func RouteAppend(route *Route) error { func (h *Handle) RouteAppend(route *Route) error { flags := unix.NLM_F_CREATE | unix.NLM_F_APPEND | unix.NLM_F_ACK req := h.newNetlinkRequest(unix.RTM_NEWROUTE, flags) - return h.routeHandle(route, req, nl.NewRtMsg()) + _, err := h.routeHandle(route, req, nl.NewRtMsg()) + return err } // RouteAddEcmp will add a route to the system. @@ -552,7 +683,8 @@ func RouteAddEcmp(route *Route) error { func (h *Handle) RouteAddEcmp(route *Route) error { flags := unix.NLM_F_CREATE | unix.NLM_F_ACK req := h.newNetlinkRequest(unix.RTM_NEWROUTE, flags) - return h.routeHandle(route, req, nl.NewRtMsg()) + _, err := h.routeHandle(route, req, nl.NewRtMsg()) + return err } // RouteReplace will add a route to the system. @@ -566,7 +698,8 @@ func RouteReplace(route *Route) error { func (h *Handle) RouteReplace(route *Route) error { flags := unix.NLM_F_CREATE | unix.NLM_F_REPLACE | unix.NLM_F_ACK req := h.newNetlinkRequest(unix.RTM_NEWROUTE, flags) - return h.routeHandle(route, req, nl.NewRtMsg()) + _, err := h.routeHandle(route, req, nl.NewRtMsg()) + return err } // RouteDel will delete a route from the system. @@ -579,12 +712,13 @@ func RouteDel(route *Route) error { // Equivalent to: `ip route del $route` func (h *Handle) RouteDel(route *Route) error { req := h.newNetlinkRequest(unix.RTM_DELROUTE, unix.NLM_F_ACK) - return h.routeHandle(route, req, nl.NewRtDelMsg()) + _, err := h.routeHandle(route, req, nl.NewRtDelMsg()) + return err } -func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg) error { - if (route.Dst == nil || route.Dst.IP == nil) && route.Src == nil && route.Gw == nil && route.MPLSDst == nil { - return fmt.Errorf("one of Dst.IP, Src, or Gw must not be nil") +func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg) ([][]byte, error) { + if req.NlMsghdr.Type != unix.RTM_GETROUTE && (route.Dst == nil || route.Dst.IP == nil) && route.Src == nil && route.Gw == nil && route.MPLSDst == nil { + return nil, fmt.Errorf("Either Dst.IP, Src.IP or Gw must be set") } family := -1 @@ -611,11 +745,11 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg if route.NewDst != nil { if family != -1 && family != route.NewDst.Family() { - return fmt.Errorf("new destination and destination are not the same address family") + return nil, fmt.Errorf("new destination and destination are not the same address family") } buf, err := route.NewDst.Encode() if err != nil { - return err + return nil, err } rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_NEWDST, buf)) } @@ -626,15 +760,21 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_ENCAP_TYPE, buf)) buf, err := route.Encap.Encode() if err != nil { - return err + return nil, err } - rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_ENCAP, buf)) + switch route.Encap.Type() { + case nl.LWTUNNEL_ENCAP_BPF: + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_ENCAP|unix.NLA_F_NESTED, buf)) + default: + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_ENCAP, buf)) + } + } if route.Src != nil { srcFamily := nl.GetIPFamily(route.Src) if family != -1 && family != srcFamily { - return fmt.Errorf("source and destination ip are not the same IP family") + return nil, fmt.Errorf("source and destination ip are not the same IP family") } family = srcFamily var srcData []byte @@ -650,7 +790,7 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg if route.Gw != nil { gwFamily := nl.GetIPFamily(route.Gw) if family != -1 && family != gwFamily { - return fmt.Errorf("gateway, source, and destination ip are not the same IP family") + return nil, fmt.Errorf("gateway, source, and destination ip are not the same IP family") } family = gwFamily var gwData []byte @@ -665,7 +805,7 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg if route.Via != nil { buf, err := route.Via.Encode() if err != nil { - return fmt.Errorf("failed to encode RTA_VIA: %v", err) + return nil, fmt.Errorf("failed to encode RTA_VIA: %v", err) } rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_VIA, buf)) } @@ -684,7 +824,7 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg if nh.Gw != nil { gwFamily := nl.GetIPFamily(nh.Gw) if family != -1 && family != gwFamily { - return fmt.Errorf("gateway, source, and destination ip are not the same IP family") + return nil, fmt.Errorf("gateway, source, and destination ip are not the same IP family") } if gwFamily == FAMILY_V4 { children = append(children, nl.NewRtAttr(unix.RTA_GATEWAY, []byte(nh.Gw.To4()))) @@ -694,11 +834,11 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg } if nh.NewDst != nil { if family != -1 && family != nh.NewDst.Family() { - return fmt.Errorf("new destination and destination are not the same address family") + return nil, fmt.Errorf("new destination and destination are not the same address family") } buf, err := nh.NewDst.Encode() if err != nil { - return err + return nil, err } children = append(children, nl.NewRtAttr(unix.RTA_NEWDST, buf)) } @@ -708,14 +848,14 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg children = append(children, nl.NewRtAttr(unix.RTA_ENCAP_TYPE, buf)) buf, err := nh.Encap.Encode() if err != nil { - return err + return nil, err } children = append(children, nl.NewRtAttr(unix.RTA_ENCAP, buf)) } if nh.Via != nil { buf, err := nh.Via.Encode() if err != nil { - return err + return nil, err } children = append(children, nl.NewRtAttr(unix.RTA_VIA, buf)) } @@ -741,6 +881,11 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg native.PutUint32(b, uint32(route.Priority)) rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_PRIORITY, b)) } + if route.Realm > 0 { + b := make([]byte, 4) + native.PutUint32(b, uint32(route.Realm)) + rtAttrs = append(rtAttrs, nl.NewRtAttr(unix.RTA_FLOW, b)) + } if route.Tos > 0 { msg.Tos = uint8(route.Tos) } @@ -827,19 +972,22 @@ func (h *Handle) routeHandle(route *Route, req *nl.NetlinkRequest, msg *nl.RtMsg msg.Flags = uint32(route.Flags) msg.Scope = uint8(route.Scope) - msg.Family = uint8(family) + // only overwrite family if it was not set in msg + if msg.Family == 0 { + msg.Family = uint8(family) + } req.AddData(msg) for _, attr := range rtAttrs { req.AddData(attr) } - b := make([]byte, 4) - native.PutUint32(b, uint32(route.LinkIndex)) - - req.AddData(nl.NewRtAttr(unix.RTA_OIF, b)) + if (req.NlMsghdr.Type != unix.RTM_GETROUTE) || (req.NlMsghdr.Type == unix.RTM_GETROUTE && route.LinkIndex > 0) { + b := make([]byte, 4) + native.PutUint32(b, uint32(route.LinkIndex)) + req.AddData(nl.NewRtAttr(unix.RTA_OIF, b)) + } - _, err := req.Execute(unix.NETLINK_ROUTE, 0) - return err + return req.Execute(unix.NETLINK_ROUTE, 0) } // RouteList gets a list of routes in the system. @@ -853,13 +1001,13 @@ func RouteList(link Link, family int) ([]Route, error) { // Equivalent to: `ip route show`. // The list can be filtered by link and ip family. func (h *Handle) RouteList(link Link, family int) ([]Route, error) { - var routeFilter *Route + routeFilter := &Route{} if link != nil { - routeFilter = &Route{ - LinkIndex: link.Attrs().Index, - } + routeFilter.LinkIndex = link.Attrs().Index + + return h.RouteListFiltered(family, routeFilter, RT_FILTER_OIF) } - return h.RouteListFiltered(family, routeFilter, RT_FILTER_OIF) + return h.RouteListFiltered(family, routeFilter, 0) } // RouteListFiltered gets a list of routes in the system filtered with specified rules. @@ -872,10 +1020,9 @@ func RouteListFiltered(family int, filter *Route, filterMask uint64) ([]Route, e // All rules must be defined in RouteFilter struct func (h *Handle) RouteListFiltered(family int, filter *Route, filterMask uint64) ([]Route, error) { req := h.newNetlinkRequest(unix.RTM_GETROUTE, unix.NLM_F_DUMP) - infmsg := nl.NewIfInfomsg(family) - req.AddData(infmsg) - - msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWROUTE) + rtmsg := &nl.RtMsg{} + rtmsg.Family = uint8(family) + msgs, err := h.routeHandle(filter, req, rtmsg) if err != nil { return nil, err } @@ -909,6 +1056,8 @@ func (h *Handle) RouteListFiltered(family int, filter *Route, filterMask uint64) continue case filterMask&RT_FILTER_TOS != 0 && route.Tos != filter.Tos: continue + case filterMask&RT_FILTER_REALM != 0 && route.Realm != filter.Realm: + continue case filterMask&RT_FILTER_OIF != 0 && route.LinkIndex != filter.LinkIndex: continue case filterMask&RT_FILTER_IIF != 0 && route.ILinkIndex != filter.ILinkIndex: @@ -946,6 +1095,7 @@ func deserializeRoute(m []byte) (Route, error) { Type: int(msg.Type), Tos: int(msg.Tos), Flags: int(msg.Flags), + Family: int(msg.Family), } var encap, encapType syscall.NetlinkRouteAttr @@ -974,6 +1124,8 @@ func deserializeRoute(m []byte) (Route, error) { route.ILinkIndex = int(native.Uint32(attr.Value[0:4])) case unix.RTA_PRIORITY: route.Priority = int(native.Uint32(attr.Value[0:4])) + case unix.RTA_FLOW: + route.Realm = int(native.Uint32(attr.Value[0:4])) case unix.RTA_TABLE: route.Table = int(native.Uint32(attr.Value[0:4])) case unix.RTA_MULTIPATH: @@ -1129,6 +1281,11 @@ func deserializeRoute(m []byte) (Route, error) { if err := e.Decode(encap.Value); err != nil { return route, err } + case nl.LWTUNNEL_ENCAP_BPF: + e = &BpfEncap{} + if err := e.Decode(encap.Value); err != nil { + return route, err + } } route.Encap = e } @@ -1140,8 +1297,11 @@ func deserializeRoute(m []byte) (Route, error) { // RouteGetWithOptions type RouteGetOptions struct { Iif string + Oif string VrfName string SrcAddr net.IP + UID *uint32 + Mark int } // RouteGetWithOptions gets a route to a specific destination from the host system. @@ -1206,6 +1366,18 @@ func (h *Handle) RouteGetWithOptions(destination net.IP, options *RouteGetOption req.AddData(nl.NewRtAttr(unix.RTA_IIF, b)) } + if len(options.Oif) > 0 { + link, err := LinkByName(options.Oif) + if err != nil { + return nil, err + } + + b := make([]byte, 4) + native.PutUint32(b, uint32(link.Attrs().Index)) + + req.AddData(nl.NewRtAttr(unix.RTA_OIF, b)) + } + if options.SrcAddr != nil { var srcAddr []byte if family == FAMILY_V4 { @@ -1216,6 +1388,21 @@ func (h *Handle) RouteGetWithOptions(destination net.IP, options *RouteGetOption req.AddData(nl.NewRtAttr(unix.RTA_SRC, srcAddr)) } + + if options.UID != nil { + uid := *options.UID + b := make([]byte, 4) + native.PutUint32(b, uid) + + req.AddData(nl.NewRtAttr(unix.RTA_UID, b)) + } + + if options.Mark > 0 { + b := make([]byte, 4) + native.PutUint32(b, uint32(options.Mark)) + + req.AddData(nl.NewRtAttr(unix.RTA_MARK, b)) + } } msgs, err := req.Execute(unix.NETLINK_ROUTE, unix.RTM_NEWROUTE) @@ -1297,7 +1484,8 @@ func routeSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- RouteUpdate, done < msgs, from, err := s.Receive() if err != nil { if cberr != nil { - cberr(err) + cberr(fmt.Errorf("Receive failed: %v", + err)) } return } @@ -1317,16 +1505,17 @@ func routeSubscribeAt(newNs, curNs netns.NsHandle, ch chan<- RouteUpdate, done < continue } if cberr != nil { - cberr(syscall.Errno(-error)) + cberr(fmt.Errorf("error message: %v", + syscall.Errno(-error))) } - return + continue } route, err := deserializeRoute(m.Data) if err != nil { if cberr != nil { cberr(err) } - return + continue } ch <- RouteUpdate{Type: m.Header.Type, Route: route} } diff --git a/go-controller/vendor/github.com/vishvananda/netlink/rule.go b/go-controller/vendor/github.com/vishvananda/netlink/rule.go index 95f2facfb1..9a6b57dac4 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/rule.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/rule.go @@ -25,10 +25,24 @@ type Rule struct { Invert bool Dport *RulePortRange Sport *RulePortRange + IPProto int + UIDRange *RuleUIDRange + Protocol uint8 } func (r Rule) String() string { - return fmt.Sprintf("ip rule %d: from %s table %d", r.Priority, r.Src, r.Table) + from := "all" + if r.Src != nil && r.Src.String() != "" { + from = r.Src.String() + } + + to := "all" + if r.Dst != nil && r.Dst.String() != "" { + to = r.Dst.String() + } + + return fmt.Sprintf("ip rule %d: from %s to %s table %d", + r.Priority, from, to, r.Table) } // NewRule return empty rules. @@ -54,3 +68,14 @@ type RulePortRange struct { Start uint16 End uint16 } + +// NewRuleUIDRange creates rule uid range. +func NewRuleUIDRange(start, end uint32) *RuleUIDRange { + return &RuleUIDRange{Start: start, End: end} +} + +// RuleUIDRange represents rule uid range. +type RuleUIDRange struct { + Start uint32 + End uint32 +} diff --git a/go-controller/vendor/github.com/vishvananda/netlink/rule_linux.go b/go-controller/vendor/github.com/vishvananda/netlink/rule_linux.go index fe68f28f1e..69f53e48de 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/rule_linux.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/rule_linux.go @@ -152,6 +152,12 @@ func ruleHandle(rule *Rule, req *nl.NetlinkRequest) error { req.AddData(nl.NewRtAttr(nl.FRA_GOTO, b)) } + if rule.IPProto > 0 { + b := make([]byte, 4) + native.PutUint32(b, uint32(rule.IPProto)) + req.AddData(nl.NewRtAttr(nl.FRA_IP_PROTO, b)) + } + if rule.Dport != nil { b := rule.Dport.toRtAttrData() req.AddData(nl.NewRtAttr(nl.FRA_DPORT_RANGE, b)) @@ -162,6 +168,15 @@ func ruleHandle(rule *Rule, req *nl.NetlinkRequest) error { req.AddData(nl.NewRtAttr(nl.FRA_SPORT_RANGE, b)) } + if rule.UIDRange != nil { + b := rule.UIDRange.toRtAttrData() + req.AddData(nl.NewRtAttr(nl.FRA_UID_RANGE, b)) + } + + if rule.Protocol > 0 { + req.AddData(nl.NewRtAttr(nl.FRA_PROTOCOL, nl.Uint8Attr(rule.Protocol))) + } + _, err := req.Execute(unix.NETLINK_ROUTE, 0) return err } @@ -229,7 +244,7 @@ func (h *Handle) RuleListFiltered(family int, filter *Rule, filterMask uint64) ( case nl.FRA_FWMASK: rule.Mask = int(native.Uint32(attrs[j].Value[0:4])) case nl.FRA_TUN_ID: - rule.TunID = uint(native.Uint64(attrs[j].Value[0:4])) + rule.TunID = uint(native.Uint64(attrs[j].Value[0:8])) case nl.FRA_IIFNAME: rule.IifName = string(attrs[j].Value[:len(attrs[j].Value)-1]) case nl.FRA_OIFNAME: @@ -250,10 +265,16 @@ func (h *Handle) RuleListFiltered(family int, filter *Rule, filterMask uint64) ( rule.Goto = int(native.Uint32(attrs[j].Value[0:4])) case nl.FRA_PRIORITY: rule.Priority = int(native.Uint32(attrs[j].Value[0:4])) + case nl.FRA_IP_PROTO: + rule.IPProto = int(native.Uint32(attrs[j].Value[0:4])) case nl.FRA_DPORT_RANGE: rule.Dport = NewRulePortRange(native.Uint16(attrs[j].Value[0:2]), native.Uint16(attrs[j].Value[2:4])) case nl.FRA_SPORT_RANGE: rule.Sport = NewRulePortRange(native.Uint16(attrs[j].Value[0:2]), native.Uint16(attrs[j].Value[2:4])) + case nl.FRA_UID_RANGE: + rule.UIDRange = NewRuleUIDRange(native.Uint32(attrs[j].Value[0:4]), native.Uint32(attrs[j].Value[4:8])) + case nl.FRA_PROTOCOL: + rule.Protocol = uint8(attrs[j].Value[0]) } } @@ -291,3 +312,10 @@ func (pr *RulePortRange) toRtAttrData() []byte { native.PutUint16(b[1], pr.End) return bytes.Join(b, []byte{}) } + +func (pr *RuleUIDRange) toRtAttrData() []byte { + b := [][]byte{make([]byte, 4), make([]byte, 4)} + native.PutUint32(b[0], pr.Start) + native.PutUint32(b[1], pr.End) + return bytes.Join(b, []byte{}) +} diff --git a/go-controller/vendor/github.com/vishvananda/netlink/socket_linux.go b/go-controller/vendor/github.com/vishvananda/netlink/socket_linux.go index 8738d4173d..b881fe496d 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/socket_linux.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/socket_linux.go @@ -172,12 +172,56 @@ func SocketGet(local, remote net.Addr) (*Socket, error) { return sock, nil } -// SocketDiagTCPInfo requests INET_DIAG_INFO for TCP protocol for specified family type. +// SocketDiagTCPInfo requests INET_DIAG_INFO for TCP protocol for specified family type and return with extension TCP info. func SocketDiagTCPInfo(family uint8) ([]*InetDiagTCPInfoResp, error) { - s, err := nl.Subscribe(unix.NETLINK_INET_DIAG) + var result []*InetDiagTCPInfoResp + err := socketDiagTCPExecutor(family, func(m syscall.NetlinkMessage) error { + sockInfo := &Socket{} + if err := sockInfo.deserialize(m.Data); err != nil { + return err + } + attrs, err := nl.ParseRouteAttr(m.Data[sizeofSocket:]) + if err != nil { + return err + } + + res, err := attrsToInetDiagTCPInfoResp(attrs, sockInfo) + if err != nil { + return err + } + + result = append(result, res) + return nil + }) + if err != nil { + return nil, err + } + return result, nil +} + +// SocketDiagTCP requests INET_DIAG_INFO for TCP protocol for specified family type and return related socket. +func SocketDiagTCP(family uint8) ([]*Socket, error) { + var result []*Socket + err := socketDiagTCPExecutor(family, func(m syscall.NetlinkMessage) error { + sockInfo := &Socket{} + if err := sockInfo.deserialize(m.Data); err != nil { + return err + } + result = append(result, sockInfo) + return nil + }) if err != nil { return nil, err } + return result, nil +} + +// socketDiagTCPExecutor requests INET_DIAG_INFO for TCP protocol for specified family type. +func socketDiagTCPExecutor(family uint8, receiver func(syscall.NetlinkMessage) error) error { + s, err := nl.Subscribe(unix.NETLINK_INET_DIAG) + if err != nil { + return err + } defer s.Close() req := nl.NewNetlinkRequest(nl.SOCK_DIAG_BY_FAMILY, unix.NLM_F_DUMP) @@ -189,18 +233,17 @@ func SocketDiagTCPInfo(family uint8) ([]*InetDiagTCPInfoResp, error) { }) s.Send(req) - var result []*InetDiagTCPInfoResp loop: for { msgs, from, err := s.Receive() if err != nil { - return nil, err + return err } if from.Pid != nl.PidKernel { - return nil, fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel) + return fmt.Errorf("Wrong sender portid %d, expected %d", from.Pid, nl.PidKernel) } if len(msgs) == 0 { - return nil, errors.New("no message nor error from netlink") + return errors.New("no message nor error from netlink") } for _, m := range msgs { @@ -209,26 +252,14 @@ loop: break loop case unix.NLMSG_ERROR: error := int32(native.Uint32(m.Data[0:4])) - return nil, syscall.Errno(-error) + return syscall.Errno(-error) } - sockInfo := &Socket{} - if err := sockInfo.deserialize(m.Data); err != nil { - return nil, err + if err := receiver(m); err != nil { + return err } - attrs, err := nl.ParseRouteAttr(m.Data[sizeofSocket:]) - if err != nil { - return nil, err - } - - res, err := attrsToInetDiagTCPInfoResp(attrs, sockInfo) - if err != nil { - return nil, err - } - - result = append(result, res) } } - return result, nil + return nil } func attrsToInetDiagTCPInfoResp(attrs []syscall.NetlinkRouteAttr, sockInfo *Socket) (*InetDiagTCPInfoResp, error) { diff --git a/go-controller/vendor/github.com/vishvananda/netlink/xfrm_policy_linux.go b/go-controller/vendor/github.com/vishvananda/netlink/xfrm_policy_linux.go index 694bd74e6d..cca5260515 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/xfrm_policy_linux.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/xfrm_policy_linux.go @@ -75,6 +75,7 @@ func (h *Handle) xfrmPolicyAddOrUpdate(policy *XfrmPolicy, nlProto int) error { userTmpl := nl.DeserializeXfrmUserTmpl(tmplData[start : start+nl.SizeofXfrmUserTmpl]) userTmpl.XfrmId.Daddr.FromIP(tmpl.Dst) userTmpl.Saddr.FromIP(tmpl.Src) + userTmpl.Family = uint16(nl.GetIPFamily(tmpl.Dst)) userTmpl.XfrmId.Proto = uint8(tmpl.Proto) userTmpl.XfrmId.Spi = nl.Swap32(uint32(tmpl.Spi)) userTmpl.Mode = uint8(tmpl.Mode) @@ -93,8 +94,10 @@ func (h *Handle) xfrmPolicyAddOrUpdate(policy *XfrmPolicy, nlProto int) error { req.AddData(out) } - ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(policy.Ifid))) - req.AddData(ifId) + if policy.Ifid != 0 { + ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(policy.Ifid))) + req.AddData(ifId) + } _, err := req.Execute(unix.NETLINK_XFRM, 0) return err @@ -189,8 +192,10 @@ func (h *Handle) xfrmPolicyGetOrDelete(policy *XfrmPolicy, nlProto int) (*XfrmPo req.AddData(out) } - ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(policy.Ifid))) - req.AddData(ifId) + if policy.Ifid != 0 { + ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(policy.Ifid))) + req.AddData(ifId) + } resType := nl.XFRM_MSG_NEWPOLICY if nlProto == nl.XFRM_MSG_DELPOLICY { @@ -219,8 +224,8 @@ func parseXfrmPolicy(m []byte, family int) (*XfrmPolicy, error) { var policy XfrmPolicy - policy.Dst = msg.Sel.Daddr.ToIPNet(msg.Sel.PrefixlenD) - policy.Src = msg.Sel.Saddr.ToIPNet(msg.Sel.PrefixlenS) + policy.Dst = msg.Sel.Daddr.ToIPNet(msg.Sel.PrefixlenD, uint16(family)) + policy.Src = msg.Sel.Saddr.ToIPNet(msg.Sel.PrefixlenS, uint16(family)) policy.Proto = Proto(msg.Sel.Proto) policy.DstPort = int(nl.Swap16(msg.Sel.Dport)) policy.SrcPort = int(nl.Swap16(msg.Sel.Sport)) diff --git a/go-controller/vendor/github.com/vishvananda/netlink/xfrm_state.go b/go-controller/vendor/github.com/vishvananda/netlink/xfrm_state.go index 19df82c763..0095832dca 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/xfrm_state.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/xfrm_state.go @@ -81,31 +81,48 @@ type XfrmStateStats struct { UseTime uint64 } +// XfrmReplayState represents the sequence number states for +// "legacy" anti-replay mode. +type XfrmReplayState struct { + OSeq uint32 + Seq uint32 + BitMap uint32 +} + +func (r XfrmReplayState) String() string { + return fmt.Sprintf("{OSeq: 0x%x, Seq: 0x%x, BitMap: 0x%x}", + r.OSeq, r.Seq, r.BitMap) +} + // XfrmState represents the state of an ipsec policy. It optionally // contains an XfrmStateAlgo for encryption and one for authentication. type XfrmState struct { - Dst net.IP - Src net.IP - Proto Proto - Mode Mode - Spi int - Reqid int - ReplayWindow int - Limits XfrmStateLimits - Statistics XfrmStateStats - Mark *XfrmMark - OutputMark *XfrmMark - Ifid int - Auth *XfrmStateAlgo - Crypt *XfrmStateAlgo - Aead *XfrmStateAlgo - Encap *XfrmStateEncap - ESN bool + Dst net.IP + Src net.IP + Proto Proto + Mode Mode + Spi int + Reqid int + ReplayWindow int + Limits XfrmStateLimits + Statistics XfrmStateStats + Mark *XfrmMark + OutputMark *XfrmMark + Ifid int + Auth *XfrmStateAlgo + Crypt *XfrmStateAlgo + Aead *XfrmStateAlgo + Encap *XfrmStateEncap + ESN bool + DontEncapDSCP bool + OSeqMayWrap bool + Replay *XfrmReplayState + Selector *XfrmPolicy } func (sa XfrmState) String() string { - return fmt.Sprintf("Dst: %v, Src: %v, Proto: %s, Mode: %s, SPI: 0x%x, ReqID: 0x%x, ReplayWindow: %d, Mark: %v, OutputMark: %v, Ifid: %d, Auth: %v, Crypt: %v, Aead: %v, Encap: %v, ESN: %t", - sa.Dst, sa.Src, sa.Proto, sa.Mode, sa.Spi, sa.Reqid, sa.ReplayWindow, sa.Mark, sa.OutputMark, sa.Ifid, sa.Auth, sa.Crypt, sa.Aead, sa.Encap, sa.ESN) + return fmt.Sprintf("Dst: %v, Src: %v, Proto: %s, Mode: %s, SPI: 0x%x, ReqID: 0x%x, ReplayWindow: %d, Mark: %v, OutputMark: %v, Ifid: %d, Auth: %v, Crypt: %v, Aead: %v, Encap: %v, ESN: %t, DontEncapDSCP: %t, OSeqMayWrap: %t, Replay: %v", + sa.Dst, sa.Src, sa.Proto, sa.Mode, sa.Spi, sa.Reqid, sa.ReplayWindow, sa.Mark, sa.OutputMark, sa.Ifid, sa.Auth, sa.Crypt, sa.Aead, sa.Encap, sa.ESN, sa.DontEncapDSCP, sa.OSeqMayWrap, sa.Replay) } func (sa XfrmState) Print(stats bool) string { if !stats { diff --git a/go-controller/vendor/github.com/vishvananda/netlink/xfrm_state_linux.go b/go-controller/vendor/github.com/vishvananda/netlink/xfrm_state_linux.go index 5b1b6c31ae..6b7bb8e72b 100644 --- a/go-controller/vendor/github.com/vishvananda/netlink/xfrm_state_linux.go +++ b/go-controller/vendor/github.com/vishvananda/netlink/xfrm_state_linux.go @@ -77,6 +77,14 @@ func writeReplayEsn(replayWindow int) []byte { return replayEsn.Serialize() } +func writeReplay(r *XfrmReplayState) []byte { + return (&nl.XfrmReplayState{ + OSeq: r.OSeq, + Seq: r.Seq, + BitMap: r.BitMap, + }).Serialize() +} + // XfrmStateAdd will add an xfrm state to the system. // Equivalent to: `ip xfrm state add $state` func XfrmStateAdd(state *XfrmState) error { @@ -111,7 +119,7 @@ func (h *Handle) xfrmStateAddOrUpdate(state *XfrmState, nlProto int) error { // A state with spi 0 can't be deleted so don't allow it to be set if state.Spi == 0 { - return fmt.Errorf("Spi must be set when adding xfrm state.") + return fmt.Errorf("Spi must be set when adding xfrm state") } req := h.newNetlinkRequest(nlProto, unix.NLM_F_CREATE|unix.NLM_F_EXCL|unix.NLM_F_ACK) @@ -166,9 +174,26 @@ func (h *Handle) xfrmStateAddOrUpdate(state *XfrmState, nlProto int) error { req.AddData(out) } } + if state.OSeqMayWrap || state.DontEncapDSCP { + var flags uint32 + if state.DontEncapDSCP { + flags |= nl.XFRM_SA_XFLAG_DONT_ENCAP_DSCP + } + if state.OSeqMayWrap { + flags |= nl.XFRM_SA_XFLAG_OSEQ_MAY_WRAP + } + out := nl.NewRtAttr(nl.XFRMA_SA_EXTRA_FLAGS, nl.Uint32Attr(flags)) + req.AddData(out) + } + if state.Replay != nil { + out := nl.NewRtAttr(nl.XFRMA_REPLAY_VAL, writeReplay(state.Replay)) + req.AddData(out) + } - ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(state.Ifid))) - req.AddData(ifId) + if state.Ifid != 0 { + ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(state.Ifid))) + req.AddData(ifId) + } _, err := req.Execute(unix.NETLINK_XFRM, 0) return err @@ -184,7 +209,6 @@ func (h *Handle) xfrmStateAllocSpi(state *XfrmState) (*XfrmState, error) { msg.Min = 0x100 msg.Max = 0xffffffff req.AddData(msg) - if state.Mark != nil { out := nl.NewRtAttr(nl.XFRMA_MARK, writeMark(state.Mark)) req.AddData(out) @@ -281,8 +305,10 @@ func (h *Handle) xfrmStateGetOrDelete(state *XfrmState, nlProto int) (*XfrmState req.AddData(out) } - ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(state.Ifid))) - req.AddData(ifId) + if state.Ifid != 0 { + ifId := nl.NewRtAttr(nl.XFRMA_IF_ID, nl.Uint32Attr(uint32(state.Ifid))) + req.AddData(ifId) + } resType := nl.XFRM_MSG_NEWSA if nlProto == nl.XFRM_MSG_DELSA { @@ -310,7 +336,6 @@ var familyError = fmt.Errorf("family error") func xfrmStateFromXfrmUsersaInfo(msg *nl.XfrmUsersaInfo) *XfrmState { var state XfrmState - state.Dst = msg.Id.Daddr.ToIP() state.Src = msg.Saddr.ToIP() state.Proto = Proto(msg.Id.Proto) @@ -320,20 +345,25 @@ func xfrmStateFromXfrmUsersaInfo(msg *nl.XfrmUsersaInfo) *XfrmState { state.ReplayWindow = int(msg.ReplayWindow) lftToLimits(&msg.Lft, &state.Limits) curToStats(&msg.Curlft, &msg.Stats, &state.Statistics) + state.Selector = &XfrmPolicy{ + Dst: msg.Sel.Daddr.ToIPNet(msg.Sel.PrefixlenD, msg.Sel.Family), + Src: msg.Sel.Saddr.ToIPNet(msg.Sel.PrefixlenS, msg.Sel.Family), + Proto: Proto(msg.Sel.Proto), + DstPort: int(nl.Swap16(msg.Sel.Dport)), + SrcPort: int(nl.Swap16(msg.Sel.Sport)), + Ifindex: int(msg.Sel.Ifindex), + } return &state } func parseXfrmState(m []byte, family int) (*XfrmState, error) { msg := nl.DeserializeXfrmUsersaInfo(m) - // This is mainly for the state dump if family != FAMILY_ALL && family != int(msg.Family) { return nil, familyError } - state := xfrmStateFromXfrmUsersaInfo(msg) - attrs, err := nl.ParseRouteAttr(m[nl.SizeofXfrmUsersaInfo:]) if err != nil { return nil, err @@ -381,6 +411,14 @@ func parseXfrmState(m []byte, family int) (*XfrmState, error) { state.Mark = new(XfrmMark) state.Mark.Value = mark.Value state.Mark.Mask = mark.Mask + case nl.XFRMA_SA_EXTRA_FLAGS: + flags := native.Uint32(attr.Value) + if (flags & nl.XFRM_SA_XFLAG_DONT_ENCAP_DSCP) != 0 { + state.DontEncapDSCP = true + } + if (flags & nl.XFRM_SA_XFLAG_OSEQ_MAY_WRAP) != 0 { + state.OSeqMayWrap = true + } case nl.XFRMA_SET_MARK: if state.OutputMark == nil { state.OutputMark = new(XfrmMark) @@ -396,6 +434,14 @@ func parseXfrmState(m []byte, family int) (*XfrmState, error) { } case nl.XFRMA_IF_ID: state.Ifid = int(native.Uint32(attr.Value)) + case nl.XFRMA_REPLAY_VAL: + if state.Replay == nil { + state.Replay = new(XfrmReplayState) + } + replay := nl.DeserializeXfrmReplayState(attr.Value[:]) + state.Replay.OSeq = replay.OSeq + state.Replay.Seq = replay.Seq + state.Replay.BitMap = replay.BitMap } } @@ -472,6 +518,9 @@ func xfrmUsersaInfoFromXfrmState(state *XfrmState) *nl.XfrmUsersaInfo { msg.Id.Spi = nl.Swap32(uint32(state.Spi)) msg.Reqid = uint32(state.Reqid) msg.ReplayWindow = uint8(state.ReplayWindow) - + msg.Sel = nl.XfrmSelector{} + if state.Selector != nil { + selFromPolicy(&msg.Sel, state.Selector) + } return msg } diff --git a/go-controller/vendor/modules.txt b/go-controller/vendor/modules.txt index 2ebbb2913c..fea43dc54e 100644 --- a/go-controller/vendor/modules.txt +++ b/go-controller/vendor/modules.txt @@ -355,7 +355,7 @@ github.com/stretchr/testify/mock # github.com/urfave/cli/v2 v2.2.0 ## explicit; go 1.11 github.com/urfave/cli/v2 -# github.com/vishvananda/netlink v1.2.1-beta.2.0.20230420174744-55c8b9515a01 => github.com/jcaamano/netlink v1.1.1-0.20220831114501-3a761ed61db6 +# github.com/vishvananda/netlink v1.2.1-beta.2.0.20230420174744-55c8b9515a01 ## explicit; go 1.12 github.com/vishvananda/netlink github.com/vishvananda/netlink/nl @@ -1023,4 +1023,3 @@ sigs.k8s.io/structured-merge-diff/v4/value sigs.k8s.io/yaml # github.com/Microsoft/hcsshim => github.com/Microsoft/hcsshim v0.8.20 # github.com/gogo/protobuf => github.com/gogo/protobuf v1.3.2 -# github.com/vishvananda/netlink => github.com/jcaamano/netlink v1.1.1-0.20220831114501-3a761ed61db6