From f502c3200098cb494e1b6eff39eb4da411cb83d9 Mon Sep 17 00:00:00 2001 From: Shi Su <67605788+shi-su@users.noreply.github.com> Date: Thu, 11 Nov 2021 09:27:55 -0800 Subject: [PATCH] [vnetorch] Add ECMP support for vnet tunnel routes (#1960) What I did Add functions to create/remove next hop groups for vnet tunnel routes. Count the reference count of next hop groups to create and remove as needed. Share the counter of next hop groups with routeorch. Add vs test Why I did it To add support for overlay ECMP. How I verified it Verify ECMP groups are properly created and removed with the functions. Verify vs test passes --- orchagent/nexthopkey.h | 2 + orchagent/routeorch.cpp | 15 ++ orchagent/routeorch.h | 4 + orchagent/vnetorch.cpp | 382 ++++++++++++++++++++++++++++++++++++---- orchagent/vnetorch.h | 40 +++-- tests/test_vnet.py | 256 +++++++++++++++++++++++++-- 6 files changed, 640 insertions(+), 59 deletions(-) diff --git a/orchagent/nexthopkey.h b/orchagent/nexthopkey.h index 1e76916dd428..0e4e77f046c9 100644 --- a/orchagent/nexthopkey.h +++ b/orchagent/nexthopkey.h @@ -82,6 +82,8 @@ struct NextHopKey weight = 0; } + NextHopKey(const IpAddress &ip, const MacAddress &mac, const uint32_t &vni, bool overlay_nh) : ip_address(ip), alias(""), vni(vni), mac_address(mac){} + const std::string to_string() const { std::string str = formatMplsNextHop(); diff --git a/orchagent/routeorch.cpp b/orchagent/routeorch.cpp index e8ebf9a6c5c3..04c9a083af67 100644 --- a/orchagent/routeorch.cpp +++ b/orchagent/routeorch.cpp @@ -2335,3 +2335,18 @@ bool RouteOrch::removeOverlayNextHops(sai_object_id_t vrf_id, const NextHopGroup return true; } + +void RouteOrch::increaseNextHopGroupCount() +{ + m_nextHopGroupCount ++; +} + +void RouteOrch::decreaseNextHopGroupCount() +{ + m_nextHopGroupCount --; +} + +bool RouteOrch::checkNextHopGroupCount() +{ + return m_nextHopGroupCount < m_maxNextHopGroupCount; +} diff --git a/orchagent/routeorch.h b/orchagent/routeorch.h index d28ba4322e3b..3162331820a8 100644 --- a/orchagent/routeorch.h +++ b/orchagent/routeorch.h @@ -205,6 +205,10 @@ class RouteOrch : public Orch, public Subject unsigned int getNhgCount() { return m_nextHopGroupCount; } unsigned int getMaxNhgCount() { return m_maxNextHopGroupCount; } + + void increaseNextHopGroupCount(); + void decreaseNextHopGroupCount(); + bool checkNextHopGroupCount(); private: SwitchOrch *m_switchOrch; diff --git a/orchagent/vnetorch.cpp b/orchagent/vnetorch.cpp index efc60d22c256..053784e2987e 100644 --- a/orchagent/vnetorch.cpp +++ b/orchagent/vnetorch.cpp @@ -20,6 +20,7 @@ #include "intfsorch.h" #include "neighorch.h" #include "crmorch.h" +#include "routeorch.h" extern sai_virtual_router_api_t* sai_virtual_router_api; extern sai_route_api_t* sai_route_api; @@ -28,6 +29,7 @@ extern sai_router_interface_api_t* sai_router_intfs_api; extern sai_fdb_api_t* sai_fdb_api; extern sai_neighbor_api_t* sai_neighbor_api; extern sai_next_hop_api_t* sai_next_hop_api; +extern sai_next_hop_group_api_t* sai_next_hop_group_api; extern sai_object_id_t gSwitchId; extern sai_object_id_t gVirtualRouterId; extern Directory gDirectory; @@ -35,6 +37,7 @@ extern PortsOrch *gPortsOrch; extern IntfsOrch *gIntfsOrch; extern NeighOrch *gNeighOrch; extern CrmOrch *gCrmOrch; +extern RouteOrch *gRouteOrch; extern MacAddress gVxlanMacAddress; /* @@ -150,15 +153,18 @@ bool VNetVrfObject::hasRoute(IpPrefix& ipPrefix) return false; } -bool VNetVrfObject::addRoute(IpPrefix& ipPrefix, tunnelEndpoint& endp) +bool VNetVrfObject::addRoute(IpPrefix& ipPrefix, NextHopGroupKey& nexthops) { - if (hasRoute(ipPrefix)) + if (nexthops.is_overlay_nexthop()) { - SWSS_LOG_INFO("VNET route '%s' exists", ipPrefix.to_string().c_str()); + tunnels_[ipPrefix] = nexthops; + } + else + { + SWSS_LOG_ERROR("Input %s is not overlay nexthop group", nexthops.to_string().c_str()); return false; } - tunnels_[ipPrefix] = endp; return true; } @@ -237,8 +243,6 @@ bool VNetVrfObject::removeRoute(IpPrefix& ipPrefix) if (tunnels_.find(ipPrefix) != tunnels_.end()) { - auto endp = tunnels_.at(ipPrefix); - removeTunnelNextHop(endp); tunnels_.erase(ipPrefix); } else @@ -267,32 +271,32 @@ bool VNetVrfObject::getRouteNextHop(IpPrefix& ipPrefix, nextHop& nh) return true; } -sai_object_id_t VNetVrfObject::getTunnelNextHop(tunnelEndpoint& endp) +sai_object_id_t VNetVrfObject::getTunnelNextHop(NextHopKey& nh) { sai_object_id_t nh_id = SAI_NULL_OBJECT_ID; auto tun_name = getTunnelName(); VxlanTunnelOrch* vxlan_orch = gDirectory.get(); - nh_id = vxlan_orch->createNextHopTunnel(tun_name, endp.ip, endp.mac, endp.vni); + nh_id = vxlan_orch->createNextHopTunnel(tun_name, nh.ip_address, nh.mac_address, nh.vni); if (nh_id == SAI_NULL_OBJECT_ID) { - throw std::runtime_error("NH Tunnel create failed for " + vnet_name_ + " ip " + endp.ip.to_string()); + throw std::runtime_error("NH Tunnel create failed for " + vnet_name_ + " ip " + nh.ip_address.to_string()); } return nh_id; } -bool VNetVrfObject::removeTunnelNextHop(tunnelEndpoint& endp) +bool VNetVrfObject::removeTunnelNextHop(NextHopKey& nh) { auto tun_name = getTunnelName(); VxlanTunnelOrch* vxlan_orch = gDirectory.get(); - if (!vxlan_orch->removeNextHopTunnel(tun_name, endp.ip, endp.mac, endp.vni)) + if (!vxlan_orch->removeNextHopTunnel(tun_name, nh.ip_address, nh.mac_address, nh.vni)) { SWSS_LOG_ERROR("VNET %s Tunnel NextHop remove failed for '%s'", - vnet_name_.c_str(), endp.ip.to_string().c_str()); + vnet_name_.c_str(), nh.ip_address.to_string().c_str()); return false; } @@ -603,6 +607,28 @@ static bool add_route(sai_object_id_t vr_id, sai_ip_prefix_t& ip_pfx, sai_object return true; } +static bool update_route(sai_object_id_t vr_id, sai_ip_prefix_t& ip_pfx, sai_object_id_t nh_id) +{ + sai_route_entry_t route_entry; + route_entry.vr_id = vr_id; + route_entry.switch_id = gSwitchId; + route_entry.destination = ip_pfx; + + sai_attribute_t route_attr; + + route_attr.id = SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID; + route_attr.value.oid = nh_id; + + sai_status_t status = sai_route_api->set_route_entry_attribute(&route_entry, &route_attr); + if (status != SAI_STATUS_SUCCESS) + { + SWSS_LOG_ERROR("SAI failed to update route"); + return false; + } + + return true; +} + VNetRouteOrch::VNetRouteOrch(DBConnector *db, vector &tableNames, VNetOrch *vnetOrch) : Orch2(db, tableNames, request_), vnet_orch_(vnetOrch) { @@ -612,9 +638,166 @@ VNetRouteOrch::VNetRouteOrch(DBConnector *db, vector &tableNames, VNetOr handler_map_.insert(handler_pair(APP_VNET_RT_TUNNEL_TABLE_NAME, &VNetRouteOrch::handleTunnel)); } +bool VNetRouteOrch::hasNextHopGroup(const string& vnet, const NextHopGroupKey& nexthops) +{ + return syncd_nexthop_groups_[vnet].find(nexthops) != syncd_nexthop_groups_[vnet].end(); +} + +sai_object_id_t VNetRouteOrch::getNextHopGroupId(const string& vnet, const NextHopGroupKey& nexthops) +{ + assert(hasNextHopGroup(vnet, nexthops)); + return syncd_nexthop_groups_[vnet][nexthops].next_hop_group_id; +} + +bool VNetRouteOrch::addNextHopGroup(const string& vnet, const NextHopGroupKey &nexthops, VNetVrfObject *vrf_obj) +{ + SWSS_LOG_ENTER(); + + assert(!hasNextHopGroup(vnet, nexthops)); + + if (!gRouteOrch->checkNextHopGroupCount()) + { + SWSS_LOG_ERROR("Reached maximum number of next hop groups. Failed to create new next hop group."); + return false; + } + + vector next_hop_ids; + set next_hop_set = nexthops.getNextHops(); + std::map nhopgroup_members_set; + + for (auto it : next_hop_set) + { + sai_object_id_t next_hop_id = vrf_obj->getTunnelNextHop(it); + next_hop_ids.push_back(next_hop_id); + nhopgroup_members_set[next_hop_id] = it; + } + + sai_attribute_t nhg_attr; + vector nhg_attrs; + + nhg_attr.id = SAI_NEXT_HOP_GROUP_ATTR_TYPE; + nhg_attr.value.s32 = SAI_NEXT_HOP_GROUP_TYPE_ECMP; + nhg_attrs.push_back(nhg_attr); + + sai_object_id_t next_hop_group_id; + sai_status_t status = sai_next_hop_group_api->create_next_hop_group(&next_hop_group_id, + gSwitchId, + (uint32_t)nhg_attrs.size(), + nhg_attrs.data()); + + if (status != SAI_STATUS_SUCCESS) + { + SWSS_LOG_ERROR("Failed to create next hop group %s, rv:%d", + nexthops.to_string().c_str(), status); + return false; + } + + gRouteOrch->increaseNextHopGroupCount(); + gCrmOrch->incCrmResUsedCounter(CrmResourceType::CRM_NEXTHOP_GROUP); + SWSS_LOG_NOTICE("Create next hop group %s", nexthops.to_string().c_str()); + + NextHopGroupInfo next_hop_group_entry; + next_hop_group_entry.next_hop_group_id = next_hop_group_id; + + for (auto nhid: next_hop_ids) + { + // Create a next hop group member + vector nhgm_attrs; + + sai_attribute_t nhgm_attr; + nhgm_attr.id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID; + nhgm_attr.value.oid = next_hop_group_id; + nhgm_attrs.push_back(nhgm_attr); + + nhgm_attr.id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_ID; + nhgm_attr.value.oid = nhid; + nhgm_attrs.push_back(nhgm_attr); + + sai_object_id_t next_hop_group_member_id; + status = sai_next_hop_group_api->create_next_hop_group_member(&next_hop_group_member_id, + gSwitchId, + (uint32_t)nhgm_attrs.size(), + nhgm_attrs.data()); + + if (status != SAI_STATUS_SUCCESS) + { + SWSS_LOG_ERROR("Failed to create next hop group %" PRIx64 " member %" PRIx64 ": %d\n", + next_hop_group_id, next_hop_group_member_id, status); + return false; + } + + gCrmOrch->incCrmResUsedCounter(CrmResourceType::CRM_NEXTHOP_GROUP_MEMBER); + + // Save the membership into next hop structure + next_hop_group_entry.active_members[nhopgroup_members_set.find(nhid)->second] = + next_hop_group_member_id; + } + + /* + * Initialize the next hop group structure with ref_count as 0. This + * count will increase once the route is successfully syncd. + */ + next_hop_group_entry.ref_count = 0; + syncd_nexthop_groups_[vnet][nexthops] = next_hop_group_entry; + + return true; +} + +bool VNetRouteOrch::removeNextHopGroup(const string& vnet, const NextHopGroupKey &nexthops, VNetVrfObject *vrf_obj) +{ + SWSS_LOG_ENTER(); + + sai_object_id_t next_hop_group_id; + auto next_hop_group_entry = syncd_nexthop_groups_[vnet].find(nexthops); + sai_status_t status; + + assert(next_hop_group_entry != syncd_nexthop_groups_[vnet].end()); + + if (next_hop_group_entry->second.ref_count != 0) + { + return true; + } + + next_hop_group_id = next_hop_group_entry->second.next_hop_group_id; + SWSS_LOG_NOTICE("Delete next hop group %s", nexthops.to_string().c_str()); + + for (auto nhop = next_hop_group_entry->second.active_members.begin(); + nhop != next_hop_group_entry->second.active_members.end();) + { + NextHopKey nexthop = nhop->first; + + status = sai_next_hop_group_api->remove_next_hop_group_member(nhop->second); + if (status != SAI_STATUS_SUCCESS) + { + SWSS_LOG_ERROR("Failed to remove next hop group member %" PRIx64 ", rv:%d", + nhop->second, status); + return false; + } + + vrf_obj->removeTunnelNextHop(nexthop); + + gCrmOrch->decCrmResUsedCounter(CrmResourceType::CRM_NEXTHOP_GROUP_MEMBER); + nhop = next_hop_group_entry->second.active_members.erase(nhop); + } + + status = sai_next_hop_group_api->remove_next_hop_group(next_hop_group_id); + if (status != SAI_STATUS_SUCCESS) + { + SWSS_LOG_ERROR("Failed to remove next hop group %" PRIx64 ", rv:%d", next_hop_group_id, status); + return false; + } + + gRouteOrch->decreaseNextHopGroupCount(); + gCrmOrch->decCrmResUsedCounter(CrmResourceType::CRM_NEXTHOP_GROUP); + + syncd_nexthop_groups_[vnet].erase(nexthops); + + return true; +} + template<> bool VNetRouteOrch::doRouteTask(const string& vnet, IpPrefix& ipPrefix, - tunnelEndpoint& endp, string& op) + NextHopGroupKey& nexthops, string& op) { SWSS_LOG_ENTER(); @@ -648,28 +831,123 @@ bool VNetRouteOrch::doRouteTask(const string& vnet, IpPrefix& ipP auto *vrf_obj = vnet_orch_->getTypePtr(vnet); sai_ip_prefix_t pfx; copy(pfx, ipPrefix); - sai_object_id_t nh_id = (op == SET_COMMAND)?vrf_obj->getTunnelNextHop(endp):SAI_NULL_OBJECT_ID; - for (auto vr_id : vr_set) + if (op == SET_COMMAND) { - if (op == SET_COMMAND && !add_route(vr_id, pfx, nh_id)) + sai_object_id_t nh_id; + /* The route is pointing to one single endpoint */ + if (!hasNextHopGroup(vnet, nexthops)) { - SWSS_LOG_ERROR("Route add failed for %s, vr_id '0x%" PRIx64, ipPrefix.to_string().c_str(), vr_id); - return false; + if (nexthops.getSize() == 1) + { + NextHopKey nexthop(nexthops.to_string(), true); + NextHopGroupInfo next_hop_group_entry; + next_hop_group_entry.next_hop_group_id = vrf_obj->getTunnelNextHop(nexthop); + next_hop_group_entry.ref_count = 0; + next_hop_group_entry.active_members[nexthop] = SAI_NULL_OBJECT_ID; + syncd_nexthop_groups_[vnet][nexthops] = next_hop_group_entry; + } + else + { + if (!addNextHopGroup(vnet, nexthops, vrf_obj)) + { + SWSS_LOG_ERROR("Failed to create next hop group %s", nexthops.to_string().c_str()); + return false; + } + } } - else if (op == DEL_COMMAND && !del_route(vr_id, pfx)) + nh_id = syncd_nexthop_groups_[vnet][nexthops].next_hop_group_id; + + auto it_route = syncd_tunnel_routes_[vnet].find(ipPrefix); + for (auto vr_id : vr_set) { - SWSS_LOG_ERROR("Route del failed for %s, vr_id '0x%" PRIx64, ipPrefix.to_string().c_str(), vr_id); - return false; + bool route_status = true; + + if (it_route == syncd_tunnel_routes_[vnet].end()) + { + route_status = add_route(vr_id, pfx, nh_id); + } + else + { + route_status = update_route(vr_id, pfx, nh_id); + } + + if (!route_status) + { + SWSS_LOG_ERROR("Route add/update failed for %s, vr_id '0x%" PRIx64, ipPrefix.to_string().c_str(), vr_id); + /* Clean up the newly created next hop group entry */ + if (nexthops.getSize() > 1) + { + removeNextHopGroup(vnet, nexthops, vrf_obj); + } + return false; + } } - } - if (op == SET_COMMAND) - { - vrf_obj->addRoute(ipPrefix, endp); + if (it_route != syncd_tunnel_routes_[vnet].end()) + { + // In case of updating an existing route, decrease the reference count for the previous nexthop group + NextHopGroupKey nhg = it_route->second; + if(--syncd_nexthop_groups_[vnet][nhg].ref_count == 0) + { + if (nexthops.getSize() > 1) + { + removeNextHopGroup(vnet, nhg, vrf_obj); + } + else + { + syncd_nexthop_groups_[vnet].erase(nhg); + NextHopKey nexthop(nhg.to_string(), true); + vrf_obj->removeTunnelNextHop(nexthop); + } + } + vrf_obj->removeRoute(ipPrefix); + } + + syncd_tunnel_routes_[vnet][ipPrefix] = nexthops; + syncd_nexthop_groups_[vnet][nexthops].ref_count++; + vrf_obj->addRoute(ipPrefix, nexthops); } - else + else if (op == DEL_COMMAND) { + auto it_route = syncd_tunnel_routes_[vnet].find(ipPrefix); + if (it_route == syncd_tunnel_routes_[vnet].end()) + { + SWSS_LOG_INFO("Failed to find tunnel route entry, prefix %s\n", + ipPrefix.to_string().c_str()); + return true; + } + NextHopGroupKey nhg = it_route->second; + + for (auto vr_id : vr_set) + { + if (!del_route(vr_id, pfx)) + { + SWSS_LOG_ERROR("Route del failed for %s, vr_id '0x%" PRIx64, ipPrefix.to_string().c_str(), vr_id); + return false; + } + } + + if(--syncd_nexthop_groups_[vnet][nhg].ref_count == 0) + { + if (nhg.getSize() > 1) + { + removeNextHopGroup(vnet, nhg, vrf_obj); + } + else + { + syncd_nexthop_groups_[vnet].erase(nhg); + NextHopKey nexthop(nhg.to_string(), true); + vrf_obj->removeTunnelNextHop(nexthop); + } + } + + syncd_tunnel_routes_[vnet].erase(ipPrefix); + if (syncd_tunnel_routes_[vnet].empty()) + { + syncd_tunnel_routes_.erase(vnet); + } + vrf_obj->removeRoute(ipPrefix); } @@ -1041,23 +1319,25 @@ bool VNetRouteOrch::handleTunnel(const Request& request) { SWSS_LOG_ENTER(); - IpAddress ip; - MacAddress mac; - uint32_t vni = 0; + vector ip_list; + vector mac_list; + vector vni_list; for (const auto& name: request.getAttrFieldNames()) { if (name == "endpoint") { - ip = request.getAttrIP(name); + ip_list = request.getAttrIPList(name); } else if (name == "vni") { - vni = static_cast(request.getAttrUint(name)); + string vni_str = request.getAttrString(name); + vni_list = tokenize(vni_str, ','); } else if (name == "mac_address") { - mac = request.getAttrMacAddress(name); + string mac_str = request.getAttrString(name); + mac_list = tokenize(mac_str, ','); } else { @@ -1066,6 +1346,18 @@ bool VNetRouteOrch::handleTunnel(const Request& request) } } + if (vni_list.size() > 1 && vni_list.size() != ip_list.size()) + { + SWSS_LOG_ERROR("VNI size of %zu does not match endpoint size of %zu", vni_list.size(), ip_list.size()); + return false; + } + + if (!mac_list.empty() && mac_list.size() != ip_list.size()) + { + SWSS_LOG_ERROR("MAC address size of %zu does not match endpoint size of %zu", mac_list.size(), ip_list.size()); + return false; + } + const std::string& vnet_name = request.getKeyString(0); auto ip_pfx = request.getKeyIpPrefix(1); auto op = request.getOperation(); @@ -1073,11 +1365,33 @@ bool VNetRouteOrch::handleTunnel(const Request& request) SWSS_LOG_INFO("VNET-RT '%s' op '%s' for pfx %s", vnet_name.c_str(), op.c_str(), ip_pfx.to_string().c_str()); - tunnelEndpoint endp = { ip, mac, vni }; + NextHopGroupKey nhg("", true); + for (size_t idx_ip = 0; idx_ip < ip_list.size(); idx_ip++) + { + IpAddress ip = ip_list[idx_ip]; + MacAddress mac; + uint32_t vni = 0; + if (vni_list.size() == 1 && vni_list[0] != "") + { + vni = (uint32_t)stoul(vni_list[0]); + } + else if (vni_list.size() > 1 && vni_list[idx_ip] != "") + { + vni = (uint32_t)stoul(vni_list[idx_ip]); + } + + if (!mac_list.empty() && mac_list[idx_ip] != "") + { + mac = MacAddress(mac_list[idx_ip]); + } + + NextHopKey nh(ip, mac, vni, true); + nhg.add(nh); + } if (vnet_orch_->isVnetExecVrf()) { - return doRouteTask(vnet_name, ip_pfx, endp, op); + return doRouteTask(vnet_name, ip_pfx, nhg, op); } return true; diff --git a/orchagent/vnetorch.h b/orchagent/vnetorch.h index 2ca48ec3a0e3..569a23f2e086 100644 --- a/orchagent/vnetorch.h +++ b/orchagent/vnetorch.h @@ -12,6 +12,7 @@ #include "ipaddresses.h" #include "producerstatetable.h" #include "observer.h" +#include "nexthopgroupkey.h" #define VNET_BITMAP_SIZE 32 #define VNET_TUNNEL_SIZE 40960 @@ -66,11 +67,11 @@ class VNetRequest : public Request VNetRequest() : Request(vnet_request_description, ':') { } }; -struct tunnelEndpoint +struct NextHopGroupInfo { - IpAddress ip; - MacAddress mac; - uint32_t vni; + sai_object_id_t next_hop_group_id; // next hop group id (null for single nexthop) + int ref_count; // reference count + std::map active_members; // active nexthops and nexthop group member id (null for single nexthop) }; class VNetObject @@ -125,7 +126,7 @@ struct nextHop string ifname; }; -typedef std::map TunnelRoutes; +typedef std::map TunnelRoutes; typedef std::map RouteMap; class VNetVrfObject : public VNetObject @@ -165,7 +166,7 @@ class VNetVrfObject : public VNetObject bool updateObj(vector&); - bool addRoute(IpPrefix& ipPrefix, tunnelEndpoint& endp); + bool addRoute(IpPrefix& ipPrefix, NextHopGroupKey& nexthops); bool addRoute(IpPrefix& ipPrefix, nextHop& nh); bool removeRoute(IpPrefix& ipPrefix); @@ -173,8 +174,8 @@ class VNetVrfObject : public VNetObject bool getRouteNextHop(IpPrefix& ipPrefix, nextHop& nh); bool hasRoute(IpPrefix& ipPrefix); - sai_object_id_t getTunnelNextHop(tunnelEndpoint& endp); - bool removeTunnelNextHop(tunnelEndpoint& endp); + sai_object_id_t getTunnelNextHop(NextHopKey& nh); + bool removeTunnelNextHop(NextHopKey& nh); void increaseNextHopRefCount(const nextHop&); void decreaseNextHopRefCount(const nextHop&); @@ -246,11 +247,12 @@ class VNetOrch : public Orch2 const request_description_t vnet_route_description = { { REQ_T_STRING, REQ_T_IP_PREFIX }, { - { "endpoint", REQ_T_IP }, - { "ifname", REQ_T_STRING }, - { "nexthop", REQ_T_STRING }, - { "vni", REQ_T_UINT }, - { "mac_address", REQ_T_MAC_ADDRESS }, + { "endpoint", REQ_T_IP_LIST }, + { "ifname", REQ_T_STRING }, + { "nexthop", REQ_T_STRING }, + { "vni", REQ_T_STRING }, + { "mac_address", REQ_T_STRING }, + { "endpoint_monitor", REQ_T_STRING }, }, { } }; @@ -281,6 +283,9 @@ struct VNetNextHopObserverEntry /* NextHopObserverTable: Destination IP address, next hop observer entry */ typedef std::map VNetNextHopObserverTable; +typedef std::map VNetNextHopGroupInfoTable; +typedef std::map VNetTunnelRouteTable; + class VNetRouteOrch : public Orch2, public Subject { public: @@ -302,8 +307,13 @@ class VNetRouteOrch : public Orch2, public Subject bool handleRoutes(const Request&); bool handleTunnel(const Request&); + bool hasNextHopGroup(const string&, const NextHopGroupKey&); + sai_object_id_t getNextHopGroupId(const string&, const NextHopGroupKey&); + bool addNextHopGroup(const string&, const NextHopGroupKey&, VNetVrfObject *vrf_obj); + bool removeNextHopGroup(const string&, const NextHopGroupKey&, VNetVrfObject *vrf_obj); + template - bool doRouteTask(const string& vnet, IpPrefix& ipPrefix, tunnelEndpoint& endp, string& op); + bool doRouteTask(const string& vnet, IpPrefix& ipPrefix, NextHopGroupKey& nexthops, string& op); template bool doRouteTask(const string& vnet, IpPrefix& ipPrefix, nextHop& nh, string& op); @@ -314,6 +324,8 @@ class VNetRouteOrch : public Orch2, public Subject VNetRouteTable syncd_routes_; VNetNextHopObserverTable next_hop_observers_; + std::map syncd_nexthop_groups_; + std::map syncd_tunnel_routes_; }; class VNetCfgRouteOrch : public Orch diff --git a/tests/test_vnet.py b/tests/test_vnet.py index c7fd3c1225e3..595c80a28b03 100644 --- a/tests/test_vnet.py +++ b/tests/test_vnet.py @@ -6,6 +6,7 @@ from swsscommon import swsscommon from pprint import pprint +from dvslib.dvs_common import wait_for_result def create_entry(tbl, key, pairs): @@ -139,7 +140,11 @@ def delete_vnet_local_routes(dvs, prefix, vnet_name): time.sleep(2) -def create_vnet_routes(dvs, prefix, vnet_name, endpoint, mac="", vni=0): +def create_vnet_routes(dvs, prefix, vnet_name, endpoint, mac="", vni=0, ep_monitor=""): + set_vnet_routes(dvs, prefix, vnet_name, endpoint, mac=mac, vni=vni, ep_monitor=ep_monitor) + + +def set_vnet_routes(dvs, prefix, vnet_name, endpoint, mac="", vni=0, ep_monitor=""): conf_db = swsscommon.DBConnector(swsscommon.CONFIG_DB, dvs.redis_sock, 0) attrs = [ @@ -152,11 +157,12 @@ def create_vnet_routes(dvs, prefix, vnet_name, endpoint, mac="", vni=0): if mac: attrs.append(('mac_address', mac)) - create_entry_tbl( - conf_db, - "VNET_ROUTE_TUNNEL", '|', "%s|%s" % (vnet_name, prefix), - attrs, - ) + if ep_monitor: + attrs.append(('endpoint_monitor', ep_monitor)) + + tbl = swsscommon.Table(conf_db, "VNET_ROUTE_TUNNEL") + fvs = swsscommon.FieldValuePairs(attrs) + tbl.set("%s|%s" % (vnet_name, prefix), fvs) time.sleep(2) @@ -429,7 +435,9 @@ class VnetVxlanVrfTunnel(object): ASIC_VRF_TABLE = "ASIC_STATE:SAI_OBJECT_TYPE_VIRTUAL_ROUTER" ASIC_ROUTE_ENTRY = "ASIC_STATE:SAI_OBJECT_TYPE_ROUTE_ENTRY" ASIC_NEXT_HOP = "ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP" - ASIC_VLAN_TABLE = "ASIC_STATE:SAI_OBJECT_TYPE_VLAN" + ASIC_VLAN_TABLE = "ASIC_STATE:SAI_OBJECT_TYPE_VLAN" + ASIC_NEXT_HOP_GROUP = "ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP_GROUP" + ASIC_NEXT_HOP_GROUP_MEMBER = "ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP_GROUP_MEMBER" tunnel_map_ids = set() tunnel_map_entry_ids = set() @@ -440,6 +448,7 @@ class VnetVxlanVrfTunnel(object): vnet_vr_ids = set() vr_map = {} nh_ids = {} + nhg_ids = {} def fetch_exist_entries(self, dvs): self.vnet_vr_ids = get_exist_entries(dvs, self.ASIC_VRF_TABLE) @@ -450,6 +459,7 @@ def fetch_exist_entries(self, dvs): self.rifs = get_exist_entries(dvs, self.ASIC_RIF_TABLE) self.routes = get_exist_entries(dvs, self.ASIC_ROUTE_ENTRY) self.nhops = get_exist_entries(dvs, self.ASIC_NEXT_HOP) + self.nhgs = get_exist_entries(dvs, self.ASIC_NEXT_HOP_GROUP) global loopback_id, def_vr_id, switch_mac if not loopback_id: @@ -670,7 +680,7 @@ def check_del_vnet_local_routes(self, dvs, name): # TODO: Implement for VRF VNET return True - def check_vnet_routes(self, dvs, name, endpoint, tunnel, mac="", vni=0): + def check_vnet_routes(self, dvs, name, endpoint, tunnel, mac="", vni=0, route_ids=""): asic_db = swsscommon.DBConnector(swsscommon.ASIC_DB, dvs.redis_sock, 0) vr_ids = self.vnet_route_ids(dvs, name) @@ -697,7 +707,10 @@ def check_vnet_routes(self, dvs, name, endpoint, tunnel, mac="", vni=0): self.nhops.add(new_nh) check_object(asic_db, self.ASIC_NEXT_HOP, new_nh, expected_attr) - new_route = get_created_entries(asic_db, self.ASIC_ROUTE_ENTRY, self.routes, count) + if not route_ids: + new_route = get_created_entries(asic_db, self.ASIC_ROUTE_ENTRY, self.routes, count) + else: + new_route = route_ids #Check if the route is in expected VRF asic_vrs = set() @@ -714,8 +727,107 @@ def check_vnet_routes(self, dvs, name, endpoint, tunnel, mac="", vni=0): self.routes.update(new_route) - def check_del_vnet_routes(self, dvs, name): + return new_route + + def serialize_endpoint_group(self, endpoints): + endpoints.sort() + return ",".join(endpoints) + + def check_next_hop_group_member(self, dvs, nhg, expected_endpoint, expected_attrs): + expected_endpoint_str = self.serialize_endpoint_group(expected_endpoint) + asic_db = swsscommon.DBConnector(swsscommon.ASIC_DB, dvs.redis_sock, 0) + tbl_nhgm = swsscommon.Table(asic_db, self.ASIC_NEXT_HOP_GROUP_MEMBER) + tbl_nh = swsscommon.Table(asic_db, self.ASIC_NEXT_HOP) + entries = set(tbl_nhgm.getKeys()) + endpoints = [] + for entry in entries: + status, fvs = tbl_nhgm.get(entry) + fvs = dict(fvs) + assert status, "Got an error when get a key" + if fvs["SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID"] == nhg: + nh_key = fvs["SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_ID"] + status, nh_fvs = tbl_nh.get(nh_key) + nh_fvs = dict(nh_fvs) + assert status, "Got an error when get a key" + endpoint = nh_fvs["SAI_NEXT_HOP_ATTR_IP"] + endpoints.append(endpoint) + assert endpoint in expected_attrs + check_object(asic_db, self.ASIC_NEXT_HOP, nh_key, expected_attrs[endpoint]) + + assert self.serialize_endpoint_group(endpoints) == expected_endpoint_str + + def check_vnet_ecmp_routes(self, dvs, name, endpoints, tunnel, mac=[], vni=[], route_ids=[], nhg=""): + asic_db = swsscommon.DBConnector(swsscommon.ASIC_DB, dvs.redis_sock, 0) + endpoint_str = name + "|" + self.serialize_endpoint_group(endpoints) + + vr_ids = self.vnet_route_ids(dvs, name) + count = len(vr_ids) + + expected_attrs = {} + for idx, endpoint in enumerate(endpoints): + expected_attr = { + "SAI_NEXT_HOP_ATTR_TYPE": "SAI_NEXT_HOP_TYPE_TUNNEL_ENCAP", + "SAI_NEXT_HOP_ATTR_IP": endpoint, + "SAI_NEXT_HOP_ATTR_TUNNEL_ID": self.tunnel[tunnel], + } + if vni and vni[idx]: + expected_attr.update({'SAI_NEXT_HOP_ATTR_TUNNEL_VNI': vni[idx]}) + if mac and mac[idx]: + expected_attr.update({'SAI_NEXT_HOP_ATTR_TUNNEL_MAC': mac[idx]}) + expected_attrs[endpoint] = expected_attr + + if nhg: + new_nhg = nhg + elif endpoint_str in self.nhg_ids: + new_nhg = self.nhg_ids[endpoint_str] + else: + new_nhg = get_created_entry(asic_db, self.ASIC_NEXT_HOP_GROUP, self.nhgs) + self.nhg_ids[endpoint_str] = new_nhg + self.nhgs.add(new_nhg) + + + # Check routes in ingress VRF + expected_nhg_attr = { + "SAI_NEXT_HOP_GROUP_ATTR_TYPE": "SAI_NEXT_HOP_GROUP_TYPE_DYNAMIC_UNORDERED_ECMP", + } + check_object(asic_db, self.ASIC_NEXT_HOP_GROUP, new_nhg, expected_nhg_attr) + + # Check nexthop group member + self.check_next_hop_group_member(dvs, new_nhg, endpoints, expected_attrs) + + if route_ids: + new_route = route_ids + else: + new_route = get_created_entries(asic_db, self.ASIC_ROUTE_ENTRY, self.routes, count) + + #Check if the route is in expected VRF + asic_vrs = set() + for idx in range(count): + check_object(asic_db, self.ASIC_ROUTE_ENTRY, new_route[idx], + { + "SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID": new_nhg, + } + ) + rt_key = json.loads(new_route[idx]) + asic_vrs.add(rt_key['vr']) + + assert asic_vrs == vr_ids + + self.routes.update(new_route) + + return new_route, new_nhg + + def check_del_vnet_routes(self, dvs, name, prefixes=[]): # TODO: Implement for VRF VNET + + def _access_function(): + route_entries = get_exist_entries(dvs, self.ASIC_ROUTE_ENTRY) + route_prefixes = [json.loads(route_entry)["dest"] for route_entry in route_entries] + return (all(prefix not in route_prefixes for prefix in prefixes), None) + + if prefixes: + wait_for_result(_access_function) + return True @@ -790,7 +902,7 @@ def test_vnet_orch_1(self, dvs, testlog): vnet_obj.check_del_vnet_routes(dvs, 'Vnet_2001') delete_vnet_routes(dvs, "100.100.1.1/32", 'Vnet_2000') - vnet_obj.check_del_vnet_routes(dvs, 'Vnet_2001') + vnet_obj.check_del_vnet_routes(dvs, 'Vnet_2000') delete_phy_interface(dvs, "Ethernet4", "100.102.1.1/24") vnet_obj.check_del_router_interface(dvs, "Ethernet4") @@ -1125,6 +1237,128 @@ def test_vnet_vxlan_multi_map(self, dvs, testlog): create_vxlan_tunnel_map(dvs, tunnel_name, 'map_1', 'Vlan1000', '1000') + ''' + Test 7 - Test for vnet tunnel routes with ECMP nexthop group + ''' + def test_vnet_orch_7(self, dvs, testlog): + vnet_obj = self.get_vnet_obj() + + tunnel_name = 'tunnel_7' + + vnet_obj.fetch_exist_entries(dvs) + + create_vxlan_tunnel(dvs, tunnel_name, '7.7.7.7') + create_vnet_entry(dvs, 'Vnet7', tunnel_name, '10007', "") + + vnet_obj.check_vnet_entry(dvs, 'Vnet7') + vnet_obj.check_vxlan_tunnel_entry(dvs, tunnel_name, 'Vnet7', '10007') + + vnet_obj.check_vxlan_tunnel(dvs, tunnel_name, '7.7.7.7') + + # Create an ECMP tunnel route + vnet_obj.fetch_exist_entries(dvs) + create_vnet_routes(dvs, "100.100.1.1/32", 'Vnet7', '7.0.0.1,7.0.0.2,7.0.0.3') + route1, nhg1_1 = vnet_obj.check_vnet_ecmp_routes(dvs, 'Vnet7', ['7.0.0.1', '7.0.0.2', '7.0.0.3'], tunnel_name) + + # Set the tunnel route to another nexthop group + set_vnet_routes(dvs, "100.100.1.1/32", 'Vnet7', '7.0.0.1,7.0.0.2,7.0.0.3,7.0.0.4') + route1, nhg1_2 = vnet_obj.check_vnet_ecmp_routes(dvs, 'Vnet7', ['7.0.0.1', '7.0.0.2', '7.0.0.3', '7.0.0.4'], tunnel_name, route_ids=route1) + + # Check the previous nexthop group is removed + vnet_obj.fetch_exist_entries(dvs) + assert nhg1_1 not in vnet_obj.nhgs + + # Create another tunnel route to the same set of endpoints + create_vnet_routes(dvs, "100.100.2.1/32", 'Vnet7', '7.0.0.1,7.0.0.2,7.0.0.3,7.0.0.4') + route2, nhg2_1 = vnet_obj.check_vnet_ecmp_routes(dvs, 'Vnet7', ['7.0.0.1', '7.0.0.2', '7.0.0.3', '7.0.0.4'], tunnel_name) + + assert nhg2_1 == nhg1_2 + + # Remove one of the tunnel routes + delete_vnet_routes(dvs, "100.100.1.1/32", 'Vnet7') + vnet_obj.check_del_vnet_routes(dvs, 'Vnet7', ["100.100.1.1/32"]) + + # Check the nexthop group still exists + vnet_obj.fetch_exist_entries(dvs) + assert nhg1_2 in vnet_obj.nhgs + + # Remove the other tunnel route + delete_vnet_routes(dvs, "100.100.2.1/32", 'Vnet7') + vnet_obj.check_del_vnet_routes(dvs, 'Vnet7', ["100.100.2.1/32"]) + + # Check the nexthop group is removed + vnet_obj.fetch_exist_entries(dvs) + assert nhg2_1 not in vnet_obj.nhgs + + delete_vnet_entry(dvs, 'Vnet7') + vnet_obj.check_del_vnet_entry(dvs, 'Vnet7') + + ''' + Test 8 - Test for ipv6 vnet tunnel routes with ECMP nexthop group + ''' + def test_vnet_orch_8(self, dvs, testlog): + vnet_obj = self.get_vnet_obj() + + tunnel_name = 'tunnel_8' + + vnet_obj.fetch_exist_entries(dvs) + + create_vxlan_tunnel(dvs, tunnel_name, 'fd:8::32') + create_vnet_entry(dvs, 'Vnet8', tunnel_name, '10008', "") + + vnet_obj.check_vnet_entry(dvs, 'Vnet8') + vnet_obj.check_vxlan_tunnel_entry(dvs, tunnel_name, 'Vnet8', '10008') + + vnet_obj.check_vxlan_tunnel(dvs, tunnel_name, 'fd:8::32') + + # Create an ECMP tunnel route + vnet_obj.fetch_exist_entries(dvs) + create_vnet_routes(dvs, "fd:8:10::32/128", 'Vnet8', 'fd:8:1::1,fd:8:1::2,fd:8:1::3') + route1, nhg1_1 = vnet_obj.check_vnet_ecmp_routes(dvs, 'Vnet8', ['fd:8:1::1', 'fd:8:1::2', 'fd:8:1::3'], tunnel_name) + + # Set the tunnel route to another nexthop group + set_vnet_routes(dvs, "fd:8:10::32/128", 'Vnet8', 'fd:8:1::1,fd:8:1::2,fd:8:1::3,fd:8:1::4') + route1, nhg1_2 = vnet_obj.check_vnet_ecmp_routes(dvs, 'Vnet8', ['fd:8:1::1', 'fd:8:1::2', 'fd:8:1::3', 'fd:8:1::4'], tunnel_name, route_ids=route1) + + # Check the previous nexthop group is removed + vnet_obj.fetch_exist_entries(dvs) + assert nhg1_1 not in vnet_obj.nhgs + + # Create another tunnel route to the same set of endpoints + create_vnet_routes(dvs, "fd:8:20::32/128", 'Vnet8', 'fd:8:1::1,fd:8:1::2,fd:8:1::3,fd:8:1::4') + route2, nhg2_1 = vnet_obj.check_vnet_ecmp_routes(dvs, 'Vnet8', ['fd:8:1::1', 'fd:8:1::2', 'fd:8:1::3', 'fd:8:1::4'], tunnel_name) + + assert nhg2_1 == nhg1_2 + + # Create another tunnel route with ipv4 prefix to the same set of endpoints + create_vnet_routes(dvs, "8.0.0.0/24", 'Vnet8', 'fd:8:1::1,fd:8:1::2,fd:8:1::3,fd:8:1::4') + route3, nhg3_1 = vnet_obj.check_vnet_ecmp_routes(dvs, 'Vnet8', ['fd:8:1::1', 'fd:8:1::2', 'fd:8:1::3', 'fd:8:1::4'], tunnel_name) + + assert nhg3_1 == nhg1_2 + + # Remove one of the tunnel routes + delete_vnet_routes(dvs, "fd:8:10::32/128", 'Vnet8') + vnet_obj.check_del_vnet_routes(dvs, 'Vnet8', ["fd:8:10::32/128"]) + + # Check the nexthop group still exists + vnet_obj.fetch_exist_entries(dvs) + assert nhg1_2 in vnet_obj.nhgs + + # Remove tunnel route 2 + delete_vnet_routes(dvs, "fd:8:20::32/128", 'Vnet8') + vnet_obj.check_del_vnet_routes(dvs, 'Vnet8', ["fd:8:20::32/128"]) + + # Remove tunnel route 3 + delete_vnet_routes(dvs, "8.0.0.0/24", 'Vnet8') + vnet_obj.check_del_vnet_routes(dvs, 'Vnet8', ["8.0.0.0/24"]) + + # Check the nexthop group is removed + vnet_obj.fetch_exist_entries(dvs) + assert nhg2_1 not in vnet_obj.nhgs + + delete_vnet_entry(dvs, 'Vnet8') + vnet_obj.check_del_vnet_entry(dvs, 'Vnet8') + # Add Dummy always-pass test at end as workaroud # for issue when Flaky fail on final test it invokes module tear-down before retrying