From fee171661118a0988a9a80ec6e5e48a20e6c798a Mon Sep 17 00:00:00 2001 From: anish-n <44376847+anish-n@users.noreply.github.com> Date: Wed, 30 Sep 2020 12:05:48 -0700 Subject: [PATCH] [fgnhgorch] Changes for Fine Grained ECMP (#1315) * Fine grained ECMP initial commit * Fix compilation, test failures and add logic to query SAI_NEXT_HOP_GROUP_ATTR_REAL_SIZE from SAI * Misc fixes after migrating branch * Misc-fixes * Add support to fgnhgorch for neighbor and interface availability changes * fgnhg group/prefix/member addition and deletion * fix indentation and address comment * Improve error handling and add support for crm and neigh refcounts * Temporarily disable dynamic changes to route from fine grained to standard ecmp * Add checks to limit Fine Grained ECMP to default VRF * handle fgnhg prefix addition deletion with route bulker * fix indentation * Add handling for neigh edge case scenarios and improve routeorch bulker integration * write route entry to state_db * Fix bug with add and remove nh in a bank at the same time * Improve test coverage and fix issues with multi-nh changes, misc code improvements * Improve distribution of hash buckets per nh when hash buckets are not divisible by # of buckets * Incorporate review comments v1 * Reduce exposure of private routeorch functions/variables outside of routeorch * Fix an uninitialized variable error found in testing * Change test_fgnhg.py to use the new framework to validate db entries * Remove exposure of routeorch nextHopGroupCounts outside of routeorch, and misc. improvements * camelCase changes for fgnhgorch * RouteOrch add/removeRoutePost do not need logic to reroute calls to fgnhgorch * camelCase changes v2 --- orchagent/Makefile.am | 1 + orchagent/fgnhgorch.cpp | 1488 ++++++++++++++++++++++++ orchagent/fgnhgorch.h | 130 +++ orchagent/neighorch.cpp | 5 + orchagent/orchdaemon.cpp | 13 +- orchagent/routeorch.cpp | 90 +- orchagent/routeorch.h | 11 +- tests/mock_tests/Makefile.am | 1 + tests/mock_tests/aclorch_ut.cpp | 13 +- tests/mock_tests/mock_orchagent_main.h | 2 + tests/test_fgnhg.py | 574 +++++++++ 11 files changed, 2322 insertions(+), 6 deletions(-) create mode 100644 orchagent/fgnhgorch.cpp create mode 100644 orchagent/fgnhgorch.h create mode 100644 tests/test_fgnhg.py diff --git a/orchagent/Makefile.am b/orchagent/Makefile.am index fefadb2bb8..2aa0337448 100644 --- a/orchagent/Makefile.am +++ b/orchagent/Makefile.am @@ -35,6 +35,7 @@ orchagent_SOURCES = \ neighorch.cpp \ intfsorch.cpp \ portsorch.cpp \ + fgnhgorch.cpp \ copporch.cpp \ tunneldecaporch.cpp \ qosorch.cpp \ diff --git a/orchagent/fgnhgorch.cpp b/orchagent/fgnhgorch.cpp new file mode 100644 index 0000000000..6e6ae980f3 --- /dev/null +++ b/orchagent/fgnhgorch.cpp @@ -0,0 +1,1488 @@ +#include +#include +#include "fgnhgorch.h" +#include "routeorch.h" +#include "logger.h" +#include "swssnet.h" +#include "crmorch.h" +#include + +extern sai_object_id_t gVirtualRouterId; +extern sai_object_id_t gSwitchId; + +extern sai_next_hop_group_api_t* sai_next_hop_group_api; +extern sai_route_api_t* sai_route_api; + +extern RouteOrch *gRouteOrch; +extern CrmOrch *gCrmOrch; + +FgNhgOrch::FgNhgOrch(DBConnector *db, DBConnector *appDb, DBConnector *stateDb, vector &tableNames, NeighOrch *neighOrch, IntfsOrch *intfsOrch, VRFOrch *vrfOrch) : + Orch(db, tableNames), + m_neighOrch(neighOrch), + m_intfsOrch(intfsOrch), + m_vrfOrch(vrfOrch), + m_stateWarmRestartRouteTable(stateDb, STATE_FG_ROUTE_TABLE_NAME), + m_routeTable(appDb, APP_ROUTE_TABLE_NAME) +{ + SWSS_LOG_ENTER(); +} + + +/* 
calculateBankHashBucketStartIndices: generates the hash_bucket_indices for all banks + * and stores it in fgNhgEntry for the group. + * The function will identify the # of next-hops assigned to each bank and + * assign the total number of hash buckets for a bank, based on the proportional + * number of next-hops in the bank. + * eg: Bank0: 6 nh, Bank1: 3 nh, total buckets: 30 => + * calculateBankHashBucketStartIndices: Bank0: Bucket# 0-19, Bank1: Bucket# 20-29 + */ +void FgNhgOrch::calculateBankHashBucketStartIndices(FgNhgEntry *fgNhgEntry) +{ + SWSS_LOG_ENTER(); + uint32_t num_banks = 0; + vector memb_per_bank; + for (auto nh : fgNhgEntry->next_hops) + { + while (nh.second + 1 > num_banks) + { + num_banks++; + memb_per_bank.push_back(0); + } + memb_per_bank[nh.second] = memb_per_bank[nh.second] + 1; + } + + uint32_t buckets_per_nexthop = fgNhgEntry->real_bucket_size/((uint32_t)fgNhgEntry->next_hops.size()); + uint32_t extra_buckets = fgNhgEntry->real_bucket_size - (buckets_per_nexthop*((uint32_t)fgNhgEntry->next_hops.size())); + uint32_t split_extra_buckets_among_bank = extra_buckets/num_banks; + extra_buckets = extra_buckets - (split_extra_buckets_among_bank*num_banks); + + uint32_t prev_idx = 0; + + for (uint32_t i = 0; i < memb_per_bank.size(); i++) + { + BankIndexRange bir; + bir.start_index = prev_idx; + bir.end_index = bir.start_index + (buckets_per_nexthop * memb_per_bank[i]) + split_extra_buckets_among_bank - 1; + if (extra_buckets > 0) + { + bir.end_index = bir.end_index + 1; + extra_buckets--; + } + if (i == fgNhgEntry->hash_bucket_indices.size()) + { + fgNhgEntry->hash_bucket_indices.push_back(bir); + } + else + { + fgNhgEntry->hash_bucket_indices[i] = bir; + } + prev_idx = bir.end_index + 1; + SWSS_LOG_INFO("Calculate_bank_hash_bucket_start_indices: bank %d, si %d, ei %d", + i, fgNhgEntry->hash_bucket_indices[i].start_index, fgNhgEntry->hash_bucket_indices[i].end_index); + } +} + + +void FgNhgOrch::setStateDbRouteEntry(const IpPrefix &ipPrefix, uint32_t index, NextHopKey nextHop) +{ + SWSS_LOG_ENTER(); + + string key = ipPrefix.to_string(); + // Write to StateDb + std::vector fvs; + + // check if profile already exists - if yes - skip creation + m_stateWarmRestartRouteTable.get(key, fvs); + + //bucket rewrite + if (fvs.size() > index) + { + FieldValueTuple fv(std::to_string(index), nextHop.to_string()); + fvs[index] = fv; + SWSS_LOG_INFO("Set state db entry for ip prefix %s next hop %s with index %d", + ipPrefix.to_string().c_str(), nextHop.to_string().c_str(), index); + m_stateWarmRestartRouteTable.set(key, fvs); + } + else + { + fvs.push_back(FieldValueTuple(std::to_string(index), nextHop.to_string())); + SWSS_LOG_INFO("Add new next hop entry %s with index %d for ip prefix %s", + nextHop.to_string().c_str(), index, ipPrefix.to_string().c_str()); + m_stateWarmRestartRouteTable.set(key, fvs); + } + +} + + +bool FgNhgOrch::writeHashBucketChange(FGNextHopGroupEntry *syncd_fg_route_entry, uint32_t index, sai_object_id_t nh_oid, + const IpPrefix &ipPrefix, NextHopKey nextHop) +{ + SWSS_LOG_ENTER(); + + sai_attribute_t nhgm_attr; + nhgm_attr.id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_ID; + nhgm_attr.value.oid = nh_oid; + sai_status_t status = sai_next_hop_group_api->set_next_hop_group_member_attribute( + syncd_fg_route_entry->nhopgroup_members[index], + &nhgm_attr); + if (status != SAI_STATUS_SUCCESS) + { + SWSS_LOG_ERROR("Failed to set next hop oid %" PRIx64 " member %" PRIx64 ": %d", + syncd_fg_route_entry->nhopgroup_members[index], nh_oid, status); + return false; + } + + 
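/* At this point the SAI group member at hash-bucket `index` forwards to nh_oid;
 * the STATE_DB mirror below keeps the per-bucket view queryable. A minimal
 * sketch of the resulting entry, assuming NextHopKey::to_string() renders as
 * "ip@alias" (the table name comes from STATE_FG_ROUTE_TABLE_NAME; the prefix
 * and next hops here are hypothetical):
 *   "10.10.10.0/24" : { "0": "10.0.0.1@Ethernet4",
 *                       "1": "10.0.0.2@Ethernet8", ... }
 */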
setStateDbRouteEntry(ipPrefix, index, nextHop); + return true; +} + + +bool FgNhgOrch::createFineGrainedNextHopGroup(FGNextHopGroupEntry &syncd_fg_route_entry, FgNhgEntry *fgNhgEntry, + const NextHopGroupKey &nextHops) +{ + SWSS_LOG_ENTER(); + string platform = getenv("platform") ? getenv("platform") : ""; + sai_attribute_t nhg_attr; + vector nhg_attrs; + + nhg_attr.id = SAI_NEXT_HOP_GROUP_ATTR_TYPE; + nhg_attr.value.s32 = SAI_NEXT_HOP_GROUP_TYPE_FINE_GRAIN_ECMP; + nhg_attrs.push_back(nhg_attr); + + nhg_attr.id = SAI_NEXT_HOP_GROUP_ATTR_CONFIGURED_SIZE; + nhg_attr.value.s32 = fgNhgEntry->configured_bucket_size; + nhg_attrs.push_back(nhg_attr); + + sai_object_id_t next_hop_group_id; + + if (!gRouteOrch->createFineGrainedNextHopGroup(next_hop_group_id, nhg_attrs)) + { + SWSS_LOG_ERROR("Failed to create next hop group %s", + nextHops.to_string().c_str()); + return false; + } + + syncd_fg_route_entry.next_hop_group_id = next_hop_group_id; + + if (platform == VS_PLATFORM_SUBSTRING) + { + /* TODO: need implementation for SAI_NEXT_HOP_GROUP_ATTR_REAL_SIZE */ + fgNhgEntry->real_bucket_size = fgNhgEntry->configured_bucket_size; + } + else + { + nhg_attr.id = SAI_NEXT_HOP_GROUP_ATTR_REAL_SIZE; + nhg_attr.value.u32 = 0; + sai_status_t status = sai_next_hop_group_api->get_next_hop_group_attribute(next_hop_group_id, 1, &nhg_attr); + if (status != SAI_STATUS_SUCCESS) + { + SWSS_LOG_ERROR("Failed to query next hop group %s SAI_NEXT_HOP_GROUP_ATTR_REAL_SIZE, rv:%d", + nextHops.to_string().c_str(), status); + if (!removeFineGrainedNextHopGroup(&syncd_fg_route_entry, fgNhgEntry)) + { + SWSS_LOG_ERROR("Failed to clean-up after next hop group real_size query failure"); + } + return false; + } + fgNhgEntry->real_bucket_size = nhg_attr.value.u32; + } + + calculateBankHashBucketStartIndices(fgNhgEntry); + + SWSS_LOG_NOTICE("fgnhgorch created next hop group %s of size %d", nextHops.to_string().c_str(), fgNhgEntry->real_bucket_size); + return true; +} + + +bool FgNhgOrch::removeFineGrainedNextHopGroup(FGNextHopGroupEntry *syncd_fg_route_entry, FgNhgEntry *fgNhgEntry) +{ + SWSS_LOG_ENTER(); + sai_status_t status; + + for (auto nhgm : syncd_fg_route_entry->nhopgroup_members) + { + status = sai_next_hop_group_api->remove_next_hop_group_member(nhgm); + if (status != SAI_STATUS_SUCCESS) + { + SWSS_LOG_ERROR("Failed to remove next hop group member %" PRIx64 ", rv:%d", + nhgm, status); + return false; + } + gCrmOrch->decCrmResUsedCounter(CrmResourceType::CRM_NEXTHOP_GROUP_MEMBER); + } + + if (!gRouteOrch->removeFineGrainedNextHopGroup(syncd_fg_route_entry->next_hop_group_id)) + { + return false; + } + + return true; +} + + +bool FgNhgOrch::createFineGrainedRouteEntry(FGNextHopGroupEntry &syncd_fg_route_entry, FgNhgEntry *fgNhgEntry, + sai_object_id_t vrf_id, const IpPrefix &ipPrefix, const NextHopGroupKey &nextHops) +{ + SWSS_LOG_ENTER(); + sai_route_entry_t route_entry; + sai_attribute_t route_attr; + route_entry.vr_id = vrf_id; + route_entry.switch_id = gSwitchId; + copy(route_entry.destination, ipPrefix); + route_attr.id = SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID; + route_attr.value.oid = syncd_fg_route_entry.next_hop_group_id; + sai_status_t status = sai_route_api->create_route_entry(&route_entry, 1, &route_attr); + if (status != SAI_STATUS_SUCCESS) + { + SWSS_LOG_ERROR("Failed to create route %s with next hop(s) %s", + ipPrefix.to_string().c_str(), nextHops.to_string().c_str()); + + /* Clean up the newly created next hop group entry */ + if (!removeFineGrainedNextHopGroup(&syncd_fg_route_entry, fgNhgEntry)) + { + 
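/* Cleanup sketch: removeFineGrainedNextHopGroup() (defined above) removes each
 * next hop group member, decrements CRM_NEXTHOP_GROUP_MEMBER, and finally asks
 * routeorch to remove the group itself, so a failed route create leaves no
 * dangling SAI objects behind. */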
SWSS_LOG_ERROR("Failed to clean-up after route creation failure"); + } + return false; + } + + if (route_entry.destination.addr_family == SAI_IP_ADDR_FAMILY_IPV4) + { + gCrmOrch->incCrmResUsedCounter(CrmResourceType::CRM_IPV4_ROUTE); + } + else + { + gCrmOrch->incCrmResUsedCounter(CrmResourceType::CRM_IPV6_ROUTE); + } + return true; +} + + +bool FgNhgOrch::validNextHopInNextHopGroup(const NextHopKey& nexthop) +{ + SWSS_LOG_ENTER(); + + for (auto &route_tables : m_syncdFGRouteTables) + { + for (auto &route_table : route_tables.second) + { + if (!(route_table.second.nhg_key.contains(nexthop))) + { + continue; + } + + FGNextHopGroupEntry *syncd_fg_route_entry = &(route_table.second); + auto prefix_entry = fgNhgPrefixes.find(route_table.first); + if (prefix_entry == fgNhgPrefixes.end()) + { + SWSS_LOG_ERROR("Hit unexpected condition for %s:%s where route_table exists but no fgNhgEntry found", + route_table.first.to_string().c_str(), syncd_fg_route_entry->nhg_key.to_string().c_str()); + return false; + } + + FgNhgEntry *fgNhgEntry = prefix_entry->second; + std::map nhopgroup_members_set; + + std::vector bank_member_changes( + fgNhgEntry->hash_bucket_indices.size(), BankMemberChanges()); + + if (syncd_fg_route_entry->active_nexthops.find(nexthop) != + syncd_fg_route_entry->active_nexthops.end()) + { + return true; + } + + for (auto active_nh : syncd_fg_route_entry->active_nexthops) + { + bank_member_changes[fgNhgEntry->next_hops[active_nh.ip_address]]. + active_nhs.push_back(active_nh); + } + + bank_member_changes[fgNhgEntry->next_hops[nexthop.ip_address]]. + nhs_to_add.push_back(nexthop); + nhopgroup_members_set[nexthop] = m_neighOrch->getNextHopId(nexthop); + + if (!computeAndSetHashBucketChanges(syncd_fg_route_entry, fgNhgEntry, + bank_member_changes, nhopgroup_members_set, route_table.first)) + { + SWSS_LOG_ERROR("Failed to set fine grained next hop %s", + nexthop.to_string().c_str()); + return false; + } + + m_neighOrch->increaseNextHopRefCount(nexthop); + } + } + + return true; +} + + +bool FgNhgOrch::invalidNextHopInNextHopGroup(const NextHopKey& nexthop) +{ + SWSS_LOG_ENTER(); + + for (auto &route_tables : m_syncdFGRouteTables) + { + for (auto &route_table : route_tables.second) + { + if (!(route_table.second.nhg_key.contains(nexthop))) + { + continue; + } + + FGNextHopGroupEntry *syncd_fg_route_entry = &(route_table.second); + auto prefix_entry = fgNhgPrefixes.find(route_table.first); + if (prefix_entry == fgNhgPrefixes.end()) + { + SWSS_LOG_ERROR("Hit unexpected condition for %s:%s where route_table exists but no fgNhgEntry found", + route_table.first.to_string().c_str(), syncd_fg_route_entry->nhg_key.to_string().c_str()); + return false; + } + + FgNhgEntry *fgNhgEntry = prefix_entry->second; + std::map nhopgroup_members_set; + + std::vector bank_member_changes( + fgNhgEntry->hash_bucket_indices.size(), BankMemberChanges()); + + if (syncd_fg_route_entry->active_nexthops.find(nexthop) == + syncd_fg_route_entry->active_nexthops.end()) + { + return true; + } + + for (auto active_nh : syncd_fg_route_entry->active_nexthops) + { + if (active_nh.ip_address == nexthop.ip_address && + active_nh.alias == nexthop.alias) + { + continue; + } + + bank_member_changes[fgNhgEntry->next_hops[active_nh.ip_address]]. + active_nhs.push_back(active_nh); + + nhopgroup_members_set[active_nh] = m_neighOrch->getNextHopId(active_nh); + } + + bank_member_changes[fgNhgEntry->next_hops[nexthop.ip_address]]. 
+ nhs_to_del.push_back(nexthop); + + if (!computeAndSetHashBucketChanges(syncd_fg_route_entry, fgNhgEntry, + bank_member_changes, nhopgroup_members_set, route_table.first)) + { + SWSS_LOG_ERROR("Failed to set fine grained next hop %s", + nexthop.to_string().c_str()); + return false; + } + + m_neighOrch->decreaseNextHopRefCount(nexthop); + } + } + + return true; +} + + +/* setActiveBankHashBucketChanges: Sets hash buckets for active banks and called on a PER bank basis + * This function deals with a scenario where next-hop changes occured for the route, + * and the next-hop change didn't cause an entire bank to go active/inactive. + * The function uses bank_member_changes to compute the hash buckets to modify, in order to satisy the next-hop + * availability for the route/neigh. + * Eg: Prefix A had nhs 1, 2, 3 with 1, 2, 3, being equally distributed over hash buckets + * 0-59(20 buckets per nh). If there was a nh removal of nh 2, this fn would equally redistribute hash buckets + * for nh 2 to nh 1 and nh 3. Leading to 30 hash buckets, each, for nh 1 and nh 3, and none for nh 2. + * Thereby achieving consistent and layered hashing. + */ +bool FgNhgOrch::setActiveBankHashBucketChanges(FGNextHopGroupEntry *syncd_fg_route_entry, FgNhgEntry *fgNhgEntry, + uint32_t bank, uint32_t syncd_bank, std::vector bank_member_changes, + std::map &nhopgroup_members_set, const IpPrefix &ipPrefix) +{ + SWSS_LOG_ENTER(); + + BankMemberChanges bank_member_change = bank_member_changes[bank]; + uint32_t add_idx = 0, del_idx = 0; + FGNextHopGroupMap *bank_fgnhg_map = &(syncd_fg_route_entry->syncd_fgnhg_map[syncd_bank]); + + while(del_idx < bank_member_change.nhs_to_del.size() && + add_idx < bank_member_change.nhs_to_add.size()) + { + HashBuckets *hash_buckets = &(bank_fgnhg_map->at(bank_member_change.nhs_to_del[del_idx])); + for (uint32_t i = 0; i < hash_buckets->size(); i++) + { + if (!writeHashBucketChange(syncd_fg_route_entry, hash_buckets->at(i), + nhopgroup_members_set[bank_member_change.nhs_to_add[add_idx]], + ipPrefix, bank_member_change.nhs_to_add[add_idx])) + { + return false; + } + } + + (*bank_fgnhg_map)[bank_member_change.nhs_to_add[add_idx]] =*hash_buckets; + + bank_fgnhg_map->erase(bank_member_change.nhs_to_del[del_idx]); + bank_member_change.active_nhs.push_back(bank_member_change.nhs_to_add[add_idx]); + syncd_fg_route_entry->active_nexthops.erase(bank_member_change.nhs_to_del[del_idx]); + syncd_fg_route_entry->active_nexthops.insert(bank_member_change.nhs_to_add[add_idx]); + + del_idx++; + add_idx++; + } + + /* Given that we resolved add + del on a bank in the above while stmt + * We will either have add OR delete left to do, and the logic below + * relies on this fact + */ + if (del_idx < bank_member_change.nhs_to_del.size()) + { + uint32_t num_buckets_in_bank = 1 + fgNhgEntry->hash_bucket_indices[syncd_bank].end_index - + fgNhgEntry->hash_bucket_indices[syncd_bank].start_index; + uint32_t exp_bucket_size = num_buckets_in_bank / (uint32_t)bank_member_change.active_nhs.size(); + uint32_t num_nhs_with_one_more = (num_buckets_in_bank % (uint32_t)bank_member_change.active_nhs.size()); + + + while(del_idx < bank_member_change.nhs_to_del.size()) + { + HashBuckets *hash_buckets = &(bank_fgnhg_map->at(bank_member_change.nhs_to_del[del_idx])); + for (uint32_t i = 0; i < hash_buckets->size(); i++) + { + NextHopKey round_robin_nh = bank_member_change.active_nhs[i % + bank_member_change.active_nhs.size()]; + + if (!writeHashBucketChange(syncd_fg_route_entry, hash_buckets->at(i), + 
nhopgroup_members_set[round_robin_nh], ipPrefix, round_robin_nh)) + { + return false; + } + bank_fgnhg_map->at(round_robin_nh).push_back(hash_buckets->at(i)); + + /* Logic below ensure that # hash buckets assigned to a nh is equalized, + * we could have used simple round robin to reassign hash buckets to + * other available nhs, but for cases where # hash buckets is not + * divisible by # of nhs, simple round robin can make the hash bucket + * distribution non-ideal, thereby nhs can attract unequal traffic */ + if (num_nhs_with_one_more == 0) + { + if (bank_fgnhg_map->at(round_robin_nh).size() == exp_bucket_size) + { + SWSS_LOG_INFO("%s reached %d, don't remove more buckets", + (bank_member_change.active_nhs[i % bank_member_change.active_nhs.size()]).to_string().c_str(), + exp_bucket_size); + bank_member_change.active_nhs.erase(bank_member_change.active_nhs.begin() + + (i % bank_member_change.active_nhs.size())); + } + else if (bank_fgnhg_map->at(round_robin_nh).size() > exp_bucket_size) + { + SWSS_LOG_WARN("Unexpected bucket size for nh %s, size %lu, exp_size %d", + round_robin_nh.to_string().c_str(), bank_fgnhg_map->at(round_robin_nh).size(), + exp_bucket_size); + } + } + else + { + if (bank_fgnhg_map->at(round_robin_nh).size() == exp_bucket_size +1) + { + + SWSS_LOG_INFO("%s reached %d, don't remove more buckets num_nhs_with_one_more %d", + (bank_member_change.active_nhs[i %bank_member_change.active_nhs.size()]).to_string().c_str(), + exp_bucket_size +1, num_nhs_with_one_more -1); + bank_member_change.active_nhs.erase(bank_member_change.active_nhs.begin() + + (i % bank_member_change.active_nhs.size())); + num_nhs_with_one_more--; + } + else if (bank_fgnhg_map->at(round_robin_nh).size() > exp_bucket_size +1) + { + SWSS_LOG_WARN("Unexpected bucket size for nh %s, size %lu, exp_size %d", + round_robin_nh.to_string().c_str(), bank_fgnhg_map->at(round_robin_nh).size(), + exp_bucket_size + 1); + } + } + } + + bank_fgnhg_map->erase(bank_member_change.nhs_to_del[del_idx]); + syncd_fg_route_entry->active_nexthops.erase(bank_member_change.nhs_to_del[del_idx]); + del_idx++; + } + } + + if (add_idx < bank_member_change.nhs_to_add.size()) + { + uint32_t total_nhs = (uint32_t)bank_member_change.active_nhs.size() + + (uint32_t)bank_member_change.nhs_to_add.size() - add_idx; + uint32_t num_buckets_in_bank = 1+ fgNhgEntry->hash_bucket_indices[syncd_bank].end_index - + fgNhgEntry->hash_bucket_indices[syncd_bank].start_index; + uint32_t exp_bucket_size = num_buckets_in_bank/total_nhs; + uint32_t num_nhs_with_one_more = (num_buckets_in_bank % total_nhs); + uint32_t num_nhs_with_eq_to_exp = total_nhs - num_nhs_with_one_more; + uint32_t add_nh_exp_bucket_size = exp_bucket_size; + + while(add_idx < bank_member_change.nhs_to_add.size()) + { + (*bank_fgnhg_map)[bank_member_change.nhs_to_add[add_idx]] = + std::vector(); + auto it = bank_member_change.active_nhs.begin(); + if (num_nhs_with_eq_to_exp > 0) + { + num_nhs_with_eq_to_exp--; + } + else + { + add_nh_exp_bucket_size = exp_bucket_size + 1; + num_nhs_with_one_more--; + } + + while(bank_fgnhg_map->at(bank_member_change.nhs_to_add[add_idx]).size() != add_nh_exp_bucket_size) + { + if (it == bank_member_change.active_nhs.end()) + { + it = bank_member_change.active_nhs.begin(); + } + vector *map_entry = &(bank_fgnhg_map->at(*it)); + if ((*map_entry).size() <= 1) + { + /* Case where the number of hash buckets for the nh is <= 1 */ + SWSS_LOG_WARN("Next-hop %s has %d entries, either number of buckets were less or we hit a bug", + (*it).to_string().c_str(), 
((int)(*map_entry).size())); + return false; + } + else + { + uint32_t last_elem = map_entry->at((*map_entry).size() - 1); + + if (!writeHashBucketChange(syncd_fg_route_entry, last_elem, + nhopgroup_members_set[bank_member_change.nhs_to_add[add_idx]], + ipPrefix, bank_member_change.nhs_to_add[add_idx])) + { + return false; + } + + (*bank_fgnhg_map)[bank_member_change.nhs_to_add[add_idx]].push_back(last_elem); + (*map_entry).erase((*map_entry).end() - 1); + } + /* Logic below ensure that # hash buckets assigned to a nh is equalized, + * we could have used simple round robin to reassign hash buckets to + * other available nhs, but for cases where # hash buckets is not + * divisible by # of nhs, simple round robin can make the hash bucket + * distribution non-ideal, thereby nhs can attract unequal traffic */ + if (num_nhs_with_one_more == 0) + { + if (map_entry->size() == exp_bucket_size) + { + SWSS_LOG_INFO("%s reached %d, don't remove more buckets", it->to_string().c_str(), exp_bucket_size); + it = bank_member_change.active_nhs.erase(it); + } + else if (map_entry->size() < exp_bucket_size) + { + SWSS_LOG_WARN("Unexpected bucket size for nh %s, size %lu, exp_size %d", + it->to_string().c_str(), map_entry->size(), exp_bucket_size); + it++; + } + else + { + it++; + } + } + else + { + if (map_entry->size() == exp_bucket_size +1) + { + SWSS_LOG_INFO("%s reached %d, don't remove more buckets num_nhs_with_one_more %d", + it->to_string().c_str(), exp_bucket_size + 1, num_nhs_with_one_more -1); + it = bank_member_change.active_nhs.erase(it); + num_nhs_with_one_more--; + } + else if (map_entry->size() < exp_bucket_size) + { + SWSS_LOG_WARN("Unexpected bucket size for nh %s, size %lu, exp_size %d", + it->to_string().c_str(), map_entry->size(), exp_bucket_size + 1); + it++; + } + else + { + it++; + } + } + } + syncd_fg_route_entry->active_nexthops.insert(bank_member_change.nhs_to_add[add_idx]); + add_idx++; + } + } + return true; +} + + +bool FgNhgOrch::setInactiveBankToNextAvailableActiveBank(FGNextHopGroupEntry *syncd_fg_route_entry, FgNhgEntry *fgNhgEntry, + uint32_t bank, std::vector bank_member_changes, + std::map &nhopgroup_members_set, const IpPrefix &ipPrefix) +{ + SWSS_LOG_ENTER(); + + uint32_t new_bank_idx = 0; + for (; new_bank_idx < bank_member_changes.size(); new_bank_idx++) + { + if (bank_member_changes[new_bank_idx].active_nhs.size() + + bank_member_changes[new_bank_idx].nhs_to_add.size() != 0) + { + syncd_fg_route_entry->syncd_fgnhg_map[bank].clear(); + syncd_fg_route_entry->inactive_to_active_map[bank] = new_bank_idx; + + /* Create collated set of members which will be active in the bank */ + for (auto memb: bank_member_changes[new_bank_idx].nhs_to_add) + { + bank_member_changes[new_bank_idx].active_nhs.push_back(memb); + } + + for (uint32_t i = fgNhgEntry->hash_bucket_indices[bank].start_index; + i <= fgNhgEntry->hash_bucket_indices[bank].end_index; i++) + { + NextHopKey bank_nh_memb = bank_member_changes[new_bank_idx]. 
+ active_nhs[i % bank_member_changes[new_bank_idx].active_nhs.size()]; + + if (!writeHashBucketChange(syncd_fg_route_entry, i, + nhopgroup_members_set[bank_nh_memb],ipPrefix, bank_nh_memb )) + { + return false; + } + + syncd_fg_route_entry->syncd_fgnhg_map[bank][bank_nh_memb].push_back(i); + } + break; + } + } + + if (new_bank_idx == bank_member_changes.size()) + { + SWSS_LOG_NOTICE("No active next-hop members were found in any bank"); + /* Case where there are no active banks */ + /* Note: There is no way to set a NULL OID to the now inactive next-hops + * so we leave the next-hops as is in SAI, and future route/neighbor changes + * will take care of setting the next-hops to the correctly active nhs + */ + syncd_fg_route_entry->syncd_fgnhg_map[bank].clear(); + } + + return true; +} + + +/* setInactiveBankHashBucketChanges: Sets hash buckets for inactive banks and called on a PER bank basis. + * This function deals with scenarios where next-hop changes occured for the route, + * and the next-hop change causes an active bank to become inactive, or an inactive bank to become active or + * inactive bank to remain inactive. + * The function uses the bank member diffs provided in bank_member_changes and uses it to compute + * the hash buckets to modify, in order to satisy the next-hop availability for the route/neigh. + * Eg: Lets assume prefix A had nhs 1, 2, 3, 4, 5, 6 with nhs being equally distirbuted over hash buckets + * 0-59(10 per nh). Now there was a nh deletion of 1, 2, 3 which constituted bank 0(4, 5, 6 constituted bank 1) + * This function will identify that all of bank 0's nh are down and re-assign all the hash buckets(0-29) for these nhs to + * nhs from bank 1, along with making local struct changes to track this for future route/neigh changes. + */ +bool FgNhgOrch::setInactiveBankHashBucketChanges(FGNextHopGroupEntry *syncd_fg_route_entry, FgNhgEntry *fgNhgEntry, + uint32_t bank,std::vector &bank_member_changes, + std::map &nhopgroup_members_set, const IpPrefix &ipPrefix) +{ + SWSS_LOG_ENTER(); + + if (bank_member_changes[bank].nhs_to_add.size() > 0) + { + /* Previously inactive bank now transistions to active */ + syncd_fg_route_entry->syncd_fgnhg_map[bank].clear(); + for (uint32_t i = fgNhgEntry->hash_bucket_indices[bank].start_index; + i <= fgNhgEntry->hash_bucket_indices[bank].end_index; i++) + { + NextHopKey bank_nh_memb = bank_member_changes[bank]. 
+ nhs_to_add[i % bank_member_changes[bank].nhs_to_add.size()]; + + if (!writeHashBucketChange(syncd_fg_route_entry, i, + nhopgroup_members_set[bank_nh_memb], ipPrefix, bank_nh_memb)) + { + return false; + } + + syncd_fg_route_entry->syncd_fgnhg_map[bank][bank_nh_memb].push_back(i); + syncd_fg_route_entry->active_nexthops.insert(bank_nh_memb); + } + syncd_fg_route_entry->inactive_to_active_map[bank] = bank; + } + else if (bank_member_changes[bank].nhs_to_del.size() > 0) + { + /* Previously active bank now transistions to inactive */ + if (!setInactiveBankToNextAvailableActiveBank(syncd_fg_route_entry, fgNhgEntry, + bank, bank_member_changes, nhopgroup_members_set, ipPrefix)) + { + SWSS_LOG_INFO("Failed to map to active_bank and set nh in SAI"); + return false; + } + + for (auto memb: bank_member_changes[bank].nhs_to_del) + { + syncd_fg_route_entry->active_nexthops.erase(memb); + } + } + else + { + /* Previously inactive bank remains inactive */ + uint32_t active_bank = syncd_fg_route_entry->inactive_to_active_map[bank]; + if (bank_member_changes[active_bank].active_nhs.size() == 0) + { + if (!setInactiveBankToNextAvailableActiveBank(syncd_fg_route_entry, fgNhgEntry, + bank, bank_member_changes, nhopgroup_members_set, ipPrefix)) + { + SWSS_LOG_INFO("Failed to map to active_bank and set nh in SAI"); + return false; + } + } + else + { + if (!setActiveBankHashBucketChanges(syncd_fg_route_entry, fgNhgEntry, + active_bank, bank, bank_member_changes, nhopgroup_members_set, ipPrefix)) + { + SWSS_LOG_INFO("Failed setActiveBankHashBucketChanges"); + return false; + } + } + } + return true; +} + + +bool FgNhgOrch::computeAndSetHashBucketChanges(FGNextHopGroupEntry *syncd_fg_route_entry, + FgNhgEntry *fgNhgEntry, std::vector &bank_member_changes, + std::map &nhopgroup_members_set, + const IpPrefix &ipPrefix) +{ + SWSS_LOG_ENTER(); + + for (uint32_t bank_idx = 0; bank_idx < bank_member_changes.size(); bank_idx++) + { + if (bank_member_changes[bank_idx].active_nhs.size() != 0 || + (bank_member_changes[bank_idx].nhs_to_add.size() != 0 && + bank_member_changes[bank_idx].nhs_to_del.size() != 0)) + { + /* Active bank is is determined by there being active nhs on the bank OR + * an edge case where all active_nhs went down(nhs_to_del > 0) BUT + * simultaneously, nhs were added(nhs_to_add > 0). 
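* (eg: in a single route update every nh of bank0 is withdrawn while a
* different set of bank0 nhs is added, so the bank never actually goes
* inactive).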
+ * Route this to fn which deals with active banks + */ + if (!setActiveBankHashBucketChanges(syncd_fg_route_entry, fgNhgEntry, + bank_idx, bank_idx, bank_member_changes, nhopgroup_members_set, ipPrefix)) + { + return false; + } + } + else + { + if (!setInactiveBankHashBucketChanges(syncd_fg_route_entry, fgNhgEntry, + bank_idx, bank_member_changes, nhopgroup_members_set, ipPrefix)) + { + return false; + } + } + } + + return true; +} + + +bool FgNhgOrch::setNewNhgMembers(FGNextHopGroupEntry &syncd_fg_route_entry, FgNhgEntry *fgNhgEntry, + std::vector &bank_member_changes, std::map &nhopgroup_members_set, + const IpPrefix &ipPrefix) +{ + SWSS_LOG_ENTER(); + + sai_status_t status; + for (uint32_t i = 0; i < fgNhgEntry->hash_bucket_indices.size(); i++) + { + uint32_t bank = i; + syncd_fg_route_entry.inactive_to_active_map[bank] = bank; + if (i + 1 > syncd_fg_route_entry.syncd_fgnhg_map.size()) + { + syncd_fg_route_entry.syncd_fgnhg_map.push_back(FGNextHopGroupMap()); + } + + if (bank_member_changes[i].nhs_to_add.size() == 0) + { + /* Case where bank is empty */ + for (uint32_t active_bank = 0; active_bank < bank_member_changes.size(); active_bank++) + { + if (bank_member_changes[active_bank].nhs_to_add.size() != 0) + { + bank = active_bank; + syncd_fg_route_entry.inactive_to_active_map[i] = active_bank; + break; + } + } + } + + if (bank_member_changes[bank].nhs_to_add.size() == 0) + { + /* Case where all banks are empty, we let retry logic(upon rv false) take care of this scenario */ + SWSS_LOG_INFO("Found no next-hops to add, skipping"); + return false; + } + + for (uint32_t j = fgNhgEntry->hash_bucket_indices[i].start_index; + j <= fgNhgEntry->hash_bucket_indices[i].end_index; j++) + { + NextHopKey bank_nh_memb = bank_member_changes[bank].nhs_to_add[j % + bank_member_changes[bank].nhs_to_add.size()]; + + // Create a next hop group member + sai_attribute_t nhgm_attr; + vector nhgm_attrs; + nhgm_attr.id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID; + nhgm_attr.value.oid = syncd_fg_route_entry.next_hop_group_id; + nhgm_attrs.push_back(nhgm_attr); + + nhgm_attr.id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_ID; + nhgm_attr.value.oid = nhopgroup_members_set[bank_nh_memb]; + nhgm_attrs.push_back(nhgm_attr); + + nhgm_attr.id = SAI_NEXT_HOP_GROUP_MEMBER_ATTR_INDEX; + nhgm_attr.value.s32 = j; + nhgm_attrs.push_back(nhgm_attr); + + sai_object_id_t next_hop_group_member_id; + status = sai_next_hop_group_api->create_next_hop_group_member( + &next_hop_group_member_id, + gSwitchId, + (uint32_t)nhgm_attrs.size(), + nhgm_attrs.data()); + if (status != SAI_STATUS_SUCCESS) + { + SWSS_LOG_ERROR("Failed to create next hop group %" PRIx64 " member %" PRIx64 ": %d", + syncd_fg_route_entry.next_hop_group_id, next_hop_group_member_id, status); + + if (!removeFineGrainedNextHopGroup(&syncd_fg_route_entry, fgNhgEntry)) + { + SWSS_LOG_ERROR("Failed to clean-up after next-hop member creation failure"); + } + + return false; + } + + setStateDbRouteEntry(ipPrefix, j, bank_nh_memb); + syncd_fg_route_entry.syncd_fgnhg_map[i][bank_nh_memb].push_back(j); + syncd_fg_route_entry.active_nexthops.insert(bank_nh_memb); + syncd_fg_route_entry.nhopgroup_members.push_back(next_hop_group_member_id); + gCrmOrch->incCrmResUsedCounter(CrmResourceType::CRM_NEXTHOP_GROUP_MEMBER); + } + } + + return true; +} + + +bool FgNhgOrch::addRoute(sai_object_id_t vrf_id, const IpPrefix &ipPrefix, const NextHopGroupKey &nextHops) +{ + SWSS_LOG_ENTER(); + + if (m_syncdFGRouteTables.find(vrf_id) != m_syncdFGRouteTables.end() && + 
m_syncdFGRouteTables.at(vrf_id).find(ipPrefix) != m_syncdFGRouteTables.at(vrf_id).end() && + m_syncdFGRouteTables.at(vrf_id).at(ipPrefix).nhg_key == nextHops) + { + return true; + } + + if (m_syncdFGRouteTables.find(vrf_id) == m_syncdFGRouteTables.end()) + { + m_syncdFGRouteTables.emplace(vrf_id, FGRouteTable()); + m_vrfOrch->increaseVrfRefCount(vrf_id); + } + + auto prefix_entry = fgNhgPrefixes.find(ipPrefix); + assert(prefix_entry != fgNhgPrefixes.end()); + FgNhgEntry *fgNhgEntry = prefix_entry->second; + + set next_hop_set = nextHops.getNextHops(); + std::map nhopgroup_members_set; + auto syncd_fg_route_entry_it = m_syncdFGRouteTables.at(vrf_id).find(ipPrefix); + bool next_hop_to_add = false; + + /* Default init with # of banks */ + std::vector bank_member_changes( + fgNhgEntry->hash_bucket_indices.size(), BankMemberChanges()); + if (fgNhgEntry->hash_bucket_indices.size() == 0) + { + /* Only happens the 1st time when hash_bucket_indices are not inited + */ + for (auto it : fgNhgEntry->next_hops) + { + while(bank_member_changes.size() <= it.second) + { + bank_member_changes.push_back(BankMemberChanges()); + } + } + } + + /* Assert each IP address exists in m_syncdNextHops table, + * and add the corresponding next_hop_id to next_hop_ids. */ + for (NextHopKey nhk : next_hop_set) + { + if (!m_neighOrch->hasNextHop(nhk)) + { + SWSS_LOG_NOTICE("Failed to get next hop %s:%s in neighorch", + nhk.to_string().c_str(), nextHops.to_string().c_str()); + continue; + } + else if (fgNhgEntry->next_hops.find(nhk.ip_address) == fgNhgEntry->next_hops.end()) + { + SWSS_LOG_WARN("Could not find next-hop %s in Fine Grained next-hop group entry for prefix %s, skipping", + nhk.to_string().c_str(), fgNhgEntry->fg_nhg_name.c_str()); + continue; + } + else if (m_neighOrch->isNextHopFlagSet(nhk, NHFLAGS_IFDOWN)) + { + SWSS_LOG_NOTICE("Next hop %s in %s is down, skipping", + nhk.to_string().c_str(), nextHops.to_string().c_str()); + continue; + } + + if (syncd_fg_route_entry_it == m_syncdFGRouteTables.at(vrf_id).end()) + { + bank_member_changes[fgNhgEntry->next_hops[nhk.ip_address]]. + nhs_to_add.push_back(nhk); + next_hop_to_add = true; + } + else + { + FGNextHopGroupEntry *syncd_fg_route_entry = &(syncd_fg_route_entry_it->second); + if (syncd_fg_route_entry->active_nexthops.find(nhk) == + syncd_fg_route_entry->active_nexthops.end()) + { + bank_member_changes[fgNhgEntry->next_hops[nhk.ip_address]]. + nhs_to_add.push_back(nhk); + } + } + + sai_object_id_t next_hop_id = m_neighOrch->getNextHopId(nhk); + nhopgroup_members_set[nhk] = next_hop_id; + } + + if (syncd_fg_route_entry_it != m_syncdFGRouteTables.at(vrf_id).end()) + { + FGNextHopGroupEntry *syncd_fg_route_entry = &(syncd_fg_route_entry_it->second); + + /* Route exists, update FG ECMP group in SAI */ + for (auto nhk : syncd_fg_route_entry->active_nexthops) + { + if (nhopgroup_members_set.find(nhk) == nhopgroup_members_set.end()) + { + bank_member_changes[fgNhgEntry->next_hops[nhk.ip_address]]. + nhs_to_del.push_back(nhk); + } + else + { + bank_member_changes[fgNhgEntry->next_hops[nhk.ip_address]]. 
+ active_nhs.push_back(nhk); + } + } + + if (!computeAndSetHashBucketChanges(syncd_fg_route_entry, fgNhgEntry, bank_member_changes, + nhopgroup_members_set, ipPrefix)) + { + return false; + } + } + else + { + /* New route + nhg addition */ + if (next_hop_to_add == false) + { + SWSS_LOG_INFO("There were no valid next-hops to add %s:%s", ipPrefix.to_string().c_str(), + nextHops.to_string().c_str()); + /* Let the route retry logic(upon false rc) take care of this case */ + return false; + } + + FGNextHopGroupEntry syncd_fg_route_entry; + if (!createFineGrainedNextHopGroup(syncd_fg_route_entry, fgNhgEntry, nextHops)) + { + return false; + } + + if (!setNewNhgMembers(syncd_fg_route_entry, fgNhgEntry, bank_member_changes, nhopgroup_members_set, ipPrefix)) + { + return false; + } + + if (!createFineGrainedRouteEntry(syncd_fg_route_entry, fgNhgEntry, vrf_id, ipPrefix, nextHops)) + { + return false; + } + + m_syncdFGRouteTables[vrf_id][ipPrefix] = syncd_fg_route_entry; + + SWSS_LOG_NOTICE("Created route %s:%s", ipPrefix.to_string().c_str(), nextHops.to_string().c_str()); + } + m_syncdFGRouteTables[vrf_id][ipPrefix].nhg_key = nextHops; + + for (uint32_t bank_idx = 0; bank_idx < bank_member_changes.size(); bank_idx++) + { + for (auto nh : bank_member_changes[bank_idx].nhs_to_add) + { + m_neighOrch->increaseNextHopRefCount(nh); + } + + for (auto nh : bank_member_changes[bank_idx].nhs_to_del) + { + m_neighOrch->decreaseNextHopRefCount(nh); + } + } + + return true; +} + + +bool FgNhgOrch::removeRoute(sai_object_id_t vrf_id, const IpPrefix &ipPrefix) +{ + SWSS_LOG_ENTER(); + + auto it_route_table = m_syncdFGRouteTables.find(vrf_id); + if (it_route_table == m_syncdFGRouteTables.end()) + { + SWSS_LOG_INFO("No route table found for %s, vrf_id 0x%" PRIx64, + ipPrefix.to_string().c_str(), vrf_id); + return true; + } + + auto it_route = it_route_table->second.find(ipPrefix); + if (it_route == it_route_table->second.end()) + { + SWSS_LOG_INFO("Failed to find route entry, vrf_id 0x%" PRIx64 ", prefix %s", vrf_id, + ipPrefix.to_string().c_str()); + return true; + } + + auto prefix_entry = fgNhgPrefixes.find(ipPrefix); + assert(prefix_entry != fgNhgPrefixes.end()); + FgNhgEntry *fgNhgEntry = prefix_entry->second; + + sai_route_entry_t route_entry; + route_entry.vr_id = vrf_id; + route_entry.switch_id = gSwitchId; + copy(route_entry.destination, ipPrefix); + sai_status_t status = sai_route_api->remove_route_entry(&route_entry); + if (status != SAI_STATUS_SUCCESS) + { + SWSS_LOG_ERROR("Failed to remove route prefix:%s", ipPrefix.to_string().c_str()); + return false; + } + + if (route_entry.destination.addr_family == SAI_IP_ADDR_FAMILY_IPV4) + { + gCrmOrch->decCrmResUsedCounter(CrmResourceType::CRM_IPV4_ROUTE); + } + else + { + gCrmOrch->decCrmResUsedCounter(CrmResourceType::CRM_IPV6_ROUTE); + } + + FGNextHopGroupEntry *syncd_fg_route_entry = &(it_route->second); + if (!removeFineGrainedNextHopGroup(syncd_fg_route_entry, fgNhgEntry)) + { + SWSS_LOG_ERROR("Failed to clean-up fine grained ECMP SAI group"); + return false; + } + + for (auto nh : syncd_fg_route_entry->active_nexthops) + { + m_neighOrch->decreaseNextHopRefCount(nh); + } + + // remove state_db entry + m_stateWarmRestartRouteTable.del(ipPrefix.to_string()); + + it_route_table->second.erase(it_route); + if (it_route_table->second.size() == 0) + { + m_syncdFGRouteTables.erase(vrf_id); + m_vrfOrch->decreaseVrfRefCount(vrf_id); + } + return true; +} + + +bool FgNhgOrch::doTaskFgNhg(const KeyOpFieldsValuesTuple & t) +{ + SWSS_LOG_ENTER(); + string op = kfvOp(t); 
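/* Parses the CFG_FG_NHG table; only the bucket_size field is consumed here.
 * A hypothetical CONFIG_DB entry this would handle:
 *   FG_NHG|fg_grp_1
 *       bucket_size = 120
 * Changing bucket_size on an existing group is rejected below as an
 * unsupported operation. */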
+ string key = kfvKey(t); + string fg_nhg_name = key; + auto fgNhg_entry = m_FgNhgs.find(fg_nhg_name); + + if (op == SET_COMMAND) + { + uint32_t bucket_size = 0; + + for (auto i : kfvFieldsValues(t)) + { + if (fvField(i) == "bucket_size") + { + bucket_size = stoi(fvValue(i)); + } + } + + if (bucket_size == 0) + { + SWSS_LOG_ERROR("Received bucket_size which is 0 for key %s", kfvKey(t).c_str()); + return true; + } + + if (fgNhg_entry != m_FgNhgs.end()) + { + if (bucket_size != (fgNhg_entry->second).configured_bucket_size) + { + SWSS_LOG_WARN("Received request to change %s's bucket size to %d, unsupported operation, skipping", + fg_nhg_name.c_str(), bucket_size); + return true; + } + } + else + { + FgNhgEntry fgNhgEntry; + fgNhgEntry.configured_bucket_size = bucket_size; + fgNhgEntry.fg_nhg_name = fg_nhg_name; + SWSS_LOG_INFO("Added new FG_NHG entry with configured_bucket_size %d", + fgNhgEntry.configured_bucket_size); + m_FgNhgs[fg_nhg_name] = fgNhgEntry; + } + } + else if (op == DEL_COMMAND) + { + if (fgNhg_entry == m_FgNhgs.end()) + { + SWSS_LOG_INFO("Received delete call for non-existent entry %s", + fg_nhg_name.c_str()); + } + else + { + /* Check if there are no child objects associated prior to deleting */ + if (fgNhg_entry->second.prefixes.size() == 0 && fgNhg_entry->second.next_hops.size() == 0) + { + m_FgNhgs.erase(fgNhg_entry); + assert(m_FgNhgs.find(fg_nhg_name) == fgNhgPrefixes.end()); + SWSS_LOG_INFO("Received delete call for valid entry with no further dependencies, deleting %s", + fg_nhg_name.c_str()); + } + else + { + SWSS_LOG_INFO("Child Prefix/Member entries are still associated with this FG_NHG %s", + fg_nhg_name.c_str()); + return false; + } + } + } + return true; +} + + +vector FgNhgOrch::generateRouteTableFromNhgKey(NextHopGroupKey nhg) +{ + SWSS_LOG_ENTER(); + vector fvVector; + std::set nhks = nhg.getNextHops(); + string nexthops = nhks.begin()->ip_address.to_string(); + string ifnames = nhks.begin()->alias; + + for (auto nhk: nhks) + { + if (nhk == *(nhks.begin())) + { + continue; + } + nexthops += "," + nhk.ip_address.to_string(); + ifnames += "," + nhk.alias; + } + + FieldValueTuple nh("nexthop", nexthops); + FieldValueTuple idx("ifname", ifnames); + SWSS_LOG_INFO("Generated fv nhs %s, fv ifnames %s", nexthops.c_str(), ifnames.c_str()); + + fvVector.push_back(nh); + fvVector.push_back(idx); + return fvVector; +} + + +bool FgNhgOrch::doTaskFgNhgPrefix(const KeyOpFieldsValuesTuple & t) +{ + SWSS_LOG_ENTER(); + string op = kfvOp(t); + string key = kfvKey(t); + IpPrefix ip_prefix = IpPrefix(key); + auto prefix_entry = fgNhgPrefixes.find(ip_prefix); + + if (op == SET_COMMAND) + { + if (prefix_entry != fgNhgPrefixes.end()) + { + SWSS_LOG_INFO("FG_NHG prefix already exists"); + return true; + } + + string fg_nhg_name = ""; + for (auto i : kfvFieldsValues(t)) + { + if (fvField(i) == "FG_NHG") + { + fg_nhg_name = fvValue(i); + } + } + if (fg_nhg_name.empty()) + { + SWSS_LOG_ERROR("Received FG_NHG with empty name for key %s", kfvKey(t).c_str()); + return true; + } + + auto fgNhg_entry = m_FgNhgs.find(fg_nhg_name); + if (fgNhg_entry == m_FgNhgs.end()) + { + SWSS_LOG_INFO("FG_NHG entry not received yet, continue"); + return false; + } + + /* delete regular ecmp handling for prefix */ + sai_object_id_t vrf_id = gVirtualRouterId; + NextHopGroupKey nhg = gRouteOrch->getSyncdRouteNhgKey(vrf_id, ip_prefix); + auto addCache = m_fgPrefixAddCache.find(ip_prefix); + if (addCache == m_fgPrefixAddCache.end()) + { + if (nhg.getSize() == 0) + { + SWSS_LOG_INFO("Route does not exist in 
routeorch, don't need to migrate route to fgnhgorch"); + fgNhg_entry->second.prefixes.push_back(ip_prefix); + fgNhgPrefixes[ip_prefix] = &(fgNhg_entry->second); + } + else + { + SWSS_LOG_INFO("Route exists in routeorch, deleting from APP_DB to begin migration"); + m_fgPrefixAddCache[ip_prefix] = nhg; + m_routeTable.del(ip_prefix.to_string()); + return false; + } + } + else + { + if (nhg.getSize() == 0) + { + /* Case where APP_DB route entry was present and the route delete was completed */ + SWSS_LOG_INFO("Route removed in routeorch, now do an APP_DB addition"); + fgNhg_entry->second.prefixes.push_back(ip_prefix); + fgNhgPrefixes[ip_prefix] = &(fgNhg_entry->second); + m_routeTable.set(ip_prefix.to_string(), generateRouteTableFromNhgKey(addCache->second)); + m_fgPrefixAddCache.erase(addCache); + SWSS_LOG_INFO("Performed APP_DB addition with prefix %s", ip_prefix.to_string().c_str()); + } + else + { + SWSS_LOG_INFO("Route exists in routeorch, and APP_DB route was deleted, waiting for routeorch delete to complete"); + return false; + } + } + SWSS_LOG_INFO("FG_NHG added for group %s, prefix %s", + fgNhgPrefixes[ip_prefix]->fg_nhg_name.c_str(), ip_prefix.to_string().c_str()); + } + else if (op == DEL_COMMAND) + { + if (prefix_entry == fgNhgPrefixes.end()) + { + SWSS_LOG_INFO("FG_NHG prefix doesn't exists, ignore"); + return true; + } + /* delete fine grained ecmp handling for prefix */ + auto fgNhg_entry = prefix_entry->second; + sai_object_id_t vrf_id = gVirtualRouterId; + NextHopGroupKey nhg; + if (m_syncdFGRouteTables.find(vrf_id) != m_syncdFGRouteTables.end() && + m_syncdFGRouteTables.at(vrf_id).find(ip_prefix) != m_syncdFGRouteTables.at(vrf_id).end()) + { + nhg = m_syncdFGRouteTables.at(vrf_id).at(ip_prefix).nhg_key; + } + + auto delCache = m_fgPrefixDelCache.find(ip_prefix); + if (delCache == m_fgPrefixDelCache.end()) + { + if (nhg.getSize() == 0) + { + SWSS_LOG_INFO("Route does not exist in fgnhgorch, proceed with deletion of local structures"); + for (uint32_t i = 0; i < fgNhg_entry->prefixes.size(); i++) + { + if(fgNhg_entry->prefixes[i] == ip_prefix) + { + fgNhg_entry->prefixes.erase(fgNhg_entry->prefixes.begin() + i); + break; + } + } + + fgNhgPrefixes.erase(ip_prefix); + } + else + { + SWSS_LOG_INFO("Route exists in fgNhgOrch, deleting from APP_DB"); + m_fgPrefixDelCache[ip_prefix] = nhg; + m_routeTable.del(ip_prefix.to_string()); + return false; + } + } + else + { + if (nhg.getSize() == 0) + { + /* Case where fgnhgorch route entry was present and the route delete was completed */ + SWSS_LOG_INFO("Route removed in fgNhgOrch, now do an APP_DB addition"); + for (uint32_t i = 0; i < fgNhg_entry->prefixes.size(); i++) + { + if (fgNhg_entry->prefixes[i] == ip_prefix) + { + fgNhg_entry->prefixes.erase(fgNhg_entry->prefixes.begin() + i); + break; + } + } + fgNhgPrefixes.erase(ip_prefix); + + m_routeTable.set(ip_prefix.to_string(), generateRouteTableFromNhgKey(delCache->second)); + SWSS_LOG_INFO("Perform APP_DB addition with prefix %s", ip_prefix.to_string().c_str()); + } + else + { + SWSS_LOG_INFO("Route exists in fgNhgOrch, and APP_DB route was deleted, waiting for fgNhgOrch delete to complete"); + return false; + } + } + SWSS_LOG_INFO("FG_NHG removed for group prefix %s", ip_prefix.to_string().c_str()); + + } + return true; +} + + +bool FgNhgOrch::doTaskFgNhgMember(const KeyOpFieldsValuesTuple & t) +{ + SWSS_LOG_ENTER(); + string op = kfvOp(t); + string key = kfvKey(t); + IpAddress next_hop = IpAddress(key); + NextHopKey nhk(next_hop.to_string()); + + if (op == SET_COMMAND) + { + string 
fg_nhg_name = ""; + uint32_t bank = 0; + for (auto i : kfvFieldsValues(t)) + { + if (fvField(i) == "FG_NHG") + { + fg_nhg_name = fvValue(i); + } + else if (fvField(i) == "bank") + { + bank = stoi(fvValue(i)); + } + } + if (fg_nhg_name.empty()) + { + SWSS_LOG_ERROR("Received FG_NHG with empty name for key %s", kfvKey(t).c_str()); + return true; + } + + auto fgNhg_entry = m_FgNhgs.find(fg_nhg_name); + if (fgNhg_entry == m_FgNhgs.end()) + { + SWSS_LOG_INFO("FG_NHG entry not received yet, continue"); + return false; + } + else + { + /* skip addition if next-hop already exists */ + if (fgNhg_entry->second.next_hops.find(next_hop) != fgNhg_entry->second.next_hops.end()) + { + SWSS_LOG_INFO("FG_NHG member %s already exists, skip", next_hop.to_string().c_str()); + return true; + } + fgNhg_entry->second.next_hops[next_hop] = bank; + + /* query and check the next hop is valid in neighOrcch */ + if (!m_neighOrch->hasNextHop(nhk)) + { + SWSS_LOG_INFO("Nexthop %s is not resolved yet", nhk.to_string().c_str()); + return true; + } + + /* add next-hop into SAI group */ + if (!validNextHopInNextHopGroup(nhk)) + { + return false; + } + SWSS_LOG_INFO("FG_NHG member added for group %s, next-hop %s", + fgNhg_entry->second.fg_nhg_name.c_str(), next_hop.to_string().c_str()); + } + } + else if (op == DEL_COMMAND) + { + /* remove next hop from SAI group if its a resolved nh which is programmed to SAI*/ + if (m_neighOrch->hasNextHop(nhk)) + { + if (!invalidNextHopInNextHopGroup(nhk)) + { + return false; + } + } + + SWSS_LOG_INFO("FG_NHG member removed for SAI group, next-hop %s", + next_hop.to_string().c_str()); + + /* remove next-hop in fgnhg entry*/ + for (auto fgnhg_it = m_FgNhgs.begin(); fgnhg_it != m_FgNhgs.end(); ++fgnhg_it) + { + auto it = fgnhg_it->second.next_hops.find(next_hop); + if (it != fgnhg_it->second.next_hops.end()) + { + SWSS_LOG_INFO("FG_NHG member removed for group %s, next-hop %s", + fgnhg_it->second.fg_nhg_name.c_str(), next_hop.to_string().c_str()); + fgnhg_it->second.next_hops.erase(it); + } + } + } + return true; +} + + +void FgNhgOrch::doTask(Consumer& consumer) +{ + SWSS_LOG_ENTER(); + const string & table_name = consumer.getTableName(); + auto it = consumer.m_toSync.begin(); + bool entry_handled = true; + + while (it != consumer.m_toSync.end()) + { + auto t = it->second; + if (table_name == CFG_FG_NHG) + { + entry_handled = doTaskFgNhg(t); + } + else if (table_name == CFG_FG_NHG_PREFIX) + { + entry_handled = doTaskFgNhgPrefix(t); + } + else if (table_name == CFG_FG_NHG_MEMBER) + { + entry_handled = doTaskFgNhgMember(t); + } + else + { + entry_handled = true; + SWSS_LOG_ERROR("Unknown table : %s", table_name.c_str()); + } + + if (entry_handled) + { + consumer.m_toSync.erase(it++); + } + else + { + it++; + } + } + return; +} diff --git a/orchagent/fgnhgorch.h b/orchagent/fgnhgorch.h new file mode 100644 index 0000000000..fe2a6fc187 --- /dev/null +++ b/orchagent/fgnhgorch.h @@ -0,0 +1,130 @@ +#ifndef SWSS_FGNHGORCH_H +#define SWSS_FGNHGORCH_H + +#include "orch.h" +#include "observer.h" +#include "intfsorch.h" +#include "neighorch.h" +#include "producerstatetable.h" + +#include "ipaddress.h" +#include "ipaddresses.h" +#include "ipprefix.h" +#include "nexthopgroupkey.h" + +#include + +typedef uint32_t Bank; +typedef std::set ActiveNextHops; +typedef std::vector FGNextHopGroupMembers; +typedef std::vector HashBuckets; +typedef std::map FGNextHopGroupMap; +typedef std::vector BankFGNextHopGroupMap; +typedef std::map InactiveBankMapsToBank; + +struct FGNextHopGroupEntry +{ + sai_object_id_t 
next_hop_group_id; // next hop group id + FGNextHopGroupMembers nhopgroup_members; // sai_object_ids of nexthopgroup members(0 - real_bucket_size - 1) + ActiveNextHops active_nexthops; // The set of nexthops(ip+alias) + BankFGNextHopGroupMap syncd_fgnhg_map; // Map of (bank) -> (nexthops) -> (index in nhopgroup_members) + NextHopGroupKey nhg_key; // Full next hop group key + InactiveBankMapsToBank inactive_to_active_map; // Maps an inactive bank to an active one in terms of hash bkts +}; + +/*TODO: can we make an optimization here when we get multiple routes pointing to a fgnhg */ +typedef std::map FGRouteTable; +/* RouteTables: vrf_id, FGRouteTable */ +typedef std::map FGRouteTables; +/* Name of the FG NHG group */ +typedef std::string FgNhg; +/* Map from IP to Bank */ +typedef std::map NextHops; +/* Cache currently ongoing FG_NHG PREFIX additions/deletions */ +typedef std::map FgPrefixOpCache; + +/* Store the indices occupied by a bank */ +typedef struct +{ + uint32_t start_index; + uint32_t end_index; +} BankIndexRange; + +typedef struct FgNhgEntry +{ + string fg_nhg_name; // Name of FG NHG group configured by user + uint32_t configured_bucket_size; // Bucket size configured by user + uint32_t real_bucket_size; // Real bucket size as queried from SAI + NextHops next_hops; // The IP to Bank mapping configured by user + std::vector prefixes; // Prefix which desires FG behavior + std::vector hash_bucket_indices; // The hash bucket indices for a bank +} FgNhgEntry; + +/* Map from IP prefix to user configured FG NHG entries */ +typedef std::map FgNhgPrefixes; +/* Main structure to hold user configuration */ +typedef std::map FgNhgs; + +/* Helper struct populated at every route change to identify the next-hop changes which occured */ +typedef struct +{ + std::vector nhs_to_del; + std::vector nhs_to_add; + std::vector active_nhs; +} BankMemberChanges; + +class FgNhgOrch : public Orch +{ +public: + FgNhgPrefixes fgNhgPrefixes; + FgNhgOrch(DBConnector *db, DBConnector *appDb, DBConnector *stateDb, vector &tableNames, NeighOrch *neighOrch, IntfsOrch *intfsOrch, VRFOrch *vrfOrch); + + bool addRoute(sai_object_id_t, const IpPrefix&, const NextHopGroupKey&); + bool removeRoute(sai_object_id_t, const IpPrefix&); + bool validNextHopInNextHopGroup(const NextHopKey&); + bool invalidNextHopInNextHopGroup(const NextHopKey&); + +private: + NeighOrch *m_neighOrch; + IntfsOrch *m_intfsOrch; + VRFOrch *m_vrfOrch; + FgNhgs m_FgNhgs; + FGRouteTables m_syncdFGRouteTables; + Table m_stateWarmRestartRouteTable; + ProducerStateTable m_routeTable; + FgPrefixOpCache m_fgPrefixAddCache; + FgPrefixOpCache m_fgPrefixDelCache; + + bool setNewNhgMembers(FGNextHopGroupEntry &syncd_fg_route_entry, FgNhgEntry *fgNhgEntry, + std::vector &bank_member_changes, + std::map &nhopgroup_members_set, const IpPrefix&); + bool computeAndSetHashBucketChanges(FGNextHopGroupEntry *syncd_fg_route_entry, + FgNhgEntry *fgNhgEntry, std::vector &bank_member_changes, + std::map &nhopgroup_members_set, const IpPrefix&); + bool setActiveBankHashBucketChanges(FGNextHopGroupEntry *syncd_fg_route_entry, FgNhgEntry *fgNhgEntry, + uint32_t bank, uint32_t syncd_bank, std::vector bank_member_changes, + std::map &nhopgroup_members_set, const IpPrefix&); + bool setInactiveBankHashBucketChanges(FGNextHopGroupEntry *syncd_fg_route_entry, FgNhgEntry *fgNhgEntry, + uint32_t bank,std::vector &bank_member_changes, + std::map &nhopgroup_members_set, const IpPrefix&); + bool setInactiveBankToNextAvailableActiveBank(FGNextHopGroupEntry *syncd_fg_route_entry, 
FgNhgEntry *fgNhgEntry, + uint32_t bank, std::vector bank_member_changes, + std::map &nhopgroup_members_set, const IpPrefix&); + void calculateBankHashBucketStartIndices(FgNhgEntry *fgNhgEntry); + void setStateDbRouteEntry(const IpPrefix&, uint32_t index, NextHopKey nextHop); + bool writeHashBucketChange(FGNextHopGroupEntry *syncd_fg_route_entry, uint32_t index, sai_object_id_t nh_oid, + const IpPrefix &ipPrefix, NextHopKey nextHop); + bool createFineGrainedNextHopGroup(FGNextHopGroupEntry &syncd_fg_route_entry, FgNhgEntry *fgNhgEntry, + const NextHopGroupKey &nextHops); + bool removeFineGrainedNextHopGroup(FGNextHopGroupEntry *syncd_fg_route_entry, FgNhgEntry *fgNhgEntry); + bool createFineGrainedRouteEntry(FGNextHopGroupEntry &syncd_fg_route_entry, FgNhgEntry *fgNhgEntry, + sai_object_id_t vrf_id, const IpPrefix &ipPrefix, const NextHopGroupKey &nextHops); + + vector generateRouteTableFromNhgKey(NextHopGroupKey nhg); + bool doTaskFgNhg(const KeyOpFieldsValuesTuple&); + bool doTaskFgNhgPrefix(const KeyOpFieldsValuesTuple&); + bool doTaskFgNhgMember(const KeyOpFieldsValuesTuple&); + void doTask(Consumer& consumer); +}; + +#endif /* SWSS_FGNHGORCH_H */ diff --git a/orchagent/neighorch.cpp b/orchagent/neighorch.cpp index ae0db0751c..5ba5ec4e27 100644 --- a/orchagent/neighorch.cpp +++ b/orchagent/neighorch.cpp @@ -12,6 +12,7 @@ extern PortsOrch *gPortsOrch; extern sai_object_id_t gSwitchId; extern CrmOrch *gCrmOrch; extern RouteOrch *gRouteOrch; +extern FgNhgOrch *gFgNhgOrch; const int neighorch_pri = 30; @@ -86,6 +87,8 @@ bool NeighOrch::addNextHop(const IpAddress &ipAddress, const string &alias) gCrmOrch->incCrmResUsedCounter(CrmResourceType::CRM_IPV6_NEXTHOP); } + gFgNhgOrch->validNextHopInNextHopGroup(nexthop); + // For nexthop with incoming port which has down oper status, NHFLAGS_IFDOWN // flag Should be set on it. 
// This scenario may happen under race condition where buffered neighbor event @@ -216,6 +219,8 @@ bool NeighOrch::removeNextHop(const IpAddress &ipAddress, const string &alias) NextHopKey nexthop = { ipAddress, alias }; assert(hasNextHop(nexthop)); + gFgNhgOrch->invalidNextHopInNextHopGroup(nexthop); + if (m_syncdNextHops[nexthop].ref_count > 0) { SWSS_LOG_ERROR("Failed to remove still referenced next hop %s on %s", diff --git a/orchagent/orchdaemon.cpp b/orchagent/orchdaemon.cpp index 99fbc0a7f6..1682e4a23c 100644 --- a/orchagent/orchdaemon.cpp +++ b/orchagent/orchdaemon.cpp @@ -30,6 +30,7 @@ FdbOrch *gFdbOrch; IntfsOrch *gIntfsOrch; NeighOrch *gNeighOrch; RouteOrch *gRouteOrch; +FgNhgOrch *gFgNhgOrch; AclOrch *gAclOrch; CrmOrch *gCrmOrch; BufferOrch *gBufferOrch; @@ -126,7 +127,16 @@ bool OrchDaemon::init() gIntfsOrch = new IntfsOrch(m_applDb, APP_INTF_TABLE_NAME, vrf_orch); gNeighOrch = new NeighOrch(m_applDb, APP_NEIGH_TABLE_NAME, gIntfsOrch); - gRouteOrch = new RouteOrch(m_applDb, APP_ROUTE_TABLE_NAME, gSwitchOrch, gNeighOrch, gIntfsOrch, vrf_orch); + + vector fgnhg_tables = { + CFG_FG_NHG, + CFG_FG_NHG_PREFIX, + CFG_FG_NHG_MEMBER + }; + + gFgNhgOrch = new FgNhgOrch(m_configDb, m_applDb, m_stateDb, fgnhg_tables, gNeighOrch, gIntfsOrch, vrf_orch); + gDirectory.set(gFgNhgOrch); + gRouteOrch = new RouteOrch(m_applDb, APP_ROUTE_TABLE_NAME, gSwitchOrch, gNeighOrch, gIntfsOrch, vrf_orch, gFgNhgOrch); TableConnector confDbSflowTable(m_configDb, CFG_SFLOW_TABLE_NAME); TableConnector appCoppTable(m_applDb, APP_COPP_TABLE_NAME); @@ -282,6 +292,7 @@ bool OrchDaemon::init() m_orchList.push_back(vnet_orch); m_orchList.push_back(vnet_rt_orch); m_orchList.push_back(gNatOrch); + m_orchList.push_back(gFgNhgOrch); m_select = new Select(); diff --git a/orchagent/routeorch.cpp b/orchagent/routeorch.cpp index 66a0c86aaf..629d08390a 100644 --- a/orchagent/routeorch.cpp +++ b/orchagent/routeorch.cpp @@ -22,7 +22,7 @@ extern CrmOrch *gCrmOrch; const int routeorch_pri = 5; -RouteOrch::RouteOrch(DBConnector *db, string tableName, SwitchOrch *switchOrch, NeighOrch *neighOrch, IntfsOrch *intfsOrch, VRFOrch *vrfOrch) : +RouteOrch::RouteOrch(DBConnector *db, string tableName, SwitchOrch *switchOrch, NeighOrch *neighOrch, IntfsOrch *intfsOrch, VRFOrch *vrfOrch, FgNhgOrch *fgNhgOrch) : gRouteBulker(sai_route_api), gNextHopGroupMemberBulker(sai_next_hop_group_api, gSwitchId), Orch(db, tableName, routeorch_pri), @@ -30,6 +30,7 @@ RouteOrch::RouteOrch(DBConnector *db, string tableName, SwitchOrch *switchOrch, m_neighOrch(neighOrch), m_intfsOrch(intfsOrch), m_vrfOrch(vrfOrch), + m_fgNhgOrch(fgNhgOrch), m_nextHopGroupCount(0), m_resync(false) { @@ -334,6 +335,11 @@ bool RouteOrch::validnexthopinNextHopGroup(const NextHopKey &nexthop) nhopgroup->second.nhopgroup_members[nexthop] = nexthop_id; } + if (!m_fgNhgOrch->validNextHopInNextHopGroup(nexthop)) + { + return false; + } + return true; } @@ -366,6 +372,11 @@ bool RouteOrch::invalidnexthopinNextHopGroup(const NextHopKey &nexthop) gCrmOrch->decCrmResUsedCounter(CrmResourceType::CRM_NEXTHOP_GROUP_MEMBER); } + if (!m_fgNhgOrch->invalidNextHopInNextHopGroup(nexthop)) + { + return false; + } + return true; } @@ -832,6 +843,66 @@ bool RouteOrch::isRefCounterZero(const NextHopGroupKey &nexthops) const return m_syncdNextHopGroups.at(nexthops).ref_count == 0; } +const NextHopGroupKey RouteOrch::getSyncdRouteNhgKey(sai_object_id_t vrf_id, const IpPrefix& ipPrefix) +{ + NextHopGroupKey nhg; + auto route_table = m_syncdRoutes.find(vrf_id); + if (route_table != m_syncdRoutes.end()) 
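/* nhg starts out default-constructed (empty); when routeorch has no synced
 * entry for this prefix the empty key is returned and callers test
 * nhg.getSize() == 0 — fgnhgorch's prefix migration logic relies on this to
 * tell whether the APP_DB route delete has completed. */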
diff --git a/orchagent/routeorch.cpp b/orchagent/routeorch.cpp
index 66a0c86aaf..629d08390a 100644
--- a/orchagent/routeorch.cpp
+++ b/orchagent/routeorch.cpp
@@ -22,7 +22,7 @@ extern CrmOrch *gCrmOrch;
 
 const int routeorch_pri = 5;
 
-RouteOrch::RouteOrch(DBConnector *db, string tableName, SwitchOrch *switchOrch, NeighOrch *neighOrch, IntfsOrch *intfsOrch, VRFOrch *vrfOrch) :
+RouteOrch::RouteOrch(DBConnector *db, string tableName, SwitchOrch *switchOrch, NeighOrch *neighOrch, IntfsOrch *intfsOrch, VRFOrch *vrfOrch, FgNhgOrch *fgNhgOrch) :
     gRouteBulker(sai_route_api),
     gNextHopGroupMemberBulker(sai_next_hop_group_api, gSwitchId),
     Orch(db, tableName, routeorch_pri),
@@ -30,6 +30,7 @@ RouteOrch::RouteOrch(DBConnector *db, string tableName, SwitchOrch *switchOrch,
     m_neighOrch(neighOrch),
     m_intfsOrch(intfsOrch),
     m_vrfOrch(vrfOrch),
+    m_fgNhgOrch(fgNhgOrch),
     m_nextHopGroupCount(0),
     m_resync(false)
 {
@@ -334,6 +335,11 @@ bool RouteOrch::validnexthopinNextHopGroup(const NextHopKey &nexthop)
         nhopgroup->second.nhopgroup_members[nexthop] = nexthop_id;
     }
 
+    if (!m_fgNhgOrch->validNextHopInNextHopGroup(nexthop))
+    {
+        return false;
+    }
+
     return true;
 }
 
@@ -366,6 +372,11 @@ bool RouteOrch::invalidnexthopinNextHopGroup(const NextHopKey &nexthop)
         gCrmOrch->decCrmResUsedCounter(CrmResourceType::CRM_NEXTHOP_GROUP_MEMBER);
     }
 
+    if (!m_fgNhgOrch->invalidNextHopInNextHopGroup(nexthop))
+    {
+        return false;
+    }
+
     return true;
 }
 
@@ -832,6 +843,66 @@ bool RouteOrch::isRefCounterZero(const NextHopGroupKey &nexthops) const
     return m_syncdNextHopGroups.at(nexthops).ref_count == 0;
 }
 
+const NextHopGroupKey RouteOrch::getSyncdRouteNhgKey(sai_object_id_t vrf_id, const IpPrefix& ipPrefix)
+{
+    NextHopGroupKey nhg;
+    auto route_table = m_syncdRoutes.find(vrf_id);
+    if (route_table != m_syncdRoutes.end())
+    {
+        auto route_entry = route_table->second.find(ipPrefix);
+        if (route_entry != route_table->second.end())
+        {
+            nhg = route_entry->second;
+        }
+    }
+    return nhg;
+}
+
+bool RouteOrch::createFineGrainedNextHopGroup(sai_object_id_t &next_hop_group_id, vector<sai_attribute_t> &nhg_attrs)
+{
+    SWSS_LOG_ENTER();
+
+    if (m_nextHopGroupCount >= m_maxNextHopGroupCount)
+    {
+        SWSS_LOG_DEBUG("Failed to create new next hop group. \
+                        Reaching maximum number of next hop groups.");
+        return false;
+    }
+
+    sai_status_t status = sai_next_hop_group_api->create_next_hop_group(&next_hop_group_id,
+                                                                        gSwitchId,
+                                                                        (uint32_t)nhg_attrs.size(),
+                                                                        nhg_attrs.data());
+    if (status != SAI_STATUS_SUCCESS)
+    {
+        SWSS_LOG_ERROR("Failed to create next hop group rv:%d", status);
+        return false;
+    }
+
+    gCrmOrch->incCrmResUsedCounter(CrmResourceType::CRM_NEXTHOP_GROUP);
+    m_nextHopGroupCount++;
+
+    return true;
+}
+
+bool RouteOrch::removeFineGrainedNextHopGroup(sai_object_id_t &next_hop_group_id)
+{
+    SWSS_LOG_ENTER();
+
+    sai_status_t status = sai_next_hop_group_api->remove_next_hop_group(next_hop_group_id);
+    if (status != SAI_STATUS_SUCCESS)
+    {
+        SWSS_LOG_ERROR("Failed to remove next hop group %" PRIx64 ", rv:%d",
+                       next_hop_group_id, status);
+        return false;
+    }
+
+    gCrmOrch->decCrmResUsedCounter(CrmResourceType::CRM_NEXTHOP_GROUP);
+    m_nextHopGroupCount--;
+
+    return true;
+}
+
 bool RouteOrch::addNextHopGroup(const NextHopGroupKey &nexthops)
 {
     SWSS_LOG_ENTER();
@@ -1073,6 +1144,15 @@ bool RouteOrch::addRoute(RouteBulkContext& ctx, const NextHopGroupKey &nextHops)
     sai_object_id_t& vrf_id = ctx.vrf_id;
     IpPrefix& ipPrefix = ctx.ip_prefix;
 
+    if (m_fgNhgOrch->fgNhgPrefixes.find(ipPrefix) != m_fgNhgOrch->fgNhgPrefixes.end()
+        && vrf_id == gVirtualRouterId)
+    {
+        /* Only support the default vrf for Fine Grained ECMP */
+        SWSS_LOG_INFO("Reroute %s:%s to fgNhgOrch", ipPrefix.to_string().c_str(),
+                      nextHops.to_string().c_str());
+        return m_fgNhgOrch->addRoute(vrf_id, ipPrefix, nextHops);
+    }
+
     /* next_hop_id indicates the next hop id or next hop group id of this route */
     sai_object_id_t next_hop_id;
 
@@ -1334,6 +1414,14 @@ bool RouteOrch::removeRoute(RouteBulkContext& ctx)
     sai_object_id_t& vrf_id = ctx.vrf_id;
     IpPrefix& ipPrefix = ctx.ip_prefix;
 
+    if (m_fgNhgOrch->fgNhgPrefixes.find(ipPrefix) != m_fgNhgOrch->fgNhgPrefixes.end()
+        && vrf_id == gVirtualRouterId)
+    {
+        /* Only support the default vrf for Fine Grained ECMP */
+        SWSS_LOG_INFO("Reroute %s to fgNhgOrch", ipPrefix.to_string().c_str());
+        return m_fgNhgOrch->removeRoute(vrf_id, ipPrefix);
+    }
+
     auto it_route_table = m_syncdRoutes.find(vrf_id);
     if (it_route_table == m_syncdRoutes.end())
     {
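Taken together, the routeorch.cpp hunks above turn RouteOrch into a thin dispatcher: fine grained prefixes in the default VRF are handed to FgNhgOrch, and everything else follows the existing path. A rough Python model of that gate, with names mirroring the diff (the helper itself is illustrative only, not part of the patch):

# Minimal model of the dispatch added to RouteOrch::addRoute/removeRoute.
# fg_nhg_prefixes stands in for m_fgNhgOrch->fgNhgPrefixes; all values
# here are illustrative.
def route_owner(ip_prefix, vrf_id, fg_nhg_prefixes, default_vrf_id):
    # Fine grained handling applies only when the prefix is configured
    # under FG_NHG_PREFIX *and* the route lives in the default VRF.
    if ip_prefix in fg_nhg_prefixes and vrf_id == default_vrf_id:
        return "fgnhgorch"
    return "routeorch"

assert route_owner("2.2.2.0/24", "vrf-default", {"2.2.2.0/24"}, "vrf-default") == "fgnhgorch"
assert route_owner("2.2.2.0/24", "Vrf_blue", {"2.2.2.0/24"}, "vrf-default") == "routeorch"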
diff --git a/orchagent/routeorch.h b/orchagent/routeorch.h
index 82bcd35d71..7c5a6a483f 100644
--- a/orchagent/routeorch.h
+++ b/orchagent/routeorch.h
@@ -12,7 +12,7 @@
 #include "ipprefix.h"
 #include "nexthopgroupkey.h"
 #include "bulker.h"
-
+#include "fgnhgorch.h"
 #include <map>
 
 /* Maximum next hop group number */
@@ -89,7 +89,7 @@ struct RouteBulkContext
 class RouteOrch : public Orch, public Subject
 {
 public:
-    RouteOrch(DBConnector *db, string tableName, SwitchOrch *switchOrch, NeighOrch *neighOrch, IntfsOrch *intfsOrch, VRFOrch *vrfOrch);
+    RouteOrch(DBConnector *db, string tableName, SwitchOrch *switchOrch, NeighOrch *neighOrch, IntfsOrch *intfsOrch, VRFOrch *vrfOrch, FgNhgOrch *fgNhgOrch);
 
     bool hasNextHopGroup(const NextHopGroupKey&) const;
     sai_object_id_t getNextHopGroupId(const NextHopGroupKey&);
@@ -108,11 +108,16 @@ class RouteOrch : public Orch, public Subject
     bool invalidnexthopinNextHopGroup(const NextHopKey&);
 
     void notifyNextHopChangeObservers(sai_object_id_t, const IpPrefix&, const NextHopGroupKey&, bool);
+    const NextHopGroupKey getSyncdRouteNhgKey(sai_object_id_t vrf_id, const IpPrefix& ipPrefix);
+    bool createFineGrainedNextHopGroup(sai_object_id_t &next_hop_group_id, vector<sai_attribute_t> &nhg_attrs);
+    bool removeFineGrainedNextHopGroup(sai_object_id_t &next_hop_group_id);
+
 private:
     SwitchOrch *m_switchOrch;
     NeighOrch *m_neighOrch;
     IntfsOrch *m_intfsOrch;
     VRFOrch *m_vrfOrch;
+    FgNhgOrch *m_fgNhgOrch;
 
     int m_nextHopGroupCount;
     int m_maxNextHopGroupCount;
@@ -128,8 +133,8 @@ class RouteOrch : public Orch, public Subject
     void addTempRoute(RouteBulkContext& ctx, const NextHopGroupKey&);
     bool addRoute(RouteBulkContext& ctx, const NextHopGroupKey&);
-    bool addRoutePost(const RouteBulkContext& ctx, const NextHopGroupKey &nextHops);
     bool removeRoute(RouteBulkContext& ctx);
+    bool addRoutePost(const RouteBulkContext& ctx, const NextHopGroupKey &nextHops);
     bool removeRoutePost(const RouteBulkContext& ctx);
 
     std::string getLinkLocalEui64Addr(void);
diff --git a/tests/mock_tests/Makefile.am b/tests/mock_tests/Makefile.am
index b60cbb2106..108cab6beb 100644
--- a/tests/mock_tests/Makefile.am
+++ b/tests/mock_tests/Makefile.am
@@ -36,6 +36,7 @@ tests_SOURCES = aclorch_ut.cpp \
                 $(top_srcdir)/orchagent/orch.cpp \
                 $(top_srcdir)/orchagent/notifications.cpp \
                 $(top_srcdir)/orchagent/routeorch.cpp \
+                $(top_srcdir)/orchagent/fgnhgorch.cpp \
                 $(top_srcdir)/orchagent/neighorch.cpp \
                 $(top_srcdir)/orchagent/intfsorch.cpp \
                 $(top_srcdir)/orchagent/portsorch.cpp \
diff --git a/tests/mock_tests/aclorch_ut.cpp b/tests/mock_tests/aclorch_ut.cpp
index cc0ca8941f..63a99f6075 100644
--- a/tests/mock_tests/aclorch_ut.cpp
+++ b/tests/mock_tests/aclorch_ut.cpp
@@ -8,6 +8,7 @@ extern PortsOrch *gPortsOrch;
 extern RouteOrch *gRouteOrch;
 extern IntfsOrch *gIntfsOrch;
 extern NeighOrch *gNeighOrch;
+extern FgNhgOrch *gFgNhgOrch;
 extern FdbOrch *gFdbOrch;
 extern MirrorOrch *gMirrorOrch;
 
@@ -315,8 +316,16 @@ namespace aclorch_test
             ASSERT_EQ(gNeighOrch, nullptr);
             gNeighOrch = new NeighOrch(m_app_db.get(), APP_NEIGH_TABLE_NAME, gIntfsOrch);
 
+            ASSERT_EQ(gFgNhgOrch, nullptr);
+            vector<string> fgnhg_tables = {
+                CFG_FG_NHG,
+                CFG_FG_NHG_PREFIX,
+                CFG_FG_NHG_MEMBER
+            };
+            gFgNhgOrch = new FgNhgOrch(m_config_db.get(), m_app_db.get(), m_state_db.get(), fgnhg_tables, gNeighOrch, gIntfsOrch, gVrfOrch);
+
             ASSERT_EQ(gRouteOrch, nullptr);
-            gRouteOrch = new RouteOrch(m_app_db.get(), APP_ROUTE_TABLE_NAME, gSwitchOrch, gNeighOrch, gIntfsOrch, gVrfOrch);
+            gRouteOrch = new RouteOrch(m_app_db.get(), APP_ROUTE_TABLE_NAME, gSwitchOrch, gNeighOrch, gIntfsOrch, gVrfOrch, gFgNhgOrch);
 
             TableConnector applDbFdb(m_app_db.get(), APP_FDB_TABLE_NAME);
             TableConnector stateDbFdb(m_state_db.get(), STATE_FDB_TABLE_NAME);
@@ -362,6 +371,8 @@
             gCrmOrch = nullptr;
             delete gPortsOrch;
             gPortsOrch = nullptr;
+            delete gFgNhgOrch;
+            gFgNhgOrch = nullptr;
 
             auto status = sai_switch_api->remove_switch(gSwitchId);
             ASSERT_EQ(status, SAI_STATUS_SUCCESS);
diff --git a/tests/mock_tests/mock_orchagent_main.h b/tests/mock_tests/mock_orchagent_main.h
index 71bb522d77..587055b87e 100644
--- a/tests/mock_tests/mock_orchagent_main.h
+++ b/tests/mock_tests/mock_orchagent_main.h
@@ -14,6 +14,7 @@
 #include "vnetorch.h"
 #include "vxlanorch.h"
 #include "policerorch.h"
+#include "fgnhgorch.h"
 
 extern int gBatchSize;
 extern bool gSwssRecord;
@@ -33,6 +34,7 @@ extern sai_object_id_t gUnderlayIfId;
 extern SwitchOrch *gSwitchOrch;
 extern CrmOrch *gCrmOrch;
 extern PortsOrch *gPortsOrch;
+extern FgNhgOrch *gFgNhgOrch;
 extern RouteOrch *gRouteOrch;
 extern IntfsOrch *gIntfsOrch;
 extern NeighOrch *gNeighOrch;
diff --git a/tests/test_fgnhg.py b/tests/test_fgnhg.py
new file mode 100644
index 0000000000..e273484541
--- /dev/null
+++ b/tests/test_fgnhg.py
@@ -0,0 +1,574 @@
+import os
+import re
+import time
+import json
+import pytest
+
+from dvslib.dvs_common import wait_for_result
+from dvslib.dvs_database import DVSDatabase
+from swsscommon import swsscommon
+
+IF_TB = 'INTERFACE'
+FG_NHG = 'FG_NHG'
+FG_NHG_PREFIX = 'FG_NHG_PREFIX'
+FG_NHG_MEMBER = 'FG_NHG_MEMBER'
+ROUTE_TB = "ROUTE_TABLE"
+ASIC_ROUTE_TB = "ASIC_STATE:SAI_OBJECT_TYPE_ROUTE_ENTRY"
+ASIC_NHG_MEMB = "ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP_GROUP_MEMBER"
+
+
+def create_entry(db, table, key, pairs):
+    db.create_entry(table, key, pairs)
+    programmed_table = db.wait_for_entry(table, key)
+    assert programmed_table != {}
+
+
+def remove_entry(db, table, key):
+    db.delete_entry(table, key)
+    db.wait_for_deleted_entry(table, key)
+
+
+def asic_route_exists_and_is_nhg(asic_db, keys, ipprefix):
+    route_exists = False
+    key = ''
+    for k in keys:
+        rt_key = json.loads(k)
+
+        if rt_key['dest'] == ipprefix:
+            route_exists = True
+            key = k
+            break
+    assert route_exists
+
+    fvs = asic_db.get_entry(ASIC_ROUTE_TB, key)
+    if not fvs:
+        return None
+
+    nhgid = fvs.get("SAI_ROUTE_ENTRY_ATTR_NEXT_HOP_ID")
+    return nhgid
+
+
+def get_asic_route_key(asic_db, ipprefix):
+    route_exists = False
+    key = ''
+    keys = asic_db.get_keys(ASIC_ROUTE_TB)
+    for k in keys:
+        rt_key = json.loads(k)
+
+        if rt_key['dest'] == ipprefix:
+            route_exists = True
+            key = k
+            break
+    assert route_exists
+    return key
+
+
+def validate_asic_nhg(asic_db, nhgid, size):
+    fvs = asic_db.get_entry("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP_GROUP", nhgid)
+    assert fvs != {}
+    nhg_type = fvs.get("SAI_NEXT_HOP_GROUP_ATTR_TYPE")
+    assert nhg_type == "SAI_NEXT_HOP_GROUP_TYPE_FINE_GRAIN_ECMP"
+    nhg_cfg_size = fvs.get("SAI_NEXT_HOP_GROUP_ATTR_CONFIGURED_SIZE")
+    assert int(nhg_cfg_size) == size
+    keys = asic_db.get_keys(ASIC_NHG_MEMB)
+    assert len(keys) == size
+
+
+def get_nh_oid_map(asic_db):
+    nh_oid_map = {}
+    keys = asic_db.get_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP")
+    for key in keys:
+        fvs = asic_db.get_entry("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP", key)
+        assert fvs != {}
+        nh_oid_map[key] = fvs["SAI_NEXT_HOP_ATTR_IP"]
+
+    assert nh_oid_map != {}
+    return nh_oid_map
+
+
+def verify_programmed_fg_asic_db_entry(asic_db,nh_memb_exp_count,nh_oid_map,nhgid,bucket_size):
+    def _access_function():
+        false_ret = (False, None)
+        ret = True
+        nh_memb_count = {}
+        for key in nh_memb_exp_count:
+            nh_memb_count[key] = 0
+
+        members = asic_db.get_keys(ASIC_NHG_MEMB)
+        memb_dict = {}
+
+        for member in members:
+            fvs = asic_db.get_entry(ASIC_NHG_MEMB, member)
+            if fvs == {}:
+                return false_ret
+            index = -1
+            nh_oid = "0"
+            for key, val in fvs.items():
+                if key == "SAI_NEXT_HOP_GROUP_MEMBER_ATTR_INDEX":
+                    index = int(val)
+                elif key == "SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_ID":
+                    nh_oid = val
+                elif key == "SAI_NEXT_HOP_GROUP_MEMBER_ATTR_NEXT_HOP_GROUP_ID":
+                    if nhgid != val:
+                        return false_ret
+            if (index == -1 or
+                nh_oid == "0" or
+                nh_oid_map.get(nh_oid,"NULL") == "NULL" or
+                nh_oid_map.get(nh_oid) not in nh_memb_exp_count):
+                return false_ret
+            memb_dict[index] = nh_oid_map.get(nh_oid)
+        idxs = [0]*bucket_size
+        for idx,memb in memb_dict.items():
+            nh_memb_count[memb] = 1 + nh_memb_count[memb]
+            idxs[idx] = idxs[idx] + 1
+
+        for key in nh_memb_exp_count:
+            ret = ret and (nh_memb_count[key] == nh_memb_exp_count[key])
+        for idx in idxs:
+            ret = ret and (idx == 1)
+        return (ret, nh_memb_count)
+
+    status, result = wait_for_result(_access_function, DVSDatabase.DEFAULT_POLLING_CONFIG)
+    if not status:
+        assert not DVSDatabase.DEFAULT_POLLING_CONFIG.strict, \
+            f"Exact match not found: expected={nh_memb_exp_count}, received={result}"
+    return result
+
+
+def shutdown_link(dvs, db, port):
+    assert dvs.servers[port].runcmd("ip link set down dev eth0") == 0
+    db.wait_for_field_match("PORT_TABLE", "Ethernet%d" % (port * 4), {"oper_status": "down"})
+
+
+def startup_link(dvs, db, port):
+    assert dvs.servers[port].runcmd("ip link set up dev eth0") == 0
+    db.wait_for_field_match("PORT_TABLE", "Ethernet%d" % (port * 4), {"oper_status": "up"})
+
+
+def verify_programmed_fg_state_db_entry(state_db,nh_memb_exp_count):
+    # Work on a copy so the caller's expected-count dict is not mutated
+    memb_dict = dict(nh_memb_exp_count)
+    keys = state_db.get_keys("FG_ROUTE_TABLE")
+    assert len(keys) != 0
+    for key in keys:
+        fvs = state_db.get_entry("FG_ROUTE_TABLE", key)
+        assert fvs != {}
+        for _, value in fvs.items():
+            assert value in nh_memb_exp_count
+            memb_dict[value] = memb_dict[value] - 1
+
+    for memb, count in memb_dict.items():
+        assert count == 0
+
+
+def validate_fine_grained_asic_n_state_db_entries(asic_db, state_db, ip_to_if_map,
+        nh_memb_exp_count, nh_oid_map, nhgid, bucket_size):
+    state_db_entry_memb_exp_count = {}
+
+    for ip, cnt in nh_memb_exp_count.items():
+        state_db_entry_memb_exp_count[ip + '@' + ip_to_if_map[ip]] = cnt
+
+    verify_programmed_fg_asic_db_entry(asic_db,nh_memb_exp_count,nh_oid_map,nhgid,bucket_size)
+    verify_programmed_fg_state_db_entry(state_db, state_db_entry_memb_exp_count)
+
+
+def program_route_and_validate_fine_grained_ecmp(app_db, asic_db, state_db, ip_to_if_map,
+        fg_nhg_prefix, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size):
+    ips = ""
+    ifs = ""
+    for ip in nh_memb_exp_count:
+        if ips == "":
+            ips = ip
+            ifs = ip_to_if_map[ip]
+        else:
+            ips = ips + "," + ip
+            ifs = ifs + "," + ip_to_if_map[ip]
+
+    ps = swsscommon.ProducerStateTable(app_db, ROUTE_TB)
+    fvs = swsscommon.FieldValuePairs([("nexthop", ips), ("ifname", ifs)])
+    ps.set(fg_nhg_prefix, fvs)
+    validate_fine_grained_asic_n_state_db_entries(asic_db, state_db, ip_to_if_map,
+            nh_memb_exp_count, nh_oid_map, nhgid, bucket_size)
+
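The state-db verifier above decrements one expected count per FG_ROUTE_TABLE field, so it relies on the entry layout that fgnhgorch writes to STATE_DB: one field per hash bucket index, valued "<nh-ip>@<ifname>". A hand-written illustration of that shape (the specific values are invented for clarity):

# Illustrative STATE_DB FG_ROUTE_TABLE entry, keyed by the route prefix,
# as consumed by verify_programmed_fg_state_db_entry above.
fg_route_entry = {
    "0": "10.0.0.7@Ethernet12",    # bucket index -> next hop @ interface
    "1": "10.0.0.7@Ethernet12",
    # ... one field for every bucket index up to bucket_size - 1 ...
    "59": "10.0.0.11@Ethernet20",
}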
+class TestFineGrainedNextHopGroup(object):
+    def test_route_fgnhg(self, dvs, testlog):
+        app_db = dvs.get_app_db()
+        asic_db = dvs.get_asic_db()
+        config_db = dvs.get_config_db()
+        state_db = dvs.get_state_db()
+        fvs_nul = {"NULL": "NULL"}
+        NUM_NHs = 6
+        fg_nhg_name = "fgnhg_v4"
+        fg_nhg_prefix = "2.2.2.0/24"
+        bucket_size = 60
+        ip_to_if_map = {}
+
+        fvs = {"bucket_size": str(bucket_size)}
+        create_entry(config_db, FG_NHG, fg_nhg_name, fvs)
+
+        fvs = {"FG_NHG": fg_nhg_name}
+        create_entry(config_db, FG_NHG_PREFIX, fg_nhg_prefix, fvs)
+
+        for i in range(0,NUM_NHs):
+            if_name_key = "Ethernet" + str(i*4)
+            ip_pref_key = "Ethernet" + str(i*4) + "|10.0.0." + str(i*2) + "/31"
+            create_entry(config_db, IF_TB, if_name_key, fvs_nul)
+            create_entry(config_db, IF_TB, ip_pref_key, fvs_nul)
+            dvs.runcmd("config interface startup " + if_name_key)
+            assert dvs.servers[i].runcmd("ip link set down dev eth0") == 0
+            assert dvs.servers[i].runcmd("ip link set up dev eth0") == 0
+            bank = 0
+            if i >= NUM_NHs/2:
+                bank = 1
+            fvs = {"FG_NHG": fg_nhg_name, "bank": str(bank)}
+            create_entry(config_db, FG_NHG_MEMBER, "10.0.0." + str(1 + i*2), fvs)
+            ip_to_if_map["10.0.0." + str(1 + i*2)] = if_name_key
+
+        # Wait for the software to receive the entries
+        time.sleep(1)
+
+        asic_routes_count = len(asic_db.get_keys(ASIC_ROUTE_TB))
+        ps = swsscommon.ProducerStateTable(app_db.db_connection, ROUTE_TB)
+        fvs = swsscommon.FieldValuePairs([("nexthop","10.0.0.7,10.0.0.9,10.0.0.11"),
+                ("ifname", "Ethernet12,Ethernet16,Ethernet20")])
+
+        ps.set(fg_nhg_prefix, fvs)
+        # No ASIC_DB entry we can wait for since ARP is not resolved yet,
+        # we just use sleep so that the sw receives this entry
+        time.sleep(1)
+
+        adb = swsscommon.DBConnector(1, dvs.redis_sock, 0)
+        rtbl = swsscommon.Table(adb, ASIC_ROUTE_TB)
+        keys = rtbl.getKeys()
+        found_route = False
+        for k in keys:
+            rt_key = json.loads(k)
+
+            if rt_key['dest'] == fg_nhg_prefix:
+                found_route = True
+                break
+
+        # Since we didn't populate ARP yet, the route shouldn't be programmed
+        assert (found_route == False)
+
+        dvs.runcmd("arp -s 10.0.0.1 00:00:00:00:00:01")
+        dvs.runcmd("arp -s 10.0.0.3 00:00:00:00:00:02")
+        dvs.runcmd("arp -s 10.0.0.5 00:00:00:00:00:03")
+        dvs.runcmd("arp -s 10.0.0.9 00:00:00:00:00:05")
+        dvs.runcmd("arp -s 10.0.0.11 00:00:00:00:00:06")
+
+        keys = asic_db.wait_for_n_keys(ASIC_ROUTE_TB, asic_routes_count + 1)
+        nhgid = asic_route_exists_and_is_nhg(asic_db, keys, fg_nhg_prefix)
+        assert nhgid is not None
+
+        validate_asic_nhg(asic_db, nhgid, bucket_size)
+
+        nh_oid_map = get_nh_oid_map(asic_db)
+
+        ### Test scenarios with bank 0 having 0 members up
+        # ARP is not resolved for 10.0.0.7, so fg nhg should be created without 10.0.0.7
+        nh_memb_exp_count = {"10.0.0.9":30,"10.0.0.11":30}
+        validate_fine_grained_asic_n_state_db_entries(asic_db, state_db, ip_to_if_map,
+                nh_memb_exp_count, nh_oid_map, nhgid, bucket_size)
+
+        # Resolve ARP for 10.0.0.7
+        asic_nh_count = len(asic_db.get_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP"))
+        dvs.runcmd("arp -s 10.0.0.7 00:00:00:00:00:04")
+        asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_NEXT_HOP", asic_nh_count + 1)
+        nh_oid_map = get_nh_oid_map(asic_db)
+        # Now that ARP was resolved, 10.0.0.7 should be added as a valid fg nhg member
+        nh_memb_exp_count = {"10.0.0.7":20,"10.0.0.9":20,"10.0.0.11":20}
+        validate_fine_grained_asic_n_state_db_entries(asic_db, state_db, ip_to_if_map,
+                nh_memb_exp_count, nh_oid_map, nhgid, bucket_size)
+
+        # Bring down 1 next hop in bank 1
+        nh_memb_exp_count = {"10.0.0.7":30,"10.0.0.11":30}
+        program_route_and_validate_fine_grained_ecmp(app_db.db_connection, asic_db, state_db, ip_to_if_map,
+                fg_nhg_prefix, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size)
+
+        # Bring up 1 next hop in bank 1
+        nh_memb_exp_count = {"10.0.0.7":20,"10.0.0.9":20,"10.0.0.11":20}
+        program_route_and_validate_fine_grained_ecmp(app_db.db_connection, asic_db, state_db, ip_to_if_map,
+                fg_nhg_prefix, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size)
+
+        # Bring up some next-hops in bank 0 for the 1st time
+        nh_memb_exp_count = {"10.0.0.1":10,"10.0.0.3":10,"10.0.0.5":10,"10.0.0.7":10,"10.0.0.9":10,"10.0.0.11":10}
+        program_route_and_validate_fine_grained_ecmp(app_db.db_connection, asic_db, state_db, ip_to_if_map,
+                fg_nhg_prefix, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size)
+
+        # Bring down 1 next-hop from bank 0, and 2 next-hops from bank 1
+        nh_memb_exp_count = {"10.0.0.1":15,"10.0.0.5":15,"10.0.0.11":30}
+        program_route_and_validate_fine_grained_ecmp(app_db.db_connection, asic_db, state_db, ip_to_if_map,
+                fg_nhg_prefix, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size)
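The expected counts in all of these scenarios follow one rule: each bank owns half of the 60 buckets, a bank's buckets are split evenly among its reachable members, and an empty bank's buckets are re-split among the other bank's members. A small model of that behavior as I read it (my sketch, not code from this patch; it assumes both banks have equal configured membership, as in this test), checked against the expectations above:

from collections import Counter

def split(buckets, n):
    # Evenly split buckets among n members; earlier members absorb the remainder
    base, extra = divmod(buckets, n)
    return [base + 1 if i < extra else base for i in range(n)]

def expected_counts(bucket_size, up_in_bank0, up_in_bank1):
    half = bucket_size // 2
    if up_in_bank0 and up_in_bank1:
        counts = split(half, up_in_bank0) + split(half, up_in_bank1)
    elif up_in_bank0 or up_in_bank1:
        up = max(up_in_bank0, up_in_bank1)
        # The empty bank's buckets are re-split among the live bank's members
        counts = [a + b for a, b in zip(split(half, up), split(half, up))]
    else:
        counts = []
    return Counter(counts)  # multiset of per-next-hop bucket counts

assert expected_counts(60, 3, 3) == Counter({10: 6})          # all six up
assert expected_counts(60, 0, 3) == Counter({20: 3})          # bank 0 empty
assert expected_counts(60, 2, 1) == Counter({15: 2, 30: 1})   # 2 up / 1 up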
nh_memb_exp_count = {"10.0.0.1":15,"10.0.0.3":15,"10.0.0.11":30} + program_route_and_validate_fine_grained_ecmp(app_db.db_connection, asic_db, state_db, ip_to_if_map, + fg_nhg_prefix, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) + + # Bring down 2 members and bring up 1 member in bank 0 at the same time + nh_memb_exp_count = {"10.0.0.5":30,"10.0.0.11":30} + program_route_and_validate_fine_grained_ecmp(app_db.db_connection, asic_db, state_db, ip_to_if_map, + fg_nhg_prefix, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) + + # Bring up 2 members and bring down 1 member in bank 0 at the same time + nh_memb_exp_count = {"10.0.0.1":15,"10.0.0.3":15,"10.0.0.11":30} + program_route_and_validate_fine_grained_ecmp(app_db.db_connection, asic_db, state_db, ip_to_if_map, + fg_nhg_prefix, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) + + # Bringup arbitrary # of next-hops from both banks at the same time + nh_memb_exp_count = {"10.0.0.1":10,"10.0.0.3":10,"10.0.0.5":10,"10.0.0.7":10,"10.0.0.9":10,"10.0.0.11":10} + program_route_and_validate_fine_grained_ecmp(app_db.db_connection, asic_db, state_db, ip_to_if_map, + fg_nhg_prefix, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) + + # Bring all next-hops in bank 1 down + nh_memb_exp_count = {"10.0.0.1":20,"10.0.0.3":20,"10.0.0.5":20} + program_route_and_validate_fine_grained_ecmp(app_db.db_connection, asic_db, state_db, ip_to_if_map, + fg_nhg_prefix, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) + + # Make next-hop changes to bank 0 members, given bank 1 is still down + nh_memb_exp_count = {"10.0.0.1":30,"10.0.0.5":30} + program_route_and_validate_fine_grained_ecmp(app_db.db_connection, asic_db, state_db, ip_to_if_map, + fg_nhg_prefix, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) + + # Bringup 1 member in bank 1 again + nh_memb_exp_count = {"10.0.0.1":15,"10.0.0.5":15,"10.0.0.11":30} + program_route_and_validate_fine_grained_ecmp(app_db.db_connection, asic_db, state_db, ip_to_if_map, + fg_nhg_prefix, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) + + # Test 2nd,3rd memb up in bank + nh_memb_exp_count = {"10.0.0.1":15,"10.0.0.5":15,"10.0.0.7":10,"10.0.0.9":10,"10.0.0.11":10} + program_route_and_validate_fine_grained_ecmp(app_db.db_connection, asic_db, state_db, ip_to_if_map, + fg_nhg_prefix, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) + + # bring all links down one by one + shutdown_link(dvs, app_db, 0) + nh_memb_exp_count = {"10.0.0.5":30,"10.0.0.7":10,"10.0.0.9":10,"10.0.0.11":10} + validate_fine_grained_asic_n_state_db_entries(asic_db, state_db, ip_to_if_map, + nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) + + shutdown_link(dvs, app_db, 2) + nh_memb_exp_count = {"10.0.0.7":20,"10.0.0.9":20,"10.0.0.11":20} + validate_fine_grained_asic_n_state_db_entries(asic_db, state_db, ip_to_if_map, + nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) + + shutdown_link(dvs, app_db, 3) + nh_memb_exp_count = {"10.0.0.9":30,"10.0.0.11":30} + validate_fine_grained_asic_n_state_db_entries(asic_db, state_db, ip_to_if_map, + nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) + + shutdown_link(dvs, app_db, 4) + nh_memb_exp_count = {"10.0.0.11":60} + validate_fine_grained_asic_n_state_db_entries(asic_db, state_db, ip_to_if_map, + nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) + + # Bring down last link, there shouldn't be a crash or other bad orchagent state because of this + shutdown_link(dvs, app_db, 5) + # Nothing to check for in this case, sleep 1s for the shutdown to reach sw + time.sleep(1) + + # bring all links up one by 
+
+        # Bring all links up one by one
+        startup_link(dvs, app_db, 3)
+        startup_link(dvs, app_db, 4)
+        startup_link(dvs, app_db, 5)
+        nh_memb_exp_count = {"10.0.0.7":20,"10.0.0.9":20,"10.0.0.11":20}
+        validate_fine_grained_asic_n_state_db_entries(asic_db, state_db, ip_to_if_map,
+                nh_memb_exp_count, nh_oid_map, nhgid, bucket_size)
+
+        startup_link(dvs, app_db, 2)
+        nh_memb_exp_count = {"10.0.0.5":30,"10.0.0.7":10,"10.0.0.9":10,"10.0.0.11":10}
+        validate_fine_grained_asic_n_state_db_entries(asic_db, state_db, ip_to_if_map,
+                nh_memb_exp_count, nh_oid_map, nhgid, bucket_size)
+
+        startup_link(dvs, app_db, 0)
+        nh_memb_exp_count = {"10.0.0.1":15,"10.0.0.5":15,"10.0.0.7":10,"10.0.0.9":10,"10.0.0.11":10}
+        validate_fine_grained_asic_n_state_db_entries(asic_db, state_db, ip_to_if_map,
+                nh_memb_exp_count, nh_oid_map, nhgid, bucket_size)
+
+        # Remove an fgnhg member
+        remove_entry(config_db, FG_NHG_MEMBER, "10.0.0.1")
+        nh_memb_exp_count = {"10.0.0.5":30,"10.0.0.7":10,"10.0.0.9":10,"10.0.0.11":10}
+        validate_fine_grained_asic_n_state_db_entries(asic_db, state_db, ip_to_if_map,
+                nh_memb_exp_count, nh_oid_map, nhgid, bucket_size)
+
+        # Add an fgnhg member
+        fvs = {"FG_NHG": fg_nhg_name, "bank": "0"}
+        create_entry(config_db, FG_NHG_MEMBER, "10.0.0.1", fvs)
+        nh_memb_exp_count = {"10.0.0.1":15,"10.0.0.5":15,"10.0.0.7":10,"10.0.0.9":10,"10.0.0.11":10}
+        validate_fine_grained_asic_n_state_db_entries(asic_db, state_db, ip_to_if_map,
+                nh_memb_exp_count, nh_oid_map, nhgid, bucket_size)
+
+        # Remove the route
+        asic_rt_key = get_asic_route_key(asic_db, fg_nhg_prefix)
+        ps._del(fg_nhg_prefix)
+        asic_db.wait_for_deleted_entry(ASIC_ROUTE_TB, asic_rt_key)
+
+        keys = asic_db.get_keys(ASIC_NHG_MEMB)
+        assert len(keys) == 0
+
+        keys = state_db.get_keys("FG_ROUTE_TABLE")
+        assert len(keys) == 0
+
+        remove_entry(config_db, FG_NHG_PREFIX, fg_nhg_prefix)
+        # Nothing we can wait for in terms of db entries, we sleep here
+        # to give the sw enough time to delete the entry
+        time.sleep(1)
+
+        # Add an ECMP route; since the FG_NHG_PREFIX was deleted, it should get
+        # standard (non-fine-grained) ECMP behavior
+        asic_routes_count = len(asic_db.get_keys(ASIC_ROUTE_TB))
+        fvs = swsscommon.FieldValuePairs([("nexthop","10.0.0.7,10.0.0.9,10.0.0.11"),
+                ("ifname", "Ethernet12,Ethernet16,Ethernet20")])
+        ps.set(fg_nhg_prefix, fvs)
+
+        keys = asic_db.wait_for_n_keys(ASIC_ROUTE_TB, asic_routes_count + 1)
+        nhgid = asic_route_exists_and_is_nhg(asic_db, keys, fg_nhg_prefix)
+        assert nhgid is not None
+
+        keys = asic_db.get_keys(ASIC_NHG_MEMB)
+        assert len(keys) == 3
+
+        # Add the fgnhg prefix: the regular route should transition to fine grained ECMP
+        fvs = {"FG_NHG": fg_nhg_name}
+        create_entry(config_db, FG_NHG_PREFIX, fg_nhg_prefix, fvs)
+
+        # Validate the transition to Fine Grained ECMP
+        asic_db.wait_for_n_keys(ASIC_NHG_MEMB, bucket_size)
+        keys = asic_db.get_keys(ASIC_ROUTE_TB)
+        nhgid = asic_route_exists_and_is_nhg(asic_db, keys, fg_nhg_prefix)
+        validate_asic_nhg(asic_db, nhgid, bucket_size)
+
+        nh_oid_map = get_nh_oid_map(asic_db)
+
+        nh_memb_exp_count = {"10.0.0.7":20,"10.0.0.9":20,"10.0.0.11":20}
+        validate_fine_grained_asic_n_state_db_entries(asic_db, state_db, ip_to_if_map,
+                nh_memb_exp_count, nh_oid_map, nhgid, bucket_size)
+
+        # Remove the fgnhg prefix: the fine grained route should transition to a regular ECMP route
+        remove_entry(config_db, FG_NHG_PREFIX, fg_nhg_prefix)
+
+        # Validate regular ECMP
+        asic_db.wait_for_n_keys(ASIC_NHG_MEMB, 3)
+        asic_route_exists_and_is_nhg(asic_db, keys, fg_nhg_prefix)
+
+        # Remove the route entry
+        asic_rt_key = get_asic_route_key(asic_db, fg_nhg_prefix)
+        ps._del(fg_nhg_prefix)
+        asic_db.wait_for_deleted_entry(ASIC_ROUTE_TB, asic_rt_key)
+        asic_db.wait_for_n_keys(ASIC_NHG_MEMB, 0)
+
+        # Clean up all FG_NHG, ARP and interface config
+        remove_entry(config_db, FG_NHG, fg_nhg_name)
+
+        for i in range(0,NUM_NHs):
+            if_name_key = "Ethernet" + str(i*4)
+            ip_pref_key = "Ethernet" + str(i*4) + "|10.0.0." + str(i*2) + "/31"
+            remove_entry(config_db, IF_TB, if_name_key)
+            remove_entry(config_db, IF_TB, ip_pref_key)
+            dvs.runcmd("config interface shutdown " + if_name_key)
+            assert dvs.servers[i].runcmd("ip link set down dev eth0") == 0
+            remove_entry(config_db, FG_NHG_MEMBER, "10.0.0." + str(1 + i*2))
+
+
+        ### Create a new set of entries with more FG members and a larger bucket
+        ### size, chosen so that the bucket size is not evenly divisible by the
+        ### number of next-hops.
+        fg_nhg_name = "new_fgnhg_v4"
+        fg_nhg_prefix = "3.3.3.0/24"
+        # Test with non-divisible bucket size
+        bucket_size = 128
+        NUM_NHs = 10
+
+        ip_to_if_map = {}
+        nh_oid_map = {}
+
+        # Initialize base config
+        fvs = {"bucket_size": str(bucket_size)}
+        create_entry(config_db, FG_NHG, fg_nhg_name, fvs)
+
+        fvs = {"FG_NHG": fg_nhg_name}
+        create_entry(config_db, FG_NHG_PREFIX, fg_nhg_prefix, fvs)
+
+        for i in range(0,NUM_NHs):
+            if_name_key = "Ethernet" + str(i*4)
+            ip_pref_key = "Ethernet" + str(i*4) + "|10.0.0." + str(i*2) + "/31"
+            create_entry(config_db, IF_TB, if_name_key, fvs_nul)
+            create_entry(config_db, IF_TB, ip_pref_key, fvs_nul)
+            dvs.runcmd("config interface startup " + if_name_key)
+            shutdown_link(dvs, app_db, i)
+            startup_link(dvs, app_db, i)
+            bank = 1
+            if i >= NUM_NHs/2:
+                bank = 0
+            fvs = {"FG_NHG": fg_nhg_name, "bank": str(bank)}
+            create_entry(config_db, FG_NHG_MEMBER, "10.0.0." + str(1 + i*2), fvs)
+            ip_to_if_map["10.0.0." + str(1 + i*2)] = if_name_key
+            dvs.runcmd("arp -s 10.0.0." + str(1 + i*2) + " 00:00:00:00:00:" + str(1 + i*2))
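With 128 buckets over two banks of 5 configured members, each bank owns 64 buckets, which no longer divides evenly among a bank's up members; the remainder is handed out one extra bucket at a time. The quick arithmetic behind the expectations below (my sketch, matching the counts asserted in this test):

# 64 buckets among 3 up members in a bank: one member absorbs the remainder
base, extra = divmod(64, 3)
assert [base + 1] * extra + [base] * (3 - extra) == [22, 21, 21]
# 64 buckets among 5 up members: four members get 13 buckets, one gets 12
base, extra = divmod(64, 5)
assert [base + 1] * extra + [base] * (5 - extra) == [13, 13, 13, 13, 12]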
+ str(1 + i*2) + " 00:00:00:00:00:" + str(1 + i*2)) + + # Program the route + asic_routes_count = len(asic_db.get_keys(ASIC_ROUTE_TB)) + fvs = swsscommon.FieldValuePairs([("nexthop","10.0.0.1,10.0.0.11"), + ("ifname", "Ethernet0,Ethernet20")]) + ps.set(fg_nhg_prefix, fvs) + + # Validate that the correct ASIC DB elements were setup per Fine Grained ECMP + keys = asic_db.wait_for_n_keys(ASIC_ROUTE_TB, asic_routes_count + 1) + nhgid = asic_route_exists_and_is_nhg(asic_db, keys, fg_nhg_prefix) + assert nhgid is not None + + validate_asic_nhg(asic_db, nhgid, bucket_size) + + nh_oid_map = get_nh_oid_map(asic_db) + + # Test addition of route with 0 members in bank + nh_memb_exp_count = {"10.0.0.1":64,"10.0.0.11":64} + validate_fine_grained_asic_n_state_db_entries(asic_db, state_db, ip_to_if_map, + nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) + + # Add 2 nhs to both bank 0 and bank 1 + nh_memb_exp_count = {"10.0.0.1":22,"10.0.0.3":21,"10.0.0.5":21,"10.0.0.11":22, + "10.0.0.13":21,"10.0.0.15":21} + program_route_and_validate_fine_grained_ecmp(app_db.db_connection, asic_db, state_db, ip_to_if_map, + fg_nhg_prefix, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) + + # Add 2 more nhs to both bank 0 and bank 1 + nh_memb_exp_count = {"10.0.0.1":13,"10.0.0.3":13,"10.0.0.5":13,"10.0.0.7":12, + "10.0.0.9":13,"10.0.0.11":13,"10.0.0.13":13,"10.0.0.15":13,"10.0.0.17":12,"10.0.0.19":13} + program_route_and_validate_fine_grained_ecmp(app_db.db_connection, asic_db, state_db, ip_to_if_map, + fg_nhg_prefix, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) + + # Remove 1 nh from bank 0 and remove 2 nhs from bank 1 + nh_memb_exp_count = {"10.0.0.3":16,"10.0.0.5":16,"10.0.0.7":16,"10.0.0.9":16, + "10.0.0.11":22,"10.0.0.13":21,"10.0.0.19":21} + program_route_and_validate_fine_grained_ecmp(app_db.db_connection, asic_db, state_db, ip_to_if_map, + fg_nhg_prefix, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) + + # Remove 1 nh from bank 0 and add 1 nh to bank 1 + nh_memb_exp_count = {"10.0.0.3":22,"10.0.0.7":21,"10.0.0.9":21,"10.0.0.13":16, + "10.0.0.15":16,"10.0.0.17":16,"10.0.0.19":16} + program_route_and_validate_fine_grained_ecmp(app_db.db_connection, asic_db, state_db, ip_to_if_map, + fg_nhg_prefix, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) + + # Remove 2 nh from bank 0 and remove 3 nh from bank 1 + nh_memb_exp_count = {"10.0.0.7":64,"10.0.0.11":64} + program_route_and_validate_fine_grained_ecmp(app_db.db_connection, asic_db, state_db, ip_to_if_map, + fg_nhg_prefix, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) + + # Add 2 nhs to bank 0 and remove all nh from bank 1 + nh_memb_exp_count = {"10.0.0.5":42,"10.0.0.7":44,"10.0.0.9":42} + program_route_and_validate_fine_grained_ecmp(app_db.db_connection, asic_db, state_db, ip_to_if_map, + fg_nhg_prefix, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) + + # Add 2 nhs to bank 0 and add 1 nh to bank 1 + nh_memb_exp_count = {"10.0.0.1":12,"10.0.0.3":13,"10.0.0.5":13,"10.0.0.7":13, + "10.0.0.9":13,"10.0.0.11":64} + program_route_and_validate_fine_grained_ecmp(app_db.db_connection, asic_db, state_db, ip_to_if_map, + fg_nhg_prefix, nh_memb_exp_count, nh_oid_map, nhgid, bucket_size) + + # Remove route + # remove prefix entry + asic_rt_key = get_asic_route_key(asic_db, fg_nhg_prefix) + ps._del(fg_nhg_prefix) + asic_db.wait_for_deleted_entry(ASIC_ROUTE_TB, asic_rt_key) + asic_db.wait_for_n_keys(ASIC_NHG_MEMB, 0) + + remove_entry(config_db, "FG_NHG_PREFIX", fg_nhg_prefix)