From adcf69dc25b25c0e5e0952eafba1bd75a4a927ec Mon Sep 17 00:00:00 2001 From: abanu-ms Date: Tue, 25 Jan 2022 08:19:48 +0200 Subject: [PATCH] [cbf] Fix cbf sync error (#2056) What I did Changed the CBF group members sync code to be non-assertive when one of its members is not yet synced. Why I did it There can be situations when the member of a CBF group is not yet synced by the time the CBF group is being synced. Some examples would be: CBF group entry is created before its member in APPL_DB Member sync is being blocked by something, like missing a neighbor/NH entry In this scenarios, we should wait for the member to be synced instead of asserting as the member might become available shortly. How I verified it Added a new UT --- orchagent/cbf/cbfnhgorch.cpp | 10 +++++++--- tests/test_nhg.py | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/orchagent/cbf/cbfnhgorch.cpp b/orchagent/cbf/cbfnhgorch.cpp index f275c5c902dc..76435ad12d2b 100644 --- a/orchagent/cbf/cbfnhgorch.cpp +++ b/orchagent/cbf/cbfnhgorch.cpp @@ -632,6 +632,11 @@ bool CbfNhg::syncMembers(const set &members) nhgm.to_string().c_str(), to_string().c_str()); throw std::logic_error("Syncing already synced NHG member"); } + else if (nhgm.getNhgId() == SAI_NULL_OBJECT_ID) + { + SWSS_LOG_WARN("CBF NHG member %s is not yet synced", nhgm.to_string().c_str()); + return false; + } /* * Check if the group exists in NhgOrch. @@ -710,10 +715,9 @@ vector CbfNhg::createNhgmAttrs(const CbfNhgMember &nhgm) const { SWSS_LOG_ENTER(); - if (!isSynced() || (nhgm.getNhgId() == SAI_NULL_OBJECT_ID)) + if (!isSynced()) { - SWSS_LOG_ERROR("CBF next hop group %s or next hop group %s are not synced", - to_string().c_str(), nhgm.to_string().c_str()); + SWSS_LOG_ERROR("CBF next hop group %s is not synced", to_string().c_str()); throw logic_error("CBF next hop group member attributes data is insufficient"); } diff --git a/tests/test_nhg.py b/tests/test_nhg.py index 390a8e0f517c..bf6ef890ea00 100644 --- a/tests/test_nhg.py +++ b/tests/test_nhg.py @@ -2207,6 +2207,42 @@ def create_cbf_nhg_inexistent_map_test(): self.fc_to_nhg_ps._del(nhg_maps.pop()) self.asic_db.wait_for_n_keys(self.ASIC_NHG_MAP_STR, self.asic_nhg_maps_count) + # Test scenario: + # - Create a CBF NHG that has a member which is not yet synced. It shouldn't be synced. + # - Add the missing member and assert the CBF NHG is now synced. + def test_cbf_sync_before_member(self, dvs, testlog): + self.init_test(dvs, 2) + + # Create an FC to NH index selection map + nhg_map = [(str(i), '0' if i < 4 else '1') for i in range(8)] + fvs = swsscommon.FieldValuePairs(nhg_map) + self.fc_to_nhg_ps.set('cbfnhgmap1', fvs) + self.asic_db.wait_for_n_keys(self.ASIC_NHG_MAP_STR, self.asic_nhg_maps_count + 1) + + # Create a non-CBF NHG + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1,10.0.0.3'), + ('ifname', 'Ethernet0,Ethernet4')]) + self.nhg_ps.set('group1', fvs) + self.asic_db.wait_for_n_keys(self.ASIC_NHG_STR, self.asic_nhgs_count + 1) + + # Create a CBF NHG with a member that doesn't currently exist. Nothing should happen + fvs = swsscommon.FieldValuePairs([('members', 'group1,group2'), + ('selection_map', 'cbfnhgmap1')]) + self.cbf_nhg_ps.set('cbfgroup1', fvs) + time.sleep(1) + assert(len(self.asic_db.get_keys(self.ASIC_NHG_STR)) == self.asic_nhgs_count + 1) + + # Create the missing non-CBF NHG. This and the CBF NHG should be created. + fvs = swsscommon.FieldValuePairs([('nexthop', '10.0.0.1,10.0.0.3'), + ("ifname", "Ethernet0,Ethernet4")]) + self.nhg_ps.set("group2", fvs) + self.asic_db.wait_for_n_keys(self.ASIC_NHG_STR, self.asic_nhgs_count + 3) + + # Cleanup + self.nhg_ps._del('cbfgroup1') + self.nhg_ps._del('group1') + self.nhg_ps._del('group2') + self.nhg_ps._del('cbfnhgmap1') # Add Dummy always-pass test at end as workaroud # for issue when Flaky fail on final test it invokes module tear-down before retrying