From 829b2197aeeace1a68617f51d0384de54feb21d2 Mon Sep 17 00:00:00 2001 From: Lawrence Lee Date: Thu, 17 Mar 2022 10:52:12 -0700 Subject: [PATCH] [tunnelmgrd]: Warm boot support (#2166) * [tunnelmgrd]: Warm boot support Signed-off-by: Lawrence Lee --- cfgmgr/tunnelmgr.cpp | 46 +++++++++++++++++++++++++++++++++++++-- cfgmgr/tunnelmgr.h | 8 +++++++ cfgmgr/tunnelmgrd.cpp | 4 ++++ tests/test_warm_reboot.py | 29 ++++++++++++++++++++++++ 4 files changed, 85 insertions(+), 2 deletions(-) diff --git a/cfgmgr/tunnelmgr.cpp b/cfgmgr/tunnelmgr.cpp index 7f4dc4dd3d4a..a81438470fe4 100644 --- a/cfgmgr/tunnelmgr.cpp +++ b/cfgmgr/tunnelmgr.cpp @@ -9,6 +9,7 @@ #include "tokenize.h" #include "shellcmd.h" #include "exec.h" +#include "warm_restart.h" using namespace std; using namespace swss; @@ -107,7 +108,8 @@ static int cmdIpTunnelRouteDel(const std::string& pfx, std::string & res) TunnelMgr::TunnelMgr(DBConnector *cfgDb, DBConnector *appDb, const std::vector &tableNames) : Orch(cfgDb, tableNames), m_appIpInIpTunnelTable(appDb, APP_TUNNEL_DECAP_TABLE_NAME), - m_cfgPeerTable(cfgDb, CFG_PEER_SWITCH_TABLE_NAME) + m_cfgPeerTable(cfgDb, CFG_PEER_SWITCH_TABLE_NAME), + m_cfgTunnelTable(cfgDb, CFG_TUNNEL_TABLE_NAME) { std::vector peer_keys; m_cfgPeerTable.getKeys(peer_keys); @@ -126,6 +128,23 @@ TunnelMgr::TunnelMgr(DBConnector *cfgDb, DBConnector *appDb, const std::vector tunnel_keys; + m_cfgTunnelTable.getKeys(tunnel_keys); + + for (auto tunnel: tunnel_keys) + { + m_tunnelReplay.insert(tunnel); + } + if (m_tunnelReplay.empty()) + { + finalizeWarmReboot(); + } + + } + auto consumerStateTable = new swss::ConsumerStateTable(appDb, APP_TUNNEL_ROUTE_TABLE_NAME, TableConsumable::DEFAULT_POP_BATCH_SIZE, default_orch_pri); @@ -191,6 +210,11 @@ void TunnelMgr::doTask(Consumer &consumer) ++it; } } + + if (!replayDone && m_tunnelReplay.empty() && WarmStart::isWarmStart()) + { + finalizeWarmReboot(); + } } bool TunnelMgr::doTunnelTask(const KeyOpFieldsValuesTuple & t) @@ -230,8 +254,16 @@ bool TunnelMgr::doTunnelTask(const KeyOpFieldsValuesTuple & t) SWSS_LOG_NOTICE("Peer/Remote IP not configured"); } - m_appIpInIpTunnelTable.set(tunnelName, kfvFieldsValues(t)); + /* If the tunnel is already in hardware (i.e. present in the replay), + * don't try to create it again since it will cause an OA crash + * (warmboot case) + */ + if (m_tunnelReplay.find(tunnelName) == m_tunnelReplay.end()) + { + m_appIpInIpTunnelTable.set(tunnelName, kfvFieldsValues(t)); + } } + m_tunnelReplay.erase(tunnelName); m_tunnelCache[tunnelName] = tunInfo; } else @@ -356,3 +388,13 @@ bool TunnelMgr::configIpTunnel(const TunnelInfo& tunInfo) return true; } + + +void TunnelMgr::finalizeWarmReboot() +{ + replayDone = true; + WarmStart::setWarmStartState("tunnelmgrd", WarmStart::REPLAYED); + SWSS_LOG_NOTICE("tunnelmgrd warmstart state set to REPLAYED"); + WarmStart::setWarmStartState("tunnelmgrd", WarmStart::RECONCILED); + SWSS_LOG_NOTICE("tunnelmgrd warmstart state set to RECONCILED"); +} diff --git a/cfgmgr/tunnelmgr.h b/cfgmgr/tunnelmgr.h index e2b601abe929..53d2f2727864 100644 --- a/cfgmgr/tunnelmgr.h +++ b/cfgmgr/tunnelmgr.h @@ -4,6 +4,8 @@ #include "producerstatetable.h" #include "orch.h" +#include + namespace swss { struct TunnelInfo @@ -28,12 +30,18 @@ class TunnelMgr : public Orch bool configIpTunnel(const TunnelInfo& info); + void finalizeWarmReboot(); + ProducerStateTable m_appIpInIpTunnelTable; Table m_cfgPeerTable; + Table m_cfgTunnelTable; std::map m_tunnelCache; std::map m_intfCache; std::string m_peerIp; + + std::set m_tunnelReplay; + bool replayDone = false; }; } diff --git a/cfgmgr/tunnelmgrd.cpp b/cfgmgr/tunnelmgrd.cpp index 0165eb94b5f2..0a6a84eaeb5b 100644 --- a/cfgmgr/tunnelmgrd.cpp +++ b/cfgmgr/tunnelmgrd.cpp @@ -11,6 +11,7 @@ #include "exec.h" #include "schema.h" #include "tunnelmgr.h" +#include "warm_restart.h" using namespace std; using namespace swss; @@ -54,6 +55,9 @@ int main(int argc, char **argv) DBConnector cfgDb("CONFIG_DB", 0); DBConnector appDb("APPL_DB", 0); + WarmStart::initialize("tunnelmgrd", "swss"); + WarmStart::checkWarmStart("tunnelmgrd", "swss"); + TunnelMgr tunnelmgr(&cfgDb, &appDb, cfgTunTables); std::vector cfgOrchList = {&tunnelmgr}; diff --git a/tests/test_warm_reboot.py b/tests/test_warm_reboot.py index 36028dfc6985..1a10c9455a3b 100644 --- a/tests/test_warm_reboot.py +++ b/tests/test_warm_reboot.py @@ -2365,6 +2365,35 @@ def test_EverflowWarmReboot(self, dvs, dvs_acl): dvs.start_swss() dvs.check_swss_ready() + def test_TunnelMgrdWarmRestart(self, dvs): + tunnel_name = "MuxTunnel0" + tunnel_table = "TUNNEL_DECAP_TABLE" + tunnel_params = { + "tunnel_type": "IPINIP", + "dst_ip": "10.1.0.32", + "dscp_mode": "uniform", + "ecn_mode": "standard", + "ttl_mode": "pipe" + } + + pubsub = dvs.SubscribeAppDbObject(tunnel_table) + + dvs.runcmd("config warm_restart enable swss") + config_db = dvs.get_config_db() + config_db.create_entry("TUNNEL", tunnel_name, tunnel_params) + + app_db = dvs.get_app_db() + app_db.wait_for_matching_keys(tunnel_table, [tunnel_name]) + + nadd, ndel = dvs.CountSubscribedObjects(pubsub) + assert nadd == len(tunnel_params) + assert ndel == 1 # Expect 1 deletion as part of table creation + + dvs.runcmd("supervisorctl restart tunnelmgrd") + dvs.check_services_ready() + nadd, ndel = dvs.CountSubscribedObjects(pubsub) + assert nadd == 0 + assert ndel == 0 # Add Dummy always-pass test at end as workaroud # for issue when Flaky fail on final test it invokes module tear-down before retrying