From 43cc48693718e70170cded77f3d6abb389f3572e Mon Sep 17 00:00:00 2001
From: Vivek <vkarri@nvidia.com>
Date: Mon, 12 Sep 2022 16:46:37 -0700
Subject: [PATCH] [portmgr] Fixed the orchagent crash due to late arrival of
 notif (#2431)

Signed-off-by: Vivek Reddy Karri <vkarri@nvidia.com>
Bulk write to APP_DB i.e. alias, lanes, speed must be read through one notification by orchagent during create_port
Handled a race condition in portmgrd which tries to immediately apply a mtu/admin_status SET notif after a DEL causing it to crash
---
 cfgmgr/portmgr.cpp      | 60 ++++++++++++++++++++++++++++++++---------
 cfgmgr/portmgr.h        |  1 +
 orchagent/portsorch.cpp |  9 +++++++
 3 files changed, 57 insertions(+), 13 deletions(-)

diff --git a/cfgmgr/portmgr.cpp b/cfgmgr/portmgr.cpp
index 38c0418a7ac5..5134b31861c6 100644
--- a/cfgmgr/portmgr.cpp
+++ b/cfgmgr/portmgr.cpp
@@ -23,27 +23,55 @@ PortMgr::PortMgr(DBConnector *cfgDb, DBConnector *appDb, DBConnector *stateDb, c
 bool PortMgr::setPortMtu(const string &alias, const string &mtu)
 {
     stringstream cmd;
-    string res;
+    string res, cmd_str;
 
     // ip link set dev <port_name> mtu <mtu>
     cmd << IP_CMD << " link set dev " << shellquote(alias) << " mtu " << shellquote(mtu);
-    EXEC_WITH_ERROR_THROW(cmd.str(), res);
-
-    // Set the port MTU in application database to update both
-    // the port MTU and possibly the port based router interface MTU
-    return writeConfigToAppDb(alias, "mtu", mtu);
+    cmd_str = cmd.str();
+    int ret = swss::exec(cmd_str, res);
+    if (!ret)
+    {
+        // Set the port MTU in application database to update both
+        // the port MTU and possibly the port based router interface MTU
+        return writeConfigToAppDb(alias, "mtu", mtu);
+    }
+    else if (!isPortStateOk(alias))
+    {
+        // Can happen when a DEL notification is sent by portmgrd immediately followed by a new SET notif
+        SWSS_LOG_WARN("Setting mtu to alias:%s netdev failed with cmd:%s, rc:%d, error:%s", alias.c_str(), cmd_str.c_str(), ret, res.c_str());
+        return false;
+    }
+    else
+    {
+        throw runtime_error(cmd_str + " : " + res);
+    }
+    return true;
 }
 
 bool PortMgr::setPortAdminStatus(const string &alias, const bool up)
 {
     stringstream cmd;
-    string res;
+    string res, cmd_str;
 
     // ip link set dev <port_name> [up|down]
     cmd << IP_CMD << " link set dev " << shellquote(alias) << (up ? " up" : " down");
-    EXEC_WITH_ERROR_THROW(cmd.str(), res);
-
-    return writeConfigToAppDb(alias, "admin_status", (up ? "up" : "down"));
+    cmd_str = cmd.str();
+    int ret = swss::exec(cmd_str, res);
+    if (!ret)
+    {
+        return writeConfigToAppDb(alias, "admin_status", (up ? "up" : "down"));
+    }
+    else if (!isPortStateOk(alias))
+    {
+        // Can happen when a DEL notification is sent by portmgrd immediately followed by a new SET notification
+        SWSS_LOG_WARN("Setting admin_status to alias:%s netdev failed with cmd%s, rc:%d, error:%s", alias.c_str(), cmd_str.c_str(), ret, res.c_str());
+        return false;
+    }
+    else
+    {
+        throw runtime_error(cmd_str + " : " + res);
+    }
+    return true;
 }
 
 bool PortMgr::isPortStateOk(const string &alias)
@@ -124,10 +152,9 @@ void PortMgr::doTask(Consumer &consumer)
                 }
             }
 
-            for (auto &entry : field_values)
+            if (field_values.size())
             {
-                writeConfigToAppDb(alias, fvField(entry), fvValue(entry));
-                SWSS_LOG_NOTICE("Configure %s %s to %s", alias.c_str(), fvField(entry).c_str(), fvValue(entry).c_str());
+                writeConfigToAppDb(alias, field_values);
             }
 
             if (!portOk)
@@ -136,6 +163,7 @@ void PortMgr::doTask(Consumer &consumer)
 
                 writeConfigToAppDb(alias, "mtu", mtu);
                 writeConfigToAppDb(alias, "admin_status", admin_status);
+                /* Retry setting these params after the netdev is created */
                 field_values.clear();
                 field_values.emplace_back("mtu", mtu);
                 field_values.emplace_back("admin_status", admin_status);
@@ -176,3 +204,9 @@ bool PortMgr::writeConfigToAppDb(const std::string &alias, const std::string &fi
 
     return true;
 }
+
+bool PortMgr::writeConfigToAppDb(const std::string &alias, std::vector<FieldValueTuple> &field_values)
+{
+    m_appPortTable.set(alias, field_values);
+    return true;
+}
diff --git a/cfgmgr/portmgr.h b/cfgmgr/portmgr.h
index dde346bfe1cc..683aacc488c7 100644
--- a/cfgmgr/portmgr.h
+++ b/cfgmgr/portmgr.h
@@ -30,6 +30,7 @@ class PortMgr : public Orch
 
     void doTask(Consumer &consumer);
     bool writeConfigToAppDb(const std::string &alias, const std::string &field, const std::string &value);
+    bool writeConfigToAppDb(const std::string &alias, std::vector<FieldValueTuple> &field_values);
     bool setPortMtu(const std::string &alias, const std::string &mtu);
     bool setPortAdminStatus(const std::string &alias, const bool up);
     bool isPortStateOk(const std::string &alias);
diff --git a/orchagent/portsorch.cpp b/orchagent/portsorch.cpp
index 14e510866008..126c33dcc7ec 100755
--- a/orchagent/portsorch.cpp
+++ b/orchagent/portsorch.cpp
@@ -2615,6 +2615,15 @@ bool PortsOrch::addPort(const set<int> &lane_set, uint32_t speed, int an, string
 {
     SWSS_LOG_ENTER();
 
+    if (!speed || lane_set.empty())
+    {
+        /*
+        speed and lane list are mandatory attributes for the initial create_port call
+        This check is required because the incoming notifs may not be atomic
+        */
+        return true;
+    }
+
     vector<uint32_t> lanes(lane_set.begin(), lane_set.end());
 
     sai_attribute_t attr;