From a26b26ac340dea1ff79160aad0684cd3bc8a06d7 Mon Sep 17 00:00:00 2001
From: Dror Prital <76714716+dprital@users.noreply.github.com>
Date: Mon, 29 Aug 2022 16:23:00 +0300
Subject: [PATCH] Dynamic port configuration - add port buffer cfg to the port
 ref counter (#2194)

- What I did
This PR replaces PR #2022.

The port reference counter is now increased/decreased each time a buffer configuration is added to or removed from a port.
Implemented according to the design in sonic-net/SONiC#900.
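
For illustration, a minimal standalone sketch of the create-vs-modify tracking described above (the names onBufferOp and ref_count are made up for this sketch; the actual change below keys the maps the same way but calls gPortsOrch->increasePortRefCount() / decreasePortRefCount()):

    #include <map>
    #include <string>

    // per-port reference counter (stands in for PortsOrch's counter)
    static std::map<std::string, int> ref_count;
    // last command seen per (port, queue/PG index)
    static std::map<std::string, std::map<size_t, std::string>> last_op;

    void onBufferOp(const std::string &port, size_t ind, const std::string &op)
    {
        if (op == "SET" && last_op[port][ind] != "SET")
            ref_count[port]++;   // first SET for this index: a create, not a modify
        else if (op == "DEL" && last_op[port][ind] == "SET")
            ref_count[port]--;   // DEL of an existing entry
        last_op[port][ind] = op; // remember the last command
    }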

- Why I did it
To avoid cases where a port is removed while buffer configuration still exists on that port.

- How I verified it
A VS test was added to cover this.
We try to remove a port that still carries buffer configuration and verify that the port is not
removed; only after all buffer configurations on this port have been removed is the port removed.
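
For reference, the new test is expected to run like the other VS tests, assuming the standard sonic-swss tests/ environment: run "sudo pytest test_port_add_remove.py" from the tests/ directory.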
---
 orchagent/bufferorch.cpp      |  69 ++++++++++
 tests/test_port_add_remove.py | 251 ++++++++++++++++++++++++++++++++++
 2 files changed, 320 insertions(+)
 create mode 100755 tests/test_port_add_remove.py

diff --git a/orchagent/bufferorch.cpp b/orchagent/bufferorch.cpp
index c9ff5d60a67d..3519ba432fac 100644
--- a/orchagent/bufferorch.cpp
+++ b/orchagent/bufferorch.cpp
@@ -44,6 +44,9 @@ map<string, string> buffer_to_ref_table_map = {
     {buffer_profile_list_field_name, APP_BUFFER_PROFILE_TABLE_NAME}
 };
 
+std::map<string, std::map<size_t, string>> pg_port_flags;
+std::map<string, std::map<size_t, string>> queue_port_flags;
+
 BufferOrch::BufferOrch(DBConnector *applDb, DBConnector *confDb, DBConnector *stateDb, vector<string> &tableNames) :
     Orch(applDb, tableNames),
     m_flexCounterDb(new DBConnector("FLEX_COUNTER_DB", 0)),
@@ -829,6 +832,39 @@ task_process_status BufferOrch::processQueue(KeyOpFieldsValuesTuple &tuple)
                     }
                 }
             }
+
+            /* When a buffer configuration is applied to a port, the port's ref counter
+             * must be increased, and decreased again when the configuration is removed,
+             * so the counter is updated for every queue configured on every port.
+             * We also need to distinguish a SET that creates a buffer configuration from
+             * one that modifies an existing entry: the ref counter is increased on the
+             * create flow only. The queue_port_flags map records the last command seen
+             * for each port and queue index: if it was SET, this SET is a modify and the
+             * counter is left unchanged; otherwise (no previous command, or DEL) it is increased. */
+            if (op == SET_COMMAND)
+            {
+                if (queue_port_flags[port_name][ind] != SET_COMMAND)
+                {
+                    /* the last operation was not SET, so this is a create rather than a modify: increase the ref counter */
+                    gPortsOrch->increasePortRefCount(port_name);
+                }
+            }
+            else if (op == DEL_COMMAND)
+            {
+                if (queue_port_flags[port_name][ind] == SET_COMMAND)
+                {
+                    /* decrease the ref counter only if the last operation was SET */
+                    gPortsOrch->decreasePortRefCount(port_name);
+                }
+            }
+            else
+            {
+                SWSS_LOG_ERROR("operation value is not SET or DEL (op = %s)", op.c_str());
+                return task_process_status::task_invalid_entry;
+            }
+            /* record the last command (SET or DEL) */
+            queue_port_flags[port_name][ind] = op;
+
         }
     }
 
@@ -976,6 +1012,39 @@ task_process_status BufferOrch::processPriorityGroup(KeyOpFieldsValuesTuple &tup
                     }
                 }
             }
+
+            /* When a buffer configuration is applied to a port, the port's ref counter
+             * must be increased, and decreased again when the configuration is removed,
+             * so the counter is updated for every priority group configured on every port.
+             * We also need to distinguish a SET that creates a buffer configuration from
+             * one that modifies an existing entry: the ref counter is increased on the
+             * create flow only. The pg_port_flags map records the last command seen for
+             * each port and PG index: if it was SET, this SET is a modify and the counter
+             * is left unchanged; otherwise (no previous command, or DEL) it is increased. */
+            if (op == SET_COMMAND)
+            {
+                if (pg_port_flags[port_name][ind] != SET_COMMAND)
+                {
+                    /* the last operation was not SET, so this is a create rather than a modify: increase the ref counter */
+                    gPortsOrch->increasePortRefCount(port_name);
+                }
+            }
+            else if (op == DEL_COMMAND)
+            {
+                if (pg_port_flags[port_name][ind] == SET_COMMAND)
+                {
+                    /* decrease the ref counter only if the last operation was SET */
+                    gPortsOrch->decreasePortRefCount(port_name);
+                }
+            }
+            else
+            {
+                SWSS_LOG_ERROR("operation value is not SET or DEL (op = %s)", op.c_str());
+                return task_process_status::task_invalid_entry;
+            }
+            /* record the last command (SET or DEL) */
+            pg_port_flags[port_name][ind] = op;
+
         }
     }
 
diff --git a/tests/test_port_add_remove.py b/tests/test_port_add_remove.py
new file mode 100755
index 000000000000..bfc3074d8322
--- /dev/null
+++ b/tests/test_port_add_remove.py
@@ -0,0 +1,251 @@
+import pytest
+import time
+import buffer_model
+from dvslib.dvs_common import PollingConfig
+
+# the ports to be removed and re-added
+PORT_A = "Ethernet64"
+PORT_B = "Ethernet68"
+
+"""
+DELETE_CREATE_ITERATIONS defines the number of iteration of delete and create to  ports,
+we add different timeouts between delete/create to catch potential race condition that can lead to system crush
+
+Add \ Remove of Buffers can be done only when the model is dynamic.
+"""
+DELETE_CREATE_ITERATIONS = 10
+
+@pytest.fixture
+def dynamic_buffer(dvs):
+    buffer_model.enable_dynamic_buffer(dvs.get_config_db(), dvs.runcmd)
+    yield
+    buffer_model.disable_dynamic_buffer(dvs.get_config_db(), dvs.runcmd)
+
+
+@pytest.mark.usefixtures('dvs_port_manager')
+@pytest.mark.usefixtures("dynamic_buffer")    
+class TestPortAddRemove(object):
+
+    def set_mmu(self, dvs):
+        state_db = dvs.get_state_db()
+        # set mmu size
+        fvs = {"mmu_size": "12766208"}
+        state_db.create_entry("BUFFER_MAX_PARAM_TABLE", "global", fvs)
+
+
+    def test_remove_add_remove_port_with_buffer_cfg(self, dvs, testlog):
+        config_db = dvs.get_config_db()
+        asic_db = dvs.get_asic_db()
+        state_db = dvs.get_state_db()
+        app_db = dvs.get_app_db()
+
+        # set mmu size
+        self.set_mmu(dvs)
+
+        # Startup interface
+        dvs.port_admin_set(PORT_A, 'up')
+
+        # get port info
+        port_info = config_db.get_entry("PORT", PORT_A)
+
+        # get the number of ports before removal
+        num_of_ports = len(asic_db.get_keys("ASIC_STATE:SAI_OBJECT_TYPE_PORT"))
+
+        # remove buffer pg cfg for the port (record the buffer pgs before removing them)
+        pgs = config_db.get_keys('BUFFER_PG')
+        buffer_pgs = {}
+        for key in pgs:
+            if PORT_A in key:
+                buffer_pgs[key] = config_db.get_entry('BUFFER_PG', key)
+                config_db.delete_entry('BUFFER_PG', key)
+                app_db.wait_for_deleted_entry("BUFFER_PG_TABLE", key)
+
+        # modify buffer queue entry to egress_lossless_profile instead of egress_lossy_profile
+        config_db.update_entry("BUFFER_QUEUE", "%s|0-2"%PORT_A, {"profile": "egress_lossless_profile"})
+
+        # remove buffer queue cfg for the port
+        queues = config_db.get_keys('BUFFER_QUEUE')
+        buffer_queues = {}
+        for key in queues:
+            if PORT_A in key:
+                buffer_queues[key] = config_db.get_entry('BUFFER_QUEUE', key)
+                config_db.delete_entry('BUFFER_QUEUE', key)
+                app_db.wait_for_deleted_entry('BUFFER_QUEUE_TABLE', key)
+
+        # Shutdown interface
+        dvs.port_admin_set(PORT_A, 'down')
+
+        # try to remove this port
+        config_db.delete_entry('PORT', PORT_A)
+        num = asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_PORT",
+                              num_of_ports-1,
+                              polling_config=PollingConfig(polling_interval=1, timeout=5.00, strict=True))
+
+        # verify that the port was removed, since all buffer configuration on it had already been removed
+        assert len(num) == num_of_ports - 1
+
+        # add the port back
+        config_db.update_entry("PORT", PORT_A, port_info)
+
+        # verify that the port has been re-added
+        num = asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_PORT",
+                              num_of_ports,
+                              polling_config=PollingConfig(polling_interval=1, timeout=5.00, strict=True))
+
+        assert len(num) == num_of_ports
+
+        # re-add buffer pg and queue cfg to the port
+        for key, pg in buffer_pgs.items():
+            config_db.update_entry("BUFFER_PG", key, pg)
+
+        for key, queue in buffer_queues.items():
+            config_db.update_entry("BUFFER_QUEUE", key, queue)
+
+        time.sleep(5)
+
+        # try to remove the port while buffer configuration is still applied
+        config_db.delete_entry('PORT', PORT_A)
+        num = asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_PORT",
+                                      num_of_ports-1,
+                                      polling_config=PollingConfig(polling_interval=1, timeout=5.00, strict=False))
+
+        # verify that the port wasn't removed, since buffer cfg is still applied to it
+        assert len(num) == num_of_ports
+
+        # Remove buffer PGs
+        for key in buffer_pgs.keys():
+            config_db.delete_entry('BUFFER_PG', key)
+            app_db.wait_for_deleted_entry("BUFFER_PG_TABLE", key)
+
+        num = asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_PORT",
+                                      num_of_ports-1,
+                                      polling_config=PollingConfig(polling_interval=1, timeout=5.00, strict=False))
+
+        # verify that the port still wasn't removed, since buffer queue cfg is still present
+        assert len(num) == num_of_ports
+
+        # Remove buffer queues
+        for key in buffer_queues.keys():
+            config_db.delete_entry('BUFFER_QUEUE', key)
+            app_db.wait_for_deleted_entry('BUFFER_QUEUE_TABLE', key)
+
+        num = asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_PORT",
+                                      num_of_ports-1,
+                                      polling_config=PollingConfig(polling_interval=1, timeout=5.00, strict=True))
+
+        # verify that the port was removed, now that all buffer cfg on it is gone
+        assert len(num) == num_of_ports - 1
+
+        # add the port back, as it is required by the next test
+        config_db.update_entry("PORT", PORT_A, port_info)
+
+
+
+    @pytest.mark.parametrize("scenario", ["one_port", "all_ports"])
+    def test_add_remove_all_the_ports(self, dvs, testlog, scenario):
+        config_db = dvs.get_config_db()
+        state_db = dvs.get_state_db()
+        asic_db = dvs.get_asic_db()
+        app_db = dvs.get_app_db()
+
+        # set mmu size
+        self.set_mmu(dvs)
+
+        # get the number of ports before removal
+        num_of_ports = len(asic_db.get_keys("ASIC_STATE:SAI_OBJECT_TYPE_PORT"))
+
+        # select the ports to operate on
+        if scenario == "all_ports":
+            ports = config_db.get_keys('PORT')
+        elif scenario == "one_port":
+            ports = [PORT_A]
+        else:
+            assert False
+
+        # delete all PGs and QUEUEs from the relevant ports
+        pgs = config_db.get_keys('BUFFER_PG')
+        queues = config_db.get_keys('BUFFER_QUEUE')
+
+        for port in ports:
+            for key in pgs:
+                if port in key:
+                    config_db.delete_entry('BUFFER_PG', key)
+                    app_db.wait_for_deleted_entry('BUFFER_PG_TABLE', key)
+
+            for key in queues:
+                if port in key:
+                    config_db.delete_entry('BUFFER_QUEUE', key)
+                    app_db.wait_for_deleted_entry('BUFFER_QUEUE_TABLE', key)
+
+        ports_info = {}
+
+        for key in ports:
+            # read port info and save it
+            ports_info[key] = config_db.get_entry("PORT", key)
+
+
+        for i in range(DELETE_CREATE_ITERATIONS):
+            # remove ports
+            for key in ports:
+                config_db.delete_entry('PORT', key)
+                app_db.wait_for_deleted_entry("PORT_TABLE", key)
+
+            # verify port removal
+            num = asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_PORT",
+                                          num_of_ports-len(ports))
+
+            assert len(num) == num_of_ports-len(ports)
+
+            # add the ports back
+            # Vary the delay between delete and create (0-2 seconds) to widen
+            # the window for potential race conditions between the remove and
+            # create flows that could crash the system; see the note on
+            # DELETE_CREATE_ITERATIONS at the top of this file.
+            time.sleep(i % 3)
+            for key in ports:
+                config_db.update_entry("PORT", key, ports_info[key])
+                app_db.wait_for_entry('PORT_TABLE', key)
+
+            # verify the ports were re-added
+            num = asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_PORT",
+                                  num_of_ports)
+
+            assert len(num) == num_of_ports
+
+            time.sleep((i % 2) + 1)
+
+        # run ping between two servers through a VLAN to verify traffic still passes
+        dvs.setup_db()
+
+        dvs.create_vlan("6")
+        dvs.create_vlan_member("6", PORT_A)
+        dvs.create_vlan_member("6", PORT_B)
+
+        port_entry_a = state_db.get_entry("PORT_TABLE", PORT_A)
+        port_entry_b = state_db.get_entry("PORT_TABLE", PORT_B)
+        port_admin_a = port_entry_a['admin_status']
+        port_admin_b = port_entry_b['admin_status']
+
+        dvs.set_interface_status("Vlan6", "up")
+        dvs.add_ip_address("Vlan6", "6.6.6.1/24")
+        dvs.set_interface_status(PORT_A, "up")
+        dvs.set_interface_status(PORT_B, "up")
+
+        dvs.servers[16].runcmd("ifconfig eth0 6.6.6.6/24 up")
+        dvs.servers[16].runcmd("ip route add default via 6.6.6.1")
+        dvs.servers[17].runcmd("ifconfig eth0 6.6.6.7/24 up")
+        dvs.servers[17].runcmd("ip route add default via 6.6.6.1")
+
+        time.sleep(2)
+
+        rc = dvs.servers[16].runcmd("ping -c 1 6.6.6.7")
+        assert rc == 0
+
+        rc = dvs.servers[17].runcmd("ping -c 1 6.6.6.6")
+        assert rc == 0
+
+        dvs.set_interface_status(PORT_A, port_admin_a)
+        dvs.set_interface_status(PORT_B, port_admin_b)
+        dvs.remove_vlan_member("6", PORT_A)
+        dvs.remove_vlan_member("6", PORT_B)
+        dvs.remove_vlan("6")