From 90deca9b55630f489b1e6e42cb56b086e090b7a4 Mon Sep 17 00:00:00 2001
From: Stephen Sun <stephens@nvidia.com>
Date: Wed, 1 Jan 2025 04:50:37 +0000
Subject: [PATCH 1/3] Avoid segment fault in ASIC/SDK health event handling
 when vendor SAI passes an invalid timestamp

Signed-off-by: Stephen Sun <stephens@nvidia.com>
---
 orchagent/switchorch.cpp           | 11 ++++-
 tests/mock_tests/switchorch_ut.cpp | 69 +++++++++++++++++++-----------
 2 files changed, 55 insertions(+), 25 deletions(-)

diff --git a/orchagent/switchorch.cpp b/orchagent/switchorch.cpp
index 64442c96af..741cb547d2 100644
--- a/orchagent/switchorch.cpp
+++ b/orchagent/switchorch.cpp
@@ -1115,8 +1115,17 @@ void SwitchOrch::onSwitchAsicSdkHealthEvent(sai_object_id_t switch_id,
     const string &severity_str = switch_asic_sdk_health_event_severity_reverse_map.at(severity);
     const string &category_str = switch_asic_sdk_health_event_category_reverse_map.at(category);
     string description_str;
-    const std::time_t &t = (std::time_t)timestamp.tv_sec;
+    std::time_t t = (std::time_t)timestamp.tv_sec;
+    const std::time_t now = std::time(0);
+    const double year_in_seconds = 86400 * 365;
     stringstream time_ss;
+
+    if (difftime(t, now) > year_in_seconds)
+    {
+        SWSS_LOG_ERROR("Invalid timestamp second %lu in received ASIC/SDK health event, reset to current time", timestamp.tv_sec);
+        t = now;
+    }
+
     time_ss << std::put_time(std::localtime(&t), "%Y-%m-%d %H:%M:%S");
 
     switch (data.data_type)
diff --git a/tests/mock_tests/switchorch_ut.cpp b/tests/mock_tests/switchorch_ut.cpp
index a1e8b218e4..e46fb9d07b 100644
--- a/tests/mock_tests/switchorch_ut.cpp
+++ b/tests/mock_tests/switchorch_ut.cpp
@@ -124,6 +124,44 @@ namespace switchorch_test
             gSwitchOrch = new SwitchOrch(m_app_db.get(), switch_tables, stateDbSwitchTable);
         }
 
+        void checkAsicSdkHealthEvent(const sai_timespec_t &timestamp, const string &expected_key="")
+        {
+            initSwitchOrch();
+
+            sai_switch_health_data_t data;
+            memset(&data, 0, sizeof(data));
+            data.data_type = SAI_HEALTH_DATA_TYPE_GENERAL;
+            vector<uint8_t> data_from_sai({100, 101, 115, 99, 114, 105, 112, 116, 105, 245, 111, 110, 245, 10, 123, 125, 100, 100});
+            sai_u8_list_t description;
+            description.list = data_from_sai.data();
+            description.count = (uint32_t)(data_from_sai.size() - 2);
+            on_switch_asic_sdk_health_event(gSwitchId,
+                                            SAI_SWITCH_ASIC_SDK_HEALTH_SEVERITY_FATAL,
+                                            timestamp,
+                                            SAI_SWITCH_ASIC_SDK_HEALTH_CATEGORY_FW,
+                                            data,
+                                            description);
+
+            string key;
+            if (expected_key.empty())
+            {
+                vector<string> keys;
+                gSwitchOrch->m_asicSdkHealthEventTable->getKeys(keys);
+                key = keys[0];
+            }
+            else
+            {
+                key = expected_key;
+            }
+            string value;
+            gSwitchOrch->m_asicSdkHealthEventTable->hget(key, "category", value);
+            ASSERT_EQ(value, "firmware");
+            gSwitchOrch->m_asicSdkHealthEventTable->hget(key, "severity", value);
+            ASSERT_EQ(value, "fatal");
+            gSwitchOrch->m_asicSdkHealthEventTable->hget(key, "description", value);
+            ASSERT_EQ(value, "description\n{}");
+        }
+
         void TearDown() override
         {
             ::testing_db::reset();
@@ -289,30 +327,13 @@ namespace switchorch_test
 
     TEST_F(SwitchOrchTest, SwitchOrchTestHandleEvent)
     {
-        initSwitchOrch();
-
         sai_timespec_t timestamp = {.tv_sec = 1701160447, .tv_nsec = 538710245};
-        sai_switch_health_data_t data;
-        memset(&data, 0, sizeof(data));
-        data.data_type = SAI_HEALTH_DATA_TYPE_GENERAL;
-        vector<uint8_t> data_from_sai({100, 101, 115, 99, 114, 105, 112, 116, 105, 245, 111, 110, 245, 10, 123, 125, 100, 100});
-        sai_u8_list_t description;
-        description.list = data_from_sai.data();
-        description.count = (uint32_t)(data_from_sai.size() - 2);
-        on_switch_asic_sdk_health_event(gSwitchId,
-                                        SAI_SWITCH_ASIC_SDK_HEALTH_SEVERITY_FATAL,
-                                        timestamp,
-                                        SAI_SWITCH_ASIC_SDK_HEALTH_CATEGORY_FW,
-                                        data,
-                                        description);
-
-        string key = "2023-11-28 08:34:07";
-        string value;
-        gSwitchOrch->m_asicSdkHealthEventTable->hget(key, "category", value);
-        ASSERT_EQ(value, "firmware");
-        gSwitchOrch->m_asicSdkHealthEventTable->hget(key, "severity", value);
-        ASSERT_EQ(value, "fatal");
-        gSwitchOrch->m_asicSdkHealthEventTable->hget(key, "description", value);
-        ASSERT_EQ(value, "description\n{}");
+        checkAsicSdkHealthEvent(timestamp, "2023-11-28 08:34:07");
+    }
+
+    TEST_F(SwitchOrchTest, SwitchOrchTestHandleEventInvalidTimeStamp)
+    {
+        sai_timespec_t timestamp = {.tv_sec = 172479515853275099, .tv_nsec = 538710245};
+        checkAsicSdkHealthEvent(timestamp);
     }
 }

From f1fd87dedda1f96aeaca5c65e51ae8b96bcbbf03 Mon Sep 17 00:00:00 2001
From: Stephen Sun <stephens@nvidia.com>
Date: Wed, 1 Jan 2025 14:46:22 +0000
Subject: [PATCH 2/3] Fix compiling error on ARM

Signed-off-by: Stephen Sun <stephens@nvidia.com>
---
 orchagent/switchorch.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/orchagent/switchorch.cpp b/orchagent/switchorch.cpp
index 741cb547d2..46ca7fbbef 100644
--- a/orchagent/switchorch.cpp
+++ b/orchagent/switchorch.cpp
@@ -1122,7 +1122,7 @@ void SwitchOrch::onSwitchAsicSdkHealthEvent(sai_object_id_t switch_id,
 
     if (difftime(t, now) > year_in_seconds)
     {
-        SWSS_LOG_ERROR("Invalid timestamp second %lu in received ASIC/SDK health event, reset to current time", timestamp.tv_sec);
+        SWSS_LOG_ERROR("Invalid timestamp second %" PRIx64 " in received ASIC/SDK health event, reset to current time", timestamp.tv_sec);
         t = now;
     }
 

From fb756993bd49ddbbc146f618352e2fb25a03d9f7 Mon Sep 17 00:00:00 2001
From: Stephen Sun <stephens@nvidia.com>
Date: Thu, 2 Jan 2025 10:42:37 +0000
Subject: [PATCH 3/3] Add comment

Signed-off-by: Stephen Sun <stephens@nvidia.com>
---
 orchagent/switchorch.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/orchagent/switchorch.cpp b/orchagent/switchorch.cpp
index 46ca7fbbef..5aeed420f2 100644
--- a/orchagent/switchorch.cpp
+++ b/orchagent/switchorch.cpp
@@ -1120,6 +1120,11 @@ void SwitchOrch::onSwitchAsicSdkHealthEvent(sai_object_id_t switch_id,
     const double year_in_seconds = 86400 * 365;
     stringstream time_ss;
 
+    /*
+     * In case vendor SAI passed a very large timestamp, put_time can cause segment fault which can not be caught by try/catch infra
+     * We check the difference between the timestamp from SAI and the current time and force to use current time if the gap is too large
+     * By doing so, we can avoid the segment fault
+     */
     if (difftime(t, now) > year_in_seconds)
     {
         SWSS_LOG_ERROR("Invalid timestamp second %" PRIx64 " in received ASIC/SDK health event, reset to current time", timestamp.tv_sec);