diff --git a/orchagent/pfcactionhandler.cpp b/orchagent/pfcactionhandler.cpp index dbf00b212bc3..45145c76fd3a 100644 --- a/orchagent/pfcactionhandler.cpp +++ b/orchagent/pfcactionhandler.cpp @@ -12,8 +12,15 @@ #define PFC_WD_QUEUE_STATS_DEADLOCK_DETECTED "PFC_WD_QUEUE_STATS_DEADLOCK_DETECTED" #define PFC_WD_QUEUE_STATS_DEADLOCK_RESTORED "PFC_WD_QUEUE_STATS_DEADLOCK_RESTORED" -#define SAI_QUEUE_STAT_PACKETS_STR "SAI_QUEUE_STAT_PACKETS" -#define SAI_QUEUE_STAT_DROPPED_PACKETS_STR "SAI_QUEUE_STAT_DROPPED_PACKETS" +#define PFC_WD_QUEUE_STATS_TX_PACKETS "PFC_WD_QUEUE_STATS_TX_PACKETS" +#define PFC_WD_QUEUE_STATS_TX_DROPPED_PACKETS "PFC_WD_QUEUE_STATS_TX_DROPPED_PACKETS" +#define PFC_WD_QUEUE_STATS_RX_PACKETS "PFC_WD_QUEUE_STATS_RX_PACKETS" +#define PFC_WD_QUEUE_STATS_RX_DROPPED_PACKETS "PFC_WD_QUEUE_STATS_RX_DROPPED_PACKETS" + +#define PFC_WD_QUEUE_STATS_TX_PACKETS_LAST "PFC_WD_QUEUE_STATS_TX_PACKETS_LAST" +#define PFC_WD_QUEUE_STATS_TX_DROPPED_PACKETS_LAST "PFC_WD_QUEUE_STATS_TX_DROPPED_PACKETS_LAST" +#define PFC_WD_QUEUE_STATS_RX_PACKETS_LAST "PFC_WD_QUEUE_STATS_RX_PACKETS_LAST" +#define PFC_WD_QUEUE_STATS_RX_DROPPED_PACKETS_LAST "PFC_WD_QUEUE_STATS_RX_DROPPED_PACKETS_LAST" extern sai_object_id_t gSwitchId; extern PortsOrch *gPortsOrch; @@ -35,30 +42,60 @@ PfcWdActionHandler::PfcWdActionHandler(sai_object_id_t port, sai_object_id_t que "PFC Watchdog detected PFC storm on queue 0x%lx port 0x%lx", m_queue, m_port); - - m_stats = getQueueStats(m_countersTable, sai_serialize_object_id(m_queue)); - m_stats.detectCount++; - m_stats.operational = false; - - updateWdCounters(sai_serialize_object_id(m_queue), m_stats); } PfcWdActionHandler::~PfcWdActionHandler(void) { SWSS_LOG_ENTER(); - auto finalStats = getQueueStats(m_countersTable, sai_serialize_object_id(m_queue)); - SWSS_LOG_NOTICE( - "Queue 0x%lx port 0x%lx restored from PFC storm. Tx packets: %lu. Dropped packets: %lu", + "Queue 0x%lx port 0x%lx restored from PFC storm.", m_queue, - m_port, - finalStats.txPkt - m_stats.txPkt, - finalStats.txDropPkt - m_stats.txDropPkt); + m_port); +} + +void PfcWdActionHandler::initCounters(void) +{ + SWSS_LOG_ENTER(); + + if (!getHwCounters(m_hwStats)) + { + return; + } + + auto wdQueueStats = getQueueStats(m_countersTable, sai_serialize_object_id(m_queue)); + wdQueueStats.detectCount++; + wdQueueStats.operational = false; + + updateWdCounters(sai_serialize_object_id(m_queue), wdQueueStats); +} + +void PfcWdActionHandler::commitCounters(void) +{ + SWSS_LOG_ENTER(); + + PfcWdHwStats hwStats; + + if (!getHwCounters(hwStats)) + { + return; + } + + auto finalStats = getQueueStats(m_countersTable, sai_serialize_object_id(m_queue)); finalStats.restoreCount++; finalStats.operational = true; + finalStats.txPktLast = hwStats.txPkt - m_hwStats.txPkt; + finalStats.txDropPktLast = hwStats.txDropPkt - m_hwStats.txDropPkt; + finalStats.rxPktLast = hwStats.rxPkt - m_hwStats.rxPkt; + finalStats.rxDropPktLast = hwStats.rxDropPkt - m_hwStats.rxDropPkt; + + finalStats.txPkt += finalStats.txPktLast; + finalStats.txDropPkt += finalStats.txDropPktLast; + finalStats.rxPkt += finalStats.rxPktLast; + finalStats.rxDropPkt += finalStats.rxDropPktLast; + updateWdCounters(sai_serialize_object_id(m_queue), finalStats); } @@ -67,6 +104,8 @@ PfcWdActionHandler::PfcWdQueueStats PfcWdActionHandler::getQueueStats(shared_ptr SWSS_LOG_ENTER(); PfcWdQueueStats stats; + memset(&stats, 0, sizeof(PfcWdQueueStats)); + stats.operational = true; vector fieldValues; if (!countersTable->get(queueIdStr, fieldValues)) @@ -91,14 +130,22 @@ PfcWdActionHandler::PfcWdQueueStats PfcWdActionHandler::getQueueStats(shared_ptr { stats.operational = value == PFC_WD_QUEUE_STATUS_OPERATIONAL ? true : false; } - else if (field == SAI_QUEUE_STAT_PACKETS_STR) + else if (field == PFC_WD_QUEUE_STATS_TX_PACKETS) { stats.txPkt = stoul(value); } - else if (field == SAI_QUEUE_STAT_DROPPED_PACKETS_STR) + else if (field == PFC_WD_QUEUE_STATS_TX_DROPPED_PACKETS) { stats.txDropPkt = stoul(value); } + else if (field == PFC_WD_QUEUE_STATS_RX_PACKETS) + { + stats.rxPkt = stoul(value); + } + else if (field == PFC_WD_QUEUE_STATS_RX_DROPPED_PACKETS) + { + stats.rxDropPkt = stoul(value); + } } return move(stats); @@ -127,6 +174,17 @@ void PfcWdActionHandler::updateWdCounters(const string& queueIdStr, const PfcWdQ resultFvValues.emplace_back(PFC_WD_QUEUE_STATS_DEADLOCK_DETECTED, to_string(stats.detectCount)); resultFvValues.emplace_back(PFC_WD_QUEUE_STATS_DEADLOCK_RESTORED, to_string(stats.restoreCount)); + + resultFvValues.emplace_back(PFC_WD_QUEUE_STATS_TX_PACKETS, to_string(stats.txPkt)); + resultFvValues.emplace_back(PFC_WD_QUEUE_STATS_TX_DROPPED_PACKETS, to_string(stats.txDropPkt)); + resultFvValues.emplace_back(PFC_WD_QUEUE_STATS_RX_PACKETS, to_string(stats.rxPkt)); + resultFvValues.emplace_back(PFC_WD_QUEUE_STATS_RX_DROPPED_PACKETS, to_string(stats.rxDropPkt)); + + resultFvValues.emplace_back(PFC_WD_QUEUE_STATS_TX_PACKETS_LAST, to_string(stats.txPktLast)); + resultFvValues.emplace_back(PFC_WD_QUEUE_STATS_TX_DROPPED_PACKETS_LAST, to_string(stats.txDropPktLast)); + resultFvValues.emplace_back(PFC_WD_QUEUE_STATS_RX_PACKETS_LAST, to_string(stats.rxPktLast)); + resultFvValues.emplace_back(PFC_WD_QUEUE_STATS_RX_DROPPED_PACKETS_LAST, to_string(stats.rxDropPktLast)); + resultFvValues.emplace_back(PFC_WD_QUEUE_STATUS, stats.operational ? PFC_WD_QUEUE_STATUS_OPERATIONAL : PFC_WD_QUEUE_STATUS_STORMED); @@ -292,6 +350,69 @@ PfcWdLossyHandler::~PfcWdLossyHandler(void) } } +bool PfcWdLossyHandler::getHwCounters(PfcWdHwStats& counters) +{ + SWSS_LOG_ENTER(); + + static const vector queueStatIds = + { + SAI_QUEUE_STAT_PACKETS, + SAI_QUEUE_STAT_DROPPED_PACKETS, + }; + + static const vector pgStatIds = + { + SAI_INGRESS_PRIORITY_GROUP_STAT_PACKETS, + SAI_INGRESS_PRIORITY_GROUP_STAT_DROPPED_PACKETS, + }; + + vector queueStats; + queueStats.resize(queueStatIds.size()); + + sai_status_t status = sai_queue_api->get_queue_stats( + getQueue(), + static_cast(queueStatIds.size()), + queueStatIds.data(), + queueStats.data()); + + if (status != SAI_STATUS_SUCCESS) + { + SWSS_LOG_ERROR("Failed to fetch queue 0x%lx stats: %d", getQueue(), status); + return false; + } + + // PG counters not yet supported in Mellanox platform + Port portInstance; + if (!gPortsOrch->getPort(getPort(), portInstance)) + { + SWSS_LOG_ERROR("Cannot get port by ID 0x%lx", getPort()); + return false; + } + + sai_object_id_t pg = portInstance.m_priority_group_ids[getQueueId()]; + vector pgStats; + pgStats.resize(pgStatIds.size()); + + status = sai_buffer_api->get_ingress_priority_group_stats( + pg, + static_cast(pgStatIds.size()), + pgStatIds.data(), + pgStats.data()); + + if (status != SAI_STATUS_SUCCESS) + { + SWSS_LOG_ERROR("Failed to fetch pg 0x%lx stats: %d", pg, status); + return false; + } + + counters.txPkt = queueStats[0]; + counters.txDropPkt = queueStats[1]; + counters.rxPkt = pgStats[0]; + counters.rxDropPkt = pgStats[1]; + + return true; +} + PfcWdZeroBufferHandler::PfcWdZeroBufferHandler(sai_object_id_t port, sai_object_id_t queue, uint8_t queueId, shared_ptr countersTable): PfcWdLossyHandler(port, queue, queueId, countersTable) diff --git a/orchagent/pfcactionhandler.h b/orchagent/pfcactionhandler.h index 34cf73c98155..cdf795c7585c 100644 --- a/orchagent/pfcactionhandler.h +++ b/orchagent/pfcactionhandler.h @@ -13,6 +13,14 @@ extern "C" { using namespace std; using namespace swss; +struct PfcWdHwStats +{ + uint64_t txPkt; + uint64_t txDropPkt; + uint64_t rxPkt; + uint64_t rxDropPkt; +}; + // PFC queue interface class // It resembles RAII behavior - pause storm is mitigated (queue is locked) on creation, // and is restored (queue released) on removal @@ -39,15 +47,30 @@ class PfcWdActionHandler } static void initWdCounters(shared_ptr
countersTable, const string &queueIdStr); + void initCounters(void); + void commitCounters(void); + + virtual bool getHwCounters(PfcWdHwStats& counters) + { + memset(&counters, 0, sizeof(PfcWdHwStats)); + + return true; + }; private: struct PfcWdQueueStats { - uint64_t detectCount = 0; - uint64_t restoreCount = 0; - uint64_t txPkt = 0; - uint64_t txDropPkt = 0; - bool operational = true; + uint64_t detectCount; + uint64_t restoreCount; + uint64_t txPkt; + uint64_t txDropPkt; + uint64_t rxPkt; + uint64_t rxDropPkt; + uint64_t txPktLast; + uint64_t txDropPktLast; + uint64_t rxPktLast; + uint64_t rxDropPktLast; + bool operational; }; static PfcWdQueueStats getQueueStats(shared_ptr
countersTable, const string &queueIdStr); @@ -57,7 +80,7 @@ class PfcWdActionHandler sai_object_id_t m_queue = SAI_NULL_OBJECT_ID; uint8_t m_queueId = 0; shared_ptr
m_countersTable = nullptr; - PfcWdQueueStats m_stats; + PfcWdHwStats m_hwStats; }; class PfcWdAclHandler: public PfcWdActionHandler @@ -87,6 +110,7 @@ class PfcWdLossyHandler: public PfcWdActionHandler PfcWdLossyHandler(sai_object_id_t port, sai_object_id_t queue, uint8_t queueId, shared_ptr
countersTable); virtual ~PfcWdLossyHandler(void); + virtual bool getHwCounters(PfcWdHwStats& counters); }; // PFC queue that implements drop action by draining queue with buffer of zero size diff --git a/orchagent/pfcwdorch.cpp b/orchagent/pfcwdorch.cpp index c8e85a66c0b3..77a33696acda 100644 --- a/orchagent/pfcwdorch.cpp +++ b/orchagent/pfcwdorch.cpp @@ -506,9 +506,11 @@ void PfcWdSwOrch::doTask(swss::NotificationConsumer { throw runtime_error("Unknown PFC WD action"); } + entry->second.handler->initCounters(); } else if (event == "restore") { + entry->second.handler->commitCounters(); entry->second.handler = nullptr; } else