diff --git a/cfgmgr/buffer_headroom_mellanox.lua b/cfgmgr/buffer_headroom_mellanox.lua index cbc6d2d6ec8f..2c9581400816 100644 --- a/cfgmgr/buffer_headroom_mellanox.lua +++ b/cfgmgr/buffer_headroom_mellanox.lua @@ -16,6 +16,7 @@ local lossless_mtu local small_packet_percentage +local over_subscribe_ratio = 0 local cell_size local pipeline_latency local mac_phy_delay @@ -72,8 +73,19 @@ for i = 1, #lossless_traffic_table_content, 2 do end end --- Fetch DEFAULT_LOSSLESS_BUFFER_PARAMETER from CONFIG_DB -local lossless_traffic_keys = redis.call('KEYS', 'DEFAULT_LOSSLESS_BUFFER_PARAMETER*') +-- Fetch over subscribe ratio +local default_lossless_param_keys = redis.call('KEYS', 'DEFAULT_LOSSLESS_BUFFER_PARAMETER*') +local over_subscribe_ratio = tonumber(redis.call('HGET', default_lossless_param_keys[1], 'over_subscribe_ratio')) + +-- Fetch the shared headroom pool size +local shp_size = tonumber(redis.call('HGET', 'BUFFER_POOL|ingress_lossless_pool', 'xoff')) + +local shp_enabled +if shp_size ~= nil and shp_size ~= 0 or over_subscribe_ratio ~= nil and over_subscribe_ratio ~= 0 then + shp_enabled = true +else + shp_enabled = false +end -- Calculate the headroom information local speed_of_light = 198000000 @@ -119,7 +131,11 @@ xoff_value = math.ceil(xoff_value / 1024) * 1024 xon_value = pipeline_latency xon_value = math.ceil(xon_value / 1024) * 1024 -headroom_size = xoff_value + xon_value + speed_overhead +if shp_enabled then + headroom_size = xon_value +else + headroom_size = xoff_value + xon_value + speed_overhead +end headroom_size = math.ceil(headroom_size / 1024) * 1024 table.insert(ret, "xon" .. ":" .. math.ceil(xon_value)) diff --git a/cfgmgr/buffer_pool_mellanox.lua b/cfgmgr/buffer_pool_mellanox.lua index da88f186c0a1..9adbb15a6a16 100644 --- a/cfgmgr/buffer_pool_mellanox.lua +++ b/cfgmgr/buffer_pool_mellanox.lua @@ -83,6 +83,24 @@ end local egress_lossless_pool_size = redis.call('HGET', 'BUFFER_POOL|egress_lossless_pool', 'size') +-- Whether shared headroom pool is enabled? +local default_lossless_param_keys = redis.call('KEYS', 'DEFAULT_LOSSLESS_BUFFER_PARAMETER*') +local over_subscribe_ratio = tonumber(redis.call('HGET', default_lossless_param_keys[1], 'over_subscribe_ratio')) + +-- Fetch the shared headroom pool size +local shp_size = tonumber(redis.call('HGET', 'BUFFER_POOL|ingress_lossless_pool', 'xoff')) + +local shp_enabled = false +if over_subscribe_ratio ~= nil and over_subscribe_ratio ~= 0 then + shp_enabled = true +end + +if shp_size ~= nil and shp_size ~= 0 then + shp_enabled = true +else + shp_size = 0 +end + -- Switch to APPL_DB redis.call('SELECT', appl_db) @@ -103,6 +121,7 @@ local statistics = {} -- Fetch sizes of all of the profiles, accumulate them local accumulative_occupied_buffer = 0 +local accumulative_xoff = 0 for i = 1, #profiles, 1 do if profiles[i][1] ~= "BUFFER_PROFILE_TABLE_KEY_SET" and profiles[i][1] ~= "BUFFER_PROFILE_TABLE_DEL_SET" then local size = tonumber(redis.call('HGET', profiles[i][1], 'size')) @@ -114,6 +133,13 @@ for i = 1, #profiles, 1 do profiles[i][2] = count_up_port end if size ~= 0 then + if shp_enabled and shp_size == 0 then + local xon = tonumber(redis.call('HGET', profiles[i][1], 'xon')) + local xoff = tonumber(redis.call('HGET', profiles[i][1], 'xoff')) + if xon ~= nil and xoff ~= nil and xon + xoff > size then + accumulative_xoff = accumulative_xoff + (xon + xoff - size) * profiles[i][2] + end + end accumulative_occupied_buffer = accumulative_occupied_buffer + size * profiles[i][2] end table.insert(statistics, {profiles[i][1], size, profiles[i][2]}) @@ -138,7 +164,7 @@ end local asic_keys = redis.call('KEYS', 'ASIC_TABLE*') local cell_size = tonumber(redis.call('HGET', asic_keys[1], 'cell_size')) --- Align mmu_size at cell size boundary, otherwith the sdk will complain and the syncd will faill +-- Align mmu_size at cell size boundary, otherwise the sdk will complain and the syncd will fail local number_of_cells = math.floor(mmu_size / cell_size) local ceiling_mmu_size = number_of_cells * cell_size @@ -149,11 +175,16 @@ redis.call('SELECT', config_db) local pools_need_update = {} local ipools = redis.call('KEYS', 'BUFFER_POOL|ingress*') local ingress_pool_count = 0 +local ingress_lossless_pool_size = nil for i = 1, #ipools, 1 do local size = tonumber(redis.call('HGET', ipools[i], 'size')) if not size then table.insert(pools_need_update, ipools[i]) ingress_pool_count = ingress_pool_count + 1 + else + if ipools[i] == 'BUFFER_POOL|ingress_lossless_pool' and shp_enabled and shp_size == 0 then + ingress_lossless_pool_size = size + end end end @@ -165,7 +196,14 @@ for i = 1, #epools, 1 do end end +if shp_enabled and shp_size == 0 then + shp_size = math.ceil(accumulative_xoff / over_subscribe_ratio) +end + local pool_size +if shp_size then + accumulative_occupied_buffer = accumulative_occupied_buffer + shp_size +end if ingress_pool_count == 1 then pool_size = mmu_size - accumulative_occupied_buffer else @@ -176,18 +214,31 @@ if pool_size > ceiling_mmu_size then pool_size = ceiling_mmu_size end +local shp_deployed = false for i = 1, #pools_need_update, 1 do local pool_name = string.match(pools_need_update[i], "BUFFER_POOL|([^%s]+)$") - table.insert(result, pool_name .. ":" .. math.ceil(pool_size)) + if shp_size ~= 0 and pool_name == "ingress_lossless_pool" then + table.insert(result, pool_name .. ":" .. math.ceil(pool_size) .. ":" .. math.ceil(shp_size)) + shp_deployed = true + else + table.insert(result, pool_name .. ":" .. math.ceil(pool_size)) + end +end + +if not shp_deployed and shp_size ~= 0 and ingress_lossless_pool_size ~= nil then + table.insert(result, "ingress_lossless_pool:" .. math.ceil(ingress_lossless_pool_size) .. ":" .. math.ceil(shp_size)) end table.insert(result, "debug:mmu_size:" .. mmu_size) -table.insert(result, "debug:accumulative:" .. accumulative_occupied_buffer) +table.insert(result, "debug:accumulative size:" .. accumulative_occupied_buffer) for i = 1, #statistics do table.insert(result, "debug:" .. statistics[i][1] .. ":" .. statistics[i][2] .. ":" .. statistics[i][3]) end table.insert(result, "debug:extra_400g:" .. (lossypg_reserved_400g - lossypg_reserved) .. ":" .. lossypg_400g) table.insert(result, "debug:mgmt_pool:" .. mgmt_pool_size) table.insert(result, "debug:egress_mirror:" .. accumulative_egress_mirror_overhead) +table.insert(result, "debug:shp_enabled:" .. tostring(shp_enabled)) +table.insert(result, "debug:shp_size:" .. shp_size) +table.insert(result, "debug:accumulative xoff:" .. accumulative_xoff) return result diff --git a/cfgmgr/buffermgrdyn.cpp b/cfgmgr/buffermgrdyn.cpp index 9039aea734bd..bff7acfd7d83 100644 --- a/cfgmgr/buffermgrdyn.cpp +++ b/cfgmgr/buffermgrdyn.cpp @@ -274,16 +274,16 @@ string BufferMgrDynamic::getPgPoolMode() } // Meta flows which are called by main flows -void BufferMgrDynamic::calculateHeadroomSize(const string &speed, const string &cable, const string &port_mtu, const string &gearbox_model, buffer_profile_t &headroom) +void BufferMgrDynamic::calculateHeadroomSize(buffer_profile_t &headroom) { // Call vendor-specific lua plugin to calculate the xon, xoff, xon_offset, size and threshold vector keys = {}; vector argv = {}; keys.emplace_back(headroom.name); - argv.emplace_back(speed); - argv.emplace_back(cable); - argv.emplace_back(port_mtu); + argv.emplace_back(headroom.speed); + argv.emplace_back(headroom.cable_length); + argv.emplace_back(headroom.port_mtu); argv.emplace_back(m_identifyGearboxDelay); try @@ -316,6 +316,14 @@ void BufferMgrDynamic::calculateHeadroomSize(const string &speed, const string & } } +// This function is designed to fetch the sizes of shared buffer pool and shared headroom pool +// and programe them to APPL_DB if they differ from the current value. +// The function is called periodically: +// 1. Fetch the sizes by calling lug plugin +// - For each of the pools, it checks the size of shared buffer pool. +// - For ingress_lossless_pool, it checks the size of the shared headroom pool (field xoff of the pool) as well. +// 2. Compare the fetched value and the previous value +// 3. Program to APPL_DB.BUFFER_POOL_TABLE only if its sizes differ from the stored value void BufferMgrDynamic::recalculateSharedBufferPool() { try @@ -326,8 +334,17 @@ void BufferMgrDynamic::recalculateSharedBufferPool() auto ret = runRedisScript(*m_applDb, m_bufferpoolSha, keys, argv); // The format of the result: - // a list of strings containing key, value pairs with colon as separator + // a list of lines containing key, value pairs with colon as separator // each is the size of a buffer pool + // possible format of each line: + // 1. shared buffer pool only: + // : + // eg: "egress_lossless_pool:12800000" + // 2. shared buffer pool and shared headroom pool, for ingress_lossless_pool only: + // ingress_lossless_pool::, + // eg: "ingress_lossless_pool:3200000:1024000" + // 3. debug information: + // debug: for ( auto i : ret) { @@ -336,12 +353,62 @@ void BufferMgrDynamic::recalculateSharedBufferPool() if ("debug" != poolName) { - auto &pool = m_bufferPoolLookup[pairs[0]]; + // We will handle the sizes of buffer pool update here. + // For the ingress_lossless_pool, there are some dedicated steps for shared headroom pool + // - The sizes of both the shared headroom pool and the shared buffer pool should be taken into consideration + // - In case the shared headroom pool size is statically configured, as it is programmed to APPL_DB during buffer pool handling, + // - any change from lua plugin will be ignored. + // - will handle ingress_lossless_pool in the way all other pools are handled in this case + auto &pool = m_bufferPoolLookup[poolName]; + auto &poolSizeStr = pairs[1]; + auto old_xoff = pool.xoff; + bool xoff_updated = false; + + if (poolName == INGRESS_LOSSLESS_PG_POOL_NAME && !isNonZero(m_configuredSharedHeadroomPoolSize)) + { + // Shared headroom pool size is treated as "updated" if either of the following conditions satisfied: + // - It is legal and differs from the stored value. + // - The lua plugin doesn't return the shared headroom pool size but there is a non-zero value stored + // This indicates the shared headroom pool was enabled by over subscribe ratio and is disabled. + // In this case a "0" will programmed to APPL_DB, indicating the shared headroom pool is disabled. + SWSS_LOG_DEBUG("Buffer pool ingress_lossless_pool xoff: %s, size %s", pool.xoff.c_str(), pool.total_size.c_str()); + + if (pairs.size() > 2) + { + auto &xoffStr = pairs[2]; + if (pool.xoff != xoffStr) + { + unsigned long xoffNum = atol(xoffStr.c_str()); + if (m_mmuSizeNumber > 0 && m_mmuSizeNumber < xoffNum) + { + SWSS_LOG_ERROR("Buffer pool %s: Invalid xoff %s, exceeding the mmu size %s, ignored xoff but the pool size will be updated", + poolName.c_str(), xoffStr.c_str(), m_mmuSize.c_str()); + } + else + { + pool.xoff = xoffStr; + xoff_updated = true; + } + } + } + else + { + if (isNonZero(pool.xoff)) + { + xoff_updated = true; + } + pool.xoff = "0"; + } + } - if (pool.total_size == pairs[1]) + // In general, the APPL_DB should be updated in case any of the following conditions satisfied + // 1. Shared headroom pool size has been updated + // This indicates the shared headroom pool is enabled by configuring over subscribe ratio, + // which means the shared headroom pool size has updated by lua plugin + // 2. The size of the shared buffer pool isn't configured and has been updated by lua plugin + if ((pool.total_size == poolSizeStr || !pool.dynamic_size) && !xoff_updated) continue; - auto &poolSizeStr = pairs[1]; unsigned long poolSizeNum = atol(poolSizeStr.c_str()); if (m_mmuSizeNumber > 0 && m_mmuSizeNumber < poolSizeNum) { @@ -350,10 +417,19 @@ void BufferMgrDynamic::recalculateSharedBufferPool() continue; } + auto old_size = pool.total_size; pool.total_size = poolSizeStr; updateBufferPoolToDb(poolName, pool); - SWSS_LOG_NOTICE("Buffer pool %s had been updated with new size [%s]", poolName.c_str(), pool.total_size.c_str()); + if (!pool.xoff.empty()) + { + SWSS_LOG_NOTICE("Buffer pool %s has been updated: size from [%s] to [%s], xoff from [%s] to [%s]", + poolName.c_str(), old_size.c_str(), pool.total_size.c_str(), old_xoff.c_str(), pool.xoff.c_str()); + } + else + { + SWSS_LOG_NOTICE("Buffer pool %s has been updated: size from [%s] to [%s]", poolName.c_str(), old_size.c_str(), pool.total_size.c_str()); + } } else { @@ -409,15 +485,16 @@ void BufferMgrDynamic::updateBufferPoolToDb(const string &name, const buffer_poo vector fvVector; if (pool.ingress) - fvVector.emplace_back(make_pair("type", "ingress")); + fvVector.emplace_back("type", "ingress"); else - fvVector.emplace_back(make_pair("type", "egress")); + fvVector.emplace_back("type", "egress"); - fvVector.emplace_back(make_pair("mode", pool.mode)); + if (!pool.xoff.empty()) + fvVector.emplace_back("xoff", pool.xoff); - SWSS_LOG_INFO("Buffer pool %s is initialized", name.c_str()); + fvVector.emplace_back("mode", pool.mode); - fvVector.emplace_back(make_pair("size", pool.total_size)); + fvVector.emplace_back("size", pool.total_size); m_applBufferPoolTable.set(name, fvVector); @@ -476,7 +553,7 @@ void BufferMgrDynamic::updateBufferPgToDb(const string &key, const string &profi } // We have to check the headroom ahead of applying them -task_process_status BufferMgrDynamic::allocateProfile(const string &speed, const string &cable, const string &mtu, const string &threshold, const string &gearbox_model, string &profile_name) +task_process_status BufferMgrDynamic::allocateProfile(const string &speed, const string &cable_len, const string &mtu, const string &threshold, const string &gearbox_model, string &profile_name) { // Create record in BUFFER_PROFILE table @@ -496,12 +573,16 @@ task_process_status BufferMgrDynamic::allocateProfile(const string &speed, const return task_process_status::task_need_retry; } + profile.speed = speed; + profile.cable_length = cable_len; + profile.port_mtu = mtu; + profile.gearbox_model = gearbox_model; + // Call vendor-specific lua plugin to calculate the xon, xoff, xon_offset, size // Pay attention, the threshold can contain valid value - calculateHeadroomSize(speed, cable, mtu, gearbox_model, profile); + calculateHeadroomSize(profile); profile.threshold = threshold; - profile.dynamic_calculated = true; profile.static_configured = false; profile.lossless = true; profile.name = profile_name; @@ -512,7 +593,7 @@ task_process_status BufferMgrDynamic::allocateProfile(const string &speed, const SWSS_LOG_NOTICE("BUFFER_PROFILE %s has been created successfully", profile_name.c_str()); SWSS_LOG_DEBUG("New profile created %s according to (%s %s %s): xon %s xoff %s size %s", profile_name.c_str(), - speed.c_str(), cable.c_str(), gearbox_model.c_str(), + speed.c_str(), cable_len.c_str(), gearbox_model.c_str(), profile.xon.c_str(), profile.xoff.c_str(), profile.size.c_str()); } else @@ -768,6 +849,106 @@ task_process_status BufferMgrDynamic::refreshPriorityGroupsForPort(const string return task_process_status::task_success; } +void BufferMgrDynamic::refreshSharedHeadroomPool(bool enable_state_updated_by_ratio, bool enable_state_updated_by_size) +{ + // The lossless profiles need to be refreshed only if system is switched between SHP and non-SHP + bool need_refresh_profiles = false; + bool shp_enabled_by_size = isNonZero(m_configuredSharedHeadroomPoolSize); + bool shp_enabled_by_ratio = isNonZero(m_overSubscribeRatio); + + /* + * STATE EVENT ACTION + * enabled by size -> config ratio: no action + * enabled by size -> remove ratio: no action + * enabled by size -> remove size: shared headroom pool disabled + * SHP size will be set here (zero) and programmed to APPL_DB in handleBufferPoolTable + * enabled by ratio -> config size: SHP size will be set here (non zero) and programmed to APPL_DB in handleBufferPoolTable + * enabled by ratio -> remove size: SHP size will be set and programmed to APPL_DB during buffer pool size update + * enabled by ratio -> remove ratio: shared headroom pool disabled + * dynamic size: SHP size will be handled and programmed to APPL_DB during buffer pool size update + * static size: SHP size will be set (zero) and programmed to APPL_DB here + * disabled -> config ratio: shared headroom pool enabled. all lossless profiles refreshed + * SHP size will be handled during buffer pool size update + * disabled -> config size: shared headroom pool enabled. all lossless profiles refreshed + * SHP size will be handled here and programmed to APPL_DB during buffer pool table handling + */ + + auto &ingressLosslessPool = m_bufferPoolLookup[INGRESS_LOSSLESS_PG_POOL_NAME]; + if (enable_state_updated_by_ratio) + { + if (shp_enabled_by_size) + { + // enabled by size -> config or remove ratio, no action + SWSS_LOG_INFO("SHP: No need to refresh lossless profiles even if enable state updated by over subscribe ratio, already enabled by SHP size"); + } + else + { + // enabled by ratio -> remove ratio + // disabled -> config ratio + need_refresh_profiles = true; + } + } + + if (enable_state_updated_by_size) + { + if (shp_enabled_by_ratio) + { + // enabled by ratio -> config size, size will be updated (later in this function) + // enabled by ratio -> remove size, no action here, will be handled during buffer pool size update + SWSS_LOG_INFO("SHP: No need to refresh lossless profiles even if enable state updated by SHP size, already enabled by over subscribe ratio"); + } + else + { + // disabled -> config size + // enabled by size -> remove size + need_refresh_profiles = true; + } + } + + if (need_refresh_profiles) + { + SWSS_LOG_NOTICE("Updating dynamic buffer profiles due to shared headroom pool state updated"); + + for (auto it = m_bufferProfileLookup.begin(); it != m_bufferProfileLookup.end(); ++it) + { + auto &name = it->first; + auto &profile = it->second; + if (profile.static_configured) + { + SWSS_LOG_INFO("Non dynamic profile %s skipped", name.c_str()); + continue; + } + SWSS_LOG_INFO("Updating profile %s with speed %s cable length %s mtu %s gearbox model %s", + name.c_str(), + profile.speed.c_str(), profile.cable_length.c_str(), profile.port_mtu.c_str(), profile.gearbox_model.c_str()); + // recalculate the headroom size + calculateHeadroomSize(profile); + if (task_process_status::task_success != doUpdateBufferProfileForSize(profile, false)) + { + SWSS_LOG_ERROR("Failed to update buffer profile %s when toggle shared headroom pool. See previous message for detail. Please adjust the configuration manually", name.c_str()); + } + } + SWSS_LOG_NOTICE("Updating dynamic buffer profiles finished"); + } + + if (shp_enabled_by_size) + { + ingressLosslessPool.xoff = m_configuredSharedHeadroomPoolSize; + if (isNonZero(ingressLosslessPool.total_size)) + updateBufferPoolToDb(INGRESS_LOSSLESS_PG_POOL_NAME, ingressLosslessPool); + } + else if (!shp_enabled_by_ratio && enable_state_updated_by_ratio) + { + // shared headroom pool is enabled by ratio and will be disabled + // need to program APPL_DB because nobody else will take care of it + ingressLosslessPool.xoff = "0"; + if (isNonZero(ingressLosslessPool.total_size)) + updateBufferPoolToDb(INGRESS_LOSSLESS_PG_POOL_NAME, ingressLosslessPool); + } + + checkSharedBufferPoolSize(); +} + // Main flows // Update lossless pg on a port after an PG has been installed on the port @@ -842,13 +1023,13 @@ task_process_status BufferMgrDynamic::doRemovePgTask(const string &pg_key, const return task_process_status::task_success; } -task_process_status BufferMgrDynamic::doUpdateStaticProfileTask(buffer_profile_t &profile) +task_process_status BufferMgrDynamic::doUpdateBufferProfileForDynamicTh(buffer_profile_t &profile) { const string &profileName = profile.name; auto &profileToMap = profile.port_pgs; set portsChecked; - if (profile.dynamic_calculated) + if (profile.static_configured && profile.dynamic_calculated) { for (auto &key : profileToMap) { @@ -870,7 +1051,19 @@ task_process_status BufferMgrDynamic::doUpdateStaticProfileTask(buffer_profile_t } } } - else + + checkSharedBufferPoolSize(); + + return task_process_status::task_success; +} + +task_process_status BufferMgrDynamic::doUpdateBufferProfileForSize(buffer_profile_t &profile, bool update_pool_size=true) +{ + const string &profileName = profile.name; + auto &profileToMap = profile.port_pgs; + set portsChecked; + + if (!profile.static_configured || !profile.dynamic_calculated) { for (auto &key : profileToMap) { @@ -883,7 +1076,6 @@ task_process_status BufferMgrDynamic::doUpdateStaticProfileTask(buffer_profile_t if (!isHeadroomResourceValid(port, profile)) { - // to do: get the value from application database SWSS_LOG_ERROR("BUFFER_PROFILE %s cannot be updated because %s referencing it violates the resource limitation", profileName.c_str(), key.c_str()); return task_process_status::task_failed; @@ -895,7 +1087,8 @@ task_process_status BufferMgrDynamic::doUpdateStaticProfileTask(buffer_profile_t updateBufferProfileToDb(profileName, profile); } - checkSharedBufferPoolSize(); + if (update_pool_size) + checkSharedBufferPoolSize(); return task_process_status::task_success; } @@ -931,6 +1124,7 @@ task_process_status BufferMgrDynamic::handleBufferMaxParam(KeyOpFieldsValuesTupl task_process_status BufferMgrDynamic::handleDefaultLossLessBufferParam(KeyOpFieldsValuesTuple &tuple) { string op = kfvOp(tuple); + string newRatio = "0"; if (op == SET_COMMAND) { @@ -939,10 +1133,30 @@ task_process_status BufferMgrDynamic::handleDefaultLossLessBufferParam(KeyOpFiel if (fvField(i) == "default_dynamic_th") { m_defaultThreshold = fvValue(i); - SWSS_LOG_DEBUG("Handling Buffer Maximum value table field default_dynamic_th value %s", m_defaultThreshold.c_str()); + SWSS_LOG_DEBUG("Handling Buffer parameter table field default_dynamic_th value %s", m_defaultThreshold.c_str()); + } + else if (fvField(i) == "over_subscribe_ratio") + { + newRatio = fvValue(i); + SWSS_LOG_DEBUG("Handling Buffer parameter table field over_subscribe_ratio value %s", fvValue(i).c_str()); } } } + else + { + SWSS_LOG_ERROR("Unsupported command %s received for DEFAULT_LOSSLESS_BUFFER_PARAMETER table", op.c_str()); + return task_process_status::task_failed; + } + + if (newRatio != m_overSubscribeRatio) + { + bool isSHPEnabled = isNonZero(m_overSubscribeRatio); + bool willSHPBeEnabled = isNonZero(newRatio); + SWSS_LOG_INFO("Recalculate shared buffer pool size due to over subscribe ratio has been updated from %s to %s", + m_overSubscribeRatio.c_str(), newRatio.c_str()); + m_overSubscribeRatio = newRatio; + refreshSharedHeadroomPool(isSHPEnabled != willSHPBeEnabled, false); + } return task_process_status::task_success; } @@ -1149,6 +1363,7 @@ task_process_status BufferMgrDynamic::handleBufferPoolTable(KeyOpFieldsValuesTup // 1. Create the corresponding table entries in APPL_DB // 2. Record the table in the internal cache m_bufferPoolLookup buffer_pool_t &bufferPool = m_bufferPoolLookup[pool]; + string newSHPSize = "0"; bufferPool.dynamic_size = true; for (auto i = kfvFieldsValues(tuple).begin(); i != kfvFieldsValues(tuple).end(); i++) @@ -1163,7 +1378,7 @@ task_process_status BufferMgrDynamic::handleBufferPoolTable(KeyOpFieldsValuesTup } if (field == buffer_pool_xoff_field_name) { - bufferPool.xoff = value; + newSHPSize = value; } if (field == buffer_pool_mode_field_name) { @@ -1173,10 +1388,46 @@ task_process_status BufferMgrDynamic::handleBufferPoolTable(KeyOpFieldsValuesTup { bufferPool.ingress = (value == buffer_value_ingress); } - fvVector.emplace_back(FieldValueTuple(field, value)); + fvVector.emplace_back(field, value); SWSS_LOG_INFO("Inserting BUFFER_POOL table field %s value %s", field.c_str(), value.c_str()); } - if (!bufferPool.dynamic_size) + + bool dontUpdatePoolToDb = bufferPool.dynamic_size; + if (pool == INGRESS_LOSSLESS_PG_POOL_NAME) + { + /* + * "dontUpdatPoolToDb" is calculated for ingress_lossless_pool according to following rules: + * Don't update | pool size | SHP enabled by size | SHP enabled by over subscribe ratio + * True | Dynamic | Any | Any + * False | Static | True | Any + * True | Static | False | True + * False | Static | False | False + */ + bool willSHPBeEnabledBySize = isNonZero(newSHPSize); + if (newSHPSize != m_configuredSharedHeadroomPoolSize) + { + bool isSHPEnabledBySize = isNonZero(m_configuredSharedHeadroomPoolSize); + + m_configuredSharedHeadroomPoolSize = newSHPSize; + refreshSharedHeadroomPool(false, isSHPEnabledBySize != willSHPBeEnabledBySize); + } + else if (!newSHPSize.empty()) + { + SWSS_LOG_INFO("Shared headroom pool size updated without change (new %s vs current %s), skipped", newSHPSize.c_str(), m_configuredSharedHeadroomPoolSize.c_str()); + } + + if (!willSHPBeEnabledBySize) + { + // Don't need to program APPL_DB if shared headroom pool is enabled + dontUpdatePoolToDb |= isNonZero(m_overSubscribeRatio); + } + } + else if (isNonZero(newSHPSize)) + { + SWSS_LOG_ERROR("Field xoff is supported for %s only, but got for %s, ignored", INGRESS_LOSSLESS_PG_POOL_NAME, pool.c_str()); + } + + if (!dontUpdatePoolToDb) { m_applBufferPoolTable.set(pool, fvVector); m_stateBufferPoolTable.set(pool, fvVector); @@ -1299,12 +1550,12 @@ task_process_status BufferMgrDynamic::handleBufferProfileTable(KeyOpFieldsValues profileApp.state = PROFILE_NORMAL; SWSS_LOG_NOTICE("BUFFER_PROFILE %s is dynamic calculation so it won't be deployed to APPL_DB until referenced by a port", profileName.c_str()); - doUpdateStaticProfileTask(profileApp); + doUpdateBufferProfileForDynamicTh(profileApp); } else { profileApp.state = PROFILE_NORMAL; - doUpdateStaticProfileTask(profileApp); + doUpdateBufferProfileForSize(profileApp); SWSS_LOG_NOTICE("BUFFER_PROFILE %s has been inserted into APPL_DB", profileName.c_str()); SWSS_LOG_DEBUG("BUFFER_PROFILE %s for headroom override has been stored internally: [pool %s xon %s xoff %s size %s]", profileName.c_str(), diff --git a/cfgmgr/buffermgrdyn.h b/cfgmgr/buffermgrdyn.h index 941491e7b6b1..f11bd86201f8 100644 --- a/cfgmgr/buffermgrdyn.h +++ b/cfgmgr/buffermgrdyn.h @@ -47,6 +47,14 @@ typedef struct { bool static_configured; bool ingress; bool lossless; + + // fields representing parameters by which the headroom is calculated + std::string speed; + std::string cable_length; + std::string port_mtu; + std::string gearbox_model; + + // APPL_DB.BUFFER_PROFILE fields std::string name; std::string size; std::string xon; @@ -124,6 +132,8 @@ class BufferMgrDynamic : public Orch bool m_portInitDone; bool m_firstTimeCalculateBufferPool; + std::string m_configuredSharedHeadroomPoolSize; + std::shared_ptr m_applDb = nullptr; SelectableTimer *m_buffermgrPeriodtimer = nullptr; @@ -191,6 +201,8 @@ class BufferMgrDynamic : public Orch unsigned long m_mmuSizeNumber; std::string m_defaultThreshold; + std::string m_overSubscribeRatio; + // Initializers void initTableHandlerMap(); void parseGearboxInfo(std::shared_ptr> gearboxInfo); @@ -202,6 +214,10 @@ class BufferMgrDynamic : public Orch std::string parseObjectNameFromKey(const std::string &key, size_t pos/* = 1*/); std::string parseObjectNameFromReference(const std::string &reference); std::string getDynamicProfileName(const std::string &speed, const std::string &cable, const std::string &mtu, const std::string &threshold, const std::string &gearbox_model); + inline bool isNonZero(const std::string &value) const + { + return !value.empty() && value != "0"; + } // APPL_DB table operations void updateBufferPoolToDb(const std::string &name, const buffer_pool_t &pool); @@ -209,19 +225,21 @@ class BufferMgrDynamic : public Orch void updateBufferPgToDb(const std::string &key, const std::string &profile, bool add); // Meta flows - void calculateHeadroomSize(const std::string &speed, const std::string &cable, const std::string &port_mtu, const std::string &gearbox_model, buffer_profile_t &headroom); + void calculateHeadroomSize(buffer_profile_t &headroom); void checkSharedBufferPoolSize(); void recalculateSharedBufferPool(); task_process_status allocateProfile(const std::string &speed, const std::string &cable, const std::string &mtu, const std::string &threshold, const std::string &gearbox_model, std::string &profile_name); void releaseProfile(const std::string &profile_name); bool isHeadroomResourceValid(const std::string &port, const buffer_profile_t &profile, const std::string &new_pg); + void refreshSharedHeadroomPool(bool enable_state_updated_by_ratio, bool enable_state_updated_by_size); // Main flows task_process_status refreshPriorityGroupsForPort(const std::string &port, const std::string &speed, const std::string &cable_length, const std::string &mtu, const std::string &exactly_matched_key); task_process_status doUpdatePgTask(const std::string &pg_key, const std::string &port); task_process_status doRemovePgTask(const std::string &pg_key, const std::string &port); task_process_status doAdminStatusTask(const std::string port, const std::string adminStatus); - task_process_status doUpdateStaticProfileTask(buffer_profile_t &profile); + task_process_status doUpdateBufferProfileForDynamicTh(buffer_profile_t &profile); + task_process_status doUpdateBufferProfileForSize(buffer_profile_t &profile, bool update_pool_size); // Table update handlers task_process_status handleBufferMaxParam(KeyOpFieldsValuesTuple &t); diff --git a/tests/test_buffer_dynamic.py b/tests/test_buffer_dynamic.py index b0af416c41a4..932247de37d5 100644 --- a/tests/test_buffer_dynamic.py +++ b/tests/test_buffer_dynamic.py @@ -408,3 +408,95 @@ def test_nonDefaultAlpha(self, dvs, testlog): # clear configuration self.config_db.delete_entry('BUFFER_PG', 'Ethernet0|3-4') self.config_db.delete_entry('BUFFER_PROFILE', 'non-default-dynamic') + + def test_sharedHeadroomPool(self, dvs, testlog): + self.setup_db(dvs) + + # configure lossless PG 3-4 on interface and start up the interface + self.config_db.update_entry('BUFFER_PG', 'Ethernet0|3-4', {'profile': 'NULL'}) + dvs.runcmd('config interface startup Ethernet0') + + expectedProfile = self.make_lossless_profile_name(self.originalSpeed, self.originalCableLen) + self.app_db.wait_for_entry("BUFFER_PROFILE_TABLE", expectedProfile) + self.app_db.wait_for_field_match("BUFFER_PG_TABLE", "Ethernet0:3-4", {"profile": "[BUFFER_PROFILE_TABLE:" + expectedProfile + "]"}) + self.check_new_profile_in_asic_db(dvs, expectedProfile) + profileInApplDb = self.app_db.get_entry('BUFFER_PROFILE_TABLE', expectedProfile) + + # enable shared headroom pool by configuring over subscribe ratio + default_lossless_buffer_parameter = self.config_db.get_entry('DEFAULT_LOSSLESS_BUFFER_PARAMETER', 'AZURE') + over_subscribe_ratio = default_lossless_buffer_parameter.get('over_subscribe_ratio') + assert not over_subscribe_ratio or over_subscribe_ratio == '0', "Over subscribe ratio isn't 0" + + # config over subscribe ratio to 2 + default_lossless_buffer_parameter['over_subscribe_ratio'] = '2' + self.config_db.update_entry('DEFAULT_LOSSLESS_BUFFER_PARAMETER', 'AZURE', default_lossless_buffer_parameter) + + # check buffer profile: xoff should be removed from size + profileInApplDb['size'] = profileInApplDb['xon'] + self.app_db.wait_for_field_match('BUFFER_PROFILE_TABLE', expectedProfile, profileInApplDb) + self.check_new_profile_in_asic_db(dvs, expectedProfile) + + # check ingress_lossless_pool between appldb and asicdb + # there are only two lossless PGs configured on one port. + # hence the shared headroom pool size should be pg xoff * 2 / over subscribe ratio (2) = xoff. + ingress_lossless_pool_in_appldb = self.app_db.get_entry('BUFFER_POOL_TABLE', 'ingress_lossless_pool') + shp_size = profileInApplDb['xoff'] + ingress_lossless_pool_in_appldb['xoff'] = shp_size + # toggle shared headroom pool, it requires some time to update pools + time.sleep(20) + self.app_db.wait_for_field_match('BUFFER_POOL_TABLE', 'ingress_lossless_pool', ingress_lossless_pool_in_appldb) + ingress_lossless_pool_in_asicdb = self.asic_db.get_entry('ASIC_STATE:SAI_OBJECT_TYPE_BUFFER_PROFILE', self.ingress_lossless_pool_oid) + ingress_lossless_pool_in_asicdb['SAI_BUFFER_POOL_ATTR_XOFF_SIZE'] = shp_size + self.asic_db.wait_for_field_match('ASIC_STATE:SAI_OBJECT_TYPE_BUFFER_POOL', self.ingress_lossless_pool_oid, ingress_lossless_pool_in_asicdb) + + # config shared headroom pool size + shp_size = '204800' + ingress_lossless_pool_in_configdb = self.config_db.get_entry('BUFFER_POOL', 'ingress_lossless_pool') + ingress_lossless_pool_in_configdb['xoff'] = shp_size + self.config_db.update_entry('BUFFER_POOL', 'ingress_lossless_pool', ingress_lossless_pool_in_configdb) + # make sure the size is still equal to xon in the profile + self.app_db.wait_for_field_match('BUFFER_PROFILE_TABLE', expectedProfile, profileInApplDb) + self.check_new_profile_in_asic_db(dvs, expectedProfile) + + # config over subscribe ratio to 4 + default_lossless_buffer_parameter['over_subscribe_ratio'] = '4' + self.config_db.update_entry('DEFAULT_LOSSLESS_BUFFER_PARAMETER', 'AZURE', default_lossless_buffer_parameter) + # shp size wins in case both size and over subscribe ratio is configured + ingress_lossless_pool_in_appldb['xoff'] = shp_size + self.app_db.wait_for_field_match('BUFFER_POOL_TABLE', 'ingress_lossless_pool', ingress_lossless_pool_in_appldb) + ingress_lossless_pool_in_asicdb['SAI_BUFFER_POOL_ATTR_XOFF_SIZE'] = shp_size + self.asic_db.wait_for_field_match('ASIC_STATE:SAI_OBJECT_TYPE_BUFFER_POOL', self.ingress_lossless_pool_oid, ingress_lossless_pool_in_asicdb) + # make sure the size is still equal to xon in the profile + self.app_db.wait_for_field_match('BUFFER_PROFILE_TABLE', expectedProfile, profileInApplDb) + self.check_new_profile_in_asic_db(dvs, expectedProfile) + + # remove size configuration, new over subscribe ratio takes effect + ingress_lossless_pool_in_configdb['xoff'] = '0' + self.config_db.update_entry('BUFFER_POOL', 'ingress_lossless_pool', ingress_lossless_pool_in_configdb) + # shp size: pg xoff * 2 / over subscribe ratio (4) = pg xoff / 2 + shp_size = str(int(int(profileInApplDb['xoff']) / 2)) + time.sleep(30) + ingress_lossless_pool_in_appldb['xoff'] = shp_size + self.app_db.wait_for_field_match('BUFFER_POOL_TABLE', 'ingress_lossless_pool', ingress_lossless_pool_in_appldb) + ingress_lossless_pool_in_asicdb['SAI_BUFFER_POOL_ATTR_XOFF_SIZE'] = shp_size + self.asic_db.wait_for_field_match('ASIC_STATE:SAI_OBJECT_TYPE_BUFFER_POOL', self.ingress_lossless_pool_oid, ingress_lossless_pool_in_asicdb) + # make sure the size is still equal to xon in the profile + self.app_db.wait_for_field_match('BUFFER_PROFILE_TABLE', expectedProfile, profileInApplDb) + self.check_new_profile_in_asic_db(dvs, expectedProfile) + + # remove over subscribe ratio configuration + default_lossless_buffer_parameter['over_subscribe_ratio'] = '0' + self.config_db.update_entry('DEFAULT_LOSSLESS_BUFFER_PARAMETER', 'AZURE', default_lossless_buffer_parameter) + # check whether shp size has been removed from both asic db and appl db + ingress_lossless_pool_in_appldb['xoff'] = '0' + self.app_db.wait_for_field_match('BUFFER_POOL_TABLE', 'ingress_lossless_pool', ingress_lossless_pool_in_appldb) + ingress_lossless_pool_in_asicdb['SAI_BUFFER_POOL_ATTR_XOFF_SIZE'] = '0' + self.asic_db.wait_for_field_match('ASIC_STATE:SAI_OBJECT_TYPE_BUFFER_POOL', self.ingress_lossless_pool_oid, ingress_lossless_pool_in_asicdb) + # make sure the size is equal to xon + xoff in the profile + profileInApplDb['size'] = str(int(profileInApplDb['xon']) + int(profileInApplDb['xoff'])) + self.app_db.wait_for_field_match('BUFFER_PROFILE_TABLE', expectedProfile, profileInApplDb) + self.check_new_profile_in_asic_db(dvs, expectedProfile) + + # remove lossless PG 3-4 on interface + self.config_db.delete_entry('BUFFER_PG', 'Ethernet0|3-4') + dvs.runcmd('config interface shutdown Ethernet0')