Skip to content

Commit

Permalink
[psud] Enhancement for PSU led management (sonic-net#55)
Browse files Browse the repository at this point in the history
  • Loading branch information
Junchao-Mellanox authored Apr 30, 2020
1 parent 238fc06 commit f1409e0
Showing 1 changed file with 196 additions and 0 deletions.
196 changes: 196 additions & 0 deletions sonic-psud/scripts/psud
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ def _wrapper_get_psus_status(psu_index):
pass
return platform_psuutil.get_psu_status(psu_index)


#
# Helper functions =============================================================
#
Expand All @@ -85,6 +86,108 @@ def psu_db_update(psu_tbl, psu_num):
'true' if _wrapper_get_psus_status(psu_index) else 'false')])
psu_tbl.set(PSU_INFO_KEY_TEMPLATE.format(psu_index), fvs)


# Try to get information from the platform API; return a default value if NotImplementedError is caught
def try_get(callback, default=None):
    """
    Call a platform API getter and fall back to a default when no value is available
    :param callback: Zero-argument callable to be invoked
    :param default: Value handed back when the callback raises NotImplementedError or returns None
    :return: The callback's return value, or the default when that value is unavailable
    """
    try:
        result = callback()
    except NotImplementedError:
        return default

    # A getter that returns None is treated the same way as an unimplemented one
    return default if result is None else result

def log_on_status_changed(normal_status, normal_log, abnormal_log):
    """
    Emit the log entry matching the new state after a status change
    :param normal_status: Truthy when the new status is the expected one
    :param normal_log: Message logged at notice level for the expected status
    :param abnormal_log: Message logged at warning level for the unexpected status
    :return:
    """
    if not normal_status:
        logger.log_warning(abnormal_log)
        return

    logger.log_notice(normal_log)

#
# PSU status ===================================================================
#

class PsuStatus(object):
    """
    Cached health state of a single PSU.

    Every set_* method records the latest observation and reports whether the
    cached state changed, so that the caller knows when to log and refresh the LED.
    """
    # Shared by all instances: True requests that the LED status of the PSUs
    # be published to the PSU table again.
    update_led_color = True

    def __init__(self, psu):
        """
        :param psu: Platform PSU object this status belongs to
        """
        self.psu = psu
        # Every PSU starts out as healthy; only deviations are reported as changes
        self.presence = True
        self.power_good = True
        self.voltage_good = True
        self.temperature_good = True

    def set_presence(self, presence):
        """
        Set and cache PSU presence status
        :param presence: PSU presence status
        :return: True if status changed else False
        """
        if presence == self.presence:
            return False

        self.presence = presence
        return True

    def set_power_good(self, power_good):
        """
        Set and cache PSU power good status
        :param power_good: PSU power good status
        :return: True if status changed else False
        """
        if power_good == self.power_good:
            return False

        self.power_good = power_good
        return True

    def set_voltage(self, voltage, high_threshold, low_threshold):
        """
        Set and cache PSU voltage status
        :param voltage: Current PSU voltage, None if unavailable
        :param high_threshold: Upper bound of the valid voltage range, None if unavailable
        :param low_threshold: Lower bound of the valid voltage range, None if unavailable
        :return: True if status changed else False
        """
        # Only None means "no data". A value of 0 is a real reading or threshold
        # and must still go through the range check below.
        if voltage is None or high_threshold is None or low_threshold is None:
            if self.voltage_good is not True:
                logger.log_warning('PSU voltage or high_threshold or low_threshold become unavailable, '
                                   'voltage={}, high_threshold={}, low_threshold={}'.format(voltage, high_threshold, low_threshold))
                # Without data the voltage can no longer be judged abnormal
                self.voltage_good = True
            # The reset above is not reported as a status change
            return False

        voltage_good = (low_threshold <= voltage <= high_threshold)
        if voltage_good == self.voltage_good:
            return False

        self.voltage_good = voltage_good
        return True

    def set_temperature(self, temperature, high_threshold):
        """
        Set and cache PSU temperature status
        :param temperature: Current PSU temperature, None if unavailable
        :param high_threshold: Temperature the PSU must stay strictly below, None if unavailable
        :return: True if status changed else False
        """
        # Only None means "no data". A temperature of 0 is a real reading.
        if temperature is None or high_threshold is None:
            if self.temperature_good is not True:
                logger.log_warning('PSU temperature or high_threshold become unavailable, '
                                   'temperature={}, high_threshold={}'.format(temperature, high_threshold))
                # Without data the temperature can no longer be judged abnormal
                self.temperature_good = True
            # The reset above is not reported as a status change
            return False

        temperature_good = (temperature < high_threshold)
        if temperature_good == self.temperature_good:
            return False

        self.temperature_good = temperature_good
        return True

    def is_ok(self):
        """
        Tell whether the PSU is healthy
        :return: True only if the PSU is present and its power, voltage and temperature are all good
        """
        return self.presence and self.power_good and self.voltage_good and self.temperature_good

#
# Daemon =======================================================================
#
Expand All @@ -94,6 +197,7 @@ class DaemonPsud(DaemonBase):
DaemonBase.__init__(self)

self.stop = threading.Event()
self.psu_status_dict = {}

# Signal handler
def signal_handler(self, sig, frame):
Expand Down Expand Up @@ -145,6 +249,8 @@ class DaemonPsud(DaemonBase):

while not self.stop.wait(PSU_INFO_UPDATE_PERIOD_SECS):
psu_db_update(psu_tbl, psu_num)
self.update_psu_data()
self._update_led_color(psu_tbl)

logger.log_info("Stop daemon main loop")

Expand All @@ -156,6 +262,96 @@ class DaemonPsud(DaemonBase):

logger.log_info("Shutting down...")

def update_psu_data(self):
    """
    Refresh the cached status of every PSU reported by the platform chassis.
    Does nothing when no platform chassis is available.
    """
    if platform_chassis:
        # PSU indexes are 1-based
        for psu_index, psu in enumerate(platform_chassis.get_all_psus(), 1):
            try:
                self._update_single_psu_data(psu_index, psu)
            except Exception as e:
                # Handled per PSU so that one failing PSU does not stop the others from being updated
                logger.log_warning("Failed to update PSU data - {}".format(e))

def _update_single_psu_data(self, index, psu):
    """
    Poll one PSU, update its cached status, log every status transition and
    refresh the PSU LED when anything changed
    :param index: 1-based PSU index
    :param psu: Platform PSU object
    :return:
    """
    name = try_get(psu.get_name)
    if not name:
        # Fall back to the PSU table key when the platform provides no name
        name = PSU_INFO_KEY_TEMPLATE.format(index)
    presence = _wrapper_get_psus_presence(index)
    power_good = False
    voltage = None
    voltage_high_threshold = None
    voltage_low_threshold = None
    temperature = None
    temperature_threshold = None
    if presence:
        # Sensors are only queried while the PSU is present
        power_good = _wrapper_get_psus_status(index)
        voltage = try_get(psu.get_voltage)
        voltage_high_threshold = try_get(psu.get_voltage_high_threshold)
        voltage_low_threshold = try_get(psu.get_voltage_low_threshold)
        temperature = try_get(psu.get_temperature)
        temperature_threshold = try_get(psu.get_temperature_high_threshold)

    if index not in self.psu_status_dict:
        self.psu_status_dict[index] = PsuStatus(psu)

    psu_status = self.psu_status_dict[index]
    set_led = False
    if psu_status.set_presence(presence):
        set_led = True
        log_on_status_changed(psu_status.presence,
                              'PSU absence warning cleared: {} is inserted back.'.format(name),
                              'PSU absence warning: {} is not present.'.format(name)
                              )

    # The remaining checks only apply to a present PSU; the cached values of an
    # absent PSU are left untouched.
    if presence and psu_status.set_power_good(power_good):
        set_led = True
        log_on_status_changed(psu_status.power_good,
                              'Power absence warning cleared: {} power is back to normal.'.format(name),
                              'Power absence warning: {} is out of power.'.format(name)
                              )

    if presence and psu_status.set_voltage(voltage, voltage_high_threshold, voltage_low_threshold):
        set_led = True
        # The valid range is reported as [low, high]
        log_on_status_changed(psu_status.voltage_good,
                              'PSU voltage warning cleared: {} voltage is back to normal.'.format(name),
                              'PSU voltage warning: {} voltage out of range, current voltage={}, valid range=[{}, {}].'.format(
                                  name, voltage, voltage_low_threshold, voltage_high_threshold)
                              )

    if presence and psu_status.set_temperature(temperature, temperature_threshold):
        set_led = True
        log_on_status_changed(psu_status.temperature_good,
                              'PSU temperature warning cleared: {} temperature is back to normal.'.format(name),
                              'PSU temperature warning: {} temperature too hot, temperature={}, threshold={}.'.format(name, temperature, temperature_threshold)
                              )

    if set_led:
        # Request that the LED status be published to the PSU table again
        PsuStatus.update_led_color = True
        self._set_psu_led(psu, psu_status)

def _set_psu_led(self, psu, psu_status):
try:
color = psu.STATUS_LED_COLOR_GREEN if psu_status.is_ok() else psu.STATUS_LED_COLOR_RED
psu.set_status_led(color)
except NotImplementedError as e:
pass

def _update_led_color(self, psu_tbl):
    """
    Publish the LED status of every tracked PSU to the PSU table when a refresh was requested
    :param psu_tbl: Table object that receives the 'led_status' field of each PSU
    :return:
    """
    if not platform_chassis:
        return

    # Nothing to publish unless a refresh was requested
    if not PsuStatus.update_led_color:
        return

    for index, psu_status in self.psu_status_dict.items():
        try:
            # Default to NOT_AVAILABLE so that an unimplemented LED getter, or one
            # returning None, is not published as the literal string 'None'
            led_status = str(try_get(psu_status.psu.get_status_led, NOT_AVAILABLE))
        except Exception:
            logger.log_warning('Failed to get led status for psu {}'.format(index))
            led_status = NOT_AVAILABLE
        fvs = swsscommon.FieldValuePairs([
            ('led_status', led_status)
        ])
        psu_tbl.set(PSU_INFO_KEY_TEMPLATE.format(index), fvs)
    PsuStatus.update_led_color = False
#
# Main =========================================================================
#
Expand Down

0 comments on commit f1409e0

Please sign in to comment.