From 56a16be59c1cbd4fb84dec0eb3a5eea67c9c19ff Mon Sep 17 00:00:00 2001 From: roger530-ho Date: Tue, 23 Jan 2024 08:54:28 +0000 Subject: [PATCH] 1. Change CPLD value for shutdown DUT 2. sync disk data before shutdown DUT --- .../utils/accton_as4630_54pe_pddf_monitor.py | 39 ++++++++++++++----- .../utils/accton_as4630_54te_monitor.py | 16 ++++++-- 2 files changed, 42 insertions(+), 13 deletions(-) diff --git a/platform/broadcom/sonic-platform-modules-accton/as4630-54pe/utils/accton_as4630_54pe_pddf_monitor.py b/platform/broadcom/sonic-platform-modules-accton/as4630-54pe/utils/accton_as4630_54pe_pddf_monitor.py index 6bbaeaa06d..5e8a5210e8 100755 --- a/platform/broadcom/sonic-platform-modules-accton/as4630-54pe/utils/accton_as4630_54pe_pddf_monitor.py +++ b/platform/broadcom/sonic-platform-modules-accton/as4630-54pe/utils/accton_as4630_54pe_pddf_monitor.py @@ -207,14 +207,24 @@ def manage_fans(self): fan_fail_list[i] = 0 if sum(fan_fail_list) == NUM_FANS: + logging.critical( + 'Alarm for all fan faulty/absent is detected, disable PoE') + cmd_str = ["i2cset", "-f", "-y", "16", "0x20", "0x06", "0x0", "0x0", "0xff", "0xff", "0xff", "0xff", "0xff", "0xff", "0xff", "0xff", "0xFE", "i"] + getstatusoutput_noshell(cmd_str) # Disable PoE + # Critical: Either all the fans are faulty or they are removed, shutdown the system logging.critical('Alarm for all fan faulty/absent is detected') - logging.critical("Alarm for all fan faulty/absent is detected, reset DUT") - cmd_str = ["i2cset", "-y", "-f", "3", "0x60", "0x4", "0xE4"] + logging.critical("Alarm for all fan faulty/absent is detected, shutdown DUT") + + # Sync log buffer to disk + cmd_str = ["sync"] + getstatusoutput_noshell(cmd_str) + cmd_str = ["/sbin/fstrim", "-av"] + getstatusoutput_noshell(cmd_str) + time.sleep(3) + + cmd_str = ["i2cset", "-y", "-f", "3", "0x60", "0x4", "0x74"] time.sleep(2) - getstatusoutput_noshell('sync') - getstatusoutput_noshell('sync') - getstatusoutput_noshell('sync') getstatusoutput_noshell(cmd_str) elif sum(fan_fail_list) != 0: # Set the 100% speed only for first fan failure detection @@ -243,8 +253,13 @@ def manage_fans(self): if temp[0] >= 70000: # LM77-48 # critical case*/ + logging.critical( + 'Alarm-Critical for temperature critical is detected, disable PoE') + cmd_str = ["i2cset", "-f", "-y", "16", "0x20", "0x06", "0x0", "0x0", "0xff", "0xff", "0xff", "0xff", "0xff", "0xff", "0xff", "0xff", "0xFE", "i"] + getstatusoutput_noshell(cmd_str) # Disable PoE + logging.critical('Alarm for temperature critical is detected') - logging.critical("Alarm-Critical for temperature critical is detected, reset DUT") + logging.critical("Alarm-Critical for temperature critical is detected, shutdown DUT") # Update the reboot cause file to reflect that critical temperature # has been crossed. Upon next boot, the contents of this file will # be used to determine the cause of the previous reboot @@ -255,10 +270,14 @@ def manage_fans(self): if status: logging.warning('Reboot cause file not updated. {}'.format(output)) - cmd_str = ["i2cset", "-y", "-f", "3", "0x60", "0x4", "0xE4"] - getstatusoutput_noshell('sync') - getstatusoutput_noshell('sync') - getstatusoutput_noshell('sync') + # Sync log buffer to disk + cmd_str = ["sync"] + getstatusoutput_noshell(cmd_str) + cmd_str = ["/sbin/fstrim", "-av"] + getstatusoutput_noshell(cmd_str) + time.sleep(3) + + cmd_str = ["i2cset", "-y", "-f", "3", "0x60", "0x4", "0x74"] time.sleep(3) getstatusoutput_noshell(cmd_str) diff --git a/platform/broadcom/sonic-platform-modules-accton/as4630-54te/utils/accton_as4630_54te_monitor.py b/platform/broadcom/sonic-platform-modules-accton/as4630-54te/utils/accton_as4630_54te_monitor.py index 005d9d6b1e..9258561ef5 100755 --- a/platform/broadcom/sonic-platform-modules-accton/as4630-54te/utils/accton_as4630_54te_monitor.py +++ b/platform/broadcom/sonic-platform-modules-accton/as4630-54te/utils/accton_as4630_54te_monitor.py @@ -20,12 +20,14 @@ # ------------------------------------------------------------------ try: + import os import sys import getopt import logging import logging.config import logging.handlers import time + import subprocess from as4630_54te.fanutil import FanUtil from as4630_54te.thermalutil import ThermalUtil except ImportError as e: @@ -195,11 +197,19 @@ def manage_fans(self): if temp[0] >= 70000: # LM75-48 # critical case*/ logging.critical( - 'Alarm-Critical for temperature critical is detected, reset DUT') - cmd_str = ["i2cset", "-y", "-f", "3", "0x60", "0x4", "0xE4"] + 'Alarm-Critical for temperature critical is detected, shutdown DUT') + + # Sync log buffer to disk + cmd_str="sync" + status, output = subprocess.getstatusoutput(cmd_str) + cmd_str="/sbin/fstrim -av" + status, output = subprocess.getstatusoutput(cmd_str) + time.sleep(3) + + cmd_str = ["i2cset", "-y", "-f", "3", "0x60", "0x4", "0x74"] time.sleep(2) return_value = subprocess.call(cmd_str) - logging.warning('Fan set: i2cset -y -f 3 0x60 0x4 0xE4, status is %d', return_value) + logging.warning('Fan set: i2cset -y -f 3 0x60 0x4 0x74, status is %d', return_value) #logging.debug('ori_state=%d, current_state=%d, temp_val=%d\n\n',ori_state, fan_policy_state, temp_val)