diff --git a/src/system-health/health_checker/manager.py b/src/system-health/health_checker/manager.py index f6bc31f874e51..b1c6dd15bec3a 100644 --- a/src/system-health/health_checker/manager.py +++ b/src/system-health/health_checker/manager.py @@ -1,4 +1,3 @@ -from . import utils from .config import Config from .health_checker import HealthChecker from .service_checker import ServiceChecker @@ -10,14 +9,10 @@ class HealthCheckerManager(object): """ Manage all system health checkers and system health configuration. """ - STATE_BOOTING = 'booting' - STATE_RUNNING = 'running' boot_timeout = None def __init__(self): self._checkers = [] - self._state = self.STATE_BOOTING - self.config = Config() self.initialize() @@ -33,17 +28,11 @@ def check(self, chassis): """ Load new configuration if any and perform the system health check for all existing checkers. :param chassis: A chassis object. - :return: A tuple. The first element indicate the status of the checker; the second element is a dictionary that - contains the status for all objects that was checked. + :return: A dictionary that contains the status for all objects that was checked. """ HealthChecker.summary = HealthChecker.STATUS_OK stats = {} self.config.load_config() - # check state first to avoid user change boot timeout in configuration file - # after finishing system boot - if self._state == self.STATE_BOOTING and self._is_system_booting(): - self._set_system_led(chassis, self.config, 'booting') - return self._state, stats for checker in self._checkers: self._do_check(checker, stats) @@ -56,7 +45,7 @@ def check(self, chassis): led_status = 'normal' if HealthChecker.summary == HealthChecker.STATUS_OK else 'fault' self._set_system_led(chassis, self.config, led_status) - return self._state, stats + return stats def _do_check(self, checker, stats): """ @@ -86,15 +75,6 @@ def _do_check(self, checker, stats): else: stats['Internal'].update(entry) - def _is_system_booting(self): - uptime = utils.get_uptime() - if not self.boot_timeout: - self.boot_timeout = self.config.get_bootup_timeout() - booting = uptime < self.boot_timeout - if not booting: - self._state = self.STATE_RUNNING - return booting - def _set_system_led(self, chassis, config, status): try: chassis.set_status_led(config.get_led_color(status)) diff --git a/src/system-health/health_checker/service_checker.py b/src/system-health/health_checker/service_checker.py index 171f7e2e8b4d0..fa47e1e50c632 100644 --- a/src/system-health/health_checker/service_checker.py +++ b/src/system-health/health_checker/service_checker.py @@ -218,7 +218,7 @@ def check_by_monit(self, config): output = utils.run_command(ServiceChecker.CHECK_CMD) lines = output.splitlines() if not lines or len(lines) < ServiceChecker.MIN_CHECK_CMD_LINES: - self.set_object_not_ok('Service', 'monit', 'output of \"monit summary -B\" is invalid or incompatible') + self.set_object_not_ok('Service', 'monit', 'monit service is not ready') return status_begin = lines[1].find('Status') diff --git a/src/system-health/health_checker/utils.py b/src/system-health/health_checker/utils.py index f310002e1e5f2..00e7754e1ec2a 100644 --- a/src/system-health/health_checker/utils.py +++ b/src/system-health/health_checker/utils.py @@ -8,7 +8,7 @@ def run_command(command): :return: Output of the shell command. """ try: - process = subprocess.Popen(command, shell=True, universal_newlines=True, stdout=subprocess.PIPE) + process = subprocess.Popen(command, shell=True, universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) return process.communicate()[0] except Exception: return None diff --git a/src/system-health/scripts/healthd b/src/system-health/scripts/healthd index da0258bb59e5c..dd276df2fafda 100644 --- a/src/system-health/scripts/healthd +++ b/src/system-health/scripts/healthd @@ -18,7 +18,7 @@ SYSLOG_IDENTIFIER = 'healthd' class HealthDaemon(DaemonBase): """ - A daemon that run as a service to perform system health checker with a configurable interval. Also set system LED + A daemon that run as a service to perform system health checker with a configurable interval. Also set system LED according to the check result and store the check result to redis. """ SYSTEM_HEALTH_TABLE_NAME = 'SYSTEM_HEALTH_INFO' @@ -35,7 +35,7 @@ class HealthDaemon(DaemonBase): def deinit(self): """ Destructor. Remove all entries in $SYSTEM_HEALTH_TABLE_NAME table. - :return: + :return: """ self._clear_system_health_table() @@ -64,7 +64,7 @@ class HealthDaemon(DaemonBase): def run(self): """ Check system health in an infinite loop. - :return: + :return: """ self.log_notice("Starting up...") @@ -76,9 +76,8 @@ class HealthDaemon(DaemonBase): self.log_warning("System health configuration file not found, exit...") return while 1: - state, stat = manager.check(chassis) - if state == HealthCheckerManager.STATE_RUNNING: - self._process_stat(chassis, manager.config, stat) + stat = manager.check(chassis) + self._process_stat(chassis, manager.config, stat) if self.stop_event.wait(manager.config.interval): break diff --git a/src/system-health/tests/test_system_health.py b/src/system-health/tests/test_system_health.py index c7ff86c28b97b..14d58c0f44b54 100644 --- a/src/system-health/tests/test_system_health.py +++ b/src/system-health/tests/test_system_health.py @@ -439,26 +439,14 @@ def test_config(): @patch('health_checker.user_defined_checker.UserDefinedChecker.get_info') @patch('health_checker.service_checker.ServiceChecker.get_info') @patch('health_checker.hardware_checker.HardwareChecker.get_info') -@patch('health_checker.utils.get_uptime') -def test_manager(mock_uptime, mock_hw_info, mock_service_info, mock_udc_info): +def test_manager(mock_hw_info, mock_service_info, mock_udc_info): chassis = MagicMock() chassis.set_status_led = MagicMock() manager = HealthCheckerManager() manager.config.user_defined_checkers = ['some check'] - assert manager._state == HealthCheckerManager.STATE_BOOTING assert len(manager._checkers) == 2 - mock_uptime.return_value = 200 - assert manager._is_system_booting() - state, stat = manager.check(chassis) - assert state == HealthCheckerManager.STATE_BOOTING - assert len(stat) == 0 - chassis.set_status_led.assert_called_with('orange_blink') - - mock_uptime.return_value = 500 - assert not manager._is_system_booting() - assert manager._state == HealthCheckerManager.STATE_RUNNING mock_hw_info.return_value = { 'ASIC': { 'type': 'ASIC', @@ -485,8 +473,7 @@ def test_manager(mock_uptime, mock_hw_info, mock_service_info, mock_udc_info): 'status': 'OK' } } - state, stat = manager.check(chassis) - assert state == HealthCheckerManager.STATE_RUNNING + stat = manager.check(chassis) assert 'Services' in stat assert stat['Services']['snmp:snmpd']['status'] == 'OK' @@ -500,7 +487,7 @@ def test_manager(mock_uptime, mock_hw_info, mock_service_info, mock_udc_info): mock_hw_info.side_effect = RuntimeError() mock_service_info.side_effect = RuntimeError() mock_udc_info.side_effect = RuntimeError() - state, stat = manager.check(chassis) + stat = manager.check(chassis) assert 'Internal' in stat assert stat['Internal']['ServiceChecker']['status'] == 'Not OK' assert stat['Internal']['HardwareChecker']['status'] == 'Not OK' @@ -518,6 +505,3 @@ def test_utils(): output = utils.run_command('ls') assert output - - uptime = utils.get_uptime() - assert uptime > 0