From 9299a249d3ed272158d11567ad1ef18102af912f Mon Sep 17 00:00:00 2001 From: Guohan Lu Date: Sat, 6 Apr 2019 22:21:30 +0000 Subject: [PATCH] Revert "[201803] [services] Restart SwSS service upon unexpected critical process exit (#2546)" This reverts commit 2a8af2705e216c1b26f0218eff60ef8c1a84449f. --- dockers/docker-dhcp-relay/wait_for_intf.sh.j2 | 38 +++++++------- dockers/docker-orchagent/Dockerfile.j2 | 2 - dockers/docker-orchagent/critical_processes | 7 --- dockers/docker-orchagent/supervisord.conf | 8 +-- files/build_templates/dhcp_relay.service.j2 | 2 +- files/build_templates/radv.service.j2 | 2 +- files/build_templates/snmp.service.j2 | 3 -- files/build_templates/swss.service.j2 | 4 -- files/build_templates/teamd.service.j2 | 6 +-- files/scripts/supervisor-proc-exit-listener | 45 ----------------- platform/broadcom/docker-orchagent-brcm.mk | 2 +- platform/cavium/docker-orchagent-cavm.mk | 2 +- platform/centec/docker-orchagent-centec.mk | 2 +- platform/marvell/docker-orchagent-mrvl.mk | 2 +- platform/mellanox/docker-orchagent-mlnx.mk | 2 +- platform/nephos/docker-orchagent-nephos.mk | 2 +- rules/docker-dhcp-relay.mk | 2 +- rules/scripts.mk | 6 +-- .../tests/sample_output/wait_for_intf.sh | 50 ++++++++++--------- 19 files changed, 60 insertions(+), 127 deletions(-) delete mode 100644 dockers/docker-orchagent/critical_processes delete mode 100755 files/scripts/supervisor-proc-exit-listener diff --git a/dockers/docker-dhcp-relay/wait_for_intf.sh.j2 b/dockers/docker-dhcp-relay/wait_for_intf.sh.j2 index 23133706cb6c..037dc66ead63 100644 --- a/dockers/docker-dhcp-relay/wait_for_intf.sh.j2 +++ b/dockers/docker-dhcp-relay/wait_for_intf.sh.j2 @@ -1,40 +1,42 @@ #!/usr/bin/env bash -STATE_DB_IDX="6" - -PORT_TABLE_PREFIX="PORT_TABLE" -VLAN_TABLE_PREFIX="VLAN_TABLE" -LAG_TABLE_PREFIX="LAG_TABLE" - function wait_until_iface_ready { - TABLE_PREFIX=$1 - IFACE=$2 + IFACE=$1 + + echo "Waiting until interface $IFACE is up..." + + # Wait for the interface to come up (i.e., 'ip link show' returns 0) + until ip link show dev $IFACE up > /dev/null 2>&1; do + sleep 1 + done - echo "Waiting until interface $IFACE is ready..." + echo "Interface $IFACE is up" - # Wait for the interface to come up - # (i.e., interface is present in STATE_DB and state is "ok") + echo "Waiting until interface $IFACE has an IPv4 address..." + + # Wait until the interface gets assigned an IPv4 address while true; do - RESULT=$(redis-cli -n ${STATE_DB_IDX} HGET "${TABLE_PREFIX}|${IFACE}" "state" 2> /dev/null) - if [ x"$RESULT" == x"ok" ]; then + IP=$(ip -4 addr show dev $IFACE | grep "inet " | awk '{ print $2 }' | cut -d '/' -f1) + + if [ -n "$IP" ]; then break fi sleep 1 done - echo "Interface ${IFACE} is ready!" + echo "Interface $IFACE is configured with IP $IP" } -# Wait for all interfaces to be up and ready +# Wait for all interfaces to come up and have IPv4 addresses assigned {% for (name, prefix) in INTERFACE %} -wait_until_iface_ready ${PORT_TABLE_PREFIX} {{ name }} +wait_until_iface_ready {{ name }} {% endfor %} {% for (name, prefix) in VLAN_INTERFACE %} -wait_until_iface_ready ${VLAN_TABLE_PREFIX} {{ name }} +wait_until_iface_ready {{ name }} {% endfor %} {% for (name, prefix) in PORTCHANNEL_INTERFACE %} -wait_until_iface_ready ${LAG_TABLE_PREFIX} {{ name }} +wait_until_iface_ready {{ name }} {% endfor %} diff --git a/dockers/docker-orchagent/Dockerfile.j2 b/dockers/docker-orchagent/Dockerfile.j2 index 6958390ed2da..1597b436c3fe 100755 --- a/dockers/docker-orchagent/Dockerfile.j2 +++ b/dockers/docker-orchagent/Dockerfile.j2 @@ -30,8 +30,6 @@ COPY ["files/arp_update", "/usr/bin"] COPY ["enable_counters.py", "/usr/bin"] COPY ["start.sh", "orchagent.sh", "swssconfig.sh", "/usr/bin/"] COPY ["supervisord.conf", "/etc/supervisor/conf.d/"] -COPY ["files/supervisor-proc-exit-listener", "/usr/bin"] -COPY ["critical_processes", "/etc/supervisor/"] ## Copy all Jinja2 template files into the templates folder COPY ["*.j2", "/usr/share/sonic/templates/"] diff --git a/dockers/docker-orchagent/critical_processes b/dockers/docker-orchagent/critical_processes deleted file mode 100644 index d48eb66cda1b..000000000000 --- a/dockers/docker-orchagent/critical_processes +++ /dev/null @@ -1,7 +0,0 @@ -orchagent -portsyncd -intfsyncd -neighsyncd -vlanmgrd -intfmgrd -buffermgrd diff --git a/dockers/docker-orchagent/supervisord.conf b/dockers/docker-orchagent/supervisord.conf index 076c02abd289..2369a4c31b05 100644 --- a/dockers/docker-orchagent/supervisord.conf +++ b/dockers/docker-orchagent/supervisord.conf @@ -3,12 +3,6 @@ logfile_maxbytes=1MB logfile_backups=2 nodaemon=true -[eventlistener:supervisor-proc-exit-listener] -command=/usr/bin/supervisor-proc-exit-listener -events=PROCESS_STATE_EXITED -autostart=true -autorestart=unexpected - [program:start.sh] command=/usr/bin/start.sh priority=1 @@ -21,7 +15,7 @@ stderr_logfile=syslog command=/usr/sbin/rsyslogd -n priority=2 autostart=false -autorestart=unexpected +autorestart=false stdout_logfile=syslog stderr_logfile=syslog diff --git a/files/build_templates/dhcp_relay.service.j2 b/files/build_templates/dhcp_relay.service.j2 index c3edc27bea94..b4d9ceb38f0c 100644 --- a/files/build_templates/dhcp_relay.service.j2 +++ b/files/build_templates/dhcp_relay.service.j2 @@ -11,4 +11,4 @@ ExecStart=/usr/bin/{{ docker_container_name }}.sh attach ExecStop=/usr/bin/{{ docker_container_name }}.sh stop [Install] -WantedBy=multi-user.target swss.service teamd.service +WantedBy=multi-user.target teamd.service diff --git a/files/build_templates/radv.service.j2 b/files/build_templates/radv.service.j2 index 469cf92108ad..065f01906076 100644 --- a/files/build_templates/radv.service.j2 +++ b/files/build_templates/radv.service.j2 @@ -11,4 +11,4 @@ ExecStart=/usr/bin/{{ docker_container_name }}.sh attach ExecStop=/usr/bin/{{ docker_container_name }}.sh stop [Install] -WantedBy=multi-user.target swss.service +WantedBy=multi-user.target diff --git a/files/build_templates/snmp.service.j2 b/files/build_templates/snmp.service.j2 index ca2648cd80e8..f344f2e805ea 100644 --- a/files/build_templates/snmp.service.j2 +++ b/files/build_templates/snmp.service.j2 @@ -8,6 +8,3 @@ Before=ntp-config.service ExecStartPre=/usr/bin/{{docker_container_name}}.sh start ExecStart=/usr/bin/{{docker_container_name}}.sh attach ExecStop=/usr/bin/{{docker_container_name}}.sh stop - -[Install] -WantedBy=multi-user.target swss.service diff --git a/files/build_templates/swss.service.j2 b/files/build_templates/swss.service.j2 index c122e678c4fe..afd442e8c44a 100644 --- a/files/build_templates/swss.service.j2 +++ b/files/build_templates/swss.service.j2 @@ -14,8 +14,6 @@ After=opennsl-modules-3.16.0-6-amd64.service After=nps-modules-3.16.0-6-amd64.service {% endif %} Before=ntp-config.service -StartLimitInterval=1200 -StartLimitBurst=3 [Service] User=root @@ -54,8 +52,6 @@ ExecStopPost=/usr/bin/mst stop ExecStopPost=/etc/init.d/xpnet.sh stop ExecStopPost=/etc/init.d/xpnet.sh start {% endif %} -Restart=always -RestartSec=30 [Install] WantedBy=multi-user.target diff --git a/files/build_templates/teamd.service.j2 b/files/build_templates/teamd.service.j2 index 0255e14a34f8..5cd36c6748b9 100644 --- a/files/build_templates/teamd.service.j2 +++ b/files/build_templates/teamd.service.j2 @@ -1,7 +1,7 @@ [Unit] Description=TEAMD container -Requires=updategraph.service swss.service -After=updategraph.service swss.service +Requires=updategraph.service +After=updategraph.service Before=ntp-config.service [Service] @@ -11,4 +11,4 @@ ExecStart=/usr/bin/{{docker_container_name}}.sh attach ExecStop=/usr/bin/{{docker_container_name}}.sh stop [Install] -WantedBy=multi-user.target swss.service +WantedBy=multi-user.target diff --git a/files/scripts/supervisor-proc-exit-listener b/files/scripts/supervisor-proc-exit-listener deleted file mode 100755 index 6bc62fc400c8..000000000000 --- a/files/scripts/supervisor-proc-exit-listener +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/env python - -import os -import signal -import sys -import syslog - -from supervisor import childutils - -# Contents of file should be the names of critical processes (as defined in -# supervisor.conf file), one per line -CRITICAL_PROCESSES_FILE = '/etc/supervisor/critical_processes' - -def main(): - # Read the list of critical processes from a file - with open(CRITICAL_PROCESSES_FILE, 'r') as f: - critical_processes = [line.rstrip('\n') for line in f] - - while True: - # Transition from ACKNOWLEDGED to READY - childutils.listener.ready() - - line = sys.stdin.readline() - headers = childutils.get_headers(line) - payload = sys.stdin.read(int(headers['len'])) - - # Transition from READY to ACKNOWLEDGED - childutils.listener.ok() - - # We only care about PROCESS_STATE_EXITED events - if headers['eventname'] == 'PROCESS_STATE_EXITED': - payload_headers, payload_data = childutils.eventdata(payload + '\n') - - expected = int(payload_headers['expected']) - processname = payload_headers['processname'] - - # If a critical process exited unexpectedly, terminate supervisor - if expected == 0 and processname in critical_processes: - MSG_FORMAT_STR = "Process {} exited unxepectedly. Terminating supervisor..." - msg = MSG_FORMAT_STR.format(payload_headers['processname']) - syslog.syslog(syslog.LOG_INFO, msg) - os.kill(os.getppid(), signal.SIGTERM) - -if __name__ == "__main__": - main() diff --git a/platform/broadcom/docker-orchagent-brcm.mk b/platform/broadcom/docker-orchagent-brcm.mk index a3bbb3fe4193..066973967450 100644 --- a/platform/broadcom/docker-orchagent-brcm.mk +++ b/platform/broadcom/docker-orchagent-brcm.mk @@ -16,4 +16,4 @@ $(DOCKER_ORCHAGENT_BRCM)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro $(DOCKER_ORCHAGENT_BRCM)_RUN_OPT += -v /var/log/swss:/var/log/swss:rw $(DOCKER_ORCHAGENT_BRCM)_BASE_IMAGE_FILES += swssloglevel:/usr/bin/swssloglevel -$(DOCKER_ORCHAGENT_BRCM)_FILES += $(ARP_UPDATE_SCRIPT) $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT) +$(DOCKER_ORCHAGENT_BRCM)_FILES += $(ARP_UPDATE_SCRIPT) diff --git a/platform/cavium/docker-orchagent-cavm.mk b/platform/cavium/docker-orchagent-cavm.mk index 684376647fa9..a171a6c801d7 100644 --- a/platform/cavium/docker-orchagent-cavm.mk +++ b/platform/cavium/docker-orchagent-cavm.mk @@ -16,4 +16,4 @@ $(DOCKER_ORCHAGENT_CAVM)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro $(DOCKER_ORCHAGENT_CAVM)_RUN_OPT += -v /var/log/swss:/var/log/swss:rw $(DOCKER_ORCHAGENT_CAVM)_BASE_IMAGE_FILES += swssloglevel:/usr/bin/swssloglevel -$(DOCKER_ORCHAGENT_CAVM)_FILES += $(ARP_UPDATE_SCRIPT) $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT) +$(DOCKER_ORCHAGENT_CAVM)_FILES += $(ARP_UPDATE_SCRIPT) diff --git a/platform/centec/docker-orchagent-centec.mk b/platform/centec/docker-orchagent-centec.mk index 253e6be06e9e..e1d7fd6cf0d6 100644 --- a/platform/centec/docker-orchagent-centec.mk +++ b/platform/centec/docker-orchagent-centec.mk @@ -16,4 +16,4 @@ $(DOCKER_ORCHAGENT_CENTEC)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro $(DOCKER_ORCHAGENT_CENTEC)_RUN_OPT += -v /var/log/swss:/var/log/swss:rw $(DOCKER_ORCHAGENT_CENTEC)_BASE_IMAGE_FILES += swssloglevel:/usr/bin/swssloglevel -$(DOCKER_ORCHAGENT_CENTEC)_FILES += $(ARP_UPDATE_SCRIPT) $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT) +$(DOCKER_ORCHAGENT_CENTEC)_FILES += $(ARP_UPDATE_SCRIPT) diff --git a/platform/marvell/docker-orchagent-mrvl.mk b/platform/marvell/docker-orchagent-mrvl.mk index 6b24c5905c0b..f2cb0c997d75 100644 --- a/platform/marvell/docker-orchagent-mrvl.mk +++ b/platform/marvell/docker-orchagent-mrvl.mk @@ -15,4 +15,4 @@ $(DOCKER_ORCHAGENT_MRVL)_RUN_OPT += -v /host/machine.conf:/host/machine.conf $(DOCKER_ORCHAGENT_MRVL)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro $(DOCKER_ORCHAGENT_MRVL)_BASE_IMAGE_FILES += swssloglevel:/usr/bin/swssloglevel -$(DOCKER_ORCHAGENT_MRVL)_FILES += $(ARP_UPDATE_SCRIPT) $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT) +$(DOCKER_ORCHAGENT_MRVL)_FILES += $(ARP_UPDATE_SCRIPT) diff --git a/platform/mellanox/docker-orchagent-mlnx.mk b/platform/mellanox/docker-orchagent-mlnx.mk index 78bf8d44cf4b..6dda3c446b06 100644 --- a/platform/mellanox/docker-orchagent-mlnx.mk +++ b/platform/mellanox/docker-orchagent-mlnx.mk @@ -16,4 +16,4 @@ $(DOCKER_ORCHAGENT_MLNX)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro $(DOCKER_ORCHAGENT_MLNX)_RUN_OPT += -v /var/log/swss:/var/log/swss:rw $(DOCKER_ORCHAGENT_MLNX)_BASE_IMAGE_FILES += swssloglevel:/usr/bin/swssloglevel -$(DOCKER_ORCHAGENT_MLNX)_FILES += $(ARP_UPDATE_SCRIPT) $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT) +$(DOCKER_ORCHAGENT_MLNX)_FILES += $(ARP_UPDATE_SCRIPT) diff --git a/platform/nephos/docker-orchagent-nephos.mk b/platform/nephos/docker-orchagent-nephos.mk index 4b6ef241f536..b21e69d6cf87 100644 --- a/platform/nephos/docker-orchagent-nephos.mk +++ b/platform/nephos/docker-orchagent-nephos.mk @@ -16,4 +16,4 @@ $(DOCKER_ORCHAGENT_NEPHOS)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro $(DOCKER_ORCHAGENT_NEPHOS)_RUN_OPT += -v /var/log/swss:/var/log/swss:rw $(DOCKER_ORCHAGENT_NEPHOS)_BASE_IMAGE_FILES += swssloglevel:/usr/bin/swssloglevel -$(DOCKER_ORCHAGENT_NEPHOS)_FILES += $(ARP_UPDATE_SCRIPT) $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT) +$(DOCKER_ORCHAGENT_NEPHOS)_FILES += $(ARP_UPDATE_SCRIPT) diff --git a/rules/docker-dhcp-relay.mk b/rules/docker-dhcp-relay.mk index 7f960920ec0c..53406ad1e15f 100644 --- a/rules/docker-dhcp-relay.mk +++ b/rules/docker-dhcp-relay.mk @@ -2,7 +2,7 @@ DOCKER_DHCP_RELAY = docker-dhcp-relay.gz $(DOCKER_DHCP_RELAY)_PATH = $(DOCKERS_PATH)/docker-dhcp-relay -$(DOCKER_DHCP_RELAY)_DEPENDS += $(ISC_DHCP_COMMON) $(ISC_DHCP_RELAY) $(ISC_DHCP_CLIENT) $(REDIS_TOOLS) +$(DOCKER_DHCP_RELAY)_DEPENDS += $(ISC_DHCP_COMMON) $(ISC_DHCP_RELAY) $(ISC_DHCP_CLIENT) $(DOCKER_DHCP_RELAY)_LOAD_DOCKERS = $(DOCKER_CONFIG_ENGINE) SONIC_DOCKER_IMAGES += $(DOCKER_DHCP_RELAY) SONIC_INSTALL_DOCKER_IMAGES += $(DOCKER_DHCP_RELAY) diff --git a/rules/scripts.mk b/rules/scripts.mk index d5bcc51fd3a9..fbefdd68d2cd 100644 --- a/rules/scripts.mk +++ b/rules/scripts.mk @@ -5,11 +5,7 @@ $(ARP_UPDATE_SCRIPT)_PATH = files/scripts CONFIGDB_LOAD_SCRIPT = configdb-load.sh $(CONFIGDB_LOAD_SCRIPT)_PATH = files/scripts -SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT = supervisor-proc-exit-listener -$(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT)_PATH = files/scripts - SONIC_COPY_FILES += $(CONFIGDB_LOAD_SCRIPT) \ - $(ARP_UPDATE_SCRIPT) \ - $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT) + $(ARP_UPDATE_SCRIPT) diff --git a/src/sonic-config-engine/tests/sample_output/wait_for_intf.sh b/src/sonic-config-engine/tests/sample_output/wait_for_intf.sh index 3562082647a0..3cbde972fe18 100644 --- a/src/sonic-config-engine/tests/sample_output/wait_for_intf.sh +++ b/src/sonic-config-engine/tests/sample_output/wait_for_intf.sh @@ -1,41 +1,43 @@ #!/usr/bin/env bash -STATE_DB_IDX="6" - -PORT_TABLE_PREFIX="PORT_TABLE" -VLAN_TABLE_PREFIX="VLAN_TABLE" -LAG_TABLE_PREFIX="LAG_TABLE" - function wait_until_iface_ready { - TABLE_PREFIX=$1 - IFACE=$2 + IFACE=$1 + + echo "Waiting until interface $IFACE is up..." + + # Wait for the interface to come up (i.e., 'ip link show' returns 0) + until ip link show dev $IFACE up > /dev/null 2>&1; do + sleep 1 + done - echo "Waiting until interface $IFACE is ready..." + echo "Interface $IFACE is up" - # Wait for the interface to come up - # (i.e., interface is present in STATE_DB and state is "ok") + echo "Waiting until interface $IFACE has an IPv4 address..." + + # Wait until the interface gets assigned an IPv4 address while true; do - RESULT=$(redis-cli -n ${STATE_DB_IDX} HGET "${TABLE_PREFIX}|${IFACE}" "state" 2> /dev/null) - if [ x"$RESULT" == x"ok" ]; then + IP=$(ip -4 addr show dev $IFACE | grep "inet " | awk '{ print $2 }' | cut -d '/' -f1) + + if [ -n "$IP" ]; then break fi sleep 1 done - echo "Interface ${IFACE} is ready!" + echo "Interface $IFACE is configured with IP $IP" } -# Wait for all interfaces to be up and ready -wait_until_iface_ready ${VLAN_TABLE_PREFIX} Vlan1000 -wait_until_iface_ready ${LAG_TABLE_PREFIX} PortChannel04 -wait_until_iface_ready ${LAG_TABLE_PREFIX} PortChannel02 -wait_until_iface_ready ${LAG_TABLE_PREFIX} PortChannel03 -wait_until_iface_ready ${LAG_TABLE_PREFIX} PortChannel03 -wait_until_iface_ready ${LAG_TABLE_PREFIX} PortChannel01 -wait_until_iface_ready ${LAG_TABLE_PREFIX} PortChannel02 -wait_until_iface_ready ${LAG_TABLE_PREFIX} PortChannel04 -wait_until_iface_ready ${LAG_TABLE_PREFIX} PortChannel01 +# Wait for all interfaces to come up and have IPv4 addresses assigned +wait_until_iface_ready Vlan1000 +wait_until_iface_ready PortChannel04 +wait_until_iface_ready PortChannel02 +wait_until_iface_ready PortChannel03 +wait_until_iface_ready PortChannel03 +wait_until_iface_ready PortChannel01 +wait_until_iface_ready PortChannel02 +wait_until_iface_ready PortChannel04 +wait_until_iface_ready PortChannel01