Skip to content

Commit cfe01f1

Browse files
authored
Separate syncd service from swss service (#2051)
* [swss.sh] refactor swss service script code - Move checks and waits to helper functions. - Remove early returns from code stream Signed-off-by: Ying Xie <[email protected]> * [swss.sh] Add debug log for service state changes Signed-off-by: Ying Xie <[email protected]> * [syncd] Separate out syncd service from swss service Still make them start/stop/restart synchronously so existing scripts continue working. Signed-off-by: Ying Xie <[email protected]> * Remove extra 'After' in swss service and remove syncd docker warm boot code Syncd warm boot needs more thinking; we can put it back once the workflow has been defined and is ready for coding/testing. * [syncd] syncd start/stop/restart shouldn't affect swss state Semi-detach syncd service state change from swss: - a swss state change still makes the syncd service follow, except during warm boot - a syncd state change will only affect itself. Signed-off-by: Ying Xie <[email protected]> * add missing '{'
1 parent 715806c commit cfe01f1

File tree

5 files changed

+242
-57
lines changed

5 files changed

+242
-57
lines changed

files/build_templates/sonic_debian_extension.j2

+2-1
Original file line numberDiff line numberDiff line change
@@ -293,8 +293,9 @@ sudo LANG=C chroot $FILESYSTEM_ROOT fuser -km /sys || true
293293
sudo LANG=C chroot $FILESYSTEM_ROOT umount -lf /sys
294294
{% endif %}
295295

296-
# Copy swss service script
296+
# Copy swss and syncd service script
297297
sudo LANG=C cp $SCRIPTS_DIR/swss.sh $FILESYSTEM_ROOT/usr/local/bin/swss.sh
298+
sudo LANG=C cp $SCRIPTS_DIR/syncd.sh $FILESYSTEM_ROOT/usr/local/bin/syncd.sh
298299

299300
# Copy systemd timer configuration
300301
# It implements delayed start of services

files/build_templates/swss.service.j2

-5
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,6 @@ Requires=nps-modules-4.9.0-7-amd64.service
88
{% endif %}
99
After=database.service updategraph.service
1010
After=interfaces-config.service
11-
{% if sonic_asic_platform == 'broadcom' %}
12-
After=opennsl-modules-4.9.0-7-amd64.service
13-
{% elif sonic_asic_platform == 'nephos' %}
14-
After=nps-modules-4.9.0-7-amd64.service
15-
{% endif %}
1611

1712
[Service]
1813
User=root
+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
[Unit]
2+
Description=syncd service
3+
Requires=database.service updategraph.service
4+
{% if sonic_asic_platform == 'broadcom' %}
5+
Requires=opennsl-modules-4.9.0-7-amd64.service
6+
{% elif sonic_asic_platform == 'nephos' %}
7+
Requires=nps-modules-4.9.0-7-amd64.service
8+
{% endif %}
9+
After=database.service updategraph.service
10+
After=interfaces-config.service
11+
{% if sonic_asic_platform == 'broadcom' %}
12+
After=opennsl-modules-4.9.0-7-amd64.service
13+
{% elif sonic_asic_platform == 'nephos' %}
14+
After=nps-modules-4.9.0-7-amd64.service
15+
{% endif %}
16+
17+
[Service]
18+
User=root
19+
Environment=sonic_asic_platform={{ sonic_asic_platform }}
20+
ExecStart=/usr/local/bin/syncd.sh start
21+
ExecStop=/usr/local/bin/syncd.sh stop
22+
23+
[Install]
24+
WantedBy=multi-user.target

files/scripts/swss.sh

+93-51
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,56 @@
11
#!/bin/bash
22

3-
start() {
3+
# Identity of this service and of the peer service it drives.
readonly SERVICE="swss"
readonly PEER="syncd"
# Debug log and lock file paths used by the helper functions below.
# NOTE(review): both live under world-writable /tmp with fixed names; the lock
# file is opened for writing by this script, so consider a root-owned directory.
readonly DEBUGLOG="/tmp/swss-syncd-debug.log"
readonly LOCKFILE="/tmp/swss-syncd-lock"
7+
8+
#######################################
# Append one timestamped line to the debug log.
# Globals:   DEBUGLOG (read) - path of the file appended to
# Arguments: $1 - message text
# Outputs:   "<date output> - <message>" appended to ${DEBUGLOG}
#######################################
function debug()
{
    # Quote the timestamp so its internal whitespace is not word-split and
    # collapsed, and quote the target so the path is taken literally.
    printf '%s - %s\n' "$(date)" "${1-}" >> "${DEBUGLOG}"
}
12+
13+
#######################################
# Take an exclusive flock on ${LOCKFILE} before a service state change.
# Globals:   LOCKFILE (read), SERVICE (read), LOCKFD (written)
# Arguments: none
# Returns:   0 once the lock is held, 1 if flock reported a failure
#######################################
function lock_service_state_change()
{
    debug "Locking ${LOCKFILE} from ${SERVICE} service"

    # Let bash allocate a free descriptor, store its number in LOCKFD and open
    # the lock file on it.
    exec {LOCKFD}>"${LOCKFILE}"
    if ! /usr/bin/flock -x "${LOCKFD}"; then
        # Do not claim the lock in the log when flock did not succeed.
        debug "Failed to lock ${LOCKFILE} (${LOCKFD}) from ${SERVICE} service"
        return 1
    fi
    # Release the lock on exit (0) and on SIGINT/SIGQUIT/SIGTERM (2 3 15).
    # ${LOCKFD} is expanded now, while the trap is being installed.
    trap "/usr/bin/flock -u ${LOCKFD}" 0 2 3 15

    debug "Locked ${LOCKFILE} (${LOCKFD}) from ${SERVICE} service"
}
23+
24+
#######################################
# Release the flock held on descriptor ${LOCKFD}.
# Globals:   LOCKFILE (read), LOCKFD (read), SERVICE (read)
# Arguments: none
# Returns:   flock's exit status, or 0 when no descriptor was recorded
#######################################
function unlock_service_state_change()
{
    debug "Unlocking ${LOCKFILE} (${LOCKFD}) from ${SERVICE} service"
    # Without a recorded descriptor there is nothing to unlock; avoid calling
    # flock with a missing argument.
    if [[ -z "${LOCKFD-}" ]]; then
        return 0
    fi
    /usr/bin/flock -u "${LOCKFD}"
}
29+
30+
#######################################
# Set WARM_BOOT to "true" or "false" from the warm-restart flags in redis db 4.
# Globals:   SERVICE (read)
#            SYSTEM_WARM_START, SERVICE_WARM_START, WARM_BOOT (written)
# Arguments: none
#######################################
function check_warm_boot()
{
    SYSTEM_WARM_START="$(/usr/bin/redis-cli -n 4 hget "WARM_RESTART|system" enable)"
    SERVICE_WARM_START="$(/usr/bin/redis-cli -n 4 hget "WARM_RESTART|${SERVICE}" enable)"
    # Either flag may come back empty; WARM_BOOT is always given a definite value.
    if [[ "${SYSTEM_WARM_START}" == "true" || "${SERVICE_WARM_START}" == "true" ]]; then
        WARM_BOOT="true"
    else
        WARM_BOOT="false"
    fi
}
40+
41+
#######################################
# Downgrade WARM_BOOT to "false" when orchagent has no restart_count in redis
# db 6.
# Globals:   WARM_BOOT (read/written), RESTART_COUNT (written)
# Arguments: none
#######################################
function validate_restart_count()
{
    # Only relevant when a warm boot was requested.
    [[ "${WARM_BOOT}" == "true" ]] || return 0

    RESTART_COUNT="$(/usr/bin/redis-cli -n 6 hget "WARM_RESTART_TABLE|orchagent" restart_count)"
    # We have to make sure db data has not been flushed.
    if [[ -z "${RESTART_COUNT}" ]]; then
        WARM_BOOT="false"
    fi
}
51+
52+
function wait_for_database_service()
53+
{
454
# Wait for redis server start before database clean
555
until [[ $(/usr/bin/docker exec database redis-cli ping | grep -c PONG) -gt 0 ]];
656
do sleep 1;
@@ -10,66 +60,58 @@ start() {
1060
until [[ $(/usr/bin/docker exec database redis-cli -n 4 GET "CONFIG_DB_INITIALIZED") ]];
1161
do sleep 1;
1262
done
63+
}
1364

14-
SYSTEM_WARM_START=`/usr/bin/docker exec database redis-cli -n 4 HGET "WARM_RESTART|system" enable`
15-
SWSS_WARM_START=`/usr/bin/docker exec database redis-cli -n 4 HGET "WARM_RESTART|swss" enable`
16-
# if warm start enabled, just do swss docker start.
17-
# Don't flush DB or try to start other modules.
18-
if [[ "$SYSTEM_WARM_START" == "true" ]] || [[ "$SWSS_WARM_START" == "true" ]]; then
19-
RESTART_COUNT=`redis-cli -n 6 hget "WARM_RESTART_TABLE|orchagent" restart_count`
20-
# We have to make sure db data has not been flushed.
21-
if [[ -n "$RESTART_COUNT" ]]; then
22-
/usr/bin/swss.sh start
23-
/usr/bin/swss.sh attach
24-
return 0
25-
fi
26-
fi
65+
#######################################
# Start the ${SERVICE} docker and, unless warm booting, the peer service.
# Globals:   SERVICE, PEER (read); WARM_BOOT (read, set by check_warm_boot)
# Arguments: none
#######################################
start() {
    debug "Starting ${SERVICE} service..."

    lock_service_state_change

    wait_for_database_service
    check_warm_boot
    validate_restart_count

    debug "Warm boot flag: ${SERVICE} ${WARM_BOOT}."

    # Don't flush DB during warm boot
    if [[ "${WARM_BOOT}" != "true" ]]; then
        local db
        for db in 0 2 5 6; do
            /usr/bin/docker exec database redis-cli -n "${db}" FLUSHDB
        done
    fi

    # start service docker
    "/usr/bin/${SERVICE}.sh" start
    debug "Started ${SERVICE} service..."

    # Unlock has to happen before reaching out to peer service
    unlock_service_state_change

    # On warm boot the peer service is left as it is.
    if [[ "${WARM_BOOT}" != "true" ]]; then
        /bin/systemctl start "${PEER}"
    fi
    "/usr/bin/${SERVICE}.sh" attach
}
5296

5397
#######################################
# Stop the ${SERVICE} docker and, unless warm booting, the peer service.
# Globals:   SERVICE, PEER, LOCKFILE (read); WARM_BOOT (read, set by
#            check_warm_boot)
# Arguments: none
#######################################
stop() {
    debug "Stopping ${SERVICE} service..."

    # Make sure the lock file exists before locking it.
    [[ -f "${LOCKFILE}" ]] || /usr/bin/touch "${LOCKFILE}"

    lock_service_state_change
    check_warm_boot
    debug "Warm boot flag: ${SERVICE} ${WARM_BOOT}."

    "/usr/bin/${SERVICE}.sh" stop
    debug "Stopped ${SERVICE} service..."

    # Unlock has to happen before reaching out to peer service
    unlock_service_state_change

    # if warm start enabled, don't stop peer service
    if [[ "${WARM_BOOT}" != "true" ]]; then
        /bin/systemctl stop "${PEER}"
    fi
}
75117

files/scripts/syncd.sh

+123
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
#!/bin/bash
2+
3+
# Identity of this service and of its peer service.
readonly SERVICE="syncd"
readonly PEER="swss"
# Debug log and lock file paths used by the helper functions below.
# NOTE(review): both live under world-writable /tmp with fixed names; the lock
# file is opened for writing by this script, so consider a root-owned directory.
readonly DEBUGLOG="/tmp/swss-syncd-debug.log"
readonly LOCKFILE="/tmp/swss-syncd-lock"
7+
8+
#######################################
# Append one timestamped line to the debug log.
# Globals:   DEBUGLOG (read) - path of the file appended to
# Arguments: $1 - message text
# Outputs:   "<date output> - <message>" appended to ${DEBUGLOG}
#######################################
function debug()
{
    # Quote the timestamp so its internal whitespace is not word-split and
    # collapsed, and quote the target so the path is taken literally.
    printf '%s - %s\n' "$(date)" "${1-}" >> "${DEBUGLOG}"
}
12+
13+
#######################################
# Take an exclusive flock on ${LOCKFILE} before a service state change.
# Globals:   LOCKFILE (read), SERVICE (read), LOCKFD (written)
# Arguments: none
# Returns:   0 once the lock is held, 1 if flock reported a failure
#######################################
function lock_service_state_change()
{
    debug "Locking ${LOCKFILE} from ${SERVICE} service"

    # Let bash allocate a free descriptor, store its number in LOCKFD and open
    # the lock file on it.
    exec {LOCKFD}>"${LOCKFILE}"
    if ! /usr/bin/flock -x "${LOCKFD}"; then
        # Do not claim the lock in the log when flock did not succeed.
        debug "Failed to lock ${LOCKFILE} (${LOCKFD}) from ${SERVICE} service"
        return 1
    fi
    # Release the lock on exit (0) and on SIGINT/SIGQUIT/SIGTERM (2 3 15).
    # ${LOCKFD} is expanded now, while the trap is being installed.
    trap "/usr/bin/flock -u ${LOCKFD}" 0 2 3 15

    debug "Locked ${LOCKFILE} (${LOCKFD}) from ${SERVICE} service"
}
23+
24+
#######################################
# Release the flock held on descriptor ${LOCKFD}.
# Globals:   LOCKFILE (read), LOCKFD (read), SERVICE (read)
# Arguments: none
# Returns:   flock's exit status, or 0 when no descriptor was recorded
#######################################
function unlock_service_state_change()
{
    debug "Unlocking ${LOCKFILE} (${LOCKFD}) from ${SERVICE} service"
    # Without a recorded descriptor there is nothing to unlock; avoid calling
    # flock with a missing argument.
    if [[ -z "${LOCKFD-}" ]]; then
        return 0
    fi
    /usr/bin/flock -u "${LOCKFD}"
}
29+
30+
#######################################
# Set WARM_BOOT to "true" or "false" from the system warm-restart flag in
# redis db 4.
# Globals:   SYSTEM_WARM_START, WARM_BOOT (written)
# Arguments: none
#######################################
function check_warm_boot()
{
    SYSTEM_WARM_START="$(/usr/bin/redis-cli -n 4 hget "WARM_RESTART|system" enable)"
    # SYSTEM_WARM_START could be empty, always make WARM_BOOT meaningful.
    if [[ "${SYSTEM_WARM_START}" == "true" ]]; then
        WARM_BOOT="true"
    else
        WARM_BOOT="false"
    fi
}
40+
41+
#######################################
# Block until redis in the "database" container answers PING and the
# CONFIG_DB_INITIALIZED key in db 4 has a non-empty value.
# Arguments: none
# NOTE(review): both loops poll once per second with no timeout.
#######################################
function wait_for_database_service()
{
    # Wait for redis server start before database clean
    until /usr/bin/docker exec database redis-cli ping | grep -q PONG; do
        sleep 1
    done

    # Wait for configDB initialization
    until [[ -n "$(/usr/bin/docker exec database redis-cli -n 4 GET "CONFIG_DB_INITIALIZED")" ]]; do
        sleep 1
    done
}
53+
54+
#######################################
# Start the ${SERVICE} docker; on a cold start first flush redis db 1 and run
# the platform-specific bring-up.
# Globals:   SERVICE, sonic_asic_platform (read); WARM_BOOT (read, set by
#            check_warm_boot); FAST_BOOT (written on mellanox)
# Arguments: none
#######################################
start() {
    debug "Starting ${SERVICE} service..."

    lock_service_state_change

    wait_for_database_service
    check_warm_boot

    debug "Warm boot flag: ${SERVICE} ${WARM_BOOT}."

    # Don't flush DB during warm boot
    if [[ "${WARM_BOOT}" != "true" ]]; then
        /usr/bin/docker exec database redis-cli -n 1 FLUSHDB

        # platform specific tasks
        # A quoted case keeps an empty or whitespace-containing platform value
        # from breaking the comparison.
        case "${sonic_asic_platform}" in
            mellanox)
                # NOTE(review): FAST_BOOT is assigned but not exported here;
                # confirm how the commands below are expected to see it.
                FAST_BOOT=1
                /usr/bin/mst start
                /usr/bin/mlnx-fw-upgrade.sh
                /etc/init.d/sxdkernel start
                /sbin/modprobe i2c-dev
                /etc/mlnx/mlnx-hw-management start
                ;;
            cavium)
                /etc/init.d/xpnet.sh start
                ;;
        esac
    fi

    # start service docker
    "/usr/bin/${SERVICE}.sh" start
    debug "Started ${SERVICE} service..."

    unlock_service_state_change
    "/usr/bin/${SERVICE}.sh" attach
}
88+
89+
#######################################
# Stop the ${SERVICE} docker; on a cold stop also run the platform-specific
# teardown.
# Globals:   SERVICE, sonic_asic_platform (read); WARM_BOOT (read, set by
#            check_warm_boot)
# Arguments: none
# Returns:   exit status of unlock_service_state_change
#######################################
stop() {
    debug "Stopping ${SERVICE} service..."

    lock_service_state_change
    check_warm_boot
    debug "Warm boot flag: ${SERVICE} ${WARM_BOOT}."

    "/usr/bin/${SERVICE}.sh" stop
    debug "Stopped ${SERVICE} service..."

    # if warm start enabled, skip the platform specific teardown
    if [[ "${WARM_BOOT}" != "true" ]]; then
        # platform specific tasks
        # A quoted case keeps an empty or whitespace-containing platform value
        # from breaking the comparison.
        case "${sonic_asic_platform}" in
            mellanox)
                /etc/mlnx/mlnx-hw-management stop
                /etc/init.d/sxdkernel stop
                /usr/bin/mst stop
                ;;
            cavium)
                # NOTE(review): xpnet is stopped and then started again here;
                # presumably a deliberate reset - confirm.
                /etc/init.d/xpnet.sh stop
                /etc/init.d/xpnet.sh start
                ;;
        esac
    fi

    unlock_service_state_change
}
114+
115+
# Entry point: run the function named by the first argument, or print usage
# on stderr and exit 1 for anything else.
case "$1" in
    start|stop)
        "$1"
        ;;
    *)
        echo "Usage: $0 {start|stop}" >&2
        exit 1
        ;;
esac

0 commit comments

Comments
 (0)