Skip to content

Commit

Permalink
[warm boot] introduce command line options to warm/fast reboot scripts (
Browse files Browse the repository at this point in the history
sonic-net#399)

* [warm reboot] introducing command line options parsing

- Add -h -? to show help information
- Add -f as force option
- Add -v as verbose option
- Introduce helper function debug for verbose output

Signed-off-by: Ying Xie <[email protected]>

* [warm reboot] allow ignoring orchagent pausing failures with -f option

Signed-off-by: Ying Xie <[email protected]>

* [warm reboot] add verbose output statements

Signed-off-by: Ying Xie <[email protected]>

* [warm reboot] send debug logs to syslog

Signed-off-by: Ying Xie <[email protected]>
  • Loading branch information
yxieca authored Dec 5, 2018
1 parent e438c66 commit 17519cf
Showing 1 changed file with 61 additions and 1 deletion.
62 changes: 61 additions & 1 deletion scripts/fast-reboot
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ WARM_DIR=/host/warmboot
REDIS_FILE=dump.rdb
REBOOT_SCRIPT_NAME=$(basename $0)
REBOOT_TYPE="${REBOOT_SCRIPT_NAME}"
VERBOSE=no
FORCE=no

# Check root privileges
if [[ "$EUID" -ne 0 ]]
Expand All @@ -15,10 +17,49 @@ then
exit 1
fi

# Emit a progress message.
# Globals:   VERBOSE (read) - when "yes" the message is also printed to stdout
# Arguments: $@ - the words of the message
# Outputs:   "<date> <message>" on stdout in verbose mode; the message is
#            always handed to logger
# Returns:   exit status of logger
function debug()
{
    if [[ x"${VERBOSE}" == x"yes" ]]; then
        # Quoted on purpose: the message must be printed verbatim, without
        # word splitting or glob expansion of characters such as '*'.
        printf '%s %s\n' "$(date)" "$*"
    fi
    # '--' stops option parsing so a message starting with '-' is still logged.
    logger -- "$*"
}

# Print the usage summary to stdout and terminate the script with status 0.
# Globals:   REBOOT_SCRIPT_NAME (read) - name shown in the usage line
# Outputs:   usage text on stdout
# Returns:   never returns; exits 0
function showHelpAndExit()
{
    printf '%s\n' \
        "Usage: ${REBOOT_SCRIPT_NAME} [options]" \
        " -h -? : get this help" \
        " -v : turn on verbose" \
        " -f : force execution"

    exit 0
}

# Parse the command line flags of the reboot script.
# Globals:   VERBOSE (written) - set to "yes" by -v
#            FORCE   (written) - set to "yes" by -f
#            OPTIND  (written) - reset to 1 before parsing
# Arguments: $@ - the arguments to parse
# Returns:   0 when parsing finishes; -h and -? do not return because
#            showHelpAndExit exits the script
function parseOptions()
{
    # Keep the getopts loop variable out of the global namespace.
    local opt
    # getopts resumes from OPTIND; without this reset a second call would
    # start past the end of its arguments and parse nothing.
    OPTIND=1

    while getopts "vfh?" opt; do
        case "${opt}" in
            # getopts also reports options missing from the option string
            # as '?', so those end up showing the help as well.
            h | \? )
                showHelpAndExit
                ;;
            v )
                VERBOSE=yes
                ;;
            f )
                FORCE=yes
                ;;
        esac
    done
}

sonic_asic_type=$(sonic-cfggen -y /etc/sonic/sonic_version.yml -v asic_type)

function clear_warm_boot()
{
debug "Failure ($?) cleanup ..."
config warm_restart disable || /bin/true
/sbin/kexec -u || /bin/true

Expand All @@ -43,6 +84,7 @@ function cleanup_except_table()

function initialize_pre_shutdown()
{
debug "Initialize pre-shutdown ..."
TABLE="WARM_RESTART_TABLE|warm-shutdown"
RESTORE_COUNT=`/usr/bin/redis-cli -n 6 hget "${TABLE}" restore_count`
if [[ -z "$RESTORE_COUNT" ]]; then
Expand All @@ -53,11 +95,13 @@ function initialize_pre_shutdown()

# Run syncd_request_shutdown with --pre inside the syncd container.
# Outputs:   a progress message through debug
# Returns:   exit status of the docker exec command
function request_pre_shutdown()
{
    local -r syncd_container="syncd"
    local -r shutdown_tool="/usr/bin/syncd_request_shutdown"

    debug "Requesting pre-shutdown ..."
    /usr/bin/docker exec -i "${syncd_container}" "${shutdown_tool}" --pre
}

function wait_for_pre_shutdown_complete_or_fail()
{
debug "Waiting for pre-shutdown ..."
TABLE="WARM_RESTART_TABLE|warm-shutdown"
STATE="requesting"
declare -i waitcount;
Expand All @@ -73,13 +117,15 @@ function wait_for_pre_shutdown_complete_or_fail()
done

if [[ x"${STATE}" != x"pre-shutdown-succeeded" ]]; then
echo "Syncd pre-shutdown failed: ${STATE} ..."
debug "Syncd pre-shutdown failed: ${STATE} ..."
exit 10
fi
debug "Pre-shutdown succeeded ..."
}

function backup_datebase()
{
debug "Backing up database ..."
# Dump redis content to a file 'dump.rdb' in warmboot directory
mkdir -p $WARM_DIR
# Delete keys in stateDB except FDB_TABLE|* and WARM_RESTA*
Expand All @@ -95,6 +141,8 @@ function backup_datebase()
docker exec -i database rm /var/lib/redis/$REDIS_FILE
}

# Hand the script's arguments to the option parser unchanged; the quotes keep
# each argument as one word and prevent glob expansion.
parseOptions "$@"

# Check reboot type supported
BOOT_TYPE_ARG="cold"
case "$REBOOT_TYPE" in
Expand Down Expand Up @@ -205,20 +253,27 @@ if [[ "$REBOOT_TYPE" = "warm-reboot" ]]; then
# Freeze orchagent for warm restart
# Try to freeze up to 5 times: orchagent may be in a transient state in which it cannot be frozen
# Note: assumes that 1 second is enough for orchagent to process the request and respond whether it froze or not
debug "Pausing orchagent ..."
# Five attempts in total; $i counts down the attempts that remain.
for i in 4 3 2 1 0; do
    if docker exec -i swss /usr/bin/orchagent_restart_check -w 1000; then
        break
    fi
    echo "RESTARTCHECK failed $i" >&2
    if [[ "$i" != "0" ]]; then
        # Attempts remain: wait a second, then try again.
        sleep 1
        continue
    fi
    # Last attempt failed: abort with status 10 unless -f was given.
    echo "RESTARTCHECK failed finally" >&2
    [[ x"${FORCE}" == x"yes" ]] || exit 10
    debug "Ignoring orchagent pausing failure ..."
    break
done
fi

# Kill bgpd to start the bgp graceful restart procedure
debug "Stopping bgp ..."
docker exec -i bgp pkill -9 zebra
docker exec -i bgp pkill -9 bgpd
debug "Stopped bgp ..."

# Kill lldp, otherwise it sends information about the reboot
docker kill lldp > /dev/null
Expand Down Expand Up @@ -267,19 +322,23 @@ fi

# Stop teamd gracefully
case "$REBOOT_TYPE" in
    warm-reboot | fastfast-reboot)
        debug "Stopping teamd ..."
        # USR1 tells every teamd instance to stop and prepares teamd for
        # warm-reboot.
        # Note: the signal must be sent before syncd is stopped, because the
        # last packet is sent through the CPU port.
        docker exec -i teamd pkill -USR1 teamd > /dev/null
        debug "Stopped teamd ..."
        ;;
esac

debug "Stopping syncd ..."
case "$sonic_asic_type" in
    mellanox)
        # On mellanox the syncd container is killed directly.
        docker kill syncd
        ;;
    *)
        # syncd service stop is capable of handling both warm/fast/cold shutdown
        systemctl stop syncd
        ;;
esac
debug "Stopped syncd ..."

# Kill other containers to make the reboot faster
docker ps -q | xargs docker kill > /dev/null
Expand Down Expand Up @@ -312,6 +371,7 @@ sync
sleep 1
sync

debug "Rebooting ..."
# Reboot: explicitly call the Linux native reboot under /sbin
echo "Rebooting to $NEXT_SONIC_IMAGE..."
exec /sbin/reboot
Expand Down

0 comments on commit 17519cf

Please sign in to comment.