libvm.sh: strengthen reboot

Unsurprisingly, rebooting machines that are running in OpenStack is
not as reliable or as fast as rebooting a local VM, which is what
vmcheck was originally written for and tested against.

Replace the:

  sleep 2 # give time for port to go down

which is rife with raciness, with a stronger boot_id-based check to
ensure we're in a new boot. Run "sync" before rebooting which sometimes
helps (though I didn't fully investigate why or whether it always helps,
there's probably something more subtle going on underneath). Increase
the timeout to 120s.

Closes: #543
Approved by: cgwalters
This commit is contained in:
Jonathan Lebon 2016-12-07 15:14:54 -05:00 committed by Atomic Bot
parent 9c11b057f7
commit 5bf7a040a5
2 changed files with 16 additions and 9 deletions

View File

@ -60,7 +60,7 @@ inherit: true
context: vmcheck
required: false
required: true
cluster:
hosts:

View File

@ -75,25 +75,32 @@ EOF
# wait until ssh is available on the vm
# - $1    timeout in seconds (optional, defaults to 0)
# - $2    previous bootid (optional); when given, only a boot whose id
#         differs from it counts, so a machine that has not gone down yet
#         is not mistaken for a finished reboot
# Returns 0 once a (new) boot id could be read, 1 on timeout.
vm_ssh_wait() {
  # Read the optional arguments by position instead of using `shift`, which
  # fails (and aborts callers running under `set -e`) when they are omitted.
  local remaining=${1:-0}
  local old_bootid=${2:-}
  local bootid

  while :; do
    if bootid=$(vm_get_boot_id 2>/dev/null); then
      # Require a non-empty id, and quote the right-hand side so the previous
      # id is compared literally rather than being treated as a glob pattern.
      if [[ -n $bootid && $bootid != "$old_bootid" ]]; then
        return 0
      fi
    fi
    # The check above therefore also runs once at the timeout mark.
    if [ "$remaining" -le 0 ]; then
      break
    fi
    remaining=$((remaining - 1))
    sleep 1
  done

  # `false` ignores its arguments, so report the timeout explicitly.
  echo "Timed out while waiting for SSH." >&2
  return 1
}
# print the kernel boot id as read through vm_cmd; stderr is discarded and
# the exit status is that of vm_cmd
vm_get_boot_id() {
  local boot_id_file=/proc/sys/kernel/random/boot_id
  vm_cmd cat "$boot_id_file" 2>/dev/null
}
# reboot the vm and wait (up to 120s) until it is reachable in a new boot
vm_reboot() {
  local bootid

  # flush pending writes before going down
  vm_cmd sync
  # remember the current boot so the wait below can tell old from new
  bootid=$(vm_get_boot_id)
  # the connection may drop while the machine goes down, so a failing
  # exit status here is deliberately ignored
  vm_cmd systemctl reboot || :
  # no fixed sleep: comparing boot ids tells a not-yet-down machine from a
  # rebooted one; quote the id so it is always passed as a single argument
  vm_ssh_wait 120 "$bootid"
}
# check that the given files exist on the VM