From 2de23bbaffdc64ddbaf90cc0fbfb2682986107c6 Mon Sep 17 00:00:00 2001
From: Javi Fontan
Date: Tue, 24 Apr 2012 17:00:29 +0200
Subject: [PATCH 1/3] bug #1211: add retry and force_shutdown to scripts_common

---
Review notes (not part of the commit message):
- retry runs the monitoring command inside $( ), i.e. in a subshell, so
  variables assigned by the monitor do not survive an attempt. retry's own
  global `count` (attempt number, starting at 1) is visible to the monitor.
- force_shutdown reads $? on entry, so it must directly follow retry.
- force_shutdown now runs the cancel command itself and discards its stdout.
  The previous `$($command)` additionally executed whatever the cancel
  command printed as a second command line.
- $command is word-split, not re-parsed: callers must pass the command
  string without embedded quote characters.
- Blob ids on the index lines are those of the original series.

 src/mad/sh/scripts_common.sh | 42 ++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/src/mad/sh/scripts_common.sh b/src/mad/sh/scripts_common.sh
index 9889741187..0f2b477e1c 100644
--- a/src/mad/sh/scripts_common.sh
+++ b/src/mad/sh/scripts_common.sh
@@ -176,6 +176,48 @@ function timeout_exec_and_log
     fi
 }
 
+# Parameters are times (seconds) and monitoring command (or function).
+# Executes monitoring command until it is successful (VM is no longer
+# running) or the timeout is reached.
+function retry
+{
+    times=$1
+    function=$2
+
+    count=1
+
+    ret=$($function)
+    error=$?
+
+    while [ $count -lt $times -a "$error" != "0" ]; do
+        sleep 1
+        count=$(( $count + 1 ))
+        ret=$($function)
+        error=$?
+    done
+
+    [ "x$error" = "x0" ]
+}
+
+# Parameters are deploy_id and cancel command. If the last command is
+# unsuccessful and $FORCE_DESTROY=yes then calls cancel command
+function force_shutdown {
+    error=$?
+    deploy_id=$1
+    command=$2
+
+    if [ "x$error" != "x0" ]; then
+        if [ "$FORCE_DESTROY" = "yes" ]; then
+            log_error "Timeout shutting down $deploy_id. Destroying it"
+            $command > /dev/null
+            sleep 2
+        else
+            error_message "Timed out shutting down $deploy_id"
+            exit -1
+        fi
+    fi
+}
+
 # This function will return a command that upon execution will format a
 # filesystem with its proper parameters based on the filesystem type
 function mkfs_command {

From 19fe111fccd8a4371aebce630b9171d7e24c65c8 Mon Sep 17 00:00:00 2001
From: Javi Fontan
Date: Tue, 24 Apr 2012 17:23:02 +0200
Subject: [PATCH 2/3] bug #1211: change vmm/xen/shutdown to make use of retry and force_destroy

---
Review notes (not part of the commit message):
- monitor succeeds (returns 0) once `$XM_LIST <deploy_id>` fails, i.e. when
  the domain is no longer listed.
- The cancel command is passed to force_shutdown without escaped quotes:
  force_shutdown word-splits the string, so quote characters inside it
  would be handed to the cancel command as part of the domain name.

 src/vmm_mad/remotes/xen/shutdown | 27 +++++++++++++--------------
 src/vmm_mad/remotes/xen/xenrc    |  6 ++++++
 2 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/src/vmm_mad/remotes/xen/shutdown b/src/vmm_mad/remotes/xen/shutdown
index d7ed0a16c6..9ec12b8c87 100755
--- a/src/vmm_mad/remotes/xen/shutdown
+++ b/src/vmm_mad/remotes/xen/shutdown
@@ -21,24 +21,23 @@ source $(dirname $0)/../../scripts_common.sh
 
 deploy_id=$1
 
-function gdm {
-    $XM_LIST | grep "$deploy_id "
+if [ -z "$SHUTDOWN_TIMEOUT" ]; then
+    TIMEOUT=120
+else
+    TIMEOUT=$SHUTDOWN_TIMEOUT
+fi
+
+function monitor
+{
+    $XM_LIST "$deploy_id" > /dev/null
+
+    [ "x$?" != "x0" ]
 }
 
 exec_and_log "$XM_SHUTDOWN $deploy_id" \
     "Could not shutdown $deploy_id"
 
-OUT=$(gdm)
+retry $TIMEOUT monitor
 
-while [ -n "$OUT" -a "$(echo $OUT | awk '{print $5}')" != "---s--" ]; do
-    sleep 1
-    OUT=$(gdm)
-done
-
-OUT=$(gdm)
-
-if [ -n "$OUT" ]; then
-    $XM_CANCEL "$deploy_id"
-fi
-sleep 2
+force_shutdown "$deploy_id" "$XM_CANCEL $deploy_id"
 
diff --git a/src/vmm_mad/remotes/xen/xenrc b/src/vmm_mad/remotes/xen/xenrc
index 694d9570c1..357a93478b 100644
--- a/src/vmm_mad/remotes/xen/xenrc
+++ b/src/vmm_mad/remotes/xen/xenrc
@@ -28,4 +28,10 @@ export XM_LIST="sudo $XM_PATH list"
 export XM_SHUTDOWN="sudo $XM_PATH shutdown"
 export XM_POLL="sudo /usr/sbin/xentop -bi2"
 
+# Seconds to wait after shutdown until timeout
+export SHUTDOWN_TIMEOUT=120
+
+# Uncomment this line to force VM cancellation after shutdown timeout
+#export FORCE_DESTROY=yes
+
 

From c8d8e1d1ccfee5db17fe464aacdb5336dd96b7bc Mon Sep 17 00:00:00 2001
From: Javi Fontan
Date: Wed, 25 Apr 2012 16:12:46 +0200
Subject: [PATCH 3/3] bug #1211: change vmm/kvm/shutdown to make use of retry and force_destroy

---
Review notes (not part of the commit message):
- monitor is invoked by retry in a subshell: the `count` it compares with
  HALF_LOOP is the attempt counter that retry maintains. monitor's own
  increment and `export count` are lost when the subshell exits.
- The escaped quotes around $deploy_id were dropped from the command
  strings: force_shutdown word-splits the string, so the quote characters
  would reach virsh as part of the domain name. The exec_and_log call is
  given the same form as in the xen driver (exec_and_log itself is not
  part of this series; confirm it word-splits its first argument too).
- -/+ pairs with identical text are whitespace-only changes; the original
  whitespace could not be recovered in this copy of the series.

 src/vmm_mad/remotes/kvm/kvmrc    |  7 ++++
 src/vmm_mad/remotes/kvm/shutdown | 59 +++++++++++++++-----------------
 2 files changed, 35 insertions(+), 31 deletions(-)

diff --git a/src/vmm_mad/remotes/kvm/kvmrc b/src/vmm_mad/remotes/kvm/kvmrc
index 3c4650a9cd..0cb2f2791f 100644
--- a/src/vmm_mad/remotes/kvm/kvmrc
+++ b/src/vmm_mad/remotes/kvm/kvmrc
@@ -19,3 +19,10 @@ export LANG=C
 export LIBVIRT_URI=qemu:///system
 
 export QEMU_PROTOCOL=qemu+ssh
+
+# Seconds to wait after shutdown until timeout
+export SHUTDOWN_TIMEOUT=120
+
+# Uncomment this line to force VM cancellation after shutdown timeout
+#export FORCE_DESTROY=yes
+
diff --git a/src/vmm_mad/remotes/kvm/shutdown b/src/vmm_mad/remotes/kvm/shutdown
index 86f82002c9..936f80cb71 100755
--- a/src/vmm_mad/remotes/kvm/shutdown
+++ b/src/vmm_mad/remotes/kvm/shutdown
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/bin/bash
 # -------------------------------------------------------------------------- #
 # Copyright 2002-2012, OpenNebula Project Leads (OpenNebula.org)             #
 #                                                                            #
@@ -19,45 +19,42 @@
 source $(dirname $0)/kvmrc
 source $(dirname $0)/../../scripts_common.sh
 
-#------------------------------------------------------------------------------
-# Wait the VM to shutdown TIMEOUT (xPOLL_INTERVAL) seconds.
-# Set to ~10min
-#------------------------------------------------------------------------------
-POLL_INTERVAL=2
-TIMEOUT=300
-HALF_LOOP=$(($TIMEOUT/POLL_INTERVAL))
+count=0
 
 deploy_id=$1
 
-virsh --connect $LIBVIRT_URI shutdown $deploy_id
-
-exit_code=$?
-
-if [ "$exit_code" != "0" ]; then
-    error_message "Could not shutdown $deploy_id"
-    exit $exit_code
+if [ -z "$SHUTDOWN_TIMEOUT" ]; then
+    TIMEOUT=120
+else
+    TIMEOUT=$SHUTDOWN_TIMEOUT
 fi
 
-count=0
-while [ $(virsh --connect $LIBVIRT_URI --readonly dominfo $deploy_id > /dev/null 2>&1; echo $?) = "0" ]
-do
-    sleep $POLL_INTERVAL
-    if [ "$count" -gt "$TIMEOUT" ]
-    then
-        error_message "Timeout reached and VM $deploy_id is still alive"
-        echo "Timeout reached" >&2
-        exit 1
-    fi
+HALF_LOOP=$(($TIMEOUT/2))
 
+function monitor
+{
     # Issue another shutdown to cover occasional libvirt lack of attention
-    if [ "$count" -eq "$HALF_LOOP" ]
-    then
-        virsh --connect $LIBVIRT_URI shutdown $deploy_id
+    if [ "$count" -eq "$HALF_LOOP" ]
+    then
+        virsh --connect $LIBVIRT_URI shutdown $deploy_id
     fi
 
-    let count=count+$POLL_INTERVAL
-done
+    let count=count+1
+
+    export count
+
+    virsh --connect $LIBVIRT_URI --readonly dominfo $deploy_id > /dev/null 2>&1
+
+    [ "x$?" != "x0" ]
+}
+
+exec_and_log "virsh --connect $LIBVIRT_URI shutdown $deploy_id" \
+    "Could not shutdown $deploy_id"
+
+retry $TIMEOUT monitor
+
+force_shutdown "$deploy_id" \
+    "virsh --connect $LIBVIRT_URI destroy $deploy_id"
 
 sleep 4
 
-exit 0