From 212de23fab8195102ee14d82c74d2209cdc6c00a Mon Sep 17 00:00:00 2001 From: Vlastimil Holer Date: Wed, 22 Nov 2017 15:54:28 +0100 Subject: [PATCH] B #1481: Better estimate image sizes (#1502) --- src/datastore_mad/remotes/common/stat | 12 +- src/datastore_mad/remotes/downloader.sh | 26 ++- src/datastore_mad/remotes/libfs.sh | 238 ++++++++++++++++++++---- src/tm_mad/fs_lvm/clone | 2 +- 4 files changed, 229 insertions(+), 49 deletions(-) diff --git a/src/datastore_mad/remotes/common/stat b/src/datastore_mad/remotes/common/stat index 07adcaca51..af41fb3ed7 100755 --- a/src/datastore_mad/remotes/common/stat +++ b/src/datastore_mad/remotes/common/stat @@ -45,15 +45,21 @@ unset i XPATH_ELEMENTS while IFS= read -r -d '' element; do XPATH_ELEMENTS[i++]="$element" -done < <($XPATH /DS_DRIVER_ACTION_DATA/IMAGE/PATH) +done < <($XPATH /DS_DRIVER_ACTION_DATA/IMAGE/PATH \ + /DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/NO_DECOMPRESS \ + /DS_DRIVER_ACTION_DATA/DATASTORE/TEMPLATE/LIMIT_TRANSFER_BW) -SRC="${XPATH_ELEMENTS[0]}" +unset i + +SRC="${XPATH_ELEMENTS[i++]}" +NO_DECOMPRESS="${XPATH_ELEMENTS[i++]}" +LIMIT_TRANSFER_BW="${XPATH_ELEMENTS[i++]}" # ------------------------------------------------------------------------------ # Compute the size # ------------------------------------------------------------------------------ -SIZE=`fs_size $SRC` +SIZE=`fs_size "${SRC}" "${NO_DECOMPRESS}" "${LIMIT_TRANSFER_BW}"` if [ "$SIZE" = "0" ]; then log_error "Cannot determine size for $SRC" diff --git a/src/datastore_mad/remotes/downloader.sh b/src/datastore_mad/remotes/downloader.sh index 748da6bb33..1f85dda909 100755 --- a/src/datastore_mad/remotes/downloader.sh +++ b/src/datastore_mad/remotes/downloader.sh @@ -57,7 +57,10 @@ function get_decompressor echo "gunzip -c -" ;; "application/x-bzip2") - echo "bunzip2 -c -" + echo "bunzip2 -qc -" + ;; + "application/x-xz") + echo "unxz -c -" ;; *) echo "cat" @@ -221,7 +224,7 @@ function get_rbd_cmd echo "ssh '$(esc_sq "$DST_HOST")' \"$RBD export '$(esc_sq "$SOURCE")' -\"" } -TEMP=`getopt -o m:s:l:n -l md5:,sha1:,limit:,nodecomp -- "$@"` +TEMP=`getopt -o m:s:l:c:n -l md5:,sha1:,limit:,max-size:,nodecomp -- "$@"` if [ $? != 0 ] ; then echo "Arguments error" @@ -250,6 +253,10 @@ while true; do export LIMIT_RATE="$2" shift 2 ;; + -c|--max-size) + export MAX_SIZE="$2" + shift 2 + ;; --) shift break @@ -324,7 +331,19 @@ esac file_type=$(get_type "$command") decompressor=$(get_decompressor "$file_type") -eval "$command" | tee >( hasher $HASH_TYPE) | decompress "$decompressor" "$TO" +if [ -z "${MAX_SIZE}" ]; then + eval "$command" | \ + tee >( hasher $HASH_TYPE) | \ + decompress "$decompressor" "$TO" +else + # ignore broken pipe + trap '' PIPE + + eval "$command" | \ + tee >( hasher $HASH_TYPE) 2>/dev/null | \ + decompress "$decompressor" "$TO" 2>/dev/null | \ + head -c "${MAX_SIZE}" +fi if [ "$?" != "0" -o "$PIPESTATUS" != "0" ]; then echo "Error copying" >&2 @@ -344,4 +363,3 @@ fi if [ "$TO" != "-" ]; then unarchive "$TO" fi - diff --git a/src/datastore_mad/remotes/libfs.sh b/src/datastore_mad/remotes/libfs.sh index b477945584..16ada366ee 100644 --- a/src/datastore_mad/remotes/libfs.sh +++ b/src/datastore_mad/remotes/libfs.sh @@ -60,6 +60,14 @@ function image_format { echo "$($QEMU_IMG info $1 2>/dev/null | grep -Po '(?<=file format: )\w+')" } +#------------------------------------------------------------------------------- +# Get image virtual size using qemu-img +# @return string representation of the format, empty if error +#------------------------------------------------------------------------------- +function image_vsize { + echo "$($QEMU_IMG info "${1}" 2>/dev/null | sed -n 's/.*(\([0-9]*\) bytes).*/\1/p')" +} + #------------------------------------------------------------------------------- # Generates an unique image hash. Requires ID to be set # @return hash for the image (empty if error) @@ -118,7 +126,7 @@ function set_downloader_args { HASHES="$HASHES --sha1 $2" fi - if [ "$3" = "yes" -o "$3" = "Yes" -o "$3" = "YES" ]; then + if [ "x$(echo "$3" | tr A-Z a-z)" = "xyes" ]; then HASHES="$HASHES --nodecomp" fi @@ -149,6 +157,17 @@ function file_size { $STAT_CMD "$*" } + +#------------------------------------------------------------------------------ +# Gets the MIME type of a file +# @param $1 - Path to the image +# @return MIME type +#------------------------------------------------------------------------------ + +function file_type { + file -b --mime-type "${1}" | tr A-Z a-z +} + #------------------------------------------------------------------------------ # Gets the size in bytes of a gzipped file # @param $1 - Path to the image @@ -159,58 +178,195 @@ function gzip_file_size { gzip -l "$1" | tail -n 1 | awk '{print $2}' } +#------------------------------------------------------------------------------ +# Gets the size in bytes of a xzipped file +# @param $1 - Path to the image +# @return size of the image in bytes +#------------------------------------------------------------------------------ + +function xz_file_size { + xz -l --robot "$1" | tail -n 1 | awk '{print $5}' +} + +#------------------------------------------------------------------------------ +# Gets the size in bytes of a bzipped file +# @param $1 - Path to the image +# @return size of the image in bytes +#------------------------------------------------------------------------------ + +function bzip_file_size { + bunzip2 -c "${1}" | wc -c +} + #------------------------------------------------------------------------------- # Computes the size of an image # @param $1 - Path to the image +# @param $2 - NO_DECOMPRESS +# @param $3 - BW LIMIT # @return size of the image in Mb #------------------------------------------------------------------------------- function fs_size { + SRC=$1 + NO_DECOMPRESS=$(echo "$2" | tr A-Z a-z) + LIMIT_TRANSFER_BW=$3 - case $1 in - http://*|https://*) - HEADERS=`curl -LIk --max-time 60 "${1}" 2>&1` + DOWNLOADER_ARGS=`set_downloader_args '' '' "${NO_DECOMPRESS}" "${LIMIT_TRANSFER_BW}"` - if echo "$HEADERS" | grep -i -q "OpenNebula-AppMarket-Size"; then - # An AppMarket/Marketplace URL - SIZE=$(echo "$HEADERS" | grep -i "^OpenNebula-AppMarket-Size:" | tail -n1 | cut -d: -f2) + if [ -z "${UTILS_PATH}" ] && [ -n "${DRIVER_PATH}" ]; then + UTILS_PATH="${DRIVER_PATH}/../../datastore" + fi + + if ! [ -d "${UTILS_PATH}" ]; then + log_error "Failed to detect downloader.sh location" + exit 1 + fi + + error=1 + + # limit only on local or remote http(s) + if [ -d "${SRC}" ]; then + SIZE=`du -sb "${SRC}" | cut -f1` + error=$? + elif [ -f "${SRC}" ] || (echo "${SRC}" | grep -qe '^https\?://'); then + IMAGE=$(mktemp) + + # try first download only a part of image + $UTILS_PATH/downloader.sh ${DOWNLOADER_ARGS} -c 65536 "${SRC}" - >"${IMAGE}" 2>/dev/null + error=$? + if [ $error -ne 0 ]; then + # better fail here ... + log_error "Failed to download image head" + echo '0' + return + fi + + TYPE=$(image_format "${IMAGE}") + + # raw images requires special handling, as there is no image header + # with size available and we can't predict image virtual size just + # from a part of the file + if [ "${TYPE}" = 'raw' ]; then + $UTILS_PATH/downloader.sh ${DOWNLOADER_ARGS} --nodecomp -c 65536 "${SRC}" - >"${IMAGE}" 2>/dev/null error=$? - else - # Not an AppMarket/Marketplace URL - SIZE=$(echo "$HEADERS" | grep -i "^Content-Length:" | tail -n1 | cut -d: -f2 | tr -d "\r") - error=$? - - # Try to download the image head and inspect via qemu-img - IMAGE=$(mktemp) - curl -Lk --max-time 60 "${1}" 2>/dev/null | head -c 65536 >${IMAGE} - QSIZE=$($QEMU_IMG info "${IMAGE}" | sed -n 's/.*(\([0-9]*\) bytes).*/\1/p') - rm "${IMAGE}" 2>/dev/null - - if [ "${QSIZE}" -gt "${SIZE}" ]; then - SIZE="${QSIZE}" + if [ $error -ne 0 ]; then + # better fail here ... + log_error "Failed to download image head" + echo '0' + return + fi + + ORIG_TYPE=$(file_type "${IMAGE}") + + # if NO_DECOMPRESS=yes is configured on the datastore, + # treat the downloaded data as image as is + if [ "${NO_DECOMPRESS}" = 'yes' ]; then + ORIG_TYPE='application/octet-stream' + fi + + if [ -f "${SRC}" ] ; then + # for local raw images: + # - compressed: use decompressor on local file + # - uncompressed: get file size + case ${ORIG_TYPE} in + "application/x-gzip"|"application/gzip") + SIZE=$(gzip_file_size "${SRC}") + error=$? + ;; + "application/x-xz") + SIZE=$(xz_file_size "${SRC}") + error=$? + ;; + "application/x-bzip2") + SIZE=$(bzip_file_size "${SRC}") + error=$? + ;; + *) + SIZE=$(image_vsize "${SRC}") + error=$? + ;; + esac + else + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + # code which allows downloading is experimental for future use, + # longer downloads may result in image import failure, as + # the datastore stat operation is synchronous with import call + # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + ALLOW_DOWNLOADS='no' + + # for remote raw images + # - compressed: complete download and use decompressor + # - uncompressed: get size from HTTP headers or complete download + case ${ORIG_TYPE} in + "application/x-gzip"|"application/gzip") + if [ "${ALLOW_DOWNLOADS}" = 'yes' ]; then + $UTILS_PATH/downloader.sh ${DOWNLOADER_ARGS} --nodecomp "${SRC}" - >"${IMAGE}" 2>/dev/null + error=$? + if [ $error -eq 0 ]; then + SIZE=$(gzip_file_size "${IMAGE}") + error=$? + fi + else + log_error 'Unsupported remote image format' + error=1 + fi + ;; + "application/x-xz") + if [ "${ALLOW_DOWNLOADS}" = 'yes' ]; then + $UTILS_PATH/downloader.sh ${DOWNLOADER_ARGS} --nodecomp "${SRC}" - >"${IMAGE}" 2>/dev/null + error=$? + if [ $error -eq 0 ]; then + SIZE=$(xz_file_size "${IMAGE}") + error=$? + fi + else + log_error 'Unsupported remote image format' + error=1 + fi + ;; + "application/x-bzip2") + if [ "${ALLOW_DOWNLOADS}" = 'yes' ]; then + $UTILS_PATH/downloader.sh ${DOWNLOADER_ARGS} "${SRC}" - >"${IMAGE}" 2>/dev/null + error=$? + if [ $error -eq 0 ]; then + SIZE=$(image_vsize "${IMAGE}") + error=$? + fi + else + log_error 'Unsupported remote image format' + error=1 + fi + ;; + *) + HEADERS=`curl -LIk --max-time 60 "${SRC}" 2>&1` + error=$? + SIZE=$(echo "$HEADERS" | grep -i "^Content-Length:" | tail -n1 | cut -d: -f2 | tr -d "\r") + + if [ -z "${SIZE}" ]; then + if [ "${ALLOW_DOWNLOADS}" = 'yes' ]; then + $UTILS_PATH/downloader.sh ${DOWNLOADER_ARGS} "${SRC}" - >"${IMAGE}" 2>/dev/null + error=$? + if [ $error -eq 0 ]; then + SIZE=$(image_vsize "${IMAGE}") + error=$? + fi + else + log_error 'Unsupported remote image format' + error=1 + fi + fi + ;; + esac fi - fi - ;; - *) - if [ -d "$1" ]; then - SIZE=`du -sb "$1" | cut -f1` - error=$? else - TYPE=$(cat "$1" | head -n 1024 | file -b - | tr A-Z a-z) - case "$TYPE" in - *gzip*) - SIZE=$(gzip_file_size "$1") - ;; - *qcow*) - SIZE=$($QEMU_IMG info "$1" | sed -n 's/.*(\([0-9]*\) bytes).*/\1/p') - ;; - *) - SIZE=$(file_size "$1") - ;; - esac - error=$? + SIZE=$(image_vsize "${IMAGE}") fi - ;; - esac + + if [ -f "${IMAGE}" ]; then + unlink "${IMAGE}" 2>/dev/null + fi + fi + + ##### SIZE=$(echo $SIZE | tr -d "\r") diff --git a/src/tm_mad/fs_lvm/clone b/src/tm_mad/fs_lvm/clone index a891b11a4b..a15d667db7 100755 --- a/src/tm_mad/fs_lvm/clone +++ b/src/tm_mad/fs_lvm/clone @@ -76,7 +76,7 @@ SIZE="${XPATH_ELEMENTS[j++]}" ORIGINAL_SIZE="${XPATH_ELEMENTS[j++]}" TM_MAD="${XPATH_ELEMENTS[j++]}" -FILE_SIZE=`fs_size $SRC_PATH` +FILE_SIZE=`fs_size "${SRC_PATH}" YES` if [ $FILE_SIZE -gt $SIZE ]; then SIZE="$FILE_SIZE"