From 473fc4f9dab5a46e8c247036486eeec68a3902e8 Mon Sep 17 00:00:00 2001 From: Alejandro Huertas Herrero Date: Tue, 19 Jan 2021 12:28:15 +0100 Subject: [PATCH 1/6] M #-: add provision AR count (#669) --- .../hybrid+/provisions/aws.d/inputs.yml | 5 +++++ .../hybrid+/provisions/aws.d/networks.yml | 2 ++ .../hybrid+/provisions/packet.d/inputs.yml | 5 +++++ .../hybrid+/provisions/packet.d/networks.yml | 2 ++ src/oneprovision/lib/provision/provision.rb | 6 ++++-- .../lib/provision/resources/physical/network.rb | 15 +++++++++++++++ 6 files changed, 33 insertions(+), 2 deletions(-) diff --git a/share/oneprovision/hybrid+/provisions/aws.d/inputs.yml b/share/oneprovision/hybrid+/provisions/aws.d/inputs.yml index 2edf00e8a1..a095876f31 100644 --- a/share/oneprovision/hybrid+/provisions/aws.d/inputs.yml +++ b/share/oneprovision/hybrid+/provisions/aws.d/inputs.yml @@ -21,6 +21,11 @@ inputs: description: 'Number of AWS instances to create' default: '1' + - name: 'number_public_ips' + type: text + description: 'Number of public IPs to get' + default: '1' + - name: 'aws_ami_image' type: text description: "AWS ami image used for host deployments" diff --git a/share/oneprovision/hybrid+/provisions/aws.d/networks.yml b/share/oneprovision/hybrid+/provisions/aws.d/networks.yml index 0252545a4b..016c4c0c13 100644 --- a/share/oneprovision/hybrid+/provisions/aws.d/networks.yml +++ b/share/oneprovision/hybrid+/provisions/aws.d/networks.yml @@ -19,6 +19,8 @@ networks: - name: "${provision}-public" vn_mad: 'elastic' bridge: 'br0' + provision: + count: "${input.number_public_ips}" ar: - provison_id: "${provision_id}" size: '1' diff --git a/share/oneprovision/hybrid+/provisions/packet.d/inputs.yml b/share/oneprovision/hybrid+/provisions/packet.d/inputs.yml index a3b2159dcc..71b42cceb9 100644 --- a/share/oneprovision/hybrid+/provisions/packet.d/inputs.yml +++ b/share/oneprovision/hybrid+/provisions/packet.d/inputs.yml @@ -21,6 +21,11 @@ inputs: description: "Number of metal servers to create" 
default: '1' + - name: 'number_public_ips' + type: text + description: 'Number of public IPs to get' + default: '1' + - name: 'packet_plan' type: text description: "Packet plan (device type)" diff --git a/share/oneprovision/hybrid+/provisions/packet.d/networks.yml b/share/oneprovision/hybrid+/provisions/packet.d/networks.yml index 63633a744e..246b359643 100644 --- a/share/oneprovision/hybrid+/provisions/packet.d/networks.yml +++ b/share/oneprovision/hybrid+/provisions/packet.d/networks.yml @@ -19,6 +19,8 @@ networks: - name: "${provision}-public" vn_mad: 'elastic' bridge: 'br0' + provision: + count: "${input.number_public_ips}" ar: - provison_id: "${provision_id}" size: '1' diff --git a/src/oneprovision/lib/provision/provision.rb b/src/oneprovision/lib/provision/provision.rb index 0ce9d73353..49bcd64a8d 100644 --- a/src/oneprovision/lib/provision/provision.rb +++ b/src/oneprovision/lib/provision/provision.rb @@ -574,8 +574,10 @@ module OneProvision cfg[r].each do |x| Driver.retry_loop('Failed to create some resources', self) do - x['provision'] = { 'id' => @id } - obj = Resource.object(r, nil, x) + x['provision'] ||= {} + x['provision'].merge!({ 'id' => @id }) + + obj = Resource.object(r, nil, x) next if obj.nil? 
diff --git a/src/oneprovision/lib/provision/resources/physical/network.rb b/src/oneprovision/lib/provision/resources/physical/network.rb index 23befc00aa..4f376196f0 100644 --- a/src/oneprovision/lib/provision/resources/physical/network.rb +++ b/src/oneprovision/lib/provision/resources/physical/network.rb @@ -32,6 +32,21 @@ module OneProvision @type = 'network' end + # Creates the object in OpenNebula + # + # @param cluster_id [Integer] Cluster ID + # + # @return [Integer] Resource ID + def create(cluster_id) + if @p_template['provision']['count'] && @p_template['ar'] + (Integer(@p_template['provision']['count']) - 1).times do + @p_template['ar'] << @p_template['ar'][0] + end + end + + super + end + # Info an specific object # # @param id [String] Object ID From 14f09cf958de765270273034f3d4fe70db071105 Mon Sep 17 00:00:00 2001 From: Alejandro Huertas Herrero Date: Tue, 19 Jan 2021 15:48:54 +0100 Subject: [PATCH 2/6] F #644: convert roles/disks into hash (#670) --- src/market_mad/remotes/one/monitor | 16 ++++++---------- src/oca/ruby/opennebula/marketplaceapp_ext.rb | 13 +++++++++---- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/src/market_mad/remotes/one/monitor b/src/market_mad/remotes/one/monitor index 9e5309116c..2f7efca070 100755 --- a/src/market_mad/remotes/one/monitor +++ b/src/market_mad/remotes/one/monitor @@ -21,6 +21,7 @@ require 'uri' require 'json' require 'base64' require 'rexml/document' +require 'securerandom' # OpenNebula MarketPlace class OneMarket @@ -111,14 +112,12 @@ class OneMarket def render_vmtemplate(tmpl, app) print_var(tmpl, 'SIZE', 0) - print_var(tmpl, 'MD5', app['md5']) if app['disks'] - app['disks'].each do |disk| - dname = disk.keys.first - + app['disks'].each do |app_name| tmpl << <<-EOT.strip - DISK = [ NAME = "#{dname}", APP="#{disk[dname]}" ] + DISK = [ NAME = "#{SecureRandom.hex[0..9]}", + APP="#{app_name}" ] EOT end end @@ -137,14 +136,11 @@ class OneMarket def render_service_template(tmpl, app) print_var(tmpl, 
'SIZE', 0) - print_var(tmpl, 'MD5', app['md5']) if app['roles'] - app['roles'].each do |role| - rname = role.keys.first - + app['roles'].each do |role_name, app_name| tmpl << <<-EOT.strip - ROLE = [ NAME = "#{rname}", APP="#{role[rname]}" ] + ROLE = [ NAME = "#{role_name}", APP="#{app_name}" ] EOT end end diff --git a/src/oca/ruby/opennebula/marketplaceapp_ext.rb b/src/oca/ruby/opennebula/marketplaceapp_ext.rb index e9503af9db..d7974298da 100644 --- a/src/oca/ruby/opennebula/marketplaceapp_ext.rb +++ b/src/oca/ruby/opennebula/marketplaceapp_ext.rb @@ -73,6 +73,8 @@ module OpenNebula::MarketPlaceAppExt when 'IMAGE' export_image(options) when 'VMTEMPLATE' + options[:notemplate] = true + export_vm_template(options) when 'SERVICE_TEMPLATE' export_service_template(options) @@ -328,10 +330,12 @@ module OpenNebula::MarketPlaceAppExt tmpl['roles'].each do |role| t_id = roles.find {|_, v| v[:names].include?(role['name']) } - next if t_id.nil? || t_id[1].nil? || t_id[1][:vmtemplate] + if t_id.nil? || t_id[1].nil? || t_id[1][:vmtemplate].nil? 
+ next + end role['vm_template'] = nil - role['vm_template'] = t_id[1][:vmtemplate] + role['vm_template'] = t_id[1][:vmtemplate][0] end # -------------------------------------------------------------- @@ -387,8 +391,9 @@ module OpenNebula::MarketPlaceAppExt obj.extend(MarketPlaceAppExt) rc = obj.export( - :dsid => options[:dsid], - :name => "#{options[:name]}-#{idx}" + :dsid => options[:dsid], + :name => "#{options[:name]}-#{idx}", + :notemplate => options[:notemplate] ) image = rc[:image].first if rc[:image] From 7807dc8f1e7b0eadb0640c95f03d7ddfc399fa31 Mon Sep 17 00:00:00 2001 From: Tino Vazquez Date: Tue, 19 Jan 2021 16:07:47 +0100 Subject: [PATCH 3/6] M #-: Avoid error reporting exception in vcenter At the time of loading vcenterrc, if something goes wrong it tries to log the error using an undefined error_message function --- src/vmm_mad/remotes/vcenter/vcenter_driver.rb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/vmm_mad/remotes/vcenter/vcenter_driver.rb b/src/vmm_mad/remotes/vcenter/vcenter_driver.rb index bd148ae97c..b8d03c8ea0 100644 --- a/src/vmm_mad/remotes/vcenter/vcenter_driver.rb +++ b/src/vmm_mad/remotes/vcenter/vcenter_driver.rb @@ -63,8 +63,7 @@ class VCenterConf < Hash vcenterrc_path = "#{VAR_LOCATION}/remotes/etc/vmm/vcenter/vcenterrc" merge!(YAML.load_file(vcenterrc_path)) rescue StandardError => e - STDERR.puts error_message("Couldn't load vcenterrc. \ - Reason #{e.message}.") + STDERR.puts "Couldn't load vcenterrc. Reason #{e.message}." end super From 9b71bb9d66c47d8ac989e988fb428a0cafb18406 Mon Sep 17 00:00:00 2001 From: "Ruben S. 
Montero" Date: Tue, 19 Jan 2021 16:29:42 +0100 Subject: [PATCH 4/6] M #-: Update provision templates --- share/oneprovision/hybrid+/provisions/aws.d/networks.yml | 1 - share/oneprovision/hybrid+/provisions/aws.yml | 2 +- share/oneprovision/hybrid+/provisions/packet.yml | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/share/oneprovision/hybrid+/provisions/aws.d/networks.yml b/share/oneprovision/hybrid+/provisions/aws.d/networks.yml index 016c4c0c13..ae7fa770ca 100644 --- a/share/oneprovision/hybrid+/provisions/aws.d/networks.yml +++ b/share/oneprovision/hybrid+/provisions/aws.d/networks.yml @@ -24,7 +24,6 @@ networks: ar: - provison_id: "${provision_id}" size: '1' - packet_ip_type: 'public_ipv4' ipam_mad: 'aws' cidr: "${cluster.0.cidr}" diff --git a/share/oneprovision/hybrid+/provisions/aws.yml b/share/oneprovision/hybrid+/provisions/aws.yml index 4cd9c3c1b9..e4e48c5e13 100644 --- a/share/oneprovision/hybrid+/provisions/aws.yml +++ b/share/oneprovision/hybrid+/provisions/aws.yml @@ -73,6 +73,6 @@ cluster: reserved_cpu: '0' reserved_mem: '0' datastores: - - 0 + - 1 provision: cidr: '10.0.0.0/16' diff --git a/share/oneprovision/hybrid+/provisions/packet.yml b/share/oneprovision/hybrid+/provisions/packet.yml index ed3b8a8117..145359d78f 100644 --- a/share/oneprovision/hybrid+/provisions/packet.yml +++ b/share/oneprovision/hybrid+/provisions/packet.yml @@ -70,6 +70,6 @@ cluster: name: "${provision}" description: 'Packet cluster' datastores: - - 0 + - 1 reserved_cpu: '0' reserved_mem: '0' From fe6c2abf41d3bc928acf1f1bd1e7953c41ba90e0 Mon Sep 17 00:00:00 2001 From: Jan Orel Date: Tue, 19 Jan 2021 17:19:50 +0100 Subject: [PATCH 5/6] F #4985: Use qcow2 recover snaps instead blockcopy (#638) --- src/tm_mad/ssh/clone.replica | 59 ++++++++++++++++-------- src/tm_mad/ssh/monitor_ds | 2 +- src/tm_mad/ssh/recovery_snap_create_live | 45 ++++++++++++++---- src/tm_mad/ssh/snap_revert | 31 ++++++++++--- src/tm_mad/ssh/ssh_utils.sh | 30 +++++++++++- 
src/vmm_mad/remotes/kvm/migrate | 11 +++++ 6 files changed, 142 insertions(+), 36 deletions(-) diff --git a/src/tm_mad/ssh/clone.replica b/src/tm_mad/ssh/clone.replica index 1f00605f0c..111f45ca58 100755 --- a/src/tm_mad/ssh/clone.replica +++ b/src/tm_mad/ssh/clone.replica @@ -110,11 +110,13 @@ REPLICA_STORAGE_IP=$(awk 'gsub(/[\0]/, x)' \ # ------------------------------------------------------------------------------ # Synchronize Image Datastore in the Replica Host. Use a recovery snapshot # if present in the RECOVERY_SNAPS_DIR +# (recovery snap existence means recreate is running, VMIDs is reused) # ------------------------------------------------------------------------------ if recovery_snap_exists $REPLICA_HOST $VMID/$DST_FILE; then + # point to [disk].recovery_snaphost files SRC_DIR=${REPLICA_RECOVERY_SNAPS_DIR}/$VMID - SRC_FILE="${DST_FILE}.recovery_snapshot" + SRC_FILE=$DST_FILE SRC_PATH="$SRC_DIR/$SRC_FILE" RECOVERY="YES" else @@ -132,15 +134,24 @@ log "Cloning $SRC_PATH via replica $REPLICA_HOST in $DST" # copy locally, we hit the replica if [ "$REPLICA_HOST" = "$DST_HOST" ]; then + # if recovery snapshot is needed, prepare base <- base.1 qcow2 structure CLONE_CMD=$(cat < /dev/null -rsync -q $SNAP_PATH \ - $REPLICA_HOST:$REPLICA_RECOVERY_SNAPS_DIR/$VMID/${DISK_NAME}.recovery_snapshot > /dev/null +# reduce the backing-chain using blockcommit +# base <- base.1 <- rs_tmp is reduced to base <- base.1 +# outdated rs_tmp is deleted next cycle +virsh -c ${LIBVIRT_URI} blockcommit one-${VMID} $SNAP_PATH \ + --base $DISK_PATH.snap/base.1 \ + --top $SNAP_PATH \ + --active --pivot --wait stat -c "%Y" $SNAP_PATH diff --git a/src/tm_mad/ssh/snap_revert b/src/tm_mad/ssh/snap_revert index 3a148d292e..867d669b6c 100755 --- a/src/tm_mad/ssh/snap_revert +++ b/src/tm_mad/ssh/snap_revert @@ -25,15 +25,18 @@ DSID=$4 if [ -z "${ONE_LOCATION}" ]; then TMCOMMON=/var/lib/one/remotes/tm/tm_common.sh + SSH_UTILS=/var/lib/one/remotes/tm/ssh/ssh_utils.sh 
DATASTORES=/var/lib/one/datastores else TMCOMMON=$ONE_LOCATION/var/remotes/tm/tm_common.sh + SSH_UTILS=$ONE_LOCATION/var/remotes/tm/ssh/ssh_utils.sh DATASTORES=$ONE_LOCATION/var/datastores fi DRIVER_PATH=$(dirname $0) . $TMCOMMON +. $SSH_UTILS SRC_PATH=$(arg_path $SRC) SRC_HOST=$(arg_host $SRC) @@ -52,11 +55,12 @@ while IFS= read -r -d '' element; do XPATH_ELEMENTS[i++]="$element" done < <(onevm show -x $VMID| $XPATH \ /VM/TEMPLATE/DISK[DISK_ID=$DISK_ID]/SOURCE \ - /VM/TEMPLATE/DISK[DISK_ID=$DISK_ID]/CLONE) + /VM/TEMPLATE/DISK[DISK_ID=$DISK_ID]/CLONE \ + /VM/TEMPLATE/DISK[DISK_ID=$DISK_ID]/RECOVERY_SNAPSHOT_FREQ) DISK_SRC="${XPATH_ELEMENTS[j++]}" CLONE="${XPATH_ELEMENTS[j++]}" - +RECOVERY_SNAPSHOT_FREQ="${XPATH_ELEMENTS[j++]}" SYSTEM_DS_PATH=$(dirname ${SRC_PATH}) IMAGE_DS_PATH=$(dirname ${DISK_SRC}) @@ -68,12 +72,27 @@ SNAP_PATH="${SNAP_DIR}/${SNAP_ID}" SNAP_PATH_RELATIVE=$(basename ${SNAP_PATH}) CURRENT_PATH=${DISK_PATH} -CMD=$(cat < disk.0.snap/base.1 +# $VM_DIR/disk.0.snap dir +# $VM_DIR/disk.0.snap/disk.0.snap symlink -> . for relative referencing +# $VM_DIR/disk.0.snap/base base image (cp/mv from SRC_PATH) +# $VM_DIR/disk.0.snap/base.1 qcow2 overlay (backing file = base) +# +# ------------------------------------------------------------------------------ +function create_base() { + local SRC_PATH=$1 + local DST_PATH=$2 + local COPY=${3:-cp} + DST_FILE=$(basename $DST_PATH) + + mkdir -p $DST_PATH.snap + cd $DST_PATH.snap + ln -f -s . 
$DST_FILE.snap + $COPY $SRC_PATH base + qemu-img create -b $DST_FILE.snap/base -f qcow2 base.1 + ln -f -s $DST_FILE.snap/base.1 $DST_PATH + cd - } diff --git a/src/vmm_mad/remotes/kvm/migrate b/src/vmm_mad/remotes/kvm/migrate index 5d2451d045..d5e04737d5 100755 --- a/src/vmm_mad/remotes/kvm/migrate +++ b/src/vmm_mad/remotes/kvm/migrate @@ -42,6 +42,13 @@ get_size_and_format_of_disk_img() { local PARAM="$2" if [ -L "$QEMU_IMG_PATH" ]; then + TARGET=$(readlink "$QEMU_IMG_PATH")A + # symlink to disk.X.snap/base.1 + if [[ "$TARGET" =~ disk.[0-9]*.snap/base.1 ]]; then + echo unknown qcow2-symlink + return + fi + # symlink, assume network disk echo unknown network-disk return @@ -143,6 +150,10 @@ else create_target_disk_img "$DEST_HOST" "$DISK_PATH" "$SIZE" MIGRATE_DISKS+="${MIGRATE_DISKS:+,}${DISK_DEV}" + elif [ "$FORMAT" = "qcow2-symlink" ]; then + # don't create disk, .snap dir will be copied anyway + MIGRATE_DISKS+="${MIGRATE_DISKS:+,}${DISK_DEV}" + elif [ "$FORMAT" = "network-disk" ]; then true # skip fi From 81273b2b31d42f22f25f9bb23c0dff5fadf4475b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20Gonz=C3=A1lez?= Date: Tue, 19 Jan 2021 17:57:29 +0100 Subject: [PATCH 6/6] B #: fix HA race condition (#673) Fix race condition when a node fails to become leader due to another node having a greater term. --- src/raft/RaftManager.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/raft/RaftManager.cc b/src/raft/RaftManager.cc index 5a40171c13..300690173f 100644 --- a/src/raft/RaftManager.cc +++ b/src/raft/RaftManager.cc @@ -400,13 +400,13 @@ void RaftManager::leader() heartbeat_manager.replicate(); clock_gettime(CLOCK_REALTIME, &last_heartbeat); + + auto im = nd.get_im(); + im->raft_status(state); } aclm->reload_rules(); - auto im = nd.get_im(); - im->raft_status(state); - if ( nd.is_federation_master() ) { frm->start_replica_threads();