1
0
mirror of https://github.com/OpenNebula/one.git synced 2024-12-24 21:34:01 +03:00
one/include/ImageManager.h

436 lines
15 KiB
C
Raw Normal View History

/* -------------------------------------------------------------------------- */
2023-01-09 14:23:19 +03:00
/* Copyright 2002-2023, OpenNebula Project, OpenNebula Systems */
/* */
/* Licensed under the Apache License, Version 2.0 (the "License"); you may */
/* not use this file except in compliance with the License. You may obtain */
/* a copy of the License at */
/* */
/* http://www.apache.org/licenses/LICENSE-2.0 */
/* */
/* Unless required by applicable law or agreed to in writing, software */
/* distributed under the License is distributed on an "AS IS" BASIS, */
/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */
/* See the License for the specific language governing permissions and */
/* limitations under the License. */
/* -------------------------------------------------------------------------- */
#ifndef IMAGE_MANAGER_H_
#define IMAGE_MANAGER_H_
#include "DriverManager.h"
#include "ProtocolMessages.h"
#include "Listener.h"
class DatastorePool;
class Image;
class ImagePool;
class Snapshots;
class Template;
class ImageManager : public DriverManager<Driver<image_msg_t>>
{
public:
/**
 *  Builds the Image Manager. The constructor only stores its arguments and
 *  sets up the timer object; it has an empty body.
 *  @param _timer_period period handed to timer_thread and stored as
 *  timer_period
 *  @param _monitor_period datastore monitor interval
 *  @param _ipool pointer to the Image Pool
 *  @param _dspool pointer to the Datastore Pool
 *  @param _mads_location forwarded to the DriverManager base class
 *  @param _monitor_vm_disk monitor VM disk usage every X datastore
 *  monitoring, 0 to disable
 *
 *  NOTE(review): the timer callback captures `this` and calls
 *  timer_action(). timer_thread is initialized before the remaining data
 *  members, so if Timer can fire during construction the callback could see
 *  members that are not set yet - confirm Timer waits before its first tick.
 */
ImageManager(time_t             _timer_period,
             time_t             _monitor_period,
             ImagePool *        _ipool,
             DatastorePool *    _dspool,
             const std::string& _mads_location,
             int                _monitor_vm_disk)
    : DriverManager(_mads_location)
    , timer_thread(_timer_period, [this] { timer_action(); })
    , timer_period(_timer_period)
    , monitor_period(_monitor_period)
    , monitor_vm_disk(_monitor_vm_disk)
    , ipool(_ipool)
    , dspool(_dspool)
{
}
/**
 *  Defaulted destructor: no explicit cleanup is done here. Stopping the
 *  timer thread and the drivers is done by finalize().
 */
~ImageManager() = default;
/**
 *  This function starts the associated listener thread, and creates a
 *  new thread for the Information Manager. This thread will wait in
 *  an action loop until it receives ACTION_FINALIZE.
 *  NOTE(review): the text refers to the "Information Manager" although this
 *  is the Image Manager; presumably copied from another manager - confirm
 *  against the implementation.
 *  @return 0 on success.
 */
int start();
/**
 *  Shuts the manager down: the timer thread is stopped first, then stop()
 *  is called with drivers_timeout.
 *  NOTE(review): stop() is presumably inherited from DriverManager - confirm.
 */
void finalize()
{
    timer_thread.stop();

    stop(drivers_timeout);
}
/**
 *  Loads the Image Driver defined in the configuration file.
 *  @param _mads configuration of the drivers
 *  @return presumably 0 on success - TODO confirm against the implementation
 */
int load_drivers(const std::vector<const VectorAttribute*>& _mads);
/**************************************************************************/
/* Image Manager Actions */
/* Operates in a semi-synchronous mode. Operations will be granted or not; */
/* when needed, the image repository drivers will be used to perform FS */
/* operations in the background. */
/**************************************************************************/
/**
 *  Try to acquire an image from the repository for a VM.
 *  @param vm_id id of the VM the image is acquired for
 *  @param image_id id of the image
 *  @param attach true if attaching the image to a VM
 *  @param error string describing the error
 *  @return pointer to the image, or a null pointer if it could not be
 *  acquired
 */
std::unique_ptr<Image> acquire_image(int vm_id, int image_id,
bool attach, std::string& error);
/**
 *  Try to acquire an image from the repository for a VM, looking the image
 *  up by name and owner instead of by id.
 *  @param vm_id id of the VM the image is acquired for
 *  @param name name of the image
 *  @param uid id of the owner
 *  @param attach true if attaching the image to a VM
 *  @param error string describing the error
 *  @return pointer to the image, or a null pointer if it could not be
 *  acquired
 */
std::unique_ptr<Image> acquire_image(int vm_id, const std::string& name,
int uid, bool attach, std::string& error);
/**
 *  Releases an image and triggers any needed operations in the repo.
 *  @param vm_id id of the VM releasing the image
 *  @param iid id of the image to be released
 *  @param failed true if the associated VM releasing the image is FAILED
 */
void release_image(int vm_id, int iid, bool failed);
/**
 *  Closes any cloning operation on the image, updating the state if needed.
 *  @param iid id of the image that was being cloned
 *  @param ot object type of the clone target, image or market app
 *  @param clone_oid id of the cloned resource
 */
void release_cloning_resource(int iid, PoolObjectSQL::ObjectType ot, int clone_oid);
/**
* Closes any cloning operation on the image, updating the state if needed
* @param iid image id of the image to that was being cloned
* @param clone_img_id the cloned image id
*/
void release_cloning_image(int iid, int clone_img_id)
{
release_cloning_resource(iid, PoolObjectSQL::IMAGE, clone_img_id);
};
/**
* Closes any cloning operation on the image, updating the state if needed
* @param iid image id of the image to that was being cloned
* @param clone_oid the cloned marketplace app id
*/
void release_cloning_app(int iid, int clone_oid)
{
release_cloning_resource(iid, PoolObjectSQL::MARKETPLACEAPP, clone_oid);
};
/**
 *  Enables the image.
 *  @param iid id of the image
 *  @param to_enable true will enable the image (false presumably disables
 *  it - confirm against the implementation)
 *  @param error_str error reason, if any
 *  @return 0 on success
 */
int enable_image(int iid, bool to_enable, std::string& error_str);
/**
 *  Adds a new image to the repository, copying or creating it as needed.
 *  @param iid id of the image
 *  @param ds_data data of the associated datastore in XML format
 *  @param extra_data data to be sent to the driver
 *  @param error error reason
 *
 *  @return 0 on success
 */
int register_image(int iid,
const std::string& ds_data,
const std::string& extra_data,
std::string& error);
/**
 *  Checks if an image is ready to be cloned.
 *
 *  @param cloning_id id of the image to be cloned
 *  @param oss_error stream that receives the error reason, if any
 *
 *  @return 0 if the image can be cloned, -1 otherwise
 */
int can_clone_image(int cloning_id, std::ostringstream& oss_error);
/**
 *  Sets the state to CLONE for the given image.
 *  @param ot object type of the clone target, image or market app
 *  @param new_id id of the target image or market app
 *  @param cloning_id id of the image to be cloned
 *  @param error error reason, if any
 *  @return 0 on success
 */
int set_clone_state(PoolObjectSQL::ObjectType ot, int new_id,
int cloning_id, std::string& error);
/**
* Sets the state to CLONE for the given image
* @param new_id for the target image
* @param clonning_id the ID of the image to be cloned
* @param error if any
* @return 0 on success
*/
int set_img_clone_state(int new_id, int cloning_id, std::string& error)
{
return set_clone_state(PoolObjectSQL::IMAGE, new_id, cloning_id, error);
};
/**
* Sets the state to CLONE for the given image
* @param new_id for the target market app
* @param clonning_id the ID of the image to be cloned
* @param error if any
* @return 0 on success
*/
int set_app_clone_state(int new_id, int cloning_id, std::string& error)
{
return set_clone_state(PoolObjectSQL::MARKETPLACEAPP, new_id, cloning_id, error);
};
/**
 *  Clones an existing image into the repository.
 *  @param new_id id of the new image
 *  @param cloning_id id of the image to be cloned
 *  @param ds_data data of the associated datastore in XML format
 *  @param extra_data data to be sent to the driver
 *  @param error string describing the error
 *  @return 0 on success
 */
int clone_image(int new_id,
int cloning_id,
const std::string& ds_data,
const std::string& extra_data,
std::string& error);
/**
 *  Deletes an image from the repository and the DB. The Datastore image
 *  list is also updated.
 *  @param iid id of the image
 *  @param error_str error reason, if any
 *  @return 0 on success
 */
int delete_image(int iid, std::string& error_str);
F #5516: New backup interface for OpenNebula co-authored-by: Frederick Borges <fborges@opennebula.io> co-authored-by: Neal Hansen <nhansen@opennebula.io> co-authored-by: Daniel Clavijo Coca <dclavijo@opennebula.io> co-authored-by: Pavel Czerný <pczerny@opennebula.systems> BACKUP INTERFACE ================= * Backups are exposed through a a special Datastore (BACKUP_DS) and Image (BACKUP) types. These new types can only be used for backup'ing up VMs. This approach allows to: - Implement tier based backup policies (backups made on different locations). - Leverage access control and quota systems - Support differnt storage and backup technologies * Backup interface for the VMs: - VM configures backups with BACKUP_CONFIG. This attribute can be set in the VM template or updated with updateconf API call. It can include: + BACKUP_VOLATILE: To backup or not volatile disks + FS_FREEZE: How the FS is freeze for running VMs (qemu-agent, suspend or none). When possible backups are crash consistent. + KEEP_LAST: keep only a given number of backups. - Backups are initiated by the one.vm.backup API call that requires the target Datastore to perform the backup (one-shot). This is exposed by the onevm backup command. - Backups can be periodic through scheduled actions. - Backup configuration is updated with one.vm.updateconf API call. * Restore interface: - Restores are initiated by the one.image.restore API call. This is exposed by oneimage restore command. - Restore include configurable options for the VM template + NO_IP: to not preserve IP addresses (but keep the NICs and network mapping) + NO_NIC: to not preserve network mappings - Other template attributes: + Clean PCI devices, including network configuration in case of TYPE=NIC attributes. By default it removes SHORT_ADDRESS and leave the "auto" selection attributes. + Clean NUMA_NODE, removes node id and cpu sets. 
It keeps the NUMA node - It is possible to restore single files stored in the repository by using the backup specific URL. * Sunstone (Ruby version) has been updated to expose this feautres. BACKUP DRIVERS & IMPLEMENTATION =============================== * Backup operation is implemented by a combination of 3 driver operations: - VMM. New (internal oned <-> one_vmm_exec.rb) to orchestrate backups for RUNNING VMs. - TM. This commit introduces 2 new operations (and their corresponding _live variants): + pre_backup(_live): Prepares the disks to be back'ed up in the repository. It is specific to the driver: (i) ceph uses the export operation; (ii) qcow2/raw uses snapshot-create-as and fs_freeze as needed. + post_backup(_live): Performs cleanning operations, i.e. KVM snapshots or tmp dirs. - DATASTORE. Each backup technology is represented by its corresponfing driver, that needs to implement: + backup: it takes the VM disks in file (qcow2) format and stores it the backup repository. + restore: it takes a backup image and restores the associated disks and VM template. + monitor: to gather available space in the repository + rm: to remove existing backups + stat: to return the "restored" size of a disk stored in a backup + downloader pseudo-URL handler: in the form <backup_proto>://<driver_snapshot_id>/<disk filename> BACKUP MANAGEMENT ================= Backup actions may potentially take some time, leaving some vmm_exec threads in use for a long time, stucking other vmm operations. Backups are planned by the scheduler through the sched action interface. Two attributes has been added to sched.conf: * MAX_BACKUPS max active backup operations in the cloud. No more backups will be started beyond this limit. * MAX_BACKUPS_HOST max number of backups per host * Fix onevm CLI to properly show and manage schedule actions. 
--schedule supports now, as well as relative times +<seconds_from_stime> onvm backup --schedule now -d 100 63 * Backup is added as VM_ADMIN_ACTIONS in oned.conf. Regular users needs to use the batch interface or request specific permissions Internal restructure of Scheduler: - All sched_actions interface is now in SchedActionsXML class and files. This class uses references to VM XML, and MUST be used in the same lifetime scope. - XMLRPC API calls for sched actions has been moved to ScheduledActionXML.cc as static functions. - VirtualMachineActionPool includes counters for active backups (total and per host). SUPPORTED PLATFORMS ==================== * hypervisor: KVM * TM: qcow2/shared/ssh, ceph * backup: restic, rsync Notes on Ceph * Ceph backups are performed in the following steps: 1. A snapshot of each disk is taken (group snapshots cannot be used as it seems we cannot export the disks afterwards) 2. Disks are export to a file 3. File is converted to qcow2 format 4. Disk files are upload to the backup repo TODO: * Confirm crash consistent snapshots cannot be used in Ceph TODO: * Check if using VM dir instead of full path is better to accomodate DS migrations i.e.: - Current path: /var/lib/one/datastores/100/53/backup/disk.0 - Proposal: 53/backup/disk.0 RESTIC DRIVER ============= Developed together with this feature is part of the EE edtion. * It supports the SFTP protocol, the following attributes are supported: - RESTIC_SFTP_SERVER - RESTIC_SFTP_USER: only if different from oneadmin - RESTIC_PASSWORD - RESTIC_IONICE: Run restic under a given ionice priority (class 2) - RESTIC_NICE: Run restic under a given nice - RESTIC_BWLIMIT: Limit restic upload/download BW - RESTIC_COMPRESSION: Restic 0.14 implements compression (three modes: off, auto, max). This requires repositories version 2. By default, auto is used (average compression without to much CPU usage) - RESTIC_CONNECTIONS: Sets the number of concurrent connections to a backend (5 by default). 
For high-latency backends this number can be increased. * downloader URL: restic://<datastore_id>/<snapshot_id>/<file_name> snapshot_id is the restic snapshot hash. To recover single disk images from a backup. This URLs support: - RESTIC_CONNECTIONS - RESTIC_BWLIMIT - RESTIC_IONICE - RESTIC_NICE These options needs to be defined in the associated datastore. RSYNC DRIVER ============= A rsync driver is included as part of the CE distribution. It uses the rsync tool to store backups in a remote server through SSH: * The following attributes are supported to configure the backup datastore: - RSYNC_HOST - RSYNC_USER - RSYNC_ARGS: Arguments to perform the rsync operatin (-aS by default) * downloader URL: rsync://<ds_id>/<vmid>/<hash>/<file> can be used to recover single files from an existing backup. (RSYNC_HOST and RSYN_USER needs to be set in ds_id EMULATOR_CPUS ============= This commit includes a non related backup feature: * Add EMULATOR_CPUS (KVM). This host (or cluster attribute) defines the CPU IDs where the emulator threads will be pinned. If this value is not defined the allocated CPU wll be used when using a PIN policy. (cherry picked from commit a9e6a8e000e9a5a2f56f80ce622ad9ffc9fa032b) F OpenNebula/one#5516: adding rsync backup driver (cherry picked from commit fb52edf5d009dc02b071063afb97c6519b9e8305) F OpenNebula/one#5516: update install.sh, add vmid to source, some polish Signed-off-by: Neal Hansen <nhansen@opennebula.io> (cherry picked from commit 6fc6f8a67e435f7f92d5c40fdc3d1c825ab5581d) F OpenNebula/one#5516: cleanup Signed-off-by: Neal Hansen <nhansen@opennebula.io> (cherry picked from commit 12f4333b833f23098142cd4762eb9e6c505e1340) F OpenNebula/one#5516: update downloader, default args, size check Signed-off-by: Neal Hansen <nhansen@opennebula.io> (cherry picked from commit 510124ef2780a4e2e8c3d128c9a42945be38a305) LL (cherry picked from commit d4fcd134dc293f2b862086936db4d552792539fa)
2022-09-09 12:46:44 +03:00
/**
 *  Restores a backup image, restoring the associated disk images and VM
 *  template.
 *  @param iid id of the backup image
 *  @param dst_ds_id destination datastore where the images will be restored
 *  @param opts XML encoded options for the restore operation
 *
 *  @param result string with the object ids or the error reason
 *
 *  @return 0 on success
 */
int restore_image(int iid, int dst_ds_id, const std::string& opts,
std::string& result);
/**
 *  Gets the size of an image by calling the STAT action of the associated
 *  datastore driver.
 *
 *  @param img_tmpl the template for the image
 *  @param ds_tmpl the template for the datastore
 *  @param res string representation of the size or, if an error
 *  occurred, a description of the error
 *  @return 0 on success
 */
int stat_image(Template* img_tmpl,
const std::string& ds_tmpl,
std::string& res);
/**
 *  Triggers a monitor action for the datastore.
 *  @param ds_id id of the datastore to monitor
 */
void monitor_datastore(int ds_id);
/**
 *  Sets the snapshots for the given image. The image MUST be persistent
 *  and of type OS or DATABLOCK.
 *  @param iid id of the image
 *  @param s snapshot list
 */
void set_image_snapshots(int iid, const Snapshots& s);
/**
 *  Clears the snapshots of an image by setting an empty set.
 *  @param iid id of the image
 */
void clear_image_snapshots(int iid);
/**
 *  Sets the size for the given image. The image MUST be persistent
 *  and of type OS or DATABLOCK.
 *  @param iid id of the image
 *  @param size new size for the image (units are not stated in this
 *  header - TODO confirm)
 */
void set_image_size(int iid, long long size);
/**
 *  Deletes the snapshot of an image.
 *  @param iid id of the image
 *  @param sid id of the snapshot
 *  @param error error reason, if any
 *  @return 0 on success
 */
int delete_snapshot(int iid, int sid, std::string& error);
/**
 *  Reverts the image state to a previous snapshot.
 *  @param iid id of the image
 *  @param sid id of the snapshot
 *  @param error error reason, if any
 *  @return 0 on success
 */
int revert_snapshot(int iid, int sid, std::string& error);
/**
 *  Flattens the snapshot by committing changes to the base image.
 *  @param iid id of the image
 *  @param sid id of the snapshot
 *  @param error error reason, if any
 *  @return 0 on success
 */
int flatten_snapshot(int iid, int sid, std::string& error);
/**
 *  Flattens the backup chain by committing changes to the first (full)
 *  backup.
 *  @param iid id of the image
 *  @param ds_id id of the datastore
 *  @param edata XML string with KEEP_LAST and VM_ID
 *  @param error error reason, if any
 *  @return 0 on success
 */
int flatten_increments(int iid, int ds_id, const std::string& edata, std::string& error);
private:
/**
 *  Generic name for the Image driver; get() uses it to look the driver up.
 */
static const char * image_driver_name;
/**
 *  Timer action async execution. Built in the constructor with the timer
 *  period and a callback that runs timer_action().
 */
Timer timer_thread;
/**
 *  Timer period for the Image Manager.
 */
time_t timer_period;
/**
 *  Datastore Monitor Interval
 */
time_t monitor_period;
/**
 *  Monitor Virtual Machine disk usage every X datastore monitoring.
 *  0 to disable
 */
int monitor_vm_disk;
/**
 *  Pointer to the Image Pool.
 *  NOTE(review): the previous comment said "to access VMs"; given the
 *  type this presumably gives access to images - confirm.
 */
ImagePool * ipool;
/**
 *  Pointer to the DS Pool
 */
DatastorePool * dspool;
/**
 *  Value passed to stop() by finalize().
 *  NOTE(review): presumably a timeout in seconds for stopping the drivers -
 *  confirm against DriverManager.
 */
static const int drivers_timeout = 10;
/**
 *  Looks up the Image Manager Driver used for the Repository, using
 *  image_driver_name as the key for DriverManager::get_driver().
 *  @return the Image Manager driver or 0 if not found
 */
const Driver<image_msg_t> * get() const
{
    const Driver<image_msg_t> * image_driver =
        DriverManager::get_driver(image_driver_name);

    return image_driver;
}
/**
 *  Acquires an image, updating its state.
 *  @param vm_id id of the VM the image is acquired for
 *  @param img pointer to the image, it should be locked
 *  @param attach true if attaching the image to a VM
 *  @param error string describing the error
 *  @return 0 on success
 */
int acquire_image(int vm_id, Image *img, bool attach, std::string& error);
/**
 *  Moves a file to an image in the repository.
 *  @param img image to be updated (its source attribute)
 *  @param source path of the disk file
 */
void move_image(Image *img, const std::string& source);
/**
 *  Formats an XML message for the MAD (the driver).
 *
 *  @param img_data Image XML representation
 *  @param ds_data Datastore XML representation
 *  @param extra_data additional XML formatted data for the driver
 *  @return the XML message
 */
static std::string format_message(const std::string& img_data,
const std::string& ds_data,
const std::string& extra_data);
// -------------------------------------------------------------------------
// Protocol implementation, processing messages from the driver.
// Every handler receives the message by value as a std::unique_ptr, so it
// takes ownership of it. _undefined is static; the rest are instance
// members.
// NOTE(review): each handler presumably maps to the driver message of the
// same name (STAT, CP, CLONE, ...) - confirm against the registration code.
// -------------------------------------------------------------------------
static void _undefined(std::unique_ptr<image_msg_t> msg);
void _stat(std::unique_ptr<image_msg_t> msg);
void _cp(std::unique_ptr<image_msg_t> msg);
void _clone(std::unique_ptr<image_msg_t> msg);
void _mkfs(std::unique_ptr<image_msg_t> msg);
void _rm(std::unique_ptr<image_msg_t> msg);
void _monitor(std::unique_ptr<image_msg_t> msg);
void _snap_delete(std::unique_ptr<image_msg_t> msg);
void _snap_revert(std::unique_ptr<image_msg_t> msg);
void _snap_flatten(std::unique_ptr<image_msg_t> msg);
void _increment_flatten(std::unique_ptr<image_msg_t> msg);
F #5516: New backup interface for OpenNebula co-authored-by: Frederick Borges <fborges@opennebula.io> co-authored-by: Neal Hansen <nhansen@opennebula.io> co-authored-by: Daniel Clavijo Coca <dclavijo@opennebula.io> co-authored-by: Pavel Czerný <pczerny@opennebula.systems> BACKUP INTERFACE ================= * Backups are exposed through a a special Datastore (BACKUP_DS) and Image (BACKUP) types. These new types can only be used for backup'ing up VMs. This approach allows to: - Implement tier based backup policies (backups made on different locations). - Leverage access control and quota systems - Support differnt storage and backup technologies * Backup interface for the VMs: - VM configures backups with BACKUP_CONFIG. This attribute can be set in the VM template or updated with updateconf API call. It can include: + BACKUP_VOLATILE: To backup or not volatile disks + FS_FREEZE: How the FS is freeze for running VMs (qemu-agent, suspend or none). When possible backups are crash consistent. + KEEP_LAST: keep only a given number of backups. - Backups are initiated by the one.vm.backup API call that requires the target Datastore to perform the backup (one-shot). This is exposed by the onevm backup command. - Backups can be periodic through scheduled actions. - Backup configuration is updated with one.vm.updateconf API call. * Restore interface: - Restores are initiated by the one.image.restore API call. This is exposed by oneimage restore command. - Restore include configurable options for the VM template + NO_IP: to not preserve IP addresses (but keep the NICs and network mapping) + NO_NIC: to not preserve network mappings - Other template attributes: + Clean PCI devices, including network configuration in case of TYPE=NIC attributes. By default it removes SHORT_ADDRESS and leave the "auto" selection attributes. + Clean NUMA_NODE, removes node id and cpu sets. 
It keeps the NUMA node - It is possible to restore single files stored in the repository by using the backup specific URL. * Sunstone (Ruby version) has been updated to expose this feautres. BACKUP DRIVERS & IMPLEMENTATION =============================== * Backup operation is implemented by a combination of 3 driver operations: - VMM. New (internal oned <-> one_vmm_exec.rb) to orchestrate backups for RUNNING VMs. - TM. This commit introduces 2 new operations (and their corresponding _live variants): + pre_backup(_live): Prepares the disks to be back'ed up in the repository. It is specific to the driver: (i) ceph uses the export operation; (ii) qcow2/raw uses snapshot-create-as and fs_freeze as needed. + post_backup(_live): Performs cleanning operations, i.e. KVM snapshots or tmp dirs. - DATASTORE. Each backup technology is represented by its corresponfing driver, that needs to implement: + backup: it takes the VM disks in file (qcow2) format and stores it the backup repository. + restore: it takes a backup image and restores the associated disks and VM template. + monitor: to gather available space in the repository + rm: to remove existing backups + stat: to return the "restored" size of a disk stored in a backup + downloader pseudo-URL handler: in the form <backup_proto>://<driver_snapshot_id>/<disk filename> BACKUP MANAGEMENT ================= Backup actions may potentially take some time, leaving some vmm_exec threads in use for a long time, stucking other vmm operations. Backups are planned by the scheduler through the sched action interface. Two attributes has been added to sched.conf: * MAX_BACKUPS max active backup operations in the cloud. No more backups will be started beyond this limit. * MAX_BACKUPS_HOST max number of backups per host * Fix onevm CLI to properly show and manage schedule actions. 
--schedule supports now, as well as relative times +<seconds_from_stime> onvm backup --schedule now -d 100 63 * Backup is added as VM_ADMIN_ACTIONS in oned.conf. Regular users needs to use the batch interface or request specific permissions Internal restructure of Scheduler: - All sched_actions interface is now in SchedActionsXML class and files. This class uses references to VM XML, and MUST be used in the same lifetime scope. - XMLRPC API calls for sched actions has been moved to ScheduledActionXML.cc as static functions. - VirtualMachineActionPool includes counters for active backups (total and per host). SUPPORTED PLATFORMS ==================== * hypervisor: KVM * TM: qcow2/shared/ssh, ceph * backup: restic, rsync Notes on Ceph * Ceph backups are performed in the following steps: 1. A snapshot of each disk is taken (group snapshots cannot be used as it seems we cannot export the disks afterwards) 2. Disks are export to a file 3. File is converted to qcow2 format 4. Disk files are upload to the backup repo TODO: * Confirm crash consistent snapshots cannot be used in Ceph TODO: * Check if using VM dir instead of full path is better to accomodate DS migrations i.e.: - Current path: /var/lib/one/datastores/100/53/backup/disk.0 - Proposal: 53/backup/disk.0 RESTIC DRIVER ============= Developed together with this feature is part of the EE edtion. * It supports the SFTP protocol, the following attributes are supported: - RESTIC_SFTP_SERVER - RESTIC_SFTP_USER: only if different from oneadmin - RESTIC_PASSWORD - RESTIC_IONICE: Run restic under a given ionice priority (class 2) - RESTIC_NICE: Run restic under a given nice - RESTIC_BWLIMIT: Limit restic upload/download BW - RESTIC_COMPRESSION: Restic 0.14 implements compression (three modes: off, auto, max). This requires repositories version 2. By default, auto is used (average compression without to much CPU usage) - RESTIC_CONNECTIONS: Sets the number of concurrent connections to a backend (5 by default). 
For high-latency backends this number can be increased. * downloader URL: restic://<datastore_id>/<snapshot_id>/<file_name> snapshot_id is the restic snapshot hash. To recover single disk images from a backup. This URLs support: - RESTIC_CONNECTIONS - RESTIC_BWLIMIT - RESTIC_IONICE - RESTIC_NICE These options needs to be defined in the associated datastore. RSYNC DRIVER ============= A rsync driver is included as part of the CE distribution. It uses the rsync tool to store backups in a remote server through SSH: * The following attributes are supported to configure the backup datastore: - RSYNC_HOST - RSYNC_USER - RSYNC_ARGS: Arguments to perform the rsync operatin (-aS by default) * downloader URL: rsync://<ds_id>/<vmid>/<hash>/<file> can be used to recover single files from an existing backup. (RSYNC_HOST and RSYN_USER needs to be set in ds_id EMULATOR_CPUS ============= This commit includes a non related backup feature: * Add EMULATOR_CPUS (KVM). This host (or cluster attribute) defines the CPU IDs where the emulator threads will be pinned. If this value is not defined the allocated CPU wll be used when using a PIN policy. (cherry picked from commit a9e6a8e000e9a5a2f56f80ce622ad9ffc9fa032b) F OpenNebula/one#5516: adding rsync backup driver (cherry picked from commit fb52edf5d009dc02b071063afb97c6519b9e8305) F OpenNebula/one#5516: update install.sh, add vmid to source, some polish Signed-off-by: Neal Hansen <nhansen@opennebula.io> (cherry picked from commit 6fc6f8a67e435f7f92d5c40fdc3d1c825ab5581d) F OpenNebula/one#5516: cleanup Signed-off-by: Neal Hansen <nhansen@opennebula.io> (cherry picked from commit 12f4333b833f23098142cd4762eb9e6c505e1340) F OpenNebula/one#5516: update downloader, default args, size check Signed-off-by: Neal Hansen <nhansen@opennebula.io> (cherry picked from commit 510124ef2780a4e2e8c3d128c9a42945be38a305) LL (cherry picked from commit d4fcd134dc293f2b862086936db4d552792539fa)
2022-09-09 12:46:44 +03:00
// Further driver message handlers; both take ownership of the message.
// _restore is an instance member, _log is static.
// NOTE(review): presumably they process the restore result and driver log
// messages respectively - confirm against the implementation.
void _restore(std::unique_ptr<image_msg_t> msg);
static void _log(std::unique_ptr<image_msg_t> msg);
/**
 *  This function is executed periodically to monitor Datastores and
 *  check sync actions. It is the callback given to timer_thread in the
 *  constructor.
 */
void timer_action();
};
#endif /*IMAGE_MANAGER_H_*/