/* ------------------------------------------------------------------------ */
/* Copyright 2002-2019, OpenNebula Project, OpenNebula Systems */
/* */
/* Licensed under the Apache License, Version 2.0 (the "License"); you may */
/* not use this file except in compliance with the License. You may obtain */
/* a copy of the License at */
/* */
/* http://www.apache.org/licenses/LICENSE-2.0 */
/* */
/* Unless required by applicable law or agreed to in writing, software */
/* distributed under the License is distributed on an "AS IS" BASIS, */
/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */
/* See the License for the specific language governing permissions and */
/* limitations under the License. */
/* ------------------------------------------------------------------------ */
#ifndef HOST_SHARE_H_
#define HOST_SHARE_H_
#include "ObjectXML.h"
#include "Template.h"
#include <time.h>
#include <set>
#include <map>
//Forward declarations
class Host;
class HostShareNUMA;
/* ------------------------------------------------------------------------ */
/* ------------------------------------------------------------------------ */
/**
* This class represents a HostShare capacity allocation from a VM. The following
* attributes are updated with the final allocation in the Host:
* - topology, number of sockets, cores and threads
* - pci, with device address
* - nodes with the numa nodes configured for the VM
*
* NUMA node requests are described by an attribute:
*
* NUMA_NODE = [ TOTAL_CPUS=, MEMORY="...", CPUS="...", NODE_ID="...",
* MEMORY_NODE_ID="..." ]
*
* CPUS: list of CPU IDs to pin the vCPUs in this host
* NODE_ID: the ID of the numa node in the host to pin this virtual node
* MEMORY_NODE_ID: the ID of the node to allocate memory for this virtual node
*/
struct HostShareCapacity
{
int vmid;
unsigned int vcpu;
long long cpu;
long long mem;
long long disk;
vector<VectorAttribute *> pci;
VectorAttribute * topology;
vector<VectorAttribute *> nodes;
};
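/* Illustrative only: a minimal sketch of how a capacity request could be
* filled in before testing it against a share. The field values are
* hypothetical; units follow the usage counters documented in HostShare:
*
* HostShareCapacity sr;
*
* sr.vmid = 42;
* sr.vcpu = 4; // number of virtual CPUs
* sr.cpu = 400; // CPU capacity, in percentage (100 per vCPU here)
* sr.mem = 4194304; // memory in KB (4 GB)
* sr.disk = 10240; // disk in MB (10 GB)
*
* sr.topology = nullptr; // no explicit TOPOLOGY requested
*/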
/* -------------------------------------------------------------------------- */
/* -------------------------------------------------------------------------- */
class HostShareDatastore : public Template
{
public:
HostShareDatastore() : Template(false, '=', "DATASTORES"){};
virtual ~HostShareDatastore(){};
};
/* -------------------------------------------------------------------------- */
/* -------------------------------------------------------------------------- */
/**
* This class represents a PCI DEVICE list for the host. The list is in the
* form:
* <PCI>
* <DOMAIN> PCI address domain
* <BUS> PCI address bus
* <SLOT> PCI address slot
* <FUNCTION> PCI address function
* <ADDRESS> PCI address, bus, slot and function
* <VENDOR> ID of PCI device vendor
* <DEVICE> ID of PCI device
* <CLASS> ID of PCI device class
* <VMID> ID of the VM using this device, -1 if free
*
* The monitor probe may report additional information such as VENDOR_NAME,
* DEVICE_NAME, CLASS_NAME...
*/
class HostSharePCI : public Template
{
public:
HostSharePCI() : Template(false, '=', "PCI_DEVICES"){};
virtual ~HostSharePCI()
{
map<string, PCIDevice *>::iterator it;
for (it=pci_devices.begin(); it != pci_devices.end(); it++)
{
delete it->second;
};
};
/**
* Builds the devices list from its XML representation. This function
* is used when importing it from the DB.
* @param node xmlNode for the template
* @return 0 on success
*/
int from_xml_node(const xmlNodePtr node);
/**
* Test whether this PCI device set has the requested devices available.
* @param devs list of requested devices by the VM.
* @return true if all the devices are available.
*/
bool test(const vector<VectorAttribute *> &devs) const;
/**
* Assign the requested devices to the given VM. The assigned devices will
* be labeled with the VM and the PCI attribute of the VM extended with
* the address of the assigned devices.
* @param devs list of requested PCI devices, will include address of
* assigned devices.
* @param vmid of the VM
*/
void add(vector<VectorAttribute *> &devs, int vmid);
/**
* Remove the VM assignment from the PCI device list
*/
void del(const vector<VectorAttribute *> &devs);
/**
* Updates the PCI list with monitor data, it will create or
* remove PCIDevices as needed.
*/
void set_monitorization(vector<VectorAttribute*> &pci_att);
/**
* Prints the PCI device list to an output stream. This function is used
* for logging purposes and *not* for generating DB content.
*/
friend ostream& operator<<(ostream& o, const HostSharePCI& p);
/**
* Gets a 4 hex digits value from attribute
* @param name of the attribute
* @param pci_device VectorAttribute representing the device
* @param value the parsed value
* @return 0 if not found, -1 on syntax error, >0 for a valid hex value
*/
static int get_pci_value(const char * name,
const VectorAttribute * pci_device,
unsigned int& value);
/**
* Sets the PCI device address in the Virtual Machine as follows:
* - VM_DOMAIN: 0x0000
* - VM_BUS: dbus or VM_BUS in PCI attribute
* - VM_SLOT: PCI_ID + 1
* - VM_FUNCTION: 0
* - VM_ADDRESS: BUS:SLOT.0
* @param pci_device to set the address in
* @param dbus default bus if not set in the PCI attribute
* (PCI_PASSTHROUGH_BUS in oned.conf)
* @return -1 if the bus is not a valid hex value, 0 on success
*/
static int set_pci_address(VectorAttribute * pci_device, const string& dbus);
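/* For example (values assumed for illustration): with PCI_ID = 2 and a
* default bus "01", the device would get VM_BUS = 0x01, VM_SLOT = 0x03
* (PCI_ID + 1), VM_FUNCTION = 0 and VM_ADDRESS = "01:03.0". */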
private:
/**
* Sets the internal class structures from the template
*/
void init();
/**
* Internal structure to represent PCI devices for fast look up and
* update
*/
struct PCIDevice
{
PCIDevice(VectorAttribute * _attrs);
~PCIDevice(){};
unsigned int vendor_id;
unsigned int device_id;
unsigned int class_id;
int vmid;
string address;
VectorAttribute * attrs;
};
map <string, PCIDevice *> pci_devices;
};
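/* Illustrative only: the expected test/add/del lifecycle of a PCI request
* against this list, sketched from the method contracts above:
*
* HostSharePCI host_pci;
* vector<VectorAttribute *> devs; // PCI = [ VENDOR = "...", ... ] from VM
*
* if ( host_pci.test(devs) ) // are all the requested devices free?
* {
* host_pci.add(devs, vmid); // label devices, set address in attributes
* }
*
* host_pci.del(devs); // release the devices once the VM is gone
*/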
/* -------------------------------------------------------------------------- */
/* -------------------------------------------------------------------------- */
/**
* This class represents a NUMA node in a hypervisor, described by the
* following attributes:
* NODE_ID = 0
* HUGEPAGE = [ SIZE = "2048", PAGES = "0", FREE = "0"]
* HUGEPAGE = [ SIZE = "1048576", PAGES = "0", FREE = "0"]
* CORE = [ ID = "3", CPUS = "3:-1,7:-1", FREE = 2]
* CORE = [ ID = "1", CPUS = "1:23,5:-1", FREE = 0 ]
* CORE = [ ID = "2", CPUS = "2:47,6:-1", FREE = 1]
* CORE = [ ID = "0", CPUS = "0:23,4:-1", FREE = 0]
* MEMORY = [ TOTAL = "66806708", FREE = "390568", USED = "66416140",
* DISTANCE = "0 1", USAGE = "8388608" ]
*
* - NODE_ID
* - HUGEPAGE is the total PAGES and FREE hugepages of a given SIZE in the node
* - CORE is a CPU core with its ID and sibling CPUs for HT architectures
* - MEMORY is the total, used and free memory in the node, the distance to
* the other nodes and the memory allocated to VMs by oned (USAGE)
*/
class HostShareNode : public Template
{
public:
HostShareNode() : Template(false, '=', "NODE"){};
HostShareNode(unsigned int i) : Template(false, '=', "NODE"), node_id(i)
{
replace("NODE_ID", i);
};
virtual ~HostShareNode(){};
/**
* Builds the node from its XML representation. This function is used when
* loading the host from the DB.
* @param node xmlNode for the template
* @return 0 on success
*/
int from_xml_node(const xmlNodePtr &node);
/**
* Get free capacity of the node
* @param fcpus number of free virtual cores
* @param memory free in the node
* @param tc threads per virtual core
*/
void free_capacity(unsigned int &fcpus, long long &memory, unsigned int tc);
void free_dedicated_capacity(unsigned int &fcpus, long long &memory);
/**
* Allocate tcpus with a dedicated policy
* @param id of the VM allocating the CPUs
* @param tcpus total number of cpus
* @param c_s the resulting allocation string CPUS="0,4,2,6"
*
* @return 0 on success
*/
int allocate_dedicated_cpus(int id, unsigned int tcpus, std::string &c_s);
/**
* Allocate tcpus with a HT policy
* @param id of the VM allocating the CPUs
* @param tcpus total number of cpus
* @param tc allocate cpus in tc (threads/core) chunks
* @param c_s the resulting allocation string CPUS="0,4,2,6"
*
* @return 0 on success
*/
int allocate_ht_cpus(int id, unsigned int tcpus, unsigned int tc,
std::string &c_s);
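/* For example (an assumed layout): allocating tcpus = 4 with tc = 2 on a
* node with two free cores, CPUS = "0:-1,4:-1" and CPUS = "1:-1,5:-1",
* would take both cores in threads/core chunks and set c_s = "0,4,1,5". */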
/**
* Remove allocation for the given CPUs
* @param cpu_ids list of cpu ids to free, comma separated
*/
void del_cpu(const std::string &cpu_ids);
/**
* Remove memory allocation
* @param memory to free
*/
void del_memory(long long memory)
{
mem_usage -= memory;
}
/**
* Reserve CPU IDs
* @param rcpus list of reserved cpu ids (comma separated)
*/
void reserve_cpus(const std::string& rcpus);
/**
* Prints the NUMA node to an output stream.
*/
friend ostream& operator<<(ostream& o, const HostShareNode& n);
private:
friend class HostShareNUMA;
//This struct represents a core and its allocation status
struct Core
{
/**
* Initializes the structure from the CORE attributes:
* @param _i ID of core
* @param _c CPUS list <cpu_id>:<vm_id>
* @param _f FREE cpus in core. If -1 it will be derived from CPUS
*/
Core(unsigned int _i, const std::string& _c, int _f);
/**
* ID of this CPU CORE
*/
unsigned int id;
/**
* Number of free cpus in the core. A VM can use one thread but
* reserve all the cpus in the core when using the dedicated policy.
*/
unsigned int free_cpus;
/**
* cpu_id - vm_id map. Represents the assignment status of each
* CPU thread; (-1) means the thread is free.
*/
std::map<unsigned int, int> cpus;
/**
* @return a VectorAttribute representing this core in the form:
* CORE = [ ID = "3", CPUS = "3:-1,7:-1", FREE = 2]
*/
VectorAttribute * to_attribute();
};
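/* For example, in CORE = [ ID = "1", CPUS = "1:23,5:-1", FREE = 0 ] above,
* cpu 1 is pinned to VM 23 and cpu 5 is unassigned (-1), yet FREE = 0: with
* a dedicated pin policy the VM reserves every cpu of the core. */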
//This struct represents the hugepages available in the node
struct HugePage
{
unsigned long size_kb;
unsigned int nr;
unsigned int free;
unsigned long usage;
unsigned long allocated;
/**
* @return a VectorAttribute representing this hugepage in the form:
* HUGEPAGE = [ SIZE = "1048576", PAGES = "200", FREE = "100",
* USAGE = "100"]
*/
VectorAttribute * to_attribute();
};
/**
* ID of this node as reported by the Host
*/
unsigned int node_id;
/**
* Threads per core in this node
*/
unsigned int threads_core = 1;
/**
* CPU Cores in this node
*/
std::map<unsigned int, struct Core> cores;
/**
* Huge pages configured in this node
*/
std::map<unsigned long, struct HugePage> pages;
/**
* Memory information for this node:
* - total, free and used memory as reported by IM (meminfo file)
* - mem_used memory allocated to VMs by oned in this node
* - distance sorted list of nodes, first is the closest (this one)
*/
long long total_mem = 0;
long long free_mem = 0;
long long used_mem = 0;
long long mem_usage = 0;
std::vector<unsigned int> distance;
/**
* Temporary allocations on the node. These are used by the scheduler
*/
unsigned int allocated_cpus = 0;
long long allocated_memory = 0;
//--------------------------------------------------------------------------
//--------------------------------------------------------------------------
/**
* Creates a new Core element and associates it to this node. If the
* core already exists this function does nothing
* @param id of core
* @param cpus string representing the cpu_id and allocation
* @param free cpus in core -1 to derive them from cpus string
* @param update if true also adds the core to the object Template
*/
void set_core(unsigned int id, std::string& cpus, int free, bool update);
/**
* Regenerate the template representation of the CORES for this node.
*/
void update_cores();
/**
* Regenerate the template representation of the HUGEPAGES for this node.
*/
void update_hugepages();
/**
* Creates a new HugePage element and associates it to this node. If a
* hugepage of the same size already exists this function does nothing
* @param size in kb of the page
* @param nr number of pages
* @param fr free pages
* @param usage pages allocated to VMs by oned
* @param update if true also adds the page to the object Template
*/
void set_hugepage(unsigned long size, unsigned int nr, unsigned int fr,
unsigned long usage, bool update);
void update_hugepage(unsigned long size);
/**
* Adds a new memory attribute based on the monitoring attributes and
* current mem usage.
*/
void set_memory();
/**
* Updates the memory usage for the node in the template representation
*/
void update_memory();
};
/* -------------------------------------------------------------------------- */
/* -------------------------------------------------------------------------- */
/**
* This class includes a list of all the NUMA nodes in the host, structured
* as follows:
*
* <NUMA_NODES>
* <NODE>
* <ID>0</ID>
* <HUGEPAGE>
* <SIZE>2048</SIZE>
* <PAGES>0</PAGES>
* <FREE>0</FREE>
* </HUGEPAGE>
* ...
* <CORE>
* <ID>3</ID>
* <CPUS>3,7</CPUS>
* </CORE>
* ...
* </NODE>
* <NODE>
* <ID>1</ID>
* ...
* </NODE>
* </NUMA_NODES>
*/
class HostShareNUMA
{
public:
HostShareNUMA():threads_core(1){};
virtual ~HostShareNUMA()
{
for (auto it = nodes.begin(); it != nodes.end(); ++it)
{
delete it->second;
}
};
/**
* Builds the NUMA nodes from its XML representation. This function is used
* when loading the host from the DB.
* @param node xmlNode for the template
* @return 0 on success
*/
int from_xml_node(const vector<xmlNodePtr> &ns);
/**
* Updates the NUMA node information with monitor data
* @param ht template with the information returned by monitor probes.
*/
void set_monitorization(Template &ht);
/**
* @param idx of the node
* @return the NUMA node for the given index. If the node does not
* exist it is created
*/
HostShareNode& get_node(unsigned int idx);
/**
* Function to print the HostShare object into a string in
* XML format
* @param xml the resulting XML string
* @return a reference to the generated string
*/
string& to_xml(string& xml) const;
/**
* Test if the virtual nodes and topology request fits in the host.
* @param sr the share request with the node/topology
* @return true if the nodes fit in the host, false otherwise
*/
bool test(HostShareCapacity &sr)
{
return make_topology(sr, -1, false) == 0;
}
/**
* Assign the requested nodes to the host.
* @param sr the share request with the node/topology
* @param vmid of the VM
*/
void add(HostShareCapacity &sr)
{
make_topology(sr, sr.vmid, true);
}
/**
* Remove the VM assignment from the NUMA nodes
*/
void del(HostShareCapacity &sr);
/**
* Reserve the given CPU IDs in all the NUMA nodes of the host
* @param cpu_ids list of reserved cpu ids (comma separated)
*/
void reserve_cpus(const std::string &cpu_ids)
{
for (auto it = nodes.begin(); it != nodes.end(); ++it)
{
it->second->reserve_cpus(cpu_ids);
it->second->update_cores();
}
};
/**
* Prints the NUMA nodes to an output stream.
*/
friend ostream& operator<<(ostream& o, const HostShareNUMA& n);
private:
/**
* Number of threads per core of the host
*/
unsigned int threads_core;
std::map<unsigned int, HostShareNode *> nodes;
/* ---------------------------------------------------------------------- */
/* ---------------------------------------------------------------------- */
/**
* Computes the virtual topology for this VM in this host based on:
* - the user preferences, TOPOLOGY/[SOCKETS, CORES, THREADS]
* - the architecture of the Host (threads per core)
* - the allocation (pin) policy
*
* @param sr the resource allocation request
* @param vm_id of the VM making the request
* @param do_alloc actually allocate the nodes (true) or just test (false)
* @return 0 on success (the VM was allocated), -1 otherwise
*/
int make_topology(HostShareCapacity &sr, int vm_id, bool do_alloc);
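/* Illustrative only: a hypothetical 4 vCPU request with 2 threads/core on
* this host could result in a virtual topology of 1 socket x 2 cores x
* 2 threads, and a NUMA_NODE extended with CPUS = "0,4,1,5" and the host
* NODE_ID the threads were pinned to. */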
/**
* This is an internal structure to represent a virtual node allocation
* request and the resulting schedule
*/
struct NUMANodeRequest
{
VectorAttribute * attr;
unsigned int total_cpus;
long long memory;
//NUMA node to allocate CPU cores from
int node_id;
std::string cpu_ids;
//NUMA node to allocate memory from
int mem_node_id;
};
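/**
* Schedules a virtual node request in the host nodes. The parameter
* semantics below are inferred from the surrounding declarations:
* @param nr the virtual node request, updated with the resulting schedule
* @param thr threads per virtual core
* @param dedicated use a dedicated (core) allocation
* @param hpsz_kb hugepage size requested by the VM (in kb)
* @param pci node ids of the requested PCI devices (assumed to be used to
* favor nodes close to those devices)
* @param do_alloc actually allocate the request (true) or just test (false)
* @return true if the request can be scheduled
*/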
bool schedule_nodes(NUMANodeRequest &nr, unsigned int thr, bool dedicated,
unsigned long hpsz_kb, std::set<unsigned int> &pci, bool do_alloc);
};
/* -------------------------------------------------------------------------- */
/* -------------------------------------------------------------------------- */
/**
* The HostShare class. It represents a logical partition of a host...
*/
class HostShare : public ObjectXML
{
public:
HostShare();
~HostShare(){};
/**
* Pin policy for the host
*/
enum PinPolicy
{
PP_NONE = 0, /**< No pin. Default. */
PP_CORE = 1, /**< vCPUs are assigned to host cores exclusively */
PP_THREAD = 2, /**< vCPUS are assigned to host threads */
PP_SHARED = 3 /**< vCPUs are assigned to a set of host threads */
};
static PinPolicy str_to_pin_policy(std::string& pp_s);
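/* A minimal usage sketch; the exact strings accepted ("CORE", "THREAD",
* "SHARED", anything else mapping to PP_NONE) are an assumption:
*
* std::string pp_s = "CORE";
*
* if ( HostShare::str_to_pin_policy(pp_s) != HostShare::PP_NONE )
* {
* // the VM requested pinned vCPUs
* }
*/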
/**
* Rebuilds the object from an xml node
* @param node The xml node pointer
*
* @return 0 on success, -1 otherwise
*/
int from_xml_node(const xmlNodePtr node);
/**
* Add a VM capacity to this share
* @param sr requested capacity by the VM
*/
void add(HostShareCapacity &sr)
{
cpu_usage += sr.cpu;
mem_usage += sr.mem;
disk_usage += sr.disk;
pci.add(sr.pci, sr.vmid);
numa.add(sr);
running_vms++;
}
/**
* Delete VM capacity from this share
* @param sr requested capacity by the VM
*/
void del(HostShareCapacity &sr)
{
cpu_usage -= sr.cpu;
mem_usage -= sr.mem;
disk_usage -= sr.disk;
pci.del(sr.pci);
numa.del(sr);
running_vms--;
}
/**
* Check if this share can host a VM.
* @param sr requested capacity by the VM
* @param error Returns the error reason, if any
*
* @return true if the share can host the VM
*/
bool test(HostShareCapacity& sr, string& error)
{
if ( !test_compute(sr.cpu, sr.mem, error) )
{
return false;
}
if ( !test_pci(sr.pci, error) )
{
return false;
}
if ( !test_numa(sr, error) )
{
return false;
}
return true;
}
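/* Illustrative only: the intended test-then-add flow for a share, based on
* the method contracts in this header:
*
* string error;
*
* if ( share.test(sr, error) ) // compute + PCI + NUMA checks
* {
* share.add(sr); // commit usage counters, PCI and NUMA state
* }
* else
* {
* // error holds the reason, e.g. "Not enough CPU: ..."
* }
*/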
/**
* Function to write a HostShare to an output stream
*/
friend ostream& operator<<(ostream& os, HostShare& hs);
/**
* Function to print the HostShare object into a string in
* XML format
* @param xml the resulting XML string
* @return a reference to the generated string
*/
string& to_xml(string& xml) const;
/**
* Set host information based on the monitoring attributes
* sent by the probes.
*/
void set_ds_monitorization(const vector<VectorAttribute*> &ds_att);
void set_pci_monitorization(vector<VectorAttribute*> &pci_att)
{
pci.set_monitorization(pci_att);
}
void set_numa_monitorization(Template &ht)
{
numa.set_monitorization(ht);
}
/**
* Resets capacity values of the share
*/
void reset_capacity()
{
total_cpu = 0;
total_mem = 0;
max_cpu = 0;
max_mem = 0;
free_cpu = 0;
free_mem = 0;
used_cpu = 0;
used_mem = 0;
};
/**
* Set the capacity attributes of the share. CPU and Memory may reserve some
* capacity according to RESERVED_CPU and RESERVED_MEM. These values can be
* either absolute or a percentage.
*
* Share values are read from the Host template returned by the monitoring
* probes. The values are removed from the template.
*
* @param host for this share, capacity values are removed from the template
* @param cluster_rcpu reserved cpu default cluster value
* @param cluster_rmem reserved mem default cluster value
*/
void set_capacity(Host *host, const string& cluster_rcpu,
const string& cluster_rmem);
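/* A worked example of the reservation arithmetic (values hypothetical,
* percentage and absolute semantics as described above): with
* TOTAL_CPU = 800 and RESERVED_CPU = "10%", max_cpu = 800 - 80 = 720;
* with an absolute RESERVED_CPU = "100", max_cpu = 800 - 100 = 700. */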
/**
* Update the capacity attributes when the RESERVED_CPU and RESERVED_MEM
* are updated.
* @param host for this share
* @param cluster_rcpu reserved cpu default cluster value
* @param cluster_rmem reserved mem default cluster value
*/
void update_capacity(Host *host, const string& cluster_rcpu,
const string& cluster_rmem);
/**
* Reserve CPUs in the numa nodes
*/
void reserve_cpus(const std::string &cpu_ids)
{
numa.reserve_cpus(cpu_ids);
}
/**
* Return the number of running VMs in this host
*/
long long get_running_vms()
{
return running_vms;
};
private:
long long disk_usage; /**< Disk allocated to VMs (in MB). */
long long mem_usage; /**< Memory allocated to VMs (in KB) */
long long cpu_usage; /**< CPU allocated to VMs (in percentage) */
long long total_mem; /**< Total memory capacity (in KB) */
long long total_cpu; /**< Total cpu capacity (in percentage) */
long long max_disk; /**< Total disk capacity (in MB) */
long long max_mem; /**< Total memory capacity (in KB) +/- reserved */
long long max_cpu; /**< Total cpu capacity (in percentage) +/- reserved*/
long long free_disk; /**< Free disk from the IM monitor */
long long free_mem; /**< Free memory from the IM monitor */
long long free_cpu; /**< Free cpu from the IM monitor */
long long used_disk; /**< Used disk from the IM monitor */
long long used_mem; /**< Used memory from the IM monitor */
long long used_cpu; /**< Used cpu from the IM monitor */
long long running_vms;/**< Number of running VMs in this Host */
HostShareDatastore ds;
HostSharePCI pci;
HostShareNUMA numa;
/**
* Check if this share can host a VM, testing only the compute capacity
* (CPU and memory).
* @param cpu requested by the VM
* @param mem requested by the VM
* @param error Returns the error reason, if any
*
* @return true if the share can host the VM
*/
bool test_compute(int cpu, long long mem, std::string &error) const
{
bool cpu_fit = (max_cpu - cpu_usage ) >= cpu;
bool mem_fit = (max_mem - mem_usage ) >= mem;
bool fits = cpu_fit && mem_fit;
if ( fits )
{
return true;
}
ostringstream oss;
if (!cpu_fit)
{
oss << "Not enough CPU: " << cpu << "/" << max_cpu - cpu_usage;
}
else if (!mem_fit)
{
oss << "Not enough memory: " << mem << "/" << max_mem - mem_usage;
}
error = oss.str();
return false;
}
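/**
* Check if this share can host a VM, testing only the PCI devices.
* @param pci_devs requested by the VM
* @param error Returns the error reason, if any
*
* @return true if the requested devices are available
*/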
bool test_pci(vector<VectorAttribute *>& pci_devs, string& error) const
{
bool fits = pci.test(pci_devs);
error = "Unavailable PCI device.";
return fits;
}
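/**
* Check if this share can host a VM, testing only the NUMA topology request.
* @param sr requested capacity by the VM
* @param error Returns the error reason, if any
*
* @return true if the topology request fits in the host nodes
*/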
bool test_numa(HostShareCapacity &sr, string& error)
{
bool fits = numa.test(sr);
error = "Cannot allocate NUMA topology";
return fits;
}
};
#endif /*HOST_SHARE_H_*/