mirror of
https://github.com/OpenNebula/one.git
synced 2025-01-21 18:03:38 +03:00
45f28e15d3
VM topology can be defined: - To use a NUMA node and huge pages. Example: TOPOLOGY = [ NODE_AFFINITY = 0, HUGEPAGE_SIZE = 2 ] - To use huge pages without specifying the NUMA node. Example: TOPOLOGY = [ HUGEPAGE_SIZE = 2 ] In any case OpenNebula will: - look (or check) for a NUMA node with enough free huge pages - will pick the NUMA node with more free pages (if no NUMA node is specified) - configure the VM with CPU affinity to the selected NUMA node (cherry picked from commit 1b3b88ed4e4087b20a2925fbe42ad362aaee3fcd)
507 lines
15 KiB
C++
507 lines
15 KiB
C++
/* ------------------------------------------------------------------------ */
|
|
/* Copyright 2002-2023, OpenNebula Project, OpenNebula Systems */
|
|
/* */
|
|
/* Licensed under the Apache License, Version 2.0 (the "License"); you may */
|
|
/* not use this file except in compliance with the License. You may obtain */
|
|
/* a copy of the License at */
|
|
/* */
|
|
/* http://www.apache.org/licenses/LICENSE-2.0 */
|
|
/* */
|
|
/* Unless required by applicable law or agreed to in writing, software */
|
|
/* distributed under the License is distributed on an "AS IS" BASIS, */
|
|
/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */
|
|
/* See the License for the specific language governing permissions and */
|
|
/* limitations under the License. */
|
|
/* ------------------------------------------------------------------------ */
|
|
|
|
#ifndef HOST_SHARE_NUMA_H_
|
|
#define HOST_SHARE_NUMA_H_
|
|
|
|
#include "ObjectXML.h"
#include "Template.h"
#include "HostShareCapacity.h"

#include <limits>
#include <map>
#include <ostream>
#include <set>
#include <string>
#include <vector>
|
|
|
|
/* -------------------------------------------------------------------------- */
|
|
/* -------------------------------------------------------------------------- */
|
|
|
|
/**
|
|
* This class represents the NUMA nodes in a hypervisor for the following attr:
|
|
* NODE_ID = 0
|
|
* HUGEPAGE = [ SIZE = "2048", PAGES = "0", USAGE = "0" ]
|
|
* HUGEPAGE = [ SIZE = "1048576", PAGES = "0", USAGE = "0" ]
|
|
* CORE = [ ID = "3", CPUS = "3:-1,7:-1", FREE = 2, DEDICATED="NO"]
|
|
* CORE = [ ID = "1", CPUS = "1:23,5:-1", FREE = 0, DEDICATED="YES" ]
|
|
* CORE = [ ID = "2", CPUS = "2:47,6:-1", FREE = 1, DEDICATED="NO"]
|
|
* CORE = [ ID = "0", CPUS = "0:23,4:-1", FREE = 0, DEDICATED="NO"]
|
|
* MEMORY = [ TOTAL = "66806708", DISTANCE = "0 1", USAGE = "8388608" ]
|
|
*
|
|
* - NODE_ID
|
|
* - HUGEPAGE is the total PAGES and USAGE hugepages of a given SIZE in the node
|
|
* - CORE is a CPU core with its ID and sibling CPUs for HT architectures
|
|
* - USAGE - hugepages or memory allocated by oned
|
|
*
|
|
* The free hugepages and memory capacity are stored in the monitoring node,
|
|
* see HostMonitoringTemplate.h
|
|
*/
|
|
class HostShareNode : public Template
|
|
{
|
|
public:
|
|
HostShareNode()
|
|
: Template(false, '=', "NODE")
|
|
, node_id(std::numeric_limits<unsigned int>::max())
|
|
{}
|
|
|
|
HostShareNode(unsigned int i)
|
|
: Template(false, '=', "NODE")
|
|
, node_id(i)
|
|
{
|
|
replace("NODE_ID", i);
|
|
}
|
|
|
|
virtual ~HostShareNode() = default;
|
|
|
|
/**
|
|
* Builds the node from its XML representation. This function is used when
|
|
* loading the host from the DB.
|
|
* @param node xmlNode for the template
|
|
* @param _vt vms_thread
|
|
* @return 0 on success
|
|
*/
|
|
int from_xml_node(const xmlNodePtr &node, unsigned int _vt);
|
|
|
|
/**
|
|
* Get free capacity of the node
|
|
* @param fcpus number of free virtual cores
|
|
* @param memory free in the node
|
|
* @param threads_core per virtual core
|
|
*/
|
|
void free_capacity(unsigned int &fcpus, long long &memory, unsigned int tc);
|
|
|
|
void free_dedicated_capacity(unsigned int &fcpus, long long &memory);
|
|
|
|
/**
|
|
* Allocate tcpus with a dedicated policy
|
|
* @param id of the VM allocating the CPUs
|
|
* @param tcpus total number of cpus
|
|
* @param c_s the resulting allocation string CPUS="0,4,2,6"
|
|
*
|
|
* @return 0 on success
|
|
*/
|
|
int allocate_dedicated_cpus(int id, unsigned int tcpus, std::string &c_s);
|
|
|
|
/**
|
|
* Allocate tcpus with a HT policy
|
|
* @param id of the VM allocating the CPUs
|
|
* @param tcpus total number of cpus
|
|
* @param tc allocate cpus in tc (threads/core) chunks
|
|
* @param c_s the resulting allocation string CPUS="0,4,2,6"
|
|
*
|
|
* @return 0 on success
|
|
*/
|
|
int allocate_ht_cpus(int id, unsigned int tcpus, unsigned int tc,
|
|
std::string &c_s);
|
|
|
|
/**
|
|
* Remove allocation for the given CPUs
|
|
* @param cpu_ids list of cpu ids to free, comma separated
|
|
* @param vmid of the VM using the threads
|
|
*/
|
|
void del_cpu(const std::string &cpu_ids, unsigned int vmid);
|
|
|
|
/**
|
|
* Remove memory allocation
|
|
* @param memory to free
|
|
*/
|
|
void del_memory(long long memory)
|
|
{
|
|
mem_usage -= memory;
|
|
}
|
|
|
|
/**
|
|
* Reserve CPU IDs
|
|
* @param rcpus list of reserved cpu ids (comma separated)
|
|
*/
|
|
void reserve_cpus(const std::string& rcpus);
|
|
|
|
/**
|
|
* List the cpus of this node (as a , separated string)
|
|
* @param inc_reserved include reserved CPUs or not
|
|
*/
|
|
void ls_cpus(bool inc_reserved, std::string &cpu_s);
|
|
|
|
/**
|
|
* Prints the NUMA node to an output stream.
|
|
*/
|
|
friend std::ostream& operator<<(std::ostream& o, const HostShareNode& n);
|
|
|
|
private:
|
|
friend class HostShareNUMA;
|
|
|
|
//This stuct represents a core and its allocation status
|
|
struct Core
|
|
{
|
|
/**
|
|
* Initializes the structure from the CORE attributes:
|
|
* @param _i ID of core
|
|
* @param _c CPUS list <cpu_id>:<vm_id>
|
|
* @param _vt VMS per thread
|
|
* @param _d true if the core is dedicated to a VM
|
|
*/
|
|
Core(unsigned int _i, const std::string& _c, unsigned int _vt, bool _d);
|
|
|
|
/**
|
|
* ID of this CPU CORE
|
|
*/
|
|
unsigned int id;
|
|
|
|
/**
|
|
* Number of free & used cpus in the core.
|
|
*/
|
|
unsigned int free_cpus;
|
|
|
|
unsigned int used_cpus;
|
|
|
|
/**
|
|
* Number of VMs that can be allocated per physical thread.
|
|
*/
|
|
unsigned int vms_thread;
|
|
|
|
/**
|
|
* This core is dedicated to one VM
|
|
*/
|
|
bool dedicated;
|
|
|
|
/**
|
|
* CPU threads usage map
|
|
* t0 -> [vm1, vm1]
|
|
* t1 -> [vm2]
|
|
* t3 -> [vm3]
|
|
* t4 -> []
|
|
*
|
|
* When no over commitment is used only 1 VM is assigned ot a thread
|
|
*/
|
|
std::map<unsigned int, std::multiset<unsigned int> > cpus;
|
|
|
|
/**
|
|
* Set of reserved threads in this core
|
|
*/
|
|
std::set<unsigned int> reserved_cpus;
|
|
|
|
/**
|
|
* @return a VectorAttribute representing this core in the form:
|
|
* CORE = [ ID = "3", CPUS = "3:-1,7:-1", FREE = 2, DEDICATED=no]
|
|
*/
|
|
VectorAttribute * to_attribute();
|
|
|
|
/**
|
|
* Compute and set the free/used cpus of the core
|
|
*/
|
|
void set_cpu_usage();
|
|
};
|
|
|
|
//This stuct represents the hugepages available in the node
|
|
struct HugePage
|
|
{
|
|
unsigned long size_kb;
|
|
|
|
unsigned int nr;
|
|
|
|
unsigned long usage;
|
|
unsigned long allocated;
|
|
|
|
/**
|
|
* @return a VectorAttribute representing this core in the form:
|
|
* HUGEPAGE = [ SIZE = "1048576", PAGES = "200", USAGE = "100"]
|
|
*/
|
|
VectorAttribute * to_attribute() const;
|
|
};
|
|
|
|
/**
|
|
* ID of this node as reported by the Host
|
|
*/
|
|
unsigned int node_id;
|
|
|
|
/**
|
|
* CPU Cores in this node
|
|
*/
|
|
std::map<unsigned int, struct Core> cores;
|
|
|
|
/**
|
|
* Huge pages configured in this node
|
|
*/
|
|
std::map<unsigned long, struct HugePage> pages;
|
|
|
|
/**
|
|
* Memory information for this node:
|
|
* - total_mem total memory available
|
|
* - mem_usage memory allocated to VMs by oned in this node
|
|
* - distance sorted list of nodes, first is the closest (this one)
|
|
*/
|
|
long long total_mem = 0;
|
|
|
|
long long mem_usage = 0;
|
|
|
|
std::vector<unsigned int> distance;
|
|
|
|
/**
|
|
* Temporal allocation on the node. This is used by the scheduling
|
|
*/
|
|
unsigned int allocated_cpus = 0;
|
|
long long allocated_memory = 0;
|
|
|
|
//--------------------------------------------------------------------------
|
|
//--------------------------------------------------------------------------
|
|
|
|
/**
|
|
* Creates a new Core element and associates it to this node. If the
|
|
* core already exists this function does nothing
|
|
* @param id of core
|
|
* @param cpus string representing the cpu_id and allocation
|
|
* @param vms_thread VMs per thread
|
|
* @param update if true also adds the core to the object Template
|
|
*/
|
|
void set_core(unsigned int id, std::string& cpus, unsigned int vms_thread,
|
|
bool dedicated, bool update);
|
|
|
|
/**
|
|
* Regenerate the template representation of the CORES for this node.
|
|
*/
|
|
void update_cores();
|
|
|
|
/**
|
|
* Regenerate the template representation of the HUGEPAGES for this node.
|
|
*/
|
|
void update_hugepages();
|
|
|
|
/**
|
|
* Creates a new HugePage element and associates it to this node. If a
|
|
* hugepage of the same size already exists this function does nothing
|
|
* @param size in kb of the page
|
|
* @param nr number of pages
|
|
* @param update if true also adds the page to the object Template
|
|
*/
|
|
void set_hugepage(unsigned long size, unsigned int nr,
|
|
unsigned long usage, bool update);
|
|
|
|
void update_hugepage(unsigned long size);
|
|
|
|
/**
|
|
* Adds a new memory attribute based on the moniroting attributes and
|
|
* current mem usage.
|
|
*/
|
|
void set_memory();
|
|
|
|
/**
|
|
* Updates the memory usage for the node in the template representation
|
|
*/
|
|
void update_memory();
|
|
};
|
|
|
|
/* -------------------------------------------------------------------------- */
|
|
/* -------------------------------------------------------------------------- */
|
|
|
|
/**
|
|
* This class includes a list of all NUMA nodes in the host, structured as
|
|
* follows:
|
|
*
|
|
* <NUMA_NODES>
|
|
* <NODE>
|
|
* <ID>0</ID>
|
|
* <HUGEPAGE>
|
|
* <SIZE>2048</SIZE>
|
|
* <PAGES>0</PAGES>
|
|
* <FREE>0</FREE>
|
|
* </HUGEPAGE>
|
|
* ...
|
|
* <CORE>
|
|
* <ID>3</ID>
|
|
* <CPUS>3,7</CPUS>
|
|
* </CORE>
|
|
* ...
|
|
* </NODE>
|
|
* <NODE>
|
|
* <ID>1</ID>
|
|
* ...
|
|
* </NODE>
|
|
* </NUMA_NODES>
|
|
*/
|
|
class HostShareNUMA
|
|
{
|
|
public:
|
|
HostShareNUMA():threads_core(1){};
|
|
|
|
virtual ~HostShareNUMA()
|
|
{
|
|
clear();
|
|
};
|
|
|
|
/**
|
|
* Builds the NUMA nodes from its XML representation. This function is used
|
|
* when loading the host from the DB.
|
|
* @param node xmlNode for the template
|
|
* @param _vt vms_thread
|
|
* @return 0 on success
|
|
*/
|
|
int from_xml_node(const std::vector<xmlNodePtr> &ns, unsigned int _vt);
|
|
|
|
/**
|
|
* Updates the NUMA node information with monitor data
|
|
* @param ht template with the information returned by monitor probes.
|
|
*/
|
|
void set_monitorization(Template &ht, unsigned int vms_thread);
|
|
|
|
/**
|
|
* @param idx of the node
|
|
* @return the NUMA node for the the fiven index. If the node does not
|
|
* exit it is created
|
|
*/
|
|
HostShareNode& get_node(unsigned int idx);
|
|
|
|
/**
|
|
* Function to print the HostShare object into a string in
|
|
* XML format
|
|
* @param xml the resulting XML string
|
|
* @return a reference to the generated string
|
|
*/
|
|
std::string& to_xml(std::string& xml) const;
|
|
|
|
/**
|
|
* Test if the virtual nodes and topology request fits in the host.
|
|
* @param sr the share request with the node/topology
|
|
* @return true if the nodes fit in the host, false otherwise
|
|
*/
|
|
bool test(HostShareCapacity &sr) const
|
|
{
|
|
return (const_cast<HostShareNUMA *>(this))->make_topology(sr, -1, false) == 0;
|
|
}
|
|
|
|
/**
|
|
* Assign the requested nodes to the host.
|
|
* @param sr the share request with the node/topology
|
|
* @param vmid of the VM
|
|
*/
|
|
void add(HostShareCapacity &sr)
|
|
{
|
|
make_topology(sr, sr.vmid, true);
|
|
}
|
|
|
|
/**
|
|
* Remove the VM assignment from the NUMA nodes
|
|
*/
|
|
void del(HostShareCapacity &sr);
|
|
|
|
/**
|
|
* Reserves the provided CPUs and frees any CPUS not included in the list
|
|
* @param cpu_ids list of cpus to reserve "0,3,5"
|
|
*/
|
|
void reserve_cpus(const std::string &cpu_ids)
|
|
{
|
|
for (auto it = nodes.begin(); it != nodes.end(); ++it)
|
|
{
|
|
it->second->reserve_cpus(cpu_ids);
|
|
|
|
it->second->update_cores();
|
|
}
|
|
};
|
|
|
|
/**
|
|
* Update the vms_thread in the cores and recomputes the cpu_usage based
|
|
* on the new value;
|
|
* @param vms_thread value
|
|
*/
|
|
void update_cpu_usage(unsigned int vms_thread);
|
|
|
|
void clear()
|
|
{
|
|
for (auto& node : nodes)
|
|
{
|
|
delete node.second;
|
|
}
|
|
|
|
nodes.clear();
|
|
}
|
|
/**
|
|
* Prints the NUMA nodes to an output stream.
|
|
*/
|
|
friend std::ostream& operator<<(std::ostream& o, const HostShareNUMA& n);
|
|
|
|
HostShareNUMA& operator=(const HostShareNUMA& other);
|
|
|
|
HostShareNUMA& operator=(HostShareNUMA&& other) noexcept;
|
|
|
|
private:
|
|
/**
|
|
* Number of threads per core of the host
|
|
*/
|
|
unsigned int threads_core;
|
|
|
|
std::map<unsigned int, HostShareNode *> nodes;
|
|
|
|
/* ---------------------------------------------------------------------- */
|
|
/* ---------------------------------------------------------------------- */
|
|
/**
|
|
* Computes the virtual topology for this VM in this host based on:
|
|
* - user preferences TOPOLOGY/[SOCKETS, CORES, THREADS].
|
|
* - Architecture of the Host core_threads
|
|
* - allocation policy
|
|
*
|
|
* @param sr the resource allocation request
|
|
* @param vm_id of the VM making the request
|
|
* @param do_alloc actually allocate the nodes (true) or just test (false)
|
|
* @return 0 success (vm was allocated) -1 otherwise
|
|
*/
|
|
int make_topology(HostShareCapacity &sr, int vm_id, bool do_alloc);
|
|
|
|
/*
|
|
* Computes the virtual topology for this VM setting the affinity to a given
|
|
* NUMA node. If hugepages are used it checks that enough pages are available
|
|
* in the nod,
|
|
*
|
|
* @param sr the resource allocation request
|
|
* @param node_id of the NUMA node
|
|
* @param hpsz_kb size of the requested huge page (in KB) 0 if none
|
|
* @param do_alloc actually allocate the node (true) or just test (false).
|
|
* @return 0 success (vm was allocated) -1 otherwise
|
|
*/
|
|
int make_affined_topology(HostShareCapacity &sr, int node_id,
|
|
unsigned long hpsz_kb, bool do_alloc);
|
|
|
|
/*
|
|
* Computes the virtual topology for the VM based on the huge pages allocation
|
|
*
|
|
* @param sr the resource allocation request
|
|
* @param hpsz_kb size of the requested huge page (in KB)
|
|
* @param do_alloc actually allocate the node (true) or just test (false).
|
|
* @return 0 success (vm was allocated) -1 otherwise
|
|
*/
|
|
int make_hugepage_topology(HostShareCapacity &sr, unsigned long hpzs_kb,
|
|
bool do_alloc);
|
|
/**
|
|
* This is an internal structure to represent a virtual node allocation
|
|
* request and the resulting schedule
|
|
*/
|
|
struct NUMANodeRequest
|
|
{
|
|
VectorAttribute * attr;
|
|
|
|
unsigned int total_cpus;
|
|
long long memory;
|
|
|
|
//NUMA node to allocate CPU cores from
|
|
int node_id;
|
|
std::string cpu_ids;
|
|
|
|
//NUMA node to allocate memory from
|
|
int mem_node_id;
|
|
};
|
|
|
|
bool schedule_nodes(NUMANodeRequest &nr, unsigned int thr, bool dedicated,
|
|
unsigned long hpsz_kb, std::set<unsigned int> &pci, bool do_alloc);
|
|
};
|
|
|
|
#endif /*HOST_SHARE_NUMA_H_*/
|