* target/i386/kvm: support for reading RAPL MSRs using a helper program

* hpet: emulation improvements
 -----BEGIN PGP SIGNATURE-----
 
 iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmaelL4UHHBib256aW5p
 QHJlZGhhdC5jb20ACgkQv/vSX3jHroMXoQf+K77lNlHLETSgeeP3dr7yZPOmXjjN
 qFY/18jiyLw7MK1rZC09fF+n9SoaTH8JDKupt0z9M1R10HKHLIO04f8zDE+dOxaE
 Rou3yKnlTgFPGSoPPFr1n1JJfxtYlLZRoUzaAcHUaa4W7JR/OHJX90n1Rb9MXeDk
 jV6P0v1FWtIDdM6ERm9qBGoQdYhj6Ra2T4/NZKJFXwIhKEkxgu4yO7WXv8l0dxQz
 jE4fKotqAvrkYW1EsiVZm30lw/19duhvGiYeQXoYhk8KKXXjAbJMblLITSNWsCio
 3l6Uud/lOxekkJDAq5nH3H9hCBm0WwvwL+0vRf3Mkr+/xRGvrhtmUdp8NQ==
 =00mB
 -----END PGP SIGNATURE-----

Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging

* target/i386/kvm: support for reading RAPL MSRs using a helper program
* hpet: emulation improvements

# -----BEGIN PGP SIGNATURE-----
#
# iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmaelL4UHHBib256aW5p
# QHJlZGhhdC5jb20ACgkQv/vSX3jHroMXoQf+K77lNlHLETSgeeP3dr7yZPOmXjjN
# qFY/18jiyLw7MK1rZC09fF+n9SoaTH8JDKupt0z9M1R10HKHLIO04f8zDE+dOxaE
# Rou3yKnlTgFPGSoPPFr1n1JJfxtYlLZRoUzaAcHUaa4W7JR/OHJX90n1Rb9MXeDk
# jV6P0v1FWtIDdM6ERm9qBGoQdYhj6Ra2T4/NZKJFXwIhKEkxgu4yO7WXv8l0dxQz
# jE4fKotqAvrkYW1EsiVZm30lw/19duhvGiYeQXoYhk8KKXXjAbJMblLITSNWsCio
# 3l6Uud/lOxekkJDAq5nH3H9hCBm0WwvwL+0vRf3Mkr+/xRGvrhtmUdp8NQ==
# =00mB
# -----END PGP SIGNATURE-----
# gpg: Signature made Tue 23 Jul 2024 03:19:58 AM AEST
# gpg:                using RSA key F13338574B662389866C7682BFFBD25F78C7AE83
# gpg:                issuer "pbonzini@redhat.com"
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" [full]
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>" [full]

* tag 'for-upstream' of https://gitlab.com/bonzini/qemu:
  hpet: avoid timer storms on periodic timers
  hpet: store full 64-bit target value of the counter
  hpet: accept 64-bit reads and writes
  hpet: place read-only bits directly in "new_val"
  hpet: remove unnecessary variable "index"
  hpet: ignore high bits of comparator in 32-bit mode
  hpet: fix and cleanup persistence of interrupt status
  Add support for RAPL MSRs in KVM/Qemu
  tools: build qemu-vmsr-helper
  qio: add support for SO_PEERCRED for socket channel
  target/i386: do not crash if microvm guest uses SGX CPUID leaves

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2024-07-24 11:25:40 +10:00
commit 43f59bf765
23 changed files with 1995 additions and 185 deletions

View File

@ -140,6 +140,7 @@ F: docs/system/target-i386*
F: target/i386/*.[ch]
F: target/i386/Kconfig
F: target/i386/meson.build
F: tools/i386/
Guest CPU cores (TCG)
---------------------

View File

@ -3776,6 +3776,21 @@ static void kvm_set_device(Object *obj,
s->device = g_strdup(value);
}
/*
 * QOM setter for the KVM accelerator's "rapl" boolean property.
 * Records whether energy-related (RAPL) MSR handling is enabled
 * in the accelerator state; consumed elsewhere by the MSR-energy code.
 */
static void kvm_set_kvm_rapl(Object *obj, bool value, Error **errp)
{
KVMState *s = KVM_STATE(obj);
s->msr_energy.enable = value;
}
/*
 * QOM setter for the "rapl-helper-socket" string property.
 * Stores the path of the qemu-vmsr-helper listening socket,
 * releasing any previously stored path first (safe on NULL).
 */
static void kvm_set_kvm_rapl_socket_path(Object *obj,
const char *str,
Error **errp)
{
KVMState *s = KVM_STATE(obj);
g_free(s->msr_energy.socket_path);
s->msr_energy.socket_path = g_strdup(str);
}
static void kvm_accel_instance_init(Object *obj)
{
KVMState *s = KVM_STATE(obj);
@ -3795,6 +3810,7 @@ static void kvm_accel_instance_init(Object *obj)
s->xen_gnttab_max_frames = 64;
s->xen_evtchn_max_pirq = 256;
s->device = NULL;
s->msr_energy.enable = false;
}
/**
@ -3839,6 +3855,17 @@ static void kvm_accel_class_init(ObjectClass *oc, void *data)
object_class_property_set_description(oc, "device",
"Path to the device node to use (default: /dev/kvm)");
object_class_property_add_bool(oc, "rapl",
NULL,
kvm_set_kvm_rapl);
object_class_property_set_description(oc, "rapl",
"Allow energy related MSRs for RAPL interface in Guest");
object_class_property_add_str(oc, "rapl-helper-socket", NULL,
kvm_set_kvm_rapl_socket_path);
object_class_property_set_description(oc, "rapl-helper-socket",
"Socket Path for comminucating with the Virtual MSR helper daemon");
kvm_arch_accel_class_init(oc);
}

View File

@ -0,0 +1,15 @@
[Unit]
Description=Virtual RAPL MSR Daemon for QEMU
[Service]
WorkingDirectory=/tmp
Type=simple
ExecStart=/usr/bin/qemu-vmsr-helper
PrivateTmp=yes
ProtectSystem=strict
ReadWritePaths=/var/run
RestrictAddressFamilies=AF_UNIX
Restart=always
RestartSec=0
[Install]

View File

@ -0,0 +1,9 @@
[Unit]
Description=Virtual RAPL MSR helper for QEMU
[Socket]
ListenStream=/run/qemu-vmsr-helper.sock
SocketMode=0600
[Install]
WantedBy=multi-user.target

View File

@ -34,3 +34,4 @@ guest hardware that is specific to QEMU.
virt-ctlr
vmcoreinfo
vmgenid
rapl-msr

155
docs/specs/rapl-msr.rst Normal file
View File

@ -0,0 +1,155 @@
================
RAPL MSR support
================
The RAPL interface (Running Average Power Limit) is advertising the accumulated
energy consumption of various power domains (e.g. CPU packages, DRAM, etc.).
The consumption is reported via MSRs (model specific registers) like
MSR_PKG_ENERGY_STATUS for the CPU package power domain. These MSRs are 64 bits
registers that represent the accumulated energy consumption in micro Joules.
Thanks to the MSR Filtering patch [#a]_ not all MSRs are handled by KVM. Some
of them can now be handled by the userspace (QEMU). It uses a mechanism called
"MSR filtering" where a list of MSRs is given at init time of a VM to KVM so
that a callback is put in place. The design of this patch uses only this
mechanism for handling the MSRs between guest/host.
At the moment the following MSRs are involved:
.. code:: C
#define MSR_RAPL_POWER_UNIT 0x00000606
#define MSR_PKG_POWER_LIMIT 0x00000610
#define MSR_PKG_ENERGY_STATUS 0x00000611
#define MSR_PKG_POWER_INFO 0x00000614
The ``*_POWER_UNIT``, ``*_POWER_LIMIT``, ``*_POWER_INFO`` are part of the RAPL
spec and specify the power limit of the package, provide range of parameter(min
power, max power,..) and also the information of the multiplier for the energy
counter to calculate the power. Those MSRs are populated once at the beginning
by reading the host CPU MSRs and are given back to the guest 1:1 when
requested.
The MSR_PKG_ENERGY_STATUS is a counter; it represents the total amount of
energy consumed since the last time the register was cleared. If you multiply
it with the UNIT provided above you'll get the power in micro-joules. This
counter is always increasing and it increases more or less faster depending on
the consumption of the package. This counter is supposed to overflow at some
point.
Each core belonging to the same Package reading the MSR_PKG_ENERGY_STATUS (i.e
"rdmsr 0x611") will retrieve the same value. The value represents the energy
for the whole package. Whatever Core reading it will get the same value and a
core that belongs to PKG-0 will not be able to get the value of PKG-1 and
vice-versa.
High level implementation
-------------------------
In order to update the value of the virtual MSR, a QEMU thread is created.
The thread is basically just an infinite loop that does:
1. Snapshot of the time metrics of all QEMU threads (Time spent scheduled in
Userspace and System)
2. Snapshot of the actual MSR_PKG_ENERGY_STATUS counter of all packages where
the QEMU threads are running on.
3. Sleep for 1 second - During this pause the vcpu and other non-vcpu threads
will do what they have to do and so the energy counter will increase.
4. Repeat 2. and 3. and calculate the delta of every metric representing the
time spent scheduled for each QEMU thread *and* the energy spent by the
packages during the pause.
5. Filter the vcpu threads and the non-vcpu threads.
6. Retrieve the topology of the Virtual Machine. This helps identify which
vCPU is running on which virtual package.
7. The total energy spent by the non-vcpu threads is divided by the number
of vcpu threads so that each vcpu thread will get an equal part of the
energy spent by the QEMU workers.
8. Calculate the ratio of energy spent per vcpu threads.
9. Calculate the energy for each virtual package.
10. The virtual MSRs are updated for each virtual package. Each vCPU that
belongs to the same package will return the same value when accessing the
MSR.
11. Loop back to 1.
Ratio calculation
-----------------
In Linux, a process has an execution time associated with it. The scheduler is
dividing the time in clock ticks. The number of clock ticks per second can be
found by the sysconf system call. A typical value of clock ticks per second is
100. So a core can run a process at the maximum of 100 ticks per second. If a
package has 4 cores, 400 ticks maximum can be scheduled on all the cores
of the package for a period of 1 second.
The /proc/[pid]/stat [#b]_ is a sysfs file that can give the executed time of a
process with the [pid] as the process ID. It gives the amount of ticks the
process has been scheduled in userspace (utime) and kernel space (stime).
By reading those metrics for a thread, one can calculate the ratio of time the
package has spent executing the thread.
Example:
A 4 cores package can schedule a maximum of 400 ticks per second with 100 ticks
per second per core. If a thread was scheduled for 100 ticks between a second
on this package, that means my thread has been scheduled for 1/4 of the whole
package. With that, the calculation of the energy spent by the thread on this
package during this whole second is 1/4 of the total energy spent by the
package.
Usage
-----
Currently this feature only works on an Intel CPU that has the RAPL driver
mounted and available in sysfs. If not, QEMU fails at start-up.
This feature is activated with -accel
kvm,rapl=true,rapl-helper-socket=/path/sock.sock
It is important that the socket path is the same as the one
:program:`qemu-vmsr-helper` is listening to.
qemu-vmsr-helper
----------------
The qemu-vmsr-helper is working very much like the qemu-pr-helper. Instead of
making persistent reservation, qemu-vmsr-helper is here to overcome the
CVE-2020-8694, which removed user access to the RAPL MSR sysfs attributes.
A socket communication is established between QEMU processes that has the RAPL
MSR support activated and the qemu-vmsr-helper. A systemd service and socket
activation is provided in contrib/systemd/qemu-vmsr-helper.(service/socket).
The systemd socket uses 600, like contrib/systemd/qemu-pr-helper.socket. The
socket can be passed via SCM_RIGHTS by libvirt, or its permissions can be
changed (e.g. 660 and root:kvm for a Debian system for example). Libvirt could
also start a separate helper if needed. All in all, the policy is left to the
user.
See the qemu-pr-helper documentation or manpage for further details.
Current Limitations
-------------------
- Works only on Intel host CPUs because AMD CPUs are using different MSR
addresses.
- Only the Package Power-Plane (MSR_PKG_ENERGY_STATUS) is reported at the
moment.
References
----------
.. [#a] https://patchwork.kernel.org/project/kvm/patch/20200916202951.23760-7-graf@amazon.com/
.. [#b] https://man7.org/linux/man-pages/man5/proc.5.html

View File

@ -16,3 +16,4 @@ command line utilities and other standalone programs.
qemu-pr-helper
qemu-trace-stap
virtfs-proxy-helper
qemu-vmsr-helper

View File

@ -0,0 +1,89 @@
==================================
QEMU virtual RAPL MSR helper
==================================
Synopsis
--------
**qemu-vmsr-helper** [*OPTION*]
Description
-----------
Implements the virtual RAPL MSR helper for QEMU.
Accessing the RAPL (Running Average Power Limit) MSR enables the RAPL powercap
driver to advertise and monitor the power consumption or accumulated energy
consumption of different power domains, such as CPU packages, DRAM, and other
components when available.
However, those registers are accessible only with privileged access
(CAP_SYS_RAWIO). QEMU can use an external helper to access those privileged
registers.
:program:`qemu-vmsr-helper` is that external helper; it creates a listener
socket which will accept incoming connections for communication with QEMU.
If you want to run VMs in a setup like this, this helper should be started as a
system service, and you should read the QEMU manual section on "RAPL MSR
support" to find out how to configure QEMU to connect to the socket created by
:program:`qemu-vmsr-helper`.
After connecting to the socket, :program:`qemu-vmsr-helper` can
optionally drop root privileges, except for those capabilities that
are needed for its operation.
:program:`qemu-vmsr-helper` can also use the systemd socket activation
protocol. In this case, the systemd socket unit should specify a
Unix stream socket, like this::
[Socket]
ListenStream=/var/run/qemu-vmsr-helper.sock
Options
-------
.. program:: qemu-vmsr-helper
.. option:: -d, --daemon
run in the background (and create a PID file)
.. option:: -q, --quiet
decrease verbosity
.. option:: -v, --verbose
increase verbosity
.. option:: -f, --pidfile=PATH
PID file when running as a daemon. By default the PID file
is created in the system runtime state directory, for example
:file:`/var/run/qemu-vmsr-helper.pid`.
.. option:: -k, --socket=PATH
path to the socket. By default the socket is created in
the system runtime state directory, for example
:file:`/var/run/qemu-vmsr-helper.sock`.
.. option:: -T, --trace [[enable=]PATTERN][,events=FILE][,file=FILE]
.. include:: ../qemu-option-trace.rst.inc
.. option:: -u, --user=USER
user to drop privileges to
.. option:: -g, --group=GROUP
group to drop privileges to
.. option:: -h, --help
Display a help message and exit.
.. option:: -V, --version
Display version information and exit.

View File

@ -268,10 +268,12 @@ void hmp_info_sgx(Monitor *mon, const QDict *qdict)
bool sgx_epc_get_section(int section_nr, uint64_t *addr, uint64_t *size)
{
PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
PCMachineState *pcms =
(PCMachineState *)object_dynamic_cast(qdev_get_machine(),
TYPE_PC_MACHINE);
SGXEPCDevice *epc;
if (pcms->sgx_epc.size == 0 || pcms->sgx_epc.nr_sections <= section_nr) {
if (!pcms || pcms->sgx_epc.size == 0 || pcms->sgx_epc.nr_sections <= section_nr) {
return true;
}

View File

@ -54,10 +54,12 @@ typedef struct HPETTimer { /* timers */
uint64_t cmp; /* comparator */
uint64_t fsb; /* FSB route */
/* Hidden register state */
uint64_t cmp64; /* comparator (extended to counter width) */
uint64_t period; /* Last value written to comparator */
uint8_t wrap_flag; /* timer pop will indicate wrap for one-shot 32-bit
* mode. Next pop will be actual timer expiration.
*/
uint64_t last; /* last value armed, to avoid timer storms */
} HPETTimer;
struct HPETState {
@ -115,11 +117,6 @@ static uint32_t timer_enabled(HPETTimer *t)
}
static uint32_t hpet_time_after(uint64_t a, uint64_t b)
{
return ((int32_t)(b - a) < 0);
}
static uint32_t hpet_time_after64(uint64_t a, uint64_t b)
{
return ((int64_t)(b - a) < 0);
}
@ -156,29 +153,34 @@ static uint64_t hpet_get_ticks(HPETState *s)
return ns_to_ticks(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + s->hpet_offset);
}
/*
* calculate diff between comparator value and current ticks
*/
static inline uint64_t hpet_calculate_diff(HPETTimer *t, uint64_t current)
/*
 * Convert an HPET counter value (tick) back into a QEMU_CLOCK_VIRTUAL
 * timestamp in nanoseconds by removing the stored counter/clock offset.
 * Inverse of hpet_get_ticks() for an enabled counter.
 */
static uint64_t hpet_get_ns(HPETState *s, uint64_t tick)
{
return ticks_to_ns(tick) - s->hpet_offset;
}
/*
* calculate next value of the general counter that matches the
* target (either entirely, or the low 32-bit only depending on
* the timer mode).
*/
static uint64_t hpet_calculate_cmp64(HPETTimer *t, uint64_t cur_tick, uint64_t target)
{
if (t->config & HPET_TN_32BIT) {
uint32_t diff, cmp;
cmp = (uint32_t)t->cmp;
diff = cmp - (uint32_t)current;
diff = (int32_t)diff > 0 ? diff : (uint32_t)1;
return (uint64_t)diff;
uint64_t result = deposit64(cur_tick, 0, 32, target);
if (result < cur_tick) {
result += 0x100000000ULL;
}
return result;
} else {
uint64_t diff, cmp;
cmp = t->cmp;
diff = cmp - current;
diff = (int64_t)diff > 0 ? diff : (uint64_t)1;
return diff;
return target;
}
}
/*
 * Return the counter value at which the low 32 bits next wrap to zero:
 * cur_tick rounded up to the next multiple of 2^32.
 */
static uint64_t hpet_next_wrap(uint64_t cur_tick)
{
    return (cur_tick & ~0xffffffffULL) + 0x100000000ULL;
}
static void update_irq(struct HPETTimer *timer, int set)
{
uint64_t mask;
@ -196,21 +198,31 @@ static void update_irq(struct HPETTimer *timer, int set)
}
s = timer->state;
mask = 1 << timer->tn;
if (!set || !timer_enabled(timer) || !hpet_enabled(timer->state)) {
if (set && (timer->config & HPET_TN_TYPE_LEVEL)) {
/*
* If HPET_TN_ENABLE bit is 0, "the timer will still operate and
* generate appropriate status bits, but will not cause an interrupt"
*/
s->isr |= mask;
} else {
s->isr &= ~mask;
}
if (set && timer_enabled(timer) && hpet_enabled(s)) {
if (timer_fsb_route(timer)) {
address_space_stl_le(&address_space_memory, timer->fsb >> 32,
timer->fsb & 0xffffffff, MEMTXATTRS_UNSPECIFIED,
NULL);
} else if (timer->config & HPET_TN_TYPE_LEVEL) {
qemu_irq_raise(s->irqs[route]);
} else {
qemu_irq_pulse(s->irqs[route]);
}
} else {
if (!timer_fsb_route(timer)) {
qemu_irq_lower(s->irqs[route]);
}
} else if (timer_fsb_route(timer)) {
address_space_stl_le(&address_space_memory, timer->fsb >> 32,
timer->fsb & 0xffffffff, MEMTXATTRS_UNSPECIFIED,
NULL);
} else if (timer->config & HPET_TN_TYPE_LEVEL) {
s->isr |= mask;
qemu_irq_raise(s->irqs[route]);
} else {
s->isr &= ~mask;
qemu_irq_pulse(s->irqs[route]);
}
}
@ -250,7 +262,13 @@ static bool hpet_validate_num_timers(void *opaque, int version_id)
static int hpet_post_load(void *opaque, int version_id)
{
HPETState *s = opaque;
int i;
for (i = 0; i < s->num_timers; i++) {
HPETTimer *t = &s->timer[i];
t->cmp64 = hpet_calculate_cmp64(t, s->hpet_counter, t->cmp);
t->last = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - NANOSECONDS_PER_SECOND;
}
/* Recalculate the offset between the main counter and guest time */
if (!s->hpet_offset_saved) {
s->hpet_offset = ticks_to_ns(s->hpet_counter)
@ -346,14 +364,17 @@ static const VMStateDescription vmstate_hpet = {
}
};
static void hpet_arm(HPETTimer *t, uint64_t ticks)
static void hpet_arm(HPETTimer *t, uint64_t tick)
{
if (ticks < ns_to_ticks(INT64_MAX / 2)) {
timer_mod(t->qemu_timer,
qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + ticks_to_ns(ticks));
} else {
timer_del(t->qemu_timer);
uint64_t ns = hpet_get_ns(t->state, tick);
/* Clamp period to reasonable min value (1 us) */
if (timer_is_periodic(t) && ns - t->last < 1000) {
ns = t->last + 1000;
}
t->last = ns;
timer_mod(t->qemu_timer, ns);
}
/*
@ -362,72 +383,68 @@ static void hpet_arm(HPETTimer *t, uint64_t ticks)
static void hpet_timer(void *opaque)
{
HPETTimer *t = opaque;
uint64_t diff;
uint64_t period = t->period;
uint64_t cur_tick = hpet_get_ticks(t->state);
if (timer_is_periodic(t) && period != 0) {
while (hpet_time_after(cur_tick, t->cmp64)) {
t->cmp64 += period;
}
if (t->config & HPET_TN_32BIT) {
while (hpet_time_after(cur_tick, t->cmp)) {
t->cmp = (uint32_t)(t->cmp + t->period);
}
t->cmp = (uint32_t)t->cmp64;
} else {
while (hpet_time_after64(cur_tick, t->cmp)) {
t->cmp += period;
}
}
diff = hpet_calculate_diff(t, cur_tick);
hpet_arm(t, diff);
} else if (t->config & HPET_TN_32BIT && !timer_is_periodic(t)) {
if (t->wrap_flag) {
diff = hpet_calculate_diff(t, cur_tick);
hpet_arm(t, diff);
t->wrap_flag = 0;
t->cmp = t->cmp64;
}
hpet_arm(t, t->cmp64);
} else if (t->wrap_flag) {
t->wrap_flag = 0;
hpet_arm(t, t->cmp64);
}
update_irq(t, 1);
}
static void hpet_set_timer(HPETTimer *t)
{
uint64_t diff;
uint32_t wrap_diff; /* how many ticks until we wrap? */
uint64_t cur_tick = hpet_get_ticks(t->state);
/* whenever new timer is being set up, make sure wrap_flag is 0 */
t->wrap_flag = 0;
diff = hpet_calculate_diff(t, cur_tick);
t->cmp64 = hpet_calculate_cmp64(t, cur_tick, t->cmp);
if (t->config & HPET_TN_32BIT) {
/* hpet spec says in one-shot 32-bit mode, generate an interrupt when
* counter wraps in addition to an interrupt with comparator match.
*/
if (t->config & HPET_TN_32BIT && !timer_is_periodic(t)) {
wrap_diff = 0xffffffff - (uint32_t)cur_tick;
if (wrap_diff < (uint32_t)diff) {
diff = wrap_diff;
/* hpet spec says in one-shot 32-bit mode, generate an interrupt when
* counter wraps in addition to an interrupt with comparator match.
*/
if (!timer_is_periodic(t) && t->cmp64 > hpet_next_wrap(cur_tick)) {
t->wrap_flag = 1;
hpet_arm(t, hpet_next_wrap(cur_tick));
return;
}
}
hpet_arm(t, diff);
hpet_arm(t, t->cmp64);
}
static void hpet_del_timer(HPETTimer *t)
{
HPETState *s = t->state;
timer_del(t->qemu_timer);
update_irq(t, 0);
if (s->isr & (1 << t->tn)) {
/* For level-triggered interrupt, this leaves ISR set but lowers irq. */
update_irq(t, 1);
}
}
static uint64_t hpet_ram_read(void *opaque, hwaddr addr,
unsigned size)
{
HPETState *s = opaque;
uint64_t cur_tick, index;
int shift = (addr & 4) * 8;
uint64_t cur_tick;
trace_hpet_ram_read(addr);
index = addr;
/*address range of all TN regs*/
if (index >= 0x100 && index <= 0x3ff) {
if (addr >= 0x100 && addr <= 0x3ff) {
uint8_t timer_id = (addr - 0x100) / 0x20;
HPETTimer *timer = &s->timer[timer_id];
@ -436,52 +453,33 @@ static uint64_t hpet_ram_read(void *opaque, hwaddr addr,
return 0;
}
switch ((addr - 0x100) % 0x20) {
case HPET_TN_CFG:
return timer->config;
case HPET_TN_CFG + 4: // Interrupt capabilities
return timer->config >> 32;
switch (addr & 0x18) {
case HPET_TN_CFG: // including interrupt capabilities
return timer->config >> shift;
case HPET_TN_CMP: // comparator register
return timer->cmp;
case HPET_TN_CMP + 4:
return timer->cmp >> 32;
return timer->cmp >> shift;
case HPET_TN_ROUTE:
return timer->fsb;
case HPET_TN_ROUTE + 4:
return timer->fsb >> 32;
return timer->fsb >> shift;
default:
trace_hpet_ram_read_invalid();
break;
}
} else {
switch (index) {
case HPET_ID:
return s->capability;
case HPET_PERIOD:
return s->capability >> 32;
switch (addr & ~4) {
case HPET_ID: // including HPET_PERIOD
return s->capability >> shift;
case HPET_CFG:
return s->config;
case HPET_CFG + 4:
trace_hpet_invalid_hpet_cfg(4);
return 0;
return s->config >> shift;
case HPET_COUNTER:
if (hpet_enabled(s)) {
cur_tick = hpet_get_ticks(s);
} else {
cur_tick = s->hpet_counter;
}
trace_hpet_ram_read_reading_counter(0, cur_tick);
return cur_tick;
case HPET_COUNTER + 4:
if (hpet_enabled(s)) {
cur_tick = hpet_get_ticks(s);
} else {
cur_tick = s->hpet_counter;
}
trace_hpet_ram_read_reading_counter(4, cur_tick);
return cur_tick >> 32;
trace_hpet_ram_read_reading_counter(addr & 4, cur_tick);
return cur_tick >> shift;
case HPET_STATUS:
return s->isr;
return s->isr >> shift;
default:
trace_hpet_ram_read_invalid();
break;
@ -495,15 +493,14 @@ static void hpet_ram_write(void *opaque, hwaddr addr,
{
int i;
HPETState *s = opaque;
uint64_t old_val, new_val, val, index;
int shift = (addr & 4) * 8;
int len = MIN(size * 8, 64 - shift);
uint64_t old_val, new_val, cleared;
trace_hpet_ram_write(addr, value);
index = addr;
old_val = hpet_ram_read(opaque, addr, 4);
new_val = value;
/*address range of all TN regs*/
if (index >= 0x100 && index <= 0x3ff) {
if (addr >= 0x100 && addr <= 0x3ff) {
uint8_t timer_id = (addr - 0x100) / 0x20;
HPETTimer *timer = &s->timer[timer_id];
@ -512,71 +509,49 @@ static void hpet_ram_write(void *opaque, hwaddr addr,
trace_hpet_timer_id_out_of_range(timer_id);
return;
}
switch ((addr - 0x100) % 0x20) {
switch (addr & 0x18) {
case HPET_TN_CFG:
trace_hpet_ram_write_tn_cfg();
if (activating_bit(old_val, new_val, HPET_TN_FSB_ENABLE)) {
trace_hpet_ram_write_tn_cfg(addr & 4);
old_val = timer->config;
new_val = deposit64(old_val, shift, len, value);
new_val = hpet_fixup_reg(new_val, old_val, HPET_TN_CFG_WRITE_MASK);
if (deactivating_bit(old_val, new_val, HPET_TN_TYPE_LEVEL)) {
/*
* Do this before changing timer->config; otherwise, if
* HPET_TN_FSB is set, update_irq will not lower the qemu_irq.
*/
update_irq(timer, 0);
}
val = hpet_fixup_reg(new_val, old_val, HPET_TN_CFG_WRITE_MASK);
timer->config = (timer->config & 0xffffffff00000000ULL) | val;
timer->config = new_val;
if (activating_bit(old_val, new_val, HPET_TN_ENABLE)
&& (s->isr & (1 << timer_id))) {
update_irq(timer, 1);
}
if (new_val & HPET_TN_32BIT) {
timer->cmp = (uint32_t)timer->cmp;
timer->period = (uint32_t)timer->period;
}
if (activating_bit(old_val, new_val, HPET_TN_ENABLE) &&
hpet_enabled(s)) {
hpet_set_timer(timer);
} else if (deactivating_bit(old_val, new_val, HPET_TN_ENABLE)) {
hpet_del_timer(timer);
}
break;
case HPET_TN_CFG + 4: // Interrupt capabilities
trace_hpet_ram_write_invalid_tn_cfg(4);
break;
case HPET_TN_CMP: // comparator register
trace_hpet_ram_write_tn_cmp(0);
if (timer->config & HPET_TN_32BIT) {
new_val = (uint32_t)new_val;
}
if (!timer_is_periodic(timer)
|| (timer->config & HPET_TN_SETVAL)) {
timer->cmp = (timer->cmp & 0xffffffff00000000ULL) | new_val;
}
if (timer_is_periodic(timer)) {
/*
* FIXME: Clamp period to reasonable min value?
* Clamp period to reasonable max value
*/
if (timer->config & HPET_TN_32BIT) {
new_val = MIN(new_val, ~0u >> 1);
}
timer->period =
(timer->period & 0xffffffff00000000ULL) | new_val;
}
/*
* FIXME: on a 64-bit write, HPET_TN_SETVAL should apply to the
* high bits part as well.
*/
timer->config &= ~HPET_TN_SETVAL;
if (hpet_enabled(s)) {
hpet_set_timer(timer);
}
break;
case HPET_TN_CMP + 4: // comparator register high order
trace_hpet_ram_write_tn_cmp(4);
case HPET_TN_CMP: // comparator register
if (timer->config & HPET_TN_32BIT) {
/* High 32-bits are zero, leave them untouched. */
if (shift) {
trace_hpet_ram_write_invalid_tn_cmp();
break;
}
len = 64;
value = (uint32_t) value;
}
trace_hpet_ram_write_tn_cmp(addr & 4);
if (!timer_is_periodic(timer)
|| (timer->config & HPET_TN_SETVAL)) {
timer->cmp = (timer->cmp & 0xffffffffULL) | new_val << 32;
timer->cmp = deposit64(timer->cmp, shift, len, value);
}
if (timer_is_periodic(timer)) {
/*
* FIXME: Clamp period to reasonable min value?
* Clamp period to reasonable max value
*/
new_val = MIN(new_val, ~0u >> 1);
timer->period =
(timer->period & 0xffffffffULL) | new_val << 32;
timer->period = deposit64(timer->period, shift, len, value);
}
timer->config &= ~HPET_TN_SETVAL;
if (hpet_enabled(s)) {
@ -584,10 +559,7 @@ static void hpet_ram_write(void *opaque, hwaddr addr,
}
break;
case HPET_TN_ROUTE:
timer->fsb = (timer->fsb & 0xffffffff00000000ULL) | new_val;
break;
case HPET_TN_ROUTE + 4:
timer->fsb = (new_val << 32) | (timer->fsb & 0xffffffff);
timer->fsb = deposit64(timer->fsb, shift, len, value);
break;
default:
trace_hpet_ram_write_invalid();
@ -595,20 +567,23 @@ static void hpet_ram_write(void *opaque, hwaddr addr,
}
return;
} else {
switch (index) {
switch (addr & ~4) {
case HPET_ID:
return;
case HPET_CFG:
val = hpet_fixup_reg(new_val, old_val, HPET_CFG_WRITE_MASK);
s->config = (s->config & 0xffffffff00000000ULL) | val;
old_val = s->config;
new_val = deposit64(old_val, shift, len, value);
new_val = hpet_fixup_reg(new_val, old_val, HPET_CFG_WRITE_MASK);
s->config = new_val;
if (activating_bit(old_val, new_val, HPET_CFG_ENABLE)) {
/* Enable main counter and interrupt generation. */
s->hpet_offset =
ticks_to_ns(s->hpet_counter) - qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
for (i = 0; i < s->num_timers; i++) {
if ((&s->timer[i])->cmp != ~0ULL) {
hpet_set_timer(&s->timer[i]);
if (timer_enabled(&s->timer[i]) && (s->isr & (1 << i))) {
update_irq(&s->timer[i], 1);
}
hpet_set_timer(&s->timer[i]);
}
} else if (deactivating_bit(old_val, new_val, HPET_CFG_ENABLE)) {
/* Halt main counter and disable interrupt generation. */
@ -629,13 +604,11 @@ static void hpet_ram_write(void *opaque, hwaddr addr,
qemu_set_irq(s->irqs[RTC_ISA_IRQ], s->rtc_irq_level);
}
break;
case HPET_CFG + 4:
trace_hpet_invalid_hpet_cfg(4);
break;
case HPET_STATUS:
val = new_val & s->isr;
new_val = value << shift;
cleared = new_val & s->isr;
for (i = 0; i < s->num_timers; i++) {
if (val & (1 << i)) {
if (cleared & (1 << i)) {
update_irq(&s->timer[i], 0);
}
}
@ -644,15 +617,7 @@ static void hpet_ram_write(void *opaque, hwaddr addr,
if (hpet_enabled(s)) {
trace_hpet_ram_write_counter_write_while_enabled();
}
s->hpet_counter =
(s->hpet_counter & 0xffffffff00000000ULL) | value;
trace_hpet_ram_write_counter_written(0, value, s->hpet_counter);
break;
case HPET_COUNTER + 4:
trace_hpet_ram_write_counter_write_while_enabled();
s->hpet_counter =
(s->hpet_counter & 0xffffffffULL) | (((uint64_t)value) << 32);
trace_hpet_ram_write_counter_written(4, value, s->hpet_counter);
s->hpet_counter = deposit64(s->hpet_counter, shift, len, value);
break;
default:
trace_hpet_ram_write_invalid();
@ -666,7 +631,11 @@ static const MemoryRegionOps hpet_ram_ops = {
.write = hpet_ram_write,
.valid = {
.min_access_size = 4,
.max_access_size = 4,
.max_access_size = 8,
},
.impl = {
.min_access_size = 4,
.max_access_size = 8,
},
.endianness = DEVICE_NATIVE_ENDIAN,
};

View File

@ -108,9 +108,9 @@ hpet_ram_read_reading_counter(uint8_t reg_off, uint64_t cur_tick) "reading count
hpet_ram_read_invalid(void) "invalid hpet_ram_readl"
hpet_ram_write(uint64_t addr, uint64_t value) "enter hpet_ram_writel at 0x%" PRIx64 " = 0x%" PRIx64
hpet_ram_write_timer_id(uint64_t timer_id) "hpet_ram_writel timer_id = 0x%" PRIx64
hpet_ram_write_tn_cfg(void) "hpet_ram_writel HPET_TN_CFG"
hpet_ram_write_invalid_tn_cfg(uint8_t reg_off) "invalid HPET_TN_CFG + %" PRIu8 " write"
hpet_ram_write_tn_cfg(uint8_t reg_off) "hpet_ram_writel HPET_TN_CFG + %" PRIu8
hpet_ram_write_tn_cmp(uint8_t reg_off) "hpet_ram_writel HPET_TN_CMP + %" PRIu8
hpet_ram_write_invalid_tn_cmp(void) "invalid HPET_TN_CMP + 4 write"
hpet_ram_write_invalid(void) "invalid hpet_ram_writel"
hpet_ram_write_counter_write_while_enabled(void) "Writing counter while HPET enabled!"
hpet_ram_write_counter_written(uint8_t reg_off, uint64_t value, uint64_t counter) "HPET counter + %" PRIu8 "written. crt = 0x%" PRIx64 " -> 0x%" PRIx64

View File

@ -160,6 +160,9 @@ struct QIOChannelClass {
void *opaque);
int (*io_flush)(QIOChannel *ioc,
Error **errp);
int (*io_peerpid)(QIOChannel *ioc,
unsigned int *pid,
Error **errp);
};
/* General I/O handling functions */
@ -981,4 +984,22 @@ int coroutine_mixed_fn qio_channel_writev_full_all(QIOChannel *ioc,
int qio_channel_flush(QIOChannel *ioc,
Error **errp);
/**
* qio_channel_get_peercred:
* @ioc: the channel object
* @pid: pointer to pid
* @errp: pointer to a NULL-initialized error object
*
* Returns the pid of the peer process connected to this socket.
*
* The use of this function is possible only for connected
* AF_UNIX stream sockets and for AF_UNIX stream and datagram
* socket pairs on Linux.
* Return -1 on error with pid -1 for the non-Linux OS.
*
*/
int qio_channel_get_peerpid(QIOChannel *ioc,
unsigned int *pid,
Error **errp);
#endif /* QIO_CHANNEL_H */

View File

@ -14,6 +14,9 @@
#include "qemu/accel.h"
#include "qemu/queue.h"
#include "sysemu/kvm.h"
#include "hw/boards.h"
#include "hw/i386/topology.h"
#include "io/channel-socket.h"
typedef struct KVMSlot
{
@ -50,6 +53,34 @@ typedef struct KVMMemoryListener {
#define KVM_MSI_HASHTAB_SIZE 256
typedef struct KVMHostTopoInfo {
/* Number of package on the Host */
unsigned int maxpkgs;
/* Number of cpus on the Host */
unsigned int maxcpus;
/* Number of cpus on each different package */
unsigned int *pkg_cpu_count;
/* Each package can have different maxticks */
unsigned int *maxticks;
} KVMHostTopoInfo;
struct KVMMsrEnergy {
pid_t pid;
bool enable;
char *socket_path;
QIOChannelSocket *sioc;
QemuThread msr_thr;
unsigned int guest_vcpus;
unsigned int guest_vsockets;
X86CPUTopoInfo guest_topo_info;
KVMHostTopoInfo host_topo;
const CPUArchIdList *guest_cpu_list;
uint64_t *msr_value;
uint64_t msr_unit;
uint64_t msr_limit;
uint64_t msr_info;
};
enum KVMDirtyRingReaperState {
KVM_DIRTY_RING_REAPER_NONE = 0,
/* The reaper is sleeping */
@ -117,6 +148,7 @@ struct KVMState
bool kvm_dirty_ring_with_bitmap;
uint64_t kvm_eager_split_size; /* Eager Page Splitting chunk size */
struct KVMDirtyRingReaper reaper;
struct KVMMsrEnergy msr_energy;
NotifyVmexitOption notify_vmexit;
uint32_t notify_window;
uint32_t xen_version;

View File

@ -841,6 +841,33 @@ qio_channel_socket_set_cork(QIOChannel *ioc,
socket_set_cork(sioc->fd, v);
}
/*
 * io_peerpid implementation for socket channels.
 *
 * Fetches the PID of the peer process connected to the socket via the
 * SO_PEERCRED socket option (Linux-only; see unix(7)).
 *
 * On success, stores the peer PID in *pid and returns 0.  On failure,
 * or on non-Linux hosts, sets *pid to (unsigned int)-1, fills @errp
 * and returns -1.
 */
static int
qio_channel_socket_get_peerpid(QIOChannel *ioc,
unsigned int *pid,
Error **errp)
{
#ifdef CONFIG_LINUX
QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
Error *err = NULL;
socklen_t len = sizeof(struct ucred);
struct ucred cred;
/* SO_PEERCRED yields the credentials (pid/uid/gid) of the peer */
if (getsockopt(sioc->fd,
SOL_SOCKET, SO_PEERCRED,
&cred, &len) == -1) {
error_setg_errno(&err, errno, "Unable to get peer credentials");
error_propagate(errp, err);
*pid = -1;
return -1;
}
*pid = (unsigned int)cred.pid;
return 0;
#else
/* No SO_PEERCRED equivalent wired up for this platform */
error_setg(errp, "Unsupported feature");
*pid = -1;
return -1;
#endif
}
static int
qio_channel_socket_close(QIOChannel *ioc,
@ -938,6 +965,7 @@ static void qio_channel_socket_class_init(ObjectClass *klass,
#ifdef QEMU_MSG_ZEROCOPY
ioc_klass->io_flush = qio_channel_socket_flush;
#endif
ioc_klass->io_peerpid = qio_channel_socket_get_peerpid;
}
static const TypeInfo qio_channel_socket_info = {

View File

@ -548,6 +548,19 @@ void qio_channel_set_cork(QIOChannel *ioc,
}
}
/*
 * Fetch the pid of the peer process via the channel implementation's
 * io_peerpid callback.
 *
 * Returns 0 on success, -1 on failure (unsupported channel type, or
 * the callback itself failed) with *errp set.
 */
int qio_channel_get_peerpid(QIOChannel *ioc,
                            unsigned int *pid,
                            Error **errp)
{
    QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);

    if (!klass->io_peerpid) {
        error_setg(errp, "Channel does not support peer pid");
        return -1;
    }
    /*
     * Bug fix: the original discarded the callback's return value and
     * returned 0 unconditionally, reporting success even when the
     * callback had failed and set *errp.
     */
    return klass->io_peerpid(ioc, pid, errp);
}
off_t qio_channel_io_seek(QIOChannel *ioc,
off_t offset,

View File

@ -4089,6 +4089,13 @@ if have_tools
dependencies: [authz, crypto, io, qom, qemuutil,
libcap_ng, mpathpersist],
install: true)
# qemu-vmsr-helper: privileged helper used to read RAPL energy MSRs on
# behalf of QEMU; only built on x86 hosts, where these MSRs exist.
if cpu in ['x86', 'x86_64']
  executable('qemu-vmsr-helper', files('tools/i386/qemu-vmsr-helper.c'),
             dependencies: [authz, crypto, io, qom, qemuutil,
                            libcap_ng, mpathpersist],
             install: true)
endif
endif
if have_ivshmem

View File

@ -414,6 +414,10 @@ typedef enum X86Seg {
#define MSR_IA32_TSX_CTRL 0x122
#define MSR_IA32_TSCDEADLINE 0x6e0
#define MSR_IA32_PKRS 0x6e1
/* Intel RAPL (Running Average Power Limit) package-domain MSRs */
#define MSR_RAPL_POWER_UNIT 0x00000606
#define MSR_PKG_POWER_LIMIT 0x00000610
#define MSR_PKG_ENERGY_STATUS 0x00000611
#define MSR_PKG_POWER_INFO 0x00000614
#define MSR_ARCH_LBR_CTL 0x000014ce
#define MSR_ARCH_LBR_DEPTH 0x000014cf
#define MSR_ARCH_LBR_FROM_0 0x00001500
@ -1880,6 +1884,10 @@ typedef struct CPUArchState {
uintptr_t retaddr;
/* RAPL MSR */
uint64_t msr_rapl_power_unit;
uint64_t msr_pkg_energy_status;
/* Fields up to this point are cleared by a CPU reset */
struct {} end_reset_fields;

View File

@ -16,9 +16,12 @@
#include "qapi/qapi-events-run-state.h"
#include "qapi/error.h"
#include "qapi/visitor.h"
#include <math.h>
#include <sys/ioctl.h>
#include <sys/utsname.h>
#include <sys/syscall.h>
#include <sys/resource.h>
#include <sys/time.h>
#include <linux/kvm.h>
#include <linux/kvm_para.h>
@ -27,6 +30,7 @@
#include "cpu.h"
#include "host-cpu.h"
#include "vmsr_energy.h"
#include "sysemu/sysemu.h"
#include "sysemu/hw_accel.h"
#include "sysemu/kvm_int.h"
@ -2559,7 +2563,8 @@ static int kvm_get_supported_msrs(KVMState *s)
return ret;
}
static bool kvm_rdmsr_core_thread_count(X86CPU *cpu, uint32_t msr,
static bool kvm_rdmsr_core_thread_count(X86CPU *cpu,
uint32_t msr,
uint64_t *val)
{
CPUState *cs = CPU(cpu);
@ -2570,6 +2575,53 @@ static bool kvm_rdmsr_core_thread_count(X86CPU *cpu, uint32_t msr,
return true;
}
/* RDMSR handler: return the cached MSR_RAPL_POWER_UNIT value. */
static bool kvm_rdmsr_rapl_power_unit(X86CPU *cpu,
                                      uint32_t msr,
                                      uint64_t *val)
{
    /* The value is precomputed once and kept in the per-VM KVM state. */
    *val = CPU(cpu)->kvm_state->msr_energy.msr_unit;
    return true;
}
/* RDMSR handler: return the cached MSR_PKG_POWER_LIMIT value. */
static bool kvm_rdmsr_pkg_power_limit(X86CPU *cpu,
                                      uint32_t msr,
                                      uint64_t *val)
{
    /* The value is precomputed once and kept in the per-VM KVM state. */
    *val = CPU(cpu)->kvm_state->msr_energy.msr_limit;
    return true;
}
/* RDMSR handler: return the cached MSR_PKG_POWER_INFO value. */
static bool kvm_rdmsr_pkg_power_info(X86CPU *cpu,
                                     uint32_t msr,
                                     uint64_t *val)
{
    /* The value is precomputed once and kept in the per-VM KVM state. */
    *val = CPU(cpu)->kvm_state->msr_energy.msr_info;
    return true;
}
/*
 * RDMSR handler: return the energy-status reading computed for this
 * particular vCPU.
 */
static bool kvm_rdmsr_pkg_energy_status(X86CPU *cpu,
                                        uint32_t msr,
                                        uint64_t *val)
{
    CPUState *vcpu_state = CPU(cpu);
    const struct KVMMsrEnergy *energy = &vcpu_state->kvm_state->msr_energy;

    /* msr_value[] holds one precomputed entry per vCPU index. */
    *val = energy->msr_value[vcpu_state->cpu_index];
    return true;
}
static Notifier smram_machine_done;
static KVMMemoryListener smram_listener;
static AddressSpace smram_address_space;
@ -2604,6 +2656,340 @@ static void register_smram_listener(Notifier *n, void *unused)
&smram_address_space, 1, "kvm-smram");
}
static void *kvm_msr_energy_thread(void *data)
{
KVMState *s = data;
struct KVMMsrEnergy *vmsr = &s->msr_energy;
g_autofree vmsr_package_energy_stat *pkg_stat = NULL;
g_autofree vmsr_thread_stat *thd_stat = NULL;
g_autofree CPUState *cpu = NULL;
g_autofree unsigned int *vpkgs_energy_stat = NULL;
unsigned int num_threads = 0;
X86CPUTopoIDs topo_ids;
rcu_register_thread();
/* Allocate memory for each package energy status */
pkg_stat = g_new0(vmsr_package_energy_stat, vmsr->host_topo.maxpkgs);
/* Allocate memory for thread stats */
thd_stat = g_new0(vmsr_thread_stat, 1);
/* Allocate memory for holding virtual package energy counter */
vpkgs_energy_stat = g_new0(unsigned int, vmsr->guest_vsockets);
/* Populate the max tick of each packages */
for (int i = 0; i < vmsr->host_topo.maxpkgs; i++) {
/*
* Max numbers of ticks per package
* Time in second * Number of ticks/second * Number of cores/package
* ex: 100 ticks/second/CPU, 12 CPUs per Package gives 1200 ticks max
*/
vmsr->host_topo.maxticks[i] = (MSR_ENERGY_THREAD_SLEEP_US / 1000000)
* sysconf(_SC_CLK_TCK)
* vmsr->host_topo.pkg_cpu_count[i];
}
while (true) {
/* Get all qemu threads id */
g_autofree pid_t *thread_ids =
thread_ids = vmsr_get_thread_ids(vmsr->pid, &num_threads);
if (thread_ids == NULL) {
goto clean;
}
thd_stat = g_renew(vmsr_thread_stat, thd_stat, num_threads);
/* Unlike g_new0, g_renew0 function doesn't exist yet... */
memset(thd_stat, 0, num_threads * sizeof(vmsr_thread_stat));
/* Populate all the thread stats */
for (int i = 0; i < num_threads; i++) {
thd_stat[i].utime = g_new0(unsigned long long, 2);
thd_stat[i].stime = g_new0(unsigned long long, 2);
thd_stat[i].thread_id = thread_ids[i];
vmsr_read_thread_stat(vmsr->pid,
thd_stat[i].thread_id,
thd_stat[i].utime,
thd_stat[i].stime,
&thd_stat[i].cpu_id);
thd_stat[i].pkg_id =
vmsr_get_physical_package_id(thd_stat[i].cpu_id);
}
/* Retrieve all packages power plane energy counter */
for (int i = 0; i < vmsr->host_topo.maxpkgs; i++) {
for (int j = 0; j < num_threads; j++) {
/*
* Use the first thread we found that ran on the CPU
* of the package to read the packages energy counter
*/
if (thd_stat[j].pkg_id == i) {
pkg_stat[i].e_start =
vmsr_read_msr(MSR_PKG_ENERGY_STATUS,
thd_stat[j].cpu_id,
thd_stat[j].thread_id,
s->msr_energy.sioc);
break;
}
}
}
/* Sleep a short period while the other threads are working */
usleep(MSR_ENERGY_THREAD_SLEEP_US);
/*
* Retrieve all packages power plane energy counter
* Calculate the delta of all packages
*/
for (int i = 0; i < vmsr->host_topo.maxpkgs; i++) {
for (int j = 0; j < num_threads; j++) {
/*
* Use the first thread we found that ran on the CPU
* of the package to read the packages energy counter
*/
if (thd_stat[j].pkg_id == i) {
pkg_stat[i].e_end =
vmsr_read_msr(MSR_PKG_ENERGY_STATUS,
thd_stat[j].cpu_id,
thd_stat[j].thread_id,
s->msr_energy.sioc);
/*
* Prevent the case we have migrate the VM
* during the sleep period or any other cases
* were energy counter might be lower after
* the sleep period.
*/
if (pkg_stat[i].e_end > pkg_stat[i].e_start) {
pkg_stat[i].e_delta =
pkg_stat[i].e_end - pkg_stat[i].e_start;
} else {
pkg_stat[i].e_delta = 0;
}
break;
}
}
}
/* Delta of ticks spend by each thread between the sample */
for (int i = 0; i < num_threads; i++) {
vmsr_read_thread_stat(vmsr->pid,
thd_stat[i].thread_id,
thd_stat[i].utime,
thd_stat[i].stime,
&thd_stat[i].cpu_id);
if (vmsr->pid < 0) {
/*
* We don't count the dead thread
* i.e threads that existed before the sleep
* and not anymore
*/
thd_stat[i].delta_ticks = 0;
} else {
vmsr_delta_ticks(thd_stat, i);
}
}
/*
* Identify the vcpu threads
* Calculate the number of vcpu per package
*/
CPU_FOREACH(cpu) {
for (int i = 0; i < num_threads; i++) {
if (cpu->thread_id == thd_stat[i].thread_id) {
thd_stat[i].is_vcpu = true;
thd_stat[i].vcpu_id = cpu->cpu_index;
pkg_stat[thd_stat[i].pkg_id].nb_vcpu++;
thd_stat[i].acpi_id = kvm_arch_vcpu_id(cpu);
break;
}
}
}
/* Retrieve the virtual package number of each vCPU */
for (int i = 0; i < vmsr->guest_cpu_list->len; i++) {
for (int j = 0; j < num_threads; j++) {
if ((thd_stat[j].acpi_id ==
vmsr->guest_cpu_list->cpus[i].arch_id)
&& (thd_stat[j].is_vcpu == true)) {
x86_topo_ids_from_apicid(thd_stat[j].acpi_id,
&vmsr->guest_topo_info, &topo_ids);
thd_stat[j].vpkg_id = topo_ids.pkg_id;
}
}
}
/* Calculate the total energy of all non-vCPU thread */
for (int i = 0; i < num_threads; i++) {
if ((thd_stat[i].is_vcpu != true) &&
(thd_stat[i].delta_ticks > 0)) {
double temp;
temp = vmsr_get_ratio(pkg_stat[thd_stat[i].pkg_id].e_delta,
thd_stat[i].delta_ticks,
vmsr->host_topo.maxticks[thd_stat[i].pkg_id]);
pkg_stat[thd_stat[i].pkg_id].e_ratio
+= (uint64_t)lround(temp);
}
}
/* Calculate the ratio per non-vCPU thread of each package */
for (int i = 0; i < vmsr->host_topo.maxpkgs; i++) {
if (pkg_stat[i].nb_vcpu > 0) {
pkg_stat[i].e_ratio = pkg_stat[i].e_ratio / pkg_stat[i].nb_vcpu;
}
}
/*
* Calculate the energy for each Package:
* Energy Package = sum of each vCPU energy that belongs to the package