pvestatd: store subsystem status data in a shared cache

This commit adds a new module PVE::PullMetric. This module allows
us to store the status data of various subsystems, including status
data for the most recent pvestatd update loops. Right now, we
store 180 old generations - including the most recent values, that gives
roughly 30 minutes of stat history (based on a 10 second pvestatd update
loop interval).

This cache allows us to add support for pull-style metric collection
systems, be it Prometheus/OpenMetrics or some custom, JSON based
metric format.

This patch raises the lib{proxmox,pve}-perl-rs version requirements,
since we need the new bindings for proxmox-shared-cache.

Signed-off-by: Lukas Wagner <l.wagner@proxmox.com>
[WB: actually bump *runtime* deps in d/control]
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
This commit is contained in:
Lukas Wagner 2024-08-12 10:36:05 +02:00 committed by Wolfgang Bumiller
parent e6efc75389
commit 5732ad6584
4 changed files with 84 additions and 12 deletions

View File

@ -13,6 +13,7 @@ PERLSOURCE = \
HTTPServer.pm \
Jobs.pm \
NodeConfig.pm \
PullMetric.pm \
Report.pm \
VZDump.pm

54
PVE/PullMetric.pm Normal file
View File

@ -0,0 +1,54 @@
package PVE::PullMetric;
use strict;
use warnings;
use Proxmox::RS::SharedCache;
use PVE::Network;
use constant OLD_GENERATIONS => 180;
use constant LOCK_TIMEOUT => 2;
# Handle to the shared metrics cache; stays undefined until first needed.
my $cache;

# Return the shared cache handle, constructing it on the first call and
# reusing the same object afterwards.
#
# The cache is created at /run/pve/metrics, with the uid of 'root' as owner,
# the gid of 'www-data' as group, entry mode 0640, and OLD_GENERATIONS old
# entries kept.
my $get_cache = sub {
    return $cache if defined($cache);

    # Looked up by name in scalar context, getpwnam/getgrnam yield the
    # numeric uid/gid.
    my %params = (
        path => "/run/pve/metrics",
        owner => scalar(getpwnam('root')),
        group => scalar(getgrnam('www-data')),
        entry_mode => 0640, # Entry permissions
        keep_old => OLD_GENERATIONS,
    );
    $cache = Proxmox::RS::SharedCache->new(\%params);

    return $cache;
};
# Total number of generations held by the metrics cache: the most recent
# stats plus the OLD_GENERATIONS older ones that are kept around.
sub max_generations {
    # One slot for the most recent stats, the rest for the old ones
    return 1 + OLD_GENERATIONS;
}
# Begin a new pull-metric transaction.
#
# Returns a reference to a fresh, empty hash. update() records per-subsystem
# data in it and transaction_finish() hands it to the cache.
sub transaction_start {
    my %txn;
    return \%txn;
}
# Commit a transaction to the shared metrics cache.
#
# $txn - hash reference as returned by transaction_start() and filled in by
#        update().
#
# No error handling is done here; anything thrown while obtaining the cache
# or by its set() call propagates to the caller.
sub transaction_finish {
    my ($txn) = @_;

    # Pass the module-level LOCK_TIMEOUT constant (value 2) instead of
    # repeating it as a bare number, so there is a single place to adjust it.
    # NOTE(review): the second argument of set() is presumably the lock
    # timeout in seconds - confirm against the Proxmox::RS::SharedCache
    # bindings.
    $get_cache->()->set($txn, LOCK_TIMEOUT);
}
# Record the status data of one subsystem in a transaction.
#
# $txn       - transaction hash reference (see transaction_start())
# $subsystem - key under which the entry is stored in $txn
# $data      - stored as the entry's 'data' field
# $timestamp - stored as the entry's 'timestamp' field
#
# An existing entry for $subsystem has these two fields overwritten.
sub update {
    my ($txn, $subsystem, $data, $timestamp) = @_;

    # Create the per-subsystem entry on first use, then fill in both fields.
    my $entry = ($txn->{$subsystem} //= {});
    $entry->{data} = $data;
    $entry->{timestamp} = $timestamp;
}
1;

View File

@ -31,6 +31,7 @@ use PVE::Ceph::Tools;
use PVE::pvecfg;
use PVE::ExtMetric;
use PVE::PullMetric;
use PVE::Status::Plugin;
use base qw(PVE::Daemon);
@ -147,7 +148,7 @@ my sub broadcast_static_node_info {
}
sub update_node_status {
my ($status_cfg) = @_;
my ($status_cfg, $pull_txn) = @_;
my ($uptime) = PVE::ProcFSTools::read_proc_uptime();
@ -199,6 +200,8 @@ sub update_node_status {
PVE::ExtMetric::update_all($transactions, 'node', $nodename, $node_metric, $ctime);
PVE::ExtMetric::transactions_finish($transactions);
PVE::PullMetric::update($pull_txn, 'node', $node_metric, $ctime);
broadcast_static_node_info($maxcpu, $meminfo->{memtotal});
}
@ -231,7 +234,7 @@ sub auto_balloning {
}
sub update_qemu_status {
my ($status_cfg) = @_;
my ($status_cfg, $pull_txn) = @_;
my $ctime = time();
my $vmstatus = PVE::QemuServer::vmstatus(undef, 1);
@ -261,6 +264,8 @@ sub update_qemu_status {
}
PVE::ExtMetric::transactions_finish($transactions);
PVE::PullMetric::update($pull_txn, 'qemu', $vmstatus, $ctime);
}
sub remove_stale_lxc_consoles {
@ -440,7 +445,7 @@ sub rebalance_lxc_containers {
}
sub update_lxc_status {
my ($status_cfg) = @_;
my ($status_cfg, $pull_txn) = @_;
my $ctime = time();
my $vmstatus = PVE::LXC::vmstatus();
@ -469,10 +474,12 @@ sub update_lxc_status {
PVE::ExtMetric::update_all($transactions, 'lxc', $vmid, $d, $ctime, $nodename);
}
PVE::ExtMetric::transactions_finish($transactions);
PVE::PullMetric::update($pull_txn, 'lxc', $vmstatus, $ctime);
}
sub update_storage_status {
my ($status_cfg) = @_;
my ($status_cfg, $pull_txn) = @_;
my $cfg = PVE::Storage::config();
my $ctime = time();
@ -492,6 +499,8 @@ sub update_storage_status {
PVE::ExtMetric::update_all($transactions, 'storage', $nodename, $storeid, $d, $ctime);
}
PVE::ExtMetric::transactions_finish($transactions);
PVE::PullMetric::update($pull_txn, 'storage', $info, $ctime);
}
sub rotate_authkeys {
@ -532,6 +541,8 @@ sub update_status {
# correct list in case of an unexpected crash.
my $rpcenv = PVE::RPCEnvironment::get();
my $pull_txn = PVE::PullMetric::transaction_start();
eval {
my $tlist = $rpcenv->active_workers();
PVE::Cluster::broadcast_tasklist($tlist);
@ -542,19 +553,19 @@ sub update_status {
my $status_cfg = PVE::Cluster::cfs_read_file('status.cfg');
eval {
update_node_status($status_cfg);
update_node_status($status_cfg, $pull_txn);
};
$err = $@;
syslog('err', "node status update error: $err") if $err;
eval {
update_qemu_status($status_cfg);
update_qemu_status($status_cfg, $pull_txn);
};
$err = $@;
syslog('err', "qemu status update error: $err") if $err;
eval {
update_lxc_status($status_cfg);
update_lxc_status($status_cfg, $pull_txn);
};
$err = $@;
syslog('err', "lxc status update error: $err") if $err;
@ -566,7 +577,7 @@ sub update_status {
syslog('err', "lxc cpuset rebalance error: $err") if $err;
eval {
update_storage_status($status_cfg);
update_storage_status($status_cfg, $pull_txn);
};
$err = $@;
syslog('err', "storage status update error: $err") if $err;
@ -600,6 +611,12 @@ sub update_status {
};
$err = $@;
syslog('err', "version info update error: $err") if $err;
eval {
PVE::PullMetric::transaction_finish($pull_txn);
};
$err = $@;
syslog('err', "could not populate metric data cache: $err") if $err;
}
my $next_update = 0;

8
debian/control vendored
View File

@ -7,7 +7,7 @@ Build-Depends: debhelper-compat (= 13),
libhttp-daemon-perl,
libpod-parser-perl,
libproxmox-acme-perl,
libproxmox-rs-perl (>= 0.2.0),
libproxmox-rs-perl (>= 0.3.4),
libpve-access-control (>= 8.0.7),
libpve-cluster-api-perl,
libpve-cluster-perl (>= 6.1-6),
@ -15,7 +15,7 @@ Build-Depends: debhelper-compat (= 13),
libpve-guest-common-perl (>= 5.1.1),
libpve-http-server-perl (>= 2.0-12),
libpve-notify-perl,
libpve-rs-perl (>= 0.7.1),
libpve-rs-perl (>= 0.8.10),
libpve-storage-perl (>= 6.3-2),
libtemplate-perl,
libtest-mockmodule-perl,
@ -56,7 +56,7 @@ Depends: apt (>= 1.5~),
libnet-dns-perl,
libproxmox-acme-perl,
libproxmox-acme-plugins,
libproxmox-rs-perl (>= 0.2.0),
libproxmox-rs-perl (>= 0.3.4),
libpve-access-control (>= 8.1.3),
libpve-cluster-api-perl (>= 7.0-5),
libpve-cluster-perl (>= 7.2-3),
@ -64,7 +64,7 @@ Depends: apt (>= 1.5~),
libpve-guest-common-perl (>= 5.1.4),
libpve-http-server-perl (>= 4.1-1),
libpve-notify-perl (>= 8.0.5),
libpve-rs-perl (>= 0.7.1),
libpve-rs-perl (>= 0.8.10),
libpve-storage-perl (>= 8.1.5),
librados2-perl (>= 1.3-1),
libtemplate-perl,