pvestatd: store subsystem status data in a shared cache
This commit adds a new module, PVE::PullMetric. This module allows us to store the status data of various subsystems, including status data for the most recent pvestatd update loops. Right now, we store 180 old generations; together with the most recent values, that gives about 30 minutes of stat history (181 generations, based on a 10 second pvestatd update loop interval). This cache allows us to add support for pull-style metric collection systems, be it Prometheus/OpenMetrics or some custom, JSON-based metric format. This patch raises the lib{proxmox,pve}-perl-rs version requirements, since we need the new bindings for proxmox-shared-cache. Signed-off-by: Lukas Wagner <l.wagner@proxmox.com> [WB: actually bump *runtime* deps in d/control] Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
This commit is contained in:
parent
e6efc75389
commit
5732ad6584
@ -13,6 +13,7 @@ PERLSOURCE = \
|
||||
HTTPServer.pm \
|
||||
Jobs.pm \
|
||||
NodeConfig.pm \
|
||||
PullMetric.pm \
|
||||
Report.pm \
|
||||
VZDump.pm
|
||||
|
||||
|
54
PVE/PullMetric.pm
Normal file
54
PVE/PullMetric.pm
Normal file
@ -0,0 +1,54 @@
|
||||
package PVE::PullMetric;
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use Proxmox::RS::SharedCache;
|
||||
use PVE::Network;
|
||||
|
||||
use constant OLD_GENERATIONS => 180;
|
||||
use constant LOCK_TIMEOUT => 2;
|
||||
|
||||
my $cache;
|
||||
# Return the module-wide shared cache handle, creating it on first use.
#
# The handle is kept in the file-scoped $cache variable, so the constructor
# runs at most once per process; later calls return the stored handle.
my $get_cache = sub {
    return $cache if defined($cache);

    # Numeric ids used as owner and group of the cache.
    my $owner_uid = getpwnam('root');
    my $group_gid = getgrnam('www-data');

    my $params = {
        path => "/run/pve/metrics",
        owner => $owner_uid,
        group => $group_gid,
        entry_mode => 0640, # Entry permissions
        keep_old => OLD_GENERATIONS,
    };

    $cache = Proxmox::RS::SharedCache->new($params);

    return $cache;
};
|
||||
|
||||
# Return how many generations the metrics cache holds in total: the
# OLD_GENERATIONS old entries plus the single most recent one.
sub max_generations {
    return 1 + OLD_GENERATIONS;
}
|
||||
|
||||
# Begin a new transaction.
#
# Returns a reference to a fresh, empty hash. Callers fill it through
# update() and hand it to transaction_finish() when done.
sub transaction_start {
    my %txn;
    return \%txn;
}
|
||||
|
||||
# Finish a transaction by passing everything collected in $txn to the
# shared metrics cache.
#
# $txn - hash reference obtained from transaction_start() and filled
#        via update()
#
# Errors raised while creating or writing the cache are not caught here;
# they propagate to the caller.
sub transaction_finish {
    my ($txn) = @_;

    # Use the named constant rather than a bare 2 (same value).
    # NOTE(review): the second argument is presumably the lock timeout in
    # seconds - confirm against the Proxmox::RS::SharedCache bindings.
    $get_cache->()->set($txn, LOCK_TIMEOUT);
}
|
||||
|
||||
# Record the status data of one subsystem in a transaction.
#
# $txn       - transaction hash reference (see transaction_start())
# $subsystem - key under which the entry is stored, e.g. 'node'
# $data      - status data to store under the 'data' key
# $timestamp - value to store under the 'timestamp' key
#
# An existing entry for $subsystem has both fields overwritten.
sub update {
    my ($txn, $subsystem, $data, $timestamp) = @_;

    # Look up the subsystem slot once, creating it when missing.
    my $entry = ($txn->{$subsystem} //= {});

    $entry->{data} = $data;
    $entry->{timestamp} = $timestamp;
}
|
||||
|
||||
1;
|
@ -31,6 +31,7 @@ use PVE::Ceph::Tools;
|
||||
use PVE::pvecfg;
|
||||
|
||||
use PVE::ExtMetric;
|
||||
use PVE::PullMetric;
|
||||
use PVE::Status::Plugin;
|
||||
|
||||
use base qw(PVE::Daemon);
|
||||
@ -147,7 +148,7 @@ my sub broadcast_static_node_info {
|
||||
}
|
||||
|
||||
sub update_node_status {
|
||||
my ($status_cfg) = @_;
|
||||
my ($status_cfg, $pull_txn) = @_;
|
||||
|
||||
my ($uptime) = PVE::ProcFSTools::read_proc_uptime();
|
||||
|
||||
@ -199,6 +200,8 @@ sub update_node_status {
|
||||
PVE::ExtMetric::update_all($transactions, 'node', $nodename, $node_metric, $ctime);
|
||||
PVE::ExtMetric::transactions_finish($transactions);
|
||||
|
||||
PVE::PullMetric::update($pull_txn, 'node', $node_metric, $ctime);
|
||||
|
||||
broadcast_static_node_info($maxcpu, $meminfo->{memtotal});
|
||||
}
|
||||
|
||||
@ -231,7 +234,7 @@ sub auto_balloning {
|
||||
}
|
||||
|
||||
sub update_qemu_status {
|
||||
my ($status_cfg) = @_;
|
||||
my ($status_cfg, $pull_txn) = @_;
|
||||
|
||||
my $ctime = time();
|
||||
my $vmstatus = PVE::QemuServer::vmstatus(undef, 1);
|
||||
@ -261,6 +264,8 @@ sub update_qemu_status {
|
||||
}
|
||||
|
||||
PVE::ExtMetric::transactions_finish($transactions);
|
||||
|
||||
PVE::PullMetric::update($pull_txn, 'qemu', $vmstatus, $ctime);
|
||||
}
|
||||
|
||||
sub remove_stale_lxc_consoles {
|
||||
@ -440,7 +445,7 @@ sub rebalance_lxc_containers {
|
||||
}
|
||||
|
||||
sub update_lxc_status {
|
||||
my ($status_cfg) = @_;
|
||||
my ($status_cfg, $pull_txn) = @_;
|
||||
|
||||
my $ctime = time();
|
||||
my $vmstatus = PVE::LXC::vmstatus();
|
||||
@ -469,10 +474,12 @@ sub update_lxc_status {
|
||||
PVE::ExtMetric::update_all($transactions, 'lxc', $vmid, $d, $ctime, $nodename);
|
||||
}
|
||||
PVE::ExtMetric::transactions_finish($transactions);
|
||||
|
||||
PVE::PullMetric::update($pull_txn, 'lxc', $vmstatus, $ctime);
|
||||
}
|
||||
|
||||
sub update_storage_status {
|
||||
my ($status_cfg) = @_;
|
||||
my ($status_cfg, $pull_txn) = @_;
|
||||
|
||||
my $cfg = PVE::Storage::config();
|
||||
my $ctime = time();
|
||||
@ -492,6 +499,8 @@ sub update_storage_status {
|
||||
PVE::ExtMetric::update_all($transactions, 'storage', $nodename, $storeid, $d, $ctime);
|
||||
}
|
||||
PVE::ExtMetric::transactions_finish($transactions);
|
||||
|
||||
PVE::PullMetric::update($pull_txn, 'storage', $info, $ctime);
|
||||
}
|
||||
|
||||
sub rotate_authkeys {
|
||||
@ -532,6 +541,8 @@ sub update_status {
|
||||
# correct list in case of an unexpected crash.
|
||||
my $rpcenv = PVE::RPCEnvironment::get();
|
||||
|
||||
my $pull_txn = PVE::PullMetric::transaction_start();
|
||||
|
||||
eval {
|
||||
my $tlist = $rpcenv->active_workers();
|
||||
PVE::Cluster::broadcast_tasklist($tlist);
|
||||
@ -542,19 +553,19 @@ sub update_status {
|
||||
my $status_cfg = PVE::Cluster::cfs_read_file('status.cfg');
|
||||
|
||||
eval {
|
||||
update_node_status($status_cfg);
|
||||
update_node_status($status_cfg, $pull_txn);
|
||||
};
|
||||
$err = $@;
|
||||
syslog('err', "node status update error: $err") if $err;
|
||||
|
||||
eval {
|
||||
update_qemu_status($status_cfg);
|
||||
update_qemu_status($status_cfg, $pull_txn);
|
||||
};
|
||||
$err = $@;
|
||||
syslog('err', "qemu status update error: $err") if $err;
|
||||
|
||||
eval {
|
||||
update_lxc_status($status_cfg);
|
||||
update_lxc_status($status_cfg, $pull_txn);
|
||||
};
|
||||
$err = $@;
|
||||
syslog('err', "lxc status update error: $err") if $err;
|
||||
@ -566,7 +577,7 @@ sub update_status {
|
||||
syslog('err', "lxc cpuset rebalance error: $err") if $err;
|
||||
|
||||
eval {
|
||||
update_storage_status($status_cfg);
|
||||
update_storage_status($status_cfg, $pull_txn);
|
||||
};
|
||||
$err = $@;
|
||||
syslog('err', "storage status update error: $err") if $err;
|
||||
@ -600,6 +611,12 @@ sub update_status {
|
||||
};
|
||||
$err = $@;
|
||||
syslog('err', "version info update error: $err") if $err;
|
||||
|
||||
eval {
|
||||
PVE::PullMetric::transaction_finish($pull_txn);
|
||||
};
|
||||
$err = $@;
|
||||
syslog('err', "could not populate metric data cache: $err") if $err;
|
||||
}
|
||||
|
||||
my $next_update = 0;
|
||||
|
8
debian/control
vendored
8
debian/control
vendored
@ -7,7 +7,7 @@ Build-Depends: debhelper-compat (= 13),
|
||||
libhttp-daemon-perl,
|
||||
libpod-parser-perl,
|
||||
libproxmox-acme-perl,
|
||||
libproxmox-rs-perl (>= 0.2.0),
|
||||
libproxmox-rs-perl (>= 0.3.4),
|
||||
libpve-access-control (>= 8.0.7),
|
||||
libpve-cluster-api-perl,
|
||||
libpve-cluster-perl (>= 6.1-6),
|
||||
@ -15,7 +15,7 @@ Build-Depends: debhelper-compat (= 13),
|
||||
libpve-guest-common-perl (>= 5.1.1),
|
||||
libpve-http-server-perl (>= 2.0-12),
|
||||
libpve-notify-perl,
|
||||
libpve-rs-perl (>= 0.7.1),
|
||||
libpve-rs-perl (>= 0.8.10),
|
||||
libpve-storage-perl (>= 6.3-2),
|
||||
libtemplate-perl,
|
||||
libtest-mockmodule-perl,
|
||||
@ -56,7 +56,7 @@ Depends: apt (>= 1.5~),
|
||||
libnet-dns-perl,
|
||||
libproxmox-acme-perl,
|
||||
libproxmox-acme-plugins,
|
||||
libproxmox-rs-perl (>= 0.2.0),
|
||||
libproxmox-rs-perl (>= 0.3.4),
|
||||
libpve-access-control (>= 8.1.3),
|
||||
libpve-cluster-api-perl (>= 7.0-5),
|
||||
libpve-cluster-perl (>= 7.2-3),
|
||||
@ -64,7 +64,7 @@ Depends: apt (>= 1.5~),
|
||||
libpve-guest-common-perl (>= 5.1.4),
|
||||
libpve-http-server-perl (>= 4.1-1),
|
||||
libpve-notify-perl (>= 8.0.5),
|
||||
libpve-rs-perl (>= 0.7.1),
|
||||
libpve-rs-perl (>= 0.8.10),
|
||||
libpve-storage-perl (>= 8.1.5),
|
||||
librados2-perl (>= 1.3-1),
|
||||
libtemplate-perl,
|
||||
|
Loading…
x
Reference in New Issue
Block a user