metrics: add /cluster/metrics/export endpoint

This new endpoint returns node, storage and guest metrics in JSON
format. The endpoint supports history/max-age parameters, allowing
the caller to query the recent metric history as recorded by the
PVE::PullMetric module.

The returned data format is quite simple, being an array of
metric records, including a value, a metric name, an id to identify
the object (e.g. qemu/100, node/foo), a timestamp and a type
('gauge', 'derive', ...). The latter property makes the format
self-describing and aids the metric collector in choosing a
representation for storing the metric data.

    [
        ...
        {
            "metric": "cpu_avg1",
            "value": 0.12,
            "timestamp": 170053205,
            "id": "node/foo",
            "type": "gauge"
        },
        ...
    ]

Some experiments were made in regards to making the format
more 'efficient', e.g. by grouping based on timestamps/ids, resulting
in a much more nested/complicated data format. While that
certainly reduces the size of the raw JSON response by quite a bit,
after GZIP compression the differences are negligible (the
simple, flat data format as described above compresses by a factor
of 25 for large clusters!). Also, the slightly increased CPU load
of compressing the larger amount of data when e.g. polling once a
minute is so small that it's indistinguishable from noise in relation
to a usual hypervisor workload. Thus the simpler format was
chosen. One benefit of this format is that it is more or less already
the exact same format as the one Prometheus uses, but in JSON format -
so adding a Prometheus metric scraping endpoint should not be much
work at all.

The API endpoint collects metrics for the whole cluster by calling
the same endpoint for all cluster nodes. To avoid endless request
recursion, the 'local-only' request parameter is provided. If this
parameter is set, the endpoint implementation will only return metrics
for the local node, avoiding a loop.

Signed-off-by: Lukas Wagner <l.wagner@proxmox.com>
[WB: remove unused $start_time leftover from benchmarks]
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
This commit is contained in:
Lukas Wagner 2024-08-12 10:36:06 +02:00 committed by Wolfgang Bumiller
parent 5732ad6584
commit 073b53ae71
2 changed files with 354 additions and 0 deletions

View File

@ -6,8 +6,11 @@ use strict;
use PVE::Tools qw(extract_param extract_sensitive_params);
use PVE::Exception qw(raise_perm_exc raise_param_exc);
use PVE::JSONSchema qw(get_standard_option);
use PVE::INotify;
use PVE::RPCEnvironment;
use PVE::ExtMetric;
use PVE::PullMetric;
use PVE::SafeSyslog;
use PVE::RESTHandler;
@ -288,4 +291,184 @@ __PACKAGE__->register_method ({
return;
}});
__PACKAGE__->register_method ({
    name => 'export',
    path => 'export',
    method => 'GET',
    protected => 1,
    description => "Retrieve metrics of the cluster.",
    permissions => {
	check => ['perm', '/', ['Sys.Audit']],
    },
    parameters => {
	additionalProperties => 0,
	properties => {
	    'local-only' => {
		type => 'boolean',
		description =>
		    'Only return metrics for the current node instead of the whole cluster',
		optional => 1,
		default => 0,
	    },
	    'start-time' => {
		type => 'integer',
		description => 'Only include metrics with a timestamp > start-time.',
		optional => 1,
		default => 0,
	    },
	    'history' => {
		type => 'boolean',
		description => 'Also return historic values.'
		    . ' Returns full available metric history unless `start-time` is also set',
		optional => 1,
		default => 0,
	    },
	},
    },
    returns => {
	type => 'object',
	additionalProperties => 0,
	properties => {
	    data => {
		type => 'array',
		description => 'Array of system metrics. Metrics are sorted by their timestamp.',
		items => {
		    type => 'object',
		    additionalProperties => 0,
		    properties => {
			timestamp => {
			    type => 'integer',
			    description => 'Time at which this metric was observed',
			},
			id => {
			    type => 'string',
			    description => "Unique identifier for this metric object,"
				. " for instance 'node/<nodename>' or"
				. " 'qemu/<vmid>'."
			},
			metric => {
			    type => 'string',
			    description => "Name of the metric.",
			},
			value => {
			    type => 'number',
			    description => 'Metric value.',
			},
			type => {
			    type => 'string',
			    description => 'Type of the metric.',
			    enum => [qw(gauge counter derive)],
			}
		    }
		},
	    },
	}
    },
    code => sub {
	my ($param) = @_;

	my $local_only = $param->{'local-only'} // 0;
	my $start = $param->{'start-time'};
	my $history = $param->{'history'} // 0;

	my $now = time();

	# Number of historic metric generations to fetch from the local cache.
	my $generations;
	if ($history) {
	    # Assuming update loop time of pvestatd of 10 seconds.
	    if (defined($start)) {
		my $delta = $now - $start;
		$generations = int($delta / 10);
		# A `start-time` in the future would yield a negative generation
		# count - clamp to 0 (most recent generation only).
		$generations = 0 if $generations < 0;
	    } else {
		$generations = PVE::PullMetric::max_generations();
	    }
	} else {
	    $generations = 0;
	}

	my @metrics = @{PVE::PullMetric::get_local_metrics($generations)};

	# Generations are coarse-grained (10s), so drop any metrics that are
	# still older than the requested start time.
	if (defined($start)) {
	    @metrics = grep {
		$_->{timestamp} > $start
	    } @metrics;
	}

	my $nodename = PVE::INotify::nodename();

	# Fan out to cluster members
	# Do NOT remove this check
	if (!$local_only) {
	    my $members = PVE::Cluster::get_members();

	    my $rpcenv = PVE::RPCEnvironment::get();
	    my $authuser = $rpcenv->get_user();
	    my ($user, undef) = PVE::AccessControl::split_tokenid($authuser, 1);

	    my $ticket;
	    if ($user) {
		# Theoretically, we might now bypass token privilege separation, since
		# we use the regular user instead of the token, but
		# since we already passed the permission check for this handler,
		# this should be fine.
		$ticket = PVE::AccessControl::assemble_ticket($user);
	    } else {
		$ticket = PVE::AccessControl::assemble_ticket($authuser);
	    }

	    for my $name (keys %$members) {
		if ($name eq $nodename) {
		    # Skip own node, for that one we already have the metrics
		    next;
		}

		if (!$members->{$name}->{online}) {
		    next;
		}

		my $status = eval {
		    my $fingerprint = PVE::Cluster::get_node_fingerprint($name);
		    my $ip = scalar(PVE::Cluster::remote_node_ip($name));

		    my $conn_args = {
			protocol => 'https',
			host => $ip,
			port => 8006,
			ticket => $ticket,
			timeout => 5,
		    };

		    $conn_args->{cached_fingerprints} = { $fingerprint => 1 };

		    my $api_client = PVE::APIClient::LWP->new(%$conn_args);

		    my $params = {
			# Do NOT remove 'local-only' - potential for request recursion!
			'local-only' => 1,
			history => $history,
		    };
		    $params->{'start-time'} = $start if defined($start);

		    $api_client->get('/cluster/metrics/export', $params);
		};

		if ($@) {
		    # Best-effort: a single unreachable node must not fail the
		    # whole request - only log a warning.
		    syslog('warning', "could not fetch metrics from $name: $@");
		} else {
		    push @metrics, $status->{data}->@*;
		}
	    }
	}

	my @sorted = sort {$a->{timestamp} <=> $b->{timestamp}} @metrics;

	return {
	    data => \@sorted,
	};
    },
});
1;

View File

@ -51,4 +51,175 @@ sub update {
$txn->{$subsystem}->{timestamp} = $timestamp;
}
# Build a single metric record of type 'gauge' (a point-in-time measurement).
my sub gauge {
    my ($id, $timestamp, $metric, $value) = @_;

    return {
	type => 'gauge',
	id => $id,
	metric => $metric,
	# Force numeric context so the JSON encoder emits numbers, not strings.
	timestamp => $timestamp + 0,
	value => $value + 0,
    };
}
# Build a single metric record of type 'derive' (a monotonically increasing
# counter value from which the collector computes rates).
my sub derive {
    my ($id, $timestamp, $metric, $value) = @_;

    return {
	type => 'derive',
	id => $id,
	metric => $metric,
	# Force numeric context so the JSON encoder emits numbers, not strings.
	timestamp => $timestamp + 0,
	value => $value + 0,
    };
}
my $nodename = PVE::INotify::nodename();
# Extract node-level metrics (uptime, network, CPU, memory, root disk) from
# one cached pvestatd 'node' stat generation.
my sub get_node_metrics {
    my ($stats) = @_;

    my $data = $stats->{data};
    my $timestamp = $stats->{timestamp};
    my $id = "node/$nodename";

    my $metrics = [gauge($id, $timestamp, "uptime", $data->{uptime})];

    # Sum up traffic counters over all *physical* NICs.
    my ($rx_total, $tx_total) = (0, 0);
    for my $nic (keys $data->{nics}->%*) {
	next if $nic !~ /^$PVE::Network::PHYSICAL_NIC_RE$/;
	$rx_total += $data->{nics}->{$nic}->{receive};
	$tx_total += $data->{nics}->{$nic}->{transmit};
    }
    push $metrics->@*,
	derive($id, $timestamp, "net_in", $rx_total),
	derive($id, $timestamp, "net_out", $tx_total);

    my $cpu = $data->{cpustat};
    push $metrics->@*,
	gauge($id, $timestamp, "cpu_avg1", $cpu->{avg1}),
	gauge($id, $timestamp, "cpu_avg5", $cpu->{avg5}),
	gauge($id, $timestamp, "cpu_avg15", $cpu->{avg15}),
	gauge($id, $timestamp, "cpu_max", $cpu->{cpus}),
	gauge($id, $timestamp, "cpu_current", $cpu->{cpu}),
	gauge($id, $timestamp, "cpu_iowait", $cpu->{iowait});

    my $mem = $data->{memory};
    push $metrics->@*,
	gauge($id, $timestamp, "mem_total", $mem->{memtotal}),
	gauge($id, $timestamp, "mem_used", $mem->{memused}),
	gauge($id, $timestamp, "swap_total", $mem->{swaptotal}),
	gauge($id, $timestamp, "swap_used", $mem->{swapused});

    # Root filesystem usage, derived from free/total block counts.
    my $blockstat = $data->{blockstat};
    push $metrics->@*,
	gauge($id, $timestamp, "disk_total", $blockstat->{blocks}),
	gauge($id, $timestamp, "disk_used", $blockstat->{blocks} - $blockstat->{bfree});

    return $metrics;
}
# Extract per-VM metrics from one cached pvestatd 'qemu' stat generation.
# Runtime metrics (CPU, memory, disk/net IO) are only emitted for running
# guests; static ones (uptime, limits) are always emitted.
my sub get_qemu_metrics {
    my ($stats) = @_;

    my $timestamp = $stats->{timestamp};
    my $metrics = [];

    for my $vmid (keys $stats->{data}->%*) {
	my $guest = $stats->{data}->{$vmid};
	my $id = "qemu/$vmid";

	if ($guest->{status} eq 'running') {
	    push $metrics->@*,
		gauge($id, $timestamp, "cpu_current", $guest->{cpu}),
		gauge($id, $timestamp, "mem_used", $guest->{mem}),
		derive($id, $timestamp, "disk_read", $guest->{diskread}),
		derive($id, $timestamp, "disk_write", $guest->{diskwrite}),
		derive($id, $timestamp, "net_in", $guest->{netin}),
		derive($id, $timestamp, "net_out", $guest->{netout});
	}

	push $metrics->@*,
	    gauge($id, $timestamp, "uptime", $guest->{uptime}),
	    gauge($id, $timestamp, "cpu_max", $guest->{cpus}),
	    gauge($id, $timestamp, "mem_total", $guest->{maxmem}),
	    gauge($id, $timestamp, "disk_total", $guest->{maxdisk});

	# TODO: This one always seems to be 0?
	# push @$metrics, num_metric("disk_used", $id, $guest->{disk}, $timestamp);
    }

    return $metrics;
}
# Extract per-container metrics from one cached pvestatd 'lxc' stat
# generation. Same structure as for qemu guests, except that 'disk_used'
# is available for containers.
my sub get_lxc_metrics {
    my ($stats) = @_;

    my $timestamp = $stats->{timestamp};
    my $metrics = [];

    for my $vmid (keys $stats->{data}->%*) {
	my $guest = $stats->{data}->{$vmid};
	my $id = "lxc/$vmid";

	if ($guest->{status} eq 'running') {
	    push $metrics->@*,
		gauge($id, $timestamp, "cpu_current", $guest->{cpu}),
		gauge($id, $timestamp, "mem_used", $guest->{mem}),
		derive($id, $timestamp, "disk_read", $guest->{diskread}),
		derive($id, $timestamp, "disk_write", $guest->{diskwrite}),
		derive($id, $timestamp, "net_in", $guest->{netin}),
		derive($id, $timestamp, "net_out", $guest->{netout}),
		gauge($id, $timestamp, "disk_used", $guest->{disk});
	}

	push $metrics->@*,
	    gauge($id, $timestamp, "uptime", $guest->{uptime}),
	    gauge($id, $timestamp, "cpu_max", $guest->{cpus}),
	    gauge($id, $timestamp, "mem_total", $guest->{maxmem}),
	    gauge($id, $timestamp, "disk_total", $guest->{maxdisk});
    }

    return $metrics;
}
# Extract per-storage usage metrics from one cached pvestatd 'storage' stat
# generation. The id includes the node name, since storage usage is
# reported per node.
my sub get_storage_metrics {
    my ($stats) = @_;

    my $timestamp = $stats->{timestamp};
    my $metrics = [];

    for my $sid (keys $stats->{data}->%*) {
	my $usage = $stats->{data}->{$sid};
	my $id = "storage/$nodename/$sid";

	push $metrics->@*,
	    gauge($id, $timestamp, "disk_total", $usage->{total}),
	    gauge($id, $timestamp, "disk_used", $usage->{used});
    }

    return $metrics;
}
# Return the flat list of local metric records, optionally including up to
# $history older stat generations from the cache (0 = most recent only).
sub get_local_metrics {
    my ($history) = @_;

    # Without the history parameter only the most recent cached generation
    # is queried.
    $history = int($history // 0);

    my $result = [];

    for my $generation ($get_cache->()->get_last($history)->@*) {
	push $result->@*,
	    get_node_metrics($generation->{node})->@*,
	    get_qemu_metrics($generation->{qemu})->@*,
	    get_lxc_metrics($generation->{lxc})->@*,
	    get_storage_metrics($generation->{storage})->@*;
    }

    return $result;
}
1;