metrics: add /cluster/metrics/export endpoint

This new endpoint returns node, storage and guest metrics in JSON
format. The endpoint supports history/max-age parameters, allowing
the caller to query the recent metric history as recorded by the
PVE::PullMetric module.

The returned data format is quite simple, being an array of
metric records, including a value, a metric name, an id to identify
the object (e.g. qemu/100, node/foo), a timestamp and a type
('gauge', 'derive', ...). The latter property makes the format
self-describing and aids the metric collector in choosing a
representation for storing the metric data.

    [
        ...
        {
            "metric": "cpu_avg1",
            "value": 0.12,
            "timestamp": 170053205,
            "id": "node/foo",
            "type": "gauge"
        },
        ...
    ]

Some experiments were made in regards to making the format
more 'efficient', e.g. by grouping based on timestamps/ids, resulting
in a much more nested/complicated data format. While that
certainly reduces the size of the raw JSON response by quite a bit,
after GZIP compression the differences are negligible (the
simple, flat data format as described above compresses by a factor
of 25 for large clusters!). Also, the slightly increased CPU load
of compressing the larger amount of data when e.g. polling once a
minute is so small that it's indistinguishable from noise in relation
to a usual hypervisor workload. Thus the simpler format was
chosen. One benefit of this format is that it is more or less already
the exact same format as the one Prometheus uses, but in JSON format -
so adding a Prometheus metric scraping endpoint should not be much
work at all.

The API endpoint collects metrics for the whole cluster by calling
the same endpoint for all cluster nodes. To avoid endless request
recursion, the 'local-only' request parameter is provided. If this
parameter is set, the endpoint implementation will only return metrics
for the local node, avoiding a loop.

Signed-off-by: Lukas Wagner <l.wagner@proxmox.com>
[WB: remove unused $start_time leftover from benchmarks]
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
This commit is contained in:
Lukas Wagner 2024-08-12 10:36:06 +02:00 committed by Wolfgang Bumiller
parent 5732ad6584
commit 073b53ae71
2 changed files with 354 additions and 0 deletions

View File

@ -6,8 +6,11 @@ use strict;
use PVE::Tools qw(extract_param extract_sensitive_params);
use PVE::Exception qw(raise_perm_exc raise_param_exc);
use PVE::JSONSchema qw(get_standard_option);
use PVE::INotify;
use PVE::RPCEnvironment;
use PVE::ExtMetric;
use PVE::PullMetric;
use PVE::SafeSyslog;
use PVE::RESTHandler;
@ -288,4 +291,184 @@ __PACKAGE__->register_method ({
return;
}});
__PACKAGE__->register_method ({
    name => 'export',
    path => 'export',
    method => 'GET',
    protected => 1,
    description => "Retrieve metrics of the cluster.",
    permissions => {
	check => ['perm', '/', ['Sys.Audit']],
    },
    parameters => {
	additionalProperties => 0,
	properties => {
	    'local-only' => {
		type => 'boolean',
		description =>
		    'Only return metrics for the current node instead of the whole cluster',
		optional => 1,
		default => 0,
	    },
	    'start-time' => {
		type => 'integer',
		description => 'Only include metrics with a timestamp > start-time.',
		optional => 1,
		default => 0,
	    },
	    'history' => {
		type => 'boolean',
		description => 'Also return historic values.'
		    . ' Returns full available metric history unless `start-time` is also set',
		optional => 1,
		default => 0,
	    },
	},
    },
    returns => {
	type => 'object',
	additionalProperties => 0,
	properties => {
	    data => {
		type => 'array',
		description => 'Array of system metrics. Metrics are sorted by their timestamp.',
		items => {
		    type => 'object',
		    additionalProperties => 0,
		    properties => {
			timestamp => {
			    type => 'integer',
			    description => 'Time at which this metric was observed',
			},
			id => {
			    type => 'string',
			    description => "Unique identifier for this metric object,"
				. " for instance 'node/<nodename>' or"
				. " 'qemu/<vmid>'."
			},
			metric => {
			    type => 'string',
			    description => "Name of the metric.",
			},
			value => {
			    type => 'number',
			    description => 'Metric value.',
			},
			type => {
			    type => 'string',
			    description => 'Type of the metric.',
			    enum => [qw(gauge counter derive)],
			}
		    }
		},
	    },
	}
    },
    code => sub {
	my ($param) = @_;

	my $local_only = $param->{'local-only'} // 0;
	my $start = $param->{'start-time'};
	my $history = $param->{'history'} // 0;

	my $now = time();

	# Number of historic metric generations to fetch from the local cache.
	my $generations;
	if ($history) {
	    # Assuming update loop time of pvestatd of 10 seconds.
	    if (defined($start)) {
		my $delta = $now - $start;
		$generations = int($delta / 10);
		# A `start-time` in the future would yield a negative generation
		# count - clamp to 0 (most recent generation only).
		$generations = 0 if $generations < 0;
	    } else {
		$generations = PVE::PullMetric::max_generations();
	    }
	} else {
	    $generations = 0;
	}

	my @metrics = @{PVE::PullMetric::get_local_metrics($generations)};

	# Generations are coarse-grained (10s), so drop any metrics that are
	# still older than the requested start time.
	if (defined($start)) {
	    @metrics = grep {
		$_->{timestamp} > $start
	    } @metrics;
	}

	my $nodename = PVE::INotify::nodename();

	# Fan out to cluster members
	# Do NOT remove this check
	if (!$local_only) {
	    my $members = PVE::Cluster::get_members();

	    my $rpcenv = PVE::RPCEnvironment::get();
	    my $authuser = $rpcenv->get_user();
	    my ($user, undef) = PVE::AccessControl::split_tokenid($authuser, 1);

	    my $ticket;
	    if ($user) {
		# Theoretically, we might now bypass token privilege separation, since
		# we use the regular user instead of the token, but
		# since we already passed the permission check for this handler,
		# this should be fine.
		$ticket = PVE::AccessControl::assemble_ticket($user);
	    } else {
		$ticket = PVE::AccessControl::assemble_ticket($authuser);
	    }

	    for my $name (keys %$members) {
		if ($name eq $nodename) {
		    # Skip own node, for that one we already have the metrics
		    next;
		}

		if (!$members->{$name}->{online}) {
		    next;
		}

		my $status = eval {
		    my $fingerprint = PVE::Cluster::get_node_fingerprint($name);
		    my $ip = scalar(PVE::Cluster::remote_node_ip($name));

		    my $conn_args = {
			protocol => 'https',
			host => $ip,
			port => 8006,
			ticket => $ticket,
			timeout => 5,
		    };

		    $conn_args->{cached_fingerprints} = { $fingerprint => 1 };

		    my $api_client = PVE::APIClient::LWP->new(%$conn_args);

		    my $params = {
			# Do NOT remove 'local-only' - potential for request recursion!
			'local-only' => 1,
			history => $history,
		    };
		    $params->{'start-time'} = $start if defined($start);

		    $api_client->get('/cluster/metrics/export', $params);
		};

		if ($@) {
		    # Best-effort: a single unreachable node must not fail the
		    # whole request - only log a warning.
		    syslog('warning', "could not fetch metrics from $name: $@");
		} else {
		    push @metrics, $status->{data}->@*;
		}
	    }
	}

	my @sorted = sort {$a->{timestamp} <=> $b->{timestamp}} @metrics;

	return {
	    data => \@sorted,
	};
    },
});
1;

View File

@ -51,4 +51,175 @@ sub update {
$txn->{$subsystem}->{timestamp} = $timestamp;
}
# Build a single metric record of type 'gauge' (a point-in-time measurement).
my sub gauge {
    my ($id, $timestamp, $metric, $value) = @_;

    return {
	type => 'gauge',
	id => $id,
	metric => $metric,
	# Force numeric context so the JSON encoder emits numbers, not strings.
	timestamp => $timestamp + 0,
	value => $value + 0,
    };
}
# Build a single metric record of type 'derive' (a monotonically increasing
# counter value from which the collector computes rates).
my sub derive {
    my ($id, $timestamp, $metric, $value) = @_;

    return {
	type => 'derive',
	id => $id,
	metric => $metric,
	# Force numeric context so the JSON encoder emits numbers, not strings.
	timestamp => $timestamp + 0,
	value => $value + 0,
    };
}
my $nodename = PVE::INotify::nodename();
# Extract node-level metrics (uptime, network, CPU, memory, root disk) from
# one cached pvestatd 'node' stat generation.
my sub get_node_metrics {
    my ($stats) = @_;

    my $data = $stats->{data};
    my $timestamp = $stats->{timestamp};
    my $id = "node/$nodename";

    my $metrics = [gauge($id, $timestamp, "uptime", $data->{uptime})];

    # Sum up traffic counters over all *physical* NICs.
    my ($rx_total, $tx_total) = (0, 0);
    for my $nic (keys $data->{nics}->%*) {
	next if $nic !~ /^$PVE::Network::PHYSICAL_NIC_RE$/;
	$rx_total += $data->{nics}->{$nic}->{receive};
	$tx_total += $data->{nics}->{$nic}->{transmit};
    }
    push $metrics->@*,
	derive($id, $timestamp, "net_in", $rx_total),
	derive($id, $timestamp, "net_out", $tx_total);

    my $cpu = $data->{cpustat};
    push $metrics->@*,
	gauge($id, $timestamp, "cpu_avg1", $cpu->{avg1}),
	gauge($id, $timestamp, "cpu_avg5", $cpu->{avg5}),
	gauge($id, $timestamp, "cpu_avg15", $cpu->{avg15}),
	gauge($id, $timestamp, "cpu_max", $cpu->{cpus}),
	gauge($id, $timestamp, "cpu_current", $cpu->{cpu}),
	gauge($id, $timestamp, "cpu_iowait", $cpu->{iowait});

    my $mem = $data->{memory};
    push $metrics->@*,
	gauge($id, $timestamp, "mem_total", $mem->{memtotal}),
	gauge($id, $timestamp, "mem_used", $mem->{memused}),
	gauge($id, $timestamp, "swap_total", $mem->{swaptotal}),
	gauge($id, $timestamp, "swap_used", $mem->{swapused});

    # Root filesystem usage, derived from free/total block counts.
    my $blockstat = $data->{blockstat};
    push $metrics->@*,
	gauge($id, $timestamp, "disk_total", $blockstat->{blocks}),
	gauge($id, $timestamp, "disk_used", $blockstat->{blocks} - $blockstat->{bfree});

    return $metrics;
}
# Extract per-VM metrics from one cached pvestatd 'qemu' stat generation.
# Runtime metrics (CPU, memory, disk/net IO) are only emitted for running
# guests; static ones (uptime, limits) are always emitted.
my sub get_qemu_metrics {
    my ($stats) = @_;

    my $timestamp = $stats->{timestamp};
    my $metrics = [];

    for my $vmid (keys $stats->{data}->%*) {
	my $guest = $stats->{data}->{$vmid};
	my $id = "qemu/$vmid";

	if ($guest->{status} eq 'running') {
	    push $metrics->@*,
		gauge($id, $timestamp, "cpu_current", $guest->{cpu}),
		gauge($id, $timestamp, "mem_used", $guest->{mem}),
		derive($id, $timestamp, "disk_read", $guest->{diskread}),
		derive($id, $timestamp, "disk_write", $guest->{diskwrite}),
		derive($id, $timestamp, "net_in", $guest->{netin}),
		derive($id, $timestamp, "net_out", $guest->{netout});
	}

	push $metrics->@*,
	    gauge($id, $timestamp, "uptime", $guest->{uptime}),
	    gauge($id, $timestamp, "cpu_max", $guest->{cpus}),
	    gauge($id, $timestamp, "mem_total", $guest->{maxmem}),
	    gauge($id, $timestamp, "disk_total", $guest->{maxdisk});

	# TODO: This one always seems to be 0?
	# push @$metrics, num_metric("disk_used", $id, $guest->{disk}, $timestamp);
    }

    return $metrics;
}
# Extract per-container metrics from one cached pvestatd 'lxc' stat
# generation. Same structure as for qemu guests, except that 'disk_used'
# is available for containers.
my sub get_lxc_metrics {
    my ($stats) = @_;

    my $timestamp = $stats->{timestamp};
    my $metrics = [];

    for my $vmid (keys $stats->{data}->%*) {
	my $guest = $stats->{data}->{$vmid};
	my $id = "lxc/$vmid";

	if ($guest->{status} eq 'running') {
	    push $metrics->@*,
		gauge($id, $timestamp, "cpu_current", $guest->{cpu}),
		gauge($id, $timestamp, "mem_used", $guest->{mem}),
		derive($id, $timestamp, "disk_read", $guest->{diskread}),
		derive($id, $timestamp, "disk_write", $guest->{diskwrite}),
		derive($id, $timestamp, "net_in", $guest->{netin}),
		derive($id, $timestamp, "net_out", $guest->{netout}),
		gauge($id, $timestamp, "disk_used", $guest->{disk});
	}

	push $metrics->@*,
	    gauge($id, $timestamp, "uptime", $guest->{uptime}),
	    gauge($id, $timestamp, "cpu_max", $guest->{cpus}),
	    gauge($id, $timestamp, "mem_total", $guest->{maxmem}),
	    gauge($id, $timestamp, "disk_total", $guest->{maxdisk});
    }

    return $metrics;
}
# Extract per-storage usage metrics from one cached pvestatd 'storage' stat
# generation. The id includes the node name, since storage usage is
# reported per node.
my sub get_storage_metrics {
    my ($stats) = @_;

    my $timestamp = $stats->{timestamp};
    my $metrics = [];

    for my $sid (keys $stats->{data}->%*) {
	my $usage = $stats->{data}->{$sid};
	my $id = "storage/$nodename/$sid";

	push $metrics->@*,
	    gauge($id, $timestamp, "disk_total", $usage->{total}),
	    gauge($id, $timestamp, "disk_used", $usage->{used});
    }

    return $metrics;
}
# Return the flat list of local metric records, optionally including up to
# $history older stat generations from the cache (0 = most recent only).
sub get_local_metrics {
    my ($history) = @_;

    # Without the history parameter only the most recent cached generation
    # is queried.
    $history = int($history // 0);

    my $result = [];

    for my $generation ($get_cache->()->get_last($history)->@*) {
	push $result->@*,
	    get_node_metrics($generation->{node})->@*,
	    get_qemu_metrics($generation->{qemu})->@*,
	    get_lxc_metrics($generation->{lxc})->@*,
	    get_storage_metrics($generation->{storage})->@*;
    }

    return $result;
}
1;