mirror of
git://git.proxmox.com/git/pve-storage.git
synced 2025-01-11 05:18:01 +03:00
4be012a4cd
This essentially reverts commit c9bd3d2
("fix #1123: modify NVME device path for SMART support"). The man page for smartctl states: 'Use the forms "/dev/nvme[0-9]" (broadcast namespace) or "/dev/nvme[0-9]n[1-9]" (specific namespace 1-9) for NVMe devices.' So it should be fine to pass the path with the specific namespace to smartctl. But that text was already present in the man page of version 6.5, which is the version that commit c9bd3d2
talks about. It might be that it was necessary to drop the specific namespace for the version backported from Stretch to Jessie (the bug report mentions that that version was used[0]), but it's not quite clear. With current versions, passing in the path with the specific namespace did work as expected[1], even on a device with multiple namespaces set up tested locally. In PBS, the path queried via udev::Device::from_syspath("/sys/block/{name}") is passed to smartctl and that also included the specific namespace on the systems I tested with a short script. So pass the full path to make things a little bit simpler and to avoid potential future issues like bug #2020[2]. [0]: https://bugzilla.proxmox.com/show_bug.cgi?id=1123#c3 [1]: https://forum.proxmox.com/threads/113962/post-493185 [2]: https://bugzilla.proxmox.com/show_bug.cgi?id=2020 Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
914 lines
24 KiB
Perl
914 lines
24 KiB
Perl
package PVE::Diskmanage;
|
|
|
|
use strict;
|
|
use warnings;
|
|
|
|
use PVE::ProcFSTools;
|
|
use Data::Dumper;
|
|
use Cwd qw(abs_path);
|
|
use Fcntl ':mode';
|
|
use File::Basename;
|
|
use File::stat;
|
|
use JSON;
|
|
|
|
use PVE::Tools qw(extract_param run_command file_get_contents file_read_firstline dir_glob_regex dir_glob_foreach trim);
|
|
|
|
# Absolute paths of the external tools invoked by this module.
my $SMARTCTL = '/usr/sbin/smartctl';
my $ZPOOL = '/sbin/zpool';
my $SGDISK = '/sbin/sgdisk';
my $PVS = '/sbin/pvs';
my $LVS = '/sbin/lvs';
my $LSBLK = '/bin/lsblk';
|
|
|
|
# Returns the given device path without a leading '/dev/' prefix. Paths that do
# not start with '/dev/' are returned unchanged.
my sub strip_dev :prototype($) {
    my ($path) = @_;

    $path = substr($path, length('/dev/')) if index($path, '/dev/') == 0;

    return $path;
}
|
|
|
|
# Returns the result of the -x file test for $path, i.e. a true value if the
# path is executable by the current process.
sub check_bin {
    my $path = shift;

    return -x $path;
}
|
|
|
|
# Resolves $rel_path to an absolute path, checks that it is located below /dev/
# and that it refers to a block device.
#
# Returns the resolved (and untainted) absolute path. Dies if the path is
# missing, cannot be resolved, is not below /dev/, or fails assert_blockdev().
sub verify_blockdev_path {
    my ($rel_path) = @_;

    $rel_path or die "missing path";

    my $resolved = abs_path($rel_path)
        or die "failed to get absolute path to $rel_path\n";

    # re-building the path from the capture group untaints it
    my ($name) = $resolved =~ m|^/dev/(.*)$|
        or die "got unusual device path '$resolved'\n";
    my $path = "/dev/$name";

    assert_blockdev($path);

    return $path;
}
|
|
|
|
# Checks that $dev starts with '/dev/' and is a block device.
#
# Returns 1 on success. On failure it dies with "not a valid block device",
# unless $noerr is set, in which case it returns nothing.
sub assert_blockdev {
    my ($dev, $noerr) = @_;

    return 1 if $dev =~ m|^/dev/| && -b $dev;

    return if $noerr;

    die "not a valid block device\n";
}
|
|
|
|
# Sets the disk GUID of $disk using sgdisk.
#
# $disk must be a block device that is neither a partition nor in use,
# otherwise this dies. If $uuid is not given (or false), 'R' is passed to
# sgdisk instead. Returns 1 on success.
sub init_disk {
    my ($disk, $uuid) = @_;

    assert_blockdev($disk);

    # the API call is expected to have checked both already, repeated here for safety
    if (is_partition($disk)) {
        die "$disk is a partition\n";
    }
    if (disk_is_used($disk)) {
        die "disk $disk is already in use\n";
    }

    my @cmd = ($SGDISK, $disk, '-U', $uuid || 'R');
    run_command(\@cmd);

    return 1;
}
|
|
|
|
# Returns 1 if get_disks() reports a usage for $disk, 0 otherwise.
#
# $disk may be given with or without the '/dev/' prefix. Dies if get_disks()
# (queried without SMART data, including partitions) has no entry for it.
sub disk_is_used {
    my ($disk) = @_;

    (my $name = $disk) =~ s|^/dev/||;

    my $entry = get_disks($name, 1, 1)->{$name};

    die "'$disk' is not a valid local disk\n" if !defined($entry);

    return $entry->{used} ? 1 : 0;
}
|
|
|
|
# Queries S.M.A.R.T. information for a block device by running smartctl and
# parsing its output.
#
# Parameters:
#   $disk       - device path, must pass assert_blockdev()
#   $healthonly - if true only the health status (-H) is requested, otherwise the
#                 attributes (-A -f brief) are requested as well
#
# Returns a hash reference with the following keys (only set if found):
#   type       - 'ata' or 'text' depending on the section header seen, else undef
#   health     - reported health/self-assessment result, or "SMART Disabled"
#   attributes - for type 'ata': list of parsed attribute rows
#   text       - for type 'text': the raw lines of the SMART data section
#   wearout    - for type 'text': 100 minus the reported "Percentage Used" value
#
# Dies if smartctl could not be run, or if bit 0 or 1 of its exit code is set.
sub get_smart_data {
    my ($disk, $healthonly) = @_;

    assert_blockdev($disk);
    my $smartdata = {};
    my $type;

    my $cmd = [$SMARTCTL, '-H'];
    push @$cmd, '-A', '-f', 'brief' if !$healthonly;
    push @$cmd, $disk;

    my $returncode = eval {
        run_command($cmd, noerr => 1, outfunc => sub {
            my ($line) = @_;

            # ATA SMART attributes, e.g.:
            # ID# ATTRIBUTE_NAME FLAGS VALUE WORST THRESH FAIL RAW_VALUE
            # 1 Raw_Read_Error_Rate POSR-K 100 100 000 - 0
            #
            # SAS and NVME disks, e.g.:
            # Data Units Written: 5,584,952 [2.85 TB]
            # Accumulated start-stop cycles: 34

            if (defined($type) && $type eq 'ata' && $line =~ m/^([ \d]{2}\d)\s+(\S+)\s+(\S{6})\s+(\d+)\s+(\d+)\s+(\S+)\s+(\S+)\s+(.*)$/) {
                my $entry = {};

                $entry->{name} = $2 if defined $2;
                $entry->{flags} = $3 if defined $3;
                # the +0 makes a number out of the strings
                # FIXME: 'value' is deprecated by 'normalized'; remove with PVE 7.0
                $entry->{value} = $4+0 if defined $4;
                $entry->{normalized} = $4+0 if defined $4;
                $entry->{worst} = $5+0 if defined $5;
                # some disks report the default threshold as --- instead of 000
                if (defined($6) && $6 eq '---') {
                    $entry->{threshold} = 0;
                } else {
                    $entry->{threshold} = $6+0 if defined $6;
                }
                $entry->{fail} = $7 if defined $7;
                $entry->{raw} = $8 if defined $8;
                $entry->{id} = $1 if defined $1;
                push @{$smartdata->{attributes}}, $entry;
            } elsif ($line =~ m/(?:Health Status|self\-assessment test result): (.*)$/ ) {
                $smartdata->{health} = $1;
            } elsif ($line =~ m/Vendor Specific SMART Attributes with Thresholds:/) {
                # an attribute table follows, any text collected so far is dropped
                $type = 'ata';
                delete $smartdata->{text};
            } elsif ($line =~ m/=== START OF (READ )?SMART DATA SECTION ===/) {
                $type = 'text';
            } elsif (defined($type) && $type eq 'text') {
                $smartdata->{text} = '' if !defined $smartdata->{text};
                $smartdata->{text} .= "$line\n";
                # extract wearout from nvme/sas text, allow for decimal values
                if ($line =~ m/Percentage Used(?: endurance indicator)?:\s*(\d+(?:\.\d+)?)\%/i) {
                    $smartdata->{wearout} = 100 - $1;
                }
            } elsif ($line =~ m/SMART Disabled/) {
                $smartdata->{health} = "SMART Disabled";
            }
        })
    };
    my $err = $@;

    # running the command itself failed: there is no exit code to report, so pass
    # on the actual error instead of interpolating an undefined value
    if ($err) {
        chomp(my $msg = "$err");
        die "Error getting S.M.A.R.T. data: $msg\n";
    }

    # bit 0 and 1 mark a fatal error, other bits are for disk status -> ignore (see man 8 smartctl)
    if (defined($returncode) && ($returncode & 0b00000011)) {
        die "Error getting S.M.A.R.T. data: Exit code: $returncode\n";
    }

    $smartdata->{type} = $type;

    return $smartdata;
}
|
|
|
|
# Runs lsblk with JSON output and returns a hash reference that maps each
# reported device path to { parttype => ..., fstype => ... }.
#
# Errors from running lsblk or from decoding its output are only warned about;
# an empty hash reference is returned when nothing usable is available.
sub get_lsblk_info {
    my @lines;
    eval {
        run_command(
            [$LSBLK, '--json', '-o', 'path,parttype,fstype'],
            outfunc => sub { push @lines, $_[0]; },
        );
    };
    warn "$@\n" if $@;
    return {} if !@lines;

    my $output = join('', map { "$_\n" } @lines);

    my $parsed = eval { decode_json($output) };
    $parsed //= {};
    warn "$@\n" if $@;

    my $devices = $parsed->{blockdevices} // [];

    my $info = {};
    for my $entry (@{$devices}) {
        $info->{ $entry->{path} } = {
            parttype => $entry->{parttype},
            fstype => $entry->{fstype},
        };
    }

    return $info;
}
|
|
|
|
# Collects all devices from $lsblk_info whose partition type is a key of $uuids.
#
# For every match, the device path is mapped to the value stored in $uuids for
# that partition type. Results are added to $res (a new hash is used if $res is
# not given), which is also the return value.
my sub get_devices_by_partuuid {
    my ($lsblk_info, $uuids, $res) = @_;

    $res //= {};

    for my $dev (sort keys %{$lsblk_info}) {
        my $parttype = $lsblk_info->{$dev}->{parttype} // next;
        my $value = $uuids->{$parttype} // next;
        $res->{$dev} = $value;
    }

    return $res;
}
|
|
|
|
# Returns a hash reference whose keys are the device paths used by ZFS.
#
# Devices are taken from the output of 'zpool list -HPLv' and, in addition,
# from $lsblk_info by partition type. Returns an empty hash if the zpool binary
# is not executable.
sub get_zfs_devices {
    my ($lsblk_info) = @_;

    return {} if !check_bin($ZPOOL);

    my $res = {};

    # use zpool and parttype uuid, because log and cache do not have zfs type uuid
    my $collect_vdev = sub {
        my ($line) = @_;
        $res->{$1} = 1 if $line =~ m|^\t([^\t]+)\t|;
    };
    eval { run_command([$ZPOOL, 'list', '-HPLv'], outfunc => $collect_vdev); };

    # only warn here, because maybe zfs tools are not installed
    warn "$@\n" if $@;

    my $uuids = {
        "6a898cc3-1dd2-11b2-99a6-080020736631" => 1, # apple
        "516e7cba-6ecf-11d6-8ff8-00022d09712b" => 1, # bsd
    };

    return get_devices_by_partuuid($lsblk_info, $uuids, $res);
}
|
|
|
|
# Returns a hash reference whose keys are the device paths used by LVM.
#
# Devices are taken from the physical volume names reported by pvs and, in
# addition, from $lsblk_info by partition type.
sub get_lvm_devices {
    my ($lsblk_info) = @_;

    my $res = {};
    my $cmd = [$PVS, '--noheadings', '--readonly', '-o', 'pv_name'];

    eval {
        run_command($cmd, outfunc => sub {
            my $pv_name = trim($_[0]);
            $res->{$pv_name} = 1 if $pv_name =~ m|^/dev/|;
        });
    };

    # if something goes wrong, we do not want to give up, but indicate an error has occurred
    warn "$@\n" if $@;

    # partitions with this type are counted as LVM devices as well
    my $uuids = {
        "e6d6d379-f507-44c2-a23c-238f2a3df928" => 1,
    };

    return get_devices_by_partuuid($lsblk_info, $uuids, $res);
}
|
|
|
|
# Returns a hash reference mapping device paths from $lsblk_info to a number
# describing their Ceph role, based on the partition type:
# 1 = journal, 2 = db, 3 = wal, 4 = block.
sub get_ceph_journals {
    my ($lsblk_info) = @_;

    my %role_of = (
        '45b0969e-9b03-4f30-b4c6-b4b80ceff106' => 1, # journal
        '30cd0809-c2b2-499c-8879-2d6b78529876' => 2, # db
        '5ce17fce-4087-4169-b7ff-056cc58473f9' => 3, # wal
        'cafecafe-9b03-4f30-b4c6-b4b80ceff106' => 4, # block
    );

    return get_devices_by_partuuid($lsblk_info, \%role_of, {});
}
|
|
|
|
# Reads the names and tags of all logical volumes named 'osd-*' via lvs and
# matches them with the devices they are located on.
#
# Returns a hash reference keyed by device path. For 'block' and 'data' volumes
# that carry a ceph.osd_id tag the entry gets the keys 'osdid', 'bluestore'
# (true for 'block') and, if tagged as such, 'encrypted'. For all other volumes
# a counter named after the volume type (e.g. 'db', 'wal') is incremented.
#
# Lines without a parsable device or volume name are skipped. Errors from
# running lvs are not caught here.
sub get_ceph_volume_infos {
    my $result = {};

    my $cmd = [ $LVS, '-S', 'lv_name=~^osd-', '-o', 'devices,lv_name,lv_tags',
        '--noheadings', '--readonly', '--separator', ';' ];

    run_command($cmd, outfunc => sub {
        my $line = shift;
        $line =~ s/(?:^\s+)|(?:\s+$)//g; # trim whitespaces

        my ($devices, $lv_name, $lv_tags) = split(';', $line);

        # empty or truncated line, nothing to match against
        return if !defined($devices) || !defined($lv_name);
        $lv_tags //= '';

        # lvs syntax is /dev/sdX(Y) where Y is the start (which we do not need)
        my ($dev) = $devices =~ m|^(/dev/[a-z]+[^(]*)|;

        # never record anything under an undefined key
        return if !defined($dev);

        if ($lv_name =~ m|^osd-([^-]+)-|) {
            my $type = $1;
            # autovivification of $result->{$dev} is wanted here, so that entries are only
            # created for devices that actually have a matching volume
            if (($type eq 'block' || $type eq 'data') && $lv_tags =~ m/ceph.osd_id=([^,]+)/) {
                $result->{$dev}->{osdid} = $1;
                $result->{$dev}->{bluestore} = ($type eq 'block');
                if ($lv_tags =~ m/ceph\.encrypted=1/) {
                    $result->{$dev}->{encrypted} = 1;
                }
            } else {
                # undef++ becomes '1' (see `perldoc perlop`: Auto-increment)
                $result->{$dev}->{$type}++;
            }
        }
    });

    return $result;
}
|
|
|
|
# Queries udev for the device at sysfs path $dev and extracts the properties
# needed by this module.
#
# Returns nothing if udevadm produced no output, if the device is neither a
# disk nor a partition, if it is a CD-ROM, or if it has no DEVNAME. Otherwise
# returns a hash reference with the keys:
#   devpath    - value of DEVNAME
#   serial     - ID_SERIAL_SHORT, or 'unknown'
#   gpt        - 1 if ID_PART_TABLE_TYPE is gpt, else 0
#   rpm        - ID_ATA_ROTATION_RATE_RPM, or -1
#   wwn        - ID_WWN, or 'unknown'
#   usb        - 1, only set if ID_BUS is usb
#   model      - ID_MODEL, only set if present
#   by_id_link - first ata/scsi/nvme (non-eui) /dev/disk/by-id link, only set if present
sub get_udev_info {
    my ($dev) = @_;

    my $info = "";
    eval {
        run_command(['udevadm', 'info', '-p', $dev, '--query', 'all'], outfunc => sub {
            $info .= "$_[0]\n";
        });
    };
    warn $@ if $@;
    return if !$info;

    return if $info !~ m/^E: DEVTYPE=(disk|partition)$/m;
    return if $info =~ m/^E: ID_CDROM/m;

    # we use this, because some disks are not simply in /dev e.g. /dev/cciss/c0d0
    my ($devpath) = $info =~ m/^E: DEVNAME=(\S+)$/m;
    return if !defined($devpath);

    my $data = {
        devpath => $devpath,
        serial => ($info =~ m/^E: ID_SERIAL_SHORT=(\S+)$/m)[0] // 'unknown',
        gpt => $info =~ m/^E: ID_PART_TABLE_TYPE=gpt$/m ? 1 : 0,
        # a reported rotation rate also implicitly detects SSDs
        rpm => ($info =~ m/^E: ID_ATA_ROTATION_RATE_RPM=(\d+)$/m)[0] // -1,
        wwn => ($info =~ m/^E: ID_WWN=(.*)$/m)[0] // 'unknown',
    };

    if ($info =~ m/^E: ID_BUS=usb$/m) {
        $data->{usb} = 1;
    }

    if ($info =~ m/^E: ID_MODEL=(.+)$/m) {
        $data->{model} = $1;
    }

    if ($info =~ m/^E: DEVLINKS=(.+)$/m) {
        my ($first_link) = grep { m#^/dev/disk/by-id/(ata|scsi|nvme(?!-eui))# } split(/ /, $1);
        $data->{by_id_link} = $first_link if defined($first_link);
    }

    return $data;
}
|
|
|
|
# Returns the size in bytes of the device with sysfs directory $sysdir, read
# from its 'size' file. Returns nothing if the file yields no (or a zero) value.
sub get_sysdir_size {
    my ($sysdir) = @_;

    my $sectors = file_read_firstline("$sysdir/size") or return;

    # linux always considers sectors to be 512 bytes, independently of real block size
    return $sectors * 512;
}
|
|
|
|
# Reads basic device information from the sysfs directory $sysdir.
#
# Returns nothing if "$sysdir/device" is not a directory or if no size could be
# determined. Otherwise returns a hash reference with the keys 'size' (bytes),
# 'rotational' (-1 if unknown), 'vendor' and 'model' ('unknown' if empty).
sub get_sysdir_info {
    my ($sysdir) = @_;

    return if ! -d "$sysdir/device";

    my $size = get_sysdir_size($sysdir) or return;

    return {
        size => $size,
        # dir/queue/rotational should be 1 for hdd, 0 for ssd
        rotational => file_read_firstline("$sysdir/queue/rotational") // -1,
        vendor => file_read_firstline("$sysdir/device/vendor") || 'unknown',
        model => file_read_firstline("$sysdir/device/model") || 'unknown',
    };
}
|
|
|
|
# Determines the wearout value from the data returned by get_smart_data().
#
# If $smartdata already has a defined 'wearout', it is returned as is.
# Otherwise the attribute list is searched for the known register names below
# and the 'value' of the first matching attribute is returned. A register whose
# first matching attribute has no defined value is skipped. Returns undef if
# nothing was found.
sub get_wear_leveling_info {
    my ($smartdata) = @_;
    my $attributes = $smartdata->{attributes};

    return $smartdata->{wearout} if defined($smartdata->{wearout});

    # Common register names that represent percentage values of potential failure indicators
    # used in drivedb.h of smartmontools. Order matters, as some drives may have multiple
    # definitions
    my @wearoutregisters = (
        "Media_Wearout_Indicator",
        "SSD_Life_Left",
        "Wear_Leveling_Count",
        "Perc_Write\/Erase_Ct_BC",
        "Perc_Rated_Life_Remain",
        "Remaining_Lifetime_Perc",
        "Percent_Lifetime_Remain",
        "Lifetime_Left",
        "PCT_Life_Remaining",
        "Lifetime_Remaining",
        "Percent_Life_Remaining",
        "Percent_Lifetime_Used",
        "Perc_Rated_Life_Used"
    );

    my $wearout;

    # Search the S.M.A.R.T. attributes for a known register
    REGISTER:
    for my $register (@wearoutregisters) {
        for my $attr (@$attributes) {
            next if $attr->{name} !~ m/$register/;

            # only the first attribute matching a register is considered
            $wearout = $attr->{value};
            last REGISTER if defined($wearout);
            next REGISTER;
        }
    }

    return $wearout;
}
|
|
|
|
# Returns 1 if the directory $dir has no entries other than '.' and '..',
# 0 otherwise. A directory that cannot be opened (e.g. because it does not
# exist) counts as empty.
#
# Uses the opendir/readdir builtins, so it does not depend on IO::Dir having
# been loaded by some other module.
sub dir_is_empty {
    my ($dir) = @_;

    opendir(my $dh, $dir) or return 1;

    while (defined(my $entry = readdir($dh))) {
        next if $entry eq '.' || $entry eq '..';
        closedir($dh);
        return 0;
    }

    closedir($dh);
    return 1;
}
|
|
|
|
# Returns 1 if $sysdir is a symbolic link whose target contains a
# 'host*/session*' path component pair, 0 otherwise.
sub is_iscsi {
    my ($sysdir) = @_;

    return 0 if !-l $sysdir;

    return readlink($sysdir) =~ m|host[^/]*/session[^/]*| ? 1 : 0;
}
|
|
|
|
# Returns true if $type is one of the disk types handled like an SSD
# ('ssd' or 'nvme').
my sub is_ssdlike {
    my ($type) = @_;

    my %ssdlike = (ssd => 1, nvme => 1);

    return exists($ssdlike{$type});
}
|
|
|
|
# returns hashmap of abs device path -> mount point, for all entries from
# parse_proc_mounts() whose first field starts with /dev/
sub mounted_blockdevs {
    my %mountpoint_of;

    my $mounts = PVE::ProcFSTools::parse_proc_mounts();

    for my $entry (@$mounts) {
        my ($what, $where) = @$entry;
        next if $what !~ m|^/dev/|;
        $mountpoint_of{abs_path($what)} = $where;
    }

    return \%mountpoint_of;
}
|
|
|
|
# returns hashmap of abs mount path -> first field of the mount entry (what),
# for all entries from parse_proc_mounts()
sub mounted_paths {
    my %source_of;

    my $mounts = PVE::ProcFSTools::parse_proc_mounts();

    for my $entry (@$mounts) {
        my ($what, $where) = @$entry;
        $source_of{abs_path($where)} = $what;
    }

    return \%source_of;
}
|
|
|
|
# Collects information about the local disks found in /sys/block.
#
# Parameters:
#   $disks              - optional disk name or array reference of disk names (without
#                         '/dev/') to restrict the result to; the passed array is not
#                         modified
#   $nosmart            - if true, no S.M.A.R.T. data is queried (health stays 'UNKNOWN'
#                         and wearout 'N/A')
#   $include_partitions - if true, partitions get their own entries in the result and
#                         partitions passed in $disks are resolved to their parent disk
#
# Returns a hash reference keyed by device name. Disk entries have the keys vendor, model,
# size, serial, gpt, rpm, type, wwn, health, devpath, wearout and osdid, plus mounted,
# by_id_link, used, journals, db, wal, bluestore and osdencrypted where applicable.
#
# Dies if $disks is a reference other than an array reference.
sub get_disks {
    my ($disks, $nosmart, $include_partitions) = @_;
    my $disklist = {};

    my $mounted = mounted_blockdevs();

    my $lsblk_info = get_lsblk_info();

    my $journalhash = get_ceph_journals($lsblk_info);
    my $ceph_volume_infos = get_ceph_volume_infos();

    my $zfshash = get_zfs_devices($lsblk_info);

    my $lvmhash = get_lvm_devices($lsblk_info);

    my $disk_regex = ".*";
    if (defined($disks)) {
        if (!ref($disks)) {
            $disks = [ $disks ];
        } elsif (ref($disks) ne 'ARRAY') {
            die "disks is not a string or array reference\n";
        }

        # work on a private copy, the names are rewritten below and the caller's array must
        # not change as a side effect
        my @names = @$disks;

        # we get cciss/c0d0 but need cciss!c0d0
        s|cciss/|cciss!| for @names;

        if ($include_partitions) {
            # Proper blockdevice is needed for the regex, use parent for partitions.
            for my $name (@names) {
                next if !is_partition("/dev/$name");
                $name = strip_dev(get_blockdev("/dev/$name"));
            }
        }

        # the names are literal device names, not patterns
        $disk_regex = "(?:" . join('|', map { quotemeta($_) } @names) . ")";
    }

    dir_glob_foreach('/sys/block', $disk_regex, sub {
        my ($dev) = @_;
        # whitelisting following devices
        # - hdX ide block device
        # - sdX scsi/sata block device
        # - vdX virtIO block device
        # - xvdX: xen virtual block device
        # - nvmeXnY: nvme devices
        # - cciss!cXnY cciss devices
        return if $dev !~ m/^(h|s|x?v)d[a-z]+$/ &&
            $dev !~ m/^nvme\d+n\d+$/ &&
            $dev !~ m/^cciss\!c\d+d\d+$/;

        my $data = get_udev_info("/sys/block/$dev") // return;
        my $devpath = $data->{devpath};

        my $sysdir = "/sys/block/$dev";

        # we do not want iscsi devices
        return if is_iscsi($sysdir);

        my $sysdata = get_sysdir_info($sysdir);
        return if !defined($sysdata);

        my $type = 'unknown';

        if ($sysdata->{rotational} == 0) {
            $type = 'ssd';
            $type = 'nvme' if $dev =~ m/^nvme\d+n\d+$/;
            $data->{rpm} = 0;
        } elsif ($sysdata->{rotational} == 1) {
            if ($data->{rpm} != -1) {
                $type = 'hdd';
            } elsif ($data->{usb}) {
                $type = 'usb';
                $data->{rpm} = 0;
            }
        }

        my ($health, $wearout) = ('UNKNOWN', 'N/A');
        if (!$nosmart) {
            # best effort: errors while getting the S.M.A.R.T. data are ignored and the
            # defaults above are kept
            eval {
                my $smartdata = get_smart_data($devpath, !is_ssdlike($type));
                $health = $smartdata->{health} if $smartdata->{health};

                if (is_ssdlike($type)) { # if we have an ssd we try to get the wearout indicator
                    my $wear_level = get_wear_leveling_info($smartdata);
                    $wearout = $wear_level if defined($wear_level);
                }
            };
        }

        # we replaced cciss/ with cciss! above, but in the result we need cciss/ again because the
        # caller might want to check the result again with the original parameter
        if ($dev =~ m|^cciss!|) {
            $dev =~ s|^cciss!|cciss/|;
        }

        $disklist->{$dev} = {
            vendor => $sysdata->{vendor},
            model => $data->{model} || $sysdata->{model},
            size => $sysdata->{size},
            serial => $data->{serial},
            gpt => $data->{gpt},
            rpm => $data->{rpm},
            type => $type,
            wwn => $data->{wwn},
            health => $health,
            devpath => $devpath,
            wearout => $wearout,
        };
        $disklist->{$dev}->{mounted} = 1 if exists $mounted->{$devpath};

        my $by_id_link = $data->{by_id_link};
        $disklist->{$dev}->{by_id_link} = $by_id_link if defined($by_id_link);

        my ($osdid, $bluestore, $osdencrypted) = (-1, 0, 0);
        my ($journal_count, $db_count, $wal_count) = (0, 0, 0);

        my $partpath = $devpath;
        # remove trailing part to get the partition base path, e.g. /dev/cciss/c0d0 -> /dev/cciss
        $partpath =~ s/\/[^\/]+$//;

        # returns a short description of what the given device/partition is used for, or
        # nothing if no usage was detected
        my $determine_usage = sub {
            my ($devpath, $sysdir, $is_partition) = @_;

            return 'LVM' if $lvmhash->{$devpath};
            return 'ZFS' if $zfshash->{$devpath};

            my $info = $lsblk_info->{$devpath} // {};

            if (defined(my $parttype = $info->{parttype})) {
                return 'BIOS boot'if $parttype eq '21686148-6449-6e6f-744e-656564454649';
                return 'EFI' if $parttype eq 'c12a7328-f81f-11d2-ba4b-00a0c93ec93b';
                return 'ZFS reserved' if $parttype eq '6a945a3b-1dd2-11b2-99a6-080020736631';
            }

            return "$info->{fstype}" if defined($info->{fstype});
            return 'mounted' if $mounted->{$devpath};

            return if !$is_partition;

            # for devices, this check is done explicitly later
            return 'Device Mapper' if !dir_is_empty("$sysdir/holders");

            return; # unused partition
        };

        # adds the LVM-based Ceph information for the given device to the counters of the
        # current disk and returns a copy of it, or nothing if there is none
        my $collect_ceph_info = sub {
            my ($devpath) = @_;

            my $ceph_volume = $ceph_volume_infos->{$devpath} or return;
            $journal_count += $ceph_volume->{journal} // 0;
            $db_count += $ceph_volume->{db} // 0;
            $wal_count += $ceph_volume->{wal} // 0;
            if (defined($ceph_volume->{osdid})) {
                $osdid = $ceph_volume->{osdid};
                $bluestore = 1 if $ceph_volume->{bluestore};
                $osdencrypted = 1 if $ceph_volume->{encrypted};
            }

            my $result = { %{$ceph_volume} };
            $result->{journals} = delete $result->{journal} if $result->{journal};
            return $result;
        };

        my $partitions = {};
        dir_glob_foreach("$sysdir", "$dev.+", sub {
            my ($part) = @_;

            $partitions->{$part} = $collect_ceph_info->("$partpath/$part");
            my $lvm_based_osd = defined($partitions->{$part});

            $partitions->{$part}->{devpath} = "$partpath/$part";
            $partitions->{$part}->{parent} = "$devpath";
            $partitions->{$part}->{mounted} = 1 if exists $mounted->{"$partpath/$part"};
            $partitions->{$part}->{gpt} = $data->{gpt};
            $partitions->{$part}->{type} = 'partition';
            $partitions->{$part}->{size} = get_sysdir_size("$sysdir/$part") // 0;
            $partitions->{$part}->{used} = $determine_usage->("$partpath/$part", "$sysdir/$part", 1);
            $partitions->{$part}->{osdid} //= -1;

            # avoid counting twice (e.g. partition with the LVM for the DB OSD is in $journalhash)
            return if $lvm_based_osd;

            # Legacy handling for non-LVM based OSDs
            if (my $mp = $mounted->{"$partpath/$part"}) {
                if ($mp =~ m|^/var/lib/ceph/osd/ceph-(\d+)$|) {
                    $osdid = $1;
                    $partitions->{$part}->{osdid} = $osdid;
                }
            }

            if (my $journal_part = $journalhash->{"$partpath/$part"}) {
                $journal_count++ if $journal_part == 1;
                $db_count++ if $journal_part == 2;
                $wal_count++ if $journal_part == 3;
                $bluestore = 1 if $journal_part == 4;

                $partitions->{$part}->{journals} = 1 if $journal_part == 1;
                $partitions->{$part}->{db} = 1 if $journal_part == 2;
                $partitions->{$part}->{wal} = 1 if $journal_part == 3;
                $partitions->{$part}->{bluestore} = 1 if $journal_part == 4;
            }
        });

        my $used = $determine_usage->($devpath, $sysdir, 0);
        if (!$include_partitions) {
            foreach my $part (sort keys %{$partitions}) {
                $used //= $partitions->{$part}->{used};
            }
        } else {
            # fstype might be set even if there are partitions, but showing that is confusing
            $used = 'partitions' if scalar(keys %{$partitions});
        }
        $used //= 'partitions' if scalar(keys %{$partitions});
        # multipath, software raid, etc.
        # this check comes in last, to show more specific info
        # if we have it
        $used //= 'Device Mapper' if !dir_is_empty("$sysdir/holders");

        $disklist->{$dev}->{used} = $used if $used;

        $collect_ceph_info->($devpath);

        $disklist->{$dev}->{osdid} = $osdid;
        $disklist->{$dev}->{journals} = $journal_count if $journal_count;
        $disklist->{$dev}->{bluestore} = $bluestore if $osdid != -1;
        $disklist->{$dev}->{osdencrypted} = $osdencrypted if $osdid != -1;
        $disklist->{$dev}->{db} = $db_count if $db_count;
        $disklist->{$dev}->{wal} = $wal_count if $wal_count;

        if ($include_partitions) {
            $disklist->{$_} = $partitions->{$_} for keys %{$partitions};
        }
    });

    return $disklist;
}
|
|
|
|
# Returns the partition number of the partition device $part_path, as read from
# the 'partition' file of the device in /sys/dev/block.
#
# Dies if $part_path is not a block device, if it has no 'partition' file (i.e.
# is not a partition), or if the number cannot be parsed or is greater than 128.
sub get_partnum {
    my ($part_path) = @_;

    my $st = stat($part_path);

    my $is_blockdev = $st && $st->mode && S_ISBLK($st->mode) && $st->rdev;
    die "error detecting block device '$part_path'\n" if !$is_blockdev;

    my $rdev = $st->rdev;
    my $major = PVE::Tools::dev_t_major($rdev);
    my $minor = PVE::Tools::dev_t_minor($rdev);

    my $raw = file_read_firstline("/sys/dev/block/$major:$minor/partition");
    die "Partition does not exist\n" if !defined($raw);

    # taking the number from the capture group untaints it
    my ($partnum) = $raw =~ m/(\d+)/
        or die "Failed to get partition number\n";

    die "Partition number $partnum is invalid\n" if $partnum > 128;

    return $partnum;
}
|
|
|
|
# Returns the path of the parent block device of the partition $part_path,
# e.g. '/dev/sda' for '/dev/sda1'.
#
# The parent is taken from the target of the /sys/class/block/<name> link. Dies
# if the parent cannot be determined (not below /dev/, no such sysfs link), if
# the partition name does not contain the parent name, or if the resulting path
# is not a block device.
sub get_blockdev {
    my ($part_path) = @_;

    my ($dev, $block_dev);
    if ($part_path =~ m|^/dev/(.*)$|) {
        $dev = $1;
        my $link = readlink("/sys/class/block/$dev");
        # readlink fails for unknown devices, and the device name is a literal, not a pattern
        $block_dev = $1 if defined($link) && $link =~ m|([^/]*)/\Q$dev\E$|;
    }

    die "Can't parse parent device\n" if !defined($block_dev);
    die "No valid block device\n" if index($dev, $block_dev) == -1;

    $block_dev = "/dev/$block_dev";
    die "Block device does not exists\n" if !(-b $block_dev);

    return $block_dev;
}
|
|
|
|
# Returns true if a partition number can be determined for $dev_path via
# get_partnum(), false otherwise. Errors from get_partnum() are swallowed.
sub is_partition {
    my ($dev_path) = @_;

    my $partnum = eval { get_partnum($dev_path) };

    return defined($partnum);
}
|
|
|
|
# Runs the code reference $sub via PVE::Tools::lock_file() using the disk
# management lock file. Re-throws the error left in $@ by that call, otherwise
# returns its result.
sub locked_disk_action {
    my ($sub) = @_;

    my $lockfile = '/run/lock/pve-diskmanage.lck';
    my $res = PVE::Tools::lock_file($lockfile, undef, $sub);
    if (my $err = $@) {
        die $err;
    }

    return $res;
}
|
|
|
|
# Dies if disk_is_used() reports that $dev is in use, returns nothing otherwise.
sub assert_disk_unused {
    my ($dev) = @_;

    if (disk_is_used($dev)) {
        die "device '$dev' is already in use\n";
    }

    return;
}
|
|
|
|
# Creates a new partition of $size bytes on the disk $dev using sgdisk.
#
# The new partition gets the number following the highest partition number
# found in the sysfs directory of the disk. Returns the device path of the new
# partition as found in sysfs afterwards (undef if it did not show up there).
# Dies if sgdisk fails.
sub append_partition {
    my ($dev, $size) = @_;

    (my $devname = $dev) =~ s|^/dev/||;

    # find the highest partition number currently in use
    my $highest = 0;
    dir_glob_foreach("/sys/block/$devname", qr/\Q$devname\E.*?(\d+)/, sub {
        my ($part, $partid) = @_;

        $highest = $partid if $partid > $highest;
    });
    my $newpartid = $highest + 1;

    $size = PVE::Tools::convert_size($size, 'b' => 'mb');

    my $cmd = [ $SGDISK, '-n', "$newpartid:0:+${size}M", $dev ];
    run_command($cmd, errmsg => "error creating partition '$newpartid' on '$dev'");

    # loop again to detect the real partition device which does not always follow
    # a strict $devname$partition scheme like /dev/nvme0n1 -> /dev/nvme0n1p1
    my $partition;
    dir_glob_foreach("/sys/block/$devname", qr/\Q$devname\E.*$newpartid/, sub {
        $partition = "/dev/$_[0]";
    });

    return $partition;
}
|
|
|
|
# Check if a disk or any of its partitions has a holder.
# Can also be called with a partition.
# Expected to be called with a result of verify_blockdev_path().
#
# Returns the path of the device itself if it has a holder, otherwise the path of a partition
# with a holder (the last one found), or undef if there is none.
sub has_holder {
    my ($devpath) = @_;

    my $dev = strip_dev($devpath);

    return $devpath if !dir_is_empty("/sys/class/block/${dev}/holders");

    my $found;
    # the device name is a literal, not a pattern (same as in append_partition)
    dir_glob_foreach("/sys/block/${dev}", qr/\Q${dev}\E.+/, sub {
        my ($part) = @_;
        $found = "/dev/${part}" if !dir_is_empty("/sys/class/block/${part}/holders");
    });

    return $found;
}
|
|
|
|
# Basic check if a disk or any of its partitions is mounted.
# Can also be called with a partition.
# Expected to be called with a result of verify_blockdev_path().
#
# Returns the path of the device itself if it is mounted, otherwise the path of a mounted
# partition (the last one found), or undef if there is none.
sub is_mounted {
    my ($devpath) = @_;

    my $mounted = mounted_blockdevs();

    return $devpath if $mounted->{$devpath};

    my $dev = strip_dev($devpath);

    my $found;
    # the device name is a literal, not a pattern (same as in append_partition)
    dir_glob_foreach("/sys/block/${dev}", qr/\Q${dev}\E.+/, sub {
        my ($part) = @_;
        my $partpath = "/dev/${part}";

        $found = $partpath if $mounted->{$partpath};
    });

    return $found;
}
|
|
|
|
# Changes the partition type of the partition $partpath to $parttype using sgdisk.
# Currently only supports GPT-partitioned disks.
#
# Dies if the partition number or parent disk cannot be determined, if the parent disk is
# unknown to get_disks() or not GPT partitioned, or if sgdisk fails.
sub change_parttype {
    my ($partpath, $parttype) = @_;

    my $err = "unable to change partition type for $partpath";

    my $partnum = get_partnum($partpath);
    my $blockdev = get_blockdev($partpath);
    my $dev = strip_dev($blockdev);

    my $info = get_disks($dev, 1);
    die "$err - unable to get disk info for '$blockdev'\n" if !defined($info->{$dev});
    die "$err - disk '$blockdev' is not GPT partitioned\n" if !$info->{$dev}->{gpt};

    # use the same sgdisk binary as the rest of this module instead of a PATH lookup
    run_command([$SGDISK, "-t${partnum}:${parttype}", $blockdev], errmsg => $err);
}
|
|
|
|
# Wipes all labels and the first 200 MiB of a disk/partition (or the whole if it is smaller).
# If called with a partition, also sets the partition type to 0x83 'Linux filesystem'.
# Expected to be called with a result of verify_blockdev_path().
#
# Labels of child partitions are wiped as well. Dies if the device size cannot be determined
# or if wipefs or dd fail; a failure to change the partition type is only warned about.
sub wipe_blockdev {
    my ($devpath) = @_;

    my $devname = basename($devpath);
    my $dev_size = PVE::Tools::file_get_contents("/sys/class/block/$devname/size");

    ($dev_size) = $dev_size =~ m|(\d+)|; # untaint $dev_size
    die "Couldn't get the size of the device $devname\n" if !defined($dev_size);

    # The size is given in 512 byte sectors. Count in bytes rather than in MiB, as a device
    # smaller than the limit is not necessarily a whole number of MiB in size and dd only
    # accepts integers for count.
    my $size_bytes = $dev_size * 512;
    my $max_bytes = 200 * 1024 * 1024;
    my $count = ($size_bytes < $max_bytes) ? $size_bytes : $max_bytes;

    my $to_wipe = [];
    # the device name is a literal, not a pattern (same as in append_partition)
    dir_glob_foreach("/sys/class/block/${devname}", qr/\Q${devname}\E.+/, sub {
        my ($part) = @_;
        push $to_wipe->@*, "/dev/${part}" if -b "/dev/${part}";
    });

    if (scalar($to_wipe->@*) > 0) {
        print "found child partitions to wipe: ". join(', ', $to_wipe->@*) ."\n";
    }
    push $to_wipe->@*, $devpath; # put actual device last

    print "wiping block device ${devpath}\n";

    run_command(['wipefs', '--all', $to_wipe->@*], errmsg => "error wiping '${devpath}'");

    # iflag=count_bytes makes dd interpret count as a number of bytes instead of blocks
    run_command(
        [
            'dd',
            'if=/dev/zero',
            "of=${devpath}",
            'bs=1M',
            'conv=fdatasync',
            'iflag=count_bytes',
            "count=${count}",
        ],
        errmsg => "error wiping '${devpath}'",
    );

    if (is_partition($devpath)) {
        eval { change_parttype($devpath, '8300'); };
        warn $@ if $@;
    }
}
|
|
|
|
# FIXME: Remove once we depend on systemd >= v249.
# Work around udev bug https://github.com/systemd/systemd/issues/18525 ensuring database is updated.
#
# Runs 'udevadm trigger' for the given devices; does nothing if none are given. Errors are
# only warned about.
sub udevadm_trigger {
    my @devs = @_;

    return if !@devs;

    my $cmd = ['udevadm', 'trigger', @devs];
    eval { run_command($cmd); };
    warn $@ if $@;
}
|
|
|
|
1;
|