diff --git a/doc/lvm-disk-reading.txt b/doc/lvm-disk-reading.txt index 241a7abae..1255ae8b2 100644 --- a/doc/lvm-disk-reading.txt +++ b/doc/lvm-disk-reading.txt @@ -187,3 +187,45 @@ For each VG name: command-specific work + +Filter i/o +---------- + +Some filters must be applied before reading a device, and other filters +must be applied after reading a device. In all cases, the filters must be +applied before lvm processes the device, i.e. before it looks for an lvm +label. + +1. Some filters need to be applied prior to reading any devices + because the purpose of the filter is to avoid submitting any + io on the excluded devices. The regex filter is the primary + example. Other filters benefit from being applied prior to + reading devices because they can tell which devices to + exclude without doing io to the device. An example of this + is the mpath filter. + +2. Some filters need to be applied after reading a device because + they are based on data/signatures seen on the device. + The partitioned filter is an example of this; lvm needs to + read a device to see if it has a partition table before it can + know whether to exclude the device from further processing. + +We apply filters from 1 before reading devices, and we apply filters from +2 after populating bcache, but before processing the device (i.e. before +checking for an lvm label, which is the first step in processing.) + +The current implementation of this makes filters return -EAGAIN if they +want to read the device, but bcache data is not yet available. This will +happen when filtering runs prior to populating bcache. In this case the +device is flagged. After bcache is populated, the filters are reapplied +to the flagged devices. The filters which need to look at device content +are now able to get it from bcache. Devices that do not pass filters at +this point are excluded just like devices which were excluded earlier. 
+ +(Some filters from 2 can be skipped by consulting udev for the information +instead of reading the device. This is not entirely reliable, so it is +disabled by default with the config setting external_device_info_source. +It may be worthwhile to change the filters to use the udev info as a hint, +or only use udev info for filtering in reporting commands where +inaccuracies are not a big problem.) + diff --git a/lib/label/label.c b/lib/label/label.c index d6a5727e8..8b60780cc 100644 --- a/lib/label/label.c +++ b/lib/label/label.c @@ -884,7 +884,7 @@ int label_scan_open(struct device *dev) return 1; } -bool dev_read_bytes(struct device *dev, off_t start, size_t len, void *data) +bool dev_read_bytes(struct device *dev, uint64_t start, size_t len, void *data) { int ret; @@ -918,7 +918,7 @@ bool dev_read_bytes(struct device *dev, off_t start, size_t len, void *data) } -bool dev_write_bytes(struct device *dev, off_t start, size_t len, void *data) +bool dev_write_bytes(struct device *dev, uint64_t start, size_t len, void *data) { int ret; @@ -961,7 +961,7 @@ bool dev_write_bytes(struct device *dev, off_t start, size_t len, void *data) return true; } -bool dev_write_zeros(struct device *dev, off_t start, size_t len) +bool dev_write_zeros(struct device *dev, uint64_t start, size_t len) { int ret; @@ -969,15 +969,8 @@ bool dev_write_zeros(struct device *dev, off_t start, size_t len) return true; if (!scan_bcache) { - if (!dev_open(dev)) - return false; - - ret = dev_set(dev, start, len, 0, 0); - - if (!dev_close(dev)) - stack; - - return ret ? 
true : false;
+		log_error("dev_write_zeros %s bcache not set up", dev_name(dev));
+		return false;
 	}
 
 	if (dev->bcache_fd <= 0) {
@@ -1004,3 +997,46 @@ bool dev_write_zeros(struct device *dev, off_t start, size_t len)
 	return true;
 }
 
+/*
+ * Fill len bytes of dev, starting at byte offset start, with val,
+ * going through scan_bcache and flushing it before returning.
+ * Returns true immediately, writing nothing, in test mode.
+ * Returns false if scan_bcache is not set up, the device cannot be
+ * opened, or the bcache write or flush fails; on write/flush failure
+ * label_scan_invalidate() is called on the device.
+ */
+bool dev_set_bytes(struct device *dev, uint64_t start, size_t len, uint8_t val)
+{
+	if (test_mode())
+		return true;
+
+	if (!scan_bcache) {
+		log_error("dev_set_bytes %s bcache not set up", dev_name(dev));
+		return false;
+	}
+
+	if (dev->bcache_fd <= 0) {
+		/* This is not often needed, perhaps only with lvmetad. */
+		if (!label_scan_open(dev)) {
+			log_error("dev_set_bytes %s cannot open dev", dev_name(dev));
+			return false;
+		}
+	}
+
+	if (!bcache_set_bytes(scan_bcache, dev->bcache_fd, start, len, val)) {
+		/* Log the full 64-bit offset; a uint32_t cast would wrap at 4GiB. */
+		log_error("dev_set_bytes %s at %llu bcache write failed invalidate fd %d",
+			  dev_name(dev), (unsigned long long)start, dev->bcache_fd);
+		label_scan_invalidate(dev);
+		return false;
+	}
+
+	if (!bcache_flush(scan_bcache)) {
+		log_error("dev_set_bytes %s at %llu bcache flush failed invalidate fd %d",
+			  dev_name(dev), (unsigned long long)start, dev->bcache_fd);
+		label_scan_invalidate(dev);
+		return false;
+	}
+	return true;
+}
+
diff --git a/lib/label/label.h b/lib/label/label.h
index 55e92e8d4..bccf777db 100644
--- a/lib/label/label.h
+++ b/lib/label/label.h
@@ -119,8 +119,9 @@ int label_scan_open(struct device *dev);
  * Wrappers around bcache equivalents.
* (these make it easier to disable bcache and revert to direct rw if needed) */ -bool dev_read_bytes(struct device *dev, off_t start, size_t len, void *data); -bool dev_write_bytes(struct device *dev, off_t start, size_t len, void *data); -bool dev_write_zeros(struct device *dev, off_t start, size_t len); +bool dev_read_bytes(struct device *dev, uint64_t start, size_t len, void *data); +bool dev_write_bytes(struct device *dev, uint64_t start, size_t len, void *data); +bool dev_write_zeros(struct device *dev, uint64_t start, size_t len); +bool dev_set_bytes(struct device *dev, uint64_t start, size_t len, uint8_t val); #endif diff --git a/lib/locking/lvmlockd.c b/lib/locking/lvmlockd.c index 0521ede17..d78b35c88 100644 --- a/lib/locking/lvmlockd.c +++ b/lib/locking/lvmlockd.c @@ -403,22 +403,17 @@ static int _extend_sanlock_lv(struct cmd_context *cmd, struct volume_group *vg, return 0; } - if (!dev_open_quiet(dev)) { + if (!label_scan_open(dev)) { log_error("Extend sanlock LV %s cannot open device.", display_lvname(lv)); return 0; } - if (!dev_set(dev, old_size_bytes, new_size_bytes - old_size_bytes, DEV_IO_LV, 0)) { + if (!dev_write_zeros(dev, old_size_bytes, new_size_bytes - old_size_bytes)) { log_error("Extend sanlock LV %s cannot zero device.", display_lvname(lv)); - dev_close_immediate(dev); return 0; } - dev_flush(dev); - - if (!dev_close_immediate(dev)) - stack; - + label_scan_invalidate(dev); return 1; } diff --git a/lib/metadata/lv_manip.c b/lib/metadata/lv_manip.c index 8bf93795e..84953bbdf 100644 --- a/lib/metadata/lv_manip.c +++ b/lib/metadata/lv_manip.c @@ -7155,8 +7155,10 @@ int wipe_lv(struct logical_volume *lv, struct wipe_params wp) return 0; } - if (!dev_open_quiet(dev)) - return_0; + if (!label_scan_open(dev)) { + log_error("Failed to open %s/%s for wiping and zeroing.", lv->vg->name, lv->name); + goto out; + } if (wp.do_wipe_signatures) { log_verbose("Wiping known signatures on logical volume \"%s/%s\"", @@ -7177,15 +7179,17 @@ int wipe_lv(struct 
logical_volume *lv, struct wipe_params wp) display_size(lv->vg->cmd, zero_sectors), lv->vg->name, lv->name, wp.zero_value); - if (!dev_set(dev, UINT64_C(0), (size_t) zero_sectors << SECTOR_SHIFT, DEV_IO_LV, wp.zero_value)) - stack; + if (!wp.zero_value) { + if (!dev_write_zeros(dev, UINT64_C(0), (size_t) zero_sectors << SECTOR_SHIFT)) + stack; + } else { + if (!dev_set_bytes(dev, UINT64_C(0), (size_t) zero_sectors << SECTOR_SHIFT, (uint8_t)wp.zero_value)) + stack; + } } - dev_flush(dev); - - if (!dev_close_immediate(dev)) - stack; - + label_scan_invalidate(dev); +out: lv->status &= ~LV_NOSCAN; return 1; diff --git a/lib/metadata/metadata-liblvm.c b/lib/metadata/metadata-liblvm.c index 2dc375781..f7a07a504 100644 --- a/lib/metadata/metadata-liblvm.c +++ b/lib/metadata/metadata-liblvm.c @@ -66,19 +66,11 @@ static int _pvcreate_write(struct cmd_context *cmd, struct pv_to_write *pvw) if (pvw->pp->zero) { log_verbose("Zeroing start of device %s", pv_name); - if (!dev_open_quiet(dev)) { - log_error("%s not opened: device not zeroed", pv_name); - return 0; - } - if (!dev_set(dev, UINT64_C(0), (size_t) 2048, DEV_IO_LABEL, 0)) { + if (!dev_write_zeros(dev, UINT64_C(0), (size_t) 2048)) { log_error("%s not wiped: aborting", pv_name); - if (!dev_close(dev)) - stack; return 0; } - if (!dev_close(dev)) - stack; } }