From 00f1b208a1bf44665ec97a791355b1fcf525a3a7 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 20 Apr 2018 10:43:50 -0500 Subject: [PATCH 01/87] [io paths] Unpick agk's aio stuff --- conf/example.conf.in | 16 - configure.in | 22 -- doc/aio_design.txt | 215 ----------- include/configure.h.in | 3 - lib/cache/lvmcache.c | 51 +-- lib/cache/lvmcache.h | 2 +- lib/cache/lvmetad.c | 2 +- lib/commands/toolcontext.c | 16 +- lib/commands/toolcontext.h | 1 - lib/config/config.c | 156 +++----- lib/config/config.h | 8 +- lib/config/config_settings.h | 10 - lib/config/defaults.h | 3 - lib/device/dev-cache.c | 12 - lib/device/dev-cache.h | 8 +- lib/device/dev-io.c | 610 +++++--------------------------- lib/device/dev-luks.c | 2 +- lib/device/dev-md.c | 2 +- lib/device/dev-swap.c | 3 +- lib/device/dev-type.c | 2 +- lib/device/device.h | 102 ++---- lib/filters/filter-composite.c | 4 +- lib/filters/filter-persistent.c | 8 +- lib/filters/filter.h | 2 +- lib/format1/disk-rep.c | 10 +- lib/format1/format1.c | 2 +- lib/format1/lvm1-label.c | 19 +- lib/format_pool/disk_rep.c | 2 +- lib/format_pool/format_pool.c | 2 +- lib/format_pool/pool_label.c | 13 +- lib/format_text/archive.c | 4 +- lib/format_text/archiver.c | 2 +- lib/format_text/export.c | 21 +- lib/format_text/format-text.c | 567 +++++++++-------------------- lib/format_text/import-export.h | 8 +- lib/format_text/import.c | 233 ++++-------- lib/format_text/layout.h | 12 +- lib/format_text/text_label.c | 197 +++-------- lib/label/label.c | 173 +++------ lib/label/label.h | 6 +- lib/metadata/metadata-liblvm.c | 3 +- lib/metadata/metadata.c | 8 +- lib/metadata/metadata.h | 5 +- make.tmpl.in | 3 +- tools/command.c | 11 +- tools/toollib.c | 1 - 46 files changed, 584 insertions(+), 1978 deletions(-) delete mode 100644 doc/aio_design.txt diff --git a/conf/example.conf.in b/conf/example.conf.in index 3b0638f60..aab274d74 100644 --- a/conf/example.conf.in +++ b/conf/example.conf.in @@ -59,22 +59,6 @@ devices { # This configuration 
option is advanced. scan = [ "/dev" ] - # Configuration option devices/use_aio. - # Use linux asynchronous I/O for parallel device access where possible. - # This configuration option has an automatic default value. - # use_aio = 1 - - # Configuration option devices/aio_max. - # Maximum number of asynchronous I/Os to issue concurrently. - # This configuration option has an automatic default value. - # aio_max = 128 - - # Configuration option devices/aio_memory. - # Approximate maximum total amount of memory (in MB) used - # for asynchronous I/O buffers. - # This configuration option has an automatic default value. - # aio_memory = 10 - # Configuration option devices/obtain_device_list_from_udev. # Obtain the list of available devices from udev. # This avoids opening or using any inapplicable non-block devices or diff --git a/configure.in b/configure.in index 605b6b212..b8e162351 100644 --- a/configure.in +++ b/configure.in @@ -39,7 +39,6 @@ case "$host_os" in LDDEPS="$LDDEPS .export.sym" LIB_SUFFIX=so DEVMAPPER=yes - AIO=yes BUILD_LVMETAD=no BUILD_LVMPOLLD=no LOCKDSANLOCK=no @@ -59,7 +58,6 @@ case "$host_os" in CLDNOWHOLEARCHIVE= LIB_SUFFIX=dylib DEVMAPPER=yes - AIO=no ODIRECT=no DM_IOCTLS=no SELINUX=no @@ -1124,24 +1122,6 @@ if test "$DEVMAPPER" = yes; then AC_DEFINE([DEVMAPPER_SUPPORT], 1, [Define to 1 to enable LVM2 device-mapper interaction.]) fi -################################################################################ -dnl -- Disable aio -AC_MSG_CHECKING(whether to use asynchronous I/O) -AC_ARG_ENABLE(aio, - AC_HELP_STRING([--disable-aio], - [disable asynchronous I/O]), - AIO=$enableval) -AC_MSG_RESULT($AIO) - -if test "$AIO" = yes; then - AC_CHECK_LIB(aio, io_setup, - [AC_DEFINE([AIO_SUPPORT], 1, [Define to 1 if aio is available.]) - AIO_LIBS="-laio" - AIO_SUPPORT=yes], - [AIO_LIBS= - AIO_SUPPORT=no ]) -fi - ################################################################################ dnl -- Build lvmetad AC_MSG_CHECKING(whether to build LVMetaD) 
@@ -2081,11 +2061,9 @@ AC_SUBST(DEFAULT_USE_LVMETAD) AC_SUBST(DEFAULT_USE_LVMPOLLD) AC_SUBST(DEFAULT_USE_LVMLOCKD) AC_SUBST(DEVMAPPER) -AC_SUBST(AIO) AC_SUBST(DLM_CFLAGS) AC_SUBST(DLM_LIBS) AC_SUBST(DL_LIBS) -AC_SUBST(AIO_LIBS) AC_SUBST(DMEVENTD_PATH) AC_SUBST(DM_LIB_PATCHLEVEL) AC_SUBST(ELDFLAGS) diff --git a/doc/aio_design.txt b/doc/aio_design.txt deleted file mode 100644 index c6eb44352..000000000 --- a/doc/aio_design.txt +++ /dev/null @@ -1,215 +0,0 @@ -Introducing asynchronous I/O to LVM -=================================== - -Issuing I/O asynchronously means instructing the kernel to perform specific -I/O and return immediately without waiting for it to complete. The data -is collected from the kernel later. - -Advantages ----------- - -A1. While waiting for the I/O to happen, the program could perform other -operations. - -A2. When LVM is searching for its Physical Volumes, it issues a small amount of -I/O to a large number of disks. If this was issued in parallel the overall -runtime might be shorter while there should be little effect on the cpu time. - -A3. If more than one timeout occurs when accessing any devices, these can be -taken in parallel, again reducing the runtime. This applies globally, -not just while the code is searching for Physical Volumes, so reading, -writing and committing the metadata may occasionally benefit too to some -extent and there are probably maintenance advantages in using the same -method of I/O throughout the main body of the code. - -A4. By introducing a simple callback function mechanism, the conversion can be -performed largely incrementally by first refactoring and continuing to -use synchronous I/O with the callbacks performed immediately. This allows the -callbacks to be introduced without changing the running sequence of the code -initially. Future projects could refactor some of the calling sites to -simplify the code structure and even eliminate some of the nesting. 
-This allows each part of what might ultimately amount to a large change to be -introduced and tested independently. - - -Disadvantages -------------- - -D1. The resulting code may be more complex with more failure modes to -handle. Mitigate by thorough auditing and testing, rolling out -gradually, and offering a simple switch to revert to the old behaviour. - -D2. The linux asynchronous I/O implementation is less mature than -its synchronous I/O implementation and might show up problems that -depend on the version of the kernel or library used. Fixes or -workarounds for some of these might require kernel changes. For -example, there are suggestions that despite being supposedly async, -there are still cases where system calls can block. There might be -resource dependencies on other processes running on the system that make -it unsuitable for use while any devices are suspended. Mitigation -as for D1. - -D3. The error handling within callbacks becomes more complicated. -However we know that existing call paths can already sometimes discard -errors, sometimes deliberately, sometimes not, so this aspect is in need -of a complete review anyway and the new approach will make the error -handling more transparent. Aim initially for overall behaviour that is -no worse than that of the existing code, then work on improving it -later. - -D4. The work will take a few weeks to code and test. This leads to a -significant opportunity cost when compared against other enhancements -that could be achieved in that time. However, the proof-of-concept work -performed while writing this design has satisfied me that the work could -proceed and be committed incrementally as a background task. - - -Observations regarding LVM's I/O Architecture ---------------------------------------------- - -H1. All device, metadata and config file I/O is constrained to pass through a -single route in lib/device. - -H2. 
The first step of the analysis was to instrument this code path with -log_debug messages. I/O is split into the following categories: - - "dev signatures", - "PV labels", - "VG metadata header", - "VG metadata content", - "extra VG metadata header", - "extra VG metadata content", - "LVM1 metadata", - "pool metadata", - "LV content", - "logging", - -H3. A bounce buffer is used for most I/O. - -H4. Most callers finish using the supplied data before any further I/O is -issued. The few that don't could be converted trivially to do so. - -H5. There is one stream of I/O per metadata area on each device. - -H6. Some reads fall at offsets close to immediately preceding reads, so it's -possible to avoid these by caching one "block" per metadata area I/O stream. - -H7. Simple analysis suggests a minimum aligned read size of 8k would deliver -immediate gains from this caching. A larger size might perform worse because -almost all the time the extra data read would not be used, but this can be -re-examined and tuned after the code is in place. - - -Proposal --------- - -P1. Retain the "single I/O path" but offer an asynchronous option. - -P2. Eliminate the bounce buffer in most cases by improving alignment. - -P3. Reduce the number of reads by always reading a minimum of an aligned -8k block. - -P4. Eliminate repeated reads by caching the last block read and changing -the lib/device interface to return a pointer to read-only data within -this block. - -P5. Only perform these interface changes for code on the critical path -for now by converting other code sites to use wrappers around the new -interface. - -P6. Treat asynchronous I/O as the interface of choice and optimise only -for this case. - -P7. Convert the callers on the critical path to pass callback functions -to the device layer. These functions will be called later with the -read-only data, a context pointer and a success/failure indicator. 
-Where an existing function performs a sequence of I/O, this has the -advantage of breaking up the large function into smaller ones and -wrapping the parameters used into structures. While this might look -rather messy and ad-hoc in the short-term, it's a first step towards -breaking up confusingly long functions into component parts and wrapping -the existing long parameter lists into more appropriate structures and -refactoring these parts of the code. - -P8. Limit the resources used by the asynchronous I/O by using two -tunable parameters, one limiting the number of outstanding I/Os issued -and another limiting the total amount of memory used. - -P9. Provide a fallback option if asynchronous I/O is unavailable by -sharing the code paths but issuing the I/O synchronously and calling the -callback immediately. - -P10. Only allocate the buffer for the I/O at the point where the I/O is -about to be issued. - -P11. If the thresholds are exceeded, add the request to a simple queue, -and process it later after some I/O has completed. - - -Future work ------------ -F1. Perform a complete review of the error tracking so that device -failures are handled and reported more cleanly, extending the existing -basic error counting mechanism. - -F2. Consider whether some of the nested callbacks can be eliminated, -which would allow for additional simplifications. - -F3. Adjust the contents of the adhoc context structs into more logical -arrangements and use them more widely. - -F4. Perform wider refactoring of these areas of code. - - -Testing considerations ----------------------- -T1. The changes touch code on the device path, so a thorough re-test of -the device layer is required. The new code needs a full audit down -through the library layer into the kernel to check that all the error -conditions that are currently implemented (such as EAGAIN) are handled -sensibly. (LVM's I/O layer needs to remain as solid as we can make it.) - -T2. 
The current test suite provides a reasonably broad range of coverage -of this area but is far from comprehensive. - - -Acceptance criteria -------------------- -A1. The current test suite should pass to the same extent as before the -changes. - -A2. When all debugging and logging is disabled, strace -c must show -improvements e.g. the expected fewer number of reads. - -A3. Running a range of commands under valgrind must not reveal any -new leaks due to the changes. - -A4. All new coverity reports from the change must be addressed. - -A5. CPU time should be similar to that before, as the same work -is being done overall, just in a different order. - -A6. Tests need to show improved behaviour in targetted areas. For example, -if several devices are slow and time out, the delays should occur -in parallel and the elapsed time should be less than before. - - -Release considerations ----------------------- -R1. Async I/O should be widely available and largely reliable on linux -nowadays (even though parts of its interface and implementation remain a -matter of controversy) so we should try to make its use the default -whereever it is supported. If certain types of systems have problems we -should try to detect those cases and disable it automatically there. - -R2. Because the implications of an unexpected problem in the new code -could be severe for the people affected, the roll out needs to be gentle -without a deadline to allow us plenty of time to gain confidence in the -new code. Our own testing will only be able to cover a tiny fraction of -the different setups our users have, so we need to look out for problems -caused by this proactively and encourage people to test it on their own -systems and report back. It must go into the tree near the start of a -release cycle rather than at the end to provide time for our confidence -in it to grow. 
- diff --git a/include/configure.h.in b/include/configure.h.in index a4918071f..be2f66031 100644 --- a/include/configure.h.in +++ b/include/configure.h.in @@ -1,8 +1,5 @@ /* include/configure.h.in. Generated from configure.in by autoheader. */ -/* Define to 1 if aio is available. */ -#undef AIO_SUPPORT - /* Define to 1 to use libblkid detection of signatures when wiping. */ #undef BLKID_WIPING_SUPPORT diff --git a/lib/cache/lvmcache.c b/lib/cache/lvmcache.c index 5ff28274c..fb8100222 100644 --- a/lib/cache/lvmcache.c +++ b/lib/cache/lvmcache.c @@ -141,8 +141,6 @@ void lvmcache_seed_infos_from_lvmetad(struct cmd_context *cmd) /* Volume Group metadata cache functions */ static void _free_cached_vgmetadata(struct lvmcache_vginfo *vginfo) { - struct lvmcache_info *info; - if (!vginfo || !vginfo->vgmetadata) return; @@ -156,10 +154,6 @@ static void _free_cached_vgmetadata(struct lvmcache_vginfo *vginfo) vginfo->cft = NULL; } - /* Invalidate any cached device buffers */ - dm_list_iterate_items(info, &vginfo->infos) - devbufs_release(info->dev); - log_debug_cache("lvmcache: VG %s wiped.", vginfo->vgname); release_vg(vginfo->cached_vg); @@ -548,6 +542,7 @@ const struct format_type *lvmcache_fmt_from_vgname(struct cmd_context *cmd, { struct lvmcache_vginfo *vginfo; struct lvmcache_info *info; + struct label *label; struct dm_list *devh, *tmp; struct dm_list devs; struct device_list *devl; @@ -592,7 +587,7 @@ const struct format_type *lvmcache_fmt_from_vgname(struct cmd_context *cmd, dm_list_iterate_safe(devh, tmp, &devs) { devl = dm_list_item(devh, struct device_list); - (void) label_read(devl->dev, NULL, UINT64_C(0)); + (void) label_read(devl->dev, &label, UINT64_C(0)); dm_list_del(&devl->list); dm_free(devl); } @@ -773,8 +768,10 @@ char *lvmcache_vgname_from_pvid(struct cmd_context *cmd, const char *pvid) static void _rescan_entry(struct lvmcache_info *info) { + struct label *label; + if (info->status & CACHE_INVALID) - (void) label_read(info->dev, NULL, UINT64_C(0)); + 
(void) label_read(info->dev, &label, UINT64_C(0)); } static int _scan_invalid(void) @@ -1096,31 +1093,17 @@ next: goto next; } -/* Track the number of outstanding label reads */ -/* FIXME Switch to struct and also track failed */ -static void _process_label_data(int failed, unsigned ioflags, void *context, const void *data) -{ - int *nr_labels_outstanding = context; - - if (!*nr_labels_outstanding) { - log_error(INTERNAL_ERROR "_process_label_data called too many times"); - return; - } - - (*nr_labels_outstanding)--; -} - int lvmcache_label_scan(struct cmd_context *cmd) { struct dm_list del_cache_devs; struct dm_list add_cache_devs; struct lvmcache_info *info; struct device_list *devl; + struct label *label; struct dev_iter *iter; struct device *dev; struct format_type *fmt; int dev_count = 0; - int nr_labels_outstanding = 0; int r = 0; @@ -1159,22 +1142,13 @@ int lvmcache_label_scan(struct cmd_context *cmd) _destroy_duplicate_device_list(&_found_duplicate_devs); while ((dev = dev_iter_get(iter))) { - log_debug_io("Scanning device %s", dev_name(dev)); - nr_labels_outstanding++; - if (!label_read_callback(dev, UINT64_C(0), AIO_SUPPORTED_CODE_PATH, _process_label_data, &nr_labels_outstanding)) - nr_labels_outstanding--; + (void) label_read(dev, &label, UINT64_C(0)); dev_count++; } dev_iter_destroy(iter); - while (nr_labels_outstanding) { - log_very_verbose("Scanned %d device labels (%d outstanding)", dev_count, nr_labels_outstanding); - if (!dev_async_getevents()) - return_0; - } - - log_very_verbose("Scanned %d device labels (%d outstanding)", dev_count, nr_labels_outstanding); + log_very_verbose("Scanned %d device labels", dev_count); /* * _choose_preferred_devs() returns: @@ -1208,7 +1182,7 @@ int lvmcache_label_scan(struct cmd_context *cmd) dm_list_iterate_items(devl, &add_cache_devs) { log_debug_cache("Rescan preferred device %s for lvmcache", dev_name(devl->dev)); - (void) label_read(devl->dev, NULL, UINT64_C(0)); + (void) label_read(devl->dev, &label, 
UINT64_C(0)); } dm_list_splice(&_unused_duplicate_devs, &del_cache_devs); @@ -1228,7 +1202,7 @@ int lvmcache_label_scan(struct cmd_context *cmd) */ if (_force_label_scan && cmd->is_long_lived && cmd->dump_filter && cmd->full_filter && cmd->full_filter->dump && - !cmd->full_filter->dump(cmd->full_filter, cmd->mem, 0)) + !cmd->full_filter->dump(cmd->full_filter, 0)) stack; r = 1; @@ -1529,6 +1503,7 @@ const char *lvmcache_pvid_from_devname(struct cmd_context *cmd, const char *devname) { struct device *dev; + struct label *label; if (!(dev = dev_cache_get(devname, cmd->filter))) { log_error("%s: Couldn't find device. Check your filters?", @@ -1536,7 +1511,7 @@ const char *lvmcache_pvid_from_devname(struct cmd_context *cmd, return NULL; } - if (!(label_read(dev, NULL, UINT64_C(0)))) + if (!(label_read(dev, &label, UINT64_C(0)))) return NULL; return dev->pvid; @@ -2001,7 +1976,7 @@ int lvmcache_add_orphan_vginfo(const char *vgname, struct format_type *fmt) return _lvmcache_update_vgname(NULL, vgname, vgname, 0, "", fmt); } -int lvmcache_update_vgname_and_id(struct lvmcache_info *info, const struct lvmcache_vgsummary *vgsummary) +int lvmcache_update_vgname_and_id(struct lvmcache_info *info, struct lvmcache_vgsummary *vgsummary) { const char *vgname = vgsummary->vgname; const char *vgid = (char *)&vgsummary->vgid; diff --git a/lib/cache/lvmcache.h b/lib/cache/lvmcache.h index 3c76b780b..847c208f1 100644 --- a/lib/cache/lvmcache.h +++ b/lib/cache/lvmcache.h @@ -85,7 +85,7 @@ void lvmcache_del(struct lvmcache_info *info); /* Update things */ int lvmcache_update_vgname_and_id(struct lvmcache_info *info, - const struct lvmcache_vgsummary *vgsummary); + struct lvmcache_vgsummary *vgsummary); int lvmcache_update_vg(struct volume_group *vg, unsigned precommitted); void lvmcache_lock_vgname(const char *vgname, int read_only); diff --git a/lib/cache/lvmetad.c b/lib/cache/lvmetad.c index 589b48eae..4b7410cec 100644 --- a/lib/cache/lvmetad.c +++ b/lib/cache/lvmetad.c @@ -1771,7 
+1771,7 @@ static int _lvmetad_pvscan_single(struct metadata_area *mda, void *baton) struct volume_group *vg; if (mda_is_ignored(mda) || - !(vg = mda->ops->vg_read(b->fid, "", mda, NULL, NULL, 1, 0))) + !(vg = mda->ops->vg_read(b->fid, "", mda, NULL, NULL, 1))) return 1; /* FIXME Also ensure contents match etc. */ diff --git a/lib/commands/toolcontext.c b/lib/commands/toolcontext.c index b613b8f72..c99849587 100644 --- a/lib/commands/toolcontext.c +++ b/lib/commands/toolcontext.c @@ -636,16 +636,6 @@ static int _process_config(struct cmd_context *cmd) */ cmd->default_settings.udev_fallback = udev_disabled ? 1 : -1; -#ifdef AIO_SUPPORT - cmd->use_aio = find_config_tree_bool(cmd, devices_use_aio_CFG, NULL); -#else - cmd->use_aio = 0; -#endif - if (cmd->use_aio && !dev_async_setup(cmd)) - cmd->use_aio = 0; - - log_debug_io("%ssing asynchronous I/O.", cmd->use_aio ? "U" : "Not u"); - init_retry_deactivation(find_config_tree_bool(cmd, activation_retry_deactivation_CFG, NULL)); init_activation_checks(find_config_tree_bool(cmd, activation_checks_CFG, NULL)); @@ -1298,7 +1288,7 @@ int init_filters(struct cmd_context *cmd, unsigned load_persistent_cache) lvm_stat_ctim(&ts, &st); cts = config_file_timestamp(cmd->cft); if (timespeccmp(&ts, &cts, >) && - !persistent_filter_load(cmd->mem, cmd->filter, NULL)) + !persistent_filter_load(cmd->filter, NULL)) log_verbose("Failed to load existing device cache from %s", dev_cache); } @@ -2160,8 +2150,6 @@ int refresh_toolcontext(struct cmd_context *cmd) cmd->lib_dir = NULL; - label_init(); - if (!_init_lvm_conf(cmd)) return_0; @@ -2249,7 +2237,7 @@ void destroy_toolcontext(struct cmd_context *cmd) int flags; if (cmd->dump_filter && cmd->filter && cmd->filter->dump && - !cmd->filter->dump(cmd->filter, cmd->mem, 1)) + !cmd->filter->dump(cmd->filter, 1)) stack; archive_exit(cmd); diff --git a/lib/commands/toolcontext.h b/lib/commands/toolcontext.h index 89d1088ee..f04afec8c 100644 --- a/lib/commands/toolcontext.h +++ 
b/lib/commands/toolcontext.h @@ -165,7 +165,6 @@ struct cmd_context { unsigned vg_notify:1; unsigned lv_notify:1; unsigned pv_notify:1; - unsigned use_aio:1; unsigned activate_component:1; /* command activates component LV */ unsigned process_component_lvs:1; /* command processes also component LVs */ diff --git a/lib/config/config.c b/lib/config/config.c index 97c5db8a1..8fca3728e 100644 --- a/lib/config/config.c +++ b/lib/config/config.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004-2018 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. * * This file is part of LVM2. * @@ -279,7 +279,7 @@ struct dm_config_tree *config_file_open_and_read(const char *config_file, } log_very_verbose("Loading config file: %s", config_file); - if (!config_file_read(cmd->mem, cft)) { + if (!config_file_read(cft)) { log_error("Failed to load config file %s", config_file); goto bad; } @@ -489,102 +489,32 @@ int override_config_tree_from_profile(struct cmd_context *cmd, return 0; } -struct process_config_file_params { - struct dm_config_tree *cft; - struct device *dev; - off_t offset; - size_t size; - off_t offset2; - size_t size2; - checksum_fn_t checksum_fn; - uint32_t checksum; - int checksum_only; - int no_dup_node_check; - lvm_callback_fn_t config_file_read_fd_callback; - void *config_file_read_fd_context; - int ret; -}; - -static void _process_config_file_buffer(int failed, unsigned ioflags, void *context, const void *data) -{ - struct process_config_file_params *pcfp = context; - const char *fb = data, *fe; - - if (failed) { - pcfp->ret = 0; - goto_out; - } - - if (pcfp->checksum_fn && pcfp->checksum != - (pcfp->checksum_fn(pcfp->checksum_fn(INITIAL_CRC, (const uint8_t *)fb, pcfp->size), - (const uint8_t *)(fb + pcfp->size), pcfp->size2))) { - log_error("%s: Checksum error at offset %" PRIu64, dev_name(pcfp->dev), (uint64_t) pcfp->offset); - pcfp->ret = 0; - goto out; - 
} - - if (!pcfp->checksum_only) { - fe = fb + pcfp->size + pcfp->size2; - if (pcfp->no_dup_node_check) { - if (!dm_config_parse_without_dup_node_check(pcfp->cft, fb, fe)) - pcfp->ret = 0; - } else if (!dm_config_parse(pcfp->cft, fb, fe)) - pcfp->ret = 0; - } - -out: - if (pcfp->config_file_read_fd_callback) - pcfp->config_file_read_fd_callback(!pcfp->ret, ioflags, pcfp->config_file_read_fd_context, NULL); -} - /* * When checksum_only is set, the checksum of buffer is only matched * and function avoids parsing of mda into config tree which * remains unmodified and should not be used. */ -int config_file_read_fd(struct dm_pool *mem, struct dm_config_tree *cft, struct device *dev, dev_io_reason_t reason, +int config_file_read_fd(struct dm_config_tree *cft, struct device *dev, dev_io_reason_t reason, off_t offset, size_t size, off_t offset2, size_t size2, checksum_fn_t checksum_fn, uint32_t checksum, - int checksum_only, int no_dup_node_check, unsigned ioflags, - lvm_callback_fn_t config_file_read_fd_callback, void *config_file_read_fd_context) + int checksum_only, int no_dup_node_check) { - char *fb; + char *fb, *fe; int r = 0; - off_t mmap_offset = 0; int use_mmap = 1; - const char *buf = NULL; - unsigned circular = size2 ? 1 : 0; /* Wrapped around end of disk metadata buffer? 
*/ + off_t mmap_offset = 0; + char *buf = NULL; struct config_source *cs = dm_config_get_custom(cft); - struct process_config_file_params *pcfp; if (!_is_file_based_config_source(cs->type)) { log_error(INTERNAL_ERROR "config_file_read_fd: expected file, special file " "or profile config source, found %s config source.", _config_source_names[cs->type]); - goto bad; + return 0; } - if (!(pcfp = dm_pool_zalloc(mem, sizeof(*pcfp)))) { - log_debug("config_file_read_fd: process_config_file_params struct allocation failed"); - goto bad; - } - - pcfp->cft = cft; - pcfp->dev = dev; - pcfp->offset = offset; - pcfp->size = size; - pcfp->offset2 = offset2; - pcfp->size2 = size2; - pcfp->checksum_fn = checksum_fn; - pcfp->checksum = checksum; - pcfp->checksum_only = checksum_only; - pcfp->no_dup_node_check = no_dup_node_check; - pcfp->config_file_read_fd_callback = config_file_read_fd_callback; - pcfp->config_file_read_fd_context = config_file_read_fd_context; - pcfp->ret = 1; - /* Only use mmap with regular files */ - if (!(dev->flags & DEV_REGULAR) || circular) + if (!(dev->flags & DEV_REGULAR) || size2) use_mmap = 0; if (use_mmap) { @@ -594,40 +524,56 @@ int config_file_read_fd(struct dm_pool *mem, struct dm_config_tree *cft, struct MAP_PRIVATE, dev_fd(dev), offset - mmap_offset); if (fb == (caddr_t) (-1)) { log_sys_error("mmap", dev_name(dev)); - goto bad; + return 0; /* mmap failed: nothing is mapped, so do not fall into the munmap() cleanup at out */ } - _process_config_file_buffer(0, ioflags, pcfp, fb + mmap_offset); - r = pcfp->ret; + fb = fb + mmap_offset; + } else { + if (!(buf = dm_malloc(size + size2))) { + log_error("Failed to allocate circular buffer."); + return 0; + } + if (!dev_read_circular(dev, (uint64_t) offset, size, + (uint64_t) offset2, size2, reason, buf)) { + goto out; + } + fb = buf; + } + + if (checksum_fn && checksum != + (checksum_fn(checksum_fn(INITIAL_CRC, (const uint8_t *)fb, size), + (const uint8_t *)(fb + size), size2))) { + log_error("%s: Checksum error at offset %" PRIu64, dev_name(dev), (uint64_t) offset); + goto out; + } + +
if (!checksum_only) { + fe = fb + size + size2; + if (no_dup_node_check) { + if (!dm_config_parse_without_dup_node_check(cft, fb, fe)) + goto_out; + } else { + if (!dm_config_parse(cft, fb, fe)) + goto_out; + } + } + + r = 1; + + out: + if (!use_mmap) + dm_free(buf); + else { /* unmap the file */ - if (munmap(fb, size + mmap_offset)) { + if (munmap(fb - mmap_offset, size + mmap_offset)) { log_sys_error("munmap", dev_name(dev)); r = 0; } - } else { - if (circular) { - if (!(buf = dev_read_circular(dev, (uint64_t) offset, size, (uint64_t) offset2, size2, reason))) - goto_out; - _process_config_file_buffer(0, ioflags, pcfp, buf); - dm_free((void *)buf); - } else { - dev_read_callback(dev, (uint64_t) offset, size, reason, ioflags, _process_config_file_buffer, pcfp); - if (config_file_read_fd_callback) - return 1; - } - r = pcfp->ret; } -out: return r; - -bad: - if (config_file_read_fd_callback) - config_file_read_fd_callback(1, ioflags, config_file_read_fd_context, NULL); - - return 0; } -int config_file_read(struct dm_pool *mem, struct dm_config_tree *cft) +int config_file_read(struct dm_config_tree *cft) { const char *filename = NULL; struct config_source *cs = dm_config_get_custom(cft); @@ -655,8 +601,8 @@ int config_file_read(struct dm_pool *mem, struct dm_config_tree *cft) } } - r = config_file_read_fd(mem, cft, cf->dev, DEV_IO_MDA_CONTENT, 0, (size_t) info.st_size, 0, 0, - (checksum_fn_t) NULL, 0, 0, 0, 0, NULL, NULL); + r = config_file_read_fd(cft, cf->dev, DEV_IO_MDA_CONTENT, 0, (size_t) info.st_size, 0, 0, + (checksum_fn_t) NULL, 0, 0, 0); if (!cf->keep_open) { if (!dev_close(cf->dev)) diff --git a/lib/config/config.h b/lib/config/config.h index 4517cb7b0..d01306b36 100644 --- a/lib/config/config.h +++ b/lib/config/config.h @@ -239,13 +239,11 @@ config_source_t config_get_source_type(struct dm_config_tree *cft); typedef uint32_t (*checksum_fn_t) (uint32_t initial, const uint8_t *buf, uint32_t size); struct dm_config_tree *config_open(config_source_t source, 
const char *filename, int keep_open); -int config_file_read_fd(struct dm_pool *mem, struct dm_config_tree *cft, struct device *dev, dev_io_reason_t reason, +int config_file_read_fd(struct dm_config_tree *cft, struct device *dev, dev_io_reason_t reason, off_t offset, size_t size, off_t offset2, size_t size2, checksum_fn_t checksum_fn, uint32_t checksum, - int skip_parse, int no_dup_node_check, unsigned ioflags, - lvm_callback_fn_t config_file_read_fd_callback, void *config_file_read_fd_context); - -int config_file_read(struct dm_pool *mem, struct dm_config_tree *cft); + int skip_parse, int no_dup_node_check); +int config_file_read(struct dm_config_tree *cft); struct dm_config_tree *config_file_open_and_read(const char *config_file, config_source_t source, struct cmd_context *cmd); int config_write(struct dm_config_tree *cft, struct config_def_tree_spec *tree_spec, diff --git a/lib/config/config_settings.h b/lib/config/config_settings.h index f1db79786..077fb15ce 100644 --- a/lib/config/config_settings.h +++ b/lib/config/config_settings.h @@ -226,16 +226,6 @@ cfg(devices_dir_CFG, "dir", devices_CFG_SECTION, CFG_ADVANCED, CFG_TYPE_STRING, cfg_array(devices_scan_CFG, "scan", devices_CFG_SECTION, CFG_ADVANCED, CFG_TYPE_STRING, "#S/dev", vsn(1, 0, 0), NULL, 0, NULL, "Directories containing device nodes to use with LVM.\n") -cfg(devices_use_aio_CFG, "use_aio", devices_CFG_SECTION, CFG_DEFAULT_COMMENTED, CFG_TYPE_BOOL, DEFAULT_USE_AIO, vsn(2, 2, 178), NULL, 0, NULL, - "Use linux asynchronous I/O for parallel device access where possible.\n") - -cfg(devices_aio_max_CFG, "aio_max", devices_CFG_SECTION, CFG_DEFAULT_COMMENTED, CFG_TYPE_INT, DEFAULT_AIO_MAX, vsn(2, 2, 178), NULL, 0, NULL, - "Maximum number of asynchronous I/Os to issue concurrently.\n") - -cfg(devices_aio_memory_CFG, "aio_memory", devices_CFG_SECTION, CFG_DEFAULT_COMMENTED, CFG_TYPE_INT, DEFAULT_AIO_MEMORY, vsn(2, 2, 178), NULL, 0, NULL, - "Approximate maximum total amount of memory (in MB) used\n" - "for 
asynchronous I/O buffers.\n") - cfg_array(devices_loopfiles_CFG, "loopfiles", devices_CFG_SECTION, CFG_DEFAULT_UNDEFINED | CFG_UNSUPPORTED, CFG_TYPE_STRING, NULL, vsn(1, 2, 0), NULL, 0, NULL, NULL) cfg(devices_obtain_device_list_from_udev_CFG, "obtain_device_list_from_udev", devices_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_OBTAIN_DEVICE_LIST_FROM_UDEV, vsn(2, 2, 85), NULL, 0, NULL, diff --git a/lib/config/defaults.h b/lib/config/defaults.h index 1c730a9ce..d9e19d971 100644 --- a/lib/config/defaults.h +++ b/lib/config/defaults.h @@ -32,9 +32,6 @@ #define DEFAULT_SYSTEM_ID_SOURCE "none" #define DEFAULT_OBTAIN_DEVICE_LIST_FROM_UDEV 1 #define DEFAULT_EXTERNAL_DEVICE_INFO_SOURCE "none" -#define DEFAULT_USE_AIO 1 -#define DEFAULT_AIO_MAX 128 -#define DEFAULT_AIO_MEMORY 10 #define DEFAULT_SYSFS_SCAN 1 #define DEFAULT_MD_COMPONENT_DETECTION 1 #define DEFAULT_FW_RAID_COMPONENT_DETECTION 0 diff --git a/lib/device/dev-cache.c b/lib/device/dev-cache.c index 711c2d88b..e72ffd6fd 100644 --- a/lib/device/dev-cache.c +++ b/lib/device/dev-cache.c @@ -1245,24 +1245,12 @@ int dev_cache_check_for_open_devices(void) int dev_cache_exit(void) { - struct btree_iter *b; int num_open = 0; - dev_async_exit(); - if (_cache.names) if ((num_open = _check_for_open_devices(1)) > 0) log_error(INTERNAL_ERROR "%d device(s) were left open and have been closed.", num_open); - if (_cache.devices) { - /* FIXME Replace with structured devbuf cache */ - b = btree_first(_cache.devices); - while (b) { - devbufs_release(btree_get_data(b)); - b = btree_next(b); - } - } - if (_cache.mem) dm_pool_destroy(_cache.mem); diff --git a/lib/device/dev-cache.h b/lib/device/dev-cache.h index 560355387..546b1fe2a 100644 --- a/lib/device/dev-cache.h +++ b/lib/device/dev-cache.h @@ -23,10 +23,10 @@ * predicate for devices. 
*/ struct dev_filter { - int (*passes_filter) (struct dev_filter *f, struct device *dev); - void (*destroy) (struct dev_filter *f); - void (*wipe) (struct dev_filter *f); - int (*dump) (struct dev_filter *f, struct dm_pool *mem, int merge_existing); + int (*passes_filter) (struct dev_filter * f, struct device * dev); + void (*destroy) (struct dev_filter * f); + void (*wipe) (struct dev_filter * f); + int (*dump) (struct dev_filter * f, int merge_existing); void *private; unsigned use_count; }; diff --git a/lib/device/dev-io.c b/lib/device/dev-io.c index d631b8613..c321e61ad 100644 --- a/lib/device/dev-io.c +++ b/lib/device/dev-io.c @@ -53,12 +53,6 @@ # endif #endif -/* - * Always read at least 8k from disk. - * This seems to be a good compromise for the existing LVM2 metadata layout. - */ -#define MIN_READ_SIZE (8 * 1024) - static DM_LIST_INIT(_open_devices); static unsigned _dev_size_seqno = 1; @@ -80,319 +74,38 @@ static const char *_reason_text(dev_io_reason_t reason) return _reasons[(unsigned) reason]; } -/* - * Release the memory holding the last data we read - */ -static void _release_devbuf(struct device_buffer *devbuf) -{ - dm_free(devbuf->malloc_address); - devbuf->malloc_address = NULL; -} - -void devbufs_release(struct device *dev) -{ - if ((dev->flags & DEV_REGULAR)) - return; - - _release_devbuf(&dev->last_devbuf); - _release_devbuf(&dev->last_extra_devbuf); -} - -#ifdef AIO_SUPPORT - -# include - -static io_context_t _aio_ctx = 0; -static struct io_event *_aio_events = NULL; -static int _aio_max = 0; -static int64_t _aio_memory_max = 0; -static int _aio_must_queue = 0; /* Have we reached AIO capacity? */ - -static DM_LIST_INIT(_aio_queue); - -#define DEFAULT_AIO_COLLECTION_EVENTS 32 - -int dev_async_setup(struct cmd_context *cmd) -{ - int r; - - _aio_max = find_config_tree_int(cmd, devices_aio_max_CFG, NULL); - _aio_memory_max = find_config_tree_int(cmd, devices_aio_memory_CFG, NULL) * INT64_C(1024 * 1024); - - /* Threshold is zero? 
*/ - if (!_aio_max || !_aio_memory_max) { - if (_aio_ctx) - dev_async_exit(); - return 1; - } - - /* Already set up? */ - if (_aio_ctx) - return 1; - - log_debug_io("Setting up aio context for up to %" PRId64 " MB across %d events.", _aio_memory_max, _aio_max); - - if (!_aio_events && !(_aio_events = dm_zalloc(sizeof(*_aio_events) * DEFAULT_AIO_COLLECTION_EVENTS))) { - log_error("Failed to allocate io_event array for asynchronous I/O."); - return 0; - } - - if ((r = io_setup(_aio_max, &_aio_ctx)) < 0) { - /* - * Possible errors: - * ENOSYS - aio not available in current kernel - * EAGAIN - _aio_max is too big - * EFAULT - invalid pointer - * EINVAL - _aio_ctx != 0 or kernel aio limits exceeded - * ENOMEM - */ - log_warn("WARNING: Asynchronous I/O setup for %d events failed: %s", _aio_max, strerror(-r)); - log_warn("WARNING: Using only synchronous I/O."); - dm_free(_aio_events); - _aio_events = NULL; - _aio_ctx = 0; - return 0; - } - - return 1; -} - -/* Reset aio context after fork */ -int dev_async_reset(struct cmd_context *cmd) -{ - log_debug_io("Resetting asynchronous I/O context."); - _aio_ctx = 0; - dm_free(_aio_events); - _aio_events = NULL; - - return dev_async_setup(cmd); -} - -/* - * Track the amount of in-flight async I/O. - * If it exceeds the defined threshold set _aio_must_queue. 
- */ -static void _update_aio_counters(int nr, ssize_t bytes) -{ - static int64_t aio_bytes = 0; - static int aio_count = 0; - - aio_bytes += bytes; - aio_count += nr; - - if (aio_count >= _aio_max || aio_bytes > _aio_memory_max) - _aio_must_queue = 1; - else - _aio_must_queue = 0; -} - -static int _io(struct device_buffer *devbuf, unsigned ioflags); - -int dev_async_getevents(void) -{ - struct device_buffer *devbuf, *tmp; - lvm_callback_fn_t dev_read_callback_fn; - void *dev_read_callback_context; - int r, event_nr; - - if (!_aio_ctx) - return 1; - - do { - /* FIXME Add timeout - currently NULL - waits for ever for at least 1 item */ - r = io_getevents(_aio_ctx, 1, DEFAULT_AIO_COLLECTION_EVENTS, _aio_events, NULL); - if (r > 0) - break; - if (!r) - return 1; /* Timeout elapsed */ - if (r == -EINTR) - continue; - if (r == -EAGAIN) { - usleep(100); - return 1; /* Give the caller the opportunity to do other work before repeating */ - } - /* - * ENOSYS - not supported by kernel - * EFAULT - memory invalid - * EINVAL - _aio_ctx invalid or min_nr/nr/timeout out of range - */ - log_error("Asynchronous event collection failed: %s", strerror(-r)); - return 0; - } while (1); - - for (event_nr = 0; event_nr < r; event_nr++) { - devbuf = _aio_events[event_nr].obj->data; - dm_free(_aio_events[event_nr].obj); - - _update_aio_counters(-1, -devbuf->where.size); - - dev_read_callback_fn = devbuf->dev_read_callback_fn; - dev_read_callback_context = devbuf->dev_read_callback_context; - - /* Clear the callbacks as a precaution */ - devbuf->dev_read_callback_context = NULL; - devbuf->dev_read_callback_fn = NULL; - - if (_aio_events[event_nr].res == devbuf->where.size) { - if (dev_read_callback_fn) - dev_read_callback_fn(0, AIO_SUPPORTED_CODE_PATH, dev_read_callback_context, (char *)devbuf->buf + devbuf->data_offset); - } else { - /* FIXME If partial read is possible, resubmit remainder */ - log_error("%s: asynchronous read only I/O failed (" FMTd64 ") of " FMTu64 " bytes at " FMTu64 " 
(for %s): %s", - dev_name(devbuf->where.dev), _aio_events[event_nr].res, - (uint64_t) devbuf->where.size, (uint64_t) devbuf->where.start, - _reason_text(devbuf->reason), - (((int64_t)_aio_events[event_nr].res) < 0) ? strerror(-(int64_t)_aio_events[event_nr].res) : 0); - _release_devbuf(devbuf); - if (dev_read_callback_fn) - dev_read_callback_fn(1, AIO_SUPPORTED_CODE_PATH, dev_read_callback_context, NULL); - else - r = 0; - } - } - - /* Submit further queued events if we can */ - dm_list_iterate_items_gen_safe(devbuf, tmp, &_aio_queue, aio_queued) { - if (_aio_must_queue) - break; - dm_list_del(&devbuf->aio_queued); - _io(devbuf, 1); - } - - return 1; -} - -static int _io_async(struct device_buffer *devbuf) -{ - struct device_area *where = &devbuf->where; - struct iocb *iocb; - int r; - - _update_aio_counters(1, devbuf->where.size); - - if (!(iocb = dm_malloc(sizeof(*iocb)))) { - log_error("Failed to allocate I/O control block array for asynchronous I/O."); - return 0; - } - - io_prep_pread(iocb, dev_fd(where->dev), devbuf->buf, where->size, where->start); - iocb->data = devbuf; - - do { - r = io_submit(_aio_ctx, 1L, &iocb); - if (r ==1) - break; /* Success */ - if (r == -EAGAIN) { - /* Try to release some resources then retry */ - usleep(100); - if (dev_async_getevents()) - return_0; - /* FIXME Add counter/timeout so we can't get stuck here for ever */ - continue; - } - /* - * Possible errors: - * EFAULT - invalid data - * ENOSYS - no aio support in kernel - * EBADF - bad file descriptor in iocb - * EINVAL - invalid _aio_ctx / iocb not initialised / invalid operation for this fd - */ - log_error("Asynchronous event submission failed: %s", strerror(-r)); - return 0; - } while (1); - - return 1; -} - -void dev_async_exit(void) -{ - struct device_buffer *devbuf, *tmp; - lvm_callback_fn_t dev_read_callback_fn; - void *dev_read_callback_context; - int r; - - if (!_aio_ctx) - return; - - /* Discard any queued requests */ - dm_list_iterate_items_gen_safe(devbuf, tmp, 
&_aio_queue, aio_queued) { - dm_list_del(&devbuf->aio_queued); - - _update_aio_counters(-1, -devbuf->where.size); - - dev_read_callback_fn = devbuf->dev_read_callback_fn; - dev_read_callback_context = devbuf->dev_read_callback_context; - - _release_devbuf(devbuf); - - if (dev_read_callback_fn) - dev_read_callback_fn(1, AIO_SUPPORTED_CODE_PATH, dev_read_callback_context, NULL); - } - - log_debug_io("Destroying aio context."); - if ((r = io_destroy(_aio_ctx)) < 0) - /* Returns -ENOSYS if aio not in kernel or -EINVAL if _aio_ctx invalid */ - log_error("Failed to destroy asynchronous I/O context: %s", strerror(-r)); - - dm_free(_aio_events); - _aio_events = NULL; - - _aio_ctx = 0; -} - -static void _queue_aio(struct device_buffer *devbuf) -{ - dm_list_add(&_aio_queue, &devbuf->aio_queued); - log_debug_io("Queueing aio."); -} - -#else - -static int _aio_ctx = 0; -static int _aio_must_queue = 0; - -int dev_async_setup(struct cmd_context *cmd) -{ - return 1; -} - -int dev_async_reset(struct cmd_context *cmd) -{ - return 1; -} - -int dev_async_getevents(void) -{ - return 1; -} - -void dev_async_exit(void) -{ -} - -static int _io_async(struct device_buffer *devbuf) -{ - return 0; -} - -static void _queue_aio(struct device_buffer *devbuf) -{ -} - -#endif /* AIO_SUPPORT */ - /*----------------------------------------------------------------- * The standard io loop that keeps submitting an io until it's * all gone. *---------------------------------------------------------------*/ -static int _io_sync(struct device_buffer *devbuf) +static int _io(struct device_area *where, char *buffer, int should_write, dev_io_reason_t reason) { - struct device_area *where = &devbuf->where; int fd = dev_fd(where->dev); - char *buffer = devbuf->buf; ssize_t n = 0; size_t total = 0; + if (fd < 0) { + log_error("Attempt to read an unopened device (%s).", + dev_name(where->dev)); + return 0; + } + + log_debug_io("%s %s:%8" PRIu64 " bytes (sync) at %" PRIu64 "%s (for %s)", + should_write ? 
"Write" : "Read ", dev_name(where->dev), + where->size, (uint64_t) where->start, + (should_write && test_mode()) ? " (test mode - suppressed)" : "", _reason_text(reason)); + + /* + * Skip all writes in test mode. + */ + if (should_write && test_mode()) + return 1; + + if (where->size > SSIZE_MAX) { + log_error("Read size too large: %" PRIu64, where->size); + return 0; + } + if (lseek(fd, (off_t) where->start, SEEK_SET) == (off_t) -1) { log_error("%s: lseek %" PRIu64 " failed: %s", dev_name(where->dev), (uint64_t) where->start, @@ -402,19 +115,18 @@ static int _io_sync(struct device_buffer *devbuf) while (total < (size_t) where->size) { do - n = devbuf->write ? + n = should_write ? write(fd, buffer, (size_t) where->size - total) : read(fd, buffer, (size_t) where->size - total); while ((n < 0) && ((errno == EINTR) || (errno == EAGAIN))); if (n < 0) - log_error("%s: synchronous %s failed after %" PRIu64 " of %" PRIu64 - " at %" PRIu64 " (for %s): %s", dev_name(where->dev), - devbuf->write ? "write" : "read", - (uint64_t) total, - (uint64_t) where->size, (uint64_t) where->start, - _reason_text(devbuf->reason), - strerror(errno)); + log_error_once("%s: %s failed after %" PRIu64 " of %" PRIu64 + " at %" PRIu64 ": %s", dev_name(where->dev), + should_write ? "write" : "read", + (uint64_t) total, + (uint64_t) where->size, + (uint64_t) where->start, strerror(errno)); if (n <= 0) break; @@ -426,42 +138,6 @@ static int _io_sync(struct device_buffer *devbuf) return (total == (size_t) where->size); } -static int _io(struct device_buffer *devbuf, unsigned ioflags) -{ - struct device_area *where = &devbuf->where; - int fd = dev_fd(where->dev); - int async = (!devbuf->write && _aio_ctx && aio_supported_code_path(ioflags) && devbuf->dev_read_callback_fn) ? 
1 : 0; - - if (fd < 0) { - log_error("Attempt to read an unopened device (%s).", - dev_name(where->dev)); - return 0; - } - - if (!devbuf->buf && !(devbuf->malloc_address = devbuf->buf = dm_malloc_aligned((size_t) devbuf->where.size, 0))) { - log_error("I/O buffer malloc failed"); - return 0; - } - - log_debug_io("%s %s(fd %d):%8" PRIu64 " bytes (%ssync) at %" PRIu64 "%s (for %s)", - devbuf->write ? "Write" : "Read ", dev_name(where->dev), fd, - where->size, async ? "a" : "", (uint64_t) where->start, - (devbuf->write && test_mode()) ? " (test mode - suppressed)" : "", _reason_text(devbuf->reason)); - - /* - * Skip all writes in test mode. - */ - if (devbuf->write && test_mode()) - return 1; - - if (where->size > SSIZE_MAX) { - log_error("Read size too large: %" PRIu64, where->size); - return 0; - } - - return async ? _io_async(devbuf) : _io_sync(devbuf); -} - /*----------------------------------------------------------------- * LVM2 uses O_DIRECT when performing metadata io, which requires * block size aligned accesses. 
If any io is not aligned we have @@ -551,16 +227,15 @@ static void _widen_region(unsigned int block_size, struct device_area *region, result->size += block_size - delta; } -static int _aligned_io(struct device_area *where, char *write_buffer, - int should_write, dev_io_reason_t reason, - unsigned ioflags, lvm_callback_fn_t dev_read_callback_fn, void *dev_read_callback_context) +static int _aligned_io(struct device_area *where, char *buffer, + int should_write, dev_io_reason_t reason) { + char *bounce, *bounce_buf; unsigned int physical_block_size = 0; unsigned int block_size = 0; unsigned buffer_was_widened = 0; uintptr_t mask; struct device_area widened; - struct device_buffer *devbuf; int r = 0; if (!(where->dev->flags & DEV_REGULAR) && @@ -569,11 +244,6 @@ static int _aligned_io(struct device_area *where, char *write_buffer, if (!block_size) block_size = lvm_getpagesize(); - - /* Apply minimum read size */ - if (!should_write && block_size < MIN_READ_SIZE) - block_size = MIN_READ_SIZE; - mask = block_size - 1; _widen_region(block_size, where, &widened); @@ -583,75 +253,50 @@ static int _aligned_io(struct device_area *where, char *write_buffer, buffer_was_widened = 1; log_debug_io("Widening request for %" PRIu64 " bytes at %" PRIu64 " to %" PRIu64 " bytes at %" PRIu64 " on %s (for %s)", where->size, (uint64_t) where->start, widened.size, (uint64_t) widened.start, dev_name(where->dev), _reason_text(reason)); - } - - devbuf = DEV_DEVBUF(where->dev, reason); - _release_devbuf(devbuf); - devbuf->where.dev = where->dev; - devbuf->where.start = widened.start; - devbuf->where.size = widened.size; - devbuf->write = should_write; - devbuf->reason = reason; - devbuf->dev_read_callback_fn = dev_read_callback_fn; - devbuf->dev_read_callback_context = dev_read_callback_context; - - /* Store location of requested data relative to start of buf */ - devbuf->data_offset = where->start - devbuf->where.start; - - if (should_write && !buffer_was_widened && !((uintptr_t) write_buffer 
& mask)) + } else if (!((uintptr_t) buffer & mask)) /* Perform the I/O directly. */ - devbuf->buf = write_buffer; - else if (!should_write) - /* Postpone buffer allocation until we're about to issue the I/O */ - devbuf->buf = NULL; - else { - /* Allocate a bounce buffer with an extra block */ - if (!(devbuf->malloc_address = devbuf->buf = dm_malloc((size_t) devbuf->where.size + block_size))) { - log_error("Bounce buffer malloc failed"); - return 0; - } + return _io(where, buffer, should_write, reason); - /* - * Realign start of bounce buffer (using the extra sector) - */ - if (((uintptr_t) devbuf->buf) & mask) - devbuf->buf = (char *) ((((uintptr_t) devbuf->buf) + mask) & ~mask); + /* Allocate a bounce buffer with an extra block */ + if (!(bounce_buf = bounce = dm_malloc((size_t) widened.size + block_size))) { + log_error("Bounce buffer malloc failed"); + return 0; } - /* If we've reached our concurrent AIO limit, add this request to the queue */ - if (!devbuf->write && _aio_ctx && aio_supported_code_path(ioflags) && dev_read_callback_fn && _aio_must_queue) { - _queue_aio(devbuf); - return 1; - } - - devbuf->write = 0; + /* + * Realign start of bounce buffer (using the extra sector) + */ + if (((uintptr_t) bounce) & mask) + bounce = (char *) ((((uintptr_t) bounce) + mask) & ~mask); /* Do we need to read into the bounce buffer? */ - if ((!should_write || buffer_was_widened) && !_io(devbuf, ioflags)) { + if ((!should_write || buffer_was_widened) && + !_io(&widened, bounce, 0, reason)) { if (!should_write) - goto_bad; + goto_out; /* FIXME Handle errors properly! 
*/ /* FIXME pre-extend the file */ - memset(devbuf->buf, '\n', devbuf->where.size); + memset(bounce, '\n', widened.size); } - if (!should_write) - return 1; + if (should_write) { + memcpy(bounce + (where->start - widened.start), buffer, + (size_t) where->size); - /* writes */ - - if (devbuf->malloc_address) { - memcpy((char *) devbuf->buf + devbuf->data_offset, write_buffer, (size_t) where->size); - log_debug_io("Overwriting %" PRIu64 " bytes at %" PRIu64 " (for %s)", where->size, - (uint64_t) where->start, _reason_text(devbuf->reason)); + /* ... then we write */ + if (!(r = _io(&widened, bounce, 1, reason))) + stack; + + goto out; } - /* ... then we write */ - devbuf->write = 1; - if (!(r = _io(devbuf, 0))) - stack; -bad: - _release_devbuf(devbuf); + memcpy(buffer, bounce + (where->start - widened.start), + (size_t) where->size); + + r = 1; + +out: + dm_free(bounce_buf); return r; } @@ -1005,7 +650,6 @@ static void _close(struct device *dev) dev->phys_block_size = -1; dev->block_size = -1; dm_list_del(&dev->open_list); - devbufs_release(dev); log_debug_devs("Closed %s", dev_name(dev)); @@ -1078,123 +722,57 @@ static void _dev_inc_error_count(struct device *dev) dev->max_error_count, dev_name(dev)); } -/* - * Data is returned (read-only) at DEV_DEVBUF_DATA(dev, reason). - * If dev_read_callback_fn is supplied, we always return 1 and take - * responsibility for calling it exactly once. This might happen before the - * function returns (if there's an error or the I/O is synchronous) or after. - * Any error is passed to that function, which must track it if required. 
- */ -static int _dev_read_callback(struct device *dev, uint64_t offset, size_t len, dev_io_reason_t reason, - unsigned ioflags, lvm_callback_fn_t dev_read_callback_fn, void *callback_context) +int dev_read(struct device *dev, uint64_t offset, size_t len, dev_io_reason_t reason, void *buffer) { struct device_area where; - struct device_buffer *devbuf; - uint64_t buf_end; - int cached = 0; - int ret = 0; + int ret; - if (!dev->open_count) { - log_error(INTERNAL_ERROR "Attempt to access device %s while closed.", dev_name(dev)); - goto out; - } + if (!dev->open_count) + return_0; if (!_dev_is_valid(dev)) - goto_out; - - /* - * Can we satisfy this from data we stored last time we read? - */ - if ((devbuf = DEV_DEVBUF(dev, reason)) && devbuf->malloc_address) { - buf_end = devbuf->where.start + devbuf->where.size - 1; - if (offset >= devbuf->where.start && offset <= buf_end && offset + len - 1 <= buf_end) { - /* Reuse this buffer */ - cached = 1; - devbuf->data_offset = offset - devbuf->where.start; - log_debug_io("Cached read for %" PRIu64 " bytes at %" PRIu64 " on %s (for %s)", - (uint64_t) len, (uint64_t) offset, dev_name(dev), _reason_text(reason)); - ret = 1; - goto out; - } - } + return 0; where.dev = dev; where.start = offset; where.size = len; - ret = _aligned_io(&where, NULL, 0, reason, ioflags, dev_read_callback_fn, callback_context); - if (!ret) { - log_debug("Read from %s failed (for %s).", dev_name(dev), _reason_text(reason)); + ret = _aligned_io(&where, buffer, 0, reason); + if (!ret) _dev_inc_error_count(dev); - } - -out: - /* If we had an error or this was sync I/O, pass the result to any callback fn */ - if ((!ret || !_aio_ctx || !aio_supported_code_path(ioflags) || cached) && dev_read_callback_fn) { - dev_read_callback_fn(!ret, ioflags, callback_context, DEV_DEVBUF_DATA(dev, reason)); - return 1; - } return ret; } -void dev_read_callback(struct device *dev, uint64_t offset, size_t len, dev_io_reason_t reason, - unsigned ioflags, lvm_callback_fn_t 
dev_read_callback_fn, void *callback_context) +/* + * Read from 'dev' into 'buf', possibly in 2 distinct regions, denoted + * by (offset,len) and (offset2,len2). Thus, the total size of + * 'buf' should be len+len2. + */ +int dev_read_circular(struct device *dev, uint64_t offset, size_t len, + uint64_t offset2, size_t len2, dev_io_reason_t reason, char *buf) { - /* Always returns 1 if callback fn is supplied */ - if (!_dev_read_callback(dev, offset, len, reason, ioflags, dev_read_callback_fn, callback_context)) - log_error(INTERNAL_ERROR "_dev_read_callback failed"); -} + if (!dev_read(dev, offset, len, reason, buf)) { + log_error("Read from %s failed", dev_name(dev)); + return 0; + } -/* Returns pointer to read-only buffer. Caller does not free it. */ -const char *dev_read(struct device *dev, uint64_t offset, size_t len, dev_io_reason_t reason) -{ - if (!_dev_read_callback(dev, offset, len, reason, 0, NULL, NULL)) - return_NULL; + /* + * The second region is optional, and allows for + * a circular buffer on the device. + */ + if (!len2) + return 1; - return DEV_DEVBUF_DATA(dev, reason); -} - -/* Read into supplied retbuf owned by the caller. */ -int dev_read_buf(struct device *dev, uint64_t offset, size_t len, dev_io_reason_t reason, void *retbuf) -{ - if (!_dev_read_callback(dev, offset, len, reason, 0, NULL, NULL)) - return_0; - - memcpy(retbuf, DEV_DEVBUF_DATA(dev, reason), len); + if (!dev_read(dev, offset2, len2, reason, buf + len)) { + log_error("Circular read from %s failed", + dev_name(dev)); + return 0; + } return 1; } -/* - * Read from 'dev' in 2 distinct regions, denoted by (offset,len) and (offset2,len2). - * Caller is responsible for dm_free(). 
- */ -const char *dev_read_circular(struct device *dev, uint64_t offset, size_t len, - uint64_t offset2, size_t len2, dev_io_reason_t reason) -{ - char *buf = NULL; - - if (!(buf = dm_malloc(len + len2))) { - log_error("Buffer allocation failed for split metadata."); - return NULL; - } - - if (!dev_read_buf(dev, offset, len, reason, buf)) { - log_error("Read from %s failed.", dev_name(dev)); - dm_free(buf); - return NULL; - } - - if (!dev_read_buf(dev, offset2, len2, reason, buf + len)) { - log_error("Circular read from %s failed.", dev_name(dev)); - dm_free(buf); - return NULL; - } - - return buf; -} - /* FIXME If O_DIRECT can't extend file, dev_extend first; dev_truncate after. * But fails if concurrent processes writing */ @@ -1238,7 +816,7 @@ int dev_write(struct device *dev, uint64_t offset, size_t len, dev_io_reason_t r dev->flags |= DEV_ACCESSED_W; - ret = _aligned_io(&where, buffer, 1, reason, 0, NULL, NULL); + ret = _aligned_io(&where, buffer, 1, reason); if (!ret) _dev_inc_error_count(dev); @@ -1248,7 +826,7 @@ int dev_write(struct device *dev, uint64_t offset, size_t len, dev_io_reason_t r int dev_set(struct device *dev, uint64_t offset, size_t len, dev_io_reason_t reason, int value) { size_t s; - char buffer[4096] __attribute__((aligned(4096))); + char buffer[4096] __attribute__((aligned(8))); if (!dev_open(dev)) return_0; diff --git a/lib/device/dev-luks.c b/lib/device/dev-luks.c index f29161508..8513e1462 100644 --- a/lib/device/dev-luks.c +++ b/lib/device/dev-luks.c @@ -31,7 +31,7 @@ int dev_is_luks(struct device *dev, uint64_t *offset_found) if (offset_found) *offset_found = 0; - if (!dev_read_buf(dev, 0, LUKS_SIGNATURE_SIZE, DEV_IO_SIGNATURES, buf)) + if (!dev_read(dev, 0, LUKS_SIGNATURE_SIZE, DEV_IO_SIGNATURES, buf)) goto_out; ret = memcmp(buf, LUKS_SIGNATURE, LUKS_SIGNATURE_SIZE) ? 
0 : 1; diff --git a/lib/device/dev-md.c b/lib/device/dev-md.c index 1a5d47062..92ee2144b 100644 --- a/lib/device/dev-md.c +++ b/lib/device/dev-md.c @@ -37,7 +37,7 @@ static int _dev_has_md_magic(struct device *dev, uint64_t sb_offset) uint32_t md_magic; /* Version 1 is little endian; version 0.90.0 is machine endian */ - if (dev_read_buf(dev, sb_offset, sizeof(uint32_t), DEV_IO_SIGNATURES, &md_magic) && + if (dev_read(dev, sb_offset, sizeof(uint32_t), DEV_IO_SIGNATURES, &md_magic) && ((md_magic == MD_SB_MAGIC) || ((MD_SB_MAGIC != xlate32(MD_SB_MAGIC)) && (md_magic == xlate32(MD_SB_MAGIC))))) return 1; diff --git a/lib/device/dev-swap.c b/lib/device/dev-swap.c index 094eb05e4..a7ff10bb1 100644 --- a/lib/device/dev-swap.c +++ b/lib/device/dev-swap.c @@ -60,7 +60,8 @@ int dev_is_swap(struct device *dev, uint64_t *offset_found) continue; if (size < (page >> SECTOR_SHIFT)) break; - if (!dev_read_buf(dev, page - SIGNATURE_SIZE, SIGNATURE_SIZE, DEV_IO_SIGNATURES, buf)) { + if (!dev_read(dev, page - SIGNATURE_SIZE, + SIGNATURE_SIZE, DEV_IO_SIGNATURES, buf)) { ret = -1; break; } diff --git a/lib/device/dev-type.c b/lib/device/dev-type.c index b9e77f81e..9608146b9 100644 --- a/lib/device/dev-type.c +++ b/lib/device/dev-type.c @@ -363,7 +363,7 @@ static int _has_partition_table(struct device *dev) uint16_t magic; } __attribute__((packed)) buf; /* sizeof() == SECTOR_SIZE */ - if (!dev_read_buf(dev, UINT64_C(0), sizeof(buf), DEV_IO_SIGNATURES, &buf)) + if (!dev_read(dev, UINT64_C(0), sizeof(buf), DEV_IO_SIGNATURES, &buf)) return_0; /* FIXME Check for other types of partition table too */ diff --git a/lib/device/device.h b/lib/device/device.h index ea71d00a4..503373f88 100644 --- a/lib/device/device.h +++ b/lib/device/device.h @@ -32,18 +32,6 @@ #define DEV_ASSUMED_FOR_LV 0x00000200 /* Is device assumed for an LV */ #define DEV_NOT_O_NOATIME 0x00000400 /* Don't use O_NOATIME */ -/* ioflags */ -#define AIO_SUPPORTED_CODE_PATH 0x00000001 /* Set if the code path supports AIO */ - 
-#define aio_supported_code_path(ioflags) (((ioflags) & AIO_SUPPORTED_CODE_PATH) ? 1 : 0) - -/* - * Standard format for callback functions. - * When provided, callback functions are called exactly once. - * If failed is set, data cannot be accessed. - */ -typedef void (*lvm_callback_fn_t)(int failed, unsigned ioflags, void *context, const void *data); - /* * Support for external device info. * Any new external device info source needs to be @@ -61,48 +49,6 @@ struct dev_ext { void *handle; }; -/* - * All I/O is annotated with the reason it is performed. - */ -typedef enum dev_io_reason { - DEV_IO_SIGNATURES = 0, /* Scanning device signatures */ - DEV_IO_LABEL, /* LVM PV disk label */ - DEV_IO_MDA_HEADER, /* Text format metadata area header */ - DEV_IO_MDA_CONTENT, /* Text format metadata area content */ - DEV_IO_MDA_EXTRA_HEADER, /* Header of any extra metadata areas on device */ - DEV_IO_MDA_EXTRA_CONTENT, /* Content of any extra metadata areas on device */ - DEV_IO_FMT1, /* Original LVM1 metadata format */ - DEV_IO_POOL, /* Pool metadata format */ - DEV_IO_LV, /* Content written to an LV */ - DEV_IO_LOG /* Logging messages */ -} dev_io_reason_t; - -/* - * Is this I/O for a device's extra metadata area? - */ -#define EXTRA_IO(reason) ((reason) == DEV_IO_MDA_EXTRA_HEADER || (reason) == DEV_IO_MDA_EXTRA_CONTENT) -#define DEV_DEVBUF(dev, reason) (EXTRA_IO((reason)) ? 
&(dev)->last_extra_devbuf : &(dev)->last_devbuf) -#define DEV_DEVBUF_DATA(dev, reason) ((char *) DEV_DEVBUF((dev), (reason))->buf + DEV_DEVBUF((dev), (reason))->data_offset) - -struct device_area { - struct device *dev; - uint64_t start; /* Bytes */ - uint64_t size; /* Bytes */ -}; - -struct device_buffer { - uint64_t data_offset; /* Offset to start of requested data within buf */ - void *malloc_address; /* Start of allocated memory */ - void *buf; /* Aligned buffer that contains data within it */ - struct device_area where; /* Location of buf */ - dev_io_reason_t reason; - unsigned write:1; /* 1 if write; 0 if read */ - - lvm_callback_fn_t dev_read_callback_fn; - void *dev_read_callback_context; - struct dm_list aio_queued; /* Queue of async I/O waiting to be issued */ -}; - /* * All devices in LVM will be represented by one of these. * pointer comparisons are valid. @@ -125,8 +71,6 @@ struct device { uint64_t end; struct dm_list open_list; struct dev_ext ext; - struct device_buffer last_devbuf; /* Last data buffer read from the device */ - struct device_buffer last_extra_devbuf; /* Last data buffer read from the device for extra metadata area */ const char *vgid; /* if device is an LV */ const char *lvid; /* if device is an LV */ @@ -135,11 +79,33 @@ struct device { char _padding[7]; }; +/* + * All I/O is annotated with the reason it is performed. 
+ */ +typedef enum dev_io_reason { + DEV_IO_SIGNATURES = 0, /* Scanning device signatures */ + DEV_IO_LABEL, /* LVM PV disk label */ + DEV_IO_MDA_HEADER, /* Text format metadata area header */ + DEV_IO_MDA_CONTENT, /* Text format metadata area content */ + DEV_IO_MDA_EXTRA_HEADER, /* Header of any extra metadata areas on device */ + DEV_IO_MDA_EXTRA_CONTENT, /* Content of any extra metadata areas on device */ + DEV_IO_FMT1, /* Original LVM1 metadata format */ + DEV_IO_POOL, /* Pool metadata format */ + DEV_IO_LV, /* Content written to an LV */ + DEV_IO_LOG /* Logging messages */ +} dev_io_reason_t; + struct device_list { struct dm_list list; struct device *dev; }; +struct device_area { + struct device *dev; + uint64_t start; /* Bytes */ + uint64_t size; /* Bytes */ +}; + /* * Support for external device info. */ @@ -179,19 +145,9 @@ int dev_test_excl(struct device *dev); int dev_fd(struct device *dev); const char *dev_name(const struct device *dev); -/* Returns a read-only buffer */ -const char *dev_read(struct device *dev, uint64_t offset, size_t len, dev_io_reason_t reason); -const char *dev_read_circular(struct device *dev, uint64_t offset, size_t len, - uint64_t offset2, size_t len2, dev_io_reason_t reason); - -/* Passes the data (or error) to dev_read_callback_fn */ -void dev_read_callback(struct device *dev, uint64_t offset, size_t len, dev_io_reason_t reason, - unsigned ioflags, lvm_callback_fn_t dev_read_callback_fn, void *callback_context); - -/* Read data and copy it into a supplied private buffer. */ -/* Only use for tiny reads or on unimportant code paths. 
*/ -int dev_read_buf(struct device *dev, uint64_t offset, size_t len, dev_io_reason_t reason, void *retbuf); - +int dev_read(struct device *dev, uint64_t offset, size_t len, dev_io_reason_t reason, void *buffer); +int dev_read_circular(struct device *dev, uint64_t offset, size_t len, + uint64_t offset2, size_t len2, dev_io_reason_t reason, char *buf); int dev_write(struct device *dev, uint64_t offset, size_t len, dev_io_reason_t reason, void *buffer); int dev_append(struct device *dev, size_t len, dev_io_reason_t reason, char *buffer); int dev_set(struct device *dev, uint64_t offset, size_t len, dev_io_reason_t reason, int value); @@ -201,15 +157,7 @@ struct device *dev_create_file(const char *filename, struct device *dev, struct dm_str_list *alias, int use_malloc); void dev_destroy_file(struct device *dev); -void devbufs_release(struct device *dev); - /* Return a valid device name from the alias list; NULL otherwise */ const char *dev_name_confirmed(struct device *dev, int quiet); -struct cmd_context; -int dev_async_getevents(void); -int dev_async_setup(struct cmd_context *cmd); -void dev_async_exit(void); -int dev_async_reset(struct cmd_context *cmd); - #endif diff --git a/lib/filters/filter-composite.c b/lib/filters/filter-composite.c index 83a0f02bb..c63589640 100644 --- a/lib/filters/filter-composite.c +++ b/lib/filters/filter-composite.c @@ -52,13 +52,13 @@ static void _composite_destroy(struct dev_filter *f) dm_free(f); } -static int _dump(struct dev_filter *f, struct dm_pool *mem, int merge_existing) +static int _dump(struct dev_filter *f, int merge_existing) { struct dev_filter **filters; for (filters = (struct dev_filter **) f->private; *filters; ++filters) if ((*filters)->dump && - !(*filters)->dump(*filters, mem, merge_existing)) + !(*filters)->dump(*filters, merge_existing)) return_0; return 1; diff --git a/lib/filters/filter-persistent.c b/lib/filters/filter-persistent.c index 7542e0200..5bc0861fd 100644 --- a/lib/filters/filter-persistent.c +++ 
b/lib/filters/filter-persistent.c @@ -87,7 +87,7 @@ static int _read_array(struct pfilter *pf, struct dm_config_tree *cft, return 1; } -int persistent_filter_load(struct dm_pool *mem, struct dev_filter *f, struct dm_config_tree **cft_out) +int persistent_filter_load(struct dev_filter *f, struct dm_config_tree **cft_out) { struct pfilter *pf = (struct pfilter *) f->private; struct dm_config_tree *cft; @@ -116,7 +116,7 @@ int persistent_filter_load(struct dm_pool *mem, struct dev_filter *f, struct dm_ if (!(cft = config_open(CONFIG_FILE_SPECIAL, pf->file, 1))) return_0; - if (!config_file_read(mem, cft)) + if (!config_file_read(cft)) goto_out; log_debug_devs("Loading persistent filter cache from %s", pf->file); @@ -175,7 +175,7 @@ static void _write_array(struct pfilter *pf, FILE *fp, const char *path, fprintf(fp, "\n\t]\n"); } -static int _persistent_filter_dump(struct dev_filter *f, struct dm_pool *mem, int merge_existing) +static int _persistent_filter_dump(struct dev_filter *f, int merge_existing) { struct pfilter *pf; char *tmp_file; @@ -234,7 +234,7 @@ static int _persistent_filter_dump(struct dev_filter *f, struct dm_pool *mem, in lvm_stat_ctim(&ts, &info); if (merge_existing && timespeccmp(&ts, &pf->ctime, !=)) /* Keep cft open to avoid losing lock */ - persistent_filter_load(mem, f, &cft); + persistent_filter_load(f, &cft); tmp_file = alloca(strlen(pf->file) + 5); sprintf(tmp_file, "%s.tmp", pf->file); diff --git a/lib/filters/filter.h b/lib/filters/filter.h index 5dbf0b497..d75f6e11c 100644 --- a/lib/filters/filter.h +++ b/lib/filters/filter.h @@ -53,6 +53,6 @@ typedef enum { } filter_mode_t; struct dev_filter *usable_filter_create(struct dev_types *dt, filter_mode_t mode); -int persistent_filter_load(struct dm_pool *mem, struct dev_filter *f, struct dm_config_tree **cft_out); +int persistent_filter_load(struct dev_filter *f, struct dm_config_tree **cft_out); #endif /* _LVM_FILTER_H */ diff --git a/lib/format1/disk-rep.c b/lib/format1/disk-rep.c index 
cf34e91cb..41955afc0 100644 --- a/lib/format1/disk-rep.c +++ b/lib/format1/disk-rep.c @@ -205,7 +205,7 @@ int munge_pvd(struct device *dev, struct pv_disk *pvd) static int _read_pvd(struct device *dev, struct pv_disk *pvd) { - if (!dev_read_buf(dev, UINT64_C(0), sizeof(*pvd), DEV_IO_FMT1, pvd)) { + if (!dev_read(dev, UINT64_C(0), sizeof(*pvd), DEV_IO_FMT1, pvd)) { log_very_verbose("Failed to read PV data from %s", dev_name(dev)); return 0; @@ -216,7 +216,7 @@ static int _read_pvd(struct device *dev, struct pv_disk *pvd) static int _read_lvd(struct device *dev, uint64_t pos, struct lv_disk *disk) { - if (!dev_read_buf(dev, pos, sizeof(*disk), DEV_IO_FMT1, disk)) + if (!dev_read(dev, pos, sizeof(*disk), DEV_IO_FMT1, disk)) return_0; _xlate_lvd(disk); @@ -228,7 +228,7 @@ int read_vgd(struct device *dev, struct vg_disk *vgd, struct pv_disk *pvd) { uint64_t pos = pvd->vg_on_disk.base; - if (!dev_read_buf(dev, pos, sizeof(*vgd), DEV_IO_FMT1, vgd)) + if (!dev_read(dev, pos, sizeof(*vgd), DEV_IO_FMT1, vgd)) return_0; _xlate_vgd(vgd); @@ -252,7 +252,7 @@ static int _read_uuids(struct disk_list *data) uint64_t end = pos + data->pvd.pv_uuidlist_on_disk.size; while (pos < end && num_read < data->vgd.pv_cur) { - if (!dev_read_buf(data->dev, pos, sizeof(buffer), DEV_IO_FMT1, buffer)) + if (!dev_read(data->dev, pos, sizeof(buffer), DEV_IO_FMT1, buffer)) return_0; if (!(ul = dm_pool_alloc(data->mem, sizeof(*ul)))) @@ -311,7 +311,7 @@ static int _read_extents(struct disk_list *data) if (!extents) return_0; - if (!dev_read_buf(data->dev, pos, len, DEV_IO_FMT1, extents)) + if (!dev_read(data->dev, pos, len, DEV_IO_FMT1, extents)) return_0; _xlate_extents(extents, data->pvd.pe_total); diff --git a/lib/format1/format1.c b/lib/format1/format1.c index 7d84e6981..b3569e08e 100644 --- a/lib/format1/format1.c +++ b/lib/format1/format1.c @@ -182,7 +182,7 @@ static struct volume_group *_format1_vg_read(struct format_instance *fid, struct metadata_area *mda __attribute__((unused)), struct 
cached_vg_fmtdata **vg_fmtdata __attribute__((unused)), unsigned *use_previous_vg __attribute__((unused)), - int single_device __attribute__((unused)), unsigned ioflags) + int single_device __attribute__((unused))) { struct volume_group *vg; struct disk_list *dl; diff --git a/lib/format1/lvm1-label.c b/lib/format1/lvm1-label.c index d334fdc2e..3b8a655e9 100644 --- a/lib/format1/lvm1-label.c +++ b/lib/format1/lvm1-label.c @@ -54,17 +54,15 @@ static int _lvm1_write(struct label *label __attribute__((unused)), void *buf __ return 0; } -static int _lvm1_read(struct labeller *l, struct device *dev, void *buf, unsigned ioflags, - lvm_callback_fn_t read_label_callback_fn, void *read_label_callback_context) +static int _lvm1_read(struct labeller *l, struct device *dev, void *buf, + struct label **label) { struct pv_disk *pvd = (struct pv_disk *) buf; struct vg_disk vgd; struct lvmcache_info *info; - struct label *label = NULL; const char *vgid = FMT_LVM1_ORPHAN_VG_NAME; const char *vgname = FMT_LVM1_ORPHAN_VG_NAME; unsigned exported = 0; - int r = 0; munge_pvd(dev, pvd); @@ -78,9 +76,8 @@ static int _lvm1_read(struct labeller *l, struct device *dev, void *buf, unsigne if (!(info = lvmcache_add(l, (char *)pvd->pv_uuid, dev, vgname, vgid, exported))) - goto_out; - - label = lvmcache_get_label(info); + return_0; + *label = lvmcache_get_label(info); lvmcache_set_device_size(info, ((uint64_t)xlate32(pvd->pv_size)) << SECTOR_SHIFT); lvmcache_set_ext_version(info, 0); @@ -89,13 +86,7 @@ static int _lvm1_read(struct labeller *l, struct device *dev, void *buf, unsigne lvmcache_del_bas(info); lvmcache_make_valid(info); - r = 1; - -out: - if (read_label_callback_fn) - read_label_callback_fn(!r, 0, read_label_callback_context, label); - - return r; + return 1; } static int _lvm1_initialise_label(struct labeller *l __attribute__((unused)), struct label *label) diff --git a/lib/format_pool/disk_rep.c b/lib/format_pool/disk_rep.c index 4b2e7fb12..374ff44a0 100644 --- 
a/lib/format_pool/disk_rep.c +++ b/lib/format_pool/disk_rep.c @@ -40,7 +40,7 @@ static int __read_pool_disk(const struct format_type *fmt, struct device *dev, char buf[512] __attribute__((aligned(8))); /* FIXME: Need to check the cache here first */ - if (!dev_read_buf(dev, UINT64_C(0), 512, DEV_IO_POOL, buf)) { + if (!dev_read(dev, UINT64_C(0), 512, DEV_IO_POOL, buf)) { log_very_verbose("Failed to read PV data from %s", dev_name(dev)); return 0; diff --git a/lib/format_pool/format_pool.c b/lib/format_pool/format_pool.c index c6990580b..f6e5e011b 100644 --- a/lib/format_pool/format_pool.c +++ b/lib/format_pool/format_pool.c @@ -103,7 +103,7 @@ static struct volume_group *_pool_vg_read(struct format_instance *fid, struct metadata_area *mda __attribute__((unused)), struct cached_vg_fmtdata **vg_fmtdata __attribute__((unused)), unsigned *use_previous_vg __attribute__((unused)), - int single_device __attribute__((unused)), unsigned ioflags) + int single_device __attribute__((unused))) { struct volume_group *vg; struct user_subpool *usp; diff --git a/lib/format_pool/pool_label.c b/lib/format_pool/pool_label.c index 888a2eb8c..2e30a7b19 100644 --- a/lib/format_pool/pool_label.c +++ b/lib/format_pool/pool_label.c @@ -55,19 +55,12 @@ static int _pool_write(struct label *label __attribute__((unused)), void *buf __ return 0; } -static int _pool_read(struct labeller *l, struct device *dev, void *buf, unsigned ioflags, - lvm_callback_fn_t read_label_callback_fn, void *read_label_callback_context) +static int _pool_read(struct labeller *l, struct device *dev, void *buf, + struct label **label) { struct pool_list pl; - struct label *label; - int r; - r = read_pool_label(&pl, l, dev, buf, &label); - - if (read_label_callback_fn) - read_label_callback_fn(!r, 0, read_label_callback_context, label); - - return r; + return read_pool_label(&pl, l, dev, buf, label); } static int _pool_initialise_label(struct labeller *l __attribute__((unused)), struct label *label) diff --git 
a/lib/format_text/archive.c b/lib/format_text/archive.c index 2186de860..72ec40b66 100644 --- a/lib/format_text/archive.c +++ b/lib/format_text/archive.c @@ -135,8 +135,8 @@ static struct dm_list *_scan_archive(struct dm_pool *mem, dm_list_init(results); - /* Use versionsort to handle numbers beyond 5 digits */ - if ((count = scandir(dir, &dirent, NULL, versionsort)) < 0) { + /* Sort fails beyond 5-digit indexes */ + if ((count = scandir(dir, &dirent, NULL, alphasort)) < 0) { log_error("Couldn't scan the archive directory (%s).", dir); return 0; } diff --git a/lib/format_text/archiver.c b/lib/format_text/archiver.c index 2c8e75145..1eb665436 100644 --- a/lib/format_text/archiver.c +++ b/lib/format_text/archiver.c @@ -320,7 +320,7 @@ struct volume_group *backup_read_vg(struct cmd_context *cmd, } dm_list_iterate_items(mda, &tf->metadata_areas_in_use) { - if (!(vg = mda->ops->vg_read(tf, vg_name, mda, NULL, NULL, 0, 0))) + if (!(vg = mda->ops->vg_read(tf, vg_name, mda, NULL, NULL, 0))) stack; break; } diff --git a/lib/format_text/export.c b/lib/format_text/export.c index 08e1cda43..e5352376d 100644 --- a/lib/format_text/export.c +++ b/lib/format_text/export.c @@ -23,7 +23,6 @@ #include "lvm-version.h" #include "toolcontext.h" #include "config-util.h" -#include "layout.h" #include #include @@ -124,12 +123,11 @@ static int _extend_buffer(struct formatter *f) log_debug_metadata("Doubling metadata output buffer to " FMTu32, f->data.buf.size * 2); - if (!(newbuf = dm_malloc_aligned(f->data.buf.size * 2, 0))) - return_0; - - memcpy(newbuf, f->data.buf.start, f->data.buf.size); - free(f->data.buf.start); - + if (!(newbuf = dm_realloc(f->data.buf.start, + f->data.buf.size * 2))) { + log_error("Buffer reallocation failed."); + return 0; + } f->data.buf.start = newbuf; f->data.buf.size *= 2; @@ -1066,7 +1064,7 @@ size_t text_vg_export_raw(struct volume_group *vg, const char *desc, char **buf) return_0; f->data.buf.size = 65536; /* Initial metadata limit */ - if 
(!(f->data.buf.start = dm_malloc_aligned(f->data.buf.size, 0))) { + if (!(f->data.buf.start = dm_malloc(f->data.buf.size))) { log_error("text_export buffer allocation failed"); goto out; } @@ -1081,12 +1079,7 @@ size_t text_vg_export_raw(struct volume_group *vg, const char *desc, char **buf) goto_out; } - f->data.buf.used += 1; /* Terminating NUL */ - - /* Zero fill up to next alignment boundary */ - memset(f->data.buf.start + f->data.buf.used, 0, MDA_ALIGNMENT - f->data.buf.used % MDA_ALIGNMENT); - - r = f->data.buf.used; + r = f->data.buf.used + 1; *buf = f->data.buf.start; out: diff --git a/lib/format_text/format-text.c b/lib/format_text/format-text.c index 2e7e2b85e..be9a8b906 100644 --- a/lib/format_text/format-text.c +++ b/lib/format_text/format-text.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004-2018 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2012 Red Hat, Inc. All rights reserved. * * This file is part of LVM2. * @@ -37,12 +37,6 @@ #include #include -/* - * Round up offset within buffer to next location that is an exact multiple of alignment. - * (We shouldn't assume the start of the metadata area was aligned the same way when it was created.) 
- */ -#define ALIGN_ABSOLUTE(offset, buffer_start, alignment) ((offset) + (alignment) - UINT64_C(1) - ((buffer_start) + (offset) + (alignment) - UINT64_C(1)) % (alignment)) - static struct format_instance *_text_create_text_instance(const struct format_type *fmt, const struct format_instance_ctx *fic); @@ -182,10 +176,9 @@ static int _pv_analyze_mda_raw (const struct format_type * fmt, uint64_t offset2; size_t size; size_t size2; - const char *buf = NULL; + char *buf=NULL; struct device_area *area; struct mda_context *mdac; - unsigned circular = 0; int r=0; mdac = (struct mda_context *) mda->metadata_locn; @@ -197,7 +190,7 @@ static int _pv_analyze_mda_raw (const struct format_type * fmt, if (!dev_open_readonly(area->dev)) return_0; - if (!(mdah = raw_read_mda_header(fmt->cmd->mem, area, mda_is_primary(mda)))) + if (!(mdah = raw_read_mda_header(fmt, area, mda_is_primary(mda)))) goto_out; rlocn = mdah->raw_locns; @@ -226,7 +219,6 @@ static int _pv_analyze_mda_raw (const struct format_type * fmt, prev_sector); if (prev_sector > prev_sector2) goto_out; - /* * FIXME: for some reason, the whole metadata region from * area->start to area->start+area->size is not used. @@ -235,13 +227,10 @@ static int _pv_analyze_mda_raw (const struct format_type * fmt, * "dm_config_maybe_section" returning true when there's no valid * metadata in a sector (sectors with all nulls). */ + if (!(buf = dm_malloc(size + size2))) + goto_out; - circular = size2 ? 
1 : 0; - - if (circular) { - if (!(buf = dev_read_circular(area->dev, offset, size, offset2, size2, MDA_CONTENT_REASON(mda_is_primary(mda))))) - goto_out; - } else if (!(buf = dev_read(area->dev, offset, size, MDA_CONTENT_REASON(mda_is_primary(mda))))) + if (!dev_read_circular(area->dev, offset, size, offset2, size2, MDA_CONTENT_REASON(mda_is_primary(mda)), buf)) goto_out; /* @@ -272,20 +261,20 @@ static int _pv_analyze_mda_raw (const struct format_type * fmt, size += SECTOR_SIZE; } } - if (circular) - dm_free((void *)buf); + dm_free(buf); buf = NULL; } r = 1; out: - if (circular) - dm_free((void *)buf); + dm_free(buf); if (!dev_close(area->dev)) stack; return r; } + + static int _text_lv_setup(struct format_instance *fid __attribute__((unused)), struct logical_volume *lv) { @@ -326,27 +315,19 @@ static void _xlate_mdah(struct mda_header *mdah) } } -struct process_raw_mda_header_params { - struct mda_header *mdah; - struct device_area dev_area; - lvm_callback_fn_t mdah_callback_fn; - void *mdah_callback_context; - int ret; -}; - -static void _process_raw_mda_header(int failed, unsigned ioflags, void *context, const void *data) +static int _raw_read_mda_header(struct mda_header *mdah, struct device_area *dev_area, int primary_mda) { - struct process_raw_mda_header_params *prmp = context; - struct mda_header *mdah = prmp->mdah; - struct device_area *dev_area = &prmp->dev_area; + if (!dev_open_readonly(dev_area->dev)) + return_0; + + if (!dev_read(dev_area->dev, dev_area->start, MDA_HEADER_SIZE, MDA_HEADER_REASON(primary_mda), mdah)) { + if (!dev_close(dev_area->dev)) + stack; + return_0; + } if (!dev_close(dev_area->dev)) - goto_bad; - - if (failed) - goto_bad; - - memcpy(mdah, data, MDA_HEADER_SIZE); + return_0; if (mdah->checksum_xl != xlate32(calc_crc(INITIAL_CRC, (uint8_t *)mdah->magic, MDA_HEADER_SIZE - @@ -354,7 +335,7 @@ static void _process_raw_mda_header(int failed, unsigned ioflags, void *context, log_error("Incorrect metadata area header checksum on %s" " 
at offset " FMTu64, dev_name(dev_area->dev), dev_area->start); - goto bad; + return 0; } _xlate_mdah(mdah); @@ -363,83 +344,42 @@ static void _process_raw_mda_header(int failed, unsigned ioflags, void *context, log_error("Wrong magic number in metadata area header on %s" " at offset " FMTu64, dev_name(dev_area->dev), dev_area->start); - goto bad; + return 0; } if (mdah->version != FMTT_VERSION) { log_error("Incompatible metadata area header version: %d on %s" " at offset " FMTu64, mdah->version, dev_name(dev_area->dev), dev_area->start); - goto bad; + return 0; } if (mdah->start != dev_area->start) { log_error("Incorrect start sector in metadata area header: " FMTu64 " on %s at offset " FMTu64, mdah->start, dev_name(dev_area->dev), dev_area->start); - goto bad; + return 0; } - goto out; - -bad: - prmp->ret = 0; -out: - if (prmp->mdah_callback_fn) - prmp->mdah_callback_fn(!prmp->ret, ioflags, prmp->mdah_callback_context, mdah); + return 1; } -static struct mda_header *_raw_read_mda_header(struct dm_pool *mem, struct device_area *dev_area, int primary_mda, - unsigned ioflags, lvm_callback_fn_t mdah_callback_fn, void *mdah_callback_context) +struct mda_header *raw_read_mda_header(const struct format_type *fmt, + struct device_area *dev_area, int primary_mda) { struct mda_header *mdah; - struct process_raw_mda_header_params *prmp; - if (!(mdah = dm_pool_alloc(mem, MDA_HEADER_SIZE))) { + if (!(mdah = dm_pool_alloc(fmt->cmd->mem, MDA_HEADER_SIZE))) { log_error("struct mda_header allocation failed"); return NULL; } - if (!(prmp = dm_pool_zalloc(mem, sizeof (*prmp)))) { - log_error("struct process_raw_mda_header_params allocation failed"); - dm_pool_free(mem, mdah); + if (!_raw_read_mda_header(mdah, dev_area, primary_mda)) { + dm_pool_free(fmt->cmd->mem, mdah); return NULL; } - if (!dev_open_readonly(dev_area->dev)) { - dm_pool_free(mem, mdah); - return_NULL; - } - - prmp->mdah = mdah; - prmp->dev_area = *dev_area; - prmp->mdah_callback_fn = mdah_callback_fn; - 
prmp->mdah_callback_context = mdah_callback_context; - prmp->ret = 1; - - dev_read_callback(dev_area->dev, dev_area->start, MDA_HEADER_SIZE, MDA_HEADER_REASON(primary_mda), - ioflags, _process_raw_mda_header, prmp); - if (mdah_callback_fn) - return mdah; - - if (!prmp->ret) - return_NULL; - else - return mdah; -} - -struct mda_header *raw_read_mda_header(struct dm_pool *mem, struct device_area *dev_area, int primary_mda) -{ - return _raw_read_mda_header(mem, dev_area, primary_mda, 0, NULL, NULL); -} - -int raw_read_mda_header_callback(struct dm_pool *mem, struct device_area *dev_area, int primary_mda, - unsigned ioflags, lvm_callback_fn_t mdah_callback_fn, void *mdah_callback_context) -{ - if (!_raw_read_mda_header(mem, dev_area, primary_mda, ioflags, mdah_callback_fn, mdah_callback_context)) - return_0; - - return 1; + return mdah; } static int _raw_write_mda_header(const struct format_type *fmt, @@ -467,13 +407,13 @@ static struct raw_locn *_find_vg_rlocn(struct device_area *dev_area, int *precommitted) { size_t len; + char vgnamebuf[NAME_LEN + 2] __attribute__((aligned(8))); struct raw_locn *rlocn, *rlocn_precommitted; struct lvmcache_info *info; struct lvmcache_vgsummary vgsummary_orphan = { .vgname = FMT_TEXT_ORPHAN_VG_NAME, }; int rlocn_was_ignored; - const char *buf; memcpy(&vgsummary_orphan.vgid, FMT_TEXT_ORPHAN_VG_NAME, sizeof(FMT_TEXT_ORPHAN_VG_NAME)); @@ -508,12 +448,12 @@ static struct raw_locn *_find_vg_rlocn(struct device_area *dev_area, /* FIXME Loop through rlocns two-at-a-time. List null-terminated. */ /* FIXME Ignore if checksum incorrect!!! 
*/ - if (!(buf = dev_read(dev_area->dev, dev_area->start + rlocn->offset, - NAME_LEN + 2, MDA_CONTENT_REASON(primary_mda)))) + if (!dev_read(dev_area->dev, dev_area->start + rlocn->offset, + sizeof(vgnamebuf), MDA_CONTENT_REASON(primary_mda), vgnamebuf)) goto_bad; - if (!strncmp(buf, vgname, len = strlen(vgname)) && - (isspace(*(buf + len)) || *(buf + len) == '{')) + if (!strncmp(vgnamebuf, vgname, len = strlen(vgname)) && + (isspace(vgnamebuf[len]) || vgnamebuf[len] == '{')) return rlocn; log_debug_metadata("Volume group name found in %smetadata on %s at " FMTu64 " does " @@ -530,46 +470,25 @@ static struct raw_locn *_find_vg_rlocn(struct device_area *dev_area, } /* - * Find first aligned offset after end of existing metadata. - * Based on the alignment provided, this is the exact offset to use for the new metadata. - * The caller is responsible for validating the result. + * Determine offset for uncommitted metadata */ static uint64_t _next_rlocn_offset(struct raw_locn *rlocn, struct mda_header *mdah, uint64_t mdac_area_start, uint64_t alignment) { - uint64_t old_end, new_start_offset; - int old_wrapped = 0; /* Does the old metadata wrap around? 
*/ + uint64_t new_start_offset; if (!rlocn) /* Find an empty slot */ - /* FIXME Assumes only one VG per mdah for now */ - return ALIGN_ABSOLUTE(MDA_HEADER_SIZE, mdac_area_start, alignment); + /* FIXME Assume only one VG per mdah for now */ + return alignment; - /* First find the end of the old metadata */ - old_end = rlocn->offset + rlocn->size; + /* Calculate new start position within buffer rounded up to absolute alignment */ + new_start_offset = rlocn->offset + rlocn->size + + (alignment - (mdac_area_start + rlocn->offset + rlocn->size) % alignment); - if (old_end > mdah->size) { - old_wrapped = 1; - old_end -= (mdah->size - MDA_HEADER_SIZE); - } + /* If new location is beyond the end of the buffer, wrap around back to start of circular buffer */ + if (new_start_offset > mdah->size - MDA_HEADER_SIZE) + new_start_offset -= (mdah->size - MDA_HEADER_SIZE); - /* Calculate new start position relative to start of buffer rounded up to absolute alignment */ - new_start_offset = ALIGN_ABSOLUTE(old_end, mdac_area_start, alignment); - - /* If new location is beyond the end of the buffer, return to start of circular buffer and realign */ - if (new_start_offset >= mdah->size) { - /* If the start of the buffer is occupied, move past it */ - if (old_wrapped || rlocn->offset == MDA_HEADER_SIZE) - new_start_offset = old_end; - else - new_start_offset = MDA_HEADER_SIZE; - - new_start_offset = ALIGN_ABSOLUTE(new_start_offset, mdac_area_start, alignment); - } - - /* - * Note that we don't check here that this location isn't inside the existing metadata. - * If it is, then it means this value of alignment cannot be used. 
- */ return new_start_offset; } @@ -583,7 +502,7 @@ static int _raw_holds_vgname(struct format_instance *fid, if (!dev_open_readonly(dev_area->dev)) return_0; - if (!(mdah = raw_read_mda_header(fid->fmt->cmd->mem, dev_area, 0))) + if (!(mdah = raw_read_mda_header(fid->fmt, dev_area, 0))) return_0; if (_find_vg_rlocn(dev_area, mdah, 0, vgname, &noprecommit)) @@ -600,7 +519,7 @@ static struct volume_group *_vg_read_raw_area(struct format_instance *fid, struct device_area *area, struct cached_vg_fmtdata **vg_fmtdata, unsigned *use_previous_vg, - int precommitted, unsigned ioflags, + int precommitted, int single_device, int primary_mda) { struct volume_group *vg = NULL; @@ -610,7 +529,7 @@ static struct volume_group *_vg_read_raw_area(struct format_instance *fid, char *desc; uint32_t wrap = 0; - if (!(mdah = raw_read_mda_header(fid->fmt->cmd->mem, area, primary_mda))) + if (!(mdah = raw_read_mda_header(fid->fmt, area, primary_mda))) goto_out; if (!(rlocn = _find_vg_rlocn(area, mdah, primary_mda, vgname, &precommitted))) { @@ -633,7 +552,7 @@ static struct volume_group *_vg_read_raw_area(struct format_instance *fid, (off_t) (area->start + rlocn->offset), (uint32_t) (rlocn->size - wrap), (off_t) (area->start + MDA_HEADER_SIZE), - wrap, calc_crc, rlocn->checksum, ioflags, &when, + wrap, calc_crc, rlocn->checksum, &when, &desc)) && (!use_previous_vg || !*use_previous_vg)) goto_out; @@ -660,7 +579,7 @@ static struct volume_group *_vg_read_raw(struct format_instance *fid, struct metadata_area *mda, struct cached_vg_fmtdata **vg_fmtdata, unsigned *use_previous_vg, - int single_device, unsigned ioflags) + int single_device) { struct mda_context *mdac = (struct mda_context *) mda->metadata_locn; struct volume_group *vg; @@ -668,7 +587,7 @@ static struct volume_group *_vg_read_raw(struct format_instance *fid, if (!dev_open_readonly(mdac->area.dev)) return_NULL; - vg = _vg_read_raw_area(fid, vgname, &mdac->area, vg_fmtdata, use_previous_vg, 0, ioflags, single_device, 
mda_is_primary(mda)); + vg = _vg_read_raw_area(fid, vgname, &mdac->area, vg_fmtdata, use_previous_vg, 0, single_device, mda_is_primary(mda)); if (!dev_close(mdac->area.dev)) stack; @@ -680,7 +599,7 @@ static struct volume_group *_vg_read_precommit_raw(struct format_instance *fid, const char *vgname, struct metadata_area *mda, struct cached_vg_fmtdata **vg_fmtdata, - unsigned *use_previous_vg, unsigned ioflags) + unsigned *use_previous_vg) { struct mda_context *mdac = (struct mda_context *) mda->metadata_locn; struct volume_group *vg; @@ -688,7 +607,7 @@ static struct volume_group *_vg_read_precommit_raw(struct format_instance *fid, if (!dev_open_readonly(mdac->area.dev)) return_NULL; - vg = _vg_read_raw_area(fid, vgname, &mdac->area, vg_fmtdata, use_previous_vg, 1, ioflags, 0, mda_is_primary(mda)); + vg = _vg_read_raw_area(fid, vgname, &mdac->area, vg_fmtdata, use_previous_vg, 1, 0, mda_is_primary(mda)); if (!dev_close(mdac->area.dev)) stack; @@ -696,59 +615,6 @@ static struct volume_group *_vg_read_precommit_raw(struct format_instance *fid, return vg; } -static int _metadata_fits_into_buffer(struct mda_context *mdac, struct mda_header *mdah, - struct raw_locn *rlocn, uint64_t new_wrap) -{ - uint64_t old_wrap = 0; /* Amount of wrap around in existing metadata */ - uint64_t old_end = 0; /* The (byte after the) end of the existing metadata */ - uint64_t new_end; /* The (byte after the) end of the new metadata */ - uint64_t old_start = 0; /* The start of the existing metadata */ - uint64_t new_start = mdac->rlocn.offset; /* The proposed start of the new metadata */ - - /* - * If the (aligned) start of the new metadata is already beyond the end - * of the buffer this means it didn't fit with the given alignment. - * (The caller has already tried to wrap it back to the start - * of the buffer but the alignment pushed it back outside.) - */ - if (new_start >= mdah->size) - return_0; - - /* Does the total amount of metadata, old and new, fit inside the buffer? 
*/ - if (MDA_HEADER_SIZE + (rlocn ? rlocn->size : 0) + mdac->rlocn.size >= mdah->size) - return_0; - - /* If there's existing metadata, set old_start, old_end and old_wrap. */ - if (rlocn) { - old_start = rlocn->offset; - old_end = old_start + rlocn->size; - - /* Does the existing metadata wrap around the end of the buffer? */ - if (old_end > mdah->size) - old_wrap = old_end - mdah->size; - } - - new_end = new_wrap ? new_wrap + MDA_HEADER_SIZE : new_start + mdac->rlocn.size; - - /* If both wrap around, there's necessarily overlap */ - if (new_wrap && old_wrap) - return_0; - - /* If there's no existing metadata, we're OK */ - if (!rlocn) - return 1; - - /* If either wraps around, there's overlap if the new end falls beyond the old start */ - if ((new_wrap || old_wrap) && (new_end > old_start)) - return_0; - - /* If there's no wrap, check there's no overlap */ - if (!new_wrap && !old_wrap && (old_end > new_start) && (old_start < new_end)) - return_0; - - return 1; -} - static int _vg_write_raw(struct format_instance *fid, struct volume_group *vg, struct metadata_area *mda) { @@ -758,12 +624,10 @@ static int _vg_write_raw(struct format_instance *fid, struct volume_group *vg, struct mda_header *mdah; struct pv_list *pvl; int r = 0; - uint64_t new_wrap = 0; /* Number of bytes of new metadata that wrap around to start of buffer */ - uint64_t alignment = MDA_ALIGNMENT; + uint64_t new_wrap = 0, old_wrap = 0, new_end; int found = 0; int noprecommit = 0; const char *old_vg_name = NULL; - uint64_t new_size_rounded = 0; /* Ignore any mda on a PV outside the VG. vgsplit relies on this */ dm_list_iterate_items(pvl, &vg->pvs) { @@ -778,19 +642,12 @@ static int _vg_write_raw(struct format_instance *fid, struct volume_group *vg, if (!found) return 1; - /* - * This is paired with the following closes: - * - at the end of this fn if returning 0 - * - in _vg_commit_raw_rlocn regardless of return code - * which handles commit (but not pre-commit) and revert. 
- */ if (!dev_open(mdac->area.dev)) return_0; - if (!(mdah = raw_read_mda_header(fid->fmt->cmd->mem, &mdac->area, mda_is_primary(mda)))) + if (!(mdah = raw_read_mda_header(fid->fmt, &mdac->area, mda_is_primary(mda)))) goto_out; - /* Following space is zero-filled up to the next MDA_ALIGNMENT boundary */ if (!fidtc->raw_metadata_buf && !(fidtc->raw_metadata_buf_size = text_vg_export_raw(vg, "", &fidtc->raw_metadata_buf))) { @@ -800,64 +657,37 @@ static int _vg_write_raw(struct format_instance *fid, struct volume_group *vg, rlocn = _find_vg_rlocn(&mdac->area, mdah, mda_is_primary(mda), old_vg_name ? : vg->name, &noprecommit); + mdac->rlocn.offset = _next_rlocn_offset(rlocn, mdah, mdac->area.start, MDA_ORIGINAL_ALIGNMENT); mdac->rlocn.size = fidtc->raw_metadata_buf_size; - /* Find where the new metadata would be written with our preferred alignment */ - mdac->rlocn.offset = _next_rlocn_offset(rlocn, mdah, mdac->area.start, alignment); - - /* If metadata extends beyond the buffer, return to the start instead of wrapping it */ if (mdac->rlocn.offset + mdac->rlocn.size > mdah->size) - mdac->rlocn.offset = ALIGN_ABSOLUTE(MDA_HEADER_SIZE, mdac->area.start, alignment); + new_wrap = (mdac->rlocn.offset + mdac->rlocn.size) - mdah->size; - /* - * If the metadata doesn't fit into the buffer correctly with these - * settings, fall back to the 512-byte alignment used by the original - * LVM2 code and allow the metadata to be split into two parts, - * wrapping around from the end of the circular buffer back to the - * beginning. - */ - if (!_metadata_fits_into_buffer(mdac, mdah, rlocn, 0)) { - alignment = MDA_ORIGINAL_ALIGNMENT; - mdac->rlocn.offset = _next_rlocn_offset(rlocn, mdah, mdac->area.start, alignment); + if (rlocn && (rlocn->offset + rlocn->size > mdah->size)) + old_wrap = (rlocn->offset + rlocn->size) - mdah->size; - /* Does the new metadata wrap around? 
*/ - if (mdac->rlocn.offset + mdac->rlocn.size > mdah->size) - new_wrap = (mdac->rlocn.offset + mdac->rlocn.size) - mdah->size; - else - new_wrap = 0; + new_end = new_wrap ? new_wrap + MDA_HEADER_SIZE : + mdac->rlocn.offset + mdac->rlocn.size; - if (!_metadata_fits_into_buffer(mdac, mdah, rlocn, new_wrap)) { - log_error("VG %s metadata on %s (" FMTu64 " bytes) too large for circular buffer (" FMTu64 " bytes with " FMTu64 " used)", - vg->name, dev_name(mdac->area.dev), mdac->rlocn.size, mdah->size - MDA_HEADER_SIZE, rlocn ? rlocn->size : 0); - goto out; - } - - new_size_rounded = mdac->rlocn.size; - } else { - /* Round up to a multiple of the new alignment */ - if (mdac->rlocn.offset + new_size_rounded < mdah->size) - new_size_rounded = (mdac->rlocn.size | (alignment - 1)) + 1; - else - new_size_rounded = mdac->rlocn.size; + if ((new_wrap && old_wrap) || + (rlocn && (new_wrap || old_wrap) && (new_end > rlocn->offset)) || + (MDA_HEADER_SIZE + (rlocn ? rlocn->size : 0) + mdac->rlocn.size >= mdah->size)) { + log_error("VG %s metadata on %s (" FMTu64 " bytes) too large for circular buffer (" FMTu64 " bytes with " FMTu64 " used)", + vg->name, dev_name(mdac->area.dev), mdac->rlocn.size, mdah->size - MDA_HEADER_SIZE, rlocn ? 
rlocn->size : 0); + goto out; } - log_debug_metadata("Writing %s metadata to %s at " FMTu64 " len " FMTu64 " (rounded to " FMTu64 ") of " FMTu64 " aligned to " FMTu64, + log_debug_metadata("Writing %s metadata to %s at " FMTu64 " len " FMTu64 " of " FMTu64, vg->name, dev_name(mdac->area.dev), mdac->area.start + - mdac->rlocn.offset, mdac->rlocn.size - new_wrap, new_size_rounded, mdac->rlocn.size, alignment); + mdac->rlocn.offset, mdac->rlocn.size - new_wrap, mdac->rlocn.size); - if (!new_wrap) { - /* Write text out, in alignment-sized blocks */ - if (!dev_write(mdac->area.dev, mdac->area.start + mdac->rlocn.offset, - (size_t) new_size_rounded, MDA_CONTENT_REASON(mda_is_primary(mda)), - fidtc->raw_metadata_buf)) - goto_out; - } else { - /* Write text out, circularly */ - if (!dev_write(mdac->area.dev, mdac->area.start + mdac->rlocn.offset, - (size_t) (mdac->rlocn.size - new_wrap), MDA_CONTENT_REASON(mda_is_primary(mda)), - fidtc->raw_metadata_buf)) - goto_out; + /* Write text out, circularly */ + if (!dev_write(mdac->area.dev, mdac->area.start + mdac->rlocn.offset, + (size_t) (mdac->rlocn.size - new_wrap), MDA_CONTENT_REASON(mda_is_primary(mda)), + fidtc->raw_metadata_buf)) + goto_out; + if (new_wrap) { log_debug_metadata("Writing wrapped metadata to %s at " FMTu64 " len " FMTu64 " of " FMTu64, dev_name(mdac->area.dev), mdac->area.start + MDA_HEADER_SIZE, new_wrap, mdac->rlocn.size); @@ -919,7 +749,7 @@ static int _vg_commit_raw_rlocn(struct format_instance *fid, if (!found) return 1; - if (!(mdah = raw_read_mda_header(fid->fmt->cmd->mem, &mdac->area, mda_is_primary(mda)))) + if (!(mdah = raw_read_mda_header(fid->fmt, &mdac->area, mda_is_primary(mda)))) goto_out; if (!(rlocn = _find_vg_rlocn(&mdac->area, mdah, mda_is_primary(mda), old_vg_name ? 
: vg->name, &noprecommit))) { @@ -957,9 +787,10 @@ static int _vg_commit_raw_rlocn(struct format_instance *fid, rlocn->offset = mdac->rlocn.offset; rlocn->size = mdac->rlocn.size; rlocn->checksum = mdac->rlocn.checksum; - log_debug_metadata("%sCommitting %s %smetadata (%u) to %s header at " FMTu64 " (offset " FMTu64 ", size " FMTu64 ")", - precommit ? "Pre-" : "", vg->name, mda_is_ignored(mda) ? "(ignored) " : "", vg->seqno, - dev_name(mdac->area.dev), mdac->area.start, mdac->rlocn.offset, mdac->rlocn.size); + log_debug_metadata("%sCommitting %s %smetadata (%u) to %s header at " + FMTu64, precommit ? "Pre-" : "", vg->name, + mda_is_ignored(mda) ? "(ignored) " : "", vg->seqno, + dev_name(mdac->area.dev), mdac->area.start); } else log_debug_metadata("Wiping pre-committed %s %smetadata from %s " "header at " FMTu64, vg->name, @@ -979,7 +810,6 @@ static int _vg_commit_raw_rlocn(struct format_instance *fid, out: if (!precommit) { - /* This is an paired with the open at the start of _vg_write_raw */ if (!dev_close(mdac->area.dev)) stack; @@ -1039,7 +869,7 @@ static int _vg_remove_raw(struct format_instance *fid, struct volume_group *vg, if (!dev_open(mdac->area.dev)) return_0; - if (!(mdah = raw_read_mda_header(fid->fmt->cmd->mem, &mdac->area, mda_is_primary(mda)))) + if (!(mdah = raw_read_mda_header(fid->fmt, &mdac->area, mda_is_primary(mda)))) goto_out; if (!(rlocn = _find_vg_rlocn(&mdac->area, mdah, mda_is_primary(mda), vg->name, &noprecommit))) { @@ -1102,8 +932,7 @@ static struct volume_group *_vg_read_file(struct format_instance *fid, struct metadata_area *mda, struct cached_vg_fmtdata **vg_fmtdata, unsigned *use_previous_vg __attribute__((unused)), - int single_device __attribute__((unused)), - unsigned ioflags __attribute__((unused))) + int single_device __attribute__((unused))) { struct text_context *tc = (struct text_context *) mda->metadata_locn; @@ -1114,8 +943,7 @@ static struct volume_group *_vg_read_precommit_file(struct format_instance *fid, const char 
*vgname, struct metadata_area *mda, struct cached_vg_fmtdata **vg_fmtdata, - unsigned *use_previous_vg __attribute__((unused)), - unsigned ioflags __attribute__((unused))) + unsigned *use_previous_vg __attribute__((unused))) { struct text_context *tc = (struct text_context *) mda->metadata_locn; struct volume_group *vg; @@ -1347,137 +1175,16 @@ static int _scan_file(const struct format_type *fmt, const char *vgname) return 1; } -struct vgname_from_mda_params{ - const struct format_type *fmt; - const struct mda_header *mdah; - struct device_area *dev_area; - int primary_mda; - struct lvmcache_vgsummary *vgsummary; - uint64_t *mda_free_sectors; - lvm_callback_fn_t update_vgsummary_fn; - void *update_vgsummary_context; - uint32_t wrap; - unsigned used_cached_metadata; - int ret; -}; - -static void _vgname_from_mda_process(int failed, unsigned ioflags, void *context, const void *data) -{ - struct vgname_from_mda_params *vfmp = context; - const struct mda_header *mdah = vfmp->mdah; - struct device_area *dev_area = vfmp->dev_area; - struct lvmcache_vgsummary *vgsummary = vfmp->vgsummary; - uint64_t *mda_free_sectors = vfmp->mda_free_sectors; - const struct raw_locn *rlocn = mdah->raw_locns; - uint64_t buffer_size, current_usage; - - if (failed) { - vfmp->ret = 0; - goto_out; - } - - /* Ignore this entry if the characters aren't permissible */ - if (!validate_name(vgsummary->vgname)) { - vfmp->ret = 0; - goto_out; - } - - log_debug_metadata("%s: %s metadata at " FMTu64 " size " FMTu64 " with wrap " FMTu32 - " (in area at " FMTu64 " size " FMTu64 - ") for %s (" FMTVGID ")", - dev_name(dev_area->dev), - vfmp->used_cached_metadata ? 
"Using cached" : "Found", - dev_area->start + rlocn->offset, - rlocn->size, vfmp->wrap, dev_area->start, dev_area->size, vgsummary->vgname, - (char *)&vgsummary->vgid); - - if (mda_free_sectors) { - current_usage = ALIGN_ABSOLUTE(rlocn->size, dev_area->start + rlocn->offset, MDA_ALIGNMENT); - - buffer_size = mdah->size - MDA_HEADER_SIZE; - - if (current_usage * 2 >= buffer_size) - *mda_free_sectors = UINT64_C(0); - else - *mda_free_sectors = ((buffer_size - 2 * current_usage) / 2) >> SECTOR_SHIFT; - } - -out: - vfmp->update_vgsummary_fn(!vfmp->ret, ioflags, vfmp->update_vgsummary_context, vfmp->vgsummary); -} - -static void _vgname_from_mda_validate(int failed, unsigned ioflags, void *context, const void *data) -{ - struct vgname_from_mda_params *vfmp = context; - const char *buffer = data; - const struct format_type *fmt = vfmp->fmt; - const struct mda_header *mdah = vfmp->mdah; - struct device_area *dev_area = vfmp->dev_area; - struct lvmcache_vgsummary *vgsummary = vfmp->vgsummary; - const struct raw_locn *rlocn = mdah->raw_locns; - unsigned len = 0; - char buf[NAME_LEN + 1] __attribute__((aligned(8))); - - if (failed) { - vfmp->ret = 0; - goto_out; - } - - memcpy(buf, buffer, NAME_LEN); - - while (buf[len] && !isspace(buf[len]) && buf[len] != '{' && - len < (NAME_LEN - 1)) - len++; - - buf[len] = '\0'; - - /* Ignore this entry if the characters aren't permissible */ - if (!validate_name(buf)) { - vfmp->ret = 0; - goto_out; - } - - /* We found a VG - now check the metadata */ - if (rlocn->offset + rlocn->size > mdah->size) - vfmp->wrap = (uint32_t) ((rlocn->offset + rlocn->size) - mdah->size); - - if (vfmp->wrap > rlocn->offset) { - log_error("%s: metadata (" FMTu64 " bytes) too large for circular buffer (" FMTu64 " bytes)", - dev_name(dev_area->dev), rlocn->size, mdah->size - MDA_HEADER_SIZE); - vfmp->ret = 0; - goto out; - } - - /* Did we see this metadata before? 
*/ - vgsummary->mda_checksum = rlocn->checksum; - vgsummary->mda_size = rlocn->size; - - if (lvmcache_lookup_mda(vgsummary)) - vfmp->used_cached_metadata = 1; - - /* FIXME 64-bit */ - if (!text_vgsummary_import(fmt, dev_area->dev, MDA_CONTENT_REASON(vfmp->primary_mda), - (off_t) (dev_area->start + rlocn->offset), - (uint32_t) (rlocn->size - vfmp->wrap), - (off_t) (dev_area->start + MDA_HEADER_SIZE), - vfmp->wrap, calc_crc, vgsummary->vgname ? 1 : 0, ioflags, - vgsummary, _vgname_from_mda_process, vfmp)) { - vfmp->ret = 0; - goto_out; - } - -out: - if (!vfmp->ret && vfmp->update_vgsummary_fn) - vfmp->update_vgsummary_fn(1, ioflags, vfmp->update_vgsummary_context, vfmp->vgsummary); -} - int vgname_from_mda(const struct format_type *fmt, - const struct mda_header *mdah, int primary_mda, struct device_area *dev_area, - struct lvmcache_vgsummary *vgsummary, uint64_t *mda_free_sectors, unsigned ioflags, - lvm_callback_fn_t update_vgsummary_fn, void *update_vgsummary_context) + struct mda_header *mdah, int primary_mda, struct device_area *dev_area, + struct lvmcache_vgsummary *vgsummary, uint64_t *mda_free_sectors) { - const struct raw_locn *rlocn; - struct vgname_from_mda_params *vfmp; + struct raw_locn *rlocn; + uint32_t wrap = 0; + unsigned int len = 0; + char buf[NAME_LEN + 1] __attribute__((aligned(8))); + uint64_t buffer_size, current_usage; + unsigned used_cached_metadata = 0; if (mda_free_sectors) *mda_free_sectors = ((dev_area->size - MDA_HEADER_SIZE) / 2) >> SECTOR_SHIFT; @@ -1499,29 +1206,72 @@ int vgname_from_mda(const struct format_type *fmt, return 0; } - if (!(vfmp = dm_pool_zalloc(fmt->cmd->mem, sizeof(*vfmp)))) { - log_error("vgname_from_mda_params allocation failed"); + /* Do quick check for a vgname */ + if (!dev_read(dev_area->dev, dev_area->start + rlocn->offset, + NAME_LEN, MDA_CONTENT_REASON(primary_mda), buf)) + return_0; + + while (buf[len] && !isspace(buf[len]) && buf[len] != '{' && + len < (NAME_LEN - 1)) + len++; + + buf[len] = '\0'; + + /* 
Ignore this entry if the characters aren't permissible */ + if (!validate_name(buf)) + return_0; + + /* We found a VG - now check the metadata */ + if (rlocn->offset + rlocn->size > mdah->size) + wrap = (uint32_t) ((rlocn->offset + rlocn->size) - mdah->size); + + if (wrap > rlocn->offset) { + log_error("%s: metadata (" FMTu64 " bytes) too large for circular buffer (" FMTu64 " bytes)", + dev_name(dev_area->dev), rlocn->size, mdah->size - MDA_HEADER_SIZE); return 0; } - vfmp->fmt = fmt; - vfmp->mdah = mdah; - vfmp->dev_area = dev_area; - vfmp->vgsummary = vgsummary; - vfmp->primary_mda = primary_mda; - vfmp->mda_free_sectors = mda_free_sectors; - vfmp->update_vgsummary_fn = update_vgsummary_fn; - vfmp->update_vgsummary_context = update_vgsummary_context; - vfmp->ret = 1; + /* Did we see this metadata before? */ + vgsummary->mda_checksum = rlocn->checksum; + vgsummary->mda_size = rlocn->size; - /* Do quick check for a vgname */ - /* We cannot read the full metadata here because the name has to be validated before we use the size field */ - dev_read_callback(dev_area->dev, dev_area->start + rlocn->offset, NAME_LEN, MDA_CONTENT_REASON(primary_mda), - ioflags, _vgname_from_mda_validate, vfmp); - if (update_vgsummary_fn) - return 1; - else - return vfmp->ret; + if (lvmcache_lookup_mda(vgsummary)) + used_cached_metadata = 1; + + /* FIXME 64-bit */ + if (!text_vgsummary_import(fmt, dev_area->dev, MDA_CONTENT_REASON(primary_mda), + (off_t) (dev_area->start + rlocn->offset), + (uint32_t) (rlocn->size - wrap), + (off_t) (dev_area->start + MDA_HEADER_SIZE), + wrap, calc_crc, vgsummary->vgname ? 1 : 0, + vgsummary)) + return_0; + + /* Ignore this entry if the characters aren't permissible */ + if (!validate_name(vgsummary->vgname)) + return_0; + + log_debug_metadata("%s: %s metadata at " FMTu64 " size " FMTu64 + " (in area at " FMTu64 " size " FMTu64 + ") for %s (" FMTVGID ")", + dev_name(dev_area->dev), + used_cached_metadata ? 
"Using cached" : "Found", + dev_area->start + rlocn->offset, + rlocn->size, dev_area->start, dev_area->size, vgsummary->vgname, + (char *)&vgsummary->vgid); + + if (mda_free_sectors) { + current_usage = (rlocn->size + SECTOR_SIZE - UINT64_C(1)) - + (rlocn->size + SECTOR_SIZE - UINT64_C(1)) % SECTOR_SIZE; + buffer_size = mdah->size - MDA_HEADER_SIZE; + + if (current_usage * 2 >= buffer_size) + *mda_free_sectors = UINT64_C(0); + else + *mda_free_sectors = ((buffer_size - 2 * current_usage) / 2) >> SECTOR_SHIFT; + } + + return 1; } static int _scan_raw(const struct format_type *fmt, const char *vgname __attribute__((unused))) @@ -1546,14 +1296,14 @@ static int _scan_raw(const struct format_type *fmt, const char *vgname __attribu continue; } - if (!(mdah = raw_read_mda_header(fmt->cmd->mem, &rl->dev_area, 0))) { + if (!(mdah = raw_read_mda_header(fmt, &rl->dev_area, 0))) { stack; goto close_dev; } /* TODO: caching as in vgname_from_mda() (trigger this code?) */ - if (vgname_from_mda(fmt, mdah, 0, &rl->dev_area, &vgsummary, NULL, 0, NULL, NULL)) { - vg = _vg_read_raw_area(&fid, vgsummary.vgname, &rl->dev_area, NULL, NULL, 0, 0, 0, 0); + if (vgname_from_mda(fmt, mdah, 0, &rl->dev_area, &vgsummary, NULL)) { + vg = _vg_read_raw_area(&fid, vgsummary.vgname, &rl->dev_area, NULL, NULL, 0, 0, 0); if (vg) lvmcache_update_vg(vg, 0); } @@ -2024,8 +1774,9 @@ static int _mda_export_text_raw(struct metadata_area *mda, struct dm_config_node *parent) { struct mda_context *mdc = (struct mda_context *) mda->metadata_locn; + char mdah[MDA_HEADER_SIZE]; /* temporary */ - if (!mdc || !_raw_read_mda_header(cft->mem, &mdc->area, mda_is_primary(mda), 0, NULL, NULL)) + if (!mdc || !_raw_read_mda_header((struct mda_header *)mdah, &mdc->area, mda_is_primary(mda))) return 1; /* pretend the MDA does not exist */ return config_make_nodes(cft, parent, NULL, diff --git a/lib/format_text/import-export.h b/lib/format_text/import-export.h index 2c329d076..894d88141 100644 --- 
a/lib/format_text/import-export.h +++ b/lib/format_text/import-export.h @@ -80,7 +80,7 @@ struct volume_group *text_vg_import_fd(struct format_instance *fid, off_t offset, uint32_t size, off_t offset2, uint32_t size2, checksum_fn_t checksum_fn, - uint32_t checksum, unsigned ioflags, + uint32_t checksum, time_t *when, char **desc); int text_vgsummary_import(const struct format_type *fmt, @@ -88,9 +88,7 @@ int text_vgsummary_import(const struct format_type *fmt, off_t offset, uint32_t size, off_t offset2, uint32_t size2, checksum_fn_t checksum_fn, - int checksum_only, unsigned ioflags, - struct lvmcache_vgsummary *vgsummary, - lvm_callback_fn_t process_vgsummary_fn, - void *process_vgsummary_context); + int checksum_only, + struct lvmcache_vgsummary *vgsummary); #endif diff --git a/lib/format_text/import.c b/lib/format_text/import.c index 0138ddd8b..da4cefdb8 100644 --- a/lib/format_text/import.c +++ b/lib/format_text/import.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004-2018 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. * * This file is part of LVM2. 
* @@ -16,7 +16,6 @@ #include "lib.h" #include "metadata.h" #include "import-export.h" -#include "toolcontext.h" /* FIXME Use tidier inclusion method */ static struct text_vg_version_ops *(_text_vsn_list[2]); @@ -33,55 +32,6 @@ static void _init_text_import(void) _text_import_initialised = 1; } -struct import_vgsummary_params { - const struct format_type *fmt; - struct dm_config_tree *cft; - int checksum_only; - struct lvmcache_vgsummary *vgsummary; - lvm_callback_fn_t process_vgsummary_fn; - void *process_vgsummary_context; - int ret; -}; - -static void _import_vgsummary(int failed, unsigned ioflags, void *context, const void *data) -{ - struct import_vgsummary_params *ivsp = context; - struct text_vg_version_ops **vsn; - - if (failed) { - ivsp->ret = 0; - goto_out; - } - - if (ivsp->checksum_only) - /* Checksum matches already-cached content - no need to reparse. */ - goto out; - - /* - * Find a set of version functions that can read this file - */ - for (vsn = &_text_vsn_list[0]; *vsn; vsn++) { - if (!(*vsn)->check_version(ivsp->cft)) - continue; - - if (!(*vsn)->read_vgsummary(ivsp->fmt, ivsp->cft, ivsp->vgsummary)) { - ivsp->ret = 0; - goto_out; - } - - goto out; - } - - /* Nothing found */ - ivsp->ret = 0; - -out: - config_destroy(ivsp->cft); - - if (ivsp->process_vgsummary_fn) - ivsp->process_vgsummary_fn(!ivsp->ret, ioflags, ivsp->process_vgsummary_context, NULL); -} - /* * Find out vgname on a given device. 
*/ @@ -90,76 +40,30 @@ int text_vgsummary_import(const struct format_type *fmt, off_t offset, uint32_t size, off_t offset2, uint32_t size2, checksum_fn_t checksum_fn, - int checksum_only, unsigned ioflags, - struct lvmcache_vgsummary *vgsummary, - lvm_callback_fn_t process_vgsummary_fn, - void *process_vgsummary_context) + int checksum_only, + struct lvmcache_vgsummary *vgsummary) { - struct import_vgsummary_params *ivsp; + struct dm_config_tree *cft; + struct text_vg_version_ops **vsn; + int r = 0; _init_text_import(); - if (!(ivsp = dm_pool_zalloc(fmt->cmd->mem, sizeof(*ivsp)))) { - log_error("Failed to allocate import_vgsummary_params struct."); - return 0; - } - - if (!(ivsp->cft = config_open(CONFIG_FILE_SPECIAL, NULL, 0))) + if (!(cft = config_open(CONFIG_FILE_SPECIAL, NULL, 0))) return_0; - ivsp->fmt = fmt; - ivsp->checksum_only = checksum_only; - ivsp->vgsummary = vgsummary; - ivsp->process_vgsummary_fn = process_vgsummary_fn; - ivsp->process_vgsummary_context = process_vgsummary_context; - ivsp->ret = 1; - - if (!dev) { - if (!config_file_read(fmt->cmd->mem, ivsp->cft)) { - log_error("Couldn't read volume group metadata."); - ivsp->ret = 0; - } - _import_vgsummary(!ivsp->ret, ioflags, ivsp, NULL); - } else if (!config_file_read_fd(fmt->cmd->mem, ivsp->cft, dev, reason, offset, size, - offset2, size2, checksum_fn, - vgsummary->mda_checksum, - checksum_only, 1, ioflags, &_import_vgsummary, ivsp)) { + if ((!dev && !config_file_read(cft)) || + (dev && !config_file_read_fd(cft, dev, reason, offset, size, + offset2, size2, checksum_fn, + vgsummary->mda_checksum, + checksum_only, 1))) { log_error("Couldn't read volume group metadata."); - return 0; + goto out; } - return ivsp->ret; -} - -struct cached_vg_fmtdata { - uint32_t cached_mda_checksum; - size_t cached_mda_size; -}; - -struct import_vg_params { - struct format_instance *fid; - struct dm_config_tree *cft; - int single_device; - int skip_parse; - unsigned *use_previous_vg; - struct volume_group *vg; - 
uint32_t checksum; - uint32_t total_size; - time_t *when; - struct cached_vg_fmtdata **vg_fmtdata; - char **desc; -}; - -static void _import_vg(int failed, unsigned ioflags, void *context, const void *data) -{ - struct import_vg_params *ivp = context; - struct text_vg_version_ops **vsn; - - ivp->vg = NULL; - - if (ivp->skip_parse) { - if (ivp->use_previous_vg) - *ivp->use_previous_vg = 1; + if (checksum_only) { + /* Checksum matches already-cached content - no need to reparse. */ + r = 1; goto out; } @@ -167,28 +71,26 @@ static void _import_vg(int failed, unsigned ioflags, void *context, const void * * Find a set of version functions that can read this file */ for (vsn = &_text_vsn_list[0]; *vsn; vsn++) { - if (!(*vsn)->check_version(ivp->cft)) + if (!(*vsn)->check_version(cft)) continue; - if (!(ivp->vg = (*vsn)->read_vg(ivp->fid, ivp->cft, ivp->single_device, 0))) + if (!(*vsn)->read_vgsummary(fmt, cft, vgsummary)) goto_out; - (*vsn)->read_desc(ivp->vg->vgmem, ivp->cft, ivp->when, ivp->desc); + r = 1; break; } - if (ivp->vg && ivp->vg_fmtdata && *ivp->vg_fmtdata) { - (*ivp->vg_fmtdata)->cached_mda_size = ivp->total_size; - (*ivp->vg_fmtdata)->cached_mda_checksum = ivp->checksum; - } - - if (ivp->use_previous_vg) - *ivp->use_previous_vg = 0; - -out: - config_destroy(ivp->cft); + out: + config_destroy(cft); + return r; } +struct cached_vg_fmtdata { + uint32_t cached_mda_checksum; + size_t cached_mda_size; +}; + struct volume_group *text_vg_import_fd(struct format_instance *fid, const char *file, struct cached_vg_fmtdata **vg_fmtdata, @@ -198,10 +100,13 @@ struct volume_group *text_vg_import_fd(struct format_instance *fid, off_t offset, uint32_t size, off_t offset2, uint32_t size2, checksum_fn_t checksum_fn, - uint32_t checksum, unsigned ioflags, + uint32_t checksum, time_t *when, char **desc) { - struct import_vg_params *ivp; + struct volume_group *vg = NULL; + struct dm_config_tree *cft; + struct text_vg_version_ops **vsn; + int skip_parse; if (vg_fmtdata && 
!*vg_fmtdata && !(*vg_fmtdata = dm_pool_zalloc(fid->mem, sizeof(**vg_fmtdata)))) { @@ -209,48 +114,56 @@ struct volume_group *text_vg_import_fd(struct format_instance *fid, return NULL; } - if (!(ivp = dm_pool_zalloc(fid->fmt->cmd->mem, sizeof(*ivp)))) { - log_error("Failed to allocate import_vgsummary_params struct."); - return NULL; - } - _init_text_import(); - ivp->fid = fid; - ivp->when = when; - *ivp->when = 0; - ivp->desc = desc; - *ivp->desc = NULL; - ivp->single_device = single_device; - ivp->use_previous_vg = use_previous_vg; - ivp->checksum = checksum; - ivp->total_size = size + size2; - ivp->vg_fmtdata = vg_fmtdata; + *desc = NULL; + *when = 0; - if (!(ivp->cft = config_open(CONFIG_FILE_SPECIAL, file, 0))) + if (!(cft = config_open(CONFIG_FILE_SPECIAL, file, 0))) return_NULL; /* Does the metadata match the already-cached VG? */ - ivp->skip_parse = vg_fmtdata && - ((*vg_fmtdata)->cached_mda_checksum == checksum) && - ((*vg_fmtdata)->cached_mda_size == ivp->total_size); + skip_parse = vg_fmtdata && + ((*vg_fmtdata)->cached_mda_checksum == checksum) && + ((*vg_fmtdata)->cached_mda_size == (size + size2)); - if (!dev && !config_file_read(fid->mem, ivp->cft)) { - config_destroy(ivp->cft); - return_NULL; + if ((!dev && !config_file_read(cft)) || + (dev && !config_file_read_fd(cft, dev, MDA_CONTENT_REASON(primary_mda), offset, size, + offset2, size2, checksum_fn, checksum, + skip_parse, 1))) + goto_out; + + if (skip_parse) { + if (use_previous_vg) + *use_previous_vg = 1; + goto out; } - if (dev) { - if (!config_file_read_fd(fid->mem, ivp->cft, dev, MDA_CONTENT_REASON(primary_mda), offset, size, - offset2, size2, checksum_fn, checksum, - ivp->skip_parse, 1, ioflags, &_import_vg, ivp)) { - config_destroy(ivp->cft); - return_NULL; - } - } else - _import_vg(0, 0, ivp, NULL); + /* + * Find a set of version functions that can read this file + */ + for (vsn = &_text_vsn_list[0]; *vsn; vsn++) { + if (!(*vsn)->check_version(cft)) + continue; - return ivp->vg; + if (!(vg 
= (*vsn)->read_vg(fid, cft, single_device, 0))) + goto_out; + + (*vsn)->read_desc(vg->vgmem, cft, when, desc); + break; + } + + if (vg && vg_fmtdata && *vg_fmtdata) { + (*vg_fmtdata)->cached_mda_size = (size + size2); + (*vg_fmtdata)->cached_mda_checksum = checksum; + } + + if (use_previous_vg) + *use_previous_vg = 0; + + out: + config_destroy(cft); + return vg; } struct volume_group *text_vg_import_file(struct format_instance *fid, @@ -258,7 +171,7 @@ struct volume_group *text_vg_import_file(struct format_instance *fid, time_t *when, char **desc) { return text_vg_import_fd(fid, file, NULL, NULL, 0, NULL, 0, (off_t)0, 0, (off_t)0, 0, NULL, 0, - 0, when, desc); + when, desc); } static struct volume_group *_import_vg_from_config_tree(const struct dm_config_tree *cft, diff --git a/lib/format_text/layout.h b/lib/format_text/layout.h index 98a0b362f..1746b9ccd 100644 --- a/lib/format_text/layout.h +++ b/lib/format_text/layout.h @@ -17,7 +17,6 @@ #define _LVM_TEXT_LAYOUT_H #include "config.h" -#include "format-text.h" #include "metadata.h" #include "lvmcache.h" #include "uuid.h" @@ -81,9 +80,8 @@ struct mda_header { struct raw_locn raw_locns[0]; /* NULL-terminated list */ } __attribute__ ((packed)); -struct mda_header *raw_read_mda_header(struct dm_pool *mem, struct device_area *dev_area, int primary_mda); -int raw_read_mda_header_callback(struct dm_pool *mem, struct device_area *dev_area, int primary_mda, - unsigned ioflags, lvm_callback_fn_t mdah_callback_fn, void *mdah_callback_context); +struct mda_header *raw_read_mda_header(const struct format_type *fmt, + struct device_area *dev_area, int primary_mda); struct mda_lists { struct dm_list dirs; @@ -105,11 +103,9 @@ struct mda_context { #define LVM2_LABEL "LVM2 001" #define MDA_SIZE_MIN (8 * (unsigned) lvm_getpagesize()) #define MDA_ORIGINAL_ALIGNMENT 512 /* Original alignment used for start of VG metadata content */ -#define MDA_ALIGNMENT 4096 /* Default alignment in bytes since 2.02.177 for start of VG metadata 
content. */ -int vgname_from_mda(const struct format_type *fmt, const struct mda_header *mdah, int primary_mda, +int vgname_from_mda(const struct format_type *fmt, struct mda_header *mdah, int primary_mda, struct device_area *dev_area, struct lvmcache_vgsummary *vgsummary, - uint64_t *mda_free_sectors, unsigned ioflags, - lvm_callback_fn_t update_vgsummary_callback_fn, void *update_vgsummary_callback_context); + uint64_t *mda_free_sectors); #endif diff --git a/lib/format_text/text_label.c b/lib/format_text/text_label.c index 45136e9c4..7058f70c6 100644 --- a/lib/format_text/text_label.c +++ b/lib/format_text/text_label.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004-2018 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. * * This file is part of LVM2. * @@ -19,7 +19,6 @@ #include "label.h" #include "xlate.h" #include "lvmcache.h" -#include "toolcontext.h" #include #include @@ -36,14 +35,14 @@ static int _text_can_handle(struct labeller *l __attribute__((unused)), return 0; } -struct dl_setup_baton { +struct _dl_setup_baton { struct disk_locn *pvh_dlocn_xl; struct device *dev; }; static int _da_setup(struct disk_locn *da, void *baton) { - struct dl_setup_baton *p = baton; + struct _dl_setup_baton *p = baton; p->pvh_dlocn_xl->offset = xlate64(da->offset); p->pvh_dlocn_xl->size = xlate64(da->size); p->pvh_dlocn_xl++; @@ -57,7 +56,7 @@ static int _ba_setup(struct disk_locn *ba, void *baton) static int _mda_setup(struct metadata_area *mda, void *baton) { - struct dl_setup_baton *p = baton; + struct _dl_setup_baton *p = baton; struct mda_context *mdac = (struct mda_context *) mda->metadata_locn; if (mdac->area.dev != p->dev) @@ -72,7 +71,7 @@ static int _mda_setup(struct metadata_area *mda, void *baton) static int _dl_null_termination(void *baton) { - struct dl_setup_baton *p = baton; + struct _dl_setup_baton *p = baton; p->pvh_dlocn_xl->offset = 
xlate64(UINT64_C(0)); p->pvh_dlocn_xl->size = xlate64(UINT64_C(0)); @@ -87,7 +86,7 @@ static int _text_write(struct label *label, void *buf) struct pv_header *pvhdr; struct pv_header_extension *pvhdr_ext; struct lvmcache_info *info; - struct dl_setup_baton baton; + struct _dl_setup_baton baton; char buffer[64] __attribute__((aligned(8))); int ba1, da1, mda1, mda2; @@ -319,106 +318,18 @@ static int _text_initialise_label(struct labeller *l __attribute__((unused)), return 1; } -struct update_mda_baton { +struct _update_mda_baton { struct lvmcache_info *info; struct label *label; - int nr_outstanding_mdas; - unsigned ioflags; - lvm_callback_fn_t read_label_callback_fn; - void *read_label_callback_context; - int ret; }; -struct process_mda_header_params { - struct update_mda_baton *umb; - struct metadata_area *mda; - struct device *dev; - struct lvmcache_vgsummary vgsummary; - int ret; -}; - -static void _process_vgsummary(int failed, unsigned ioflags, void *context, const void *data) -{ - struct process_mda_header_params *pmp = context; - const struct lvmcache_vgsummary *vgsummary = data; - - --pmp->umb->nr_outstanding_mdas; - - /* FIXME Need to distinguish genuine errors here */ - if (failed) - goto_out; - - if (!lvmcache_update_vgname_and_id(pmp->umb->info, vgsummary)) { - pmp->umb->ret = 0; - pmp->ret = 0; - } - -out: - if (!pmp->umb->nr_outstanding_mdas && pmp->umb->ret) - lvmcache_make_valid(pmp->umb->info); - - if (!dev_close(pmp->dev)) - stack; - - if (!pmp->umb->nr_outstanding_mdas && pmp->umb->read_label_callback_fn) - pmp->umb->read_label_callback_fn(!pmp->umb->ret, ioflags, pmp->umb->read_label_callback_context, pmp->umb->label); -} - -static void _process_mda_header(int failed, unsigned ioflags, void *context, const void *data) -{ - struct process_mda_header_params *pmp = context; - const struct mda_header *mdah = data; - struct update_mda_baton *umb = pmp->umb; - const struct format_type *fmt = umb->label->labeller->fmt; - struct metadata_area *mda = 
pmp->mda; - struct mda_context *mdac = (struct mda_context *) mda->metadata_locn; - - if (failed) - goto_bad; - - mda_set_ignored(mda, rlocn_is_ignored(mdah->raw_locns)); - - if (mda_is_ignored(mda)) { - log_debug_metadata("Ignoring mda on device %s at offset " FMTu64, - dev_name(mdac->area.dev), - mdac->area.start); - goto bad; - } - - if (!vgname_from_mda(fmt, mdah, mda_is_primary(mda), &mdac->area, &pmp->vgsummary, &mdac->free_sectors, ioflags, _process_vgsummary, pmp)) { - /* FIXME Separate fatal and non-fatal error cases? */ - goto_bad; - } - - return; - -bad: - _process_vgsummary(1, ioflags, pmp, NULL); - return; -} - -static int _count_mda(struct metadata_area *mda, void *baton) -{ - struct update_mda_baton *umb = baton; - - umb->nr_outstanding_mdas++; - - return 1; -} - static int _update_mda(struct metadata_area *mda, void *baton) { - struct process_mda_header_params *pmp; - struct update_mda_baton *umb = baton; - const struct format_type *fmt = umb->label->labeller->fmt; - struct dm_pool *mem = umb->label->labeller->fmt->cmd->mem; + struct _update_mda_baton *p = baton; + const struct format_type *fmt = p->label->labeller->fmt; struct mda_context *mdac = (struct mda_context *) mda->metadata_locn; - unsigned ioflags = umb->ioflags; - - if (!(pmp = dm_pool_zalloc(mem, sizeof(*pmp)))) { - log_error("struct process_mda_header_params allocation failed"); - return 0; - } + struct mda_header *mdah; + struct lvmcache_vgsummary vgsummary = { 0 }; /* * Using the labeller struct to preserve info about @@ -427,34 +338,45 @@ static int _update_mda(struct metadata_area *mda, void *baton) * TODO: make lvmcache smarter and move this cache logic there */ - pmp->dev = mdac->area.dev; - pmp->umb = umb; - pmp->mda = mda; - if (!dev_open_readonly(mdac->area.dev)) { mda_set_ignored(mda, 1); stack; - if (!--umb->nr_outstanding_mdas && umb->read_label_callback_fn) - umb->read_label_callback_fn(!umb->ret, ioflags, umb->read_label_callback_context, umb->label); return 1; } - 
pmp->ret = 1; - - if (!raw_read_mda_header_callback(fmt->cmd->mem, &mdac->area, mda_is_primary(mda), ioflags, _process_mda_header, pmp)) { - _process_vgsummary(1, ioflags, pmp, NULL); + if (!(mdah = raw_read_mda_header(fmt, &mdac->area, mda_is_primary(mda)))) { stack; + goto close_dev; + } + + mda_set_ignored(mda, rlocn_is_ignored(mdah->raw_locns)); + + if (mda_is_ignored(mda)) { + log_debug_metadata("Ignoring mda on device %s at offset " FMTu64, + dev_name(mdac->area.dev), + mdac->area.start); + if (!dev_close(mdac->area.dev)) + stack; return 1; } - if (umb->read_label_callback_fn) - return 1; - else - return pmp->ret; + if (vgname_from_mda(fmt, mdah, mda_is_primary(mda), &mdac->area, &vgsummary, + &mdac->free_sectors) && + !lvmcache_update_vgname_and_id(p->info, &vgsummary)) { + if (!dev_close(mdac->area.dev)) + stack; + return_0; + } + +close_dev: + if (!dev_close(mdac->area.dev)) + stack; + + return 1; } -static int _text_read(struct labeller *l, struct device *dev, void *buf, unsigned ioflags, - lvm_callback_fn_t read_label_callback_fn, void *read_label_callback_context) +static int _text_read(struct labeller *l, struct device *dev, void *buf, + struct label **label) { struct label_header *lh = (struct label_header *) buf; struct pv_header *pvhdr; @@ -463,9 +385,7 @@ static int _text_read(struct labeller *l, struct device *dev, void *buf, unsigne struct disk_locn *dlocn_xl; uint64_t offset; uint32_t ext_version; - struct dm_pool *mem = l->fmt->cmd->mem; - struct update_mda_baton *umb; - struct label *label; + struct _update_mda_baton baton; /* * PV header base @@ -475,9 +395,9 @@ static int _text_read(struct labeller *l, struct device *dev, void *buf, unsigne if (!(info = lvmcache_add(l, (char *)pvhdr->pv_uuid, dev, FMT_TEXT_ORPHAN_VG_NAME, FMT_TEXT_ORPHAN_VG_NAME, 0))) - goto_bad; + return_0; - label = lvmcache_get_label(info); + *label = lvmcache_get_label(info); lvmcache_set_device_size(info, xlate64(pvhdr->device_size_xl)); @@ -523,41 +443,16 @@ static int 
_text_read(struct labeller *l, struct device *dev, void *buf, unsigne lvmcache_add_ba(info, offset, xlate64(dlocn_xl->size)); dlocn_xl++; } - out: - if (!(umb = dm_pool_zalloc(mem, sizeof(*umb)))) { - log_error("baton allocation failed"); - goto_bad; - } + baton.info = info; + baton.label = *label; - umb->info = info; - umb->label = label; - umb->ioflags = ioflags; - umb->read_label_callback_fn = read_label_callback_fn; - umb->read_label_callback_context = read_label_callback_context; + if (!lvmcache_foreach_mda(info, _update_mda, &baton)) + return_0; - umb->ret = 1; - - if (!lvmcache_foreach_mda(info, _count_mda, umb)) - goto_bad; - - if (!umb->nr_outstanding_mdas) { - lvmcache_make_valid(info); - if (read_label_callback_fn) - read_label_callback_fn(0, ioflags, read_label_callback_context, label); - return 1; - } - - if (!lvmcache_foreach_mda(info, _update_mda, umb)) - goto_bad; + lvmcache_make_valid(info); return 1; - -bad: - if (read_label_callback_fn) - read_label_callback_fn(1, ioflags, read_label_callback_context, NULL); - - return 0; } static void _text_destroy_label(struct labeller *l __attribute__((unused)), diff --git a/lib/label/label.c b/lib/label/label.c index 32fad826f..46dd667bd 100644 --- a/lib/label/label.c +++ b/lib/label/label.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004-2018 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. * * This file is part of LVM2. * @@ -25,8 +25,6 @@ /* FIXME Allow for larger labels? Restricted to single sector currently */ -static struct dm_pool *_labeller_mem; - /* * Internal labeller struct. 
*/ @@ -59,13 +57,7 @@ static struct labeller_i *_alloc_li(const char *name, struct labeller *l) int label_init(void) { - if (!(_labeller_mem = dm_pool_create("label scan", 128))) { - log_error("Labeller pool creation failed."); - return 0; - } - dm_list_init(&_labellers); - return 1; } @@ -80,8 +72,6 @@ void label_exit(void) } dm_list_init(&_labellers); - - dm_pool_destroy(_labeller_mem); } int label_register_handler(struct labeller *handler) @@ -118,74 +108,32 @@ static void _update_lvmcache_orphan(struct lvmcache_info *info) stack; } -struct find_labeller_params { - struct device *dev; - uint64_t scan_sector; /* Sector to be scanned */ - uint64_t label_sector; /* Sector where label found */ - lvm_callback_fn_t process_label_data_fn; - void *process_label_data_context; - - struct label **result; - - int ret; -}; - -static void _set_label_read_result(int failed, unsigned ioflags, void *context, const void *data) +static struct labeller *_find_labeller(struct device *dev, char *buf, + uint64_t *label_sector, + uint64_t scan_sector) { - struct find_labeller_params *flp = context; - struct label **result = flp->result; - struct label *label = (struct label *) data; - - if (failed) { - flp->ret = 0; - goto_out; - } - - /* Fix up device and label sector which the low-level code doesn't set */ - if (label) { - label->dev = flp->dev; - label->sector = flp->label_sector; - } - - if (result) - *result = (struct label *) label; - -out: - if (!dev_close(flp->dev)) - stack; - - if (flp->process_label_data_fn) { - log_debug_io("Completed label reading for %s", dev_name(flp->dev)); - flp->process_label_data_fn(!flp->ret, ioflags, flp->process_label_data_context, NULL); - } -} - -static void _find_labeller(int failed, unsigned ioflags, void *context, const void *data) -{ - struct find_labeller_params *flp = context; - const char *readbuf = data; - struct device *dev = flp->dev; - uint64_t scan_sector = flp->scan_sector; - char labelbuf[LABEL_SIZE] __attribute__((aligned(8))); 
struct labeller_i *li; - struct labeller *l = NULL; /* Set when a labeller claims the label */ - const struct label_header *lh; + struct labeller *r = NULL; + struct label_header *lh; struct lvmcache_info *info; uint64_t sector; + int found = 0; + char readbuf[LABEL_SCAN_SIZE] __attribute__((aligned(8))); - if (failed) { + if (!dev_read(dev, scan_sector << SECTOR_SHIFT, + LABEL_SCAN_SIZE, DEV_IO_LABEL, readbuf)) { log_debug_devs("%s: Failed to read label area", dev_name(dev)); - _set_label_read_result(1, ioflags, flp, NULL); - return; + goto out; } /* Scan a few sectors for a valid label */ for (sector = 0; sector < LABEL_SCAN_SECTORS; sector += LABEL_SIZE >> SECTOR_SHIFT) { - lh = (struct label_header *) (readbuf + (sector << SECTOR_SHIFT)); + lh = (struct label_header *) (readbuf + + (sector << SECTOR_SHIFT)); if (!strncmp((char *)lh->id, LABEL_ID, sizeof(lh->id))) { - if (l) { + if (found) { log_error("Ignoring additional label on %s at " "sector %" PRIu64, dev_name(dev), sector + scan_sector); @@ -205,7 +153,7 @@ static void _find_labeller(int failed, unsigned ioflags, void *context, const vo "ignoring", dev_name(dev)); continue; } - if (l) + if (found) continue; } @@ -216,44 +164,46 @@ static void _find_labeller(int failed, unsigned ioflags, void *context, const vo "sector %" PRIu64, dev_name(dev), li->name, sector + scan_sector); - if (l) { + if (found) { log_error("Ignoring additional label " "on %s at sector %" PRIu64, dev_name(dev), sector + scan_sector); continue; } - memcpy(labelbuf, lh, LABEL_SIZE); - flp->label_sector = sector + scan_sector; - l = li->l; + r = li->l; + memcpy(buf, lh, LABEL_SIZE); + if (label_sector) + *label_sector = sector + scan_sector; + found = 1; break; } } } - if (!l) { + out: + if (!found) { if ((info = lvmcache_info_from_pvid(dev->pvid, dev, 0))) _update_lvmcache_orphan(info); log_very_verbose("%s: No label detected", dev_name(dev)); - flp->ret = 0; - _set_label_read_result(1, ioflags, flp, NULL); - } else - (void) 
(l->ops->read)(l, dev, labelbuf, ioflags, &_set_label_read_result, flp); + } + + return r; } /* FIXME Also wipe associated metadata area headers? */ int label_remove(struct device *dev) { - char labelbuf[LABEL_SIZE] __attribute__((aligned(8))); + char buf[LABEL_SIZE] __attribute__((aligned(8))); + char readbuf[LABEL_SCAN_SIZE] __attribute__((aligned(8))); int r = 1; uint64_t sector; int wipe; struct labeller_i *li; struct label_header *lh; struct lvmcache_info *info; - const char *readbuf = NULL; - memset(labelbuf, 0, LABEL_SIZE); + memset(buf, 0, LABEL_SIZE); log_very_verbose("Scanning for labels to wipe from %s", dev_name(dev)); @@ -266,7 +216,7 @@ int label_remove(struct device *dev) */ dev_flush(dev); - if (!(readbuf = dev_read(dev, UINT64_C(0), LABEL_SCAN_SIZE, DEV_IO_LABEL))) { + if (!dev_read(dev, UINT64_C(0), LABEL_SCAN_SIZE, DEV_IO_LABEL, readbuf)) { log_debug_devs("%s: Failed to read label area", dev_name(dev)); goto out; } @@ -274,7 +224,8 @@ int label_remove(struct device *dev) /* Scan first few sectors for anything looking like a label */ for (sector = 0; sector < LABEL_SCAN_SECTORS; sector += LABEL_SIZE >> SECTOR_SHIFT) { - lh = (struct label_header *) (readbuf + (sector << SECTOR_SHIFT)); + lh = (struct label_header *) (readbuf + + (sector << SECTOR_SHIFT)); wipe = 0; @@ -294,7 +245,8 @@ int label_remove(struct device *dev) if (wipe) { log_very_verbose("%s: Wiping label at sector %" PRIu64, dev_name(dev), sector); - if (dev_write(dev, sector << SECTOR_SHIFT, LABEL_SIZE, DEV_IO_LABEL, labelbuf)) { + if (dev_write(dev, sector << SECTOR_SHIFT, LABEL_SIZE, DEV_IO_LABEL, + buf)) { /* Also remove the PV record from cache. 
*/ info = lvmcache_info_from_pvid(dev->pvid, dev, 0); if (info) @@ -315,39 +267,21 @@ int label_remove(struct device *dev) return r; } -static int _label_read(struct device *dev, uint64_t scan_sector, struct label **result, - unsigned ioflags, lvm_callback_fn_t process_label_data_fn, void *process_label_data_context) +int label_read(struct device *dev, struct label **result, + uint64_t scan_sector) { + char buf[LABEL_SIZE] __attribute__((aligned(8))); + struct labeller *l; + uint64_t sector; struct lvmcache_info *info; - struct find_labeller_params *flp; + int r = 0; if ((info = lvmcache_info_from_pvid(dev->pvid, dev, 1))) { log_debug_devs("Reading label from lvmcache for %s", dev_name(dev)); - if (result) - *result = lvmcache_get_label(info); - if (process_label_data_fn) { - log_debug_io("Completed label reading for %s", dev_name(dev)); - process_label_data_fn(0, ioflags, process_label_data_context, NULL); - } + *result = lvmcache_get_label(info); return 1; } - if (!(flp = dm_pool_zalloc(_labeller_mem, sizeof *flp))) { - log_error("find_labeller_params allocation failed."); - return 0; - } - - flp->dev = dev; - flp->scan_sector = scan_sector; - flp->result = result; - flp->process_label_data_fn = process_label_data_fn; - flp->process_label_data_context = process_label_data_context; - flp->ret = 1; - - /* Ensure result is always wiped as a precaution */ - if (result) - *result = NULL; - log_debug_devs("Reading label from device %s", dev_name(dev)); if (!dev_open_readonly(dev)) { @@ -356,26 +290,19 @@ static int _label_read(struct device *dev, uint64_t scan_sector, struct label ** if ((info = lvmcache_info_from_pvid(dev->pvid, dev, 0))) _update_lvmcache_orphan(info); - return 0; + return r; } - dev_read_callback(dev, scan_sector << SECTOR_SHIFT, LABEL_SCAN_SIZE, DEV_IO_LABEL, ioflags, _find_labeller, flp); - if (process_label_data_fn) - return 1; - else - return flp->ret; -} + if ((l = _find_labeller(dev, buf, §or, scan_sector))) + if ((r = (l->ops->read)(l, dev, 
buf, result)) && result && *result) { + (*result)->dev = dev; + (*result)->sector = sector; + } -/* result may be NULL if caller doesn't need it */ -int label_read(struct device *dev, struct label **result, uint64_t scan_sector) -{ - return _label_read(dev, scan_sector, result, 0, NULL, NULL); -} + if (!dev_close(dev)) + stack; -int label_read_callback(struct device *dev, uint64_t scan_sector, unsigned ioflags, - lvm_callback_fn_t process_label_data_fn, void *process_label_data_context) -{ - return _label_read(dev, scan_sector, NULL, ioflags, process_label_data_fn, process_label_data_context); + return r; } /* Caller may need to use label_get_handler to create label struct! */ diff --git a/lib/label/label.h b/lib/label/label.h index 6d31eb053..ea1129019 100644 --- a/lib/label/label.h +++ b/lib/label/label.h @@ -62,8 +62,8 @@ struct label_ops { /* * Read a label from a volume. */ - int (*read) (struct labeller *l, struct device *dev, void *buf, - unsigned ioflags, lvm_callback_fn_t label_read_callback_fn, void *label_read_callback_context); + int (*read) (struct labeller * l, struct device * dev, + void *buf, struct label ** label); /* * Populate label_type etc. 
@@ -96,8 +96,6 @@ struct labeller *label_get_handler(const char *name); int label_remove(struct device *dev); int label_read(struct device *dev, struct label **result, uint64_t scan_sector); -int label_read_callback(struct device *dev, uint64_t scan_sector, - unsigned ioflags, lvm_callback_fn_t process_label_data_fn, void *process_label_data_context); int label_write(struct device *dev, struct label *label); struct label *label_create(struct labeller *labeller); void label_destroy(struct label *label); diff --git a/lib/metadata/metadata-liblvm.c b/lib/metadata/metadata-liblvm.c index d192b1597..a4284bc89 100644 --- a/lib/metadata/metadata-liblvm.c +++ b/lib/metadata/metadata-liblvm.c @@ -491,6 +491,7 @@ static int _pvremove_check(struct cmd_context *cmd, const char *name, { static const char really_wipe_msg[] = "Really WIPE LABELS from physical volume"; struct device *dev; + struct label *label; struct pv_list *pvl; struct physical_volume *pv = NULL; int used; @@ -505,7 +506,7 @@ static int _pvremove_check(struct cmd_context *cmd, const char *name, /* Is there a pv here already? */ /* If not, this is an error unless you used -f. 
*/ - if (!label_read(dev, NULL, 0)) { + if (!label_read(dev, &label, 0)) { if (force_count) return 1; log_error("No PV label found on %s.", name); diff --git a/lib/metadata/metadata.c b/lib/metadata/metadata.c index b5b97c2f4..2249d2fc7 100644 --- a/lib/metadata/metadata.c +++ b/lib/metadata/metadata.c @@ -3912,9 +3912,9 @@ static struct volume_group *_vg_read(struct cmd_context *cmd, use_previous_vg = 0; if ((use_precommitted && - !(vg = mda->ops->vg_read_precommit(fid, vgname, mda, &vg_fmtdata, &use_previous_vg, 0)) && !use_previous_vg) || + !(vg = mda->ops->vg_read_precommit(fid, vgname, mda, &vg_fmtdata, &use_previous_vg)) && !use_previous_vg) || (!use_precommitted && - !(vg = mda->ops->vg_read(fid, vgname, mda, &vg_fmtdata, &use_previous_vg, 0, 0)) && !use_previous_vg)) { + !(vg = mda->ops->vg_read(fid, vgname, mda, &vg_fmtdata, &use_previous_vg, 0)) && !use_previous_vg)) { inconsistent = 1; vg_fmtdata = NULL; continue; @@ -4104,9 +4104,9 @@ static struct volume_group *_vg_read(struct cmd_context *cmd, use_previous_vg = 0; if ((use_precommitted && - !(vg = mda->ops->vg_read_precommit(fid, vgname, mda, &vg_fmtdata, &use_previous_vg, 0)) && !use_previous_vg) || + !(vg = mda->ops->vg_read_precommit(fid, vgname, mda, &vg_fmtdata, &use_previous_vg)) && !use_previous_vg) || (!use_precommitted && - !(vg = mda->ops->vg_read(fid, vgname, mda, &vg_fmtdata, &use_previous_vg, 0, 0)) && !use_previous_vg)) { + !(vg = mda->ops->vg_read(fid, vgname, mda, &vg_fmtdata, &use_previous_vg, 0)) && !use_previous_vg)) { inconsistent = 1; vg_fmtdata = NULL; continue; diff --git a/lib/metadata/metadata.h b/lib/metadata/metadata.h index f6b19f44b..1fa14e839 100644 --- a/lib/metadata/metadata.h +++ b/lib/metadata/metadata.h @@ -48,6 +48,7 @@ */ #define dm_round_up(n, sz) (dm_div_up((n), (sz)) * (sz)) + /* Various flags */ /* See metadata-exported.h for the complete list. 
*/ /* Note that the bits no longer necessarily correspond to LVM1 disk format */ @@ -80,12 +81,12 @@ struct metadata_area_ops { struct metadata_area * mda, struct cached_vg_fmtdata **vg_fmtdata, unsigned *use_previous_vg, - int single_device, unsigned ioflags); + int single_device); struct volume_group *(*vg_read_precommit) (struct format_instance * fi, const char *vg_name, struct metadata_area * mda, struct cached_vg_fmtdata **vg_fmtdata, - unsigned *use_previous_vg, unsigned ioflags); + unsigned *use_previous_vg); /* * Write out complete VG metadata. You must ensure internal * consistency before calling. eg. PEs can't refer to PVs not diff --git a/make.tmpl.in b/make.tmpl.in index 414b1dd5c..bdf234918 100644 --- a/make.tmpl.in +++ b/make.tmpl.in @@ -62,8 +62,7 @@ CLDFLAGS += @CLDFLAGS@ ELDFLAGS += @ELDFLAGS@ LDDEPS += @LDDEPS@ LIB_SUFFIX = @LIB_SUFFIX@ -LVMINTERNAL_LIBS = -llvm-internal $(DMEVENT_LIBS) $(DAEMON_LIBS) $(SYSTEMD_LIBS) $(UDEV_LIBS) $(DL_LIBS) $(BLKID_LIBS) $(AIO_LIBS) -AIO_LIBS = @AIO_LIBS@ +LVMINTERNAL_LIBS = -llvm-internal $(DMEVENT_LIBS) $(DAEMON_LIBS) $(SYSTEMD_LIBS) $(UDEV_LIBS) $(DL_LIBS) $(BLKID_LIBS) DL_LIBS = @DL_LIBS@ RT_LIBS = @RT_LIBS@ M_LIBS = @M_LIBS@ diff --git a/tools/command.c b/tools/command.c index a40b327b4..8944399b3 100644 --- a/tools/command.c +++ b/tools/command.c @@ -1357,11 +1357,13 @@ static void _create_opt_names_alpha(void) qsort(opt_names_alpha, ARG_COUNT, sizeof(long), _long_name_compare); } -static int _copy_line(char *line, int max_line, int *position, int *len) +static int _copy_line(char *line, int max_line, int *position) { int p = *position; int i = 0; + memset(line, 0, max_line); + while (1) { line[i] = _command_input[p]; i++; @@ -1375,9 +1377,7 @@ static int _copy_line(char *line, int max_line, int *position, int *len) if (i == (max_line - 1)) break; } - line[i] = '\0'; *position = p; - *len = i + 1; return 1; } @@ -1395,7 +1395,6 @@ int define_commands(struct cmd_context *cmdtool, const char *run_name) int 
prev_was_oo = 0; int prev_was_op = 0; int copy_pos = 0; - int copy_len = 0; int skip = 0; int i; @@ -1406,14 +1405,14 @@ int define_commands(struct cmd_context *cmdtool, const char *run_name) /* Process each line of command-lines-input.h (from command-lines.in) */ - while (_copy_line(line, MAX_LINE, ©_pos, ©_len)) { + while (_copy_line(line, MAX_LINE, ©_pos)) { if (line[0] == '\n') break; if ((n = strchr(line, '\n'))) *n = '\0'; - memcpy(line_orig, line, copy_len); + memcpy(line_orig, line, sizeof(line)); _split_line(line, &line_argc, line_argv, ' '); if (!line_argc) diff --git a/tools/toollib.c b/tools/toollib.c index 3f27a7af4..451f24dab 100644 --- a/tools/toollib.c +++ b/tools/toollib.c @@ -116,7 +116,6 @@ int become_daemon(struct cmd_context *cmd, int skip_lvm) /* FIXME Clean up properly here */ _exit(ECMD_FAILED); } - dev_async_reset(cmd); dev_close_all(); return 1; From acb42ec4652450f7cd73440929fd57f1487282bf Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Tue, 30 Jan 2018 10:46:08 +0000 Subject: [PATCH 02/87] [device/bcache] Initial code drop. Compiles. Not written tests yet. --- lib/device/bcache.c | 833 ++++++++++++++++++++++++++++++++++++++++++++ lib/device/bcache.h | 83 +++++ 2 files changed, 916 insertions(+) create mode 100644 lib/device/bcache.c create mode 100644 lib/device/bcache.h diff --git a/lib/device/bcache.c b/lib/device/bcache.c new file mode 100644 index 000000000..1be626c6d --- /dev/null +++ b/lib/device/bcache.c @@ -0,0 +1,833 @@ +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "bcache.h" +#include "dm-logging.h" +#include "log.h" + +#define SECTOR_SHIFT 9L + +//---------------------------------------------------------------- + +// Assumes the list is not empty. 
+static inline struct dm_list *_list_pop(struct dm_list *head) +{ + struct dm_list *l; + + l = head->n; + dm_list_del(l); + return l; +} + +//---------------------------------------------------------------- + +struct control_block { + struct dm_list list; + void *context; + struct iocb cb; +}; + +struct cb_set { + struct dm_list free; + struct dm_list allocated; + struct control_block *vec; +} control_block_set; + +static struct cb_set *_cb_set_create(unsigned nr) +{ + int i; + struct cb_set *cbs = malloc(sizeof(*cbs)); + + if (!cbs) + return NULL; + + cbs->vec = malloc(nr * sizeof(*cbs->vec)); + if (!cbs->vec) { + free(cbs); + return NULL; + } + + dm_list_init(&cbs->free); + dm_list_init(&cbs->allocated); + + for (i = 0; i < nr; i++) + dm_list_add(&cbs->free, &cbs->vec[i].list); + + return cbs; +} + +static bool _cb_set_destroy(struct cb_set *cbs) +{ + if (!dm_list_empty(&cbs->allocated)) { + // FIXME: I think we should propogate this up. + log_error("async io still in flight"); + return false; + } + + free(cbs->vec); + free(cbs); + return 0; +} + +static struct control_block *_cb_alloc(struct cb_set *cbs, void *context) +{ + struct control_block *cb; + + if (dm_list_empty(&cbs->free)) + return NULL; + + cb = dm_list_item(_list_pop(&cbs->free), struct control_block); + cb->context = context; + dm_list_add(&cbs->allocated, &cb->list); + + return cb; +} + +static void _cb_free(struct cb_set *cbs, struct control_block *cb) +{ + dm_list_del(&cb->list); + dm_list_add_h(&cbs->free, &cb->list); +} + +static struct control_block *_iocb_to_cb(struct iocb *icb) +{ + return dm_list_struct_base(icb, struct control_block, cb); +} + +//---------------------------------------------------------------- + +// FIXME: write a sync engine too +enum dir { + DIR_READ, + DIR_WRITE +}; + +struct io_engine { + io_context_t aio_context; + struct cb_set *cbs; +}; + +static struct io_engine *_engine_create(unsigned max_io) +{ + int r; + struct io_engine *e = malloc(sizeof(*e)); + + if (!e) + 
return NULL; + + e->aio_context = 0; + r = io_setup(max_io, &e->aio_context); + if (r < 0) { + log_warn("io_setup failed"); + return NULL; + } + + e->cbs = _cb_set_create(max_io); + if (!e->cbs) { + log_warn("couldn't create control block set"); + free(e); + return NULL; + } + + return e; +} + +static void _engine_destroy(struct io_engine *e) +{ + _cb_set_destroy(e->cbs); + io_destroy(e->aio_context); + free(e); +} + +static bool _engine_issue(struct io_engine *e, int fd, enum dir d, + sector_t sb, sector_t se, void *data, void *context) +{ + int r; + struct iocb *cb_array[1]; + struct control_block *cb; + + if (((uint64_t) data) & (PAGE_SIZE - 1)) { + log_err("misaligned data buffer"); + return false; + } + + cb = _cb_alloc(e->cbs, context); + if (!cb) { + log_err("couldn't allocate control block"); + return false; + } + + memset(&cb->cb, 0, sizeof(cb->cb)); + + cb->cb.aio_fildes = (int) fd; + cb->cb.u.c.buf = data; + cb->cb.u.c.offset = sb << SECTOR_SHIFT; + cb->cb.u.c.nbytes = (se - sb) << SECTOR_SHIFT; + cb->cb.aio_lio_opcode = (d == DIR_READ) ? 
IO_CMD_PREAD : IO_CMD_PWRITE; + + cb_array[0] = &cb->cb; + r = io_submit(e->aio_context, 1, cb_array); + if (r < 0) { + log_sys_error("io_submit", ""); + _cb_free(e->cbs, cb); + return false; + } + + return true; +} + +#define MAX_IO 64 +typedef void complete_fn(void *context, int io_error); + +static bool _engine_wait(struct io_engine *e, complete_fn fn) +{ + int i, r; + struct io_event event[MAX_IO]; + struct control_block *cb; + + memset(&event, 0, sizeof(event)); + r = io_getevents(e->aio_context, 1, MAX_IO, event, NULL); + if (r < 0) { + log_sys_error("io_getevents", ""); + return false; + } + + for (i = 0; i < r; i++) { + struct io_event *ev = event + i; + + cb = _iocb_to_cb((struct iocb *) ev->obj); + + if (ev->res == cb->cb.u.c.nbytes) + fn((void *) cb->context, 0); + + else if ((int) ev->res < 0) + fn(cb->context, (int) ev->res); + + else { + log_err("short io"); + fn(cb->context, -ENODATA); + } + + _cb_free(e->cbs, cb); + } + + return true; +} + +//---------------------------------------------------------------- + +#define MIN_BLOCKS 16 +#define WRITEBACK_LOW_THRESHOLD_PERCENT 33 +#define WRITEBACK_HIGH_THRESHOLD_PERCENT 66 + +//---------------------------------------------------------------- + +static void *_alloc_aligned(size_t len, size_t alignment) +{ + void *result = NULL; + int r = posix_memalign(&result, alignment, len); + if (r) + return NULL; + + return result; +} + +//---------------------------------------------------------------- + +static bool _test_flags(struct block *b, unsigned bits) +{ + return (b->flags & bits) != 0; +} + +static void _set_flags(struct block *b, unsigned bits) +{ + b->flags |= bits; +} + +static void _clear_flags(struct block *b, unsigned bits) +{ + b->flags &= ~bits; +} + +//---------------------------------------------------------------- + +enum block_flags { + BF_IO_PENDING = (1 << 0), + BF_DIRTY = (1 << 1), +}; + +struct bcache { + int fd; + sector_t block_sectors; + uint64_t nr_data_blocks; + uint64_t 
nr_cache_blocks; + + struct io_engine *engine; + + void *raw_data; + struct block *raw_blocks; + + /* + * Lists that categorise the blocks. + */ + unsigned nr_locked; + unsigned nr_dirty; + unsigned nr_io_pending; + + struct dm_list free; + struct dm_list errored; + struct dm_list dirty; + struct dm_list clean; + struct dm_list io_pending; + + /* + * Hash table. + */ + unsigned nr_buckets; + unsigned hash_mask; + struct dm_list *buckets; + + /* + * Statistics + */ + unsigned read_hits; + unsigned read_misses; + unsigned write_zeroes; + unsigned write_hits; + unsigned write_misses; + unsigned prefetches; +}; + +//---------------------------------------------------------------- + +/* 2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */ +#define GOLDEN_RATIO_PRIME_64 0x9e37fffffffc0001UL + +static unsigned _hash(struct bcache *cache, int fd, uint64_t index) +{ + uint64_t h = (index << 10) & fd; + h *= GOLDEN_RATIO_PRIME_64; + return h & cache->hash_mask; +} + +static struct block *_hash_lookup(struct bcache *cache, int fd, uint64_t index) +{ + struct block *b; + unsigned h = _hash(cache, fd, index); + + dm_list_iterate_items_gen (b, cache->buckets + h, hash) + if (b->index == index) + return b; + + return NULL; +} + +static void _hash_insert(struct block *b) +{ + unsigned h = _hash(b->cache, b->fd, b->index); + dm_list_add_h(b->cache->buckets + h, &b->hash); +} + +static void _hash_remove(struct block *b) +{ + dm_list_del(&b->hash); +} + +/* + * Must return a power of 2. 
+ */ +static unsigned _calc_nr_buckets(unsigned nr_blocks) +{ + unsigned r = 8; + unsigned n = nr_blocks / 4; + + if (n < 8) + n = 8; + + while (r < n) + r <<= 1; + + return r; +} + +static int _hash_table_init(struct bcache *cache, unsigned nr_entries) +{ + unsigned i; + + cache->nr_buckets = _calc_nr_buckets(nr_entries); + cache->hash_mask = cache->nr_buckets - 1; + cache->buckets = malloc(cache->nr_buckets * sizeof(*cache->buckets)); + if (!cache->buckets) + return -ENOMEM; + + for (i = 0; i < cache->nr_buckets; i++) + dm_list_init(cache->buckets + i); + + return 0; +} + +static void _hash_table_exit(struct bcache *cache) +{ + free(cache->buckets); +} + +//---------------------------------------------------------------- + +static int _init_free_list(struct bcache *cache, unsigned count) +{ + unsigned i; + size_t block_size = cache->block_sectors << SECTOR_SHIFT; + unsigned char *data = + (unsigned char *) _alloc_aligned(count * block_size, PAGE_SIZE); + + /* Allocate the data for each block. We page align the data. */ + if (!data) + return -ENOMEM; + + cache->raw_data = data; + cache->raw_blocks = malloc(count * sizeof(*cache->raw_blocks)); + + if (!cache->raw_blocks) + free(cache->raw_data); + + for (i = 0; i < count; i++) { + struct block *b = cache->raw_blocks + i; + b->cache = cache; + b->data = data + (block_size * i); + dm_list_add(&cache->free, &b->list); + } + + return 0; +} + +static void _exit_free_list(struct bcache *cache) +{ + free(cache->raw_data); + free(cache->raw_blocks); +} + +static struct block *_alloc_block(struct bcache *cache) +{ + struct block *b = dm_list_struct_base(_list_pop(&cache->free), struct block, list); + return b; +} + +/*---------------------------------------------------------------- + * Clean/dirty list management. + * Always use these methods to ensure nr_dirty_ is correct. 
+ *--------------------------------------------------------------*/ + +static void _unlink_block(struct block *b) +{ + if (_test_flags(b, BF_DIRTY)) + b->cache->nr_dirty--; + + dm_list_del(&b->list); +} + +static void _link_block(struct block *b) +{ + struct bcache *cache = b->cache; + + if (_test_flags(b, BF_DIRTY)) { + dm_list_add(&cache->dirty, &b->list); + cache->nr_dirty++; + } else + dm_list_add(&cache->clean, &b->list); +} + +static void _relink(struct block *b) +{ + _unlink_block(b); + _link_block(b); +} + +/*---------------------------------------------------------------- + * Low level IO handling + * + * We cannot have two concurrent writes on the same block. + * eg, background writeback, put with dirty, flush? + * + * To avoid this we introduce some restrictions: + * + * i) A held block can never be written back. + * ii) You cannot get a block until writeback has completed. + * + *--------------------------------------------------------------*/ + +/* + * |b->list| should be valid (either pointing to itself, on one of the other + * lists. + */ +static bool _issue_low_level(struct block *b, enum dir d) +{ + struct bcache *cache = b->cache; + sector_t sb = b->index * cache->block_sectors; + sector_t se = sb + cache->block_sectors; + + if (_test_flags(b, BF_IO_PENDING)) + return false; + + _set_flags(b, BF_IO_PENDING); + return _engine_issue(cache->engine, cache->fd, d, sb, se, b->data, b); +} + +static inline bool _issue_read(struct block *b) +{ + return _issue_low_level(b, DIR_READ); +} + +static inline bool _issue_write(struct block *b) +{ + return _issue_low_level(b, DIR_WRITE); +} + +static void _complete_io(void *context, int err) +{ + struct block *b = context; + struct bcache *cache = b->cache; + + b->error = err; + _clear_flags(b, BF_IO_PENDING); + cache->nr_io_pending--; + + /* + * b is on the io_pending list, so we don't want to use unlink_block. + * Which would incorrectly adjust nr_dirty. 
+ */ + dm_list_del(&b->list); + + if (b->error) + dm_list_add(&cache->errored, &b->list); + + else { + _clear_flags(b, BF_DIRTY); + _link_block(b); + } +} + +static int _wait_io(struct bcache *cache) +{ + return _engine_wait(cache->engine, _complete_io); +} + +/*---------------------------------------------------------------- + * High level IO handling + *--------------------------------------------------------------*/ + +static void _wait_all(struct bcache *cache) +{ + while (!dm_list_empty(&cache->io_pending)) + _wait_io(cache); +} + +static void _wait_specific(struct block *b) +{ + while (_test_flags(b, BF_IO_PENDING)) + _wait_io(b->cache); +} + +static unsigned _writeback(struct bcache *cache, unsigned count) +{ + unsigned actual = 0; + struct block *b, *tmp; + + dm_list_iterate_items_gen_safe (b, tmp, &cache->dirty, list) { + if (actual == count) + break; + + // We can't writeback anything that's still in use. + if (!b->ref_count) { + _issue_write(b); + actual++; + } + } + + return actual; +} + +/*---------------------------------------------------------------- + * High level allocation + *--------------------------------------------------------------*/ + +static struct block *_find_unused_clean_block(struct bcache *cache) +{ + struct block *b; + + dm_list_iterate_items (b, &cache->clean) { + if (!b->ref_count) { + _unlink_block(b); + _hash_remove(b); + return b; + } + } + + return NULL; +} + +static struct block *_new_block(struct bcache *cache, block_address index) +{ + struct block *b; + + b = _alloc_block(cache); + while (!b && cache->nr_locked < cache->nr_cache_blocks) { + b = _find_unused_clean_block(cache); + if (!b) { + if (dm_list_empty(&cache->io_pending)) + _writeback(cache, 16); + _wait_io(cache); + } + } + + if (b) { + dm_list_init(&b->list); + dm_list_init(&b->hash); + b->flags = 0; + b->index = index; + b->ref_count = 0; + b->error = 0; + + _hash_insert(b); + } + + return b; +} + 
+/*---------------------------------------------------------------- + * Block reference counting + *--------------------------------------------------------------*/ +static void _zero_block(struct block *b) +{ + b->cache->write_zeroes++; + memset(b->data, 0, b->cache->block_sectors << SECTOR_SHIFT); + _set_flags(b, BF_DIRTY); +} + +static void _hit(struct block *b, unsigned flags) +{ + struct bcache *cache = b->cache; + + if (flags & (GF_ZERO | GF_DIRTY)) + cache->write_hits++; + else + cache->read_hits++; + + _relink(b); +} + +static void _miss(struct bcache *cache, unsigned flags) +{ + if (flags & (GF_ZERO | GF_DIRTY)) + cache->write_misses++; + else + cache->read_misses++; +} + +static struct block *_lookup_or_read_block(struct bcache *cache, + int fd, block_address index, + unsigned flags) +{ + struct block *b = _hash_lookup(cache, fd, index); + + if (b) { + // FIXME: this is insufficient. We need to also catch a read + // lock of a write locked block. Ref count needs to distinguish. + if (b->ref_count && (flags & (GF_DIRTY | GF_ZERO))) { + log_err("concurrent write lock attempted"); + return NULL; + } + + if (_test_flags(b, BF_IO_PENDING)) { + _miss(cache, flags); + _wait_specific(b); + + } else + _hit(b, flags); + + _unlink_block(b); + + if (flags & GF_ZERO) + _zero_block(b); + + } else { + _miss(cache, flags); + + b = _new_block(cache, index); + if (b) { + if (flags & GF_ZERO) + _zero_block(b); + + else { + _issue_read(b); + _wait_specific(b); + + // we know the block is clean and unerrored. + _unlink_block(b); + } + } + } + + if (b && !b->error) { + if (flags & (GF_DIRTY | GF_ZERO)) + _set_flags(b, BF_DIRTY); + + _link_block(b); + return b; + } + + return NULL; +} + +static void _preemptive_writeback(struct bcache *cache) +{ + // FIXME: this ignores those blocks that are in the error state. Track + // nr_clean instead? 
+ unsigned nr_available = cache->nr_cache_blocks - (cache->nr_dirty - cache->nr_io_pending); + if (nr_available < (WRITEBACK_LOW_THRESHOLD_PERCENT * cache->nr_cache_blocks / 100)) + _writeback(cache, (WRITEBACK_HIGH_THRESHOLD_PERCENT * cache->nr_cache_blocks / 100) - nr_available); + +} + +/*---------------------------------------------------------------- + * Public interface + *--------------------------------------------------------------*/ +struct bcache *bcache_create(sector_t block_sectors, unsigned nr_cache_blocks) +{ + int r; + struct bcache *cache; + + cache = malloc(sizeof(*cache)); + if (!cache) + return NULL; + + cache->block_sectors = block_sectors; + cache->nr_cache_blocks = nr_cache_blocks; + + cache->engine = _engine_create(nr_cache_blocks < 1024u ? nr_cache_blocks : 1024u); + if (!cache->engine) { + free(cache); + return NULL; + } + + cache->nr_locked = 0; + cache->nr_dirty = 0; + cache->nr_io_pending = 0; + + dm_list_init(&cache->free); + dm_list_init(&cache->errored); + dm_list_init(&cache->dirty); + dm_list_init(&cache->clean); + dm_list_init(&cache->io_pending); + + if (_hash_table_init(cache, nr_cache_blocks)) { + _engine_destroy(cache->engine); + free(cache); + } + + cache->read_hits = 0; + cache->read_misses = 0; + cache->write_zeroes = 0; + cache->write_hits = 0; + cache->write_misses = 0; + cache->prefetches = 0; + + r = _init_free_list(cache, nr_cache_blocks); + if (r) { + _engine_destroy(cache->engine); + _hash_table_exit(cache); + free(cache); + } + + return cache; +} + +void bcache_destroy(struct bcache *cache) +{ + if (cache->nr_locked) + log_warn("some blocks are still locked\n"); + + bcache_flush(cache); + _wait_all(cache); + _exit_free_list(cache); + _hash_table_exit(cache); + _engine_destroy(cache->engine); + free(cache); +} + +void bcache_prefetch(struct bcache *cache, int fd, block_address index) +{ + struct block *b = _hash_lookup(cache, fd, index); + + if (!b) { + cache->prefetches++; + + b = _new_block(cache, index); + if (b) 
+ _issue_read(b); + } +} + +bool bcache_get(struct bcache *cache, int fd, block_address index, + unsigned flags, struct block **result) +{ + struct block *b = _lookup_or_read_block(cache, fd, index, flags); + if (b) { + if (!b->ref_count) + cache->nr_locked++; + b->ref_count++; + + *result = b; + return true; + } + + *result = NULL; + log_err("couldn't get block"); + return false; +} + +void bcache_put(struct block *b) +{ + if (!b->ref_count) { + log_err("ref count on bcache block already zero"); + return; + } + + b->ref_count--; + if (!b->ref_count) + b->cache->nr_locked--; + + if (_test_flags(b, BF_DIRTY)) + _preemptive_writeback(b->cache); +} + +int bcache_flush(struct bcache *cache) +{ + while (!dm_list_empty(&cache->dirty)) { + struct block *b = dm_list_item(_list_pop(&cache->dirty), struct block); + if (b->ref_count || _test_flags(b, BF_IO_PENDING)) + // The superblock may well be still locked. + continue; + + _issue_write(b); + } + + _wait_all(cache); + + return dm_list_empty(&cache->errored) ? 0 : -EIO; +} + +//---------------------------------------------------------------- + diff --git a/lib/device/bcache.h b/lib/device/bcache.h new file mode 100644 index 000000000..1f4262e96 --- /dev/null +++ b/lib/device/bcache.h @@ -0,0 +1,83 @@ +#ifndef BCACHE_H +#define BCACHE_H + +#include + +#include "libdevmapper.h" + +/*----------------------------------------------------------------*/ + +typedef uint64_t block_address; +typedef uint64_t sector_t; + +struct bcache; +struct block { + /* clients may only access these three fields */ + int fd; + uint64_t index; + void *data; + + struct bcache *cache; + struct dm_list list; + struct dm_list hash; + + unsigned flags; + unsigned ref_count; + int error; +}; + +struct bcache *bcache_create(sector_t block_size, unsigned nr_cache_blocks); +void bcache_destroy(struct bcache *cache); + +enum bcache_get_flags { + /* + * The block will be zeroed before get_block returns it. 
This + * potentially avoids a read if the block is not already in the cache. + * GF_DIRTY is implicit. + */ + GF_ZERO = (1 << 0), + + /* + * Indicates the caller is intending to change the data in the block, a + * writeback will occur after the block is released. + */ + GF_DIRTY = (1 << 1) +}; + +typedef uint64_t block_address; + +unsigned bcache_get_max_prefetches(struct bcache *cache); + +/* + * Use the prefetch method to take advantage of asynchronous IO. For example, + * if you wanted to read a block from many devices concurrently you'd do + * something like this: + * + * dm_list_iterate_items (dev, &devices) + * bcache_prefetch(cache, dev->fd, block); + * + * dm_list_iterate_items (dev, &devices) { + * if (!bcache_get(cache, dev->fd, block, &b)) + * fail(); + * + * process_block(b); + * } + * + * It's slightly sub optimal, since you may not run the gets in the order that + * they complete. But we're talking a very small difference, and it's worth it + * to keep callbacks out of this interface. + */ +void bcache_prefetch(struct bcache *cache, int fd, block_address index); + +/* + * Returns true on success. 
+ */ +bool bcache_get(struct bcache *cache, int fd, block_address index, + unsigned flags, struct block **result); +void bcache_put(struct block *b); + +int bcache_flush(struct bcache *cache); + +/*----------------------------------------------------------------*/ + +#endif From da7e13ef88541c4ca4b067f4f47ad7b43d346501 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Tue, 30 Jan 2018 12:13:48 +0000 Subject: [PATCH 03/87] [lib/device/bcache] Tweaks after Kabi's review --- aclocal.m4 | 210 ++++++++++++++++++++++++++--------------- configure | 18 +++- include/configure.h.in | 16 +++- lib/Makefile.in | 1 + lib/device/bcache.c | 108 ++++++++++++--------- lib/device/bcache.h | 14 +++ 6 files changed, 242 insertions(+), 125 deletions(-) diff --git a/aclocal.m4 b/aclocal.m4 index 4ab64a82f..07ea0b664 100644 --- a/aclocal.m4 +++ b/aclocal.m4 @@ -69,32 +69,63 @@ AC_DEFUN([AX_PYTHON_MODULE],[ fi ]) -# pkg.m4 - Macros to locate and utilise pkg-config. -*- Autoconf -*- -# serial 1 (pkg-config-0.24) -# -# Copyright © 2004 Scott James Remnant . -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
-# -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. +dnl pkg.m4 - Macros to locate and utilise pkg-config. -*- Autoconf -*- +dnl serial 11 (pkg-config-0.29) +dnl +dnl Copyright © 2004 Scott James Remnant . +dnl Copyright © 2012-2015 Dan Nicholson +dnl +dnl This program is free software; you can redistribute it and/or modify +dnl it under the terms of the GNU General Public License as published by +dnl the Free Software Foundation; either version 2 of the License, or +dnl (at your option) any later version. +dnl +dnl This program is distributed in the hope that it will be useful, but +dnl WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl General Public License for more details. +dnl +dnl You should have received a copy of the GNU General Public License +dnl along with this program; if not, write to the Free Software +dnl Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +dnl 02111-1307, USA. +dnl +dnl As a special exception to the GNU General Public License, if you +dnl distribute this file as part of a program that contains a +dnl configuration script generated by Autoconf, you may include it under +dnl the same distribution terms that you use for the rest of that +dnl program. -# PKG_PROG_PKG_CONFIG([MIN-VERSION]) -# ---------------------------------- +dnl PKG_PREREQ(MIN-VERSION) +dnl ----------------------- +dnl Since: 0.29 +dnl +dnl Verify that the version of the pkg-config macros are at least +dnl MIN-VERSION. Unlike PKG_PROG_PKG_CONFIG, which checks the user's +dnl installed version of pkg-config, this checks the developer's version +dnl of pkg.m4 when generating configure. 
+dnl +dnl To ensure that this macro is defined, also add: +dnl m4_ifndef([PKG_PREREQ], +dnl [m4_fatal([must install pkg-config 0.29 or later before running autoconf/autogen])]) +dnl +dnl See the "Since" comment for each macro you use to see what version +dnl of the macros you require. +m4_defun([PKG_PREREQ], +[m4_define([PKG_MACROS_VERSION], [0.29]) +m4_if(m4_version_compare(PKG_MACROS_VERSION, [$1]), -1, + [m4_fatal([pkg.m4 version $1 or higher is required but ]PKG_MACROS_VERSION[ found])]) +])dnl PKG_PREREQ + +dnl PKG_PROG_PKG_CONFIG([MIN-VERSION]) +dnl ---------------------------------- +dnl Since: 0.16 +dnl +dnl Search for the pkg-config tool and set the PKG_CONFIG variable to +dnl first found in the path. Checks that the version of pkg-config found +dnl is at least MIN-VERSION. If MIN-VERSION is not specified, 0.9.0 is +dnl used since that's the first version where most current features of +dnl pkg-config existed. AC_DEFUN([PKG_PROG_PKG_CONFIG], [m4_pattern_forbid([^_?PKG_[A-Z_]+$]) m4_pattern_allow([^PKG_CONFIG(_(PATH|LIBDIR|SYSROOT_DIR|ALLOW_SYSTEM_(CFLAGS|LIBS)))?$]) @@ -116,18 +147,19 @@ if test -n "$PKG_CONFIG"; then PKG_CONFIG="" fi fi[]dnl -])# PKG_PROG_PKG_CONFIG +])dnl PKG_PROG_PKG_CONFIG -# PKG_CHECK_EXISTS(MODULES, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) -# -# Check to see whether a particular set of modules exists. Similar -# to PKG_CHECK_MODULES(), but does not set variables or print errors. -# -# Please remember that m4 expands AC_REQUIRE([PKG_PROG_PKG_CONFIG]) -# only at the first occurence in configure.ac, so if the first place -# it's called might be skipped (such as if it is within an "if", you -# have to call PKG_CHECK_EXISTS manually -# -------------------------------------------------------------- +dnl PKG_CHECK_EXISTS(MODULES, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) +dnl ------------------------------------------------------------------- +dnl Since: 0.18 +dnl +dnl Check to see whether a particular set of modules exists. 
Similar to +dnl PKG_CHECK_MODULES(), but does not set variables or print errors. +dnl +dnl Please remember that m4 expands AC_REQUIRE([PKG_PROG_PKG_CONFIG]) +dnl only at the first occurence in configure.ac, so if the first place +dnl it's called might be skipped (such as if it is within an "if", you +dnl have to call PKG_CHECK_EXISTS manually AC_DEFUN([PKG_CHECK_EXISTS], [AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl if test -n "$PKG_CONFIG" && \ @@ -137,8 +169,10 @@ m4_ifvaln([$3], [else $3])dnl fi]) -# _PKG_CONFIG([VARIABLE], [COMMAND], [MODULES]) -# --------------------------------------------- +dnl _PKG_CONFIG([VARIABLE], [COMMAND], [MODULES]) +dnl --------------------------------------------- +dnl Internal wrapper calling pkg-config via PKG_CONFIG and setting +dnl pkg_failed based on the result. m4_define([_PKG_CONFIG], [if test -n "$$1"; then pkg_cv_[]$1="$$1" @@ -150,10 +184,11 @@ m4_define([_PKG_CONFIG], else pkg_failed=untried fi[]dnl -])# _PKG_CONFIG +])dnl _PKG_CONFIG -# _PKG_SHORT_ERRORS_SUPPORTED -# ----------------------------- +dnl _PKG_SHORT_ERRORS_SUPPORTED +dnl --------------------------- +dnl Internal check to see if pkg-config supports short errors. 
AC_DEFUN([_PKG_SHORT_ERRORS_SUPPORTED], [AC_REQUIRE([PKG_PROG_PKG_CONFIG]) if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then @@ -161,19 +196,17 @@ if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then else _pkg_short_errors_supported=no fi[]dnl -])# _PKG_SHORT_ERRORS_SUPPORTED +])dnl _PKG_SHORT_ERRORS_SUPPORTED -# PKG_CHECK_MODULES(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND], -# [ACTION-IF-NOT-FOUND]) -# -# -# Note that if there is a possibility the first call to -# PKG_CHECK_MODULES might not happen, you should be sure to include an -# explicit call to PKG_PROG_PKG_CONFIG in your configure.ac -# -# -# -------------------------------------------------------------- +dnl PKG_CHECK_MODULES(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND], +dnl [ACTION-IF-NOT-FOUND]) +dnl -------------------------------------------------------------- +dnl Since: 0.4.0 +dnl +dnl Note that if there is a possibility the first call to +dnl PKG_CHECK_MODULES might not happen, you should be sure to include an +dnl explicit call to PKG_PROG_PKG_CONFIG in your configure.ac AC_DEFUN([PKG_CHECK_MODULES], [AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl @@ -227,16 +260,40 @@ else AC_MSG_RESULT([yes]) $3 fi[]dnl -])# PKG_CHECK_MODULES +])dnl PKG_CHECK_MODULES -# PKG_INSTALLDIR(DIRECTORY) -# ------------------------- -# Substitutes the variable pkgconfigdir as the location where a module -# should install pkg-config .pc files. By default the directory is -# $libdir/pkgconfig, but the default can be changed by passing -# DIRECTORY. The user can override through the --with-pkgconfigdir -# parameter. +dnl PKG_CHECK_MODULES_STATIC(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND], +dnl [ACTION-IF-NOT-FOUND]) +dnl --------------------------------------------------------------------- +dnl Since: 0.29 +dnl +dnl Checks for existence of MODULES and gathers its build flags with +dnl static libraries enabled. 
Sets VARIABLE-PREFIX_CFLAGS from --cflags +dnl and VARIABLE-PREFIX_LIBS from --libs. +dnl +dnl Note that if there is a possibility the first call to +dnl PKG_CHECK_MODULES_STATIC might not happen, you should be sure to +dnl include an explicit call to PKG_PROG_PKG_CONFIG in your +dnl configure.ac. +AC_DEFUN([PKG_CHECK_MODULES_STATIC], +[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl +_save_PKG_CONFIG=$PKG_CONFIG +PKG_CONFIG="$PKG_CONFIG --static" +PKG_CHECK_MODULES($@) +PKG_CONFIG=$_save_PKG_CONFIG[]dnl +])dnl PKG_CHECK_MODULES_STATIC + + +dnl PKG_INSTALLDIR([DIRECTORY]) +dnl ------------------------- +dnl Since: 0.27 +dnl +dnl Substitutes the variable pkgconfigdir as the location where a module +dnl should install pkg-config .pc files. By default the directory is +dnl $libdir/pkgconfig, but the default can be changed by passing +dnl DIRECTORY. The user can override through the --with-pkgconfigdir +dnl parameter. AC_DEFUN([PKG_INSTALLDIR], [m4_pushdef([pkg_default], [m4_default([$1], ['${libdir}/pkgconfig'])]) m4_pushdef([pkg_description], @@ -247,16 +304,18 @@ AC_ARG_WITH([pkgconfigdir], AC_SUBST([pkgconfigdir], [$with_pkgconfigdir]) m4_popdef([pkg_default]) m4_popdef([pkg_description]) -]) dnl PKG_INSTALLDIR +])dnl PKG_INSTALLDIR -# PKG_NOARCH_INSTALLDIR(DIRECTORY) -# ------------------------- -# Substitutes the variable noarch_pkgconfigdir as the location where a -# module should install arch-independent pkg-config .pc files. By -# default the directory is $datadir/pkgconfig, but the default can be -# changed by passing DIRECTORY. The user can override through the -# --with-noarch-pkgconfigdir parameter. +dnl PKG_NOARCH_INSTALLDIR([DIRECTORY]) +dnl -------------------------------- +dnl Since: 0.27 +dnl +dnl Substitutes the variable noarch_pkgconfigdir as the location where a +dnl module should install arch-independent pkg-config .pc files. By +dnl default the directory is $datadir/pkgconfig, but the default can be +dnl changed by passing DIRECTORY. 
The user can override through the +dnl --with-noarch-pkgconfigdir parameter. AC_DEFUN([PKG_NOARCH_INSTALLDIR], [m4_pushdef([pkg_default], [m4_default([$1], ['${datadir}/pkgconfig'])]) m4_pushdef([pkg_description], @@ -267,13 +326,15 @@ AC_ARG_WITH([noarch-pkgconfigdir], AC_SUBST([noarch_pkgconfigdir], [$with_noarch_pkgconfigdir]) m4_popdef([pkg_default]) m4_popdef([pkg_description]) -]) dnl PKG_NOARCH_INSTALLDIR +])dnl PKG_NOARCH_INSTALLDIR -# PKG_CHECK_VAR(VARIABLE, MODULE, CONFIG-VARIABLE, -# [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) -# ------------------------------------------- -# Retrieves the value of the pkg-config variable for the given module. +dnl PKG_CHECK_VAR(VARIABLE, MODULE, CONFIG-VARIABLE, +dnl [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND]) +dnl ------------------------------------------- +dnl Since: 0.28 +dnl +dnl Retrieves the value of the pkg-config variable for the given module. AC_DEFUN([PKG_CHECK_VAR], [AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl AC_ARG_VAR([$1], [value of $3 for $2, overriding pkg-config])dnl @@ -282,7 +343,7 @@ _PKG_CONFIG([$1], [variable="][$3]["], [$2]) AS_VAR_COPY([$1], [pkg_cv_][$1]) AS_VAR_IF([$1], [""], [$5], [$4])dnl -])# PKG_CHECK_VAR +])dnl PKG_CHECK_VAR # Copyright (C) 1999-2014 Free Software Foundation, Inc. # @@ -536,5 +597,4 @@ AC_DEFUN([AM_RUN_LOG], echo "$as_me:$LINENO: \$? 
= $ac_status" >&AS_MESSAGE_LOG_FD (exit $ac_status); }]) - m4_include([acinclude.m4]) diff --git a/configure b/configure index 5409f6847..7f412492c 100755 --- a/configure +++ b/configure @@ -886,6 +886,7 @@ infodir docdir oldincludedir includedir +runstatedir localstatedir sharedstatedir sysconfdir @@ -1101,6 +1102,7 @@ datadir='${datarootdir}' sysconfdir='${prefix}/etc' sharedstatedir='${prefix}/com' localstatedir='${prefix}/var' +runstatedir='${localstatedir}/run' includedir='${prefix}/include' oldincludedir='/usr/include' docdir='${datarootdir}/doc/${PACKAGE}' @@ -1353,6 +1355,15 @@ do | -silent | --silent | --silen | --sile | --sil) silent=yes ;; + -runstatedir | --runstatedir | --runstatedi | --runstated \ + | --runstate | --runstat | --runsta | --runst | --runs \ + | --run | --ru | --r) + ac_prev=runstatedir ;; + -runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \ + | --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \ + | --run=* | --ru=* | --r=*) + runstatedir=$ac_optarg ;; + -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) ac_prev=sbindir ;; -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ @@ -1490,7 +1501,7 @@ fi for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ datadir sysconfdir sharedstatedir localstatedir includedir \ oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ - libdir localedir mandir + libdir localedir mandir runstatedir do eval ac_val=\$$ac_var # Remove trailing slashes. 
@@ -1643,6 +1654,7 @@ Fine tuning of the installation directories: --sysconfdir=DIR read-only single-machine data [PREFIX/etc] --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] --localstatedir=DIR modifiable single-machine data [PREFIX/var] + --runstatedir=DIR modifiable per-process data [LOCALSTATEDIR/run] --libdir=DIR object code libraries [EPREFIX/lib] --includedir=DIR C header files [PREFIX/include] --oldincludedir=DIR C header files for non-gcc [/usr/include] @@ -12967,7 +12979,7 @@ if ${am_cv_pathless_PYTHON+:} false; then : $as_echo_n "(cached) " >&6 else - for am_cv_pathless_PYTHON in python python2 python3 python3.5 python3.4 python3.3 python3.2 python3.1 python3.0 python2.7 python2.6 python2.5 python2.4 python2.3 python2.2 python2.1 python2.0 none; do + for am_cv_pathless_PYTHON in python python2 python3 python3.3 python3.2 python3.1 python3.0 python2.7 python2.6 python2.5 python2.4 python2.3 python2.2 python2.1 python2.0 none; do test "$am_cv_pathless_PYTHON" = none && break prog="import sys # split strings by '.' and convert to numeric. Append some zeros @@ -13535,7 +13547,7 @@ if ${am_cv_pathless_PYTHON+:} false; then : $as_echo_n "(cached) " >&6 else - for am_cv_pathless_PYTHON in python python2 python3 python3.5 python3.4 python3.3 python3.2 python3.1 python3.0 python2.7 python2.6 python2.5 python2.4 python2.3 python2.2 python2.1 python2.0 none; do + for am_cv_pathless_PYTHON in python python2 python3 python3.3 python3.2 python3.1 python3.0 python2.7 python2.6 python2.5 python2.4 python2.3 python2.2 python2.1 python2.0 none; do test "$am_cv_pathless_PYTHON" = none && break prog="import sys # split strings by '.' and convert to numeric. Append some zeros diff --git a/include/configure.h.in b/include/configure.h.in index be2f66031..a4c93d6f2 100644 --- a/include/configure.h.in +++ b/include/configure.h.in @@ -347,9 +347,6 @@ /* Define to 1 if the system has the type `ptrdiff_t'. 
*/ #undef HAVE_PTRDIFF_T -/* Define to 1 if the compiler has the `__builtin_clz` builtin. */ -#undef HAVE___BUILTIN_CLZ - /* Define to 1 if you have the header file. */ #undef HAVE_READLINE_HISTORY_H @@ -478,9 +475,16 @@ /* Define to 1 if you have the `strtoull' function. */ #undef HAVE_STRTOULL +/* Define to 1 if `st_blocks' is a member of `struct stat'. */ +#undef HAVE_STRUCT_STAT_ST_BLOCKS + /* Define to 1 if `st_rdev' is a member of `struct stat'. */ #undef HAVE_STRUCT_STAT_ST_RDEV +/* Define to 1 if your `struct stat' has `st_blocks'. Deprecated, use + `HAVE_STRUCT_STAT_ST_BLOCKS' instead. */ +#undef HAVE_ST_BLOCKS + /* Define to 1 if you have the header file. */ #undef HAVE_SYSLOG_H @@ -552,6 +556,9 @@ /* Define to 1 if you have the header file. */ #undef HAVE_SYS_UTSNAME_H +/* Define to 1 if you have the header file. */ +#undef HAVE_SYS_VFS_H + /* Define to 1 if you have the header file. */ #undef HAVE_SYS_WAIT_H @@ -591,6 +598,9 @@ /* Define to 1 if the system has the type `_Bool'. */ #undef HAVE__BOOL +/* Define to 1 if the system has the `__builtin_clz' built-in function */ +#undef HAVE___BUILTIN_CLZ + /* Internalization package */ #undef INTL_PACKAGE diff --git a/lib/Makefile.in b/lib/Makefile.in index 1fdaca8ee..bd68edc34 100644 --- a/lib/Makefile.in +++ b/lib/Makefile.in @@ -55,6 +55,7 @@ SOURCES =\ config/config.c \ datastruct/btree.c \ datastruct/str_list.c \ + device/bcache.c \ device/dev-cache.c \ device/dev-ext.c \ device/dev-io.c \ diff --git a/lib/device/bcache.c b/lib/device/bcache.c index 1be626c6d..3b8cf789b 100644 --- a/lib/device/bcache.c +++ b/lib/device/bcache.c @@ -1,3 +1,17 @@ +/* + * Copyright (C) 2018 Red Hat, Inc. All rights reserved. + * + * This file is part of LVM2. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU Lesser General Public License v.2.1. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + #define _GNU_SOURCE #include @@ -22,6 +36,11 @@ //---------------------------------------------------------------- +static void log_sys_warn(const char *syscall) +{ + log_warn("%s failed: %s", syscall, strerror(errno)); +} + // Assumes the list is not empty. static inline struct dm_list *_list_pop(struct dm_list *head) { @@ -49,14 +68,14 @@ struct cb_set { static struct cb_set *_cb_set_create(unsigned nr) { int i; - struct cb_set *cbs = malloc(sizeof(*cbs)); + struct cb_set *cbs = dm_malloc(sizeof(*cbs)); if (!cbs) return NULL; - cbs->vec = malloc(nr * sizeof(*cbs->vec)); + cbs->vec = dm_malloc(nr * sizeof(*cbs->vec)); if (!cbs->vec) { - free(cbs); + dm_free(cbs); return NULL; } @@ -69,17 +88,18 @@ static struct cb_set *_cb_set_create(unsigned nr) return cbs; } -static bool _cb_set_destroy(struct cb_set *cbs) +static void _cb_set_destroy(struct cb_set *cbs) { + // We know this is always called after a wait_all. So there should + // never be in flight IO. if (!dm_list_empty(&cbs->allocated)) { - // FIXME: I think we should propogate this up. 
+ // bail out log_error("async io still in flight"); - return false; + return; } - free(cbs->vec); - free(cbs); - return 0; + dm_free(cbs->vec); + dm_free(cbs); } static struct control_block *_cb_alloc(struct cb_set *cbs, void *context) @@ -123,7 +143,7 @@ struct io_engine { static struct io_engine *_engine_create(unsigned max_io) { int r; - struct io_engine *e = malloc(sizeof(*e)); + struct io_engine *e = dm_malloc(sizeof(*e)); if (!e) return NULL; @@ -138,7 +158,7 @@ static struct io_engine *_engine_create(unsigned max_io) e->cbs = _cb_set_create(max_io); if (!e->cbs) { log_warn("couldn't create control block set"); - free(e); + dm_free(e); return NULL; } @@ -149,7 +169,7 @@ static void _engine_destroy(struct io_engine *e) { _cb_set_destroy(e->cbs); io_destroy(e->aio_context); - free(e); + dm_free(e); } static bool _engine_issue(struct io_engine *e, int fd, enum dir d, @@ -160,13 +180,13 @@ static bool _engine_issue(struct io_engine *e, int fd, enum dir d, struct control_block *cb; if (((uint64_t) data) & (PAGE_SIZE - 1)) { - log_err("misaligned data buffer"); + log_warn("misaligned data buffer"); return false; } cb = _cb_alloc(e->cbs, context); if (!cb) { - log_err("couldn't allocate control block"); + log_warn("couldn't allocate control block"); return false; } @@ -181,7 +201,7 @@ static bool _engine_issue(struct io_engine *e, int fd, enum dir d, cb_array[0] = &cb->cb; r = io_submit(e->aio_context, 1, cb_array); if (r < 0) { - log_sys_error("io_submit", ""); + log_sys_warn("io_submit"); _cb_free(e->cbs, cb); return false; } @@ -201,7 +221,7 @@ static bool _engine_wait(struct io_engine *e, complete_fn fn) memset(&event, 0, sizeof(event)); r = io_getevents(e->aio_context, 1, MAX_IO, event, NULL); if (r < 0) { - log_sys_error("io_getevents", ""); + log_sys_warn("io_getevents"); return false; } @@ -217,7 +237,7 @@ static bool _engine_wait(struct io_engine *e, complete_fn fn) fn(cb->context, (int) ev->res); else { - log_err("short io"); + log_warn("short io"); 
fn(cb->context, -ENODATA); } @@ -341,7 +361,7 @@ static void _hash_insert(struct block *b) dm_list_add_h(b->cache->buckets + h, &b->hash); } -static void _hash_remove(struct block *b) +static inline void _hash_remove(struct block *b) { dm_list_del(&b->hash); } @@ -363,30 +383,30 @@ static unsigned _calc_nr_buckets(unsigned nr_blocks) return r; } -static int _hash_table_init(struct bcache *cache, unsigned nr_entries) +static bool _hash_table_init(struct bcache *cache, unsigned nr_entries) { unsigned i; cache->nr_buckets = _calc_nr_buckets(nr_entries); cache->hash_mask = cache->nr_buckets - 1; - cache->buckets = malloc(cache->nr_buckets * sizeof(*cache->buckets)); + cache->buckets = dm_malloc(cache->nr_buckets * sizeof(*cache->buckets)); if (!cache->buckets) - return -ENOMEM; + return false; for (i = 0; i < cache->nr_buckets; i++) dm_list_init(cache->buckets + i); - return 0; + return true; } static void _hash_table_exit(struct bcache *cache) { - free(cache->buckets); + dm_free(cache->buckets); } //---------------------------------------------------------------- -static int _init_free_list(struct bcache *cache, unsigned count) +static bool _init_free_list(struct bcache *cache, unsigned count) { unsigned i; size_t block_size = cache->block_sectors << SECTOR_SHIFT; @@ -395,13 +415,13 @@ static int _init_free_list(struct bcache *cache, unsigned count) /* Allocate the data for each block. We page align the data. 
*/ if (!data) - return -ENOMEM; + return false; cache->raw_data = data; - cache->raw_blocks = malloc(count * sizeof(*cache->raw_blocks)); + cache->raw_blocks = dm_malloc(count * sizeof(*cache->raw_blocks)); if (!cache->raw_blocks) - free(cache->raw_data); + dm_free(cache->raw_data); for (i = 0; i < count; i++) { struct block *b = cache->raw_blocks + i; @@ -410,13 +430,13 @@ static int _init_free_list(struct bcache *cache, unsigned count) dm_list_add(&cache->free, &b->list); } - return 0; + return true; } static void _exit_free_list(struct bcache *cache) { - free(cache->raw_data); - free(cache->raw_blocks); + dm_free(cache->raw_data); + dm_free(cache->raw_blocks); } static struct block *_alloc_block(struct bcache *cache) @@ -519,7 +539,7 @@ static void _complete_io(void *context, int err) } } -static int _wait_io(struct bcache *cache) +static bool _wait_io(struct bcache *cache) { return _engine_wait(cache->engine, _complete_io); } @@ -646,7 +666,7 @@ static struct block *_lookup_or_read_block(struct bcache *cache, // FIXME: this is insufficient. We need to also catch a read // lock of a write locked block. Ref count needs to distinguish. if (b->ref_count && (flags & (GF_DIRTY | GF_ZERO))) { - log_err("concurrent write lock attempted"); + log_warn("concurrent write lock attempted"); return NULL; } @@ -706,10 +726,9 @@ static void _preemptive_writeback(struct bcache *cache) *--------------------------------------------------------------*/ struct bcache *bcache_create(sector_t block_sectors, unsigned nr_cache_blocks) { - int r; struct bcache *cache; - cache = malloc(sizeof(*cache)); + cache = dm_malloc(sizeof(*cache)); if (!cache) return NULL; @@ -718,7 +737,7 @@ struct bcache *bcache_create(sector_t block_sectors, unsigned nr_cache_blocks) cache->engine = _engine_create(nr_cache_blocks < 1024u ? 
nr_cache_blocks : 1024u); if (!cache->engine) { - free(cache); + dm_free(cache); return NULL; } @@ -732,9 +751,10 @@ struct bcache *bcache_create(sector_t block_sectors, unsigned nr_cache_blocks) dm_list_init(&cache->clean); dm_list_init(&cache->io_pending); - if (_hash_table_init(cache, nr_cache_blocks)) { + if (!_hash_table_init(cache, nr_cache_blocks)) { _engine_destroy(cache->engine); - free(cache); + dm_free(cache); + return NULL; } cache->read_hits = 0; @@ -744,11 +764,11 @@ struct bcache *bcache_create(sector_t block_sectors, unsigned nr_cache_blocks) cache->write_misses = 0; cache->prefetches = 0; - r = _init_free_list(cache, nr_cache_blocks); - if (r) { + if (!_init_free_list(cache, nr_cache_blocks)) { _engine_destroy(cache->engine); _hash_table_exit(cache); - free(cache); + dm_free(cache); + return NULL; } return cache; @@ -757,14 +777,14 @@ struct bcache *bcache_create(sector_t block_sectors, unsigned nr_cache_blocks) void bcache_destroy(struct bcache *cache) { if (cache->nr_locked) - log_warn("some blocks are still locked\n"); + log_warn("some blocks are still locked"); bcache_flush(cache); _wait_all(cache); _exit_free_list(cache); _hash_table_exit(cache); _engine_destroy(cache->engine); - free(cache); + dm_free(cache); } void bcache_prefetch(struct bcache *cache, int fd, block_address index) @@ -794,14 +814,14 @@ bool bcache_get(struct bcache *cache, int fd, block_address index, } *result = NULL; - log_err("couldn't get block"); + log_warn("couldn't get block"); return false; } void bcache_put(struct block *b) { if (!b->ref_count) { - log_err("ref count on bcache block already zero"); + log_warn("ref count on bcache block already zero"); return; } diff --git a/lib/device/bcache.h b/lib/device/bcache.h index 1f4262e96..322774469 100644 --- a/lib/device/bcache.h +++ b/lib/device/bcache.h @@ -1,3 +1,17 @@ +/* + * Copyright (C) 2018 Red Hat, Inc. All rights reserved. + * + * This file is part of LVM2. 
+ * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU Lesser General Public License v.2.1. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + #ifndef BCACHE_H #define BCACHE_H From 7a475bef323cae74d602f2aae8074b106ddb3f4b Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 31 Jan 2018 10:04:17 +0000 Subject: [PATCH 04/87] [build] Quieten the build down It was hard to see warnings with the long command lines scrolling by so quickly. Use 'make V=1' if you need to see all the gritty details. --- make.tmpl.in | 62 +++++++++++++++++++++++------------- man/Makefile.in | 81 ++++++++++++++++++++++------------------------- tools/Makefile.in | 73 +++++++++++++++++++++++++++--------------- 3 files changed, 125 insertions(+), 91 deletions(-) diff --git a/make.tmpl.in b/make.tmpl.in index bdf234918..75134caa7 100644 --- a/make.tmpl.in +++ b/make.tmpl.in @@ -13,6 +13,12 @@ # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +ifeq ($(V),1) + Q= +else + Q=@ +endif + SHELL = @SHELL@ @SET_MAKE@ @@ -438,59 +444,70 @@ endif .LIBPATTERNS = lib%.so lib%.a %.o: %.c - $(CC) -c $(INCLUDES) $(VALGRIND_CFLAGS) $(PROGS_CFLAGS) $(DEFS) $(DEFS_$@) $(WFLAGS) $(WCFLAGS) $(CFLAGS) $(CFLAGS_$@) $< -o $@ + @echo " [CC] $<" + $(Q) $(CC) -c $(INCLUDES) $(VALGRIND_CFLAGS) $(PROGS_CFLAGS) $(DEFS) $(DEFS_$@) $(WFLAGS) $(WCFLAGS) $(CFLAGS) $(CFLAGS_$@) $< -o $@ %.o: %.cpp - $(CXX) -c $(INCLUDES) $(VALGRIND_CFLAGS) $(DEFS) $(DEFS_$@) $(WFLAGS) $(CXXFLAGS) $(CXXFLAGS_$@) $< -o $@ + @echo " [CXX] $<" + $(Q) $(CXX) -c $(INCLUDES) $(VALGRIND_CFLAGS) $(DEFS) $(DEFS_$@) $(WFLAGS) $(CXXFLAGS) $(CXXFLAGS_$@) $< -o $@ %.pot: %.c 
Makefile - $(CC) -E $(INCLUDES) $(VALGRIND_CFLAGS) $(PROGS_CFLAGS) -include $(top_builddir)/include/pogen.h $(DEFS) $(WFLAGS) $(CFLAGS) $< >$@ + @echo " [CC] $@" + $(Q) $(CC) -E $(INCLUDES) $(VALGRIND_CFLAGS) $(PROGS_CFLAGS) -include $(top_builddir)/include/pogen.h $(DEFS) $(WFLAGS) $(CFLAGS) $< >$@ %.so: %.o - $(CC) -c $(CFLAGS) $(CLDFLAGS) $< $(LIBS) -o $@ + @echo " [CC] $<" + $(Q) $(CC) -c $(CFLAGS) $(CLDFLAGS) $< $(LIBS) -o $@ ifneq (,$(LIB_SHARED)) TARGETS += $(LIB_SHARED).$(LIB_VERSION) $(LIB_SHARED).$(LIB_VERSION): $(OBJECTS) $(LDDEPS) + @echo " [CC] $@" ifeq ("@LIB_SUFFIX@","so") - $(CC) -shared -Wl,-soname,$(notdir $@) \ + $(Q) $(CC) -shared -Wl,-soname,$(notdir $@) \ $(CFLAGS) $(CLDFLAGS) $(OBJECTS) $(LIBS) -o $@ endif ifeq ("@LIB_SUFFIX@","dylib") - $(CC) -dynamiclib -dylib_current_version,$(LIB_VERSION) \ + $(Q) $(CC) -dynamiclib -dylib_current_version,$(LIB_VERSION) \ $(CFLAGS) $(CLDFLAGS) $(OBJECTS) $(LIBS) -o $@ endif $(LIB_SHARED): $(LIB_SHARED).$(LIB_VERSION) - $(LN_S) -f $( /dev/null %.d: %.c $(INC_LNS) - $(MKDIR_P) $(dir $@); \ + @echo " [DEP] $<" + $(Q) $(MKDIR_P) $(dir $@); \ set -e; \ FILE=`echo $@ | sed 's/\\//\\\\\\//g;s/\\.d//g'`; \ DEPS=`echo $(DEPS) | sed -e 's/\\//\\\\\\//g'`; \ @@ -501,7 +518,8 @@ $(LIB_STATIC): $(OBJECTS) [ -s $@ ] || $(RM) $@ %.mo: %.po - $(MSGFMT) -o $@ $< + @echo " [MSGFMT] $<" + $(Q) $(MSGFMT) -o $@ $< CLEAN_TARGETS += \ $(SOURCES:%.c=%.d) $(SOURCES:%.c=%.gcno) $(SOURCES:%.c=%.gcda) \ @@ -523,7 +541,7 @@ endif $(RM) $(DISTCLEAN_TARGETS) Makefile .exported_symbols_generated: $(EXPORTED_HEADER) .exported_symbols $(DEPS) - set -e; \ + $(Q) set -e; \ ( cat $(srcdir)/.exported_symbols; \ if test -n "$(EXPORTED_HEADER)"; then \ $(CC) -E -P $(INCLUDES) $(DEFS) $(EXPORTED_HEADER) | \ @@ -536,13 +554,13 @@ EXPORTED_SYMBOLS := $(wildcard $(srcdir)/.exported_symbols.Base $(srcdir)/.expor .export.sym: .exported_symbols_generated $(EXPORTED_SYMBOLS) ifeq (,$(firstword $(EXPORTED_SYMBOLS))) - set -e; (echo "Base {"; echo " 
global:";\ + $(Q) set -e; (echo "Base {"; echo " global:";\ $(SED) "s/^/ /;s/$$/;/" $<;\ echo "};";\ echo "Local {"; echo " local:"; echo " *;"; echo "};";\ ) > $@ else - set -e;\ + $(Q) set -e;\ R=$$($(SORT) $^ | uniq -u);\ test -z "$$R" || { echo "Mismatch between symbols in shared library and lists in .exported_symbols.* files: $$R"; false; } ;\ ( for i in $$(echo $(EXPORTED_SYMBOLS) | tr ' ' '\n' | $(SORT) -rnt_ -k5 ); do\ diff --git a/man/Makefile.in b/man/Makefile.in index cd1106f11..a40e1689e 100644 --- a/man/Makefile.in +++ b/man/Makefile.in @@ -16,6 +16,12 @@ srcdir = @srcdir@ top_srcdir = @top_srcdir@ top_builddir = @top_builddir@ +ifeq ($(V),1) + Q= +else + Q=@ +endif + FSADMMAN = fsadm.8 BLKDEACTIVATEMAN = blkdeactivate.8 DMEVENTDMAN = dmeventd.8 @@ -46,20 +52,6 @@ MAN8DM=dmsetup.8 dmstats.8 MAN8CLUSTER= MAN8SYSTEMD_GENERATORS=lvm2-activation-generator.8 -ifeq ("$(origin V)", "command line") - BUILD_VERBOSE = $(V) -endif -ifndef BUILD_VERBOSE - BUILD_VERBOSE = 0 -endif - -ifeq ($(BUILD_VERBOSE),1) - Q = -else - Q = @ -endif - - ifeq ($(MAKECMDGOALS),all_man) MAN_ALL="yes" endif @@ -164,13 +156,6 @@ SEE_ALSO=$(srcdir)/see_also.end .PRECIOUS: %.8_gen %.8_gen: $(srcdir)/%.8_des $(srcdir)/%.8_end $(MANGENERATOR) $(TESTMAN) - $(Q)( \ - if [ ! -s $(TESTMAN) ] ; then \ - echo "Copying pre-generated template $@" ; \ - else \ - echo "Generating template $@" ; \ - fi \ - ) $(Q)set -e ; ( \ if [ ! 
-s $(TESTMAN) ] ; then \ cat $(srcdir)/$(@:%.8_gen=%.8_pregen) ; \ @@ -184,7 +169,6 @@ SEE_ALSO=$(srcdir)/see_also.end ) > $@ define SUBSTVARS -$(Q)echo "Generating $@" $(Q)$(SED) -e "s+#VERSION#+$(LVM_VERSION)+" \ -e "s+#DEFAULT_SYS_DIR#+$(DEFAULT_SYS_DIR)+" \ -e "s+#DEFAULT_ARCHIVE_DIR#+$(DEFAULT_ARCHIVE_DIR)+" \ @@ -237,51 +221,62 @@ $(Q)$(SED) -i -e "s+\([ [:alpha:]]\)-\{7\}+\1\\\-\\\-\\\-\\\-\\\-\\\-\\\-+g" \ endef %.5: $(srcdir)/%.5_main - $(SUBSTVARS) - $(ESCAPEHYPHENS) + @echo " [MAN] $@" + $(Q) $(SUBSTVARS) + $(Q) $(ESCAPEHYPHENS) %.7: $(srcdir)/%.7_main - $(SUBSTVARS) - $(ESCAPEHYPHENS) + @echo " [MAN] $@" + $(Q) $(SUBSTVARS) + $(Q) $(ESCAPEHYPHENS) %.8: $(srcdir)/%.8_main - $(SUBSTVARS) - $(ESCAPEHYPHENS) + @echo " [MAN] $@" + $(Q) $(SUBSTVARS) + $(Q) $(ESCAPEHYPHENS) %.8: %.8_gen - $(SUBSTVARS) - $(ESCAPEHYPHENS) + @echo " [MAN] $@" + $(Q) $(SUBSTVARS) + $(Q) $(ESCAPEHYPHENS) $(MAN8SO): lvmconfig.8 - echo ".so $<" > $@ + @echo " [MAN] $@" + $(Q) echo ".so $<" > $@ install_man5: $(MAN5) - $(INSTALL) -d $(MAN5DIR) - $(INSTALL_DATA) $(MAN5) $(MAN5DIR)/ + @echo " [INSTALL] $<" + $(Q) $(INSTALL) -d $(MAN5DIR) + $(Q) $(INSTALL_DATA) $(MAN5) $(MAN5DIR)/ install_man7: $(MAN7) - $(INSTALL) -d $(MAN7DIR) - $(INSTALL_DATA) $(MAN7) $(MAN7DIR)/ + @echo " [INSTALL] $<" + $(Q) $(INSTALL) -d $(MAN7DIR) + $(Q) $(INSTALL_DATA) $(MAN7) $(MAN7DIR)/ install_man8: $(MAN8) $(MAN8SO) - $(INSTALL) -d $(MAN8DIR) - $(INSTALL_DATA) $(MAN8) $(MAN8SO) $(MAN8DIR)/ + @echo " [INSTALL] $<" + $(Q) $(INSTALL) -d $(MAN8DIR) + $(Q) $(INSTALL_DATA) $(MAN8) $(MAN8SO) $(MAN8DIR)/ install_lvm2: install_man5 install_man7 install_man8 install_cluster: $(MAN8CLUSTER) ifdef MAN8CLUSTER - $(INSTALL) -d $(MAN8DIR) - $(INSTALL_DATA) $(MAN8CLUSTER) $(MAN8DIR)/ + @echo " [INSTALL] $<" + $(Q) $(INSTALL) -d $(MAN8DIR) + $(Q) $(INSTALL_DATA) $(MAN8CLUSTER) $(MAN8DIR)/ endif install_device-mapper: $(MAN8DM) - $(INSTALL) -d $(MAN8DIR) - $(INSTALL_DATA) $(MAN8DM) $(MAN8DIR)/ + @echo " [INSTALL] $<" + $(Q) 
$(INSTALL) -d $(MAN8DIR) + $(Q) $(INSTALL_DATA) $(MAN8DM) $(MAN8DIR)/ install_systemd_generators: $(MAN8SYSTEMD_GENERATORS) - $(INSTALL) -d $(MAN8DIR) - $(INSTALL_DATA) $(MAN8SYSTEMD_GENERATORS) $(MAN8DIR)/ + @echo " [INSTALL] $<" + $(Q) $(INSTALL) -d $(MAN8DIR) + $(Q) $(INSTALL_DATA) $(MAN8SYSTEMD_GENERATORS) $(MAN8DIR)/ install: install_lvm2 install_device-mapper install_cluster diff --git a/tools/Makefile.in b/tools/Makefile.in index de5b628f8..61c6f385e 100644 --- a/tools/Makefile.in +++ b/tools/Makefile.in @@ -123,11 +123,13 @@ device-mapper: $(TARGETS_DM) CFLAGS_dmsetup.o += $(UDEV_CFLAGS) $(EXTRA_EXEC_CFLAGS) dmsetup: dmsetup.o $(top_builddir)/libdm/libdevmapper.$(LIB_SUFFIX) - $(CC) $(CFLAGS) $(LDFLAGS) $(EXTRA_EXEC_LDFLAGS) $(ELDFLAGS) \ + @echo " [CC] $@" + $(Q) $(CC) $(CFLAGS) $(LDFLAGS) $(EXTRA_EXEC_LDFLAGS) $(ELDFLAGS) \ -o $@ dmsetup.o -ldevmapper $(LIBS) dmsetup.static: dmsetup.o $(interfacebuilddir)/libdevmapper.a - $(CC) $(CFLAGS) $(LDFLAGS) -static -L$(interfacebuilddir) \ + @echo " [CC] $@" + $(Q) $(CC) $(CFLAGS) $(LDFLAGS) -static -L$(interfacebuilddir) \ -o $@ dmsetup.o -ldevmapper $(M_LIBS) $(PTHREAD_LIBS) $(STATIC_LIBS) $(LIBS) all: device-mapper @@ -137,15 +139,18 @@ CFLAGS_lvm.o += $(EXTRA_EXEC_CFLAGS) INCLUDES += -I$(top_builddir)/tools lvm: $(OBJECTS) lvm.o $(top_builddir)/lib/liblvm-internal.a - $(CC) $(CFLAGS) $(LDFLAGS) $(EXTRA_EXEC_LDFLAGS) $(ELDFLAGS) -o $@ $(OBJECTS) lvm.o \ + @echo " [CC] $@" + $(Q) $(CC) $(CFLAGS) $(LDFLAGS) $(EXTRA_EXEC_LDFLAGS) $(ELDFLAGS) -o $@ $(OBJECTS) lvm.o \ $(LVMLIBS) $(READLINE_LIBS) $(LIBS) DEFS_man-generator.o += -DMAN_PAGE_GENERATOR man-generator.c: command.c - $(LN_S) -f $< $@ + @echo " [LN] $@" + $(Q) $(LN_S) -f $< $@ man-generator: man-generator.o + @echo " [CC] $@" $(CC) $(CFLAGS) -o $@ $< ifeq ("@BUILD_LVMETAD@", "yes") @@ -153,32 +158,39 @@ lvm: $(top_builddir)/libdaemon/client/libdaemonclient.a endif lvm.static: $(OBJECTS) lvm-static.o $(top_builddir)/lib/liblvm-internal.a 
$(interfacebuilddir)/libdevmapper.a - $(CC) $(CFLAGS) $(LDFLAGS) -static -L$(interfacebuilddir) -o $@ \ + @echo " [CC] $@" + $(Q) $(CC) $(CFLAGS) $(LDFLAGS) -static -L$(interfacebuilddir) -o $@ \ $(OBJECTS) lvm-static.o $(LVMLIBS) $(STATIC_LIBS) $(LIBS) liblvm2cmd.a: $(top_builddir)/lib/liblvm-internal.a $(OBJECTS) lvmcmdlib.o lvm2cmd.o - cat $(top_builddir)/lib/liblvm-internal.a > $@ - $(AR) rs $@ $(OBJECTS) lvmcmdlib.o lvm2cmd.o + @echo " [AR] $@" + $(Q) cat $(top_builddir)/lib/liblvm-internal.a > $@ + $(Q) $(AR) rs $@ $(OBJECTS) lvmcmdlib.o lvm2cmd.o > /dev/null liblvm2cmd-static.a: $(top_builddir)/lib/liblvm-internal.a $(OBJECTS) lvmcmdlib.o lvm2cmd-static.o - cat $(top_builddir)/lib/liblvm-internal.a > $@ - $(AR) rs $@ $(OBJECTS) lvmcmdlib.o lvm2cmd-static.o + @echo " [AR] $@" + $(Q) cat $(top_builddir)/lib/liblvm-internal.a > $@ + $(Q) $(AR) rs $@ $(OBJECTS) lvmcmdlib.o lvm2cmd-static.o > /dev/null liblvm2cmd.$(LIB_SUFFIX): liblvm2cmd.a $(LDDEPS) - $(CC) -shared -Wl,-soname,$@.$(LIB_VERSION) \ + @echo " [CC] $@" + $(Q) $(CC) -shared -Wl,-soname,$@.$(LIB_VERSION) \ $(CFLAGS) $(CLDFLAGS) -o $@ \ @CLDWHOLEARCHIVE@ liblvm2cmd.a @CLDNOWHOLEARCHIVE@ \ $(LVMLIBS) $(LIBS) liblvm2cmd.$(LIB_SUFFIX).$(LIB_VERSION): liblvm2cmd.$(LIB_SUFFIX) - $(LN_S) -f $< $@ + @echo " [LN] $@" + $(Q) $(LN_S) -f $< $@ .commands: $(srcdir)/commands.h $(srcdir)/cmdnames.h Makefile - $(CC) -E -P $(srcdir)/cmdnames.h 2> /dev/null | \ + @echo " [CC] $<" + $(Q) $(CC) -E -P $(srcdir)/cmdnames.h 2> /dev/null | \ $(EGREP) -v '^ *(|#.*|config|devtypes|dumpconfig|formats|fullreport|help|lastlog|lvmchange|lvpoll|pvdata|segtypes|systemid|tags|version) *$$' > .commands command-count.h: $(srcdir)/command-lines.in Makefile - set -o pipefail && \ + @echo " [GEN] $@" + $(Q) set -o pipefail && \ ( cat $(top_srcdir)/tools/license.inc && \ echo "/* Do not edit. This file is generated by the Makefile. 
*/" && \ echo -n "#define COMMAND_COUNT " && \ @@ -186,7 +198,8 @@ command-count.h: $(srcdir)/command-lines.in Makefile ) > $@ cmds.h: $(srcdir)/command-lines.in Makefile - set -o pipefail && \ + @echo " [GEN] $@" + $(Q) set -o pipefail && \ ( cat $(top_srcdir)/tools/license.inc && \ echo "/* Do not edit. This file is generated by the Makefile. */" && \ echo "cmd(CMD_NONE, none)" && \ @@ -195,7 +208,8 @@ cmds.h: $(srcdir)/command-lines.in Makefile ) > $@ command-lines-input.h: $(srcdir)/command-lines.in Makefile - set -o pipefail && \ + @echo " [GEN] $@" + $(Q) set -o pipefail && \ ( cat $(top_srcdir)/tools/license.inc && \ echo "/* Do not edit. This file is generated by the Makefile. */" && \ echo -en "const char _command_input[] =\n\n\"" && \ @@ -217,18 +231,22 @@ endif install_dmsetup_dynamic install_dmsetup_static install_cmdlib_include: $(srcdir)/lvm2cmd.h - $(INSTALL_DATA) -D $< $(includedir)/$( Date: Wed, 31 Jan 2018 11:28:51 +0000 Subject: [PATCH 05/87] [unit tests] remove old unit tests that weren't built or run. 
--- configure | 5 +- configure.in | 3 - old-tests/config/.gitignore | 1 - old-tests/config/Makefile | 5 - old-tests/config/config_t.c | 37 -- old-tests/config/empty_array.conf | 1 - old-tests/config/vg0 | 169 ----- old-tests/datastruct/hash_t.c | 106 ---- old-tests/dev-mgr/dev_cache_t.c | 54 -- old-tests/device/Makefile.in | 29 - old-tests/device/dev_cache_t.c | 70 --- old-tests/device/fill_device.c | 27 - old-tests/device/random.c | 116 ---- old-tests/device/random.h | 32 - old-tests/filters/Makefile.in | 35 -- old-tests/filters/pfilter_t.c | 121 ---- old-tests/filters/rfilter_t.c | 92 --- old-tests/filters/sample.cfg | 21 - old-tests/format1/Makefile.in | 52 -- old-tests/format1/get_pvs_t.c | 73 --- old-tests/format1/get_vgs_t.c | 69 --- old-tests/format1/pretty_print.c | 86 --- old-tests/format1/pretty_print.h | 28 - old-tests/format1/read_pv_t.c | 75 --- old-tests/format1/read_vg_t.c | 75 --- old-tests/format1/write_vg_t.c | 77 --- old-tests/mm/Makefile.in | 33 - old-tests/mm/dbg_malloc_t.c | 156 ----- unit-tests/datastruct/Makefile.in | 32 - unit-tests/datastruct/TESTS | 1 - unit-tests/datastruct/bitset_t.c | 133 ---- unit-tests/mm/Makefile.in | 31 - unit-tests/mm/TESTS | 1 - unit-tests/mm/check_results | 31 - unit-tests/mm/pool_valgrind_t.c | 181 ------ unit-tests/regex/Makefile.in | 37 -- unit-tests/regex/TESTS | 3 - unit-tests/regex/dev_patterns | 2 - unit-tests/regex/devices.list | 880 --------------------------- unit-tests/regex/matcher_t.c | 156 ----- unit-tests/regex/matcher_t.expected | 16 - unit-tests/regex/matcher_t.expected2 | 1 - unit-tests/regex/matcher_t.expected3 | 3 - unit-tests/regex/nonprint_input | 4 - unit-tests/regex/nonprint_regexes | 3 - unit-tests/regex/parse_t.c | 118 ---- unit-tests/regex/random_regexes | 100 --- 47 files changed, 1 insertion(+), 3380 deletions(-) delete mode 100644 old-tests/config/.gitignore delete mode 100644 old-tests/config/Makefile delete mode 100644 old-tests/config/config_t.c delete mode 100644 
old-tests/config/empty_array.conf delete mode 100644 old-tests/config/vg0 delete mode 100644 old-tests/datastruct/hash_t.c delete mode 100644 old-tests/dev-mgr/dev_cache_t.c delete mode 100644 old-tests/device/Makefile.in delete mode 100644 old-tests/device/dev_cache_t.c delete mode 100644 old-tests/device/fill_device.c delete mode 100644 old-tests/device/random.c delete mode 100644 old-tests/device/random.h delete mode 100644 old-tests/filters/Makefile.in delete mode 100644 old-tests/filters/pfilter_t.c delete mode 100644 old-tests/filters/rfilter_t.c delete mode 100644 old-tests/filters/sample.cfg delete mode 100644 old-tests/format1/Makefile.in delete mode 100644 old-tests/format1/get_pvs_t.c delete mode 100644 old-tests/format1/get_vgs_t.c delete mode 100644 old-tests/format1/pretty_print.c delete mode 100644 old-tests/format1/pretty_print.h delete mode 100644 old-tests/format1/read_pv_t.c delete mode 100644 old-tests/format1/read_vg_t.c delete mode 100644 old-tests/format1/write_vg_t.c delete mode 100644 old-tests/mm/Makefile.in delete mode 100644 old-tests/mm/dbg_malloc_t.c delete mode 100644 unit-tests/datastruct/Makefile.in delete mode 100644 unit-tests/datastruct/TESTS delete mode 100644 unit-tests/datastruct/bitset_t.c delete mode 100644 unit-tests/mm/Makefile.in delete mode 100644 unit-tests/mm/TESTS delete mode 100755 unit-tests/mm/check_results delete mode 100644 unit-tests/mm/pool_valgrind_t.c delete mode 100644 unit-tests/regex/Makefile.in delete mode 100644 unit-tests/regex/TESTS delete mode 100644 unit-tests/regex/dev_patterns delete mode 100644 unit-tests/regex/devices.list delete mode 100644 unit-tests/regex/matcher_t.c delete mode 100644 unit-tests/regex/matcher_t.expected delete mode 100644 unit-tests/regex/matcher_t.expected2 delete mode 100644 unit-tests/regex/matcher_t.expected3 delete mode 100644 unit-tests/regex/nonprint_input delete mode 100644 unit-tests/regex/nonprint_regexes delete mode 100644 unit-tests/regex/parse_t.c delete mode 
100644 unit-tests/regex/random_regexes diff --git a/configure b/configure index 7f412492c..50110897c 100755 --- a/configure +++ b/configure @@ -15860,7 +15860,7 @@ _ACEOF ################################################################################ -ac_config_files="$ac_config_files Makefile make.tmpl daemons/Makefile daemons/clvmd/Makefile daemons/cmirrord/Makefile daemons/dmeventd/Makefile daemons/dmeventd/libdevmapper-event.pc daemons/dmeventd/plugins/Makefile daemons/dmeventd/plugins/lvm2/Makefile daemons/dmeventd/plugins/raid/Makefile daemons/dmeventd/plugins/mirror/Makefile daemons/dmeventd/plugins/snapshot/Makefile daemons/dmeventd/plugins/thin/Makefile daemons/dmfilemapd/Makefile daemons/lvmdbusd/Makefile daemons/lvmdbusd/lvmdbusd daemons/lvmdbusd/lvmdb.py daemons/lvmdbusd/lvm_shell_proxy.py daemons/lvmdbusd/path.py daemons/lvmetad/Makefile daemons/lvmpolld/Makefile daemons/lvmlockd/Makefile conf/Makefile conf/example.conf conf/lvmlocal.conf conf/command_profile_template.profile conf/metadata_profile_template.profile include/.symlinks include/Makefile lib/Makefile lib/format1/Makefile lib/format_pool/Makefile lib/locking/Makefile lib/mirror/Makefile include/lvm-version.h lib/raid/Makefile lib/snapshot/Makefile lib/thin/Makefile lib/cache_segtype/Makefile libdaemon/Makefile libdaemon/client/Makefile libdaemon/server/Makefile libdm/Makefile libdm/libdevmapper.pc liblvm/Makefile liblvm/liblvm2app.pc man/Makefile po/Makefile python/Makefile python/setup.py scripts/blkdeactivate.sh scripts/blk_availability_init_red_hat scripts/blk_availability_systemd_red_hat.service scripts/clvmd_init_red_hat scripts/cmirrord_init_red_hat scripts/com.redhat.lvmdbus1.service scripts/dm_event_systemd_red_hat.service scripts/dm_event_systemd_red_hat.socket scripts/lvm2_cluster_activation_red_hat.sh scripts/lvm2_cluster_activation_systemd_red_hat.service scripts/lvm2_clvmd_systemd_red_hat.service scripts/lvm2_cmirrord_systemd_red_hat.service 
scripts/lvm2_lvmdbusd_systemd_red_hat.service scripts/lvm2_lvmetad_init_red_hat scripts/lvm2_lvmetad_systemd_red_hat.service scripts/lvm2_lvmetad_systemd_red_hat.socket scripts/lvm2_lvmpolld_init_red_hat scripts/lvm2_lvmpolld_systemd_red_hat.service scripts/lvm2_lvmpolld_systemd_red_hat.socket scripts/lvm2_lvmlockd_systemd_red_hat.service scripts/lvm2_lvmlocking_systemd_red_hat.service scripts/lvm2_monitoring_init_red_hat scripts/lvm2_monitoring_systemd_red_hat.service scripts/lvm2_pvscan_systemd_red_hat@.service scripts/lvm2_tmpfiles_red_hat.conf scripts/lvmdump.sh scripts/Makefile test/Makefile test/api/Makefile test/unit/Makefile tools/Makefile udev/Makefile unit-tests/datastruct/Makefile unit-tests/regex/Makefile unit-tests/mm/Makefile" +ac_config_files="$ac_config_files Makefile make.tmpl daemons/Makefile daemons/clvmd/Makefile daemons/cmirrord/Makefile daemons/dmeventd/Makefile daemons/dmeventd/libdevmapper-event.pc daemons/dmeventd/plugins/Makefile daemons/dmeventd/plugins/lvm2/Makefile daemons/dmeventd/plugins/raid/Makefile daemons/dmeventd/plugins/mirror/Makefile daemons/dmeventd/plugins/snapshot/Makefile daemons/dmeventd/plugins/thin/Makefile daemons/dmfilemapd/Makefile daemons/lvmdbusd/Makefile daemons/lvmdbusd/lvmdbusd daemons/lvmdbusd/lvmdb.py daemons/lvmdbusd/lvm_shell_proxy.py daemons/lvmdbusd/path.py daemons/lvmetad/Makefile daemons/lvmpolld/Makefile daemons/lvmlockd/Makefile conf/Makefile conf/example.conf conf/lvmlocal.conf conf/command_profile_template.profile conf/metadata_profile_template.profile include/.symlinks include/Makefile lib/Makefile lib/format1/Makefile lib/format_pool/Makefile lib/locking/Makefile lib/mirror/Makefile include/lvm-version.h lib/raid/Makefile lib/snapshot/Makefile lib/thin/Makefile lib/cache_segtype/Makefile libdaemon/Makefile libdaemon/client/Makefile libdaemon/server/Makefile libdm/Makefile libdm/libdevmapper.pc liblvm/Makefile liblvm/liblvm2app.pc man/Makefile po/Makefile python/Makefile python/setup.py 
scripts/blkdeactivate.sh scripts/blk_availability_init_red_hat scripts/blk_availability_systemd_red_hat.service scripts/clvmd_init_red_hat scripts/cmirrord_init_red_hat scripts/com.redhat.lvmdbus1.service scripts/dm_event_systemd_red_hat.service scripts/dm_event_systemd_red_hat.socket scripts/lvm2_cluster_activation_red_hat.sh scripts/lvm2_cluster_activation_systemd_red_hat.service scripts/lvm2_clvmd_systemd_red_hat.service scripts/lvm2_cmirrord_systemd_red_hat.service scripts/lvm2_lvmdbusd_systemd_red_hat.service scripts/lvm2_lvmetad_init_red_hat scripts/lvm2_lvmetad_systemd_red_hat.service scripts/lvm2_lvmetad_systemd_red_hat.socket scripts/lvm2_lvmpolld_init_red_hat scripts/lvm2_lvmpolld_systemd_red_hat.service scripts/lvm2_lvmpolld_systemd_red_hat.socket scripts/lvm2_lvmlockd_systemd_red_hat.service scripts/lvm2_lvmlocking_systemd_red_hat.service scripts/lvm2_monitoring_init_red_hat scripts/lvm2_monitoring_systemd_red_hat.service scripts/lvm2_pvscan_systemd_red_hat@.service scripts/lvm2_tmpfiles_red_hat.conf scripts/lvmdump.sh scripts/Makefile test/Makefile test/api/Makefile test/unit/Makefile tools/Makefile udev/Makefile" cat >confcache <<\_ACEOF # This file is a shell script that caches the results of configure @@ -16637,9 +16637,6 @@ do "test/unit/Makefile") CONFIG_FILES="$CONFIG_FILES test/unit/Makefile" ;; "tools/Makefile") CONFIG_FILES="$CONFIG_FILES tools/Makefile" ;; "udev/Makefile") CONFIG_FILES="$CONFIG_FILES udev/Makefile" ;; - "unit-tests/datastruct/Makefile") CONFIG_FILES="$CONFIG_FILES unit-tests/datastruct/Makefile" ;; - "unit-tests/regex/Makefile") CONFIG_FILES="$CONFIG_FILES unit-tests/regex/Makefile" ;; - "unit-tests/mm/Makefile") CONFIG_FILES="$CONFIG_FILES unit-tests/mm/Makefile" ;; *) as_fn_error $? 
"invalid argument: \`$ac_config_target'" "$LINENO" 5;; esac diff --git a/configure.in b/configure.in index b8e162351..0d268e83f 100644 --- a/configure.in +++ b/configure.in @@ -2254,9 +2254,6 @@ test/api/Makefile test/unit/Makefile tools/Makefile udev/Makefile -unit-tests/datastruct/Makefile -unit-tests/regex/Makefile -unit-tests/mm/Makefile ]) AC_OUTPUT diff --git a/old-tests/config/.gitignore b/old-tests/config/.gitignore deleted file mode 100644 index 24600083d..000000000 --- a/old-tests/config/.gitignore +++ /dev/null @@ -1 +0,0 @@ -!Makefile diff --git a/old-tests/config/Makefile b/old-tests/config/Makefile deleted file mode 100644 index d753b646c..000000000 --- a/old-tests/config/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -config_t: config_t.c - gcc -g -I../../include config_t.c -L../../lib -llvm -o config_t - -clean: - rm config_t \ No newline at end of file diff --git a/old-tests/config/config_t.c b/old-tests/config/config_t.c deleted file mode 100644 index 9b9e56e60..000000000 --- a/old-tests/config/config_t.c +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Test program that reads, then writes a config file. 
- */ -#include - -#include "config.h" - -int main(int argc, char **argv) -{ - struct config_file *cf; - - if (argc != 2) { - fprintf(stderr, "Usage: %s \n", argv[0]); - exit(1); - } - - cf = create_config_file(); - if (cf == NULL) { - fprintf(stderr, "Couldn't create config_file object.\n"); - exit(1); - } - - - if (!read_config(cf, argv[1])) { - fprintf(stderr, "Couldn't read config file '%s'\n", argv[0]); - exit(1); - } - - if (!write_config(cf, "out")) { - fprintf(stderr, "Couldn't write config file 'out'\n"); - exit(1); - } - - destroy_config_file(cf); - dump_memory(); - return 0; -} diff --git a/old-tests/config/empty_array.conf b/old-tests/config/empty_array.conf deleted file mode 100644 index 0cc2f283f..000000000 --- a/old-tests/config/empty_array.conf +++ /dev/null @@ -1 +0,0 @@ -foo = [] \ No newline at end of file diff --git a/old-tests/config/vg0 b/old-tests/config/vg0 deleted file mode 100644 index bfa956789..000000000 --- a/old-tests/config/vg0 +++ /dev/null @@ -1,169 +0,0 @@ -# This file was originally generated by the LVM2 library -# Generated: Wed Jul 17 22:41:37 2002 - - -description = "Created *after* executing 'lvcreate --quiet -s -n snap -c 512k -L200M vg0/origin /dev/hda8'" -creation_time = 1026942097 - -vg0 { - id = "Qmd96y-771S-Esbb-Zp6u-8xo9-Cfmt-YvndHY" - seqno = 2 - status = ["RESIZEABLE", "READ", "WRITE"] - system_id = "reti1014805292" - extent_size = 8192 # 4 Megabytes - max_lv = 255 - max_pv = 255 - - physical_volumes { - - pv0 { - id = "8nRQub-EquY-VR1C-Ipdv-6hEO-FuFT-wnlN5R" - device = "/dev/discs/disc0/part8" # Hint only - - status = ["ALLOCATABLE"] - pe_start = 256 - pe_count = 501 # 1.95703 Gigabytes - } - - pv1 { - id = "mRU6Mf-z1Sv-Kuqw-Ct1v-eC42-mnqs-YD1RrL" - device = "/dev/discs/disc1/part2" # Hint only - - status = ["ALLOCATABLE"] - pe_start = 384 - pe_count = 7269 # 28.3945 Gigabytes - } - } - - logical_volumes { - - music { - id = "000000-0000-0000-0000-0000-0000-000000" - status = ["READ", "WRITE", "ALLOC_SIMPLE"] - 
read_ahead = 0 - segment_count = 2 - - segment1 { - start_extent = 0 - extent_count = 1024 # 4 Gigabytes - stripes = 1 - - areas = [ - "pv1", 0 - ] - } - segment2 { - start_extent = 1024 - extent_count = 2560 # 10 Gigabytes - stripes = 1 - - areas = [ - "pv1", 3584 - ] - } - } - - photos { - id = "000000-0000-0000-0000-0000-0000-000002" - status = ["READ", "WRITE", "ALLOC_SIMPLE"] - read_ahead = 0 - segment_count = 1 - - segment1 { - start_extent = 0 - extent_count = 1024 # 4 Gigabytes - stripes = 1 - - areas = [ - "pv1", 2048 - ] - } - } - - repositories { - id = "000000-0000-0000-0000-0000-0000-000003" - status = ["READ", "WRITE", "ALLOC_SIMPLE"] - read_ahead = 0 - segment_count = 1 - - segment1 { - start_extent = 0 - extent_count = 512 # 2 Gigabytes - stripes = 1 - - areas = [ - "pv1", 3072 - ] - } - } - - origin { - id = "000000-0000-0000-0000-0000-0000-000004" - status = ["READ", "WRITE", "ALLOC_SIMPLE"] - read_ahead = 0 - segment_count = 1 - - segment1 { - start_extent = 0 - extent_count = 50 # 200 Megabytes - stripes = 1 - - areas = [ - "pv1", 6144 - ] - } - } - - packages { - id = "000000-0000-0000-0000-0000-0000-000006" - status = ["READ", "WRITE", "ALLOC_SIMPLE"] - read_ahead = 0 - segment_count = 2 - - segment1 { - start_extent = 0 - extent_count = 451 # 1.76172 Gigabytes - stripes = 1 - - areas = [ - "pv0", 50 - ] - } - segment2 { - start_extent = 451 - extent_count = 573 # 2.23828 Gigabytes - stripes = 1 - - areas = [ - "pv1", 6194 - ] - } - } - - snap { - id = "000000-0000-0000-0000-0000-0000-000001" - status = ["READ", "WRITE", "ALLOC_SIMPLE"] - read_ahead = 0 - segment_count = 1 - - segment1 { - start_extent = 0 - extent_count = 50 # 200 Megabytes - stripes = 1 - - areas = [ - "pv0", 0 - ] - } - } - } - - snapshots { - - snapshot0 { - chunk_size = 1024 - origin = "origin" - cow_store = "snap" - } - } -} diff --git a/old-tests/datastruct/hash_t.c b/old-tests/datastruct/hash_t.c deleted file mode 100644 index 7c9e435ef..000000000 --- 
a/old-tests/datastruct/hash_t.c +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004 Red Hat, Inc. All rights reserved. - * - * This file is part of LVM2. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License v.2. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include - -static void _help(FILE *fp, const char *prog) -{ - fprintf(fp, "Usage : %s \n", prog); -} - -struct key_list { - struct key_list *next; - char key[1]; -}; - -static struct key_list *_create_word(int n) -{ - struct key_list *kl = dbg_malloc(sizeof(*kl) + 32); - snprintf(kl->key, 32, "abc%ddef%d", n, n); - kl->next = 0; - return kl; -} - -static struct key_list *_create_word_from_file(int n) -{ - char word[128], *ptr; - struct key_list *kl; - - if (!fgets(word, sizeof(word), stdin)) - return 0; - - for (ptr = word; *ptr; ptr++) { - if (*ptr == '\n') { - *ptr = 0; - break; - } - } - - kl = dbg_malloc(sizeof(*kl) + 32); - snprintf(kl->key, 32, "%s", word); - kl->next = 0; - return kl; -} - -static void _do_test(int table_size, int num_entries) -{ - int i; - hash_table_t ht = hash_create(table_size); - struct key_list *tmp, *key, *all = 0; - - for (i = 0; i < num_entries; i++) { - /* make up a word */ - if (!(key = _create_word_from_file(i))) { - log_error("Ran out of words !\n"); - exit(1); - } - - /* insert it */ - hash_insert(ht, key->key, key); - key->next = all; - all = key; - } - - for (key = all; key; key = key->next) { - tmp = (struct key_list *) hash_lookup(ht, key->key); - if (!tmp || (tmp != key)) { - log_error("lookup failed\n"); - exit(1); - } - } - - for (key = all; key; key = tmp) { - tmp = key->next; 
- dbg_free(key); - } - - hash_destroy(ht); -} - -int main(int argc, char **argv) -{ - init_log(); - - if (argc != 3) { - _help(stderr, argv[0]); - exit(1); - } - - _do_test(atoi(argv[1]), atoi(argv[2])); - - dump_memory(); - fin_log(); - return 0; -} diff --git a/old-tests/dev-mgr/dev_cache_t.c b/old-tests/dev-mgr/dev_cache_t.c deleted file mode 100644 index af9539f5c..000000000 --- a/old-tests/dev-mgr/dev_cache_t.c +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004 Red Hat, Inc. All rights reserved. - * - * This file is part of LVM2. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License v.2. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "dev-cache.h" -#include "log.h" - -#include - -int main(int argc, char **argv) -{ - int i; - struct device *dev; - struct dev_iter *iter; - - init_log(); - if (!dev_cache_init()) { - log_error("couldn't initialise dev_cache_init failed\n"); - exit(1); - } - - for (i = 1; i < argc; i++) { - if (!dev_cache_add_dir(argv[i])) { - log_error("couldn't add '%s' to dev_cache\n"); - exit(1); - } - } - - if (!(iter = dev_iter_create(NULL))) { - log_error("couldn't create iterator\n"); - exit(1); - } - - while ((dev = dev_iter_next(iter))) - printf("%s\n", dev->name); - - dev_iter_destroy(iter): - dev_cache_exit(); - - dump_memory(); - fin_log(); - return 0; -} diff --git a/old-tests/device/Makefile.in b/old-tests/device/Makefile.in deleted file mode 100644 index a389eb101..000000000 --- a/old-tests/device/Makefile.in +++ /dev/null @@ -1,29 +0,0 @@ -# -# Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. 
-# Copyright (C) 2004 Red Hat, Inc. All rights reserved. -# -# This file is part of LVM2. -# -# This copyrighted material is made available to anyone wishing to use, -# modify, copy, or redistribute it subject to the terms and conditions -# of the GNU General Public License v.2. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - -srcdir = @srcdir@ -top_srcdir = @top_srcdir@ -top_builddir = @top_builddir@ -VPATH = @srcdir@ - -SOURCES=\ - dev_cache_t.c - -TARGETS=dev_cache_t - -include $(top_builddir)/make.tmpl - -dev_cache_t: dev_cache_t.o $(top_srcdir)/lib/liblvm.a - $(CC) -o dev_cache_t dev_cache_t.o -L$(top_builddir)/lib -llvm - diff --git a/old-tests/device/dev_cache_t.c b/old-tests/device/dev_cache_t.c deleted file mode 100644 index c3d8c287d..000000000 --- a/old-tests/device/dev_cache_t.c +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004 Red Hat, Inc. All rights reserved. - * - * This file is part of LVM2. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License v.2. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "dev-cache.h" -#include "log.h" - -#include - -int main(int argc, char **argv) -{ - int i; - struct device *dev; - struct dev_iter *iter; - struct list_head *tmp; - struct dm_str_list *sl; - - if (argc < 2) { - fprintf(stderr, "usage: %s \n", argv[0]); - exit(1); - } - - init_log(stderr); - init_debug(_LOG_INFO); - - if (!dev_cache_init()) { - log_err("couldn't initialise dev_cache_init failed"); - exit(1); - } - - for (i = 1; i < argc; i++) { - if (!dev_cache_add_dir(argv[i])) { - log_err("couldn't add '%s' to dev_cache", argv[i]); - exit(1); - } - } - - if (!(iter = dev_iter_create(NULL))) { - log_err("couldn't create iterator"); - exit(1); - } - - while ((dev = dev_iter_get(iter))) { - printf("%s", dev->name); - - list_for_each(tmp, &dev->aliases) { - sl = list_entry(tmp, struct dm_str_list, list); - printf(", %s", sl->str); - } - printf("\n"); - } - - dev_iter_destroy(iter); - dev_cache_exit(); - - dump_memory(); - fin_log(); - return 0; -} diff --git a/old-tests/device/fill_device.c b/old-tests/device/fill_device.c deleted file mode 100644 index 5947b61bf..000000000 --- a/old-tests/device/fill_device.c +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004 Red Hat, Inc. All rights reserved. - * - * This file is part of LVM2. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License v.2. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "device.h" -#include "random.h" - -#include - - - - -int main(int argc, char **argv) -{ - -} diff --git a/old-tests/device/random.c b/old-tests/device/random.c deleted file mode 100644 index 96479f572..000000000 --- a/old-tests/device/random.c +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004 Red Hat, Inc. All rights reserved. - * - * This file is part of LVM2. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License v.2. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "random.h" -#include "log.h" - -int32_t _a[56]; -int32_t *_r; - -static inline int32_t _mod_diff(int32_t x, int32_t y) -{ - return (x - y) & 0x7fffffff; -} - -static int32_t _flip_cycle(void) -{ - int32_t *ii, *jj; - for (ii = _a + 1, jj = _a + 32; jj <= _a + 55; ii++, jj++) - *ii = _mod_diff(*ii, *jj); - - for (jj = _a + 1; ii <= _a + 55; ii++, jj++) - *ii = _mod_diff(*ii, *jj); - - _r = _a + 54; - return _a[55]; -} - -static void rand_init(int32_t seed) -{ - int64_t i; - int64_t prev = seed, next = 1; - - seed = prev = _mod_diff(prev, 0); /* strip the sign */ - _a[55] = prev; - for (i = 21; i; i = (i + 21) % 55) { - _a[i] = next; - next = _mod_diff(prev, next); - if(seed & 1) - seed = 0x40000000L + (seed >> 1); - else - seed >>= 1; - - next = _mod_diff(next, seed); - prev = _a[i]; - } - - _flip_cycle(); - _flip_cycle(); - _flip_cycle(); - _flip_cycle(); - _flip_cycle(); -} 
- -/* - * FIXME: move this to be an inline in the - * header. - */ -int32_t rand_get(void) -{ - return (*_r >= 0) ? *_r-- : _flip_cycle(); -} - - -/* - * just used by rand_check - */ -#define t31 0x80000000 -static int32_t _uniform(int32_t m) -{ - uint32_t t = t31 - (t31 % m); - int32_t r; - - do - r = next_rand(sc); - - while (t <= (uint32_t) r); - - return r % m; -} - -/* - * Checks I've copied the code correctly. - */ -int rand_check(void) -{ - int j; - - rand_init(-314159L); - - if (next_rand(sc) != 119318998) { - log_err("Random number generator failed check 1"); - return 0; - } - - for(j = 1; j <= 133; j++) - rand_get(); - - if (_uniform(0x55555555L) != 748103812) { - log_err("Random number generator failed check 2"); - return 0; - } - - return 1; -} diff --git a/old-tests/device/random.h b/old-tests/device/random.h deleted file mode 100644 index 4663988cd..000000000 --- a/old-tests/device/random.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004 Red Hat, Inc. All rights reserved. - * - * This file is part of LVM2. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License v.2. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -/* - * Random number generator snarfed from the - * Stanford Graphbase. - */ - -#ifndef _LVM_RANDOM_H -#define _LVM_RANDOM_H - -void rand_init(int32_t seed); -int32_t rand_get(void); - -/* - * Note this will reset the seed. 
- */ -int rand_check(void); - -#endif diff --git a/old-tests/filters/Makefile.in b/old-tests/filters/Makefile.in deleted file mode 100644 index 69e7ca907..000000000 --- a/old-tests/filters/Makefile.in +++ /dev/null @@ -1,35 +0,0 @@ -# -# Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. -# Copyright (C) 2004 Red Hat, Inc. All rights reserved. -# -# This file is part of LVM2. -# -# This copyrighted material is made available to anyone wishing to use, -# modify, copy, or redistribute it subject to the terms and conditions -# of the GNU General Public License v.2. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - -srcdir = @srcdir@ -top_srcdir = @top_srcdir@ -top_builddir = @top_builddir@ -VPATH = @srcdir@ - -SOURCES=\ - rfilter_t.c \ - pfilter_t.c - -TARGETS=\ - rfilter_t \ - pfilter_t - -include $(top_builddir)/make.tmpl - -rfilter_t: rfilter_t.o $(top_srcdir)/lib/liblvm.a - $(CC) -o rfilter_t rfilter_t.o -L$(top_builddir)/lib -llvm - -pfilter_t: pfilter_t.o $(top_srcdir)/lib/liblvm.a - $(CC) -o pfilter_t pfilter_t.o -L$(top_builddir)/lib -llvm - diff --git a/old-tests/filters/pfilter_t.c b/old-tests/filters/pfilter_t.c deleted file mode 100644 index e83c3ed5a..000000000 --- a/old-tests/filters/pfilter_t.c +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004 Red Hat, Inc. All rights reserved. - * - * This file is part of LVM2. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License v.2. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "filter-persistent.h" -#include "log.h" -#include "config.h" -#include "filter-regex.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -int main(int argc, char **argv) -{ - struct config_file *cft; - struct config_node *cn; - struct dev_filter *rfilter, *pfilter; - struct dev_iter *iter; - struct device *dev; - - if (argc < 2) { - fprintf(stderr, "Usage : %s \n", - argv[0]); - exit(1); - } - - init_log(stderr); - init_debug(_LOG_DEBUG); - - if (!dev_cache_init()) { - fprintf(stderr, "couldn't initialise dev_cache_init failed\n"); - exit(1); - } - - if (!dev_cache_add_dir("/dev")) { - fprintf(stderr, "couldn't add '/dev' to dev_cache\n"); - exit(1); - } - - if (!(cft = create_config_file())) { - fprintf(stderr, "couldn't create config file\n"); - exit(1); - } - - if (!read_config(cft, argv[1])) { - fprintf(stderr, "couldn't read config file\n"); - exit(1); - } - - if (!(cn = find_config_node(cft->root, "/devices/filter", '/'))) { - fprintf(stderr, "couldn't find filter section\n"); - exit(1); - } - - if (!(rfilter = regex_filter_create(cn->v))) { - fprintf(stderr, "couldn't build filter\n"); - exit(1); - } - - if (!(pfilter = persistent_filter_create(rfilter, "./pfilter.cfg"))) { - fprintf(stderr, "couldn't build filter\n"); - exit(1); - } - - if (!(iter = dev_iter_create(pfilter))) { - log_err("couldn't create iterator"); - exit(1); - } - - fprintf(stderr, "filling cache\n"); - while ((dev = dev_iter_get(iter))) - ; - dev_iter_destroy(iter); - - fprintf(stderr, "dumping\n"); - if (!persistent_filter_dump(pfilter)) { - fprintf(stderr, "couldn't dump pfilter\n"); - exit(1); - } - - fprintf(stderr, "loading\n"); - if (!persistent_filter_load(pfilter)) { - fprintf(stderr, "couldn't load pfilter\n"); - 
exit(1); - } - - if (!(iter = dev_iter_create(pfilter))) { - log_err("couldn't create iterator"); - exit(1); - } - - while ((dev = dev_iter_get(iter))) - printf("%s\n", dev_name(dev)); - - dev_iter_destroy(iter); - pfilter->destroy(pfilter); - dev_cache_exit(); - destroy_config_file(cft); - - dump_memory(); - fin_log(); - return 0; -} - diff --git a/old-tests/filters/rfilter_t.c b/old-tests/filters/rfilter_t.c deleted file mode 100644 index 488378db8..000000000 --- a/old-tests/filters/rfilter_t.c +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004 Red Hat, Inc. All rights reserved. - * - * This file is part of LVM2. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License v.2. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "filter-regex.h" -#include "config.h" -#include "log.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -int main(int argc, char **argv) -{ - struct config_file *cft; - struct config_node *cn; - struct dev_filter *filter; - struct dev_iter *iter; - struct device *dev; - - if (argc < 2) { - fprintf(stderr, "Usage : %s \n", argv[0]); - exit(1); - } - - init_log(stderr); - init_debug(_LOG_DEBUG); - - if (!(cft = create_config_file())) { - fprintf(stderr, "couldn't create config file\n"); - exit(1); - } - - if (!read_config(cft, argv[1])) { - fprintf(stderr, "couldn't read config file\n"); - exit(1); - } - - if (!(cn = find_config_node(cft->root, "/devices/filter", '/'))) { - fprintf(stderr, "couldn't find filter section\n"); - exit(1); - } - - if (!dev_cache_init()) { - fprintf(stderr, "couldn't initialise dev_cache_init 
failed\n"); - exit(1); - } - - if (!dev_cache_add_dir("/dev")) { - fprintf(stderr, "couldn't add '/dev' to dev_cache\n"); - exit(1); - } - - if (!(filter = regex_filter_create(cn->v))) { - fprintf(stderr, "couldn't build filter\n"); - exit(1); - } - - if (!(iter = dev_iter_create(filter))) { - log_err("couldn't create iterator"); - exit(1); - } - - while ((dev = dev_iter_get(iter))) - printf("%s\n", dev_name(dev)); - - dev_iter_destroy(iter); - filter->destroy(filter); - dev_cache_exit(); - destroy_config_file(cft); - - dump_memory(); - fin_log(); - return 0; -} - diff --git a/old-tests/filters/sample.cfg b/old-tests/filters/sample.cfg deleted file mode 100644 index 02f98dea8..000000000 --- a/old-tests/filters/sample.cfg +++ /dev/null @@ -1,21 +0,0 @@ -devices { - - # first match is final, eg. /dev/ide/cdrom - # get's rejected due to the first pattern - - filter=["r/cdrom/", # don't touch the music ! - "a/hd[a-d][0-9]+/", - "a/ide/", - "a/sd/", - "a/md/", - "a|loop/[0-9]+|", # accept devfs style loop back - "r/loop/", # and reject old style - "a/dasd/", - "a/dac960/", - "a/nbd/", - "a/ida/", - "a/cciss/", - "a/ubd/", - "r/.*/"] # reject all others - -} diff --git a/old-tests/format1/Makefile.in b/old-tests/format1/Makefile.in deleted file mode 100644 index e4b51d245..000000000 --- a/old-tests/format1/Makefile.in +++ /dev/null @@ -1,52 +0,0 @@ -# -# Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. -# Copyright (C) 2004 Red Hat, Inc. All rights reserved. -# -# This file is part of LVM2. -# -# This copyrighted material is made available to anyone wishing to use, -# modify, copy, or redistribute it subject to the terms and conditions -# of the GNU General Public License v.2. 
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - -srcdir = @srcdir@ -top_srcdir = @top_srcdir@ -top_builddir = @top_builddir@ -VPATH = @srcdir@ - -SOURCES=\ - read_vg_t.c \ - write_vg_t.c \ - pretty_print.c \ - get_pvs_t.c \ - read_pv_t.c \ - get_vgs_t.c - -TARGETS=\ - read_vg_t \ - write_vg_t \ - get_pvs_t \ - read_pv_t \ - get_vgs_t - -include $(top_builddir)/make.tmpl - -read_vg_t: read_vg_t.o pretty_print.o $(top_builddir)/lib/liblvm.a - $(CC) -o read_vg_t read_vg_t.o pretty_print.o -L$(top_builddir)/lib -llvm - -write_vg_t: write_vg_t.o pretty_print.o $(top_builddir)/lib/liblvm.a - $(CC) -o write_vg_t write_vg_t.o pretty_print.o \ - -L$(top_builddir)/lib -llvm - -get_pvs_t: get_pvs_t.o pretty_print.o $(top_builddir)/lib/liblvm.a - $(CC) -o get_pvs_t get_pvs_t.o pretty_print.o -L$(top_builddir)/lib -llvm - -read_pv_t: read_pv_t.o pretty_print.o $(top_builddir)/lib/liblvm.a - $(CC) -o read_pv_t read_pv_t.o pretty_print.o -L$(top_builddir)/lib -llvm - -get_vgs_t: get_vgs_t.o pretty_print.o $(top_builddir)/lib/liblvm.a - $(CC) -o get_vgs_t get_vgs_t.o pretty_print.o -L$(top_builddir)/lib -llvm - diff --git a/old-tests/format1/get_pvs_t.c b/old-tests/format1/get_pvs_t.c deleted file mode 100644 index d17f0b0df..000000000 --- a/old-tests/format1/get_pvs_t.c +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004 Red Hat, Inc. All rights reserved. - * - * This file is part of LVM2. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License v.2. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "log.h" -#include "format1.h" -#include "pretty_print.h" -#include "list.h" - -#include - -int main(int argc, char **argv) -{ - struct io_space *ios; - struct list_head *pvs, *tmp; - struct dm_pool *mem; - - init_log(stderr); - init_debug(_LOG_INFO); - - if (!dev_cache_init()) { - fprintf(stderr, "init of dev-cache failed\n"); - exit(1); - } - - if (!dev_cache_add_dir("/dev/loop")) { - fprintf(stderr, "couldn't add /dev to dir-cache\n"); - exit(1); - } - - if (!(mem = dm_pool_create(10 * 1024))) { - fprintf(stderr, "couldn't create pool\n"); - exit(1); - } - - ios = create_lvm1_format("/dev", mem, NULL); - - if (!ios) { - fprintf(stderr, "failed to create io_space for format1\n"); - exit(1); - } - - pvs = ios->get_pvs(ios); - - if (!pvs) { - fprintf(stderr, "couldn't read vg %s\n", argv[1]); - exit(1); - } - - list_for_each(tmp, pvs) { - struct pv_list *pvl = list_entry(tmp, struct pv_list, list); - dump_pv(&pvl->pv, stdout); - } - - ios->destroy(ios); - - dm_pool_destroy(mem); - dev_cache_exit(); - dump_memory(); - fin_log(); - return 0; -} diff --git a/old-tests/format1/get_vgs_t.c b/old-tests/format1/get_vgs_t.c deleted file mode 100644 index 4910c6ded..000000000 --- a/old-tests/format1/get_vgs_t.c +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004 Red Hat, Inc. All rights reserved. - * - * This file is part of LVM2. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License v.2. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "log.h" -#include "format1.h" -#include "pretty_print.h" -#include "list.h" - -#include - -int main(int argc, char **argv) -{ - struct io_space *ios; - struct list_head *vgs; - struct dm_pool *mem; - - init_log(stderr); - init_debug(_LOG_INFO); - - if (!dev_cache_init()) { - fprintf(stderr, "init of dev-cache failed\n"); - exit(1); - } - - if (!dev_cache_add_dir("/dev/loop")) { - fprintf(stderr, "couldn't add /dev to dir-cache\n"); - exit(1); - } - - if (!(mem = dm_pool_create(10 * 1024))) { - fprintf(stderr, "couldn't create pool\n"); - exit(1); - } - - ios = create_lvm1_format("/dev", mem, NULL); - - if (!ios) { - fprintf(stderr, "failed to create io_space for format1\n"); - exit(1); - } - - vgs = ios->get_vgs(ios); - - if (!vgs) { - fprintf(stderr, "couldn't read vg names\n"); - exit(1); - } - - dump_vg_names(vgs, stdout); - ios->destroy(ios); - - dm_pool_destroy(mem); - dev_cache_exit(); - dump_memory(); - fin_log(); - return 0; -} diff --git a/old-tests/format1/pretty_print.c b/old-tests/format1/pretty_print.c deleted file mode 100644 index 62e089abd..000000000 --- a/old-tests/format1/pretty_print.c +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004 Red Hat, Inc. All rights reserved. - * - * This file is part of LVM2. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License v.2. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "pretty_print.h" - -void dump_pv(struct physical_volume *pv, FILE *fp) -{ - fprintf(fp, "physical_volume {\n"); - fprintf(fp, "\tname = '%s'\n", pv->dev->name); - fprintf(fp, "\tvg_name = '%s'\n", pv->vg_name); - fprintf(fp, "\tsize = %llu\n", pv->size); - fprintf(fp, "\tpe_size = %llu\n", pv->pe_size); - fprintf(fp, "\tpe_start = %llu\n", pv->pe_start); - fprintf(fp, "\tpe_count = %u\n", pv->pe_count); - fprintf(fp, "\tpe_allocated = %u\n", pv->pe_allocated); - fprintf(fp, "}\n\n"); -} - -void dump_lv(struct logical_volume *lv, FILE *fp) -{ - int i; - - fprintf(fp, "logical_volume {\n"); - fprintf(fp, "\tname = '%s'\n", lv->name); - fprintf(fp, "\tsize = %llu\n", lv->size); - fprintf(fp, "\tle_count = %u\n", lv->le_count); - - fprintf(fp, "\tmap {\n"); - for (i = 0; i < lv->le_count; i++) { - struct physical_volume *pv = lv->map[i].pv; - - fprintf(fp, "\t\tpv = '%s', ", - pv ? 
pv->dev->name : "null ???"); - fprintf(fp, "\textent = %u\n", lv->map[i].pe); - } - fprintf(fp, "\t}\n}\n\n"); -} - -void dump_vg(struct volume_group *vg, FILE *fp) -{ - struct list_head *tmp; - - fprintf(fp, "volume_group {\n"); - fprintf(fp, "\tname = '%s'\n", vg->name); - fprintf(fp, "\textent_size = %llu\n", vg->extent_size); - fprintf(fp, "\textent_count = %d\n", vg->extent_count); - fprintf(fp, "\tfree_count = %d\n", vg->free_count); - fprintf(fp, "\tmax_lv = %d\n", vg->max_lv); - fprintf(fp, "\tmax_pv = %d\n", vg->max_pv); - fprintf(fp, "\tpv_count = %d\n", vg->pv_count); - fprintf(fp, "\tlv_count = %d\n", vg->lv_count); - fprintf(fp, "}\n\n"); - - list_for_each(tmp, &vg->pvs) { - struct pv_list *pvl = list_entry(tmp, struct pv_list, list); - dump_pv(&pvl->pv, fp); - } - - list_for_each(tmp, &vg->lvs) { - struct lv_list *lvl = list_entry(tmp, struct lv_list, list); - dump_lv(&lvl->lv, fp); - } -} - -void dump_vg_names(struct list_head *vg_names, FILE *fp) -{ - struct list_head *tmp; - struct name_list *nl; - - list_for_each(tmp, vg_names) { - nl = list_entry(tmp, struct name_list, list); - fprintf(fp, "%s\n", nl->name); - } -} diff --git a/old-tests/format1/pretty_print.h b/old-tests/format1/pretty_print.h deleted file mode 100644 index 20bd0394c..000000000 --- a/old-tests/format1/pretty_print.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004 Red Hat, Inc. All rights reserved. - * - * This file is part of LVM2. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License v.2. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifndef _LVM_PRETTY_PRINT -#define _LVM_PRETTY_PRINT - -#include "metadata.h" - -#include - -void dump_pv(struct physical_volume *pv, FILE *fp); -void dump_lv(struct logical_volume *lv, FILE *fp); -void dump_vg(struct volume_group *vg, FILE *fp); -void dump_vg_names(struct list_head *vg_names, FILE *fp); - -#endif diff --git a/old-tests/format1/read_pv_t.c b/old-tests/format1/read_pv_t.c deleted file mode 100644 index 271cc1e1d..000000000 --- a/old-tests/format1/read_pv_t.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004 Red Hat, Inc. All rights reserved. - * - * This file is part of LVM2. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License v.2. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "log.h" -#include "format1.h" -#include "pretty_print.h" -#include "list.h" - -#include - -int main(int argc, char **argv) -{ - struct io_space *ios; - struct physical_volume *pv; - struct dm_pool *mem; - struct device *dev; - - if (argc != 2) { - fprintf(stderr, "usage: read_pv_t \n"); - exit(1); - } - - init_log(stderr); - init_debug(_LOG_INFO); - - if (!dev_cache_init()) { - fprintf(stderr, "init of dev-cache failed\n"); - exit(1); - } - - if (!dev_cache_add_dir("/dev/loop")) { - fprintf(stderr, "couldn't add /dev to dir-cache\n"); - exit(1); - } - - if (!(mem = dm_pool_create(10 * 1024))) { - fprintf(stderr, "couldn't create pool\n"); - exit(1); - } - - ios = create_lvm1_format("/dev", mem, NULL); - - if (!ios) { - fprintf(stderr, "failed to create io_space for format1\n"); - exit(1); - } - - pv = ios->pv_read(ios, argv[1]); - - if (!pv) { - fprintf(stderr, "couldn't read pv %s\n", dev->name); - exit(1); - } - - dump_pv(pv, stdout); - ios->destroy(ios); - - dm_pool_destroy(mem); - dev_cache_exit(); - dump_memory(); - fin_log(); - return 0; -} diff --git a/old-tests/format1/read_vg_t.c b/old-tests/format1/read_vg_t.c deleted file mode 100644 index 1fb141496..000000000 --- a/old-tests/format1/read_vg_t.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004 Red Hat, Inc. All rights reserved. - * - * This file is part of LVM2. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License v.2. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "log.h" -#include "format1.h" -#include "pretty_print.h" - -#include - -int main(int argc, char **argv) -{ - struct io_space *ios; - struct volume_group *vg; - struct dm_pool *mem; - - if (argc != 2) { - fprintf(stderr, "usage: read_vg_t \n"); - exit(1); - } - - init_log(stderr); - init_debug(_LOG_INFO); - - if (!dev_cache_init()) { - fprintf(stderr, "init of dev-cache failed\n"); - exit(1); - } - - if (!dev_cache_add_dir("/dev/loop")) { - fprintf(stderr, "couldn't add /dev to dir-cache\n"); - exit(1); - } - - if (!(mem = dm_pool_create(10 * 1024))) { - fprintf(stderr, "couldn't create pool\n"); - exit(1); - } - - ios = create_lvm1_format("/dev", mem, NULL); - - if (!ios) { - fprintf(stderr, "failed to create io_space for format1\n"); - exit(1); - } - - vg = ios->vg_read(ios, argv[1]); - - if (!vg) { - fprintf(stderr, "couldn't read vg %s\n", argv[1]); - exit(1); - } - - dump_vg(vg, stdout); - - ios->destroy(ios); - - dm_pool_destroy(mem); - dev_cache_exit(); - dump_memory(); - fin_log(); - return 0; -} - diff --git a/old-tests/format1/write_vg_t.c b/old-tests/format1/write_vg_t.c deleted file mode 100644 index 29682ad2e..000000000 --- a/old-tests/format1/write_vg_t.c +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004 Red Hat, Inc. All rights reserved. - * - * This file is part of LVM2. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License v.2. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "log.h" -#include "format1.h" -#include "pretty_print.h" - -#include - -int main(int argc, char **argv) -{ - struct io_space *ios; - struct volume_group *vg; - struct dm_pool *mem; - - if (argc != 2) { - fprintf(stderr, "usage: read_vg_t \n"); - exit(1); - } - - init_log(stderr); - init_debug(_LOG_INFO); - - if (!dev_cache_init()) { - fprintf(stderr, "init of dev-cache failed\n"); - exit(1); - } - - if (!dev_cache_add_dir("/dev/loop")) { - fprintf(stderr, "couldn't add /dev to dir-cache\n"); - exit(1); - } - - if (!(mem = dm_pool_create(10 * 1024))) { - fprintf(stderr, "couldn't create pool\n"); - exit(1); - } - - ios = create_lvm1_format("/dev", mem, NULL); - - if (!ios) { - fprintf(stderr, "failed to create io_space for format1\n"); - exit(1); - } - - vg = ios->vg_read(ios, argv[1]); - - if (!vg) { - fprintf(stderr, "couldn't read vg %s\n", argv[1]); - exit(1); - } - - if (!ios->vg_write(ios, vg)) { - fprintf(stderr, "couldn't write vg\n"); - exit(1); - } - - ios->destroy(ios); - - dm_pool_destroy(mem); - dev_cache_exit(); - dump_memory(); - fin_log(); - return 0; -} diff --git a/old-tests/mm/Makefile.in b/old-tests/mm/Makefile.in deleted file mode 100644 index ec60fa604..000000000 --- a/old-tests/mm/Makefile.in +++ /dev/null @@ -1,33 +0,0 @@ -# -# Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. -# Copyright (C) 2004 Red Hat, Inc. All rights reserved. -# -# This file is part of LVM2. -# -# This copyrighted material is made available to anyone wishing to use, -# modify, copy, or redistribute it subject to the terms and conditions -# of the GNU General Public License v.2. 
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - -srcdir = @srcdir@ -top_srcdir = @top_srcdir@ -top_builddir = @top_builddir@ -VPATH = @srcdir@ - -SOURCES=\ - dbg_malloc_t.c - -TARGETS=dbg_malloc_t - -include $(top_builddir)/make.tmpl - -dbg_malloc_t: dbg_malloc_t.o - $(CC) $(CFLAGS) -o dbg_malloc_t dbg_malloc_t.o \ - -L$(top_builddir)/lib -llvm - -pool_t: pool_t.o - $(CC) $(CFLAGS) -o pool_t pool_t.o -L$(top_builddir)/lib -llvm - diff --git a/old-tests/mm/dbg_malloc_t.c b/old-tests/mm/dbg_malloc_t.c deleted file mode 100644 index 8536405ce..000000000 --- a/old-tests/mm/dbg_malloc_t.c +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004 Red Hat, Inc. All rights reserved. - * - * This file is part of LVM2. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License v.2. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "log.h" - -#include -#include - - -static void _print_help(FILE *out, const char *prog) -{ - fprintf(out, "usage : %s [-hlbufd]\n\n", prog); - fprintf(out, " h : this message\n"); - fprintf(out, " l : cause memory leak\n"); - fprintf(out, " b : overrun memory block\n"); - fprintf(out, " u : underrun memory block\n"); - fprintf(out, " f : free random pointer\n"); - fprintf(out, " d : free block twice\n"); -} - -struct block_list { - struct block_list *next; - char dummy[9]; -}; - -static void _leak_memory(void) -{ - int i; - struct block_list *b, *head, **l = &head, *n; - - /* allocate a list of blocks */ - for (i = 0; i < 1000; i++) { - - if (!(b = dbg_malloc(sizeof(*b)))) { - log_fatal("Couldn't allocate memory"); - exit(1); - } - - b->next = 0; - *l = b; - l = &b->next; - } - - /* free off every other block */ - for (b = head, i = 0; b; b = n, i++) { - n = b->next; - if(i & 0x1) - dbg_free(b); - } -} - -static void _bounds_overrun(void) -{ - char *b; - - /* allocate a block */ - b = dbg_malloc(534); - - /* overrun */ - b[534] = 56; - - /* free it, which should trigger the bounds error */ - dbg_free(b); -} - -static void _bounds_underrun(void) -{ - char *b; - - /* allocate a block */ - b = dbg_malloc(534); - - /* underrun */ - *(b - 1) = 56; - - /* free it, which should trigger the bounds error */ - dbg_free(b); -} - -static void _free_dud(void) -{ - char *b; - - /* allocate a block */ - b = dbg_malloc(534); - - /* free it, which should trigger the bounds error */ - dbg_free(b + 100); -} - -static void _free_twice(void) -{ - char *b; - - /* allocate a block */ - b = dbg_malloc(534); - - /* free it, which should trigger the bounds error */ - dbg_free(b); - dbg_free(b); -} - -int main(int argc, char **argv) -{ - char opt; - - 
init_log(stderr); - init_debug(_LOG_DEBUG); - opt = getopt(argc, argv, "hlbufd"); - switch(opt) { - case EOF: - case 'h': - _print_help(stdout, argv[0]); - break; - - case 'l': - _leak_memory(); - break; - - case 'b': - _bounds_overrun(); - break; - - case 'u': - _bounds_underrun(); - break; - - case 'f': - _free_dud(); - break; - - case 'd': - _free_twice(); - break; - - case '?': - fprintf(stderr, "Unknown option -%c\n", opt); - exit(1); - } - - dump_memory(); - fin_log(); - return 0; -} diff --git a/unit-tests/datastruct/Makefile.in b/unit-tests/datastruct/Makefile.in deleted file mode 100644 index 4f0e7a49b..000000000 --- a/unit-tests/datastruct/Makefile.in +++ /dev/null @@ -1,32 +0,0 @@ -# -# Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. -# Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved. -# -# This file is part of LVM2. -# -# This copyrighted material is made available to anyone wishing to use, -# modify, copy, or redistribute it subject to the terms and conditions -# of the GNU General Public License v.2. 
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - -srcdir = @srcdir@ -top_srcdir = @top_srcdir@ -top_builddir = @top_builddir@ - -SOURCES=\ - bitset_t.c - -TARGETS=\ - bitset_t - -include $(top_builddir)/make.tmpl - -INCLUDES += -I$(top_srcdir)/libdm -DM_DEPS = $(top_builddir)/libdm/libdevmapper.so -DM_LIBS = -ldevmapper $(LIBS) - -bitset_t: bitset_t.o $(DM_DEPS) - $(CC) $(CFLAGS) $(LDFLAGS) -o $@ bitset_t.o $(DM_LIBS) diff --git a/unit-tests/datastruct/TESTS b/unit-tests/datastruct/TESTS deleted file mode 100644 index ba88fb784..000000000 --- a/unit-tests/datastruct/TESTS +++ /dev/null @@ -1 +0,0 @@ -bitset iteration:$TEST_TOOL ./bitset_t \ No newline at end of file diff --git a/unit-tests/datastruct/bitset_t.c b/unit-tests/datastruct/bitset_t.c deleted file mode 100644 index be9b8fd9d..000000000 --- a/unit-tests/datastruct/bitset_t.c +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright (C) 2010 Red Hat, Inc. All rights reserved. - * - * This file is part of LVM2. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License v.2. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libdevmapper.h" - -#include - -enum { - NR_BITS = 137 -}; - -static void test_get_next(struct dm_pool *mem) -{ - int i, j, last, first; - dm_bitset_t bs = dm_bitset_create(mem, NR_BITS); - - for (i = 0; i < NR_BITS; i++) - assert(!dm_bit(bs, i)); - - for (i = 0, j = 1; i < NR_BITS; i += j, j++) - dm_bit_set(bs, i); - - first = 1; - for (i = 0, j = 1; i < NR_BITS; i += j, j++) { - if (first) { - last = dm_bit_get_first(bs); - first = 0; - } else - last = dm_bit_get_next(bs, last); - - assert(last == i); - } - - assert(dm_bit_get_next(bs, last) == -1); -} - -static void bit_flip(dm_bitset_t bs, int bit) -{ - int old = dm_bit(bs, bit); - if (old) - dm_bit_clear(bs, bit); - else - dm_bit_set(bs, bit); -} - -static void test_equal(struct dm_pool *mem) -{ - dm_bitset_t bs1 = dm_bitset_create(mem, NR_BITS); - dm_bitset_t bs2 = dm_bitset_create(mem, NR_BITS); - - int i, j; - for (i = 0, j = 1; i < NR_BITS; i += j, j++) { - dm_bit_set(bs1, i); - dm_bit_set(bs2, i); - } - - assert(dm_bitset_equal(bs1, bs2)); - assert(dm_bitset_equal(bs2, bs1)); - - for (i = 0; i < NR_BITS; i++) { - bit_flip(bs1, i); - assert(!dm_bitset_equal(bs1, bs2)); - assert(!dm_bitset_equal(bs2, bs1)); - - assert(dm_bitset_equal(bs1, bs1)); /* comparing with self */ - bit_flip(bs1, i); - } -} - -static void test_and(struct dm_pool *mem) -{ - dm_bitset_t bs1 = dm_bitset_create(mem, NR_BITS); - dm_bitset_t bs2 = dm_bitset_create(mem, NR_BITS); - dm_bitset_t bs3 = dm_bitset_create(mem, NR_BITS); - - int i, j; - for (i = 0, j = 1; i < NR_BITS; i += j, j++) { - dm_bit_set(bs1, i); - dm_bit_set(bs2, i); - } - - dm_bit_and(bs3, bs1, bs2); - - assert(dm_bitset_equal(bs1, bs2)); - assert(dm_bitset_equal(bs1, bs3)); - assert(dm_bitset_equal(bs2, bs3)); - - dm_bit_clear_all(bs1); - 
dm_bit_clear_all(bs2); - - for (i = 0; i < NR_BITS; i++) { - if (i % 2) - dm_bit_set(bs1, i); - else - dm_bit_set(bs2, i); - } - - dm_bit_and(bs3, bs1, bs2); - for (i = 0; i < NR_BITS; i++) - assert(!dm_bit(bs3, i)); -} - -int main(int argc, char **argv) -{ - typedef void (*test_fn)(struct dm_pool *); - static test_fn tests[] = { - test_get_next, - test_equal, - test_and - }; - - int i; - for (i = 0; i < DM_ARRAY_SIZE(tests); ++i) { - struct dm_pool *mem = dm_pool_create("bitset test", 1024); - assert(mem); - tests[i](mem); - dm_pool_destroy(mem); - } - - return 0; -} - diff --git a/unit-tests/mm/Makefile.in b/unit-tests/mm/Makefile.in deleted file mode 100644 index a40e2e235..000000000 --- a/unit-tests/mm/Makefile.in +++ /dev/null @@ -1,31 +0,0 @@ -# -# Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. -# Copyright (C) 2004 Red Hat, Inc. All rights reserved. -# -# This file is part of LVM2. -# -# This copyrighted material is made available to anyone wishing to use, -# modify, copy, or redistribute it subject to the terms and conditions -# of the GNU General Public License v.2. 
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - -srcdir = @srcdir@ -top_srcdir = @top_srcdir@ -top_builddir = @top_builddir@ -VPATH = @srcdir@ - -SOURCES=\ - pool_valgrind_t.c - -TARGETS=\ - pool_valgrind_t - -include $(top_builddir)/make.tmpl -DM_LIBS = -ldevmapper $(LIBS) - -pool_valgrind_t: pool_valgrind_t.o - $(CC) $(CFLAGS) -o $@ pool_valgrind_t.o $(LDFLAGS) $(DM_LIBS) - diff --git a/unit-tests/mm/TESTS b/unit-tests/mm/TESTS deleted file mode 100644 index 3bf31544a..000000000 --- a/unit-tests/mm/TESTS +++ /dev/null @@ -1 +0,0 @@ -valgrind pool awareness:valgrind ./pool_valgrind_t 2>&1 | ./check_results diff --git a/unit-tests/mm/check_results b/unit-tests/mm/check_results deleted file mode 100755 index a7b0975a5..000000000 --- a/unit-tests/mm/check_results +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env ruby1.9 - -require 'pp' - -patterns = [ - /Invalid read of size 1/, - /Invalid write of size 1/, - /Invalid read of size 1/, - /still reachable: [0-9,]+ bytes in 3 blocks/ - ] - -lines = STDIN.readlines -pp lines - -result = catch(:done) do - patterns.each do |pat| - loop do - throw(:done, false) if lines.size == 0 - - line = lines.shift - if line =~ pat - STDERR.puts "matched #{pat}" - break; - end - end - end - - throw(:done, true) -end - -exit(result ? 0 : 1) diff --git a/unit-tests/mm/pool_valgrind_t.c b/unit-tests/mm/pool_valgrind_t.c deleted file mode 100644 index 704f1168e..000000000 --- a/unit-tests/mm/pool_valgrind_t.c +++ /dev/null @@ -1,181 +0,0 @@ -#include "libdevmapper.h" - -#include - -/* - * Checks that valgrind is picking up unallocated pool memory as - * uninitialised, even if the chunk has been recycled. 
- * - * $ valgrind --track-origins=yes ./pool_valgrind_t - * - * ==7023== Memcheck, a memory error detector - * ==7023== Copyright (C) 2002-2009, and GNU GPL'd, by Julian Seward et al. - * ==7023== Using Valgrind-3.6.0.SVN-Debian and LibVEX; rerun with -h for copyright info - * ==7023== Command: ./pool_valgrind_t - * ==7023== - * first branch worked (as expected) - * ==7023== Conditional jump or move depends on uninitialised value(s) - * ==7023== at 0x4009AC: main (in /home/ejt/work/lvm2/unit-tests/mm/pool_valgrind_t) - * ==7023== Uninitialised value was created by a client request - * ==7023== at 0x4E40CB8: dm_pool_free (in /home/ejt/work/lvm2/libdm/ioctl/libdevmapper.so.1.02) - * ==7023== by 0x4009A8: main (in /home/ejt/work/lvm2/unit-tests/mm/pool_valgrind_t) - * ==7023== - * second branch worked (valgrind should have flagged this as an error) - * ==7023== - * ==7023== HEAP SUMMARY: - * ==7023== in use at exit: 0 bytes in 0 blocks - * ==7023== total heap usage: 2 allocs, 2 frees, 2,104 bytes allocated - * ==7023== - * ==7023== All heap blocks were freed -- no leaks are possible - * ==7023== - * ==7023== For counts of detected and suppressed errors, rerun with: -v - * ==7023== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 4 from 4) - */ - -#define COUNT 10 - -static void check_free() -{ - int i; - char *blocks[COUNT]; - struct dm_pool *p = dm_pool_create("blah", 1024); - - for (i = 0; i < COUNT; i++) - blocks[i] = dm_pool_alloc(p, 37); - - /* check we can access the last block */ - blocks[COUNT - 1][0] = 'E'; - if (blocks[COUNT - 1][0] == 'E') - printf("first branch worked (as expected)\n"); - - dm_pool_free(p, blocks[5]); - - if (blocks[COUNT - 1][0] == 'E') - printf("second branch worked (valgrind should have flagged this as an error)\n"); - - dm_pool_destroy(p); -} - -/* Checks that freed chunks are marked NOACCESS */ -static void check_free2() -{ - struct dm_pool *p = dm_pool_create("", 900); /* 900 will get - * rounded up to 1024, - * 1024 would have 
got - * rounded up to - * 2048 */ - char *data1, *data2; - - assert(p); - data1 = dm_pool_alloc(p, 123); - assert(data1); - - data1 = dm_pool_alloc(p, 1024); - assert(data1); - - data2 = dm_pool_alloc(p, 123); - assert(data2); - - data2[0] = 'A'; /* should work fine */ - - dm_pool_free(p, data1); - - /* - * so now the first chunk is active, the second chunk has become - * the free one. - */ - data2[0] = 'B'; /* should prompt an invalid write error */ - - dm_pool_destroy(p); -} - -static void check_alignment() -{ - /* - * Pool always tries to allocate blocks with particular alignment. - * So there are potentially small gaps between allocations. This - * test checks that valgrind is spotting illegal accesses to these - * gaps. - */ - - int i, sum; - struct dm_pool *p = dm_pool_create("blah", 1024); - char *data1, *data2; - char buffer[16]; - - - data1 = dm_pool_alloc_aligned(p, 1, 4); - assert(data1); - data2 = dm_pool_alloc_aligned(p, 1, 4); - assert(data1); - - snprintf(buffer, sizeof(buffer), "%c", *(data1 + 1)); /* invalid read size 1 */ - dm_pool_destroy(p); -} - -/* - * Looking at the code I'm not sure allocations that are near the chunk - * size are working. So this test is trying to exhibit a specific problem. - */ -static void check_allocation_near_chunk_size() -{ - int i; - char *data; - struct dm_pool *p = dm_pool_create("", 900); - - /* - * allocate a lot and then free everything so we know there - * is a spare chunk. - */ - for (i = 0; i < 1000; i++) { - data = dm_pool_alloc(p, 37); - memset(data, 0, 37); - assert(data); - } - - dm_pool_empty(p); - - /* now we allocate something close to the chunk size ... 
*/ - data = dm_pool_alloc(p, 1020); - assert(data); - memset(data, 0, 1020); - - dm_pool_destroy(p); -} - -/* FIXME: test the dbg_malloc at exit (this test should be in dbg_malloc) */ -static void check_leak_detection() -{ - int i; - struct dm_pool *p = dm_pool_create("", 1024); - - for (i = 0; i < 10; i++) - dm_pool_alloc(p, (i + 1) * 37); -} - -/* we shouldn't get any errors from this one */ -static void check_object_growth() -{ - int i; - struct dm_pool *p = dm_pool_create("", 32); - char data[100] = { 0 }; - void *obj; - - dm_pool_begin_object(p, 43); - for (i = 1; i < 100; i++) - dm_pool_grow_object(p, data, i); - obj = dm_pool_end_object(p); - - dm_pool_destroy(p); -} - -int main(int argc, char **argv) -{ - check_free(); - check_free2(); - check_alignment(); - check_allocation_near_chunk_size(); - check_leak_detection(); - check_object_growth(); - return 0; -} diff --git a/unit-tests/regex/Makefile.in b/unit-tests/regex/Makefile.in deleted file mode 100644 index 76b7dee36..000000000 --- a/unit-tests/regex/Makefile.in +++ /dev/null @@ -1,37 +0,0 @@ -# -# Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. -# Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved. -# -# This file is part of LVM2. -# -# This copyrighted material is made available to anyone wishing to use, -# modify, copy, or redistribute it subject to the terms and conditions -# of the GNU General Public License v.2. 
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - -srcdir = @srcdir@ -top_srcdir = @top_srcdir@ -top_builddir = @top_builddir@ - -SOURCES=\ - parse_t.c \ - matcher_t.c - -TARGETS=\ - parse_t \ - matcher_t - -include $(top_builddir)/make.tmpl - -INCLUDES += -I$(top_srcdir)/libdm -DM_DEPS = $(top_builddir)/libdm/libdevmapper.so -DM_LIBS = -ldevmapper $(LIBS) - -parse_t: parse_t.o $(DM_DEPS) - $(CC) $(CFLAGS) $(LDFLAGS) -o $@ parse_t.o $(DM_LIBS) - -matcher_t: matcher_t.o $(DM_DEPS) - $(CC) $(CFLAGS) $(LDFLAGS) -o $@ matcher_t.o $(DM_LIBS) diff --git a/unit-tests/regex/TESTS b/unit-tests/regex/TESTS deleted file mode 100644 index 329794293..000000000 --- a/unit-tests/regex/TESTS +++ /dev/null @@ -1,3 +0,0 @@ -dfa matching:$TEST_TOOL ./matcher_t --fingerprint dev_patterns < devices.list > matcher_t.output && diff -u matcher_t.expected matcher_t.output -dfa matching:$TEST_TOOL ./matcher_t --fingerprint random_regexes < /dev/null > matcher_t.output && diff -u matcher_t.expected2 matcher_t.output -dfa with non-print regex chars:$TEST_TOOL ./matcher_t nonprint_regexes < nonprint_input > matcher_t.output && diff -u matcher_t.expected3 matcher_t.output \ No newline at end of file diff --git a/unit-tests/regex/dev_patterns b/unit-tests/regex/dev_patterns deleted file mode 100644 index 34459ba60..000000000 --- a/unit-tests/regex/dev_patterns +++ /dev/null @@ -1,2 +0,0 @@ -"loop/[0-9]+" -"hd[a-d][0-5]+" diff --git a/unit-tests/regex/devices.list b/unit-tests/regex/devices.list deleted file mode 100644 index 91af305e1..000000000 --- a/unit-tests/regex/devices.list +++ /dev/null @@ -1,880 +0,0 @@ -/dev -/dev/.devfsd -/dev/cpu -/dev/cpu/mtrr -/dev/netlink -/dev/netlink/route -/dev/netlink/skip -/dev/netlink/USERSOCK -/dev/netlink/fwmonitor -/dev/netlink/ARPD -/dev/netlink/ROUTE6 -/dev/netlink/IP6_FW 
-/dev/netlink/tap0 -/dev/netlink/tap1 -/dev/netlink/tap2 -/dev/netlink/tap3 -/dev/netlink/tap4 -/dev/netlink/tap5 -/dev/netlink/tap6 -/dev/netlink/tap7 -/dev/netlink/tap8 -/dev/netlink/tap9 -/dev/netlink/tap10 -/dev/netlink/tap11 -/dev/netlink/tap12 -/dev/netlink/tap13 -/dev/netlink/tap14 -/dev/netlink/tap15 -/dev/shm -/dev/mem -/dev/kmem -/dev/null -/dev/port -/dev/zero -/dev/full -/dev/random -/dev/urandom -/dev/tty -/dev/console -/dev/vc -/dev/vc/1 -/dev/vc/2 -/dev/vc/3 -/dev/vc/4 -/dev/vc/5 -/dev/vc/6 -/dev/vc/7 -/dev/vc/8 -/dev/vc/9 -/dev/vc/10 -/dev/vc/11 -/dev/vc/12 -/dev/vc/13 -/dev/vc/14 -/dev/vc/15 -/dev/vc/16 -/dev/vc/17 -/dev/vc/18 -/dev/vc/19 -/dev/vc/20 -/dev/vc/21 -/dev/vc/22 -/dev/vc/23 -/dev/vc/24 -/dev/vc/25 -/dev/vc/26 -/dev/vc/27 -/dev/vc/28 -/dev/vc/29 -/dev/vc/30 -/dev/vc/31 -/dev/vc/32 -/dev/vc/33 -/dev/vc/34 -/dev/vc/35 -/dev/vc/36 -/dev/vc/37 -/dev/vc/38 -/dev/vc/39 -/dev/vc/40 -/dev/vc/41 -/dev/vc/42 -/dev/vc/43 -/dev/vc/44 -/dev/vc/45 -/dev/vc/46 -/dev/vc/47 -/dev/vc/48 -/dev/vc/49 -/dev/vc/50 -/dev/vc/51 -/dev/vc/52 -/dev/vc/53 -/dev/vc/54 -/dev/vc/55 -/dev/vc/56 -/dev/vc/57 -/dev/vc/58 -/dev/vc/59 -/dev/vc/60 -/dev/vc/61 -/dev/vc/62 -/dev/vc/63 -/dev/vc/0 -/dev/ptmx -/dev/misc -/dev/misc/psaux -/dev/pty -/dev/pty/m0 -/dev/pty/m1 -/dev/pty/m2 -/dev/pty/m3 -/dev/pty/m4 -/dev/pty/m5 -/dev/pty/m6 -/dev/pty/m7 -/dev/pty/m8 -/dev/pty/m9 -/dev/pty/m10 -/dev/pty/m11 -/dev/pty/m12 -/dev/pty/m13 -/dev/pty/m14 -/dev/pty/m15 -/dev/pty/m16 -/dev/pty/m17 -/dev/pty/m18 -/dev/pty/m19 -/dev/pty/m20 -/dev/pty/m21 -/dev/pty/m22 -/dev/pty/m23 -/dev/pty/m24 -/dev/pty/m25 -/dev/pty/m26 -/dev/pty/m27 -/dev/pty/m28 -/dev/pty/m29 -/dev/pty/m30 -/dev/pty/m31 -/dev/pty/m32 -/dev/pty/m33 -/dev/pty/m34 -/dev/pty/m35 -/dev/pty/m36 -/dev/pty/m37 -/dev/pty/m38 -/dev/pty/m39 -/dev/pty/m40 -/dev/pty/m41 -/dev/pty/m42 -/dev/pty/m43 -/dev/pty/m44 -/dev/pty/m45 -/dev/pty/m46 -/dev/pty/m47 -/dev/pty/m48 -/dev/pty/m49 -/dev/pty/m50 -/dev/pty/m51 -/dev/pty/m52 -/dev/pty/m53 
-/dev/pty/m54 -/dev/pty/m55 -/dev/pty/m56 -/dev/pty/m57 -/dev/pty/m58 -/dev/pty/m59 -/dev/pty/m60 -/dev/pty/m61 -/dev/pty/m62 -/dev/pty/m63 -/dev/pty/m64 -/dev/pty/m65 -/dev/pty/m66 -/dev/pty/m67 -/dev/pty/m68 -/dev/pty/m69 -/dev/pty/m70 -/dev/pty/m71 -/dev/pty/m72 -/dev/pty/m73 -/dev/pty/m74 -/dev/pty/m75 -/dev/pty/m76 -/dev/pty/m77 -/dev/pty/m78 -/dev/pty/m79 -/dev/pty/m80 -/dev/pty/m81 -/dev/pty/m82 -/dev/pty/m83 -/dev/pty/m84 -/dev/pty/m85 -/dev/pty/m86 -/dev/pty/m87 -/dev/pty/m88 -/dev/pty/m89 -/dev/pty/m90 -/dev/pty/m91 -/dev/pty/m92 -/dev/pty/m93 -/dev/pty/m94 -/dev/pty/m95 -/dev/pty/m96 -/dev/pty/m97 -/dev/pty/m98 -/dev/pty/m99 -/dev/pty/m100 -/dev/pty/m101 -/dev/pty/m102 -/dev/pty/m103 -/dev/pty/m104 -/dev/pty/m105 -/dev/pty/m106 -/dev/pty/m107 -/dev/pty/m108 -/dev/pty/m109 -/dev/pty/m110 -/dev/pty/m111 -/dev/pty/m112 -/dev/pty/m113 -/dev/pty/m114 -/dev/pty/m115 -/dev/pty/m116 -/dev/pty/m117 -/dev/pty/m118 -/dev/pty/m119 -/dev/pty/m120 -/dev/pty/m121 -/dev/pty/m122 -/dev/pty/m123 -/dev/pty/m124 -/dev/pty/m125 -/dev/pty/m126 -/dev/pty/m127 -/dev/pty/m128 -/dev/pty/m129 -/dev/pty/m130 -/dev/pty/m131 -/dev/pty/m132 -/dev/pty/m133 -/dev/pty/m134 -/dev/pty/m135 -/dev/pty/m136 -/dev/pty/m137 -/dev/pty/m138 -/dev/pty/m139 -/dev/pty/m140 -/dev/pty/m141 -/dev/pty/m142 -/dev/pty/m143 -/dev/pty/m144 -/dev/pty/m145 -/dev/pty/m146 -/dev/pty/m147 -/dev/pty/m148 -/dev/pty/m149 -/dev/pty/m150 -/dev/pty/m151 -/dev/pty/m152 -/dev/pty/m153 -/dev/pty/m154 -/dev/pty/m155 -/dev/pty/m156 -/dev/pty/m157 -/dev/pty/m158 -/dev/pty/m159 -/dev/pty/m160 -/dev/pty/m161 -/dev/pty/m162 -/dev/pty/m163 -/dev/pty/m164 -/dev/pty/m165 -/dev/pty/m166 -/dev/pty/m167 -/dev/pty/m168 -/dev/pty/m169 -/dev/pty/m170 -/dev/pty/m171 -/dev/pty/m172 -/dev/pty/m173 -/dev/pty/m174 -/dev/pty/m175 -/dev/pty/m176 -/dev/pty/m177 -/dev/pty/m178 -/dev/pty/m179 -/dev/pty/m180 -/dev/pty/m181 -/dev/pty/m182 -/dev/pty/m183 -/dev/pty/m184 -/dev/pty/m185 -/dev/pty/m186 -/dev/pty/m187 -/dev/pty/m188 -/dev/pty/m189 
-/dev/pty/m190 -/dev/pty/m191 -/dev/pty/m192 -/dev/pty/m193 -/dev/pty/m194 -/dev/pty/m195 -/dev/pty/m196 -/dev/pty/m197 -/dev/pty/m198 -/dev/pty/m199 -/dev/pty/m200 -/dev/pty/m201 -/dev/pty/m202 -/dev/pty/m203 -/dev/pty/m204 -/dev/pty/m205 -/dev/pty/m206 -/dev/pty/m207 -/dev/pty/m208 -/dev/pty/m209 -/dev/pty/m210 -/dev/pty/m211 -/dev/pty/m212 -/dev/pty/m213 -/dev/pty/m214 -/dev/pty/m215 -/dev/pty/m216 -/dev/pty/m217 -/dev/pty/m218 -/dev/pty/m219 -/dev/pty/m220 -/dev/pty/m221 -/dev/pty/m222 -/dev/pty/m223 -/dev/pty/m224 -/dev/pty/m225 -/dev/pty/m226 -/dev/pty/m227 -/dev/pty/m228 -/dev/pty/m229 -/dev/pty/m230 -/dev/pty/m231 -/dev/pty/m232 -/dev/pty/m233 -/dev/pty/m234 -/dev/pty/m235 -/dev/pty/m236 -/dev/pty/m237 -/dev/pty/m238 -/dev/pty/m239 -/dev/pty/m240 -/dev/pty/m241 -/dev/pty/m242 -/dev/pty/m243 -/dev/pty/m244 -/dev/pty/m245 -/dev/pty/m246 -/dev/pty/m247 -/dev/pty/m248 -/dev/pty/m249 -/dev/pty/m250 -/dev/pty/m251 -/dev/pty/m252 -/dev/pty/m253 -/dev/pty/m254 -/dev/pty/m255 -/dev/pts -/dev/pts/0 -/dev/pts/1 -/dev/pts/2 -/dev/pts/3 -/dev/pts/4 -/dev/pts/5 -/dev/pts/6 -/dev/pts/7 -/dev/vcc -/dev/vcc/0 -/dev/vcc/a -/dev/vcc/1 -/dev/vcc/a1 -/dev/vcc/2 -/dev/vcc/a2 -/dev/vcc/3 -/dev/vcc/a3 -/dev/vcc/5 -/dev/vcc/a5 -/dev/vcc/4 -/dev/vcc/a4 -/dev/vcc/6 -/dev/vcc/a6 -/dev/vcc/7 -/dev/vcc/a7 -/dev/tts -/dev/tts/0 -/dev/cua -/dev/cua/0 -/dev/ide -/dev/ide/host0 -/dev/ide/host0/bus0 -/dev/ide/host0/bus0/target0 -/dev/ide/host0/bus0/target0/lun0 -/dev/ide/host0/bus0/target0/lun0/disc -/dev/ide/host0/bus0/target0/lun0/part1 -/dev/ide/host0/bus0/target0/lun0/part2 -/dev/ide/host0/bus0/target0/lun0/part3 -/dev/ide/host0/bus0/target0/lun0/part4 -/dev/ide/host0/bus0/target0/lun0/part5 -/dev/ide/host0/bus0/target0/lun0/part6 -/dev/ide/host0/bus0/target0/lun0/part7 -/dev/ide/host0/bus0/target0/lun0/part8 -/dev/ide/host0/bus0/target1 -/dev/ide/host0/bus0/target1/lun0 -/dev/ide/host0/bus0/target1/lun0/disc -/dev/ide/host0/bus0/target1/lun0/part1 -/dev/ide/host0/bus1 
-/dev/ide/host0/bus1/target0 -/dev/ide/host0/bus1/target0/lun0 -/dev/ide/host0/bus1/target0/lun0/disc -/dev/ide/host0/bus1/target0/lun0/part1 -/dev/ide/host0/bus1/target1 -/dev/ide/host0/bus1/target1/lun0 -/dev/discs -/dev/discs/disc0 -/dev/discs/disc1 -/dev/discs/disc2 -/dev/floppy -/dev/floppy/0u1440 -/dev/floppy/0u1680 -/dev/floppy/0u1722 -/dev/floppy/0u1743 -/dev/floppy/0u1760 -/dev/floppy/0u1920 -/dev/floppy/0u1840 -/dev/floppy/0u1600 -/dev/floppy/0u360 -/dev/floppy/0u720 -/dev/floppy/0u820 -/dev/floppy/0u830 -/dev/floppy/0u1040 -/dev/floppy/0u1120 -/dev/floppy/0u800 -/dev/floppy/0 -/dev/loop -/dev/loop/0 -/dev/loop/1 -/dev/loop/2 -/dev/loop/3 -/dev/loop/4 -/dev/loop/5 -/dev/loop/6 -/dev/loop/7 -/dev/cdroms -/dev/sound -/dev/sound/dsp -/dev/sound/dsp1 -/dev/sound/mixer -/dev/sound/midi -/dev/usb -/dev/root -/dev/initctl -/dev/xconsole -/dev/fd -/dev/stdin -/dev/stdout -/dev/stderr -/dev/route -/dev/skip -/dev/USERSOCK -/dev/fwmonitor -/dev/ARPD -/dev/ROUTE6 -/dev/IP6_FW -/dev/tap0 -/dev/tap1 -/dev/tap2 -/dev/tap3 -/dev/tap4 -/dev/tap5 -/dev/tap6 -/dev/tap7 -/dev/tap8 -/dev/tap9 -/dev/tap10 -/dev/tap11 -/dev/tap12 -/dev/tap13 -/dev/tap14 -/dev/tap15 -/dev/tty1 -/dev/tty2 -/dev/tty3 -/dev/tty4 -/dev/tty5 -/dev/tty6 -/dev/tty7 -/dev/tty8 -/dev/tty9 -/dev/tty10 -/dev/tty11 -/dev/tty12 -/dev/tty13 -/dev/tty14 -/dev/tty15 -/dev/tty16 -/dev/tty17 -/dev/tty18 -/dev/tty19 -/dev/tty20 -/dev/tty21 -/dev/tty22 -/dev/tty23 -/dev/tty24 -/dev/tty25 -/dev/tty26 -/dev/tty27 -/dev/tty28 -/dev/tty29 -/dev/tty30 -/dev/tty31 -/dev/tty32 -/dev/tty33 -/dev/tty34 -/dev/tty35 -/dev/tty36 -/dev/tty37 -/dev/tty38 -/dev/tty39 -/dev/tty40 -/dev/tty41 -/dev/tty42 -/dev/tty43 -/dev/tty44 -/dev/tty45 -/dev/tty46 -/dev/tty47 -/dev/tty48 -/dev/tty49 -/dev/tty50 -/dev/tty51 -/dev/tty52 -/dev/tty53 -/dev/tty54 -/dev/tty55 -/dev/tty56 -/dev/tty57 -/dev/tty58 -/dev/tty59 -/dev/tty60 -/dev/tty61 -/dev/tty62 -/dev/tty63 -/dev/tty0 -/dev/psaux -/dev/ptyp0 -/dev/ptyp1 -/dev/ptyp2 -/dev/ptyp3 
-/dev/ptyp4 -/dev/ptyp5 -/dev/ptyp6 -/dev/ptyp7 -/dev/ptyp8 -/dev/ptyp9 -/dev/ptypa -/dev/ptypb -/dev/ptypc -/dev/ptypd -/dev/ptype -/dev/ptypf -/dev/ptyq0 -/dev/ptyq1 -/dev/ptyq2 -/dev/ptyq3 -/dev/ptyq4 -/dev/ptyq5 -/dev/ptyq6 -/dev/ptyq7 -/dev/ptyq8 -/dev/ptyq9 -/dev/ptyqa -/dev/ptyqb -/dev/ptyqc -/dev/ptyqd -/dev/ptyqe -/dev/ptyqf -/dev/ptyr0 -/dev/ptyr1 -/dev/ptyr2 -/dev/ptyr3 -/dev/ptyr4 -/dev/ptyr5 -/dev/ptyr6 -/dev/ptyr7 -/dev/ptyr8 -/dev/ptyr9 -/dev/ptyra -/dev/ptyrb -/dev/ptyrc -/dev/ptyrd -/dev/ptyre -/dev/ptyrf -/dev/ptys0 -/dev/ptys1 -/dev/ptys2 -/dev/ptys3 -/dev/ptys4 -/dev/ptys5 -/dev/ptys6 -/dev/ptys7 -/dev/ptys8 -/dev/ptys9 -/dev/ptysa -/dev/ptysb -/dev/ptysc -/dev/ptysd -/dev/ptyse -/dev/ptysf -/dev/ptyt0 -/dev/ptyt1 -/dev/ptyt2 -/dev/ptyt3 -/dev/ptyt4 -/dev/ptyt5 -/dev/ptyt6 -/dev/ptyt7 -/dev/ptyt8 -/dev/ptyt9 -/dev/ptyta -/dev/ptytb -/dev/ptytc -/dev/ptytd -/dev/ptyte -/dev/ptytf -/dev/ptyu0 -/dev/ptyu1 -/dev/ptyu2 -/dev/ptyu3 -/dev/ptyu4 -/dev/ptyu5 -/dev/ptyu6 -/dev/ptyu7 -/dev/ptyu8 -/dev/ptyu9 -/dev/ptyua -/dev/ptyub -/dev/ptyuc -/dev/ptyud -/dev/ptyue -/dev/ptyuf -/dev/ptyv0 -/dev/ptyv1 -/dev/ptyv2 -/dev/ptyv3 -/dev/ptyv4 -/dev/ptyv5 -/dev/ptyv6 -/dev/ptyv7 -/dev/ptyv8 -/dev/ptyv9 -/dev/ptyva -/dev/ptyvb -/dev/ptyvc -/dev/ptyvd -/dev/ptyve -/dev/ptyvf -/dev/ptyw0 -/dev/ptyw1 -/dev/ptyw2 -/dev/ptyw3 -/dev/ptyw4 -/dev/ptyw5 -/dev/ptyw6 -/dev/ptyw7 -/dev/ptyw8 -/dev/ptyw9 -/dev/ptywa -/dev/ptywb -/dev/ptywc -/dev/ptywd -/dev/ptywe -/dev/ptywf -/dev/ptyx0 -/dev/ptyx1 -/dev/ptyx2 -/dev/ptyx3 -/dev/ptyx4 -/dev/ptyx5 -/dev/ptyx6 -/dev/ptyx7 -/dev/ptyx8 -/dev/ptyx9 -/dev/ptyxa -/dev/ptyxb -/dev/ptyxc -/dev/ptyxd -/dev/ptyxe -/dev/ptyxf -/dev/ptyy0 -/dev/ptyy1 -/dev/ptyy2 -/dev/ptyy3 -/dev/ptyy4 -/dev/ptyy5 -/dev/ptyy6 -/dev/ptyy7 -/dev/ptyy8 -/dev/ptyy9 -/dev/ptyya -/dev/ptyyb -/dev/ptyyc -/dev/ptyyd -/dev/ptyye -/dev/ptyyf -/dev/ptyz0 -/dev/ptyz1 -/dev/ptyz2 -/dev/ptyz3 -/dev/ptyz4 -/dev/ptyz5 -/dev/ptyz6 -/dev/ptyz7 -/dev/ptyz8 -/dev/ptyz9 
-/dev/ptyza -/dev/ptyzb -/dev/ptyzc -/dev/ptyzd -/dev/ptyze -/dev/ptyzf -/dev/ptya0 -/dev/ptya1 -/dev/ptya2 -/dev/ptya3 -/dev/ptya4 -/dev/ptya5 -/dev/ptya6 -/dev/ptya7 -/dev/ptya8 -/dev/ptya9 -/dev/ptyaa -/dev/ptyab -/dev/ptyac -/dev/ptyad -/dev/ptyae -/dev/ptyaf -/dev/ptyb0 -/dev/ptyb1 -/dev/ptyb2 -/dev/ptyb3 -/dev/ptyb4 -/dev/ptyb5 -/dev/ptyb6 -/dev/ptyb7 -/dev/ptyb8 -/dev/ptyb9 -/dev/ptyba -/dev/ptybb -/dev/ptybc -/dev/ptybd -/dev/ptybe -/dev/ptybf -/dev/ptyc0 -/dev/ptyc1 -/dev/ptyc2 -/dev/ptyc3 -/dev/ptyc4 -/dev/ptyc5 -/dev/ptyc6 -/dev/ptyc7 -/dev/ptyc8 -/dev/ptyc9 -/dev/ptyca -/dev/ptycb -/dev/ptycc -/dev/ptycd -/dev/ptyce -/dev/ptycf -/dev/ptyd0 -/dev/ptyd1 -/dev/ptyd2 -/dev/ptyd3 -/dev/ptyd4 -/dev/ptyd5 -/dev/ptyd6 -/dev/ptyd7 -/dev/ptyd8 -/dev/ptyd9 -/dev/ptyda -/dev/ptydb -/dev/ptydc -/dev/ptydd -/dev/ptyde -/dev/ptydf -/dev/ptye0 -/dev/ptye1 -/dev/ptye2 -/dev/ptye3 -/dev/ptye4 -/dev/ptye5 -/dev/ptye6 -/dev/ptye7 -/dev/ptye8 -/dev/ptye9 -/dev/ptyea -/dev/ptyeb -/dev/ptyec -/dev/ptyed -/dev/ptyee -/dev/ptyef -/dev/vcs -/dev/vcsa -/dev/vcs1 -/dev/vcsa1 -/dev/ttyS0 -/dev/cua0 -/dev/hda -/dev/hda1 -/dev/hda2 -/dev/hda3 -/dev/hda4 -/dev/hda5 -/dev/hda6 -/dev/hda7 -/dev/hda8 -/dev/hdb -/dev/hdb1 -/dev/hdc -/dev/hdc1 -/dev/fd0u1440 -/dev/fd0u1680 -/dev/fd0u1722 -/dev/fd0u1743 -/dev/fd0u1760 -/dev/fd0u1920 -/dev/fd0u1840 -/dev/fd0u1600 -/dev/fd0u360 -/dev/fd0u720 -/dev/fd0u820 -/dev/fd0u830 -/dev/fd0u1040 -/dev/fd0u1120 -/dev/fd0u800 -/dev/fd0 -/dev/loop0 -/dev/loop1 -/dev/loop2 -/dev/loop3 -/dev/loop4 -/dev/loop5 -/dev/loop6 -/dev/loop7 -/dev/dsp -/dev/dsp1 -/dev/mixer -/dev/midi -/dev/lvm -/dev/vg0 -/dev/vg0/group -/dev/vg0/packages -/dev/vg0/photos -/dev/vg0/music -/dev/log -/dev/MAKEDEV -/dev/printer -/dev/vcs2 -/dev/vcsa2 -/dev/vcs3 -/dev/vcsa3 -/dev/vcs5 -/dev/vcsa5 -/dev/vcs4 -/dev/vcsa4 -/dev/vcs6 -/dev/vcsa6 -/dev/nvidia0 -/dev/nvidia1 -/dev/nvidia2 -/dev/nvidia3 -/dev/nvidiactl -/dev/vcs7 -/dev/vcsa7 diff --git a/unit-tests/regex/matcher_t.c 
b/unit-tests/regex/matcher_t.c deleted file mode 100644 index 24975a11c..000000000 --- a/unit-tests/regex/matcher_t.c +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved. - * - * This file is part of LVM2. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License v.2. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libdevmapper.h" -#include "log.h" - -#include -#include -#include -#include -#include -#include -#include -#include - - -static int _read_spec(const char *file, char ***regex, int *nregex) -{ - char buffer[1024], *start, *ptr; - FILE *fp = fopen(file, "r"); - int asize = 100; - char **rx = dm_malloc(sizeof(*rx) * asize); - int nr = 0; - - if (!fp) - return 0; - - while (fgets(buffer, sizeof(buffer),fp)) { - - /* trim leading whitespace */ - for (ptr = buffer; *ptr && isspace((int) *ptr); ptr++); - - if (!*ptr || *ptr == '#') - continue; - - if (*ptr == '\"') { - ptr++; - start = ptr; - while (*ptr && *ptr != '\"') { - if (*ptr == '\\') - ptr++; - ptr++; - } - - if (!*ptr) { - fprintf(stderr, "Formatting error : " - "No terminating quote\n"); - return 0; - } - - rx[nr] = dm_malloc((ptr - start) + 1); - strncpy(rx[nr], start, ptr - start); - rx[nr][ptr - start] = '\0'; - nr++; - } else { - fprintf(stderr, "%s", ptr); - fprintf(stderr, "Formatting error : \"\" " - "\n"); - return 0; - } - } - - *regex = rx; - *nregex = nr; - return 1; -} - -static void _free_regex(char **regex, int nregex) -{ - int i; - for (i = 0; i < nregex; i++) - dm_free(regex[i]); - - dm_free(regex); -} - -static void _scan_input(struct dm_regex *m, char 
**regex) -{ - char buffer[256], *ptr; - int r; - - while (fgets(buffer, sizeof(buffer), stdin)) { - if ((ptr = strchr(buffer, '\n'))) - *ptr = '\0'; - - r = dm_regex_match(m, buffer); - - if (r >= 0) - printf("%s : %s\n", buffer, regex[r]); - } -} - -int main(int argc, char **argv) -{ - struct dm_pool *mem; - struct dm_regex *scanner; - char **regex; - int nregex; - int ret = 0; - int want_finger_print = 0, i; - const char *pattern_file = NULL; - - for (i = 1; i < argc; i++) - if (!strcmp(argv[i], "--fingerprint")) - want_finger_print = 1; - - else - pattern_file = argv[i]; - - if (!pattern_file) { - fprintf(stderr, "Usage : %s [--fingerprint] \n", argv[0]); - exit(1); - } - - dm_log_init_verbose(_LOG_DEBUG); - - if (!(mem = dm_pool_create("match_regex", 10 * 1024))) { - fprintf(stderr, "Couldn't create pool\n"); - ret = 2; - goto err; - } - - if (!_read_spec(pattern_file, &regex, &nregex)) { - fprintf(stderr, "Couldn't read the lex specification\n"); - ret = 3; - goto err; - } - - if (!(scanner = dm_regex_create(mem, (const char **)regex, nregex))) { - fprintf(stderr, "Couldn't build the lexer\n"); - ret = 4; - goto err; - } - - if (want_finger_print) - printf("fingerprint: %x\n", dm_regex_fingerprint(scanner)); - _scan_input(scanner, regex); - _free_regex(regex, nregex); - - err: - dm_pool_destroy(mem); - - return ret; -} diff --git a/unit-tests/regex/matcher_t.expected b/unit-tests/regex/matcher_t.expected deleted file mode 100644 index 0b986b0bf..000000000 --- a/unit-tests/regex/matcher_t.expected +++ /dev/null @@ -1,16 +0,0 @@ -fingerprint: 352b6c4f -/dev/loop/0 : loop/[0-9]+ -/dev/loop/1 : loop/[0-9]+ -/dev/loop/2 : loop/[0-9]+ -/dev/loop/3 : loop/[0-9]+ -/dev/loop/4 : loop/[0-9]+ -/dev/loop/5 : loop/[0-9]+ -/dev/loop/6 : loop/[0-9]+ -/dev/loop/7 : loop/[0-9]+ -/dev/hda1 : hd[a-d][0-5]+ -/dev/hda2 : hd[a-d][0-5]+ -/dev/hda3 : hd[a-d][0-5]+ -/dev/hda4 : hd[a-d][0-5]+ -/dev/hda5 : hd[a-d][0-5]+ -/dev/hdb1 : hd[a-d][0-5]+ -/dev/hdc1 : hd[a-d][0-5]+ diff --git 
a/unit-tests/regex/matcher_t.expected2 b/unit-tests/regex/matcher_t.expected2 deleted file mode 100644 index 735993769..000000000 --- a/unit-tests/regex/matcher_t.expected2 +++ /dev/null @@ -1 +0,0 @@ -fingerprint: eed8ceb8 diff --git a/unit-tests/regex/matcher_t.expected3 b/unit-tests/regex/matcher_t.expected3 deleted file mode 100644 index fa561497a..000000000 --- a/unit-tests/regex/matcher_t.expected3 +++ /dev/null @@ -1,3 +0,0 @@ -foo€bar : € -fooÂb : fooÂb -€ : € diff --git a/unit-tests/regex/nonprint_input b/unit-tests/regex/nonprint_input deleted file mode 100644 index 92a1807fb..000000000 --- a/unit-tests/regex/nonprint_input +++ /dev/null @@ -1,4 +0,0 @@ -foo.bar -foo€bar -fooÂb -€ diff --git a/unit-tests/regex/nonprint_regexes b/unit-tests/regex/nonprint_regexes deleted file mode 100644 index e164c213c..000000000 --- a/unit-tests/regex/nonprint_regexes +++ /dev/null @@ -1,3 +0,0 @@ -"foo€bar" -"fooÂb" -"€" diff --git a/unit-tests/regex/parse_t.c b/unit-tests/regex/parse_t.c deleted file mode 100644 index d536e5af4..000000000 --- a/unit-tests/regex/parse_t.c +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. - * Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved. - * - * This file is part of LVM2. - * - * This copyrighted material is made available to anyone wishing to use, - * modify, copy, or redistribute it subject to the terms and conditions - * of the GNU General Public License v.2. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -/* hack - using unexported internal function */ -#define DEBUG -#include "regex/parse_rx.c" - -#include -#include - -static void _pretty_print(struct rx_node *rx, int depth) -{ - int i; - for (i = 0; i < depth; i++) - printf(" "); - - /* display info about the node */ - switch (rx->type) { - case CAT: - printf("Cat"); - break; - - case OR: - printf("Or"); - break; - - case STAR: - printf("Star"); - break; - - case PLUS: - printf("Plus"); - break; - - case QUEST: - printf("Quest"); - break; - - case CHARSET: - printf("Charset : "); - for (i = 0; i < 256; i++) { - if (dm_bit(rx->charset, i) && isprint(i)) - printf("%c", (char) i); - } - break; - - default: - printf("Unknown type"); - } - printf("\n"); - - if (rx->left) - _pretty_print(rx->left, depth + 1); - - if (rx->right) - _pretty_print(rx->right, depth + 1); -} - -int main(int argc, char **argv) -{ - struct dm_pool *mem; - struct rx_node *rx; - int regex_print = 0; - int show_nodes = 0; - int regex_arg = 1; - - if (argc == 3 && !strcmp(argv[1], "-r")) { - regex_print++; - regex_arg++; - argc--; - } - - if (argc == 3 && !strcmp(argv[1], "-R")) { - regex_print++; - show_nodes++; - regex_arg++; - argc--; - } - - if (argc != 2) { - fprintf(stderr, "Usage : %s [-r] \n", argv[0]); - exit(0); - } - - dm_log_init_verbose(_LOG_DEBUG); - - if (!(mem = dm_pool_create("parse_regex", 1024))) { - fprintf(stderr, "Couldn't create pool\n"); - exit(1); - } - - if (!(rx = rx_parse_str(mem, argv[regex_arg]))) { - dm_pool_destroy(mem); - fprintf(stderr, "Couldn't parse regex\n"); - exit(1); - } - - if (regex_print) - _regex_print(rx, 0, show_nodes); - else - _pretty_print(rx, 0); - - dm_pool_destroy(mem); - - return 0; -} diff --git a/unit-tests/regex/random_regexes b/unit-tests/regex/random_regexes deleted 
file mode 100644 index 7b9362d8b..000000000 --- a/unit-tests/regex/random_regexes +++ /dev/null @@ -1,100 +0,0 @@ -"(((a?)(([Ub]*)|z))((([qr]|X)+)([Qn]*)))+" -"[HZejtuw]*" -"((B|s)*)|(((([Fv]l)(N+))(([el]|C)(tJ)))?)" -"((([Ma]?)|(t*))*)|((([cm]E)|(M?))|(([BE][EV])|([Qj][Mh])))" -"(((([bw]*)|([IO]*))((zK)*))|(((pU)|(i|q))|((z?)|([HL]?))))*" -"((([Pt]?)|[Tr])?)((Hq)*)" -"[HOXcfgikosvwxz]" -"[BCEFGHNPTUWfjlprsy]" -"((((aD)*)|([Xo]+))+)(([HKn](([Eq]|[JQ])(I*)))*)" -"([LNWYeghv]|e)*" -"(((y(L*))*)|((([EP]+)(W+))*))*" -"U*" -"((((R+)(W|[Qr]))|([py]+))+)([LM]*)" -"(([DOjx](D(b?)))|([Ke]*))*" -"((([ls](c|[FT]))*)([JS]*))*" -"((l?)|(([Gz]+)|(D*)))*" -"[ABgjn]" -"(((q|[dg])?)|([Uk]*))((([Fl]?)|([Ry]+))|(([IR]|c)|(T?)))" -"((([an]|P)|[Jw])((a*)|(m*)))*" -"((((R[ht])(h+))?)|(([pz](n?))+))+" -"(((([Dc]b)([Sp][Ii]))|((k|F)*))|[Uiovz])*" -"[Res]*" -"[Zl]|a" -"^[ANZdf]$" -"[En]|(((Q+)(U+))([pt]*))" -"[ADEIMQUWXZhklrsvz]" -"(((S(y*))*)|(j*))*" -"n*" -"[NUau]*" -"((((Z*)(D|[Nd]))|(([np]|B)+))|(([Xy][Fi])*))+" -"((([EZ]?)|(d[HR]))*)((([Hg]|q)(P+))*)" -"q" -"((m*)|(p|B))|((((x?)|(t+))(([Sb][PX])(O|[HM])))+)" -"((((A*)(z[RS]))*)|(((z+)(Q*))+))*" -"(((M*)([Uu]*))+)|[Uk]" -"[imv]" -"[GLSchtw](([Yw]((F[Dd])|([Tw]+)))?)" -"([MOZj]*)(S|[Wknr])" -"((G|q)*)[BHKN]" -"((((NW)|([Ao]?))|((l|[UV])+))+)|((i|(z*))*)" -"((((Z+)|([IR]?))|(L*))|([JKQ]+))+" -"([Bdin](S*))+" -"[HLNSTp]*" -"(((J*)([Bq]|[Yu]))*)|([Kv]*)" -"(((([BJ]|[Zy])(wI))*)(y*))+" -"(((hF)+)|(H*))*" -"((([QU][Pj])([GQ]?))+)|[PWo]" -"(((([cq][BX])?)|((f[DI])*))*)(([GM]*)[SVYr])" -"(([Zt]*)|((qx)|(([BV]+)(f?))))*" -"[ILWYhsx]*" -"(([Uy]*)|[sv])|([NSc]*)" -"((c*)|([JUfhy]?))+" -"(((q*)([So]*))(((g[jq])(j?))+))*" -"((b+)|(((T+)([fw]T))?))*" -"((([DS]?)|([Th]|u))(Q*))*" -"[FKLX]|((([fw](L?))(([gq]*)|(O?)))?)" -"((([HZ]+)u)*)|[APWijn]" -"(e*)|(((v?)|((J+)(Hb)))?)" -"(e|((w+)f))*" -"[BEHKPQVdelnqy]" -"((((B|N)(s*))|[Rr])(((g?)|([rv]+))+))+" -"(((s*)|(K*))([AP]G))*" -"[CELTp]" -"(([Fq]?)|([Al]+))*" -"((((r?)|(y[jx]))|([mp]*))+)|((B(S*))*)" 
-"((([Eq]+)|(Y[ds]))|(x|(i|[Ku])))[IJNrvy]" -"((([NO]*)[Ix])+)([Jenq]+)" -"(((([HP]*)(j|y))*)[Ylqvy])*" -"[PTv]+" -"[AINSZhpx]|([EOYZ]*)" -"([ABCFQv]*)((([Zx]|h)+)|([ej]*))" -"((([pr]*)|(([Dq]|p)|(H?)))?)([NRUXmoq]*)" -"(([er]*)|([mx]*))(((nV)([am]?))+)" -"[BHPRlpu]" -"(((([Ah]|[tx])|(e|[uy]))?)((([fl]+)([Vz]|v))*))*" -"[AGdm]" -"(((K*)^(O*)$)|(B?))*" -"((([Ks]|[Ka])*)|([FSTab]?))?" -"(([kw]+)[ei])(([Hy]*)(([Mc]*)|(G|f)))" -"((((e*)|(Zf))|(R|[nq]))((([Jz]v)([Rj]+))+))*" -"(((a?)|(e?))(([Uc]*)(S+)))*" -"((((E+)([MZ]?))+)|(((s|[Az])|z)*))?" -"((((i[MO])*)|((LH)*))|(((BA)|([AI]+))|[Ug]))*" -"[EGHILcho]*" -"(((Z[vw])?)((z|g)+))(((H|U)([iv]Q))|([qw]?))" -"(([ehmr]|((L[Uw])*))+)((a+)I)" -"[EKNSWYagj](((v|[TX])|([Uk]+))*)" -"(((R[Mo])|(O*))|([Fm]|([qw]*)))((m*)|((S|[Ki])?))" -"((((kP)|c)?)((([do]+)|([Gi]?))*))*" -"((^(B|W)$|([Ww]+))([no]*))|((([iv]?)|(M*))|((x|L)?))" -"[AEGPRSbcfhsy]" -"[Wbcf]|((([MO]?)|([NT]|m))(([Oo]?)([Wg]*)))" -"(((YZ)*)[PQVei])*" -"[GJKYt][AEGWdegmnt]" -"^[CDEGJKNUVYZagkv]$" -"([DPWbx]*)|(((q|B)|(P|u))((M[Bq])*))" -"[FHIJRTVYZdiorsuvz]*" -"([MWoqvz]*)|^(l*)" -"(((I|[Rx])*)((X[Mf])([Xa]L)))([Ha]|([HY]*))" -"(((l|[Sd])*)((([Ix]+)|([XY]?))(Z*)))+" From cb2c4542a6028ef09d30bfdcdaef4e7ddb740ca7 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Wed, 31 Jan 2018 11:35:47 +0000 Subject: [PATCH 06/87] [git] Update .gitignore --- .gitignore | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/.gitignore b/.gitignore index a890ceec8..0ac0a32f9 100644 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,51 @@ make.tmpl /cscope.out /tags /tmp/ + +tools/man-generator +tools/man-generator.c + +test/lib/lvchange +test/lib/lvconvert +test/lib/lvcreate +test/lib/lvdisplay +test/lib/lvextend +test/lib/lvmconfig +test/lib/lvmdiskscan +test/lib/lvmsadc +test/lib/lvmsar +test/lib/lvreduce +test/lib/lvremove +test/lib/lvrename +test/lib/lvresize +test/lib/lvs +test/lib/lvscan +test/lib/pvchange +test/lib/pvck +test/lib/pvcreate 
+test/lib/pvdisplay +test/lib/pvmove +test/lib/pvremove +test/lib/pvresize +test/lib/pvs +test/lib/pvscan +test/lib/vgcfgbackup +test/lib/vgcfgrestore +test/lib/vgchange +test/lib/vgck +test/lib/vgconvert +test/lib/vgcreate +test/lib/vgdisplay +test/lib/vgexport +test/lib/vgextend +test/lib/vgimport +test/lib/vgimportclone +test/lib/vgmerge +test/lib/vgmknodes +test/lib/vgreduce +test/lib/vgremove +test/lib/vgrename +test/lib/vgs +test/lib/vgscan +test/lib/vgsplit + From 46867a45d28ecb667d08df8d8cb497fffc10814d Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Thu, 1 Feb 2018 09:54:56 +0000 Subject: [PATCH 07/87] [device/bcache] stub a unit test --- include/.symlinks.in | 1 + lib/device/bcache.h | 1 + test/unit/Makefile.in | 3 ++- test/unit/bcache_t.c | 38 ++++++++++++++++++++++++++++++++++++++ test/unit/run.c | 1 + test/unit/units.h | 1 + 6 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 test/unit/bcache_t.c diff --git a/include/.symlinks.in b/include/.symlinks.in index a1b5c1e05..3d5075058 100644 --- a/include/.symlinks.in +++ b/include/.symlinks.in @@ -14,6 +14,7 @@ @top_srcdir@/lib/config/defaults.h @top_srcdir@/lib/datastruct/btree.h @top_srcdir@/lib/datastruct/str_list.h +@top_srcdir@/lib/device/bcache.h @top_srcdir@/lib/device/dev-cache.h @top_srcdir@/lib/device/dev-ext-udev-constants.h @top_srcdir@/lib/device/dev-type.h diff --git a/lib/device/bcache.h b/lib/device/bcache.h index 322774469..0747b5ac5 100644 --- a/lib/device/bcache.h +++ b/lib/device/bcache.h @@ -16,6 +16,7 @@ #define BCACHE_H #include +#include #include "libdevmapper.h" diff --git a/test/unit/Makefile.in b/test/unit/Makefile.in index 3de30f2e7..8127ec006 100644 --- a/test/unit/Makefile.in +++ b/test/unit/Makefile.in @@ -16,6 +16,7 @@ top_builddir = @top_builddir@ VPATH = $(srcdir) UNITS = \ + bcache_t.c \ bitset_t.c\ config_t.c\ dmlist_t.c\ @@ -37,7 +38,7 @@ SOURCES = $(UNITS) endif ifeq ("$(TESTING)", "yes") -LDLIBS += -ldevmapper @CUNIT_LIBS@ +LDLIBS += 
$(LVMINTERNAL_LIBS) -ldevmapper -laio @CUNIT_LIBS@ CFLAGS += @CUNIT_CFLAGS@ check: unit diff --git a/test/unit/bcache_t.c b/test/unit/bcache_t.c new file mode 100644 index 000000000..da274a9d9 --- /dev/null +++ b/test/unit/bcache_t.c @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2018 Red Hat, Inc. All rights reserved. + * + * This file is part of LVM2. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "units.h" +#include "bcache.h" + +int bcache_init(void) +{ + return 0; +} + +int bcache_fini(void) +{ + return 0; +} + +static void test_create(void) +{ + struct bcache *cache = bcache_create(8, 16); + CU_ASSERT_PTR_NOT_NULL(cache); + bcache_destroy(cache); +} + +CU_TestInfo bcache_list[] = { + { (char*)"create", test_create }, + CU_TEST_INFO_NULL +}; diff --git a/test/unit/run.c b/test/unit/run.c index 7372138d5..82090ba55 100644 --- a/test/unit/run.c +++ b/test/unit/run.c @@ -13,6 +13,7 @@ .pTests = n##_list } CU_SuiteInfo suites[] = { + USE(bcache), USE(bitset), USE(config), USE(dmlist), diff --git a/test/unit/units.h b/test/unit/units.h index 9eaa82f25..319e7ceb9 100644 --- a/test/unit/units.h +++ b/test/unit/units.h @@ -23,6 +23,7 @@ int n ## _init(void); \ int n ## _fini(void); +DECL(bcache); DECL(bitset); DECL(config); DECL(dmlist); From 0f0eb04edb00cb5e95c47cb9ca2e70574e6e49ce Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Thu, 1 Feb 2018 14:52:43 +0000 Subject: [PATCH 08/87] [device/bcache] some more work on bcache --- lib/device/bcache.c | 103 +++++++++++++++++++---------- lib/device/bcache.h | 2 +- test/unit/Makefile.in | 2 +- test/unit/bcache_t.c | 147 
++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 218 insertions(+), 36 deletions(-) diff --git a/lib/device/bcache.c b/lib/device/bcache.c index 3b8cf789b..5a7b2f93c 100644 --- a/lib/device/bcache.c +++ b/lib/device/bcache.c @@ -167,12 +167,19 @@ static struct io_engine *_engine_create(unsigned max_io) static void _engine_destroy(struct io_engine *e) { + int r; + _cb_set_destroy(e->cbs); - io_destroy(e->aio_context); + + // io_destroy is really slow + r = io_destroy(e->aio_context); + if (r) + log_sys_warn("io_destroy"); + dm_free(e); } -static bool _engine_issue(struct io_engine *e, int fd, enum dir d, +static bool _engine_issue(struct io_engine *e, enum dir d, int fd, sector_t sb, sector_t se, void *data, void *context) { int r; @@ -290,7 +297,6 @@ enum block_flags { }; struct bcache { - int fd; sector_t block_sectors; uint64_t nr_data_blocks; uint64_t nr_cache_blocks; @@ -488,33 +494,6 @@ static void _relink(struct block *b) * *--------------------------------------------------------------*/ -/* - * |b->list| should be valid (either pointing to itself, on one of the other - * lists. - */ -static bool _issue_low_level(struct block *b, enum dir d) -{ - struct bcache *cache = b->cache; - sector_t sb = b->index * cache->block_sectors; - sector_t se = sb + cache->block_sectors; - - if (_test_flags(b, BF_IO_PENDING)) - return false; - - _set_flags(b, BF_IO_PENDING); - return _engine_issue(cache->engine, cache->fd, d, sb, se, b->data, b); -} - -static inline bool _issue_read(struct block *b) -{ - return _issue_low_level(b, DIR_READ); -} - -static inline bool _issue_write(struct block *b) -{ - return _issue_low_level(b, DIR_WRITE); -} - static void _complete_io(void *context, int err) { struct block *b = context; @@ -539,6 +518,39 @@ static void _complete_io(void *context, int err) } } +/* + * |b->list| should be valid (either pointing to itself, on one of the other + * lists. 
+ */ +static bool _issue_low_level(struct block *b, enum dir d) +{ + struct bcache *cache = b->cache; + sector_t sb = b->index * cache->block_sectors; + sector_t se = sb + cache->block_sectors; + + if (_test_flags(b, BF_IO_PENDING)) + return false; + + _set_flags(b, BF_IO_PENDING); + if (!_engine_issue(cache->engine, d, b->fd, sb, se, b->data, b)) { + _complete_io(b, -EIO); + return false; + } + + return true; + +} + +static inline bool _issue_read(struct block *b) +{ + return _issue_low_level(b, DIR_READ); +} + +static inline bool _issue_write(struct block *b) +{ + return _issue_low_level(b, DIR_WRITE); +} + static bool _wait_io(struct bcache *cache) { return _engine_wait(cache->engine, _complete_io); @@ -598,7 +610,7 @@ static struct block *_find_unused_clean_block(struct bcache *cache) return NULL; } -static struct block *_new_block(struct bcache *cache, block_address index) +static struct block *_new_block(struct bcache *cache, int fd, block_address index) { struct block *b; @@ -616,6 +628,7 @@ static struct block *_new_block(struct bcache *cache, block_address index) dm_list_init(&b->list); dm_list_init(&b->hash); b->flags = 0; + b->fd = fd; b->index = index; b->ref_count = 0; b->error = 0; @@ -685,7 +698,7 @@ static struct block *_lookup_or_read_block(struct bcache *cache, } else { _miss(cache, flags); - b = _new_block(cache, index); + b = _new_block(cache, fd, index); if (b) { if (flags & GF_ZERO) _zero_block(b); @@ -728,6 +741,21 @@ struct bcache *bcache_create(sector_t block_sectors, unsigned nr_cache_blocks) { struct bcache *cache; + if (!nr_cache_blocks) { + log_warn("bcache must have at least one cache block"); + return NULL; + } + + if (!block_sectors) { + log_warn("bcache must have a non zero block size"); + return NULL; + } + + if (block_sectors & ((PAGE_SIZE >> SECTOR_SHIFT) - 1)) { + log_warn("bcache block size must be a multiple of page size"); + return NULL; + } + cache = dm_malloc(sizeof(*cache)); if (!cache) return NULL; @@ -787,6 +815,11 @@ 
void bcache_destroy(struct bcache *cache) dm_free(cache); } +unsigned bcache_nr_cache_blocks(struct bcache *cache) +{ + return cache->nr_cache_blocks; +} + void bcache_prefetch(struct bcache *cache, int fd, block_address index) { struct block *b = _hash_lookup(cache, fd, index); @@ -794,7 +827,7 @@ void bcache_prefetch(struct bcache *cache, int fd, block_address index) if (!b) { cache->prefetches++; - b = _new_block(cache, index); + b = _new_block(cache, fd, index); if (b) _issue_read(b); } @@ -803,7 +836,9 @@ void bcache_prefetch(struct bcache *cache, int fd, block_address index) bool bcache_get(struct bcache *cache, int fd, block_address index, unsigned flags, struct block **result) { - struct block *b = _lookup_or_read_block(cache, fd, index, flags); + struct block *b; + + b = _lookup_or_read_block(cache, fd, index, flags); if (b) { if (!b->ref_count) cache->nr_locked++; diff --git a/lib/device/bcache.h b/lib/device/bcache.h index 0747b5ac5..273034773 100644 --- a/lib/device/bcache.h +++ b/lib/device/bcache.h @@ -61,7 +61,7 @@ enum bcache_get_flags { typedef uint64_t block_address; -unsigned bcache_get_max_prefetches(struct bcache *cache); +unsigned bcache_nr_cache_blocks(struct bcache *cache); /* * Use the prefetch method to take advantage of asynchronous IO. 
For example, diff --git a/test/unit/Makefile.in b/test/unit/Makefile.in index 8127ec006..5cf92ba10 100644 --- a/test/unit/Makefile.in +++ b/test/unit/Makefile.in @@ -47,7 +47,7 @@ $(TARGETS): $(OBJECTS) $(top_builddir)/libdm/libdevmapper.$(LIB_SUFFIX) $(CC) $(CFLAGS) $(LDFLAGS) $(EXTRA_EXEC_LDFLAGS) -L$(top_builddir)/libdm \ -o $@ $(OBJECTS) $(LDLIBS) -unit: $(TARGETS) +unit: $(TARGETS) $(top_builddir)/lib/liblvm-internal.a @echo Running unit tests LD_LIBRARY_PATH=$(top_builddir)/libdm ./$(TARGETS) endif diff --git a/test/unit/bcache_t.c b/test/unit/bcache_t.c index da274a9d9..ef927214b 100644 --- a/test/unit/bcache_t.c +++ b/test/unit/bcache_t.c @@ -12,9 +12,21 @@ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#define _GNU_SOURCE + +#include +#include +#include +#include + #include "units.h" #include "bcache.h" +#define MEG 2048 +#define SECTOR_SHIFT 9 + +static const char *_test_path = "test.bin"; + int bcache_init(void) { return 0; @@ -25,6 +37,42 @@ int bcache_fini(void) return 0; } +static int open_file(const char *path) +{ + return open(path, O_EXCL | O_RDWR | O_DIRECT, 0666); +} + +static int _prep_file(const char *path) +{ + int fd, r; + + fd = open(path, O_CREAT | O_TRUNC | O_EXCL | O_RDWR | O_DIRECT, 0666); + if (fd < 0) + return -1; + + r = fallocate(fd, FALLOC_FL_ZERO_RANGE, 0, (16 * MEG) << SECTOR_SHIFT); + if (r) { + close(fd); + return -1; + } + + close(fd); + return 0; +} + + +static int test_init(void) +{ + unlink(_test_path); + return _prep_file(_test_path); +} + +static int test_exit(void) +{ + unlink(_test_path); + return 0; +} + static void test_create(void) { struct bcache *cache = bcache_create(8, 16); @@ -32,7 +80,106 @@ static void test_create(void) bcache_destroy(cache); } +static void test_nr_cache_blocks_must_be_positive(void) +{ + struct bcache *cache = bcache_create(8, 0); + CU_ASSERT_PTR_NULL(cache); +} + +static void test_block_size_must_be_positive(void) +{ + struct bcache *cache = bcache_create(0, 16); + 
CU_ASSERT_PTR_NULL(cache); +} + +static void test_block_size_must_be_multiple_of_page_size(void) +{ + unsigned i; + struct bcache *cache; + + { + static unsigned _bad_examples[] = {3, 9, 13, 1025}; + + for (i = 0; i < DM_ARRAY_SIZE(_bad_examples); i++) { + cache = bcache_create(_bad_examples[i], 16); + CU_ASSERT_PTR_NULL(cache); + } + } + + { + // Only testing a few sizes because io_destroy is seriously + // slow. + for (i = 1; i < 25; i++) { + cache = bcache_create(8 * i, 16); + CU_ASSERT_PTR_NOT_NULL(cache); + bcache_destroy(cache); + } + } +} + +static void test_reads_work(void) +{ + int fd; + + // FIXME: add fixtures. + test_init(); + fd = open_file("./test.bin"); + CU_ASSERT(fd >= 0); + + { + int i; + struct block *b; + struct bcache *cache = bcache_create(8, 16); + + CU_ASSERT(bcache_get(cache, fd, 0, 0, &b)); + for (i = 0; i < 8 << SECTOR_SHIFT; i++) + CU_ASSERT(((unsigned char *) b->data)[i] == 0); + bcache_put(b); + + bcache_destroy(cache); + } + + close(fd); + + test_exit(); +} + +static void test_prefetch_works(void) +{ + int fd; + + // FIXME: add fixtures. 
+ test_init(); + fd = open_file("./test.bin"); + CU_ASSERT(fd >= 0); + + { + int i; + struct block *b; + struct bcache *cache = bcache_create(8, 16); + + for (i = 0; i < 16; i++) + bcache_prefetch(cache, fd, i); + + for (i = 0; i < 16; i++) { + CU_ASSERT(bcache_get(cache, fd, i, 0, &b)); + bcache_put(b); + } + + bcache_destroy(cache); + } + + close(fd); + + test_exit(); +} + CU_TestInfo bcache_list[] = { { (char*)"create", test_create }, + { (char*)"nr cache block must be positive", test_nr_cache_blocks_must_be_positive }, + { (char*)"block size must be positive", test_block_size_must_be_positive }, + { (char*)"block size must be multiple of page size", test_block_size_must_be_multiple_of_page_size }, + { (char*)"reads work", test_reads_work }, + { (char*)"prefetch works", test_prefetch_works }, CU_TEST_INFO_NULL }; From c4c4acfd423323cbe8fbfbdb8c1882edb4a92b29 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 2 Feb 2018 07:59:49 +0000 Subject: [PATCH 09/87] [device/bcache] Add a couple of invalidate methods --- lib/device/bcache.c | 60 +++++++++++++++++++++++++++++++++++++++++++++ lib/device/bcache.h | 13 ++++++++++ 2 files changed, 73 insertions(+) diff --git a/lib/device/bcache.c b/lib/device/bcache.c index 5a7b2f93c..09ef6ef7f 100644 --- a/lib/device/bcache.c +++ b/lib/device/bcache.c @@ -884,5 +884,65 @@ int bcache_flush(struct bcache *cache) return dm_list_empty(&cache->errored) ? 0 : -EIO; } +static void _recycle_block(struct bcache *cache, struct block *b) +{ + _unlink_block(b); + _hash_remove(b); + dm_list_add(&cache->free, &b->list); +} + +/* + * You can safely call this with a NULL block. 
+ */ +static void _invalidate_block(struct bcache *cache, struct block *b) +{ + if (!b) + return; + + if (_test_flags(b, BF_IO_PENDING)) + _wait_specific(b); + + if (b->ref_count) + log_warn("bcache_invalidate: block (%d, %llu) still held", + b->fd, (unsigned long long) index); + else { + if (_test_flags(b, BF_DIRTY)) { + _issue_write(b); + _wait_specific(b); + } + + _recycle_block(cache, b); + } +} + +void bcache_invalidate(struct bcache *cache, int fd, block_address index) +{ + _invalidate_block(cache, _hash_lookup(cache, fd, index)); +} + +// FIXME: switch to a trie, or maybe 1 hash table per fd? To save iterating +// through the whole cache. +void bcache_invalidate_fd(struct bcache *cache, int fd) +{ + struct block *b, *tmp; + + // Start writing back any dirty blocks on this fd. + dm_list_iterate_items_safe (b, tmp, &cache->dirty) + if (b->fd == fd) + _issue_write(b); + + _wait_all(cache); + + // Everything should be in the clean list now. + dm_list_iterate_items_safe (b, tmp, &cache->clean) + if (b->fd == fd) + _invalidate_block(cache, b); + + // Except they could be in the errored list :) + dm_list_iterate_items_safe (b, tmp, &cache->errored) + if (b->fd == fd) + _recycle_block(cache, b); +} + //---------------------------------------------------------------- diff --git a/lib/device/bcache.h b/lib/device/bcache.h index 273034773..5c68e3c29 100644 --- a/lib/device/bcache.h +++ b/lib/device/bcache.h @@ -93,6 +93,19 @@ void bcache_put(struct block *b); int bcache_flush(struct bcache *cache); +/* + * Removes a block from the cache. If the block is dirty it will be written + * back first. If the block is currently held a warning will be issued, and it + * will not be removed. + */ +void bcache_invalidate(struct bcache *cache, int fd, block_address index); + +/* + * Invalidates all blocks on the given descriptor. Call this before closing + * the descriptor to make sure everything is written back. 
+ */ +void bcache_invalidate_fd(struct bcache *cache, int fd); + /*----------------------------------------------------------------*/ #endif From 1563b936911d26c665b590f200884c9ed31ab7c3 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 2 Feb 2018 12:06:14 +0000 Subject: [PATCH 10/87] [device/bcache] Add bcache_max_prefetches() Ignore prefetches if max io is in flight. --- lib/device/bcache.c | 23 +++++++++++++++-------- lib/device/bcache.h | 1 + 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/lib/device/bcache.c b/lib/device/bcache.c index 09ef6ef7f..4283ce5cf 100644 --- a/lib/device/bcache.c +++ b/lib/device/bcache.c @@ -216,17 +216,18 @@ static bool _engine_issue(struct io_engine *e, enum dir d, int fd, return true; } -#define MAX_IO 64 +#define MAX_IO 1024 +#define MAX_EVENT 64 typedef void complete_fn(void *context, int io_error); static bool _engine_wait(struct io_engine *e, complete_fn fn) { int i, r; - struct io_event event[MAX_IO]; + struct io_event event[MAX_EVENT]; struct control_block *cb; memset(&event, 0, sizeof(event)); - r = io_getevents(e->aio_context, 1, MAX_IO, event, NULL); + r = io_getevents(e->aio_context, 1, MAX_EVENT, event, NULL); if (r < 0) { log_sys_warn("io_getevents"); return false; @@ -300,6 +301,7 @@ struct bcache { sector_t block_sectors; uint64_t nr_data_blocks; uint64_t nr_cache_blocks; + unsigned max_io; struct io_engine *engine; @@ -762,8 +764,8 @@ struct bcache *bcache_create(sector_t block_sectors, unsigned nr_cache_blocks) cache->block_sectors = block_sectors; cache->nr_cache_blocks = nr_cache_blocks; - - cache->engine = _engine_create(nr_cache_blocks < 1024u ? nr_cache_blocks : 1024u); + cache->max_io = nr_cache_blocks < MAX_IO ? 
nr_cache_blocks : MAX_IO; + cache->engine = _engine_create(cache->max_io); if (!cache->engine) { dm_free(cache); return NULL; @@ -820,16 +822,21 @@ unsigned bcache_nr_cache_blocks(struct bcache *cache) return cache->nr_cache_blocks; } +unsigned bcache_max_prefetches(struct bcache *cache) +{ + return cache->max_io; +} + void bcache_prefetch(struct bcache *cache, int fd, block_address index) { struct block *b = _hash_lookup(cache, fd, index); if (!b) { - cache->prefetches++; - b = _new_block(cache, fd, index); - if (b) + if (b && (cache->nr_io_pending < cache->max_io)) { + cache->prefetches++; _issue_read(b); + } } } diff --git a/lib/device/bcache.h b/lib/device/bcache.h index 5c68e3c29..14204bec0 100644 --- a/lib/device/bcache.h +++ b/lib/device/bcache.h @@ -62,6 +62,7 @@ enum bcache_get_flags { typedef uint64_t block_address; unsigned bcache_nr_cache_blocks(struct bcache *cache); +unsigned bcache_max_prefetches(struct bcache *cache); /* * Use the prefetch method to take advantage of asynchronous IO. 
For example, From 19647d1cd44d029a8aa2f7e74dbdbd1f114a8c08 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 2 Feb 2018 14:34:45 +0000 Subject: [PATCH 11/87] [device/bcache] fix bug in _alloc_block --- lib/device/bcache.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/device/bcache.c b/lib/device/bcache.c index 4283ce5cf..e5d0e1bae 100644 --- a/lib/device/bcache.c +++ b/lib/device/bcache.c @@ -449,8 +449,10 @@ static void _exit_free_list(struct bcache *cache) static struct block *_alloc_block(struct bcache *cache) { - struct block *b = dm_list_struct_base(_list_pop(&cache->free), struct block, list); - return b; + if (dm_list_empty(&cache->free)) + return NULL; + + return dm_list_struct_base(_list_pop(&cache->free), struct block, list); } /*---------------------------------------------------------------- From 0d0fab3d2ddb0c0f16c01e569e3f1f218701592e Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 2 Feb 2018 14:35:11 +0000 Subject: [PATCH 12/87] [device/bcache] another unit test --- test/unit/bcache_t.c | 48 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/test/unit/bcache_t.c b/test/unit/bcache_t.c index ef927214b..5532c9266 100644 --- a/test/unit/bcache_t.c +++ b/test/unit/bcache_t.c @@ -50,7 +50,7 @@ static int _prep_file(const char *path) if (fd < 0) return -1; - r = fallocate(fd, FALLOC_FL_ZERO_RANGE, 0, (16 * MEG) << SECTOR_SHIFT); + r = fallocate(fd, FALLOC_FL_ZERO_RANGE, 0, (1 * MEG) << SECTOR_SHIFT); if (r) { close(fd); return -1; @@ -174,6 +174,51 @@ static void test_prefetch_works(void) test_exit(); } +#define NR_FILES 4 +static void test_multiple_files(void) +{ + unsigned i; + int fd[NR_FILES]; + char buffer[128]; + + + // FIXME: add fixtures. 
+ test_init(); + for (i = 0; i < NR_FILES; i++) { + snprintf(buffer, sizeof(buffer), "./test%u.bin", i); + unlink(buffer); + _prep_file(buffer); + fd[i] = open_file(buffer); + CU_ASSERT(fd[i] >= 0); + } + + { + struct block *b; + struct bcache *cache = bcache_create(8, 16); + + for (i = 0; i < 64; i++) { + if (!bcache_get(cache, fd[i % NR_FILES], i, 0, &b)) { + CU_ASSERT(false); + } else + bcache_put(b); + } + + bcache_destroy(cache); + } + + for (i = 0; i < NR_FILES; i++) + close(fd[i]); + + test_exit(); +} + +// Tests to be written +// Open multiple files and prove the blocks are coming from the correct file +// show invalidate works +// show invalidate_fd works +// show writeback is working +// check zeroing +// CU_TestInfo bcache_list[] = { { (char*)"create", test_create }, { (char*)"nr cache block must be positive", test_nr_cache_blocks_must_be_positive }, @@ -181,5 +226,6 @@ CU_TestInfo bcache_list[] = { { (char*)"block size must be multiple of page size", test_block_size_must_be_multiple_of_page_size }, { (char*)"reads work", test_reads_work }, { (char*)"prefetch works", test_prefetch_works }, + { (char*)"multiple files", test_multiple_files }, CU_TEST_INFO_NULL }; From b03e55a5130ffdf6be9188b227c59e6793dc0dfc Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 2 Feb 2018 15:38:46 +0000 Subject: [PATCH 13/87] [device/bcache] rename a unit test --- test/unit/bcache_t.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/unit/bcache_t.c b/test/unit/bcache_t.c index 5532c9266..3db9cc71b 100644 --- a/test/unit/bcache_t.c +++ b/test/unit/bcache_t.c @@ -175,7 +175,7 @@ static void test_prefetch_works(void) } #define NR_FILES 4 -static void test_multiple_files(void) +static void test_read_multiple_files(void) { unsigned i; int fd[NR_FILES]; @@ -226,6 +226,6 @@ CU_TestInfo bcache_list[] = { { (char*)"block size must be multiple of page size", test_block_size_must_be_multiple_of_page_size }, { (char*)"reads work", test_reads_work }, { 
(char*)"prefetch works", test_prefetch_works }, - { (char*)"multiple files", test_multiple_files }, + { (char*)"read multiple files", test_read_multiple_files }, CU_TEST_INFO_NULL }; From 8ae3b244fcbc207b51a81514e51008fe64d13368 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 2 Feb 2018 15:39:17 +0000 Subject: [PATCH 14/87] [build] include test/unit/Makefile rather than recursive build FIXME: unit tests are not currently run as part of make check. --- Makefile.in | 23 +----------------- test/Makefile.in | 2 +- test/unit/Makefile.in | 54 ++++++++++++++----------------------------- 3 files changed, 19 insertions(+), 60 deletions(-) diff --git a/Makefile.in b/Makefile.in index 31d428d9b..146ed558a 100644 --- a/Makefile.in +++ b/Makefile.in @@ -212,28 +212,7 @@ endif endif ifeq ("$(TESTING)", "yes") -# testing and report generation -RUBY=ruby1.9 -Ireport-generators/lib -Ireport-generators/test - -.PHONY: unit-test ruby-test test-programs - -# FIXME: put dependencies on libdm and liblvm -# FIXME: Should be handled by Makefiles in subdirs, not here at top level. -test-programs: - cd unit-tests/regex && $(MAKE) - cd unit-tests/datastruct && $(MAKE) - cd unit-tests/mm && $(MAKE) - -unit-test: test-programs - $(RUBY) report-generators/unit_test.rb $(shell find . -name TESTS) - $(RUBY) report-generators/title_page.rb - -memcheck: test-programs - $(RUBY) report-generators/memcheck.rb $(shell find . 
-name TESTS) - $(RUBY) report-generators/title_page.rb - -ruby-test: - $(RUBY) report-generators/test/ts.rb +include test/unit/Makefile endif ifneq ($(shell which ctags),) diff --git a/test/Makefile.in b/test/Makefile.in index 230ce5bb6..097b2fa21 100644 --- a/test/Makefile.in +++ b/test/Makefile.in @@ -27,7 +27,7 @@ datarootdir = @datarootdir@ LVM_TEST_RESULTS ?= results -SUBDIRS = api unit +SUBDIRS = api SOURCES = lib/not.c lib/harness.c CXXSOURCES = lib/runner.cpp CXXFLAGS += $(EXTRA_EXEC_CFLAGS) diff --git a/test/unit/Makefile.in b/test/unit/Makefile.in index 5cf92ba10..2e2c81935 100644 --- a/test/unit/Makefile.in +++ b/test/unit/Makefile.in @@ -1,4 +1,4 @@ -# Copyright (C) 2011-2017 Red Hat, Inc. All rights reserved. +# Copyright (C) 2011-2018 Red Hat, Inc. All rights reserved. # # This file is part of LVM2. # @@ -10,44 +10,24 @@ # along with this program; if not, write to the Free Software Foundation, # Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -srcdir = @srcdir@ -top_srcdir = @top_srcdir@ -top_builddir = @top_builddir@ +UNIT_SOURCE=\ + test/unit/bcache_t.c \ + test/unit/bitset_t.c\ + test/unit/config_t.c\ + test/unit/dmlist_t.c\ + test/unit/dmstatus_t.c\ + test/unit/matcher_t.c\ + test/unit/percent_t.c\ + test/unit/string_t.c\ + test/unit/run.c +UNIT_OBJECTS=$(UNIT_SOURCE:%.c=%.o) -VPATH = $(srcdir) -UNITS = \ - bcache_t.c \ - bitset_t.c\ - config_t.c\ - dmlist_t.c\ - dmstatus_t.c\ - matcher_t.c\ - percent_t.c\ - string_t.c\ - run.c +UNIT_LDLIBS += $(LVMINTERNAL_LIBS) -ldevmapper -laio -lcunit -ifeq ("@TESTING@", "yes") -SOURCES = $(UNITS) -TARGETS = run -endif - -include $(top_builddir)/make.tmpl - -ifeq ($(MAKECMDGOALS),distclean) -SOURCES = $(UNITS) -endif - -ifeq ("$(TESTING)", "yes") -LDLIBS += $(LVMINTERNAL_LIBS) -ldevmapper -laio @CUNIT_LIBS@ -CFLAGS += @CUNIT_CFLAGS@ - -check: unit - -$(TARGETS): $(OBJECTS) $(top_builddir)/libdm/libdevmapper.$(LIB_SUFFIX) +test/unit/run: $(UNIT_OBJECTS) libdm/libdevmapper.$(LIB_SUFFIX) 
lib/liblvm-internal.a $(CC) $(CFLAGS) $(LDFLAGS) $(EXTRA_EXEC_LDFLAGS) -L$(top_builddir)/libdm \ - -o $@ $(OBJECTS) $(LDLIBS) + -o $@ $(UNIT_OBJECTS) $(UNIT_LDLIBS) -unit: $(TARGETS) $(top_builddir)/lib/liblvm-internal.a +unit-test: test/unit/run @echo Running unit tests - LD_LIBRARY_PATH=$(top_builddir)/libdm ./$(TARGETS) -endif + LD_LIBRARY_PATH=libdm test/unit/run From 467adfa082c3be10d012fa156db7810d23221648 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Mon, 5 Feb 2018 16:04:23 +0000 Subject: [PATCH 15/87] [device/bcache] More tests and some bug fixes --- lib/device/bcache.c | 127 +++++---- lib/device/bcache.h | 32 ++- test/unit/Makefile.in | 26 +- test/unit/bcache_t.c | 647 ++++++++++++++++++++++++++++++++++-------- 4 files changed, 645 insertions(+), 187 deletions(-) diff --git a/lib/device/bcache.c b/lib/device/bcache.c index e5d0e1bae..86b56c086 100644 --- a/lib/device/bcache.c +++ b/lib/device/bcache.c @@ -130,44 +130,21 @@ static struct control_block *_iocb_to_cb(struct iocb *icb) //---------------------------------------------------------------- // FIXME: write a sync engine too -enum dir { - DIR_READ, - DIR_WRITE -}; - -struct io_engine { +struct async_engine { + struct io_engine e; io_context_t aio_context; struct cb_set *cbs; }; -static struct io_engine *_engine_create(unsigned max_io) +static struct async_engine *_to_async(struct io_engine *e) { - int r; - struct io_engine *e = dm_malloc(sizeof(*e)); - - if (!e) - return NULL; - - e->aio_context = 0; - r = io_setup(max_io, &e->aio_context); - if (r < 0) { - log_warn("io_setup failed"); - return NULL; - } - - e->cbs = _cb_set_create(max_io); - if (!e->cbs) { - log_warn("couldn't create control block set"); - dm_free(e); - return NULL; - } - - return e; + return container_of(e, struct async_engine, e); } -static void _engine_destroy(struct io_engine *e) +static void _async_destroy(struct io_engine *ioe) { int r; + struct async_engine *e = _to_async(ioe); _cb_set_destroy(e->cbs); @@ -179,12 +156,13 
@@ static void _engine_destroy(struct io_engine *e) dm_free(e); } -static bool _engine_issue(struct io_engine *e, enum dir d, int fd, - sector_t sb, sector_t se, void *data, void *context) +static bool _async_issue(struct io_engine *ioe, enum dir d, int fd, + sector_t sb, sector_t se, void *data, void *context) { int r; struct iocb *cb_array[1]; struct control_block *cb; + struct async_engine *e = _to_async(ioe); if (((uint64_t) data) & (PAGE_SIZE - 1)) { log_warn("misaligned data buffer"); @@ -218,13 +196,13 @@ static bool _engine_issue(struct io_engine *e, enum dir d, int fd, #define MAX_IO 1024 #define MAX_EVENT 64 -typedef void complete_fn(void *context, int io_error); -static bool _engine_wait(struct io_engine *e, complete_fn fn) +static bool _async_wait(struct io_engine *ioe, io_complete_fn fn) { int i, r; struct io_event event[MAX_EVENT]; struct control_block *cb; + struct async_engine *e = _to_async(ioe); memset(&event, 0, sizeof(event)); r = io_getevents(e->aio_context, 1, MAX_EVENT, event, NULL); @@ -255,6 +233,36 @@ static bool _engine_wait(struct io_engine *e, complete_fn fn) return true; } +struct io_engine *create_async_io_engine(unsigned max_io) +{ + int r; + struct async_engine *e = dm_malloc(sizeof(*e)); + + if (!e) + return NULL; + + e->e.destroy = _async_destroy; + e->e.issue = _async_issue; + e->e.wait = _async_wait; + + e->aio_context = 0; + r = io_setup(max_io, &e->aio_context); + if (r < 0) { + log_warn("io_setup failed"); + dm_free(e); + return NULL; + } + + e->cbs = _cb_set_create(max_io); + if (!e->cbs) { + log_warn("couldn't create control block set"); + dm_free(e); + return NULL; + } + + return &e->e; +} + //---------------------------------------------------------------- #define MIN_BLOCKS 16 @@ -536,7 +544,9 @@ static bool _issue_low_level(struct block *b, enum dir d) return false; _set_flags(b, BF_IO_PENDING); - if (!_engine_issue(cache->engine, d, b->fd, sb, se, b->data, b)) { + dm_list_add(&cache->io_pending, &b->list); + + if 
(!cache->engine->issue(cache->engine, d, b->fd, sb, se, b->data, b)) { _complete_io(b, -EIO); return false; } @@ -557,7 +567,7 @@ static inline bool _issue_write(struct block *b) static bool _wait_io(struct bcache *cache) { - return _engine_wait(cache->engine, _complete_io); + return cache->engine->wait(cache->engine, _complete_io); } /*---------------------------------------------------------------- @@ -614,17 +624,20 @@ static struct block *_find_unused_clean_block(struct bcache *cache) return NULL; } -static struct block *_new_block(struct bcache *cache, int fd, block_address index) +static struct block *_new_block(struct bcache *cache, int fd, block_address index, bool can_wait) { struct block *b; b = _alloc_block(cache); - while (!b && cache->nr_locked < cache->nr_cache_blocks) { + while (!b && !dm_list_empty(&cache->clean)) { b = _find_unused_clean_block(cache); if (!b) { - if (dm_list_empty(&cache->io_pending)) - _writeback(cache, 16); - _wait_io(cache); + if (can_wait) { + if (dm_list_empty(&cache->io_pending)) + _writeback(cache, 16); // FIXME: magic number + _wait_io(cache); + } else + return NULL; } } @@ -702,7 +715,7 @@ static struct block *_lookup_or_read_block(struct bcache *cache, } else { _miss(cache, flags); - b = _new_block(cache, fd, index); + b = _new_block(cache, fd, index, true); if (b) { if (flags & GF_ZERO) _zero_block(b); @@ -741,9 +754,11 @@ static void _preemptive_writeback(struct bcache *cache) /*---------------------------------------------------------------- * Public interface *--------------------------------------------------------------*/ -struct bcache *bcache_create(sector_t block_sectors, unsigned nr_cache_blocks) +struct bcache *bcache_create(sector_t block_sectors, unsigned nr_cache_blocks, + struct io_engine *engine) { struct bcache *cache; + unsigned max_io = engine->max_io(engine); if (!nr_cache_blocks) { log_warn("bcache must have at least one cache block"); @@ -766,13 +781,8 @@ struct bcache *bcache_create(sector_t 
block_sectors, unsigned nr_cache_blocks) cache->block_sectors = block_sectors; cache->nr_cache_blocks = nr_cache_blocks; - cache->max_io = nr_cache_blocks < MAX_IO ? nr_cache_blocks : MAX_IO; - cache->engine = _engine_create(cache->max_io); - if (!cache->engine) { - dm_free(cache); - return NULL; - } - + cache->max_io = nr_cache_blocks < max_io ? nr_cache_blocks : max_io; + cache->engine = engine; cache->nr_locked = 0; cache->nr_dirty = 0; cache->nr_io_pending = 0; @@ -784,7 +794,7 @@ struct bcache *bcache_create(sector_t block_sectors, unsigned nr_cache_blocks) dm_list_init(&cache->io_pending); if (!_hash_table_init(cache, nr_cache_blocks)) { - _engine_destroy(cache->engine); + cache->engine->destroy(cache->engine); dm_free(cache); return NULL; } @@ -797,7 +807,7 @@ struct bcache *bcache_create(sector_t block_sectors, unsigned nr_cache_blocks) cache->prefetches = 0; if (!_init_free_list(cache, nr_cache_blocks)) { - _engine_destroy(cache->engine); + cache->engine->destroy(cache->engine); _hash_table_exit(cache); dm_free(cache); return NULL; @@ -815,7 +825,7 @@ void bcache_destroy(struct bcache *cache) _wait_all(cache); _exit_free_list(cache); _hash_table_exit(cache); - _engine_destroy(cache->engine); + cache->engine->destroy(cache->engine); dm_free(cache); } @@ -834,10 +844,12 @@ void bcache_prefetch(struct bcache *cache, int fd, block_address index) struct block *b = _hash_lookup(cache, fd, index); if (!b) { - b = _new_block(cache, fd, index); - if (b && (cache->nr_io_pending < cache->max_io)) { - cache->prefetches++; - _issue_read(b); + if (cache->nr_io_pending < cache->max_io) { + b = _new_block(cache, fd, index, false); + if (b) { + cache->prefetches++; + _issue_read(b); + } } } } @@ -881,9 +893,10 @@ int bcache_flush(struct bcache *cache) { while (!dm_list_empty(&cache->dirty)) { struct block *b = dm_list_item(_list_pop(&cache->dirty), struct block); - if (b->ref_count || _test_flags(b, BF_IO_PENDING)) + if (b->ref_count || _test_flags(b, BF_IO_PENDING)) { // 
The superblock may well be still locked. continue; + } _issue_write(b); } diff --git a/lib/device/bcache.h b/lib/device/bcache.h index 14204bec0..818dee290 100644 --- a/lib/device/bcache.h +++ b/lib/device/bcache.h @@ -15,6 +15,7 @@ #ifndef BCACHE_H #define BCACHE_H +#include #include #include @@ -22,9 +23,34 @@ /*----------------------------------------------------------------*/ +// FIXME: move somewhere more sensible +#define container_of(v, t, head) \ + ((t *)((const char *)(v) - (const char *)&((t *) 0)->head)) + +/*----------------------------------------------------------------*/ + +enum dir { + DIR_READ, + DIR_WRITE +}; + typedef uint64_t block_address; typedef uint64_t sector_t; +typedef void io_complete_fn(void *context, int io_error); + +struct io_engine { + void (*destroy)(struct io_engine *e); + bool (*issue)(struct io_engine *e, enum dir d, int fd, + sector_t sb, sector_t se, void *data, void *context); + bool (*wait)(struct io_engine *e, io_complete_fn fn); + unsigned (*max_io)(struct io_engine *e); +}; + +struct io_engine *create_async_io_engine(unsigned max_io); + +/*----------------------------------------------------------------*/ + struct bcache; struct block { /* clients may only access these three fields */ @@ -41,7 +67,11 @@ struct block { int error; }; -struct bcache *bcache_create(sector_t block_size, unsigned nr_cache_blocks); +/* + * Ownership of engine passes. Engine will be destroyed even if this fails. 
+ */ +struct bcache *bcache_create(sector_t block_size, unsigned nr_cache_blocks, + struct io_engine *engine); void bcache_destroy(struct bcache *cache); enum bcache_get_flags { diff --git a/test/unit/Makefile.in b/test/unit/Makefile.in index 2e2c81935..a070329ac 100644 --- a/test/unit/Makefile.in +++ b/test/unit/Makefile.in @@ -12,22 +12,28 @@ UNIT_SOURCE=\ test/unit/bcache_t.c \ - test/unit/bitset_t.c\ - test/unit/config_t.c\ - test/unit/dmlist_t.c\ - test/unit/dmstatus_t.c\ - test/unit/matcher_t.c\ - test/unit/percent_t.c\ - test/unit/string_t.c\ - test/unit/run.c + + +# test/unit/run.c + +# test/unit/bitset_t.c\ +# test/unit/config_t.c\ +# test/unit/dmlist_t.c\ +# test/unit/dmstatus_t.c\ +# test/unit/matcher_t.c\ +# test/unit/percent_t.c\ +# test/unit/string_t.c\ + UNIT_OBJECTS=$(UNIT_SOURCE:%.c=%.o) -UNIT_LDLIBS += $(LVMINTERNAL_LIBS) -ldevmapper -laio -lcunit +UNIT_LDLIBS += $(LVMINTERNAL_LIBS) -ldevmapper -laio test/unit/run: $(UNIT_OBJECTS) libdm/libdevmapper.$(LIB_SUFFIX) lib/liblvm-internal.a - $(CC) $(CFLAGS) $(LDFLAGS) $(EXTRA_EXEC_LDFLAGS) -L$(top_builddir)/libdm \ + @echo " [LD] $@" + $(Q) $(CC) $(CFLAGS) $(LDFLAGS) $(EXTRA_EXEC_LDFLAGS) -L$(top_builddir)/libdm \ -o $@ $(UNIT_OBJECTS) $(UNIT_LDLIBS) +.PHONEY: unit-test unit-test: test/unit/run @echo Running unit tests LD_LIBRARY_PATH=libdm test/unit/run diff --git a/test/unit/bcache_t.c b/test/unit/bcache_t.c index 3db9cc71b..c2d2df053 100644 --- a/test/unit/bcache_t.c +++ b/test/unit/bcache_t.c @@ -12,168 +12,540 @@ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#define _GNU_SOURCE - -#include -#include -#include +#include +#include #include +#include -#include "units.h" #include "bcache.h" +#define SHOW_MOCK_CALLS 0 + +/*---------------------------------------------------------------- + * Assertions + *--------------------------------------------------------------*/ + +static jmp_buf _test_k; +#define TEST_FAILED 1 + +static void _fail(const char *fmt, ...) 
+ __attribute__((format (printf, 1, 2))); + + +static void _fail(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + fprintf(stderr, "\n"); + + longjmp(_test_k, TEST_FAILED); +} + +#define T_ASSERT(e) if (!(e)) {_fail("assertion failed: '%s'", # e);} + +/*---------------------------------------------------------------- + * Mock engine + *--------------------------------------------------------------*/ +struct mock_engine { + struct io_engine e; + struct dm_list expected_calls; + struct dm_list issued_io; + unsigned max_io; +}; + +enum method { + E_DESTROY, + E_ISSUE, + E_WAIT, + E_MAX_IO +}; + +struct mock_call { + struct dm_list list; + enum method m; +}; + +struct mock_io { + struct dm_list list; + int fd; + sector_t sb; + sector_t se; + void *data; + void *context; +}; + +static const char *_show_method(enum method m) +{ + switch (m) { + case E_DESTROY: + return "destroy()"; + case E_ISSUE: + return "issue()"; + case E_WAIT: + return "wait()"; + case E_MAX_IO: + return "max_io()"; + } + + return ""; +} + +static void _expect(struct mock_engine *e, enum method m) +{ + struct mock_call *mc = malloc(sizeof(*mc)); + mc->m = m; + dm_list_add(&e->expected_calls, &mc->list); +} + +static void _expect_read(struct mock_engine *e) +{ + // FIXME: finish + _expect(e, E_ISSUE); +} + +static void _expect_write(struct mock_engine *e) +{ + // FIXME: finish + _expect(e, E_ISSUE); +} + +static void _match(struct mock_engine *e, enum method m) +{ + struct mock_call *mc; + + if (dm_list_empty(&e->expected_calls)) + _fail("unexpected call to method %s\n", _show_method(m)); + + mc = dm_list_item(e->expected_calls.n, struct mock_call); + dm_list_del(&mc->list); + + if (mc->m != m) + _fail("expected %s, but got %s\n", _show_method(mc->m), _show_method(m)); +#if SHOW_MOCK_CALLS + else + fprintf(stderr, "%s called (expected)\n", _show_method(m)); +#endif + + free(mc); +} + +static void _no_outstanding_expectations(struct mock_engine 
*e) +{ + struct mock_call *mc; + + if (!dm_list_empty(&e->expected_calls)) { + fprintf(stderr, "unsatisfied expectations:\n"); + dm_list_iterate_items (mc, &e->expected_calls) + fprintf(stderr, " %s\n", _show_method(mc->m)); + } + T_ASSERT(dm_list_empty(&e->expected_calls)); +} + +static struct mock_engine *_to_mock(struct io_engine *e) +{ + return container_of(e, struct mock_engine, e); +} + +static void _mock_destroy(struct io_engine *e) +{ + struct mock_engine *me = _to_mock(e); + + _match(me, E_DESTROY); + T_ASSERT(dm_list_empty(&me->issued_io)); + T_ASSERT(dm_list_empty(&me->expected_calls)); + free(_to_mock(e)); +} + +static bool _mock_issue(struct io_engine *e, enum dir d, int fd, + sector_t sb, sector_t se, void *data, void *context) +{ + struct mock_io *io; + struct mock_engine *me = _to_mock(e); + + _match(me, E_ISSUE); + io = malloc(sizeof(*io)); + if (!io) + abort(); + + io->fd = fd; + io->sb = sb; + io->se = se; + io->data = data; + io->context = context; + + dm_list_add(&me->issued_io, &io->list); + return true; +} + +static bool _mock_wait(struct io_engine *e, io_complete_fn fn) +{ + struct mock_io *io; + struct mock_engine *me = _to_mock(e); + _match(me, E_WAIT); + + // FIXME: provide a way to control how many are completed and whether + // they error. 
+ T_ASSERT(!dm_list_empty(&me->issued_io)); + io = dm_list_item(me->issued_io.n, struct mock_io); + dm_list_del(&io->list); + fn(io->context, 0); + return true; +} + +static unsigned _mock_max_io(struct io_engine *e) +{ + struct mock_engine *me = _to_mock(e); + _match(me, E_MAX_IO); + return me->max_io; +} + +static struct mock_engine *_mock_create(unsigned max_io) +{ + struct mock_engine *m = malloc(sizeof(*m)); + + m->e.destroy = _mock_destroy; + m->e.issue = _mock_issue; + m->e.wait = _mock_wait; + m->e.max_io = _mock_max_io; + + m->max_io = max_io; + dm_list_init(&m->expected_calls); + dm_list_init(&m->issued_io); + + return m; +} + +/*---------------------------------------------------------------- + * Tests + *--------------------------------------------------------------*/ #define MEG 2048 #define SECTOR_SHIFT 9 -static const char *_test_path = "test.bin"; - -int bcache_init(void) +static void good_create(sector_t block_size, unsigned nr_cache_blocks) { - return 0; + struct bcache *cache; + struct mock_engine *me = _mock_create(16); + + _expect(me, E_MAX_IO); + cache = bcache_create(block_size, nr_cache_blocks, &me->e); + T_ASSERT(cache); + + _expect(me, E_DESTROY); + bcache_destroy(cache); } -int bcache_fini(void) +static void bad_create(sector_t block_size, unsigned nr_cache_blocks) { - return 0; -} + struct bcache *cache; + struct mock_engine *me = _mock_create(16); -static int open_file(const char *path) -{ - return open(path, O_EXCL | O_RDWR | O_DIRECT, 0666); -} + _expect(me, E_MAX_IO); + cache = bcache_create(block_size, nr_cache_blocks, &me->e); + T_ASSERT(!cache); -static int _prep_file(const char *path) -{ - int fd, r; - - fd = open(path, O_CREAT | O_TRUNC | O_EXCL | O_RDWR | O_DIRECT, 0666); - if (fd < 0) - return -1; - - r = fallocate(fd, FALLOC_FL_ZERO_RANGE, 0, (1 * MEG) << SECTOR_SHIFT); - if (r) { - close(fd); - return -1; - } - - close(fd); - return 0; -} - - -static int test_init(void) -{ - unlink(_test_path); - return 
_prep_file(_test_path); -} - -static int test_exit(void) -{ - unlink(_test_path); - return 0; + _expect(me, E_DESTROY); + me->e.destroy(&me->e); } static void test_create(void) { - struct bcache *cache = bcache_create(8, 16); - CU_ASSERT_PTR_NOT_NULL(cache); - bcache_destroy(cache); + good_create(8, 16); } static void test_nr_cache_blocks_must_be_positive(void) { - struct bcache *cache = bcache_create(8, 0); - CU_ASSERT_PTR_NULL(cache); + bad_create(8, 0); } static void test_block_size_must_be_positive(void) { - struct bcache *cache = bcache_create(0, 16); - CU_ASSERT_PTR_NULL(cache); + bad_create(0, 16); } static void test_block_size_must_be_multiple_of_page_size(void) { + static unsigned _bad_examples[] = {3, 9, 13, 1025}; + unsigned i; + + for (i = 0; i < DM_ARRAY_SIZE(_bad_examples); i++) + bad_create(_bad_examples[i], 16); + + for (i = 1; i < 1000; i++) + good_create(i * 8, 16); +} + +static void test_get_triggers_read(void) +{ struct bcache *cache; + struct mock_engine *me = _mock_create(16); + + // FIXME: use a fixture + _expect(me, E_MAX_IO); + cache = bcache_create(64, 16, &me->e); + T_ASSERT(cache); { - static unsigned _bad_examples[] = {3, 9, 13, 1025}; - - for (i = 0; i < DM_ARRAY_SIZE(_bad_examples); i++) { - cache = bcache_create(_bad_examples[i], 16); - CU_ASSERT_PTR_NULL(cache); - } - } - - { - // Only testing a few sizes because io_destroy is seriously - // slow. - for (i = 1; i < 25; i++) { - cache = bcache_create(8 * i, 16); - CU_ASSERT_PTR_NOT_NULL(cache); - bcache_destroy(cache); - } - } -} - -static void test_reads_work(void) -{ - int fd; - - // FIXME: add fixtures. 
- test_init(); - fd = open_file("./test.bin"); - CU_ASSERT(fd >= 0); - - { - int i; + int fd = 17; // arbitrary key struct block *b; - struct bcache *cache = bcache_create(8, 16); - CU_ASSERT(bcache_get(cache, fd, 0, 0, &b)); - for (i = 0; i < 8 << SECTOR_SHIFT; i++) - CU_ASSERT(((unsigned char *) b->data)[i] == 0); + _expect(me, E_ISSUE); + _expect(me, E_WAIT); + T_ASSERT(bcache_get(cache, fd, 0, 0, &b)); bcache_put(b); - - bcache_destroy(cache); } - close(fd); - - test_exit(); + _expect(me, E_DESTROY); + bcache_destroy(cache); } -static void test_prefetch_works(void) +static void test_repeated_reads_are_cached(void) { - int fd; + struct bcache *cache; + struct mock_engine *me = _mock_create(16); - // FIXME: add fixtures. - test_init(); - fd = open_file("./test.bin"); - CU_ASSERT(fd >= 0); + // FIXME: use a fixture + _expect(me, E_MAX_IO); + cache = bcache_create(64, 16, &me->e); + T_ASSERT(cache); { - int i; + int fd = 17; // arbitrary key + unsigned i; struct block *b; - struct bcache *cache = bcache_create(8, 16); - for (i = 0; i < 16; i++) - bcache_prefetch(cache, fd, i); + _expect(me, E_ISSUE); + _expect(me, E_WAIT); + for (i = 0; i < 100; i++) { + T_ASSERT(bcache_get(cache, fd, 0, 0, &b)); + bcache_put(b); + } + } - for (i = 0; i < 16; i++) { - CU_ASSERT(bcache_get(cache, fd, i, 0, &b)); + _expect(me, E_DESTROY); + bcache_destroy(cache); +} + +static void test_block_gets_evicted_with_many_reads(void) +{ + const unsigned nr_cache_blocks = 16; + struct bcache *cache; + struct mock_engine *me = _mock_create(16); + + // FIXME: use a fixture + _expect(me, E_MAX_IO); + cache = bcache_create(64, nr_cache_blocks, &me->e); + T_ASSERT(cache); + + { + int fd = 17; // arbitrary key + unsigned i; + struct block *b; + + for (i = 0; i < nr_cache_blocks; i++) { + _expect(me, E_ISSUE); + _expect(me, E_WAIT); + T_ASSERT(bcache_get(cache, fd, i, 0, &b)); bcache_put(b); } - bcache_destroy(cache); + // Not enough cache blocks to hold this one + _expect(me, E_ISSUE); + 
_expect(me, E_WAIT); + T_ASSERT(bcache_get(cache, fd, nr_cache_blocks, 0, &b)); + bcache_put(b); + + // Now if we run through we should find one block has been + // evicted. We go backwards because the oldest is normally + // evicted first. + _expect(me, E_ISSUE); + _expect(me, E_WAIT); + for (i = nr_cache_blocks; i; i--) { + T_ASSERT(bcache_get(cache, fd, i - 1, 0, &b)); + bcache_put(b); + } } - close(fd); - - test_exit(); + _expect(me, E_DESTROY); + bcache_destroy(cache); } +static void test_prefetch_issues_a_read(void) +{ + const unsigned nr_cache_blocks = 16; + struct bcache *cache; + struct mock_engine *me = _mock_create(16); + + // FIXME: use a fixture + _expect(me, E_MAX_IO); + cache = bcache_create(64, nr_cache_blocks, &me->e); + T_ASSERT(cache); + + { + int fd = 17; // arbitrary key + unsigned i; + struct block *b; + + for (i = 0; i < nr_cache_blocks; i++) { + // prefetch should not wait + _expect(me, E_ISSUE); + bcache_prefetch(cache, fd, i); + } + + + for (i = 0; i < nr_cache_blocks; i++) { + _expect(me, E_WAIT); + T_ASSERT(bcache_get(cache, fd, i, 0, &b)); + bcache_put(b); + } + } + + _expect(me, E_DESTROY); + bcache_destroy(cache); +} + +static void test_too_many_prefetches_does_not_trigger_a_wait(void) +{ + const unsigned nr_cache_blocks = 16; + struct bcache *cache; + struct mock_engine *me = _mock_create(16); + + // FIXME: use a fixture + _expect(me, E_MAX_IO); + cache = bcache_create(64, nr_cache_blocks, &me->e); + T_ASSERT(cache); + + { + int fd = 17; // arbitrary key + unsigned i; + + for (i = 0; i < 10 * nr_cache_blocks; i++) { + // prefetch should not wait + if (i < nr_cache_blocks) + _expect(me, E_ISSUE); + bcache_prefetch(cache, fd, i); + } + + // Destroy will wait for any in flight IO triggered by prefetches. 
+ for (i = 0; i < nr_cache_blocks; i++) + _expect(me, E_WAIT); + } + + _expect(me, E_DESTROY); + bcache_destroy(cache); +} + +static void test_dirty_data_gets_written_back(void) +{ + const unsigned nr_cache_blocks = 16; + struct bcache *cache; + struct mock_engine *me = _mock_create(16); + + // FIXME: use a fixture + _expect(me, E_MAX_IO); + cache = bcache_create(64, nr_cache_blocks, &me->e); + T_ASSERT(cache); + + { + int fd = 17; // arbitrary key + struct block *b; + + // FIXME: be specific about the IO direction + // Expect the read + _expect(me, E_ISSUE); + _expect(me, E_WAIT); + T_ASSERT(bcache_get(cache, fd, 0, GF_DIRTY, &b)); + bcache_put(b); + + // Expect the write + _expect(me, E_ISSUE); + _expect(me, E_WAIT); + } + + _expect(me, E_DESTROY); + bcache_destroy(cache); +} + +static void test_zeroed_data_counts_as_dirty(void) +{ + const unsigned nr_cache_blocks = 16; + struct bcache *cache; + struct mock_engine *me = _mock_create(16); + + // FIXME: use a fixture + _expect(me, E_MAX_IO); + cache = bcache_create(64, nr_cache_blocks, &me->e); + T_ASSERT(cache); + + { + int fd = 17; // arbitrary key + struct block *b; + + // No read + T_ASSERT(bcache_get(cache, fd, 0, GF_ZERO, &b)); + bcache_put(b); + + // Expect the write + _expect(me, E_ISSUE); + _expect(me, E_WAIT); + } + + _expect(me, E_DESTROY); + bcache_destroy(cache); +} + +static void test_flush_waits_for_all_dirty(void) +{ + const unsigned nr_cache_blocks = 128, count = 16; + struct bcache *cache; + struct mock_engine *me = _mock_create(16); + + // FIXME: use a fixture + _expect(me, E_MAX_IO); + + // I'm using a large nr of cache blocks to avoid triggering writeback + // early. 
+ cache = bcache_create(64, nr_cache_blocks, &me->e); + T_ASSERT(cache); + + { + int fd = 17; // arbitrary key + unsigned i; + struct block *b; + + for (i = 0; i < count; i++) { + if (i % 2) { + T_ASSERT(bcache_get(cache, fd, i, GF_ZERO, &b)); + } else { + _expect_read(me); + _expect(me, E_WAIT); + T_ASSERT(bcache_get(cache, fd, i, 0, &b)); + } + bcache_put(b); + } + + for (i = 0; i < count; i++) { + if (i % 2) + _expect_write(me); + } + + for (i = 0; i < count; i++) { + if (i % 2) + _expect(me, E_WAIT); + } + + bcache_flush(cache); + _no_outstanding_expectations(me); + } + + _expect(me, E_DESTROY); + bcache_destroy(cache); +} + +#if 0 #define NR_FILES 4 static void test_read_multiple_files(void) { @@ -211,21 +583,58 @@ static void test_read_multiple_files(void) test_exit(); } - +#endif // Tests to be written // Open multiple files and prove the blocks are coming from the correct file // show invalidate works // show invalidate_fd works // show writeback is working // check zeroing -// -CU_TestInfo bcache_list[] = { - { (char*)"create", test_create }, - { (char*)"nr cache block must be positive", test_nr_cache_blocks_must_be_positive }, - { (char*)"block size must be positive", test_block_size_must_be_positive }, - { (char*)"block size must be multiple of page size", test_block_size_must_be_multiple_of_page_size }, - { (char*)"reads work", test_reads_work }, - { (char*)"prefetch works", test_prefetch_works }, - { (char*)"read multiple files", test_read_multiple_files }, - CU_TEST_INFO_NULL + +struct test_details { + const char *name; + void (*fn)(void); }; + +int main(int argc, char **argv) +{ + static struct test_details _tests[] = { + {"simple create/destroy", test_create}, + {"nr cache blocks must be positive", test_nr_cache_blocks_must_be_positive}, + {"block size must be positive", test_block_size_must_be_positive}, + {"block size must be a multiple of page size", test_block_size_must_be_multiple_of_page_size}, + {"bcache_get() triggers read", 
test_get_triggers_read}, + {"repeated reads are cached", test_repeated_reads_are_cached}, + {"block get evicted with many reads", test_block_gets_evicted_with_many_reads}, + {"prefetch issues a read", test_prefetch_issues_a_read}, + {"too many prefetches does not trigger a wait", test_too_many_prefetches_does_not_trigger_a_wait}, + {"dirty data gets written back", test_dirty_data_gets_written_back}, + {"zeroed data counts as dirty", test_zeroed_data_counts_as_dirty}, + {"flush waits for all dirty", test_flush_waits_for_all_dirty}, + }; + + // We have to declare these as volatile because of the setjmp() + volatile unsigned i = 0, passed = 0; + + for (i = 0; i < DM_ARRAY_SIZE(_tests); i++) { + struct test_details *t = _tests + i; + fprintf(stderr, "[RUN ] %s\n", t->name); + + if (setjmp(_test_k)) + fprintf(stderr, "[ FAIL] %s\n", t->name); + else { + t->fn(); + passed++; + fprintf(stderr, "[ OK] %s\n", t->name); + } + } + + fprintf(stderr, "\n%u/%lu tests passed\n", passed, DM_ARRAY_SIZE(_tests)); + +#if 0 + test_prefetch_works(); + test_read_multiple_files(); +#endif + + return 0; +} From 6a57ed17a28aee0e8fb9557ec3c03a02f0b2a4be Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Mon, 5 Feb 2018 16:56:56 +0000 Subject: [PATCH 16/87] [device/bcache] add bcache_prefetch_bytes() and bcache_read_bytes() Not tested yet. 
--- lib/device/bcache.c | 59 +++++++++++++++++++++++++++++++++++++++++++++ lib/device/bcache.h | 10 ++++++++ 2 files changed, 69 insertions(+) diff --git a/lib/device/bcache.c b/lib/device/bcache.c index 86b56c086..1d8330609 100644 --- a/lib/device/bcache.c +++ b/lib/device/bcache.c @@ -966,5 +966,64 @@ void bcache_invalidate_fd(struct bcache *cache, int fd) _recycle_block(cache, b); } +static void byte_range_to_block_range(struct bcache *cache, off_t start, size_t len, + block_address *bb, block_address *be) +{ + block_address block_size = cache->block_sectors << SECTOR_SHIFT; + *bb = start / block_size; + *be = (start + len + block_size - 1) / block_size; +} + +void bcache_prefetch_bytes(struct bcache *cache, int fd, off_t start, size_t len) +{ + block_address bb, be; + + byte_range_to_block_range(cache, start, len, &bb, &be); + while (bb < be) { + bcache_prefetch(cache, fd, bb); + bb++; + } +} + +static off_t _min(off_t lhs, off_t rhs) +{ + if (rhs < lhs) + return rhs; + + return lhs; +} + +bool bcache_read_bytes(struct bcache *cache, int fd, off_t start, size_t len, void *data) +{ + struct block *b; + block_address bb, be, i; + unsigned char *udata = data; + off_t block_size = cache->block_sectors << SECTOR_SHIFT; + + byte_range_to_block_range(cache, start, len, &bb, &be); + for (i = bb; i < be; i++) + bcache_prefetch(cache, fd, i); + + for (i = bb; i < be; i++) { + if (!bcache_get(cache, fd, i, 0, &b)) + return false; + + if (i == bb) { + off_t block_offset = start % block_size; + size_t blen = _min(block_size - block_offset, len); + memcpy(udata, ((unsigned char *) b->data) + block_offset, blen); + len -= blen; + udata += blen; + } else { + size_t blen = _min(block_size, len); + memcpy(udata, b->data, blen); + len -= blen; + udata += blen; + } + } + + return true; +} + //---------------------------------------------------------------- diff --git a/lib/device/bcache.h b/lib/device/bcache.h index 818dee290..7d38d3337 100644 --- a/lib/device/bcache.h +++ 
b/lib/device/bcache.h @@ -137,6 +137,16 @@ void bcache_invalidate(struct bcache *cache, int fd, block_address index); */ void bcache_invalidate_fd(struct bcache *cache, int fd); +/* + * Prefetches the blocks necessary to satisfy a byte range. + */ +void bcache_prefetch_bytes(struct bcache *cache, int fd, off_t start, size_t len); + +/* + * Reads the bytes. + */ +bool bcache_read_bytes(struct bcache *cache, int fd, off_t start, size_t len, void *data); + /*----------------------------------------------------------------*/ #endif From 1cde30eba0fc4404f092fb5106f7cc7fcd66795b Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Tue, 6 Feb 2018 13:06:15 +0000 Subject: [PATCH 17/87] [device/bcache] More fiddling with tests --- test/unit/bcache_t.c | 580 +++++++++++++++++++------------------------ 1 file changed, 262 insertions(+), 318 deletions(-) diff --git a/test/unit/bcache_t.c b/test/unit/bcache_t.c index c2d2df053..5fb67892e 100644 --- a/test/unit/bcache_t.c +++ b/test/unit/bcache_t.c @@ -218,6 +218,56 @@ static struct mock_engine *_mock_create(unsigned max_io) return m; } +/*---------------------------------------------------------------- + * Fixtures + *--------------------------------------------------------------*/ +struct fixture { + struct mock_engine *me; + struct bcache *cache; +}; + +static struct fixture *_fixture_init(unsigned nr_cache_blocks) +{ + struct fixture *f = malloc(sizeof(*f)); + + f->me = _mock_create(16); + T_ASSERT(f->me); + + _expect(f->me, E_MAX_IO); + f->cache = bcache_create(128, nr_cache_blocks, &f->me->e); + T_ASSERT(f->cache); + + return f; +} + +static void _fixture_exit(struct fixture *f) +{ + _expect(f->me, E_DESTROY); + bcache_destroy(f->cache); + + free(f); +} + +static void *_small_fixture_init(void) +{ + return _fixture_init(16); +} + +static void _small_fixture_exit(void *context) +{ + _fixture_exit(context); +} + +static void *_large_fixture_init(void) +{ + return _fixture_init(1024); +} + +static void _large_fixture_exit(void 
*context) +{ + _fixture_exit(context); +} + /*---------------------------------------------------------------- * Tests *--------------------------------------------------------------*/ @@ -250,22 +300,22 @@ static void bad_create(sector_t block_size, unsigned nr_cache_blocks) me->e.destroy(&me->e); } -static void test_create(void) +static void test_create(void *fixture) { good_create(8, 16); } -static void test_nr_cache_blocks_must_be_positive(void) +static void test_nr_cache_blocks_must_be_positive(void *fixture) { bad_create(8, 0); } -static void test_block_size_must_be_positive(void) +static void test_block_size_must_be_positive(void *fixture) { bad_create(0, 16); } -static void test_block_size_must_be_multiple_of_page_size(void) +static void test_block_size_must_be_multiple_of_page_size(void *fixture) { static unsigned _bad_examples[] = {3, 9, 13, 1025}; @@ -278,312 +328,195 @@ static void test_block_size_must_be_multiple_of_page_size(void) good_create(i * 8, 16); } -static void test_get_triggers_read(void) +static void test_get_triggers_read(void *context) { - struct bcache *cache; - struct mock_engine *me = _mock_create(16); + struct fixture *f = context; - // FIXME: use a fixture - _expect(me, E_MAX_IO); - cache = bcache_create(64, 16, &me->e); - T_ASSERT(cache); + int fd = 17; // arbitrary key + struct block *b; - { - int fd = 17; // arbitrary key - struct block *b; - - _expect(me, E_ISSUE); - _expect(me, E_WAIT); - T_ASSERT(bcache_get(cache, fd, 0, 0, &b)); - bcache_put(b); - } - - _expect(me, E_DESTROY); - bcache_destroy(cache); + _expect(f->me, E_ISSUE); + _expect(f->me, E_WAIT); + T_ASSERT(bcache_get(f->cache, fd, 0, 0, &b)); + bcache_put(b); } -static void test_repeated_reads_are_cached(void) +static void test_repeated_reads_are_cached(void *context) { - struct bcache *cache; - struct mock_engine *me = _mock_create(16); + struct fixture *f = context; - // FIXME: use a fixture - _expect(me, E_MAX_IO); - cache = bcache_create(64, 16, &me->e); - 
T_ASSERT(cache); - - { - int fd = 17; // arbitrary key - unsigned i; - struct block *b; - - _expect(me, E_ISSUE); - _expect(me, E_WAIT); - for (i = 0; i < 100; i++) { - T_ASSERT(bcache_get(cache, fd, 0, 0, &b)); - bcache_put(b); - } - } - - _expect(me, E_DESTROY); - bcache_destroy(cache); -} - -static void test_block_gets_evicted_with_many_reads(void) -{ - const unsigned nr_cache_blocks = 16; - struct bcache *cache; - struct mock_engine *me = _mock_create(16); - - // FIXME: use a fixture - _expect(me, E_MAX_IO); - cache = bcache_create(64, nr_cache_blocks, &me->e); - T_ASSERT(cache); - - { - int fd = 17; // arbitrary key - unsigned i; - struct block *b; - - for (i = 0; i < nr_cache_blocks; i++) { - _expect(me, E_ISSUE); - _expect(me, E_WAIT); - T_ASSERT(bcache_get(cache, fd, i, 0, &b)); - bcache_put(b); - } - - // Not enough cache blocks to hold this one - _expect(me, E_ISSUE); - _expect(me, E_WAIT); - T_ASSERT(bcache_get(cache, fd, nr_cache_blocks, 0, &b)); - bcache_put(b); - - // Now if we run through we should find one block has been - // evicted. We go backwards because the oldest is normally - // evicted first. 
- _expect(me, E_ISSUE); - _expect(me, E_WAIT); - for (i = nr_cache_blocks; i; i--) { - T_ASSERT(bcache_get(cache, fd, i - 1, 0, &b)); - bcache_put(b); - } - } - - _expect(me, E_DESTROY); - bcache_destroy(cache); -} - -static void test_prefetch_issues_a_read(void) -{ - const unsigned nr_cache_blocks = 16; - struct bcache *cache; - struct mock_engine *me = _mock_create(16); - - // FIXME: use a fixture - _expect(me, E_MAX_IO); - cache = bcache_create(64, nr_cache_blocks, &me->e); - T_ASSERT(cache); - - { - int fd = 17; // arbitrary key - unsigned i; - struct block *b; - - for (i = 0; i < nr_cache_blocks; i++) { - // prefetch should not wait - _expect(me, E_ISSUE); - bcache_prefetch(cache, fd, i); - } - - - for (i = 0; i < nr_cache_blocks; i++) { - _expect(me, E_WAIT); - T_ASSERT(bcache_get(cache, fd, i, 0, &b)); - bcache_put(b); - } - } - - _expect(me, E_DESTROY); - bcache_destroy(cache); -} - -static void test_too_many_prefetches_does_not_trigger_a_wait(void) -{ - const unsigned nr_cache_blocks = 16; - struct bcache *cache; - struct mock_engine *me = _mock_create(16); - - // FIXME: use a fixture - _expect(me, E_MAX_IO); - cache = bcache_create(64, nr_cache_blocks, &me->e); - T_ASSERT(cache); - - { - int fd = 17; // arbitrary key - unsigned i; - - for (i = 0; i < 10 * nr_cache_blocks; i++) { - // prefetch should not wait - if (i < nr_cache_blocks) - _expect(me, E_ISSUE); - bcache_prefetch(cache, fd, i); - } - - // Destroy will wait for any in flight IO triggered by prefetches. 
- for (i = 0; i < nr_cache_blocks; i++) - _expect(me, E_WAIT); - } - - _expect(me, E_DESTROY); - bcache_destroy(cache); -} - -static void test_dirty_data_gets_written_back(void) -{ - const unsigned nr_cache_blocks = 16; - struct bcache *cache; - struct mock_engine *me = _mock_create(16); - - // FIXME: use a fixture - _expect(me, E_MAX_IO); - cache = bcache_create(64, nr_cache_blocks, &me->e); - T_ASSERT(cache); - - { - int fd = 17; // arbitrary key - struct block *b; - - // FIXME: be specific about the IO direction - // Expect the read - _expect(me, E_ISSUE); - _expect(me, E_WAIT); - T_ASSERT(bcache_get(cache, fd, 0, GF_DIRTY, &b)); - bcache_put(b); - - // Expect the write - _expect(me, E_ISSUE); - _expect(me, E_WAIT); - } - - _expect(me, E_DESTROY); - bcache_destroy(cache); -} - -static void test_zeroed_data_counts_as_dirty(void) -{ - const unsigned nr_cache_blocks = 16; - struct bcache *cache; - struct mock_engine *me = _mock_create(16); - - // FIXME: use a fixture - _expect(me, E_MAX_IO); - cache = bcache_create(64, nr_cache_blocks, &me->e); - T_ASSERT(cache); - - { - int fd = 17; // arbitrary key - struct block *b; - - // No read - T_ASSERT(bcache_get(cache, fd, 0, GF_ZERO, &b)); - bcache_put(b); - - // Expect the write - _expect(me, E_ISSUE); - _expect(me, E_WAIT); - } - - _expect(me, E_DESTROY); - bcache_destroy(cache); -} - -static void test_flush_waits_for_all_dirty(void) -{ - const unsigned nr_cache_blocks = 128, count = 16; - struct bcache *cache; - struct mock_engine *me = _mock_create(16); - - // FIXME: use a fixture - _expect(me, E_MAX_IO); - - // I'm using a large nr of cache blocks to avoid triggering writeback - // early. 
- cache = bcache_create(64, nr_cache_blocks, &me->e); - T_ASSERT(cache); - - { - int fd = 17; // arbitrary key - unsigned i; - struct block *b; - - for (i = 0; i < count; i++) { - if (i % 2) { - T_ASSERT(bcache_get(cache, fd, i, GF_ZERO, &b)); - } else { - _expect_read(me); - _expect(me, E_WAIT); - T_ASSERT(bcache_get(cache, fd, i, 0, &b)); - } - bcache_put(b); - } - - for (i = 0; i < count; i++) { - if (i % 2) - _expect_write(me); - } - - for (i = 0; i < count; i++) { - if (i % 2) - _expect(me, E_WAIT); - } - - bcache_flush(cache); - _no_outstanding_expectations(me); - } - - _expect(me, E_DESTROY); - bcache_destroy(cache); -} - -#if 0 -#define NR_FILES 4 -static void test_read_multiple_files(void) -{ + int fd = 17; // arbitrary key unsigned i; - int fd[NR_FILES]; - char buffer[128]; + struct block *b; - - // FIXME: add fixtures. - test_init(); - for (i = 0; i < NR_FILES; i++) { - snprintf(buffer, sizeof(buffer), "./test%u.bin", i); - unlink(buffer); - _prep_file(buffer); - fd[i] = open_file(buffer); - CU_ASSERT(fd[i] >= 0); + _expect(f->me, E_ISSUE); + _expect(f->me, E_WAIT); + for (i = 0; i < 100; i++) { + T_ASSERT(bcache_get(f->cache, fd, 0, 0, &b)); + bcache_put(b); } - - { - struct block *b; - struct bcache *cache = bcache_create(8, 16); - - for (i = 0; i < 64; i++) { - if (!bcache_get(cache, fd[i % NR_FILES], i, 0, &b)) { - CU_ASSERT(false); - } else - bcache_put(b); - } - - bcache_destroy(cache); - } - - for (i = 0; i < NR_FILES; i++) - close(fd[i]); - - test_exit(); } -#endif + +static void test_block_gets_evicted_with_many_reads(void *context) +{ + struct fixture *f = context; + + struct mock_engine *me = f->me; + struct bcache *cache = f->cache; + const unsigned nr_cache_blocks = 16; + + int fd = 17; // arbitrary key + unsigned i; + struct block *b; + + for (i = 0; i < nr_cache_blocks; i++) { + _expect(me, E_ISSUE); + _expect(me, E_WAIT); + T_ASSERT(bcache_get(cache, fd, i, 0, &b)); + bcache_put(b); + } + + // Not enough cache blocks to hold this one + 
_expect(me, E_ISSUE); + _expect(me, E_WAIT); + T_ASSERT(bcache_get(cache, fd, nr_cache_blocks, 0, &b)); + bcache_put(b); + + // Now if we run through we should find one block has been + // evicted. We go backwards because the oldest is normally + // evicted first. + _expect(me, E_ISSUE); + _expect(me, E_WAIT); + for (i = nr_cache_blocks; i; i--) { + T_ASSERT(bcache_get(cache, fd, i - 1, 0, &b)); + bcache_put(b); + } +} + +static void test_prefetch_issues_a_read(void *context) +{ + struct fixture *f = context; + struct mock_engine *me = f->me; + struct bcache *cache = f->cache; + const unsigned nr_cache_blocks = 16; + + int fd = 17; // arbitrary key + unsigned i; + struct block *b; + + for (i = 0; i < nr_cache_blocks; i++) { + // prefetch should not wait + _expect(me, E_ISSUE); + bcache_prefetch(cache, fd, i); + } + + + for (i = 0; i < nr_cache_blocks; i++) { + _expect(me, E_WAIT); + T_ASSERT(bcache_get(cache, fd, i, 0, &b)); + bcache_put(b); + } +} + +static void test_too_many_prefetches_does_not_trigger_a_wait(void *context) +{ + struct fixture *f = context; + struct mock_engine *me = f->me; + struct bcache *cache = f->cache; + + const unsigned nr_cache_blocks = 16; + int fd = 17; // arbitrary key + unsigned i; + + for (i = 0; i < 10 * nr_cache_blocks; i++) { + // prefetch should not wait + if (i < nr_cache_blocks) + _expect(me, E_ISSUE); + bcache_prefetch(cache, fd, i); + } + + // Destroy will wait for any in flight IO triggered by prefetches. 
+ for (i = 0; i < nr_cache_blocks; i++) + _expect(me, E_WAIT); +} + +static void test_dirty_data_gets_written_back(void *context) +{ + struct fixture *f = context; + struct mock_engine *me = f->me; + struct bcache *cache = f->cache; + + const unsigned nr_cache_blocks = 16; + int fd = 17; // arbitrary key + struct block *b; + + // FIXME: be specific about the IO direction + // Expect the read + _expect(me, E_ISSUE); + _expect(me, E_WAIT); + T_ASSERT(bcache_get(cache, fd, 0, GF_DIRTY, &b)); + bcache_put(b); + + // Expect the write + _expect(me, E_ISSUE); + _expect(me, E_WAIT); +} + +static void test_zeroed_data_counts_as_dirty(void *context) +{ + struct fixture *f = context; + struct mock_engine *me = f->me; + struct bcache *cache = f->cache; + + const unsigned nr_cache_blocks = 16; + int fd = 17; // arbitrary key + struct block *b; + + // No read + T_ASSERT(bcache_get(cache, fd, 0, GF_ZERO, &b)); + bcache_put(b); + + // Expect the write + _expect(me, E_ISSUE); + _expect(me, E_WAIT); +} + +static void test_flush_waits_for_all_dirty(void *context) +{ + struct fixture *f = context; + struct mock_engine *me = f->me; + struct bcache *cache = f->cache; + + const unsigned count = 16; + int fd = 17; // arbitrary key + unsigned i; + struct block *b; + + for (i = 0; i < count; i++) { + if (i % 2) { + T_ASSERT(bcache_get(cache, fd, i, GF_ZERO, &b)); + } else { + _expect_read(me); + _expect(me, E_WAIT); + T_ASSERT(bcache_get(cache, fd, i, 0, &b)); + } + bcache_put(b); + } + + for (i = 0; i < count; i++) { + if (i % 2) + _expect_write(me); + } + + for (i = 0; i < count; i++) { + if (i % 2) + _expect(me, E_WAIT); + } + + bcache_flush(cache); + _no_outstanding_expectations(me); +} + // Tests to be written // Open multiple files and prove the blocks are coming from the correct file // show invalidate works @@ -593,37 +526,53 @@ static void test_read_multiple_files(void) struct test_details { const char *name; - void (*fn)(void); + void (*fn)(void *); + void *(*fixture_init)(void); 
+ void (*fixture_exit)(void *); }; +#define TEST(name, fn) {name, fn, NULL, NULL} +#define TEST_S(name, fn) {name, fn, _small_fixture_init, _small_fixture_exit} +#define TEST_L(name, fn) {name, fn, _large_fixture_init, _large_fixture_exit} + int main(int argc, char **argv) { static struct test_details _tests[] = { - {"simple create/destroy", test_create}, - {"nr cache blocks must be positive", test_nr_cache_blocks_must_be_positive}, - {"block size must be positive", test_block_size_must_be_positive}, - {"block size must be a multiple of page size", test_block_size_must_be_multiple_of_page_size}, - {"bcache_get() triggers read", test_get_triggers_read}, - {"repeated reads are cached", test_repeated_reads_are_cached}, - {"block get evicted with many reads", test_block_gets_evicted_with_many_reads}, - {"prefetch issues a read", test_prefetch_issues_a_read}, - {"too many prefetches does not trigger a wait", test_too_many_prefetches_does_not_trigger_a_wait}, - {"dirty data gets written back", test_dirty_data_gets_written_back}, - {"zeroed data counts as dirty", test_zeroed_data_counts_as_dirty}, - {"flush waits for all dirty", test_flush_waits_for_all_dirty}, + TEST("simple create/destroy", test_create), + TEST("nr cache blocks must be positive", test_nr_cache_blocks_must_be_positive), + TEST("block size must be positive", test_block_size_must_be_positive), + TEST("block size must be a multiple of page size", test_block_size_must_be_multiple_of_page_size), + TEST_S("bcache_get() triggers read", test_get_triggers_read), + TEST_S("repeated reads are cached", test_repeated_reads_are_cached), + TEST_S("block get evicted with many reads", test_block_gets_evicted_with_many_reads), + TEST_S("prefetch issues a read", test_prefetch_issues_a_read), + TEST_S("too many prefetches does not trigger a wait", test_too_many_prefetches_does_not_trigger_a_wait), + TEST_S("dirty data gets written back", test_dirty_data_gets_written_back), + TEST_S("zeroed data counts as dirty", 
test_zeroed_data_counts_as_dirty), + TEST_L("flush waits for all dirty", test_flush_waits_for_all_dirty), }; // We have to declare these as volatile because of the setjmp() volatile unsigned i = 0, passed = 0; for (i = 0; i < DM_ARRAY_SIZE(_tests); i++) { + void *fixture; struct test_details *t = _tests + i; fprintf(stderr, "[RUN ] %s\n", t->name); if (setjmp(_test_k)) fprintf(stderr, "[ FAIL] %s\n", t->name); else { - t->fn(); + if (t->fixture_init) + fixture = t->fixture_init(); + else + fixture = NULL; + + t->fn(fixture); + + if (t->fixture_exit) + t->fixture_exit(fixture); + passed++; fprintf(stderr, "[ OK] %s\n", t->name); } @@ -631,10 +580,5 @@ int main(int argc, char **argv) fprintf(stderr, "\n%u/%lu tests passed\n", passed, DM_ARRAY_SIZE(_tests)); -#if 0 - test_prefetch_works(); - test_read_multiple_files(); -#endif - return 0; } From dc8034f5eb8d14b621c3d99ff58c95f74153c448 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Tue, 6 Feb 2018 15:10:44 +0000 Subject: [PATCH 18/87] [device/bcache] more work on bcache --- lib/device/bcache.c | 2 +- test/unit/bcache_t.c | 152 +++++++++++++++++++++++++++++-------------- 2 files changed, 103 insertions(+), 51 deletions(-) diff --git a/lib/device/bcache.c b/lib/device/bcache.c index 1d8330609..dce05efe5 100644 --- a/lib/device/bcache.c +++ b/lib/device/bcache.c @@ -365,7 +365,7 @@ static struct block *_hash_lookup(struct bcache *cache, int fd, uint64_t index) unsigned h = _hash(cache, fd, index); dm_list_iterate_items_gen (b, cache->buckets + h, hash) - if (b->index == index) + if (b->fd == fd && b->index == index) return b; return NULL; diff --git a/test/unit/bcache_t.c b/test/unit/bcache_t.c index 5fb67892e..07a45fe22 100644 --- a/test/unit/bcache_t.c +++ b/test/unit/bcache_t.c @@ -54,6 +54,7 @@ struct mock_engine { struct dm_list expected_calls; struct dm_list issued_io; unsigned max_io; + sector_t block_size; }; enum method { @@ -66,6 +67,11 @@ enum method { struct mock_call { struct dm_list list; enum method m; 
+ + bool match_args; + enum dir d; + int fd; + block_address b; }; struct mock_io { @@ -97,23 +103,35 @@ static void _expect(struct mock_engine *e, enum method m) { struct mock_call *mc = malloc(sizeof(*mc)); mc->m = m; + mc->match_args = false; dm_list_add(&e->expected_calls, &mc->list); } -static void _expect_read(struct mock_engine *e) +static void _expect_read(struct mock_engine *e, int fd, block_address b) { - // FIXME: finish - _expect(e, E_ISSUE); + struct mock_call *mc = malloc(sizeof(*mc)); + mc->m = E_ISSUE; + mc->match_args = true; + mc->d = DIR_READ; + mc->fd = fd; + mc->b = b; + dm_list_add(&e->expected_calls, &mc->list); } -static void _expect_write(struct mock_engine *e) +static void _expect_write(struct mock_engine *e, int fd, block_address b) { - // FIXME: finish - _expect(e, E_ISSUE); + struct mock_call *mc = malloc(sizeof(*mc)); + mc->m = E_ISSUE; + mc->match_args = true; + mc->d = DIR_WRITE; + mc->fd = fd; + mc->b = b; + dm_list_add(&e->expected_calls, &mc->list); } -static void _match(struct mock_engine *e, enum method m) +static struct mock_call *_match_pop(struct mock_engine *e, enum method m) { + struct mock_call *mc; if (dm_list_empty(&e->expected_calls)) @@ -129,7 +147,12 @@ static void _match(struct mock_engine *e, enum method m) fprintf(stderr, "%s called (expected)\n", _show_method(m)); #endif - free(mc); + return mc; +} + +static void _match(struct mock_engine *e, enum method m) +{ + free(_match_pop(e, m)); } static void _no_outstanding_expectations(struct mock_engine *e) @@ -163,9 +186,18 @@ static bool _mock_issue(struct io_engine *e, enum dir d, int fd, sector_t sb, sector_t se, void *data, void *context) { struct mock_io *io; + struct mock_call *mc; struct mock_engine *me = _to_mock(e); - _match(me, E_ISSUE); + mc = _match_pop(me, E_ISSUE); + if (mc->match_args) { + T_ASSERT(d == mc->d); + T_ASSERT(fd == mc->fd); + T_ASSERT(sb == mc->b * me->block_size); + T_ASSERT(se == (mc->b + 1) * me->block_size); + } + free(mc); + io = 
malloc(sizeof(*io)); if (!io) abort(); @@ -202,7 +234,7 @@ static unsigned _mock_max_io(struct io_engine *e) return me->max_io; } -static struct mock_engine *_mock_create(unsigned max_io) +static struct mock_engine *_mock_create(unsigned max_io, sector_t block_size) { struct mock_engine *m = malloc(sizeof(*m)); @@ -212,6 +244,7 @@ static struct mock_engine *_mock_create(unsigned max_io) m->e.max_io = _mock_max_io; m->max_io = max_io; + m->block_size = block_size; dm_list_init(&m->expected_calls); dm_list_init(&m->issued_io); @@ -226,15 +259,15 @@ struct fixture { struct bcache *cache; }; -static struct fixture *_fixture_init(unsigned nr_cache_blocks) +static struct fixture *_fixture_init(sector_t block_size, unsigned nr_cache_blocks) { struct fixture *f = malloc(sizeof(*f)); - f->me = _mock_create(16); + f->me = _mock_create(16, block_size); T_ASSERT(f->me); _expect(f->me, E_MAX_IO); - f->cache = bcache_create(128, nr_cache_blocks, &f->me->e); + f->cache = bcache_create(block_size, nr_cache_blocks, &f->me->e); T_ASSERT(f->cache); return f; @@ -250,7 +283,7 @@ static void _fixture_exit(struct fixture *f) static void *_small_fixture_init(void) { - return _fixture_init(16); + return _fixture_init(128, 16); } static void _small_fixture_exit(void *context) @@ -260,7 +293,7 @@ static void _small_fixture_exit(void *context) static void *_large_fixture_init(void) { - return _fixture_init(1024); + return _fixture_init(128, 1024); } static void _large_fixture_exit(void *context) @@ -277,7 +310,7 @@ static void _large_fixture_exit(void *context) static void good_create(sector_t block_size, unsigned nr_cache_blocks) { struct bcache *cache; - struct mock_engine *me = _mock_create(16); + struct mock_engine *me = _mock_create(16, 128); _expect(me, E_MAX_IO); cache = bcache_create(block_size, nr_cache_blocks, &me->e); @@ -290,7 +323,7 @@ static void good_create(sector_t block_size, unsigned nr_cache_blocks) static void bad_create(sector_t block_size, unsigned nr_cache_blocks) { 
struct bcache *cache; - struct mock_engine *me = _mock_create(16); + struct mock_engine *me = _mock_create(16, 128); _expect(me, E_MAX_IO); cache = bcache_create(block_size, nr_cache_blocks, &me->e); @@ -335,7 +368,7 @@ static void test_get_triggers_read(void *context) int fd = 17; // arbitrary key struct block *b; - _expect(f->me, E_ISSUE); + _expect_read(f->me, fd, 0); _expect(f->me, E_WAIT); T_ASSERT(bcache_get(f->cache, fd, 0, 0, &b)); bcache_put(b); @@ -349,7 +382,7 @@ static void test_repeated_reads_are_cached(void *context) unsigned i; struct block *b; - _expect(f->me, E_ISSUE); + _expect_read(f->me, fd, 0); _expect(f->me, E_WAIT); for (i = 0; i < 100; i++) { T_ASSERT(bcache_get(f->cache, fd, 0, 0, &b)); @@ -370,14 +403,14 @@ static void test_block_gets_evicted_with_many_reads(void *context) struct block *b; for (i = 0; i < nr_cache_blocks; i++) { - _expect(me, E_ISSUE); + _expect_read(me, fd, i); _expect(me, E_WAIT); T_ASSERT(bcache_get(cache, fd, i, 0, &b)); bcache_put(b); } // Not enough cache blocks to hold this one - _expect(me, E_ISSUE); + _expect_read(me, fd, nr_cache_blocks); _expect(me, E_WAIT); T_ASSERT(bcache_get(cache, fd, nr_cache_blocks, 0, &b)); bcache_put(b); @@ -406,7 +439,7 @@ static void test_prefetch_issues_a_read(void *context) for (i = 0; i < nr_cache_blocks; i++) { // prefetch should not wait - _expect(me, E_ISSUE); + _expect_read(me, fd, i); bcache_prefetch(cache, fd, i); } @@ -431,7 +464,7 @@ static void test_too_many_prefetches_does_not_trigger_a_wait(void *context) for (i = 0; i < 10 * nr_cache_blocks; i++) { // prefetch should not wait if (i < nr_cache_blocks) - _expect(me, E_ISSUE); + _expect_read(me, fd, i); bcache_prefetch(cache, fd, i); } @@ -446,19 +479,17 @@ static void test_dirty_data_gets_written_back(void *context) struct mock_engine *me = f->me; struct bcache *cache = f->cache; - const unsigned nr_cache_blocks = 16; int fd = 17; // arbitrary key struct block *b; - // FIXME: be specific about the IO direction // Expect 
the read - _expect(me, E_ISSUE); + _expect_read(me, fd, 0); _expect(me, E_WAIT); T_ASSERT(bcache_get(cache, fd, 0, GF_DIRTY, &b)); bcache_put(b); // Expect the write - _expect(me, E_ISSUE); + _expect_write(me, fd, 0); _expect(me, E_WAIT); } @@ -468,7 +499,6 @@ static void test_zeroed_data_counts_as_dirty(void *context) struct mock_engine *me = f->me; struct bcache *cache = f->cache; - const unsigned nr_cache_blocks = 16; int fd = 17; // arbitrary key struct block *b; @@ -477,7 +507,7 @@ static void test_zeroed_data_counts_as_dirty(void *context) bcache_put(b); // Expect the write - _expect(me, E_ISSUE); + _expect_write(me, fd, 0); _expect(me, E_WAIT); } @@ -496,7 +526,7 @@ static void test_flush_waits_for_all_dirty(void *context) if (i % 2) { T_ASSERT(bcache_get(cache, fd, i, GF_ZERO, &b)); } else { - _expect_read(me); + _expect_read(me, fd, i); _expect(me, E_WAIT); T_ASSERT(bcache_get(cache, fd, i, 0, &b)); } @@ -505,7 +535,7 @@ static void test_flush_waits_for_all_dirty(void *context) for (i = 0; i < count; i++) { if (i % 2) - _expect_write(me); + _expect_write(me, fd, i); } for (i = 0; i < count; i++) { @@ -517,6 +547,25 @@ static void test_flush_waits_for_all_dirty(void *context) _no_outstanding_expectations(me); } +static void test_multiple_files(void * context) +{ + static int _fds[] = {1, 128, 345, 678, 890}; + + struct fixture *f = context; + struct mock_engine *me = f->me; + struct bcache *cache = f->cache; + struct block *b; + unsigned i; + + for (i = 0; i < DM_ARRAY_SIZE(_fds); i++) { + _expect_read(me, _fds[i], 0); + _expect(me, E_WAIT); + + T_ASSERT(bcache_get(cache, _fds[i], 0, 0, &b)); + bcache_put(b); + } +} + // Tests to be written // Open multiple files and prove the blocks are coming from the correct file // show invalidate works @@ -525,31 +574,34 @@ static void test_flush_waits_for_all_dirty(void *context) // check zeroing struct test_details { - const char *name; + const char *path; + const char *desc; void (*fn)(void *); void 
*(*fixture_init)(void); void (*fixture_exit)(void *); }; -#define TEST(name, fn) {name, fn, NULL, NULL} -#define TEST_S(name, fn) {name, fn, _small_fixture_init, _small_fixture_exit} -#define TEST_L(name, fn) {name, fn, _large_fixture_init, _large_fixture_exit} +#define PATH "device/bcache/" +#define TEST(path, name, fn) {PATH path, name, fn, NULL, NULL} +#define TEST_S(path, name, fn) {PATH path, name, fn, _small_fixture_init, _small_fixture_exit} +#define TEST_L(path, name, fn) {PATH path, name, fn, _large_fixture_init, _large_fixture_exit} int main(int argc, char **argv) { static struct test_details _tests[] = { - TEST("simple create/destroy", test_create), - TEST("nr cache blocks must be positive", test_nr_cache_blocks_must_be_positive), - TEST("block size must be positive", test_block_size_must_be_positive), - TEST("block size must be a multiple of page size", test_block_size_must_be_multiple_of_page_size), - TEST_S("bcache_get() triggers read", test_get_triggers_read), - TEST_S("repeated reads are cached", test_repeated_reads_are_cached), - TEST_S("block get evicted with many reads", test_block_gets_evicted_with_many_reads), - TEST_S("prefetch issues a read", test_prefetch_issues_a_read), - TEST_S("too many prefetches does not trigger a wait", test_too_many_prefetches_does_not_trigger_a_wait), - TEST_S("dirty data gets written back", test_dirty_data_gets_written_back), - TEST_S("zeroed data counts as dirty", test_zeroed_data_counts_as_dirty), - TEST_L("flush waits for all dirty", test_flush_waits_for_all_dirty), + TEST("create-destroy", "simple create/destroy", test_create), + TEST("cache-blocks-positive", "nr cache blocks must be positive", test_nr_cache_blocks_must_be_positive), + TEST("block-size-positive", "block size must be positive", test_block_size_must_be_positive), + TEST("block-size-multiple-page", "block size must be a multiple of page size", test_block_size_must_be_multiple_of_page_size), + TEST_S("get-reads", "bcache_get() triggers read", 
test_get_triggers_read), + TEST_S("reads-cached", "repeated reads are cached", test_repeated_reads_are_cached), + TEST_S("blocks-get-evicted", "block get evicted with many reads", test_block_gets_evicted_with_many_reads), + TEST_S("prefetch-reads", "prefetch issues a read", test_prefetch_issues_a_read), + TEST_S("prefetch-never-waits", "too many prefetches does not trigger a wait", test_too_many_prefetches_does_not_trigger_a_wait), + TEST_S("writeback-occurs", "dirty data gets written back", test_dirty_data_gets_written_back), + TEST_S("zero-flag-dirties", "zeroed data counts as dirty", test_zeroed_data_counts_as_dirty), + TEST_L("flush waits for all dirty", "flush waits for all dirty", test_flush_waits_for_all_dirty), + TEST_S("read-multiple-files", "read from multiple files", test_multiple_files), }; // We have to declare these as volatile because of the setjmp() @@ -558,10 +610,10 @@ int main(int argc, char **argv) for (i = 0; i < DM_ARRAY_SIZE(_tests); i++) { void *fixture; struct test_details *t = _tests + i; - fprintf(stderr, "[RUN ] %s\n", t->name); + fprintf(stderr, "[RUN ] %s\n", t->path); if (setjmp(_test_k)) - fprintf(stderr, "[ FAIL] %s\n", t->name); + fprintf(stderr, "[ FAIL] %s\n", t->path); else { if (t->fixture_init) fixture = t->fixture_init(); @@ -574,7 +626,7 @@ int main(int argc, char **argv) t->fixture_exit(fixture); passed++; - fprintf(stderr, "[ OK] %s\n", t->name); + fprintf(stderr, "[ OK] %s\n", t->path); } } From d9e6298edb0bc6533c22f7e95e613189abe89c99 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Thu, 8 Feb 2018 10:10:31 -0600 Subject: [PATCH 19/87] [device/bcache] fix missing max_io fn in bcache async engine --- lib/device/bcache.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/lib/device/bcache.c b/lib/device/bcache.c index dce05efe5..cea4db406 100644 --- a/lib/device/bcache.c +++ b/lib/device/bcache.c @@ -134,6 +134,7 @@ struct async_engine { struct io_engine e; io_context_t aio_context; struct cb_set *cbs; + 
unsigned max_io; }; static struct async_engine *_to_async(struct io_engine *e) @@ -233,6 +234,12 @@ static bool _async_wait(struct io_engine *ioe, io_complete_fn fn) return true; } +static unsigned _async_max_io(struct io_engine *ioe) +{ + struct async_engine *e = _to_async(ioe); + return e->max_io; +} + struct io_engine *create_async_io_engine(unsigned max_io) { int r; @@ -241,9 +248,12 @@ struct io_engine *create_async_io_engine(unsigned max_io) if (!e) return NULL; + e->max_io = max_io; + e->e.destroy = _async_destroy; e->e.issue = _async_issue; e->e.wait = _async_wait; + e->e.max_io = _async_max_io; e->aio_context = 0; r = io_setup(max_io, &e->aio_context); From 7be54bd687ead6aa21e04c8a85a648da369a3d88 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Thu, 8 Feb 2018 11:16:19 -0600 Subject: [PATCH 20/87] [device/bcache] fix min() function --- lib/device/bcache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/device/bcache.c b/lib/device/bcache.c index cea4db406..52be94737 100644 --- a/lib/device/bcache.c +++ b/lib/device/bcache.c @@ -997,7 +997,7 @@ void bcache_prefetch_bytes(struct bcache *cache, int fd, off_t start, size_t len static off_t _min(off_t lhs, off_t rhs) { - if (rhs > lhs) + if (rhs < lhs) return rhs; return lhs; From 93fc9374294aae679faa23aef0bd631db2e9a31a Mon Sep 17 00:00:00 2001 From: David Teigland Date: Thu, 8 Feb 2018 13:44:54 -0600 Subject: [PATCH 21/87] [device/bcache] bcache_read_bytes should put blocks --- lib/device/bcache.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/lib/device/bcache.c b/lib/device/bcache.c index 52be94737..5141083cd 100644 --- a/lib/device/bcache.c +++ b/lib/device/bcache.c @@ -1009,14 +1009,17 @@ bool bcache_read_bytes(struct bcache *cache, int fd, off_t start, size_t len, vo block_address bb, be, i; unsigned char *udata = data; off_t block_size = cache->block_sectors << SECTOR_SHIFT; + int errors = 0; byte_range_to_block_range(cache, start, len, &bb, &be); 
for (i = bb; i < be; i++) bcache_prefetch(cache, fd, i); for (i = bb; i < be; i++) { - if (!bcache_get(cache, fd, i, 0, &b)) - return false; + if (!bcache_get(cache, fd, i, 0, &b)) { + errors++; + continue; + } if (i == bb) { off_t block_offset = start % block_size; @@ -1030,9 +1033,11 @@ bool bcache_read_bytes(struct bcache *cache, int fd, off_t start, size_t len, vo len -= blen; udata += blen; } + + bcache_put(b); } - return true; + return errors ? false : true; } //---------------------------------------------------------------- From 697fa7aa1dbcc2ed8e141d9d13118cb38d609161 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Thu, 8 Feb 2018 09:32:19 -0600 Subject: [PATCH 22/87] [makefile] add -laio to makefiles --- daemons/clvmd/Makefile.in | 2 +- liblvm/Makefile.in | 2 +- scripts/Makefile.in | 2 +- tools/Makefile.in | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/daemons/clvmd/Makefile.in b/daemons/clvmd/Makefile.in index 7b1b49bf9..83af00e20 100644 --- a/daemons/clvmd/Makefile.in +++ b/daemons/clvmd/Makefile.in @@ -74,7 +74,7 @@ TARGETS = \ include $(top_builddir)/make.tmpl -LIBS += $(LVMINTERNAL_LIBS) -ldevmapper $(PTHREAD_LIBS) +LIBS += $(LVMINTERNAL_LIBS) -ldevmapper $(PTHREAD_LIBS) -laio CFLAGS += -fno-strict-aliasing $(EXTRA_EXEC_CFLAGS) INSTALL_TARGETS = \ diff --git a/liblvm/Makefile.in b/liblvm/Makefile.in index 6d0325c60..be3049a9e 100644 --- a/liblvm/Makefile.in +++ b/liblvm/Makefile.in @@ -43,7 +43,7 @@ LDDEPS += $(top_builddir)/lib/liblvm-internal.a include $(top_builddir)/make.tmpl LDFLAGS += -L$(top_builddir)/lib -L$(top_builddir)/daemons/dmeventd -LIBS += $(LVMINTERNAL_LIBS) -ldevmapper +LIBS += $(LVMINTERNAL_LIBS) -ldevmapper -laio .PHONY: install_dynamic install_static install_include install_pkgconfig diff --git a/scripts/Makefile.in b/scripts/Makefile.in index d06766f67..720ae9f0e 100644 --- a/scripts/Makefile.in +++ b/scripts/Makefile.in @@ -28,7 +28,7 @@ ifeq ("@APPLIB@", "yes") ifeq ("@BUILD_DMEVENTD@", "yes") LDFLAGS 
+= -Wl,-rpath-link,$(top_builddir)/daemons/dmeventd endif - LVMLIBS = @LVM2APP_LIB@ -ldevmapper + LVMLIBS = @LVM2APP_LIB@ -ldevmapper -laio endif LVM_SCRIPTS = lvmdump.sh lvmconf.sh diff --git a/tools/Makefile.in b/tools/Makefile.in index 61c6f385e..103b76732 100644 --- a/tools/Makefile.in +++ b/tools/Makefile.in @@ -95,7 +95,7 @@ ifeq ("@STATIC_LINK@", "yes") INSTALL_CMDLIB_TARGETS += install_cmdlib_static endif -LVMLIBS = $(LVMINTERNAL_LIBS) -ldevmapper +LVMLIBS = $(LVMINTERNAL_LIBS) -ldevmapper -laio LIB_VERSION = $(LIB_VERSION_LVM) CLEAN_TARGETS = liblvm2cmd.$(LIB_SUFFIX) $(TARGETS_DM) \ From a7cb76ae94a90c89b86a810ceb4b6a91bab3441e Mon Sep 17 00:00:00 2001 From: David Teigland Date: Tue, 6 Feb 2018 15:18:11 -0600 Subject: [PATCH 23/87] scan: use bcache for label scan and vg read New label_scan function populates bcache for each device on the system. The two read paths are updated to get data from bcache. The bcache is not yet used for writing. bcache blocks for a device are invalidated when the device is written. 
--- lib/commands/toolcontext.c | 2 + lib/config/config.c | 10 +- lib/device/bcache.c | 6 +- lib/device/device.h | 1 + lib/format_text/archive.c | 2 +- lib/format_text/archiver.c | 2 +- lib/format_text/format-text.c | 217 +++++++----- lib/format_text/import-export.h | 8 +- lib/format_text/import.c | 70 +++- lib/format_text/import_vsn1.c | 51 +-- lib/format_text/layout.h | 2 +- lib/format_text/text_label.c | 15 +- lib/label/label.c | 583 ++++++++++++++++++++++++------- lib/label/label.h | 15 +- lib/metadata/metadata-exported.h | 13 + lib/metadata/metadata.c | 12 +- lib/metadata/metadata.h | 8 +- 17 files changed, 713 insertions(+), 304 deletions(-) diff --git a/lib/commands/toolcontext.c b/lib/commands/toolcontext.c index c99849587..fe6b8a384 100644 --- a/lib/commands/toolcontext.c +++ b/lib/commands/toolcontext.c @@ -2124,6 +2124,7 @@ int refresh_toolcontext(struct cmd_context *cmd) activation_release(); lvmcache_destroy(cmd, 0, 0); + label_scan_destroy(cmd); label_exit(); _destroy_segtypes(&cmd->segtypes); _destroy_formats(cmd, &cmd->formats); @@ -2243,6 +2244,7 @@ void destroy_toolcontext(struct cmd_context *cmd) archive_exit(cmd); backup_exit(cmd); lvmcache_destroy(cmd, 0, 0); + label_scan_destroy(cmd); label_exit(); _destroy_segtypes(&cmd->segtypes); _destroy_formats(cmd, &cmd->formats); diff --git a/lib/config/config.c b/lib/config/config.c index 8fca3728e..2d7db698b 100644 --- a/lib/config/config.c +++ b/lib/config/config.c @@ -23,6 +23,7 @@ #include "toolcontext.h" #include "lvm-file.h" #include "memlock.h" +#include "label.h" #include #include @@ -532,10 +533,15 @@ int config_file_read_fd(struct dm_config_tree *cft, struct device *dev, dev_io_r log_error("Failed to allocate circular buffer."); return 0; } - if (!dev_read_circular(dev, (uint64_t) offset, size, - (uint64_t) offset2, size2, reason, buf)) { + + if (!bcache_read_bytes(scan_bcache, dev->fd, offset, size, buf)) goto out; + + if (size2) { + if (!bcache_read_bytes(scan_bcache, dev->fd, offset2, 
size2, buf + size)) + goto out; } + fb = buf; } diff --git a/lib/device/bcache.c b/lib/device/bcache.c index 5141083cd..38c909c12 100644 --- a/lib/device/bcache.c +++ b/lib/device/bcache.c @@ -223,7 +223,11 @@ static bool _async_wait(struct io_engine *ioe, io_complete_fn fn) else if ((int) ev->res < 0) fn(cb->context, (int) ev->res); - else { + else if (ev->res >= (1 << SECTOR_SHIFT)) { + /* minimum acceptable read is 1 sector */ + fn((void *) cb->context, 0); + + } else { log_warn("short io"); fn(cb->context, -ENODATA); } diff --git a/lib/device/device.h b/lib/device/device.h index 503373f88..d5eb00f72 100644 --- a/lib/device/device.h +++ b/lib/device/device.h @@ -31,6 +31,7 @@ #define DEV_USED_FOR_LV 0x00000100 /* Is device used for an LV */ #define DEV_ASSUMED_FOR_LV 0x00000200 /* Is device assumed for an LV */ #define DEV_NOT_O_NOATIME 0x00000400 /* Don't use O_NOATIME */ +#define DEV_IN_BCACHE 0x00000800 /* dev fd is open and used in bcache */ /* * Support for external device info. diff --git a/lib/format_text/archive.c b/lib/format_text/archive.c index 72ec40b66..690bc7408 100644 --- a/lib/format_text/archive.c +++ b/lib/format_text/archive.c @@ -320,7 +320,7 @@ static void _display_archive(struct cmd_context *cmd, struct archive_file *af) * retrieve the archive time and description. 
*/ /* FIXME Use variation on _vg_read */ - if (!(vg = text_vg_import_file(tf, af->path, &when, &desc))) { + if (!(vg = text_read_metadata_file(tf, af->path, &when, &desc))) { log_error("Unable to read archive file."); tf->fmt->ops->destroy_instance(tf); return; diff --git a/lib/format_text/archiver.c b/lib/format_text/archiver.c index 1eb665436..81b5da934 100644 --- a/lib/format_text/archiver.c +++ b/lib/format_text/archiver.c @@ -320,7 +320,7 @@ struct volume_group *backup_read_vg(struct cmd_context *cmd, } dm_list_iterate_items(mda, &tf->metadata_areas_in_use) { - if (!(vg = mda->ops->vg_read(tf, vg_name, mda, NULL, NULL, 0))) + if (!(vg = mda->ops->vg_read(tf, vg_name, mda, NULL, NULL))) stack; break; } diff --git a/lib/format_text/format-text.c b/lib/format_text/format-text.c index be9a8b906..e9a34e696 100644 --- a/lib/format_text/format-text.c +++ b/lib/format_text/format-text.c @@ -317,47 +317,42 @@ static void _xlate_mdah(struct mda_header *mdah) static int _raw_read_mda_header(struct mda_header *mdah, struct device_area *dev_area, int primary_mda) { - if (!dev_open_readonly(dev_area->dev)) - return_0; + log_debug_metadata("Reading mda header sector from %s at %llu", + dev_name(dev_area->dev), (unsigned long long)dev_area->start); - if (!dev_read(dev_area->dev, dev_area->start, MDA_HEADER_SIZE, MDA_HEADER_REASON(primary_mda), mdah)) { - if (!dev_close(dev_area->dev)) - stack; - return_0; + if (!bcache_read_bytes(scan_bcache, dev_area->dev->fd, dev_area->start, MDA_HEADER_SIZE, mdah)) { + log_error("Failed to read metadata area header on %s at %llu", + dev_name(dev_area->dev), (unsigned long long)dev_area->start); + return 0; } - if (!dev_close(dev_area->dev)) - return_0; - if (mdah->checksum_xl != xlate32(calc_crc(INITIAL_CRC, (uint8_t *)mdah->magic, MDA_HEADER_SIZE - sizeof(mdah->checksum_xl)))) { - log_error("Incorrect metadata area header checksum on %s" - " at offset " FMTu64, dev_name(dev_area->dev), - dev_area->start); + log_error("Incorrect checksum 
in metadata area header on %s at %llu", + dev_name(dev_area->dev), (unsigned long long)dev_area->start); return 0; } _xlate_mdah(mdah); if (strncmp((char *)mdah->magic, FMTT_MAGIC, sizeof(mdah->magic))) { - log_error("Wrong magic number in metadata area header on %s" - " at offset " FMTu64, dev_name(dev_area->dev), - dev_area->start); + log_error("Wrong magic number in metadata area header on %s at %llu", + dev_name(dev_area->dev), (unsigned long long)dev_area->start); return 0; } if (mdah->version != FMTT_VERSION) { - log_error("Incompatible metadata area header version: %d on %s" - " at offset " FMTu64, mdah->version, - dev_name(dev_area->dev), dev_area->start); + log_error("Incompatible version %u metadata area header on %s at %llu", + mdah->version, + dev_name(dev_area->dev), (unsigned long long)dev_area->start); return 0; } if (mdah->start != dev_area->start) { - log_error("Incorrect start sector in metadata area header: " - FMTu64 " on %s at offset " FMTu64, mdah->start, - dev_name(dev_area->dev), dev_area->start); + log_error("Incorrect start sector %llu in metadata area header on %s at %llu", + (unsigned long long)mdah->start, + dev_name(dev_area->dev), (unsigned long long)dev_area->start); return 0; } @@ -390,18 +385,33 @@ static int _raw_write_mda_header(const struct format_type *fmt, mdah->version = FMTT_VERSION; mdah->start = start_byte; + label_scan_invalidate(dev); + + if (!dev_open(dev)) + return_0; + _xlate_mdah(mdah); mdah->checksum_xl = xlate32(calc_crc(INITIAL_CRC, (uint8_t *)mdah->magic, MDA_HEADER_SIZE - sizeof(mdah->checksum_xl))); - if (!dev_write(dev, start_byte, MDA_HEADER_SIZE, MDA_HEADER_REASON(primary_mda), mdah)) + if (!dev_write(dev, start_byte, MDA_HEADER_SIZE, MDA_HEADER_REASON(primary_mda), mdah)) { + dev_close(dev); return_0; + } + + if (dev_close(dev)) + stack; return 1; } -static struct raw_locn *_find_vg_rlocn(struct device_area *dev_area, +/* + * FIXME: unify this with read_metadata_location() which is used + * in the label 
scanning path. + */ + +static struct raw_locn *_read_metadata_location_vg(struct device_area *dev_area, struct mda_header *mdah, int primary_mda, const char *vgname, int *precommitted) @@ -446,11 +456,13 @@ static struct raw_locn *_find_vg_rlocn(struct device_area *dev_area, if (rlocn_was_ignored) return rlocn; - /* FIXME Loop through rlocns two-at-a-time. List null-terminated. */ - /* FIXME Ignore if checksum incorrect!!! */ - if (!dev_read(dev_area->dev, dev_area->start + rlocn->offset, - sizeof(vgnamebuf), MDA_CONTENT_REASON(primary_mda), vgnamebuf)) - goto_bad; + /* + * Verify that the VG metadata pointed to by the rlocn + * begins with a valid vgname. + */ + memset(vgnamebuf, 0, sizeof(vgnamebuf)); + + bcache_read_bytes(scan_bcache, dev_area->dev->fd, dev_area->start + rlocn->offset, NAME_LEN, vgnamebuf); if (!strncmp(vgnamebuf, vgname, len = strlen(vgname)) && (isspace(vgnamebuf[len]) || vgnamebuf[len] == '{')) @@ -505,7 +517,7 @@ static int _raw_holds_vgname(struct format_instance *fid, if (!(mdah = raw_read_mda_header(fid->fmt, dev_area, 0))) return_0; - if (_find_vg_rlocn(dev_area, mdah, 0, vgname, &noprecommit)) + if (_read_metadata_location_vg(dev_area, mdah, 0, vgname, &noprecommit)) r = 1; if (!dev_close(dev_area->dev)) @@ -520,7 +532,7 @@ static struct volume_group *_vg_read_raw_area(struct format_instance *fid, struct cached_vg_fmtdata **vg_fmtdata, unsigned *use_previous_vg, int precommitted, - int single_device, int primary_mda) + int primary_mda) { struct volume_group *vg = NULL; struct raw_locn *rlocn; @@ -532,7 +544,7 @@ static struct volume_group *_vg_read_raw_area(struct format_instance *fid, if (!(mdah = raw_read_mda_header(fid->fmt, area, primary_mda))) goto_out; - if (!(rlocn = _find_vg_rlocn(area, mdah, primary_mda, vgname, &precommitted))) { + if (!(rlocn = _read_metadata_location_vg(area, mdah, primary_mda, vgname, &precommitted))) { log_debug_metadata("VG %s not found on %s", vgname, dev_name(area->dev)); goto out; } @@ -546,26 +558,25 
@@ static struct volume_group *_vg_read_raw_area(struct format_instance *fid, goto out; } - /* FIXME 64-bit */ - if (!(vg = text_vg_import_fd(fid, NULL, vg_fmtdata, use_previous_vg, single_device, area->dev, - primary_mda, - (off_t) (area->start + rlocn->offset), - (uint32_t) (rlocn->size - wrap), - (off_t) (area->start + MDA_HEADER_SIZE), - wrap, calc_crc, rlocn->checksum, &when, - &desc)) && (!use_previous_vg || !*use_previous_vg)) - goto_out; + vg = text_read_metadata(fid, NULL, vg_fmtdata, use_previous_vg, area->dev, primary_mda, + (off_t) (area->start + rlocn->offset), + (uint32_t) (rlocn->size - wrap), + (off_t) (area->start + MDA_HEADER_SIZE), + wrap, + calc_crc, + rlocn->checksum, + &when, &desc); - if (vg) - log_debug_metadata("Read %s %smetadata (%u) from %s at " FMTu64 " size " - FMTu64, vg->name, precommitted ? "pre-commit " : "", - vg->seqno, dev_name(area->dev), - area->start + rlocn->offset, rlocn->size); - else - log_debug_metadata("Skipped reading %smetadata from %s at " FMTu64 " size " - FMTu64 " with matching checksum.", precommitted ? "pre-commit " : "", - dev_name(area->dev), - area->start + rlocn->offset, rlocn->size); + if (!vg) { + /* FIXME: detect and handle errors, and distinguish from the optimization + that skips parsing the metadata which also returns NULL. 
*/ + } + + log_debug_metadata("Found metadata on %s at %"FMTu64" size %"FMTu64" for VG %s", + dev_name(area->dev), + area->start + rlocn->offset, + rlocn->size, + vgname); if (vg && precommitted) vg->status |= PRECOMMITTED; @@ -578,8 +589,7 @@ static struct volume_group *_vg_read_raw(struct format_instance *fid, const char *vgname, struct metadata_area *mda, struct cached_vg_fmtdata **vg_fmtdata, - unsigned *use_previous_vg, - int single_device) + unsigned *use_previous_vg) { struct mda_context *mdac = (struct mda_context *) mda->metadata_locn; struct volume_group *vg; @@ -587,7 +597,7 @@ static struct volume_group *_vg_read_raw(struct format_instance *fid, if (!dev_open_readonly(mdac->area.dev)) return_NULL; - vg = _vg_read_raw_area(fid, vgname, &mdac->area, vg_fmtdata, use_previous_vg, 0, single_device, mda_is_primary(mda)); + vg = _vg_read_raw_area(fid, vgname, &mdac->area, vg_fmtdata, use_previous_vg, 0, mda_is_primary(mda)); if (!dev_close(mdac->area.dev)) stack; @@ -607,7 +617,7 @@ static struct volume_group *_vg_read_precommit_raw(struct format_instance *fid, if (!dev_open_readonly(mdac->area.dev)) return_NULL; - vg = _vg_read_raw_area(fid, vgname, &mdac->area, vg_fmtdata, use_previous_vg, 1, 0, mda_is_primary(mda)); + vg = _vg_read_raw_area(fid, vgname, &mdac->area, vg_fmtdata, use_previous_vg, 1, mda_is_primary(mda)); if (!dev_close(mdac->area.dev)) stack; @@ -655,7 +665,7 @@ static int _vg_write_raw(struct format_instance *fid, struct volume_group *vg, goto out; } - rlocn = _find_vg_rlocn(&mdac->area, mdah, mda_is_primary(mda), old_vg_name ? : vg->name, &noprecommit); + rlocn = _read_metadata_location_vg(&mdac->area, mdah, mda_is_primary(mda), old_vg_name ? 
: vg->name, &noprecommit); mdac->rlocn.offset = _next_rlocn_offset(rlocn, mdah, mdac->area.start, MDA_ORIGINAL_ALIGNMENT); mdac->rlocn.size = fidtc->raw_metadata_buf_size; @@ -681,6 +691,8 @@ static int _vg_write_raw(struct format_instance *fid, struct volume_group *vg, vg->name, dev_name(mdac->area.dev), mdac->area.start + mdac->rlocn.offset, mdac->rlocn.size - new_wrap, mdac->rlocn.size); + label_scan_invalidate(mdac->area.dev); + /* Write text out, circularly */ if (!dev_write(mdac->area.dev, mdac->area.start + mdac->rlocn.offset, (size_t) (mdac->rlocn.size - new_wrap), MDA_CONTENT_REASON(mda_is_primary(mda)), @@ -752,7 +764,7 @@ static int _vg_commit_raw_rlocn(struct format_instance *fid, if (!(mdah = raw_read_mda_header(fid->fmt, &mdac->area, mda_is_primary(mda)))) goto_out; - if (!(rlocn = _find_vg_rlocn(&mdac->area, mdah, mda_is_primary(mda), old_vg_name ? : vg->name, &noprecommit))) { + if (!(rlocn = _read_metadata_location_vg(&mdac->area, mdah, mda_is_primary(mda), old_vg_name ? 
: vg->name, &noprecommit))) { mdah->raw_locns[0].offset = 0; mdah->raw_locns[0].size = 0; mdah->raw_locns[0].checksum = 0; @@ -872,7 +884,7 @@ static int _vg_remove_raw(struct format_instance *fid, struct volume_group *vg, if (!(mdah = raw_read_mda_header(fid->fmt, &mdac->area, mda_is_primary(mda)))) goto_out; - if (!(rlocn = _find_vg_rlocn(&mdac->area, mdah, mda_is_primary(mda), vg->name, &noprecommit))) { + if (!(rlocn = _read_metadata_location_vg(&mdac->area, mdah, mda_is_primary(mda), vg->name, &noprecommit))) { rlocn = &mdah->raw_locns[0]; mdah->raw_locns[1].offset = 0; } @@ -906,8 +918,10 @@ static struct volume_group *_vg_read_file_name(struct format_instance *fid, time_t when; char *desc; - if (!(vg = text_vg_import_file(fid, read_path, &when, &desc))) - return_NULL; + if (!(vg = text_read_metadata_file(fid, read_path, &when, &desc))) { + log_error("Failed to read VG %s from %s", vgname, read_path); + return NULL; + } /* * Currently you can only have a single volume group per @@ -931,8 +945,7 @@ static struct volume_group *_vg_read_file(struct format_instance *fid, const char *vgname, struct metadata_area *mda, struct cached_vg_fmtdata **vg_fmtdata, - unsigned *use_previous_vg __attribute__((unused)), - int single_device __attribute__((unused))) + unsigned *use_previous_vg __attribute__((unused))) { struct text_context *tc = (struct text_context *) mda->metadata_locn; @@ -1175,7 +1188,7 @@ static int _scan_file(const struct format_type *fmt, const char *vgname) return 1; } -int vgname_from_mda(const struct format_type *fmt, +int read_metadata_location_summary(const struct format_type *fmt, struct mda_header *mdah, int primary_mda, struct device_area *dev_area, struct lvmcache_vgsummary *vgsummary, uint64_t *mda_free_sectors) { @@ -1184,13 +1197,12 @@ int vgname_from_mda(const struct format_type *fmt, unsigned int len = 0; char buf[NAME_LEN + 1] __attribute__((aligned(8))); uint64_t buffer_size, current_usage; - unsigned used_cached_metadata = 0; if 
(mda_free_sectors) *mda_free_sectors = ((dev_area->size - MDA_HEADER_SIZE) / 2) >> SECTOR_SHIFT; if (!mdah) { - log_error(INTERNAL_ERROR "vgname_from_mda called with NULL pointer for mda_header"); + log_error(INTERNAL_ERROR "read_metadata_location_summary called with NULL pointer for mda_header"); return 0; } @@ -1201,15 +1213,12 @@ int vgname_from_mda(const struct format_type *fmt, * If no valid offset, do not try to search for vgname */ if (!rlocn->offset) { - log_debug("%s: found metadata with offset 0.", - dev_name(dev_area->dev)); + log_debug_metadata("Metadata location on %s at %"FMTu64" has offset 0.", + dev_name(dev_area->dev), dev_area->start + rlocn->offset); return 0; } - /* Do quick check for a vgname */ - if (!dev_read(dev_area->dev, dev_area->start + rlocn->offset, - NAME_LEN, MDA_CONTENT_REASON(primary_mda), buf)) - return_0; + bcache_read_bytes(scan_bcache, dev_area->dev->fd, dev_area->start + rlocn->offset, NAME_LEN, buf); while (buf[len] && !isspace(buf[len]) && buf[len] != '{' && len < (NAME_LEN - 1)) @@ -1218,47 +1227,66 @@ int vgname_from_mda(const struct format_type *fmt, buf[len] = '\0'; /* Ignore this entry if the characters aren't permissible */ - if (!validate_name(buf)) + if (!validate_name(buf)) { + log_error("Metadata location on %s at %"FMTu64" begins with invalid VG name.", + dev_name(dev_area->dev), dev_area->start + rlocn->offset); return_0; + } /* We found a VG - now check the metadata */ if (rlocn->offset + rlocn->size > mdah->size) wrap = (uint32_t) ((rlocn->offset + rlocn->size) - mdah->size); if (wrap > rlocn->offset) { - log_error("%s: metadata (" FMTu64 " bytes) too large for circular buffer (" FMTu64 " bytes)", - dev_name(dev_area->dev), rlocn->size, mdah->size - MDA_HEADER_SIZE); + log_error("Metadata location on %s at %"FMTu64" is too large for circular buffer.", + dev_name(dev_area->dev), dev_area->start + rlocn->offset); return 0; } - /* Did we see this metadata before? */ + /* + * Did we see this metadata before? 
+ * Look in lvmcache to see if there is vg info matching + * the checksum/size that we see in the mda_header (rlocn) + * on this device. If so, then vgsummary->vgname is set + * and controls if the "checksum_only" flag passed to + * text_read_metadata_summary() is 1 or 0. + * + * If checksum_only = 1, then text_read_metadata_summary() + * will read the metadata from this device, and run the + * checksum function on it. If the calculated checksum + * of the metadata matches the checksum in the mda_header, + * which also matches the checksum saved in vginfo from + * another device, then it skips parsing the metadata into + * a config tree, which saves considerable cpu time. + */ + vgsummary->mda_checksum = rlocn->checksum; vgsummary->mda_size = rlocn->size; + lvmcache_lookup_mda(vgsummary); - if (lvmcache_lookup_mda(vgsummary)) - used_cached_metadata = 1; - - /* FIXME 64-bit */ - if (!text_vgsummary_import(fmt, dev_area->dev, MDA_CONTENT_REASON(primary_mda), + if (!text_read_metadata_summary(fmt, dev_area->dev, MDA_CONTENT_REASON(primary_mda), (off_t) (dev_area->start + rlocn->offset), (uint32_t) (rlocn->size - wrap), (off_t) (dev_area->start + MDA_HEADER_SIZE), wrap, calc_crc, vgsummary->vgname ? 
1 : 0, - vgsummary)) - return_0; + vgsummary)) { + log_error("Metadata location on %s at %"FMTu64" has invalid summary for VG.", + dev_name(dev_area->dev), dev_area->start + rlocn->offset); + return 0; + } /* Ignore this entry if the characters aren't permissible */ - if (!validate_name(vgsummary->vgname)) - return_0; + if (!validate_name(vgsummary->vgname)) { + log_error("Metadata location on %s at %"FMTu64" has invalid VG name.", + dev_name(dev_area->dev), dev_area->start + rlocn->offset); + return 0; + } - log_debug_metadata("%s: %s metadata at " FMTu64 " size " FMTu64 - " (in area at " FMTu64 " size " FMTu64 - ") for %s (" FMTVGID ")", + log_debug_metadata("Found metadata summary on %s at %"FMTu64" size %"FMTu64" for VG %s", dev_name(dev_area->dev), - used_cached_metadata ? "Using cached" : "Found", dev_area->start + rlocn->offset, - rlocn->size, dev_area->start, dev_area->size, vgsummary->vgname, - (char *)&vgsummary->vgid); + rlocn->size, + vgsummary->vgname); if (mda_free_sectors) { current_usage = (rlocn->size + SECTOR_SIZE - UINT64_C(1)) - @@ -1301,8 +1329,7 @@ static int _scan_raw(const struct format_type *fmt, const char *vgname __attribu goto close_dev; } - /* TODO: caching as in vgname_from_mda() (trigger this code?) 
*/ - if (vgname_from_mda(fmt, mdah, 0, &rl->dev_area, &vgsummary, NULL)) { + if (read_metadata_location_summary(fmt, mdah, 0, &rl->dev_area, &vgsummary, NULL)) { vg = _vg_read_raw_area(&fid, vgsummary.vgname, &rl->dev_area, NULL, NULL, 0, 0, 0); if (vg) lvmcache_update_vg(vg, 0); @@ -1776,7 +1803,13 @@ static int _mda_export_text_raw(struct metadata_area *mda, struct mda_context *mdc = (struct mda_context *) mda->metadata_locn; char mdah[MDA_HEADER_SIZE]; /* temporary */ - if (!mdc || !_raw_read_mda_header((struct mda_header *)mdah, &mdc->area, mda_is_primary(mda))) + if (!mdc) { + log_error(INTERNAL_ERROR "mda_export_text_raw no mdc"); + return 1; /* pretend the MDA does not exist */ + } + + /* FIXME: why aren't ignore,start,size,free_sectors available? */ + if (!_raw_read_mda_header((struct mda_header *)mdah, &mdc->area, mda_is_primary(mda))) return 1; /* pretend the MDA does not exist */ return config_make_nodes(cft, parent, NULL, diff --git a/lib/format_text/import-export.h b/lib/format_text/import-export.h index 894d88141..920eb3e83 100644 --- a/lib/format_text/import-export.h +++ b/lib/format_text/import-export.h @@ -49,7 +49,6 @@ struct text_vg_version_ops { int (*check_version) (const struct dm_config_tree * cf); struct volume_group *(*read_vg) (struct format_instance * fid, const struct dm_config_tree *cf, - unsigned use_cached_pvs, unsigned allow_lvmetad_extensions); void (*read_desc) (struct dm_pool * mem, const struct dm_config_tree *cf, time_t *when, char **desc); @@ -68,14 +67,13 @@ int read_segtype_lvflags(uint64_t *status, char *segtype_str); int text_vg_export_file(struct volume_group *vg, const char *desc, FILE *fp); size_t text_vg_export_raw(struct volume_group *vg, const char *desc, char **buf); -struct volume_group *text_vg_import_file(struct format_instance *fid, +struct volume_group *text_read_metadata_file(struct format_instance *fid, const char *file, time_t *when, char **desc); -struct volume_group *text_vg_import_fd(struct format_instance 
*fid, +struct volume_group *text_read_metadata(struct format_instance *fid, const char *file, struct cached_vg_fmtdata **vg_fmtdata, unsigned *use_previous_vg, - int single_device, struct device *dev, int primary_mda, off_t offset, uint32_t size, off_t offset2, uint32_t size2, @@ -83,7 +81,7 @@ struct volume_group *text_vg_import_fd(struct format_instance *fid, uint32_t checksum, time_t *when, char **desc); -int text_vgsummary_import(const struct format_type *fmt, +int text_read_metadata_summary(const struct format_type *fmt, struct device *dev, dev_io_reason_t reason, off_t offset, uint32_t size, off_t offset2, uint32_t size2, diff --git a/lib/format_text/import.c b/lib/format_text/import.c index da4cefdb8..4b344856f 100644 --- a/lib/format_text/import.c +++ b/lib/format_text/import.c @@ -35,7 +35,7 @@ static void _init_text_import(void) /* * Find out vgname on a given device. */ -int text_vgsummary_import(const struct format_type *fmt, +int text_read_metadata_summary(const struct format_type *fmt, struct device *dev, dev_io_reason_t reason, off_t offset, uint32_t size, off_t offset2, uint32_t size2, @@ -52,17 +52,29 @@ int text_vgsummary_import(const struct format_type *fmt, if (!(cft = config_open(CONFIG_FILE_SPECIAL, NULL, 0))) return_0; - if ((!dev && !config_file_read(cft)) || - (dev && !config_file_read_fd(cft, dev, reason, offset, size, + if (dev) { + log_debug_metadata("Reading metadata summary from %s at %llu size %d (+%d)", + dev_name(dev), (unsigned long long)offset, + size, size2); + + if (!config_file_read_fd(cft, dev, reason, offset, size, offset2, size2, checksum_fn, vgsummary->mda_checksum, - checksum_only, 1))) { - log_error("Couldn't read volume group metadata."); - goto out; + checksum_only, 1)) { + /* FIXME: handle errors */ + log_error("Couldn't read volume group metadata from %s.", dev_name(dev)); + goto out; + } + } else { + if (!config_file_read(cft)) { + log_error("Couldn't read volume group metadata from file."); + goto out; + } } if 
(checksum_only) { /* Checksum matches already-cached content - no need to reparse. */ + log_debug_metadata("Skipped parsing metadata on %s", dev_name(dev)); r = 1; goto out; } @@ -91,11 +103,10 @@ struct cached_vg_fmtdata { size_t cached_mda_size; }; -struct volume_group *text_vg_import_fd(struct format_instance *fid, +struct volume_group *text_read_metadata(struct format_instance *fid, const char *file, struct cached_vg_fmtdata **vg_fmtdata, unsigned *use_previous_vg, - int single_device, struct device *dev, int primary_mda, off_t offset, uint32_t size, off_t offset2, uint32_t size2, @@ -108,6 +119,15 @@ struct volume_group *text_vg_import_fd(struct format_instance *fid, struct text_vg_version_ops **vsn; int skip_parse; + /* + * This struct holds the checksum and size of the VG metadata + * that was read from a previous device. When we read the VG + * metadata from this device, we can skip parsing it into a + * cft (saving time) if the checksum of the metadata buffer + * we read from this device matches the size/checksum saved in + * the mda_header/rlocn struct on this device, and matches the + * size/checksum from the previous device. 
+ */ if (vg_fmtdata && !*vg_fmtdata && !(*vg_fmtdata = dm_pool_zalloc(fid->mem, sizeof(**vg_fmtdata)))) { log_error("Failed to allocate VG fmtdata for text format."); @@ -127,15 +147,30 @@ struct volume_group *text_vg_import_fd(struct format_instance *fid, ((*vg_fmtdata)->cached_mda_checksum == checksum) && ((*vg_fmtdata)->cached_mda_size == (size + size2)); - if ((!dev && !config_file_read(cft)) || - (dev && !config_file_read_fd(cft, dev, MDA_CONTENT_REASON(primary_mda), offset, size, + + if (dev) { + log_debug_metadata("Reading metadata from %s at %llu size %d (+%d)", + dev_name(dev), (unsigned long long)offset, + size, size2); + + if (!config_file_read_fd(cft, dev, MDA_CONTENT_REASON(primary_mda), offset, size, offset2, size2, checksum_fn, checksum, - skip_parse, 1))) - goto_out; + skip_parse, 1)) { + /* FIXME: handle errors */ + log_error("Couldn't read volume group metadata from %s.", dev_name(dev)); + goto out; + } + } else { + if (!config_file_read(cft)) { + log_error("Couldn't read volume group metadata from file."); + goto out; + } + } if (skip_parse) { if (use_previous_vg) *use_previous_vg = 1; + log_debug_metadata("Skipped parsing metadata on %s", dev_name(dev)); goto out; } @@ -146,7 +181,7 @@ struct volume_group *text_vg_import_fd(struct format_instance *fid, if (!(*vsn)->check_version(cft)) continue; - if (!(vg = (*vsn)->read_vg(fid, cft, single_device, 0))) + if (!(vg = (*vsn)->read_vg(fid, cft, 0))) goto_out; (*vsn)->read_desc(vg->vgmem, cft, when, desc); @@ -166,12 +201,13 @@ struct volume_group *text_vg_import_fd(struct format_instance *fid, return vg; } -struct volume_group *text_vg_import_file(struct format_instance *fid, +struct volume_group *text_read_metadata_file(struct format_instance *fid, const char *file, time_t *when, char **desc) { - return text_vg_import_fd(fid, file, NULL, NULL, 0, NULL, 0, (off_t)0, 0, (off_t)0, 0, NULL, 0, - when, desc); + return text_read_metadata(fid, file, NULL, NULL, NULL, 0, + (off_t)0, 0, (off_t)0, 0, NULL, 
0, + when, desc); } static struct volume_group *_import_vg_from_config_tree(const struct dm_config_tree *cft, @@ -191,7 +227,7 @@ static struct volume_group *_import_vg_from_config_tree(const struct dm_config_t * The only path to this point uses cached vgmetadata, * so it can use cached PV state too. */ - if (!(vg = (*vsn)->read_vg(fid, cft, 1, allow_lvmetad_extensions))) + if (!(vg = (*vsn)->read_vg(fid, cft, allow_lvmetad_extensions))) stack; else if ((vg_missing = vg_missing_pv_count(vg))) { log_verbose("There are %d physical volumes missing.", diff --git a/lib/format_text/import_vsn1.c b/lib/format_text/import_vsn1.c index 9267d4581..d51397a00 100644 --- a/lib/format_text/import_vsn1.c +++ b/lib/format_text/import_vsn1.c @@ -32,9 +32,7 @@ typedef int (*section_fn) (struct format_instance * fid, struct volume_group * vg, const struct dm_config_node * pvn, const struct dm_config_node * vgn, struct dm_hash_table * pv_hash, - struct dm_hash_table * lv_hash, - unsigned *scan_done_once, - unsigned report_missing_devices); + struct dm_hash_table * lv_hash); #define _read_int32(root, path, result) \ dm_config_get_uint32(root, path, (uint32_t *) (result)) @@ -180,9 +178,7 @@ static int _read_pv(struct format_instance *fid, struct volume_group *vg, const struct dm_config_node *pvn, const struct dm_config_node *vgn __attribute__((unused)), struct dm_hash_table *pv_hash, - struct dm_hash_table *lv_hash __attribute__((unused)), - unsigned *scan_done_once, - unsigned report_missing_devices) + struct dm_hash_table *lv_hash __attribute__((unused))) { struct dm_pool *mem = vg->vgmem; struct physical_volume *pv; @@ -226,10 +222,7 @@ static int _read_pv(struct format_instance *fid, if (!id_write_format(&pv->id, buffer, sizeof(buffer))) buffer[0] = '\0'; - if (report_missing_devices) - log_error_once("Couldn't find device with uuid %s.", buffer); - else - log_very_verbose("Couldn't find device with uuid %s.", buffer); + log_error_once("Couldn't find device with uuid %s.", buffer); 
} if (!(pv->vg_name = dm_pool_strdup(mem, vg->name))) @@ -574,9 +567,7 @@ static int _read_lvnames(struct format_instance *fid __attribute__((unused)), struct volume_group *vg, const struct dm_config_node *lvn, const struct dm_config_node *vgn __attribute__((unused)), struct dm_hash_table *pv_hash __attribute__((unused)), - struct dm_hash_table *lv_hash, - unsigned *scan_done_once __attribute__((unused)), - unsigned report_missing_devices __attribute__((unused))) + struct dm_hash_table *lv_hash) { struct dm_pool *mem = vg->vgmem; struct logical_volume *lv; @@ -731,9 +722,7 @@ static int _read_historical_lvnames(struct format_instance *fid __attribute__((u struct volume_group *vg, const struct dm_config_node *hlvn, const struct dm_config_node *vgn __attribute__((unused)), struct dm_hash_table *pv_hash __attribute__((unused)), - struct dm_hash_table *lv_hash __attribute__((unused)), - unsigned *scan_done_once __attribute__((unused)), - unsigned report_missing_devices __attribute__((unused))) + struct dm_hash_table *lv_hash __attribute__((unused))) { struct dm_pool *mem = vg->vgmem; struct generic_logical_volume *glv; @@ -802,9 +791,7 @@ static int _read_historical_lvnames_interconnections(struct format_instance *fid struct volume_group *vg, const struct dm_config_node *hlvn, const struct dm_config_node *vgn __attribute__((unused)), struct dm_hash_table *pv_hash __attribute__((unused)), - struct dm_hash_table *lv_hash __attribute__((unused)), - unsigned *scan_done_once __attribute__((unused)), - unsigned report_missing_devices __attribute__((unused))) + struct dm_hash_table *lv_hash __attribute__((unused))) { struct dm_pool *mem = vg->vgmem; const char *historical_lv_name, *origin_name = NULL; @@ -914,9 +901,7 @@ static int _read_lvsegs(struct format_instance *fid, struct volume_group *vg, const struct dm_config_node *lvn, const struct dm_config_node *vgn __attribute__((unused)), struct dm_hash_table *pv_hash, - struct dm_hash_table *lv_hash, - unsigned 
*scan_done_once __attribute__((unused)), - unsigned report_missing_devices __attribute__((unused))) + struct dm_hash_table *lv_hash) { struct logical_volume *lv; @@ -977,12 +962,9 @@ static int _read_sections(struct format_instance *fid, struct volume_group *vg, const struct dm_config_node *vgn, struct dm_hash_table *pv_hash, struct dm_hash_table *lv_hash, - int optional, - unsigned *scan_done_once) + int optional) { const struct dm_config_node *n; - /* Only report missing devices when doing a scan */ - unsigned report_missing_devices = scan_done_once ? !*scan_done_once : 1; if (!dm_config_get_section(vgn, section, &n)) { if (!optional) { @@ -994,8 +976,7 @@ static int _read_sections(struct format_instance *fid, } for (n = n->child; n; n = n->sib) { - if (!fn(fid, vg, n, vgn, pv_hash, lv_hash, - scan_done_once, report_missing_devices)) + if (!fn(fid, vg, n, vgn, pv_hash, lv_hash)) return_0; } @@ -1004,7 +985,6 @@ static int _read_sections(struct format_instance *fid, static struct volume_group *_read_vg(struct format_instance *fid, const struct dm_config_tree *cft, - unsigned use_cached_pvs, unsigned allow_lvmetad_extensions) { const struct dm_config_node *vgn; @@ -1012,7 +992,6 @@ static struct volume_group *_read_vg(struct format_instance *fid, const char *str, *format_str, *system_id; struct volume_group *vg; struct dm_hash_table *pv_hash = NULL, *lv_hash = NULL; - unsigned scan_done_once = use_cached_pvs; uint64_t vgstatus; /* skip any top-level values */ @@ -1167,7 +1146,7 @@ static struct volume_group *_read_vg(struct format_instance *fid, } if (!_read_sections(fid, "physical_volumes", _read_pv, vg, - vgn, pv_hash, lv_hash, 0, &scan_done_once)) { + vgn, pv_hash, lv_hash, 0)) { log_error("Couldn't find all physical volumes for volume " "group %s.", vg->name); goto bad; @@ -1175,7 +1154,7 @@ static struct volume_group *_read_vg(struct format_instance *fid, if (allow_lvmetad_extensions) _read_sections(fid, "outdated_pvs", _read_pv, vg, - vgn, pv_hash, lv_hash, 
1, &scan_done_once); + vgn, pv_hash, lv_hash, 1); else if (dm_config_has_node(vgn, "outdated_pvs")) log_error(INTERNAL_ERROR "Unexpected outdated_pvs section in metadata of VG %s.", vg->name); @@ -1187,28 +1166,28 @@ static struct volume_group *_read_vg(struct format_instance *fid, } if (!_read_sections(fid, "logical_volumes", _read_lvnames, vg, - vgn, pv_hash, lv_hash, 1, NULL)) { + vgn, pv_hash, lv_hash, 1)) { log_error("Couldn't read all logical volume names for volume " "group %s.", vg->name); goto bad; } if (!_read_sections(fid, "historical_logical_volumes", _read_historical_lvnames, vg, - vgn, pv_hash, lv_hash, 1, NULL)) { + vgn, pv_hash, lv_hash, 1)) { log_error("Couldn't read all historical logical volumes for volume " "group %s.", vg->name); goto bad; } if (!_read_sections(fid, "logical_volumes", _read_lvsegs, vg, - vgn, pv_hash, lv_hash, 1, NULL)) { + vgn, pv_hash, lv_hash, 1)) { log_error("Couldn't read all logical volumes for " "volume group %s.", vg->name); goto bad; } if (!_read_sections(fid, "historical_logical_volumes", _read_historical_lvnames_interconnections, - vg, vgn, pv_hash, lv_hash, 1, NULL)) { + vg, vgn, pv_hash, lv_hash, 1)) { log_error("Couldn't read all removed logical volume interconnections " "for volume group %s.", vg->name); goto bad; diff --git a/lib/format_text/layout.h b/lib/format_text/layout.h index 1746b9ccd..2671bbf02 100644 --- a/lib/format_text/layout.h +++ b/lib/format_text/layout.h @@ -104,7 +104,7 @@ struct mda_context { #define MDA_SIZE_MIN (8 * (unsigned) lvm_getpagesize()) #define MDA_ORIGINAL_ALIGNMENT 512 /* Original alignment used for start of VG metadata content */ -int vgname_from_mda(const struct format_type *fmt, struct mda_header *mdah, int primary_mda, +int read_metadata_location_summary(const struct format_type *fmt, struct mda_header *mdah, int primary_mda, struct device_area *dev_area, struct lvmcache_vgsummary *vgsummary, uint64_t *mda_free_sectors); diff --git a/lib/format_text/text_label.c 
b/lib/format_text/text_label.c index 7058f70c6..1c322dd26 100644 --- a/lib/format_text/text_label.c +++ b/lib/format_text/text_label.c @@ -323,7 +323,7 @@ struct _update_mda_baton { struct label *label; }; -static int _update_mda(struct metadata_area *mda, void *baton) +static int _read_mda_header_and_metadata(struct metadata_area *mda, void *baton) { struct _update_mda_baton *p = baton; const struct format_type *fmt = p->label->labeller->fmt; @@ -360,7 +360,7 @@ static int _update_mda(struct metadata_area *mda, void *baton) return 1; } - if (vgname_from_mda(fmt, mdah, mda_is_primary(mda), &mdac->area, &vgsummary, + if (read_metadata_location_summary(fmt, mdah, mda_is_primary(mda), &mdac->area, &vgsummary, &mdac->free_sectors) && !lvmcache_update_vgname_and_id(p->info, &vgsummary)) { if (!dev_close(mdac->area.dev)) @@ -375,10 +375,10 @@ close_dev: return 1; } -static int _text_read(struct labeller *l, struct device *dev, void *buf, - struct label **label) +static int _text_read(struct labeller *l, struct device *dev, void *label_buf, + struct label **label) { - struct label_header *lh = (struct label_header *) buf; + struct label_header *lh = (struct label_header *) label_buf; struct pv_header *pvhdr; struct pv_header_extension *pvhdr_ext; struct lvmcache_info *info; @@ -390,7 +390,7 @@ static int _text_read(struct labeller *l, struct device *dev, void *buf, /* * PV header base */ - pvhdr = (struct pv_header *) ((char *) buf + xlate32(lh->offset_xl)); + pvhdr = (struct pv_header *) ((char *) label_buf + xlate32(lh->offset_xl)); if (!(info = lvmcache_add(l, (char *)pvhdr->pv_uuid, dev, FMT_TEXT_ORPHAN_VG_NAME, @@ -447,8 +447,7 @@ out: baton.info = info; baton.label = *label; - if (!lvmcache_foreach_mda(info, _update_mda, &baton)) - return_0; + lvmcache_foreach_mda(info, _read_mda_header_and_metadata, &baton); lvmcache_make_valid(info); diff --git a/lib/label/label.c b/lib/label/label.c index 46dd667bd..57d52484c 100644 --- a/lib/label/label.c +++ b/lib/label/label.c 
@@ -18,10 +18,14 @@ #include "crc.h" #include "xlate.h" #include "lvmcache.h" +#include "bcache.h" +#include "toolcontext.h" #include #include #include +#include + /* FIXME Allow for larger labels? Restricted to single sector currently */ @@ -96,101 +100,6 @@ struct labeller *label_get_handler(const char *name) return NULL; } -static void _update_lvmcache_orphan(struct lvmcache_info *info) -{ - struct lvmcache_vgsummary vgsummary_orphan = { - .vgname = lvmcache_fmt(info)->orphan_vg_name, - }; - - memcpy(&vgsummary_orphan.vgid, lvmcache_fmt(info)->orphan_vg_name, strlen(lvmcache_fmt(info)->orphan_vg_name)); - - if (!lvmcache_update_vgname_and_id(info, &vgsummary_orphan)) - stack; -} - -static struct labeller *_find_labeller(struct device *dev, char *buf, - uint64_t *label_sector, - uint64_t scan_sector) -{ - struct labeller_i *li; - struct labeller *r = NULL; - struct label_header *lh; - struct lvmcache_info *info; - uint64_t sector; - int found = 0; - char readbuf[LABEL_SCAN_SIZE] __attribute__((aligned(8))); - - if (!dev_read(dev, scan_sector << SECTOR_SHIFT, - LABEL_SCAN_SIZE, DEV_IO_LABEL, readbuf)) { - log_debug_devs("%s: Failed to read label area", dev_name(dev)); - goto out; - } - - /* Scan a few sectors for a valid label */ - for (sector = 0; sector < LABEL_SCAN_SECTORS; - sector += LABEL_SIZE >> SECTOR_SHIFT) { - lh = (struct label_header *) (readbuf + - (sector << SECTOR_SHIFT)); - - if (!strncmp((char *)lh->id, LABEL_ID, sizeof(lh->id))) { - if (found) { - log_error("Ignoring additional label on %s at " - "sector %" PRIu64, dev_name(dev), - sector + scan_sector); - } - if (xlate64(lh->sector_xl) != sector + scan_sector) { - log_very_verbose("%s: Label for sector %" PRIu64 - " found at sector %" PRIu64 - " - ignoring", dev_name(dev), - (uint64_t)xlate64(lh->sector_xl), - sector + scan_sector); - continue; - } - if (calc_crc(INITIAL_CRC, (uint8_t *)&lh->offset_xl, LABEL_SIZE - - ((uint8_t *) &lh->offset_xl - (uint8_t *) lh)) != - xlate32(lh->crc_xl)) { - 
log_very_verbose("Label checksum incorrect on %s - " - "ignoring", dev_name(dev)); - continue; - } - if (found) - continue; - } - - dm_list_iterate_items(li, &_labellers) { - if (li->l->ops->can_handle(li->l, (char *) lh, - sector + scan_sector)) { - log_very_verbose("%s: %s label detected at " - "sector %" PRIu64, - dev_name(dev), li->name, - sector + scan_sector); - if (found) { - log_error("Ignoring additional label " - "on %s at sector %" PRIu64, - dev_name(dev), - sector + scan_sector); - continue; - } - r = li->l; - memcpy(buf, lh, LABEL_SIZE); - if (label_sector) - *label_sector = sector + scan_sector; - found = 1; - break; - } - } - } - - out: - if (!found) { - if ((info = lvmcache_info_from_pvid(dev->pvid, dev, 0))) - _update_lvmcache_orphan(info); - log_very_verbose("%s: No label detected", dev_name(dev)); - } - - return r; -} - /* FIXME Also wipe associated metadata area headers? */ int label_remove(struct device *dev) { @@ -216,6 +125,8 @@ int label_remove(struct device *dev) */ dev_flush(dev); + label_scan_invalidate(dev); + if (!dev_read(dev, UINT64_C(0), LABEL_SCAN_SIZE, DEV_IO_LABEL, readbuf)) { log_debug_devs("%s: Failed to read label area", dev_name(dev)); goto out; @@ -267,44 +178,6 @@ int label_remove(struct device *dev) return r; } -int label_read(struct device *dev, struct label **result, - uint64_t scan_sector) -{ - char buf[LABEL_SIZE] __attribute__((aligned(8))); - struct labeller *l; - uint64_t sector; - struct lvmcache_info *info; - int r = 0; - - if ((info = lvmcache_info_from_pvid(dev->pvid, dev, 1))) { - log_debug_devs("Reading label from lvmcache for %s", dev_name(dev)); - *result = lvmcache_get_label(info); - return 1; - } - - log_debug_devs("Reading label from device %s", dev_name(dev)); - - if (!dev_open_readonly(dev)) { - stack; - - if ((info = lvmcache_info_from_pvid(dev->pvid, dev, 0))) - _update_lvmcache_orphan(info); - - return r; - } - - if ((l = _find_labeller(dev, buf, §or, scan_sector))) - if ((r = (l->ops->read)(l, dev, 
buf, result)) && result && *result) {
-			(*result)->dev = dev;
-			(*result)->sector = sector;
-		}
-
-	if (!dev_close(dev))
-		stack;
-
-	return r;
-}
-
 /* Caller may need to use label_get_handler to create label struct! */
 int label_write(struct device *dev, struct label *label)
 {
@@ -323,6 +196,8 @@ int label_write(struct device *dev, struct label *label)
 		return 0;
 	}
 
+	label_scan_invalidate(dev);
+
 	memset(buf, 0, LABEL_SIZE);
 
 	strncpy((char *)lh->id, LABEL_ID, sizeof(lh->id));
@@ -373,3 +248,445 @@ struct label *label_create(struct labeller *labeller)
 
 	return label;
 }
+
+
+/* global variable for accessing the bcache populated by label scan */
+struct bcache *scan_bcache;
+
+#define BCACHE_BLOCK_SIZE_IN_SECTORS 2048 /* 1MB */
+
+/* True when the DEV_IN_BCACHE flag is set on the device. */
+static bool _in_bcache(struct device *dev)
+{
+	return (dev->flags & DEV_IN_BCACHE) ? true : false;
+}
+
+/*
+ * Walk the first LABEL_SCAN_SECTORS sectors held in scan_buf looking for a
+ * label that one of the registered labellers can handle.
+ *
+ * scan_buf:     data read from the device, starting at scan_sector.
+ * label_buf:    receives a copy (LABEL_SIZE bytes) of the first accepted label.
+ * label_sector: if non-NULL, receives the sector number of that label.
+ * scan_sector:  sector number on the device that scan_buf[0] corresponds to.
+ *
+ * Returns the labeller that accepted the label, or NULL if none did.
+ */
+static struct labeller *_find_lvm_header(struct device *dev,
+					 char *scan_buf,
+					 char *label_buf,
+					 uint64_t *label_sector,
+					 uint64_t scan_sector)
+{
+	struct labeller_i *li;
+	struct labeller *labeller_ret = NULL;
+	struct label_header *lh;
+	uint64_t sector;
+	int found = 0;
+
+	/*
+	 * Find which sector in scan_buf starts with a valid label,
+	 * and copy it into label_buf.
+	 */
+
+	for (sector = 0; sector < LABEL_SCAN_SECTORS;
+	     sector += LABEL_SIZE >> SECTOR_SHIFT) {
+		lh = (struct label_header *) (scan_buf + (sector << SECTOR_SHIFT));
+
+		/*
+		 * Sectors carrying LABEL_ID are validated (recorded sector
+		 * number and CRC) before being offered to the labellers.
+		 * Sectors without LABEL_ID skip these checks and are still
+		 * offered to the labellers below.
+		 */
+		if (!strncmp((char *)lh->id, LABEL_ID, sizeof(lh->id))) {
+			if (found) {
+				log_error("Ignoring additional label on %s at sector %llu",
+					  dev_name(dev), (unsigned long long)(sector + scan_sector));
+			}
+			if (xlate64(lh->sector_xl) != sector + scan_sector) {
+				log_very_verbose("%s: Label for sector %llu found at sector %llu - ignoring.",
+						 dev_name(dev),
+						 (unsigned long long)xlate64(lh->sector_xl),
+						 (unsigned long long)(sector + scan_sector));
+				continue;
+			}
+			/* The CRC covers the label from offset_xl to the end of the sector. */
+			if (calc_crc(INITIAL_CRC, (uint8_t *)&lh->offset_xl, LABEL_SIZE -
+				     ((uint8_t *) &lh->offset_xl - (uint8_t *) lh)) !=
+			    xlate32(lh->crc_xl)) {
+				log_very_verbose("Label checksum incorrect on %s - ignoring", dev_name(dev));
+				continue;
+			}
+			if (found)
+				continue;
+		}
+
+		dm_list_iterate_items(li, &_labellers) {
+			if (li->l->ops->can_handle(li->l, (char *) lh, sector + scan_sector)) {
+				log_very_verbose("%s: %s label detected at sector %llu",
+						 dev_name(dev), li->name,
+						 (unsigned long long)(sector + scan_sector));
+				if (found) {
+					log_error("Ignoring additional label on %s at sector %llu",
+						  dev_name(dev),
+						  (unsigned long long)(sector + scan_sector));
+					continue;
+				}
+
+				/* First accepted label wins; later ones are only reported. */
+				labeller_ret = li->l;
+				found = 1;
+
+				memcpy(label_buf, lh, LABEL_SIZE);
+				if (label_sector)
+					*label_sector = sector + scan_sector;
+				break;
+			}
+		}
+	}
+
+	return labeller_ret;
+}
+
+/*
+ * Process/parse the headers from the data read from a device.
+ * Populates lvmcache with device / mda locations / vgname
+ * so that vg_read(vgname) will know which devices/locations
+ * to read metadata from.
+ *
+ * If during processing, headers/metadata are found to be needed
+ * beyond the range of the scanned block, then additional reads
+ * are performed in the processing functions to get that data.
+ *
+ * Sets *is_lvm_device to 1 when a label is found and to 0 otherwise.
+ * Returns the result of the labeller's read(), or 0 when no label is found.
+ */
+static int _process_block(struct device *dev, struct block *bb, int *is_lvm_device)
+{
+	char label_buf[LABEL_SIZE] __attribute__((aligned(8)));
+	struct label *label = NULL;
+	struct labeller *labeller;
+	struct lvmcache_info *info;
+	uint64_t sector;
+	int ret = 0;
+
+	/*
+	 * Finds the data sector containing the label and copies into label_buf.
+	 * label_buf: struct label_header + struct pv_header + struct pv_header_extension
+	 *
+	 * FIXME: we don't need to copy one sector from bb->data into label_buf,
+	 * we can just point label_buf at one sector in bb->data.
+	 */
+	if (!(labeller = _find_lvm_header(dev, bb->data, label_buf, &sector, 0))) {
+
+		/*
+		 * Non-PVs exit here
+		 *
+		 * FIXME: check for PVs with errors that also exit here!
+		 * i.e. this code cannot distinguish between a non-lvm
+		 * device and an lvm device with errors.
+		 */
+
+		log_very_verbose("%s: No lvm label detected", dev_name(dev));
+
+		if ((info = lvmcache_info_from_pvid(dev->pvid, dev, 0))) {
+			/* FIXME: if this case is actually happening, fix it. */
+			log_warn("Device %s has no label, removing PV info from lvmcache.", dev_name(dev));
+			lvmcache_del(info);
+		}
+
+		*is_lvm_device = 0;
+		goto_out;
+	}
+
+	*is_lvm_device = 1;
+
+	/*
+	 * This is the point where the scanning code dives into the rest of
+	 * lvm.  ops->read() is usually _text_read() which reads the pv_header,
+	 * mda locations, mda contents.  As these bits of data are read, they
+	 * are saved into lvmcache as info/vginfo structs.
+	 */
+
+	if ((ret = (labeller->ops->read)(labeller, dev, label_buf, &label)) && label) {
+		label->dev = dev;
+		label->sector = sector;
+	} else {
+		/* FIXME: handle errors */
+	}
+ out:
+	return ret;
+}
+
+/*
+ * Read or reread label/metadata from selected devs.
+ *
+ * Reads and looks at label_header, pv_header, pv_header_extension,
+ * mda_header, raw_locns, vg metadata from each device.
+ *
+ * Effect is populating lvmcache with latest info/vginfo (PV/VG) data
+ * from the devs.  If a scanned device does not have a label_header,
+ * its info is removed from lvmcache.
+ *
+ * Every entry on devs is visited exactly once; on return the entries are
+ * back on devs so that the caller still owns a valid list.
+ *
+ * Returns 1 when the scan pass completed (individual device failures are
+ * only counted and logged), 0 when scanning could not be attempted.
+ */
+
+static int _scan_list(struct dm_list *devs)
+{
+	struct dm_list wait_devs;
+	struct dm_list done_devs;
+	struct device_list *devl, *devl2;
+	struct block *bb;
+	int scan_failed_count = 0;
+	int scan_lvm_count = 0;
+	int rem_prefetches;
+	int scan_failed;
+	int is_lvm_device;
+
+	/* Nothing can be prefetched or read without the cache. */
+	if (!scan_bcache) {
+		log_error("Cannot scan devices: bcache has not been set up.");
+		return 0;
+	}
+
+	dm_list_init(&wait_devs);
+	dm_list_init(&done_devs);
+
+	log_debug_devs("Scanning %d devices.", dm_list_size(devs));
+
+ scan_more:
+	rem_prefetches = bcache_max_prefetches(scan_bcache);
+
+	dm_list_iterate_items_safe(devl, devl2, devs) {
+
+		/*
+		 * If we prefetch more devs than blocks in the cache, then the
+		 * cache will wait for earlier reads to complete, toss the
+		 * results, and reuse those blocks before we've had a chance to
+		 * use them.  So, prefetch as many as are available, wait for
+		 * and process them, then repeat.
+		 */
+		if (!rem_prefetches)
+			break;
+
+		/*
+		 * The in-bcache flag corresponds with this dev_open.
+		 * Clearing the in-bcache flag should be paired with
+		 * a dev_close.  (This dev may already be in bcache.)
+		 */
+		if (!_in_bcache(devl->dev)) {
+			if (!dev_open_readonly(devl->dev)) {
+				log_debug_devs("%s: Failed to open device.", dev_name(devl->dev));
+				/*
+				 * Take the device off the work list: leaving
+				 * it on devs would make the scan_more loop
+				 * below retry it forever.
+				 */
+				dm_list_del(&devl->list);
+				dm_list_add(&done_devs, &devl->list);
+				scan_failed_count++;
+				continue;
+			}
+		}
+
+		bcache_prefetch(scan_bcache, devl->dev->fd, 0);
+
+		rem_prefetches--;
+
+		dm_list_del(&devl->list);
+		dm_list_add(&wait_devs, &devl->list);
+	}
+
+	dm_list_iterate_items_safe(devl, devl2, &wait_devs) {
+		bb = NULL;
+		/* Do not carry over the verdict from the previous device. */
+		is_lvm_device = 0;
+
+		if (!bcache_get(scan_bcache, devl->dev->fd, 0, 0, &bb)) {
+			log_debug_devs("%s: Failed to scan device.", dev_name(devl->dev));
+			scan_failed_count++;
+			scan_failed = 1;
+		} else {
+			log_debug_devs("Processing data from device %s fd %d block %p", dev_name(devl->dev), devl->dev->fd, bb);
+			_process_block(devl->dev, bb, &is_lvm_device);
+			/* Only devices that actually carry a label count as lvm. */
+			if (is_lvm_device)
+				scan_lvm_count++;
+			scan_failed = 0;
+		}
+
+		if (bb)
+			bcache_put(bb);
+
+		/*
+		 * Keep the bcache block of lvm devices we have processed so
+		 * that the vg_read phase can reuse it.  If bcache failed to
+		 * read the block, or the device does not belong to lvm, then
+		 * drop it from bcache.
+		 */
+		if (scan_failed || !is_lvm_device) {
+			devl->dev->flags &= ~DEV_IN_BCACHE;
+			bcache_invalidate_fd(scan_bcache, devl->dev->fd);
+			dev_close(devl->dev);
+		} else {
+			/* The device must be kept open while it's in bcache. */
+			devl->dev->flags |= DEV_IN_BCACHE;
+		}
+
+		dm_list_del(&devl->list);
+		dm_list_add(&done_devs, &devl->list);
+	}
+
+	if (!dm_list_empty(devs))
+		goto scan_more;
+
+	/* FIXME: let the caller know if some lvm devices failed to be scanned. */
+
+	log_debug_devs("Scanned %d devices: %d for lvm, %d failed.",
+		       dm_list_size(&done_devs), scan_lvm_count, scan_failed_count);
+
+	/*
+	 * Hand the entries back to the caller's list; done_devs lives on
+	 * this stack frame and must not stay linked to them.
+	 */
+	dm_list_splice(devs, &done_devs);
+
+	return 1;
+}
+
+/*
+ * Scan and cache lvm data from all devices on the system.
+ * The cache should be empty/reset before calling this.
+ *
+ * Returns the result of scanning the device list, or 0 when the device
+ * list or the bcache cannot be set up.
+ */
+
+int label_scan(struct cmd_context *cmd)
+{
+	struct dm_list all_devs;
+	struct dev_iter *iter;
+	struct device_list *devl;
+	struct device *dev;
+	struct io_engine *ioe;
+
+	log_debug_devs("Finding devices to scan");
+
+	dm_list_init(&all_devs);
+
+	/*
+	 * Iterate through all the devices in dev-cache (block devs that appear
+	 * under /dev that could possibly hold a PV and are not excluded by
+	 * filters).  Read each to see if it's an lvm device, and if so
+	 * populate lvmcache with some basic info about the device and the VG
+	 * on it.  This info will be used by the vg_read() phase of the
+	 * command.
+	 */
+	dev_cache_full_scan(cmd->full_filter);
+
+	if (!(iter = dev_iter_create(cmd->full_filter, 0))) {
+		log_error("Scanning failed to get devices.");
+		return 0;
+	}
+
+	while ((dev = dev_iter_get(iter))) {
+		if (!(devl = dm_pool_zalloc(cmd->mem, sizeof(*devl)))) {
+			log_error("Scanning failed to allocate device list entry.");
+			/* Do not leak the iterator on the error path. */
+			dev_iter_destroy(iter);
+			return 0;
+		}
+		devl->dev = dev;
+		dm_list_add(&all_devs, &devl->list);
+
+		/*
+		 * label_scan should not generally be called a second time,
+		 * so this will usually not be true.
+		 */
+		if (_in_bcache(dev))
+			bcache_invalidate_fd(scan_bcache, dev->fd);
+	}
+	dev_iter_destroy(iter);
+
+	if (!scan_bcache) {
+
+		/*
+		 * 100 is arbitrary, it's the max number of concurrent aio's
+		 * possible, i.e, the number of devices that can be read at
+		 * once.  Should this be configurable?
+		 */
+		if (!(ioe = create_async_io_engine(100))) {
+			log_error("Scanning failed to create async io engine.");
+			return 0;
+		}
+
+		/*
+		 * Configure one cache block for each device on the system.
+		 * We won't generally need to cache that many because some
+		 * of the devs will not be lvm devices, and we don't need
+		 * an entry for those.  We might want to change this.
+		 */
+		if (!(scan_bcache = bcache_create(BCACHE_BLOCK_SIZE_IN_SECTORS, dm_list_size(&all_devs), ioe))) {
+			log_error("Scanning failed to create bcache.");
+			return 0;
+		}
+	}
+
+	return _scan_list(&all_devs);
+}
+
+/*
+ * Scan and cache lvm data from the listed devices.  If a device is already
+ * scanned and cached, this replaces the previously cached lvm data for the
+ * device.
This is called when vg_read() wants to guarantee that it is using
+ * the latest data from the devices in the VG (since the scan populated bcache
+ * without a lock.)
+ */
+
+int label_scan_devs(struct cmd_context *cmd, struct dm_list *devs)
+{
+	struct device_list *devl;
+
+	/* Drop any cached blocks first so the scan rereads from disk. */
+	dm_list_iterate_items(devl, devs) {
+		if (_in_bcache(devl->dev))
+			bcache_invalidate_fd(scan_bcache, devl->dev->fd);
+	}
+
+	return _scan_list(devs);
+}
+
+/*
+ * Forget the cached data for one device: clear its in-bcache flag,
+ * invalidate its blocks in scan_bcache and close it.  Does nothing
+ * for a device that is not flagged as being in bcache.
+ */
+void label_scan_invalidate(struct device *dev)
+{
+	if (_in_bcache(dev)) {
+		dev->flags &= ~DEV_IN_BCACHE;
+		bcache_invalidate_fd(scan_bcache, dev->fd);
+		dev_close(dev);
+	}
+}
+
+/*
+ * Undo label_scan()
+ *
+ * Close devices that are open because bcache is holding blocks for them.
+ * Destroy the bcache.
+ */
+
+void label_scan_destroy(struct cmd_context *cmd)
+{
+	struct dev_iter *iter;
+	struct device *dev;
+
+	if (!scan_bcache)
+		return;
+
+	/* NOTE(review): if the iterator cannot be created, the bcache is left in place. */
+	if (!(iter = dev_iter_create(cmd->full_filter, 0))) {
+		return;
+	}
+
+	while ((dev = dev_iter_get(iter)))
+		label_scan_invalidate(dev);
+	dev_iter_destroy(iter);
+
+	bcache_destroy(scan_bcache);
+	scan_bcache = NULL;
+}
+
+/*
+ * Read (or re-read) and process (or re-process) the data for a device.  This
+ * will reset (clear and repopulate) the bcache and lvmcache info for this
+ * device.  There are only a couple odd places that want to reread a specific
+ * device, this is not a commonly used function.
+ *
+ * Returns the result of the scan.  When labelp is non-NULL and lvmcache
+ * holds valid info for the device afterwards, *labelp is set to its label;
+ * otherwise *labelp is left untouched.
+ */
+
+/* FIXME: remove unused_sector arg */
+
+int label_read(struct device *dev, struct label **labelp, uint64_t unused_sector)
+{
+	struct dm_list one_dev;
+	struct device_list *devl;
+	int ret;
+
+	/* scanning is done by list, so make a single item list for this dev */
+	if (!(devl = dm_zalloc(sizeof(*devl))))
+		return 0;
+	devl->dev = dev;
+	dm_list_init(&one_dev);
+	dm_list_add(&one_dev, &devl->list);
+
+	if (_in_bcache(dev))
+		bcache_invalidate_fd(scan_bcache, dev->fd);
+
+	ret = _scan_list(&one_dev);
+
+	/*
+	 * The list entry was only needed for the duration of the scan;
+	 * one_dev is not used again, so release the entry here.
+	 */
+	dm_free(devl);
+
+	/*
+	 * FIXME: this ugliness of returning a pointer to the label is
+	 * temporary until the callers can be updated to not use this.
+	 */
+	if (labelp) {
+		struct lvmcache_info *info;
+
+		info = lvmcache_info_from_pvid(dev->pvid, dev, 1);
+		if (info)
+			*labelp = lvmcache_get_label(info);
+	}
+
+	return ret;
+}
+
+/*
+ * Read a label from a specific, non-zero sector.  This is used in only
+ * one place: pvck -> pv_analyze.
+ *
+ * A non-zero scan_sector is not supported yet and returns 0; sector 0
+ * is handled by label_read().
+ */
+
+int label_read_sector(struct device *dev, struct label **labelp, uint64_t scan_sector)
+{
+	if (scan_sector) {
+		/* TODO: not yet implemented */
+		/* When is this done?  When does it make sense?  Is it actually possible? */
+		return 0;
+	}
+
+	return label_read(dev, labelp, 0);
+}
+
diff --git a/lib/label/label.h b/lib/label/label.h
index ea1129019..d9e36bc33 100644
--- a/lib/label/label.h
+++ b/lib/label/label.h
@@ -18,6 +18,8 @@
 
 #include "uuid.h"
 #include "device.h"
+#include "bcache.h"
+#include "toolcontext.h"
 
 #define LABEL_ID "LABELONE"
 #define LABEL_SIZE SECTOR_SIZE	/* Think very carefully before changing this */
@@ -63,7 +65,7 @@ struct label_ops {
 	 * Read a label from a volume.
 	 */
 	int (*read) (struct labeller * l, struct device * dev,
-		     void *buf, struct label ** label);
+		     void *label_buf, struct label ** label);
 
 	/*
 	 * Populate label_type etc.
@@ -94,10 +96,17 @@ int label_register_handler(struct labeller *handler); struct labeller *label_get_handler(const char *name); int label_remove(struct device *dev); -int label_read(struct device *dev, struct label **result, - uint64_t scan_sector); int label_write(struct device *dev, struct label *label); struct label *label_create(struct labeller *labeller); void label_destroy(struct label *label); +extern struct bcache *scan_bcache; + +int label_scan(struct cmd_context *cmd); +int label_scan_devs(struct cmd_context *cmd, struct dm_list *devs); +void label_scan_invalidate(struct device *dev); +void label_scan_destroy(struct cmd_context *cmd); +int label_read(struct device *dev, struct label **labelp, uint64_t unused_sector); +int label_read_sector(struct device *dev, struct label **labelp, uint64_t scan_sector); + #endif diff --git a/lib/metadata/metadata-exported.h b/lib/metadata/metadata-exported.h index 2bc7927fe..73041cf32 100644 --- a/lib/metadata/metadata-exported.h +++ b/lib/metadata/metadata-exported.h @@ -377,6 +377,19 @@ struct pv_segment { */ #define FMT_INSTANCE_PRIVATE_MDAS 0x00000008U +/* + * Each VG has its own fid struct. The fid for a VG describes where + * the metadata for that VG can be found. The lists hold mda locations. + * + * label scan finds the metadata locations (devs and offsets) for a VG, + * and saves this info in lvmcache vginfo/info lists. + * + * vg_read() then creates an fid for a given VG, and the mda locations + * from lvmcache are copied onto the fid lists. Those mda locations + * are read again by vg_read() to get VG metadata that is used to + * create the 'vg' struct. + */ + struct format_instance { unsigned ref_count; /* Refs to this fid from VG and PV structs */ struct dm_pool *mem; diff --git a/lib/metadata/metadata.c b/lib/metadata/metadata.c index 2249d2fc7..00b7737b3 100644 --- a/lib/metadata/metadata.c +++ b/lib/metadata/metadata.c @@ -719,6 +719,10 @@ int check_pv_dev_sizes(struct volume_group *vg) * source file. 
All the following and more are only used by liblvm: * * . get_pvs() + * . get_vgids() + * . get_vgnames() + * . lvmcache_get_vgids() + * . lvmcache_get_vgnames() * . the vg->pvs_to_write list and pv_to_write struct */ @@ -3909,12 +3913,16 @@ static struct volume_group *_vg_read(struct cmd_context *cmd, /* Ensure contents of all metadata areas match - else do recovery */ inconsistent_mda_count=0; dm_list_iterate_items(mda, &fid->metadata_areas_in_use) { + struct device *mda_dev = mda_get_device(mda); + use_previous_vg = 0; + log_debug_metadata("Reading VG %s from %s", vgname, dev_name(mda_dev)); + if ((use_precommitted && !(vg = mda->ops->vg_read_precommit(fid, vgname, mda, &vg_fmtdata, &use_previous_vg)) && !use_previous_vg) || (!use_precommitted && - !(vg = mda->ops->vg_read(fid, vgname, mda, &vg_fmtdata, &use_previous_vg, 0)) && !use_previous_vg)) { + !(vg = mda->ops->vg_read(fid, vgname, mda, &vg_fmtdata, &use_previous_vg)) && !use_previous_vg)) { inconsistent = 1; vg_fmtdata = NULL; continue; @@ -4106,7 +4114,7 @@ static struct volume_group *_vg_read(struct cmd_context *cmd, if ((use_precommitted && !(vg = mda->ops->vg_read_precommit(fid, vgname, mda, &vg_fmtdata, &use_previous_vg)) && !use_previous_vg) || (!use_precommitted && - !(vg = mda->ops->vg_read(fid, vgname, mda, &vg_fmtdata, &use_previous_vg, 0)) && !use_previous_vg)) { + !(vg = mda->ops->vg_read(fid, vgname, mda, &vg_fmtdata, &use_previous_vg)) && !use_previous_vg)) { inconsistent = 1; vg_fmtdata = NULL; continue; diff --git a/lib/metadata/metadata.h b/lib/metadata/metadata.h index 1fa14e839..5b8d690cc 100644 --- a/lib/metadata/metadata.h +++ b/lib/metadata/metadata.h @@ -80,8 +80,7 @@ struct metadata_area_ops { const char *vg_name, struct metadata_area * mda, struct cached_vg_fmtdata **vg_fmtdata, - unsigned *use_previous_vg, - int single_device); + unsigned *use_previous_vg); struct volume_group *(*vg_read_precommit) (struct format_instance * fi, const char *vg_name, struct metadata_area * mda, @@ 
-183,6 +182,11 @@ void mda_set_ignored(struct metadata_area *mda, unsigned mda_ignored); unsigned mda_locns_match(struct metadata_area *mda1, struct metadata_area *mda2); struct device *mda_get_device(struct metadata_area *mda); +/* + * fic is used to create an fid. It's used to pass fmt/vgname/vgid args + * to create_instance() which creates an fid for the specified vg. + */ + struct format_instance_ctx { uint32_t type; union { From 4507ba3596a549697733e1b839f25af454ccf878 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Wed, 7 Feb 2018 13:14:08 -0600 Subject: [PATCH 24/87] scan: use new label_scan for lvmcache_label_scan To do label scanning, lvm code calls lvmcache_label_scan(). Change lvmcache_label_scan() to use the new label_scan() based on bcache. Also add lvmcache_label_rescan_vg() which calls the new label_scan_devs() which does label scanning on only the specified devices. This is for a subsequent commit and is not yet used. --- lib/cache/lvmcache.c | 236 +++++++++++++++++++++++---------- lib/cache/lvmcache.h | 12 +- lib/format_text/format-text.c | 2 +- lib/format_text/import_vsn1.c | 3 +- lib/metadata/metadata-liblvm.c | 2 +- 5 files changed, 175 insertions(+), 80 deletions(-) diff --git a/lib/cache/lvmcache.c b/lib/cache/lvmcache.c index fb8100222..c0b2202e6 100644 --- a/lib/cache/lvmcache.c +++ b/lib/cache/lvmcache.c @@ -72,6 +72,7 @@ struct lvmcache_vginfo { unsigned vg_use_count; /* Counter of vg reusage */ unsigned precommitted; /* Is vgmetadata live or precommitted? 
*/ unsigned cached_vg_invalidated; /* Signal to regenerate cached_vg */ + int independent_metadata_location; /* metadata read from independent areas */ }; static struct dm_hash_table *_pvid_hash = NULL; @@ -542,7 +543,6 @@ const struct format_type *lvmcache_fmt_from_vgname(struct cmd_context *cmd, { struct lvmcache_vginfo *vginfo; struct lvmcache_info *info; - struct label *label; struct dm_list *devh, *tmp; struct dm_list devs; struct device_list *devl; @@ -587,7 +587,7 @@ const struct format_type *lvmcache_fmt_from_vgname(struct cmd_context *cmd, dm_list_iterate_safe(devh, tmp, &devs) { devl = dm_list_item(devh, struct device_list); - (void) label_read(devl->dev, &label, UINT64_C(0)); + label_read(devl->dev, NULL, UINT64_C(0)); dm_list_del(&devl->list); dm_free(devl); } @@ -750,7 +750,7 @@ char *lvmcache_vgname_from_pvid(struct cmd_context *cmd, const char *pvid) struct lvmcache_info *info; char *vgname; - if (!lvmcache_device_from_pvid(cmd, (const struct id *)pvid, NULL, NULL)) { + if (!lvmcache_device_from_pvid(cmd, (const struct id *)pvid, NULL)) { log_error("Couldn't find device with uuid %s.", pvid); return NULL; } @@ -766,19 +766,42 @@ char *lvmcache_vgname_from_pvid(struct cmd_context *cmd, const char *pvid) return vgname; } -static void _rescan_entry(struct lvmcache_info *info) +/* + * FIXME: get rid of the CACHE_INVALID state and rescanning + * infos with that flag. The code should just know which devices + * need scanning and when. 
+ */ +static int _label_scan_invalid(struct cmd_context *cmd) { - struct label *label; + struct dm_list devs; + struct dm_hash_node *n; + struct device_list *devl; + struct lvmcache_info *info; + int dev_count = 0; + int ret; - if (info->status & CACHE_INVALID) - (void) label_read(info->dev, &label, UINT64_C(0)); -} + dm_list_init(&devs); -static int _scan_invalid(void) -{ - dm_hash_iter(_pvid_hash, (dm_hash_iterate_fn) _rescan_entry); + dm_hash_iterate(n, _pvid_hash) { + if (!(info = dm_hash_get_data(_pvid_hash, n))) + continue; - return 1; + if (!(info->status & CACHE_INVALID)) + continue; + + if (!(devl = dm_pool_zalloc(cmd->mem, sizeof(*devl)))) + return_0; + + devl->dev = info->dev; + dm_list_add(&devs, &devl->list); + dev_count++; + } + + log_debug_cache("Scanning %d devs with invalid info.", dev_count); + + ret = label_scan_devs(cmd, &devs); + + return ret; } /* @@ -1093,17 +1116,89 @@ next: goto next; } +/* + * The initial label_scan at the start of the command is done without + * holding VG locks. Then for each VG identified during the label_scan, + * vg_read(vgname) is called while holding the VG lock. The labels + * and metadata on this VG's devices could have changed between the + * initial unlocked label_scan and the current vg_read(). So, we reread + * the labels/metadata for each device in the VG now that we hold the + * lock, and use this for processing the VG. + * + * FIXME: In some cases, the data read by label_scan may be fine, and not + * need to be reread here. e.g. a reporting command, possibly with a + * special option, could skip this second reread. Or, we could look + * at the VG seqno in each copy of the metadata read in the first label + * scan, and if they all match, consider it good enough to use for + * reporting without rereading it. (A command modifying the VG would + * always want to reread while the lock is held before modifying.) 
+ *
+ * A label scan is ultimately creating associations between devices
+ * and VGs so that when vg_read wants to get VG metadata, it knows
+ * which devices to read.  In the special case where VG metadata is
+ * stored in files on the file system (configured in lvm.conf), the
+ * vginfo->independent_metadata_location flag is set during label scan.
+ * When we get here to rescan, we are revalidating the device to VG
+ * mapping from label scan by repeating the label scan on a subset of
+ * devices.  If we see independent_metadata_location is set from the
+ * initial label scan, we know that there is nothing to do because
+ * there is no device to VG mapping to revalidate, since the VG metadata
+ * comes directly from files.
+ *
+ * Returns 1 when there is nothing to rescan or the rescan was issued,
+ * 0 when the VG is not in lvmcache or the device list cannot be built.
+ */
+
+int lvmcache_label_rescan_vg(struct cmd_context *cmd, const char *vgname, const char *vgid)
+{
+	struct dm_list devs;
+	struct device_list *devl;
+	struct lvmcache_vginfo *vginfo;
+	struct lvmcache_info *info;
+
+	if (lvmetad_used())
+		return 1;
+
+	dm_list_init(&devs);
+
+	if (!(vginfo = lvmcache_vginfo_from_vgname(vgname, vgid)))
+		return_0;
+
+	/*
+	 * When the VG metadata is from an independent location,
+	 * then rescanning the devices in the VG won't find the
+	 * metadata, and will destroy the vginfo/info associations
+	 * that were created during label scan when the
+	 * independent locations were read.
+	 */
+	if (vginfo->independent_metadata_location)
+		return 1;
+
+	dm_list_iterate_items(info, &vginfo->infos) {
+		/*
+		 * Allocate the entries from the command pool, as the other
+		 * device-list builders in this file do: nothing frees them
+		 * individually, neither after the scan nor on this error
+		 * path, so heap allocations would be leaked.
+		 */
+		if (!(devl = dm_pool_zalloc(cmd->mem, sizeof(*devl)))) {
+			log_error("device_list element allocation failed");
+			return 0;
+		}
+		devl->dev = info->dev;
+		dm_list_add(&devs, &devl->list);
+	}
+
+	label_scan_devs(cmd, &devs);
+
+	/*
+	 * TODO: grab vginfo again, and compare vginfo->infos
+	 * to what was found above before rereading labels.
+	 * If there are any info->devs now that were not in the
+	 * first devs list, then do label_read on those also.
+ */ + + return 1; +} + int lvmcache_label_scan(struct cmd_context *cmd) { struct dm_list del_cache_devs; struct dm_list add_cache_devs; struct lvmcache_info *info; struct device_list *devl; - struct label *label; - struct dev_iter *iter; - struct device *dev; struct format_type *fmt; - int dev_count = 0; int r = 0; @@ -1121,34 +1216,40 @@ int lvmcache_label_scan(struct cmd_context *cmd) goto out; } + /* + * Scan devices whose info struct has the INVALID flag set. + * When scanning has read the pv_header, mda_header and + * mda locations, it will clear the INVALID flag (via + * lvmcache_make_valid). + */ if (_has_scanned && !_force_label_scan) { - r = _scan_invalid(); + r = _label_scan_invalid(cmd); goto out; } if (_force_label_scan && (cmd->full_filter && !cmd->full_filter->use_count) && !refresh_filters(cmd)) goto_out; - if (!cmd->full_filter || !(iter = dev_iter_create(cmd->full_filter, _force_label_scan))) { - log_error("dev_iter creation failed"); + if (!cmd->full_filter) { + log_error("label scan is missing full filter"); goto out; } - log_very_verbose("Scanning device labels"); - /* * Duplicates found during this label scan are added to _found_duplicate_devs(). */ _destroy_duplicate_device_list(&_found_duplicate_devs); - while ((dev = dev_iter_get(iter))) { - (void) label_read(dev, &label, UINT64_C(0)); - dev_count++; - } - - dev_iter_destroy(iter); - - log_very_verbose("Scanned %d device labels", dev_count); + /* + * Do the actual scanning. This populates lvmcache + * with infos/vginfos based on reading headers from + * each device, and a vg summary from each mda. + * + * Note that this will *skip* scanning a device if + * an info struct already exists in lvmcache for + * the device. 
+ */ + label_scan(cmd); /* * _choose_preferred_devs() returns: @@ -1182,7 +1283,7 @@ int lvmcache_label_scan(struct cmd_context *cmd) dm_list_iterate_items(devl, &add_cache_devs) { log_debug_cache("Rescan preferred device %s for lvmcache", dev_name(devl->dev)); - (void) label_read(devl->dev, &label, UINT64_C(0)); + label_read(devl->dev, NULL, UINT64_C(0)); } dm_list_splice(&_unused_duplicate_devs, &del_cache_devs); @@ -1441,61 +1542,45 @@ struct dm_list *lvmcache_get_pvids(struct cmd_context *cmd, const char *vgname, return pvids; } -static struct device *_device_from_pvid(const struct id *pvid, - uint64_t *label_sector) +int lvmcache_get_vg_devs(struct cmd_context *cmd, + struct lvmcache_vginfo *vginfo, + struct dm_list *devs) +{ + struct lvmcache_info *info; + struct device_list *devl; + + dm_list_iterate_items(info, &vginfo->infos) { + if (!(devl = dm_pool_zalloc(cmd->mem, sizeof(*devl)))) + return_0; + + devl->dev = info->dev; + dm_list_add(devs, &devl->list); + } + return 1; +} + +static struct device *_device_from_pvid(const struct id *pvid, uint64_t *label_sector) { struct lvmcache_info *info; - struct label *label; if ((info = lvmcache_info_from_pvid((const char *) pvid, NULL, 0))) { - if (lvmetad_used()) { - if (info->label && label_sector) - *label_sector = info->label->sector; - return info->dev; - } - - if (label_read(info->dev, &label, UINT64_C(0))) { - info = (struct lvmcache_info *) label->info; - if (id_equal(pvid, (struct id *) &info->dev->pvid)) { - if (label_sector) - *label_sector = label->sector; - return info->dev; - } - } + if (info->label && label_sector) + *label_sector = info->label->sector; + return info->dev; } + return NULL; } -struct device *lvmcache_device_from_pvid(struct cmd_context *cmd, const struct id *pvid, - unsigned *scan_done_once, uint64_t *label_sector) +struct device *lvmcache_device_from_pvid(struct cmd_context *cmd, const struct id *pvid, uint64_t *label_sector) { struct device *dev; - /* Already cached ? 
*/ - dev = _device_from_pvid(pvid, label_sector); - if (dev) - return dev; - - lvmcache_label_scan(cmd); - - /* Try again */ - dev = _device_from_pvid(pvid, label_sector); - if (dev) - return dev; - - if (critical_section() || (scan_done_once && *scan_done_once)) - return NULL; - - lvmcache_force_next_label_scan(); - lvmcache_label_scan(cmd); - if (scan_done_once) - *scan_done_once = 1; - - /* Try again */ dev = _device_from_pvid(pvid, label_sector); if (dev) return dev; + log_debug_devs("No device with uuid %s.", (const char *)pvid); return NULL; } @@ -1503,7 +1588,6 @@ const char *lvmcache_pvid_from_devname(struct cmd_context *cmd, const char *devname) { struct device *dev; - struct label *label; if (!(dev = dev_cache_get(devname, cmd->filter))) { log_error("%s: Couldn't find device. Check your filters?", @@ -1511,7 +1595,7 @@ const char *lvmcache_pvid_from_devname(struct cmd_context *cmd, return NULL; } - if (!(label_read(dev, &label, UINT64_C(0)))) + if (!(label_read(dev, NULL, UINT64_C(0)))) return NULL; return dev->pvid; @@ -2657,6 +2741,14 @@ int lvmcache_vgid_is_cached(const char *vgid) { return 1; } +void lvmcache_set_independent_location(const char *vgname) +{ + struct lvmcache_vginfo *vginfo; + + if ((vginfo = lvmcache_vginfo_from_vgname(vgname, NULL))) + vginfo->independent_metadata_location = 1; +} + /* * Return true iff it is impossible to find out from this info alone whether the * PV in question is or is not an orphan. 
diff --git a/lib/cache/lvmcache.h b/lib/cache/lvmcache.h index 847c208f1..826e91e96 100644 --- a/lib/cache/lvmcache.h +++ b/lib/cache/lvmcache.h @@ -74,6 +74,7 @@ void lvmcache_destroy(struct cmd_context *cmd, int retain_orphans, int reset); */ void lvmcache_force_next_label_scan(void); int lvmcache_label_scan(struct cmd_context *cmd); +int lvmcache_label_rescan_vg(struct cmd_context *cmd, const char *vgname, const char *vgid); /* Add/delete a device */ struct lvmcache_info *lvmcache_add(struct labeller *labeller, const char *pvid, @@ -105,10 +106,8 @@ struct lvmcache_vginfo *lvmcache_vginfo_from_vgid(const char *vgid); struct lvmcache_info *lvmcache_info_from_pvid(const char *pvid, struct device *dev, int valid_only); const char *lvmcache_vgname_from_vgid(struct dm_pool *mem, const char *vgid); const char *lvmcache_vgid_from_vgname(struct cmd_context *cmd, const char *vgname); -struct device *lvmcache_device_from_pvid(struct cmd_context *cmd, const struct id *pvid, - unsigned *scan_done_once, uint64_t *label_sector); -const char *lvmcache_pvid_from_devname(struct cmd_context *cmd, - const char *devname); +struct device *lvmcache_device_from_pvid(struct cmd_context *cmd, const struct id *pvid, uint64_t *label_sector); +const char *lvmcache_pvid_from_devname(struct cmd_context *cmd, const char *devname); char *lvmcache_vgname_from_pvid(struct cmd_context *cmd, const char *pvid); const char *lvmcache_vgname_from_info(struct lvmcache_info *info); const struct format_type *lvmcache_fmt_from_info(struct lvmcache_info *info); @@ -215,4 +214,9 @@ void lvmcache_remove_unchosen_duplicate(struct device *dev); int lvmcache_pvid_in_unchosen_duplicates(const char *pvid); +int lvmcache_get_vg_devs(struct cmd_context *cmd, + struct lvmcache_vginfo *vginfo, + struct dm_list *devs); +void lvmcache_set_independent_location(const char *vgname); + #endif diff --git a/lib/format_text/format-text.c b/lib/format_text/format-text.c index e9a34e696..c438b2da7 100644 --- 
a/lib/format_text/format-text.c +++ b/lib/format_text/format-text.c @@ -2534,7 +2534,7 @@ static int _get_config_disk_area(struct cmd_context *cmd, return 0; } - if (!(dev_area.dev = lvmcache_device_from_pvid(cmd, &id, NULL, NULL))) { + if (!(dev_area.dev = lvmcache_device_from_pvid(cmd, &id, NULL))) { char buffer[64] __attribute__((aligned(8))); if (!id_write_format(&id, buffer, sizeof(buffer))) diff --git a/lib/format_text/import_vsn1.c b/lib/format_text/import_vsn1.c index d51397a00..b41d83c8f 100644 --- a/lib/format_text/import_vsn1.c +++ b/lib/format_text/import_vsn1.c @@ -216,8 +216,7 @@ static int _read_pv(struct format_instance *fid, /* * Convert the uuid into a device. */ - if (!(pv->dev = lvmcache_device_from_pvid(fid->fmt->cmd, &pv->id, scan_done_once, - &pv->label_sector))) { + if (!(pv->dev = lvmcache_device_from_pvid(fid->fmt->cmd, &pv->id, &pv->label_sector))) { char buffer[64] __attribute__((aligned(8))); if (!id_write_format(&pv->id, buffer, sizeof(buffer))) diff --git a/lib/metadata/metadata-liblvm.c b/lib/metadata/metadata-liblvm.c index a4284bc89..388e8d9f0 100644 --- a/lib/metadata/metadata-liblvm.c +++ b/lib/metadata/metadata-liblvm.c @@ -314,7 +314,7 @@ struct physical_volume *pvcreate_vol(struct cmd_context *cmd, const char *pv_nam } if (pp->pva.idp) { - if ((dev = lvmcache_device_from_pvid(cmd, pp->pva.idp, NULL, NULL)) && + if ((dev = lvmcache_device_from_pvid(cmd, pp->pva.idp, NULL)) && (dev != dev_cache_get(pv_name, cmd->full_filter))) { if (!id_write_format((const struct id*)&pp->pva.idp->uuid, buffer, sizeof(buffer))) From 748f29b42a61e05fb696a86e04b4b589d70d6d79 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Wed, 7 Feb 2018 13:26:37 -0600 Subject: [PATCH 25/87] scan: do scanning at the start of a command Move the location of scans to make it clearer and avoid unnecessary repeated scanning. There should be one scan at the start of a command which is then used through the rest of command processing. 
Previously, the initial label scan was called as a side effect from various utility functions. This would lead to it being called unnecessarily. It is an expensive operation, and should only be called when necessary. Also, this is a primary step in the function of the command, and as such it should be called prominently at the top level of command processing, not as a hidden side effect of a utility function. lvm knows exactly where and when the label scan needs to be done. Because of this, move the label scan calls from the internal functions to the top level of processing. Other specific instances of lvmcache_label_scan() are still called unnecessarily or unclearly by specific commands that do not use the common process_each functions. These will be improved in future commits. During the processing phase, rescanning labels for devices in a VG needs to be done after the VG lock is acquired in case things have changed since the initial label scan. This was being done by way of rescanning devices that had the INVALID flag set in lvmcache. This usually approximated the right set of devices, but it was not exact, and obfuscated the real requirement. Correct this by using a new function that rescans the devices in the VG: lvmcache_label_rescan_vg(). Apart from being inexact, the rescanning was extremely well hidden. _vg_read() would call ->create_instance(), _text_create_text_instance(), _create_vg_text_instance() which would call lvmcache_label_scan() which would call _scan_invalid() which repeats the label scan on devices flagged INVALID. lvmcache_label_rescan_vg() is now called prominently by _vg_read() directly. 
--- lib/cache/lvmcache.c | 2 -- lib/format_text/format-text.c | 16 --------- lib/metadata/metadata.c | 62 ++++++++++++++++++++++++++++------- tools/toollib.c | 27 ++++++++++----- tools/vgcfgrestore.c | 2 ++ 5 files changed, 71 insertions(+), 38 deletions(-) diff --git a/lib/cache/lvmcache.c b/lib/cache/lvmcache.c index c0b2202e6..47058ccfc 100644 --- a/lib/cache/lvmcache.c +++ b/lib/cache/lvmcache.c @@ -1436,8 +1436,6 @@ int lvmcache_get_vgnameids(struct cmd_context *cmd, int include_internal, struct vgnameid_list *vgnl; struct lvmcache_vginfo *vginfo; - lvmcache_label_scan(cmd); - dm_list_iterate_items(vginfo, &_vginfos) { if (!include_internal && is_orphan_vg(vginfo->vgname)) continue; diff --git a/lib/format_text/format-text.c b/lib/format_text/format-text.c index c438b2da7..6c1334684 100644 --- a/lib/format_text/format-text.c +++ b/lib/format_text/format-text.c @@ -2049,22 +2049,6 @@ static int _create_vg_text_instance(struct format_instance *fid, } if (type & FMT_INSTANCE_MDAS) { - /* - * TODO in theory, this function should be never reached - * while in critical_section(), because lvmcache's - * cached_vg should be valid. However, this assumption - * sometimes fails (possibly due to inconsistent - * (precommit) metadata and/or missing devices), and - * calling lvmcache_label_scan inside the critical - * section may be fatal (i.e. deadlock). - */ - if (!critical_section()) - /* Scan PVs in VG for any further MDAs */ - /* - * FIXME Only scan PVs believed to be in the VG. - */ - lvmcache_label_scan(fid->fmt->cmd); - if (!(vginfo = lvmcache_vginfo_from_vgname(vg_name, vg_id))) goto_out; if (!lvmcache_fid_add_mdas_vg(vginfo, fid)) diff --git a/lib/metadata/metadata.c b/lib/metadata/metadata.c index 00b7737b3..b4ee20470 100644 --- a/lib/metadata/metadata.c +++ b/lib/metadata/metadata.c @@ -3859,20 +3859,28 @@ static struct volume_group *_vg_read(struct cmd_context *cmd, correct_vg = NULL; } + /* + * Rescan the devices that are associated with this vg in lvmcache. 
+ * This repeats what was done by the command's initial label scan, + * but only for the devices associated with this VG. + * + * The lvmcache info about these devs is from the initial label scan + * performed by the command before the vg lock was held. Now the VG + * lock is held, so we rescan all the info from the devs in case + * something changed between the initial scan and now that the lock + * is held. + */ + log_debug_metadata("Reading VG rereading labels for %s", vgname); - /* Find the vgname in the cache */ - /* If it's not there we must do full scan to be completely sure */ - if (!(fmt = lvmcache_fmt_from_vgname(cmd, vgname, vgid, 1))) { + if (!lvmcache_label_rescan_vg(cmd, vgname, vgid)) { + /* The VG wasn't found, so force a full label scan. */ + lvmcache_force_next_label_scan(); lvmcache_label_scan(cmd); - if (!(fmt = lvmcache_fmt_from_vgname(cmd, vgname, vgid, 1))) { - /* Independent MDAs aren't supported under low memory */ - if (!cmd->independent_metadata_areas && prioritized_section()) - return_NULL; - lvmcache_force_next_label_scan(); - lvmcache_label_scan(cmd); - if (!(fmt = lvmcache_fmt_from_vgname(cmd, vgname, vgid, 0))) - return_NULL; - } + } + + if (!(fmt = lvmcache_fmt_from_vgname(cmd, vgname, vgid, 0))) { + log_debug_metadata("Cache did not find fmt for vgname %s", vgname); + return_NULL; } /* Now determine the correct vgname if none was supplied */ @@ -3890,6 +3898,36 @@ static struct volume_group *_vg_read(struct cmd_context *cmd, if (use_precommitted && !(fmt->features & FMT_PRECOMMIT)) use_precommitted = 0; + /* + * A "format instance" is an abstraction for a VG location, + * i.e. where a VG's metadata exists on disk. + * + * An fic (format_instance_ctx) is a temporary struct used + * to create an fid (format_instance). The fid hangs around + * and is used to create a 'vg' to which it is connected (vg->fid). + * + * The 'fic' describes a VG in terms of fmt/name/id. 
+ * + * The 'fid' describes a VG in more detail than the fic, + * holding information about where to find the VG metadata. + * + * The 'vg' describes the VG in the most detail representing + * all the VG metadata. + * + * The fic and fid are set up by create_instance() to describe + * the VG location. This happens before the VG metadata is + * assembled into the more familiar struct volume_group "vg". + * + * The fid has one main purpose: to keep track of the metadata + * locations for a given VG. It does this by putting 'mda' + * structs on fid->metadata_areas_in_use, which specify where + * metadata is located on disk. It gets this information + * (metadata locations for a specific VG) from the command's + * initial label scan. The info is passed indirectly via + * lvmcache info/vginfo structs, which are created by the + * label scan and then copied into fid by create_instance(). + */ + /* create format instance with appropriate metadata area */ fic.type = FMT_INSTANCE_MDAS | FMT_INSTANCE_AUX_MDAS; fic.context.vg_ref.vg_name = vgname; diff --git a/tools/toollib.c b/tools/toollib.c index 451f24dab..1c216d807 100644 --- a/tools/toollib.c +++ b/tools/toollib.c @@ -2228,14 +2228,10 @@ int process_each_vg(struct cmd_context *cmd, } /* - * First rescan for available devices, then force the next - * label scan to be done. get_vgnameids() will scan labels - * (when not using lvmetad). + * Scan all devices to populate lvmcache with initial + * list of PVs and VGs. */ - if (cmd->cname->flags & REQUIRES_FULL_LABEL_SCAN) { - dev_cache_full_scan(cmd->full_filter); - lvmcache_force_next_label_scan(); - } + lvmcache_label_scan(cmd); /* * A list of all VGs on the system is needed when: @@ -3744,6 +3740,12 @@ int process_each_lv(struct cmd_context *cmd, goto_out; } + /* + * Scan all devices to populate lvmcache with initial + * list of PVs and VGs. + */ + lvmcache_label_scan(cmd); + /* * A list of all VGs on the system is needed when: * . 
processing all VGs on the system @@ -4453,7 +4455,12 @@ int process_each_pv(struct cmd_context *cmd, if (!trust_cache() && !orphans_locked) { log_debug("Scanning for available devices"); lvmcache_destroy(cmd, 1, 0); - dev_cache_full_scan(cmd->full_filter); + + /* + * Scan all devices to populate lvmcache with initial + * list of PVs and VGs. + */ + lvmcache_label_scan(cmd); } if (!get_vgnameids(cmd, &all_vgnameids, only_this_vgname, 1)) { @@ -5467,6 +5474,8 @@ int pvcreate_each_device(struct cmd_context *cmd, dev_cache_full_scan(cmd->full_filter); + lvmcache_label_scan(cmd); + /* * Translate arg names into struct device's. */ @@ -5621,6 +5630,8 @@ int pvcreate_each_device(struct cmd_context *cmd, goto out; } + lvmcache_label_scan(cmd); + /* * The device args began on the arg_devices list, then the first check * loop moved those entries to arg_process as they were found. Devices diff --git a/tools/vgcfgrestore.c b/tools/vgcfgrestore.c index b5a2add12..e9f1a4c34 100644 --- a/tools/vgcfgrestore.c +++ b/tools/vgcfgrestore.c @@ -74,6 +74,8 @@ int vgcfgrestore(struct cmd_context *cmd, int argc, char **argv) return ECMD_FAILED; } + lvmcache_label_scan(cmd); + cmd->handles_unknown_segments = 1; if (!(arg_is_set(cmd, file_ARG) ? From d9ef9eb330bdc66dd6d9b45713d5c0b25d645ac0 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Wed, 25 Oct 2017 13:39:46 -0500 Subject: [PATCH 26/87] label_scan: fix independent metadata areas This fixes the use of lvmcache_label_rescan_vg() in the previous commit for the special case of independent metadata areas. label scan is about discovering VG name to device associations using information from disks, but devices in VGs with independent metadata areas have no information on disk, so the label scan does nothing for these VGs/devices. With independent metadata areas, only the VG metadata found in files is used. This metadata is found and read in vg_read in the processing phase. 
lvmcache_label_rescan_vg() drops lvmcache info for the VG devices before repeating the label scan on them. In the case of independent metadata areas, there is no metadata on devices, so the label scan of the devices will find nothing, so will not recreate the necessary vginfo/info data in lvmcache for the VG. Fix this by setting a flag in the lvmcache vginfo struct indicating that the VG uses independent metadata areas, and label rescanning should be skipped. In the case of independent metadata areas, it is the metadata processing in the vg_read phase that sets up the lvmcache vginfo/info information, and label scan has no role. --- lib/format_text/format-text.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/lib/format_text/format-text.c b/lib/format_text/format-text.c index 6c1334684..8c12c825d 100644 --- a/lib/format_text/format-text.c +++ b/lib/format_text/format-text.c @@ -1141,6 +1141,9 @@ static int _scan_file(const struct format_type *fmt, const char *vgname) dir_list = &((struct mda_lists *) fmt->private)->dirs; + if (!dm_list_empty(dir_list)) + log_debug_metadata("Scanning independent files for %s", vgname ? 
vgname : "VGs"); + dm_list_iterate_items(dl, dir_list) { if (!(d = opendir(dl->dir))) { log_sys_error("opendir", dl->dir); @@ -1173,10 +1176,14 @@ static int _scan_file(const struct format_type *fmt, const char *vgname) stack; break; } + + log_debug_metadata("Scanning independent file %s for VG %s", path, scanned_vgname); + if ((vg = _vg_read_file_name(fid, scanned_vgname, path))) { /* FIXME Store creation host in vg */ lvmcache_update_vg(vg, 0); + lvmcache_set_independent_location(vg->name); release_vg(vg); } } @@ -1302,6 +1309,8 @@ int read_metadata_location_summary(const struct format_type *fmt, return 1; } +/* used for independent_metadata_areas */ + static int _scan_raw(const struct format_type *fmt, const char *vgname __attribute__((unused))) { struct raw_list *rl; @@ -1313,11 +1322,16 @@ static int _scan_raw(const struct format_type *fmt, const char *vgname __attribu raw_list = &((struct mda_lists *) fmt->private)->raws; + if (!dm_list_empty(raw_list)) + log_debug_metadata("Scanning independent raw locations for %s", vgname ? vgname : "VGs"); + fid.fmt = fmt; dm_list_init(&fid.metadata_areas_in_use); dm_list_init(&fid.metadata_areas_ignored); dm_list_iterate_items(rl, raw_list) { + log_debug_metadata("Scanning independent dev %s", dev_name(rl->dev_area.dev)); + /* FIXME We're reading mdah twice here... 
*/ if (!dev_open_readonly(rl->dev_area.dev)) { stack; @@ -1331,8 +1345,10 @@ static int _scan_raw(const struct format_type *fmt, const char *vgname __attribu if (read_metadata_location_summary(fmt, mdah, 0, &rl->dev_area, &vgsummary, NULL)) { vg = _vg_read_raw_area(&fid, vgsummary.vgname, &rl->dev_area, NULL, NULL, 0, 0, 0); - if (vg) + if (vg) { lvmcache_update_vg(vg, 0); + lvmcache_set_independent_location(vg->name); + } } close_dev: if (!dev_close(rl->dev_area.dev)) @@ -1342,6 +1358,8 @@ static int _scan_raw(const struct format_type *fmt, const char *vgname __attribu return 1; } +/* used for independent_metadata_areas */ + static int _text_scan(const struct format_type *fmt, const char *vgname) { return (_scan_file(fmt, vgname) & _scan_raw(fmt, vgname)); @@ -1796,6 +1814,8 @@ static struct metadata_area_ops _metadata_text_raw_ops = { .mda_import_text = _mda_import_text_raw }; +/* used only for sending info to lvmetad */ + static int _mda_export_text_raw(struct metadata_area *mda, struct dm_config_tree *cft, struct dm_config_node *parent) @@ -1820,6 +1840,8 @@ static int _mda_export_text_raw(struct metadata_area *mda, NULL) ? 1 : 0; } +/* used only for receiving info from lvmetad */ + static int _mda_import_text_raw(struct lvmcache_info *info, const struct dm_config_node *cn) { struct device *device; From 098c843c50cdcc2e4f4162037e1ff5975624f3e2 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Wed, 25 Oct 2017 13:55:22 -0500 Subject: [PATCH 27/87] independent metadata areas: fix bogus code Fix mixing bitwise & and logical && which was always 1 in any case. 
--- lib/format_text/format-text.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/format_text/format-text.c b/lib/format_text/format-text.c index 8c12c825d..9538080f0 100644 --- a/lib/format_text/format-text.c +++ b/lib/format_text/format-text.c @@ -1362,7 +1362,9 @@ static int _scan_raw(const struct format_type *fmt, const char *vgname __attribu static int _text_scan(const struct format_type *fmt, const char *vgname) { - return (_scan_file(fmt, vgname) & _scan_raw(fmt, vgname)); + _scan_file(fmt, vgname); + _scan_raw(fmt, vgname); + return 1; } struct _write_single_mda_baton { From 9c71fa02144619a67993920cee2146fed820f49c Mon Sep 17 00:00:00 2001 From: David Teigland Date: Thu, 26 Oct 2017 10:58:23 -0500 Subject: [PATCH 28/87] lvmetad: use new label_scan for update from lvmlockd When lvmlockd indicates that the lvmetad cache is out of date because of changes by another node, lvmetad_pvscan_vg() rescans the devices in the VG to update lvmetad. Use the new label_scan in this function to use the common code and take advantage of the new aio and reduced reads. --- lib/cache/lvmetad.c | 437 ++++++++++++++++++++++++++++++-------------- 1 file changed, 299 insertions(+), 138 deletions(-) diff --git a/lib/cache/lvmetad.c b/lib/cache/lvmetad.c index 4b7410cec..552dbf0fd 100644 --- a/lib/cache/lvmetad.c +++ b/lib/cache/lvmetad.c @@ -39,7 +39,7 @@ static int64_t _lvmetad_update_timeout; static int _found_lvm1_metadata = 0; -static struct volume_group *_lvmetad_pvscan_vg(struct cmd_context *cmd, struct volume_group *vg); +static struct volume_group *_lvmetad_pvscan_vg(struct cmd_context *cmd, struct volume_group *vg, const char *vgid, struct format_type *fmt); static uint64_t _monotonic_seconds(void) { @@ -1093,14 +1093,17 @@ struct volume_group *lvmetad_vg_lookup(struct cmd_context *cmd, const char *vgna * invalidated the cached vg. 
*/ if (rescan) { - if (!(vg2 = _lvmetad_pvscan_vg(cmd, vg))) { + if (!(vg2 = _lvmetad_pvscan_vg(cmd, vg, vgid, fmt))) { log_debug_lvmetad("VG %s from lvmetad not found during rescan.", vgname); fid = NULL; release_vg(vg); vg = NULL; goto out; } + fid->ref_count++; release_vg(vg); + fid->ref_count--; + fmt->ops->destroy_instance(fid); vg = vg2; fid = vg2->fid; } @@ -1108,14 +1111,14 @@ struct volume_group *lvmetad_vg_lookup(struct cmd_context *cmd, const char *vgna dm_list_iterate_items(pvl, &vg->pvs) { if (!_pv_update_struct_pv(pvl->pv, fid)) { vg = NULL; - goto_out; /* FIXME error path */ + goto_out; /* FIXME: use an error path that disables lvmetad */ } } dm_list_iterate_items(pvl, &vg->pvs_outdated) { if (!_pv_update_struct_pv(pvl->pv, fid)) { vg = NULL; - goto_out; /* FIXME error path */ + goto_out; /* FIXME: use an error path that disables lvmetad */ } } @@ -1761,6 +1764,7 @@ int lvmetad_pv_gone_by_dev(struct device *dev) */ struct _lvmetad_pvscan_baton { + struct cmd_context *cmd; struct volume_group *vg; struct format_instance *fid; }; @@ -1771,7 +1775,7 @@ static int _lvmetad_pvscan_single(struct metadata_area *mda, void *baton) struct volume_group *vg; if (mda_is_ignored(mda) || - !(vg = mda->ops->vg_read(b->fid, "", mda, NULL, NULL, 1))) + !(vg = mda->ops->vg_read(b->fid, "", mda, NULL, NULL))) return 1; /* FIXME Also ensure contents match etc. */ @@ -1783,6 +1787,33 @@ static int _lvmetad_pvscan_single(struct metadata_area *mda, void *baton) return 1; } +/* + * FIXME: handle errors and do proper comparison of metadata from each area + * like vg_read and fall back to real vg_read from disk if there's any problem. 
+ */ + +static int _lvmetad_pvscan_vg_single(struct metadata_area *mda, void *baton) +{ + struct _lvmetad_pvscan_baton *b = baton; + struct volume_group *vg = NULL; + + if (mda_is_ignored(mda)) + return 1; + + if (!(vg = mda->ops->vg_read(b->fid, "", mda, NULL, NULL))) + return 1; + + if (!b->vg) + b->vg = vg; + else if (vg->seqno > b->vg->seqno) { + release_vg(b->vg); + b->vg = vg; + } else + release_vg(vg); + + return 1; +} + /* * The lock manager may detect that the vg cached in lvmetad is out of date, * due to something like an lvcreate from another host. @@ -1792,41 +1823,41 @@ static int _lvmetad_pvscan_single(struct metadata_area *mda, void *baton) * the VG, and that PV may have been reused for another VG. */ -static struct volume_group *_lvmetad_pvscan_vg(struct cmd_context *cmd, struct volume_group *vg) +static struct volume_group *_lvmetad_pvscan_vg(struct cmd_context *cmd, struct volume_group *vg, + const char *vgid, struct format_type *fmt) { char pvid_s[ID_LEN + 1] __attribute__((aligned(8))); char uuid[64] __attribute__((aligned(8))); - struct label *label; - struct volume_group *vg_ret = NULL; - struct dm_config_tree *vgmeta_ret = NULL; struct dm_config_tree *vgmeta; struct pv_list *pvl, *pvl_new; - struct device_list *devl, *devl_new, *devlsafe; + struct device_list *devl, *devlsafe; struct dm_list pvs_scan; struct dm_list pvs_drop; - struct dm_list pvs_new; + struct lvmcache_vginfo *vginfo = NULL; struct lvmcache_info *info = NULL; struct format_instance *fid; struct format_instance_ctx fic = { .type = 0 }; struct _lvmetad_pvscan_baton baton; + struct volume_group *save_vg; + struct dm_config_tree *save_meta; struct device *save_dev = NULL; uint32_t save_seqno = 0; - int missing_devs = 0; - int check_new_pvs = 0; + int found_new_pvs = 0; + int retried_reads = 0; int found; + save_vg = NULL; + save_meta = NULL; + save_dev = NULL; + save_seqno = 0; + dm_list_init(&pvs_scan); dm_list_init(&pvs_drop); - dm_list_init(&pvs_new); - 
log_debug_lvmetad("Rescanning VG %s (seqno %u).", vg->name, vg->seqno); + log_debug_lvmetad("Rescan VG %s to update lvmetad (seqno %u).", vg->name, vg->seqno); /* - * Another host may have added a PV to the VG, and some - * commands do not always populate their lvmcache with - * all devs from lvmetad, so they would fail to find - * the new PV when scanning the VG. So make sure this - * command knows about all PVs from lvmetad. + * Make sure this command knows about all PVs from lvmetad. */ lvmcache_seed_infos_from_lvmetad(cmd); @@ -1841,54 +1872,111 @@ static struct volume_group *_lvmetad_pvscan_vg(struct cmd_context *cmd, struct v dm_list_add(&pvs_scan, &devl->list); } -scan_more: + /* + * Rescan labels/metadata only from devs that we previously + * saw in the VG. If we find below that there are new PVs + * in the VG, we'll have to rescan all devices to find which + * device(s) are now being used. + */ + log_debug_lvmetad("Rescan VG %s scanning data from devs in previous metadata.", vg->name); + + label_scan_devs(cmd, &pvs_scan); /* - * Run the equivalent of lvmetad_pvscan_single on each dev in the VG. + * Check if any pvs_scan entries are no longer PVs. + * In that case, label_read/_find_label_header will have + * found no label_header, and would have dropped the + * info struct for the device from lvmcache. So, if + * we look up the info struct here and don't find it, + * we can infer it's no longer a PV. + * + * FIXME: we should record specific results from the + * label_read and then check specifically for whatever + * result means "no label was found", rather than going + * about this indirectly via the lvmcache side effects. + */ + dm_list_iterate_items_safe(devl, devlsafe, &pvs_scan) { + if (!(info = lvmcache_info_from_pvid(devl->dev->pvid, devl->dev, 0))) { + /* Another host removed this PV from the VG. 
*/ + log_debug_lvmetad("Rescan VG %s from %s dropping dev (no label).", + vg->name, dev_name(devl->dev)); + dm_list_move(&pvs_drop, &devl->list); + } + } + + fic.type = FMT_INSTANCE_MDAS | FMT_INSTANCE_AUX_MDAS; + fic.context.vg_ref.vg_name = vg->name; + fic.context.vg_ref.vg_id = vgid; + + retry_reads: + + if (!(fid = fmt->ops->create_instance(fmt, &fic))) { + /* FIXME: are there only internal reasons for failures here? */ + log_error("Reading VG %s failed to create format instance.", vg->name); + return NULL; + } + + /* FIXME: not sure if this is necessary */ + fid->ref_count++; + + baton.fid = fid; + baton.cmd = cmd; + + /* + * FIXME: this vg_read path does not have the ability to repair + * any problems with the VG, e.g. VG on one dev has an older + * seqno. When vg_read() is reworked, we need to fall back + * to using that from here (and vg_read's from lvmetad) when + * there is a problem. Perhaps by disabling lvmetad when a + * VG problem is detected, causing commands to fully fall + * back to disk, which will repair the VG. Then lvmetad can + * be repopulated and re-enabled (possibly automatically.) + */ + + /* + * Do a low level vg_read on each dev, verify the vg returned + * from metadata on each device is for the VG being read + * (the PV may have been removed from the VG being read and + * added to a different one), and return this vg to the caller + * as the current vg to use. + * + * The label scan above will have saved in lvmcache which + * vg each device is used in, so we could figure that part + * out without doing the vg_read. */ dm_list_iterate_items_safe(devl, devlsafe, &pvs_scan) { if (!devl->dev) continue; - log_debug_lvmetad("Rescan VG %s scanning %s.", vg->name, dev_name(devl->dev)); - - if (!label_read(devl->dev, &label, 0)) { - /* Another host removed this PV from the VG. 
*/ - log_debug_lvmetad("Rescan VG %s found %s was removed.", vg->name, dev_name(devl->dev)); - - if ((info = lvmcache_info_from_pvid(devl->dev->pvid, NULL, 0))) - lvmcache_del(info); + log_debug_lvmetad("Rescan VG %s getting metadata from %s.", + vg->name, dev_name(devl->dev)); + /* + * The info struct for this dev knows what and where + * the mdas are for this dev (the label scan saved + * the mda locations for this dev on the lvmcache info struct). + */ + if (!(info = lvmcache_info_from_pvid(devl->dev->pvid, devl->dev, 0))) { + log_debug_lvmetad("Rescan VG %s from %s dropping dev (no info).", + vg->name, dev_name(devl->dev)); dm_list_move(&pvs_drop, &devl->list); continue; } - info = (struct lvmcache_info *) label->info; - baton.vg = NULL; - baton.fid = lvmcache_fmt(info)->ops->create_instance(lvmcache_fmt(info), &fic); - if (!baton.fid) - return_NULL; - - if (baton.fid->fmt->features & FMT_OBSOLETE) { - log_debug_lvmetad("Ignoring obsolete format on PV %s in VG %s.", dev_name(devl->dev), vg->name); - lvmcache_fmt(info)->ops->destroy_instance(baton.fid); - dm_list_move(&pvs_drop, &devl->list); - continue; - } /* * Read VG metadata from this dev's mdas. */ - lvmcache_foreach_mda(info, _lvmetad_pvscan_single, &baton); + lvmcache_foreach_mda(info, _lvmetad_pvscan_vg_single, &baton); /* * The PV may have been removed from the VG by another host * since we last read the VG. */ if (!baton.vg) { - log_debug_lvmetad("Rescan VG %s did not find %s.", vg->name, dev_name(devl->dev)); - lvmcache_fmt(info)->ops->destroy_instance(baton.fid); + log_debug_lvmetad("Rescan VG %s from %s dropping dev (no metadata).", + vg->name, dev_name(devl->dev)); dm_list_move(&pvs_drop, &devl->list); continue; } @@ -1898,10 +1986,15 @@ scan_more: * different VG since we last read the VG. 
*/ if (strcmp(baton.vg->name, vg->name)) { - log_debug_lvmetad("Rescan VG %s found different VG %s on PV %s.", - vg->name, baton.vg->name, dev_name(devl->dev)); + log_debug_lvmetad("Rescan VG %s from %s dropping dev (other VG %s).", + vg->name, dev_name(devl->dev), baton.vg->name); + release_vg(baton.vg); + continue; + } + + if (!(vgmeta = export_vg_to_config_tree(baton.vg))) { + log_error("VG export to config tree failed"); release_vg(baton.vg); - dm_list_move(&pvs_drop, &devl->list); continue; } @@ -1911,20 +2004,35 @@ scan_more: * read from each other dev. */ - if (!save_seqno) - save_seqno = baton.vg->seqno; + if (save_vg && (save_seqno != baton.vg->seqno)) { + /* FIXME: fall back to vg_read to correct this. */ + log_warn("WARNING: inconsistent metadata for VG %s on devices %s seqno %u and %s seqno %u.", + vg->name, dev_name(save_dev), save_seqno, + dev_name(devl->dev), baton.vg->seqno); + log_warn("WARNING: temporarily disable lvmetad to repair metadata."); - if (!(vgmeta = export_vg_to_config_tree(baton.vg))) { - log_error("VG export to config tree failed"); - release_vg(baton.vg); - return NULL; + /* Use the most recent */ + if (save_seqno < baton.vg->seqno) { + release_vg(save_vg); + dm_config_destroy(save_meta); + save_vg = baton.vg; + save_meta = vgmeta; + save_seqno = baton.vg->seqno; + save_dev = devl->dev; + } else { + release_vg(baton.vg); + dm_config_destroy(vgmeta); + } + continue; } - if (!vgmeta_ret) { - vgmeta_ret = vgmeta; + if (!save_vg) { + save_vg = baton.vg; + save_meta = vgmeta; + save_seqno = baton.vg->seqno; save_dev = devl->dev; } else { - struct dm_config_node *meta1 = vgmeta_ret->root; + struct dm_config_node *meta1 = save_meta->root; struct dm_config_node *meta2 = vgmeta->root; struct dm_config_node *sib1 = meta1->sib; struct dm_config_node *sib2 = meta2->sib; @@ -1949,73 +2057,128 @@ scan_more: meta2->sib = NULL; if (compare_config(meta1, meta2)) { + /* FIXME: fall back to vg_read to correct this. 
*/ + log_warn("WARNING: inconsistent metadata for VG %s on devices %s seqno %u and %s seqno %u.", + vg->name, dev_name(save_dev), save_seqno, + dev_name(devl->dev), baton.vg->seqno); + log_warn("WARNING: temporarily disable lvmetad to repair metadata."); log_error("VG %s metadata comparison failed for device %s vs %s", vg->name, dev_name(devl->dev), save_dev ? dev_name(save_dev) : "none"); - _log_debug_inequality(vg->name, vgmeta_ret->root, vgmeta->root); + _log_debug_inequality(vg->name, save_meta->root, vgmeta->root); meta1->sib = sib1; meta2->sib = sib2; - dm_config_destroy(vgmeta); - dm_config_destroy(vgmeta_ret); + + /* no right choice, just use the previous copy */ release_vg(baton.vg); - return NULL; + dm_config_destroy(vgmeta); } meta1->sib = sib1; meta2->sib = sib2; + release_vg(baton.vg); dm_config_destroy(vgmeta); } + } - /* - * Look for any new PVs in the VG metadata that were not in our - * previous version of the VG. Add them to pvs_new to be - * scanned in this loop just like the old PVs. - */ - if (!check_new_pvs) { - check_new_pvs = 1; - dm_list_iterate_items(pvl_new, &baton.vg->pvs) { - found = 0; - dm_list_iterate_items(pvl, &vg->pvs) { - if (pvl_new->pv->dev != pvl->pv->dev) - continue; - found = 1; - break; - } - if (found) + /* FIXME: see above */ + fid->ref_count--; + + /* + * Look for any new PVs in the VG metadata that were not in our + * previous version of the VG. + * + * (Don't look for new PVs after a rescan and retry.) 
+ */ + found_new_pvs = 0; + + if (save_vg && !retried_reads) { + dm_list_iterate_items(pvl_new, &save_vg->pvs) { + found = 0; + dm_list_iterate_items(pvl, &vg->pvs) { + if (pvl_new->pv->dev != pvl->pv->dev) continue; - if (!pvl_new->pv->dev) { - strncpy(pvid_s, (char *) &pvl_new->pv->id, sizeof(pvid_s) - 1); - if (!id_write_format((const struct id *)&pvid_s, uuid, sizeof(uuid))) - stack; - log_error("Device not found for PV %s in VG %s", uuid, vg->name); - missing_devs++; - continue; - } - if (!(devl_new = dm_pool_zalloc(cmd->mem, sizeof(*devl_new)))) - return_NULL; - devl_new->dev = pvl_new->pv->dev; - dm_list_add(&pvs_new, &devl_new->list); - log_debug_lvmetad("Rescan VG %s found %s was added.", vg->name, dev_name(devl_new->dev)); + found = 1; + break; + } + + /* + * PV in new VG metadata not found in old VG metadata. + * There's a good chance we don't know about this new + * PV or what device it's on; a label scan is needed + * of all devices so we know which device the VG is + * now using. + */ + if (!found) { + found_new_pvs++; + strncpy(pvid_s, (char *) &pvl_new->pv->id, sizeof(pvid_s) - 1); + if (!id_write_format((const struct id *)&pvid_s, uuid, sizeof(uuid))) + stack; + log_debug_lvmetad("Rescan VG %s found new PV %s.", vg->name, uuid); } } + } - release_vg(baton.vg); + if (!save_vg && retried_reads) { + log_error("VG %s not found after rescanning devices.", vg->name); + goto out; } /* - * Do the same scanning above for any new PVs. + * Do a full rescan of devices, then look up which devices the + * scan found for this VG name, and select those devices to + * read metadata from in the loop above (rather than the list + * of devices we created from our last copy of the vg metadata.) + * + * Case 1: VG we knew is no longer on any of the devices we knew it + * to be on (save_vg is NULL, which means the metadata wasn't found + * when reading mdas on each of the initial pvs_scan devices). 
+ * Rescan all devs and then retry reading metadata from the devs that + * the scan finds associated with this VG. + * + * Case 2: VG has new PVs but we don't know what devices they are + * so rescan all devs and then retry reading metadata from the devs + * that the scan finds associated with this VG. + * + * (N.B. after a retry, we don't check for found_new_pvs.) */ - if (!dm_list_empty(&pvs_new)) { - dm_list_init(&pvs_scan); - dm_list_splice(&pvs_scan, &pvs_new); - dm_list_init(&pvs_new); - log_debug_lvmetad("Rescan VG %s found new PVs to scan.", vg->name); - goto scan_more; - } + if (!save_vg || found_new_pvs) { + if (!save_vg) + log_debug_lvmetad("Rescan VG %s did not find VG on previous devs.", vg->name); + if (found_new_pvs) + log_debug_lvmetad("Rescan VG %s scanning all devs to find new PVs.", vg->name); - if (missing_devs) { - if (vgmeta_ret) - dm_config_destroy(vgmeta_ret); - return_NULL; + label_scan(cmd); + + if (!(vginfo = lvmcache_vginfo_from_vgname(vg->name, NULL))) { + log_error("VG %s vg info not found after rescanning devices.", vg->name); + goto out; + } + + /* + * Set pvs_scan to devs that the label scan found + * in the VG and retry the metadata reading loop. 
+ */ + dm_list_init(&pvs_scan); + + if (!lvmcache_get_vg_devs(cmd, vginfo, &pvs_scan)) { + log_error("VG %s info devs not found after rescanning devices.", vg->name); + goto out; + } + + log_debug_lvmetad("Rescan VG %s has %d PVs after label scan.", + vg->name, dm_list_size(&pvs_scan)); + + if (save_vg) + release_vg(save_vg); + if (save_meta) + dm_config_destroy(save_meta); + save_vg = NULL; + save_meta = NULL; + save_dev = NULL; + save_seqno = 0; + found_new_pvs = 0; + retried_reads = 1; + goto retry_reads; } /* @@ -2024,52 +2187,50 @@ scan_more: dm_list_iterate_items(devl, &pvs_drop) { if (!devl->dev) continue; - log_debug_lvmetad("Rescan VG %s dropping %s.", vg->name, dev_name(devl->dev)); - if (!lvmetad_pv_gone_by_dev(devl->dev)) - return_NULL; + log_debug_lvmetad("Rescan VG %s removing %s from lvmetad.", vg->name, dev_name(devl->dev)); + if (!lvmetad_pv_gone_by_dev(devl->dev)) { + /* FIXME: use an error path that disables lvmetad */ + log_error("Failed to remove %s from lvmetad.", dev_name(devl->dev)); + } } /* - * Update the VG in lvmetad. + * Update lvmetad with the newly read version of the VG. + * When the seqno is unchanged the cached VG can be left. 
*/ - if (vgmeta_ret) { - fid = lvmcache_fmt(info)->ops->create_instance(lvmcache_fmt(info), &fic); - if (!(vg_ret = import_vg_from_config_tree(vgmeta_ret, fid))) { - log_error("VG import from config tree failed"); - lvmcache_fmt(info)->ops->destroy_instance(fid); - goto out; + if (save_vg && (save_seqno != vg->seqno)) { + dm_list_iterate_items(devl, &pvs_scan) { + if (!devl->dev) + continue; + log_debug_lvmetad("Rescan VG %s removing %s from lvmetad to replace.", + vg->name, dev_name(devl->dev)); + if (!lvmetad_pv_gone_by_dev(devl->dev)) { + /* FIXME: use an error path that disables lvmetad */ + log_error("Failed to remove %s from lvmetad.", dev_name(devl->dev)); + } } + log_debug_lvmetad("Rescan VG %s updating lvmetad from seqno %u to seqno %u.", + vg->name, vg->seqno, save_seqno); + /* - * Update lvmetad with the newly read version of the VG. - * When the seqno is unchanged the cached VG can be left. + * If this vg_update fails the cached metadata in + * lvmetad will remain invalid. */ - if (save_seqno != vg->seqno) { - dm_list_iterate_items(devl, &pvs_scan) { - if (!devl->dev) - continue; - log_debug_lvmetad("Rescan VG %s dropping to replace %s.", vg->name, dev_name(devl->dev)); - if (!lvmetad_pv_gone_by_dev(devl->dev)) - return_NULL; - } - - log_debug_lvmetad("Rescan VG %s updating lvmetad from seqno %u to seqno %u.", - vg->name, vg->seqno, save_seqno); - - /* - * If this vg_update fails the cached metadata in - * lvmetad will remain invalid. 
- */ - vg_ret->lvmetad_update_pending = 1; - if (!lvmetad_vg_update_finish(vg_ret)) - log_error("Failed to update lvmetad with new VG meta"); + save_vg->lvmetad_update_pending = 1; + if (!lvmetad_vg_update_finish(save_vg)) { + /* FIXME: use an error path that disables lvmetad */ + log_error("Failed to update lvmetad with new VG meta"); } - dm_config_destroy(vgmeta_ret); } out: - if (vg_ret) - log_debug_lvmetad("Rescan VG %s done (seqno %u).", vg_ret->name, vg_ret->seqno); - return vg_ret; + if (!save_vg && fid) + fmt->ops->destroy_instance(fid); + if (save_meta) + dm_config_destroy(save_meta); + if (save_vg) + log_debug_lvmetad("Rescan VG %s done (new seqno %u).", save_vg->name, save_vg->seqno); + return save_vg; } int lvmetad_pvscan_single(struct cmd_context *cmd, struct device *dev, From e3e5beec74ac0037917f5e9a2693c6ccb16debac Mon Sep 17 00:00:00 2001 From: David Teigland Date: Wed, 7 Feb 2018 13:58:40 -0600 Subject: [PATCH 29/87] lvmetad: use new label_scan for update from pvscan Take advantage of the common implementation with aio and reduced disk reads. --- lib/cache/lvmetad.c | 38 ++++++++++++++++++++------- lib/commands/toolcontext.h | 2 +- lib/format_text/import_vsn1.c | 6 ++++- tools/pvscan.c | 49 ++++++++++++++++++++++++++++++----- 4 files changed, 77 insertions(+), 18 deletions(-) diff --git a/lib/cache/lvmetad.c b/lib/cache/lvmetad.c index 552dbf0fd..81ba1b79e 100644 --- a/lib/cache/lvmetad.c +++ b/lib/cache/lvmetad.c @@ -2240,9 +2240,12 @@ int lvmetad_pvscan_single(struct cmd_context *cmd, struct device *dev, struct label *label; struct lvmcache_info *info; struct _lvmetad_pvscan_baton baton; + const struct format_type *fmt; /* Create a dummy instance. 
*/ struct format_instance_ctx fic = { .type = 0 }; + log_debug_lvmetad("Scan metadata from dev %s", dev_name(dev)); + if (!lvmetad_used()) { log_error("Cannot proceed since lvmetad is not active."); return 0; @@ -2253,23 +2256,31 @@ int lvmetad_pvscan_single(struct cmd_context *cmd, struct device *dev, return 1; } - if (!label_read(dev, &label, 0)) { - log_print_unless_silent("No PV label found on %s.", dev_name(dev)); + if (!(info = lvmcache_info_from_pvid(dev->pvid, dev, 0))) { + log_print_unless_silent("No PV info found on %s for PVID %s.", dev_name(dev), dev->pvid); if (!lvmetad_pv_gone_by_dev(dev)) goto_bad; return 1; } - info = (struct lvmcache_info *) label->info; + if (!(label = lvmcache_get_label(info))) { + log_print_unless_silent("No PV label found for %s.", dev_name(dev)); + if (!lvmetad_pv_gone_by_dev(dev)) + goto_bad; + return 1; + } + fmt = lvmcache_fmt(info); + + baton.cmd = cmd; baton.vg = NULL; - baton.fid = lvmcache_fmt(info)->ops->create_instance(lvmcache_fmt(info), &fic); + baton.fid = fmt->ops->create_instance(fmt, &fic); if (!baton.fid) goto_bad; - if (baton.fid->fmt->features & FMT_OBSOLETE) { - lvmcache_fmt(info)->ops->destroy_instance(baton.fid); + if (fmt->features & FMT_OBSOLETE) { + fmt->ops->destroy_instance(baton.fid); log_warn("WARNING: Disabling lvmetad cache which does not support obsolete (lvm1) metadata."); lvmetad_set_disabled(cmd, LVMETAD_DISABLE_REASON_LVM1); _found_lvm1_metadata = 1; @@ -2283,9 +2294,9 @@ int lvmetad_pvscan_single(struct cmd_context *cmd, struct device *dev, lvmcache_foreach_mda(info, _lvmetad_pvscan_single, &baton); if (!baton.vg) - lvmcache_fmt(info)->ops->destroy_instance(baton.fid); + fmt->ops->destroy_instance(baton.fid); - if (!lvmetad_pv_found(cmd, (const struct id *) &dev->pvid, dev, lvmcache_fmt(info), + if (!lvmetad_pv_found(cmd, (const struct id *) &dev->pvid, dev, fmt, label->sector, baton.vg, found_vgnames, changed_vgnames)) { release_vg(baton.vg); goto_bad; @@ -2351,6 +2362,13 @@ int 
lvmetad_pvscan_all_devs(struct cmd_context *cmd, int do_wait) replacing_other_update = 1; } + label_scan(cmd); + + if (lvmcache_found_duplicate_pvs()) { + log_warn("WARNING: Scan found duplicate PVs."); + return 0; + } + log_verbose("Scanning all devices to update lvmetad."); if (!(iter = dev_iter_create(cmd->lvmetad_filter, 1))) { @@ -2721,6 +2739,8 @@ void lvmetad_validate_global_cache(struct cmd_context *cmd, int force) */ _lvmetad_get_pv_cache_list(cmd, &pvc_before); + log_debug_lvmetad("Rescan all devices to validate global cache."); + /* * Update the local lvmetad cache so it correctly reflects any * changes made on remote hosts. (It's possible that this command @@ -2789,7 +2809,7 @@ void lvmetad_validate_global_cache(struct cmd_context *cmd, int force) _update_changed_pvs_in_udev(cmd, &pvc_before, &pvc_after); } - log_debug_lvmetad("Validating global lvmetad cache finished"); + log_debug_lvmetad("Rescanned all devices"); } int lvmetad_vg_is_foreign(struct cmd_context *cmd, const char *vgname, const char *vgid) diff --git a/lib/commands/toolcontext.h b/lib/commands/toolcontext.h index f04afec8c..d20cef157 100644 --- a/lib/commands/toolcontext.h +++ b/lib/commands/toolcontext.h @@ -167,8 +167,8 @@ struct cmd_context { unsigned pv_notify:1; unsigned activate_component:1; /* command activates component LV */ unsigned process_component_lvs:1; /* command processes also component LVs */ - unsigned mirror_warn_printed:1; /* command already printed warning about non-monitored mirrors */ + unsigned pvscan_cache_single:1; /* * Filtering. 
*/ diff --git a/lib/format_text/import_vsn1.c b/lib/format_text/import_vsn1.c index b41d83c8f..dee5379f2 100644 --- a/lib/format_text/import_vsn1.c +++ b/lib/format_text/import_vsn1.c @@ -221,7 +221,11 @@ static int _read_pv(struct format_instance *fid, if (!id_write_format(&pv->id, buffer, sizeof(buffer))) buffer[0] = '\0'; - log_error_once("Couldn't find device with uuid %s.", buffer); + + if (fid->fmt->cmd && !fid->fmt->cmd->pvscan_cache_single) + log_error_once("Couldn't find device with uuid %s.", buffer); + else + log_debug_metadata("Couldn't find device with uuid %s.", buffer); } if (!(pv->vg_name = dm_pool_strdup(mem, vg->name))) diff --git a/tools/pvscan.c b/tools/pvscan.c index 6581990f8..ab6ea0b48 100644 --- a/tools/pvscan.c +++ b/tools/pvscan.c @@ -300,8 +300,10 @@ static int _pvscan_autoactivate(struct cmd_context *cmd, struct pvscan_aa_params static int _pvscan_cache(struct cmd_context *cmd, int argc, char **argv) { struct pvscan_aa_params pp = { 0 }; + struct dm_list single_devs; struct dm_list found_vgnames; struct device *dev; + struct device_list *devl; const char *pv_name; const char *reason = NULL; int32_t major = -1; @@ -434,8 +436,12 @@ static int _pvscan_cache(struct cmd_context *cmd, int argc, char **argv) * to drop any devices that have left.) */ - if (argc || devno_args) + if (argc || devno_args) { log_verbose("Scanning devices on command line."); + cmd->pvscan_cache_single = 1; + } + + dm_list_init(&single_devs); while (argc--) { pv_name = *argv++; @@ -453,8 +459,11 @@ static int _pvscan_cache(struct cmd_context *cmd, int argc, char **argv) } else { /* Add device path to lvmetad. 
*/ log_debug("Scanning dev %s for lvmetad cache.", pv_name); - if (!lvmetad_pvscan_single(cmd, dev, &found_vgnames, &pp.changed_vgnames)) - add_errors++; + + if (!(devl = dm_pool_zalloc(cmd->mem, sizeof(*devl)))) + return_0; + devl->dev = dev; + dm_list_add(&single_devs, &devl->list); } } else { if (sscanf(pv_name, "%d:%d", &major, &minor) != 2) { @@ -471,8 +480,11 @@ static int _pvscan_cache(struct cmd_context *cmd, int argc, char **argv) } else { /* Add major:minor to lvmetad. */ log_debug("Scanning dev %d:%d for lvmetad cache.", major, minor); - if (!lvmetad_pvscan_single(cmd, dev, &found_vgnames, &pp.changed_vgnames)) - add_errors++; + + if (!(devl = dm_pool_zalloc(cmd->mem, sizeof(*devl)))) + return_0; + devl->dev = dev; + dm_list_add(&single_devs, &devl->list); } } @@ -482,9 +494,20 @@ static int _pvscan_cache(struct cmd_context *cmd, int argc, char **argv) } } + if (!dm_list_empty(&single_devs)) { + label_scan_devs(cmd, &single_devs); + + dm_list_iterate_items(devl, &single_devs) { + if (!lvmetad_pvscan_single(cmd, devl->dev, &found_vgnames, &pp.changed_vgnames)) + add_errors++; + } + } + if (!devno_args) goto activate; + dm_list_init(&single_devs); + /* Process any grouped --major --minor args */ dm_list_iterate_items(current_group, &cmd->arg_value_groups) { major = grouped_arg_int_value(current_group->arg_values, major_ARG, major); @@ -503,8 +526,11 @@ static int _pvscan_cache(struct cmd_context *cmd, int argc, char **argv) } else { /* Add major:minor to lvmetad. 
*/ log_debug("Scanning dev %d:%d for lvmetad cache.", major, minor); - if (!lvmetad_pvscan_single(cmd, dev, &found_vgnames, &pp.changed_vgnames)) - add_errors++; + + if (!(devl = dm_pool_zalloc(cmd->mem, sizeof(*devl)))) + return_0; + devl->dev = dev; + dm_list_add(&single_devs, &devl->list); } if (sigint_caught()) { @@ -513,6 +539,15 @@ static int _pvscan_cache(struct cmd_context *cmd, int argc, char **argv) } } + if (!dm_list_empty(&single_devs)) { + label_scan_devs(cmd, &single_devs); + + dm_list_iterate_items(devl, &single_devs) { + if (!lvmetad_pvscan_single(cmd, devl->dev, &found_vgnames, &pp.changed_vgnames)) + add_errors++; + } + } + /* * In the process of scanning devices, lvmetad may have become * disabled. If so, revert to scanning for the autoactivation step. From 5f138f36040297d092977a3b547cdefffb5ac4e8 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Thu, 26 Oct 2017 14:32:30 -0500 Subject: [PATCH 30/87] vgcreate: improve the use of label_scan The old code was doing unnecessary label scans when checking to see if the new VG name exists. A single label_scan is sufficient if it is done after the new VG lock is held. --- tools/vgcreate.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/tools/vgcreate.c b/tools/vgcreate.c index af0c36364..87a296f56 100644 --- a/tools/vgcreate.c +++ b/tools/vgcreate.c @@ -26,7 +26,6 @@ int vgcreate(struct cmd_context *cmd, int argc, char **argv) const char *clustered_message = ""; char *vg_name; struct arg_value_group_list *current_group; - uint32_t rc; if (!argc) { log_error("Please provide volume group name and " @@ -66,17 +65,30 @@ int vgcreate(struct cmd_context *cmd, int argc, char **argv) return_ECMD_FAILED; cmd->lockd_gl_disable = 1; - lvmcache_seed_infos_from_lvmetad(cmd); - /* * Check if the VG name already exists. This should be done before * creating PVs on any of the devices. 
+ * + * When searching if a VG name exists, acquire the VG lock, + * then do the initial label scan which reads all devices and + * populates lvmcache with any VG name it finds. If the VG name + * we want to use exists, then the label scan will find it, + * and the fmt_from_vgname call (used to check if the name exists) + * will return non-NULL. */ - if ((rc = vg_lock_newname(cmd, vp_new.vg_name)) != SUCCESS) { - if (rc == FAILED_EXIST) - log_error("A volume group called %s already exists.", vp_new.vg_name); - else - log_error("Can't get lock for %s.", vp_new.vg_name); + + if (!lock_vol(cmd, vp_new.vg_name, LCK_VG_WRITE, NULL)) { + log_error("Can't get lock for %s.", vp_new.vg_name); + return ECMD_FAILED; + } + + lvmcache_force_next_label_scan(); + lvmcache_label_scan(cmd); /* Does nothing when using lvmetad. */ + lvmcache_seed_infos_from_lvmetad(cmd); /* Does nothing unless using lvmetad. */ + + if (lvmcache_fmt_from_vgname(cmd, vp_new.vg_name, NULL, 0)) { + unlock_vg(cmd, NULL, vp_new.vg_name); + log_error("A volume group called %s already exists.", vp_new.vg_name); return ECMD_FAILED; } From 79c4971210a6337563ffa2fca08fb636423d93d4 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Mon, 6 Nov 2017 12:09:52 -0600 Subject: [PATCH 31/87] label_scan: remove extra label scan and read for orphan PVs When process_each_pv() calls vg_read() on the orphan VG, the internal implementation was doing an unnecessary lvmcache_label_scan() and two unnecessary label_read() calls on each orphan. Some of those unnecessary label scans/reads would sometimes be skipped due to caching, but the code was always doing at least one unnecessary read on each orphan. The common format_text case was also unnecessarily calling into the format-specific pv_read() function which actually did nothing. By analyzing each case in which vg_read() was being called on the orphan VG, we can say that all of the label scans/reads in vg_read_orphans are unnecessary: 1.
reporting commands: the information saved in lvmcache by the original label scan can be reported. There is no advantage to repeating the label scan on the orphans a second time before reporting it. 2. pvcreate/vgcreate/vgextend: these all share a common implementation in pvcreate_each_device(). That function already rescans labels after acquiring the orphan VG lock, which ensures that the command is using valid lvmcache information. --- lib/cache/lvmcache.c | 51 +++--------- lib/cache/lvmcache.h | 4 +- lib/format_text/format-text.c | 31 ------- lib/metadata/metadata.c | 150 +++++++++------------------------- lib/metadata/metadata.h | 6 -- 5 files changed, 54 insertions(+), 188 deletions(-) diff --git a/lib/cache/lvmcache.c b/lib/cache/lvmcache.c index 47058ccfc..8e119b5ee 100644 --- a/lib/cache/lvmcache.c +++ b/lib/cache/lvmcache.c @@ -2454,56 +2454,29 @@ int lvmcache_fid_add_mdas_vg(struct lvmcache_vginfo *vginfo, struct format_insta return 1; } -static int _get_pv_if_in_vg(struct lvmcache_info *info, - struct physical_volume *pv) -{ - char vgname[NAME_LEN + 1]; - char vgid[ID_LEN + 1]; - - if (info->vginfo && info->vginfo->vgname && - !is_orphan_vg(info->vginfo->vgname)) { - /* - * get_pv_from_vg_by_id() may call - * lvmcache_label_scan() and drop cached - * vginfo so make a local copy of string. - */ - (void) dm_strncpy(vgname, info->vginfo->vgname, sizeof(vgname)); - memcpy(vgid, info->vginfo->vgid, sizeof(vgid)); - - if (get_pv_from_vg_by_id(info->fmt, vgname, vgid, - info->dev->pvid, pv)) - return 1; - } - - return 0; -} - int lvmcache_populate_pv_fields(struct lvmcache_info *info, - struct physical_volume *pv, - int scan_label_only) + struct volume_group *vg, + struct physical_volume *pv) { struct data_area_list *da; - - /* Have we already cached vgname? 
*/ - if (!scan_label_only && _get_pv_if_in_vg(info, pv)) - return 1; - - /* Perform full scan (just the first time) and try again */ - if (!scan_label_only && !critical_section() && !full_scan_done()) { - lvmcache_force_next_label_scan(); - lvmcache_label_scan(info->fmt->cmd); - - if (_get_pv_if_in_vg(info, pv)) - return 1; + + if (!info->label) { + log_error("No cached label for orphan PV %s", pv_dev_name(pv)); + return 0; } - /* Orphan */ + pv->label_sector = info->label->sector; pv->dev = info->dev; pv->fmt = info->fmt; pv->size = info->device_size >> SECTOR_SHIFT; pv->vg_name = FMT_TEXT_ORPHAN_VG_NAME; memcpy(&pv->id, &info->dev->pvid, sizeof(pv->id)); + if (!pv->size) { + log_error("PV %s size is zero.", dev_name(info->dev)); + return 0; + } + /* Currently only support exactly one data area */ if (dm_list_size(&info->das) != 1) { log_error("Must be exactly one data area (found %d) on PV %s", diff --git a/lib/cache/lvmcache.h b/lib/cache/lvmcache.h index 826e91e96..1b5379c44 100644 --- a/lib/cache/lvmcache.h +++ b/lib/cache/lvmcache.h @@ -145,8 +145,8 @@ int lvmcache_fid_add_mdas(struct lvmcache_info *info, struct format_instance *fi int lvmcache_fid_add_mdas_pv(struct lvmcache_info *info, struct format_instance *fid); int lvmcache_fid_add_mdas_vg(struct lvmcache_vginfo *vginfo, struct format_instance *fid); int lvmcache_populate_pv_fields(struct lvmcache_info *info, - struct physical_volume *pv, - int scan_label_only); + struct volume_group *vg, + struct physical_volume *pv); int lvmcache_check_format(struct lvmcache_info *info, const struct format_type *fmt); void lvmcache_del_mdas(struct lvmcache_info *info); void lvmcache_del_das(struct lvmcache_info *info); diff --git a/lib/format_text/format-text.c b/lib/format_text/format-text.c index 9538080f0..ee1f11d35 100644 --- a/lib/format_text/format-text.c +++ b/lib/format_text/format-text.c @@ -1593,36 +1593,6 @@ static uint64_t _metadata_locn_offset_raw(void *metadata_locn) return mdac->area.start; } -static 
int _text_pv_read(const struct format_type *fmt, const char *pv_name, - struct physical_volume *pv, int scan_label_only) -{ - struct lvmcache_info *info; - struct device *dev; - - if (!(dev = dev_cache_get(pv_name, fmt->cmd->filter))) - return_0; - - if (lvmetad_used()) { - info = lvmcache_info_from_pvid(dev->pvid, dev, 0); - if (!info && !lvmetad_pv_lookup_by_dev(fmt->cmd, dev, NULL)) - return 0; - info = lvmcache_info_from_pvid(dev->pvid, dev, 0); - } else { - struct label *label; - if (!(label_read(dev, &label, UINT64_C(0)))) - return_0; - info = label->info; - } - - if (!info) - return_0; - - if (!lvmcache_populate_pv_fields(info, pv, scan_label_only)) - return 0; - - return 1; -} - static int _text_pv_initialise(const struct format_type *fmt, struct pv_create_args *pva, struct physical_volume *pv) @@ -2471,7 +2441,6 @@ static struct format_instance *_text_create_text_instance(const struct format_ty static struct format_handler _text_handler = { .scan = _text_scan, - .pv_read = _text_pv_read, .pv_initialise = _text_pv_initialise, .pv_setup = _text_pv_setup, .pv_add_metadata_area = _text_pv_add_metadata_area, diff --git a/lib/metadata/metadata.c b/lib/metadata/metadata.c index b4ee20470..570cbe6b2 100644 --- a/lib/metadata/metadata.c +++ b/lib/metadata/metadata.c @@ -38,10 +38,9 @@ #include static struct physical_volume *_pv_read(struct cmd_context *cmd, - struct dm_pool *pvmem, - const char *pv_name, - struct format_instance *fid, - uint32_t warn_flags, int scan_label_only); + const struct format_type *fmt, + struct volume_group *vg, + struct lvmcache_info *info); static int _alignment_overrides_default(unsigned long data_alignment, unsigned long default_pe_align) @@ -330,37 +329,6 @@ bad: return NULL; } -int get_pv_from_vg_by_id(const struct format_type *fmt, const char *vg_name, - const char *vgid, const char *pvid, - struct physical_volume *pv) -{ - struct volume_group *vg; - struct pv_list *pvl; - uint32_t warn_flags = WARN_PV_READ | WARN_INCONSISTENT; - 
int r = 0, consistent = 0; - - if (!(vg = vg_read_internal(fmt->cmd, vg_name, vgid, warn_flags, &consistent))) { - log_error("get_pv_from_vg_by_id: vg_read_internal failed to read VG %s", - vg_name); - return 0; - } - - dm_list_iterate_items(pvl, &vg->pvs) { - if (id_equal(&pvl->pv->id, (const struct id *) pvid)) { - if (!_copy_pv(fmt->cmd->mem, pv, pvl->pv)) { - log_error("internal PV duplication failed"); - r = 0; - goto out; - } - r = 1; - goto out; - } - } -out: - release_vg(vg); - return r; -} - static int _move_pv(struct volume_group *vg_from, struct volume_group *vg_to, const char *pv_name, int enforce_pv_from_source) { @@ -3246,9 +3214,7 @@ static int _check_mda_in_use(struct metadata_area *mda, void *_in_use) struct _vg_read_orphan_baton { struct cmd_context *cmd; struct volume_group *vg; - uint32_t warn_flags; - int consistent; - int repair; + const struct format_type *fmt; }; /* @@ -3345,8 +3311,7 @@ static int _vg_read_orphan_pv(struct lvmcache_info *info, void *baton) uint32_t ext_version; uint32_t ext_flags; - if (!(pv = _pv_read(b->vg->cmd, b->vg->vgmem, dev_name(lvmcache_device(info)), - b->vg->fid, b->warn_flags, 0))) { + if (!(pv = _pv_read(b->cmd, b->fmt, b->vg, info))) { stack; return 1; } @@ -3453,10 +3418,22 @@ static struct volume_group *_vg_read_orphans(struct cmd_context *cmd, vg->free_count = 0; baton.cmd = cmd; - baton.warn_flags = warn_flags; + baton.fmt = fmt; baton.vg = vg; - baton.consistent = 1; - baton.repair = *consistent; + + /* + * vg_read for a normal VG will rescan labels for all the devices + * in the VG, in case something changed on disk between the initial + * label scan and acquiring the VG lock. We don't rescan labels + * here because this is only called in two ways: + * + * 1. for reporting, in which case it doesn't matter if something + * changed between the label scan and printing the PVs here + * + * 2. 
pvcreate_each_device() for pvcreate//vgcreate/vgextend, + * which already does the label rescan after taking the + * orphan lock. + */ while ((pvl = (struct pv_list *) dm_list_first(&head.list))) { dm_list_del(&pvl->list); @@ -3468,7 +3445,6 @@ static struct volume_group *_vg_read_orphans(struct cmd_context *cmd, if (!lvmcache_foreach_pv(vginfo, _vg_read_orphan_pv, &baton)) return_NULL; - *consistent = baton.consistent; return vg; } @@ -4686,86 +4662,40 @@ const char *find_vgname_from_pvname(struct cmd_context *cmd, return find_vgname_from_pvid(cmd, pvid); } -/* FIXME Use label functions instead of PV functions */ static struct physical_volume *_pv_read(struct cmd_context *cmd, - struct dm_pool *pvmem, - const char *pv_name, - struct format_instance *fid, - uint32_t warn_flags, int scan_label_only) + const struct format_type *fmt, + struct volume_group *vg, + struct lvmcache_info *info) { struct physical_volume *pv; - struct label *label; - struct lvmcache_info *info; - struct device *dev; - const struct format_type *fmt; - int found; + struct device *dev = lvmcache_device(info); - if (!(dev = dev_cache_get(pv_name, cmd->filter))) - return_NULL; - - if (lvmetad_used()) { - info = lvmcache_info_from_pvid(dev->pvid, dev, 0); - if (!info) { - if (!lvmetad_pv_lookup_by_dev(cmd, dev, &found)) - return_NULL; - if (!found) { - if (warn_flags & WARN_PV_READ) - log_error("No physical volume found in lvmetad cache for %s", - pv_name); - return NULL; - } - if (!(info = lvmcache_info_from_pvid(dev->pvid, dev, 0))) { - if (warn_flags & WARN_PV_READ) - log_error("No cache info in lvmetad cache for %s.", - pv_name); - return NULL; - } - } - label = lvmcache_get_label(info); - } else { - if (!(label_read(dev, &label, UINT64_C(0)))) { - if (warn_flags & WARN_PV_READ) - log_error("No physical volume label read from %s", - pv_name); - return NULL; - } - info = (struct lvmcache_info *) label->info; - } - - fmt = lvmcache_fmt(info); - - pv = _alloc_pv(pvmem, dev); - if (!pv) { - 
log_error("pv allocation for '%s' failed", pv_name); + if (!(pv = _alloc_pv(vg->vgmem, NULL))) { + log_error("pv allocation failed"); return NULL; } - pv->label_sector = label->sector; - - /* FIXME Move more common code up here */ - if (!(lvmcache_fmt(info)->ops->pv_read(lvmcache_fmt(info), pv_name, pv, scan_label_only))) { - log_error("Failed to read existing physical volume '%s'", - pv_name); - goto bad; + if (fmt->ops->pv_read) { + /* format1 and pool */ + if (!(fmt->ops->pv_read(fmt, dev_name(dev), pv, 0))) { + log_error("Failed to read existing physical volume '%s'", dev_name(dev)); + goto bad; + } + } else { + /* format text */ + if (!lvmcache_populate_pv_fields(info, vg, pv)) + goto_bad; } - if (!pv->size) - goto bad; - - if (!alloc_pv_segment_whole_pv(pvmem, pv)) + if (!alloc_pv_segment_whole_pv(vg->vgmem, pv)) goto_bad; - if (fid) - lvmcache_fid_add_mdas(info, fid, (const char *) &pv->id, ID_LEN); - else { - lvmcache_fid_add_mdas(info, fmt->orphan_vg->fid, (const char *) &pv->id, ID_LEN); - pv_set_fid(pv, fmt->orphan_vg->fid); - } - + lvmcache_fid_add_mdas(info, vg->fid, (const char *) &pv->id, ID_LEN); + pv_set_fid(pv, vg->fid); return pv; bad: free_pv_fid(pv); - dm_pool_free(pvmem, pv); + dm_pool_free(vg->vgmem, pv); return NULL; } diff --git a/lib/metadata/metadata.h b/lib/metadata/metadata.h index 5b8d690cc..83983b427 100644 --- a/lib/metadata/metadata.h +++ b/lib/metadata/metadata.h @@ -371,12 +371,6 @@ uint32_t vg_bad_status_bits(const struct volume_group *vg, uint64_t status); int add_pv_to_vg(struct volume_group *vg, const char *pv_name, struct physical_volume *pv, int new_pv); - -/* Find a PV within a given VG */ -int get_pv_from_vg_by_id(const struct format_type *fmt, const char *vg_name, - const char *vgid, const char *pvid, - struct physical_volume *pv); - struct logical_volume *find_lv_in_vg_by_lvid(struct volume_group *vg, const union lvid *lvid); From d9a77e8bb4d25af33625d3cdddf5288207101d5b Mon Sep 17 00:00:00 2001 From: David Teigland 
Date: Wed, 1 Nov 2017 09:35:40 -0500 Subject: [PATCH 32/87] lvmcache: simplify metadata cache The copy of VG metadata stored in lvmcache was not being used in general. It pretended to be a generic VG metadata cache, but was not being used except for clvmd activation. There it was used to avoid reading from disk while devices were suspended, i.e. in resume. This removes the code that attempted to make this look like a generic metadata cache, and replaces it with something narrowly targeted to what it's actually used for. This is a way of passing the VG from suspend to resume in clvmd. Since in the case of clvmd one caller can't simply pass the same VG to both suspend and resume, suspend needs to stash the VG somewhere that resume can grab it from. (resume doesn't want to read it from disk since devices are suspended.) The lvmcache vginfo struct is used as a convenient place to stash the VG to pass it from suspend to resume, even though it isn't related to the lvmcache or vginfo. These suspended_vg* vginfo fields should not be used or touched anywhere else, they are only to be used for passing the VG data from suspend to resume in clvmd. The VG data being passed between suspend and resume is never modified, and will only exist in the brief period between suspend and resume in clvmd. suspend has both old (current) and new (precommitted) copies of the VG metadata. It stashes both of these in the vginfo prior to suspending devices. When vg_commit is successful, it sets a flag in vginfo as before, signaling the transition from old to new metadata. resume grabs the VG stashed by suspend. If the vg_commit happened, it grabs the new VG, and if the vg_commit didn't happen it grabs the old VG. The VG is then used to resume LVs. This isolates clvmd-specific code and usage from the normal lvm vg_read code, making the code simpler and the behavior easier to verify.
Sequence of operations: - lv_suspend() has both vg_old and vg_new and stashes a copy of each onto the vginfo: lvmcache_save_suspended_vg(vg_old); lvmcache_save_suspended_vg(vg_new); - vg_commit() happens, which causes all clvmd instances to call lvmcache_commit_metadata(vg). A flag is set in the vginfo indicating the transition from the old to new VG: vginfo->suspended_vg_committed = 1; - lv_resume() needs either vg_old or vg_new to use in resuming LVs. It doesn't want to read the VG from disk since devices are suspended, so it gets the VG stashed by lv_suspend: vg = lvmcache_get_suspended_vg(vgid); If the vg_commit did not happen, suspended_vg_committed will not be set, and in this case, lvmcache_get_suspended_vg() will return the old VG instead of the new VG, and it will resume LVs based on the old metadata. --- lib/activate/activate.c | 82 ++++++- lib/cache/lvmcache.c | 405 ++++++++++++++--------------------- lib/cache/lvmcache.h | 7 +- lib/commands/toolcontext.c | 4 - lib/config/config_settings.h | 7 +- lib/config/defaults.h | 1 - lib/metadata/metadata.c | 71 ++---- lib/metadata/vg.c | 5 - lib/misc/lvm-globals.c | 12 -- lib/misc/lvm-globals.h | 2 - tools/commands.h | 8 +- tools/lvmcmdline.c | 1 - tools/tools.h | 1 - 13 files changed, 264 insertions(+), 342 deletions(-) diff --git a/lib/activate/activate.c b/lib/activate/activate.c index 6611e99e7..565e643ec 100644 --- a/lib/activate/activate.c +++ b/lib/activate/activate.c @@ -28,6 +28,7 @@ #include "config.h" #include "segtype.h" #include "sharedlib.h" +#include "lvmcache.h" #include #include @@ -2172,6 +2173,17 @@ static int _lv_suspend(struct cmd_context *cmd, const char *lvid_s, if (!lv_info(cmd, lv, laopts->origin_only, &info, 0, 0)) goto_out; + /* + * Save old and new (current and precommitted) versions of the + * VG metadata for lv_resume() to use, since lv_resume can't + * read metadata given that devices are suspended. 
lv_resume() + * will resume LVs using the old/current metadata if the vg_commit + * did not happen (or failed), and it will resume LVs using the + * new/precommitted metadata if the vg_commit succeeded. + */ + lvmcache_save_suspended_vg(lv->vg, 0); + lvmcache_save_suspended_vg(lv_pre->vg, 1); + if (!info.exists || info.suspended) { if (!error_if_not_suspended) { r = 1; @@ -2378,16 +2390,55 @@ static int _lv_resume(struct cmd_context *cmd, const char *lvid_s, struct lv_activate_opts *laopts, int error_if_not_active, const struct logical_volume *lv) { - const struct logical_volume *lv_to_free = NULL; struct dm_list *snh; + struct volume_group *vg = NULL; + struct logical_volume *lv_found = NULL; + const union lvid *lvid; + const char *vgid; struct lvinfo info; int r = 0; if (!activation()) return 1; - if (!lv && !(lv_to_free = lv = lv_from_lvid(cmd, lvid_s, 0))) - goto_out; + /* + * When called in clvmd, lvid_s is set and lv is not. We need to + * get the VG metadata without reading disks because devs are + * suspended. lv_suspend() saved old and new VG metadata for us + * to use here. If vg_commit() happened, lvmcache_get_suspended_vg + * will return the new metadata for us to use in resuming LVs. + * If vg_commit() did not happen, lvmcache_get_suspended_vg + * returns the old metadata which we use to resume LVs.
+ */ + if (!lv && lvid_s) { + lvid = (const union lvid *) lvid_s; + vgid = (const char *)lvid->id[0].uuid; + + if ((vg = lvmcache_get_suspended_vg(vgid))) { + log_debug_activation("Resuming LVID %s found saved vg seqno %d %s", lvid_s, vg->seqno, vg->name); + if ((lv_found = find_lv_in_vg_by_lvid(vg, lvid))) { + log_debug_activation("Resuming LVID %s found saved LV %s", lvid_s, display_lvname(lv_found)); + lv = lv_found; + } else + log_debug_activation("Resuming LVID %s did not find saved LV", lvid_s); + } else + log_debug_activation("Resuming LVID %s did not find saved VG", lvid_s); + + /* + * resume must have been called without a preceding suspend, + * so we need to read the vg. + */ + + if (!lv) { + log_debug_activation("Resuming LVID %s reading VG", lvid_s); + if (!(lv_found = lv_from_lvid(cmd, lvid_s, 0))) { + log_debug_activation("Resuming LVID %s failed to read VG", lvid_s); + goto out; + } + + lv = lv_found; + } + } if (!lv_is_origin(lv) && !lv_is_thin_volume(lv) && !lv_is_thin_pool(lv)) laopts->origin_only = 0; @@ -2448,9 +2499,6 @@ needs_resume: r = 1; out: - if (lv_to_free) - release_vg(lv_to_free->vg); - return r; } @@ -2587,6 +2635,10 @@ int lv_activation_filter(struct cmd_context *cmd, const char *lvid_s, int *activate_lv, const struct logical_volume *lv) { const struct logical_volume *lv_to_free = NULL; + struct volume_group *vg = NULL; + struct logical_volume *lv_found = NULL; + const union lvid *lvid; + const char *vgid; int r = 0; if (!activation()) { @@ -2594,6 +2646,24 @@ int lv_activation_filter(struct cmd_context *cmd, const char *lvid_s, return 1; } + /* + * This function is called while devices are suspended, + * so try to use the copy of the vg that was saved in + * lv_suspend. 
+ */ + if (!lv && lvid_s) { + lvid = (const union lvid *) lvid_s; + vgid = (const char *)lvid->id[0].uuid; + + if ((vg = lvmcache_get_suspended_vg(vgid))) { + log_debug_activation("activation_filter for %s found saved VG seqno %d %s", lvid_s, vg->seqno, vg->name); + if ((lv_found = find_lv_in_vg_by_lvid(vg, lvid))) { + log_debug_activation("activation_filter for %s found saved LV %s", lvid_s, display_lvname(lv_found)); + lv = lv_found; + } + } + } + if (!lv && !(lv_to_free = lv = lv_from_lvid(cmd, lvid_s, 0))) goto_out; diff --git a/lib/cache/lvmcache.c b/lib/cache/lvmcache.c index 8e119b5ee..28e46bbb7 100644 --- a/lib/cache/lvmcache.c +++ b/lib/cache/lvmcache.c @@ -63,16 +63,42 @@ struct lvmcache_vginfo { char *lock_type; uint32_t mda_checksum; size_t mda_size; - size_t vgmetadata_size; - char *vgmetadata; /* Copy of VG metadata as format_text string */ - struct dm_config_tree *cft; /* Config tree created from vgmetadata */ - /* Lifetime is directly tied to vgmetadata */ - struct volume_group *cached_vg; - unsigned holders; - unsigned vg_use_count; /* Counter of vg reusage */ - unsigned precommitted; /* Is vgmetadata live or precommitted? */ - unsigned cached_vg_invalidated; /* Signal to regenerate cached_vg */ int independent_metadata_location; /* metadata read from independent areas */ + + /* + * The following are not related to lvmcache or vginfo, + * but are borrowing the vginfo to store the data. + * + * suspended_vg_* are used only by clvmd suspend/resume. + * In suspend, both old (current) and new (precommitted) + * metadata is saved. (Each in three forms: buffer, cft, + * and vg). In resume, if the vg was committed + * (suspended_vg_committed is set), then LVs are resumed + * using the new metadata, but if the vg wasn't committed, + * then LVs are resumed using the old metadata. + * + * suspended_vg_committed is set to 1 when clvmd gets + * LCK_VG_COMMIT from vg_commit(). 
+ * + * These fields are only used between suspend and resume + * in clvmd, and should never be used in any other way. + * The contents of this data are never changed. This + * data does not really belong in lvmcache, it's unrelated + * to lvmcache or vginfo, but it's just a convenient place + * for clvmd to stash the VG between suspend and resume + * (since the same caller isn't present to pass the VG to + * both suspend and resume in the case of clvmd.) + * + * This data is not really a "cache" of the VG, it is just + * a location to pass the VG between suspend and resume. + */ + int suspended_vg_committed; + char *suspended_vg_old_buf; + struct dm_config_tree *suspended_vg_old_cft; + struct volume_group *suspended_vg_old; + char *suspended_vg_new_buf; + struct dm_config_tree *suspended_vg_new_cft; + struct volume_group *suspended_vg_new; }; static struct dm_hash_table *_pvid_hash = NULL; @@ -139,73 +165,7 @@ void lvmcache_seed_infos_from_lvmetad(struct cmd_context *cmd) _has_scanned = 1; } -/* Volume Group metadata cache functions */ -static void _free_cached_vgmetadata(struct lvmcache_vginfo *vginfo) -{ - if (!vginfo || !vginfo->vgmetadata) - return; - - dm_free(vginfo->vgmetadata); - - vginfo->vgmetadata = NULL; - - /* Release also cached config tree */ - if (vginfo->cft) { - dm_config_destroy(vginfo->cft); - vginfo->cft = NULL; - } - - log_debug_cache("lvmcache: VG %s wiped.", vginfo->vgname); - - release_vg(vginfo->cached_vg); -} - -/* - * Cache VG metadata against the vginfo with matching vgid. 
- */ -static void _store_metadata(struct volume_group *vg, unsigned precommitted) -{ - char uuid[64] __attribute__((aligned(8))); - struct lvmcache_vginfo *vginfo; - char *data; - size_t size; - - if (!(vginfo = lvmcache_vginfo_from_vgid((const char *)&vg->id))) { - stack; - return; - } - - if (!(size = export_vg_to_buffer(vg, &data))) { - stack; - _free_cached_vgmetadata(vginfo); - return; - } - - /* Avoid reparsing of the same data string */ - if (vginfo->vgmetadata && vginfo->vgmetadata_size == size && - strcmp(vginfo->vgmetadata, data) == 0) - dm_free(data); - else { - _free_cached_vgmetadata(vginfo); - vginfo->vgmetadata_size = size; - vginfo->vgmetadata = data; - } - - vginfo->precommitted = precommitted; - - if (!id_write_format((const struct id *)vginfo->vgid, uuid, sizeof(uuid))) { - stack; - return; - } - - log_debug_cache("lvmcache: VG %s (%s) stored (%" PRIsize_t " bytes%s).", - vginfo->vgname, uuid, size, - precommitted ? ", precommitted" : ""); -} - -static void _update_cache_info_lock_state(struct lvmcache_info *info, - int locked, - int *cached_vgmetadata_valid) +static void _update_cache_info_lock_state(struct lvmcache_info *info, int locked) { int was_locked = (info->status & CACHE_LOCKED) ? 1 : 0; @@ -213,10 +173,8 @@ static void _update_cache_info_lock_state(struct lvmcache_info *info, * Cache becomes invalid whenever lock state changes unless * exclusive VG_GLOBAL is held (i.e. while scanning). 
*/ - if (!lvmcache_vgname_is_locked(VG_GLOBAL) && (was_locked != locked)) { + if (!lvmcache_vgname_is_locked(VG_GLOBAL) && (was_locked != locked)) info->status |= CACHE_INVALID; - *cached_vgmetadata_valid = 0; - } if (locked) info->status |= CACHE_LOCKED; @@ -228,14 +186,9 @@ static void _update_cache_vginfo_lock_state(struct lvmcache_vginfo *vginfo, int locked) { struct lvmcache_info *info; - int cached_vgmetadata_valid = 1; dm_list_iterate_items(info, &vginfo->infos) - _update_cache_info_lock_state(info, locked, - &cached_vgmetadata_valid); - - if (!cached_vgmetadata_valid) - _free_cached_vgmetadata(vginfo); + _update_cache_info_lock_state(info, locked); } static void _update_cache_lock_state(const char *vgname, int locked) @@ -248,6 +201,35 @@ static void _update_cache_lock_state(const char *vgname, int locked) _update_cache_vginfo_lock_state(vginfo, locked); } +static void _suspended_vg_free(struct lvmcache_vginfo *vginfo, int free_old, int free_new) +{ + if (free_old) { + if (vginfo->suspended_vg_old_buf) + dm_free(vginfo->suspended_vg_old_buf); + if (vginfo->suspended_vg_old_cft) + dm_config_destroy(vginfo->suspended_vg_old_cft); + if (vginfo->suspended_vg_old) + release_vg(vginfo->suspended_vg_old); + + vginfo->suspended_vg_old_buf = NULL; + vginfo->suspended_vg_old_cft = NULL; + vginfo->suspended_vg_old = NULL; + } + + if (free_new) { + if (vginfo->suspended_vg_new_buf) + dm_free(vginfo->suspended_vg_new_buf); + if (vginfo->suspended_vg_new_cft) + dm_config_destroy(vginfo->suspended_vg_new_cft); + if (vginfo->suspended_vg_new) + release_vg(vginfo->suspended_vg_new); + + vginfo->suspended_vg_new_buf = NULL; + vginfo->suspended_vg_new_cft = NULL; + vginfo->suspended_vg_new = NULL; + } +} + static void _drop_metadata(const char *vgname, int drop_precommitted) { struct lvmcache_vginfo *vginfo; @@ -256,25 +238,98 @@ static void _drop_metadata(const char *vgname, int drop_precommitted) if (!(vginfo = lvmcache_vginfo_from_vgname(vgname, NULL))) return; - /* - * 
Invalidate cached PV labels. - * If cached precommitted metadata exists that means we - * already invalidated the PV labels (before caching it) - * and we must not do it again. - */ - if (!drop_precommitted && vginfo->precommitted && !vginfo->vgmetadata) - log_error(INTERNAL_ERROR "metadata commit (or revert) missing before " - "dropping metadata from cache."); - - if (drop_precommitted || !vginfo->precommitted) + if (drop_precommitted) dm_list_iterate_items(info, &vginfo->infos) info->status |= CACHE_INVALID; - _free_cached_vgmetadata(vginfo); - - /* VG revert */ if (drop_precommitted) - vginfo->precommitted = 0; + _suspended_vg_free(vginfo, 0, 1); + else + _suspended_vg_free(vginfo, 1, 1); +} + +void lvmcache_save_suspended_vg(struct volume_group *vg, int precommitted) +{ + struct lvmcache_vginfo *vginfo; + struct format_instance *fid; + struct format_instance_ctx fic; + struct volume_group *susp_vg = NULL; + struct dm_config_tree *susp_cft = NULL; + char *susp_buf = NULL; + size_t size; + int new = precommitted; + int old = !precommitted; + + if (!(vginfo = lvmcache_vginfo_from_vgid((const char *)&vg->id))) + goto_bad; + + /* already saved */ + if (old && vginfo->suspended_vg_old && + (vginfo->suspended_vg_old->seqno == vg->seqno)) + return; + + /* already saved */ + if (new && vginfo->suspended_vg_new && + (vginfo->suspended_vg_new->seqno == vg->seqno)) + return; + + _suspended_vg_free(vginfo, old, new); + + if (!(size = export_vg_to_buffer(vg, &susp_buf))) + goto_bad; + + fic.type = FMT_INSTANCE_MDAS | FMT_INSTANCE_AUX_MDAS; + fic.context.vg_ref.vg_name = vginfo->vgname; + fic.context.vg_ref.vg_id = vginfo->vgid; + if (!(fid = vginfo->fmt->ops->create_instance(vginfo->fmt, &fic))) + goto_bad; + + if (!(susp_cft = config_tree_from_string_without_dup_node_check(susp_buf))) + goto_bad; + + if (!(susp_vg = import_vg_from_config_tree(susp_cft, fid))) + goto_bad; + + if (old) { + vginfo->suspended_vg_old_buf = susp_buf; + vginfo->suspended_vg_old_cft = susp_cft; + 
vginfo->suspended_vg_old = susp_vg; + log_debug_cache("lvmcache saved suspended vg old seqno %d %s", vg->seqno, vg->name); + } else { + vginfo->suspended_vg_new_buf = susp_buf; + vginfo->suspended_vg_new_cft = susp_cft; + vginfo->suspended_vg_new = susp_vg; + log_debug_cache("lvmcache saved suspended vg new seqno %d %s", vg->seqno, vg->name); + } + return; + +bad: + _suspended_vg_free(vginfo, old, new); + log_debug_cache("lvmcache failed to save suspended pre %d vg %s", precommitted, vg->name); +} + +struct volume_group *lvmcache_get_suspended_vg(const char *vgid) +{ + struct lvmcache_vginfo *vginfo; + + if (!(vginfo = lvmcache_vginfo_from_vgid(vgid))) + return_NULL; + + + if (vginfo->suspended_vg_committed) + return vginfo->suspended_vg_new; + else + return vginfo->suspended_vg_old; +} + +void lvmcache_drop_suspended_vg(struct volume_group *vg) +{ + struct lvmcache_vginfo *vginfo; + + if (!(vginfo = lvmcache_vginfo_from_vgid((const char *)&vg->id))) + return; + + _suspended_vg_free(vginfo, 1, 1); } /* @@ -289,11 +344,7 @@ void lvmcache_commit_metadata(const char *vgname) if (!(vginfo = lvmcache_vginfo_from_vgname(vgname, NULL))) return; - if (vginfo->precommitted) { - log_debug_cache("lvmcache: Upgraded pre-committed VG %s metadata to committed.", - vginfo->vgname); - vginfo->precommitted = 0; - } + vginfo->suspended_vg_committed = 1; } void lvmcache_drop_metadata(const char *vgname, int drop_precommitted) @@ -674,18 +725,6 @@ static int _info_is_valid(struct lvmcache_info *info) return 1; } -static int _vginfo_is_valid(struct lvmcache_vginfo *vginfo) -{ - struct lvmcache_info *info; - - /* Invalid if any info is invalid */ - dm_list_iterate_items(info, &vginfo->infos) - if (!_info_is_valid(info)) - return 0; - - return 1; -} - /* vginfo is invalid if it does not contain at least one valid info */ static int _vginfo_is_invalid(struct lvmcache_vginfo *vginfo) { @@ -1315,121 +1354,6 @@ int lvmcache_label_scan(struct cmd_context *cmd) return r; } -struct volume_group 
*lvmcache_get_vg(struct cmd_context *cmd, const char *vgname, - const char *vgid, unsigned precommitted) -{ - struct lvmcache_vginfo *vginfo; - struct volume_group *vg = NULL; - struct format_instance *fid; - struct format_instance_ctx fic; - - /* - * We currently do not store precommitted metadata in lvmetad at - * all. This means that any request for precommitted metadata is served - * using the classic scanning mechanics, and read from disk or from - * lvmcache. - */ - if (lvmetad_used() && !precommitted) { - /* Still serve the locally cached VG if available */ - if (vgid && (vginfo = lvmcache_vginfo_from_vgid(vgid)) && - vginfo->vgmetadata && (vg = vginfo->cached_vg)) - goto out; - return lvmetad_vg_lookup(cmd, vgname, vgid); - } - - if (!vgid || !(vginfo = lvmcache_vginfo_from_vgid(vgid)) || !vginfo->vgmetadata) - return NULL; - - if (!_vginfo_is_valid(vginfo)) - return NULL; - - /* - * Don't return cached data if either: - * (i) precommitted metadata is requested but we don't have it cached - * - caller should read it off disk; - * (ii) live metadata is requested but we have precommitted metadata cached - * and no devices are suspended so caller may read it off disk. - * - * If live metadata is requested but we have precommitted metadata cached - * and devices are suspended, we assume this precommitted metadata has - * already been preloaded and committed so it's OK to return it as live. - * Note that we do not clear the PRECOMMITTED flag. 
- */ - if ((precommitted && !vginfo->precommitted) || - (!precommitted && vginfo->precommitted && !critical_section())) - return NULL; - - /* Use already-cached VG struct when available */ - if ((vg = vginfo->cached_vg) && !vginfo->cached_vg_invalidated) - goto out; - - release_vg(vginfo->cached_vg); - - fic.type = FMT_INSTANCE_MDAS | FMT_INSTANCE_AUX_MDAS; - fic.context.vg_ref.vg_name = vginfo->vgname; - fic.context.vg_ref.vg_id = vgid; - if (!(fid = vginfo->fmt->ops->create_instance(vginfo->fmt, &fic))) - return_NULL; - - /* Build config tree from vgmetadata, if not yet cached */ - if (!vginfo->cft && - !(vginfo->cft = - config_tree_from_string_without_dup_node_check(vginfo->vgmetadata))) - goto_bad; - - if (!(vg = import_vg_from_config_tree(vginfo->cft, fid))) - goto_bad; - - /* Cache VG struct for reuse */ - vginfo->cached_vg = vg; - vginfo->holders = 1; - vginfo->vg_use_count = 0; - vginfo->cached_vg_invalidated = 0; - vg->vginfo = vginfo; - - if (!dm_pool_lock(vg->vgmem, detect_internal_vg_cache_corruption())) - goto_bad; - -out: - vginfo->holders++; - vginfo->vg_use_count++; - log_debug_cache("Using cached %smetadata for VG %s with %u holder(s).", - vginfo->precommitted ? 
"pre-committed " : "", - vginfo->vgname, vginfo->holders); - - return vg; - -bad: - _free_cached_vgmetadata(vginfo); - return NULL; -} - -// #if 0 -int lvmcache_vginfo_holders_dec_and_test_for_zero(struct lvmcache_vginfo *vginfo) -{ - log_debug_cache("VG %s decrementing %d holder(s) at %p.", - vginfo->cached_vg->name, vginfo->holders, vginfo->cached_vg); - - if (--vginfo->holders) - return 0; - - if (vginfo->vg_use_count > 1) - log_debug_cache("VG %s reused %d times.", - vginfo->cached_vg->name, vginfo->vg_use_count); - - /* Debug perform crc check only when it's been used more then once */ - if (!dm_pool_unlock(vginfo->cached_vg->vgmem, - detect_internal_vg_cache_corruption() && - (vginfo->vg_use_count > 1))) - stack; - - vginfo->cached_vg->vginfo = NULL; - vginfo->cached_vg = NULL; - - return 1; -} -// #endif - int lvmcache_get_vgnameids(struct cmd_context *cmd, int include_internal, struct dm_list *vgnameids) { @@ -1615,8 +1539,6 @@ static int _free_vginfo(struct lvmcache_vginfo *vginfo) struct lvmcache_vginfo *primary_vginfo, *vginfo2; int r = 1; - _free_cached_vgmetadata(vginfo); - vginfo2 = primary_vginfo = lvmcache_vginfo_from_vgname(vginfo->vgname, NULL); if (vginfo == primary_vginfo) { @@ -1639,6 +1561,7 @@ static int _free_vginfo(struct lvmcache_vginfo *vginfo) dm_free(vginfo->system_id); dm_free(vginfo->vgname); dm_free(vginfo->creation_host); + _suspended_vg_free(vginfo, 1, 1); if (*vginfo->vgid && _vgid_hash && lvmcache_vginfo_from_vgid(vginfo->vgid) == vginfo) @@ -2076,12 +1999,6 @@ int lvmcache_update_vgname_and_id(struct lvmcache_info *info, struct lvmcache_vg !is_orphan_vg(info->vginfo->vgname) && critical_section()) return 1; - /* If making a PV into an orphan, any cached VG metadata may become - * invalid, incorrectly still referencing device structs. 
- * (Example: pvcreate -ff) */ - if (is_orphan_vg(vgname) && info->vginfo && !is_orphan_vg(info->vginfo->vgname)) - info->vginfo->cached_vg_invalidated = 1; - /* If moving PV from orphan to real VG, always mark it valid */ if (!is_orphan_vg(vgname)) info->status &= ~CACHE_INVALID; @@ -2117,10 +2034,6 @@ int lvmcache_update_vg(struct volume_group *vg, unsigned precommitted) return_0; } - /* store text representation of vg to cache */ - if (vg->cmd->current_settings.cache_vgmetadata) - _store_metadata(vg, precommitted); - return 1; } @@ -2657,6 +2570,10 @@ struct label *lvmcache_get_label(struct lvmcache_info *info) { return info->label; } +/* + * After label_scan reads pv_header, mda_header and mda locations + * from a PV, it clears the INVALID flag. + */ void lvmcache_make_valid(struct lvmcache_info *info) { info->status &= ~CACHE_INVALID; } diff --git a/lib/cache/lvmcache.h b/lib/cache/lvmcache.h index 1b5379c44..1856344a0 100644 --- a/lib/cache/lvmcache.h +++ b/lib/cache/lvmcache.h @@ -133,9 +133,6 @@ int lvmcache_get_vgnameids(struct cmd_context *cmd, int include_internal, struct dm_list *lvmcache_get_pvids(struct cmd_context *cmd, const char *vgname, const char *vgid); -/* Returns cached volume group metadata. 
*/ -struct volume_group *lvmcache_get_vg(struct cmd_context *cmd, const char *vgname, - const char *vgid, unsigned precommitted); void lvmcache_drop_metadata(const char *vgname, int drop_precommitted); void lvmcache_commit_metadata(const char *vgname); @@ -219,4 +216,8 @@ int lvmcache_get_vg_devs(struct cmd_context *cmd, struct dm_list *devs); void lvmcache_set_independent_location(const char *vgname); +void lvmcache_save_suspended_vg(struct volume_group *vg, int precommitted); +struct volume_group *lvmcache_get_suspended_vg(const char *vgid); +void lvmcache_drop_suspended_vg(struct volume_group *vg); + #endif diff --git a/lib/commands/toolcontext.c b/lib/commands/toolcontext.c index fe6b8a384..3dc3e2df4 100644 --- a/lib/commands/toolcontext.c +++ b/lib/commands/toolcontext.c @@ -688,9 +688,6 @@ static int _process_config(struct cmd_context *cmd) if (find_config_tree_bool(cmd, report_two_word_unknown_device_CFG, NULL)) init_unknown_device_name("unknown device"); - init_detect_internal_vg_cache_corruption - (find_config_tree_bool(cmd, global_detect_internal_vg_cache_corruption_CFG, NULL)); - if (!_init_system_id(cmd)) return_0; @@ -2010,7 +2007,6 @@ struct cmd_context *create_toolcontext(unsigned is_long_lived, if (set_filters && !init_filters(cmd, 1)) goto_out; - cmd->default_settings.cache_vgmetadata = 1; cmd->current_settings = cmd->default_settings; cmd->initialized.config = 1; diff --git a/lib/config/config_settings.h b/lib/config/config_settings.h index 077fb15ce..b778f4c1c 100644 --- a/lib/config/config_settings.h +++ b/lib/config/config_settings.h @@ -868,11 +868,8 @@ cfg(global_abort_on_internal_errors_CFG, "abort_on_internal_errors", global_CFG_ "Treat any internal errors as fatal errors, aborting the process that\n" "encountered the internal error. 
Please only enable for debugging.\n") -cfg(global_detect_internal_vg_cache_corruption_CFG, "detect_internal_vg_cache_corruption", global_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_DETECT_INTERNAL_VG_CACHE_CORRUPTION, vsn(2, 2, 96), NULL, 0, NULL, - "Internal verification of VG structures.\n" - "Check if CRC matches when a parsed VG is used multiple times. This\n" - "is useful to catch unexpected changes to cached VG structures.\n" - "Please only enable for debugging.\n") +cfg(global_detect_internal_vg_cache_corruption_CFG, "detect_internal_vg_cache_corruption", global_CFG_SECTION, 0, CFG_TYPE_BOOL, 0, vsn(2, 2, 96), NULL, vsn(2, 2, 174), NULL, + "No longer used.\n") cfg(global_metadata_read_only_CFG, "metadata_read_only", global_CFG_SECTION, 0, CFG_TYPE_BOOL, DEFAULT_METADATA_READ_ONLY, vsn(2, 2, 75), NULL, 0, NULL, "No operations that change on-disk metadata are permitted.\n" diff --git a/lib/config/defaults.h b/lib/config/defaults.h index d9e19d971..7cebd8481 100644 --- a/lib/config/defaults.h +++ b/lib/config/defaults.h @@ -179,7 +179,6 @@ #define DEFAULT_LOGLEVEL 0 #define DEFAULT_INDENT 1 #define DEFAULT_ABORT_ON_INTERNAL_ERRORS 0 -#define DEFAULT_DETECT_INTERNAL_VG_CACHE_CORRUPTION 0 #define DEFAULT_UNITS "r" #define DEFAULT_SUFFIX 1 #define DEFAULT_HOSTTAGS 0 diff --git a/lib/metadata/metadata.c b/lib/metadata/metadata.c index 570cbe6b2..5d3f83572 100644 --- a/lib/metadata/metadata.c +++ b/lib/metadata/metadata.c @@ -3767,7 +3767,6 @@ static struct volume_group *_vg_read(struct cmd_context *cmd, struct dm_list all_pvs; char uuid[64] __attribute__((aligned(8))); - unsigned seqno = 0; int reappeared = 0; struct cached_vg_fmtdata *vg_fmtdata = NULL; /* Additional format-specific data about the vg */ unsigned use_previous_vg; @@ -3788,7 +3787,7 @@ static struct volume_group *_vg_read(struct cmd_context *cmd, } if (lvmetad_used() && !use_precommitted) { - if ((correct_vg = lvmcache_get_vg(cmd, vgname, vgid, precommitted))) { + if ((correct_vg = lvmetad_vg_lookup(cmd, 
vgname, vgid))) { dm_list_iterate_items(pvl, &correct_vg->pvs) reappeared += _check_reappeared_pv(correct_vg, pvl->pv, *consistent); if (reappeared && *consistent) @@ -3818,23 +3817,6 @@ static struct volume_group *_vg_read(struct cmd_context *cmd, return correct_vg; } - /* - * If cached metadata was inconsistent and *consistent is set - * then repair it now. Otherwise just return it. - * Also return if use_precommitted is set due to the FIXME in - * the missing PV logic below. - */ - if ((correct_vg = lvmcache_get_vg(cmd, vgname, vgid, precommitted)) && - (use_precommitted || !*consistent)) { - *consistent = 1; - return correct_vg; - } else { - if (correct_vg && correct_vg->seqno > seqno) - seqno = correct_vg->seqno; - release_vg(correct_vg); - correct_vg = NULL; - } - /* * Rescan the devices that are associated with this vg in lvmcache. * This repeats what was done by the command's initial label scan, @@ -4521,21 +4503,10 @@ static struct volume_group *_vg_read_by_vgid(struct cmd_context *cmd, unsigned precommitted) { const char *vgname; - struct dm_list *vgnames; struct volume_group *vg; - struct dm_str_list *strl; uint32_t warn_flags = WARN_PV_READ | WARN_INCONSISTENT; int consistent = 0; - /* Is corresponding vgname already cached? */ - if (lvmcache_vgid_is_cached(vgid)) { - if ((vg = _vg_read(cmd, NULL, vgid, warn_flags, &consistent, precommitted)) && - id_equal(&vg->id, (const struct id *)vgid)) { - return vg; - } - release_vg(vg); - } - /* * When using lvmlockd we should never reach this point. * The VG is locked, then vg_read() is done, which gets @@ -4548,36 +4519,28 @@ static struct volume_group *_vg_read_by_vgid(struct cmd_context *cmd, /* Mustn't scan if memory locked: ensure cache gets pre-populated! */ if (critical_section()) - return_NULL; + log_debug_metadata("Reading VG by vgid in critical section pre %d vgid %.8s", precommitted, vgid); - /* FIXME Need a genuine read by ID here - don't vg_read_internal by name! 
*/ - /* FIXME Disabled vgrenames while active for now because we aren't - * allowed to do a full scan here any more. */ + if (!(vgname = lvmcache_vgname_from_vgid(cmd->mem, vgid))) { + log_debug_metadata("Reading VG by vgid %.8s no VG name found, retrying.", vgid); + lvmcache_destroy(cmd, 0, 0); + lvmcache_force_next_label_scan(); + lvmcache_label_scan(cmd); + } - // The slow way - full scan required to cope with vgrename - lvmcache_force_next_label_scan(); - lvmcache_label_scan(cmd); - if (!(vgnames = get_vgnames(cmd, 0))) { - log_error("vg_read_by_vgid: get_vgnames failed"); + if (!(vgname = lvmcache_vgname_from_vgid(cmd->mem, vgid))) { + log_debug_metadata("Reading VG by vgid %.8s no VG name found.", vgid); return NULL; } - dm_list_iterate_items(strl, vgnames) { - vgname = strl->str; - if (!vgname) - continue; // FIXME Unnecessary? - consistent = 0; - if ((vg = _vg_read(cmd, vgname, vgid, warn_flags, &consistent, precommitted)) && - id_equal(&vg->id, (const struct id *)vgid)) { - if (!consistent) { - release_vg(vg); - return NULL; - } - return vg; - } - release_vg(vg); + consistent = 0; + + if ((vg = _vg_read(cmd, vgname, vgid, warn_flags, &consistent, precommitted))) { + /* Does it matter if consistent is 0 or 1? */ + return vg; } + log_debug_metadata("Reading VG by vgid %.8s not found.", vgid); return NULL; } @@ -4593,7 +4556,7 @@ struct logical_volume *lv_from_lvid(struct cmd_context *cmd, const char *lvid_s, log_very_verbose("Finding %svolume group for uuid %s", precommitted ? 
"precommitted " : "", lvid_s); if (!(vg = _vg_read_by_vgid(cmd, (const char *)lvid->id[0].uuid, precommitted))) { - log_error("Volume group for uuid not found: %s", lvid_s); + log_error("Reading VG not found for LVID %s", lvid_s); return NULL; } diff --git a/lib/metadata/vg.c b/lib/metadata/vg.c index 4c808da54..0b69e42d4 100644 --- a/lib/metadata/vg.c +++ b/lib/metadata/vg.c @@ -97,11 +97,6 @@ void release_vg(struct volume_group *vg) if (!vg || (vg->fid && vg == vg->fid->fmt->orphan_vg)) return; - /* Check if there are any vginfo holders */ - if (vg->vginfo && - !lvmcache_vginfo_holders_dec_and_test_for_zero(vg->vginfo)) - return; - release_vg(vg->vg_committed); release_vg(vg->vg_precommitted); _free_vg(vg); diff --git a/lib/misc/lvm-globals.c b/lib/misc/lvm-globals.c index 0575d21a4..0f384bbec 100644 --- a/lib/misc/lvm-globals.c +++ b/lib/misc/lvm-globals.c @@ -53,8 +53,6 @@ static int _activation_checks = 0; static char _sysfs_dir_path[PATH_MAX] = ""; static int _dev_disable_after_error_count = DEFAULT_DISABLE_AFTER_ERROR_COUNT; static uint64_t _pv_min_size = (DEFAULT_PV_MIN_SIZE_KB * 1024L >> SECTOR_SHIFT); -static int _detect_internal_vg_cache_corruption = - DEFAULT_DETECT_INTERNAL_VG_CACHE_CORRUPTION; static const char *_unknown_device_name = DEFAULT_UNKNOWN_DEVICE_NAME; void init_verbose(int level) @@ -197,11 +195,6 @@ void init_pv_min_size(uint64_t sectors) _pv_min_size = sectors; } -void init_detect_internal_vg_cache_corruption(int detect) -{ - _detect_internal_vg_cache_corruption = detect; -} - void set_cmd_name(const char *cmd) { (void) dm_strncpy(_cmd_name, cmd, sizeof(_cmd_name)); @@ -384,11 +377,6 @@ uint64_t pv_min_size(void) return _pv_min_size; } -int detect_internal_vg_cache_corruption(void) -{ - return _detect_internal_vg_cache_corruption; -} - const char *unknown_device_name(void) { return _unknown_device_name; diff --git a/lib/misc/lvm-globals.h b/lib/misc/lvm-globals.h index 14a7d4366..e23d5984d 100644 --- a/lib/misc/lvm-globals.h +++ 
b/lib/misc/lvm-globals.h @@ -51,7 +51,6 @@ void init_udev_checking(int checking); void init_dev_disable_after_error_count(int value); void init_pv_min_size(uint64_t sectors); void init_activation_checks(int checks); -void init_detect_internal_vg_cache_corruption(int detect); void init_retry_deactivation(int retry); void init_unknown_device_name(const char *name); @@ -85,7 +84,6 @@ int udev_checking(void); const char *sysfs_dir_path(void); uint64_t pv_min_size(void); int activation_checks(void); -int detect_internal_vg_cache_corruption(void); int retry_deactivation(void); const char *unknown_device_name(void); diff --git a/tools/commands.h b/tools/commands.h index d65330a88..cbd527b58 100644 --- a/tools/commands.h +++ b/tools/commands.h @@ -43,7 +43,7 @@ xx(lastlog, xx(lvchange, "Change the attributes of logical volume(s)", - CACHE_VGMETADATA | PERMITTED_READ_ONLY) + PERMITTED_READ_ONLY) xx(lvconvert, "Change logical volume layout", @@ -127,7 +127,7 @@ xx(pvdata, xx(pvdisplay, "Display various attributes of physical volume(s)", - CACHE_VGMETADATA | PERMITTED_READ_ONLY | ENABLE_ALL_DEVS | ENABLE_DUPLICATE_DEVS | LOCKD_VG_SH) + PERMITTED_READ_ONLY | ENABLE_ALL_DEVS | ENABLE_DUPLICATE_DEVS | LOCKD_VG_SH) /* ALL_VGS_IS_DEFAULT is for polldaemon to find pvmoves in-progress using process_each_vg. 
*/ @@ -145,7 +145,7 @@ xx(pvremove, xx(pvs, "Display information about physical volumes", - CACHE_VGMETADATA | PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT | ENABLE_ALL_DEVS | ENABLE_DUPLICATE_DEVS | LOCKD_VG_SH) + PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT | ENABLE_ALL_DEVS | ENABLE_DUPLICATE_DEVS | LOCKD_VG_SH) xx(pvscan, "List all physical volumes", @@ -173,7 +173,7 @@ xx(vgcfgrestore, xx(vgchange, "Change volume group attributes", - CACHE_VGMETADATA | PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT) + PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT) xx(vgck, "Check the consistency of volume group(s)", diff --git a/tools/lvmcmdline.c b/tools/lvmcmdline.c index 26510bc8b..fc96b8d81 100644 --- a/tools/lvmcmdline.c +++ b/tools/lvmcmdline.c @@ -2281,7 +2281,6 @@ static int _get_current_settings(struct cmd_context *cmd) cmd->current_settings.archive = arg_int_value(cmd, autobackup_ARG, cmd->current_settings.archive); cmd->current_settings.backup = arg_int_value(cmd, autobackup_ARG, cmd->current_settings.backup); - cmd->current_settings.cache_vgmetadata = cmd->cname->flags & CACHE_VGMETADATA ? 1 : 0; if (arg_is_set(cmd, readonly_ARG)) { cmd->current_settings.activation = 0; diff --git a/tools/tools.h b/tools/tools.h index 33cbf1071..088655129 100644 --- a/tools/tools.h +++ b/tools/tools.h @@ -113,7 +113,6 @@ struct arg_value_group_list { uint32_t prio; }; -#define CACHE_VGMETADATA 0x00000001 #define PERMITTED_READ_ONLY 0x00000002 /* Process all VGs if none specified on the command line. 
*/ #define ALL_VGS_IS_DEFAULT 0x00000004 From 29c6c171217753daf5603432bcaeaed2e8dc8418 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Wed, 7 Feb 2018 15:20:39 -0600 Subject: [PATCH 33/87] format-text.c log message fixes --- lib/format_text/format-text.c | 56 +++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/lib/format_text/format-text.c b/lib/format_text/format-text.c index ee1f11d35..8740a05c0 100644 --- a/lib/format_text/format-text.c +++ b/lib/format_text/format-text.c @@ -468,12 +468,11 @@ static struct raw_locn *_read_metadata_location_vg(struct device_area *dev_area, (isspace(vgnamebuf[len]) || vgnamebuf[len] == '{')) return rlocn; - log_debug_metadata("Volume group name found in %smetadata on %s at " FMTu64 " does " - "not match expected name %s.", - *precommitted ? "precommitted " : "", - dev_name(dev_area->dev), dev_area->start + rlocn->offset, vgname); + log_error("Metadata on %s at %llu has wrong VG name \"%s\" expected %s.", + dev_name(dev_area->dev), + (unsigned long long)(dev_area->start + rlocn->offset), + vgnamebuf, vgname); - bad: if ((info = lvmcache_info_from_pvid(dev_area->dev->pvid, dev_area->dev, 0)) && !lvmcache_update_vgname_and_id(info, &vgsummary_orphan)) stack; @@ -553,8 +552,10 @@ static struct volume_group *_vg_read_raw_area(struct format_instance *fid, wrap = (uint32_t) ((rlocn->offset + rlocn->size) - mdah->size); if (wrap > rlocn->offset) { - log_error("VG %s metadata on %s (" FMTu64 " bytes) too large for circular buffer (" FMTu64 " bytes)", - vgname, dev_name(area->dev), rlocn->size, mdah->size - MDA_HEADER_SIZE); + log_error("Metadata for VG %s on %s at %llu size %llu is too large for circular buffer.", + vgname, dev_name(area->dev), + (unsigned long long)(area->start + rlocn->offset), + (unsigned long long)rlocn->size); goto out; } @@ -572,10 +573,10 @@ static struct volume_group *_vg_read_raw_area(struct format_instance *fid, that skips parsing the metadata which also returns 
NULL. */ } - log_debug_metadata("Found metadata on %s at %"FMTu64" size %"FMTu64" for VG %s", + log_debug_metadata("Found metadata on %s at %llu size %llu for VG %s", dev_name(area->dev), - area->start + rlocn->offset, - rlocn->size, + (unsigned long long)(area->start + rlocn->offset), + (unsigned long long)rlocn->size, vgname); if (vg && precommitted) @@ -1220,8 +1221,9 @@ int read_metadata_location_summary(const struct format_type *fmt, * If no valid offset, do not try to search for vgname */ if (!rlocn->offset) { - log_debug_metadata("Metadata location on %s at %"FMTu64" has offset 0.", - dev_name(dev_area->dev), dev_area->start + rlocn->offset); + log_debug_metadata("Metadata location on %s at %llu has offset 0.", + dev_name(dev_area->dev), + (unsigned long long)(dev_area->start + rlocn->offset)); return 0; } @@ -1235,9 +1237,10 @@ int read_metadata_location_summary(const struct format_type *fmt, /* Ignore this entry if the characters aren't permissible */ if (!validate_name(buf)) { - log_error("Metadata location on %s at %"FMTu64" begins with invalid VG name.", - dev_name(dev_area->dev), dev_area->start + rlocn->offset); - return_0; + log_error("Metadata location on %s at %llu begins with invalid VG name.", + dev_name(dev_area->dev), + (unsigned long long)(dev_area->start + rlocn->offset)); + return 0; } /* We found a VG - now check the metadata */ @@ -1245,8 +1248,9 @@ int read_metadata_location_summary(const struct format_type *fmt, wrap = (uint32_t) ((rlocn->offset + rlocn->size) - mdah->size); if (wrap > rlocn->offset) { - log_error("Metadata location on %s at %"FMTu64" is too large for circular buffer.", - dev_name(dev_area->dev), dev_area->start + rlocn->offset); + log_error("Metadata location on %s at %llu is too large for circular buffer.", + dev_name(dev_area->dev), + (unsigned long long)(dev_area->start + rlocn->offset)); return 0; } @@ -1277,22 +1281,24 @@ int read_metadata_location_summary(const struct format_type *fmt, (off_t) (dev_area->start + 
MDA_HEADER_SIZE), wrap, calc_crc, vgsummary->vgname ? 1 : 0, vgsummary)) { - log_error("Metadata location on %s at %"FMTu64" has invalid summary for VG.", - dev_name(dev_area->dev), dev_area->start + rlocn->offset); + log_error("Metadata location on %s at %llu has invalid summary for VG.", + dev_name(dev_area->dev), + (unsigned long long)(dev_area->start + rlocn->offset)); return 0; } /* Ignore this entry if the characters aren't permissible */ if (!validate_name(vgsummary->vgname)) { - log_error("Metadata location on %s at %"FMTu64" has invalid VG name.", - dev_name(dev_area->dev), dev_area->start + rlocn->offset); + log_error("Metadata location on %s at %llu has invalid VG name.", + dev_name(dev_area->dev), + (unsigned long long)(dev_area->start + rlocn->offset)); return 0; } - log_debug_metadata("Found metadata summary on %s at %"FMTu64" size %"FMTu64" for VG %s", + log_debug_metadata("Found metadata summary on %s at %llu size %llu for VG %s", dev_name(dev_area->dev), - dev_area->start + rlocn->offset, - rlocn->size, + (unsigned long long)(dev_area->start + rlocn->offset), + (unsigned long long)rlocn->size, vgsummary->vgname); if (mda_free_sectors) { @@ -1344,7 +1350,7 @@ static int _scan_raw(const struct format_type *fmt, const char *vgname __attribu } if (read_metadata_location_summary(fmt, mdah, 0, &rl->dev_area, &vgsummary, NULL)) { - vg = _vg_read_raw_area(&fid, vgsummary.vgname, &rl->dev_area, NULL, NULL, 0, 0, 0); + vg = _vg_read_raw_area(&fid, vgsummary.vgname, &rl->dev_area, NULL, NULL, 0, 0); if (vg) { lvmcache_update_vg(vg, 0); lvmcache_set_independent_location(vg->name); From f17c2cf7c614e8f6e4ffe9b7415d628ecab61d47 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Fri, 9 Feb 2018 12:43:12 -0600 Subject: [PATCH 34/87] pvremove: device check doesn't require label_read It just needs to check if the device was found during the scan, which means checking if it exists in lvmcache. 
--- lib/cache/lvmcache.c | 19 +++++++++++++++++++ lib/cache/lvmcache.h | 2 ++ tools/toollib.c | 13 ++++++++----- 3 files changed, 29 insertions(+), 5 deletions(-) diff --git a/lib/cache/lvmcache.c b/lib/cache/lvmcache.c index 28e46bbb7..87bcc3723 100644 --- a/lib/cache/lvmcache.c +++ b/lib/cache/lvmcache.c @@ -2562,6 +2562,25 @@ int lvmcache_foreach_ba(struct lvmcache_info *info, return 1; } +struct label *lvmcache_get_dev_label(struct device *dev) +{ + struct lvmcache_info *info; + + if ((info = lvmcache_info_from_pvid(dev->pvid, NULL, 0))) { + /* dev would be different for a duplicate */ + if (info->dev == dev) + return info->label; + } + return NULL; +} + +int lvmcache_has_dev_info(struct device *dev) +{ + if (lvmcache_info_from_pvid(dev->pvid, NULL, 0)) + return 1; + return 0; +} + /* * The lifetime of the label returned is tied to the lifetime of the * lvmcache_info which is the same as lvmcache itself. diff --git a/lib/cache/lvmcache.h b/lib/cache/lvmcache.h index 1856344a0..3967b29c2 100644 --- a/lib/cache/lvmcache.h +++ b/lib/cache/lvmcache.h @@ -160,6 +160,8 @@ uint32_t lvmcache_ext_flags(struct lvmcache_info *info); const struct format_type *lvmcache_fmt(struct lvmcache_info *info); struct label *lvmcache_get_label(struct lvmcache_info *info); +struct label *lvmcache_get_dev_label(struct device *dev); +int lvmcache_has_dev_info(struct device *dev); void lvmcache_update_pv(struct lvmcache_info *info, struct physical_volume *pv, const struct format_type *fmt); diff --git a/tools/toollib.c b/tools/toollib.c index 1c216d807..659319528 100644 --- a/tools/toollib.c +++ b/tools/toollib.c @@ -5292,14 +5292,12 @@ static int _pvremove_check_single(struct cmd_context *cmd, * Is there a pv here already? * If not, this is an error unless you used -f. 
*/ - if (!label_read(pd->dev, &label, 0)) { + if (!lvmcache_has_dev_info(pv->dev)) { if (pp->force) { dm_list_move(&pp->arg_process, &pd->list); return 1; } else { - log_error("No PV label found on %s.", pd->name); - dm_list_move(&pp->arg_fail, &pd->list); - return 1; + pd->is_not_pv = 1; } } @@ -5308,7 +5306,11 @@ static int _pvremove_check_single(struct cmd_context *cmd, * device, a PV used in a VG. */ - if (vg && !is_orphan_vg(vg->name)) { + if (pd->is_not_pv) { + /* Device is not a PV. */ + log_debug("Found pvremove arg %s: device is not a PV.", pd->name); + + } else if (vg && !is_orphan_vg(vg->name)) { /* Device is a PV used in a VG. */ log_debug("Found pvremove arg %s: pv is used in %s.", pd->name, vg->name); pd->is_vg_pv = 1; @@ -5330,6 +5332,7 @@ static int _pvremove_check_single(struct cmd_context *cmd, else pp->orphan_vg_name = FMT_TEXT_ORPHAN_VG_NAME; } else { + /* FIXME: is it possible to reach here? */ log_debug("Found pvremove arg %s: device is not a PV.", pd->name); /* Device is not a PV. */ pd->is_not_pv = 1; From 4343280ebc0e2aae0de5fe959c24574b13d0d7be Mon Sep 17 00:00:00 2001 From: David Teigland Date: Fri, 9 Feb 2018 11:24:40 -0600 Subject: [PATCH 35/87] process_each_label: use lvmcache In the same way as the other process_each functions. In the common case all the info that's needed can be used from lvmcache after a label scan. But this means that unchosen devs for duplicate PVs need to be handled explicitly. 
--- tools/toollib.c | 62 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 54 insertions(+), 8 deletions(-) diff --git a/tools/toollib.c b/tools/toollib.c index 659319528..f6169ae6b 100644 --- a/tools/toollib.c +++ b/tools/toollib.c @@ -1493,13 +1493,19 @@ int process_each_label(struct cmd_context *cmd, int argc, char **argv, struct label *label; struct dev_iter *iter; struct device *dev; - + struct lvmcache_info *info; + struct dm_list process_duplicates; + struct device_list *devl; int ret_max = ECMD_PROCESSED; int ret; int opt = 0; + dm_list_init(&process_duplicates); + log_set_report_object_type(LOG_REPORT_OBJECT_TYPE_LABEL); + lvmcache_label_scan(cmd); + if (argc) { for (; opt < argc; opt++) { if (!(dev = dev_cache_get(argv[opt], cmd->full_filter))) { @@ -1509,15 +1515,55 @@ int process_each_label(struct cmd_context *cmd, int argc, char **argv, continue; } - log_set_report_object_name_and_id(dev_name(dev), NULL); - - if (!label_read(dev, &label, 0)) { - log_error("No physical volume label read from %s.", - argv[opt]); - ret_max = ECMD_FAILED; + if (!(label = lvmcache_get_dev_label(dev))) { + if (!lvmcache_dev_is_unchosen_duplicate(dev)) { + log_error("No physical volume label read from %s.", argv[opt]); + ret_max = ECMD_FAILED; + } else { + if (!(devl = dm_malloc(sizeof(*devl)))) + return_0; + devl->dev = dev; + dm_list_add(&process_duplicates, &devl->list); + } continue; } + log_set_report_object_name_and_id(dev_name(dev), NULL); + + ret = process_single_label(cmd, label, handle); + report_log_ret_code(ret); + + if (ret > ret_max) + ret_max = ret; + + log_set_report_object_name_and_id(NULL, NULL); + + if (sigint_caught()) + break; + } + + dm_list_iterate_items(devl, &process_duplicates) { + /* + * remove the existing dev for this pvid from lvmcache + * so that the duplicate dev can replace it. + */ + if ((info = lvmcache_info_from_pvid(devl->dev->pvid, NULL, 0))) + lvmcache_del(info); + + /* + * add info to lvmcache from the duplicate dev. 
+ */ + label_read(devl->dev, NULL, 0); + + /* + * the info/label should now be found because + * the label_read should have added it. + */ + if (!(label = lvmcache_get_dev_label(devl->dev))) + continue; + + log_set_report_object_name_and_id(dev_name(dev), NULL); + ret = process_single_label(cmd, label, handle); report_log_ret_code(ret); @@ -1541,7 +1587,7 @@ int process_each_label(struct cmd_context *cmd, int argc, char **argv, while ((dev = dev_iter_get(iter))) { - if (!label_read(dev, &label, 0)) + if (!(label = lvmcache_get_dev_label(dev))) continue; log_set_report_object_name_and_id(dev_name(label->dev), NULL); From 6c67c7557c266063db962807aca18fc088ba921e Mon Sep 17 00:00:00 2001 From: David Teigland Date: Tue, 13 Feb 2018 08:58:35 -0600 Subject: [PATCH 36/87] scan: use separate fd for bcache Create a new dev->bcache_fd that the scanning code owns and is in charge of opening/closing. This prevents other parts of lvm code (which do various open/close) from interfering with the bcache fd. A number of dev_open and dev_close are removed from the reading path since the read path now uses the bcache. With that in place, open(O_EXCL) for pvcreate/pvremove can then be fixed. That wouldn't work previously because of other open fds. 
--- lib/cache/lvmetad.c | 12 ++- lib/config/config.c | 4 +- lib/device/dev-io.c | 28 +++--- lib/device/device.h | 2 + lib/format_text/format-text.c | 55 ++--------- lib/format_text/text_label.c | 13 --- lib/label/label.c | 166 ++++++++++++++++++++++++++------- lib/label/label.h | 1 + lib/metadata/metadata-liblvm.c | 1 + tools/toollib.c | 44 ++++----- 10 files changed, 187 insertions(+), 139 deletions(-) diff --git a/lib/cache/lvmetad.c b/lib/cache/lvmetad.c index 81ba1b79e..ed0bcde32 100644 --- a/lib/cache/lvmetad.c +++ b/lib/cache/lvmetad.c @@ -2551,11 +2551,18 @@ static int _lvmetad_get_pv_cache_list(struct cmd_context *cmd, struct dm_list *p */ static void _update_pv_in_udev(struct cmd_context *cmd, dev_t devt) { - struct device *dev; - log_debug_devs("device %d:%d open to update udev", + /* + * FIXME: this is disabled as part of removing dev_opens + * to integrate bcache. If this is really needed, we + * can do a separate open/close here. + */ + log_debug_devs("SKIP device %d:%d open to update udev", (int)MAJOR(devt), (int)MINOR(devt)); +#if 0 + struct device *dev; + if (!(dev = dev_cache_get_by_devt(devt, cmd->lvmetad_filter))) { log_error("_update_pv_in_udev no dev found"); return; @@ -2568,6 +2575,7 @@ static void _update_pv_in_udev(struct cmd_context *cmd, dev_t devt) if (!dev_close(dev)) stack; +#endif } /* diff --git a/lib/config/config.c b/lib/config/config.c index 2d7db698b..0711b8ca9 100644 --- a/lib/config/config.c +++ b/lib/config/config.c @@ -534,11 +534,11 @@ int config_file_read_fd(struct dm_config_tree *cft, struct device *dev, dev_io_r return 0; } - if (!bcache_read_bytes(scan_bcache, dev->fd, offset, size, buf)) + if (!bcache_read_bytes(scan_bcache, dev->bcache_fd, offset, size, buf)) goto out; if (size2) { - if (!bcache_read_bytes(scan_bcache, dev->fd, offset2, size2, buf + size)) + if (!bcache_read_bytes(scan_bcache, dev->bcache_fd, offset2, size2, buf + size)) goto out; } diff --git a/lib/device/dev-io.c b/lib/device/dev-io.c index
c321e61ad..39d5d30f3 100644 --- a/lib/device/dev-io.c +++ b/lib/device/dev-io.c @@ -330,6 +330,8 @@ static int _dev_get_size_file(struct device *dev, uint64_t *size) static int _dev_get_size_dev(struct device *dev, uint64_t *size) { const char *name = dev_name(dev); + int fd = dev->bcache_fd; + int do_close = 0; if (dev->size_seqno == _dev_size_seqno) { log_very_verbose("%s: using cached size %" PRIu64 " sectors", @@ -338,12 +340,16 @@ static int _dev_get_size_dev(struct device *dev, uint64_t *size) return 1; } - if (!dev_open_readonly(dev)) - return_0; + if (fd <= 0) { + if (!dev_open_readonly(dev)) + return_0; + fd = dev_fd(dev); + do_close = 1; + } - if (ioctl(dev_fd(dev), BLKGETSIZE64, size) < 0) { + if (ioctl(fd, BLKGETSIZE64, size) < 0) { log_sys_error("ioctl BLKGETSIZE64", name); - if (!dev_close(dev)) + if (do_close && !dev_close(dev)) log_sys_error("close", name); return 0; } @@ -352,7 +358,7 @@ static int _dev_get_size_dev(struct device *dev, uint64_t *size) dev->size = *size; dev->size_seqno = _dev_size_seqno; - if (!dev_close(dev)) + if (do_close && !dev_close(dev)) log_sys_error("close", name); log_very_verbose("%s: size is %" PRIu64 " sectors", name, *size); @@ -629,17 +635,12 @@ int dev_open_readonly_quiet(struct device *dev) int dev_test_excl(struct device *dev) { - int flags; - int r; + int flags = 0; - flags = vg_write_lock_held() ? 
O_RDWR : O_RDONLY; flags |= O_EXCL; + flags |= O_RDWR; - r = dev_open_flags(dev, flags, 1, 1); - if (r) - dev_close_immediate(dev); - - return r; + return dev_open_flags(dev, flags, 1, 1); } static void _close(struct device *dev) @@ -659,7 +660,6 @@ static void _close(struct device *dev) static int _dev_close(struct device *dev, int immediate) { - if (dev->fd < 0) { log_error("Attempt to close device '%s' " "which is not open.", dev_name(dev)); diff --git a/lib/device/device.h b/lib/device/device.h index d5eb00f72..36d1e3e0f 100644 --- a/lib/device/device.h +++ b/lib/device/device.h @@ -32,6 +32,7 @@ #define DEV_ASSUMED_FOR_LV 0x00000200 /* Is device assumed for an LV */ #define DEV_NOT_O_NOATIME 0x00000400 /* Don't use O_NOATIME */ #define DEV_IN_BCACHE 0x00000800 /* dev fd is open and used in bcache */ +#define DEV_BCACHE_EXCL 0x00001000 /* bcache_fd should be open EXCL */ /* * Support for external device info. @@ -66,6 +67,7 @@ struct device { int phys_block_size; int block_size; int read_ahead; + int bcache_fd; uint32_t flags; unsigned size_seqno; uint64_t size; diff --git a/lib/format_text/format-text.c b/lib/format_text/format-text.c index 8740a05c0..a5d839762 100644 --- a/lib/format_text/format-text.c +++ b/lib/format_text/format-text.c @@ -320,7 +320,7 @@ static int _raw_read_mda_header(struct mda_header *mdah, struct device_area *dev log_debug_metadata("Reading mda header sector from %s at %llu", dev_name(dev_area->dev), (unsigned long long)dev_area->start); - if (!bcache_read_bytes(scan_bcache, dev_area->dev->fd, dev_area->start, MDA_HEADER_SIZE, mdah)) { + if (!bcache_read_bytes(scan_bcache, dev_area->dev->bcache_fd, dev_area->start, MDA_HEADER_SIZE, mdah)) { log_error("Failed to read metadata area header on %s at %llu", dev_name(dev_area->dev), (unsigned long long)dev_area->start); return 0; @@ -462,7 +462,7 @@ static struct raw_locn *_read_metadata_location_vg(struct device_area *dev_area, */ memset(vgnamebuf, 0, sizeof(vgnamebuf)); - 
bcache_read_bytes(scan_bcache, dev_area->dev->fd, dev_area->start + rlocn->offset, NAME_LEN, vgnamebuf); + bcache_read_bytes(scan_bcache, dev_area->dev->bcache_fd, dev_area->start + rlocn->offset, NAME_LEN, vgnamebuf); if (!strncmp(vgnamebuf, vgname, len = strlen(vgname)) && (isspace(vgnamebuf[len]) || vgnamebuf[len] == '{')) @@ -510,18 +510,12 @@ static int _raw_holds_vgname(struct format_instance *fid, int noprecommit = 0; struct mda_header *mdah; - if (!dev_open_readonly(dev_area->dev)) - return_0; - if (!(mdah = raw_read_mda_header(fid->fmt, dev_area, 0))) return_0; if (_read_metadata_location_vg(dev_area, mdah, 0, vgname, &noprecommit)) r = 1; - if (!dev_close(dev_area->dev)) - stack; - return r; } @@ -595,14 +589,8 @@ static struct volume_group *_vg_read_raw(struct format_instance *fid, struct mda_context *mdac = (struct mda_context *) mda->metadata_locn; struct volume_group *vg; - if (!dev_open_readonly(mdac->area.dev)) - return_NULL; - vg = _vg_read_raw_area(fid, vgname, &mdac->area, vg_fmtdata, use_previous_vg, 0, mda_is_primary(mda)); - if (!dev_close(mdac->area.dev)) - stack; - return vg; } @@ -615,14 +603,8 @@ static struct volume_group *_vg_read_precommit_raw(struct format_instance *fid, struct mda_context *mdac = (struct mda_context *) mda->metadata_locn; struct volume_group *vg; - if (!dev_open_readonly(mdac->area.dev)) - return_NULL; - vg = _vg_read_raw_area(fid, vgname, &mdac->area, vg_fmtdata, use_previous_vg, 1, mda_is_primary(mda)); - if (!dev_close(mdac->area.dev)) - stack; - return vg; } @@ -653,9 +635,6 @@ static int _vg_write_raw(struct format_instance *fid, struct volume_group *vg, if (!found) return 1; - if (!dev_open(mdac->area.dev)) - return_0; - if (!(mdah = raw_read_mda_header(fid->fmt, &mdac->area, mda_is_primary(mda)))) goto_out; @@ -694,6 +673,9 @@ static int _vg_write_raw(struct format_instance *fid, struct volume_group *vg, label_scan_invalidate(mdac->area.dev); + if (!dev_open(mdac->area.dev)) + return_0; + /* Write text out, 
circularly */ if (!dev_write(mdac->area.dev, mdac->area.start + mdac->rlocn.offset, (size_t) (mdac->rlocn.size - new_wrap), MDA_CONTENT_REASON(mda_is_primary(mda)), @@ -879,9 +861,6 @@ static int _vg_remove_raw(struct format_instance *fid, struct volume_group *vg, int r = 0; int noprecommit = 0; - if (!dev_open(mdac->area.dev)) - return_0; - if (!(mdah = raw_read_mda_header(fid->fmt, &mdac->area, mda_is_primary(mda)))) goto_out; @@ -895,6 +874,9 @@ static int _vg_remove_raw(struct format_instance *fid, struct volume_group *vg, rlocn->checksum = 0; rlocn_set_ignored(mdah->raw_locns, mda_is_ignored(mda)); + if (!dev_open(mdac->area.dev)) + return_0; + if (!_raw_write_mda_header(fid->fmt, mdac->area.dev, mda_is_primary(mda), mdac->area.start, mdah)) { dm_pool_free(fid->fmt->cmd->mem, mdah); @@ -1227,7 +1209,7 @@ int read_metadata_location_summary(const struct format_type *fmt, return 0; } - bcache_read_bytes(scan_bcache, dev_area->dev->fd, dev_area->start + rlocn->offset, NAME_LEN, buf); + bcache_read_bytes(scan_bcache, dev_area->dev->bcache_fd, dev_area->start + rlocn->offset, NAME_LEN, buf); while (buf[len] && !isspace(buf[len]) && buf[len] != '{' && len < (NAME_LEN - 1)) @@ -1338,15 +1320,9 @@ static int _scan_raw(const struct format_type *fmt, const char *vgname __attribu dm_list_iterate_items(rl, raw_list) { log_debug_metadata("Scanning independent dev %s", dev_name(rl->dev_area.dev)); - /* FIXME We're reading mdah twice here... 
*/ - if (!dev_open_readonly(rl->dev_area.dev)) { - stack; - continue; - } - if (!(mdah = raw_read_mda_header(fmt, &rl->dev_area, 0))) { stack; - goto close_dev; + continue; } if (read_metadata_location_summary(fmt, mdah, 0, &rl->dev_area, &vgsummary, NULL)) { @@ -1356,9 +1332,6 @@ static int _scan_raw(const struct format_type *fmt, const char *vgname __attribu lvmcache_set_independent_location(vg->name); } } - close_dev: - if (!dev_close(rl->dev_area.dev)) - stack; } return 1; @@ -1488,9 +1461,6 @@ static int _text_pv_write(const struct format_type *fmt, struct physical_volume if (!lvmcache_update_das(info, pv)) return_0; - if (!dev_open(pv->dev)) - return_0; - baton.pv = pv; baton.fmt = fmt; @@ -1502,8 +1472,6 @@ static int _text_pv_write(const struct format_type *fmt, struct physical_volume if (!label_write(pv->dev, label)) { stack; - if (!dev_close(pv->dev)) - stack; return 0; } @@ -1513,9 +1481,6 @@ static int _text_pv_write(const struct format_type *fmt, struct physical_volume * update the cache afterwards? 
*/ - if (!dev_close(pv->dev)) - return_0; - return 1; } diff --git a/lib/format_text/text_label.c b/lib/format_text/text_label.c index 1c322dd26..206ae3f2e 100644 --- a/lib/format_text/text_label.c +++ b/lib/format_text/text_label.c @@ -338,12 +338,6 @@ static int _read_mda_header_and_metadata(struct metadata_area *mda, void *baton) * TODO: make lvmcache smarter and move this cache logic there */ - if (!dev_open_readonly(mdac->area.dev)) { - mda_set_ignored(mda, 1); - stack; - return 1; - } - if (!(mdah = raw_read_mda_header(fmt, &mdac->area, mda_is_primary(mda)))) { stack; goto close_dev; @@ -355,23 +349,16 @@ static int _read_mda_header_and_metadata(struct metadata_area *mda, void *baton) log_debug_metadata("Ignoring mda on device %s at offset " FMTu64, dev_name(mdac->area.dev), mdac->area.start); - if (!dev_close(mdac->area.dev)) - stack; return 1; } if (read_metadata_location_summary(fmt, mdah, mda_is_primary(mda), &mdac->area, &vgsummary, &mdac->free_sectors) && !lvmcache_update_vgname_and_id(p->info, &vgsummary)) { - if (!dev_close(mdac->area.dev)) - stack; return_0; } close_dev: - if (!dev_close(mdac->area.dev)) - stack; - return 1; } diff --git a/lib/label/label.c b/lib/label/label.c index 57d52484c..2ec187c81 100644 --- a/lib/label/label.c +++ b/lib/label/label.c @@ -116,6 +116,8 @@ int label_remove(struct device *dev) log_very_verbose("Scanning for labels to wipe from %s", dev_name(dev)); + label_scan_invalidate(dev); + if (!dev_open(dev)) return_0; @@ -125,8 +127,6 @@ int label_remove(struct device *dev) */ dev_flush(dev); - label_scan_invalidate(dev); - if (!dev_read(dev, UINT64_C(0), LABEL_SCAN_SIZE, DEV_IO_LABEL, readbuf)) { log_debug_devs("%s: Failed to read label area", dev_name(dev)); goto out; @@ -396,6 +396,71 @@ static int _process_block(struct device *dev, struct block *bb, int *is_lvm_devi return ret; } +static int _scan_dev_open(struct device *dev) +{ + const char *name; + int flags = 0; + int fd; + + if (dev->flags & DEV_IN_BCACHE) { + 
log_error("scan_dev_open %s DEV_IN_BCACHE already set", dev_name(dev)); + dev->flags &= ~DEV_IN_BCACHE; + } + + if (dev->bcache_fd > 0) { + log_error("scan_dev_open %s already open with fd %d", + dev_name(dev), dev->bcache_fd); + return 0; + } + + if (!(name = dev_name_confirmed(dev, 1))) { + log_error("scan_dev_open %s no name", dev_name(dev)); + return 0; + } + + flags |= O_RDWR; + flags |= O_DIRECT; + flags |= O_NOATIME; + + if (dev->flags & DEV_BCACHE_EXCL) + flags |= O_EXCL; + + fd = open(name, flags, 0777); + + if (fd < 0) { + if ((errno == EBUSY) && (flags & O_EXCL)) { + log_error("Can't open %s exclusively. Mounted filesystem?", + dev_name(dev)); + } else { + log_error("scan_dev_open %s failed errno %d", dev_name(dev), errno); + } + return 0; + } + + dev->flags |= DEV_IN_BCACHE; + dev->bcache_fd = fd; + return 1; +} + +static int _scan_dev_close(struct device *dev) +{ + if (!(dev->flags & DEV_IN_BCACHE)) + log_error("scan_dev_close %s no DEV_IN_BCACHE set", dev_name(dev)); + + dev->flags &= ~DEV_IN_BCACHE; + dev->flags &= ~DEV_BCACHE_EXCL; + + if (dev->bcache_fd < 0) { + log_error("scan_dev_close %s already closed", dev_name(dev)); + return 0; + } + + if (close(dev->bcache_fd)) + log_warn("close %s errno %d", dev_name(dev), errno); + dev->bcache_fd = -1; + return 1; +} + /* * Read or reread label/metadata from selected devs. * @@ -407,7 +472,7 @@ static int _process_block(struct device *dev, struct block *bb, int *is_lvm_devi * its info is removed from lvmcache. */ -static int _scan_list(struct dm_list *devs) +static int _scan_list(struct dm_list *devs, int *failed) { struct dm_list wait_devs; struct dm_list done_devs; @@ -439,19 +504,16 @@ static int _scan_list(struct dm_list *devs) if (!rem_prefetches) break; - /* - * The in-bcache flag corresponds with this dev_open. - * Clearing the in-bcache flag should be paired with - * a dev_close. (This dev may already be in bcache.) 
- */ if (!_in_bcache(devl->dev)) { - if (!dev_open_readonly(devl->dev)) { + if (!_scan_dev_open(devl->dev)) { log_debug_devs("%s: Failed to open device.", dev_name(devl->dev)); + dm_list_del(&devl->list); + scan_failed_count++; continue; } } - bcache_prefetch(scan_bcache, devl->dev->fd, 0); + bcache_prefetch(scan_bcache, devl->dev->bcache_fd, 0); rem_prefetches--; @@ -462,12 +524,12 @@ static int _scan_list(struct dm_list *devs) dm_list_iterate_items_safe(devl, devl2, &wait_devs) { bb = NULL; - if (!bcache_get(scan_bcache, devl->dev->fd, 0, 0, &bb)) { + if (!bcache_get(scan_bcache, devl->dev->bcache_fd, 0, 0, &bb)) { log_debug_devs("%s: Failed to scan device.", dev_name(devl->dev)); scan_failed_count++; scan_failed = 1; } else { - log_debug_devs("Processing data from device %s fd %d block %p", dev_name(devl->dev), devl->dev->fd, bb); + log_debug_devs("Processing data from device %s fd %d block %p", dev_name(devl->dev), devl->dev->bcache_fd, bb); _process_block(devl->dev, bb, &is_lvm_device); scan_lvm_count++; scan_failed = 0; @@ -483,12 +545,8 @@ static int _scan_list(struct dm_list *devs) * drop it from bcache. */ if (scan_failed || !is_lvm_device) { - devl->dev->flags &= ~DEV_IN_BCACHE; - bcache_invalidate_fd(scan_bcache, devl->dev->fd); - dev_close(devl->dev); - } else { - /* The device must be kept open while it's in bcache. */ - devl->dev->flags |= DEV_IN_BCACHE; + bcache_invalidate_fd(scan_bcache, devl->dev->bcache_fd); + _scan_dev_close(devl->dev); } dm_list_del(&devl->list); @@ -498,12 +556,13 @@ static int _scan_list(struct dm_list *devs) if (!dm_list_empty(devs)) goto scan_more; - /* FIXME: let the caller know if some lvm devices failed to be scanned. 
*/ - log_debug_devs("Scanned %d devices: %d for lvm, %d failed.", dm_list_size(&done_devs), scan_lvm_count, scan_failed_count); - return 0; + if (failed) + *failed = scan_failed_count; + + return 1; } /* @@ -548,8 +607,10 @@ int label_scan(struct cmd_context *cmd) * label_scan should not generally be called a second time, * so this will usually not be true. */ - if (_in_bcache(dev)) - bcache_invalidate_fd(scan_bcache, dev->fd); + if (_in_bcache(dev)) { + bcache_invalidate_fd(scan_bcache, dev->bcache_fd); + _scan_dev_close(dev); + } }; dev_iter_destroy(iter); @@ -573,7 +634,9 @@ int label_scan(struct cmd_context *cmd) return 0; } - return _scan_list(&all_devs); + _scan_list(&all_devs, NULL); + + return 1; } /* @@ -589,19 +652,48 @@ int label_scan_devs(struct cmd_context *cmd, struct dm_list *devs) struct device_list *devl; dm_list_iterate_items(devl, devs) { - if (_in_bcache(devl->dev)) - bcache_invalidate_fd(scan_bcache, devl->dev->fd); + if (_in_bcache(devl->dev)) { + bcache_invalidate_fd(scan_bcache, devl->dev->bcache_fd); + _scan_dev_close(devl->dev); + } } - return _scan_list(devs); + _scan_list(devs, NULL); + + /* FIXME: this function should probably fail if any devs couldn't be scanned */ + + return 1; +} + +int label_scan_devs_excl(struct dm_list *devs) +{ + struct device_list *devl; + int failed = 0; + + dm_list_iterate_items(devl, devs) { + if (_in_bcache(devl->dev)) { + bcache_invalidate_fd(scan_bcache, devl->dev->bcache_fd); + _scan_dev_close(devl->dev); + } + /* + * With this flag set, _scan_dev_open() done by + * _scan_list() will do open EXCL + */ + devl->dev->flags |= DEV_BCACHE_EXCL; + } + + _scan_list(devs, &failed); + + if (failed) + return 0; + return 1; } void label_scan_invalidate(struct device *dev) { if (_in_bcache(dev)) { - dev->flags &= ~DEV_IN_BCACHE; - bcache_invalidate_fd(scan_bcache, dev->fd); - dev_close(dev); + bcache_invalidate_fd(scan_bcache, dev->bcache_fd); + _scan_dev_close(dev); } } @@ -645,7 +737,7 @@ int label_read(struct 
device *dev, struct label **labelp, uint64_t unused_sector { struct dm_list one_dev; struct device_list *devl; - int ret; + int failed = 0; /* scanning is done by list, so make a single item list for this dev */ if (!(devl = dm_zalloc(sizeof(*devl)))) @@ -654,10 +746,12 @@ int label_read(struct device *dev, struct label **labelp, uint64_t unused_sector dm_list_init(&one_dev); dm_list_add(&one_dev, &devl->list); - if (_in_bcache(dev)) - bcache_invalidate_fd(scan_bcache, dev->fd); + if (_in_bcache(dev)) { + bcache_invalidate_fd(scan_bcache, dev->bcache_fd); + _scan_dev_close(dev); + } - ret = _scan_list(&one_dev); + _scan_list(&one_dev, &failed); /* * FIXME: this ugliness of returning a pointer to the label is @@ -671,7 +765,9 @@ int label_read(struct device *dev, struct label **labelp, uint64_t unused_sector *labelp = lvmcache_get_label(info); } - return ret; + if (failed) + return 0; + return 1; } /* diff --git a/lib/label/label.h b/lib/label/label.h index d9e36bc33..eb62f6452 100644 --- a/lib/label/label.h +++ b/lib/label/label.h @@ -104,6 +104,7 @@ extern struct bcache *scan_bcache; int label_scan(struct cmd_context *cmd); int label_scan_devs(struct cmd_context *cmd, struct dm_list *devs); +int label_scan_devs_excl(struct dm_list *devs); void label_scan_invalidate(struct device *dev); void label_scan_destroy(struct cmd_context *cmd); int label_read(struct device *dev, struct label **labelp, uint64_t unused_sector); diff --git a/lib/metadata/metadata-liblvm.c b/lib/metadata/metadata-liblvm.c index 388e8d9f0..d8b3b2aae 100644 --- a/lib/metadata/metadata-liblvm.c +++ b/lib/metadata/metadata-liblvm.c @@ -241,6 +241,7 @@ static int _pvcreate_check(struct cmd_context *cmd, const char *name, name); goto out; } + dev_close(dev); if (!wipe_known_signatures(cmd, dev, name, TYPE_LVM1_MEMBER | TYPE_LVM2_MEMBER, diff --git a/tools/toollib.c b/tools/toollib.c index f6169ae6b..0cc2edd70 100644 --- a/tools/toollib.c +++ b/tools/toollib.c @@ -5082,16 +5082,6 @@ static int 
_pvcreate_check_single(struct cmd_context *cmd, log_debug("Checking pvcreate arg %s which has existing PVID: %.32s.", pv_dev_name(pv), pv->dev->pvid[0] ? pv->dev->pvid : ""); - /* - * This test will fail if the device belongs to an MD array. - */ - if (!dev_test_excl(pv->dev)) { - /* FIXME Detect whether device-mapper itself is still using it */ - log_error("Can't open %s exclusively. Mounted filesystem?", - pv_dev_name(pv)); - dm_list_move(&pp->arg_fail, &pd->list); - return 1; - } /* * Don't allow using a device with duplicates. @@ -5223,14 +5213,6 @@ static int _pv_confirm_single(struct cmd_context *cmd, if (!found) return 1; - /* Repeat the same from check_single. */ - if (!dev_test_excl(pv->dev)) { - /* FIXME Detect whether device-mapper itself is still using it */ - log_error("Can't open %s exclusively. Mounted filesystem?", - pv_dev_name(pv)); - goto fail; - } - /* * What kind of device is this: an orphan PV, an uninitialized/unused * device, a PV used in a VG. @@ -5323,16 +5305,6 @@ static int _pvremove_check_single(struct cmd_context *cmd, log_debug("Checking device %s for pvremove %.32s.", pv_dev_name(pv), pv->dev->pvid[0] ? pv->dev->pvid : ""); - /* - * This test will fail if the device belongs to an MD array. - */ - if (!dev_test_excl(pv->dev)) { - /* FIXME Detect whether device-mapper itself is still using it */ - log_error("Can't open %s exclusively. Mounted filesystem?", - pv_dev_name(pv)); - dm_list_move(&pp->arg_fail, &pd->list); - return 1; - } /* * Is there a pv here already? 
@@ -5458,8 +5430,10 @@ int pvcreate_each_device(struct cmd_context *cmd, struct volume_group *orphan_vg; struct dm_list remove_duplicates; struct dm_list arg_sort; + struct dm_list rescan_devs; struct pv_list *pvl; struct pv_list *vgpvl; + struct device_list *devl; const char *pv_name; int consistent = 0; int must_use_all = (cmd->cname->flags & MUST_USE_ALL_ARGS); @@ -5470,6 +5444,7 @@ int pvcreate_each_device(struct cmd_context *cmd, dm_list_init(&remove_duplicates); dm_list_init(&arg_sort); + dm_list_init(&rescan_devs); handle->custom_handle = pp; @@ -5715,6 +5690,19 @@ int pvcreate_each_device(struct cmd_context *cmd, do_command: + dm_list_iterate_items(pd, &pp->arg_process) { + if (!(devl = dm_pool_zalloc(cmd->mem, sizeof(*devl)))) + goto bad; + devl->dev = pd->dev; + dm_list_add(&rescan_devs, &devl->list); + } + + log_debug("Rescanning devices with exclusive open"); + if (!label_scan_devs_excl(&rescan_devs)) { + log_debug("Failed to rescan devs excl"); + goto bad; + } + /* * Reorder arg_process entries to match the original order of args. */ From 9d2add136192a08be7b3441b6fbe583c27b03dc8 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Tue, 13 Feb 2018 12:50:44 -0600 Subject: [PATCH 37/87] scan: add a dev to bcache before each read to handle write path This is a temporary hacky workaround to the problem of reads going through bcache and writes not using bcache. The write path wants to read parts of data that it is incrementally writing to disk, but the reads (using bcache) don't work because the writes are not in the bcache. For now, add a dev to bcache before each attempt to read it in case it's being used on the write path. 
--- lib/format_text/format-text.c | 6 ++++++ lib/label/label.c | 18 ++++++++++++++++++ lib/label/label.h | 1 + 3 files changed, 25 insertions(+) diff --git a/lib/format_text/format-text.c b/lib/format_text/format-text.c index a5d839762..f33451fb9 100644 --- a/lib/format_text/format-text.c +++ b/lib/format_text/format-text.c @@ -320,6 +320,8 @@ static int _raw_read_mda_header(struct mda_header *mdah, struct device_area *dev log_debug_metadata("Reading mda header sector from %s at %llu", dev_name(dev_area->dev), (unsigned long long)dev_area->start); + label_scan_confirm(dev_area->dev); /* FIXME: remove this, ensures dev is in bcache */ + if (!bcache_read_bytes(scan_bcache, dev_area->dev->bcache_fd, dev_area->start, MDA_HEADER_SIZE, mdah)) { log_error("Failed to read metadata area header on %s at %llu", dev_name(dev_area->dev), (unsigned long long)dev_area->start); @@ -462,6 +464,8 @@ static struct raw_locn *_read_metadata_location_vg(struct device_area *dev_area, */ memset(vgnamebuf, 0, sizeof(vgnamebuf)); + label_scan_confirm(dev_area->dev); /* FIXME: remove this, ensures dev is in bcache */ + bcache_read_bytes(scan_bcache, dev_area->dev->bcache_fd, dev_area->start + rlocn->offset, NAME_LEN, vgnamebuf); if (!strncmp(vgnamebuf, vgname, len = strlen(vgname)) && @@ -1209,6 +1213,8 @@ int read_metadata_location_summary(const struct format_type *fmt, return 0; } + label_scan_confirm(dev_area->dev); /* FIXME: remove this, ensures dev is in bcache */ + bcache_read_bytes(scan_bcache, dev_area->dev->bcache_fd, dev_area->start + rlocn->offset, NAME_LEN, buf); while (buf[len] && !isspace(buf[len]) && buf[len] != '{' && diff --git a/lib/label/label.c b/lib/label/label.c index 2ec187c81..5da781ec4 100644 --- a/lib/label/label.c +++ b/lib/label/label.c @@ -786,3 +786,21 @@ int label_read_sector(struct device *dev, struct label **labelp, uint64_t scan_s return label_read(dev, labelp, 0); } +/* + * FIXME: remove this. 
It should not be needed once writes are going through + * bcache. As it is now, the write path involves multiple writes to a device, + * and later writes want to read previous writes from disk. They do these + * reads using the standard read paths which require the devs to be in bcache, + * but the bcache reads do not find the dev because the writes have gone around + * bcache. To work around this for now, check if each dev is in bcache before + * reading it, and if not add it first. + */ + +void label_scan_confirm(struct device *dev) +{ + if (!_in_bcache(dev)) { + log_warn("add dev %s to bcache", dev_name(dev)); + label_read(dev, NULL, 0); + } +} + diff --git a/lib/label/label.h b/lib/label/label.h index eb62f6452..e265a6b9b 100644 --- a/lib/label/label.h +++ b/lib/label/label.h @@ -109,5 +109,6 @@ void label_scan_invalidate(struct device *dev); void label_scan_destroy(struct cmd_context *cmd); int label_read(struct device *dev, struct label **labelp, uint64_t unused_sector); int label_read_sector(struct device *dev, struct label **labelp, uint64_t scan_sector); +void label_scan_confirm(struct device *dev); #endif From a1e3398ffcf5def36934eabbb9ea26db221bbe74 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Tue, 13 Feb 2018 13:37:10 -0600 Subject: [PATCH 38/87] scan: handle no devices Still create bcache. --- lib/label/label.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/label/label.c b/lib/label/label.c index 5da781ec4..f0ec92fad 100644 --- a/lib/label/label.c +++ b/lib/label/label.c @@ -577,6 +577,7 @@ int label_scan(struct cmd_context *cmd) struct device_list *devl; struct device *dev; struct io_engine *ioe; + int cache_blocks; log_debug_devs("Finding devices to scan"); @@ -615,6 +616,9 @@ int label_scan(struct cmd_context *cmd) dev_iter_destroy(iter); if (!scan_bcache) { + /* No devices can happen, just create bcache with any small number. 
*/ + if (!(cache_blocks = dm_list_size(&all_devs))) + cache_blocks = 8; /* * 100 is arbitrary, it's the max number of concurrent aio's @@ -630,7 +634,7 @@ int label_scan(struct cmd_context *cmd) * of the devs will not be lvm devices, and we don't need * an entry for those. We might want to change this. */ - if (!(scan_bcache = bcache_create(BCACHE_BLOCK_SIZE_IN_SECTORS, dm_list_size(&all_devs), ioe))) + if (!(scan_bcache = bcache_create(BCACHE_BLOCK_SIZE_IN_SECTORS, cache_blocks, ioe))) return 0; } From c29899b910811d0d1dc5607d200a1e59773eff0c Mon Sep 17 00:00:00 2001 From: David Teigland Date: Tue, 13 Feb 2018 14:28:24 -0600 Subject: [PATCH 39/87] remove unused variable in _pvremove_check_single --- tools/toollib.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/toollib.c b/tools/toollib.c index 0cc2edd70..807e34c33 100644 --- a/tools/toollib.c +++ b/tools/toollib.c @@ -5279,7 +5279,6 @@ static int _pvremove_check_single(struct cmd_context *cmd, struct pvcreate_params *pp = (struct pvcreate_params *) handle->custom_handle; struct pvcreate_device *pd; struct pvcreate_prompt *prompt; - struct label *label; int found = 0; if (!pv->dev) From 89f54a5094981c827ca557cb1381e5f1a028912c Mon Sep 17 00:00:00 2001 From: David Teigland Date: Tue, 13 Feb 2018 15:53:59 -0600 Subject: [PATCH 40/87] remove debugging print --- lib/label/label.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/label/label.c b/lib/label/label.c index f0ec92fad..126849815 100644 --- a/lib/label/label.c +++ b/lib/label/label.c @@ -802,9 +802,7 @@ int label_read_sector(struct device *dev, struct label **labelp, uint64_t scan_s void label_scan_confirm(struct device *dev) { - if (!_in_bcache(dev)) { - log_warn("add dev %s to bcache", dev_name(dev)); + if (!_in_bcache(dev)) label_read(dev, NULL, 0); - } } From e4f478d86d6545f6cced7a8ba3bc0b79dccb7b6e Mon Sep 17 00:00:00 2001 From: David Teigland Date: Tue, 13 Feb 2018 16:17:02 -0600 Subject: [PATCH 41/87] scan: handle request 
to scan missing dev --- lib/label/label.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/label/label.c b/lib/label/label.c index 126849815..19beecf8d 100644 --- a/lib/label/label.c +++ b/lib/label/label.c @@ -257,6 +257,8 @@ struct bcache *scan_bcache; static bool _in_bcache(struct device *dev) { + if (!dev) + return NULL; return (dev->flags & DEV_IN_BCACHE) ? true : false; } @@ -402,6 +404,9 @@ static int _scan_dev_open(struct device *dev) int flags = 0; int fd; + if (!dev) + return 0; + if (dev->flags & DEV_IN_BCACHE) { log_error("scan_dev_open %s DEV_IN_BCACHE already set", dev_name(dev)); dev->flags &= ~DEV_IN_BCACHE; From 37471bb4777f3f407d2cd942995b45c326ea221a Mon Sep 17 00:00:00 2001 From: David Teigland Date: Wed, 14 Feb 2018 13:49:56 -0600 Subject: [PATCH 42/87] scan: skip extra scan in vg_read Drop an extra label scan in the recovery part of vg_read. This is a temporary improvement until the pending replacement for the broken recovery code burried in vg_read. --- lib/metadata/metadata.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/metadata/metadata.c b/lib/metadata/metadata.c index 5d3f83572..b588a04f3 100644 --- a/lib/metadata/metadata.c +++ b/lib/metadata/metadata.c @@ -4080,8 +4080,6 @@ static struct volume_group *_vg_read(struct cmd_context *cmd, /* Independent MDAs aren't supported under low memory */ if (!cmd->independent_metadata_areas && prioritized_section()) return_NULL; - lvmcache_force_next_label_scan(); - lvmcache_label_scan(cmd); if (!(fmt = lvmcache_fmt_from_vgname(cmd, vgname, vgid, 0))) return_NULL; From 6e580465b50edcd5fef0eb95180a620cb785d835 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Wed, 14 Feb 2018 14:47:28 -0600 Subject: [PATCH 43/87] vgremove: fix force remove on devs with damaged metadata The improved detection of bad metadata when scanning (where errors were ignored before) means we now have to override some errors when forcibly erasing damaged metadata. 
--- lib/format_text/format-text.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/lib/format_text/format-text.c b/lib/format_text/format-text.c index f33451fb9..8a4221304 100644 --- a/lib/format_text/format-text.c +++ b/lib/format_text/format-text.c @@ -865,12 +865,28 @@ static int _vg_remove_raw(struct format_instance *fid, struct volume_group *vg, int r = 0; int noprecommit = 0; - if (!(mdah = raw_read_mda_header(fid->fmt, &mdac->area, mda_is_primary(mda)))) - goto_out; + if (!(mdah = dm_pool_alloc(fid->fmt->cmd->mem, MDA_HEADER_SIZE))) { + log_error("struct mda_header allocation failed"); + return 0; + } - if (!(rlocn = _read_metadata_location_vg(&mdac->area, mdah, mda_is_primary(mda), vg->name, &noprecommit))) { + /* + * FIXME: what's the point of reading the mda_header and metadata, + * since we zero the rlocn fields whether we can read them or not. + */ + + if (!_raw_read_mda_header(mdah, &mdac->area, mda_is_primary(mda))) { + log_warn("WARNING: Removing metadata location on %s with bad mda header.", + dev_name(mdac->area.dev)); rlocn = &mdah->raw_locns[0]; mdah->raw_locns[1].offset = 0; + } else { + if (!(rlocn = _read_metadata_location_vg(&mdac->area, mdah, mda_is_primary(mda), vg->name, &noprecommit))) { + log_warn("WARNING: Removing metadata location on %s with bad metadata.", + dev_name(mdac->area.dev)); + rlocn = &mdah->raw_locns[0]; + mdah->raw_locns[1].offset = 0; + } } rlocn->offset = 0; From 7bce66c5e83296398e2eee99140b3d6e409236c9 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Wed, 14 Feb 2018 15:45:31 -0600 Subject: [PATCH 44/87] scan: setup bcache for commands using lvmetad Commands using lvmetad will not begin with a proper label_scan which initializes bcache, but may later decide they need to scan a set of devs, in which case they'll need bcache set up at that point. 
--- lib/label/label.c | 66 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 47 insertions(+), 19 deletions(-) diff --git a/lib/label/label.c b/lib/label/label.c index 19beecf8d..bf1070cfb 100644 --- a/lib/label/label.c +++ b/lib/label/label.c @@ -570,6 +570,38 @@ static int _scan_list(struct dm_list *devs, int *failed) return 1; } +static int _setup_bcache(int cache_blocks) +{ + struct io_engine *ioe; + + /* No devices can happen, just create bcache with any small number. */ + if (!cache_blocks) + cache_blocks = 8; + + /* + * 100 is arbitrary, it's the max number of concurrent aio's + * possible, i.e, the number of devices that can be read at + * once. Should this be configurable? + */ + if (!(ioe = create_async_io_engine(100))) { + log_error("Failed to create bcache io engine."); + return 0; + } + + /* + * Configure one cache block for each device on the system. + * We won't generally need to cache that many because some + * of the devs will not be lvm devices, and we don't need + * an entry for those. We might want to change this. + */ + if (!(scan_bcache = bcache_create(BCACHE_BLOCK_SIZE_IN_SECTORS, cache_blocks, ioe))) { + log_error("Failed to create bcache with %d cache blocks.", cache_blocks); + return 0; + } + + return 1; +} + /* * Scan and cache lvm data from all devices on the system. * The cache should be empty/reset before calling this. @@ -581,8 +613,6 @@ int label_scan(struct cmd_context *cmd) struct dev_iter *iter; struct device_list *devl; struct device *dev; - struct io_engine *ioe; - int cache_blocks; log_debug_devs("Finding devices to scan"); @@ -621,25 +651,11 @@ int label_scan(struct cmd_context *cmd) dev_iter_destroy(iter); if (!scan_bcache) { - /* No devices can happen, just create bcache with any small number. */ - if (!(cache_blocks = dm_list_size(&all_devs))) - cache_blocks = 8; - /* - * 100 is arbitrary, it's the max number of concurrent aio's - * possible, i.e, the number of devices that can be read at - * once. 
Should this be configurable? + * FIXME: there should probably be some max number of + * cache blocks we use when setting up bcache. */ - if (!(ioe = create_async_io_engine(100))) - return 0; - - /* - * Configure one cache block for each device on the system. - * We won't generally need to cache that many because some - * of the devs will not be lvm devices, and we don't need - * an entry for those. We might want to change this. - */ - if (!(scan_bcache = bcache_create(BCACHE_BLOCK_SIZE_IN_SECTORS, cache_blocks, ioe))) + if (!_setup_bcache(dm_list_size(&all_devs))) return 0; } @@ -660,6 +676,18 @@ int label_scan_devs(struct cmd_context *cmd, struct dm_list *devs) { struct device_list *devl; + if (!scan_bcache) { + /* + * This is only needed when commands are using lvmetad, in + * which case they don't do an initial label_scan, but may + * later need to rescan certain devs from disk and call this + * function. + * FIXME: is there some better number to choose here? + */ + if (!_setup_bcache(32)) + return 0; + } + dm_list_iterate_items(devl, devs) { if (_in_bcache(devl->dev)) { bcache_invalidate_fd(scan_bcache, devl->dev->bcache_fd); From f328532f05877fc04e7f67c751ef95a844831b9b Mon Sep 17 00:00:00 2001 From: David Teigland Date: Wed, 14 Feb 2018 16:15:30 -0600 Subject: [PATCH 45/87] scan: leave the caller's dev list unchanged When scanning the list of devs from the caller they are moved to another temporary list, but were never returned to the original list. 
--- lib/label/label.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/label/label.c b/lib/label/label.c index bf1070cfb..3359b4dbe 100644 --- a/lib/label/label.c +++ b/lib/label/label.c @@ -567,6 +567,8 @@ static int _scan_list(struct dm_list *devs, int *failed) if (failed) *failed = scan_failed_count; + dm_list_splice(devs, &done_devs); + return 1; } From 28255e3eeef13a0e73a40d533623b22dd9db89cb Mon Sep 17 00:00:00 2001 From: David Teigland Date: Wed, 14 Feb 2018 16:21:27 -0600 Subject: [PATCH 46/87] scan: always setup bcache for commands using lvmetad Do this at the start of the command so that it doesn't need to be checked and set up in every function that could need it. --- lib/cache/lvmcache.c | 5 ++++- lib/label/label.c | 29 +++++++++++++++++------------ lib/label/label.h | 1 + 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/lib/cache/lvmcache.c b/lib/cache/lvmcache.c index 87bcc3723..53254f476 100644 --- a/lib/cache/lvmcache.c +++ b/lib/cache/lvmcache.c @@ -1241,8 +1241,11 @@ int lvmcache_label_scan(struct cmd_context *cmd) int r = 0; - if (lvmetad_used()) + if (lvmetad_used()) { + if (!label_scan_setup_bcache()) + return 0; return 1; + } /* Avoid recursion when a PVID can't be found! */ if (_scanning_in_progress) diff --git a/lib/label/label.c b/lib/label/label.c index 3359b4dbe..dc14bebc3 100644 --- a/lib/label/label.c +++ b/lib/label/label.c @@ -678,18 +678,6 @@ int label_scan_devs(struct cmd_context *cmd, struct dm_list *devs) { struct device_list *devl; - if (!scan_bcache) { - /* - * This is only needed when commands are using lvmetad, in - * which case they don't do an initial label_scan, but may - * later need to rescan certain devs from disk and call this - * function. - * FIXME: is there some better number to choose here? 
- */ - if (!_setup_bcache(32)) - return 0; - } - dm_list_iterate_items(devl, devs) { if (_in_bcache(devl->dev)) { bcache_invalidate_fd(scan_bcache, devl->dev->bcache_fd); @@ -841,3 +829,20 @@ void label_scan_confirm(struct device *dev) label_read(dev, NULL, 0); } +/* + * This is only needed when commands are using lvmetad, in which case they + * don't do an initial label_scan, but may later need to rescan certain devs + * from disk and call this function. FIXME: is there some better number to + * choose here? + */ + +int label_scan_setup_bcache(void) +{ + if (!scan_bcache) { + if (!_setup_bcache(32)) + return 0; + } + + return 1; +} + diff --git a/lib/label/label.h b/lib/label/label.h index e265a6b9b..107bd30f7 100644 --- a/lib/label/label.h +++ b/lib/label/label.h @@ -110,5 +110,6 @@ void label_scan_destroy(struct cmd_context *cmd); int label_read(struct device *dev, struct label **labelp, uint64_t unused_sector); int label_read_sector(struct device *dev, struct label **labelp, uint64_t scan_sector); void label_scan_confirm(struct device *dev); +int label_scan_setup_bcache(void); #endif From 96a61337b00a250f69e7a8e6ac390c47c36c2c0f Mon Sep 17 00:00:00 2001 From: David Teigland Date: Wed, 14 Feb 2018 16:43:26 -0600 Subject: [PATCH 47/87] lvmdiskscan: use the new label_scan instead of doing it's own. 
--- tools/lvmdiskscan.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/lvmdiskscan.c b/tools/lvmdiskscan.c index cb688b5ca..7e2fc8878 100644 --- a/tools/lvmdiskscan.c +++ b/tools/lvmdiskscan.c @@ -87,7 +87,6 @@ int lvmdiskscan(struct cmd_context *cmd, int argc __attribute__((unused)), uint64_t size; struct dev_iter *iter; struct device *dev; - struct label *label; /* initialise these here to avoid problems with the lvm shell */ disks_found = 0; @@ -105,10 +104,10 @@ int lvmdiskscan(struct cmd_context *cmd, int argc __attribute__((unused)), return ECMD_FAILED; } - /* Do scan */ + label_scan(cmd); + for (dev = dev_iter_get(iter); dev; dev = dev_iter_get(iter)) { - /* Try if it is a PV first */ - if ((label_read(dev, &label, UINT64_C(0)))) { + if (lvmcache_has_dev_info(dev)) { if (!dev_get_size(dev, &size)) { log_error("Couldn't get size of \"%s\"", dev_name(dev)); From d75aa557845e37f5c4b90ca43c81943bd9b90094 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Thu, 15 Feb 2018 09:54:12 -0600 Subject: [PATCH 48/87] disable LVM1 tests --- test/lib/flavour-ndev-cluster.sh | 1 - test/lib/flavour-ndev-vanilla.sh | 1 - test/lib/flavour-udev-cluster.sh | 1 - test/lib/flavour-udev-vanilla.sh | 1 - 4 files changed, 4 deletions(-) diff --git a/test/lib/flavour-ndev-cluster.sh b/test/lib/flavour-ndev-cluster.sh index 3082b112a..362906952 100644 --- a/test/lib/flavour-ndev-cluster.sh +++ b/test/lib/flavour-ndev-cluster.sh @@ -1,2 +1 @@ export LVM_TEST_LOCKING=3 -export LVM_TEST_LVM1=1 diff --git a/test/lib/flavour-ndev-vanilla.sh b/test/lib/flavour-ndev-vanilla.sh index c106e6129..1899c948e 100644 --- a/test/lib/flavour-ndev-vanilla.sh +++ b/test/lib/flavour-ndev-vanilla.sh @@ -1,2 +1 @@ export LVM_TEST_LOCKING=1 -export LVM_TEST_LVM1=1 diff --git a/test/lib/flavour-udev-cluster.sh b/test/lib/flavour-udev-cluster.sh index 1cab55826..a9025a618 100644 --- a/test/lib/flavour-udev-cluster.sh +++ b/test/lib/flavour-udev-cluster.sh @@ -1,3 +1,2 @@ 
export LVM_TEST_LOCKING=3 export LVM_TEST_DEVDIR=/dev -export LVM_TEST_LVM1=1 diff --git a/test/lib/flavour-udev-vanilla.sh b/test/lib/flavour-udev-vanilla.sh index 6fbdafee5..ca778a6d8 100644 --- a/test/lib/flavour-udev-vanilla.sh +++ b/test/lib/flavour-udev-vanilla.sh @@ -1,3 +1,2 @@ export LVM_TEST_LOCKING=1 export LVM_TEST_DEVDIR=/dev -export LVM_TEST_LVM1=1 From ae093df3f15f6d125bf85ed921e5a971b170dc23 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Thu, 15 Feb 2018 10:00:07 -0600 Subject: [PATCH 49/87] test: vgsplit-usage if LVM1 tests --- test/shell/vgsplit-usage.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/shell/vgsplit-usage.sh b/test/shell/vgsplit-usage.sh index 98818abb0..a112e8621 100644 --- a/test/shell/vgsplit-usage.sh +++ b/test/shell/vgsplit-usage.sh @@ -184,6 +184,7 @@ check pvlv_counts $vg1 2 1 0 vgremove -f $vg1 # vgsplit rejects split because metadata types differ +if test -n "$LVM_TEST_LVM1" ; then pvcreate -ff -M1 "$dev3" "$dev4" pvcreate -ff "$dev1" "$dev2" vgcreate -M1 $vg1 "$dev3" "$dev4" @@ -192,3 +193,4 @@ not vgsplit $vg1 $vg2 "$dev3" 2>err; grep "Metadata types differ" err vgremove -f $vg1 $vg2 fi +fi From b504bb809efa8a3a4db0e494bb80d3720d82e6ff Mon Sep 17 00:00:00 2001 From: David Teigland Date: Thu, 15 Feb 2018 10:27:43 -0600 Subject: [PATCH 50/87] scan: use 128K bcache block size --- lib/label/label.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/label/label.c b/lib/label/label.c index dc14bebc3..38db73371 100644 --- a/lib/label/label.c +++ b/lib/label/label.c @@ -253,7 +253,7 @@ struct label *label_create(struct labeller *labeller) /* global variable for accessing the bcache populated by label scan */ struct bcache *scan_bcache; -#define BCACHE_BLOCK_SIZE_IN_SECTORS 2048 /* 1MB */ +#define BCACHE_BLOCK_SIZE_IN_SECTORS 256 /* 256*512 = 128K */ static bool _in_bcache(struct device *dev) { From e7670d333855ecccb21b30bc6b4525d6bafaa940 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Thu, 
15 Feb 2018 11:06:50 -0600 Subject: [PATCH 51/87] pvck: use bcache --- lib/format_text/format-text.c | 24 +++++++++++++++++------- tools/pvck.c | 2 ++ 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/lib/format_text/format-text.c b/lib/format_text/format-text.c index 8a4221304..ef59f071a 100644 --- a/lib/format_text/format-text.c +++ b/lib/format_text/format-text.c @@ -187,9 +187,6 @@ static int _pv_analyze_mda_raw (const struct format_type * fmt, FMTu64, mdac->area.start, mdac->area.size); area = &mdac->area; - if (!dev_open_readonly(area->dev)) - return_0; - if (!(mdah = raw_read_mda_header(fmt, area, mda_is_primary(mda)))) goto_out; @@ -230,8 +227,23 @@ static int _pv_analyze_mda_raw (const struct format_type * fmt, if (!(buf = dm_malloc(size + size2))) goto_out; - if (!dev_read_circular(area->dev, offset, size, offset2, size2, MDA_CONTENT_REASON(mda_is_primary(mda)), buf)) - goto_out; + if (!bcache_read_bytes(scan_bcache, area->dev->bcache_fd, offset, size, buf)) { + log_error("Failed to read dev %s offset %llu size %llu", + dev_name(area->dev), + (unsigned long long)offset, + (unsigned long long)size); + goto out; + } + + if (size2) { + if (!bcache_read_bytes(scan_bcache, area->dev->bcache_fd, offset2, size2, buf + size)) { + log_error("Failed to read dev %s offset %llu size %llu", + dev_name(area->dev), + (unsigned long long)offset2, + (unsigned long long)size2); + goto out; + } + } /* * FIXME: We could add more sophisticated metadata detection @@ -268,8 +280,6 @@ static int _pv_analyze_mda_raw (const struct format_type * fmt, r = 1; out: dm_free(buf); - if (!dev_close(area->dev)) - stack; return r; } diff --git a/tools/pvck.c b/tools/pvck.c index 0fedb4a6c..634b38d19 100644 --- a/tools/pvck.c +++ b/tools/pvck.c @@ -23,6 +23,8 @@ int pvck(struct cmd_context *cmd, int argc, char **argv) /* FIXME: validate cmdline options */ /* FIXME: what does the cmdline look like? 
*/ + label_scan_setup_bcache(); + /* * Use what's on the cmdline directly, and avoid calling into * some of the other infrastructure functions, so as to avoid From c2b10daf694bea490044b752209a416f886373ea Mon Sep 17 00:00:00 2001 From: David Teigland Date: Thu, 15 Feb 2018 14:17:51 -0600 Subject: [PATCH 52/87] scan: put dev back on caller's list Commit 6e442875613915e506440e59a290b56756df2521 missed adding devs back to caller's list. --- lib/label/label.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/label/label.c b/lib/label/label.c index 38db73371..c35818a7f 100644 --- a/lib/label/label.c +++ b/lib/label/label.c @@ -513,6 +513,7 @@ static int _scan_list(struct dm_list *devs, int *failed) if (!_scan_dev_open(devl->dev)) { log_debug_devs("%s: Failed to open device.", dev_name(devl->dev)); dm_list_del(&devl->list); + dm_list_add(&done_devs, &devl->list); scan_failed_count++; continue; } @@ -561,8 +562,8 @@ static int _scan_list(struct dm_list *devs, int *failed) if (!dm_list_empty(devs)) goto scan_more; - log_debug_devs("Scanned %d devices: %d for lvm, %d failed.", - dm_list_size(&done_devs), scan_lvm_count, scan_failed_count); + log_debug_devs("Scanned devices: %d lvm, %d failed.", + scan_lvm_count, scan_failed_count); if (failed) *failed = scan_failed_count; From 34fd818caf89f39dab58ef2ce7c60c193db02cf0 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Fri, 16 Feb 2018 10:37:09 -0600 Subject: [PATCH 53/87] scan: drop bcache and close fd for LV with stacked PV When a PV is stacked on an LV, the LV will be kept in bcache, and the open fd on the LV may interfere with processing the LV. So, drop/close a bcache fd for an LV before processing the LV. 
--- tools/toollib.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tools/toollib.c b/tools/toollib.c index 807e34c33..0b8823bce 100644 --- a/tools/toollib.c +++ b/tools/toollib.c @@ -3009,6 +3009,7 @@ int process_each_lv_in_vg(struct cmd_context *cmd, struct volume_group *vg, log_report_t saved_log_report_state = log_get_report_state(); char lv_uuid[64] __attribute__((aligned(8))); char vg_uuid[64] __attribute__((aligned(8))); + struct lvinfo lvinfo; int ret_max = ECMD_PROCESSED; int ret = 0; int whole_selected = 0; @@ -3025,6 +3026,8 @@ int process_each_lv_in_vg(struct cmd_context *cmd, struct volume_group *vg, struct lv_list *final_lvl; struct dm_list found_arg_lvnames; struct glv_list *glvl, *tglvl; + struct device *dev; + dev_t devt; int do_report_ret_code = 1; log_set_report_object_type(LOG_REPORT_OBJECT_TYPE_LV); @@ -3162,6 +3165,18 @@ int process_each_lv_in_vg(struct cmd_context *cmd, struct volume_group *vg, } log_set_report_object_name_and_id(NULL, NULL); + /* + * If a PV is stacked on an LV, then the LV is kept open + * in bcache, and needs to be closed so the open fd doesn't + * interfere with processing the LV. + */ + dm_list_iterate_items(lvl, &final_lvs) { + lv_info(cmd, lvl->lv, 0, &lvinfo, 0, 0); + devt = MKDEV(lvinfo.major, lvinfo.minor); + if ((dev = dev_cache_get_by_devt(devt, cmd->filter))) + label_scan_invalidate(dev); + } + dm_list_iterate_items(lvl, &final_lvs) { lv_uuid[0] = '\0'; if (!id_write_format(&lvl->lv->lvid.id[1], lv_uuid, sizeof(lv_uuid))) From 0da296003d3af3f505213a25783dda534af2d9d6 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Fri, 16 Feb 2018 14:18:55 -0600 Subject: [PATCH 54/87] vgchange: invalidate bcache for stacked LVs when deactivating An LV with a stacked PV will be open in bcache and needs to be invalidated to close the fd before attempting to deactivate. 
--- lib/label/label.c | 19 +++++++++++++++++++ lib/label/label.h | 1 + tools/toollib.c | 11 ++--------- tools/vgchange.c | 17 +++++++++++------ 4 files changed, 33 insertions(+), 15 deletions(-) diff --git a/lib/label/label.c b/lib/label/label.c index c35818a7f..dfd3e37a6 100644 --- a/lib/label/label.c +++ b/lib/label/label.c @@ -20,6 +20,7 @@ #include "lvmcache.h" #include "bcache.h" #include "toolcontext.h" +#include "activate.h" #include #include @@ -725,6 +726,24 @@ void label_scan_invalidate(struct device *dev) } } +/* + * If a PV is stacked on an LV, then the LV is kept open + * in bcache, and needs to be closed so the open fd doesn't + * interfere with processing the LV. + */ + +void label_scan_invalidate_lv(struct cmd_context *cmd, struct logical_volume *lv) +{ + struct lvinfo lvinfo; + struct device *dev; + dev_t devt; + + lv_info(cmd, lv, 0, &lvinfo, 0, 0); + devt = MKDEV(lvinfo.major, lvinfo.minor); + if ((dev = dev_cache_get_by_devt(devt, cmd->filter))) + label_scan_invalidate(dev); +} + /* * Undo label_scan() * diff --git a/lib/label/label.h b/lib/label/label.h index 107bd30f7..bf6e9262b 100644 --- a/lib/label/label.h +++ b/lib/label/label.h @@ -106,6 +106,7 @@ int label_scan(struct cmd_context *cmd); int label_scan_devs(struct cmd_context *cmd, struct dm_list *devs); int label_scan_devs_excl(struct dm_list *devs); void label_scan_invalidate(struct device *dev); +void label_scan_invalidate_lv(struct cmd_context *cmd, struct logical_volume *lv); void label_scan_destroy(struct cmd_context *cmd); int label_read(struct device *dev, struct label **labelp, uint64_t unused_sector); int label_read_sector(struct device *dev, struct label **labelp, uint64_t scan_sector); diff --git a/tools/toollib.c b/tools/toollib.c index 0b8823bce..623bfbb0b 100644 --- a/tools/toollib.c +++ b/tools/toollib.c @@ -3009,7 +3009,6 @@ int process_each_lv_in_vg(struct cmd_context *cmd, struct volume_group *vg, log_report_t saved_log_report_state = log_get_report_state(); char 
lv_uuid[64] __attribute__((aligned(8))); char vg_uuid[64] __attribute__((aligned(8))); - struct lvinfo lvinfo; int ret_max = ECMD_PROCESSED; int ret = 0; int whole_selected = 0; @@ -3026,8 +3025,6 @@ int process_each_lv_in_vg(struct cmd_context *cmd, struct volume_group *vg, struct lv_list *final_lvl; struct dm_list found_arg_lvnames; struct glv_list *glvl, *tglvl; - struct device *dev; - dev_t devt; int do_report_ret_code = 1; log_set_report_object_type(LOG_REPORT_OBJECT_TYPE_LV); @@ -3170,12 +3167,8 @@ int process_each_lv_in_vg(struct cmd_context *cmd, struct volume_group *vg, * in bcache, and needs to be closed so the open fd doesn't * interfere with processing the LV. */ - dm_list_iterate_items(lvl, &final_lvs) { - lv_info(cmd, lvl->lv, 0, &lvinfo, 0, 0); - devt = MKDEV(lvinfo.major, lvinfo.minor); - if ((dev = dev_cache_get_by_devt(devt, cmd->filter))) - label_scan_invalidate(dev); - } + dm_list_iterate_items(lvl, &final_lvs) + label_scan_invalidate_lv(cmd, lvl->lv); dm_list_iterate_items(lvl, &final_lvs) { lv_uuid[0] = '\0'; diff --git a/tools/vgchange.c b/tools/vgchange.c index 7cfaab6a1..61b78dfd0 100644 --- a/tools/vgchange.c +++ b/tools/vgchange.c @@ -209,14 +209,19 @@ int vgchange_activate(struct cmd_context *cmd, struct volume_group *vg, cmd->handles_missing_pvs = 1; /* FIXME: Force argument to deactivate them? 
*/ - if (!do_activate && (lv_open = lvs_in_vg_opened(vg))) { + if (!do_activate) { dm_list_iterate_items(lvl, &vg->lvs) - if (lv_is_visible(lvl->lv) && - !lv_check_not_in_use(lvl->lv, 1)) { - log_error("Can't deactivate volume group \"%s\" with %d open " - "logical volume(s)", vg->name, lv_open); - return 0; + label_scan_invalidate_lv(cmd, lvl->lv); + + if ((lv_open = lvs_in_vg_opened(vg))) { + dm_list_iterate_items(lvl, &vg->lvs) { + if (lv_is_visible(lvl->lv) && !lv_check_not_in_use(lvl->lv, 1)) { + log_error("Can't deactivate volume group \"%s\" with %d open logical volume(s)", + vg->name, lv_open); + return 0; + } } + } } /* FIXME Move into library where clvmd can use it */ From 8b26a007b1b545333b40675b1c425b1ef5e0b653 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Tue, 20 Feb 2018 09:33:27 -0600 Subject: [PATCH 55/87] misc bcache fixes from ejt --- lib/device/bcache.c | 64 +++++++++++++++++++++++++++------------------ lib/device/bcache.h | 9 +++++-- lib/label/label.c | 2 +- 3 files changed, 47 insertions(+), 28 deletions(-) diff --git a/lib/device/bcache.c b/lib/device/bcache.c index 38c909c12..499c6af51 100644 --- a/lib/device/bcache.c +++ b/lib/device/bcache.c @@ -134,7 +134,6 @@ struct async_engine { struct io_engine e; io_context_t aio_context; struct cb_set *cbs; - unsigned max_io; }; static struct async_engine *_to_async(struct io_engine *e) @@ -185,7 +184,10 @@ static bool _async_issue(struct io_engine *ioe, enum dir d, int fd, cb->cb.aio_lio_opcode = (d == DIR_READ) ? 
IO_CMD_PREAD : IO_CMD_PWRITE; cb_array[0] = &cb->cb; - r = io_submit(e->aio_context, 1, cb_array); + do { + r = io_submit(e->aio_context, 1, cb_array); + } while (r == -EAGAIN); + if (r < 0) { log_sys_warn("io_submit"); _cb_free(e->cbs, cb); @@ -206,7 +208,10 @@ static bool _async_wait(struct io_engine *ioe, io_complete_fn fn) struct async_engine *e = _to_async(ioe); memset(&event, 0, sizeof(event)); - r = io_getevents(e->aio_context, 1, MAX_EVENT, event, NULL); + do { + r = io_getevents(e->aio_context, 1, MAX_EVENT, event, NULL); + } while (r == -EINTR); + if (r < 0) { log_sys_warn("io_getevents"); return false; @@ -223,6 +228,7 @@ static bool _async_wait(struct io_engine *ioe, io_complete_fn fn) else if ((int) ev->res < 0) fn(cb->context, (int) ev->res); + // FIXME: dct added this. a short read is ok?! else if (ev->res >= (1 << SECTOR_SHIFT)) { /* minimum acceptable read is 1 sector */ fn((void *) cb->context, 0); @@ -238,13 +244,12 @@ static bool _async_wait(struct io_engine *ioe, io_complete_fn fn) return true; } -static unsigned _async_max_io(struct io_engine *ioe) +static unsigned _async_max_io(struct io_engine *e) { - struct async_engine *e = _to_async(ioe); - return e->max_io; + return MAX_IO; } -struct io_engine *create_async_io_engine(unsigned max_io) +struct io_engine *create_async_io_engine(void) { int r; struct async_engine *e = dm_malloc(sizeof(*e)); @@ -252,22 +257,20 @@ struct io_engine *create_async_io_engine(unsigned max_io) if (!e) return NULL; - e->max_io = max_io; - e->e.destroy = _async_destroy; e->e.issue = _async_issue; e->e.wait = _async_wait; e->e.max_io = _async_max_io; e->aio_context = 0; - r = io_setup(max_io, &e->aio_context); + r = io_setup(MAX_IO, &e->aio_context); if (r < 0) { log_warn("io_setup failed"); dm_free(e); return NULL; } - e->cbs = _cb_set_create(max_io); + e->cbs = _cb_set_create(MAX_IO); if (!e->cbs) { log_warn("couldn't create control block set"); dm_free(e); @@ -535,10 +538,17 @@ static void _complete_io(void 
*context, int err) */ dm_list_del(&b->list); - if (b->error) - dm_list_add(&cache->errored, &b->list); + if (b->error) { + if (b->io_dir == DIR_READ) { + // We can just forget about this block, since there's + // no dirty data to be written back. + _hash_remove(b); + dm_list_add(&cache->free, &b->list); - else { + } else + dm_list_add(&cache->errored, &b->list); + + } else { _clear_flags(b, BF_DIRTY); _link_block(b); } @@ -548,35 +558,34 @@ static void _complete_io(void *context, int err) * |b->list| should be valid (either pointing to itself, on one of the other * lists. */ -static bool _issue_low_level(struct block *b, enum dir d) +static void _issue_low_level(struct block *b, enum dir d) { struct bcache *cache = b->cache; sector_t sb = b->index * cache->block_sectors; sector_t se = sb + cache->block_sectors; if (_test_flags(b, BF_IO_PENDING)) - return false; + return; + b->io_dir = d; _set_flags(b, BF_IO_PENDING); dm_list_add(&cache->io_pending, &b->list); if (!cache->engine->issue(cache->engine, d, b->fd, sb, se, b->data, b)) { _complete_io(b, -EIO); - return false; + return; } - return true; - } -static inline bool _issue_read(struct block *b) +static inline void _issue_read(struct block *b) { - return _issue_low_level(b, DIR_READ); + _issue_low_level(b, DIR_READ); } -static inline bool _issue_write(struct block *b) +static inline void _issue_write(struct block *b) { - return _issue_low_level(b, DIR_WRITE); + _issue_low_level(b, DIR_WRITE); } static bool _wait_io(struct bcache *cache) @@ -903,8 +912,13 @@ void bcache_put(struct block *b) _preemptive_writeback(b->cache); } -int bcache_flush(struct bcache *cache) +bool bcache_flush(struct bcache *cache) { + // Only dirty data is on the errored list, since bad read blocks get + // recycled straight away. So we put these back on the dirty list, and + // try and rewrite everything. 
+ dm_list_splice(&cache->dirty, &cache->errored); + while (!dm_list_empty(&cache->dirty)) { struct block *b = dm_list_item(_list_pop(&cache->dirty), struct block); if (b->ref_count || _test_flags(b, BF_IO_PENDING)) { @@ -917,7 +931,7 @@ int bcache_flush(struct bcache *cache) _wait_all(cache); - return dm_list_empty(&cache->errored) ? 0 : -EIO; + return dm_list_empty(&cache->errored); } static void _recycle_block(struct bcache *cache, struct block *b) diff --git a/lib/device/bcache.h b/lib/device/bcache.h index 7d38d3337..e5d98e8fb 100644 --- a/lib/device/bcache.h +++ b/lib/device/bcache.h @@ -47,7 +47,7 @@ struct io_engine { unsigned (*max_io)(struct io_engine *e); }; -struct io_engine *create_async_io_engine(unsigned max_io); +struct io_engine *create_async_io_engine(void); /*----------------------------------------------------------------*/ @@ -65,6 +65,7 @@ struct block { unsigned flags; unsigned ref_count; int error; + enum dir io_dir; }; /* @@ -122,7 +123,11 @@ bool bcache_get(struct bcache *cache, int fd, block_address index, unsigned flags, struct block **result); void bcache_put(struct block *b); -int bcache_flush(struct bcache *cache); +/* + * flush() does not attempt to writeback locked blocks. flush will fail + * (return false), if any unlocked dirty data cannot be written back. + */ +bool bcache_flush(struct bcache *cache); /* * Removes a block from the cache. If the block is dirty it will be written diff --git a/lib/label/label.c b/lib/label/label.c index dfd3e37a6..278f26ae5 100644 --- a/lib/label/label.c +++ b/lib/label/label.c @@ -587,7 +587,7 @@ static int _setup_bcache(int cache_blocks) * possible, i.e, the number of devices that can be read at * once. Should this be configurable? 
*/ - if (!(ioe = create_async_io_engine(100))) { + if (!(ioe = create_async_io_engine())) { log_error("Failed to create bcache io engine."); return 0; } From 80654920468841251a7f940e410ca61de8383b41 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Mon, 19 Feb 2018 15:40:44 -0600 Subject: [PATCH 56/87] bcache: do all writes through bcache --- lib/device/bcache.c | 86 +++++++++++++++++++++++++- lib/device/bcache.h | 4 +- lib/device/dev-type.c | 6 +- lib/format_text/archiver.c | 10 +-- lib/format_text/format-text.c | 92 +++++++++++----------------- lib/label/label.c | 112 ++++++++++++++++------------------ lib/label/label.h | 1 + lib/metadata/metadata.c | 19 +++--- tools/toollib.c | 17 ++---- 9 files changed, 192 insertions(+), 155 deletions(-) diff --git a/lib/device/bcache.c b/lib/device/bcache.c index 499c6af51..272de60b6 100644 --- a/lib/device/bcache.c +++ b/lib/device/bcache.c @@ -897,7 +897,7 @@ bool bcache_get(struct bcache *cache, int fd, block_address index, return false; } -void bcache_put(struct block *b) +static void _put_ref(struct block *b) { if (!b->ref_count) { log_warn("ref count on bcache block already zero"); @@ -907,6 +907,11 @@ void bcache_put(struct block *b) b->ref_count--; if (!b->ref_count) b->cache->nr_locked--; +} + +void bcache_put(struct block *b) +{ + _put_ref(b); if (_test_flags(b, BF_DIRTY)) _preemptive_writeback(b->cache); @@ -925,7 +930,7 @@ bool bcache_flush(struct bcache *cache) // The superblock may well be still locked. continue; } - + _issue_write(b); } @@ -1058,5 +1063,82 @@ bool bcache_read_bytes(struct bcache *cache, int fd, off_t start, size_t len, vo return errors ? 
false : true; } +bool bcache_write_bytes(struct bcache *cache, int fd, off_t start, size_t len, void *data) +{ + struct block *b; + block_address bb, be, i; + unsigned char *udata = data; + off_t block_size = cache->block_sectors << SECTOR_SHIFT; + int errors = 0; + + byte_range_to_block_range(cache, start, len, &bb, &be); + for (i = bb; i < be; i++) + bcache_prefetch(cache, fd, i); + + for (i = bb; i < be; i++) { + if (!bcache_get(cache, fd, i, 0, &b)) { + errors++; + break; + } + + if (i == bb) { + off_t block_offset = start % block_size; + size_t blen = _min(block_size - block_offset, len); + memcpy(((unsigned char *) b->data) + block_offset, udata, blen); + len -= blen; + udata += blen; + } else { + size_t blen = _min(block_size, len); + memcpy(b->data, udata, blen); + len -= blen; + udata += blen; + } + + _set_flags(b, BF_DIRTY); + _unlink_block(b); + _link_block(b); + _put_ref(b); + } + + if (!bcache_flush(cache)) + errors++; + + return errors ? false : true; +} + +#define ZERO_BUF_LEN 4096 + +bool bcache_write_zeros(struct bcache *cache, int fd, off_t start, size_t len) +{ + char zerobuf[ZERO_BUF_LEN]; + size_t plen; + size_t poff; + + memset(zerobuf, 0, sizeof(zerobuf)); + + if (len <= ZERO_BUF_LEN) + return bcache_write_bytes(cache, fd, start, len, &zerobuf); + + poff = 0; + plen = ZERO_BUF_LEN; + + while (1) { + if (!bcache_write_bytes(cache, fd, start + poff, plen, &zerobuf)) + return false; + + poff += plen; + len -= plen; + + if (!len) + break; + + if (len < ZERO_BUF_LEN) + plen = len; + } + + return true; +} + + //---------------------------------------------------------------- diff --git a/lib/device/bcache.h b/lib/device/bcache.h index e5d98e8fb..d5f6d0ac5 100644 --- a/lib/device/bcache.h +++ b/lib/device/bcache.h @@ -148,9 +148,11 @@ void bcache_invalidate_fd(struct bcache *cache, int fd); void bcache_prefetch_bytes(struct bcache *cache, int fd, off_t start, size_t len); /* - * Reads the bytes. + * Reads and writes the bytes. 
Returns false if errors occur. */ bool bcache_read_bytes(struct bcache *cache, int fd, off_t start, size_t len, void *data); +bool bcache_write_bytes(struct bcache *cache, int fd, off_t start, size_t len, void *data); +bool bcache_write_zeros(struct bcache *cache, int fd, off_t start, size_t len); /*----------------------------------------------------------------*/ diff --git a/lib/device/dev-type.c b/lib/device/dev-type.c index 9608146b9..e69f7caa2 100644 --- a/lib/device/dev-type.c +++ b/lib/device/dev-type.c @@ -17,6 +17,8 @@ #include "xlate.h" #include "config.h" #include "metadata.h" +#include "bcache.h" +#include "label.h" #include #include @@ -675,7 +677,7 @@ static int _blkid_wipe(blkid_probe probe, struct device *dev, const char *name, } else log_verbose(_msg_wiping, type, name); - if (!dev_set(dev, offset_value, len, DEV_IO_SIGNATURES, 0)) { + if (!bcache_write_zeros(scan_bcache, dev->bcache_fd, offset_value, len)) { log_error("Failed to wipe %s signature on %s.", type, name); return 0; } @@ -772,7 +774,7 @@ static int _wipe_signature(struct device *dev, const char *type, const char *nam } log_print_unless_silent("Wiping %s on %s.", type, name); - if (!dev_set(dev, offset_found, wipe_len, DEV_IO_SIGNATURES, 0)) { + if (!bcache_write_zeros(scan_bcache, dev->bcache_fd, offset_found, wipe_len)) { log_error("Failed to wipe %s on %s.", type, name); return 0; } diff --git a/lib/format_text/archiver.c b/lib/format_text/archiver.c index 81b5da934..43ae4eb51 100644 --- a/lib/format_text/archiver.c +++ b/lib/format_text/archiver.c @@ -488,19 +488,11 @@ int backup_restore_vg(struct cmd_context *cmd, struct volume_group *vg, } log_verbose("Zeroing start of device %s", pv_name); - if (!dev_open_quiet(dev)) { - log_error("%s not opened: device not zeroed", pv_name); - return 0; - } - if (!dev_set(dev, UINT64_C(0), (size_t) 2048, DEV_IO_LABEL, 0)) { + if (!bcache_write_zeros(scan_bcache, dev->bcache_fd, 0, 2048)) { log_error("%s not wiped: aborting", pv_name); - if 
(!dev_close(dev)) - stack; return 0; } - if (!dev_close(dev)) - stack; } } diff --git a/lib/format_text/format-text.c b/lib/format_text/format-text.c index ef59f071a..4eef72ee1 100644 --- a/lib/format_text/format-text.c +++ b/lib/format_text/format-text.c @@ -330,8 +330,6 @@ static int _raw_read_mda_header(struct mda_header *mdah, struct device_area *dev log_debug_metadata("Reading mda header sector from %s at %llu", dev_name(dev_area->dev), (unsigned long long)dev_area->start); - label_scan_confirm(dev_area->dev); /* FIXME: remove this, ensures dev is in bcache */ - if (!bcache_read_bytes(scan_bcache, dev_area->dev->bcache_fd, dev_area->start, MDA_HEADER_SIZE, mdah)) { log_error("Failed to read metadata area header on %s at %llu", dev_name(dev_area->dev), (unsigned long long)dev_area->start); @@ -397,24 +395,16 @@ static int _raw_write_mda_header(const struct format_type *fmt, mdah->version = FMTT_VERSION; mdah->start = start_byte; - label_scan_invalidate(dev); - - if (!dev_open(dev)) - return_0; - _xlate_mdah(mdah); mdah->checksum_xl = xlate32(calc_crc(INITIAL_CRC, (uint8_t *)mdah->magic, MDA_HEADER_SIZE - sizeof(mdah->checksum_xl))); - if (!dev_write(dev, start_byte, MDA_HEADER_SIZE, MDA_HEADER_REASON(primary_mda), mdah)) { - dev_close(dev); - return_0; + if (!bcache_write_bytes(scan_bcache, dev->bcache_fd, start_byte, MDA_HEADER_SIZE, mdah)) { + log_error("Failed to write mda header to %s fd %d", dev_name(dev), dev->bcache_fd); + return 0; } - if (dev_close(dev)) - stack; - return 1; } @@ -474,8 +464,6 @@ static struct raw_locn *_read_metadata_location_vg(struct device_area *dev_area, */ memset(vgnamebuf, 0, sizeof(vgnamebuf)); - label_scan_confirm(dev_area->dev); /* FIXME: remove this, ensures dev is in bcache */ - bcache_read_bytes(scan_bcache, dev_area->dev->bcache_fd, dev_area->start + rlocn->offset, NAME_LEN, vgnamebuf); if (!strncmp(vgnamebuf, vgname, len = strlen(vgname)) && @@ -681,30 +669,32 @@ static int _vg_write_raw(struct format_instance *fid, 
struct volume_group *vg, goto out; } - log_debug_metadata("Writing %s metadata to %s at " FMTu64 " len " FMTu64 " of " FMTu64, - vg->name, dev_name(mdac->area.dev), mdac->area.start + - mdac->rlocn.offset, mdac->rlocn.size - new_wrap, mdac->rlocn.size); + log_debug_metadata("Writing metadata for VG %s to %s at %llu len %llu (wrap %llu)", + vg->name, dev_name(mdac->area.dev), + (unsigned long long)(mdac->area.start + mdac->rlocn.offset), + (unsigned long long)(mdac->rlocn.size - new_wrap), + (unsigned long long)new_wrap); - label_scan_invalidate(mdac->area.dev); - - if (!dev_open(mdac->area.dev)) - return_0; - - /* Write text out, circularly */ - if (!dev_write(mdac->area.dev, mdac->area.start + mdac->rlocn.offset, - (size_t) (mdac->rlocn.size - new_wrap), MDA_CONTENT_REASON(mda_is_primary(mda)), - fidtc->raw_metadata_buf)) - goto_out; + if (!bcache_write_bytes(scan_bcache, mdac->area.dev->bcache_fd, mdac->area.start + mdac->rlocn.offset, + (size_t) (mdac->rlocn.size - new_wrap), + fidtc->raw_metadata_buf)) { + log_error("Failed to write metadata to %s fd %d", dev_name(mdac->area.dev), mdac->area.dev->bcache_fd); + goto out; + } if (new_wrap) { - log_debug_metadata("Writing wrapped metadata to %s at " FMTu64 " len " FMTu64 " of " FMTu64, - dev_name(mdac->area.dev), mdac->area.start + - MDA_HEADER_SIZE, new_wrap, mdac->rlocn.size); + log_debug_metadata("Writing metadata for VG %s to %s at %llu len %llu (wrapped)", + vg->name, dev_name(mdac->area.dev), + (unsigned long long)(mdac->area.start + MDA_HEADER_SIZE), + (unsigned long long)new_wrap); - if (!dev_write(mdac->area.dev, mdac->area.start + MDA_HEADER_SIZE, - (size_t) new_wrap, MDA_CONTENT_REASON(mda_is_primary(mda)), - fidtc->raw_metadata_buf + mdac->rlocn.size - new_wrap)) - goto_out; + if (!bcache_write_bytes(scan_bcache, mdac->area.dev->bcache_fd, + mdac->area.start + MDA_HEADER_SIZE, + (size_t) new_wrap, + fidtc->raw_metadata_buf + mdac->rlocn.size - new_wrap)) { + log_error("Failed to write metadata wrap to 
%s fd %d", dev_name(mdac->area.dev), mdac->area.dev->bcache_fd); + goto out; + } } mdac->rlocn.checksum = calc_crc(INITIAL_CRC, (uint8_t *)fidtc->raw_metadata_buf, @@ -720,9 +710,6 @@ static int _vg_write_raw(struct format_instance *fid, struct volume_group *vg, out: if (!r) { - if (!dev_close(mdac->area.dev)) - stack; - dm_free(fidtc->raw_metadata_buf); fidtc->raw_metadata_buf = NULL; } @@ -819,9 +806,6 @@ static int _vg_commit_raw_rlocn(struct format_instance *fid, out: if (!precommit) { - if (!dev_close(mdac->area.dev)) - stack; - dm_free(fidtc->raw_metadata_buf); fidtc->raw_metadata_buf = NULL; } @@ -904,9 +888,6 @@ static int _vg_remove_raw(struct format_instance *fid, struct volume_group *vg, rlocn->checksum = 0; rlocn_set_ignored(mdah->raw_locns, mda_is_ignored(mda)); - if (!dev_open(mdac->area.dev)) - return_0; - if (!_raw_write_mda_header(fid->fmt, mdac->area.dev, mda_is_primary(mda), mdac->area.start, mdah)) { dm_pool_free(fid->fmt->cmd->mem, mdah); @@ -917,9 +898,6 @@ static int _vg_remove_raw(struct format_instance *fid, struct volume_group *vg, r = 1; out: - if (!dev_close(mdac->area.dev)) - stack; - return r; } @@ -1239,8 +1217,6 @@ int read_metadata_location_summary(const struct format_type *fmt, return 0; } - label_scan_confirm(dev_area->dev); /* FIXME: remove this, ensures dev is in bcache */ - bcache_read_bytes(scan_bcache, dev_area->dev->bcache_fd, dev_area->start + rlocn->offset, NAME_LEN, buf); while (buf[len] && !isspace(buf[len]) && buf[len] != '{' && @@ -1397,8 +1373,6 @@ static int _write_single_mda(struct metadata_area *mda, void *baton) if (!_raw_write_mda_header(p->fmt, mdac->area.dev, mda_is_primary(mda), mdac->area.start, mdah)) { - if (!dev_close(p->pv->dev)) - stack; return_0; } return 1; @@ -2123,6 +2097,7 @@ static int _text_pv_add_metadata_area(const struct format_type *fmt, uint64_t mda_start; uint64_t adjustment, limit, tmp_mda_size; uint64_t wipe_size = 8 << SECTOR_SHIFT; + uint64_t zero_len; size_t page_size = 
lvm_getpagesize(); struct metadata_area *mda; struct mda_context *mdac; @@ -2330,13 +2305,14 @@ static int _text_pv_add_metadata_area(const struct format_type *fmt, } /* Wipe metadata area with zeroes. */ - if (!dev_set(pv->dev, mda_start, - (size_t) ((mda_size > wipe_size) ? wipe_size : mda_size), - MDA_HEADER_REASON(!mda_index), 0)) { - log_error("Failed to wipe new metadata area " - "at the %s of the %s", - mda_index ? "end" : "start", - pv_dev_name(pv)); + + zero_len = (mda_size > wipe_size) ? wipe_size : mda_size; + + if (!bcache_write_zeros(scan_bcache, pv->dev->bcache_fd, mda_start, zero_len)) { + log_error("Failed to wipe new metadata area on %s at %llu len %llu", + pv_dev_name(pv), + (unsigned long long)mda_start, + (unsigned long long)zero_len); return 0; } diff --git a/lib/label/label.c b/lib/label/label.c index 278f26ae5..67d441b43 100644 --- a/lib/label/label.c +++ b/lib/label/label.c @@ -104,8 +104,7 @@ struct labeller *label_get_handler(const char *name) /* FIXME Also wipe associated metadata area headers? */ int label_remove(struct device *dev) { - char buf[LABEL_SIZE] __attribute__((aligned(8))); - char readbuf[LABEL_SCAN_SIZE] __attribute__((aligned(8))); + char readbuf[LABEL_SIZE] __attribute__((aligned(8))); int r = 1; uint64_t sector; int wipe; @@ -113,31 +112,27 @@ int label_remove(struct device *dev) struct label_header *lh; struct lvmcache_info *info; - memset(buf, 0, LABEL_SIZE); - log_very_verbose("Scanning for labels to wipe from %s", dev_name(dev)); - label_scan_invalidate(dev); - - if (!dev_open(dev)) - return_0; - - /* - * We flush the device just in case someone is stupid - * enough to be trying to import an open pv into lvm. 
- */ - dev_flush(dev); - - if (!dev_read(dev, UINT64_C(0), LABEL_SCAN_SIZE, DEV_IO_LABEL, readbuf)) { - log_debug_devs("%s: Failed to read label area", dev_name(dev)); - goto out; + if (!label_scan_open(dev)) { + log_error("Failed to open device %s", dev_name(dev)); + return 0; } /* Scan first few sectors for anything looking like a label */ for (sector = 0; sector < LABEL_SCAN_SECTORS; sector += LABEL_SIZE >> SECTOR_SHIFT) { - lh = (struct label_header *) (readbuf + - (sector << SECTOR_SHIFT)); + + memset(readbuf, 0, sizeof(readbuf)); + + if (!bcache_read_bytes(scan_bcache, dev->bcache_fd, + sector << SECTOR_SHIFT, LABEL_SIZE, readbuf)) { + log_error("Failed to read label from %s sector %llu", + dev_name(dev), (unsigned long long)sector); + continue; + } + + lh = (struct label_header *)readbuf; wipe = 0; @@ -146,8 +141,7 @@ int label_remove(struct device *dev) wipe = 1; } else { dm_list_iterate_items(li, &_labellers) { - if (li->l->ops->can_handle(li->l, (char *) lh, - sector)) { + if (li->l->ops->can_handle(li->l, (char *)lh, sector)) { wipe = 1; break; } @@ -155,27 +149,24 @@ int label_remove(struct device *dev) } if (wipe) { - log_very_verbose("%s: Wiping label at sector %" PRIu64, - dev_name(dev), sector); - if (dev_write(dev, sector << SECTOR_SHIFT, LABEL_SIZE, DEV_IO_LABEL, - buf)) { + log_very_verbose("%s: Wiping label at sector %llu", + dev_name(dev), (unsigned long long)sector); + + if (!bcache_write_zeros(scan_bcache, dev->bcache_fd, + sector << SECTOR_SHIFT, LABEL_SIZE)) { + log_error("Failed to remove label from %s at sector %llu", + dev_name(dev), (unsigned long long)sector); + r = 0; + } else { /* Also remove the PV record from cache. 
*/ info = lvmcache_info_from_pvid(dev->pvid, dev, 0); if (info) lvmcache_del(info); - } else { - log_error("Failed to remove label from %s at " - "sector %" PRIu64, dev_name(dev), - sector); - r = 0; } } } out: - if (!dev_close(dev)) - stack; - return r; } @@ -197,8 +188,6 @@ int label_write(struct device *dev, struct label *label) return 0; } - label_scan_invalidate(dev); - memset(buf, 0, LABEL_SIZE); strncpy((char *)lh->id, LABEL_ID, sizeof(lh->id)); @@ -211,20 +200,21 @@ int label_write(struct device *dev, struct label *label) lh->crc_xl = xlate32(calc_crc(INITIAL_CRC, (uint8_t *)&lh->offset_xl, LABEL_SIZE - ((uint8_t *) &lh->offset_xl - (uint8_t *) lh))); - if (!dev_open(dev)) - return_0; - log_very_verbose("%s: Writing label to sector %" PRIu64 " with stored offset %" PRIu32 ".", dev_name(dev), label->sector, xlate32(lh->offset_xl)); - if (!dev_write(dev, label->sector << SECTOR_SHIFT, LABEL_SIZE, DEV_IO_LABEL, buf)) { + + if (!label_scan_open(dev)) { + log_error("Failed to open device %s", dev_name(dev)); + return 0; + } + + if (!bcache_write_bytes(scan_bcache, dev->bcache_fd, + label->sector << SECTOR_SHIFT, LABEL_SIZE, buf)) { log_debug_devs("Failed to write label to %s", dev_name(dev)); r = 0; } - if (!dev_close(dev)) - stack; - return r; } @@ -763,8 +753,10 @@ void label_scan_destroy(struct cmd_context *cmd) return; } - while ((dev = dev_iter_get(iter))) - label_scan_invalidate(dev); + while ((dev = dev_iter_get(iter))) { + if (_in_bcache(dev)) + _scan_dev_close(dev); + } dev_iter_destroy(iter); bcache_destroy(scan_bcache); @@ -833,22 +825,6 @@ int label_read_sector(struct device *dev, struct label **labelp, uint64_t scan_s return label_read(dev, labelp, 0); } -/* - * FIXME: remove this. It should not be needed once writes are going through - * bcache. As it is now, the write path involves multiple writes to a device, - * and later writes want to read previous writes from disk. 
They do these - * reads using the standard read paths which require the devs to be in bcache, - * but the bcache reads do not find the dev because the writes have gone around - * bcache. To work around this for now, check if each dev is in bcache before - * reading it, and if not add it first. - */ - -void label_scan_confirm(struct device *dev) -{ - if (!_in_bcache(dev)) - label_read(dev, NULL, 0); -} - /* * This is only needed when commands are using lvmetad, in which case they * don't do an initial label_scan, but may later need to rescan certain devs @@ -866,3 +842,17 @@ int label_scan_setup_bcache(void) return 1; } +/* + * This is needed to write to a new non-lvm device. + * Scanning that dev would not keep it open or in + * bcache, but to use bcache_write we need the dev + * to be open so we can use dev->bcache_fd to write. + */ + +int label_scan_open(struct device *dev) +{ + if (!_in_bcache(dev)) + return _scan_dev_open(dev); + return 1; +} + diff --git a/lib/label/label.h b/lib/label/label.h index bf6e9262b..92f3e7b47 100644 --- a/lib/label/label.h +++ b/lib/label/label.h @@ -112,5 +112,6 @@ int label_read(struct device *dev, struct label **labelp, uint64_t unused_sector int label_read_sector(struct device *dev, struct label **labelp, uint64_t scan_sector); void label_scan_confirm(struct device *dev); int label_scan_setup_bcache(void); +int label_scan_open(struct device *dev); #endif diff --git a/lib/metadata/metadata.c b/lib/metadata/metadata.c index b588a04f3..4cad2a128 100644 --- a/lib/metadata/metadata.c +++ b/lib/metadata/metadata.c @@ -692,6 +692,7 @@ int check_pv_dev_sizes(struct volume_group *vg) * . lvmcache_get_vgids() * . lvmcache_get_vgnames() * . the vg->pvs_to_write list and pv_to_write struct + * . 
_pvcreate_write() */ int vg_extend_each_pv(struct volume_group *vg, struct pvcreate_params *pp) @@ -1414,28 +1415,24 @@ static int _pvcreate_write(struct cmd_context *cmd, struct pv_to_write *pvw) struct device *dev = pv->dev; const char *pv_name = dev_name(dev); + if (!label_scan_open(dev)) { + log_error("%s not opened: device not written", pv_name); + return 0; + } + if (pvw->new_pv) { /* Wipe existing label first */ - if (!label_remove(pv_dev(pv))) { + if (!label_remove(dev)) { log_error("Failed to wipe existing label on %s", pv_name); return 0; } if (pvw->pp->zero) { log_verbose("Zeroing start of device %s", pv_name); - if (!dev_open_quiet(dev)) { - log_error("%s not opened: device not zeroed", pv_name); - return 0; - } - - if (!dev_set(dev, UINT64_C(0), (size_t) 2048, DEV_IO_LABEL, 0)) { + if (!bcache_write_zeros(scan_bcache, dev->bcache_fd, 0, 2048)) { log_error("%s not wiped: aborting", pv_name); - if (!dev_close(dev)) - stack; return 0; } - if (!dev_close(dev)) - stack; } } diff --git a/tools/toollib.c b/tools/toollib.c index 623bfbb0b..b0c0e8ed9 100644 --- a/tools/toollib.c +++ b/tools/toollib.c @@ -5728,6 +5728,8 @@ do_command: * Wipe signatures on devices being created. 
*/ dm_list_iterate_items_safe(pd, pd2, &pp->arg_create) { + label_scan_open(pd->dev); + log_verbose("Wiping signatures on new PV %s.", pd->name); if (!wipe_known_signatures(cmd, pd->dev, pd->name, TYPE_LVM1_MEMBER | TYPE_LVM2_MEMBER, @@ -5805,6 +5807,8 @@ do_command: pv_name = pd->name; + label_scan_open(pd->dev); + log_debug("Creating a new PV on %s.", pv_name); if (!(pv = pv_create(cmd, pd->dev, &pp->pva))) { @@ -5816,6 +5820,7 @@ do_command: log_verbose("Set up physical volume for \"%s\" with %" PRIu64 " available sectors.", pv_name, pv_size(pv)); + if (!label_remove(pv->dev)) { log_error("Failed to wipe existing label on %s.", pv_name); dm_list_move(&pp->arg_fail, &pd->list); @@ -5825,21 +5830,11 @@ do_command: if (pp->zero) { log_verbose("Zeroing start of device %s.", pv_name); - if (!dev_open_quiet(pv->dev)) { - log_error("%s not opened: device not zeroed.", pv_name); - dm_list_move(&pp->arg_fail, &pd->list); - continue; - } - - if (!dev_set(pv->dev, UINT64_C(0), (size_t) 2048, DEV_IO_LABEL, 0)) { + if (!bcache_write_zeros(scan_bcache, pv->dev->bcache_fd, 0, 2048)) { log_error("%s not wiped: aborting.", pv_name); - if (!dev_close(pv->dev)) - stack; dm_list_move(&pp->arg_fail, &pd->list); continue; } - if (!dev_close(pv->dev)) - stack; } log_verbose("Writing physical volume data to disk \"%s\".", pv_name); From e49b114f7e437666caefa04aada31f32820e286e Mon Sep 17 00:00:00 2001 From: David Teigland Date: Tue, 27 Feb 2018 11:26:04 -0600 Subject: [PATCH 57/87] bcache: use wrappers for bcache read write in lvm Using a wrapper makes it easier to disable bcache if needed. 
--- lib/config/config.c | 4 +- lib/device/dev-type.c | 4 +- lib/format_text/archiver.c | 2 +- lib/format_text/format-text.c | 19 ++++---- lib/label/label.c | 82 ++++++++++++++++++++++++++++++++--- lib/label/label.h | 8 ++++ lib/metadata/metadata.c | 2 +- tools/toollib.c | 2 +- 8 files changed, 99 insertions(+), 24 deletions(-) diff --git a/lib/config/config.c b/lib/config/config.c index 0711b8ca9..d07c17379 100644 --- a/lib/config/config.c +++ b/lib/config/config.c @@ -534,11 +534,11 @@ int config_file_read_fd(struct dm_config_tree *cft, struct device *dev, dev_io_r return 0; } - if (!bcache_read_bytes(scan_bcache, dev->bcache_fd, offset, size, buf)) + if (!dev_read_bytes(dev, offset, size, buf)) goto out; if (size2) { - if (!bcache_read_bytes(scan_bcache, dev->bcache_fd, offset2, size2, buf + size)) + if (!dev_read_bytes(dev, offset2, size2, buf + size)) goto out; } diff --git a/lib/device/dev-type.c b/lib/device/dev-type.c index e69f7caa2..992394310 100644 --- a/lib/device/dev-type.c +++ b/lib/device/dev-type.c @@ -677,7 +677,7 @@ static int _blkid_wipe(blkid_probe probe, struct device *dev, const char *name, } else log_verbose(_msg_wiping, type, name); - if (!bcache_write_zeros(scan_bcache, dev->bcache_fd, offset_value, len)) { + if (!dev_write_zeros(dev, offset_value, len)) { log_error("Failed to wipe %s signature on %s.", type, name); return 0; } @@ -774,7 +774,7 @@ static int _wipe_signature(struct device *dev, const char *type, const char *nam } log_print_unless_silent("Wiping %s on %s.", type, name); - if (!bcache_write_zeros(scan_bcache, dev->bcache_fd, offset_found, wipe_len)) { + if (!dev_write_zeros(dev, offset_found, wipe_len)) { log_error("Failed to wipe %s on %s.", type, name); return 0; } diff --git a/lib/format_text/archiver.c b/lib/format_text/archiver.c index 43ae4eb51..c8aeb47cb 100644 --- a/lib/format_text/archiver.c +++ b/lib/format_text/archiver.c @@ -489,7 +489,7 @@ int backup_restore_vg(struct cmd_context *cmd, struct volume_group *vg, 
log_verbose("Zeroing start of device %s", pv_name); - if (!bcache_write_zeros(scan_bcache, dev->bcache_fd, 0, 2048)) { + if (!dev_write_zeros(dev, 0, 2048)) { log_error("%s not wiped: aborting", pv_name); return 0; } diff --git a/lib/format_text/format-text.c b/lib/format_text/format-text.c index 4eef72ee1..4a9c303e4 100644 --- a/lib/format_text/format-text.c +++ b/lib/format_text/format-text.c @@ -227,7 +227,7 @@ static int _pv_analyze_mda_raw (const struct format_type * fmt, if (!(buf = dm_malloc(size + size2))) goto_out; - if (!bcache_read_bytes(scan_bcache, area->dev->bcache_fd, offset, size, buf)) { + if (!dev_read_bytes(area->dev, offset, size, buf)) { log_error("Failed to read dev %s offset %llu size %llu", dev_name(area->dev), (unsigned long long)offset, @@ -236,7 +236,7 @@ static int _pv_analyze_mda_raw (const struct format_type * fmt, } if (size2) { - if (!bcache_read_bytes(scan_bcache, area->dev->bcache_fd, offset2, size2, buf + size)) { + if (!dev_read_bytes(area->dev, offset2, size2, buf + size)) { log_error("Failed to read dev %s offset %llu size %llu", dev_name(area->dev), (unsigned long long)offset2, @@ -330,7 +330,7 @@ static int _raw_read_mda_header(struct mda_header *mdah, struct device_area *dev log_debug_metadata("Reading mda header sector from %s at %llu", dev_name(dev_area->dev), (unsigned long long)dev_area->start); - if (!bcache_read_bytes(scan_bcache, dev_area->dev->bcache_fd, dev_area->start, MDA_HEADER_SIZE, mdah)) { + if (!dev_read_bytes(dev_area->dev, dev_area->start, MDA_HEADER_SIZE, mdah)) { log_error("Failed to read metadata area header on %s at %llu", dev_name(dev_area->dev), (unsigned long long)dev_area->start); return 0; @@ -400,7 +400,7 @@ static int _raw_write_mda_header(const struct format_type *fmt, MDA_HEADER_SIZE - sizeof(mdah->checksum_xl))); - if (!bcache_write_bytes(scan_bcache, dev->bcache_fd, start_byte, MDA_HEADER_SIZE, mdah)) { + if (!dev_write_bytes(dev, start_byte, MDA_HEADER_SIZE, mdah)) { log_error("Failed to 
write mda header to %s fd %d", dev_name(dev), dev->bcache_fd); return 0; } @@ -464,7 +464,7 @@ static struct raw_locn *_read_metadata_location_vg(struct device_area *dev_area, */ memset(vgnamebuf, 0, sizeof(vgnamebuf)); - bcache_read_bytes(scan_bcache, dev_area->dev->bcache_fd, dev_area->start + rlocn->offset, NAME_LEN, vgnamebuf); + dev_read_bytes(dev_area->dev, dev_area->start + rlocn->offset, NAME_LEN, vgnamebuf); if (!strncmp(vgnamebuf, vgname, len = strlen(vgname)) && (isspace(vgnamebuf[len]) || vgnamebuf[len] == '{')) @@ -675,7 +675,7 @@ static int _vg_write_raw(struct format_instance *fid, struct volume_group *vg, (unsigned long long)(mdac->rlocn.size - new_wrap), (unsigned long long)new_wrap); - if (!bcache_write_bytes(scan_bcache, mdac->area.dev->bcache_fd, mdac->area.start + mdac->rlocn.offset, + if (!dev_write_bytes(mdac->area.dev, mdac->area.start + mdac->rlocn.offset, (size_t) (mdac->rlocn.size - new_wrap), fidtc->raw_metadata_buf)) { log_error("Failed to write metadata to %s fd %d", dev_name(mdac->area.dev), mdac->area.dev->bcache_fd); @@ -688,8 +688,7 @@ static int _vg_write_raw(struct format_instance *fid, struct volume_group *vg, (unsigned long long)(mdac->area.start + MDA_HEADER_SIZE), (unsigned long long)new_wrap); - if (!bcache_write_bytes(scan_bcache, mdac->area.dev->bcache_fd, - mdac->area.start + MDA_HEADER_SIZE, + if (!dev_write_bytes(mdac->area.dev, mdac->area.start + MDA_HEADER_SIZE, (size_t) new_wrap, fidtc->raw_metadata_buf + mdac->rlocn.size - new_wrap)) { log_error("Failed to write metadata wrap to %s fd %d", dev_name(mdac->area.dev), mdac->area.dev->bcache_fd); @@ -1217,7 +1216,7 @@ int read_metadata_location_summary(const struct format_type *fmt, return 0; } - bcache_read_bytes(scan_bcache, dev_area->dev->bcache_fd, dev_area->start + rlocn->offset, NAME_LEN, buf); + dev_read_bytes(dev_area->dev, dev_area->start + rlocn->offset, NAME_LEN, buf); while (buf[len] && !isspace(buf[len]) && buf[len] != '{' && len < (NAME_LEN - 1)) @@ 
-2308,7 +2307,7 @@ static int _text_pv_add_metadata_area(const struct format_type *fmt, zero_len = (mda_size > wipe_size) ? wipe_size : mda_size; - if (!bcache_write_zeros(scan_bcache, pv->dev->bcache_fd, mda_start, zero_len)) { + if (!dev_write_zeros(pv->dev, mda_start, zero_len)) { log_error("Failed to wipe new metadata area on %s at %llu len %llu", pv_dev_name(pv), (unsigned long long)mda_start, diff --git a/lib/label/label.c b/lib/label/label.c index 67d441b43..45a3ecf09 100644 --- a/lib/label/label.c +++ b/lib/label/label.c @@ -125,8 +125,7 @@ int label_remove(struct device *dev) memset(readbuf, 0, sizeof(readbuf)); - if (!bcache_read_bytes(scan_bcache, dev->bcache_fd, - sector << SECTOR_SHIFT, LABEL_SIZE, readbuf)) { + if (!dev_read_bytes(dev, sector << SECTOR_SHIFT, LABEL_SIZE, readbuf)) { log_error("Failed to read label from %s sector %llu", dev_name(dev), (unsigned long long)sector); continue; @@ -152,8 +151,7 @@ int label_remove(struct device *dev) log_very_verbose("%s: Wiping label at sector %llu", dev_name(dev), (unsigned long long)sector); - if (!bcache_write_zeros(scan_bcache, dev->bcache_fd, - sector << SECTOR_SHIFT, LABEL_SIZE)) { + if (!dev_write_zeros(dev, sector << SECTOR_SHIFT, LABEL_SIZE)) { log_error("Failed to remove label from %s at sector %llu", dev_name(dev), (unsigned long long)sector); r = 0; @@ -166,7 +164,6 @@ int label_remove(struct device *dev) } } - out: return r; } @@ -209,8 +206,7 @@ int label_write(struct device *dev, struct label *label) return 0; } - if (!bcache_write_bytes(scan_bcache, dev->bcache_fd, - label->sector << SECTOR_SHIFT, LABEL_SIZE, buf)) { + if (!dev_write_bytes(dev, label->sector << SECTOR_SHIFT, LABEL_SIZE, buf)) { log_debug_devs("Failed to write label to %s", dev_name(dev)); r = 0; } @@ -856,3 +852,75 @@ int label_scan_open(struct device *dev) return 1; } +bool dev_read_bytes(struct device *dev, off_t start, size_t len, void *data) +{ + int ret; + + if (!scan_bcache) { + if (!dev_open_readonly(dev)) + return 
false; + + ret = dev_read(dev, start, len, 0, data); + + if (!dev_close(dev)) + stack; + + return ret ? true : false; + } + + if (dev->bcache_fd <= 0) { + log_error("dev_read_bytes %s with invalid bcache_fd", dev_name(dev)); + return false; + } + + return bcache_read_bytes(scan_bcache, dev->bcache_fd, start, len, data); +} + +bool dev_write_bytes(struct device *dev, off_t start, size_t len, void *data) +{ + int ret; + + if (!scan_bcache) { + if (!dev_open(dev)) + return false; + + ret = dev_write(dev, start, len, 0, data); + + if (!dev_close(dev)) + stack; + + return ret ? true : false; + } + + if (dev->bcache_fd <= 0) { + log_error("dev_write_bytes %s with invalid bcache_fd", dev_name(dev)); + return false; + } + + return bcache_write_bytes(scan_bcache, dev->bcache_fd, start, len, data); +} + +bool dev_write_zeros(struct device *dev, off_t start, size_t len) +{ + int ret; + + if (!scan_bcache) { + if (!dev_open(dev)) + return false; + + ret = dev_set(dev, start, len, 0, 0); + + if (!dev_close(dev)) + stack; + + return ret ? true : false; + } + + if (dev->bcache_fd <= 0) { + log_error("dev_write_bytes %s with invalid bcache_fd", dev_name(dev)); + return false; + } + + return bcache_write_zeros(scan_bcache, dev->bcache_fd, start, len); +} + diff --git a/lib/label/label.h b/lib/label/label.h index 92f3e7b47..8ef687b22 100644 --- a/lib/label/label.h +++ b/lib/label/label.h @@ -114,4 +114,12 @@ void label_scan_confirm(struct device *dev); int label_scan_setup_bcache(void); int label_scan_open(struct device *dev); +/* + * Wrappers around bcache equivalents. 
+ * (these make it easier to disable bcache and revert to direct rw if needed) + */ +bool dev_read_bytes(struct device *dev, off_t start, size_t len, void *data); +bool dev_write_bytes(struct device *dev, off_t start, size_t len, void *data); +bool dev_write_zeros(struct device *dev, off_t start, size_t len); + #endif diff --git a/lib/metadata/metadata.c b/lib/metadata/metadata.c index 4cad2a128..b4424519f 100644 --- a/lib/metadata/metadata.c +++ b/lib/metadata/metadata.c @@ -1429,7 +1429,7 @@ static int _pvcreate_write(struct cmd_context *cmd, struct pv_to_write *pvw) if (pvw->pp->zero) { log_verbose("Zeroing start of device %s", pv_name); - if (!bcache_write_zeros(scan_bcache, dev->bcache_fd, 0, 2048)) { + if (!dev_write_zeros(dev, 0, 2048)) { log_error("%s not wiped: aborting", pv_name); return 0; } diff --git a/tools/toollib.c b/tools/toollib.c index b0c0e8ed9..b029a0d39 100644 --- a/tools/toollib.c +++ b/tools/toollib.c @@ -5830,7 +5830,7 @@ do_command: if (pp->zero) { log_verbose("Zeroing start of device %s.", pv_name); - if (!bcache_write_zeros(scan_bcache, pv->dev->bcache_fd, 0, 2048)) { + if (!dev_write_zeros(pv->dev, 0, 2048)) { log_error("%s not wiped: aborting.", pv_name); dm_list_move(&pp->arg_fail, &pd->list); continue; From 21057676a1f01c6b1f19b2e879d8385d76adb517 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Tue, 27 Feb 2018 12:35:45 -0600 Subject: [PATCH 58/87] scan: create bcache with minimum number of blocks In some odd cases (e.g. tests) there are very few devices which results in creating too few blocks in bcache, so create bcache with a minimum number of blocks. 
--- lib/label/label.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lib/label/label.c b/lib/label/label.c index 45a3ecf09..88001a9f9 100644 --- a/lib/label/label.c +++ b/lib/label/label.c @@ -560,13 +560,15 @@ static int _scan_list(struct dm_list *devs, int *failed) return 1; } +#define MIN_BCACHE_BLOCKS 32 + static int _setup_bcache(int cache_blocks) { struct io_engine *ioe; /* No devices can happen, just create bcache with any small number. */ - if (!cache_blocks) - cache_blocks = 8; + if (cache_blocks < MIN_BCACHE_BLOCKS) + cache_blocks = MIN_BCACHE_BLOCKS; /* * 100 is arbitrary, it's the max number of concurrent aio's @@ -831,7 +833,7 @@ int label_read_sector(struct device *dev, struct label **labelp, uint64_t scan_s int label_scan_setup_bcache(void) { if (!scan_bcache) { - if (!_setup_bcache(32)) + if (!_setup_bcache(0)) return 0; } From 4331182964e37ad75690007f5f874f2606944a34 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Tue, 27 Feb 2018 12:37:25 -0600 Subject: [PATCH 59/87] bcache: add some error messages for debugging --- lib/device/bcache.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/lib/device/bcache.c b/lib/device/bcache.c index 272de60b6..94623a804 100644 --- a/lib/device/bcache.c +++ b/lib/device/bcache.c @@ -659,8 +659,11 @@ static struct block *_new_block(struct bcache *cache, int fd, block_address inde if (dm_list_empty(&cache->io_pending)) _writeback(cache, 16); // FIXME: magic number _wait_io(cache); - } else + } else { + log_error("bcache no new blocks for fd %d index %u", + fd, (uint32_t)index); return NULL; + } } } @@ -676,6 +679,18 @@ static struct block *_new_block(struct bcache *cache, int fd, block_address inde _hash_insert(b); } + if (!b) { + log_error("bcache no new blocks for fd %d index %u " + "clean %u free %u dirty %u pending %u nr_data_blocks %u nr_cache_blocks %u", + fd, (uint32_t)index, + dm_list_size(&cache->clean), + dm_list_size(&cache->free), + 
dm_list_size(&cache->dirty), + dm_list_size(&cache->io_pending), + (uint32_t)cache->nr_data_blocks, + (uint32_t)cache->nr_cache_blocks); + } + return b; } @@ -893,7 +908,7 @@ bool bcache_get(struct bcache *cache, int fd, block_address index, } *result = NULL; - log_warn("couldn't get block"); + log_error("bcache failed to get block %u fd %d", (uint32_t)index, fd); return false; } @@ -1077,6 +1092,8 @@ bool bcache_write_bytes(struct bcache *cache, int fd, off_t start, size_t len, v for (i = bb; i < be; i++) { if (!bcache_get(cache, fd, i, 0, &b)) { + log_error("bcache_write failed to get block %u fd %d bb %u be %u", + (uint32_t)i, fd, (uint32_t)bb, (uint32_t)be); errors++; break; } From da2b155a9da4c4c9caf502b87ad263de6e678b30 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Tue, 27 Feb 2018 15:03:56 -0600 Subject: [PATCH 60/87] scan: invalidate bcache for dev after errors If there are errors reading or writing dev, invalidate bcache for it. --- lib/label/label.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/lib/label/label.c b/lib/label/label.c index 88001a9f9..0514ddf87 100644 --- a/lib/label/label.c +++ b/lib/label/label.c @@ -871,11 +871,16 @@ bool dev_read_bytes(struct device *dev, off_t start, size_t len, void *data) } if (dev->bcache_fd <= 0) { - log_error("dev_read_bytes %s with invalid bcache_fd", dev_name(dev)); + log_error("dev_read_bytes %s with invalid fd %d", dev_name(dev), dev->bcache_fd); return false; } - return bcache_read_bytes(scan_bcache, dev->bcache_fd, start, len, data); + if (!bcache_read_bytes(scan_bcache, dev->bcache_fd, start, len, data)) { + label_scan_invalidate(dev); + return false; + } + return true; + } bool dev_write_bytes(struct device *dev, off_t start, size_t len, void *data) @@ -895,11 +900,15 @@ bool dev_write_bytes(struct device *dev, off_t start, size_t len, void *data) } if (dev->bcache_fd <= 0) { - log_error("dev_write_bytes %s with invalid bcache_fd", dev_name(dev)); + 
log_error("dev_write_bytes %s with invalid fd %d", dev_name(dev), dev->bcache_fd); return false; } - return bcache_write_bytes(scan_bcache, dev->bcache_fd, start, len, data); + if (!bcache_write_bytes(scan_bcache, dev->bcache_fd, start, len, data)) { + label_scan_invalidate(dev); + return false; + } + return true; } bool dev_write_zeros(struct device *dev, off_t start, size_t len) @@ -919,10 +928,14 @@ bool dev_write_zeros(struct device *dev, off_t start, size_t len) } if (dev->bcache_fd <= 0) { - log_error("dev_write_bytes %s with invalid bcache_fd", dev_name(dev)); + log_error("dev_write_bytes %s with invalid fd %d", dev_name(dev), dev->bcache_fd); return false; } - return bcache_write_zeros(scan_bcache, dev->bcache_fd, start, len); + if (!bcache_write_zeros(scan_bcache, dev->bcache_fd, start, len)) { + label_scan_invalidate(dev); + return false; + } + return true; } From 217f3f8741c1f516ab1f039b8725ac3ce638e302 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Tue, 27 Feb 2018 16:35:47 -0600 Subject: [PATCH 61/87] scan: add function to drop bcache blocks which can be a little more efficient than destroy. --- lib/label/label.c | 24 ++++++++++++++++-------- lib/label/label.h | 1 + 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/lib/label/label.c b/lib/label/label.c index 0514ddf87..dd455ecfb 100644 --- a/lib/label/label.c +++ b/lib/label/label.c @@ -733,20 +733,15 @@ void label_scan_invalidate_lv(struct cmd_context *cmd, struct logical_volume *lv } /* - * Undo label_scan() - * - * Close devices that are open because bcache is holding blocks for them. - * Destroy the bcache. + * Empty the bcache of all blocks and close all open fds, + * but keep the bcache set up.
*/ -void label_scan_destroy(struct cmd_context *cmd) +void label_scan_drop(struct cmd_context *cmd) { struct dev_iter *iter; struct device *dev; - if (!scan_bcache) - return; - if (!(iter = dev_iter_create(cmd->full_filter, 0))) { return; } @@ -756,6 +751,19 @@ void label_scan_destroy(struct cmd_context *cmd) _scan_dev_close(dev); } dev_iter_destroy(iter); +} + +/* + * Close devices that are open because bcache is holding blocks for them. + * Destroy the bcache. + */ + +void label_scan_destroy(struct cmd_context *cmd) +{ + if (!scan_bcache) + return; + + label_scan_drop(cmd); bcache_destroy(scan_bcache); scan_bcache = NULL; diff --git a/lib/label/label.h b/lib/label/label.h index 8ef687b22..55e92e8d4 100644 --- a/lib/label/label.h +++ b/lib/label/label.h @@ -107,6 +107,7 @@ int label_scan_devs(struct cmd_context *cmd, struct dm_list *devs); int label_scan_devs_excl(struct dm_list *devs); void label_scan_invalidate(struct device *dev); void label_scan_invalidate_lv(struct cmd_context *cmd, struct logical_volume *lv); +void label_scan_drop(struct cmd_context *cmd); void label_scan_destroy(struct cmd_context *cmd); int label_read(struct device *dev, struct label **labelp, uint64_t unused_sector); int label_read_sector(struct device *dev, struct label **labelp, uint64_t scan_sector); From 570c6239eebf18e6884d350654274d3bc8f1c4f1 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Thu, 1 Mar 2018 10:17:32 -0600 Subject: [PATCH 62/87] bcache: fix error handling The error handling code wasn't working, but it appears that just removing it is what we need. The caller doesn't really need any different behavior related to bcache blocks on an io error, it just wants to know if there was an error.
--- lib/device/bcache.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/lib/device/bcache.c b/lib/device/bcache.c index 94623a804..903477677 100644 --- a/lib/device/bcache.c +++ b/lib/device/bcache.c @@ -539,6 +539,14 @@ static void _complete_io(void *context, int err) dm_list_del(&b->list); if (b->error) { + log_warn("bcache io error %d fd %d", b->error, b->fd); + memset(b->data, 0, cache->block_sectors << SECTOR_SHIFT); + } + + /* Things don't work with this block of code, but work without it. */ +#if 0 + if (b->error) { + log_warn("bcache io error %d fd %d", b->error, b->fd); if (b->io_dir == DIR_READ) { // We can just forget about this block, since there's // no dirty data to be written back. @@ -552,6 +560,9 @@ static void _complete_io(void *context, int err) _clear_flags(b, BF_DIRTY); _link_block(b); } +#endif + _clear_flags(b, BF_DIRTY); + _link_block(b); } /* @@ -768,7 +779,7 @@ static struct block *_lookup_or_read_block(struct bcache *cache, } } - if (b && !b->error) { + if (b) { if (flags & (GF_DIRTY | GF_ZERO)) _set_flags(b, BF_DIRTY); @@ -904,6 +915,9 @@ bool bcache_get(struct bcache *cache, int fd, block_address index, b->ref_count++; *result = b; + + if (b->error) + return false; return true; } From 1717d4cb178b88dc70bc14a11aba199b34931bb0 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Thu, 1 Mar 2018 10:20:34 -0600 Subject: [PATCH 63/87] lvmcache: add shorter way to delete dev info Don't make the caller look up the info first. 
--- lib/cache/lvmcache.c | 8 ++++++++ lib/cache/lvmcache.h | 2 ++ 2 files changed, 10 insertions(+) diff --git a/lib/cache/lvmcache.c b/lib/cache/lvmcache.c index 53254f476..64790803a 100644 --- a/lib/cache/lvmcache.c +++ b/lib/cache/lvmcache.c @@ -1608,6 +1608,14 @@ void lvmcache_del(struct lvmcache_info *info) dm_free(info); } +void lvmcache_del_dev(struct device *dev) +{ + struct lvmcache_info *info; + + if ((info = lvmcache_info_from_pvid((const char *)dev->pvid, dev, 0))) + lvmcache_del(info); +} + /* * vginfo must be info->vginfo unless info is NULL (orphans) */ diff --git a/lib/cache/lvmcache.h b/lib/cache/lvmcache.h index 3967b29c2..4343060fe 100644 --- a/lib/cache/lvmcache.h +++ b/lib/cache/lvmcache.h @@ -59,6 +59,7 @@ struct lvmcache_vgsummary { const char *lock_type; uint32_t mda_checksum; size_t mda_size; + int zero_offset; }; int lvmcache_init(void); @@ -83,6 +84,7 @@ struct lvmcache_info *lvmcache_add(struct labeller *labeller, const char *pvid, uint32_t vgstatus); int lvmcache_add_orphan_vginfo(const char *vgname, struct format_type *fmt); void lvmcache_del(struct lvmcache_info *info); +void lvmcache_del_dev(struct device *dev); /* Update things */ int lvmcache_update_vgname_and_id(struct lvmcache_info *info, From 44726ed9cb9559801868ade5440d9e6ea6a61127 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Thu, 1 Mar 2018 12:40:37 -0600 Subject: [PATCH 64/87] scan: remove lvmcache info for failed devs When scanning a device fails, drop an lvmcache info struct for it. 
--- lib/label/label.c | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/lib/label/label.c b/lib/label/label.c index dd455ecfb..82d4bf3ef 100644 --- a/lib/label/label.c +++ b/lib/label/label.c @@ -333,7 +333,6 @@ static int _process_block(struct device *dev, struct block *bb, int *is_lvm_devi char label_buf[LABEL_SIZE] __attribute__((aligned(8))); struct label *label = NULL; struct labeller *labeller; - struct lvmcache_info *info; uint64_t sector; int ret = 0; @@ -356,11 +355,7 @@ static int _process_block(struct device *dev, struct block *bb, int *is_lvm_devi log_very_verbose("%s: No lvm label detected", dev_name(dev)); - if ((info = lvmcache_info_from_pvid(dev->pvid, dev, 0))) { - /* FIXME: if this case is actually happening, fix it. */ - log_warn("Device %s has no label, removing PV info from lvmcache.", dev_name(dev)); - lvmcache_del(info); - } + lvmcache_del_dev(dev); /* FIXME: if this is needed, fix it. */ *is_lvm_device = 0; goto_out; @@ -380,6 +375,7 @@ static int _process_block(struct device *dev, struct block *bb, int *is_lvm_devi label->sector = sector; } else { /* FIXME: handle errors */ + lvmcache_del_dev(dev); } out: return ret; @@ -475,6 +471,7 @@ static int _scan_list(struct dm_list *devs, int *failed) int rem_prefetches; int scan_failed; int is_lvm_device; + int ret; dm_list_init(&wait_devs); dm_list_init(&done_devs); @@ -498,7 +495,7 @@ static int _scan_list(struct dm_list *devs, int *failed) if (!_in_bcache(devl->dev)) { if (!_scan_dev_open(devl->dev)) { - log_debug_devs("%s: Failed to open device.", dev_name(devl->dev)); + log_debug_devs("Scan failed to open %s.", dev_name(devl->dev)); dm_list_del(&devl->list); dm_list_add(&done_devs, &devl->list); scan_failed_count++; @@ -518,14 +515,24 @@ static int _scan_list(struct dm_list *devs, int *failed) bb = NULL; if (!bcache_get(scan_bcache, devl->dev->bcache_fd, 0, 0, &bb)) { - log_debug_devs("%s: Failed to scan device.", dev_name(devl->dev)); + 
log_error("Scan failed to read %s.", dev_name(devl->dev)); scan_failed_count++; scan_failed = 1; + lvmcache_del_dev(devl->dev); } else { log_debug_devs("Processing data from device %s fd %d block %p", dev_name(devl->dev), devl->dev->bcache_fd, bb); - _process_block(devl->dev, bb, &is_lvm_device); - scan_lvm_count++; - scan_failed = 0; + + ret = _process_block(devl->dev, bb, &is_lvm_device); + + if (!ret && is_lvm_device) { + log_error("Scan failed to process %s", dev_name(devl->dev)); + scan_failed_count++; + scan_failed = 1; + lvmcache_del_dev(devl->dev); + } else { + scan_lvm_count++; + scan_failed = 0; + } } if (bb) @@ -884,6 +891,8 @@ bool dev_read_bytes(struct device *dev, off_t start, size_t len, void *data) } if (!bcache_read_bytes(scan_bcache, dev->bcache_fd, start, len, data)) { + log_error("dev_read_bytes %s at %u failed invalidate fd %d", + dev_name(dev), (uint32_t)start, dev->bcache_fd); label_scan_invalidate(dev); return false; } @@ -913,6 +922,8 @@ bool dev_write_bytes(struct device *dev, off_t start, size_t len, void *data) } if (!bcache_write_bytes(scan_bcache, dev->bcache_fd, start, len, data)) { + log_error("dev_write_bytes %s at %u failed invalidate fd %d", + dev_name(dev), (uint32_t)start, dev->bcache_fd); label_scan_invalidate(dev); return false; } @@ -936,11 +947,13 @@ bool dev_write_zeros(struct device *dev, off_t start, size_t len) } if (dev->bcache_fd <= 0) { - log_error("dev_write_bytes %s with invalid fd %d", dev_name(dev), dev->bcache_fd); + log_error("dev_write_zeros %s with invalid fd %d", dev_name(dev), dev->bcache_fd); return false; } if (!bcache_write_zeros(scan_bcache, dev->bcache_fd, start, len)) { + log_error("dev_write_zeros %s at %u failed invalidate fd %d", + dev_name(dev), (uint32_t)start, dev->bcache_fd); label_scan_invalidate(dev); return false; } From 196579af1f7ebe25c0266043c7d966b344699d5d Mon Sep 17 00:00:00 2001 From: David Teigland Date: Thu, 1 Mar 2018 12:42:18 -0600 Subject: [PATCH 65/87] scan: check for errors in 
text layer The scanning code in the format_text layer has previously ignored errors. Start checking for and returning them. --- lib/format_text/format-text.c | 5 ++++- lib/format_text/text_label.c | 38 ++++++++++++++++++++--------------- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/lib/format_text/format-text.c b/lib/format_text/format-text.c index 4a9c303e4..4146e7c72 100644 --- a/lib/format_text/format-text.c +++ b/lib/format_text/format-text.c @@ -536,8 +536,10 @@ static struct volume_group *_vg_read_raw_area(struct format_instance *fid, char *desc; uint32_t wrap = 0; - if (!(mdah = raw_read_mda_header(fid->fmt, area, primary_mda))) + if (!(mdah = raw_read_mda_header(fid->fmt, area, primary_mda))) { + log_error("Failed to read vg %s from %s", vgname, dev_name(area->dev)); goto_out; + } if (!(rlocn = _read_metadata_location_vg(area, mdah, primary_mda, vgname, &precommitted))) { log_debug_metadata("VG %s not found on %s", vgname, dev_name(area->dev)); @@ -1213,6 +1215,7 @@ int read_metadata_location_summary(const struct format_type *fmt, log_debug_metadata("Metadata location on %s at %llu has offset 0.", dev_name(dev_area->dev), (unsigned long long)(dev_area->start + rlocn->offset)); + vgsummary->zero_offset = 1; return 0; } diff --git a/lib/format_text/text_label.c b/lib/format_text/text_label.c index 206ae3f2e..e65079e0f 100644 --- a/lib/format_text/text_label.c +++ b/lib/format_text/text_label.c @@ -331,16 +331,9 @@ static int _read_mda_header_and_metadata(struct metadata_area *mda, void *baton) struct mda_header *mdah; struct lvmcache_vgsummary vgsummary = { 0 }; - /* - * Using the labeller struct to preserve info about - * the last parsed vgname, vgid, creation host - * - * TODO: make lvmcache smarter and move this cache logic there - */ - if (!(mdah = raw_read_mda_header(fmt, &mdac->area, mda_is_primary(mda)))) { - stack; - goto close_dev; + log_error("Failed to read mda header from %s", dev_name(mdac->area.dev)); + goto fail; } 
mda_set_ignored(mda, rlocn_is_ignored(mdah->raw_locns)); @@ -352,14 +345,25 @@ static int _read_mda_header_and_metadata(struct metadata_area *mda, void *baton) return 1; } - if (read_metadata_location_summary(fmt, mdah, mda_is_primary(mda), &mdac->area, &vgsummary, - &mdac->free_sectors) && - !lvmcache_update_vgname_and_id(p->info, &vgsummary)) { - return_0; + if (!read_metadata_location_summary(fmt, mdah, mda_is_primary(mda), &mdac->area, + &vgsummary, &mdac->free_sectors)) { + if (vgsummary.zero_offset) + return 1; + + log_error("Failed to read metadata summary from %s", dev_name(mdac->area.dev)); + goto fail; + } + + if (!lvmcache_update_vgname_and_id(p->info, &vgsummary)) { + log_error("Failed to save lvm summary for %s", dev_name(mdac->area.dev)); + goto fail; } -close_dev: return 1; + +fail: + lvmcache_del(p->info); + return 0; } static int _text_read(struct labeller *l, struct device *dev, void *label_buf, @@ -434,10 +438,12 @@ out: baton.info = info; baton.label = *label; - lvmcache_foreach_mda(info, _read_mda_header_and_metadata, &baton); + if (!lvmcache_foreach_mda(info, _read_mda_header_and_metadata, &baton)) { + log_error("Failed to scan VG from %s", dev_name(dev)); + return 0; + } lvmcache_make_valid(info); - return 1; } From ddb5de7a98af5e354013a07c61e46636b9ec01e6 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Thu, 1 Mar 2018 12:43:29 -0600 Subject: [PATCH 66/87] clvm: fix bcache scan handling We can't let clvmd keep all scanned devs open, which prevents them from being removed. So drop the bcache data (and close fds) after doing a label scan. Also set up bcache before the clvm-specific vg_read (which needs to rescan the vg's devs using bcache) and destroy the bcache after.
--- daemons/clvmd/lvm-functions.c | 1 + lib/metadata/metadata.c | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/daemons/clvmd/lvm-functions.c b/daemons/clvmd/lvm-functions.c index c845e6d51..64bdab303 100644 --- a/daemons/clvmd/lvm-functions.c +++ b/daemons/clvmd/lvm-functions.c @@ -665,6 +665,7 @@ int do_refresh_cache(void) init_ignore_suspended_devices(1); lvmcache_force_next_label_scan(); lvmcache_label_scan(cmd); + label_scan_destroy(cmd); /* destroys bcache (to close devs), keeps lvmcache */ dm_pool_empty(cmd->mem); pthread_mutex_unlock(&lvm_lock); diff --git a/lib/metadata/metadata.c b/lib/metadata/metadata.c index b4424519f..462c3918b 100644 --- a/lib/metadata/metadata.c +++ b/lib/metadata/metadata.c @@ -4530,11 +4530,16 @@ static struct volume_group *_vg_read_by_vgid(struct cmd_context *cmd, consistent = 0; + label_scan_setup_bcache(); + if ((vg = _vg_read(cmd, vgname, vgid, warn_flags, &consistent, precommitted))) { /* Does it matter if consistent is 0 or 1? */ + label_scan_destroy(cmd); return vg; } + label_scan_destroy(cmd); + log_debug_metadata("Reading VG by vgid %.8s not found.", vgid); return NULL; } From 7e33bd1335ba91d6b978df3a39250987d3412116 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Tue, 6 Mar 2018 11:22:18 -0600 Subject: [PATCH 67/87] lvmetad: fix process_each_label Was missing the call to populate lvmcache info from lvmetad at the start of process_each_label. 
--- tools/toollib.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/toollib.c b/tools/toollib.c index b029a0d39..c8dd9d38d 100644 --- a/tools/toollib.c +++ b/tools/toollib.c @@ -1505,6 +1505,7 @@ int process_each_label(struct cmd_context *cmd, int argc, char **argv, log_set_report_object_type(LOG_REPORT_OBJECT_TYPE_LABEL); lvmcache_label_scan(cmd); + lvmcache_seed_infos_from_lvmetad(cmd); if (argc) { for (; opt < argc; opt++) { From e351f8bc66d49fecaa0ab78f8413d50a4674604a Mon Sep 17 00:00:00 2001 From: David Teigland Date: Tue, 6 Mar 2018 15:05:27 -0600 Subject: [PATCH 68/87] lvmetad: need to set up bcache in another place We need to find one common place to set up bcache for the lvmetad case, instead of adding calls in various places. --- lib/label/label.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lib/label/label.c b/lib/label/label.c index 82d4bf3ef..9401067a7 100644 --- a/lib/label/label.c +++ b/lib/label/label.c @@ -675,6 +675,13 @@ int label_scan_devs(struct cmd_context *cmd, struct dm_list *devs) { struct device_list *devl; + /* FIXME: get rid of this, it's only needed for lvmetad in which + case we should be setting up bcache in one place. */ + if (!scan_bcache) { + if (!_setup_bcache(0)) + return 0; + } + dm_list_iterate_items(devl, devs) { if (_in_bcache(devl->dev)) { bcache_invalidate_fd(scan_bcache, devl->dev->bcache_fd); From a9b0aa5c178a6d8bb708ed35f833c648f7437ae3 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Fri, 9 Mar 2018 11:39:12 -0600 Subject: [PATCH 69/87] lvmetad: more fixes related to bcache Need to open devs prior to bcache io. 
--- lib/label/label.c | 25 +++++++++++++++++-------- lib/metadata/metadata.c | 12 ++++++++++++ 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/lib/label/label.c b/lib/label/label.c index 9401067a7..14a7e9017 100644 --- a/lib/label/label.c +++ b/lib/label/label.c @@ -515,7 +515,7 @@ static int _scan_list(struct dm_list *devs, int *failed) bb = NULL; if (!bcache_get(scan_bcache, devl->dev->bcache_fd, 0, 0, &bb)) { - log_error("Scan failed to read %s.", dev_name(devl->dev)); + log_debug_devs("Scan failed to read %s.", dev_name(devl->dev)); scan_failed_count++; scan_failed = 1; lvmcache_del_dev(devl->dev); @@ -525,7 +525,7 @@ static int _scan_list(struct dm_list *devs, int *failed) ret = _process_block(devl->dev, bb, &is_lvm_device); if (!ret && is_lvm_device) { - log_error("Scan failed to process %s", dev_name(devl->dev)); + log_debug_devs("Scan failed to process %s", dev_name(devl->dev)); scan_failed_count++; scan_failed = 1; lvmcache_del_dev(devl->dev); @@ -893,8 +893,11 @@ bool dev_read_bytes(struct device *dev, off_t start, size_t len, void *data) } if (dev->bcache_fd <= 0) { - log_error("dev_read_bytes %s with invalid fd %d", dev_name(dev), dev->bcache_fd); - return false; + /* This is not often needed, perhaps only with lvmetad. */ + if (!label_scan_open(dev)) { + log_error("dev_read_bytes %s cannot open dev", dev_name(dev)); + return false; + } } if (!bcache_read_bytes(scan_bcache, dev->bcache_fd, start, len, data)) { @@ -924,8 +927,11 @@ bool dev_write_bytes(struct device *dev, off_t start, size_t len, void *data) } if (dev->bcache_fd <= 0) { - log_error("dev_write_bytes %s with invalid fd %d", dev_name(dev), dev->bcache_fd); - return false; + /* This is not often needed, perhaps only with lvmetad. 
*/ + if (!label_scan_open(dev)) { + log_error("dev_write_bytes %s cannot open dev", dev_name(dev)); + return false; + } } if (!bcache_write_bytes(scan_bcache, dev->bcache_fd, start, len, data)) { @@ -954,8 +960,11 @@ bool dev_write_zeros(struct device *dev, off_t start, size_t len) } if (dev->bcache_fd <= 0) { - log_error("dev_write_zeros %s with invalid fd %d", dev_name(dev), dev->bcache_fd); - return false; + /* This is not often needed, perhaps only with lvmetad. */ + if (!label_scan_open(dev)) { + log_error("dev_write_zeros %s cannot open dev", dev_name(dev)); + return false; + } } if (!bcache_write_zeros(scan_bcache, dev->bcache_fd, start, len)) { diff --git a/lib/metadata/metadata.c b/lib/metadata/metadata.c index 462c3918b..8cb06befa 100644 --- a/lib/metadata/metadata.c +++ b/lib/metadata/metadata.c @@ -3809,6 +3809,18 @@ static struct volume_group *_vg_read(struct cmd_context *cmd, release_vg(correct_vg); return_NULL; } + + /* + * When a command reads the vg from lvmetad, and then + * writes the vg, the write path does some disk reads + * of the devs. + * FIXME: when a command is going to write the vg, + * we should just read the vg from disk entirely + * and skip reading it from lvmetad. 
+ */ + dm_list_iterate_items(pvl, &correct_vg->pvs) + label_scan_open(pvl->pv->dev); + } return correct_vg; From a01a8d71723740577e5fe9635412940d941b5da9 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Fri, 9 Mar 2018 13:18:38 -0600 Subject: [PATCH 70/87] tests: vgck now exits with error for bad vg --- test/shell/vgck.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/shell/vgck.sh b/test/shell/vgck.sh index 2f3fba4c1..186704c70 100644 --- a/test/shell/vgck.sh +++ b/test/shell/vgck.sh @@ -22,18 +22,18 @@ dd if=/dev/urandom bs=512 seek=2 count=32 of="$dev2" # TODO: aux lvmconf "global/locking_type = 4" -vgscan 2>&1 | tee vgscan.out +vgscan 2>&1 | tee vgscan.out || true if test -e LOCAL_LVMETAD; then - not grep "Inconsistent metadata found for VG $vg" vgscan.out + not grep "Failed" vgscan.out else - grep "Inconsistent metadata found for VG $vg" vgscan.out + grep "Failed" vgscan.out fi dd if=/dev/urandom bs=512 seek=2 count=32 of="$dev2" aux notify_lvmetad "$dev2" -vgck $vg 2>&1 | tee vgck.out +vgck $vg 2>&1 | tee vgck.out || true grep Incorrect vgck.out vgremove -ff $vg From ae21305ee7fa60edbd85824d83ce554989161189 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Fri, 6 Apr 2018 13:05:17 -0500 Subject: [PATCH 71/87] scan: drop bcache between lvm shell commands A running lvm shell keeps all lvm devices open unless the bcache is dropped. 
--- lib/cache/lvmcache.c | 22 ++++++++++++++++++++-- tools/lvmcmdline.c | 7 +++---- tools/toollib.c | 1 - 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/lib/cache/lvmcache.c b/lib/cache/lvmcache.c index 64790803a..78665bfb0 100644 --- a/lib/cache/lvmcache.c +++ b/lib/cache/lvmcache.c @@ -406,7 +406,7 @@ int lvmcache_verify_lock_order(const char *vgname) return 1; if (!_lock_hash) - return_0; + return 1; dm_hash_iterate(n, _lock_hash) { if (!dm_hash_get_data(_lock_hash, n)) @@ -836,6 +836,9 @@ static int _label_scan_invalid(struct cmd_context *cmd) dev_count++; } + if (dm_list_empty(&devs)) + return 1; + log_debug_cache("Scanning %d devs with invalid info.", dev_count); ret = label_scan_devs(cmd, &devs); @@ -1236,8 +1239,10 @@ int lvmcache_label_scan(struct cmd_context *cmd) struct dm_list del_cache_devs; struct dm_list add_cache_devs; struct lvmcache_info *info; + struct lvmcache_vginfo *vginfo; struct device_list *devl; struct format_type *fmt; + int vginfo_count = 0; int r = 0; @@ -1247,6 +1252,8 @@ int lvmcache_label_scan(struct cmd_context *cmd) return 1; } + log_debug_cache("Finding VG info"); + /* Avoid recursion when a PVID can't be found! 
*/ if (_scanning_in_progress) return 0; @@ -1315,6 +1322,8 @@ int lvmcache_label_scan(struct cmd_context *cmd) dm_list_init(&del_cache_devs); dm_list_init(&add_cache_devs); + log_debug_cache("Resolving duplicate devices"); + _choose_preferred_devs(cmd, &del_cache_devs, &add_cache_devs); dm_list_iterate_items(devl, &del_cache_devs) { @@ -1354,6 +1363,14 @@ int lvmcache_label_scan(struct cmd_context *cmd) _scanning_in_progress = 0; _force_label_scan = 0; + dm_list_iterate_items(vginfo, &_vginfos) { + if (is_orphan_vg(vginfo->vgname)) + continue; + vginfo_count++; + } + + log_debug_cache("Found VG info for %d VGs", vginfo_count); + return r; } @@ -2291,7 +2308,8 @@ static void _lvmcache_destroy_lockname(struct dm_hash_node *n) void lvmcache_destroy(struct cmd_context *cmd, int retain_orphans, int reset) { struct dm_hash_node *n; - log_verbose("Wiping internal VG cache"); + + log_debug_cache("Dropping VG info"); _has_scanned = 0; diff --git a/tools/lvmcmdline.c b/tools/lvmcmdline.c index fc96b8d81..3774014a0 100644 --- a/tools/lvmcmdline.c +++ b/tools/lvmcmdline.c @@ -3013,10 +3013,9 @@ int lvm_run_command(struct cmd_context *cmd, int argc, char **argv) lvmnotify_send(cmd); out: - if (test_mode()) { - log_verbose("Test mode: Wiping internal cache"); - lvmcache_destroy(cmd, 1, 0); - } + + lvmcache_destroy(cmd, 1, 1); + label_scan_destroy(cmd); if ((config_string_cft = remove_config_tree_by_source(cmd, CONFIG_STRING))) dm_config_destroy(config_string_cft); diff --git a/tools/toollib.c b/tools/toollib.c index c8dd9d38d..7e3b0213a 100644 --- a/tools/toollib.c +++ b/tools/toollib.c @@ -4508,7 +4508,6 @@ int process_each_pv(struct cmd_context *cmd, * before process_each_pv is called. 
*/ if (!trust_cache() && !orphans_locked) { - log_debug("Scanning for available devices"); lvmcache_destroy(cmd, 1, 0); /* From 6d05859862cebe79981557fe1a1005a530302f70 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Fri, 6 Apr 2018 13:11:39 -0500 Subject: [PATCH 72/87] bcache: let caller see an error --- lib/device/bcache.c | 21 ++++++++++++++------- lib/device/bcache.h | 9 ++++++++- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/lib/device/bcache.c b/lib/device/bcache.c index 903477677..ea8f70299 100644 --- a/lib/device/bcache.c +++ b/lib/device/bcache.c @@ -234,7 +234,6 @@ static bool _async_wait(struct io_engine *ioe, io_complete_fn fn) fn((void *) cb->context, 0); } else { - log_warn("short io"); fn(cb->context, -ENODATA); } @@ -538,10 +537,8 @@ static void _complete_io(void *context, int err) */ dm_list_del(&b->list); - if (b->error) { - log_warn("bcache io error %d fd %d", b->error, b->fd); + if (b->error) memset(b->data, 0, cache->block_sectors << SECTOR_SHIFT); - } /* Things don't work with this block of code, but work without it. */ #if 0 @@ -583,6 +580,7 @@ static void _issue_low_level(struct block *b, enum dir d) dm_list_add(&cache->io_pending, &b->list); if (!cache->engine->issue(cache->engine, d, b->fd, sb, se, b->data, b)) { + /* FIXME: if io_submit() set an errno, return that instead of EIO? 
*/ _complete_io(b, -EIO); return; } @@ -904,7 +902,7 @@ void bcache_prefetch(struct bcache *cache, int fd, block_address index) } bool bcache_get(struct bcache *cache, int fd, block_address index, - unsigned flags, struct block **result) + unsigned flags, struct block **result, int *error) { struct block *b; @@ -916,12 +914,19 @@ bool bcache_get(struct bcache *cache, int fd, block_address index, *result = b; + if (error) + *error = b->error; + if (b->error) return false; return true; } *result = NULL; + + if (error) + *error = -BCACHE_NO_BLOCK; + log_error("bcache failed to get block %u fd %d", (uint32_t)index, fd); return false; } @@ -1068,7 +1073,9 @@ bool bcache_read_bytes(struct bcache *cache, int fd, off_t start, size_t len, vo bcache_prefetch(cache, fd, i); for (i = bb; i < be; i++) { - if (!bcache_get(cache, fd, i, 0, &b)) { + if (!bcache_get(cache, fd, i, 0, &b, NULL)) { + log_error("bcache_read failed to get block %u fd %d bb %u be %u", + (uint32_t)i, fd, (uint32_t)bb, (uint32_t)be); errors++; continue; } @@ -1105,7 +1112,7 @@ bool bcache_write_bytes(struct bcache *cache, int fd, off_t start, size_t len, v bcache_prefetch(cache, fd, i); for (i = bb; i < be; i++) { - if (!bcache_get(cache, fd, i, 0, &b)) { + if (!bcache_get(cache, fd, i, 0, &b, NULL)) { log_error("bcache_write failed to get block %u fd %d bb %u be %u", (uint32_t)i, fd, (uint32_t)bb, (uint32_t)be); errors++; diff --git a/lib/device/bcache.h b/lib/device/bcache.h index d5f6d0ac5..999223a55 100644 --- a/lib/device/bcache.h +++ b/lib/device/bcache.h @@ -29,6 +29,13 @@ /*----------------------------------------------------------------*/ +/* + * bcache-specific error numbers + * These supplement standard -EXXX error numbers and + * should not overlap. + */ +#define BCACHE_NO_BLOCK 201 + enum dir { DIR_READ, DIR_WRITE @@ -120,7 +127,7 @@ void bcache_prefetch(struct bcache *cache, int fd, block_address index); * Returns true on success. 
*/ bool bcache_get(struct bcache *cache, int fd, block_address index, - unsigned flags, struct block **result); + unsigned flags, struct block **result, int *error); void bcache_put(struct block *b); /* From 45e5e702c1d488df4898a41db0c00ead63c5f6ee Mon Sep 17 00:00:00 2001 From: David Teigland Date: Fri, 6 Apr 2018 13:12:26 -0500 Subject: [PATCH 73/87] scan: improve io error checking and reporting --- lib/label/label.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/lib/label/label.c b/lib/label/label.c index 14a7e9017..6c3be053e 100644 --- a/lib/label/label.c +++ b/lib/label/label.c @@ -466,17 +466,20 @@ static int _scan_list(struct dm_list *devs, int *failed) struct dm_list done_devs; struct device_list *devl, *devl2; struct block *bb; + int scan_open_errors = 0; + int scan_read_errors = 0; + int scan_process_errors = 0; int scan_failed_count = 0; - int scan_lvm_count = 0; int rem_prefetches; int scan_failed; int is_lvm_device; + int error; int ret; dm_list_init(&wait_devs); dm_list_init(&done_devs); - log_debug_devs("Scanning %d devices.", dm_list_size(devs)); + log_debug_devs("Scanning %d devices for VG info", dm_list_size(devs)); scan_more: rem_prefetches = bcache_max_prefetches(scan_bcache); @@ -498,6 +501,7 @@ static int _scan_list(struct dm_list *devs, int *failed) log_debug_devs("Scan failed to open %s.", dev_name(devl->dev)); dm_list_del(&devl->list); dm_list_add(&done_devs, &devl->list); + scan_open_errors++; scan_failed_count++; continue; } @@ -513,11 +517,15 @@ static int _scan_list(struct dm_list *devs, int *failed) dm_list_iterate_items_safe(devl, devl2, &wait_devs) { bb = NULL; + error = 0; + scan_failed = 0; + is_lvm_device = 0; - if (!bcache_get(scan_bcache, devl->dev->bcache_fd, 0, 0, &bb)) { - log_debug_devs("Scan failed to read %s.", dev_name(devl->dev)); - scan_failed_count++; + if (!bcache_get(scan_bcache, devl->dev->bcache_fd, 0, 0, &bb, &error)) { + log_debug_devs("Scan failed to read %s 
error %d.", dev_name(devl->dev), error); scan_failed = 1; + scan_read_errors++; + scan_failed_count++; lvmcache_del_dev(devl->dev); } else { log_debug_devs("Processing data from device %s fd %d block %p", dev_name(devl->dev), devl->dev->bcache_fd, bb); @@ -526,12 +534,10 @@ static int _scan_list(struct dm_list *devs, int *failed) if (!ret && is_lvm_device) { log_debug_devs("Scan failed to process %s", dev_name(devl->dev)); - scan_failed_count++; scan_failed = 1; + scan_process_errors++; + scan_failed_count++; lvmcache_del_dev(devl->dev); - } else { - scan_lvm_count++; - scan_failed = 0; } } @@ -556,8 +562,8 @@ static int _scan_list(struct dm_list *devs, int *failed) if (!dm_list_empty(devs)) goto scan_more; - log_debug_devs("Scanned devices: %d lvm, %d failed.", - scan_lvm_count, scan_failed_count); + log_debug_devs("Scanned devices: open errors %d read errors %d process errors %d", + scan_open_errors, scan_read_errors, scan_process_errors); if (failed) *failed = scan_failed_count; @@ -649,6 +655,8 @@ int label_scan(struct cmd_context *cmd) }; dev_iter_destroy(iter); + log_debug_devs("Found %d devices to scan", dm_list_size(&all_devs)); + if (!scan_bcache) { /* * FIXME: there should probably be some max number of From 89c65d4f71e51c2db4fcba176546d2474e3451bd Mon Sep 17 00:00:00 2001 From: David Teigland Date: Fri, 6 Apr 2018 13:18:03 -0500 Subject: [PATCH 74/87] remove unnecessary REQUIRES_FULL_LABEL_SCAN we always scan all devices --- tools/command.c | 3 +-- tools/commands.h | 2 +- tools/lvmcmdline.c | 7 +------ tools/tools.h | 7 ++----- tools/vgrename.c | 5 ----- 5 files changed, 5 insertions(+), 19 deletions(-) diff --git a/tools/command.c b/tools/command.c index 8944399b3..f3b5d82e3 100644 --- a/tools/command.c +++ b/tools/command.c @@ -134,13 +134,12 @@ static inline int configtype_arg(struct cmd_context *cmd __attribute__((unused)) #define ALLOW_UUID_AS_NAME 0x00000010 #define LOCKD_VG_SH 0x00000020 #define NO_METADATA_PROCESSING 0x00000040 -#define 
REQUIRES_FULL_LABEL_SCAN 0x00000080 +#define IGNORE_PERSISTENT_FILTER 0x00000080 #define MUST_USE_ALL_ARGS 0x00000100 #define NO_LVMETAD_AUTOSCAN 0x00000200 #define ENABLE_DUPLICATE_DEVS 0x00000400 #define DISALLOW_TAG_ARGS 0x00000800 #define GET_VGNAME_FROM_OPTIONS 0x00001000 -#define IGNORE_PERSISTENT_FILTER 0x00002000 /* create foo_CMD enums for command def ID's in command-lines.in */ diff --git a/tools/commands.h b/tools/commands.h index cbd527b58..3d142c339 100644 --- a/tools/commands.h +++ b/tools/commands.h @@ -225,7 +225,7 @@ xx(vgremove, xx(vgrename, "Rename a volume group", - ALLOW_UUID_AS_NAME | REQUIRES_FULL_LABEL_SCAN) + ALLOW_UUID_AS_NAME) xx(vgs, "Display information about volume groups", diff --git a/tools/lvmcmdline.c b/tools/lvmcmdline.c index 3774014a0..bcb2c5358 100644 --- a/tools/lvmcmdline.c +++ b/tools/lvmcmdline.c @@ -2727,11 +2727,6 @@ static int _cmd_no_lvmetad_autoscan(struct cmd_context *cmd) return cmd->cname->flags & NO_LVMETAD_AUTOSCAN; } -static int _cmd_requires_full_label_scan(struct cmd_context *cmd) -{ - return cmd->cname->flags & REQUIRES_FULL_LABEL_SCAN; -} - static int _cmd_ignores_persistent_filter(struct cmd_context *cmd) { return cmd->cname->flags & IGNORE_PERSISTENT_FILTER; @@ -2865,7 +2860,7 @@ int lvm_run_command(struct cmd_context *cmd, int argc, char **argv) * Similarly ignore the persistent cache if the command is going to discard it regardless. */ if (!cmd->initialized.filters && !_cmd_no_meta_proc(cmd) && - !init_filters(cmd, !(refresh_done || _cmd_requires_full_label_scan(cmd) || _cmd_ignores_persistent_filter(cmd)))) + !init_filters(cmd, !(refresh_done || _cmd_ignores_persistent_filter(cmd)))) return_ECMD_FAILED; if (arg_is_set(cmd, readonly_ARG)) diff --git a/tools/tools.h b/tools/tools.h index 088655129..d4d2fb2a0 100644 --- a/tools/tools.h +++ b/tools/tools.h @@ -124,8 +124,8 @@ struct arg_value_group_list { #define LOCKD_VG_SH 0x00000020 /* Command does not process any metadata. 
*/ #define NO_METADATA_PROCESSING 0x00000040 -/* Command wants to scan for new devices and force labels to be read from them all. */ -#define REQUIRES_FULL_LABEL_SCAN 0x00000080 +/* Command must not load the contents saved by the persistent filter */ +#define IGNORE_PERSISTENT_FILTER 0x00000080 /* Command must use all specified arg names and fail if all cannot be used. */ #define MUST_USE_ALL_ARGS 0x00000100 /* Command wants to control the device scan for lvmetad itself. */ @@ -136,9 +136,6 @@ struct arg_value_group_list { #define DISALLOW_TAG_ARGS 0x00000800 /* Command may need to find VG name in an option value. */ #define GET_VGNAME_FROM_OPTIONS 0x00001000 -/* Command must not load the contents saved by the persistent filter */ -#define IGNORE_PERSISTENT_FILTER 0x00002000 - void usage(const char *name); /* the argument verify/normalise functions */ diff --git a/tools/vgrename.c b/tools/vgrename.c index 5c69faf26..4f2a08bb6 100644 --- a/tools/vgrename.c +++ b/tools/vgrename.c @@ -59,11 +59,6 @@ static int _vgrename_single(struct cmd_context *cmd, const char *vg_name, /* * Check if a VG already exists with the new VG name. * - * When not using lvmetad, it's essential that a full scan has - * been done to ensure we see all existing VG names, so we - * do not use an existing name. This has been done by - * process_each_vg REQUIRES_FULL_LABEL_SCAN. - * * (FIXME: We could look for the new name in the list of all * VGs that process_each_vg created, but we don't have access * to that list here, so we have to look in lvmcache. From c0973e70a58e7e14e9cca29a0f8ad12719ea554f Mon Sep 17 00:00:00 2001 From: David Teigland Date: Mon, 9 Apr 2018 11:13:43 -0500 Subject: [PATCH 75/87] dev_cache: clean up scan Pull out all of the twisted logic and simply call dev_cache_scan at the start of the command prior to label scan. 
--- daemons/clvmd/lvm-functions.c | 1 - lib/cache/lvmcache.c | 5 ++--- lib/commands/toolcontext.c | 1 - lib/device/dev-cache.c | 40 +++++---------------------------- lib/device/dev-cache.h | 6 ++--- lib/filters/filter-persistent.c | 13 ----------- lib/label/label.c | 2 +- lib/misc/lvm-globals.c | 11 --------- lib/misc/lvm-globals.h | 2 -- tools/lvmcmdline.c | 1 - tools/polldaemon.c | 9 ++++---- tools/pvscan.c | 4 ++-- tools/toollib.c | 2 -- tools/vgimportclone.c | 1 - 14 files changed, 16 insertions(+), 82 deletions(-) diff --git a/daemons/clvmd/lvm-functions.c b/daemons/clvmd/lvm-functions.c index 64bdab303..c278692b9 100644 --- a/daemons/clvmd/lvm-functions.c +++ b/daemons/clvmd/lvm-functions.c @@ -661,7 +661,6 @@ int do_refresh_cache(void) return -1; } - init_full_scan_done(0); init_ignore_suspended_devices(1); lvmcache_force_next_label_scan(); lvmcache_label_scan(cmd); diff --git a/lib/cache/lvmcache.c b/lib/cache/lvmcache.c index 78665bfb0..ef180b9d2 100644 --- a/lib/cache/lvmcache.c +++ b/lib/cache/lvmcache.c @@ -157,6 +157,8 @@ void lvmcache_seed_infos_from_lvmetad(struct cmd_context *cmd) if (!lvmetad_used() || _has_scanned) return; + dev_cache_scan(); + if (!lvmetad_pv_list_to_lvmcache(cmd)) { stack; return; @@ -357,9 +359,6 @@ void lvmcache_drop_metadata(const char *vgname, int drop_precommitted) _drop_metadata(FMT_TEXT_ORPHAN_VG_NAME, 0); _drop_metadata(FMT_LVM1_ORPHAN_VG_NAME, 0); _drop_metadata(FMT_POOL_ORPHAN_VG_NAME, 0); - - /* Indicate that PVs could now be missing from the cache */ - init_full_scan_done(0); } else _drop_metadata(vgname, drop_precommitted); } diff --git a/lib/commands/toolcontext.c b/lib/commands/toolcontext.c index 3dc3e2df4..a54f4d7b5 100644 --- a/lib/commands/toolcontext.c +++ b/lib/commands/toolcontext.c @@ -1648,7 +1648,6 @@ static void _init_rand(struct cmd_context *cmd) static void _init_globals(struct cmd_context *cmd) { - init_full_scan_done(0); init_mirror_in_sync(0); } diff --git a/lib/device/dev-cache.c 
b/lib/device/dev-cache.c index e72ffd6fd..52edad845 100644 --- a/lib/device/dev-cache.c +++ b/lib/device/dev-cache.c @@ -1077,12 +1077,11 @@ static int _insert(const char *path, const struct stat *info, return 1; } -static void _full_scan(int dev_scan) +void dev_cache_scan(void) { struct dir_list *dl; - if (_cache.has_scanned && !dev_scan) - return; + _cache.has_scanned = 1; _insert_dirs(&_cache.dirs); @@ -1090,9 +1089,6 @@ static void _full_scan(int dev_scan) dm_list_iterate_items(dl, &_cache.files) _insert_file(dl->dir); - - _cache.has_scanned = 1; - init_full_scan_done(1); } int dev_cache_has_scanned(void) @@ -1100,14 +1096,6 @@ int dev_cache_has_scanned(void) return _cache.has_scanned; } -void dev_cache_scan(int do_scan) -{ - if (!do_scan) - _cache.has_scanned = 1; - else - _full_scan(1); -} - static int _init_preferred_names(struct cmd_context *cmd) { const struct dm_config_node *cn; @@ -1171,7 +1159,6 @@ out: int dev_cache_init(struct cmd_context *cmd) { _cache.names = NULL; - _cache.has_scanned = 0; if (!(_cache.mem = dm_pool_create("dev_cache", 10 * 1024))) return_0; @@ -1413,7 +1400,7 @@ struct device *dev_cache_get(const char *name, struct dev_filter *f) _insert(name, info_available ? &buf : NULL, 0, obtain_device_list_from_udev()); d = (struct device *) dm_hash_lookup(_cache.names, name); if (!d) { - _full_scan(0); + dev_cache_scan(); d = (struct device *) dm_hash_lookup(_cache.names, name); } } @@ -1469,7 +1456,7 @@ struct device *dev_cache_get_by_devt(dev_t dev, struct dev_filter *f) } } - _full_scan(0); + dev_cache_scan(); d = _dev_cache_seek_devt(dev); } @@ -1477,17 +1464,7 @@ struct device *dev_cache_get_by_devt(dev_t dev, struct dev_filter *f) f->passes_filter(f, d))) ? 
d : NULL; } -void dev_cache_full_scan(struct dev_filter *f) -{ - if (f && f->wipe) { - f->wipe(f); /* might call _full_scan(1) */ - if (!full_scan_done()) - _full_scan(1); - } else - _full_scan(1); -} - -struct dev_iter *dev_iter_create(struct dev_filter *f, int dev_scan) +struct dev_iter *dev_iter_create(struct dev_filter *f, int unused) { struct dev_iter *di = dm_malloc(sizeof(*di)); @@ -1496,13 +1473,6 @@ struct dev_iter *dev_iter_create(struct dev_filter *f, int dev_scan) return NULL; } - if (dev_scan && !trust_cache()) { - /* Flag gets reset between each command */ - if (!full_scan_done()) - dev_cache_full_scan(f); - } else - _full_scan(0); - di->current = btree_first(_cache.devices); di->filter = f; if (di->filter) diff --git a/lib/device/dev-cache.h b/lib/device/dev-cache.h index 546b1fe2a..479727473 100644 --- a/lib/device/dev-cache.h +++ b/lib/device/dev-cache.h @@ -46,10 +46,8 @@ int dev_cache_exit(void); */ int dev_cache_check_for_open_devices(void); -/* Trigger(1) or avoid(0) a scan */ -void dev_cache_scan(int do_scan); +void dev_cache_scan(void); int dev_cache_has_scanned(void); -void dev_cache_full_scan(struct dev_filter *f); int dev_cache_add_dir(const char *path); int dev_cache_add_loopfile(const char *path); @@ -66,7 +64,7 @@ void dev_set_preferred_name(struct dm_str_list *sl, struct device *dev); * Object for iterating through the cache. 
*/ struct dev_iter; -struct dev_iter *dev_iter_create(struct dev_filter *f, int dev_scan); +struct dev_iter *dev_iter_create(struct dev_filter *f, int unused); void dev_iter_destroy(struct dev_iter *iter); struct device *dev_iter_get(struct dev_iter *iter); diff --git a/lib/filters/filter-persistent.c b/lib/filters/filter-persistent.c index 5bc0861fd..a4151c289 100644 --- a/lib/filters/filter-persistent.c +++ b/lib/filters/filter-persistent.c @@ -48,11 +48,7 @@ static void _persistent_filter_wipe(struct dev_filter *f) { struct pfilter *pf = (struct pfilter *) f->private; - log_verbose("Wiping cache of LVM-capable devices"); dm_hash_wipe(pf->devices); - - /* Trigger complete device scan */ - dev_cache_scan(1); } static int _read_array(struct pfilter *pf, struct dm_config_tree *cft, @@ -126,15 +122,6 @@ int persistent_filter_load(struct dev_filter *f, struct dm_config_tree **cft_out /* _read_array(pf, cft, "persistent_filter_cache/invalid_devices", PF_BAD_DEVICE); */ - /* Did we find anything? */ - if (dm_hash_get_num_entries(pf->devices)) { - /* We populated dev_cache ourselves */ - dev_cache_scan(0); - if (!dev_cache_index_devs()) - stack; - r = 1; - } - log_very_verbose("Loaded persistent filter cache from %s", pf->file); out: diff --git a/lib/label/label.c b/lib/label/label.c index 6c3be053e..c11a040a9 100644 --- a/lib/label/label.c +++ b/lib/label/label.c @@ -631,7 +631,7 @@ int label_scan(struct cmd_context *cmd) * on it. This info will be used by the vg_read() phase of the * command. 
*/ - dev_cache_full_scan(cmd->full_filter); + dev_cache_scan(); if (!(iter = dev_iter_create(cmd->full_filter, 0))) { log_error("Scanning failed to get devices."); diff --git a/lib/misc/lvm-globals.c b/lib/misc/lvm-globals.c index 0f384bbec..994148971 100644 --- a/lib/misc/lvm-globals.c +++ b/lib/misc/lvm-globals.c @@ -28,7 +28,6 @@ static int _md_filtering = 0; static int _internal_filtering = 0; static int _fwraid_filtering = 0; static int _pvmove = 0; -static int _full_scan_done = 0; /* Restrict to one full scan during each cmd */ static int _obtain_device_list_from_udev = DEFAULT_OBTAIN_DEVICE_LIST_FROM_UDEV; static enum dev_ext_e _external_device_info_source = DEV_EXT_NONE; static int _trust_cache = 0; /* Don't scan when incomplete VGs encountered */ @@ -92,11 +91,6 @@ void init_pvmove(int level) _pvmove = level; } -void init_full_scan_done(int level) -{ - _full_scan_done = level; -} - void init_obtain_device_list_from_udev(int device_list_from_udev) { _obtain_device_list_from_udev = device_list_from_udev; @@ -253,11 +247,6 @@ int pvmove_mode(void) return _pvmove; } -int full_scan_done(void) -{ - return _full_scan_done; -} - int obtain_device_list_from_udev(void) { return _obtain_device_list_from_udev; diff --git a/lib/misc/lvm-globals.h b/lib/misc/lvm-globals.h index e23d5984d..b3838911f 100644 --- a/lib/misc/lvm-globals.h +++ b/lib/misc/lvm-globals.h @@ -29,7 +29,6 @@ void init_md_filtering(int level); void init_internal_filtering(int level); void init_fwraid_filtering(int level); void init_pvmove(int level); -void init_full_scan_done(int level); void init_external_device_info_source(enum dev_ext_e src); void init_obtain_device_list_from_udev(int device_list_from_udev); void init_trust_cache(int trustcache); @@ -63,7 +62,6 @@ int md_filtering(void); int internal_filtering(void); int fwraid_filtering(void); int pvmove_mode(void); -int full_scan_done(void); int obtain_device_list_from_udev(void); enum dev_ext_e external_device_info_source(void); int 
trust_cache(void); diff --git a/tools/lvmcmdline.c b/tools/lvmcmdline.c index bcb2c5358..c7ac4b633 100644 --- a/tools/lvmcmdline.c +++ b/tools/lvmcmdline.c @@ -2446,7 +2446,6 @@ static void _apply_current_settings(struct cmd_context *cmd) _apply_current_output_settings(cmd); init_test(cmd->current_settings.test); - init_full_scan_done(0); init_mirror_in_sync(0); init_dmeventd_monitor(DEFAULT_DMEVENTD_MONITOR); diff --git a/tools/polldaemon.c b/tools/polldaemon.c index d69284d47..cf7a94721 100644 --- a/tools/polldaemon.c +++ b/tools/polldaemon.c @@ -123,13 +123,12 @@ static void _nanosleep(unsigned secs, unsigned allow_zero_time) while (!nanosleep(&wtime, &wtime) && errno == EINTR) {} } -static void _sleep_and_rescan_devices(struct daemon_parms *parms) +static void _sleep_and_rescan_devices(struct cmd_context *cmd, struct daemon_parms *parms) { if (parms->interval && !parms->aborting) { dev_close_all(); _nanosleep(parms->interval, 1); - /* Devices might have changed while we slept */ - init_full_scan_done(0); + lvmcache_label_scan(cmd); } } @@ -145,7 +144,7 @@ int wait_for_single_lv(struct cmd_context *cmd, struct poll_operation_id *id, /* Poll for completion */ while (!finished) { if (parms->wait_before_testing) - _sleep_and_rescan_devices(parms); + _sleep_and_rescan_devices(cmd, parms); /* * An ex VG lock is needed because the check can call finish_copy @@ -218,7 +217,7 @@ int wait_for_single_lv(struct cmd_context *cmd, struct poll_operation_id *id, * continue polling an LV that doesn't have a "status". 
*/ if (!parms->wait_before_testing && !finished) - _sleep_and_rescan_devices(parms); + _sleep_and_rescan_devices(cmd, parms); } return 1; diff --git a/tools/pvscan.c b/tools/pvscan.c index ab6ea0b48..1fcf606a2 100644 --- a/tools/pvscan.c +++ b/tools/pvscan.c @@ -288,8 +288,6 @@ static int _pvscan_autoactivate(struct cmd_context *cmd, struct pvscan_aa_params pp->refresh_all = 1; } - dev_cache_full_scan(cmd->full_filter); - ret = process_each_vg(cmd, 0, NULL, NULL, vgnames, 0, 0, handle, _pvscan_autoactivate_single); destroy_processing_handle(cmd, handle); @@ -495,6 +493,7 @@ static int _pvscan_cache(struct cmd_context *cmd, int argc, char **argv) } if (!dm_list_empty(&single_devs)) { + dev_cache_scan(); label_scan_devs(cmd, &single_devs); dm_list_iterate_items(devl, &single_devs) { @@ -540,6 +539,7 @@ static int _pvscan_cache(struct cmd_context *cmd, int argc, char **argv) } if (!dm_list_empty(&single_devs)) { + dev_cache_scan(); label_scan_devs(cmd, &single_devs); dm_list_iterate_items(devl, &single_devs) { diff --git a/tools/toollib.c b/tools/toollib.c index 7e3b0213a..fabf2dc2f 100644 --- a/tools/toollib.c +++ b/tools/toollib.c @@ -5503,8 +5503,6 @@ int pvcreate_each_device(struct cmd_context *cmd, return 0; } - dev_cache_full_scan(cmd->full_filter); - lvmcache_label_scan(cmd); /* diff --git a/tools/vgimportclone.c b/tools/vgimportclone.c index 146d3b4bf..224d0b546 100644 --- a/tools/vgimportclone.c +++ b/tools/vgimportclone.c @@ -332,7 +332,6 @@ retry_name: dm_list_iterate_items(vd, &vp.arg_import) internal_filter_allow(cmd->mem, vd->dev); lvmcache_destroy(cmd, 1, 0); - dev_cache_full_scan(cmd->full_filter); log_debug("Changing VG %s to %s.", vp.old_vgname, vp.new_vgname); From 9b6a62f9445b104f8b4f14b1ebe8258b360950e4 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Mon, 9 Apr 2018 13:40:49 -0500 Subject: [PATCH 76/87] lvmcache: simplify Recent changes allow some major simplification of the way lvmcache works and is used. 
lvmcache_label_scan is now called in a controlled fashion at the start of commands, and not via various unpredictable side effects. Remove various calls to it from other places. lvmcache_label_scan should not be called from anywhere during a command, because it produces an incorrect representation of PVs with no MDAs, and misclassifies them as orphans. This has been a long standing problem. The invalid flag and rescanning based on that is no longer used and removed. The 'force' variation is no longer needed and removed. --- daemons/clvmd/lvm-functions.c | 1 - lib/cache/lvmcache.c | 259 +++++---------------------------- lib/cache/lvmcache.h | 8 - lib/format1/disk-rep.c | 1 - lib/format1/lvm1-label.c | 1 - lib/format_pool/disk_rep.c | 3 - lib/format_text/text_label.c | 1 - lib/metadata/metadata-liblvm.c | 1 - lib/metadata/metadata.c | 10 +- libdm/libdm-config.c | 10 +- liblvm/lvm_vg.c | 1 - tools/toollib.c | 6 + tools/vgcreate.c | 1 - tools/vgmerge.c | 3 + tools/vgsplit.c | 3 + 15 files changed, 54 insertions(+), 255 deletions(-) diff --git a/daemons/clvmd/lvm-functions.c b/daemons/clvmd/lvm-functions.c index c278692b9..6254122ee 100644 --- a/daemons/clvmd/lvm-functions.c +++ b/daemons/clvmd/lvm-functions.c @@ -662,7 +662,6 @@ int do_refresh_cache(void) } init_ignore_suspended_devices(1); - lvmcache_force_next_label_scan(); lvmcache_label_scan(cmd); label_scan_destroy(cmd); /* destroys bcache (to close devs), keeps lvmcache */ dm_pool_empty(cmd->mem); diff --git a/lib/cache/lvmcache.c b/lib/cache/lvmcache.c index ef180b9d2..c8046f7b1 100644 --- a/lib/cache/lvmcache.c +++ b/lib/cache/lvmcache.c @@ -29,7 +29,6 @@ #include "lvmetad.h" #include "lvmetad-client.h" -#define CACHE_INVALID 0x00000001 #define CACHE_LOCKED 0x00000002 /* One per device */ @@ -169,15 +168,6 @@ void lvmcache_seed_infos_from_lvmetad(struct cmd_context *cmd) static void _update_cache_info_lock_state(struct lvmcache_info *info, int locked) { - int was_locked = (info->status & CACHE_LOCKED) ? 
1 : 0; - - /* - * Cache becomes invalid whenever lock state changes unless - * exclusive VG_GLOBAL is held (i.e. while scanning). - */ - if (!lvmcache_vgname_is_locked(VG_GLOBAL) && (was_locked != locked)) - info->status |= CACHE_INVALID; - if (locked) info->status |= CACHE_LOCKED; else @@ -235,15 +225,10 @@ static void _suspended_vg_free(struct lvmcache_vginfo *vginfo, int free_old, int static void _drop_metadata(const char *vgname, int drop_precommitted) { struct lvmcache_vginfo *vginfo; - struct lvmcache_info *info; if (!(vginfo = lvmcache_vginfo_from_vgname(vgname, NULL))) return; - if (drop_precommitted) - dm_list_iterate_items(info, &vginfo->infos) - info->status |= CACHE_INVALID; - if (drop_precommitted) _suspended_vg_free(vginfo, 0, 1); else @@ -703,39 +688,6 @@ const char *lvmcache_vgid_from_vgname(struct cmd_context *cmd, const char *vgnam return NULL; } -static int _info_is_valid(struct lvmcache_info *info) -{ - if (info->status & CACHE_INVALID) - return 0; - - /* - * The caller must hold the VG lock to manipulate metadata. - * In a cluster, remote nodes sometimes read metadata in the - * knowledge that the controlling node is holding the lock. - * So if the VG appears to be unlocked here, it should be safe - * to use the cached value. - */ - if (info->vginfo && !lvmcache_vgname_is_locked(info->vginfo->vgname)) - return 1; - - if (!(info->status & CACHE_LOCKED)) - return 0; - - return 1; -} - -/* vginfo is invalid if it does not contain at least one valid info */ -static int _vginfo_is_invalid(struct lvmcache_vginfo *vginfo) -{ - struct lvmcache_info *info; - - dm_list_iterate_items(info, &vginfo->infos) - if (_info_is_valid(info)) - return 0; - - return 1; -} - /* * If valid_only is set, data will only be returned if the cached data is * known still to be valid. 
@@ -765,9 +717,6 @@ struct lvmcache_info *lvmcache_info_from_pvid(const char *pvid, struct device *d return NULL; } - if (valid_only && !_info_is_valid(info)) - return NULL; - return info; } @@ -804,64 +753,6 @@ char *lvmcache_vgname_from_pvid(struct cmd_context *cmd, const char *pvid) return vgname; } -/* - * FIXME: get rid of the CACHE_INVALID state and rescanning - * infos with that flag. The code should just know which devices - * need scanning and when. - */ -static int _label_scan_invalid(struct cmd_context *cmd) -{ - struct dm_list devs; - struct dm_hash_node *n; - struct device_list *devl; - struct lvmcache_info *info; - int dev_count = 0; - int ret; - - dm_list_init(&devs); - - dm_hash_iterate(n, _pvid_hash) { - if (!(info = dm_hash_get_data(_pvid_hash, n))) - continue; - - if (!(info->status & CACHE_INVALID)) - continue; - - if (!(devl = dm_pool_zalloc(cmd->mem, sizeof(*devl)))) - return_0; - - devl->dev = info->dev; - dm_list_add(&devs, &devl->list); - dev_count++; - } - - if (dm_list_empty(&devs)) - return 1; - - log_debug_cache("Scanning %d devs with invalid info.", dev_count); - - ret = label_scan_devs(cmd, &devs); - - return ret; -} - -/* - * lvmcache_label_scan() remembers that it has already - * been called, and will not scan labels if it's called - * again. (It will rescan "INVALID" devices if called again.) - * - * To force lvmcache_label_scan() to rescan labels on all devices, - * call lvmcache_force_next_label_scan() before calling - * lvmcache_label_scan(). - */ - -static int _force_label_scan; - -void lvmcache_force_next_label_scan(void) -{ - _force_label_scan = 1; -} - /* * Check if any PVs in vg->pvs have the same PVID as any * entries in _unused_duplicate_devices. @@ -1233,6 +1124,30 @@ int lvmcache_label_rescan_vg(struct cmd_context *cmd, const char *vgname, const return 1; } +/* + * Uses label_scan to populate lvmcache with 'vginfo' struct for each VG + * and associated 'info' structs for those VGs. 
Only VG summary information + * is used to assemble the vginfo/info during the scan, so the resulting + * representation of VG/PV state is incomplete and even incorrect. + * Specifically, PVs with no MDAs are considered orphans and placed in the + * orphan vginfo by lvmcache_label_scan. This is corrected during the + * processing phase as each vg_read() uses VG metadata for each VG to correct + * the lvmcache state, i.e. it moves no-MDA PVs from the orphan vginfo onto + * the correct vginfo. Once vg_read() is finished for all VGs, all of the + * incorrectly placed PVs should have been moved from the orphan vginfo + * onto their correct vginfo's, and the orphan vginfo should (in theory) + * represent only real orphan PVs. (Note: if lvmcache_label_scan is run + * after vg_read updates to lvmcache state, then the lvmcache will be + * incorrect again, so do not run lvmcache_label_scan during the + * processing phase.) + * + * TODO: in this label scan phase, don't stash no-MDA PVs into the + * orphan VG. We know that's a fiction, and it can have harmful/damaging + * results. Instead, put them into a temporary list where they can be + * pulled from later when vg_read uses metadata to resolve which VG + * they actually belong to. + */ + int lvmcache_label_scan(struct cmd_context *cmd) { struct dm_list del_cache_devs; @@ -1264,20 +1179,6 @@ int lvmcache_label_scan(struct cmd_context *cmd) goto out; } - /* - * Scan devices whose info struct has the INVALID flag set. - * When scanning has read the pv_header, mda_header and - * mda locations, it will clear the INVALID flag (via - * lvmcache_make_valid). 
- */ - if (_has_scanned && !_force_label_scan) { - r = _label_scan_invalid(cmd); - goto out; - } - - if (_force_label_scan && (cmd->full_filter && !cmd->full_filter->use_count) && !refresh_filters(cmd)) - goto_out; - if (!cmd->full_filter) { log_error("label scan is missing full filter"); goto out; @@ -1339,28 +1240,16 @@ int lvmcache_label_scan(struct cmd_context *cmd) dm_list_splice(&_unused_duplicate_devs, &del_cache_devs); } - _has_scanned = 1; - /* Perform any format-specific scanning e.g. text files */ if (cmd->independent_metadata_areas) dm_list_iterate_items(fmt, &cmd->formats) if (fmt->ops->scan && !fmt->ops->scan(fmt, NULL)) goto out; - /* - * If we are a long-lived process, write out the updated persistent - * device cache for the benefit of short-lived processes. - */ - if (_force_label_scan && cmd->is_long_lived && - cmd->dump_filter && cmd->full_filter && cmd->full_filter->dump && - !cmd->full_filter->dump(cmd->full_filter, 0)) - stack; - r = 1; out: _scanning_in_progress = 0; - _force_label_scan = 0; dm_list_iterate_items(vginfo, &_vginfos) { if (is_orphan_vg(vginfo->vgname)) @@ -1768,10 +1657,8 @@ static int _lvmcache_update_vgname(struct lvmcache_info *info, uint32_t vgstatus, const char *creation_host, const struct format_type *fmt) { - struct lvmcache_vginfo *vginfo, *primary_vginfo, *orphan_vginfo; - struct lvmcache_info *info2, *info3; + struct lvmcache_vginfo *vginfo, *primary_vginfo; char mdabuf[32]; - // struct lvmcache_vginfo *old_vginfo, *next; if (!vgname || (info && info->vginfo && !strcmp(info->vginfo->vgname, vgname))) return 1; @@ -1780,44 +1667,12 @@ static int _lvmcache_update_vgname(struct lvmcache_info *info, if (info) _drop_vginfo(info, info->vginfo); - /* Get existing vginfo or create new one */ if (!(vginfo = lvmcache_vginfo_from_vgname(vgname, vgid))) { -/*** FIXME - vginfo ends up duplicated instead of renamed. - // Renaming? This lookup fails. 
- if ((vginfo = vginfo_from_vgid(vgid))) { - next = vginfo->next; - old_vginfo = vginfo_from_vgname(vginfo->vgname, NULL); - if (old_vginfo == vginfo) { - dm_hash_remove(_vgname_hash, old_vginfo->vgname); - if (old_vginfo->next) { - if (!dm_hash_insert(_vgname_hash, old_vginfo->vgname, old_vginfo->next)) { - log_error("vg hash re-insertion failed: %s", - old_vginfo->vgname); - return 0; - } - } - } else do { - if (old_vginfo->next == vginfo) { - old_vginfo->next = vginfo->next; - break; - } - } while ((old_vginfo = old_vginfo->next)); - vginfo->next = NULL; + /* + * Create a vginfo struct for this VG and put the vginfo + * into the hash table. + */ - dm_free(vginfo->vgname); - if (!(vginfo->vgname = dm_strdup(vgname))) { - log_error("cache vgname alloc failed for %s", vgname); - return 0; - } - - // Rename so can assume new name does not already exist - if (!dm_hash_insert(_vgname_hash, vginfo->vgname, vginfo->next)) { - log_error("vg hash re-insertion failed: %s", - vginfo->vgname); - return 0; - } - } else { -***/ if (!(vginfo = dm_zalloc(sizeof(*vginfo)))) { log_error("lvmcache_update_vgname: list alloc failed"); return 0; @@ -1830,52 +1685,24 @@ static int _lvmcache_update_vgname(struct lvmcache_info *info, dm_list_init(&vginfo->infos); /* - * If we're scanning and there's an invalidated entry, remove it. - * Otherwise we risk bogus warnings of duplicate VGs. + * A different VG (different uuid) can exist with the same name. + * In this case, the two VGs will have separate vginfo structs, + * but the second will be linked onto the existing vginfo->next, + * not in the hash. 
*/ - while ((primary_vginfo = lvmcache_vginfo_from_vgname(vgname, NULL)) && - _scanning_in_progress && _vginfo_is_invalid(primary_vginfo)) { - orphan_vginfo = lvmcache_vginfo_from_vgname(primary_vginfo->fmt->orphan_vg_name, NULL); - if (!orphan_vginfo) { - log_error(INTERNAL_ERROR "Orphan vginfo %s lost from cache.", - primary_vginfo->fmt->orphan_vg_name); - dm_free(vginfo->vgname); - dm_free(vginfo); - return 0; - } - dm_list_iterate_items_safe(info2, info3, &primary_vginfo->infos) { - _vginfo_detach_info(info2); - _vginfo_attach_info(orphan_vginfo, info2); - if (info2->mdas.n) - sprintf(mdabuf, " with %u mdas", - dm_list_size(&info2->mdas)); - else - mdabuf[0] = '\0'; - log_debug_cache("lvmcache: %s: now in VG %s%s%s%s%s", - dev_name(info2->dev), - vgname, orphan_vginfo->vgid[0] ? " (" : "", - orphan_vginfo->vgid[0] ? orphan_vginfo->vgid : "", - orphan_vginfo->vgid[0] ? ")" : "", mdabuf); - } + primary_vginfo = lvmcache_vginfo_from_vgname(vgname, NULL); - if (!_drop_vginfo(NULL, primary_vginfo)) - return_0; - } - - if (!_insert_vginfo(vginfo, vgid, vgstatus, creation_host, - primary_vginfo)) { + if (!_insert_vginfo(vginfo, vgid, vgstatus, creation_host, primary_vginfo)) { dm_free(vginfo->vgname); dm_free(vginfo); return 0; } + /* Ensure orphans appear last on list_iterate */ if (is_orphan_vg(vgname)) dm_list_add(&_vginfos, &vginfo->list); else dm_list_add_h(&_vginfos, &vginfo->list); -/*** - } -***/ } if (info) @@ -2026,10 +1853,6 @@ int lvmcache_update_vgname_and_id(struct lvmcache_info *info, struct lvmcache_vg !is_orphan_vg(info->vginfo->vgname) && critical_section()) return 1; - /* If moving PV from orphan to real VG, always mark it valid */ - if (!is_orphan_vg(vgname)) - info->status &= ~CACHE_INVALID; - if (!_lvmcache_update_vgname(info, vgname, vgid, vgsummary->vgstatus, vgsummary->creation_host, info->fmt) || !_lvmcache_update_vgid(info, info->vginfo, vgid) || @@ -2230,8 +2053,6 @@ struct lvmcache_info *lvmcache_add(struct labeller *labeller, } } - 
info->status |= CACHE_INVALID; - /* * Add or update the _pvid_hash mapping, pvid to info. */ @@ -2617,14 +2438,6 @@ struct label *lvmcache_get_label(struct lvmcache_info *info) { return info->label; } -/* - * After label_scan reads pv_header, mda_header and mda locations - * from a PV, it clears the INVALID flag. - */ -void lvmcache_make_valid(struct lvmcache_info *info) { - info->status &= ~CACHE_INVALID; -} - uint64_t lvmcache_device_size(struct lvmcache_info *info) { return info->device_size; } diff --git a/lib/cache/lvmcache.h b/lib/cache/lvmcache.h index 4343060fe..107c99338 100644 --- a/lib/cache/lvmcache.h +++ b/lib/cache/lvmcache.h @@ -67,13 +67,6 @@ void lvmcache_allow_reads_with_lvmetad(void); void lvmcache_destroy(struct cmd_context *cmd, int retain_orphans, int reset); -/* - * lvmcache_label_scan() will scan labels the first time it's - * called, but not on subsequent calls, unless - * lvmcache_force_next_label_scan() is called first - * to force the next lvmcache_label_scan() to scan again. 
- */ -void lvmcache_force_next_label_scan(void); int lvmcache_label_scan(struct cmd_context *cmd); int lvmcache_label_rescan_vg(struct cmd_context *cmd, const char *vgname, const char *vgid); @@ -187,7 +180,6 @@ int lvmcache_foreach_pv(struct lvmcache_vginfo *vginfo, uint64_t lvmcache_device_size(struct lvmcache_info *info); void lvmcache_set_device_size(struct lvmcache_info *info, uint64_t size); struct device *lvmcache_device(struct lvmcache_info *info); -void lvmcache_make_valid(struct lvmcache_info *info); int lvmcache_is_orphan(struct lvmcache_info *info); int lvmcache_uncertain_ownership(struct lvmcache_info *info); unsigned lvmcache_mda_count(struct lvmcache_info *info); diff --git a/lib/format1/disk-rep.c b/lib/format1/disk-rep.c index 41955afc0..281adc106 100644 --- a/lib/format1/disk-rep.c +++ b/lib/format1/disk-rep.c @@ -337,7 +337,6 @@ static void __update_lvmcache(const struct format_type *fmt, lvmcache_set_device_size(info, ((uint64_t)xlate32(dl->pvd.pv_size)) << SECTOR_SHIFT); lvmcache_del_mdas(info); - lvmcache_make_valid(info); } static struct disk_list *__read_disk(const struct format_type *fmt, diff --git a/lib/format1/lvm1-label.c b/lib/format1/lvm1-label.c index 3b8a655e9..691a05a4a 100644 --- a/lib/format1/lvm1-label.c +++ b/lib/format1/lvm1-label.c @@ -84,7 +84,6 @@ static int _lvm1_read(struct labeller *l, struct device *dev, void *buf, lvmcache_set_ext_flags(info, 0); lvmcache_del_mdas(info); lvmcache_del_bas(info); - lvmcache_make_valid(info); return 1; } diff --git a/lib/format_pool/disk_rep.c b/lib/format_pool/disk_rep.c index 374ff44a0..fe9b03ea9 100644 --- a/lib/format_pool/disk_rep.c +++ b/lib/format_pool/disk_rep.c @@ -111,7 +111,6 @@ int read_pool_label(struct pool_list *pl, struct labeller *l, lvmcache_set_ext_flags(info, 0); lvmcache_del_mdas(info); lvmcache_del_bas(info); - lvmcache_make_valid(info); pl->dev = dev; pl->pv = NULL; @@ -379,8 +378,6 @@ int read_pool_pds(const struct format_type *fmt, const char *vg_name, vg_name); 
return 0; } - if (full_scan > 0) - lvmcache_force_next_label_scan(); lvmcache_label_scan(fmt->cmd); } while (1); diff --git a/lib/format_text/text_label.c b/lib/format_text/text_label.c index e65079e0f..c47a35a26 100644 --- a/lib/format_text/text_label.c +++ b/lib/format_text/text_label.c @@ -443,7 +443,6 @@ out: return 0; } - lvmcache_make_valid(info); return 1; } diff --git a/lib/metadata/metadata-liblvm.c b/lib/metadata/metadata-liblvm.c index d8b3b2aae..b0b678ac8 100644 --- a/lib/metadata/metadata-liblvm.c +++ b/lib/metadata/metadata-liblvm.c @@ -273,7 +273,6 @@ out: } if (scan_needed) { - lvmcache_force_next_label_scan(); if (!lvmcache_label_scan(cmd)) { stack; r = 0; diff --git a/lib/metadata/metadata.c b/lib/metadata/metadata.c index 8cb06befa..8c8ce25de 100644 --- a/lib/metadata/metadata.c +++ b/lib/metadata/metadata.c @@ -3392,8 +3392,6 @@ static struct volume_group *_vg_read_orphans(struct cmd_context *cmd, struct pv_list head; dm_list_init(&head.list); - lvmcache_label_scan(cmd); - lvmcache_seed_infos_from_lvmetad(cmd); if (!(vginfo = lvmcache_vginfo_from_vgname(orphan_vgname, NULL))) return_NULL; @@ -3839,11 +3837,7 @@ static struct volume_group *_vg_read(struct cmd_context *cmd, */ log_debug_metadata("Reading VG rereading labels for %s", vgname); - if (!lvmcache_label_rescan_vg(cmd, vgname, vgid)) { - /* The VG wasn't found, so force a full label scan. 
*/ - lvmcache_force_next_label_scan(); - lvmcache_label_scan(cmd); - } + lvmcache_label_rescan_vg(cmd, vgname, vgid); if (!(fmt = lvmcache_fmt_from_vgname(cmd, vgname, vgid, 0))) { log_debug_metadata("Cache did not find fmt for vgname %s", vgname); @@ -4531,7 +4525,6 @@ static struct volume_group *_vg_read_by_vgid(struct cmd_context *cmd, if (!(vgname = lvmcache_vgname_from_vgid(cmd->mem, vgid))) { log_debug_metadata("Reading VG by vgid %.8s no VG name found, retrying.", vgid); lvmcache_destroy(cmd, 0, 0); - lvmcache_force_next_label_scan(); lvmcache_label_scan(cmd); } @@ -5572,7 +5565,6 @@ uint32_t vg_lock_newname(struct cmd_context *cmd, const char *vgname) unlock_vg(cmd, NULL, vgname); return FAILED_LOCKING; } - lvmcache_force_next_label_scan(); lvmcache_label_scan(cmd); if (!lvmcache_fmt_from_vgname(cmd, vgname, NULL, 0)) return SUCCESS; /* vgname not found after scanning */ diff --git a/libdm/libdm-config.c b/libdm/libdm-config.c index 746ef0742..3f0d2510e 100644 --- a/libdm/libdm-config.c +++ b/libdm/libdm-config.c @@ -963,7 +963,7 @@ static const char *_find_config_str(const void *start, node_lookup_fn find_fn, if (n && n->v) { if ((n->v->type == DM_CFG_STRING) && (allow_empty || (*n->v->v.str))) { - log_very_verbose("Setting %s to %s", path, n->v->v.str); + /* log_very_verbose("Setting %s to %s", path, n->v->v.str); */ return n->v->v.str; } if ((n->v->type != DM_CFG_STRING) || (!allow_empty && fail)) @@ -994,7 +994,7 @@ static int64_t _find_config_int64(const void *start, node_lookup_fn find, const struct dm_config_node *n = find(start, path); if (n && n->v && n->v->type == DM_CFG_INT) { - log_very_verbose("Setting %s to %" PRId64, path, n->v->v.i); + /* log_very_verbose("Setting %s to %" PRId64, path, n->v->v.i); */ return n->v->v.i; } @@ -1009,7 +1009,7 @@ static float _find_config_float(const void *start, node_lookup_fn find, const struct dm_config_node *n = find(start, path); if (n && n->v && n->v->type == DM_CFG_FLOAT) { - log_very_verbose("Setting %s 
to %f", path, n->v->v.f); + /* log_very_verbose("Setting %s to %f", path, n->v->v.f); */ return n->v->v.f; } @@ -1058,12 +1058,12 @@ static int _find_config_bool(const void *start, node_lookup_fn find, switch (v->type) { case DM_CFG_INT: b = v->v.i ? 1 : 0; - log_very_verbose("Setting %s to %d", path, b); + /* log_very_verbose("Setting %s to %d", path, b); */ return b; case DM_CFG_STRING: b = _str_to_bool(v->v.str, fail); - log_very_verbose("Setting %s to %d", path, b); + /* log_very_verbose("Setting %s to %d", path, b); */ return b; default: ; diff --git a/liblvm/lvm_vg.c b/liblvm/lvm_vg.c index 559357953..0678bdc16 100644 --- a/liblvm/lvm_vg.c +++ b/liblvm/lvm_vg.c @@ -512,7 +512,6 @@ int lvm_scan(lvm_t libh) int rc = 0; struct saved_env e = store_user_env((struct cmd_context *)libh); - lvmcache_force_next_label_scan(); if (!lvmcache_label_scan((struct cmd_context *)libh)) rc = -1; diff --git a/tools/toollib.c b/tools/toollib.c index fabf2dc2f..e887f6525 100644 --- a/tools/toollib.c +++ b/tools/toollib.c @@ -1485,6 +1485,12 @@ int change_tag(struct cmd_context *cmd, struct volume_group *vg, return 1; } +/* + * FIXME: replace process_each_label() with process_each_vg() which is + * based on performing vg_read(), which provides a correct representation + * of VGs/PVs, that is not provided by lvmcache_label_scan(). + */ + int process_each_label(struct cmd_context *cmd, int argc, char **argv, struct processing_handle *handle, process_single_label_fn_t process_single_label) diff --git a/tools/vgcreate.c b/tools/vgcreate.c index 87a296f56..0c4c42854 100644 --- a/tools/vgcreate.c +++ b/tools/vgcreate.c @@ -82,7 +82,6 @@ int vgcreate(struct cmd_context *cmd, int argc, char **argv) return ECMD_FAILED; } - lvmcache_force_next_label_scan(); lvmcache_label_scan(cmd); /* Does nothing when using lvmetad. */ lvmcache_seed_infos_from_lvmetad(cmd); /* Does nothing unless using lvmetad. 
*/ diff --git a/tools/vgmerge.c b/tools/vgmerge.c index 013bef47d..67c349864 100644 --- a/tools/vgmerge.c +++ b/tools/vgmerge.c @@ -72,6 +72,9 @@ static int _vgmerge_single(struct cmd_context *cmd, const char *vg_name_to, return ECMD_FAILED; } + lvmcache_label_scan(cmd); + lvmcache_seed_infos_from_lvmetad(cmd); + if (strcmp(vg_name_to, vg_name_from) > 0) lock_vg_from_first = 1; diff --git a/tools/vgsplit.c b/tools/vgsplit.c index 46c891167..2d391119d 100644 --- a/tools/vgsplit.c +++ b/tools/vgsplit.c @@ -581,6 +581,9 @@ int vgsplit(struct cmd_context *cmd, int argc, char **argv) return ECMD_FAILED; } + lvmcache_label_scan(cmd); + lvmcache_seed_infos_from_lvmetad(cmd); + if (strcmp(vg_name_to, vg_name_from) < 0) lock_vg_from_first = 0; From aa833bdd8aa6a3e3557b19fa3877b0eee806df63 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Mon, 9 Apr 2018 13:57:44 -0500 Subject: [PATCH 77/87] bcache: intercept test mode before write Don't allow writes in test mode. test mode should be more sophisticated than just faking writes, and this should be a last defense for cases where test mode is not being checked correctly. 
--- lib/label/label.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/label/label.c b/lib/label/label.c index c11a040a9..4b18f5682 100644 --- a/lib/label/label.c +++ b/lib/label/label.c @@ -922,6 +922,9 @@ bool dev_write_bytes(struct device *dev, off_t start, size_t len, void *data) { int ret; + if (test_mode()) + return true; + if (!scan_bcache) { if (!dev_open(dev)) return false; @@ -955,6 +958,9 @@ bool dev_write_zeros(struct device *dev, off_t start, size_t len) { int ret; + if (test_mode()) + return true; + if (!scan_bcache) { if (!dev_open(dev)) return false; From 7b0a8f47be7df13aab0552599aa2dc2233cc223c Mon Sep 17 00:00:00 2001 From: David Teigland Date: Fri, 13 Apr 2018 14:40:00 -0500 Subject: [PATCH 78/87] lvmpolld: update to use new scanning correctly --- tools/polldaemon.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/polldaemon.c b/tools/polldaemon.c index cf7a94721..83f0424dc 100644 --- a/tools/polldaemon.c +++ b/tools/polldaemon.c @@ -126,6 +126,14 @@ static void _nanosleep(unsigned secs, unsigned allow_zero_time) static void _sleep_and_rescan_devices(struct cmd_context *cmd, struct daemon_parms *parms) { if (parms->interval && !parms->aborting) { + /* + * FIXME: do we really need to drop everything and then rescan + * everything between each iteration? What change exactly does + * each iteration check for, and does seeing that require + * rescanning everything? 
+ */ + lvmcache_destroy(cmd, 1, 0); + label_scan_destroy(cmd); dev_close_all(); _nanosleep(parms->interval, 1); lvmcache_label_scan(cmd); @@ -141,6 +149,9 @@ int wait_for_single_lv(struct cmd_context *cmd, struct poll_operation_id *id, uint32_t lockd_state = 0; int ret; + if (!parms->wait_before_testing) + lvmcache_label_scan(cmd); + /* Poll for completion */ while (!finished) { if (parms->wait_before_testing) From aee27dc7bad5734012885fe9f174def0a3f26771 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Wed, 18 Apr 2018 16:29:42 -0500 Subject: [PATCH 79/87] scan: skip device rescan in vg_read For reporting commands (pvs,vgs,lvs,pvdisplay,vgdisplay,lvdisplay) we do not need to repeat the label scan of devices in vg_read if they all had matching metadata in the initial label scan. The data read by label scan can just be reused for the vg_read. This cuts the amount of device i/o in half, from two reads of each device to one. We have to be careful to avoid repairing the VG if we've skipped rescanning. (The VG repair code is very poor, and will be redone soon.) 
--- lib/cache/lvmcache.c | 207 ++++++++++++++++++++++++++-------- lib/cache/lvmcache.h | 3 + lib/commands/toolcontext.h | 2 + lib/config/config.c | 6 + lib/format_text/format-text.c | 19 ++++ lib/format_text/import_vsn1.c | 6 + lib/format_text/text_label.c | 7 ++ lib/metadata/metadata.c | 88 ++++++++++++--- test/shell/mda-rollback.sh | 3 + tools/command.c | 1 + tools/commands.h | 12 +- tools/lvmcmdline.c | 3 + tools/toollib.c | 2 +- tools/tools.h | 4 + 14 files changed, 295 insertions(+), 68 deletions(-) diff --git a/lib/cache/lvmcache.c b/lib/cache/lvmcache.c index c8046f7b1..f1fd68375 100644 --- a/lib/cache/lvmcache.c +++ b/lib/cache/lvmcache.c @@ -62,7 +62,9 @@ struct lvmcache_vginfo { char *lock_type; uint32_t mda_checksum; size_t mda_size; + int seqno; int independent_metadata_location; /* metadata read from independent areas */ + int scan_summary_mismatch; /* vgsummary from devs had mismatching seqno or checksum */ /* * The following are not related to lvmcache or vginfo, @@ -1057,25 +1059,34 @@ next: * the labels/metadata for each device in the VG now that we hold the * lock, and use this for processing the VG. * - * FIXME: In some cases, the data read by label_scan may be fine, and not - * need to be reread here. e.g. a reporting command, possibly with a - * special option, could skip this second reread. Or, we could look - * at the VG seqno in each copy of the metadata read in the first label - * scan, and if they all match, consider it good enough to use for - * reporting without rereading it. (A command modifying the VG would - * always want to reread while the lock is held before modifying.) - * * A label scan is ultimately creating associations between devices * and VGs so that when vg_read wants to get VG metadata, it knows - * which devices to read. In the special case where VG metadata is - * stored in files on the file system (configured in lvm.conf), the + * which devices to read. 
+ * + * It's possible that a VG is being modified during the first label + * scan, causing the scan to see inconsistent metadata on different + * devs in the VG. It's possible that those modifications are + * adding/removing devs from the VG, in which case the device/VG + * associations in lvmcache after the scan are not correct. + * NB. It's even possible the VG was removed completely between + * label scan and here, in which case we'd not find the VG in + * lvmcache after this rescan. + * + * A scan will also create an incorrect/incomplete picture of a VG + * when devices have no metadata areas. The scan does not use + * VG metadata to figure out that a dev with no metadata belongs + * to a particular VG, so a device with no mdas will not be linked + * to that VG after a scan. + * + * (In the special case where VG metadata is stored in files on the + * file system (configured in lvm.conf), the * vginfo->independent_metadata_location flag is set during label scan. * When we get here to rescan, we are revalidating the device to VG * mapping from label scan by repeating the label scan on a subset of * devices. If we see independent_metadata_location is set from the * initial label scan, we know that there is nothing to do because * there is no device to VG mapping to revalidate, since the VG metadata - * comes directly from files. + * comes directly from files.) 
*/ int lvmcache_label_rescan_vg(struct cmd_context *cmd, const char *vgname, const char *vgid) @@ -1083,7 +1094,7 @@ int lvmcache_label_rescan_vg(struct cmd_context *cmd, const char *vgname, const struct dm_list devs; struct device_list *devl; struct lvmcache_vginfo *vginfo; - struct lvmcache_info *info; + struct lvmcache_info *info, *info2; if (lvmetad_used()) return 1; @@ -1112,14 +1123,17 @@ int lvmcache_label_rescan_vg(struct cmd_context *cmd, const char *vgname, const dm_list_add(&devs, &devl->list); } - label_scan_devs(cmd, &devs); + dm_list_iterate_items_safe(info, info2, &vginfo->infos) + lvmcache_del(info); - /* - * TODO: grab vginfo again, and compare vginfo->infos - * to what was found above before rereading labels. - * If there are any info->devs now that were not in the - * first devs list, then do label_read on those also. - */ + /* Dropping the last info struct is supposed to drop vginfo. */ + if ((vginfo = lvmcache_vginfo_from_vgname(vgname, vgid))) + log_warn("VG info not dropped before rescan of %s", vgname); + + /* FIXME: should we also rescan unused_duplicate_devs for devs + being rescanned here and then repeat resolving the duplicates? 
*/ + + label_scan_devs(cmd, &devs); return 1; } @@ -1803,28 +1817,6 @@ out: return 1; } -static int _lvmcache_update_vg_mda_info(struct lvmcache_info *info, uint32_t mda_checksum, - size_t mda_size) -{ - if (!info || !info->vginfo || !mda_size) - return 1; - - if (info->vginfo->mda_checksum == mda_checksum || info->vginfo->mda_size == mda_size) - return 1; - - info->vginfo->mda_checksum = mda_checksum; - info->vginfo->mda_size = mda_size; - - /* FIXME Add checksum index */ - - log_debug_cache("lvmcache %s: VG %s: stored metadata checksum 0x%08" - PRIx32 " with size %" PRIsize_t ".", - dev_name(info->dev), info->vginfo->vgname, - mda_checksum, mda_size); - - return 1; -} - int lvmcache_add_orphan_vginfo(const char *vgname, struct format_type *fmt) { if (!_lock_hash && !lvmcache_init()) { @@ -1835,10 +1827,18 @@ int lvmcache_add_orphan_vginfo(const char *vgname, struct format_type *fmt) return _lvmcache_update_vgname(NULL, vgname, vgname, 0, "", fmt); } +/* + * FIXME: get rid of other callers of this function which call it + * in odd cases to "fix up" some bit of lvmcache state. Make those + * callers fix up what they need to directly, and leave this function + * with one purpose and caller. 
+ */ + int lvmcache_update_vgname_and_id(struct lvmcache_info *info, struct lvmcache_vgsummary *vgsummary) { const char *vgname = vgsummary->vgname; const char *vgid = (char *)&vgsummary->vgid; + struct lvmcache_vginfo *vginfo; if (!vgname && !info->vginfo) { log_error(INTERNAL_ERROR "NULL vgname handed to cache"); @@ -1853,12 +1853,80 @@ int lvmcache_update_vgname_and_id(struct lvmcache_info *info, struct lvmcache_vg !is_orphan_vg(info->vginfo->vgname) && critical_section()) return 1; - if (!_lvmcache_update_vgname(info, vgname, vgid, vgsummary->vgstatus, - vgsummary->creation_host, info->fmt) || - !_lvmcache_update_vgid(info, info->vginfo, vgid) || - !_lvmcache_update_vgstatus(info, vgsummary->vgstatus, vgsummary->creation_host, vgsummary->lock_type, vgsummary->system_id) || - !_lvmcache_update_vg_mda_info(info, vgsummary->mda_checksum, vgsummary->mda_size)) - return_0; + /* + * Creates a new vginfo struct for this vgname/vgid if none exists, + * and attaches the info struct for the dev to the vginfo. + * Puts the vginfo into the vgname hash table. + */ + if (!_lvmcache_update_vgname(info, vgname, vgid, vgsummary->vgstatus, vgsummary->creation_host, info->fmt)) { + log_error("Failed to update VG %s info in lvmcache.", vgname); + return 0; + } + + /* + * Puts the vginfo into the vgid hash table. + */ + if (!_lvmcache_update_vgid(info, info->vginfo, vgid)) { + log_error("Failed to update VG %s info in lvmcache.", vgname); + return 0; + } + + /* + * FIXME: identify which case this is and why this is needed, then + * change that so it doesn't use this function and we can remove + * this special case. + * (I think this distinguishes the scan path, where these things + * are set from the vg_read path where lvmcache_update_vg() is + * called which calls this function without seqno/mda_size/mda_checksum.) 
+ */ + if (!vgsummary->seqno && !vgsummary->mda_size && !vgsummary->mda_checksum) + return 1; + + if (!(vginfo = info->vginfo)) + return 1; + + if (!vginfo->seqno) { + vginfo->seqno = vgsummary->seqno; + + log_debug_cache("lvmcache %s: VG %s: set seqno to %d", + dev_name(info->dev), vginfo->vgname, vginfo->seqno); + + } else if (vgsummary->seqno != vginfo->seqno) { + log_warn("Scan of VG %s from %s found metadata seqno %d vs previous %d.", + vgname, dev_name(info->dev), vgsummary->seqno, vginfo->seqno); + vginfo->scan_summary_mismatch = 1; + /* If we don't return success, this dev info will be removed from lvmcache, + and then we won't be able to rescan it or repair it. */ + return 1; + } + + if (!vginfo->mda_size) { + vginfo->mda_checksum = vgsummary->mda_checksum; + vginfo->mda_size = vgsummary->mda_size; + + log_debug_cache("lvmcache %s: VG %s: set mda_checksum to %x mda_size to %zu", + dev_name(info->dev), vginfo->vgname, + vginfo->mda_checksum, vginfo->mda_size); + + } else if ((vginfo->mda_size != vgsummary->mda_size) || (vginfo->mda_checksum != vgsummary->mda_checksum)) { + log_warn("Scan of VG %s from %s found mda_checksum %x mda_size %zu vs previous %x %zu", + vgname, dev_name(info->dev), vgsummary->mda_checksum, vgsummary->mda_size, + vginfo->mda_checksum, vginfo->mda_size); + vginfo->scan_summary_mismatch = 1; + /* If we don't return success, this dev info will be removed from lvmcache, + and then we won't be able to rescan it or repair it. */ + return 1; + } + + /* + * If a dev has an unmatching checksum, ignore the other + * info from it, keeping the info we already saved. 
+ */ + if (!_lvmcache_update_vgstatus(info, vgsummary->vgstatus, vgsummary->creation_host, + vgsummary->lock_type, vgsummary->system_id)) { + log_error("Failed to update VG %s info in lvmcache.", vgname); + return 0; + } return 1; } @@ -2532,6 +2600,7 @@ int lvmcache_lookup_mda(struct lvmcache_vgsummary *vgsummary) vgsummary->vgname = vginfo->vgname; vgsummary->creation_host = vginfo->creation_host; vgsummary->vgstatus = vginfo->status; + vgsummary->seqno = vginfo->seqno; /* vginfo->vgid has 1 extra byte then vgsummary->vgid */ memcpy(&vgsummary->vgid, vginfo->vgid, sizeof(vgsummary->vgid)); @@ -2592,3 +2661,47 @@ int lvmcache_vg_is_foreign(struct cmd_context *cmd, const char *vgname, const ch return ret; } +/* + * Example of reading five devs in sequence from the same VG: + * + * dev1: + * lvmcache: creates vginfo with initial values + * + * dev2: all checksums match. + * mda_header checksum matches vginfo from dev1 + * metadata checksum matches vginfo from dev1 + * metadata is not parsed, and the vgsummary values copied + * from lvmcache from dev1 and passed back to lvmcache for dev2. + * lvmcache: attach info for dev2 to existing vginfo + * + * dev3: mda_header and metadata have unmatching checksums. + * mda_header checksum matches vginfo from dev1 + * metadata checksum doesn't match vginfo from dev1 + * produces read error in config.c + * lvmcache: info for dev3 is deleted, FIXME: use a defective state + * + * dev4: mda_header and metadata have matching checksums, but + * does not match checksum in lvmcache from prev dev. + * mda_header checksum doesn't match vginfo from dev1 + * lvmcache_lookup_mda returns 0, no vgname, no checksum_only + * lvmcache: update_vgname_and_id sees checksum from dev4 does not + * match vginfo from dev1, so vginfo->scan_summary_mismatch is set. + * attach info for dev4 to existing vginfo + * + * dev5: config parsing error. 
+ * lvmcache: info for dev5 is deleted, FIXME: use a defective state + */ + +int lvmcache_scan_mismatch(struct cmd_context *cmd, const char *vgname, const char *vgid) +{ + struct lvmcache_vginfo *vginfo; + + if (!vgname || !vgid) + return 1; + + if ((vginfo = lvmcache_vginfo_from_vgid(vgid))) + return vginfo->scan_summary_mismatch; + + return 1; +} + diff --git a/lib/cache/lvmcache.h b/lib/cache/lvmcache.h index 107c99338..ad478bd27 100644 --- a/lib/cache/lvmcache.h +++ b/lib/cache/lvmcache.h @@ -60,6 +60,7 @@ struct lvmcache_vgsummary { uint32_t mda_checksum; size_t mda_size; int zero_offset; + int seqno; }; int lvmcache_init(void); @@ -216,4 +217,6 @@ void lvmcache_save_suspended_vg(struct volume_group *vg, int precommitted); struct volume_group *lvmcache_get_suspended_vg(const char *vgid); void lvmcache_drop_suspended_vg(struct volume_group *vg); +int lvmcache_scan_mismatch(struct cmd_context *cmd, const char *vgname, const char *vgid); + #endif diff --git a/lib/commands/toolcontext.h b/lib/commands/toolcontext.h index d20cef157..24742641c 100644 --- a/lib/commands/toolcontext.h +++ b/lib/commands/toolcontext.h @@ -169,6 +169,8 @@ struct cmd_context { unsigned process_component_lvs:1; /* command processes also component LVs */ unsigned mirror_warn_printed:1; /* command already printed warning about non-monitored mirrors */ unsigned pvscan_cache_single:1; + unsigned can_use_one_scan:1; + /* * Filtering. */ diff --git a/lib/config/config.c b/lib/config/config.c index d07c17379..ad816c209 100644 --- a/lib/config/config.c +++ b/lib/config/config.c @@ -545,6 +545,12 @@ int config_file_read_fd(struct dm_config_tree *cft, struct device *dev, dev_io_r fb = buf; } + /* + * The checksum passed in is the checksum from the mda_header + * preceding this metadata. They should always match. + * FIXME: handle case where mda_header checksum is bad, + * but the checksum calculated here is correct. 
+ */ if (checksum_fn && checksum != (checksum_fn(checksum_fn(INITIAL_CRC, (const uint8_t *)fb, size), (const uint8_t *)(fb + size), size2))) { diff --git a/lib/format_text/format-text.c b/lib/format_text/format-text.c index 4146e7c72..792d75a4d 100644 --- a/lib/format_text/format-text.c +++ b/lib/format_text/format-text.c @@ -1261,8 +1261,27 @@ int read_metadata_location_summary(const struct format_type *fmt, * which also matches the checksum saved in vginfo from * another device, then it skips parsing the metadata into * a config tree, which saves considerable cpu time. + * + * (NB. there can be different VGs with different metadata + * and checksums, but with the same name.) + * + * FIXME: handle the case where mda_header checksum is bad + * but metadata checksum is good. */ + /* + * If the checksum we compute of the metadata differs from + * the checksum from mda_header that we save here, then we + * ignore the device. FIXME: we need to classify a device + * with errors like this as defective. + * + * If the checksum from mda_header and computed from metadata + * does not match the checksum saved in lvmcache from a prev + * device, then we do not skip parsing/saving metadata from + * this dev. It's parsed, fields saved in vgsummary, which + * is passed into lvmcache (update_vgname_and_id), and + * there we'll see a checksum mismatch. 
+ */ vgsummary->mda_checksum = rlocn->checksum; vgsummary->mda_size = rlocn->size; lvmcache_lookup_mda(vgsummary); diff --git a/lib/format_text/import_vsn1.c b/lib/format_text/import_vsn1.c index dee5379f2..e038a273f 100644 --- a/lib/format_text/import_vsn1.c +++ b/lib/format_text/import_vsn1.c @@ -1292,6 +1292,12 @@ static int _read_vgsummary(const struct format_type *fmt, const struct dm_config (!(vgsummary->lock_type = dm_pool_strdup(mem, str)))) return_0; + if (!_read_int32(vgn, "seqno", &vgsummary->seqno)) { + log_error("Couldn't read seqno for volume group %s.", + vgsummary->vgname); + return 0; + } + return 1; } diff --git a/lib/format_text/text_label.c b/lib/format_text/text_label.c index c47a35a26..7d10e065b 100644 --- a/lib/format_text/text_label.c +++ b/lib/format_text/text_label.c @@ -438,6 +438,13 @@ out: baton.info = info; baton.label = *label; + /* + * In the vg_read phase, we compare all mdas and decide which to use + * which are bad and need repair. + * + * FIXME: this quits if the first mda is bad, but we need something + * smarter to be able to use the second mda if it's good. + */ if (!lvmcache_foreach_mda(info, _read_mda_header_and_metadata, &baton)) { log_error("Failed to scan VG from %s", dev_name(dev)); return 0; diff --git a/lib/metadata/metadata.c b/lib/metadata/metadata.c index 8c8ce25de..685c5890d 100644 --- a/lib/metadata/metadata.c +++ b/lib/metadata/metadata.c @@ -3761,6 +3761,7 @@ static struct volume_group *_vg_read(struct cmd_context *cmd, struct pv_list *pvl; struct dm_list all_pvs; char uuid[64] __attribute__((aligned(8))); + int skipped_rescan = 0; int reappeared = 0; struct cached_vg_fmtdata *vg_fmtdata = NULL; /* Additional format-specific data about the vg */ @@ -3834,10 +3835,42 @@ static struct volume_group *_vg_read(struct cmd_context *cmd, * lock is held, so we rescan all the info from the devs in case * something changed between the initial scan and now that the lock * is held. + * + * Some commands (e.g. 
reporting) are fine reporting data read by + * the label scan. It doesn't matter if the devs changed between + * the label scan and here, we can report what was seen in the + * scan, even though it is the old state, since we will not be + * making any modifications. If the VG was being modified during + * the scan, and caused us to see inconsistent metadata on the + * different PVs in the VG, then we do want to rescan the devs + * here to get a consistent view of the VG. Note that we don't + * know if the scan found all the PVs in the VG at this point. + * We don't know that until vg_read looks at the list of PVs in + * the metadata and compares that to the devices found by the scan. + * + * It's possible that a change made to the VG during scan was + * adding or removing a PV from the VG. In this case, the list + * of devices associated with the VG in lvmcache would change + * due to the rescan. + * + * The devs in the VG may be persistently inconsistent due to some + * previous problem. In this case, rescanning the labels here will + * find the same inconsistency. The VG repair (mistakenly done by + * vg_read below) is supposed to fix that. + * + * FIXME: sort out the usage of the global lock (which is mixed up + * with the orphan lock), and when we can tell that the global + * lock is taken prior to the label scan, and still held here, + * we can also skip the rescan in that case. 
*/ - log_debug_metadata("Reading VG rereading labels for %s", vgname); - - lvmcache_label_rescan_vg(cmd, vgname, vgid); + if (!cmd->can_use_one_scan || lvmcache_scan_mismatch(cmd, vgname, vgid)) { + skipped_rescan = 0; + log_debug_metadata("Rescanning devices for for %s", vgname); + lvmcache_label_rescan_vg(cmd, vgname, vgid); + } else { + log_debug_metadata("Skipped rescanning devices for %s", vgname); + skipped_rescan = 1; + } if (!(fmt = lvmcache_fmt_from_vgname(cmd, vgname, vgid, 0))) { log_debug_metadata("Cache did not find fmt for vgname %s", vgname); @@ -3940,10 +3973,8 @@ static struct volume_group *_vg_read(struct cmd_context *cmd, /* FIXME Also ensure contents same - checksum compare? */ if (correct_vg->seqno != vg->seqno) { - if (cmd->metadata_read_only) - log_very_verbose("Not repairing VG %s metadata seqno (%d != %d) " - "as global/metadata_read_only is set.", - vgname, vg->seqno, correct_vg->seqno); + if (cmd->metadata_read_only || skipped_rescan) + log_warn("Not repairing metadata for VG %s.", vgname); else inconsistent = 1; @@ -4004,7 +4035,29 @@ static struct volume_group *_vg_read(struct cmd_context *cmd, return_NULL; } - log_debug_metadata("Empty mda found for VG %s.", vgname); + log_debug_metadata("Empty mda found for VG %s on %s.", + vgname, dev_name(pvl->pv->dev)); + +#if 0 + /* + * If we are going to do any repair we have to be using + * the latest metadata on disk, so we have to rescan devs + * if we skipped that at the start of the vg_read. We'll + * likely come back through here, but without having + * skipped_rescan. + * + * FIXME: in some cases we don't want to do this. 
+ */ + if (skipped_rescan && cmd->can_use_one_scan) { + log_debug_metadata("Restarting read to rescan devs."); + cmd->can_use_one_scan = 0; + release_vg(correct_vg); + correct_vg = NULL; + lvmcache_del(info); + label_read(pvl->pv->dev, NULL, 0); + goto restart_scan; + } +#endif if (inconsistent_mdas) continue; @@ -4142,10 +4195,8 @@ static struct volume_group *_vg_read(struct cmd_context *cmd, /* FIXME Also ensure contents same - checksums same? */ if (correct_vg->seqno != vg->seqno) { /* Ignore inconsistent seqno if told to skip repair logic */ - if (cmd->metadata_read_only) - log_very_verbose("Not repairing VG %s metadata seqno (%d != %d) " - "as global/metadata_read_only is set.", - vgname, vg->seqno, correct_vg->seqno); + if (cmd->metadata_read_only || skipped_rescan) + log_warn("Not repairing metadata for VG %s.", vgname); else inconsistent = 1; @@ -4225,6 +4276,13 @@ static struct volume_group *_vg_read(struct cmd_context *cmd, return correct_vg; } + if (skipped_rescan) { + log_warn("Not repairing metadata for VG %s.", vgname); + _free_pv_list(&all_pvs); + release_vg(correct_vg); + return_NULL; + } + /* Don't touch if vgids didn't match */ if (inconsistent_vgid) { log_warn("WARNING: Inconsistent metadata UUIDs found for " @@ -4271,14 +4329,16 @@ static struct volume_group *_vg_read(struct cmd_context *cmd, } /* We have the VG now finally, check if PV ext info is in sync with VG metadata. */ - if (!_check_or_repair_pv_ext(cmd, correct_vg, *consistent, &inconsistent_pvs)) { + if (!_check_or_repair_pv_ext(cmd, correct_vg, + skipped_rescan ? 
0 : *consistent, + &inconsistent_pvs)) { release_vg(correct_vg); return_NULL; } *consistent = !inconsistent_pvs; - if (correct_vg && *consistent) { + if (correct_vg && *consistent && !skipped_rescan) { if (update_old_pv_ext && !_vg_update_old_pv_ext_if_needed(correct_vg)) { release_vg(correct_vg); return_NULL; diff --git a/test/shell/mda-rollback.sh b/test/shell/mda-rollback.sh index dbfdc7d51..34080faaf 100644 --- a/test/shell/mda-rollback.sh +++ b/test/shell/mda-rollback.sh @@ -25,6 +25,9 @@ vgextend $vg1 "$dev1" dd if=badmda of="$dev1" bs=256K count=1 +# the vg_read in vgck (and other commands) will repair the metadata +vgck $vg1 + # dev1 is part of vg1 (as witnessed by metadata on dev2 and dev3), but its mda # was corrupt (written over by a backup from time dev1 was an orphan) check pv_field "$dev1" vg_name $vg1 diff --git a/tools/command.c b/tools/command.c index f3b5d82e3..377d03f0f 100644 --- a/tools/command.c +++ b/tools/command.c @@ -140,6 +140,7 @@ static inline int configtype_arg(struct cmd_context *cmd __attribute__((unused)) #define ENABLE_DUPLICATE_DEVS 0x00000400 #define DISALLOW_TAG_ARGS 0x00000800 #define GET_VGNAME_FROM_OPTIONS 0x00001000 +#define CAN_USE_ONE_SCAN 0x00002000 /* create foo_CMD enums for command def ID's in command-lines.in */ diff --git a/tools/commands.h b/tools/commands.h index 3d142c339..4af92c87f 100644 --- a/tools/commands.h +++ b/tools/commands.h @@ -55,7 +55,7 @@ xx(lvcreate, xx(lvdisplay, "Display information about a logical volume", - PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT | LOCKD_VG_SH) + PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT | LOCKD_VG_SH | CAN_USE_ONE_SCAN) xx(lvextend, "Add space to a logical volume", @@ -99,7 +99,7 @@ xx(lvresize, xx(lvs, "Display information about logical volumes", - PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT | LOCKD_VG_SH) + PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT | LOCKD_VG_SH | CAN_USE_ONE_SCAN) xx(lvscan, "List all logical volumes in all volume groups", @@ -127,7 +127,7 @@ xx(pvdata, 
xx(pvdisplay, "Display various attributes of physical volume(s)", - PERMITTED_READ_ONLY | ENABLE_ALL_DEVS | ENABLE_DUPLICATE_DEVS | LOCKD_VG_SH) + PERMITTED_READ_ONLY | ENABLE_ALL_DEVS | ENABLE_DUPLICATE_DEVS | LOCKD_VG_SH | CAN_USE_ONE_SCAN) /* ALL_VGS_IS_DEFAULT is for polldaemon to find pvmoves in-progress using process_each_vg. */ @@ -145,7 +145,7 @@ xx(pvremove, xx(pvs, "Display information about physical volumes", - PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT | ENABLE_ALL_DEVS | ENABLE_DUPLICATE_DEVS | LOCKD_VG_SH) + PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT | ENABLE_ALL_DEVS | ENABLE_DUPLICATE_DEVS | LOCKD_VG_SH | CAN_USE_ONE_SCAN) xx(pvscan, "List all physical volumes", @@ -189,7 +189,7 @@ xx(vgcreate, xx(vgdisplay, "Display volume group information", - PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT | LOCKD_VG_SH) + PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT | LOCKD_VG_SH | CAN_USE_ONE_SCAN) xx(vgexport, "Unregister volume group(s) from the system", @@ -229,7 +229,7 @@ xx(vgrename, xx(vgs, "Display information about volume groups", - PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT | LOCKD_VG_SH) + PERMITTED_READ_ONLY | ALL_VGS_IS_DEFAULT | LOCKD_VG_SH | CAN_USE_ONE_SCAN) xx(vgscan, "Search for all volume groups", diff --git a/tools/lvmcmdline.c b/tools/lvmcmdline.c index c7ac4b633..0600b1c32 100644 --- a/tools/lvmcmdline.c +++ b/tools/lvmcmdline.c @@ -2291,6 +2291,9 @@ static int _get_current_settings(struct cmd_context *cmd) if (cmd->cname->flags & LOCKD_VG_SH) cmd->lockd_vg_default_sh = 1; + if (cmd->cname->flags & CAN_USE_ONE_SCAN) + cmd->can_use_one_scan = 1; + cmd->partial_activation = 0; cmd->degraded_activation = 0; activation_mode = find_config_tree_str(cmd, activation_mode_CFG, NULL); diff --git a/tools/toollib.c b/tools/toollib.c index e887f6525..6b71f2dcc 100644 --- a/tools/toollib.c +++ b/tools/toollib.c @@ -2032,7 +2032,7 @@ static int _process_vgnameid_list(struct cmd_context *cmd, uint32_t read_flags, (!dm_list_empty(arg_tags) && str_list_match_list(arg_tags, 
&vg->tags, NULL))) && select_match_vg(cmd, handle, vg) && _select_matches(handle)) { - log_very_verbose("Processing VG %s %s", vg_name, vg_uuid ? uuid : ""); + log_very_verbose("Running command for VG %s %s", vg_name, vg_uuid ? uuid : ""); ret = process_single_vg(cmd, vg_name, vg, handle); _update_selection_result(handle, &whole_selected); diff --git a/tools/tools.h b/tools/tools.h index d4d2fb2a0..5fe3ba86f 100644 --- a/tools/tools.h +++ b/tools/tools.h @@ -136,6 +136,10 @@ struct arg_value_group_list { #define DISALLOW_TAG_ARGS 0x00000800 /* Command may need to find VG name in an option value. */ #define GET_VGNAME_FROM_OPTIONS 0x00001000 +/* The data read from disk by label scan can be used for vg_read. */ +#define CAN_USE_ONE_SCAN 0x00002000 + + void usage(const char *name); /* the argument verify/normalise functions */ From c42a18d372f314aa51d9d7877342f1d78fd68973 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Fri, 20 Apr 2018 12:00:49 -0500 Subject: [PATCH 80/87] liblvm2app: missed the addition of lvmcache_label_scan --- liblvm/lvm_vg.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/liblvm/lvm_vg.c b/liblvm/lvm_vg.c index 0678bdc16..616c78fde 100644 --- a/liblvm/lvm_vg.c +++ b/liblvm/lvm_vg.c @@ -219,6 +219,8 @@ static vg_t _lvm_vg_open(lvm_t libh, const char *vgname, const char *mode, return NULL; } + lvmcache_label_scan((struct cmd_context *)libh); + vg = vg_read((struct cmd_context *)libh, vgname, NULL, internal_flags, 0); if (vg_read_error(vg)) { /* FIXME: use log_errno either here in inside vg_read */ From 1409c4a1c20319845171b91edb6310e25ae81792 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Fri, 20 Apr 2018 16:09:49 -0500 Subject: [PATCH 81/87] clvm: rescan when VG or PV not found Rescan devices to update lvmcache content when clvmd vg_read doesn't find a VG or PV. 
--- lib/metadata/metadata.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/lib/metadata/metadata.c b/lib/metadata/metadata.c index 685c5890d..2df9abeb7 100644 --- a/lib/metadata/metadata.c +++ b/lib/metadata/metadata.c @@ -4585,6 +4585,7 @@ static struct volume_group *_vg_read_by_vgid(struct cmd_context *cmd, if (!(vgname = lvmcache_vgname_from_vgid(cmd->mem, vgid))) { log_debug_metadata("Reading VG by vgid %.8s no VG name found, retrying.", vgid); lvmcache_destroy(cmd, 0, 0); + label_scan_destroy(cmd); lvmcache_label_scan(cmd); } @@ -4597,14 +4598,33 @@ static struct volume_group *_vg_read_by_vgid(struct cmd_context *cmd, label_scan_setup_bcache(); - if ((vg = _vg_read(cmd, vgname, vgid, warn_flags, &consistent, precommitted))) { - /* Does it matter if consistent is 0 or 1? */ - label_scan_destroy(cmd); - return vg; + if (!(vg = _vg_read(cmd, vgname, vgid, warn_flags, &consistent, precommitted))) { + log_error("Rescan devices to look for missing VG."); + goto scan; } - label_scan_destroy(cmd); + if (vg_missing_pv_count(vg)) { + log_error("Rescan devices to look for missing PVs."); + release_vg(vg); + goto scan; + } + label_scan_destroy(cmd); /* drop bcache to close devs, keep lvmcache */ + return vg; + + scan: + lvmcache_destroy(cmd, 0, 0); + label_scan_destroy(cmd); + lvmcache_label_scan(cmd); + + if (!(vg = _vg_read(cmd, vgname, vgid, warn_flags, &consistent, precommitted))) + goto fail; + + label_scan_destroy(cmd); /* drop bcache to close devs, keep lvmcache */ + return vg; + + fail: + label_scan_destroy(cmd); /* drop bcache to close devs, keep lvmcache */ log_debug_metadata("Reading VG by vgid %.8s not found.", vgid); return NULL; } From fcdac700f9aac079761ad302e06413a5eb2178b9 Mon Sep 17 00:00:00 2001 From: Zdenek Kabelac Date: Mon, 23 Apr 2018 22:28:47 +0200 Subject: [PATCH 82/87] gcc: remove duplicate typedef --- lib/device/bcache.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/device/bcache.h 
b/lib/device/bcache.h index 999223a55..4ce137ae3 100644 --- a/lib/device/bcache.h +++ b/lib/device/bcache.h @@ -97,8 +97,6 @@ enum bcache_get_flags { GF_DIRTY = (1 << 1) }; -typedef uint64_t block_address; - unsigned bcache_nr_cache_blocks(struct bcache *cache); unsigned bcache_max_prefetches(struct bcache *cache); From ae274617770aa44263619151ef24393cb8a16a79 Mon Sep 17 00:00:00 2001 From: Zdenek Kabelac Date: Mon, 23 Apr 2018 21:28:35 +0200 Subject: [PATCH 83/87] lvchange: update mirror table when changing monitoring Since for non-monitored mirrors we let mirror running without error handling - when monitoring changes for mirror, updated table (refresh) is needed. --- tools/lvchange.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/lvchange.c b/tools/lvchange.c index 0aac5ac19..e1be4cbb2 100644 --- a/tools/lvchange.c +++ b/tools/lvchange.c @@ -1603,6 +1603,10 @@ static int _lvchange_monitor_poll_single(struct cmd_context *cmd, struct logical_volume *lv, struct processing_handle *handle) { + /* Mirror is using different table line when un/monitored */ + if (lv_is_mirror(lv) && !lv_refresh(cmd, lv)) + return_ECMD_FAILED; + if (arg_is_set(cmd, monitor_ARG) && !_lvchange_monitoring(cmd, lv)) return_ECMD_FAILED; From 66f4f8c27f6520edc79bafe4aa829d75b3d5e854 Mon Sep 17 00:00:00 2001 From: Zdenek Kabelac Date: Mon, 23 Apr 2018 22:13:03 +0200 Subject: [PATCH 84/87] lvconvert: preserve regionsize from existing mirror When adding leg to existing mirror - preserve its regionsize. 
--- tools/lvconvert.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/lvconvert.c b/tools/lvconvert.c index 44fdd1608..abdcf2c2d 100644 --- a/tools/lvconvert.c +++ b/tools/lvconvert.c @@ -758,6 +758,13 @@ static int _lvconvert_mirrors_parse_params(struct cmd_context *cmd, if (*old_mimage_count != *new_mimage_count) log_verbose("Adjusting mirror image count of %s", lv->name); + /* If region size is not given by user - use value from mirror */ + if (lv_is_mirrored(lv) && !lp->region_size_supplied) { + lp->region_size = first_seg(lv)->region_size; + log_debug("Copying region size %s from existing mirror.", + display_size(lv->vg->cmd, lp->region_size)); + } + /* * Adjust log type * From c492fbb51ce783ca32efd7ed8414d809daaf4ca5 Mon Sep 17 00:00:00 2001 From: Zdenek Kabelac Date: Mon, 23 Apr 2018 22:11:38 +0200 Subject: [PATCH 85/87] debug: more explanatory error message --- lib/metadata/lv_manip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/metadata/lv_manip.c b/lib/metadata/lv_manip.c index 160943f90..7f9cf0b4e 100644 --- a/lib/metadata/lv_manip.c +++ b/lib/metadata/lv_manip.c @@ -3805,7 +3805,7 @@ int lv_add_mirror_lvs(struct logical_volume *lv, return_0; if (region_size && region_size != seg->region_size) { - log_error("Conflicting region_size."); + log_error("Conflicting region_size %u != %u.", region_size, seg->region_size); return 0; } From fc3ed8856f445079de43cd33f64582cee9bc80ea Mon Sep 17 00:00:00 2001 From: Zdenek Kabelac Date: Mon, 23 Apr 2018 21:26:05 +0200 Subject: [PATCH 86/87] tests: update testing to not use delay dev Instead of using a delay device, use a 'zero' device and let the mirror do some real work which takes some time. In case the test machine is too fast, the mirror might need to be made bigger to meet the needed criteria. Also move all tests needing this 'zero' PV trick to the end of the test so $dev2 and $dev4 are covered with 'zero' and can take any amount of writes without consuming any real space. 
--- test/shell/lvconvert-mirror.sh | 129 +++++++++++++++++---------------- 1 file changed, 68 insertions(+), 61 deletions(-) diff --git a/test/shell/lvconvert-mirror.sh b/test/shell/lvconvert-mirror.sh index 4e182125c..a45e6e722 100644 --- a/test/shell/lvconvert-mirror.sh +++ b/test/shell/lvconvert-mirror.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -# Copyright (C) 2010-2015 Red Hat, Inc. All rights reserved. +# Copyright (C) 2010-2018 Red Hat, Inc. All rights reserved. # # This copyrighted material is made available to anyone wishing to use, # modify, copy, or redistribute it subject to the terms and conditions @@ -17,7 +17,7 @@ export LVM_TEST_LVMETAD_DEBUG_OPTS=${LVM_TEST_LVMETAD_DEBUG_OPTS-} . lib/inittest -aux prepare_pvs 5 20 +aux prepare_pvs 5 100 get_devs # proper DEVRANGE needs to be set according to extent size @@ -78,56 +78,6 @@ lvcreate -aey -l2 -n $lv1 $vg "$dev1" not lvconvert -m+1 --mirrorlog core $vg/$lv1 "$dev1" lvremove -ff $vg -# Start w/ 3-way mirror -# Test pulling primary image before mirror in-sync (should fail) -# Test pulling primary image after mirror in-sync (should work) -# Test that the correct devices remain in the mirror -offset=$(get first_extent_sector "$dev2") -offset=$(( offset + 2 )) -# put 1 single slowing delayed sector -# update in case mirror ever gets faster and allows parallel read -aux delay_dev "$dev2" 0 10 ${offset}:1 - -lvcreate -aey -l10 -Zn -Wn --type mirror --regionsize 16k -m2 -n $lv1 $vg "$dev1" "$dev2" "$dev4" "$dev3:$DEVRANGE" -lvs -a -o+seg_pe_ranges $vg -not lvconvert -m-1 $vg/$lv1 "$dev1" -lvconvert $vg/$lv1 # wait -lvs -a $vg -aux enable_dev "$dev2" -lvconvert $vg/$lv1 # wait -lvconvert -m2 $vg/$lv1 "$dev1" "$dev2" "$dev4" "$dev3:0" # If the above "should" failed... 
- -aux wait_for_sync $vg $lv1 -lvconvert -m-1 $vg/$lv1 "$dev1" -check mirror_images_on $vg $lv1 "$dev2" "$dev4" -lvconvert -m-1 $vg/$lv1 "$dev2" -check linear $vg $lv1 -check lv_on $vg $lv1 "$dev4" -lvremove -ff $vg - -# FIXME: lots of unneeded extents here for log - it needs to be at least region_size in size -# No parallel lvconverts on a single LV please - -lvcreate -aey -Zn -Wn -l8 --type mirror -m1 -n $lv1 $vg "$dev1" "$dev2" "$dev3:0-8" -check mirror $vg $lv1 -check mirror_legs $vg $lv1 2 - -offset=$(get first_extent_sector "$dev4") -offset=$(( offset + 2 )) -aux delay_dev "$dev4" 0 2000 ${offset}: -LVM_TEST_TAG="kill_me_$PREFIX" lvconvert -m+1 -b $vg/$lv1 "$dev4" - -# Next convert should fail b/c we can't have 2 at once -not lvconvert -m+1 $vg/$lv1 "$dev5" -aux enable_dev "$dev4" -lvconvert $vg/$lv1 # wait -lvconvert -m2 $vg/$lv1 # In case the above "should" actually failed - -check mirror $vg $lv1 "$dev3" -check mirror_no_temporaries $vg $lv1 -check mirror_legs $vg $lv1 3 -lvremove -ff $vg - # add 1 mirror to core log mirror, but # implicitly keep log as 'core' lvcreate -aey -l2 --type mirror -m1 --mirrorlog core -n $lv1 $vg "$dev1" "$dev2" @@ -240,13 +190,6 @@ lvremove -ff $vg # --------------------------------------------------------------------- -# "rhbz440405: lvconvert -m0 incorrectly fails if all PEs allocated" -lvcreate -aey -l "$(get pv_field "$dev1" pe_count)" --type mirror -m1 -n $lv1 $vg "$dev1" "$dev2" "$dev3:$DEVRANGE" -aux wait_for_sync $vg $lv1 -lvconvert -m0 $vg/$lv1 "$dev1" -check linear $vg $lv1 -lvremove -ff $vg - # "rhbz264241: lvm mirror doesn't lose it's "M" --nosync attribute # after being down and the up converted" lvcreate -aey -l2 --type mirror -m1 -n $lv1 --nosync $vg @@ -300,6 +243,7 @@ lvcreate -aey -l15 -n $lv1 $vg not lvconvert --type mirror -m1 --corelog --stripes 2 $vg/$lv1 lvremove -ff $vg + # Linear to mirror with mirrored log using --alloc anywhere lvcreate -aey -l2 -n $lv1 $vg "$dev1" if test -e LOCAL_CLVMD; then @@ 
-311,9 +255,10 @@ check mirror $vg $lv1 fi lvremove -ff $vg -# FIXME - cases which needs to be fixed to work in cluster -test -e LOCAL_CLVMD && exit 0 +if test -e LOCAL_CLVMD; then +: # FIXME - cases which needs to be fixed to work in cluster +else # Should not be able to add images to --nosync mirror # but should be able to after 'lvchange --resync' lvcreate -aey --type mirror -m 1 -l1 -n $lv1 $vg --nosync @@ -359,5 +304,67 @@ lvcreate -l2 -n $lv1 $vg lvconvert --type mirror -i1 -m1 $vg/$lv1 | tee out grep -e "$vg/$lv1: Converted:" out || die "Missing sync info in foreground mode" lvremove -ff $vg +fi + + +######################################################################### +# Start w/ 3-way mirror +# Test that the correct devices remain in the mirror +# Make $dev2 & $dev4 zero backend device so large mirrors can be used +# without consuming any real space. Clearly such mirrors can't be read back +# but tests here are validating possibilities of those conversions +# +# Test pulling primary image before mirror in-sync (should fail) +# Test pulling primary image after mirror in-sync (should work) +# +aux zero_dev "$dev2" $(get first_extent_sector "$dev2"): +aux zero_dev "$dev4" $(get first_extent_sector "$dev4"): + +# Use large enough mirror that takes time to synchronize with small regionsize +lvcreate -aey -L80 -Zn -Wn --type mirror --regionsize 16k -m2 -n $lv1 $vg "$dev1" "$dev2" "$dev4" "$dev3:$DEVRANGE" +not lvconvert -m-1 $vg/$lv1 "$dev1" 2>&1 | tee out +grep "not in-sync" out + +lvconvert $vg/$lv1 # wait + +lvconvert -m-1 $vg/$lv1 "$dev1" +check mirror_images_on $vg $lv1 "$dev2" "$dev4" +lvconvert -m-1 $vg/$lv1 "$dev2" +check linear $vg $lv1 +check lv_on $vg $lv1 "$dev4" +lvremove -ff $vg + +# No parallel lvconverts on a single LV please +# Use big enough mirror size and small regionsize to run on all test machines successfully +lvcreate -aey -Zn -Wn -L80 --type mirror --regionsize 16k -m1 -n $lv1 $vg "$dev1" "$dev2" "$dev3:0-8" +check mirror $vg $lv1 +check 
mirror_legs $vg $lv1 2 + +LVM_TEST_TAG="kill_me_$PREFIX" lvconvert -m+1 -b $vg/$lv1 "$dev4" +# ATM upconversion should be running + +# Next convert should fail b/c we can't have 2 at once +not lvconvert -m+1 $vg/$lv1 "$dev5" 2>&1 | tee out +grep "is already being converted" out + +lvconvert $vg/$lv1 # wait +check mirror $vg $lv1 "$dev3" +check mirror_no_temporaries $vg $lv1 +check mirror_legs $vg $lv1 3 +lvremove -ff $vg + +lvs -a $vg +dmsetup table +losetup -a +ls -lRa $PWD + +# "rhbz440405: lvconvert -m0 incorrectly fails if all PEs allocated" +lvcreate -aey -l "$(get pv_field "$dev1" pe_count)" --type mirror -m1 -n $lv1 $vg "$dev1" "$dev2" "$dev3:$DEVRANGE" +lvs -a -o+seg_pe_ranges $vg +aux wait_for_sync $vg $lv1 +lvconvert -m0 $vg/$lv1 "$dev1" +check linear $vg $lv1 +lvremove -ff $vg + vgremove -ff $vg From dcb5434a7f4a3b736d72c7bebe276be6735142ce Mon Sep 17 00:00:00 2001 From: Zdenek Kabelac Date: Wed, 21 Mar 2018 11:21:38 +0100 Subject: [PATCH 87/87] tests: more zero usage Another case where usage of zero backend for mirror legs is more effective then using delayed_dev. --- test/shell/lvconvert-mirror-updown.sh | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/test/shell/lvconvert-mirror-updown.sh b/test/shell/lvconvert-mirror-updown.sh index 5c7b13004..d5059c242 100644 --- a/test/shell/lvconvert-mirror-updown.sh +++ b/test/shell/lvconvert-mirror-updown.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -# Copyright (C) 2014 Red Hat, Inc. All rights reserved. +# Copyright (C) 2018 Red Hat, Inc. All rights reserved. # # This copyrighted material is made available to anyone wishing to use, # modify, copy, or redistribute it subject to the terms and conditions @@ -16,26 +16,27 @@ SKIP_WITH_LVMLOCKD=1 . 
lib/inittest -aux prepare_pvs 3 +aux prepare_pvs 3 100 get_devs vgcreate -s 64k "$vg" "${DEVICES[@]}" -lvcreate -aey -l10 --type mirror -m1 -n $lv1 $vg "$dev1" "$dev2" +# Use zero devices for big mirror legs +aux zero_dev "$dev2" $(get first_extent_sector "$dev2"): +aux zero_dev "$dev3" $(get first_extent_sector "$dev3"): -# Slow down device so we are able to start next conversion in parallel -aux delay_dev "$dev3" 0 200 +lvcreate -aey -L90 --type mirror --corelog --regionsize 16k -m1 -n $lv1 $vg "$dev1" "$dev2" lvconvert -m+1 -b $vg/$lv1 "$dev3" -# To fix - wait helps here.... -#lvconvert $vg/$lv1 + +# We want here ongoing conversion + +lvs -a -o+seg_pe_ranges $vg + +# Now it should be able to drop 2nd. leg +lvconvert -m-1 $vg/$lv1 "$dev2" lvs -a $vg -# -# It fails so use 'should' and -vvvv for now -# -should lvconvert -vvvv -m-1 $vg/$lv1 "$dev2" - vgremove -f $vg