/* * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. * * This file is part of LVM2. * * This copyrighted material is made available to anyone wishing to use, * modify, copy, or redistribute it subject to the terms and conditions * of the GNU Lesser General Public License v.2.1. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software Foundation, * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include "lib.h" #include "lvmcache.h" #include "toolcontext.h" #include "dev-cache.h" #include "locking.h" #include "metadata.h" #include "memlock.h" #include "str_list.h" #include "format-text.h" #include "format_pool.h" #include "format1.h" #include "config.h" #include "lvmetad.h" #define CACHE_INVALID 0x00000001 #define CACHE_LOCKED 0x00000002 /* One per device */ struct lvmcache_info { struct dm_list list; /* Join VG members together */ struct dm_list mdas; /* list head for metadata areas */ struct dm_list das; /* list head for data areas */ struct dm_list bas; /* list head for bootloader areas */ struct lvmcache_vginfo *vginfo; /* NULL == unknown */ struct label *label; const struct format_type *fmt; struct device *dev; uint64_t device_size; /* Bytes */ uint32_t ext_version; /* Extension version */ uint32_t ext_flags; /* Extension flags */ uint32_t status; }; /* One per VG */ struct lvmcache_vginfo { struct dm_list list; /* Join these vginfos together */ struct dm_list infos; /* List head for lvmcache_infos */ const struct format_type *fmt; char *vgname; /* "" == orphan */ uint32_t status; char vgid[ID_LEN + 1]; char _padding[7]; struct lvmcache_vginfo *next; /* Another VG with same name? */ char *creation_host; char *system_id; char *lock_type; uint32_t mda_checksum; size_t mda_size; size_t vgmetadata_size; char *vgmetadata; /* Copy of VG metadata as format_text string */ struct dm_config_tree *cft; /* Config tree created from vgmetadata */ /* Lifetime is directly tied to vgmetadata */ struct volume_group *cached_vg; unsigned holders; unsigned vg_use_count; /* Counter of vg reusage */ unsigned precommitted; /* Is vgmetadata live or precommitted? */ unsigned cached_vg_invalidated; /* Signal to regenerate cached_vg */ unsigned preferred_duplicates; /* preferred duplicate pvs have been set */ }; static struct dm_hash_table *_pvid_hash = NULL; static struct dm_hash_table *_vgid_hash = NULL; static struct dm_hash_table *_vgname_hash = NULL; static struct dm_hash_table *_lock_hash = NULL; static DM_LIST_INIT(_vginfos); static int _scanning_in_progress = 0; static int _has_scanned = 0; static int _vgs_locked = 0; static int _vg_global_lock_held = 0; /* Global lock held when cache wiped? */ static int _found_duplicate_pvs = 0; /* If we never see a duplicate PV we can skip checking for them later. */ static int _suppress_lock_ordering = 0; int lvmcache_init(void) { /* * FIXME add a proper lvmcache_locking_reset() that * resets the cache so no previous locks are locked */ _vgs_locked = 0; dm_list_init(&_vginfos); if (!(_vgname_hash = dm_hash_create(128))) return 0; if (!(_vgid_hash = dm_hash_create(128))) return 0; if (!(_pvid_hash = dm_hash_create(128))) return 0; if (!(_lock_hash = dm_hash_create(128))) return 0; /* * Reinitialising the cache clears the internal record of * which locks are held. The global lock can be held during * this operation so its state must be restored afterwards. */ if (_vg_global_lock_held) { lvmcache_lock_vgname(VG_GLOBAL, 0); _vg_global_lock_held = 0; } return 1; } /* * Once PV info has been populated in lvmcache and * lvmcache has chosen preferred duplicate devices, * set this flag so that lvmcache will not try to * compare and choose preferred duplicate devices * again (which may result in different preferred * devices.) PV info can be populated in lvmcache * multiple times, each time causing lvmcache to * compare the duplicate devices, so we need to * record that the comparison/preferences have * already been done, so the preferrences from the * first time through are not changed. * * This is something of a hack to work around the * fact that the code isn't really designed to * handle duplicate PVs, and the fact that lvmetad * has its own way of picking a preferred duplicate * and lvmcache has another way based on having * more information than lvmetad does. * * If we come up with a better overall method to * handle duplicate PVs, then this can probably be * removed. * * FIXME: if we want to make lvmetad work with clvmd, * then this may need to be changed to set * preferred_duplicates back to 0. */ void lvmcache_set_preferred_duplicates(const char *vgid) { struct lvmcache_vginfo *vginfo; if (!(vginfo = lvmcache_vginfo_from_vgid(vgid))) { stack; return; } vginfo->preferred_duplicates = 1; } void lvmcache_seed_infos_from_lvmetad(struct cmd_context *cmd) { if (!lvmetad_active() || _has_scanned) return; if (!lvmetad_pv_list_to_lvmcache(cmd)) { stack; return; } _has_scanned = 1; } /* Volume Group metadata cache functions */ static void _free_cached_vgmetadata(struct lvmcache_vginfo *vginfo) { if (!vginfo || !vginfo->vgmetadata) return; dm_free(vginfo->vgmetadata); vginfo->vgmetadata = NULL; /* Release also cached config tree */ if (vginfo->cft) { dm_config_destroy(vginfo->cft); vginfo->cft = NULL; } log_debug_cache("Metadata cache: VG %s wiped.", vginfo->vgname); release_vg(vginfo->cached_vg); } /* * Cache VG metadata against the vginfo with matching vgid. */ static void _store_metadata(struct volume_group *vg, unsigned precommitted) { char uuid[64] __attribute__((aligned(8))); struct lvmcache_vginfo *vginfo; char *data; size_t size; if (!(vginfo = lvmcache_vginfo_from_vgid((const char *)&vg->id))) { stack; return; } if (!(size = export_vg_to_buffer(vg, &data))) { stack; _free_cached_vgmetadata(vginfo); return; } /* Avoid reparsing of the same data string */ if (vginfo->vgmetadata && vginfo->vgmetadata_size == size && strcmp(vginfo->vgmetadata, data) == 0) dm_free(data); else { _free_cached_vgmetadata(vginfo); vginfo->vgmetadata_size = size; vginfo->vgmetadata = data; } vginfo->precommitted = precommitted; if (!id_write_format((const struct id *)vginfo->vgid, uuid, sizeof(uuid))) { stack; return; } log_debug_cache("Metadata cache: VG %s (%s) stored (%" PRIsize_t " bytes%s).", vginfo->vgname, uuid, size, precommitted ? ", precommitted" : ""); } static void _update_cache_info_lock_state(struct lvmcache_info *info, int locked, int *cached_vgmetadata_valid) { int was_locked = (info->status & CACHE_LOCKED) ? 1 : 0; /* * Cache becomes invalid whenever lock state changes unless * exclusive VG_GLOBAL is held (i.e. while scanning). */ if (!lvmcache_vgname_is_locked(VG_GLOBAL) && (was_locked != locked)) { info->status |= CACHE_INVALID; *cached_vgmetadata_valid = 0; } if (locked) info->status |= CACHE_LOCKED; else info->status &= ~CACHE_LOCKED; } static void _update_cache_vginfo_lock_state(struct lvmcache_vginfo *vginfo, int locked) { struct lvmcache_info *info; int cached_vgmetadata_valid = 1; dm_list_iterate_items(info, &vginfo->infos) _update_cache_info_lock_state(info, locked, &cached_vgmetadata_valid); if (!cached_vgmetadata_valid) _free_cached_vgmetadata(vginfo); } static void _update_cache_lock_state(const char *vgname, int locked) { struct lvmcache_vginfo *vginfo; if (!(vginfo = lvmcache_vginfo_from_vgname(vgname, NULL))) return; _update_cache_vginfo_lock_state(vginfo, locked); } static void _drop_metadata(const char *vgname, int drop_precommitted) { struct lvmcache_vginfo *vginfo; struct lvmcache_info *info; if (!(vginfo = lvmcache_vginfo_from_vgname(vgname, NULL))) return; /* * Invalidate cached PV labels. * If cached precommitted metadata exists that means we * already invalidated the PV labels (before caching it) * and we must not do it again. */ if (!drop_precommitted && vginfo->precommitted && !vginfo->vgmetadata) log_error(INTERNAL_ERROR "metadata commit (or revert) missing before " "dropping metadata from cache."); if (drop_precommitted || !vginfo->precommitted) dm_list_iterate_items(info, &vginfo->infos) info->status |= CACHE_INVALID; _free_cached_vgmetadata(vginfo); /* VG revert */ if (drop_precommitted) vginfo->precommitted = 0; } /* * Remote node uses this to upgrade precommitted metadata to commited state * when receives vg_commit notification. * (Note that devices can be suspended here, if so, precommitted metadata are already read.) */ void lvmcache_commit_metadata(const char *vgname) { struct lvmcache_vginfo *vginfo; if (!(vginfo = lvmcache_vginfo_from_vgname(vgname, NULL))) return; if (vginfo->precommitted) { log_debug_cache("Precommitted metadata cache: VG %s upgraded to committed.", vginfo->vgname); vginfo->precommitted = 0; } } void lvmcache_drop_metadata(const char *vgname, int drop_precommitted) { if (lvmcache_vgname_is_locked(VG_GLOBAL)) return; /* For VG_ORPHANS, we need to invalidate all labels on orphan PVs. */ if (!strcmp(vgname, VG_ORPHANS)) { _drop_metadata(FMT_TEXT_ORPHAN_VG_NAME, 0); _drop_metadata(FMT_LVM1_ORPHAN_VG_NAME, 0); _drop_metadata(FMT_POOL_ORPHAN_VG_NAME, 0); /* Indicate that PVs could now be missing from the cache */ init_full_scan_done(0); } else _drop_metadata(vgname, drop_precommitted); } /* * Ensure vgname2 comes after vgname1 alphabetically. * Orphan locks come last. * VG_GLOBAL comes first. */ static int _vgname_order_correct(const char *vgname1, const char *vgname2) { if (is_global_vg(vgname1)) return 1; if (is_global_vg(vgname2)) return 0; if (is_orphan_vg(vgname1)) return 0; if (is_orphan_vg(vgname2)) return 1; if (strcmp(vgname1, vgname2) < 0) return 1; return 0; } void lvmcache_lock_ordering(int enable) { _suppress_lock_ordering = !enable; } /* * Ensure VG locks are acquired in alphabetical order. */ int lvmcache_verify_lock_order(const char *vgname) { struct dm_hash_node *n; const char *vgname2; if (_suppress_lock_ordering) return 1; if (!_lock_hash) return_0; dm_hash_iterate(n, _lock_hash) { if (!dm_hash_get_data(_lock_hash, n)) return_0; if (!(vgname2 = dm_hash_get_key(_lock_hash, n))) { log_error(INTERNAL_ERROR "VG lock %s hits NULL.", vgname); return 0; } if (!_vgname_order_correct(vgname2, vgname)) { log_errno(EDEADLK, INTERNAL_ERROR "VG lock %s must " "be requested before %s, not after.", vgname, vgname2); return 0; } } return 1; } void lvmcache_lock_vgname(const char *vgname, int read_only __attribute__((unused))) { if (!_lock_hash && !lvmcache_init()) { log_error("Internal cache initialisation failed"); return; } if (dm_hash_lookup(_lock_hash, vgname)) log_error(INTERNAL_ERROR "Nested locking attempted on VG %s.", vgname); if (!dm_hash_insert(_lock_hash, vgname, (void *) 1)) log_error("Cache locking failure for %s", vgname); if (strcmp(vgname, VG_GLOBAL)) { _update_cache_lock_state(vgname, 1); _vgs_locked++; } } int lvmcache_vgname_is_locked(const char *vgname) { if (!_lock_hash) return 0; return dm_hash_lookup(_lock_hash, is_orphan_vg(vgname) ? VG_ORPHANS : vgname) ? 1 : 0; } void lvmcache_unlock_vgname(const char *vgname) { if (!dm_hash_lookup(_lock_hash, vgname)) log_error(INTERNAL_ERROR "Attempt to unlock unlocked VG %s.", vgname); if (strcmp(vgname, VG_GLOBAL)) _update_cache_lock_state(vgname, 0); dm_hash_remove(_lock_hash, vgname); /* FIXME Do this per-VG */ if (strcmp(vgname, VG_GLOBAL) && !--_vgs_locked) { dev_close_all(); dev_size_seqno_inc(); /* invalidate all cached dev sizes */ } } int lvmcache_vgs_locked(void) { return _vgs_locked; } /* * When lvmcache sees a duplicate PV, this is set. * process_each_pv() can avoid searching for duplicates * by checking this and seeing that no duplicate PVs exist. */ int lvmcache_found_duplicate_pvs(void) { return _found_duplicate_pvs; } static void _vginfo_attach_info(struct lvmcache_vginfo *vginfo, struct lvmcache_info *info) { if (!vginfo) return; info->vginfo = vginfo; dm_list_add(&vginfo->infos, &info->list); } static void _vginfo_detach_info(struct lvmcache_info *info) { if (!dm_list_empty(&info->list)) { dm_list_del(&info->list); dm_list_init(&info->list); } info->vginfo = NULL; } /* If vgid supplied, require a match. */ struct lvmcache_vginfo *lvmcache_vginfo_from_vgname(const char *vgname, const char *vgid) { struct lvmcache_vginfo *vginfo; if (!vgname) return lvmcache_vginfo_from_vgid(vgid); if (!_vgname_hash) { log_debug_cache(INTERNAL_ERROR "Internal cache is no yet initialized."); return NULL; } if (!(vginfo = dm_hash_lookup(_vgname_hash, vgname))) { log_debug_cache("Metadata cache has no info for vgname: \"%s\"", vgname); return NULL; } if (vgid) do if (!strncmp(vgid, vginfo->vgid, ID_LEN)) return vginfo; while ((vginfo = vginfo->next)); if (!vginfo) log_debug_cache("Metadata cache has not found vgname \"%s\" with vgid \"%." DM_TO_STRING(ID_LEN) "s\".", vgname, vgid ? : ""); return vginfo; } const struct format_type *lvmcache_fmt_from_vgname(struct cmd_context *cmd, const char *vgname, const char *vgid, unsigned revalidate_labels) { struct lvmcache_vginfo *vginfo; struct lvmcache_info *info; struct label *label; struct dm_list *devh, *tmp; struct dm_list devs; struct device_list *devl; struct volume_group *vg; const struct format_type *fmt; char vgid_found[ID_LEN + 1] __attribute__((aligned(8))); if (!(vginfo = lvmcache_vginfo_from_vgname(vgname, vgid))) { if (!lvmetad_active()) return NULL; /* too bad */ /* If we don't have the info but we have lvmetad, we can ask * there before failing. */ if ((vg = lvmetad_vg_lookup(cmd, vgname, vgid))) { fmt = vg->fid->fmt; release_vg(vg); return fmt; } return NULL; } /* * If this function is called repeatedly, only the first one needs to revalidate. */ if (!revalidate_labels) goto out; /* * This function is normally called before reading metadata so * we check cached labels here. Unfortunately vginfo is volatile. */ dm_list_init(&devs); dm_list_iterate_items(info, &vginfo->infos) { if (!(devl = dm_malloc(sizeof(*devl)))) { log_error("device_list element allocation failed"); return NULL; } devl->dev = info->dev; dm_list_add(&devs, &devl->list); } memcpy(vgid_found, vginfo->vgid, sizeof(vgid_found)); dm_list_iterate_safe(devh, tmp, &devs) { devl = dm_list_item(devh, struct device_list); (void) label_read(devl->dev, &label, UINT64_C(0)); dm_list_del(&devl->list); dm_free(devl); } /* If vginfo changed, caller needs to rescan */ if (!(vginfo = lvmcache_vginfo_from_vgname(vgname, vgid_found)) || strncmp(vginfo->vgid, vgid_found, ID_LEN)) return NULL; out: return vginfo->fmt; } struct lvmcache_vginfo *lvmcache_vginfo_from_vgid(const char *vgid) { struct lvmcache_vginfo *vginfo; char id[ID_LEN + 1] __attribute__((aligned(8))); if (!_vgid_hash || !vgid) { log_debug_cache(INTERNAL_ERROR "Internal cache cannot lookup vgid."); return NULL; } /* vgid not necessarily NULL-terminated */ strncpy(&id[0], vgid, ID_LEN); id[ID_LEN] = '\0'; if (!(vginfo = dm_hash_lookup(_vgid_hash, id))) { log_debug_cache("Metadata cache has no info for vgid \"%s\"", id); return NULL; } return vginfo; } const char *lvmcache_vgname_from_vgid(struct dm_pool *mem, const char *vgid) { struct lvmcache_vginfo *vginfo; const char *vgname = NULL; if ((vginfo = lvmcache_vginfo_from_vgid(vgid))) vgname = vginfo->vgname; if (mem && vgname) return dm_pool_strdup(mem, vgname); return vgname; } const char *lvmcache_vgid_from_vgname(struct cmd_context *cmd, const char *vgname) { struct lvmcache_vginfo *vginfo; if (!(vginfo = dm_hash_lookup(_vgname_hash, vgname))) return_NULL; if (!vginfo->next) return dm_pool_strdup(cmd->mem, vginfo->vgid); /* * There are multiple VGs with this name to choose from. * Return an error because we don't know which VG is intended. */ return NULL; } static int _info_is_valid(struct lvmcache_info *info) { if (info->status & CACHE_INVALID) return 0; /* * The caller must hold the VG lock to manipulate metadata. * In a cluster, remote nodes sometimes read metadata in the * knowledge that the controlling node is holding the lock. * So if the VG appears to be unlocked here, it should be safe * to use the cached value. */ if (info->vginfo && !lvmcache_vgname_is_locked(info->vginfo->vgname)) return 1; if (!(info->status & CACHE_LOCKED)) return 0; return 1; } static int _vginfo_is_valid(struct lvmcache_vginfo *vginfo) { struct lvmcache_info *info; /* Invalid if any info is invalid */ dm_list_iterate_items(info, &vginfo->infos) if (!_info_is_valid(info)) return 0; return 1; } /* vginfo is invalid if it does not contain at least one valid info */ static int _vginfo_is_invalid(struct lvmcache_vginfo *vginfo) { struct lvmcache_info *info; dm_list_iterate_items(info, &vginfo->infos) if (_info_is_valid(info)) return 0; return 1; } /* * If valid_only is set, data will only be returned if the cached data is * known still to be valid. */ struct lvmcache_info *lvmcache_info_from_pvid(const char *pvid, int valid_only) { struct lvmcache_info *info; char id[ID_LEN + 1] __attribute__((aligned(8))); if (!_pvid_hash || !pvid) return NULL; strncpy(&id[0], pvid, ID_LEN); id[ID_LEN] = '\0'; if (!(info = dm_hash_lookup(_pvid_hash, id))) return NULL; if (valid_only && !_info_is_valid(info)) return NULL; return info; } const char *lvmcache_vgname_from_info(struct lvmcache_info *info) { if (info->vginfo) return info->vginfo->vgname; return NULL; } char *lvmcache_vgname_from_pvid(struct cmd_context *cmd, const char *pvid) { struct lvmcache_info *info; char *vgname; if (!lvmcache_device_from_pvid(cmd, (const struct id *)pvid, NULL, NULL)) { log_error("Couldn't find device with uuid %s.", pvid); return NULL; } info = lvmcache_info_from_pvid(pvid, 0); if (!info) return_NULL; if (!(vgname = dm_pool_strdup(cmd->mem, info->vginfo->vgname))) { log_errno(ENOMEM, "vgname allocation failed"); return NULL; } return vgname; } static void _rescan_entry(struct lvmcache_info *info) { struct label *label; if (info->status & CACHE_INVALID) (void) label_read(info->dev, &label, UINT64_C(0)); } static int _scan_invalid(void) { dm_hash_iter(_pvid_hash, (dm_hash_iterate_fn) _rescan_entry); return 1; } /* * lvmcache_label_scan() remembers that it has already * been called, and will not scan labels if it's called * again. (It will rescan "INVALID" devices if called again.) * * To force lvmcache_label_scan() to rescan labels on all devices, * call lvmcache_force_next_label_scan() before calling * lvmcache_label_scan(). */ static int _force_label_scan; void lvmcache_force_next_label_scan(void) { _force_label_scan = 1; } int lvmcache_label_scan(struct cmd_context *cmd) { struct label *label; struct dev_iter *iter; struct device *dev; struct format_type *fmt; int dev_count = 0; int r = 0; if (lvmetad_active()) return 1; /* Avoid recursion when a PVID can't be found! */ if (_scanning_in_progress) return 0; _scanning_in_progress = 1; if (!_vgname_hash && !lvmcache_init()) { log_error("Internal cache initialisation failed"); goto out; } if (_has_scanned && !_force_label_scan) { r = _scan_invalid(); goto out; } if (_force_label_scan && (cmd->full_filter && !cmd->full_filter->use_count) && !refresh_filters(cmd)) goto_out; if (!cmd->full_filter || !(iter = dev_iter_create(cmd->full_filter, _force_label_scan))) { log_error("dev_iter creation failed"); goto out; } log_very_verbose("Scanning device labels"); while ((dev = dev_iter_get(iter))) { (void) label_read(dev, &label, UINT64_C(0)); dev_count++; } dev_iter_destroy(iter); log_very_verbose("Scanned %d device labels", dev_count); _has_scanned = 1; /* Perform any format-specific scanning e.g. text files */ if (cmd->independent_metadata_areas) dm_list_iterate_items(fmt, &cmd->formats) if (fmt->ops->scan && !fmt->ops->scan(fmt, NULL)) goto out; /* * If we are a long-lived process, write out the updated persistent * device cache for the benefit of short-lived processes. */ if (_force_label_scan && cmd->is_long_lived && cmd->dump_filter && cmd->full_filter && cmd->full_filter->dump && !cmd->full_filter->dump(cmd->full_filter, 0)) stack; r = 1; out: _scanning_in_progress = 0; _force_label_scan = 0; return r; } struct volume_group *lvmcache_get_vg(struct cmd_context *cmd, const char *vgname, const char *vgid, unsigned precommitted) { struct lvmcache_vginfo *vginfo; struct volume_group *vg = NULL; struct format_instance *fid; struct format_instance_ctx fic; /* * We currently do not store precommitted metadata in lvmetad at * all. This means that any request for precommitted metadata is served * using the classic scanning mechanics, and read from disk or from * lvmcache. */ if (lvmetad_active() && !precommitted) { /* Still serve the locally cached VG if available */ if (vgid && (vginfo = lvmcache_vginfo_from_vgid(vgid)) && vginfo->vgmetadata && (vg = vginfo->cached_vg)) goto out; return lvmetad_vg_lookup(cmd, vgname, vgid); } if (!vgid || !(vginfo = lvmcache_vginfo_from_vgid(vgid)) || !vginfo->vgmetadata) return NULL; if (!_vginfo_is_valid(vginfo)) return NULL; /* * Don't return cached data if either: * (i) precommitted metadata is requested but we don't have it cached * - caller should read it off disk; * (ii) live metadata is requested but we have precommitted metadata cached * and no devices are suspended so caller may read it off disk. * * If live metadata is requested but we have precommitted metadata cached * and devices are suspended, we assume this precommitted metadata has * already been preloaded and committed so it's OK to return it as live. * Note that we do not clear the PRECOMMITTED flag. */ if ((precommitted && !vginfo->precommitted) || (!precommitted && vginfo->precommitted && !critical_section())) return NULL; /* Use already-cached VG struct when available */ if ((vg = vginfo->cached_vg) && !vginfo->cached_vg_invalidated) goto out; release_vg(vginfo->cached_vg); fic.type = FMT_INSTANCE_MDAS | FMT_INSTANCE_AUX_MDAS; fic.context.vg_ref.vg_name = vginfo->vgname; fic.context.vg_ref.vg_id = vgid; if (!(fid = vginfo->fmt->ops->create_instance(vginfo->fmt, &fic))) return_NULL; /* Build config tree from vgmetadata, if not yet cached */ if (!vginfo->cft && !(vginfo->cft = dm_config_from_string(vginfo->vgmetadata))) goto_bad; if (!(vg = import_vg_from_config_tree(vginfo->cft, fid))) goto_bad; /* Cache VG struct for reuse */ vginfo->cached_vg = vg; vginfo->holders = 1; vginfo->vg_use_count = 0; vginfo->cached_vg_invalidated = 0; vg->vginfo = vginfo; if (!dm_pool_lock(vg->vgmem, detect_internal_vg_cache_corruption())) goto_bad; out: vginfo->holders++; vginfo->vg_use_count++; log_debug_cache("Using cached %smetadata for VG %s with %u holder(s).", vginfo->precommitted ? "pre-committed " : "", vginfo->vgname, vginfo->holders); return vg; bad: _free_cached_vgmetadata(vginfo); return NULL; } // #if 0 int lvmcache_vginfo_holders_dec_and_test_for_zero(struct lvmcache_vginfo *vginfo) { log_debug_cache("VG %s decrementing %d holder(s) at %p.", vginfo->cached_vg->name, vginfo->holders, vginfo->cached_vg); if (--vginfo->holders) return 0; if (vginfo->vg_use_count > 1) log_debug_cache("VG %s reused %d times.", vginfo->cached_vg->name, vginfo->vg_use_count); /* Debug perform crc check only when it's been used more then once */ if (!dm_pool_unlock(vginfo->cached_vg->vgmem, detect_internal_vg_cache_corruption() && (vginfo->vg_use_count > 1))) stack; vginfo->cached_vg->vginfo = NULL; vginfo->cached_vg = NULL; return 1; } // #endif int lvmcache_get_vgnameids(struct cmd_context *cmd, int include_internal, struct dm_list *vgnameids) { struct vgnameid_list *vgnl; struct lvmcache_vginfo *vginfo; lvmcache_label_scan(cmd); dm_list_iterate_items(vginfo, &_vginfos) { if (!include_internal && is_orphan_vg(vginfo->vgname)) continue; if (!(vgnl = dm_pool_alloc(cmd->mem, sizeof(*vgnl)))) { log_error("vgnameid_list allocation failed."); return 0; } vgnl->vgid = dm_pool_strdup(cmd->mem, vginfo->vgid); vgnl->vg_name = dm_pool_strdup(cmd->mem, vginfo->vgname); if (!vgnl->vgid || !vgnl->vg_name) { log_error("vgnameid_list member allocation failed."); return 0; } dm_list_add(vgnameids, &vgnl->list); } return 1; } struct dm_list *lvmcache_get_vgids(struct cmd_context *cmd, int include_internal) { struct dm_list *vgids; struct lvmcache_vginfo *vginfo; // TODO plug into lvmetad here automagically? lvmcache_label_scan(cmd); if (!(vgids = str_list_create(cmd->mem))) { log_error("vgids list allocation failed"); return NULL; } dm_list_iterate_items(vginfo, &_vginfos) { if (!include_internal && is_orphan_vg(vginfo->vgname)) continue; if (!str_list_add(cmd->mem, vgids, dm_pool_strdup(cmd->mem, vginfo->vgid))) { log_error("strlist allocation failed"); return NULL; } } return vgids; } struct dm_list *lvmcache_get_vgnames(struct cmd_context *cmd, int include_internal) { struct dm_list *vgnames; struct lvmcache_vginfo *vginfo; lvmcache_label_scan(cmd); if (!(vgnames = str_list_create(cmd->mem))) { log_errno(ENOMEM, "vgnames list allocation failed"); return NULL; } dm_list_iterate_items(vginfo, &_vginfos) { if (!include_internal && is_orphan_vg(vginfo->vgname)) continue; if (!str_list_add(cmd->mem, vgnames, dm_pool_strdup(cmd->mem, vginfo->vgname))) { log_errno(ENOMEM, "strlist allocation failed"); return NULL; } } return vgnames; } struct dm_list *lvmcache_get_pvids(struct cmd_context *cmd, const char *vgname, const char *vgid) { struct dm_list *pvids; struct lvmcache_vginfo *vginfo; struct lvmcache_info *info; if (!(pvids = str_list_create(cmd->mem))) { log_error("pvids list allocation failed"); return NULL; } if (!(vginfo = lvmcache_vginfo_from_vgname(vgname, vgid))) return pvids; dm_list_iterate_items(info, &vginfo->infos) { if (!str_list_add(cmd->mem, pvids, dm_pool_strdup(cmd->mem, info->dev->pvid))) { log_error("strlist allocation failed"); return NULL; } } return pvids; } static struct device *_device_from_pvid(const struct id *pvid, uint64_t *label_sector) { struct lvmcache_info *info; struct label *label; if ((info = lvmcache_info_from_pvid((const char *) pvid, 0))) { if (lvmetad_active()) { if (info->label && label_sector) *label_sector = info->label->sector; return info->dev; } if (label_read(info->dev, &label, UINT64_C(0))) { info = (struct lvmcache_info *) label->info; if (id_equal(pvid, (struct id *) &info->dev->pvid)) { if (label_sector) *label_sector = label->sector; return info->dev; } } } return NULL; } struct device *lvmcache_device_from_pvid(struct cmd_context *cmd, const struct id *pvid, unsigned *scan_done_once, uint64_t *label_sector) { struct device *dev; /* Already cached ? */ dev = _device_from_pvid(pvid, label_sector); if (dev) return dev; lvmcache_label_scan(cmd); /* Try again */ dev = _device_from_pvid(pvid, label_sector); if (dev) return dev; if (critical_section() || (scan_done_once && *scan_done_once)) return NULL; lvmcache_force_next_label_scan(); lvmcache_label_scan(cmd); if (scan_done_once) *scan_done_once = 1; /* Try again */ dev = _device_from_pvid(pvid, label_sector); if (dev) return dev; return NULL; } const char *lvmcache_pvid_from_devname(struct cmd_context *cmd, const char *devname) { struct device *dev; struct label *label; if (!(dev = dev_cache_get(devname, cmd->filter))) { log_error("%s: Couldn't find device. Check your filters?", devname); return NULL; } if (!(label_read(dev, &label, UINT64_C(0)))) return NULL; return dev->pvid; } static int _free_vginfo(struct lvmcache_vginfo *vginfo) { struct lvmcache_vginfo *primary_vginfo, *vginfo2; int r = 1; _free_cached_vgmetadata(vginfo); vginfo2 = primary_vginfo = lvmcache_vginfo_from_vgname(vginfo->vgname, NULL); if (vginfo == primary_vginfo) { dm_hash_remove(_vgname_hash, vginfo->vgname); if (vginfo->next && !dm_hash_insert(_vgname_hash, vginfo->vgname, vginfo->next)) { log_error("_vgname_hash re-insertion for %s failed", vginfo->vgname); r = 0; } } else while (vginfo2) { if (vginfo2->next == vginfo) { vginfo2->next = vginfo->next; break; } vginfo2 = vginfo2->next; } if (vginfo->system_id) dm_free(vginfo->system_id); dm_free(vginfo->vgname); dm_free(vginfo->creation_host); if (*vginfo->vgid && _vgid_hash && lvmcache_vginfo_from_vgid(vginfo->vgid) == vginfo) dm_hash_remove(_vgid_hash, vginfo->vgid); dm_list_del(&vginfo->list); dm_free(vginfo); return r; } /* * vginfo must be info->vginfo unless info is NULL */ static int _drop_vginfo(struct lvmcache_info *info, struct lvmcache_vginfo *vginfo) { if (info) _vginfo_detach_info(info); /* vginfo still referenced? */ if (!vginfo || is_orphan_vg(vginfo->vgname) || !dm_list_empty(&vginfo->infos)) return 1; if (!_free_vginfo(vginfo)) return_0; return 1; } void lvmcache_del(struct lvmcache_info *info) { if (info->dev->pvid[0] && _pvid_hash) dm_hash_remove(_pvid_hash, info->dev->pvid); _drop_vginfo(info, info->vginfo); info->label->labeller->ops->destroy_label(info->label->labeller, info->label); dm_free(info); return; } static int _lvmcache_update_pvid(struct lvmcache_info *info, const char *pvid) { /* * Nothing to do if already stored with same pvid. */ if (((dm_hash_lookup(_pvid_hash, pvid)) == info) && !strcmp(info->dev->pvid, pvid)) return 1; if (*info->dev->pvid) dm_hash_remove(_pvid_hash, info->dev->pvid); strncpy(info->dev->pvid, pvid, sizeof(info->dev->pvid)); if (!dm_hash_insert(_pvid_hash, pvid, info)) { log_error("_lvmcache_update: pvid insertion failed: %s", pvid); return 0; } return 1; } /* * vginfo must be info->vginfo unless info is NULL (orphans) */ static int _lvmcache_update_vgid(struct lvmcache_info *info, struct lvmcache_vginfo *vginfo, const char *vgid) { if (!vgid || !vginfo || !strncmp(vginfo->vgid, vgid, ID_LEN)) return 1; if (vginfo && *vginfo->vgid) dm_hash_remove(_vgid_hash, vginfo->vgid); if (!vgid) { /* FIXME: unreachable code path */ log_debug_cache("lvmcache: %s: clearing VGID", info ? dev_name(info->dev) : vginfo->vgname); return 1; } strncpy(vginfo->vgid, vgid, ID_LEN); vginfo->vgid[ID_LEN] = '\0'; if (!dm_hash_insert(_vgid_hash, vginfo->vgid, vginfo)) { log_error("_lvmcache_update: vgid hash insertion failed: %s", vginfo->vgid); return 0; } if (!is_orphan_vg(vginfo->vgname)) log_debug_cache("lvmcache: %s: setting %s VGID to %s", (info) ? dev_name(info->dev) : "", vginfo->vgname, vginfo->vgid); return 1; } static int _insert_vginfo(struct lvmcache_vginfo *new_vginfo, const char *vgid, uint32_t vgstatus, const char *creation_host, struct lvmcache_vginfo *primary_vginfo) { struct lvmcache_vginfo *last_vginfo = primary_vginfo; char uuid_primary[64] __attribute__((aligned(8))); char uuid_new[64] __attribute__((aligned(8))); int use_new = 0; /* Pre-existing VG takes precedence. Unexported VG takes precedence. */ if (primary_vginfo) { if (!id_write_format((const struct id *)vgid, uuid_new, sizeof(uuid_new))) return_0; if (!id_write_format((const struct id *)&primary_vginfo->vgid, uuid_primary, sizeof(uuid_primary))) return_0; /* * vginfo is kept for each VG with the same name. * They are saved with the vginfo->next list. * These checks just decide the ordering of * that list. * * FIXME: it should no longer matter what order * the vginfo's are kept in, so we can probably * remove these comparisons and reordering entirely. * * If Primary not exported, new exported => keep * Else Primary exported, new not exported => change * Else Primary has hostname for this machine => keep * Else Primary has no hostname, new has one => change * Else New has hostname for this machine => change * Else Keep primary. */ if (!(primary_vginfo->status & EXPORTED_VG) && (vgstatus & EXPORTED_VG)) log_verbose("Cache: Duplicate VG name %s: " "Existing %s takes precedence over " "exported %s", new_vginfo->vgname, uuid_primary, uuid_new); else if ((primary_vginfo->status & EXPORTED_VG) && !(vgstatus & EXPORTED_VG)) { log_verbose("Cache: Duplicate VG name %s: " "%s takes precedence over exported %s", new_vginfo->vgname, uuid_new, uuid_primary); use_new = 1; } else if (primary_vginfo->creation_host && !strcmp(primary_vginfo->creation_host, primary_vginfo->fmt->cmd->hostname)) log_verbose("Cache: Duplicate VG name %s: " "Existing %s (created here) takes precedence " "over %s", new_vginfo->vgname, uuid_primary, uuid_new); else if (!primary_vginfo->creation_host && creation_host) { log_verbose("Cache: Duplicate VG name %s: " "%s (with creation_host) takes precedence over %s", new_vginfo->vgname, uuid_new, uuid_primary); use_new = 1; } else if (creation_host && !strcmp(creation_host, primary_vginfo->fmt->cmd->hostname)) { log_verbose("Cache: Duplicate VG name %s: " "%s (created here) takes precedence over %s", new_vginfo->vgname, uuid_new, uuid_primary); use_new = 1; } else { log_verbose("Cache: Duplicate VG name %s: " "Prefer existing %s vs new %s", new_vginfo->vgname, uuid_primary, uuid_new); } if (!use_new) { while (last_vginfo->next) last_vginfo = last_vginfo->next; last_vginfo->next = new_vginfo; return 1; } dm_hash_remove(_vgname_hash, primary_vginfo->vgname); } if (!dm_hash_insert(_vgname_hash, new_vginfo->vgname, new_vginfo)) { log_error("cache_update: vg hash insertion failed: %s", new_vginfo->vgname); return 0; } if (primary_vginfo) new_vginfo->next = primary_vginfo; return 1; } static int _lvmcache_update_vgname(struct lvmcache_info *info, const char *vgname, const char *vgid, uint32_t vgstatus, const char *creation_host, const struct format_type *fmt) { struct lvmcache_vginfo *vginfo, *primary_vginfo, *orphan_vginfo; struct lvmcache_info *info2, *info3; char mdabuf[32]; // struct lvmcache_vginfo *old_vginfo, *next; if (!vgname || (info && info->vginfo && !strcmp(info->vginfo->vgname, vgname))) return 1; /* Remove existing vginfo entry */ if (info) _drop_vginfo(info, info->vginfo); /* Get existing vginfo or create new one */ if (!(vginfo = lvmcache_vginfo_from_vgname(vgname, vgid))) { /*** FIXME - vginfo ends up duplicated instead of renamed. // Renaming? This lookup fails. if ((vginfo = vginfo_from_vgid(vgid))) { next = vginfo->next; old_vginfo = vginfo_from_vgname(vginfo->vgname, NULL); if (old_vginfo == vginfo) { dm_hash_remove(_vgname_hash, old_vginfo->vgname); if (old_vginfo->next) { if (!dm_hash_insert(_vgname_hash, old_vginfo->vgname, old_vginfo->next)) { log_error("vg hash re-insertion failed: %s", old_vginfo->vgname); return 0; } } } else do { if (old_vginfo->next == vginfo) { old_vginfo->next = vginfo->next; break; } } while ((old_vginfo = old_vginfo->next)); vginfo->next = NULL; dm_free(vginfo->vgname); if (!(vginfo->vgname = dm_strdup(vgname))) { log_error("cache vgname alloc failed for %s", vgname); return 0; } // Rename so can assume new name does not already exist if (!dm_hash_insert(_vgname_hash, vginfo->vgname, vginfo->next)) { log_error("vg hash re-insertion failed: %s", vginfo->vgname); return 0; } } else { ***/ if (!(vginfo = dm_zalloc(sizeof(*vginfo)))) { log_error("lvmcache_update_vgname: list alloc failed"); return 0; } if (!(vginfo->vgname = dm_strdup(vgname))) { dm_free(vginfo); log_error("cache vgname alloc failed for %s", vgname); return 0; } dm_list_init(&vginfo->infos); /* * If we're scanning and there's an invalidated entry, remove it. * Otherwise we risk bogus warnings of duplicate VGs. */ while ((primary_vginfo = lvmcache_vginfo_from_vgname(vgname, NULL)) && _scanning_in_progress && _vginfo_is_invalid(primary_vginfo)) { orphan_vginfo = lvmcache_vginfo_from_vgname(primary_vginfo->fmt->orphan_vg_name, NULL); if (!orphan_vginfo) { log_error(INTERNAL_ERROR "Orphan vginfo %s lost from cache.", primary_vginfo->fmt->orphan_vg_name); dm_free(vginfo->vgname); dm_free(vginfo); return 0; } dm_list_iterate_items_safe(info2, info3, &primary_vginfo->infos) { _vginfo_detach_info(info2); _vginfo_attach_info(orphan_vginfo, info2); if (info2->mdas.n) sprintf(mdabuf, " with %u mdas", dm_list_size(&info2->mdas)); else mdabuf[0] = '\0'; log_debug_cache("lvmcache: %s: now in VG %s%s%s%s%s", dev_name(info2->dev), vgname, orphan_vginfo->vgid[0] ? " (" : "", orphan_vginfo->vgid[0] ? orphan_vginfo->vgid : "", orphan_vginfo->vgid[0] ? ")" : "", mdabuf); } if (!_drop_vginfo(NULL, primary_vginfo)) return_0; } if (!_insert_vginfo(vginfo, vgid, vgstatus, creation_host, primary_vginfo)) { dm_free(vginfo->vgname); dm_free(vginfo); return 0; } /* Ensure orphans appear last on list_iterate */ if (is_orphan_vg(vgname)) dm_list_add(&_vginfos, &vginfo->list); else dm_list_add_h(&_vginfos, &vginfo->list); /*** } ***/ } if (info) _vginfo_attach_info(vginfo, info); else if (!_lvmcache_update_vgid(NULL, vginfo, vgid)) /* Orphans */ return_0; _update_cache_vginfo_lock_state(vginfo, lvmcache_vgname_is_locked(vgname)); /* FIXME Check consistency of list! */ vginfo->fmt = fmt; if (info) { if (info->mdas.n) sprintf(mdabuf, " with %u mdas", dm_list_size(&info->mdas)); else mdabuf[0] = '\0'; log_debug_cache("lvmcache: %s: now in VG %s%s%s%s%s", dev_name(info->dev), vgname, vginfo->vgid[0] ? " (" : "", vginfo->vgid[0] ? vginfo->vgid : "", vginfo->vgid[0] ? ")" : "", mdabuf); } else log_debug_cache("lvmcache: initialised VG %s", vgname); return 1; } static int _lvmcache_update_vgstatus(struct lvmcache_info *info, uint32_t vgstatus, const char *creation_host, const char *lock_type, const char *system_id) { if (!info || !info->vginfo) return 1; if ((info->vginfo->status & EXPORTED_VG) != (vgstatus & EXPORTED_VG)) log_debug_cache("lvmcache: %s: VG %s %s exported", dev_name(info->dev), info->vginfo->vgname, vgstatus & EXPORTED_VG ? "now" : "no longer"); info->vginfo->status = vgstatus; if (!creation_host) goto set_lock_type; if (info->vginfo->creation_host && !strcmp(creation_host, info->vginfo->creation_host)) goto set_lock_type; if (info->vginfo->creation_host) dm_free(info->vginfo->creation_host); if (!(info->vginfo->creation_host = dm_strdup(creation_host))) { log_error("cache creation host alloc failed for %s", creation_host); return 0; } log_debug_cache("lvmcache: %s: VG %s: Set creation host to %s.", dev_name(info->dev), info->vginfo->vgname, creation_host); set_lock_type: if (!lock_type) goto set_system_id; if (info->vginfo->lock_type && !strcmp(lock_type, info->vginfo->lock_type)) goto set_system_id; if (info->vginfo->lock_type) dm_free(info->vginfo->lock_type); if (!(info->vginfo->lock_type = dm_strdup(lock_type))) { log_error("cache lock_type alloc failed for %s", lock_type); return 0; } log_debug_cache("lvmcache: %s: VG %s: Set lock_type to %s.", dev_name(info->dev), info->vginfo->vgname, lock_type); set_system_id: if (!system_id) goto out; if (info->vginfo->system_id && !strcmp(system_id, info->vginfo->system_id)) goto out; if (info->vginfo->system_id) dm_free(info->vginfo->system_id); if (!(info->vginfo->system_id = dm_strdup(system_id))) { log_error("cache system_id alloc failed for %s", system_id); return 0; } log_debug_cache("lvmcache: %s: VG %s: Set system_id to %s.", dev_name(info->dev), info->vginfo->vgname, system_id); out: return 1; } static int _lvmcache_update_vg_mda_info(struct lvmcache_info *info, uint32_t mda_checksum, size_t mda_size) { if (!info || !info->vginfo || !mda_size) return 1; if (info->vginfo->mda_checksum == mda_checksum || info->vginfo->mda_size == mda_size) return 1; info->vginfo->mda_checksum = mda_checksum; info->vginfo->mda_size = mda_size; /* FIXME Add checksum index */ log_debug_cache("lvmcache: %s: VG %s: Stored metadata checksum %" PRIu32 " with size %" PRIsize_t ".", dev_name(info->dev), info->vginfo->vgname, mda_checksum, mda_size); return 1; } int lvmcache_add_orphan_vginfo(const char *vgname, struct format_type *fmt) { if (!_lock_hash && !lvmcache_init()) { log_error("Internal cache initialisation failed"); return 0; } return _lvmcache_update_vgname(NULL, vgname, vgname, 0, "", fmt); } int lvmcache_update_vgname_and_id(struct lvmcache_info *info, struct lvmcache_vgsummary *vgsummary) { const char *vgname = vgsummary->vgname; const char *vgid = (char *)&vgsummary->vgid; if (!vgname && !info->vginfo) { log_error(INTERNAL_ERROR "NULL vgname handed to cache"); /* FIXME Remove this */ vgname = info->fmt->orphan_vg_name; vgid = vgname; } /* If PV without mdas is already in a real VG, don't make it orphan */ if (is_orphan_vg(vgname) && info->vginfo && mdas_empty_or_ignored(&info->mdas) && !is_orphan_vg(info->vginfo->vgname) && critical_section()) return 1; /* If making a PV into an orphan, any cached VG metadata may become * invalid, incorrectly still referencing device structs. * (Example: pvcreate -ff) */ if (is_orphan_vg(vgname) && info->vginfo && !is_orphan_vg(info->vginfo->vgname)) info->vginfo->cached_vg_invalidated = 1; /* If moving PV from orphan to real VG, always mark it valid */ if (!is_orphan_vg(vgname)) info->status &= ~CACHE_INVALID; if (!_lvmcache_update_vgname(info, vgname, vgid, vgsummary->vgstatus, vgsummary->creation_host, info->fmt) || !_lvmcache_update_vgid(info, info->vginfo, vgid) || !_lvmcache_update_vgstatus(info, vgsummary->vgstatus, vgsummary->creation_host, vgsummary->lock_type, vgsummary->system_id) || !_lvmcache_update_vg_mda_info(info, vgsummary->mda_checksum, vgsummary->mda_size)) return_0; return 1; } int lvmcache_update_vg(struct volume_group *vg, unsigned precommitted) { struct pv_list *pvl; struct lvmcache_info *info; char pvid_s[ID_LEN + 1] __attribute__((aligned(8))); struct lvmcache_vgsummary vgsummary = { .vgname = vg->name, .vgstatus = vg->status, .vgid = vg->id, .system_id = vg->system_id, .lock_type = vg->lock_type }; pvid_s[sizeof(pvid_s) - 1] = '\0'; dm_list_iterate_items(pvl, &vg->pvs) { strncpy(pvid_s, (char *) &pvl->pv->id, sizeof(pvid_s) - 1); /* FIXME Could pvl->pv->dev->pvid ever be different? */ if ((info = lvmcache_info_from_pvid(pvid_s, 0)) && !lvmcache_update_vgname_and_id(info, &vgsummary)) return_0; } /* store text representation of vg to cache */ if (vg->cmd->current_settings.cache_vgmetadata) _store_metadata(vg, precommitted); return 1; } /* * Replace pv->dev with dev so that dev will appear for reporting. */ void lvmcache_replace_dev(struct cmd_context *cmd, struct physical_volume *pv, struct device *dev) { struct lvmcache_info *info; char pvid_s[ID_LEN + 1] __attribute__((aligned(8))); strncpy(pvid_s, (char *) &pv->id, sizeof(pvid_s) - 1); pvid_s[sizeof(pvid_s) - 1] = '\0'; if (!(info = lvmcache_info_from_pvid(pvid_s, 0))) return; info->dev = dev; info->label->dev = dev; pv->dev = dev; } /* * We can see multiple different devices with the * same pvid, i.e. duplicates. * * There may be different reasons for seeing two * devices with the same pvid: * - multipath showing two paths to the same thing * - one device copied to another, e.g. with dd, * also referred to as cloned devices. * - a "subsystem" taking a device and creating * another device of its own that represents the * underlying device it is using, e.g. using dm * to create an identity mapping of a PV. * * Given duplicate devices, we have to choose one * of them to be the "preferred" dev, i.e. the one * that will be referenced in lvmcache, by pv->dev. * We can keep the existing dev, that's currently * used in lvmcache, or we can replace the existing * dev with the new duplicate. * * Regardless of which device is preferred, we need * to print messages explaining which devices were * found so that a user can sort out for themselves * what has happened if the preferred device is not * the one they are interested in. * * If a user wants to use the non-preferred device, * they will need to filter out the device that * lvm is preferring. * * The dev_subsystem calls check if the major number * of the dev is part of a subsystem like DM/MD/DRBD. * A dev that's part of a subsystem is preferred over a * duplicate of that dev that is not part of a * subsystem. * * The has_holders calls check if the device is being * used by another, and prefers one that's being used. * * FIXME: why do we prefer a device without holders * over a device with holders? We should understand * the reason for that choice. * * FIXME: there may be other reasons to prefer one * device over another: * * . are there other use/open counts we could check * beyond the holders? * * . check if either is bad/usable and prefer * the good one? * * . prefer the one with smaller minor number? * Might avoid disturbing things due to a new * transient duplicate? */ struct lvmcache_info *lvmcache_add(struct labeller *labeller, const char *pvid, struct device *dev, const char *vgname, const char *vgid, uint32_t vgstatus) { const struct format_type *fmt = labeller->fmt; struct dev_types *dt = fmt->cmd->dev_types; struct label *label; struct lvmcache_info *existing, *info; char pvid_s[ID_LEN + 1] __attribute__((aligned(8))); struct lvmcache_vgsummary vgsummary = { .vgname = vgname, .vgstatus = vgstatus, }; /* N.B. vgid is not NUL-terminated when called from _text_pv_write */ if (vgid) strncpy((char *)&vgsummary.vgid, vgid, sizeof(vgsummary.vgid)); if (!_vgname_hash && !lvmcache_init()) { log_error("Internal cache initialisation failed"); return NULL; } strncpy(pvid_s, pvid, sizeof(pvid_s) - 1); pvid_s[sizeof(pvid_s) - 1] = '\0'; if (!(existing = lvmcache_info_from_pvid(pvid_s, 0)) && !(existing = lvmcache_info_from_pvid(dev->pvid, 0))) { if (!(label = label_create(labeller))) return_NULL; if (!(info = dm_zalloc(sizeof(*info)))) { log_error("lvmcache_info allocation failed"); label_destroy(label); return NULL; } label->info = info; info->label = label; dm_list_init(&info->list); info->dev = dev; lvmcache_del_mdas(info); lvmcache_del_das(info); lvmcache_del_bas(info); } else { if (existing->dev != dev) { int old_in_subsystem = 0; int new_in_subsystem = 0; int old_is_dm = 0; int new_is_dm = 0; int old_has_holders = 0; int new_has_holders = 0; /* * Here are different devices with the same pvid: * duplicates. See comment above. */ /* * This flag tells the process_each_pv code to search * the devices list for duplicates, so that devices * can be processed together with their duplicates * (while processing the VG, rather than reporting * pv->dev under the VG, and its duplicate outside * the VG context.) */ _found_duplicate_pvs = 1; /* * The new dev may not have pvid set. * The process_each_pv code needs to have the pvid * set in each device to detect that the devices * are duplicates. */ strncpy(dev->pvid, pvid_s, sizeof(dev->pvid)); /* * Now decide if we are going to ignore the new * device, or replace the existing/old device in * lvmcache with the new one. */ old_in_subsystem = dev_subsystem_part_major(dt, existing->dev); new_in_subsystem = dev_subsystem_part_major(dt, dev); old_is_dm = dm_is_dm_major(MAJOR(existing->dev->dev)); new_is_dm = dm_is_dm_major(MAJOR(dev->dev)); old_has_holders = dm_device_has_holders(MAJOR(existing->dev->dev), MINOR(existing->dev->dev)); new_has_holders = dm_device_has_holders(MAJOR(dev->dev), MINOR(dev->dev)); if (old_has_holders && new_has_holders) { /* * This is not a selection of old or new, but * just a warning to be aware of. */ log_warn("WARNING: duplicate PV %s is being used from both devices %s and %s", pvid_s, dev_name(existing->dev), dev_name(dev)); } if (existing->vginfo->preferred_duplicates) { /* * The preferred duplicate devs have already * been chosen during a previous populating of * lvmcache, so just use the existing preferences. */ log_warn("Found duplicate PV %s: using existing dev %s", pvid_s, dev_name(existing->dev)); return NULL; } if (old_in_subsystem && !new_in_subsystem) { /* Use old, ignore new. */ log_warn("Found duplicate PV %s: using %s not %s", pvid_s, dev_name(existing->dev), dev_name(dev)); log_warn("Using duplicate PV %s from subsystem %s, ignoring %s", dev_name(existing->dev), dev_subsystem_name(dt, existing->dev), dev_name(dev)); return NULL; } else if (!old_in_subsystem && new_in_subsystem) { /* Use new, replace old. */ log_warn("Found duplicate PV %s: using %s not %s", pvid_s, dev_name(dev), dev_name(existing->dev)); log_warn("Using duplicate PV %s from subsystem %s, replacing %s", dev_name(dev), dev_subsystem_name(dt, dev), dev_name(existing->dev)); } else if (old_has_holders && !new_has_holders) { /* Use new, replace old. */ /* FIXME: why choose the one without olders? */ log_warn("Found duplicate PV %s: using %s not %s", pvid_s, dev_name(dev), dev_name(existing->dev)); log_warn("Using duplicate PV %s without holders, replacing %s", dev_name(dev), dev_name(existing->dev)); } else if (!old_has_holders && new_has_holders) { /* Use old, ignore new. */ log_warn("Found duplicate PV %s: using %s not %s", pvid_s, dev_name(existing->dev), dev_name(dev)); log_warn("Using duplicate PV %s without holders, ignoring %s", dev_name(existing->dev), dev_name(dev)); return NULL; } else if (old_is_dm && new_is_dm) { /* Use new, replace old. */ /* FIXME: why choose the new instead of the old? */ log_warn("Found duplicate PV %s: using %s not %s", pvid_s, dev_name(dev), dev_name(existing->dev)); log_warn("Using duplicate PV %s which is last seen, replacing %s", dev_name(dev), dev_name(existing->dev)); } else if (!strcmp(pvid_s, existing->dev->pvid)) { /* No criteria to use for preferring old or new. */ /* FIXME: why choose the new instead of the old? */ /* FIXME: a transient duplicate would be a reason * to select the old instead of the new. */ log_warn("Found duplicate PV %s: using %s not %s", pvid_s, dev_name(dev), dev_name(existing->dev)); log_warn("Using duplicate PV %s which is last seen, replacing %s", dev_name(dev), dev_name(existing->dev)); } } else { /* * The new dev is the same as the existing dev. * * FIXME: Why can't we just return NULL here if the * device already exists? Things don't seem to work * if we do that for some reason. */ log_debug_cache("Found same device %s with same pvid %s", dev_name(existing->dev), pvid_s); } /* * This happens when running pvcreate on an existing PV. */ if (strcmp(pvid_s, existing->dev->pvid)) { log_verbose("Replacing dev %s pvid %s with dev %s pvid %s", dev_name(existing->dev), existing->dev->pvid, dev_name(dev), pvid_s); } /* * Switch over to new preferred device. */ existing->dev = dev; info = existing; /* Has labeller changed? */ if (info->label->labeller != labeller) { label_destroy(info->label); if (!(info->label = label_create(labeller))) /* FIXME leaves info without label! */ return_NULL; info->label->info = info; } label = info->label; } info->fmt = labeller->fmt; info->status |= CACHE_INVALID; if (!_lvmcache_update_pvid(info, pvid_s)) { if (!existing) { dm_free(info); label_destroy(label); } return NULL; } if (!lvmcache_update_vgname_and_id(info, &vgsummary)) { if (!existing) { dm_hash_remove(_pvid_hash, pvid_s); strcpy(info->dev->pvid, ""); dm_free(info); label_destroy(label); } return NULL; } return info; } static void _lvmcache_destroy_entry(struct lvmcache_info *info) { _vginfo_detach_info(info); info->dev->pvid[0] = 0; label_destroy(info->label); dm_free(info); } static void _lvmcache_destroy_vgnamelist(struct lvmcache_vginfo *vginfo) { struct lvmcache_vginfo *next; do { next = vginfo->next; if (!_free_vginfo(vginfo)) stack; } while ((vginfo = next)); } static void _lvmcache_destroy_lockname(struct dm_hash_node *n) { char *vgname; if (!dm_hash_get_data(_lock_hash, n)) return; vgname = dm_hash_get_key(_lock_hash, n); if (!strcmp(vgname, VG_GLOBAL)) _vg_global_lock_held = 1; else log_error(INTERNAL_ERROR "Volume Group %s was not unlocked", dm_hash_get_key(_lock_hash, n)); } void lvmcache_destroy(struct cmd_context *cmd, int retain_orphans, int reset) { struct dm_hash_node *n; log_verbose("Wiping internal VG cache"); _has_scanned = 0; if (_vgid_hash) { dm_hash_destroy(_vgid_hash); _vgid_hash = NULL; } if (_pvid_hash) { dm_hash_iter(_pvid_hash, (dm_hash_iterate_fn) _lvmcache_destroy_entry); dm_hash_destroy(_pvid_hash); _pvid_hash = NULL; } if (_vgname_hash) { dm_hash_iter(_vgname_hash, (dm_hash_iterate_fn) _lvmcache_destroy_vgnamelist); dm_hash_destroy(_vgname_hash); _vgname_hash = NULL; } if (_lock_hash) { if (reset) _vg_global_lock_held = 0; else dm_hash_iterate(n, _lock_hash) _lvmcache_destroy_lockname(n); dm_hash_destroy(_lock_hash); _lock_hash = NULL; } if (!dm_list_empty(&_vginfos)) log_error(INTERNAL_ERROR "_vginfos list should be empty"); dm_list_init(&_vginfos); if (retain_orphans) if (!init_lvmcache_orphans(cmd)) stack; } int lvmcache_pvid_is_locked(const char *pvid) { struct lvmcache_info *info; info = lvmcache_info_from_pvid(pvid, 0); if (!info || !info->vginfo) return 0; return lvmcache_vgname_is_locked(info->vginfo->vgname); } int lvmcache_fid_add_mdas(struct lvmcache_info *info, struct format_instance *fid, const char *id, int id_len) { return fid_add_mdas(fid, &info->mdas, id, id_len); } int lvmcache_fid_add_mdas_pv(struct lvmcache_info *info, struct format_instance *fid) { return lvmcache_fid_add_mdas(info, fid, info->dev->pvid, ID_LEN); } int lvmcache_fid_add_mdas_vg(struct lvmcache_vginfo *vginfo, struct format_instance *fid) { struct lvmcache_info *info; dm_list_iterate_items(info, &vginfo->infos) { if (!lvmcache_fid_add_mdas_pv(info, fid)) return_0; } return 1; } static int _get_pv_if_in_vg(struct lvmcache_info *info, struct physical_volume *pv) { char vgname[NAME_LEN + 1]; char vgid[ID_LEN + 1]; if (info->vginfo && info->vginfo->vgname && !is_orphan_vg(info->vginfo->vgname)) { /* * get_pv_from_vg_by_id() may call * lvmcache_label_scan() and drop cached * vginfo so make a local copy of string. */ (void) dm_strncpy(vgname, info->vginfo->vgname, sizeof(vgname)); memcpy(vgid, info->vginfo->vgid, sizeof(vgid)); if (get_pv_from_vg_by_id(info->fmt, vgname, vgid, info->dev->pvid, pv)) return 1; } return 0; } int lvmcache_populate_pv_fields(struct lvmcache_info *info, struct physical_volume *pv, int scan_label_only) { struct data_area_list *da; /* Have we already cached vgname? */ if (!scan_label_only && _get_pv_if_in_vg(info, pv)) return 1; /* Perform full scan (just the first time) and try again */ if (!scan_label_only && !critical_section() && !full_scan_done()) { lvmcache_force_next_label_scan(); lvmcache_label_scan(info->fmt->cmd); if (_get_pv_if_in_vg(info, pv)) return 1; } /* Orphan */ pv->dev = info->dev; pv->fmt = info->fmt; pv->size = info->device_size >> SECTOR_SHIFT; pv->vg_name = FMT_TEXT_ORPHAN_VG_NAME; memcpy(&pv->id, &info->dev->pvid, sizeof(pv->id)); /* Currently only support exactly one data area */ if (dm_list_size(&info->das) != 1) { log_error("Must be exactly one data area (found %d) on PV %s", dm_list_size(&info->das), dev_name(info->dev)); return 0; } /* Currently only support one bootloader area at most */ if (dm_list_size(&info->bas) > 1) { log_error("Must be at most one bootloader area (found %d) on PV %s", dm_list_size(&info->bas), dev_name(info->dev)); return 0; } dm_list_iterate_items(da, &info->das) pv->pe_start = da->disk_locn.offset >> SECTOR_SHIFT; dm_list_iterate_items(da, &info->bas) { pv->ba_start = da->disk_locn.offset >> SECTOR_SHIFT; pv->ba_size = da->disk_locn.size >> SECTOR_SHIFT; } return 1; } int lvmcache_check_format(struct lvmcache_info *info, const struct format_type *fmt) { if (info->fmt != fmt) { log_error("PV %s is a different format (seqno %s)", dev_name(info->dev), info->fmt->name); return 0; } return 1; } void lvmcache_del_mdas(struct lvmcache_info *info) { if (info->mdas.n) del_mdas(&info->mdas); dm_list_init(&info->mdas); } void lvmcache_del_das(struct lvmcache_info *info) { if (info->das.n) del_das(&info->das); dm_list_init(&info->das); } void lvmcache_del_bas(struct lvmcache_info *info) { if (info->bas.n) del_bas(&info->bas); dm_list_init(&info->bas); } int lvmcache_add_mda(struct lvmcache_info *info, struct device *dev, uint64_t start, uint64_t size, unsigned ignored) { return add_mda(info->fmt, NULL, &info->mdas, dev, start, size, ignored); } int lvmcache_add_da(struct lvmcache_info *info, uint64_t start, uint64_t size) { return add_da(NULL, &info->das, start, size); } int lvmcache_add_ba(struct lvmcache_info *info, uint64_t start, uint64_t size) { return add_ba(NULL, &info->bas, start, size); } void lvmcache_update_pv(struct lvmcache_info *info, struct physical_volume *pv, const struct format_type *fmt) { info->device_size = pv->size << SECTOR_SHIFT; info->fmt = fmt; } int lvmcache_update_das(struct lvmcache_info *info, struct physical_volume *pv) { struct data_area_list *da; if (info->das.n) { if (!pv->pe_start) dm_list_iterate_items(da, &info->das) pv->pe_start = da->disk_locn.offset >> SECTOR_SHIFT; del_das(&info->das); } else dm_list_init(&info->das); if (!add_da(NULL, &info->das, pv->pe_start << SECTOR_SHIFT, 0 /*pv->size << SECTOR_SHIFT*/)) return_0; return 1; } int lvmcache_update_bas(struct lvmcache_info *info, struct physical_volume *pv) { struct data_area_list *ba; if (info->bas.n) { if (!pv->ba_start && !pv->ba_size) dm_list_iterate_items(ba, &info->bas) { pv->ba_start = ba->disk_locn.offset >> SECTOR_SHIFT; pv->ba_size = ba->disk_locn.size >> SECTOR_SHIFT; } del_das(&info->bas); } else dm_list_init(&info->bas); if (!add_ba(NULL, &info->bas, pv->ba_start << SECTOR_SHIFT, pv->ba_size << SECTOR_SHIFT)) return_0; return 1; } int lvmcache_foreach_pv(struct lvmcache_vginfo *vginfo, int (*fun)(struct lvmcache_info *, void *), void *baton) { struct lvmcache_info *info; dm_list_iterate_items(info, &vginfo->infos) { if (!fun(info, baton)) return_0; } return 1; } int lvmcache_foreach_mda(struct lvmcache_info *info, int (*fun)(struct metadata_area *, void *), void *baton) { struct metadata_area *mda; dm_list_iterate_items(mda, &info->mdas) { if (!fun(mda, baton)) return_0; } return 1; } unsigned lvmcache_mda_count(struct lvmcache_info *info) { return dm_list_size(&info->mdas); } int lvmcache_foreach_da(struct lvmcache_info *info, int (*fun)(struct disk_locn *, void *), void *baton) { struct data_area_list *da; dm_list_iterate_items(da, &info->das) { if (!fun(&da->disk_locn, baton)) return_0; } return 1; } int lvmcache_foreach_ba(struct lvmcache_info *info, int (*fun)(struct disk_locn *, void *), void *baton) { struct data_area_list *ba; dm_list_iterate_items(ba, &info->bas) { if (!fun(&ba->disk_locn, baton)) return_0; } return 1; } /* * The lifetime of the label returned is tied to the lifetime of the * lvmcache_info which is the same as lvmcache itself. */ struct label *lvmcache_get_label(struct lvmcache_info *info) { return info->label; } void lvmcache_make_valid(struct lvmcache_info *info) { info->status &= ~CACHE_INVALID; } uint64_t lvmcache_device_size(struct lvmcache_info *info) { return info->device_size; } void lvmcache_set_device_size(struct lvmcache_info *info, uint64_t size) { info->device_size = size; } struct device *lvmcache_device(struct lvmcache_info *info) { return info->dev; } void lvmcache_set_ext_version(struct lvmcache_info *info, uint32_t version) { info->ext_version = version; } uint32_t lvmcache_ext_version(struct lvmcache_info *info) { return info->ext_version; } void lvmcache_set_ext_flags(struct lvmcache_info *info, uint32_t flags) { info->ext_flags = flags; } uint32_t lvmcache_ext_flags(struct lvmcache_info *info) { return info->ext_flags; } int lvmcache_is_orphan(struct lvmcache_info *info) { if (!info->vginfo) return 1; /* FIXME? */ return is_orphan_vg(info->vginfo->vgname); } int lvmcache_vgid_is_cached(const char *vgid) { struct lvmcache_vginfo *vginfo; if (lvmetad_active()) return 1; vginfo = lvmcache_vginfo_from_vgid(vgid); if (!vginfo || !vginfo->vgname) return 0; if (is_orphan_vg(vginfo->vgname)) return 0; return 1; } /* * Return true iff it is impossible to find out from this info alone whether the * PV in question is or is not an orphan. */ int lvmcache_uncertain_ownership(struct lvmcache_info *info) { return mdas_empty_or_ignored(&info->mdas); } uint64_t lvmcache_smallest_mda_size(struct lvmcache_info *info) { if (!info) return UINT64_C(0); return find_min_mda_size(&info->mdas); } const struct format_type *lvmcache_fmt(struct lvmcache_info *info) { return info->fmt; } int lvmcache_lookup_mda(struct lvmcache_vgsummary *vgsummary) { struct lvmcache_vginfo *vginfo; if (!vgsummary->mda_size) return 0; /* FIXME Index the checksums */ dm_list_iterate_items(vginfo, &_vginfos) { if (vgsummary->mda_checksum == vginfo->mda_checksum && vgsummary->mda_size == vginfo->mda_size && !is_orphan_vg(vginfo->vgname)) { vgsummary->vgname = vginfo->vgname; vgsummary->creation_host = vginfo->creation_host; vgsummary->vgstatus = vginfo->status; /* vginfo->vgid has 1 extra byte then vgsummary->vgid */ memcpy(&vgsummary->vgid, vginfo->vgid, sizeof(vgsummary->vgid)); return 1; } } return 0; } int lvmcache_contains_lock_type_sanlock(struct cmd_context *cmd) { struct lvmcache_vginfo *vginfo; dm_list_iterate_items(vginfo, &_vginfos) { if (vginfo->lock_type && !strcmp(vginfo->lock_type, "sanlock")) return 1; } return 0; } void lvmcache_get_max_name_lengths(struct cmd_context *cmd, unsigned *pv_max_name_len, unsigned *vg_max_name_len) { struct lvmcache_vginfo *vginfo; struct lvmcache_info *info; unsigned len; *vg_max_name_len = 0; *pv_max_name_len = 0; dm_list_iterate_items(vginfo, &_vginfos) { len = strlen(vginfo->vgname); if (*vg_max_name_len < len) *vg_max_name_len = len; dm_list_iterate_items(info, &vginfo->infos) { len = strlen(dev_name(info->dev)); if (*pv_max_name_len < len) *pv_max_name_len = len; } } } int lvmcache_vg_is_foreign(struct cmd_context *cmd, const char *vgname, const char *vgid) { struct lvmcache_vginfo *vginfo; int ret = 0; if (lvmetad_active()) return lvmetad_vg_is_foreign(cmd, vgname, vgid); if ((vginfo = lvmcache_vginfo_from_vgid(vgid))) ret = !is_system_id_allowed(cmd, vginfo->system_id); return ret; }