1
0
mirror of git://sourceware.org/git/lvm2.git synced 2025-01-18 10:04:20 +03:00
lvm2/lib/format1/disk-rep.c
David Teigland 9b6a62f944 lvmcache: simplify
Recent changes allow some major simplification of the way
lvmcache works and is used.  lvmcache_label_scan is now
called in a controlled fashion at the start of commands,
and not via various unpredictable side effects.  Remove
various calls to it from other places.  lvmcache_label_scan
should not be called from anywhere during a command, because
it produces an incorrect representation of PVs with no MDAs,
and misclassifies them as orphans.  This has been a long
standing problem.  The invalid flag and rescanning based on
that is no longer used and removed.  The 'force' variation is
no longer needed and removed.
2018-04-20 11:22:48 -05:00

762 lines
17 KiB
C

/*
* Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
* Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
*
* This file is part of LVM2.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
* of the GNU Lesser General Public License v.2.1.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "lib.h"
#include "disk-rep.h"
#include "xlate.h"
#include "lvmcache.h"
#include "metadata-exported.h"
#include <fcntl.h>
#define xx16(v) disk->v = xlate16(disk->v)
#define xx32(v) disk->v = xlate32(disk->v)
#define xx64(v) disk->v = xlate64(disk->v)
/*
* Functions to perform the endian conversion
* between disk and core. The same code works
* both ways of course.
*/
static void _xlate_pvd(struct pv_disk *disk)
{
xx16(version);
xx32(pv_on_disk.base);
xx32(pv_on_disk.size);
xx32(vg_on_disk.base);
xx32(vg_on_disk.size);
xx32(pv_uuidlist_on_disk.base);
xx32(pv_uuidlist_on_disk.size);
xx32(lv_on_disk.base);
xx32(lv_on_disk.size);
xx32(pe_on_disk.base);
xx32(pe_on_disk.size);
xx32(pv_major);
xx32(pv_number);
xx32(pv_status);
xx32(pv_allocatable);
xx32(pv_size);
xx32(lv_cur);
xx32(pe_size);
xx32(pe_total);
xx32(pe_allocated);
xx32(pe_start);
}
static void _xlate_lvd(struct lv_disk *disk)
{
xx32(lv_access);
xx32(lv_status);
xx32(lv_open);
xx32(lv_dev);
xx32(lv_number);
xx32(lv_mirror_copies);
xx32(lv_recovery);
xx32(lv_schedule);
xx32(lv_size);
xx32(lv_snapshot_minor);
xx16(lv_chunk_size);
xx16(dummy);
xx32(lv_allocated_le);
xx32(lv_stripes);
xx32(lv_stripesize);
xx32(lv_badblock);
xx32(lv_allocation);
xx32(lv_io_timeout);
xx32(lv_read_ahead);
}
static void _xlate_vgd(struct vg_disk *disk)
{
xx32(vg_number);
xx32(vg_access);
xx32(vg_status);
xx32(lv_max);
xx32(lv_cur);
xx32(lv_open);
xx32(pv_max);
xx32(pv_cur);
xx32(pv_act);
xx32(dummy);
xx32(vgda);
xx32(pe_size);
xx32(pe_total);
xx32(pe_allocated);
xx32(pvg_total);
}
static void _xlate_extents(struct pe_disk *extents, uint32_t count)
{
unsigned i;
for (i = 0; i < count; i++) {
extents[i].lv_num = xlate16(extents[i].lv_num);
extents[i].le_num = xlate16(extents[i].le_num);
}
}
/*
* Handle both minor metadata formats.
*/
static int _munge_formats(struct pv_disk *pvd)
{
uint32_t pe_start;
unsigned b, e;
switch (pvd->version) {
case 1:
pvd->pe_start = ((pvd->pe_on_disk.base +
pvd->pe_on_disk.size) >> SECTOR_SHIFT);
break;
case 2:
pvd->version = 1;
pe_start = pvd->pe_start << SECTOR_SHIFT;
pvd->pe_on_disk.size = pe_start - pvd->pe_on_disk.base;
break;
default:
return 0;
}
/* UUID too long? */
if (pvd->pv_uuid[ID_LEN]) {
/* Retain ID_LEN chars from end */
for (e = ID_LEN; e < sizeof(pvd->pv_uuid); e++) {
if (!pvd->pv_uuid[e]) {
e--;
break;
}
}
for (b = 0; b < ID_LEN; b++) {
pvd->pv_uuid[b] = pvd->pv_uuid[++e - ID_LEN];
/* FIXME Remove all invalid chars */
if (pvd->pv_uuid[b] == '/')
pvd->pv_uuid[b] = '#';
}
memset(&pvd->pv_uuid[ID_LEN], 0, sizeof(pvd->pv_uuid) - ID_LEN);
}
/* If UUID is missing, create one */
if (pvd->pv_uuid[0] == '\0') {
uuid_from_num((char *)pvd->pv_uuid, pvd->pv_number);
pvd->pv_uuid[ID_LEN] = '\0';
}
return 1;
}
/*
* If exported, remove "PV_EXP" from end of VG name
*/
static void _munge_exported_vg(struct pv_disk *pvd)
{
int l;
size_t s;
/* Return if PV not in a VG */
if ((!*pvd->vg_name))
return;
/* FIXME also check vgd->status & VG_EXPORTED? */
l = strlen((char *)pvd->vg_name);
s = sizeof(EXPORTED_TAG);
if (!strncmp((char *)pvd->vg_name + l - s + 1, EXPORTED_TAG, s)) {
pvd->vg_name[l - s + 1] = '\0';
pvd->pv_status |= VG_EXPORTED;
}
}
int munge_pvd(struct device *dev, struct pv_disk *pvd)
{
_xlate_pvd(pvd);
if (pvd->id[0] != 'H' || pvd->id[1] != 'M') {
log_very_verbose("%s does not have a valid LVM1 PV identifier",
dev_name(dev));
return 0;
}
if (!_munge_formats(pvd)) {
log_very_verbose("format1: Unknown metadata version %d "
"found on %s", pvd->version, dev_name(dev));
return 0;
}
/* If VG is exported, set VG name back to the real name */
_munge_exported_vg(pvd);
return 1;
}
static int _read_pvd(struct device *dev, struct pv_disk *pvd)
{
if (!dev_read(dev, UINT64_C(0), sizeof(*pvd), DEV_IO_FMT1, pvd)) {
log_very_verbose("Failed to read PV data from %s",
dev_name(dev));
return 0;
}
return munge_pvd(dev, pvd);
}
static int _read_lvd(struct device *dev, uint64_t pos, struct lv_disk *disk)
{
if (!dev_read(dev, pos, sizeof(*disk), DEV_IO_FMT1, disk))
return_0;
_xlate_lvd(disk);
return 1;
}
int read_vgd(struct device *dev, struct vg_disk *vgd, struct pv_disk *pvd)
{
uint64_t pos = pvd->vg_on_disk.base;
if (!dev_read(dev, pos, sizeof(*vgd), DEV_IO_FMT1, vgd))
return_0;
_xlate_vgd(vgd);
if ((vgd->lv_max > MAX_LV) || (vgd->pv_max > MAX_PV))
return_0;
/* If UUID is missing, create one */
if (vgd->vg_uuid[0] == '\0')
uuid_from_num((char *)vgd->vg_uuid, vgd->vg_number);
return 1;
}
static int _read_uuids(struct disk_list *data)
{
unsigned num_read = 0;
struct uuid_list *ul;
char buffer[NAME_LEN] __attribute__((aligned(8)));
uint64_t pos = data->pvd.pv_uuidlist_on_disk.base;
uint64_t end = pos + data->pvd.pv_uuidlist_on_disk.size;
while (pos < end && num_read < data->vgd.pv_cur) {
if (!dev_read(data->dev, pos, sizeof(buffer), DEV_IO_FMT1, buffer))
return_0;
if (!(ul = dm_pool_alloc(data->mem, sizeof(*ul))))
return_0;
memcpy(ul->uuid, buffer, NAME_LEN);
ul->uuid[NAME_LEN - 1] = '\0';
dm_list_add(&data->uuids, &ul->list);
pos += NAME_LEN;
num_read++;
}
return 1;
}
static int _check_lvd(struct lv_disk *lvd)
{
return !(lvd->lv_name[0] == '\0');
}
static int _read_lvs(struct disk_list *data)
{
unsigned int i, lvs_read = 0;
uint64_t pos;
struct lvd_list *ll;
struct vg_disk *vgd = &data->vgd;
for (i = 0; (i < vgd->lv_max) && (lvs_read < vgd->lv_cur); i++) {
pos = data->pvd.lv_on_disk.base + (i * sizeof(struct lv_disk));
ll = dm_pool_alloc(data->mem, sizeof(*ll));
if (!ll)
return_0;
if (!_read_lvd(data->dev, pos, &ll->lvd))
return_0;
if (!_check_lvd(&ll->lvd))
continue;
lvs_read++;
dm_list_add(&data->lvds, &ll->list);
}
return 1;
}
static int _read_extents(struct disk_list *data)
{
size_t len = sizeof(struct pe_disk) * data->pvd.pe_total;
struct pe_disk *extents = dm_pool_alloc(data->mem, len);
uint64_t pos = data->pvd.pe_on_disk.base;
if (!extents)
return_0;
if (!dev_read(data->dev, pos, len, DEV_IO_FMT1, extents))
return_0;
_xlate_extents(extents, data->pvd.pe_total);
data->extents = extents;
return 1;
}
static void __update_lvmcache(const struct format_type *fmt,
struct disk_list *dl,
struct device *dev, const char *vgid,
unsigned exported)
{
struct lvmcache_info *info;
const char *vgname = *((char *)dl->pvd.vg_name) ?
(char *)dl->pvd.vg_name : fmt->orphan_vg_name;
if (!(info = lvmcache_add(fmt->labeller, (char *)dl->pvd.pv_uuid, dev,
vgname, vgid, exported ? EXPORTED_VG : 0))) {
stack;
return;
}
lvmcache_set_device_size(info, ((uint64_t)xlate32(dl->pvd.pv_size)) << SECTOR_SHIFT);
lvmcache_del_mdas(info);
}
static struct disk_list *__read_disk(const struct format_type *fmt,
struct device *dev, struct dm_pool *mem,
const char *vg_name)
{
struct disk_list *dl = dm_pool_zalloc(mem, sizeof(*dl));
const char *name = dev_name(dev);
if (!dl)
return_NULL;
dl->dev = dev;
dl->mem = mem;
dm_list_init(&dl->uuids);
dm_list_init(&dl->lvds);
if (!_read_pvd(dev, &dl->pvd))
goto_bad;
/*
* is it an orphan ?
*/
if (!*dl->pvd.vg_name) {
log_very_verbose("%s is not a member of any format1 VG", name);
__update_lvmcache(fmt, dl, dev, fmt->orphan_vg_name, 0);
return (vg_name) ? NULL : dl;
}
if (!read_vgd(dl->dev, &dl->vgd, &dl->pvd)) {
log_error("Failed to read VG data from PV (%s)", name);
__update_lvmcache(fmt, dl, dev, fmt->orphan_vg_name, 0);
goto bad;
}
if (vg_name && strcmp(vg_name, (char *)dl->pvd.vg_name)) {
log_very_verbose("%s is not a member of the VG %s",
name, vg_name);
__update_lvmcache(fmt, dl, dev, fmt->orphan_vg_name, 0);
goto bad;
}
__update_lvmcache(fmt, dl, dev, (char *)dl->vgd.vg_uuid,
dl->vgd.vg_status & VG_EXPORTED);
if (!_read_uuids(dl)) {
log_error("Failed to read PV uuid list from %s", name);
goto bad;
}
if (!_read_lvs(dl)) {
log_error("Failed to read LV's from %s", name);
goto bad;
}
if (!_read_extents(dl)) {
log_error("Failed to read extents from %s", name);
goto bad;
}
log_very_verbose("Found %s in %sVG %s", name,
(dl->vgd.vg_status & VG_EXPORTED) ? "exported " : "",
dl->pvd.vg_name);
return dl;
bad:
dm_pool_free(dl->mem, dl);
return NULL;
}
struct disk_list *read_disk(const struct format_type *fmt, struct device *dev,
struct dm_pool *mem, const char *vg_name)
{
struct disk_list *dl;
if (!dev_open_readonly(dev))
return_NULL;
dl = __read_disk(fmt, dev, mem, vg_name);
if (!dev_close(dev))
stack;
return dl;
}
static void _add_pv_to_list(struct cmd_context *cmd, struct dm_list *head, struct disk_list *data)
{
struct pv_disk *pvd;
struct disk_list *diskl;
dm_list_iterate_items(diskl, head) {
pvd = &diskl->pvd;
if (!strncmp((char *)data->pvd.pv_uuid, (char *)pvd->pv_uuid,
sizeof(pvd->pv_uuid))) {
if (!dev_subsystem_part_major(cmd->dev_types, data->dev)) {
log_very_verbose("Ignoring duplicate PV %s on "
"%s", pvd->pv_uuid,
dev_name(data->dev));
return;
}
log_very_verbose("Duplicate PV %s - using %s %s",
pvd->pv_uuid, dev_subsystem_name(cmd->dev_types, data->dev),
dev_name(data->dev));
dm_list_del(&diskl->list);
break;
}
}
dm_list_add(head, &data->list);
}
struct _read_pvs_in_vg_baton {
const char *vg_name;
struct dm_list *head;
struct disk_list *data;
struct dm_pool *mem;
int empty;
};
static int _read_pv_in_vg(struct lvmcache_info *info, void *baton)
{
struct _read_pvs_in_vg_baton *b = baton;
b->empty = 0;
if (!lvmcache_device(info) ||
!(b->data = read_disk(lvmcache_fmt(info), lvmcache_device(info), b->mem, b->vg_name)))
return 0; /* stop here */
_add_pv_to_list(lvmcache_fmt(info)->cmd, b->head, b->data);
return 1;
}
/*
* Build a list of pv_d's structures, allocated from mem.
* We keep track of the first object allocated from the pool
* so we can free off all the memory if something goes wrong.
*/
int read_pvs_in_vg(const struct format_type *fmt, const char *vg_name,
struct dev_filter *filter, struct dm_pool *mem,
struct dm_list *head)
{
struct dev_iter *iter;
struct device *dev;
struct lvmcache_vginfo *vginfo;
struct _read_pvs_in_vg_baton baton;
baton.head = head;
baton.empty = 1;
baton.data = NULL;
baton.mem = mem;
baton.vg_name = vg_name;
/* Fast path if we already saw this VG and cached the list of PVs */
if (vg_name && (vginfo = lvmcache_vginfo_from_vgname(vg_name, NULL))) {
lvmcache_foreach_pv(vginfo, _read_pv_in_vg, &baton);
if (!baton.empty) {
/* Did we find the whole VG? */
if (!vg_name || is_orphan_vg(vg_name) ||
(baton.data && *baton.data->pvd.vg_name &&
dm_list_size(head) == baton.data->vgd.pv_cur))
return 1;
/* Failed */
dm_list_init(head);
/* vgcache_del(vg_name); */
}
}
if (!(iter = dev_iter_create(filter, 1))) {
log_error("read_pvs_in_vg: dev_iter_create failed");
return 0;
}
/* Otherwise do a complete scan */
for (dev = dev_iter_get(iter); dev; dev = dev_iter_get(iter)) {
if ((baton.data = read_disk(fmt, dev, mem, vg_name))) {
_add_pv_to_list(fmt->cmd, head, baton.data);
}
}
dev_iter_destroy(iter);
if (dm_list_empty(head))
return 0;
return 1;
}
static int _write_vgd(struct disk_list *data)
{
struct vg_disk *vgd = &data->vgd;
uint64_t pos = data->pvd.vg_on_disk.base;
log_debug_metadata("Writing %s VG metadata to %s at %" PRIu64 " len %" PRIsize_t,
data->pvd.vg_name, dev_name(data->dev), pos, sizeof(*vgd));
_xlate_vgd(vgd);
if (!dev_write(data->dev, pos, sizeof(*vgd), DEV_IO_FMT1, vgd))
return_0;
_xlate_vgd(vgd);
return 1;
}
static int _write_uuids(struct disk_list *data)
{
struct uuid_list *ul;
uint64_t pos = data->pvd.pv_uuidlist_on_disk.base;
uint64_t end = pos + data->pvd.pv_uuidlist_on_disk.size;
dm_list_iterate_items(ul, &data->uuids) {
if (pos >= end) {
log_error("Too many uuids to fit on %s",
dev_name(data->dev));
return 0;
}
log_debug_metadata("Writing %s uuidlist to %s at %" PRIu64 " len %d",
data->pvd.vg_name, dev_name(data->dev),
pos, NAME_LEN);
if (!dev_write(data->dev, pos, NAME_LEN, DEV_IO_FMT1, ul->uuid))
return_0;
pos += NAME_LEN;
}
return 1;
}
static int _write_lvd(struct device *dev, uint64_t pos, struct lv_disk *disk)
{
log_debug_metadata("Writing %s LV %s metadata to %s at %" PRIu64 " len %"
PRIsize_t, disk->vg_name, disk->lv_name, dev_name(dev),
pos, sizeof(*disk));
_xlate_lvd(disk);
if (!dev_write(dev, pos, sizeof(*disk), DEV_IO_FMT1, disk))
return_0;
_xlate_lvd(disk);
return 1;
}
static int _write_lvs(struct disk_list *data)
{
struct lvd_list *ll;
uint64_t pos, offset;
pos = data->pvd.lv_on_disk.base;
if (!dev_set(data->dev, pos, data->pvd.lv_on_disk.size, DEV_IO_FMT1, 0)) {
log_error("Couldn't zero lv area on device '%s'",
dev_name(data->dev));
return 0;
}
dm_list_iterate_items(ll, &data->lvds) {
offset = sizeof(struct lv_disk) * ll->lvd.lv_number;
if (offset + sizeof(struct lv_disk) > data->pvd.lv_on_disk.size) {
log_error("lv_number %d too large", ll->lvd.lv_number);
return 0;
}
if (!_write_lvd(data->dev, pos + offset, &ll->lvd))
return_0;
}
return 1;
}
static int _write_extents(struct disk_list *data)
{
size_t len = sizeof(struct pe_disk) * data->pvd.pe_total;
struct pe_disk *extents = data->extents;
uint64_t pos = data->pvd.pe_on_disk.base;
log_debug_metadata("Writing %s extents metadata to %s at %" PRIu64 " len %"
PRIsize_t, data->pvd.vg_name, dev_name(data->dev),
pos, len);
_xlate_extents(extents, data->pvd.pe_total);
if (!dev_write(data->dev, pos, len, DEV_IO_FMT1, extents))
return_0;
_xlate_extents(extents, data->pvd.pe_total);
return 1;
}
static int _write_pvd(struct disk_list *data)
{
char *buf;
uint64_t pos = data->pvd.pv_on_disk.base;
size_t size = data->pvd.pv_on_disk.size;
if (size < sizeof(struct pv_disk)) {
log_error("Invalid PV structure size.");
return 0;
}
/* Make sure that the gap between the PV structure and
the next one is zeroed in order to make non LVM tools
happy (idea from AED) */
buf = dm_zalloc(size);
if (!buf) {
log_error("Couldn't allocate temporary PV buffer.");
return 0;
}
memcpy(buf, &data->pvd, sizeof(struct pv_disk));
log_debug_metadata("Writing %s PV metadata to %s at %" PRIu64 " len %"
PRIsize_t, data->pvd.vg_name, dev_name(data->dev),
pos, size);
_xlate_pvd((struct pv_disk *) buf);
if (!dev_write(data->dev, pos, size, DEV_IO_FMT1, buf)) {
dm_free(buf);
return_0;
}
dm_free(buf);
return 1;
}
/*
* assumes the device has been opened.
*/
static int __write_all_pvd(const struct format_type *fmt __attribute__((unused)),
struct disk_list *data, int write_vg_metadata)
{
const char *pv_name = dev_name(data->dev);
if (!_write_pvd(data)) {
log_error("Failed to write PV structure onto %s", pv_name);
return 0;
}
/* vgcache_add(data->pvd.vg_name, data->vgd.vg_uuid, data->dev, fmt); */
/*
* Stop here for orphan PVs or if VG metadata write not requested.
*/
if ((data->pvd.vg_name[0] == '\0') || !write_vg_metadata) {
/* if (!test_mode())
vgcache_add(data->pvd.vg_name, NULL, data->dev, fmt); */
return 1;
}
/* if (!test_mode())
vgcache_add(data->pvd.vg_name, data->vgd.vg_uuid, data->dev,
fmt); */
if (!_write_vgd(data)) {
log_error("Failed to write VG data to %s", pv_name);
return 0;
}
if (!_write_uuids(data)) {
log_error("Failed to write PV uuid list to %s", pv_name);
return 0;
}
if (!_write_lvs(data)) {
log_error("Failed to write LV's to %s", pv_name);
return 0;
}
if (!_write_extents(data)) {
log_error("Failed to write extents to %s", pv_name);
return 0;
}
return 1;
}
/*
* opens the device and hands to the above fn.
*/
static int _write_all_pvd(const struct format_type *fmt, struct disk_list *data, int write_vg_metadata)
{
int r;
if (!data->dev)
return_0;
if (!dev_open(data->dev))
return_0;
r = __write_all_pvd(fmt, data, write_vg_metadata);
if (!dev_close(data->dev))
stack;
return r;
}
/*
* Writes all the given pv's to disk. Does very
* little sanity checking, so make sure correct
* data is passed to here.
*/
int write_disks(const struct format_type *fmt, struct dm_list *pvs, int write_vg_metadata)
{
struct disk_list *dl;
dm_list_iterate_items(dl, pvs) {
if (!(_write_all_pvd(fmt, dl, write_vg_metadata)))
return_0;
log_very_verbose("Successfully wrote data to %s",
dev_name(dl->dev));
}
return 1;
}