1
0
mirror of git://sourceware.org/git/lvm2.git synced 2025-01-10 05:18:36 +03:00
lvm2/lib/format1/disk-rep.c
Alasdair Kergon 25b733809a Merge with text format branch.
Lots of changes/very little testing so far => there'll be bugs!

Use 'vgcreate -M text' to create a volume group with its metadata stored
in text files.  Text format metadata changes should be reasonably atomic,
with a (basic) automatic recovery mechanism if the system crashes while a
change is in progress.

Add a metadata section to lvm.conf to specify multiple directories if
you want (recommended) to keep multiple copies of the metadata (eg on
different filesystems).

e.g. metadata {
        dirs = ["/etc/lvm/metadata1","/usr/local/lvm/metadata2"]
}

Plenty of refinements still in the pipeline.
2002-04-24 18:20:51 +00:00

663 lines
13 KiB
C

/*
* Copyright (C) 2001 Sistina Software (UK) Limited.
*
* This file is released under the LGPL.
*/
#include "disk-rep.h"
#include "dbg_malloc.h"
#include "pool.h"
#include "xlate.h"
#include "log.h"
#include "vgcache.h"
#include "filter.h"
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <linux/kdev_t.h>
#define fail do {stack; return 0;} while(0)
#define xx16(v) disk->v = xlate16(disk->v)
#define xx32(v) disk->v = xlate32(disk->v)
#define xx64(v) disk->v = xlate64(disk->v)
/*
* Functions to perform the endian conversion
* between disk and core. The same code works
* both ways of course.
*/
static void _xlate_pvd(struct pv_disk *disk)
{
xx16(version);
xx32(pv_on_disk.base);
xx32(pv_on_disk.size);
xx32(vg_on_disk.base);
xx32(vg_on_disk.size);
xx32(pv_uuidlist_on_disk.base);
xx32(pv_uuidlist_on_disk.size);
xx32(lv_on_disk.base);
xx32(lv_on_disk.size);
xx32(pe_on_disk.base);
xx32(pe_on_disk.size);
xx32(pv_major);
xx32(pv_number);
xx32(pv_status);
xx32(pv_allocatable);
xx32(pv_size);
xx32(lv_cur);
xx32(pe_size);
xx32(pe_total);
xx32(pe_allocated);
xx32(pe_start);
}
static void _xlate_lvd(struct lv_disk *disk)
{
xx32(lv_access);
xx32(lv_status);
xx32(lv_open);
xx32(lv_dev);
xx32(lv_number);
xx32(lv_mirror_copies);
xx32(lv_recovery);
xx32(lv_schedule);
xx32(lv_size);
xx32(lv_snapshot_minor);
xx16(lv_chunk_size);
xx16(dummy);
xx32(lv_allocated_le);
xx32(lv_stripes);
xx32(lv_stripesize);
xx32(lv_badblock);
xx32(lv_allocation);
xx32(lv_io_timeout);
xx32(lv_read_ahead);
}
static void _xlate_vgd(struct vg_disk *disk)
{
xx32(vg_number);
xx32(vg_access);
xx32(vg_status);
xx32(lv_max);
xx32(lv_cur);
xx32(lv_open);
xx32(pv_max);
xx32(pv_cur);
xx32(pv_act);
xx32(dummy);
xx32(vgda);
xx32(pe_size);
xx32(pe_total);
xx32(pe_allocated);
xx32(pvg_total);
}
static void _xlate_extents(struct pe_disk *extents, int count)
{
int i;
for (i = 0; i < count; i++) {
extents[i].lv_num = xlate16(extents[i].lv_num);
extents[i].le_num = xlate16(extents[i].le_num);
}
}
/*
* Handle both minor metadata formats.
*/
static int _munge_formats(struct pv_disk *pvd)
{
uint32_t pe_start;
switch (pvd->version) {
case 1:
pvd->pe_start = ((pvd->pe_on_disk.base +
pvd->pe_on_disk.size) / SECTOR_SIZE);
break;
case 2:
pvd->version = 1;
pe_start = pvd->pe_start * SECTOR_SIZE;
pvd->pe_on_disk.size = pe_start - pvd->pe_on_disk.base;
break;
default:
return 0;
}
return 1;
}
int read_pvd(struct device *dev, struct pv_disk *pvd)
{
if (dev_read(dev, 0, sizeof(*pvd), pvd) != sizeof(*pvd)) {
log_very_verbose("Failed to read PV data from %s",
dev_name(dev));
return 0;
}
_xlate_pvd(pvd);
if (pvd->id[0] != 'H' || pvd->id[1] != 'M') {
log_very_verbose("%s does not have a valid PV identifier",
dev_name(dev));
return 0;
}
if (!_munge_formats(pvd)) {
log_very_verbose("Unknown metadata version %d found on %s",
pvd->version, dev_name(dev));
return 0;
}
return 1;
}
static int _read_lvd(struct device *dev, ulong pos, struct lv_disk *disk)
{
if (dev_read(dev, pos, sizeof(*disk), disk) != sizeof(*disk))
fail;
_xlate_lvd(disk);
return 1;
}
static int _read_vgd(struct disk_list *data)
{
struct vg_disk *vgd = &data->vgd;
ulong pos = data->pvd.vg_on_disk.base;
if (dev_read(data->dev, pos, sizeof(*vgd), vgd) != sizeof(*vgd))
fail;
_xlate_vgd(vgd);
return 1;
}
static int _read_uuids(struct disk_list *data)
{
int num_read = 0;
struct uuid_list *ul;
char buffer[NAME_LEN];
ulong pos = data->pvd.pv_uuidlist_on_disk.base;
ulong end = pos + data->pvd.pv_uuidlist_on_disk.size;
while (pos < end && num_read < data->vgd.pv_cur) {
if (dev_read(data->dev, pos, sizeof(buffer), buffer) !=
sizeof(buffer))
fail;
if (!(ul = pool_alloc(data->mem, sizeof(*ul))))
fail;
memcpy(ul->uuid, buffer, NAME_LEN);
ul->uuid[NAME_LEN - 1] = '\0';
list_add(&data->uuids, &ul->list);
pos += NAME_LEN;
num_read++;
}
return 1;
}
static inline int _check_lvd(struct lv_disk *lvd)
{
return !(lvd->lv_name[0] == '\0');
}
static int _read_lvs(struct disk_list *data)
{
int i, read = 0;
ulong pos;
struct lvd_list *ll;
struct vg_disk *vgd = &data->vgd;
for (i = 0; (i < vgd->lv_max) && (read < vgd->lv_cur); i++) {
pos = data->pvd.lv_on_disk.base + (i * sizeof(struct lv_disk));
ll = pool_alloc(data->mem, sizeof(*ll));
if (!ll)
fail;
if (!_read_lvd(data->dev, pos, &ll->lvd))
fail;
if (!_check_lvd(&ll->lvd))
continue;
read++;
list_add(&data->lvds, &ll->list);
}
return 1;
}
static int _read_extents(struct disk_list *data)
{
size_t len = sizeof(struct pe_disk) * data->pvd.pe_total;
struct pe_disk *extents = pool_alloc(data->mem, len);
ulong pos = data->pvd.pe_on_disk.base;
if (!extents)
fail;
if (dev_read(data->dev, pos, len, extents) != len)
fail;
_xlate_extents(extents, data->pvd.pe_total);
data->extents = extents;
return 1;
}
/*
* If exported, remove "PV_EXP" from end of VG name
*/
static void _munge_exported_vg(struct disk_list *data)
{
int l, s;
/* Return if PV not in a VG or VG not exported */
if ((!*data->pvd.vg_name) || !(data->vgd.vg_status & VG_EXPORTED))
return;
l = strlen(data->pvd.vg_name);
s = sizeof(EXPORTED_TAG);
if (!strncmp(data->pvd.vg_name + l - s + 1, EXPORTED_TAG, s))
data->pvd.vg_name[l - s + 1] = '\0';
data->pvd.pv_status |= VG_EXPORTED;
}
static struct disk_list *__read_disk(struct format_type *fmt,
struct device *dev, struct pool *mem,
const char *vg_name)
{
struct disk_list *dl = pool_alloc(mem, sizeof(*dl));
const char *name = dev_name(dev);
if (!dl) {
stack;
return NULL;
}
dl->dev = dev;
dl->mem = mem;
list_init(&dl->uuids);
list_init(&dl->lvds);
if (!read_pvd(dev, &dl->pvd)) {
stack;
goto bad;
}
/*
* is it an orphan ?
*/
if (!*dl->pvd.vg_name) {
log_very_verbose("%s is not a member of any format1 VG", name);
/* Update VG cache */
vgcache_add(dl->pvd.vg_name, NULL, dev, fmt);
return (vg_name) ? NULL : dl;
}
if (!_read_vgd(dl)) {
log_error("Failed to read VG data from PV (%s)", name);
goto bad;
}
/* If VG is exported, set VG name back to the real name */
_munge_exported_vg(dl);
/* Update VG cache with what we found */
vgcache_add(dl->pvd.vg_name, dl->vgd.vg_uuid, dev, fmt);
if (vg_name && strcmp(vg_name, dl->pvd.vg_name)) {
log_very_verbose("%s is not a member of the VG %s",
name, vg_name);
goto bad;
}
if (!_read_uuids(dl)) {
log_error("Failed to read PV uuid list from %s", name);
goto bad;
}
if (!_read_lvs(dl)) {
log_error("Failed to read LV's from %s", name);
goto bad;
}
if (!_read_extents(dl)) {
log_error("Failed to read extents from %s", name);
goto bad;
}
log_very_verbose("Found %s in %sVG %s", name,
(dl->vgd.vg_status & VG_EXPORTED) ? "exported " : "",
dl->pvd.vg_name);
return dl;
bad:
pool_free(dl->mem, dl);
return NULL;
}
struct disk_list *read_disk(struct format_type *fmt, struct device *dev,
struct pool *mem, const char *vg_name)
{
struct disk_list *r;
if (!dev_open(dev, O_RDONLY)) {
stack;
return NULL;
}
r = __read_disk(fmt, dev, mem, vg_name);
if (!dev_close(dev))
stack;
return r;
}
static void _add_pv_to_list(struct list *head, struct disk_list *data)
{
struct list *pvdh;
struct pv_disk *pvd;
list_iterate(pvdh, head) {
pvd = &list_item(pvdh, struct disk_list)->pvd;
if (!strncmp(data->pvd.pv_uuid, pvd->pv_uuid,
sizeof(pvd->pv_uuid))) {
if (MAJOR(data->dev->dev) != md_major()) {
log_very_verbose("Ignoring duplicate PV %s on "
"%s", pvd->pv_uuid,
dev_name(data->dev));
return;
}
log_very_verbose("Duplicate PV %s - using md %s",
pvd->pv_uuid, dev_name(data->dev));
list_del(pvdh);
break;
}
}
list_add(head, &data->list);
}
/*
* Build a list of pv_d's structures, allocated from mem.
* We keep track of the first object allocated form the pool
* so we can free off all the memory if something goes wrong.
*/
int read_pvs_in_vg(struct format_type *fmt, const char *vg_name,
struct dev_filter *filter, struct pool *mem,
struct list *head)
{
struct dev_iter *iter;
struct device *dev;
struct disk_list *data = NULL;
struct list *pvdh, *pvdh2;
/* Fast path if we already saw this VG and cached the list of PVs */
if ((pvdh = vgcache_find(vg_name))) {
list_iterate(pvdh2, pvdh) {
dev = list_item(pvdh2, struct pvdev_list)->dev;
if (!(data = read_disk(fmt, dev, mem, vg_name)))
break;
_add_pv_to_list(head, data);
}
/* Did we find the whole VG? */
if (!vg_name || !*vg_name ||
(data && *data->pvd.vg_name &&
list_size(head) == data->vgd.pv_cur)) return 1;
/* Something changed. Remove the hints. */
list_init(head);
vgcache_del(vg_name);
}
if (!(iter = dev_iter_create(filter))) {
log_error("read_pvs_in_vg: dev_iter_create failed");
return 0;
}
/* Otherwise do a complete scan */
for (dev = dev_iter_get(iter); dev; dev = dev_iter_get(iter)) {
if ((data = read_disk(fmt, dev, mem, vg_name))) {
_add_pv_to_list(head, data);
}
}
dev_iter_destroy(iter);
if (list_empty(head))
return 0;
return 1;
}
static int _write_vgd(struct disk_list *data)
{
struct vg_disk *vgd = &data->vgd;
ulong pos = data->pvd.vg_on_disk.base;
_xlate_vgd(vgd);
if (dev_write(data->dev, pos, sizeof(*vgd), vgd) != sizeof(*vgd))
fail;
_xlate_vgd(vgd);
return 1;
}
static int _write_uuids(struct disk_list *data)
{
struct uuid_list *ul;
struct list *uh;
ulong pos = data->pvd.pv_uuidlist_on_disk.base;
ulong end = pos + data->pvd.pv_uuidlist_on_disk.size;
list_iterate(uh, &data->uuids) {
if (pos >= end) {
log_error("Too many uuids to fit on %s",
dev_name(data->dev));
return 0;
}
ul = list_item(uh, struct uuid_list);
if (dev_write(data->dev, pos, NAME_LEN, ul->uuid) != NAME_LEN)
fail;
pos += NAME_LEN;
}
return 1;
}
static int _write_lvd(struct device *dev, ulong pos, struct lv_disk *disk)
{
_xlate_lvd(disk);
if (dev_write(dev, pos, sizeof(*disk), disk) != sizeof(*disk))
fail;
_xlate_lvd(disk);
return 1;
}
static int _write_lvs(struct disk_list *data)
{
struct list *lvh;
ulong pos;
pos = data->pvd.lv_on_disk.base;
if (!dev_zero(data->dev, pos, data->pvd.lv_on_disk.size)) {
log_error("Couldn't zero lv area on device '%s'",
dev_name(data->dev));
return 0;
}
list_iterate(lvh, &data->lvds) {
struct lvd_list *ll = list_item(lvh, struct lvd_list);
if (!_write_lvd(data->dev, pos, &ll->lvd))
fail;
pos += sizeof(struct lv_disk);
}
return 1;
}
static int _write_extents(struct disk_list *data)
{
size_t len = sizeof(struct pe_disk) * data->pvd.pe_total;
struct pe_disk *extents = data->extents;
ulong pos = data->pvd.pe_on_disk.base;
_xlate_extents(extents, data->pvd.pe_total);
if (dev_write(data->dev, pos, len, extents) != len)
fail;
_xlate_extents(extents, data->pvd.pe_total);
return 1;
}
static int _write_pvd(struct disk_list *data)
{
char *buf;
ulong pos = data->pvd.pv_on_disk.base;
ulong size = data->pvd.pv_on_disk.size;
if (size < sizeof(struct pv_disk)) {
log_error("Invalid PV structure size.");
return 0;
}
/* Make sure that the gap between the PV structure and
the next one is zeroed in order to make non LVM tools
happy (idea from AED) */
buf = dbg_malloc(size);
if (!buf) {
log_err("Couldn't allocate temporary PV buffer.");
return 0;
}
memset(buf, 0, size);
memcpy(buf, &data->pvd, sizeof(struct pv_disk));
_xlate_pvd((struct pv_disk *) buf);
if (dev_write(data->dev, pos, size, buf) != size) {
dbg_free(buf);
fail;
}
dbg_free(buf);
return 1;
}
/*
* assumes the device has been opened.
*/
static int __write_all_pvd(struct format_type *fmt, struct disk_list *data)
{
const char *pv_name = dev_name(data->dev);
if (!_write_pvd(data)) {
log_error("Failed to write PV structure onto %s", pv_name);
return 0;
}
vgcache_add(data->pvd.vg_name, data->vgd.vg_uuid, data->dev, fmt);
/*
* Stop here for orphan pv's.
*/
if (data->pvd.vg_name[0] == '\0') {
if (!test_mode())
vgcache_add(data->pvd.vg_name, NULL, data->dev, fmt);
return 1;
}
if (!test_mode())
vgcache_add(data->pvd.vg_name, data->vgd.vg_uuid, data->dev,
fmt);
if (!_write_vgd(data)) {
log_error("Failed to write VG data to %s", pv_name);
return 0;
}
if (!_write_uuids(data)) {
log_error("Failed to write PV uuid list to %s", pv_name);
return 0;
}
if (!_write_lvs(data)) {
log_error("Failed to write LV's to %s", pv_name);
return 0;
}
if (!_write_extents(data)) {
log_error("Failed to write extents to %s", pv_name);
return 0;
}
return 1;
}
/*
* opens the device and hands to the above fn.
*/
static int _write_all_pvd(struct format_type *fmt, struct disk_list *data)
{
int r;
if (!dev_open(data->dev, O_WRONLY)) {
stack;
return 0;
}
r = __write_all_pvd(fmt, data);
if (!dev_close(data->dev))
stack;
return r;
}
/*
* Writes all the given pv's to disk. Does very
* little sanity checking, so make sure correct
* data is passed to here.
*/
int write_disks(struct format_type *fmt, struct list *pvs)
{
struct list *pvh;
struct disk_list *dl;
list_iterate(pvh, pvs) {
dl = list_item(pvh, struct disk_list);
if (!(_write_all_pvd(fmt, dl)))
fail;
log_very_verbose("Successfully wrote data to %s",
dev_name(dl->dev));
}
return 1;
}