mirror of
git://sourceware.org/git/lvm2.git
synced 2025-01-23 02:05:07 +03:00
623593d12b
Allow breaking lvm2 'read-only' commands (not taking write-lock) while parsing VG.
2687 lines
80 KiB
C
2687 lines
80 KiB
C
/*
 * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
 * Copyright (C) 2004-2012 Red Hat, Inc. All rights reserved.
 *
 * This file is part of LVM2.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU Lesser General Public License v.2.1.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
|
|
|
|
#include "lib/misc/lib.h"
|
|
#include "import-export.h"
|
|
#include "format-text.h"
|
|
#include "layout.h"
|
|
#include "lib/device/device.h"
|
|
#include "lib/misc/lvm-file.h"
|
|
#include "lib/config/config.h"
|
|
#include "lib/display/display.h"
|
|
#include "lib/commands/toolcontext.h"
|
|
#include "lib/misc/lvm-string.h"
|
|
#include "lib/uuid/uuid.h"
|
|
#include "lib/misc/crc.h"
|
|
#include "lib/mm/xlate.h"
|
|
#include "lib/label/label.h"
|
|
#include "lib/cache/lvmcache.h"
|
|
#include "libdaemon/client/config-util.h"
|
|
#include "lib/misc/lvm-signal.h"
|
|
|
|
#include <unistd.h>
|
|
#include <limits.h>
|
|
#include <dirent.h>
|
|
#include <ctype.h>
|
|
|
|
static struct format_instance *_text_create_text_instance(const struct format_type *fmt,
|
|
const struct format_instance_ctx *fic);
|
|
|
|
/*
 * Text-format state kept in format_instance->private.
 */
struct text_fid_context {
	char *write_buf;		/* metadata text that is to be written to disk */
	uint32_t write_buf_size;	/* allocated size of write_buf; grows in 64K multiples */
	uint32_t new_metadata_size;	/* size of the text metadata held in write_buf */
	uint32_t checksum;		/* crc32 checksum of the new metadata */
	unsigned int preserve:1;	/* set by preserve_text_fidtc(), cleared by free_text_fidtc() */
};
|
|
|
|
void preserve_text_fidtc(struct volume_group *vg)
|
|
{
|
|
struct format_instance *fid = vg->fid;
|
|
struct text_fid_context *fidtc = (struct text_fid_context *)fid->private;
|
|
|
|
if (fidtc)
|
|
fidtc->preserve = 1;
|
|
}
|
|
|
|
void free_text_fidtc(struct volume_group *vg)
|
|
{
|
|
struct format_instance *fid = vg->fid;
|
|
struct text_fid_context *fidtc = (struct text_fid_context *)fid->private;
|
|
|
|
if (!fidtc)
|
|
return;
|
|
|
|
fidtc->preserve = 0;
|
|
|
|
free(fidtc->write_buf);
|
|
fidtc->write_buf = NULL;
|
|
fidtc->write_buf_size = 0;
|
|
fidtc->new_metadata_size = 0;
|
|
}
|
|
|
|
int rlocn_is_ignored(const struct raw_locn *rlocn)
|
|
{
|
|
return (rlocn->flags & RAW_LOCN_IGNORED ? 1 : 0);
|
|
}
|
|
|
|
void rlocn_set_ignored(struct raw_locn *rlocn, unsigned mda_ignored)
|
|
{
|
|
if (mda_ignored)
|
|
rlocn->flags |= RAW_LOCN_IGNORED;
|
|
else
|
|
rlocn->flags &= ~RAW_LOCN_IGNORED;
|
|
}
|
|
|
|
/*
 * NOTE: Currently there can be only one vg per text file.
 */
|
|
|
|
/*
|
|
* Only used by vgcreate.
|
|
*/
|
|
static int _text_vg_setup(struct format_instance *fid,
|
|
struct volume_group *vg)
|
|
{
|
|
if (!vg_check_new_extent_size(vg->fid->fmt, vg->extent_size))
|
|
return_0;
|
|
|
|
return 1;
|
|
}
|
|
|
|
static uint64_t _mda_free_sectors_raw(struct metadata_area *mda)
|
|
{
|
|
struct mda_context *mdac = (struct mda_context *) mda->metadata_locn;
|
|
|
|
return mdac->free_sectors;
|
|
}
|
|
|
|
static uint64_t _mda_total_sectors_raw(struct metadata_area *mda)
|
|
{
|
|
struct mda_context *mdac = (struct mda_context *) mda->metadata_locn;
|
|
|
|
return mdac->area.size >> SECTOR_SHIFT;
|
|
}
|
|
|
|
/*
|
|
* Check if metadata area belongs to vg
|
|
*/
|
|
static int _mda_in_vg_raw(struct format_instance *fid __attribute__((unused)),
|
|
struct volume_group *vg, struct metadata_area *mda)
|
|
{
|
|
struct mda_context *mdac = (struct mda_context *) mda->metadata_locn;
|
|
struct pv_list *pvl;
|
|
|
|
dm_list_iterate_items(pvl, &vg->pvs)
|
|
if (pvl->pv->dev == mdac->area.dev)
|
|
return 1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static unsigned _mda_locns_match_raw(struct metadata_area *mda1,
|
|
struct metadata_area *mda2)
|
|
{
|
|
struct mda_context *mda1c = (struct mda_context *) mda1->metadata_locn;
|
|
struct mda_context *mda2c = (struct mda_context *) mda2->metadata_locn;
|
|
|
|
if ((mda1c->area.dev == mda2c->area.dev) &&
|
|
(mda1c->area.start == mda2c->area.start) &&
|
|
(mda1c->area.size == mda2c->area.size))
|
|
return 1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static struct device *_mda_get_device_raw(struct metadata_area *mda)
|
|
{
|
|
struct mda_context *mdac = (struct mda_context *) mda->metadata_locn;
|
|
return mdac->area.dev;
|
|
}
|
|
|
|
static int _text_lv_setup(struct format_instance *fid __attribute__((unused)),
|
|
struct logical_volume *lv)
|
|
{
|
|
/******** FIXME Any LV size restriction?
|
|
uint64_t max_size = UINT_MAX;
|
|
|
|
if (lv->size > max_size) {
|
|
char *dummy = display_size(max_size);
|
|
log_error("logical volumes cannot be larger than %s", dummy);
|
|
free(dummy);
|
|
return 0;
|
|
}
|
|
*/
|
|
|
|
if (!*lv->lvid.s && !lvid_create(&lv->lvid, &lv->vg->id)) {
|
|
log_error("Random lvid creation failed for %s/%s.",
|
|
lv->vg->name, lv->name);
|
|
return 0;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
static void _xlate_mdah(struct mda_header *mdah)
|
|
{
|
|
struct raw_locn *rl;
|
|
|
|
mdah->version = xlate32(mdah->version);
|
|
mdah->start = xlate64(mdah->start);
|
|
mdah->size = xlate64(mdah->size);
|
|
|
|
rl = &mdah->raw_locns[0];
|
|
while (rl->offset) {
|
|
rl->checksum = xlate32(rl->checksum);
|
|
rl->offset = xlate64(rl->offset);
|
|
rl->size = xlate64(rl->size);
|
|
rl++;
|
|
}
|
|
}
|
|
|
|
static int _raw_read_mda_header(struct mda_header *mdah, struct device_area *dev_area,
|
|
int primary_mda, uint32_t ignore_bad_fields, uint32_t *bad_fields)
|
|
{
|
|
log_debug_metadata("Reading mda header sector from %s at %llu",
|
|
dev_name(dev_area->dev), (unsigned long long)dev_area->start);
|
|
|
|
if (!dev_read_bytes(dev_area->dev, dev_area->start, MDA_HEADER_SIZE, mdah)) {
|
|
log_error("Failed to read metadata area header on %s at %llu",
|
|
dev_name(dev_area->dev), (unsigned long long)dev_area->start);
|
|
*bad_fields |= BAD_MDA_READ;
|
|
return 0;
|
|
}
|
|
|
|
if (mdah->checksum_xl != xlate32(calc_crc(INITIAL_CRC, (uint8_t *)mdah->magic,
|
|
MDA_HEADER_SIZE -
|
|
sizeof(mdah->checksum_xl)))) {
|
|
log_warn("WARNING: wrong checksum %x in mda header on %s at %llu",
|
|
mdah->checksum_xl,
|
|
dev_name(dev_area->dev), (unsigned long long)dev_area->start);
|
|
*bad_fields |= BAD_MDA_CHECKSUM;
|
|
}
|
|
|
|
_xlate_mdah(mdah);
|
|
|
|
if (memcmp(mdah->magic, FMTT_MAGIC, sizeof(mdah->magic))) {
|
|
log_warn("WARNING: wrong magic number in mda header on %s at %llu",
|
|
dev_name(dev_area->dev), (unsigned long long)dev_area->start);
|
|
*bad_fields |= BAD_MDA_MAGIC;
|
|
}
|
|
|
|
if (mdah->version != FMTT_VERSION) {
|
|
log_warn("WARNING: wrong version %u in mda header on %s at %llu",
|
|
mdah->version,
|
|
dev_name(dev_area->dev), (unsigned long long)dev_area->start);
|
|
*bad_fields |= BAD_MDA_VERSION;
|
|
}
|
|
|
|
if (mdah->start != dev_area->start) {
|
|
log_warn("WARNING: wrong start sector %llu in mda header on %s at %llu",
|
|
(unsigned long long)mdah->start,
|
|
dev_name(dev_area->dev), (unsigned long long)dev_area->start);
|
|
*bad_fields |= BAD_MDA_START;
|
|
}
|
|
|
|
*bad_fields &= ~ignore_bad_fields;
|
|
|
|
if (*bad_fields)
|
|
return 0;
|
|
|
|
return 1;
|
|
}
|
|
|
|
struct mda_header *raw_read_mda_header(const struct format_type *fmt,
|
|
struct device_area *dev_area,
|
|
int primary_mda, uint32_t ignore_bad_fields, uint32_t *bad_fields)
|
|
{
|
|
struct mda_header *mdah;
|
|
|
|
if (!(mdah = dm_pool_alloc(fmt->cmd->mem, MDA_HEADER_SIZE))) {
|
|
log_error("struct mda_header allocation failed");
|
|
*bad_fields |= BAD_MDA_INTERNAL;
|
|
return NULL;
|
|
}
|
|
|
|
if (!_raw_read_mda_header(mdah, dev_area, primary_mda, ignore_bad_fields, bad_fields)) {
|
|
dm_pool_free(fmt->cmd->mem, mdah);
|
|
return NULL;
|
|
}
|
|
|
|
return mdah;
|
|
}
|
|
|
|
static int _raw_write_mda_header(const struct format_type *fmt,
|
|
struct device *dev, int primary_mda,
|
|
uint64_t start_byte, struct mda_header *mdah)
|
|
{
|
|
memcpy(mdah->magic, FMTT_MAGIC, sizeof(mdah->magic));
|
|
mdah->version = FMTT_VERSION;
|
|
mdah->start = start_byte;
|
|
|
|
_xlate_mdah(mdah);
|
|
mdah->checksum_xl = xlate32(calc_crc(INITIAL_CRC, (uint8_t *)mdah->magic,
|
|
MDA_HEADER_SIZE -
|
|
sizeof(mdah->checksum_xl)));
|
|
|
|
dev_set_last_byte(dev, start_byte + MDA_HEADER_SIZE);
|
|
|
|
if (!dev_write_bytes(dev, start_byte, MDA_HEADER_SIZE, mdah)) {
|
|
log_error("Failed to write mda header to %s.", dev_name(dev));
|
|
return 0;
|
|
}
|
|
dev_unset_last_byte(dev);
|
|
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
* FIXME: unify this with read_metadata_location() which is used
|
|
* in the label scanning path.
|
|
*/
|
|
|
|
static struct raw_locn *_read_metadata_location_vg(struct cmd_context *cmd,
|
|
struct device_area *dev_area,
|
|
struct mda_header *mdah, int primary_mda,
|
|
const char *vgname,
|
|
int *precommitted)
|
|
{
|
|
struct raw_locn *rlocn, *rlocn_precommitted;
|
|
|
|
rlocn = mdah->raw_locns; /* Slot 0 */
|
|
rlocn_precommitted = rlocn + 1; /* Slot 1 */
|
|
|
|
/* Should we use precommitted metadata? */
|
|
if (*precommitted && rlocn_precommitted->size &&
|
|
(rlocn_precommitted->offset != rlocn->offset)) {
|
|
rlocn = rlocn_precommitted;
|
|
} else {
|
|
*precommitted = 0;
|
|
}
|
|
|
|
/* Do not check non-existent metadata. */
|
|
if (!rlocn->offset && !rlocn->size)
|
|
return NULL;
|
|
|
|
return rlocn;
|
|
}
|
|
|
|
/*
|
|
* Determine offset for new metadata
|
|
*
|
|
* The rounding can have a negative effect: when the current metadata
|
|
* text size is just below the max, a command to remove something, that
|
|
* *reduces* the text metadata size, can still be rejected for being too large,
|
|
* even though it's smaller than the current size. In this case, the user
|
|
* would need to find something in the VG to remove that uses more text space
|
|
* to compensate for the increase due to rounding.
|
|
* Update: I think that the new max_size restriction avoids this problem.
|
|
*/
|
|
|
|
static uint64_t _next_rlocn_offset(struct volume_group *vg, struct raw_locn *rlocn_old, uint64_t old_last, struct mda_header *mdah, uint64_t mdac_area_start, uint64_t alignment)
|
|
{
|
|
uint64_t next_start;
|
|
uint64_t new_start;
|
|
uint64_t adjust = 0;
|
|
|
|
/* This has only been designed to work with 512. */
|
|
if (alignment != 512)
|
|
log_warn("WARNING: metadata alignment should be 512 not %llu",
|
|
(unsigned long long)alignment);
|
|
|
|
/*
|
|
* No metadata has been written yet, begin at MDA_HEADER_SIZE offset
|
|
* from the start of the area.
|
|
*/
|
|
if (!rlocn_old)
|
|
return MDA_HEADER_SIZE;
|
|
|
|
/*
|
|
* If new start would be less than alignment bytes from the end of the
|
|
* metadata area, then start at beginning.
|
|
*/
|
|
if (mdah->size - old_last < alignment) {
|
|
log_debug_metadata("VG %s %u new metadata start align from %llu to beginning %u",
|
|
vg->name, vg->seqno,
|
|
(unsigned long long)(old_last + 1), MDA_HEADER_SIZE);
|
|
return MDA_HEADER_SIZE;
|
|
}
|
|
|
|
/*
|
|
* New metadata begins after the old, rounded up to alignment.
|
|
*/
|
|
|
|
next_start = old_last + 1;
|
|
|
|
if (next_start % alignment)
|
|
adjust = alignment - (next_start % alignment);
|
|
|
|
new_start = next_start + adjust;
|
|
|
|
log_debug_metadata("VG %s %u new metadata start align from %llu to %llu (+%llu)",
|
|
vg->name, vg->seqno,
|
|
(unsigned long long)next_start,
|
|
(unsigned long long)new_start,
|
|
(unsigned long long)adjust);
|
|
|
|
/*
|
|
* If new_start is beyond the end of the metadata area or within
|
|
* alignment bytes of the end, then start at the beginning.
|
|
*/
|
|
if (new_start > mdah->size - alignment) {
|
|
log_debug_metadata("VG %s %u new metadata start align from %llu to beginning %u",
|
|
vg->name, vg->seqno,
|
|
(unsigned long long)new_start, MDA_HEADER_SIZE);
|
|
return MDA_HEADER_SIZE;
|
|
}
|
|
|
|
return new_start;
|
|
}
|
|
|
|
static struct volume_group *_vg_read_raw_area(struct cmd_context *cmd,
|
|
struct format_instance *fid,
|
|
const char *vgname,
|
|
struct device_area *area,
|
|
struct cached_vg_fmtdata **vg_fmtdata,
|
|
unsigned *use_previous_vg,
|
|
int precommitted,
|
|
int primary_mda)
|
|
{
|
|
struct volume_group *vg = NULL;
|
|
struct raw_locn *rlocn;
|
|
struct mda_header *mdah;
|
|
time_t when;
|
|
char *desc;
|
|
uint32_t wrap = 0;
|
|
uint32_t bad_fields = 0;
|
|
|
|
if (!(mdah = raw_read_mda_header(fid->fmt, area, primary_mda, 0, &bad_fields))) {
|
|
log_error("Failed to read vg %s from %s", vgname, dev_name(area->dev));
|
|
goto out;
|
|
}
|
|
|
|
if (!(rlocn = _read_metadata_location_vg(cmd, area, mdah, primary_mda, vgname, &precommitted))) {
|
|
log_debug_metadata("VG %s not found on %s", vgname, dev_name(area->dev));
|
|
goto out;
|
|
}
|
|
|
|
if (rlocn->offset + rlocn->size > mdah->size)
|
|
wrap = (uint32_t) ((rlocn->offset + rlocn->size) - mdah->size);
|
|
|
|
vg = text_read_metadata(fid, NULL, vg_fmtdata, use_previous_vg, area->dev, primary_mda,
|
|
(off_t) (area->start + rlocn->offset),
|
|
(uint32_t) (rlocn->size - wrap),
|
|
(off_t) (area->start + MDA_HEADER_SIZE),
|
|
wrap,
|
|
calc_crc,
|
|
rlocn->checksum,
|
|
&when, &desc);
|
|
|
|
if (!vg && (!use_previous_vg || !*use_previous_vg)) {
|
|
log_warn("WARNING: Failed to read metadata text at %llu off %llu size %llu VG %s on %s",
|
|
(unsigned long long)(area->start + rlocn->offset),
|
|
(unsigned long long)rlocn->offset,
|
|
(unsigned long long)rlocn->size,
|
|
vgname,
|
|
dev_name(area->dev));
|
|
|
|
return NULL;
|
|
}
|
|
|
|
log_debug_metadata("Found metadata text at %llu off %llu size %llu VG %s on %s",
|
|
(unsigned long long)(area->start + rlocn->offset),
|
|
(unsigned long long)rlocn->offset,
|
|
(unsigned long long)rlocn->size,
|
|
vgname,
|
|
dev_name(area->dev));
|
|
|
|
if (vg && precommitted)
|
|
vg->status |= PRECOMMITTED;
|
|
|
|
out:
|
|
return vg;
|
|
}
|
|
|
|
static struct volume_group *_vg_read_raw(struct cmd_context *cmd,
|
|
struct format_instance *fid,
|
|
const char *vgname,
|
|
struct metadata_area *mda,
|
|
struct cached_vg_fmtdata **vg_fmtdata,
|
|
unsigned *use_previous_vg)
|
|
{
|
|
struct mda_context *mdac = (struct mda_context *) mda->metadata_locn;
|
|
struct volume_group *vg;
|
|
|
|
vg = _vg_read_raw_area(cmd, fid, vgname, &mdac->area, vg_fmtdata, use_previous_vg, 0, mda_is_primary(mda));
|
|
if (sigint_caught())
|
|
return_NULL;
|
|
|
|
if (!vg && use_previous_vg && !*use_previous_vg) {
|
|
/*
|
|
* This condition (corrupt metadata text) is often seen in the
|
|
* label_scan()/_text_read() phase, where this code corresponds to
|
|
* the lvmcache_save_bad_mda() in _text_read(). In this case we
|
|
* have two mda structs to deal with, one in lvmcache from label scan,
|
|
* and the mda copy on fid->metadata_areas_in_use.
|
|
*/
|
|
struct device *dev = mdac->area.dev;
|
|
struct lvmcache_info *info = lvmcache_info_from_pvid(dev->pvid, dev, 0);
|
|
log_warn("WARNING: reading %s mda%d failed to read metadata.", dev_name(dev), mda_is_primary(mda)?1:2);
|
|
log_warn("WARNING: repair VG metadata on %s with vgck --updatemetadata.", dev_name(dev));
|
|
if (info)
|
|
/* remove mda from lvmcache, saving it in info->bad_mdas for possible repair with updatemetadata */
|
|
lvmcache_del_save_bad_mda(info, mda->mda_num, BAD_MDA_TEXT);
|
|
else
|
|
log_warn("WARNING: No cache info for %s", dev_name(dev));
|
|
|
|
/* remove mda from fid */
|
|
fid_remove_mda(fid, mda, NULL, 0, 0);
|
|
}
|
|
|
|
return vg;
|
|
}
|
|
|
|
static struct volume_group *_vg_read_precommit_raw(struct cmd_context *cmd,
|
|
struct format_instance *fid,
|
|
const char *vgname,
|
|
struct metadata_area *mda,
|
|
struct cached_vg_fmtdata **vg_fmtdata,
|
|
unsigned *use_previous_vg)
|
|
{
|
|
struct mda_context *mdac = (struct mda_context *) mda->metadata_locn;
|
|
struct volume_group *vg;
|
|
|
|
vg = _vg_read_raw_area(cmd, fid, vgname, &mdac->area, vg_fmtdata, use_previous_vg, 1, mda_is_primary(mda));
|
|
|
|
return vg;
|
|
}
|
|
|
|
/*
 * VG metadata updates:
 *
 * [mda_header] [raw_locn_0] [raw_locn_1] [text metadata circular buffer]
 *
 * raw_locn.offset points into the metadata circular buffer to the
 * start of metadata.
 *
 * When vg_read wants to read metadata from disk, it looks at the
 * raw_locn_0 offset and reads the text metadata from that location
 * in the circular buffer.
 *
 * Two full copies of the text metadata always exist in the circular
 * buffer.  When new metadata needs to be written, the following
 * process is followed:
 *
 * - vg_write is called and writes the new text metadata into the
 *   circular buffer after the end of the current copy.  vg_write saves
 *   an in-memory raw_locn struct (mdac->rlocn) pointing to the new
 *   metadata in the buffer.  No raw_locn structs are written to disk.
 *
 * - vg_precommit is called and writes the in-memory raw_locn struct that
 *   was saved by vg_write into raw_locn_1 (slot 1, the "precommit" slot.)
 *   raw_locn_0 still points to the old metadata, and raw_locn_1 points
 *   to the new metadata.
 *
 * - vg_commit is called and writes the new raw_locn struct into raw_locn_0
 *   (slot 0, the "committed" slot).
 */
|
|
|
|
/*
|
|
* Writes new text metadata into the circular metadata buffer following the
|
|
* current (old) text metadata that's already in the metadata buffer.
|
|
*
|
|
* vg_write does *not* write new raw_locn fields pointing to the new metadata.
|
|
* The new raw_locn fields for the new metadata are saved in mdac->rlocn and
|
|
* are written later by both vg_precommit and vg_commit. vg_precommit will
|
|
* write the new raw_locn into slot 1 and vg_commit will write the new raw_locn
|
|
* into slot 0.
|
|
*/
|
|
|
|
static int _vg_write_raw(struct format_instance *fid, struct volume_group *vg,
|
|
struct metadata_area *mda)
|
|
{
|
|
char desc[2048];
|
|
struct dm_config_tree *cft;
|
|
struct mda_context *mdac = (struct mda_context *) mda->metadata_locn;
|
|
struct text_fid_context *fidtc = (struct text_fid_context *) fid->private;
|
|
struct raw_locn *rlocn_old;
|
|
struct raw_locn *rlocn_new;
|
|
struct mda_header *mdah;
|
|
struct pv_list *pvl;
|
|
uint64_t mda_start = mdac->area.start;
|
|
uint64_t max_size;
|
|
uint64_t old_start = 0, old_last = 0, old_size = 0, old_wrap = 0;
|
|
uint64_t new_start = 0, new_last = 0, new_size = 0, new_wrap = 0;
|
|
uint64_t write1_start = 0, write1_last = 0, write1_size = 0;
|
|
uint64_t write2_start = 0, write2_last = 0, write2_size = 0;
|
|
uint32_t write1_over = 0, write2_over = 0;
|
|
uint32_t write_buf_size;
|
|
uint32_t checksum;
|
|
uint32_t extra_size;
|
|
uint32_t bad_fields = 0;
|
|
char *write_buf = NULL;
|
|
const char *devname = dev_name(mdac->area.dev);
|
|
bool overlap;
|
|
int found = 0;
|
|
int r = 0;
|
|
|
|
/*
|
|
* old_start/old_last/new_start/new_last are relative to the
|
|
* start of the metadata area (mda_start), and specify the first
|
|
* and last bytes of old/new metadata copies in the metadata area.
|
|
*
|
|
* write1_start/write1_last/write2_start/write2_last are
|
|
* relative to the start of the disk, and specify the
|
|
* first/last bytes written to disk when writing a new
|
|
* copy of metadata. (Will generally be larger than the
|
|
* size of the metadata since the write is extended past
|
|
* the end of the new metadata to end on a 512 byte boundary.)
|
|
*
|
|
* So, write1_start == mda_start + new_start.
|
|
*
|
|
* "last" values are inclusive, so last - start + 1 = size.
|
|
* old_last/new_last are the last bytes containing metadata.
|
|
* write1_last/write2_last are the last bytes written.
|
|
* The next copy of metadata will be written beginning at
|
|
* write1_last+1.
|
|
*/
|
|
|
|
/* Ignore any mda on a PV outside the VG. vgsplit relies on this */
|
|
dm_list_iterate_items(pvl, &vg->pvs) {
|
|
if (pvl->pv->dev == mdac->area.dev) {
|
|
found = 1;
|
|
break;
|
|
}
|
|
}
|
|
if (!found)
|
|
return 1;
|
|
|
|
if (!(mdah = raw_read_mda_header(fid->fmt, &mdac->area, mda_is_primary(mda), mda->ignore_bad_fields, &bad_fields)))
|
|
goto_out;
|
|
|
|
/*
|
|
* Create a text metadata representation of struct vg in buffer.
|
|
* This buffer is written to disk below. This function is called
|
|
* to write metadata to each device/mda in the VG. The first time
|
|
* the metadata text is saved in write_buf and subsequent
|
|
* mdas use that.
|
|
*
|
|
* write_buf_size is increased in 64K increments, so will generally
|
|
* be larger than new_size. The extra space in write_buf (after
|
|
* new_size) is zeroed. More than new_size can be written from
|
|
* write_buf to zero data on disk following the new text metadata,
|
|
* up to the next 512 byte boundary.
|
|
*/
|
|
if (fidtc->write_buf) {
|
|
write_buf = fidtc->write_buf;
|
|
write_buf_size = fidtc->write_buf_size;
|
|
new_size = fidtc->new_metadata_size;
|
|
checksum = fidtc->checksum;
|
|
} else {
|
|
if (!vg->write_count++)
|
|
(void) dm_snprintf(desc, sizeof(desc), "Write from %s.", vg->cmd->cmd_line);
|
|
else
|
|
(void) dm_snprintf(desc, sizeof(desc), "Write[%u] from %s.", vg->write_count, vg->cmd->cmd_line);
|
|
|
|
new_size = text_vg_export_raw(vg, desc, &write_buf, &write_buf_size);
|
|
if (!new_size || !write_buf) {
|
|
log_error("VG %s metadata writing failed", vg->name);
|
|
goto out;
|
|
}
|
|
|
|
fidtc->write_buf = write_buf;
|
|
fidtc->write_buf_size = write_buf_size;
|
|
fidtc->new_metadata_size = new_size;
|
|
|
|
/* Immediately reuse existing buffer for parsing metadata back.
|
|
* Such VG is then used for as precommitted VG and later committed VG.
|
|
*
|
|
* 'Lazy' creation of such VG might improve performance, but we
|
|
* lose important validation that written metadata can be parsed. */
|
|
if (!(cft = config_tree_from_string_without_dup_node_check(write_buf, new_size - 1))) {
|
|
log_error("Error parsing metadata for VG %s.", vg->name);
|
|
goto out;
|
|
}
|
|
release_vg(vg->vg_precommitted);
|
|
vg->vg_precommitted = import_vg_from_config_tree(vg->cmd, vg->fid, cft);
|
|
dm_config_destroy(cft);
|
|
if (!vg->vg_precommitted)
|
|
goto_out;
|
|
|
|
fidtc->checksum = checksum = calc_crc(INITIAL_CRC, (uint8_t *)write_buf, new_size);
|
|
}
|
|
|
|
log_debug_metadata("VG %s seqno %u metadata write to %s mda_start %llu mda_size %llu mda_last %llu",
|
|
vg->name, vg->seqno, devname,
|
|
(unsigned long long)mda_start,
|
|
(unsigned long long)mdah->size,
|
|
(unsigned long long)(mda_start + mdah->size - 1));
|
|
|
|
/*
|
|
* The max size of a single copy of text metadata.
|
|
*
|
|
* The space available for all text metadata is the size of the
|
|
* metadata area (mdah->size) minus the sector used for the header.
|
|
* Two copies of the text metadata must fit in this space, so it is
|
|
* divided in two. This result is then reduced by 512 because any
|
|
* single copy of metadata is rounded to begin on a sector boundary.
|
|
*/
|
|
|
|
max_size = ((mdah->size - MDA_HEADER_SIZE) / 2) - 512;
|
|
|
|
if (new_size > max_size) {
|
|
log_error("VG %s %u metadata on %s (%llu bytes) exceeds maximum metadata size (%llu bytes)",
|
|
vg->name, vg->seqno, devname,
|
|
(unsigned long long)new_size,
|
|
(unsigned long long)max_size);
|
|
goto out;
|
|
}
|
|
|
|
/*
|
|
* rlocn_old is the current, committed, raw_locn data in slot0 on disk.
|
|
*
|
|
* rlocn_new (mdac->rlocn) is the new, in-memory, raw_locn data for the
|
|
* new metadata. rlocn_new is in-memory only, not yet written to disk.
|
|
*
|
|
* rlocn_new is not written to disk by vg_write. vg_write only writes
|
|
* the new text metadata into the circular buffer, it does not update any
|
|
* raw_locn slot to point to that new metadata. vg_write saves raw_locn
|
|
* values for the new metadata in memory at mdac->rlocn so that
|
|
* vg_precommit and vg_commit can find it later and write it to disk.
|
|
*
|
|
* rlocn/raw_locn values, old_start, old_last, old_size, new_start,
|
|
* new_last, new_size, are all in bytes, and are all relative to the
|
|
* the start of the metadata area (not to the start of the disk.)
|
|
*
|
|
* The start and last values are the first and last bytes that hold
|
|
* the metadata inclusively, e.g.
|
|
* metadata_v1 start = 512, last = 611, size = 100
|
|
* metadata_v2 start = 612, last = 711, size = 100
|
|
*
|
|
* {old,new}_{start,last} values are all offset values from the
|
|
* beginning of the metadata area mdac->area.start. At the beginning
|
|
* of the metadata area (area.start), the first 512 bytes
|
|
* (MDA_HEADER_SIZE) is reserved for the mda_header/raw_locn structs,
|
|
* after which the circular buffer of text metadata begins.
|
|
* So, the when the text metadata wraps around, it starts again at
|
|
* area.start + MDA_HEADER_SIZE.
|
|
*
|
|
* When pe_start is at 1MB (the default), and mda_start is at 4KB,
|
|
* there will be 1MB - 4KB - 512 bytes of circular buffer space for
|
|
* text metadata.
|
|
*/
|
|
|
|
rlocn_old = &mdah->raw_locns[0]; /* slot0, committed metadata */
|
|
|
|
if (rlocn_is_ignored(rlocn_old))
|
|
rlocn_old = NULL;
|
|
|
|
else if (!rlocn_old->offset && !rlocn_old->size)
|
|
rlocn_old = NULL;
|
|
|
|
else {
|
|
old_start = rlocn_old->offset;
|
|
old_size = rlocn_old->size;
|
|
|
|
if (rlocn_old->offset + rlocn_old->size > mdah->size) {
|
|
old_wrap = (old_start + old_size) - mdah->size;
|
|
old_last = old_wrap + MDA_HEADER_SIZE - 1;
|
|
} else {
|
|
old_wrap = 0;
|
|
old_last = old_start + old_size - 1;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* _next_rlocn_offset returns the new offset to use for the new
|
|
* metadata. It is set to follow the end of the old metadata, plus
|
|
* some adjustment to start the new metadata on a 512 byte alignment.
|
|
* If the new metadata would start beyond the end of the metadata area,
|
|
* or would start less than 512 bytes before the end of the metadata
|
|
* area, then the new start is set back at the beginning
|
|
* (metadata begins MDA_HEADER_SIZE after start of metadata area).
|
|
*/
|
|
new_start = _next_rlocn_offset(vg, rlocn_old, old_last, mdah, mda_start, MDA_ORIGINAL_ALIGNMENT);
|
|
|
|
if (new_start + new_size > mdah->size) {
|
|
new_wrap = (new_start + new_size) - mdah->size;
|
|
new_last = new_wrap + MDA_HEADER_SIZE - 1;
|
|
|
|
log_debug_metadata("VG %s %u wrapping metadata new_start %llu new_size %llu to size1 %llu size2 %llu",
|
|
vg->name, vg->seqno,
|
|
(unsigned long long)new_start,
|
|
(unsigned long long)new_size,
|
|
(unsigned long long)(new_size - new_wrap),
|
|
(unsigned long long)new_wrap);
|
|
} else {
|
|
new_wrap = 0;
|
|
new_last = new_start + new_size - 1;
|
|
}
|
|
|
|
/*
|
|
* Save the new metadata location in memory for vg_precommit and
|
|
* vg_commit. The new location is not written to disk here.
|
|
*/
|
|
rlocn_new = &mdac->rlocn;
|
|
rlocn_new->offset = new_start;
|
|
rlocn_new->size = new_size;
|
|
|
|
log_debug_metadata("VG %s %u metadata area location old start %llu last %llu size %llu wrap %llu",
|
|
vg->name, vg->seqno,
|
|
(unsigned long long)old_start,
|
|
(unsigned long long)old_last,
|
|
(unsigned long long)old_size,
|
|
(unsigned long long)old_wrap);
|
|
|
|
log_debug_metadata("VG %s %u metadata area location new start %llu last %llu size %llu wrap %llu",
|
|
vg->name, vg->seqno,
|
|
(unsigned long long)new_start,
|
|
(unsigned long long)new_last,
|
|
(unsigned long long)new_size,
|
|
(unsigned long long)new_wrap);
|
|
|
|
/*
|
|
* If the new copy of the metadata would overlap the old copy of the
|
|
* metadata, it means that the circular metadata buffer is full.
|
|
*
|
|
* Given the max_size restriction above, two copies of metadata should
|
|
* never overlap, so these overlap checks should not be technically
|
|
* necessary, and a failure should not occur here. It's left as a
|
|
* sanity check. For some unknown time, lvm did not enforce a
|
|
* max_size, but rather detected the too-large failure by checking for
|
|
* overlap between old and new.
|
|
*/
|
|
|
|
if (new_wrap && old_wrap) {
|
|
|
|
/* old and new can't both wrap without overlapping */
|
|
overlap = true;
|
|
|
|
} else if (!new_wrap && !old_wrap &&
|
|
(new_start > old_last) && (new_last > new_start)) {
|
|
|
|
/* new metadata is located entirely after the old metadata */
|
|
overlap = false;
|
|
|
|
} else if (!new_wrap && !old_wrap &&
|
|
(new_start < old_start) && (new_last < old_start)) {
|
|
|
|
/* new metadata is located entirely before the old metadata */
|
|
overlap = false;
|
|
|
|
} else if (old_wrap && !new_wrap &&
|
|
(old_last < new_start) && (new_start < new_last) && (new_last < old_start)) {
|
|
|
|
/* when old wraps and the new doesn't, then no overlap is:
|
|
old_last followed by new_start followed by new_last
|
|
followed by old_start */
|
|
overlap = false;
|
|
|
|
} else if (new_wrap && !old_wrap &&
|
|
(new_last < old_start) && (old_start < old_last) && (old_last < new_start)) {
|
|
|
|
/* when new wraps and the old doesn't, then no overlap is:
|
|
new_last followed by old_start followed by old_last
|
|
followed by new_start. */
|
|
overlap = false;
|
|
|
|
} else {
|
|
overlap = true;
|
|
}
|
|
|
|
if (overlap) {
|
|
log_error("VG %s %u metadata on %s (%llu bytes) too large for circular buffer (%llu bytes with %llu used)",
|
|
vg->name, vg->seqno, devname,
|
|
(unsigned long long)new_size,
|
|
(unsigned long long)(mdah->size - MDA_HEADER_SIZE),
|
|
(unsigned long long)old_size);
|
|
goto out;
|
|
}
|
|
|
|
if (!new_wrap) {
|
|
write1_start = mda_start + new_start;
|
|
write1_size = new_size;
|
|
write1_last = write1_start + write1_size - 1;
|
|
write1_over = (write1_last + 1) % 512;
|
|
write2_start = 0;
|
|
write2_size = 0;
|
|
write2_last = 0;
|
|
write2_over = 0;
|
|
} else {
|
|
write1_start = mda_start + new_start;
|
|
write1_size = new_size - new_wrap;
|
|
write1_last = write1_start + write1_size - 1;
|
|
write1_over = 0;
|
|
write2_start = mda_start + MDA_HEADER_SIZE;
|
|
write2_size = new_wrap;
|
|
write2_last = write2_start + write2_size - 1;
|
|
write2_over = (write2_last + 1) % 512;
|
|
}
|
|
|
|
if (!new_wrap)
|
|
log_debug_metadata("VG %s %u metadata disk location start %llu size %llu last %llu",
|
|
vg->name, vg->seqno,
|
|
(unsigned long long)write1_start,
|
|
(unsigned long long)write1_size,
|
|
(unsigned long long)write1_last);
|
|
else
|
|
log_debug_metadata("VG %s %u metadata disk location write1 start %llu size %llu last %llu write2 start %llu size %llu last %llu",
|
|
vg->name, vg->seqno,
|
|
(unsigned long long)write1_start,
|
|
(unsigned long long)write1_size,
|
|
(unsigned long long)write1_last,
|
|
(unsigned long long)write2_start,
|
|
(unsigned long long)write2_size,
|
|
(unsigned long long)write2_last);
|
|
|
|
/*
|
|
* Write more than the size of the new metadata, up to the next
|
|
* 512 byte boundary so that the space between this copy and the
|
|
* subsequent copy of metadata will be zeroed.
|
|
*
|
|
* Extend write1_size so that write1_last+1 is a 512 byte multiple.
|
|
* The next metadata write should follow immediately after the
|
|
* extended write1_last since new metadata tries to begin on a 512
|
|
* byte boundary.
|
|
*
|
|
* write1_size can be extended up to write_buf_size which is the size
|
|
* of write_buf (new_size is the portion of write_buf used by the new
|
|
* metadata.)
|
|
*
|
|
* If this metadata write will wrap, the first write is written
|
|
* all the way to the end of the metadata area, and it's the
|
|
* second wrapped write that is extended up to a 512 byte boundary.
|
|
*/
|
|
|
|
if (write1_over) {
|
|
extra_size = 512 - write1_over; /* this many extra zero bytes written after metadata text */
|
|
write1_size += extra_size;
|
|
write1_last = write1_start + write1_size - 1;
|
|
|
|
log_debug_metadata("VG %s %u metadata last align from %llu to %llu (+%u)",
|
|
vg->name, vg->seqno,
|
|
(unsigned long long)write1_last - extra_size,
|
|
(unsigned long long)write1_last, extra_size);
|
|
|
|
if (write1_size > write_buf_size) {
|
|
/* sanity check, shouldn't happen */
|
|
log_error("VG %s %u %s adjusted metadata end %llu extra %u larger than write buffer %llu",
|
|
vg->name, vg->seqno, devname,
|
|
(unsigned long long)write1_size, extra_size,
|
|
(unsigned long long)write_buf_size);
|
|
write1_size -= extra_size;
|
|
}
|
|
}
|
|
|
|
if (write2_over) {
|
|
extra_size = 512 - write2_over; /* this many extra zero bytes written after metadata text */
|
|
write2_size += extra_size;
|
|
write2_last = write2_start + write2_size - 1;
|
|
|
|
log_debug_metadata("VG %s %u metadata last align from %llu to %llu (+%u) (wrapped)",
|
|
vg->name, vg->seqno,
|
|
(unsigned long long)write2_last - extra_size,
|
|
(unsigned long long)write2_last, extra_size);
|
|
|
|
if (write1_size + write2_size > write_buf_size) {
|
|
/* sanity check, shouldn't happen */
|
|
log_error("VG %s %u %s adjusted metadata end %llu wrap %llu extra %u larger than write buffer %llu",
|
|
vg->name, vg->seqno, devname,
|
|
(unsigned long long)write1_size,
|
|
(unsigned long long)write2_size, extra_size,
|
|
(unsigned long long)write_buf_size);
|
|
write2_size -= extra_size;
|
|
}
|
|
}
|
|
|
|
if ((write1_size > write_buf_size) || (write2_size > write_buf_size)) {
|
|
/* sanity check, shouldn't happen */
|
|
log_error("VG %s %u %s metadata write size %llu %llu larger than buffer %llu",
|
|
vg->name, vg->seqno, devname,
|
|
(unsigned long long)write1_size,
|
|
(unsigned long long)write2_size,
|
|
(unsigned long long)write_buf_size);
|
|
goto out;
|
|
}
|
|
|
|
dev_set_last_byte(mdac->area.dev, mda_start + mdah->size);
|
|
|
|
log_debug_metadata("VG %s %u metadata write at %llu size %llu (wrap %llu)",
|
|
vg->name, vg->seqno,
|
|
(unsigned long long)write1_start,
|
|
(unsigned long long)write1_size,
|
|
(unsigned long long)write2_size);
|
|
|
|
if (!dev_write_bytes(mdac->area.dev, write1_start, (size_t)write1_size, write_buf)) {
|
|
log_error("Failed to write metadata to %s.", devname);
|
|
goto out;
|
|
}
|
|
|
|
if (write2_size) {
|
|
log_debug_metadata("VG %s %u metadata write at %llu size %llu (wrapped)",
|
|
vg->name, vg->seqno,
|
|
(unsigned long long)write2_start,
|
|
(unsigned long long)write2_size);
|
|
|
|
if (!dev_write_bytes(mdac->area.dev, write2_start, write2_size,
|
|
write_buf + new_size - new_wrap)) {
|
|
log_error("Failed to write metadata wrap to %s", devname);
|
|
goto out;
|
|
}
|
|
}
|
|
|
|
dev_unset_last_byte(mdac->area.dev);
|
|
|
|
rlocn_new->checksum = checksum;
|
|
|
|
r = 1;
|
|
|
|
out:
|
|
if (!r)
|
|
free_text_fidtc(vg);
|
|
|
|
return r;
|
|
}
|
|
|
|
/*
 * Writes new raw_locn to disk that was saved by vg_write_raw (in mdac->rlocn).
 * The new raw_locn points to the new metadata that was written by vg_write_raw.
 *
 * After vg_write writes the new text metadata into the circular buffer,
 * vg_precommit writes the new raw_locn (pointing to the new metadata)
 * into slot1 (raw_locns[1]).  Then vg_commit writes the same raw_locn
 * values again, but into slot0 (raw_locns[0]).  slot0 is the committed
 * slot, and once slot0 is written, subsequent vg_reads will see the new
 * metadata.
 *
 * precommit: non-zero to update the precommit slot (slot1), zero to update
 * the committed slot (slot0) and clear slot1.  When mdac->rlocn.size is 0
 * only slot1 is cleared, whatever the value of precommit.
 *
 * Returns 1 on success, or when the mda's device is not one of the VG's
 * PVs (nothing is written in that case).  Returns 0 if the mda header
 * cannot be read or written.
 */

static int _vg_commit_raw_rlocn(struct format_instance *fid,
				struct volume_group *vg,
				struct metadata_area *mda,
				int precommit)
{
	struct mda_context *mdac = (struct mda_context *) mda->metadata_locn;
	struct text_fid_context *fidtc = (struct text_fid_context *) fid->private;
	struct mda_header *mdab;
	struct raw_locn *rlocn_slot0;
	struct raw_locn *rlocn_slot1;
	struct raw_locn *rlocn_new;
	struct pv_list *pvl;
	uint32_t bad_fields = 0;
	int r = 0;
	int found = 0;

	/* Ignore any mda on a PV outside the VG. vgsplit relies on this */
	dm_list_iterate_items(pvl, &vg->pvs) {
		if (pvl->pv->dev == mdac->area.dev) {
			found = 1;
			break;
		}
	}
	/* Note: this early return bypasses the cleanup done at "out". */
	if (!found)
		return 1;

	/*
	 * Data is read into the mdab buffer, the mdab buffer is then modified
	 * with new raw_locn values, then the mdab buffer is written.  Note
	 * this is different than _vg_write_raw, where data is read into the
	 * mdah buffer, but the mdah buffer is not modified and mdac->rlocn is
	 * modified.
	 */
	if (!(mdab = raw_read_mda_header(fid->fmt, &mdac->area, mda_is_primary(mda), mda->ignore_bad_fields, &bad_fields)))
		goto_out;

	/*
	 * rlocn_slot0/rlocn_slot1 point into mdab which is the buffer that
	 * will be modified and written.
	 */
	rlocn_slot0 = &mdab->raw_locns[0];
	rlocn_slot1 = &mdab->raw_locns[1];

	/*
	 * If slot0 is flagged ignored, or has neither offset nor size,
	 * start from two fully zeroed slots.
	 */
	if (rlocn_is_ignored(rlocn_slot0) || (!rlocn_slot0->offset && !rlocn_slot0->size)) {
		rlocn_slot0->offset = 0;
		rlocn_slot0->size = 0;
		rlocn_slot0->checksum = 0;
		rlocn_slot1->offset = 0;
		rlocn_slot1->size = 0;
		rlocn_slot1->checksum = 0;
	}

	/*
	 * mdac->rlocn is the in-memory copy of the new metadata's location on
	 * disk.  mdac->rlocn was saved by vg_write after it wrote the new text
	 * metadata to disk.  This location of the new metadata is now written
	 * to disk by vg_precommit and vg_commit.  vg_precommit writes the new
	 * location into the precommit slot (slot1 / raw_locns[1]) and
	 * vg_commit writes the new location into committed slot (slot0 /
	 * raw_locns[0]).
	 *
	 * vg_revert sets the size of the in-memory mdac->rlocn to 0 and calls
	 * this function to clear the precommit slot.
	 */

	rlocn_new = &mdac->rlocn;

	if (!rlocn_new->size) {
		/*
		 * When there is no new metadata, the precommit slot is
		 * cleared and the committed slot is left alone. (see revert)
		 */
		rlocn_slot1->offset = 0;
		rlocn_slot1->size = 0;
		rlocn_slot1->checksum = 0;

	} else if (precommit) {
		/*
		 * vg_precommit writes the new raw_locn into slot 1,
		 * and keeps the existing committed raw_locn in slot 0.
		 */
		rlocn_slot1->offset = rlocn_new->offset;
		rlocn_slot1->size = rlocn_new->size;
		rlocn_slot1->checksum = rlocn_new->checksum;
	} else {
		/*
		 * vg_commit writes the new raw_locn into slot 0,
		 * and zeros the precommitted values in slot 1.
		 */
		rlocn_slot0->offset = rlocn_new->offset;
		rlocn_slot0->size = rlocn_new->size;
		rlocn_slot0->checksum = rlocn_new->checksum;

		rlocn_slot1->offset = 0;
		rlocn_slot1->size = 0;
		rlocn_slot1->checksum = 0;
	}

	/* Record the mda's current ignored state on slot0. */
	rlocn_set_ignored(rlocn_slot0, mda_is_ignored(mda));

	log_debug_metadata("VG %s metadata %scommit %sseq %u on %s mda header at %llu %s.",
			   vg->name,
			   (precommit) ? "pre" : "",
			   (!mdac->rlocn.size) ? "empty ": "",
			   vg->seqno, dev_name(mdac->area.dev),
			   (unsigned long long)mdac->area.start,
			   mda_is_ignored(mda) ? "(ignored)" : "(used)");

	log_debug_metadata("VG %s metadata %scommit %sslot0 offset %llu size %llu slot1 offset %llu size %llu.",
			   vg->name,
			   (precommit) ? "pre" : "",
			   (!mdac->rlocn.size) ? "empty ": "",
			   (unsigned long long)mdab->raw_locns[0].offset,
			   (unsigned long long)mdab->raw_locns[0].size,
			   (unsigned long long)mdab->raw_locns[1].offset,
			   (unsigned long long)mdab->raw_locns[1].size);

	/*
	 * mdab->raw_locns is the same element as rlocn_slot0, so this
	 * repeats the rlocn_set_ignored() call made before the logging.
	 */
	rlocn_set_ignored(mdab->raw_locns, mda_is_ignored(mda));

	if (!_raw_write_mda_header(fid->fmt, mdac->area.dev, mda_is_primary(mda), mdac->area.start,
				   mdab)) {
		/* The header buffer is only handed back to the pool on write failure. */
		dm_pool_free(fid->fmt->cmd->mem, mdab);
		log_error("Failed to write metadata area header");
		goto out;
	}

	r = 1;

out:
	/*
	 * Reached on success and on read/write failure.  For a commit (not a
	 * precommit) the text fid context is freed unless fidtc->preserve is set.
	 */
	if (!precommit && !fidtc->preserve)
		free_text_fidtc(vg);

	return r;
}
|
|
|
|
/*
 * vg_commit for a raw (on-device) mda: calls _vg_commit_raw_rlocn() with
 * precommit disabled, so the new raw_locn goes into the committed slot.
 */
static int _vg_commit_raw(struct format_instance *fid, struct volume_group *vg,
			  struct metadata_area *mda)
{
	const int precommit = 0;

	return _vg_commit_raw_rlocn(fid, vg, mda, precommit);
}
|
|
|
|
/*
 * vg_precommit for a raw (on-device) mda: calls _vg_commit_raw_rlocn() with
 * precommit enabled, so the new raw_locn goes into the precommit slot.
 */
static int _vg_precommit_raw(struct format_instance *fid,
			     struct volume_group *vg,
			     struct metadata_area *mda)
{
	const int precommit = 1;

	return _vg_commit_raw_rlocn(fid, vg, mda, precommit);
}
|
|
|
|
/* Close metadata area devices */
|
|
static int _vg_revert_raw(struct format_instance *fid, struct volume_group *vg,
|
|
struct metadata_area *mda)
|
|
{
|
|
struct mda_context *mdac = (struct mda_context *) mda->metadata_locn;
|
|
struct pv_list *pvl;
|
|
int found = 0;
|
|
|
|
/* Ignore any mda on a PV outside the VG. vgsplit relies on this */
|
|
dm_list_iterate_items(pvl, &vg->pvs) {
|
|
if (pvl->pv->dev == mdac->area.dev) {
|
|
found = 1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!found)
|
|
return 1;
|
|
|
|
/* Wipe pre-committed metadata */
|
|
mdac->rlocn.size = 0;
|
|
return _vg_commit_raw_rlocn(fid, vg, mda, 0);
|
|
}
|
|
|
|
/*
|
|
* vg_remove clears the two raw_locn slots but leaves the circular metadata
|
|
* buffer alone.
|
|
*/
|
|
|
|
static int _vg_remove_raw(struct format_instance *fid, struct volume_group *vg,
|
|
struct metadata_area *mda)
|
|
{
|
|
struct mda_context *mdac = (struct mda_context *) mda->metadata_locn;
|
|
struct mda_header *mdah;
|
|
struct raw_locn *rlocn_slot0;
|
|
struct raw_locn *rlocn_slot1;
|
|
uint32_t bad_fields = 0;
|
|
int r = 0;
|
|
|
|
if (!(mdah = dm_pool_alloc(fid->fmt->cmd->mem, MDA_HEADER_SIZE))) {
|
|
log_error("struct mda_header allocation failed");
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* FIXME: what's the point of reading the mda_header and metadata,
|
|
* since we zero the rlocn fields whether we can read them or not.
|
|
* Just to print the warning?
|
|
*/
|
|
|
|
if (!_raw_read_mda_header(mdah, &mdac->area, mda_is_primary(mda), 0, &bad_fields))
|
|
log_warn("WARNING: Removing metadata location on %s with bad mda header.",
|
|
dev_name(mdac->area.dev));
|
|
|
|
rlocn_slot0 = &mdah->raw_locns[0];
|
|
rlocn_slot1 = &mdah->raw_locns[1];
|
|
|
|
rlocn_slot0->offset = 0;
|
|
rlocn_slot0->size = 0;
|
|
rlocn_slot0->checksum = 0;
|
|
rlocn_set_ignored(rlocn_slot0, mda_is_ignored(mda));
|
|
|
|
rlocn_slot1->offset = 0;
|
|
rlocn_slot1->size = 0;
|
|
rlocn_slot1->checksum = 0;
|
|
|
|
if (!_raw_write_mda_header(fid->fmt, mdac->area.dev, mda_is_primary(mda), mdac->area.start,
|
|
mdah)) {
|
|
dm_pool_free(fid->fmt->cmd->mem, mdah);
|
|
log_error("Failed to write metadata area header");
|
|
goto out;
|
|
}
|
|
|
|
r = 1;
|
|
|
|
out:
|
|
return r;
|
|
}
|
|
|
|
static struct volume_group *_vg_read_file_name(struct format_instance *fid,
|
|
const char *vgname,
|
|
const char *read_path)
|
|
{
|
|
struct volume_group *vg;
|
|
time_t when;
|
|
char *desc;
|
|
|
|
if (!(vg = text_read_metadata_file(fid, read_path, &when, &desc))) {
|
|
log_error("Failed to read VG %s from %s", vgname, read_path);
|
|
return NULL;
|
|
}
|
|
|
|
/*
|
|
* Currently you can only have a single volume group per
|
|
* text file (this restriction may remain). We need to
|
|
* check that it contains the correct volume group.
|
|
*/
|
|
if (vgname && strcmp(vgname, vg->name)) {
|
|
fid->ref_count++; /* Preserve FID after vg release */
|
|
release_vg(vg);
|
|
log_error("'%s' does not contain volume group '%s'.",
|
|
read_path, vgname);
|
|
return NULL;
|
|
}
|
|
|
|
log_debug_metadata("Read volume group %s from %s", vg->name, read_path);
|
|
|
|
return vg;
|
|
}
|
|
|
|
static struct volume_group *_vg_read_file(struct cmd_context *cmd, struct format_instance *fid,
|
|
const char *vgname,
|
|
struct metadata_area *mda,
|
|
struct cached_vg_fmtdata **vg_fmtdata,
|
|
unsigned *use_previous_vg __attribute__((unused)))
|
|
{
|
|
struct text_context *tc = (struct text_context *) mda->metadata_locn;
|
|
|
|
return _vg_read_file_name(fid, vgname, tc->path_live);
|
|
}
|
|
|
|
static struct volume_group *_vg_read_precommit_file(struct cmd_context *cmd, struct format_instance *fid,
|
|
const char *vgname,
|
|
struct metadata_area *mda,
|
|
struct cached_vg_fmtdata **vg_fmtdata,
|
|
unsigned *use_previous_vg __attribute__((unused)))
|
|
{
|
|
struct text_context *tc = (struct text_context *) mda->metadata_locn;
|
|
struct volume_group *vg;
|
|
|
|
if ((vg = _vg_read_file_name(fid, vgname, tc->path_edit)))
|
|
vg->status |= PRECOMMITTED;
|
|
else
|
|
vg = _vg_read_file_name(fid, vgname, tc->path_live);
|
|
|
|
return vg;
|
|
}
|
|
|
|
static int _vg_write_file(struct format_instance *fid __attribute__((unused)),
|
|
struct volume_group *vg, struct metadata_area *mda)
|
|
{
|
|
struct text_context *tc = (struct text_context *) mda->metadata_locn;
|
|
|
|
FILE *fp;
|
|
int fd;
|
|
char *slash;
|
|
char temp_file[PATH_MAX], temp_dir[PATH_MAX];
|
|
|
|
slash = strrchr(tc->path_edit, '/');
|
|
|
|
if (slash == 0)
|
|
strcpy(temp_dir, ".");
|
|
else if (slash - tc->path_edit < PATH_MAX) {
|
|
dm_strncpy(temp_dir, tc->path_edit,
|
|
(size_t) (slash - tc->path_edit + 1));
|
|
} else {
|
|
log_error("Text format failed to determine directory.");
|
|
return 0;
|
|
}
|
|
|
|
if (!create_temp_name(temp_dir, temp_file, sizeof(temp_file), &fd,
|
|
&vg->cmd->rand_seed)) {
|
|
log_error("Couldn't create temporary text file name.");
|
|
return 0;
|
|
}
|
|
|
|
if (!(fp = fdopen(fd, "w"))) {
|
|
log_sys_error("fdopen", temp_file);
|
|
if (close(fd))
|
|
log_sys_error("fclose", temp_file);
|
|
return 0;
|
|
}
|
|
|
|
log_debug_metadata("Writing %s metadata to %s", vg->name, temp_file);
|
|
|
|
if (!text_vg_export_file(vg, tc->desc, fp)) {
|
|
log_error("Failed to write metadata to %s.", temp_file);
|
|
if (fclose(fp))
|
|
log_sys_error("fclose", temp_file);
|
|
return 0;
|
|
}
|
|
|
|
if (fsync(fd) && (errno != EROFS) && (errno != EINVAL)) {
|
|
log_sys_error("fsync", tc->path_edit);
|
|
if (fclose(fp))
|
|
log_sys_error("fclose", tc->path_edit);
|
|
return 0;
|
|
}
|
|
|
|
if (lvm_fclose(fp, tc->path_edit))
|
|
return_0;
|
|
|
|
log_debug_metadata("Renaming %s to %s", temp_file, tc->path_edit);
|
|
if (rename(temp_file, tc->path_edit)) {
|
|
log_error("%s: rename to %s failed: %s", temp_file,
|
|
tc->path_edit, strerror(errno));
|
|
return 0;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
static int _vg_commit_file_backup(struct format_instance *fid __attribute__((unused)),
|
|
struct volume_group *vg,
|
|
struct metadata_area *mda)
|
|
{
|
|
struct text_context *tc = (struct text_context *) mda->metadata_locn;
|
|
|
|
if (test_mode()) {
|
|
log_verbose("Test mode: Skipping committing %s metadata (%u)",
|
|
vg->name, vg->seqno);
|
|
if (unlink(tc->path_edit)) {
|
|
log_debug_metadata("Unlinking %s", tc->path_edit);
|
|
log_sys_error("unlink", tc->path_edit);
|
|
return 0;
|
|
}
|
|
} else {
|
|
log_debug_metadata("Committing file %s metadata (%u)", vg->name, vg->seqno);
|
|
log_debug_metadata("Renaming %s to %s", tc->path_edit, tc->path_live);
|
|
if (rename(tc->path_edit, tc->path_live)) {
|
|
log_error("%s: rename to %s failed: %s", tc->path_edit,
|
|
tc->path_live, strerror(errno));
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
sync_dir(tc->path_edit);
|
|
|
|
return 1;
|
|
}
|
|
|
|
static int _vg_commit_file(struct format_instance *fid, struct volume_group *vg,
|
|
struct metadata_area *mda)
|
|
{
|
|
struct text_context *tc = (struct text_context *) mda->metadata_locn;
|
|
const char *slash;
|
|
char new_name[PATH_MAX];
|
|
size_t len, vglen;
|
|
|
|
if (!_vg_commit_file_backup(fid, vg, mda))
|
|
return 0;
|
|
|
|
/* vgrename? */
|
|
if ((slash = strrchr(tc->path_live, '/')))
|
|
slash = slash + 1;
|
|
else
|
|
slash = tc->path_live;
|
|
|
|
if (strcmp(slash, vg->name)) {
|
|
vglen = strlen(vg->name) + 1;
|
|
len = slash - tc->path_live;
|
|
if ((len + vglen) > (sizeof(new_name) - 1)) {
|
|
log_error("Renaming path %s is too long for VG %s.",
|
|
tc->path_live, vg->name);
|
|
return 0;
|
|
}
|
|
strncpy(new_name, tc->path_live, len);
|
|
memcpy(new_name + len, vg->name, vglen);
|
|
log_debug_metadata("Renaming %s to %s", tc->path_live, new_name);
|
|
if (test_mode())
|
|
log_verbose("Test mode: Skipping rename");
|
|
else {
|
|
if (rename(tc->path_live, new_name)) {
|
|
log_error("%s: rename to %s failed: %s",
|
|
tc->path_live, new_name,
|
|
strerror(errno));
|
|
sync_dir(new_name);
|
|
return 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
static int _vg_remove_file(struct format_instance *fid __attribute__((unused)),
|
|
struct volume_group *vg __attribute__((unused)),
|
|
struct metadata_area *mda)
|
|
{
|
|
struct text_context *tc = (struct text_context *) mda->metadata_locn;
|
|
|
|
if (path_exists(tc->path_edit) && unlink(tc->path_edit)) {
|
|
log_sys_error("unlink", tc->path_edit);
|
|
return 0;
|
|
}
|
|
|
|
if (path_exists(tc->path_live) && unlink(tc->path_live)) {
|
|
log_sys_error("unlink", tc->path_live);
|
|
return 0;
|
|
}
|
|
|
|
sync_dir(tc->path_live);
|
|
|
|
return 1;
|
|
}
|
|
|
|
/*
 * Read a summary of the committed metadata (slot0) described by an
 * already-read mda_header.
 *
 * fmt:              passed through to text_read_metadata_summary().
 * mda:              receives scan_text_offset/scan_text_checksum from slot0.
 * mdah:             the mda header; must not be NULL.
 * primary_mda:      used to build the MDA_CONTENT_REASON() value for the read.
 * dev_area:         device and start offset of the metadata area.
 * vgsummary:        receives mda_checksum/mda_size (and zero_offset when slot0
 *                   has offset 0); filled further by text_read_metadata_summary().
 * mda_free_sectors: optional; if non-NULL receives the free space in sectors.
 *                   It is set to the "unused area" value right after the
 *                   NULL check, so it holds that value on every later
 *                   failure return, and is recomputed on success.
 *
 * Returns 1 on success.  Returns 0 if mdah is NULL, slot0 has offset 0,
 * the summary cannot be read, or the VG name fails validate_name().
 */
int read_metadata_location_summary(const struct format_type *fmt,
		    struct metadata_area *mda,
		    struct mda_header *mdah, int primary_mda, struct device_area *dev_area,
		    struct lvmcache_vgsummary *vgsummary, uint64_t *mda_free_sectors)
{
	struct raw_locn *rlocn;
	uint32_t wrap = 0;
	uint64_t max_size;

	if (!mdah) {
		log_error(INTERNAL_ERROR "read_metadata_location_summary called with NULL pointer for mda_header");
		return 0;
	}

	/*
	 * For the case where the metadata area is unused, half is available.
	 */
	if (mda_free_sectors) {
		max_size = ((mdah->size - MDA_HEADER_SIZE) / 2) - 512;
		*mda_free_sectors = max_size >> SECTOR_SHIFT;
	}

	rlocn = mdah->raw_locns; /* slot0, committed metadata */

	/*
	 * If no valid offset, do not try to search for vgname
	 */
	if (!rlocn->offset) {
		log_debug_metadata("Metadata location on %s at %llu has offset 0.",
				   dev_name(dev_area->dev),
				   (unsigned long long)(dev_area->start + rlocn->offset));
		vgsummary->zero_offset = 1;
		return 0;
	}

	/*
	 * This function is used to read the vg summary during label scan.
	 * Save the text start location and checksum during scan.  After the VG
	 * lock is acquired in vg_read, we can reread the mda_header, and
	 * compare rlocn->offset,checksum to what was saved during scan.  If
	 * unchanged, it means that the metadata was not changed between scan
	 * and the read.
	 */
	mda->scan_text_offset = rlocn->offset;
	mda->scan_text_checksum = rlocn->checksum;

	/*
	 * When the current metadata wraps around the end of the metadata area
	 * (so some is located at the end and some is located at the
	 * beginning), then "wrap" is the number of bytes that was written back
	 * at the beginning.  The end of this wrapped metadata is located at an
	 * offset of wrap+MDA_HEADER_SIZE from area.start.
	 */
	if (rlocn->offset + rlocn->size > mdah->size)
		wrap = (uint32_t) ((rlocn->offset + rlocn->size) - mdah->size);

	/*
	 * Did we see this metadata before?
	 * Look in lvmcache to see if there is vg info matching
	 * the checksum/size that we see in the mda_header (rlocn)
	 * on this device.  If so, then vgsummary->name is set
	 * and controls if the "checksum_only" flag passed to
	 * text_read_metadata_summary() is 1 or 0.
	 *
	 * If checksum_only = 1, then text_read_metadata_summary()
	 * will read the metadata from this device, and run the
	 * checksum function on it.  If the calculated checksum
	 * of the metadata matches the checksum in the mda_header,
	 * which also matches the checksum saved in vginfo from
	 * another device, then it skips parsing the metadata into
	 * a config tree, which saves considerable cpu time.
	 *
	 * (NB. there can be different VGs with different metadata
	 * and checksums, but with the same name.)
	 *
	 * FIXME: handle the case where mda_header checksum is bad
	 * but metadata checksum is good.
	 */

	/*
	 * If the checksum we compute of the metadata differs from
	 * the checksum from mda_header that we save here, then we
	 * ignore the device.  FIXME: we need to classify a device
	 * with errors like this as defective.
	 *
	 * If the checksum from mda_header and computed from metadata
	 * does not match the checksum saved in lvmcache from a prev
	 * device, then we do not skip parsing/saving metadata from
	 * this dev.  It's parsed, fields saved in vgsummary, which
	 * is passed into lvmcache (update_vgname_and_id), and
	 * there we'll see a checksum mismatch.
	 */
	vgsummary->mda_checksum = rlocn->checksum;
	vgsummary->mda_size = rlocn->size;

	/* Keep track of largest metadata size we find. */
	lvmcache_save_metadata_size(rlocn->size);

	/* A hit in lvmcache skips both the read and the name validation below. */
	if (lvmcache_lookup_mda(vgsummary)) {
		log_debug("Skipping read of already known VG metadata with matching mda checksum on %s.",
			  dev_name(dev_area->dev));
		goto out;
	}

	/*
	 * Two extents are passed: (rlocn->size - wrap) bytes starting at
	 * start + offset, and the wrapped part of "wrap" bytes starting
	 * right after the mda header.
	 */
	if (!text_read_metadata_summary(fmt, dev_area->dev, MDA_CONTENT_REASON(primary_mda),
				(off_t) (dev_area->start + rlocn->offset),
				(uint32_t) (rlocn->size - wrap),
				(off_t) (dev_area->start + MDA_HEADER_SIZE),
				wrap, calc_crc, vgsummary->vgname ? 1 : 0,
				vgsummary)) {
		log_warn("WARNING: metadata on %s at %llu has invalid summary for VG.",
			 dev_name(dev_area->dev),
			 (unsigned long long)(dev_area->start + rlocn->offset));
		return 0;
	}

	/* Ignore this entry if the characters aren't permissible */
	if (!validate_name(vgsummary->vgname)) {
		log_warn("WARNING: metadata on %s at %llu has invalid VG name.",
			 dev_name(dev_area->dev),
			 (unsigned long long)(dev_area->start + rlocn->offset));
		return 0;
	}
out:
	log_debug_metadata("Found metadata summary on %s at %llu size %llu for VG %s",
			   dev_name(dev_area->dev),
			   (unsigned long long)(dev_area->start + rlocn->offset),
			   (unsigned long long)rlocn->size,
			   vgsummary->vgname);

	if (mda_free_sectors) {
		/*
		 * Report remaining space given that a single copy of metadata
		 * can be as large as half the total metadata space, minus 512
		 * because each copy is rounded to begin on a sector boundary.
		 */
		max_size = ((mdah->size - MDA_HEADER_SIZE) / 2) - 512;

		if (rlocn->size >= max_size)
			*mda_free_sectors = UINT64_C(0);
		else
			*mda_free_sectors = (max_size - rlocn->size) >> SECTOR_SHIFT;
	}

	return 1;
}
|
|
|
|
/*
 * Context handed to _write_single_mda() through lvmcache_foreach_mda()
 * by _text_pv_write().
 */
struct _write_single_mda_baton {
	const struct format_type *fmt;	/* passed to _raw_write_mda_header() */
	struct physical_volume *pv;	/* set by _text_pv_write(); not read by _write_single_mda() */
};
|
|
|
|
static int _write_single_mda(struct metadata_area *mda, void *baton)
|
|
{
|
|
struct _write_single_mda_baton *p = baton;
|
|
struct mda_context *mdac;
|
|
|
|
char buf[MDA_HEADER_SIZE] __attribute__((aligned(8))) = { 0 };
|
|
struct mda_header *mdah = (struct mda_header *) buf;
|
|
|
|
mdac = mda->metadata_locn;
|
|
mdah->size = mdac->area.size;
|
|
rlocn_set_ignored(mdah->raw_locns, mda_is_ignored(mda));
|
|
|
|
if (!_raw_write_mda_header(p->fmt, mdac->area.dev, mda_is_primary(mda),
|
|
mdac->area.start, mdah)) {
|
|
return_0;
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
static int _set_ext_flags(struct physical_volume *pv, struct lvmcache_info *info)
|
|
{
|
|
uint32_t ext_flags = lvmcache_ext_flags(info);
|
|
|
|
if (is_orphan(pv))
|
|
ext_flags &= ~PV_EXT_USED;
|
|
else
|
|
ext_flags |= PV_EXT_USED;
|
|
|
|
lvmcache_set_ext_version(info, PV_HEADER_EXTENSION_VSN);
|
|
lvmcache_set_ext_flags(info, ext_flags);
|
|
|
|
return 1;
|
|
}
|
|
|
|
/*
 * Write the PV label and fresh mda headers for pv.
 *
 * Steps, in order:
 *  - add/update the lvmcache entry for the PV,
 *  - drop the cached mdas and re-add those found in the PV's format instance,
 *  - update cached bootloader and data areas,
 *  - write an mda header for every cached mda (_write_single_mda),
 *  - set the PV header extension flags,
 *  - write the label.
 *
 * Returns 1 on success, 0 on failure.
 */
/* Only for orphans - FIXME That's not true any more */
static int _text_pv_write(struct cmd_context *cmd, const struct format_type *fmt, struct physical_volume *pv)
{
	/* One extra byte each so the ID_LEN-byte ids stay NUL-terminated. */
	char pvid[ID_LEN + 1] __attribute__((aligned(8))) = { 0 };
	char vgid[ID_LEN + 1] __attribute__((aligned(8))) = { 0 };
	struct format_instance *fid = pv->fid;
	struct label *label;
	struct lvmcache_info *info;
	struct mda_context *mdac;
	struct metadata_area *mda;
	struct _write_single_mda_baton baton;
	unsigned mda_index;

	/* For an orphan the VG name is used as vgid; vgid stays empty if neither case applies. */
	if (is_orphan_vg(pv->vg_name))
		memcpy(vgid, pv->vg_name, ID_LEN);
	else if (pv->vg)
		memcpy(vgid, &pv->vg->id.uuid, ID_LEN);

	memcpy(pvid, &pv->id.uuid, ID_LEN);

	/* Add a new cache entry with PV info or update existing one. */
	if (!(info = lvmcache_add(cmd, fmt->labeller, pvid,
				  pv->dev, pv->label_sector, pv->vg_name,
				  vgid[0] ? vgid : NULL,
				  0, NULL)))
		return_0;

	/* lvmcache_add() creates info and info->label structs for the dev, get info->label. */
	label = lvmcache_get_label(info);

	lvmcache_update_pv(info, pv, fmt);

	/* Flush all cached metadata areas, we will reenter new/modified ones. */
	lvmcache_del_mdas(info);

	/*
	 * Add all new or modified metadata areas for this PV stored in
	 * its format instance.  If this PV is not part of a VG yet,
	 * pv->fid will be used.  Otherwise pv->vg->fid will be used.
	 * The fid_get_mda_indexed fn can handle that transparently,
	 * just pass the right format_instance in.
	 */

	/* FIXME: why is old needed here? */
	/* The mdas are looked up under the old PV id when one is set. */
	if (*pv->old_id.uuid)
		memcpy(pvid, &pv->old_id.uuid, ID_LEN);
	else
		memcpy(pvid, &pv->id.uuid, ID_LEN);

	for (mda_index = 0; mda_index < FMT_TEXT_MAX_MDAS_PER_PV; mda_index++) {
		if (!(mda = fid_get_mda_indexed(fid, pvid, ID_LEN, mda_index)))
			continue;

		mdac = (struct mda_context *) mda->metadata_locn;
		log_debug_metadata("Creating metadata area on %s at sector "
				   FMTu64 " size " FMTu64 " sectors",
				   dev_name(mdac->area.dev),
				   mdac->area.start >> SECTOR_SHIFT,
				   mdac->area.size >> SECTOR_SHIFT);

		// if fmt is not the same as info->fmt we are in trouble
		if (!lvmcache_add_mda(info, mdac->area.dev,
				      mdac->area.start, mdac->area.size,
				      mda_is_ignored(mda), NULL))
			return_0;
	}

	if (!lvmcache_update_bas(info, pv))
		return_0;

	/*
	 * FIXME: Allow writing zero offset/size data area to disk.
	 *        This requires defining a special value since we can't
	 *        write offset/size that is 0/0 - this is already reserved
	 *        as a delimiter in data/metadata area area list in PV header
	 *        (needs exploring compatibility with older lvm2).
	 */

	/*
	 * We can't actually write pe_start = 0 (a data area offset)
	 * in PV header now.  We need to replace this value here.  This can
	 * happen with vgcfgrestore with redefined pe_start or
	 * pvcreate --restorefile.  However, we can have this value in
	 * metadata which will override the value in the PV header.
	 */

	if (!lvmcache_update_das(info, pv))
		return_0;

	baton.pv = pv;
	baton.fmt = fmt;

	/* Write a new mda header for each mda now held in the cache. */
	if (!lvmcache_foreach_mda(info, _write_single_mda, &baton))
		return_0;

	if (!_set_ext_flags(pv, info))
		return_0;

	if (!label_write(pv->dev, label)) {
		stack;
		return 0;
	}

	/*
	 *  FIXME: We should probably use the format instance's metadata
	 *        areas for label_write and only if it's successful,
	 *        update the cache afterwards?
	 */

	return 1;
}
|
|
|
|
static int _text_pv_needs_rewrite(const struct format_type *fmt, struct physical_volume *pv,
|
|
int *needs_rewrite)
|
|
{
|
|
struct lvmcache_info *info;
|
|
uint32_t ext_vsn;
|
|
uint32_t ext_flags;
|
|
|
|
*needs_rewrite = 0;
|
|
|
|
if (!pv->is_labelled)
|
|
return 1;
|
|
|
|
if (!pv->dev)
|
|
return 1;
|
|
|
|
if (!(info = lvmcache_info_from_pv_id(&pv->id, pv->dev, 0))) {
|
|
log_error("Failed to find cached info for PV %s.", pv_dev_name(pv));
|
|
return 0;
|
|
}
|
|
|
|
ext_vsn = lvmcache_ext_version(info);
|
|
|
|
if (ext_vsn < PV_HEADER_EXTENSION_VSN) {
|
|
log_debug("PV %s header needs rewrite for new ext version", dev_name(pv->dev));
|
|
*needs_rewrite = 1;
|
|
}
|
|
|
|
ext_flags = lvmcache_ext_flags(info);
|
|
if (!(ext_flags & PV_EXT_USED)) {
|
|
log_debug("PV %s header needs rewrite to set ext used", dev_name(pv->dev));
|
|
*needs_rewrite = 1;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
* Copy constructor for a metadata_locn.
|
|
*/
|
|
static void *_metadata_locn_copy_raw(struct dm_pool *mem, void *metadata_locn)
|
|
{
|
|
struct mda_context *mdac, *mdac_new;
|
|
|
|
mdac = (struct mda_context *) metadata_locn;
|
|
if (!(mdac_new = dm_pool_alloc(mem, sizeof(*mdac_new)))) {
|
|
log_error("mda_context allocation failed");
|
|
return NULL;
|
|
}
|
|
memcpy(mdac_new, mdac, sizeof(*mdac));
|
|
|
|
return mdac_new;
|
|
}
|
|
|
|
/*
|
|
* Return a string description of the metadata location.
|
|
*/
|
|
static const char *_metadata_locn_name_raw(void *metadata_locn)
|
|
{
|
|
struct mda_context *mdac = (struct mda_context *) metadata_locn;
|
|
|
|
return dev_name(mdac->area.dev);
|
|
}
|
|
|
|
static uint64_t _metadata_locn_offset_raw(void *metadata_locn)
|
|
{
|
|
struct mda_context *mdac = (struct mda_context *) metadata_locn;
|
|
|
|
return mdac->area.start;
|
|
}
|
|
|
|
/*
 * Initialise the layout fields of a new PV from the creation arguments.
 *
 * Sets pv->pe_align and pv->pe_align_offset, then derives pv->pe_start and,
 * when a bootloader area is requested, pv->ba_start/pv->ba_size.  Also
 * copies the extent size, extent count and label sector from pva when
 * they are given.
 *
 * Returns 1 on success.  Returns 0 when the alignment, the bootloader
 * area, or the extents do not fit the device, or when a bootloader area
 * would overlap the data area.
 */
static int _text_pv_initialise(const struct format_type *fmt,
			       struct pv_create_args *pva,
			       struct physical_volume *pv)
{
	uint64_t data_alignment_sectors = pva->data_alignment;
	uint64_t data_alignment_offset_sectors = pva->data_alignment_offset;
	uint64_t adjustment;
	uint64_t final_alignment_sectors = 0;

	log_debug("PV init requested data_alignment_sectors %llu data_alignment_offset_sectors %llu",
		  (unsigned long long)data_alignment_sectors, (unsigned long long)data_alignment_offset_sectors);

	/* No alignment requested: fall back to the configured value. */
	if (!data_alignment_sectors) {
		/* NOTE(review): the "* 2" presumably converts a KiB setting to 512-byte sectors - confirm. */
		data_alignment_sectors = find_config_tree_int(pv->fmt->cmd, devices_data_alignment_CFG, NULL) * 2;
		if (data_alignment_sectors)
			log_debug("PV init config data_alignment_sectors %llu",
				  (unsigned long long)data_alignment_sectors);
	}

	/* sets pv->pe_align */
	set_pe_align(pv, data_alignment_sectors);

	/* sets pv->pe_align_offset */
	set_pe_align_offset(pv, data_alignment_offset_sectors);

	if (pv->pe_align < pv->pe_align_offset) {
		log_error("%s: pe_align (%llu sectors) must not be less than pe_align_offset (%llu sectors)",
			  pv_dev_name(pv), (unsigned long long)pv->pe_align, (unsigned long long)pv->pe_align_offset);
		return 0;
	}

	final_alignment_sectors = pv->pe_align + pv->pe_align_offset;

	log_debug("PV init final alignment %llu sectors from align %llu align_offset %llu",
		  (unsigned long long)final_alignment_sectors,
		  (unsigned long long)pv->pe_align,
		  (unsigned long long)pv->pe_align_offset);

	if (pv->size < final_alignment_sectors) {
		log_error("%s: Data alignment must not exceed device size.",
			  pv_dev_name(pv));
		return 0;
	}

	if (pv->size < final_alignment_sectors + pva->ba_size) {
		log_error("%s: Bootloader area with data-aligned start must "
			  "not exceed device size.", pv_dev_name(pv));
		return 0;
	}

	if (pva->pe_start == PV_PE_START_CALC) {
		/*
		 * Calculate new PE start and bootloader area start value.
		 * Make sure both are properly aligned!
		 * If PE start can't be aligned because BA is taking
		 * the whole space, make PE start equal to the PV size
		 * which effectively disables DA - it will have zero size.
		 * This needs to be done as we can't have a PV without any DA.
		 * But we still want to support a PV with BA only!
		 */
		if (pva->ba_size) {
			pv->ba_start = final_alignment_sectors;
			pv->ba_size = pva->ba_size;
			/* Round the BA size up to a multiple of pe_align ... */
			if ((adjustment = pva->ba_size % pv->pe_align))
				pv->ba_size += pv->pe_align - adjustment;
			/* ... but never past the end of the device. */
			if (pv->size < pv->ba_start + pv->ba_size)
				pv->ba_size = pv->size - pv->ba_start;
			pv->pe_start = pv->ba_start + pv->ba_size;
			log_debug("Setting pe start to %llu sectors after ba start %llu size %llu for %s",
				  (unsigned long long)pv->pe_start,
				  (unsigned long long)pv->ba_start,
				  (unsigned long long)pv->ba_size,
				  pv_dev_name(pv));
		} else {
			pv->pe_start = final_alignment_sectors;
			log_debug("Setting PE start to %llu sectors for %s",
				  (unsigned long long)pv->pe_start, pv_dev_name(pv));
		}
	} else {
		/*
		 * Try to keep the value of PE start set to a firm value if
		 * requested.  This is useful when restoring existing PE start
		 * value (e.g. backups).  Also, if creating a BA, try to place
		 * it in between the final alignment and existing PE start
		 * if possible.
		 */
		pv->pe_start = pva->pe_start;

		log_debug("Setting pe start to requested %llu sectors for %s",
			  (unsigned long long)pv->pe_start, pv_dev_name(pv));

		if (pva->ba_size) {
			/*
			 * Reject if an explicit BA would run past pe_start, or if
			 * the BA does not fit between the final alignment and pe_start.
			 */
			if ((pva->ba_start && pva->ba_start + pva->ba_size > pva->pe_start) ||
			    (pva->pe_start <= final_alignment_sectors) ||
			    (pva->pe_start - final_alignment_sectors < pva->ba_size)) {
				log_error("%s: Bootloader area would overlap data area.", pv_dev_name(pv));
				return 0;
			}

			/* GNU "?:" extension: use pva->ba_start when non-zero, else the final alignment. */
			pv->ba_start = pva->ba_start ? : final_alignment_sectors;
			pv->ba_size = pva->ba_size;
		}
	}

	if (pva->extent_size)
		pv->pe_size = pva->extent_size;

	if (pva->extent_count)
		pv->pe_count = pva->extent_count;

	/* The multiplication is widened to 64 bits via the cast on pe_size. */
	if ((pv->pe_start + pv->pe_count * (uint64_t)pv->pe_size - 1) > pv->size) {
		log_error("Physical extents (%s) end beyond end of device (%s) %s.",
			  display_size(pv->fmt->cmd, pv->pe_start + pv->pe_count * (uint64_t)pv->pe_size - 1),
			  display_size(pv->fmt->cmd, pv->size),
			  pv_dev_name(pv));
		return 0;
	}

	/* UINT64_C(-1) means no label sector was requested. */
	if (pva->label_sector != UINT64_C(-1))
		pv->label_sector = pva->label_sector;

	return 1;
}
|
|
|
|
static void _text_destroy_instance(struct format_instance *fid)
|
|
{
|
|
if (--fid->ref_count <= 1) {
|
|
if (fid->metadata_areas_index)
|
|
dm_hash_destroy(fid->metadata_areas_index);
|
|
dm_pool_destroy(fid->mem);
|
|
}
|
|
}
|
|
|
|
static void _text_destroy(struct format_type *fmt)
|
|
{
|
|
if (fmt->orphan_vg)
|
|
free_orphan_vg(fmt->orphan_vg);
|
|
|
|
free(fmt->private);
|
|
free(fmt);
|
|
}
|
|
|
|
static const struct metadata_area_ops _metadata_text_file_ops = {
|
|
.vg_read = _vg_read_file,
|
|
.vg_read_precommit = _vg_read_precommit_file,
|
|
.vg_write = _vg_write_file,
|
|
.vg_remove = _vg_remove_file,
|
|
.vg_commit = _vg_commit_file
|
|
};
|
|
|
|
static const struct metadata_area_ops _metadata_text_file_backup_ops = {
|
|
.vg_read = _vg_read_file,
|
|
.vg_write = _vg_write_file,
|
|
.vg_remove = _vg_remove_file,
|
|
.vg_commit = _vg_commit_file_backup
|
|
};
|
|
|
|
static const struct metadata_area_ops _metadata_text_raw_ops = {
|
|
.vg_read = _vg_read_raw,
|
|
.vg_read_precommit = _vg_read_precommit_raw,
|
|
.vg_write = _vg_write_raw,
|
|
.vg_remove = _vg_remove_raw,
|
|
.vg_precommit = _vg_precommit_raw,
|
|
.vg_commit = _vg_commit_raw,
|
|
.vg_revert = _vg_revert_raw,
|
|
.mda_metadata_locn_copy = _metadata_locn_copy_raw,
|
|
.mda_metadata_locn_name = _metadata_locn_name_raw,
|
|
.mda_metadata_locn_offset = _metadata_locn_offset_raw,
|
|
.mda_free_sectors = _mda_free_sectors_raw,
|
|
.mda_total_sectors = _mda_total_sectors_raw,
|
|
.mda_in_vg = _mda_in_vg_raw,
|
|
.mda_locns_match = _mda_locns_match_raw,
|
|
.mda_get_device = _mda_get_device_raw,
|
|
};
|
|
|
|
static int _text_pv_setup(const struct format_type *fmt,
|
|
struct physical_volume *pv,
|
|
struct volume_group *vg)
|
|
{
|
|
char pvid[ID_LEN + 1] __attribute__((aligned(8)));
|
|
struct format_instance *fid = pv->fid;
|
|
struct lvmcache_info *info;
|
|
unsigned mda_index;
|
|
struct metadata_area *pv_mda, *pv_mda_copy;
|
|
struct mda_context *pv_mdac;
|
|
uint64_t pe_count;
|
|
uint64_t size_reduction = 0;
|
|
|
|
pvid[ID_LEN] = 0;
|
|
if (*pv->old_id.uuid)
|
|
memcpy(pvid, &pv->old_id.uuid, ID_LEN);
|
|
else
|
|
memcpy(pvid, &pv->id.uuid, ID_LEN);
|
|
|
|
/* If PV has its own format instance, add mdas from pv->fid to vg->fid. */
|
|
if (pv->fid != vg->fid) {
|
|
for (mda_index = 0; mda_index < FMT_TEXT_MAX_MDAS_PER_PV; mda_index++) {
|
|
if (!(pv_mda = fid_get_mda_indexed(fid, pvid, ID_LEN, mda_index)))
|
|
continue;
|
|
|
|
/* Be sure it's not already in VG's format instance! */
|
|
if (!fid_get_mda_indexed(vg->fid, pvid, ID_LEN, mda_index)) {
|
|
if (!(pv_mda_copy = mda_copy(vg->fid->mem, pv_mda)))
|
|
return_0;
|
|
fid_add_mda(vg->fid, pv_mda_copy, pvid, ID_LEN, mda_index);
|
|
}
|
|
}
|
|
}
|
|
/*
|
|
* Otherwise, if the PV is already a part of the VG (pv->fid == vg->fid),
|
|
* reread PV mda information from the cache and add it to vg->fid.
|
|
*/
|
|
else {
|
|
if (!pv->dev ||
|
|
!(info = lvmcache_info_from_pvid(pv->dev->pvid, pv->dev, 0))) {
|
|
log_error("PV %s missing from cache", pv_dev_name(pv));
|
|
return 0;
|
|
}
|
|
|
|
if (!lvmcache_check_format(info, fmt))
|
|
return_0;
|
|
|
|
if (!lvmcache_fid_add_mdas_pv(info, fid))
|
|
return_0;
|
|
}
|
|
|
|
/* If there's the 2nd mda, we need to reduce
|
|
* usable size for further pe_count calculation! */
|
|
if ((pv_mda = fid_get_mda_indexed(fid, pvid, ID_LEN, 1)) &&
|
|
(pv_mdac = pv_mda->metadata_locn))
|
|
size_reduction = pv_mdac->area.size >> SECTOR_SHIFT;
|
|
|
|
/* From now on, VG format instance will be used. */
|
|
pv_set_fid(pv, vg->fid);
|
|
|
|
/* FIXME Cope with genuine pe_count 0 */
|
|
|
|
/* If missing, estimate pv->size from file-based metadata */
|
|
if (!pv->size && pv->pe_count)
|
|
pv->size = pv->pe_count * (uint64_t) vg->extent_size +
|
|
pv->pe_start + size_reduction;
|
|
|
|
/* Recalculate number of extents that will fit */
|
|
if (!pv->pe_count && vg->extent_size) {
|
|
pe_count = (pv->size - pv->pe_start - size_reduction) /
|
|
vg->extent_size;
|
|
if (pe_count > UINT32_MAX) {
|
|
log_error("PV %s too large for extent size %s.",
|
|
pv_dev_name(pv),
|
|
display_size(vg->cmd, (uint64_t) vg->extent_size));
|
|
return 0;
|
|
}
|
|
pv->pe_count = (uint32_t) pe_count;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
static void *_create_text_context(struct dm_pool *mem, struct text_context *tc)
|
|
{
|
|
struct text_context *new_tc;
|
|
const char *path;
|
|
char *tmp;
|
|
|
|
if (!tc)
|
|
return NULL;
|
|
|
|
path = tc->path_live;
|
|
|
|
if ((tmp = strstr(path, ".tmp")) && (tmp == path + strlen(path) - 4)) {
|
|
log_error("%s: Volume group filename may not end in .tmp",
|
|
path);
|
|
return NULL;
|
|
}
|
|
|
|
if (!(new_tc = dm_pool_alloc(mem, sizeof(*new_tc))))
|
|
return_NULL;
|
|
|
|
if (!(new_tc->path_live = dm_pool_strdup(mem, path)))
|
|
goto_bad;
|
|
|
|
/* If path_edit not defined, create one from path_live with .tmp suffix. */
|
|
if (!tc->path_edit) {
|
|
if (!(tmp = dm_pool_alloc(mem, strlen(path) + 5)))
|
|
goto_bad;
|
|
sprintf(tmp, "%s.tmp", path);
|
|
new_tc->path_edit = tmp;
|
|
}
|
|
else if (!(new_tc->path_edit = dm_pool_strdup(mem, tc->path_edit)))
|
|
goto_bad;
|
|
|
|
if (!(new_tc->desc = tc->desc ? dm_pool_strdup(mem, tc->desc)
|
|
: dm_pool_strdup(mem, "")))
|
|
goto_bad;
|
|
|
|
return (void *) new_tc;
|
|
|
|
bad:
|
|
dm_pool_free(mem, new_tc);
|
|
|
|
log_error("Couldn't allocate text format context object.");
|
|
return NULL;
|
|
}
|
|
|
|
static int _create_vg_text_instance(struct format_instance *fid,
|
|
const struct format_instance_ctx *fic)
|
|
{
|
|
uint32_t type = fic->type;
|
|
struct text_fid_context *fidtc;
|
|
struct metadata_area *mda;
|
|
struct lvmcache_vginfo *vginfo;
|
|
const char *vg_name, *vg_id;
|
|
|
|
if (!(fidtc = (struct text_fid_context *)
|
|
dm_pool_zalloc(fid->mem, sizeof(*fidtc)))) {
|
|
log_error("Couldn't allocate text_fid_context.");
|
|
return 0;
|
|
}
|
|
|
|
fid->private = (void *) fidtc;
|
|
|
|
if (type & FMT_INSTANCE_PRIVATE_MDAS) {
|
|
if (!(mda = dm_pool_zalloc(fid->mem, sizeof(*mda))))
|
|
return_0;
|
|
mda->ops = &_metadata_text_file_backup_ops;
|
|
mda->metadata_locn = _create_text_context(fid->mem, fic->context.private);
|
|
mda->status = 0;
|
|
fid->metadata_areas_index = NULL;
|
|
fid_add_mda(fid, mda, NULL, 0, 0);
|
|
} else {
|
|
vg_name = fic->context.vg_ref.vg_name;
|
|
vg_id = fic->context.vg_ref.vg_id;
|
|
|
|
if (!(fid->metadata_areas_index = dm_hash_create(116))) {
|
|
log_error("Couldn't create metadata index for format "
|
|
"instance of VG %s.", vg_name);
|
|
return 0;
|
|
}
|
|
|
|
if (type & FMT_INSTANCE_MDAS) {
|
|
if (!(vginfo = lvmcache_vginfo_from_vgname(vg_name, vg_id))) {
|
|
log_debug("No cached vginfo for VG %s and ID %s.", vg_name, vg_id);
|
|
goto out;
|
|
}
|
|
if (!lvmcache_fid_add_mdas_vg(vginfo, fid))
|
|
goto_out;
|
|
}
|
|
}
|
|
|
|
out:
|
|
return 1;
|
|
}
|
|
|
|
static int _add_metadata_area_to_pv(struct physical_volume *pv,
|
|
unsigned mda_index,
|
|
uint64_t mda_start,
|
|
uint64_t mda_size,
|
|
unsigned mda_ignored)
|
|
{
|
|
char pvid[ID_LEN + 1] __attribute__((aligned(8)));
|
|
struct metadata_area *mda;
|
|
struct mda_context *mdac;
|
|
struct mda_lists *mda_lists = (struct mda_lists *) pv->fmt->private;
|
|
|
|
if (mda_index >= FMT_TEXT_MAX_MDAS_PER_PV) {
|
|
log_error(INTERNAL_ERROR "can't add metadata area with "
|
|
"index %u to PV %s. Metadata "
|
|
"layout not supported by %s format.",
|
|
mda_index, dev_name(pv->dev),
|
|
pv->fmt->name);
|
|
}
|
|
|
|
if (!(mda = dm_pool_zalloc(pv->fid->mem, sizeof(struct metadata_area)))) {
|
|
log_error("struct metadata_area allocation failed");
|
|
return 0;
|
|
}
|
|
|
|
if (!(mdac = dm_pool_zalloc(pv->fid->mem, sizeof(struct mda_context)))) {
|
|
log_error("struct mda_context allocation failed");
|
|
free(mda);
|
|
return 0;
|
|
}
|
|
|
|
mda->ops = mda_lists->raw_ops;
|
|
mda->metadata_locn = mdac;
|
|
mda->status = 0;
|
|
|
|
mdac->area.dev = pv->dev;
|
|
mdac->area.start = mda_start;
|
|
mdac->area.size = mda_size;
|
|
mdac->free_sectors = UINT64_C(0);
|
|
memset(&mdac->rlocn, 0, sizeof(mdac->rlocn));
|
|
mda_set_ignored(mda, mda_ignored);
|
|
|
|
pvid[ID_LEN] = 0;
|
|
memcpy(pvid, &pv->id.uuid, ID_LEN);
|
|
|
|
fid_add_mda(pv->fid, mda, pvid, ID_LEN, mda_index);
|
|
|
|
return 1;
|
|
}
|
|
|
|
/*
 * Forward declaration: _text_pv_add_metadata_area() below calls this to
 * drop an already existing metadata area before adding a new one.
 */
static int _text_pv_remove_metadata_area(const struct format_type *fmt,
					 struct physical_volume *pv,
					 unsigned mda_index);
|
|
|
|
static int _text_pv_add_metadata_area(const struct format_type *fmt,
|
|
struct physical_volume *pv,
|
|
int pe_start_locked,
|
|
unsigned mda_index,
|
|
uint64_t mda_size,
|
|
unsigned mda_ignored)
|
|
{
|
|
char pvid[ID_LEN + 1] __attribute__((aligned(8))) = { 0 };
|
|
struct format_instance *fid = pv->fid;
|
|
uint64_t ba_size, pe_start, first_unallocated;
|
|
uint64_t alignment, alignment_offset;
|
|
uint64_t disk_size;
|
|
uint64_t mda_start;
|
|
uint64_t adjustment, limit, tmp_mda_size;
|
|
uint64_t wipe_size = 8 << SECTOR_SHIFT;
|
|
uint64_t zero_len;
|
|
size_t page_size = lvm_getpagesize();
|
|
struct metadata_area *mda;
|
|
struct mda_context *mdac;
|
|
const char *limit_name;
|
|
int limit_applied = 0;
|
|
|
|
if (*pv->old_id.uuid)
|
|
memcpy(pvid, &pv->old_id.uuid, ID_LEN);
|
|
else
|
|
memcpy(pvid, &pv->id.uuid, ID_LEN);
|
|
|
|
if (mda_index >= FMT_TEXT_MAX_MDAS_PER_PV) {
|
|
log_error(INTERNAL_ERROR "invalid index of value %u used "
|
|
"while trying to add metadata area on PV %s. "
|
|
"Metadata layout not supported by %s format.",
|
|
mda_index, pv_dev_name(pv), fmt->name);
|
|
return 0;
|
|
}
|
|
|
|
pe_start = pv->pe_start << SECTOR_SHIFT;
|
|
ba_size = pv->ba_size << SECTOR_SHIFT;
|
|
alignment = pv->pe_align << SECTOR_SHIFT;
|
|
alignment_offset = pv->pe_align_offset << SECTOR_SHIFT;
|
|
disk_size = pv->size << SECTOR_SHIFT;
|
|
mda_size = mda_size << SECTOR_SHIFT;
|
|
|
|
if (fid_get_mda_indexed(fid, pvid, ID_LEN, mda_index)) {
|
|
if (!_text_pv_remove_metadata_area(fmt, pv, mda_index)) {
|
|
log_error(INTERNAL_ERROR "Metadata area with index %u already "
|
|
"exists on PV %s and removal failed.",
|
|
mda_index, pv_dev_name(pv));
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
/* First metadata area at the start of the device. */
|
|
if (mda_index == 0) {
|
|
/*
|
|
* Try to fit MDA0 end within given pe_start limit if its value
|
|
* is locked. If it's not locked, count with any existing MDA1.
|
|
* If there's no MDA1, just use disk size as the limit.
|
|
*/
|
|
if (pe_start_locked) {
|
|
limit = pe_start;
|
|
limit_name = "pe_start";
|
|
}
|
|
else if ((mda = fid_get_mda_indexed(fid, pvid, ID_LEN, 1)) &&
|
|
(mdac = mda->metadata_locn)) {
|
|
limit = mdac->area.start;
|
|
limit_name = "MDA1 start";
|
|
}
|
|
else {
|
|
limit = disk_size;
|
|
limit_name = "disk size";
|
|
}
|
|
|
|
/* Adjust limits for bootloader area if present. */
|
|
if (ba_size) {
|
|
limit -= ba_size;
|
|
limit_name = "ba_start";
|
|
}
|
|
|
|
if (limit > disk_size)
|
|
goto bad;
|
|
|
|
mda_start = LABEL_SCAN_SIZE;
|
|
|
|
/* Align MDA0 start with page size if possible. */
|
|
if (limit - mda_start >= MDA_SIZE_MIN) {
|
|
if ((adjustment = mda_start % page_size))
|
|
mda_start += (page_size - adjustment);
|
|
}
|
|
|
|
/* Align MDA0 end position with given alignment if possible. */
|
|
if (alignment &&
|
|
(adjustment = (mda_start + mda_size) % alignment)) {
|
|
tmp_mda_size = mda_size + alignment - adjustment;
|
|
if (mda_start + tmp_mda_size <= limit)
|
|
mda_size = tmp_mda_size;
|
|
}
|
|
|
|
/* Align MDA0 end position with given alignment offset if possible. */
|
|
if (alignment && alignment_offset &&
|
|
(((mda_start + mda_size) % alignment) == 0)) {
|
|
tmp_mda_size = mda_size + alignment_offset;
|
|
if (mda_start + tmp_mda_size <= limit)
|
|
mda_size = tmp_mda_size;
|
|
}
|
|
|
|
if (mda_start + mda_size > limit) {
|
|
/*
|
|
* Try to decrease the MDA0 size with twice the
|
|
* alignment and then align with given alignment.
|
|
* If pe_start is locked, skip this type of
|
|
* alignment since it would be useless.
|
|
* Check first whether we can apply that!
|
|
*/
|
|
if (!pe_start_locked && alignment &&
|
|
((limit - mda_start) > alignment * 2)) {
|
|
mda_size = limit - mda_start - alignment * 2;
|
|
|
|
if ((adjustment = (mda_start + mda_size) % alignment))
|
|
mda_size += (alignment - adjustment);
|
|
|
|
/* Still too much? Then there's nothing else to do. */
|
|
if (mda_start + mda_size > limit)
|
|
goto bad;
|
|
}
|
|
/* Otherwise, give up and take any usable space. */
|
|
else
|
|
mda_size = limit - mda_start;
|
|
|
|
limit_applied = 1;
|
|
}
|
|
|
|
/*
|
|
* If PV's pe_start is not locked, update pe_start value with the
|
|
* start of the area that follows the MDA0 we've just calculated.
|
|
*/
|
|
if (!pe_start_locked) {
|
|
if (ba_size) {
|
|
pv->ba_start = (mda_start + mda_size) >> SECTOR_SHIFT;
|
|
pv->pe_start = pv->ba_start + pv->ba_size;
|
|
} else
|
|
pv->pe_start = (mda_start + mda_size) >> SECTOR_SHIFT;
|
|
}
|
|
}
|
|
/* Second metadata area at the end of the device. */
|
|
else {
|
|
/*
|
|
* Try to fit MDA1 start within given pe_end or pe_start limit
|
|
* if defined or locked. If pe_start is not defined yet, count
|
|
* with any existing MDA0. If MDA0 does not exist, just use
|
|
* LABEL_SCAN_SIZE.
|
|
*
|
|
* The first_unallocated here is the first unallocated byte
|
|
* beyond existing pe_end if there is any preallocated data area
|
|
* reserved already so we can take that as lower limit for our MDA1
|
|
* start calculation. If data area is not reserved yet, we set
|
|
* first_unallocated to 0, meaning this is not our limiting factor
|
|
* and we will look at other limiting factors if they exist.
|
|
* Of course, if we have preallocated data area, we also must
|
|
* have pe_start assigned too (simply, data area needs its start
|
|
* and end specification).
|
|
*/
|
|
first_unallocated = pv->pe_count ? (pv->pe_start + pv->pe_count *
|
|
(uint64_t)pv->pe_size) << SECTOR_SHIFT
|
|
: 0;
|
|
|
|
if (pe_start || pe_start_locked) {
|
|
limit = first_unallocated ? first_unallocated : pe_start;
|
|
limit_name = first_unallocated ? "pe_end" : "pe_start";
|
|
} else {
|
|
if ((mda = fid_get_mda_indexed(fid, pvid, ID_LEN, 0)) &&
|
|
(mdac = mda->metadata_locn)) {
|
|
limit = mdac->area.start + mdac->area.size;
|
|
limit_name = "MDA0 end";
|
|
}
|
|
else {
|
|
limit = LABEL_SCAN_SIZE;
|
|
limit_name = "label scan size";
|
|
}
|
|
|
|
/* Adjust limits for bootloader area if present. */
|
|
if (ba_size) {
|
|
limit += ba_size;
|
|
limit_name = "ba_end";
|
|
}
|
|
}
|
|
|
|
if (limit >= disk_size)
|
|
goto bad;
|
|
|
|
if (mda_size > disk_size) {
|
|
mda_size = disk_size - limit;
|
|
limit_applied = 1;
|
|
}
|
|
|
|
mda_start = disk_size - mda_size;
|
|
|
|
/* If MDA1 size is too big, just take any usable space. */
|
|
if (disk_size - mda_size < limit) {
|
|
mda_size = disk_size - limit;
|
|
mda_start = disk_size - mda_size;
|
|
limit_applied = 1;
|
|
}
|
|
/* Otherwise, try to align MDA1 start if possible. */
|
|
else if (alignment &&
|
|
(adjustment = mda_start % alignment)) {
|
|
tmp_mda_size = mda_size + adjustment;
|
|
if (tmp_mda_size < disk_size &&
|
|
disk_size - tmp_mda_size >= limit) {
|
|
mda_size = tmp_mda_size;
|
|
mda_start = disk_size - mda_size;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (limit_applied)
|
|
log_very_verbose("Using limited metadata area size on %s "
|
|
"with value " FMTu64 " (limited by %s of "
|
|
FMTu64 ").", pv_dev_name(pv),
|
|
mda_size, limit_name, limit);
|
|
|
|
if (mda_size) {
|
|
if (mda_size < MDA_SIZE_MIN) {
|
|
log_error("Metadata area size too small: " FMTu64 " bytes. "
|
|
"It must be at least %u bytes.", mda_size, MDA_SIZE_MIN);
|
|
goto bad;
|
|
}
|
|
|
|
/* Wipe metadata area with zeroes. */
|
|
|
|
zero_len = (mda_size > wipe_size) ? wipe_size : mda_size;
|
|
|
|
if (!dev_write_zeros(pv->dev, mda_start, zero_len)) {
|
|
log_error("Failed to wipe new metadata area on %s at %llu len %llu",
|
|
pv_dev_name(pv),
|
|
(unsigned long long)mda_start,
|
|
(unsigned long long)zero_len);
|
|
return 0;
|
|
}
|
|
|
|
/* Finally, add new metadata area to PV's format instance. */
|
|
if (!_add_metadata_area_to_pv(pv, mda_index, mda_start,
|
|
mda_size, mda_ignored))
|
|
return_0;
|
|
}
|
|
|
|
return 1;
|
|
|
|
bad:
|
|
log_error("Not enough space available for metadata area "
|
|
"with index %u on PV %s.", mda_index, pv_dev_name(pv));
|
|
return 0;
|
|
}
|
|
|
|
static int _remove_metadata_area_from_pv(struct physical_volume *pv,
|
|
unsigned mda_index)
|
|
{
|
|
char pvid[ID_LEN + 1] __attribute__((aligned(8))) = { 0 };
|
|
|
|
if (mda_index >= FMT_TEXT_MAX_MDAS_PER_PV) {
|
|
log_error(INTERNAL_ERROR "can't remove metadata area with "
|
|
"index %u from PV %s. Metadata "
|
|
"layout not supported by %s format.",
|
|
mda_index, dev_name(pv->dev),
|
|
pv->fmt->name);
|
|
return 0;
|
|
}
|
|
|
|
memcpy(pvid, &pv->id.uuid, ID_LEN);
|
|
|
|
return fid_remove_mda(pv->fid, NULL, pvid, ID_LEN, mda_index);
|
|
}
|
|
|
|
/*
 * Format handler entry point for removing a metadata area from a PV.
 * The fmt argument is not used; the work is delegated to
 * _remove_metadata_area_from_pv().
 */
static int _text_pv_remove_metadata_area(const struct format_type *fmt,
					 struct physical_volume *pv,
					 unsigned mda_index)
{
	int removed = _remove_metadata_area_from_pv(pv, mda_index);

	return removed;
}
|
|
|
|
static int _text_pv_resize(const struct format_type *fmt,
|
|
struct physical_volume *pv,
|
|
struct volume_group *vg,
|
|
uint64_t size)
|
|
{
|
|
char pvid[ID_LEN + 1] __attribute__((aligned(8))) = { 0 };
|
|
struct format_instance *fid = pv->fid;
|
|
struct metadata_area *mda;
|
|
struct mda_context *mdac;
|
|
uint64_t size_reduction;
|
|
uint64_t mda_size;
|
|
unsigned mda_ignored;
|
|
|
|
if (*pv->old_id.uuid)
|
|
memcpy(pvid, &pv->old_id.uuid, ID_LEN);
|
|
else
|
|
memcpy(pvid, &pv->id.uuid, ID_LEN);
|
|
|
|
/*
|
|
* First, set the new size and update the cache and reset pe_count.
|
|
* (pe_count must be reset otherwise it would be considered as
|
|
* a limiting factor while moving the mda!)
|
|
*/
|
|
pv->size = size;
|
|
pv->pe_count = 0;
|
|
|
|
/* If there's an mda at the end, move it to a new position. */
|
|
if ((mda = fid_get_mda_indexed(fid, pvid, ID_LEN, 1)) &&
|
|
(mdac = mda->metadata_locn)) {
|
|
/* FIXME: Maybe MDA0 size would be better? */
|
|
mda_size = mdac->area.size >> SECTOR_SHIFT;
|
|
mda_ignored = mda_is_ignored(mda);
|
|
|
|
if (!_text_pv_remove_metadata_area(fmt, pv, 1) ||
|
|
!_text_pv_add_metadata_area(fmt, pv, 1, 1, mda_size,
|
|
mda_ignored)) {
|
|
log_error("Failed to move metadata area with index 1 "
|
|
"while resizing PV %s.", pv_dev_name(pv));
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
/* If there's a VG, reduce size by counting in pe_start and metadata areas. */
|
|
if (vg && !is_orphan_vg(vg->name)) {
|
|
size_reduction = pv_pe_start(pv);
|
|
if ((mda = fid_get_mda_indexed(fid, pvid, ID_LEN, 1)) &&
|
|
(mdac = mda->metadata_locn))
|
|
size_reduction += mdac->area.size >> SECTOR_SHIFT;
|
|
pv->size -= size_reduction;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
static struct format_instance *_text_create_text_instance(const struct format_type *fmt,
|
|
const struct format_instance_ctx *fic)
|
|
{
|
|
struct format_instance *fid;
|
|
|
|
if (!(fid = alloc_fid(fmt, fic)))
|
|
return_NULL;
|
|
|
|
if (!_create_vg_text_instance(fid, fic)) {
|
|
dm_pool_destroy(fid->mem);
|
|
return_NULL;
|
|
}
|
|
|
|
return fid;
|
|
}
|
|
|
|
static const struct format_handler _text_handler = {
|
|
.pv_initialise = _text_pv_initialise,
|
|
.pv_setup = _text_pv_setup,
|
|
.pv_add_metadata_area = _text_pv_add_metadata_area,
|
|
.pv_remove_metadata_area = _text_pv_remove_metadata_area,
|
|
.pv_resize = _text_pv_resize,
|
|
.pv_write = _text_pv_write,
|
|
.pv_needs_rewrite = _text_pv_needs_rewrite,
|
|
.vg_setup = _text_vg_setup,
|
|
.lv_setup = _text_lv_setup,
|
|
.create_instance = _text_create_text_instance,
|
|
.destroy_instance = _text_destroy_instance,
|
|
.destroy = _text_destroy
|
|
};
|
|
|
|
struct format_type *create_text_format(struct cmd_context *cmd)
|
|
{
|
|
struct format_instance_ctx fic;
|
|
struct format_instance *fid;
|
|
struct format_type *fmt;
|
|
struct mda_lists *mda_lists;
|
|
|
|
if (!(fmt = zalloc(sizeof(*fmt)))) {
|
|
log_error("Failed to allocate text format type structure.");
|
|
return NULL;
|
|
}
|
|
|
|
fmt->cmd = cmd;
|
|
fmt->ops = &_text_handler;
|
|
fmt->name = FMT_TEXT_NAME;
|
|
fmt->alias = FMT_TEXT_ALIAS;
|
|
strncpy(fmt->orphan_vg_name, ORPHAN_VG_NAME(FMT_TEXT_NAME), sizeof(fmt->orphan_vg_name));
|
|
fmt->features = FMT_SEGMENTS | FMT_TAGS | FMT_PRECOMMIT |
|
|
FMT_UNLIMITED_VOLS | FMT_RESIZE_PV |
|
|
FMT_UNLIMITED_STRIPESIZE | FMT_CONFIG_PROFILE |
|
|
FMT_NON_POWER2_EXTENTS | FMT_PV_FLAGS;
|
|
|
|
if (!(mda_lists = malloc(sizeof(struct mda_lists)))) {
|
|
log_error("Failed to allocate dir_list");
|
|
free(fmt);
|
|
return NULL;
|
|
}
|
|
|
|
mda_lists->file_ops = &_metadata_text_file_ops;
|
|
mda_lists->raw_ops = &_metadata_text_raw_ops;
|
|
fmt->private = (void *) mda_lists;
|
|
|
|
if (!(fmt->labeller = text_labeller_create(fmt))) {
|
|
log_error("Couldn't create text label handler.");
|
|
goto bad;
|
|
}
|
|
|
|
if (!(label_register_handler(fmt->labeller))) {
|
|
log_error("Couldn't register text label handler.");
|
|
fmt->labeller->ops->destroy(fmt->labeller);
|
|
goto bad;
|
|
}
|
|
|
|
if (!(fmt->orphan_vg = alloc_vg("text_orphan", cmd, fmt->orphan_vg_name)))
|
|
goto_bad;
|
|
|
|
fic.type = FMT_INSTANCE_AUX_MDAS;
|
|
fic.context.vg_ref.vg_name = fmt->orphan_vg_name;
|
|
fic.context.vg_ref.vg_id = NULL;
|
|
if (!(fid = _text_create_text_instance(fmt, &fic)))
|
|
goto_bad;
|
|
|
|
vg_set_fid(fmt->orphan_vg, fid);
|
|
|
|
log_very_verbose("Initialised format: %s", fmt->name);
|
|
|
|
return fmt;
|
|
bad:
|
|
_text_destroy(fmt);
|
|
|
|
return NULL;
|
|
}
|
|
|
|
int text_wipe_outdated_pv_mda(struct cmd_context *cmd, struct device *dev,
|
|
struct metadata_area *mda)
|
|
{
|
|
struct mda_context *mdac = mda->metadata_locn;
|
|
uint64_t start_byte = mdac->area.start;
|
|
struct mda_header *mdab;
|
|
struct raw_locn *rlocn_slot0;
|
|
struct raw_locn *rlocn_slot1;
|
|
uint32_t bad_fields = 0;
|
|
|
|
if (!(mdab = raw_read_mda_header(cmd->fmt, &mdac->area, mda_is_primary(mda), 0, &bad_fields))) {
|
|
log_error("Failed to read outdated pv mda header on %s", dev_name(dev));
|
|
return 0;
|
|
}
|
|
|
|
rlocn_slot0 = &mdab->raw_locns[0];
|
|
rlocn_slot1 = &mdab->raw_locns[1];
|
|
|
|
rlocn_slot0->offset = 0;
|
|
rlocn_slot0->size = 0;
|
|
rlocn_slot0->checksum = 0;
|
|
rlocn_slot1->offset = 0;
|
|
rlocn_slot1->size = 0;
|
|
rlocn_slot1->checksum = 0;
|
|
|
|
if (!_raw_write_mda_header(cmd->fmt, dev, mda_is_primary(mda), start_byte, mdab)) {
|
|
log_error("Failed to write outdated pv mda header on %s", dev_name(dev));
|
|
return 0;
|
|
}
|
|
|
|
return 1;
|
|
}
|