2001-11-21 12:20:05 +03:00
/*
2008-01-30 17:00:02 +03:00
* Copyright ( C ) 2001 - 2004 Sistina Software , Inc . All rights reserved .
2018-04-20 18:43:50 +03:00
* Copyright ( C ) 2004 - 2012 Red Hat , Inc . All rights reserved .
2001-11-21 12:20:05 +03:00
*
2004-03-30 23:35:44 +04:00
* This file is part of LVM2 .
*
* This copyrighted material is made available to anyone wishing to use ,
* modify , copy , or redistribute it subject to the terms and conditions
2007-08-21 00:55:30 +04:00
* of the GNU Lesser General Public License v .2 .1 .
2004-03-30 23:35:44 +04:00
*
2007-08-21 00:55:30 +04:00
* You should have received a copy of the GNU Lesser General Public License
2004-03-30 23:35:44 +04:00
* along with this program ; if not , write to the Free Software Foundation ,
2016-01-21 13:49:46 +03:00
* Inc . , 51 Franklin Street , Fifth Floor , Boston , MA 02110 - 1301 USA
2001-11-21 12:20:05 +03:00
*/
2018-05-14 12:30:20 +03:00
# include "lib/misc/lib.h"
2002-01-07 12:16:20 +03:00
# include "import-export.h"
2018-12-14 15:10:25 +03:00
# include "format-text.h"
# include "layout.h"
2018-05-14 12:30:20 +03:00
# include "lib/device/device.h"
# include "lib/misc/lvm-file.h"
# include "lib/config/config.h"
# include "lib/display/display.h"
# include "lib/commands/toolcontext.h"
# include "lib/misc/lvm-string.h"
# include "lib/uuid/uuid.h"
# include "lib/misc/crc.h"
# include "lib/mm/xlate.h"
# include "lib/label/label.h"
# include "lib/cache/lvmcache.h"
2001-11-21 12:20:05 +03:00
2002-01-09 22:16:48 +03:00
# include <unistd.h>
# include <limits.h>
2002-04-24 22:20:51 +04:00
# include <dirent.h>
2002-11-18 17:04:08 +03:00
# include <ctype.h>
2002-01-09 22:16:48 +03:00
2011-02-21 15:05:49 +03:00
static struct format_instance * _text_create_text_instance ( const struct format_type * fmt ,
const struct format_instance_ctx * fic ) ;
2002-02-22 14:44:56 +03:00
2005-10-23 04:14:48 +04:00
/*
 * Write-side state for text metadata: the buffer holding the metadata
 * text, the allocated size of that buffer, and the size of the metadata
 * text currently held in it.
 * NOTE(review): the name suggests this hangs off a format instance (fid);
 * confirm against the code that allocates it.
 */
struct text_fid_context {
2019-07-01 23:00:34 +03:00
char * write_buf ; /* buffer containing metadata text to write to disk */
uint32_t write_buf_size ; /* mem size of write_buf, increases in 64K multiples */
uint32_t new_metadata_size ; /* size of text metadata in buf */
2005-10-23 04:14:48 +04:00
} ;
2010-06-29 00:29:57 +04:00
int rlocn_is_ignored ( const struct raw_locn * rlocn )
{
return ( rlocn - > flags & RAW_LOCN_IGNORED ? 1 : 0 ) ;
}
2010-06-30 21:13:05 +04:00
void rlocn_set_ignored ( struct raw_locn * rlocn , unsigned mda_ignored )
2010-06-29 00:29:57 +04:00
{
2010-06-30 21:13:05 +04:00
if ( mda_ignored )
2010-06-29 00:29:57 +04:00
rlocn - > flags | = RAW_LOCN_IGNORED ;
else
rlocn - > flags & = ~ RAW_LOCN_IGNORED ;
}
2001-11-21 12:20:05 +03:00
/*
2002-12-20 02:25:55 +03:00
* NOTE : Currently there can be only one vg per text file .
2001-11-21 12:20:05 +03:00
*/
2014-10-14 21:12:15 +04:00
/*
* Only used by vgcreate .
*/
static int _text_vg_setup ( struct format_instance * fid ,
2006-05-11 21:58:58 +04:00
struct volume_group * vg )
2001-11-21 12:20:05 +03:00
{
2014-10-14 21:12:15 +04:00
if ( ! vg_check_new_extent_size ( vg - > fid - > fmt , vg - > extent_size ) )
return_0 ;
2002-04-24 22:20:51 +04:00
2002-02-22 14:44:56 +03:00
return 1 ;
2001-11-21 12:20:05 +03:00
}
2007-11-05 20:17:55 +03:00
static uint64_t _mda_free_sectors_raw ( struct metadata_area * mda )
{
struct mda_context * mdac = ( struct mda_context * ) mda - > metadata_locn ;
return mdac - > free_sectors ;
}
2009-01-10 01:44:33 +03:00
static uint64_t _mda_total_sectors_raw ( struct metadata_area * mda )
{
struct mda_context * mdac = ( struct mda_context * ) mda - > metadata_locn ;
return mdac - > area . size > > SECTOR_SHIFT ;
}
2007-03-23 15:43:17 +03:00
/*
* Check if metadata area belongs to vg
*/
2010-07-09 19:34:40 +04:00
static int _mda_in_vg_raw ( struct format_instance * fid __attribute__ ( ( unused ) ) ,
2007-03-23 15:43:17 +03:00
struct volume_group * vg , struct metadata_area * mda )
{
struct mda_context * mdac = ( struct mda_context * ) mda - > metadata_locn ;
struct pv_list * pvl ;
2008-11-04 01:14:30 +03:00
dm_list_iterate_items ( pvl , & vg - > pvs )
2007-03-23 15:43:17 +03:00
if ( pvl - > pv - > dev = = mdac - > area . dev )
return 1 ;
return 0 ;
}
2010-06-29 00:31:38 +04:00
static unsigned _mda_locns_match_raw ( struct metadata_area * mda1 ,
struct metadata_area * mda2 )
{
struct mda_context * mda1c = ( struct mda_context * ) mda1 - > metadata_locn ;
struct mda_context * mda2c = ( struct mda_context * ) mda2 - > metadata_locn ;
if ( ( mda1c - > area . dev = = mda2c - > area . dev ) & &
( mda1c - > area . start = = mda2c - > area . start ) & &
( mda1c - > area . size = = mda2c - > area . size ) )
return 1 ;
return 0 ;
}
2011-06-15 21:45:02 +04:00
static struct device * _mda_get_device_raw ( struct metadata_area * mda )
{
struct mda_context * mdac = ( struct mda_context * ) mda - > metadata_locn ;
return mdac - > area . dev ;
}
2010-07-09 19:34:40 +04:00
static int _text_lv_setup ( struct format_instance * fid __attribute__ ( ( unused ) ) ,
2006-05-11 21:58:58 +04:00
struct logical_volume * lv )
2001-11-21 12:20:05 +03:00
{
2008-01-30 17:00:02 +03:00
/******** FIXME Any LV size restriction?
2002-02-22 14:44:56 +03:00
uint64_t max_size = UINT_MAX ;
2001-11-21 12:20:05 +03:00
2002-02-22 14:44:56 +03:00
if ( lv - > size > max_size ) {
2006-05-10 01:23:51 +04:00
char * dummy = display_size ( max_size ) ;
2002-02-22 14:44:56 +03:00
log_error ( " logical volumes cannot be larger than %s " , dummy ) ;
2018-06-08 15:40:53 +03:00
free ( dummy ) ;
2002-02-22 14:44:56 +03:00
return 0 ;
}
2002-11-18 19:21:00 +03:00
*/
2005-01-20 21:11:53 +03:00
if ( ! * lv - > lvid . s & & ! lvid_create ( & lv - > lvid , & lv - > vg - > id ) ) {
log_error ( " Random lvid creation failed for %s/%s. " ,
lv - > vg - > name , lv - > name ) ;
return 0 ;
}
2002-02-22 14:44:56 +03:00
return 1 ;
2001-11-21 12:20:05 +03:00
}
2002-11-18 17:04:08 +03:00
static void _xlate_mdah ( struct mda_header * mdah )
{
struct raw_locn * rl ;
mdah - > version = xlate32 ( mdah - > version ) ;
mdah - > start = xlate64 ( mdah - > start ) ;
mdah - > size = xlate64 ( mdah - > size ) ;
rl = & mdah - > raw_locns [ 0 ] ;
while ( rl - > offset ) {
rl - > checksum = xlate32 ( rl - > checksum ) ;
rl - > offset = xlate64 ( rl - > offset ) ;
rl - > size = xlate64 ( rl - > size ) ;
rl + + ;
}
}
2019-02-05 21:08:00 +03:00
static int _raw_read_mda_header ( struct mda_header * mdah , struct device_area * dev_area ,
int primary_mda , uint32_t ignore_bad_fields , uint32_t * bad_fields )
2018-01-04 18:52:59 +03:00
{
2018-02-07 00:18:11 +03:00
log_debug_metadata ( " Reading mda header sector from %s at %llu " ,
dev_name ( dev_area - > dev ) , ( unsigned long long ) dev_area - > start ) ;
2018-01-07 06:43:25 +03:00
2018-02-27 20:26:04 +03:00
if ( ! dev_read_bytes ( dev_area - > dev , dev_area - > start , MDA_HEADER_SIZE , mdah ) ) {
2018-02-07 00:18:11 +03:00
log_error ( " Failed to read metadata area header on %s at %llu " ,
dev_name ( dev_area - > dev ) , ( unsigned long long ) dev_area - > start ) ;
2019-02-05 21:08:00 +03:00
* bad_fields | = BAD_MDA_READ ;
2018-02-07 00:18:11 +03:00
return 0 ;
2018-04-20 18:43:50 +03:00
}
2018-01-07 06:43:25 +03:00
2010-09-27 23:09:34 +04:00
if ( mdah - > checksum_xl ! = xlate32 ( calc_crc ( INITIAL_CRC , ( uint8_t * ) mdah - > magic ,
2002-11-18 17:04:08 +03:00
MDA_HEADER_SIZE -
sizeof ( mdah - > checksum_xl ) ) ) ) {
2019-02-05 21:08:00 +03:00
log_warn ( " WARNING: wrong checksum %x in mda header on %s at %llu " ,
mdah - > checksum_xl ,
2018-02-07 00:18:11 +03:00
dev_name ( dev_area - > dev ) , ( unsigned long long ) dev_area - > start ) ;
2019-02-05 21:08:00 +03:00
* bad_fields | = BAD_MDA_CHECKSUM ;
2002-11-18 17:04:08 +03:00
}
_xlate_mdah ( mdah ) ;
2019-05-02 00:50:48 +03:00
if ( memcmp ( mdah - > magic , FMTT_MAGIC , sizeof ( mdah - > magic ) ) ) {
2019-02-05 21:08:00 +03:00
log_warn ( " WARNING: wrong magic number in mda header on %s at %llu " ,
2018-02-07 00:18:11 +03:00
dev_name ( dev_area - > dev ) , ( unsigned long long ) dev_area - > start ) ;
2019-02-05 21:08:00 +03:00
* bad_fields | = BAD_MDA_MAGIC ;
2002-11-18 17:04:08 +03:00
}
if ( mdah - > version ! = FMTT_VERSION ) {
2019-02-05 21:08:00 +03:00
log_warn ( " WARNING: wrong version %u in mda header on %s at %llu " ,
2018-02-07 00:18:11 +03:00
mdah - > version ,
dev_name ( dev_area - > dev ) , ( unsigned long long ) dev_area - > start ) ;
2019-02-05 21:08:00 +03:00
* bad_fields | = BAD_MDA_VERSION ;
2002-11-18 17:04:08 +03:00
}
if ( mdah - > start ! = dev_area - > start ) {
2019-02-05 21:08:00 +03:00
log_warn ( " WARNING: wrong start sector %llu in mda header on %s at %llu " ,
2018-02-07 00:18:11 +03:00
( unsigned long long ) mdah - > start ,
dev_name ( dev_area - > dev ) , ( unsigned long long ) dev_area - > start ) ;
2019-02-05 21:08:00 +03:00
* bad_fields | = BAD_MDA_START ;
2002-11-18 17:04:08 +03:00
}
2019-02-05 21:08:00 +03:00
* bad_fields & = ~ ignore_bad_fields ;
if ( * bad_fields )
return 0 ;
2018-04-20 18:43:50 +03:00
return 1 ;
2014-02-28 02:06:42 +04:00
}
2007-01-10 00:12:41 +03:00
2018-04-20 18:43:50 +03:00
struct mda_header * raw_read_mda_header ( const struct format_type * fmt ,
2019-02-05 21:08:00 +03:00
struct device_area * dev_area ,
int primary_mda , uint32_t ignore_bad_fields , uint32_t * bad_fields )
2014-02-28 02:06:42 +04:00
{
struct mda_header * mdah ;
2018-04-20 18:43:50 +03:00
if ( ! ( mdah = dm_pool_alloc ( fmt - > cmd - > mem , MDA_HEADER_SIZE ) ) ) {
2014-02-28 02:06:42 +04:00
log_error ( " struct mda_header allocation failed " ) ;
2019-02-05 21:08:00 +03:00
* bad_fields | = BAD_MDA_INTERNAL ;
2014-02-28 02:06:42 +04:00
return NULL ;
}
2019-02-05 21:08:00 +03:00
if ( ! _raw_read_mda_header ( mdah , dev_area , primary_mda , ignore_bad_fields , bad_fields ) ) {
2018-04-20 18:43:50 +03:00
dm_pool_free ( fmt - > cmd - > mem , mdah ) ;
2014-02-28 02:06:42 +04:00
return NULL ;
}
2018-04-20 18:43:50 +03:00
return mdah ;
2018-01-04 18:52:59 +03:00
}
2002-12-20 02:25:55 +03:00
static int _raw_write_mda_header ( const struct format_type * fmt ,
2017-12-07 06:34:59 +03:00
struct device * dev , int primary_mda ,
2002-11-18 17:04:08 +03:00
uint64_t start_byte , struct mda_header * mdah )
{
2019-05-02 00:50:48 +03:00
memcpy ( mdah - > magic , FMTT_MAGIC , sizeof ( mdah - > magic ) ) ;
2002-11-18 17:04:08 +03:00
mdah - > version = FMTT_VERSION ;
mdah - > start = start_byte ;
_xlate_mdah ( mdah ) ;
2010-09-27 23:09:34 +04:00
mdah - > checksum_xl = xlate32 ( calc_crc ( INITIAL_CRC , ( uint8_t * ) mdah - > magic ,
2002-11-18 17:04:08 +03:00
MDA_HEADER_SIZE -
sizeof ( mdah - > checksum_xl ) ) ) ;
2018-10-30 00:53:17 +03:00
dev_set_last_byte ( dev , start_byte + MDA_HEADER_SIZE ) ;
2018-02-27 20:26:04 +03:00
if ( ! dev_write_bytes ( dev , start_byte , MDA_HEADER_SIZE , mdah ) ) {
2018-10-30 00:53:17 +03:00
dev_unset_last_byte ( dev ) ;
2018-02-20 00:40:44 +03:00
log_error ( " Failed to write mda header to %s fd %d " , dev_name ( dev ) , dev - > bcache_fd ) ;
return 0 ;
2018-02-07 00:18:11 +03:00
}
2018-10-30 00:53:17 +03:00
dev_unset_last_byte ( dev ) ;
2018-02-07 00:18:11 +03:00
2002-11-18 17:04:08 +03:00
return 1 ;
}
2018-02-07 00:18:11 +03:00
/*
* FIXME : unify this with read_metadata_location ( ) which is used
* in the label scanning path .
*/
static struct raw_locn * _read_metadata_location_vg ( struct device_area * dev_area ,
2017-12-07 06:34:59 +03:00
struct mda_header * mdah , int primary_mda ,
2005-04-06 22:59:55 +04:00
const char * vgname ,
2005-10-31 23:15:28 +03:00
int * precommitted )
2002-11-18 17:04:08 +03:00
{
2002-12-20 02:25:55 +03:00
size_t len ;
2018-04-20 18:43:50 +03:00
char vgnamebuf [ NAME_LEN + 2 ] __attribute__ ( ( aligned ( 8 ) ) ) ;
2005-10-31 23:15:28 +03:00
struct raw_locn * rlocn , * rlocn_precommitted ;
2005-03-22 01:40:35 +03:00
struct lvmcache_info * info ;
2015-03-19 02:43:02 +03:00
struct lvmcache_vgsummary vgsummary_orphan = {
. vgname = FMT_TEXT_ORPHAN_VG_NAME ,
} ;
2017-10-28 00:42:00 +03:00
int rlocn_was_ignored ;
2015-03-19 02:43:02 +03:00
2019-09-26 19:27:38 +03:00
dm_list_init ( & vgsummary_orphan . pvsummaries ) ;
2015-03-19 02:43:02 +03:00
memcpy ( & vgsummary_orphan . vgid , FMT_TEXT_ORPHAN_VG_NAME , sizeof ( FMT_TEXT_ORPHAN_VG_NAME ) ) ;
2002-11-18 17:04:08 +03:00
2005-04-06 22:59:55 +04:00
rlocn = mdah - > raw_locns ; /* Slot 0 */
2005-10-31 23:15:28 +03:00
rlocn_precommitted = rlocn + 1 ; /* Slot 1 */
2005-04-06 22:59:55 +04:00
2017-10-28 00:42:00 +03:00
rlocn_was_ignored = rlocn_is_ignored ( rlocn ) ;
2005-10-31 23:15:28 +03:00
/* Should we use precommitted metadata? */
if ( * precommitted & & rlocn_precommitted - > size & &
( rlocn_precommitted - > offset ! = rlocn - > offset ) ) {
rlocn = rlocn_precommitted ;
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
log_debug_metadata ( " VG %s metadata check %s mda %llu slot1 offset %llu size %llu " ,
vgname ? : " " ,
dev_name ( dev_area - > dev ) ,
( unsigned long long ) dev_area - > start ,
( unsigned long long ) rlocn - > offset ,
( unsigned long long ) rlocn - > size ) ;
} else {
2005-10-31 23:15:28 +03:00
* precommitted = 0 ;
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
log_debug_metadata ( " VG %s metadata check %s mda %llu slot0 offset %llu size %llu " ,
vgname ? : " " ,
dev_name ( dev_area - > dev ) ,
( unsigned long long ) dev_area - > start ,
( unsigned long long ) rlocn - > offset ,
( unsigned long long ) rlocn - > size ) ;
}
2002-11-18 17:04:08 +03:00
2010-04-14 17:09:16 +04:00
/* Do not check non-existent metadata. */
if ( ! rlocn - > offset & & ! rlocn - > size )
return NULL ;
/*
* Don ' t try to check existing metadata
* if given vgname is an empty string .
*/
if ( ! * vgname )
return rlocn ;
2017-10-28 00:42:00 +03:00
/*
* If live rlocn has ignored flag , data will be out - of - date so skip further checks .
*/
if ( rlocn_was_ignored )
return rlocn ;
2018-02-07 00:18:11 +03:00
/*
* Verify that the VG metadata pointed to by the rlocn
* begins with a valid vgname .
*/
memset ( vgnamebuf , 0 , sizeof ( vgnamebuf ) ) ;
2018-10-15 15:12:18 +03:00
if ( ! dev_read_bytes ( dev_area - > dev , dev_area - > start + rlocn - > offset , NAME_LEN , vgnamebuf ) )
2018-10-15 19:35:33 +03:00
goto fail ;
2005-04-06 22:59:55 +04:00
2018-04-20 18:43:50 +03:00
if ( ! strncmp ( vgnamebuf , vgname , len = strlen ( vgname ) ) & &
( isspace ( vgnamebuf [ len ] ) | | vgnamebuf [ len ] = = ' { ' ) )
2005-04-06 22:59:55 +04:00
return rlocn ;
2018-10-15 19:35:33 +03:00
fail :
2018-02-08 00:20:39 +03:00
log_error ( " Metadata on %s at %llu has wrong VG name \" %s \" expected %s. " ,
dev_name ( dev_area - > dev ) ,
( unsigned long long ) ( dev_area - > start + rlocn - > offset ) ,
vgnamebuf , vgname ) ;
2002-11-18 17:04:08 +03:00
2016-06-06 22:04:17 +03:00
if ( ( info = lvmcache_info_from_pvid ( dev_area - > dev - > pvid , dev_area - > dev , 0 ) ) & &
2015-08-18 14:48:53 +03:00
! lvmcache_update_vgname_and_id ( info , & vgsummary_orphan ) )
stack ;
2005-03-22 01:40:35 +03:00
2002-11-18 17:04:08 +03:00
return NULL ;
}
2005-04-06 20:35:33 +04:00
/*
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
* Determine offset for new metadata
2018-09-20 22:15:49 +03:00
*
2019-07-01 23:00:34 +03:00
* The rounding can have a negative effect : when the current metadata
2018-09-20 22:15:49 +03:00
* text size is just below the max , a command to remove something , that
* * reduces * the text metadata size , can still be rejected for being too large ,
* even though it ' s smaller than the current size . In this case , the user
* would need to find something in the VG to remove that uses more text space
* to compensate for the increase due to rounding .
2019-07-01 23:00:34 +03:00
* Update : I think that the new max_size restriction avoids this problem .
2005-04-06 20:35:33 +04:00
*/
2018-09-20 22:15:49 +03:00
2019-07-01 23:00:34 +03:00
static uint64_t _next_rlocn_offset ( struct volume_group * vg , struct raw_locn * rlocn_old , uint64_t old_last , struct mda_header * mdah , uint64_t mdac_area_start , uint64_t alignment )
2002-11-18 17:04:08 +03:00
{
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
uint64_t next_start ;
uint64_t new_start ;
2019-07-01 23:00:34 +03:00
uint64_t adjust = 0 ;
/* This has only been designed to work with 512. */
if ( alignment ! = 512 )
log_warn ( " WARNING: metadata alignment should be 512 not %llu " ,
( unsigned long long ) alignment ) ;
2017-12-08 04:11:34 +03:00
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
/*
* No metadata has been written yet , begin at MDA_HEADER_SIZE offset
* from the start of the area .
*/
if ( ! rlocn_old )
return MDA_HEADER_SIZE ;
2017-12-08 04:11:34 +03:00
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
/*
* If new start would be less than alignment bytes from the end of the
* metadata area , then start at beginning .
*/
if ( mdah - > size - old_last < alignment ) {
2019-07-01 23:00:34 +03:00
log_debug_metadata ( " VG %s %u new metadata start align from %llu to beginning %u " ,
vg - > name , vg - > seqno ,
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
( unsigned long long ) ( old_last + 1 ) , MDA_HEADER_SIZE ) ;
return MDA_HEADER_SIZE ;
}
2017-12-12 21:46:25 +03:00
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
/*
* New metadata begins after the old , rounded up to alignment .
*/
next_start = old_last + 1 ;
2019-07-01 23:00:34 +03:00
if ( next_start % alignment )
adjust = alignment - ( next_start % alignment ) ;
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
new_start = next_start + adjust ;
2019-07-01 23:00:34 +03:00
log_debug_metadata ( " VG %s %u new metadata start align from %llu to %llu (+%llu) " ,
vg - > name , vg - > seqno ,
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
( unsigned long long ) next_start ,
( unsigned long long ) new_start ,
2019-07-01 23:00:34 +03:00
( unsigned long long ) adjust ) ;
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
/*
* If new_start is beyond the end of the metadata area or within
* alignment bytes of the end , then start at the beginning .
*/
if ( new_start > mdah - > size - alignment ) {
2019-07-01 23:00:34 +03:00
log_debug_metadata ( " VG %s %u new metadata start align from %llu to beginning %u " ,
vg - > name , vg - > seqno ,
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
( unsigned long long ) new_start , MDA_HEADER_SIZE ) ;
return MDA_HEADER_SIZE ;
}
2002-11-18 17:04:08 +03:00
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
return new_start ;
2002-11-18 17:04:08 +03:00
}
static struct volume_group * _vg_read_raw_area ( struct format_instance * fid ,
const char * vgname ,
2005-04-06 22:59:55 +04:00
struct device_area * area ,
2015-03-19 02:43:02 +03:00
struct cached_vg_fmtdata * * vg_fmtdata ,
unsigned * use_previous_vg ,
2018-04-20 18:43:50 +03:00
int precommitted ,
2018-02-07 00:18:11 +03:00
int primary_mda )
2002-11-18 17:04:08 +03:00
{
struct volume_group * vg = NULL ;
struct raw_locn * rlocn ;
struct mda_header * mdah ;
time_t when ;
char * desc ;
2002-12-20 02:25:55 +03:00
uint32_t wrap = 0 ;
2019-02-05 21:08:00 +03:00
uint32_t bad_fields = 0 ;
2002-11-18 17:04:08 +03:00
2019-02-05 21:08:00 +03:00
if ( ! ( mdah = raw_read_mda_header ( fid - > fmt , area , primary_mda , 0 , & bad_fields ) ) ) {
2018-03-01 21:42:18 +03:00
log_error ( " Failed to read vg %s from %s " , vgname , dev_name ( area - > dev ) ) ;
2018-12-23 01:44:42 +03:00
goto out ;
2018-03-01 21:42:18 +03:00
}
2002-11-18 17:04:08 +03:00
2018-02-07 00:18:11 +03:00
if ( ! ( rlocn = _read_metadata_location_vg ( area , mdah , primary_mda , vgname , & precommitted ) ) ) {
2013-01-08 02:30:29 +04:00
log_debug_metadata ( " VG %s not found on %s " , vgname , dev_name ( area - > dev ) ) ;
2002-11-18 17:04:08 +03:00
goto out ;
}
if ( rlocn - > offset + rlocn - > size > mdah - > size )
2002-12-20 02:25:55 +03:00
wrap = ( uint32_t ) ( ( rlocn - > offset + rlocn - > size ) - mdah - > size ) ;
2002-11-18 17:04:08 +03:00
2018-02-07 00:18:11 +03:00
vg = text_read_metadata ( fid , NULL , vg_fmtdata , use_previous_vg , area - > dev , primary_mda ,
( off_t ) ( area - > start + rlocn - > offset ) ,
( uint32_t ) ( rlocn - > size - wrap ) ,
( off_t ) ( area - > start + MDA_HEADER_SIZE ) ,
wrap ,
calc_crc ,
rlocn - > checksum ,
& when , & desc ) ;
if ( ! vg ) {
/* FIXME: detect and handle errors, and distinguish from the optimization
that skips parsing the metadata which also returns NULL . */
}
2015-03-19 02:43:02 +03:00
2018-02-08 00:20:39 +03:00
log_debug_metadata ( " Found metadata on %s at %llu size %llu for VG %s " ,
2018-02-07 00:18:11 +03:00
dev_name ( area - > dev ) ,
2018-02-08 00:20:39 +03:00
( unsigned long long ) ( area - > start + rlocn - > offset ) ,
( unsigned long long ) rlocn - > size ,
2018-02-07 00:18:11 +03:00
vgname ) ;
2002-11-18 17:04:08 +03:00
2015-03-19 14:14:47 +03:00
if ( vg & & precommitted )
2005-10-31 23:15:28 +03:00
vg - > status | = PRECOMMITTED ;
2002-11-18 17:04:08 +03:00
out :
return vg ;
}
static struct volume_group * _vg_read_raw ( struct format_instance * fid ,
const char * vgname ,
2012-02-29 06:35:35 +04:00
struct metadata_area * mda ,
2015-03-19 02:43:02 +03:00
struct cached_vg_fmtdata * * vg_fmtdata ,
2018-02-07 00:18:11 +03:00
unsigned * use_previous_vg )
2002-11-18 17:04:08 +03:00
{
struct mda_context * mdac = ( struct mda_context * ) mda - > metadata_locn ;
2010-06-29 00:30:30 +04:00
struct volume_group * vg ;
2018-02-07 00:18:11 +03:00
vg = _vg_read_raw_area ( fid , vgname , & mdac - > area , vg_fmtdata , use_previous_vg , 0 , mda_is_primary ( mda ) ) ;
2010-06-29 00:30:30 +04:00
return vg ;
2005-04-06 22:59:55 +04:00
}
static struct volume_group * _vg_read_precommit_raw ( struct format_instance * fid ,
const char * vgname ,
2015-03-19 02:43:02 +03:00
struct metadata_area * mda ,
struct cached_vg_fmtdata * * vg_fmtdata ,
2018-04-20 18:43:50 +03:00
unsigned * use_previous_vg )
2005-04-06 22:59:55 +04:00
{
struct mda_context * mdac = ( struct mda_context * ) mda - > metadata_locn ;
2010-06-29 00:30:30 +04:00
struct volume_group * vg ;
2005-04-06 22:59:55 +04:00
2018-02-07 00:18:11 +03:00
vg = _vg_read_raw_area ( fid , vgname , & mdac - > area , vg_fmtdata , use_previous_vg , 1 , mda_is_primary ( mda ) ) ;
2010-06-29 00:30:30 +04:00
return vg ;
2002-11-18 17:04:08 +03:00
}
2019-06-19 00:10:06 +03:00
# define MAX_DESC_LEN 2048
static char * _build_desc_write ( struct cmd_context * cmd , struct volume_group * vg )
{
size_t len = strlen ( cmd - > cmd_line ) + 32 ;
char * desc ;
if ( len > MAX_DESC_LEN )
len = MAX_DESC_LEN ;
if ( ! ( desc = zalloc ( len ) ) )
return_NULL ;
vg - > write_count + + ;
if ( vg - > write_count = = 1 )
dm_snprintf ( desc , len , " Write from %s. " , cmd - > cmd_line ) ;
else
dm_snprintf ( desc , len , " Write[%u] from %s. " , vg - > write_count , cmd - > cmd_line ) ;
return desc ;
}
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
/*
* VG metadata updates :
*
* [ mda_header ] [ raw_locn_0 ] [ raw_locn_1 ] [ text metadata circular buffer ]
*
* raw_locn . offset points into the metadata circular buffer to the
* start of metadata .
*
* When vg_read wants to read metadata from disk , it looks at the
* raw_locn_0 offset and reads the text metadata from that location
* in the circular buffer .
*
* Two full copies of the text metadata always exist in the circular
* buffer . When new metadata needs to be written , the following
* process is followed :
*
* - vg_write is called and writes the new text metadata into the
* circular buffer after the end of the current copy . vg_write saves
* an in - memory raw_locn struct ( mdac - > rlocn ) pointing to the new
* metadata in the buffer . No raw_locn structs are written to disk .
*
* - vg_precommit is called and writes the in - memory raw_locn struct that
* was saved by vg_write into raw_locn_1 (slot 1, the "precommit" slot).
* raw_locn_0 still points to the old metadata , and raw_locn_1 points
* to the new metadata .
*
* - vg_commit is called and writes the new raw_locn struct into raw_locn_0
* ( slot 0 , the " committed " slot ) .
*/
/*
* Writes new text metadata into the circular metadata buffer following the
* current ( old ) text metadata that ' s already in the metadata buffer .
*
* vg_write does * not * write new raw_locn fields pointing to the new metadata .
* The new raw_locn fields for the new metadata are saved in mdac - > rlocn and
* are written later by both vg_precommit and vg_commit . vg_precommit will
* write the new raw_locn into slot 1 and vg_commit will write the new raw_locn
* into slot 0.
*/
2002-11-18 17:04:08 +03:00
static int _vg_write_raw ( struct format_instance * fid , struct volume_group * vg ,
struct metadata_area * mda )
{
struct mda_context * mdac = ( struct mda_context * ) mda - > metadata_locn ;
2005-10-23 04:14:48 +04:00
struct text_fid_context * fidtc = ( struct text_fid_context * ) fid - > private ;
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
struct raw_locn * rlocn_old ;
struct raw_locn * rlocn_new ;
2002-11-18 17:04:08 +03:00
struct mda_header * mdah ;
2005-06-01 20:51:55 +04:00
struct pv_list * pvl ;
2019-07-01 23:00:34 +03:00
uint64_t mda_start = mdac - > area . start ;
uint64_t max_size ;
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
uint64_t old_start = 0 , old_last = 0 , old_size = 0 , old_wrap = 0 ;
uint64_t new_start = 0 , new_last = 0 , new_size = 0 , new_wrap = 0 ;
2019-07-01 23:00:34 +03:00
uint64_t write1_start = 0 , write1_last = 0 , write1_size = 0 ;
uint64_t write2_start = 0 , write2_last = 0 , write2_size = 0 ;
uint32_t write1_over = 0 , write2_over = 0 ;
uint32_t write_buf_size ;
uint32_t extra_size ;
2019-02-05 21:08:00 +03:00
uint32_t bad_fields = 0 ;
2019-07-01 23:00:34 +03:00
char * write_buf = NULL ;
const char * devname = dev_name ( mdac - > area . dev ) ;
bool overlap ;
2002-11-18 17:04:08 +03:00
int found = 0 ;
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
int r = 0 ;
2002-11-18 17:04:08 +03:00
2019-07-01 23:00:34 +03:00
/*
* old_start / old_last / new_start / new_last are relative to the
* start of the metadata area ( mda_start ) , and specify the first
* and last bytes of old / new metadata copies in the metadata area .
*
* write1_start / write1_last / write2_start / write2_last are
* relative to the start of the disk , and specify the
* first / last bytes written to disk when writing a new
* copy of metadata . ( Will generally be larger than the
* size of the metadata since the write is extended past
* the end of the new metadata to end on a 512 byte boundary . )
*
* So , write1_start = = mda_start + new_start .
*
* " last " values are inclusive , so last - start + 1 = size .
* old_last / new_last are the last bytes containing metadata .
* write1_last / write2_last are the last bytes written .
* The next copy of metadata will be written beginning at
* write1_last + 1.
*/
2002-11-18 17:04:08 +03:00
/* Ignore any mda on a PV outside the VG. vgsplit relies on this */
2008-11-04 01:14:30 +03:00
dm_list_iterate_items ( pvl , & vg - > pvs ) {
2005-06-01 20:51:55 +04:00
if ( pvl - > pv - > dev = = mdac - > area . dev ) {
2002-11-18 17:04:08 +03:00
found = 1 ;
break ;
}
}
if ( ! found )
return 1 ;
2019-02-05 21:08:00 +03:00
if ( ! ( mdah = raw_read_mda_header ( fid - > fmt , & mdac - > area , mda_is_primary ( mda ) , mda - > ignore_bad_fields , & bad_fields ) ) )
2008-01-30 16:19:47 +03:00
goto_out ;
2002-11-18 17:04:08 +03:00
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
/*
* Create a text metadata representation of struct vg in buffer .
* This buffer is written to disk below . This function is called
* to write metadata to each device / mda in the VG . The first time
2019-07-01 23:00:34 +03:00
* the metadata text is saved in write_buf and subsequent
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
* mdas use that .
2019-07-01 23:00:34 +03:00
*
* write_buf_size is increased in 64 K increments , so will generally
* be larger than new_size . The extra space in write_buf ( after
* new_size ) is zeroed . More than new_size can be written from
* write_buf to zero data on disk following the new text metadata ,
* up to the next 512 byte boundary .
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
*/
2019-07-01 23:00:34 +03:00
if ( fidtc - > write_buf ) {
write_buf = fidtc - > write_buf ;
write_buf_size = fidtc - > write_buf_size ;
new_size = fidtc - > new_metadata_size ;
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
} else {
2019-06-19 00:10:06 +03:00
char * desc = _build_desc_write ( fid - > fmt - > cmd , vg ) ;
2019-07-01 23:00:34 +03:00
new_size = text_vg_export_raw ( vg , desc , & write_buf , & write_buf_size ) ;
fidtc - > write_buf = write_buf ;
fidtc - > write_buf_size = write_buf_size ;
fidtc - > new_metadata_size = new_size ;
2019-06-19 00:10:06 +03:00
free ( desc ) ;
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
}
2019-07-01 23:00:34 +03:00
if ( ! new_size | | ! write_buf ) {
2002-11-18 17:04:08 +03:00
log_error ( " VG %s metadata writing failed " , vg - > name ) ;
goto out ;
}
2019-07-01 23:00:34 +03:00
log_debug_metadata ( " VG %s seqno %u metadata write to %s mda_start %llu mda_size %llu mda_last %llu " ,
vg - > name , vg - > seqno , devname ,
( unsigned long long ) mda_start ,
( unsigned long long ) mdah - > size ,
( unsigned long long ) ( mda_start + mdah - > size - 1 ) ) ;
2018-09-24 22:41:58 +03:00
/*
* The max size of a single copy of text metadata .
*
* The space available for all text metadata is the size of the
* metadata area ( mdah - > size ) minus the sector used for the header .
* Two copies of the text metadata must fit in this space , so it is
* divided in two . This result is then reduced by 512 because any
* single copy of metadata is rounded to begin on a sector boundary .
*/
max_size = ( ( mdah - > size - MDA_HEADER_SIZE ) / 2 ) - 512 ;
if ( new_size > max_size ) {
2019-07-01 23:00:34 +03:00
log_error ( " VG %s %u metadata on %s (%llu bytes) exceeds maximum metadata size (%llu bytes) " ,
vg - > name , vg - > seqno , devname ,
2018-09-24 22:41:58 +03:00
( unsigned long long ) new_size ,
( unsigned long long ) max_size ) ;
goto out ;
}
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
/*
* rlocn_old is the current , committed , raw_locn data in slot0 on disk .
*
* rlocn_new ( mdac - > rlocn ) is the new , in - memory , raw_locn data for the
2019-07-01 23:00:34 +03:00
* new metadata . rlocn_new is in - memory only , not yet written to disk .
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
*
* rlocn_new is not written to disk by vg_write . vg_write only writes
* the new text metadata into the circular buffer , it does not update any
* raw_locn slot to point to that new metadata . vg_write saves raw_locn
* values for the new metadata in memory at mdac - > rlocn so that
* vg_precommit and vg_commit can find it later and write it to disk .
*
* rlocn / raw_locn values , old_start , old_last , old_size , new_start ,
2019-07-01 23:00:34 +03:00
* new_last , new_size , are all in bytes , and are all relative to the
* start of the metadata area ( not to the start of the disk . )
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
*
* The start and last values are the first and last bytes that hold
* the metadata inclusively , e . g .
* metadata_v1 start = 512 , last = 611 , size = 100
* metadata_v2 start = 612 , last = 711 , size = 100
*
* { old , new } _ { start , last } values are all offset values from the
* beginning of the metadata area mdac - > area . start . At the beginning
* of the metadata area ( area . start ) , the first 512 bytes
* ( MDA_HEADER_SIZE ) is reserved for the mda_header / raw_locn structs ,
* after which the circular buffer of text metadata begins .
* So , when the text metadata wraps around , it starts again at
* area . start + MDA_HEADER_SIZE .
2019-07-01 23:00:34 +03:00
*
* When pe_start is at 1 MB ( the default ) , and mda_start is at 4 KB ,
* there will be 1 MB - 4 KB - 512 bytes of circular buffer space for
* text metadata .
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
*/
rlocn_old = & mdah - > raw_locns [ 0 ] ; /* slot0, committed metadata */
if ( rlocn_is_ignored ( rlocn_old ) )
rlocn_old = NULL ;
else if ( ! rlocn_old - > offset & & ! rlocn_old - > size )
rlocn_old = NULL ;
else {
old_start = rlocn_old - > offset ;
old_size = rlocn_old - > size ;
if ( rlocn_old - > offset + rlocn_old - > size > mdah - > size ) {
old_wrap = ( old_start + old_size ) - mdah - > size ;
old_last = old_wrap + MDA_HEADER_SIZE - 1 ;
} else {
old_wrap = 0 ;
old_last = old_start + old_size - 1 ;
}
}
/*
* _next_rlocn_offset returns the new offset to use for the new
* metadata . It is set to follow the end of the old metadata , plus
* some adjustment to start the new metadata on a 512 byte alignment .
* If the new metadata would start beyond the end of the metadata area ,
* or would start less than 512 bytes before the end of the metadata
* area , then the new start is set back at the beginning
* ( metadata begins MDA_HEADER_SIZE after start of metadata area ) .
*/
2019-07-01 23:00:34 +03:00
new_start = _next_rlocn_offset ( vg , rlocn_old , old_last , mdah , mda_start , MDA_ORIGINAL_ALIGNMENT ) ;
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
if ( new_start + new_size > mdah - > size ) {
new_wrap = ( new_start + new_size ) - mdah - > size ;
new_last = new_wrap + MDA_HEADER_SIZE - 1 ;
2019-07-01 23:00:34 +03:00
log_debug_metadata ( " VG %s %u wrapping metadata new_start %llu new_size %llu to size1 %llu size2 %llu " ,
vg - > name , vg - > seqno ,
( unsigned long long ) new_start ,
( unsigned long long ) new_size ,
( unsigned long long ) ( new_size - new_wrap ) ,
( unsigned long long ) new_wrap ) ;
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
} else {
new_wrap = 0 ;
new_last = new_start + new_size - 1 ;
}
/*
* Save the new metadata location in memory for vg_precommit and
* vg_commit . The new location is not written to disk here .
*/
rlocn_new = & mdac - > rlocn ;
rlocn_new - > offset = new_start ;
rlocn_new - > size = new_size ;
2019-07-01 23:00:34 +03:00
log_debug_metadata ( " VG %s %u metadata area location old start %llu last %llu size %llu wrap %llu " ,
vg - > name , vg - > seqno ,
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
( unsigned long long ) old_start ,
( unsigned long long ) old_last ,
( unsigned long long ) old_size ,
( unsigned long long ) old_wrap ) ;
2019-07-01 23:00:34 +03:00
log_debug_metadata ( " VG %s %u metadata area location new start %llu last %llu size %llu wrap %llu " ,
vg - > name , vg - > seqno ,
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
( unsigned long long ) new_start ,
( unsigned long long ) new_last ,
( unsigned long long ) new_size ,
( unsigned long long ) new_wrap ) ;
/*
* If the new copy of the metadata would overlap the old copy of the
* metadata , it means that the circular metadata buffer is full .
2018-09-24 22:41:58 +03:00
*
* Given the max_size restriction above , two copies of metadata should
* never overlap , so these overlap checks should not be technically
* necessary , and a failure should not occur here . It ' s left as a
* sanity check . For some unknown time , lvm did not enforce a
* max_size , but rather detected the too - large failure by checking for
* overlap between old and new .
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
*/
if ( new_wrap & & old_wrap ) {
2017-12-08 04:11:34 +03:00
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
/* old and new can't both wrap without overlapping */
2019-07-01 23:00:34 +03:00
overlap = true ;
2005-10-23 04:14:48 +04:00
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
} else if ( ! new_wrap & & ! old_wrap & &
( new_start > old_last ) & & ( new_last > new_start ) ) {
2017-12-12 23:57:36 +03:00
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
/* new metadata is located entirely after the old metadata */
2019-07-01 23:00:34 +03:00
overlap = false ;
2017-12-12 23:57:36 +03:00
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
} else if ( ! new_wrap & & ! old_wrap & &
( new_start < old_start ) & & ( new_last < old_start ) ) {
2017-12-13 01:52:22 +03:00
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
/* new metadata is located entirely before the old metadata */
2019-07-01 23:00:34 +03:00
overlap = false ;
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
} else if ( old_wrap & & ! new_wrap & &
( old_last < new_start ) & & ( new_start < new_last ) & & ( new_last < old_start ) ) {
/* when old wraps and the new doesn't, then no overlap is:
old_last followed by new_start followed by new_last
followed by old_start */
2019-07-01 23:00:34 +03:00
overlap = false ;
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
} else if ( new_wrap & & ! old_wrap & &
( new_last < old_start ) & & ( old_start < old_last ) & & ( old_last < new_start ) ) {
/* when new wraps and the old doesn't, then no overlap is:
new_last followed by old_start followed by old_last
followed by new_start . */
2019-07-01 23:00:34 +03:00
overlap = false ;
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
} else {
2019-07-01 23:00:34 +03:00
overlap = true ;
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
}
if ( overlap ) {
2019-07-01 23:00:34 +03:00
log_error ( " VG %s %u metadata on %s (%llu bytes) too large for circular buffer (%llu bytes with %llu used) " ,
vg - > name , vg - > seqno , devname ,
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
( unsigned long long ) new_size ,
( unsigned long long ) ( mdah - > size - MDA_HEADER_SIZE ) ,
( unsigned long long ) old_size ) ;
2018-04-20 18:43:50 +03:00
goto out ;
2002-11-18 17:04:08 +03:00
}
2019-07-01 23:00:34 +03:00
if ( ! new_wrap ) {
write1_start = mda_start + new_start ;
write1_size = new_size ;
write1_last = write1_start + write1_size - 1 ;
write1_over = ( write1_last + 1 ) % 512 ;
write2_start = 0 ;
write2_size = 0 ;
write2_last = 0 ;
write2_over = 0 ;
} else {
write1_start = mda_start + new_start ;
write1_size = new_size - new_wrap ;
write1_last = write1_start + write1_size - 1 ;
write1_over = 0 ;
write2_start = mda_start + MDA_HEADER_SIZE ;
write2_size = new_wrap ;
write2_last = write2_start + write2_size - 1 ;
write2_over = ( write2_last + 1 ) % 512 ;
}
if ( ! new_wrap )
log_debug_metadata ( " VG %s %u metadata disk location start %llu size %llu last %llu " ,
vg - > name , vg - > seqno ,
( unsigned long long ) write1_start ,
( unsigned long long ) write1_size ,
( unsigned long long ) write1_last ) ;
else
log_debug_metadata ( " VG %s %u metadata disk location write1 start %llu size %llu last %llu write2 start %llu size %llu last %llu " ,
vg - > name , vg - > seqno ,
( unsigned long long ) write1_start ,
( unsigned long long ) write1_size ,
( unsigned long long ) write1_last ,
( unsigned long long ) write2_start ,
( unsigned long long ) write2_size ,
( unsigned long long ) write2_last ) ;
/*
* Write more than the size of the new metadata , up to the next
* 512 byte boundary so that the space between this copy and the
* subsequent copy of metadata will be zeroed .
*
* Extend write1_size so that write1_last + 1 is a 512 byte multiple .
* The next metadata write should follow immediately after the
* extended write1_last since new metadata tries to begin on a 512
* byte boundary .
*
* write1_size can be extended up to write_buf_size which is the size
* of write_buf ( new_size is the portion of write_buf used by the new
* metadata . )
*
* If this metadata write will wrap , the first write is written
* all the way to the end of the metadata area , and it ' s the
* second wrapped write that is extended up to a 512 byte boundary .
*/
if ( write1_over ) {
extra_size = 512 - write1_over ; /* this many extra zero bytes written after metadata text */
write1_size + = extra_size ;
write1_last = write1_start + write1_size - 1 ;
log_debug_metadata ( " VG %s %u metadata last align from %llu to %llu (+%u) " ,
vg - > name , vg - > seqno ,
( unsigned long long ) write1_last - extra_size ,
( unsigned long long ) write1_last , extra_size ) ;
if ( write1_size > write_buf_size ) {
/* sanity check, shouldn't happen */
log_error ( " VG %s %u %s adjusted metadata end %llu extra %u larger than write buffer %llu " ,
vg - > name , vg - > seqno , devname ,
( unsigned long long ) write1_size , extra_size ,
( unsigned long long ) write_buf_size ) ;
write1_size - = extra_size ;
}
}
if ( write2_over ) {
extra_size = 512 - write2_over ; /* this many extra zero bytes written after metadata text */
write2_size + = extra_size ;
write2_last = write2_start + write2_size - 1 ;
log_debug_metadata ( " VG %s %u metadata last align from %llu to %llu (+%u) (wrapped) " ,
vg - > name , vg - > seqno ,
( unsigned long long ) write2_last - extra_size ,
( unsigned long long ) write2_last , extra_size ) ;
if ( write1_size + write2_size > write_buf_size ) {
/* sanity check, shouldn't happen */
log_error ( " VG %s %u %s adjusted metadata end %llu wrap %llu extra %u larger than write buffer %llu " ,
vg - > name , vg - > seqno , devname ,
( unsigned long long ) write1_size ,
( unsigned long long ) write2_size , extra_size ,
( unsigned long long ) write_buf_size ) ;
write2_size - = extra_size ;
}
}
if ( ( write1_size > write_buf_size ) | | ( write2_size > write_buf_size ) ) {
/* sanity check, shouldn't happen */
log_error ( " VG %s %u %s metadata write size %llu %llu larger than buffer %llu " ,
vg - > name , vg - > seqno , devname ,
( unsigned long long ) write1_size ,
( unsigned long long ) write2_size ,
( unsigned long long ) write_buf_size ) ;
goto out ;
}
dev_set_last_byte ( mdac - > area . dev , mda_start + mdah - > size ) ;
2018-02-13 17:58:35 +03:00
2019-07-01 23:00:34 +03:00
log_debug_metadata ( " VG %s %u metadata write at %llu size %llu (wrap %llu) " ,
vg - > name , vg - > seqno ,
( unsigned long long ) write1_start ,
( unsigned long long ) write1_size ,
( unsigned long long ) write2_size ) ;
2018-10-30 00:53:17 +03:00
2019-07-01 23:00:34 +03:00
if ( ! dev_write_bytes ( mdac - > area . dev , write1_start , ( size_t ) write1_size , write_buf ) ) {
log_error ( " Failed to write metadata to %s fd %d " , devname , mdac - > area . dev - > bcache_fd ) ;
2018-10-30 00:53:17 +03:00
dev_unset_last_byte ( mdac - > area . dev ) ;
2018-02-20 00:40:44 +03:00
goto out ;
}
2002-11-18 17:04:08 +03:00
2019-07-01 23:00:34 +03:00
if ( write2_size ) {
log_debug_metadata ( " VG %s %u metadata write at %llu size %llu (wrapped) " ,
vg - > name , vg - > seqno ,
( unsigned long long ) write2_start ,
( unsigned long long ) write2_size ) ;
2018-02-20 00:40:44 +03:00
2019-07-01 23:00:34 +03:00
if ( ! dev_write_bytes ( mdac - > area . dev , write2_start , write2_size ,
write_buf + new_size - new_wrap ) ) {
log_error ( " Failed to write metadata wrap to %s fd %d " , devname , mdac - > area . dev - > bcache_fd ) ;
2018-10-30 00:53:17 +03:00
dev_unset_last_byte ( mdac - > area . dev ) ;
2018-02-20 00:40:44 +03:00
goto out ;
}
2002-11-18 17:04:08 +03:00
}
2018-10-30 00:53:17 +03:00
dev_unset_last_byte ( mdac - > area . dev ) ;
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
rlocn_new - > checksum = calc_crc ( INITIAL_CRC ,
2019-07-01 23:00:34 +03:00
( uint8_t * ) write_buf ,
( uint32_t ) ( new_size - new_wrap ) ) ;
2002-11-18 17:04:08 +03:00
if ( new_wrap )
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
rlocn_new - > checksum = calc_crc ( rlocn_new - > checksum ,
2019-07-01 23:00:34 +03:00
( uint8_t * ) write_buf + new_size - new_wrap ,
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
( uint32_t ) new_wrap ) ;
2002-11-18 17:04:08 +03:00
r = 1 ;
out :
2008-10-01 00:37:52 +04:00
if ( ! r ) {
2019-07-01 23:00:34 +03:00
free ( fidtc - > write_buf ) ;
fidtc - > write_buf = NULL ;
fidtc - > write_buf_size = 0 ;
fidtc - > new_metadata_size = 0 ;
2008-10-01 00:37:52 +04:00
}
2002-11-18 17:04:08 +03:00
return r ;
}
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
/*
* Writes to disk the new raw_locn that was saved by vg_write_raw ( in mdac - > rlocn ) .
* The new raw_locn points to the new metadata that was written by vg_write_raw .
*
* After vg_write writes the new text metadata into the circular buffer ,
* vg_precommit writes the new raw_locn ( pointing to the new metadata )
* into slot1 ( raw_locns [ 1 ] ) . Then vg_commit writes the same raw_locn
* values again , but into slot0 ( raw_locns [ 0 ] ) . slot0 is the committed
* slot , and once slot0 is written , subsequent vg_reads will see the new
* metadata .
*/
2005-04-06 22:59:55 +04:00
static int _vg_commit_raw_rlocn ( struct format_instance * fid ,
struct volume_group * vg ,
struct metadata_area * mda ,
int precommit )
2002-11-18 17:04:08 +03:00
{
struct mda_context * mdac = ( struct mda_context * ) mda - > metadata_locn ;
2005-10-23 04:14:48 +04:00
struct text_fid_context * fidtc = ( struct text_fid_context * ) fid - > private ;
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
struct mda_header * mdab ;
struct raw_locn * rlocn_slot0 ;
struct raw_locn * rlocn_slot1 ;
struct raw_locn * rlocn_new ;
2005-06-01 20:51:55 +04:00
struct pv_list * pvl ;
2019-02-05 21:08:00 +03:00
uint32_t bad_fields = 0 ;
2002-11-18 17:04:08 +03:00
int r = 0 ;
int found = 0 ;
/* Ignore any mda on a PV outside the VG. vgsplit relies on this */
2008-11-04 01:14:30 +03:00
dm_list_iterate_items ( pvl , & vg - > pvs ) {
2005-06-01 20:51:55 +04:00
if ( pvl - > pv - > dev = = mdac - > area . dev ) {
2002-11-18 17:04:08 +03:00
found = 1 ;
break ;
}
}
if ( ! found )
return 1 ;
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
/*
* Data is read into the mdab buffer , the mdab buffer is then modified
* with new raw_locn values , then the mdab buffer is written . Note
* this is different than _vg_write_raw , where data is read into the
* mdah buffer , but the mdah buffer is not modified and mdac - > rlocn is
* modified .
*/
2019-02-05 21:08:00 +03:00
if ( ! ( mdab = raw_read_mda_header ( fid - > fmt , & mdac - > area , mda_is_primary ( mda ) , mda - > ignore_bad_fields , & bad_fields ) ) )
2008-01-30 16:19:47 +03:00
goto_out ;
2002-11-18 17:04:08 +03:00
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
/*
* rlocn_slot0 / rlocn_slot1 point into mdab which is the buffer that
* will be modified and written .
*/
rlocn_slot0 = & mdab - > raw_locns [ 0 ] ;
rlocn_slot1 = & mdab - > raw_locns [ 1 ] ;
if ( rlocn_is_ignored ( rlocn_slot0 ) | | ( ! rlocn_slot0 - > offset & & ! rlocn_slot0 - > size ) ) {
rlocn_slot0 - > offset = 0 ;
rlocn_slot0 - > size = 0 ;
rlocn_slot0 - > checksum = 0 ;
rlocn_slot1 - > offset = 0 ;
rlocn_slot1 - > size = 0 ;
rlocn_slot1 - > checksum = 0 ;
}
/*
* mdac - > rlocn is the in - memory copy of the new metadata ' s location on
* disk . mdac - > rlocn was saved by vg_write after it wrote the new text
* metadata to disk . This location of the new metadata is now written
* to disk by vg_precommit and vg_commit . vg_precommit writes the new
* location into the precommit slot ( slot1 / raw_locns [ 1 ] ) and
* vg_commit writes the new location into committed slot ( slot0 /
* raw_locns [ 0 ] ) .
*
* vg_revert sets the size of the in - memory mdac - > rlocn to 0 and calls
* this function to clear the precommit slot .
*/
rlocn_new = & mdac - > rlocn ;
if ( ! rlocn_new - > size ) {
2017-10-28 00:42:00 +03:00
/*
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
* When there is no new metadata , the precommit slot is
* cleared and the committed slot is left alone . ( see revert )
2017-10-28 00:42:00 +03:00
*/
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
rlocn_slot1 - > offset = 0 ;
rlocn_slot1 - > size = 0 ;
rlocn_slot1 - > checksum = 0 ;
2002-11-18 17:04:08 +03:00
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
} else if ( precommit ) {
/*
* vg_precommit writes the new raw_locn into slot 1 ,
* and keeps the existing committed raw_locn in slot 0.
*/
rlocn_slot1 - > offset = rlocn_new - > offset ;
rlocn_slot1 - > size = rlocn_new - > size ;
rlocn_slot1 - > checksum = rlocn_new - > checksum ;
} else {
/*
* vg_commit writes the new raw_locn into slot 0 ,
* and zeros the precommitted values in slot 1.
*/
rlocn_slot0 - > offset = rlocn_new - > offset ;
rlocn_slot0 - > size = rlocn_new - > size ;
rlocn_slot0 - > checksum = rlocn_new - > checksum ;
rlocn_slot1 - > offset = 0 ;
rlocn_slot1 - > size = 0 ;
rlocn_slot1 - > checksum = 0 ;
2005-10-31 23:15:28 +03:00
}
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
rlocn_set_ignored ( rlocn_slot0 , mda_is_ignored ( mda ) ) ;
2005-10-31 23:15:28 +03:00
if ( mdac - > rlocn . size ) {
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
if ( precommit ) {
log_debug_metadata ( " VG %s metadata precommit seq %u on %s mda header at %llu %s " ,
vg - > name , vg - > seqno , dev_name ( mdac - > area . dev ) ,
( unsigned long long ) mdac - > area . start ,
mda_is_ignored ( mda ) ? " (ignored) " : " (used) " ) ;
log_debug_metadata ( " VG %s metadata precommit slot0 offset %llu size %llu slot1 offset %llu size %llu " ,
vg - > name ,
( unsigned long long ) mdab - > raw_locns [ 0 ] . offset ,
( unsigned long long ) mdab - > raw_locns [ 0 ] . size ,
( unsigned long long ) mdab - > raw_locns [ 1 ] . offset ,
( unsigned long long ) mdab - > raw_locns [ 1 ] . size ) ;
2005-04-06 22:59:55 +04:00
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
} else {
log_debug_metadata ( " VG %s metadata commit seq %u on %s mda header at %llu %s " ,
vg - > name , vg - > seqno , dev_name ( mdac - > area . dev ) ,
( unsigned long long ) mdac - > area . start ,
mda_is_ignored ( mda ) ? " (ignored) " : " (used) " ) ;
log_debug_metadata ( " VG %s metadata commit slot0 offset %llu size %llu slot1 offset %llu size %llu " ,
vg - > name ,
( unsigned long long ) mdab - > raw_locns [ 0 ] . offset ,
( unsigned long long ) mdab - > raw_locns [ 0 ] . size ,
( unsigned long long ) mdab - > raw_locns [ 1 ] . offset ,
( unsigned long long ) mdab - > raw_locns [ 1 ] . size ) ;
}
} else {
if ( precommit ) {
log_debug_metadata ( " VG %s metadata precommit empty seq %u on %s mda header at %llu %s " ,
vg - > name , vg - > seqno , dev_name ( mdac - > area . dev ) ,
( unsigned long long ) mdac - > area . start ,
mda_is_ignored ( mda ) ? " (ignored) " : " (used) " ) ;
log_debug_metadata ( " VG %s metadata precommit empty slot0 offset %llu size %llu slot1 offset %llu size %llu " ,
vg - > name ,
( unsigned long long ) mdab - > raw_locns [ 0 ] . offset ,
( unsigned long long ) mdab - > raw_locns [ 0 ] . size ,
( unsigned long long ) mdab - > raw_locns [ 1 ] . offset ,
( unsigned long long ) mdab - > raw_locns [ 1 ] . size ) ;
} else {
log_debug_metadata ( " VG %s metadata commit empty seq %u on %s mda header at %llu %s " ,
vg - > name , vg - > seqno , dev_name ( mdac - > area . dev ) ,
( unsigned long long ) mdac - > area . start ,
mda_is_ignored ( mda ) ? " (ignored) " : " (used) " ) ;
log_debug_metadata ( " VG %s metadata commit empty slot0 offset %llu size %llu slot1 offset %llu size %llu " ,
vg - > name ,
( unsigned long long ) mdab - > raw_locns [ 0 ] . offset ,
( unsigned long long ) mdab - > raw_locns [ 0 ] . size ,
( unsigned long long ) mdab - > raw_locns [ 1 ] . offset ,
( unsigned long long ) mdab - > raw_locns [ 1 ] . size ) ;
}
}
rlocn_set_ignored ( mdab - > raw_locns , mda_is_ignored ( mda ) ) ;
2012-02-23 17:11:07 +04:00
2017-12-07 06:34:59 +03:00
if ( ! _raw_write_mda_header ( fid - > fmt , mdac - > area . dev , mda_is_primary ( mda ) , mdac - > area . start ,
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
mdab ) ) {
dm_pool_free ( fid - > fmt - > cmd - > mem , mdab ) ;
2002-11-18 17:04:08 +03:00
log_error ( " Failed to write metadata area header " ) ;
goto out ;
}
r = 1 ;
out :
2005-10-23 04:14:48 +04:00
if ( ! precommit ) {
2019-07-01 23:00:34 +03:00
free ( fidtc - > write_buf ) ;
fidtc - > write_buf = NULL ;
fidtc - > write_buf_size = 0 ;
fidtc - > new_metadata_size = 0 ;
2005-10-23 04:14:48 +04:00
}
2002-11-18 17:04:08 +03:00
return r ;
}
2005-04-06 22:59:55 +04:00
/*
 * Commit variant of the raw metadata area update: delegates to
 * _vg_commit_raw_rlocn() with its last argument set to 0.
 * Returns whatever _vg_commit_raw_rlocn() returns.
 */
static int _vg_commit_raw(struct format_instance *fid, struct volume_group *vg,
			  struct metadata_area *mda)
{
	const int precommit = 0;

	return _vg_commit_raw_rlocn(fid, vg, mda, precommit);
}
/*
 * Precommit variant of the raw metadata area update: delegates to
 * _vg_commit_raw_rlocn() with its last argument set to 1.
 * Returns whatever _vg_commit_raw_rlocn() returns.
 */
static int _vg_precommit_raw(struct format_instance *fid,
			     struct volume_group *vg,
			     struct metadata_area *mda)
{
	const int precommit = 1;

	return _vg_commit_raw_rlocn(fid, vg, mda, precommit);
}
2003-07-05 02:34:56 +04:00
/* Close metadata area devices */
static int _vg_revert_raw ( struct format_instance * fid , struct volume_group * vg ,
struct metadata_area * mda )
{
struct mda_context * mdac = ( struct mda_context * ) mda - > metadata_locn ;
2005-06-01 20:51:55 +04:00
struct pv_list * pvl ;
2003-07-05 02:34:56 +04:00
int found = 0 ;
/* Ignore any mda on a PV outside the VG. vgsplit relies on this */
2008-11-04 01:14:30 +03:00
dm_list_iterate_items ( pvl , & vg - > pvs ) {
2005-06-01 20:51:55 +04:00
if ( pvl - > pv - > dev = = mdac - > area . dev ) {
2003-07-05 02:34:56 +04:00
found = 1 ;
break ;
}
}
if ( ! found )
return 1 ;
2005-10-31 23:15:28 +03:00
/* Wipe pre-committed metadata */
mdac - > rlocn . size = 0 ;
return _vg_commit_raw_rlocn ( fid , vg , mda , 0 ) ;
2003-07-05 02:34:56 +04:00
}
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
/*
* vg_remove clears the two raw_locn slots but leaves the circular metadata
* buffer alone .
*/
2002-11-18 17:04:08 +03:00
static int _vg_remove_raw ( struct format_instance * fid , struct volume_group * vg ,
struct metadata_area * mda )
{
struct mda_context * mdac = ( struct mda_context * ) mda - > metadata_locn ;
struct mda_header * mdah ;
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
struct raw_locn * rlocn_slot0 ;
struct raw_locn * rlocn_slot1 ;
2019-02-05 21:08:00 +03:00
uint32_t bad_fields = 0 ;
2002-11-18 17:04:08 +03:00
int r = 0 ;
2018-02-14 23:47:28 +03:00
if ( ! ( mdah = dm_pool_alloc ( fid - > fmt - > cmd - > mem , MDA_HEADER_SIZE ) ) ) {
log_error ( " struct mda_header allocation failed " ) ;
return 0 ;
}
2002-11-18 17:04:08 +03:00
2018-02-14 23:47:28 +03:00
/*
* FIXME : what ' s the point of reading the mda_header and metadata ,
* since we zero the rlocn fields whether we can read them or not .
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
* Just to print the warning ?
2018-02-14 23:47:28 +03:00
*/
2019-02-05 21:08:00 +03:00
if ( ! _raw_read_mda_header ( mdah , & mdac - > area , mda_is_primary ( mda ) , 0 , & bad_fields ) )
2018-02-14 23:47:28 +03:00
log_warn ( " WARNING: Removing metadata location on %s with bad mda header. " ,
dev_name ( mdac - > area . dev ) ) ;
2002-11-18 17:04:08 +03:00
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
rlocn_slot0 = & mdah - > raw_locns [ 0 ] ;
rlocn_slot1 = & mdah - > raw_locns [ 1 ] ;
rlocn_slot0 - > offset = 0 ;
rlocn_slot0 - > size = 0 ;
rlocn_slot0 - > checksum = 0 ;
rlocn_set_ignored ( rlocn_slot0 , mda_is_ignored ( mda ) ) ;
rlocn_slot1 - > offset = 0 ;
rlocn_slot1 - > size = 0 ;
rlocn_slot1 - > checksum = 0 ;
2002-11-18 17:04:08 +03:00
2017-12-07 06:34:59 +03:00
if ( ! _raw_write_mda_header ( fid - > fmt , mdac - > area . dev , mda_is_primary ( mda ) , mdac - > area . start ,
2002-11-18 17:04:08 +03:00
mdah ) ) {
2008-08-16 13:46:55 +04:00
dm_pool_free ( fid - > fmt - > cmd - > mem , mdah ) ;
2002-11-18 17:04:08 +03:00
log_error ( " Failed to write metadata area header " ) ;
goto out ;
}
r = 1 ;
out :
return r ;
}
static struct volume_group * _vg_read_file_name ( struct format_instance * fid ,
const char * vgname ,
2005-04-06 22:59:55 +04:00
const char * read_path )
2001-11-21 12:20:05 +03:00
{
2002-01-10 17:27:47 +03:00
struct volume_group * vg ;
2002-02-11 14:43:17 +03:00
time_t when ;
char * desc ;
2002-01-10 17:27:47 +03:00
2018-02-07 00:18:11 +03:00
if ( ! ( vg = text_read_metadata_file ( fid , read_path , & when , & desc ) ) ) {
log_error ( " Failed to read VG %s from %s " , vgname , read_path ) ;
return NULL ;
}
2002-01-10 17:27:47 +03:00
/*
* Currently you can only have a single volume group per
* text file ( this restriction may remain ) . We need to
* check that it contains the correct volume group .
*/
2002-11-18 17:04:08 +03:00
if ( vgname & & strcmp ( vgname , vg - > name ) ) {
2013-06-18 23:50:29 +04:00
fid - > ref_count + + ; /* Preserve FID after vg release */
2011-08-11 00:25:29 +04:00
release_vg ( vg ) ;
2009-07-16 00:02:46 +04:00
log_error ( " '%s' does not contain volume group '%s'. " ,
read_path , vgname ) ;
2002-01-10 17:27:47 +03:00
return NULL ;
2017-07-19 17:16:12 +03:00
}
log_debug_metadata ( " Read volume group %s from %s " , vg - > name , read_path ) ;
2002-01-10 17:27:47 +03:00
return vg ;
2001-11-21 12:20:05 +03:00
}
2002-11-18 17:04:08 +03:00
static struct volume_group * _vg_read_file ( struct format_instance * fid ,
const char * vgname ,
2012-02-29 06:35:35 +04:00
struct metadata_area * mda ,
2015-03-19 02:43:02 +03:00
struct cached_vg_fmtdata * * vg_fmtdata ,
2018-02-07 00:18:11 +03:00
unsigned * use_previous_vg __attribute__ ( ( unused ) ) )
2002-11-18 17:04:08 +03:00
{
struct text_context * tc = ( struct text_context * ) mda - > metadata_locn ;
return _vg_read_file_name ( fid , vgname , tc - > path_live ) ;
}
2005-04-06 22:59:55 +04:00
static struct volume_group * _vg_read_precommit_file ( struct format_instance * fid ,
const char * vgname ,
2015-03-19 02:43:02 +03:00
struct metadata_area * mda ,
struct cached_vg_fmtdata * * vg_fmtdata ,
2018-04-20 18:43:50 +03:00
unsigned * use_previous_vg __attribute__ ( ( unused ) ) )
2005-04-06 22:59:55 +04:00
{
struct text_context * tc = ( struct text_context * ) mda - > metadata_locn ;
2005-10-31 23:15:28 +03:00
struct volume_group * vg ;
2005-04-06 22:59:55 +04:00
2005-10-31 23:15:28 +03:00
if ( ( vg = _vg_read_file_name ( fid , vgname , tc - > path_edit ) ) )
vg - > status | = PRECOMMITTED ;
else
vg = _vg_read_file_name ( fid , vgname , tc - > path_live ) ;
return vg ;
2005-04-06 22:59:55 +04:00
}
2010-07-09 19:34:40 +04:00
static int _vg_write_file ( struct format_instance * fid __attribute__ ( ( unused ) ) ,
2007-08-22 18:38:18 +04:00
struct volume_group * vg , struct metadata_area * mda )
2001-11-21 12:20:05 +03:00
{
2002-11-18 17:04:08 +03:00
struct text_context * tc = ( struct text_context * ) mda - > metadata_locn ;
2002-01-15 20:37:23 +03:00
2002-01-07 12:05:31 +03:00
FILE * fp ;
2002-01-10 17:27:47 +03:00
int fd ;
2002-01-09 22:16:48 +03:00
char * slash ;
char temp_file [ PATH_MAX ] , temp_dir [ PATH_MAX ] ;
2007-07-02 15:17:21 +04:00
slash = strrchr ( tc - > path_edit , ' / ' ) ;
2001-11-21 12:20:05 +03:00
2002-01-09 22:16:48 +03:00
if ( slash = = 0 )
strcpy ( temp_dir , " . " ) ;
2002-04-24 22:20:51 +04:00
else if ( slash - tc - > path_edit < PATH_MAX ) {
2018-03-02 18:25:37 +03:00
( void ) dm_strncpy ( temp_dir , tc - > path_edit ,
( size_t ) ( slash - tc - > path_edit + 1 ) ) ;
2002-01-09 22:16:48 +03:00
} else {
log_error ( " Text format failed to determine directory. " ) ;
return 0 ;
}
2008-12-07 07:27:56 +03:00
if ( ! create_temp_name ( temp_dir , temp_file , sizeof ( temp_file ) , & fd ,
& vg - > cmd - > rand_seed ) ) {
2009-07-16 00:02:46 +04:00
log_error ( " Couldn't create temporary text file name. " ) ;
2002-02-25 01:31:55 +03:00
return 0 ;
}
2002-01-09 22:16:48 +03:00
if ( ! ( fp = fdopen ( fd , " w " ) ) ) {
log_sys_error ( " fdopen " , temp_file ) ;
2007-01-25 17:37:48 +03:00
if ( close ( fd ) )
log_sys_error ( " fclose " , temp_file ) ;
2002-01-07 12:05:31 +03:00
return 0 ;
}
2001-11-21 12:20:05 +03:00
2013-01-08 02:30:29 +04:00
log_debug_metadata ( " Writing %s metadata to %s " , vg - > name , temp_file ) ;
2002-11-18 17:04:08 +03:00
if ( ! text_vg_export_file ( vg , tc - > desc , fp ) ) {
2002-01-09 22:16:48 +03:00
log_error ( " Failed to write metadata to %s. " , temp_file ) ;
2007-01-25 17:37:48 +03:00
if ( fclose ( fp ) )
log_sys_error ( " fclose " , temp_file ) ;
2002-01-09 16:07:03 +03:00
return 0 ;
}
2005-09-01 22:37:22 +04:00
if ( fsync ( fd ) & & ( errno ! = EROFS ) & & ( errno ! = EINVAL ) ) {
2002-04-24 22:20:51 +04:00
log_sys_error ( " fsync " , tc - > path_edit ) ;
2007-01-25 17:37:48 +03:00
if ( fclose ( fp ) )
log_sys_error ( " fclose " , tc - > path_edit ) ;
2002-04-24 22:20:51 +04:00
return 0 ;
}
2007-07-24 21:48:08 +04:00
if ( lvm_fclose ( fp , tc - > path_edit ) )
return_0 ;
2002-04-24 22:20:51 +04:00
2016-04-20 13:45:15 +03:00
log_debug_metadata ( " Renaming %s to %s " , temp_file , tc - > path_edit ) ;
2002-04-24 22:20:51 +04:00
if ( rename ( temp_file , tc - > path_edit ) ) {
log_error ( " %s: rename to %s failed: %s " , temp_file ,
tc - > path_edit , strerror ( errno ) ) ;
return 0 ;
}
return 1 ;
}
2010-07-09 19:34:40 +04:00
static int _vg_commit_file_backup ( struct format_instance * fid __attribute__ ( ( unused ) ) ,
2002-11-18 17:04:08 +03:00
struct volume_group * vg ,
struct metadata_area * mda )
2002-04-24 22:20:51 +04:00
{
2002-11-18 17:04:08 +03:00
struct text_context * tc = ( struct text_context * ) mda - > metadata_locn ;
if ( test_mode ( ) ) {
log_verbose ( " Test mode: Skipping committing %s metadata (%u) " ,
vg - > name , vg - > seqno ) ;
if ( unlink ( tc - > path_edit ) ) {
2013-01-08 02:30:29 +04:00
log_debug_metadata ( " Unlinking %s " , tc - > path_edit ) ;
2002-11-18 17:04:08 +03:00
log_sys_error ( " unlink " , tc - > path_edit ) ;
return 0 ;
}
} else {
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
log_debug_metadata ( " Committing file %s metadata (%u) " , vg - > name , vg - > seqno ) ;
2013-01-08 02:30:29 +04:00
log_debug_metadata ( " Renaming %s to %s " , tc - > path_edit , tc - > path_live ) ;
2002-11-18 17:04:08 +03:00
if ( rename ( tc - > path_edit , tc - > path_live ) ) {
log_error ( " %s: rename to %s failed: %s " , tc - > path_edit ,
2002-12-06 01:56:22 +03:00
tc - > path_live , strerror ( errno ) ) ;
2002-11-18 17:04:08 +03:00
return 0 ;
}
}
2002-12-06 01:56:22 +03:00
sync_dir ( tc - > path_edit ) ;
2002-04-24 22:20:51 +04:00
return 1 ;
}
2002-11-18 17:04:08 +03:00
static int _vg_commit_file ( struct format_instance * fid , struct volume_group * vg ,
struct metadata_area * mda )
2002-04-24 22:20:51 +04:00
{
2002-11-18 17:04:08 +03:00
struct text_context * tc = ( struct text_context * ) mda - > metadata_locn ;
2011-03-11 17:45:17 +03:00
const char * slash ;
2007-08-06 18:57:48 +04:00
char new_name [ PATH_MAX ] ;
2002-12-20 02:25:55 +03:00
size_t len ;
2002-04-24 22:20:51 +04:00
2002-11-18 17:04:08 +03:00
if ( ! _vg_commit_file_backup ( fid , vg , mda ) )
2002-01-07 12:05:31 +03:00
return 0 ;
2001-11-21 12:20:05 +03:00
2002-11-18 17:04:08 +03:00
/* vgrename? */
2007-07-02 15:17:21 +04:00
if ( ( slash = strrchr ( tc - > path_live , ' / ' ) ) )
2002-11-18 17:04:08 +03:00
slash = slash + 1 ;
else
slash = tc - > path_live ;
if ( strcmp ( slash , vg - > name ) ) {
len = slash - tc - > path_live ;
2017-06-27 12:37:24 +03:00
if ( ( len + strlen ( vg - > name ) ) > ( sizeof ( new_name ) - 1 ) ) {
log_error ( " Renaming path %s is too long for VG %s. " ,
tc - > path_live , vg - > name ) ;
return 0 ;
}
2007-08-06 18:57:48 +04:00
strncpy ( new_name , tc - > path_live , len ) ;
strcpy ( new_name + len , vg - > name ) ;
2013-01-08 02:30:29 +04:00
log_debug_metadata ( " Renaming %s to %s " , tc - > path_live , new_name ) ;
2002-11-18 17:04:08 +03:00
if ( test_mode ( ) )
log_verbose ( " Test mode: Skipping rename " ) ;
else {
2007-08-06 18:57:48 +04:00
if ( rename ( tc - > path_live , new_name ) ) {
2002-11-18 17:04:08 +03:00
log_error ( " %s: rename to %s failed: %s " ,
2007-08-06 18:57:48 +04:00
tc - > path_live , new_name ,
2002-11-18 17:04:08 +03:00
strerror ( errno ) ) ;
2007-08-06 18:57:48 +04:00
sync_dir ( new_name ) ;
2002-11-18 17:04:08 +03:00
return 0 ;
}
}
}
2002-04-24 22:20:51 +04:00
return 1 ;
}
2010-07-09 19:34:40 +04:00
static int _vg_remove_file ( struct format_instance * fid __attribute__ ( ( unused ) ) ,
struct volume_group * vg __attribute__ ( ( unused ) ) ,
2002-11-18 17:04:08 +03:00
struct metadata_area * mda )
2002-04-24 22:20:51 +04:00
{
2002-11-18 17:04:08 +03:00
struct text_context * tc = ( struct text_context * ) mda - > metadata_locn ;
2002-04-24 22:20:51 +04:00
if ( path_exists ( tc - > path_edit ) & & unlink ( tc - > path_edit ) ) {
log_sys_error ( " unlink " , tc - > path_edit ) ;
2002-01-09 22:16:48 +03:00
return 0 ;
}
2002-04-24 22:20:51 +04:00
if ( path_exists ( tc - > path_live ) & & unlink ( tc - > path_live ) ) {
log_sys_error ( " unlink " , tc - > path_live ) ;
return 0 ;
}
2002-12-06 01:56:22 +03:00
sync_dir ( tc - > path_live ) ;
2002-04-24 22:20:51 +04:00
2002-01-07 12:05:31 +03:00
return 1 ;
2001-11-21 12:20:05 +03:00
}
2018-02-07 00:18:11 +03:00
int read_metadata_location_summary ( const struct format_type * fmt ,
2018-04-20 18:43:50 +03:00
struct mda_header * mdah , int primary_mda , struct device_area * dev_area ,
struct lvmcache_vgsummary * vgsummary , uint64_t * mda_free_sectors )
2002-02-22 14:44:56 +03:00
{
2018-04-20 18:43:50 +03:00
struct raw_locn * rlocn ;
uint32_t wrap = 0 ;
unsigned int len = 0 ;
2018-10-15 19:35:33 +03:00
char namebuf [ NAME_LEN + 1 ] __attribute__ ( ( aligned ( 8 ) ) ) ;
2018-09-24 23:27:03 +03:00
uint64_t max_size ;
2018-01-05 00:13:44 +03:00
2018-04-20 18:43:50 +03:00
if ( ! mdah ) {
2018-02-07 00:18:11 +03:00
log_error ( INTERNAL_ERROR " read_metadata_location_summary called with NULL pointer for mda_header " ) ;
2018-04-20 18:43:50 +03:00
return 0 ;
2015-03-06 11:04:31 +03:00
}
2009-03-03 19:35:32 +03:00
2018-09-24 23:27:03 +03:00
/*
* For the case where the metadata area is unused , half is available .
*/
if ( mda_free_sectors ) {
max_size = ( ( mdah - > size - MDA_HEADER_SIZE ) / 2 ) - 512 ;
* mda_free_sectors = max_size > > SECTOR_SHIFT ;
}
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
rlocn = mdah - > raw_locns ; /* slot0, committed metadata */
2018-01-05 00:13:44 +03:00
2018-04-20 18:43:50 +03:00
/*
* If no valid offset , do not try to search for vgname
*/
if ( ! rlocn - > offset ) {
2018-02-08 00:20:39 +03:00
log_debug_metadata ( " Metadata location on %s at %llu has offset 0. " ,
dev_name ( dev_area - > dev ) ,
( unsigned long long ) ( dev_area - > start + rlocn - > offset ) ) ;
2018-03-01 21:42:18 +03:00
vgsummary - > zero_offset = 1 ;
2018-04-20 18:43:50 +03:00
return 0 ;
2018-01-09 04:50:23 +03:00
}
2018-10-15 19:35:33 +03:00
memset ( namebuf , 0 , sizeof ( namebuf ) ) ;
if ( ! dev_read_bytes ( dev_area - > dev , dev_area - > start + rlocn - > offset , NAME_LEN , namebuf ) )
stack ;
2006-04-12 21:54:11 +04:00
2018-10-15 19:35:33 +03:00
while ( namebuf [ len ] & & ! isspace ( namebuf [ len ] ) & & namebuf [ len ] ! = ' { ' & &
2006-04-12 21:54:11 +04:00
len < ( NAME_LEN - 1 ) )
len + + ;
2018-10-15 19:35:33 +03:00
namebuf [ len ] = ' \0 ' ;
2006-04-12 21:54:11 +04:00
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
/*
* Check that the text metadata in the circular buffer begins with a
* valid vg name .
*/
2018-10-15 19:35:33 +03:00
if ( ! validate_name ( namebuf ) ) {
improve reading and repairing vg metadata
The fact that vg repair is implemented as a part of vg read
has led to a messy and complicated implementation of vg_read,
and limited and uncontrolled repair capability. This splits
read and repair apart.
Summary
-------
- take all kinds of various repairs out of vg_read
- vg_read no longer writes anything
- vg_read now simply reads and returns vg metadata
- vg_read ignores bad or old copies of metadata
- vg_read proceeds with a single good copy of metadata
- improve error checks and handling when reading
- keep track of bad (corrupt) copies of metadata in lvmcache
- keep track of old (seqno) copies of metadata in lvmcache
- keep track of outdated PVs in lvmcache
- vg_write will do basic repairs
- new command vgck --updatemetadata will do all repairs
Details
-------
- In scan, do not delete dev from lvmcache if reading/processing fails;
the dev is still present, and removing it makes it look like the dev
is not there. Records are now kept about the problems with each PV
so they be fixed/repaired in the appropriate places.
- In scan, record a bad mda on failure, and delete the mda from
mda in use list so it will not be used by vg_read or vg_write,
only by repair.
- In scan, succeed if any good mda on a device is found, instead of
failing if any is bad. The bad/old copies of metadata should not
interfere with normal usage while good copies can be used.
- In scan, add a record of old mdas in lvmcache for later, do not repair
them while reading, and do not let them prevent us from finding and
using a good copy of metadata from elsewhere. One result is that
"inconsistent metadata" is no longer a read error, but instead a
record in lvmcache that can be addressed separate from the read.
- Treat a dev with no good mdas like a dev with no mdas, which is an
existing case we already handle.
- Don't use a fake vg "handle" for returning an error from vg_read,
or the vg_read_error function for getting that error number;
just return null if the vg cannot be read or used, and an error_flags
arg with flags set for the specific kind of error (which can be used
later for determining the kind of repair.)
- Saving an original copy of the vg metadata, for purposes of reverting
a write, is now done explicitly in vg_read instead of being hidden in
the vg_make_handle function.
- When a vg is not accessible due to "access restrictions" but is
otherwise fine, return the vg through the new error_vg arg so that
process_each_pv can skip the PVs in the VG while processing.
(This is a temporary accommodation for the way process_each_pv
tracks which devs have been looked at, and can be dropped later
when process_each_pv implementation dev tracking is changed.)
- vg_read does not try to fix or recover a vg, but now just reads the
metadata, checks access restrictions and returns it.
(Checking access restrictions might be better done outside of vg_read,
but this is a later improvement.)
- _vg_read now simply makes one attempt to read metadata from
each mda, and uses the most recent copy to return to the caller
in the form of a 'vg' struct.
(bad mdas were excluded during the scan and are not retried)
(old mdas were not excluded during scan and are retried here)
- vg_read uses _vg_read to get the latest copy of metadata from mdas,
and then makes various checks against it to produce warnings,
and to check if VG access is allowed (access restrictions include:
writable, foreign, shared, clustered, missing pvs).
- Things that were previously silently/automatically written by vg_read
that are now done by vg_write, based on the records made in lvmcache
during the scan and read:
. clearing the missing flag
. updating old copies of metadata
. clearing outdated pvs
. updating pv header flags
- Bad/corrupt metadata are now repaired; they were not before.
Test changes
------------
- A read command no longer writes the VG to repair it, so add a write
command to do a repair.
(inconsistent-metadata, unlost-pv)
- When a missing PV is removed from a VG, and then the device is
enabled again, vgck --updatemetadata is needed to clear the
outdated PV before it can be used again, where it wasn't before.
(lvconvert-repair-policy, lvconvert-repair-raid, lvconvert-repair,
mirror-vgreduce-removemissing, pv-ext-flags, unlost-pv)
Reading bad/old metadata
------------------------
- "bad metadata": the mda_header or metadata text has invalid fields
or can't be parsed by lvm. This is a form of corruption that would
not be caused by known failure scenarios. A checksum error is
typically included among the errors reported.
- "old metadata": a valid copy of the metadata that has a smaller seqno
than other copies of the metadata. This can happen if the device
failed, or io failed, or lvm failed while committing new metadata
to all the metadata areas. Old metadata on a PV that has been
removed from the VG is the "outdated" case below.
When a VG has some PVs with bad/old metadata, lvm can simply ignore
the bad/old copies, and use a good copy. This is why there are
multiple copies of the metadata -- so it's available even when some
of the copies cannot be used. The bad/old copies do not have to be
repaired before the VG can be used (the repair can happen later.)
A PV with no good copies of the metadata simply falls back to being
treated like a PV with no mdas; a common and harmless configuration.
When bad/old metadata exists, lvm warns the user about it, and
suggests repairing it using a new metadata repair command.
Bad metadata in particular is something that users will want to
investigate and repair themselves, since it should not happen and
may indicate some other problem that needs to be fixed.
PVs with bad/old metadata are not the same as missing devices.
Missing devices will block various kinds of VG modification or
activation, but bad/old metadata will not.
Previously, lvm would attempt to repair bad/old metadata whenever
it was read. This was unnecessary since lvm does not require every
copy of the metadata to be used. It would also hide potential
problems that should be investigated by the user. It was also
dangerous in cases where the VG was on shared storage. The user
is now allowed to investigate potential problems and decide how
and when to repair them.
Repairing bad/old metadata
--------------------------
When label scan sees bad metadata in an mda, that mda is removed
from the lvmcache info->mdas list. This means that vg_read will
skip it, and not attempt to read/process it again. If it was
the only in-use mda on a PV, that PV is treated like a PV with
no mdas. It also means that vg_write will also skip the bad mda,
and not attempt to write new metadata to it. The only way to
repair bad metadata is with the metadata repair command.
When label scan sees old metadata in an mda, that mda is kept
in the lvmcache info->mdas list. This means that vg_read will
read/process it again, and likely see the same mismatch with
the other copies of the metadata. Like the label_scan, the
vg_read will simply ignore the old copy of the metadata and
use the latest copy. If the command is modifying the vg
(e.g. lvcreate), then vg_write, which writes new metadata to
every mda on info->mdas, will write the new metadata to the
mda that had the old version. If successful, this will resolve
the old metadata problem (without needing to run a metadata
repair command.)
Outdated PVs
------------
An outdated PV is a PV that has an old copy of VG metadata
that shows it is a member of the VG, but the latest copy of
the VG metadata does not include this PV. This happens if
the PV is disconnected, vgreduce --removemissing is run to
remove the PV from the VG, then the PV is reconnected.
In this case, the outdated PV needs to have its outdated metadata
removed and the PV used flag needs to be cleared. This repair
will be done by the subsequent repair command. It is also done
if vgremove is run on the VG.
MISSING PVs
-----------
When a device is missing, most commands will refuse to modify
the VG. This is the simple case. More complicated is when
a command is allowed to modify the VG while it is missing a
device.
When a VG is written while a device is missing for one of its PVs,
the VG metadata is written to disk with the MISSING flag on the PV
with the missing device. When the VG is next used, it is treated
as if the PV with the MISSING flag still has a missing device, even
if that device has reappeared.
If all LVs that were using a PV with the MISSING flag are removed
or repaired so that the MISSING PV is no longer used, then the
next time the VG metadata is written, the MISSING flag will be
dropped.
Alternative methods of clearing the MISSING flag are:
vgreduce --removemissing will remove PVs with missing devices,
or PVs with the MISSING flag where the device has reappeared.
vgextend --restoremissing will clear the MISSING flag on PVs
where the device has reappeared, allowing the VG to be used
normally. This must be done with caution since the reappeared
device may have old data that is inconsistent with data on other PVs.
Bad mda repair
--------------
The new command:
vgck --updatemetadata VG
first uses vg_write to repair old metadata, and other basic
issues mentioned above (old metadata, outdated PVs, pv_header
flags, MISSING_PV flags). It will also go further and repair
bad metadata:
. text metadata that has a bad checksum
. text metadata that is not parsable
. corrupt mda_header checksum and version fields
(To keep a clean diff, #if 0 is added around functions that
are replaced by new code. These commented functions are
removed by the following commit.)
2019-05-24 20:04:37 +03:00
log_warn ( " WARNING: Metadata location on %s at %llu begins with invalid VG name. " ,
2018-02-08 00:20:39 +03:00
dev_name ( dev_area - > dev ) ,
( unsigned long long ) ( dev_area - > start + rlocn - > offset ) ) ;
return 0 ;
2018-02-07 00:18:11 +03:00
}
2006-04-12 21:54:11 +04:00
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
/*
* When the current metadata wraps around the end of the metadata area
* ( so some is located at the end and some is located at the
* beginning ) , then " wrap " is the number of bytes that was written back
* at the beginning . The end of this wrapped metadata is located at an
* offset of wrap + MDA_HEADER_SIZE from area . start .
*/
2006-04-11 17:55:59 +04:00
if ( rlocn - > offset + rlocn - > size > mdah - > size )
2018-04-20 18:43:50 +03:00
wrap = ( uint32_t ) ( ( rlocn - > offset + rlocn - > size ) - mdah - > size ) ;
2002-11-18 17:04:08 +03:00
2018-02-07 00:18:11 +03:00
/*
* Did we see this metadata before ?
* Look in lvmcache to see if there is vg info matching
* the checksum / size that we see in the mda_header ( rlocn )
* on this device . If so , then vgsummary - > name is set
* and controls if the " checksum_only " flag passed to
* text_read_metadata_summary ( ) is 1 or 0.
*
* If checksum_only = 1 , then text_read_metadata_summary ( )
* will read the metadata from this device , and run the
* checksum function on it . If the calculated checksum
* of the metadata matches the checksum in the mda_header ,
* which also matches the checksum saved in vginfo from
* another device , then it skips parsing the metadata into
* a config tree , which saves considerable cpu time .
2018-04-19 00:29:42 +03:00
*
* ( NB . there can be different VGs with different metadata
* and checksums , but with the same name . )
*
* FIXME : handle the case where mda_header checksum is bad
* but metadata checksum is good .
2018-02-07 00:18:11 +03:00
*/
2018-04-19 00:29:42 +03:00
/*
* If the checksum we compute of the metadata differs from
* the checksum from mda_header that we save here , then we
* ignore the device . FIXME : we need to classify a device
* with errors like this as defective .
*
* If the checksum from mda_header and computed from metadata
* does not match the checksum saved in lvmcache from a prev
* device , then we do not skip parsing / saving metadata from
* this dev . It ' s parsed , fields saved in vgsummary , which
* is passed into lvmcache ( update_vgname_and_id ) , and
* there we ' ll see a checksum mismatch .
*/
2015-03-19 02:43:02 +03:00
vgsummary - > mda_checksum = rlocn - > checksum ;
vgsummary - > mda_size = rlocn - > size ;
2019-03-04 21:13:09 +03:00
/* Keep track of largest metadata size we find. */
lvmcache_save_metadata_size ( rlocn - > size ) ;
2018-02-07 00:18:11 +03:00
lvmcache_lookup_mda ( vgsummary ) ;
2015-03-19 02:43:02 +03:00
2018-02-07 00:18:11 +03:00
if ( ! text_read_metadata_summary ( fmt , dev_area - > dev , MDA_CONTENT_REASON ( primary_mda ) ,
2015-03-19 02:43:02 +03:00
( off_t ) ( dev_area - > start + rlocn - > offset ) ,
2018-04-20 18:43:50 +03:00
( uint32_t ) ( rlocn - > size - wrap ) ,
2015-03-19 02:43:02 +03:00
( off_t ) ( dev_area - > start + MDA_HEADER_SIZE ) ,
2018-04-20 18:43:50 +03:00
wrap , calc_crc , vgsummary - > vgname ? 1 : 0 ,
2018-02-07 00:18:11 +03:00
vgsummary ) ) {
improve reading and repairing vg metadata
The fact that vg repair is implemented as a part of vg read
has led to a messy and complicated implementation of vg_read,
and limited and uncontrolled repair capability. This splits
read and repair apart.
Summary
-------
- take all kinds of various repairs out of vg_read
- vg_read no longer writes anything
- vg_read now simply reads and returns vg metadata
- vg_read ignores bad or old copies of metadata
- vg_read proceeds with a single good copy of metadata
- improve error checks and handling when reading
- keep track of bad (corrupt) copies of metadata in lvmcache
- keep track of old (seqno) copies of metadata in lvmcache
- keep track of outdated PVs in lvmcache
- vg_write will do basic repairs
- new command vgck --updatemetadata will do all repairs
Details
-------
- In scan, do not delete dev from lvmcache if reading/processing fails;
the dev is still present, and removing it makes it look like the dev
is not there. Records are now kept about the problems with each PV
so they can be fixed/repaired in the appropriate places.
- In scan, record a bad mda on failure, and delete the mda from
mda in use list so it will not be used by vg_read or vg_write,
only by repair.
- In scan, succeed if any good mda on a device is found, instead of
failing if any is bad. The bad/old copies of metadata should not
interfere with normal usage while good copies can be used.
- In scan, add a record of old mdas in lvmcache for later, do not repair
them while reading, and do not let them prevent us from finding and
using a good copy of metadata from elsewhere. One result is that
"inconsistent metadata" is no longer a read error, but instead a
record in lvmcache that can be addressed separate from the read.
- Treat a dev with no good mdas like a dev with no mdas, which is an
existing case we already handle.
- Don't use a fake vg "handle" for returning an error from vg_read,
or the vg_read_error function for getting that error number;
just return null if the vg cannot be read or used, and an error_flags
arg with flags set for the specific kind of error (which can be used
later for determining the kind of repair.)
- Saving an original copy of the vg metadata, for purposes of reverting
a write, is now done explicitly in vg_read instead of being hidden in
the vg_make_handle function.
- When a vg is not accessible due to "access restrictions" but is
otherwise fine, return the vg through the new error_vg arg so that
process_each_pv can skip the PVs in the VG while processing.
(This is a temporary accommodation for the way process_each_pv
tracks which devs have been looked at, and can be dropped later
when process_each_pv implementation dev tracking is changed.)
- vg_read does not try to fix or recover a vg, but now just reads the
metadata, checks access restrictions and returns it.
(Checking access restrictions might be better done outside of vg_read,
but this is a later improvement.)
- _vg_read now simply makes one attempt to read metadata from
each mda, and uses the most recent copy to return to the caller
in the form of a 'vg' struct.
(bad mdas were excluded during the scan and are not retried)
(old mdas were not excluded during scan and are retried here)
- vg_read uses _vg_read to get the latest copy of metadata from mdas,
and then makes various checks against it to produce warnings,
and to check if VG access is allowed (access restrictions include:
writable, foreign, shared, clustered, missing pvs).
- Things that were previously silently/automatically written by vg_read
that are now done by vg_write, based on the records made in lvmcache
during the scan and read:
. clearing the missing flag
. updating old copies of metadata
. clearing outdated pvs
. updating pv header flags
- Bad/corrupt metadata are now repaired; they were not before.
Test changes
------------
- A read command no longer writes the VG to repair it, so add a write
command to do a repair.
(inconsistent-metadata, unlost-pv)
- When a missing PV is removed from a VG, and then the device is
enabled again, vgck --updatemetadata is needed to clear the
outdated PV before it can be used again, where it wasn't before.
(lvconvert-repair-policy, lvconvert-repair-raid, lvconvert-repair,
mirror-vgreduce-removemissing, pv-ext-flags, unlost-pv)
Reading bad/old metadata
------------------------
- "bad metadata": the mda_header or metadata text has invalid fields
or can't be parsed by lvm. This is a form of corruption that would
not be caused by known failure scenarios. A checksum error is
typically included among the errors reported.
- "old metadata": a valid copy of the metadata that has a smaller seqno
than other copies of the metadata. This can happen if the device
failed, or io failed, or lvm failed while committing new metadata
to all the metadata areas. Old metadata on a PV that has been
removed from the VG is the "outdated" case below.
When a VG has some PVs with bad/old metadata, lvm can simply ignore
the bad/old copies, and use a good copy. This is why there are
multiple copies of the metadata -- so it's available even when some
of the copies cannot be used. The bad/old copies do not have to be
repaired before the VG can be used (the repair can happen later.)
A PV with no good copies of the metadata simply falls back to being
treated like a PV with no mdas; a common and harmless configuration.
When bad/old metadata exists, lvm warns the user about it, and
suggests repairing it using a new metadata repair command.
Bad metadata in particular is something that users will want to
investigate and repair themselves, since it should not happen and
may indicate some other problem that needs to be fixed.
PVs with bad/old metadata are not the same as missing devices.
Missing devices will block various kinds of VG modification or
activation, but bad/old metadata will not.
Previously, lvm would attempt to repair bad/old metadata whenever
it was read. This was unnecessary since lvm does not require every
copy of the metadata to be used. It would also hide potential
problems that should be investigated by the user. It was also
dangerous in cases where the VG was on shared storage. The user
is now allowed to investigate potential problems and decide how
and when to repair them.
Repairing bad/old metadata
--------------------------
When label scan sees bad metadata in an mda, that mda is removed
from the lvmcache info->mdas list. This means that vg_read will
skip it, and not attempt to read/process it again. If it was
the only in-use mda on a PV, that PV is treated like a PV with
no mdas. It also means that vg_write will also skip the bad mda,
and not attempt to write new metadata to it. The only way to
repair bad metadata is with the metadata repair command.
When label scan sees old metadata in an mda, that mda is kept
in the lvmcache info->mdas list. This means that vg_read will
read/process it again, and likely see the same mismatch with
the other copies of the metadata. Like the label_scan, the
vg_read will simply ignore the old copy of the metadata and
use the latest copy. If the command is modifying the vg
(e.g. lvcreate), then vg_write, which writes new metadata to
every mda on info->mdas, will write the new metadata to the
mda that had the old version. If successful, this will resolve
the old metadata problem (without needing to run a metadata
repair command.)
Outdated PVs
------------
An outdated PV is a PV that has an old copy of VG metadata
that shows it is a member of the VG, but the latest copy of
the VG metadata does not include this PV. This happens if
the PV is disconnected, vgreduce --removemissing is run to
remove the PV from the VG, then the PV is reconnected.
In this case, the outdated PV needs to have its outdated metadata
removed and the PV used flag needs to be cleared. This repair
will be done by the subsequent repair command. It is also done
if vgremove is run on the VG.
MISSING PVs
-----------
When a device is missing, most commands will refuse to modify
the VG. This is the simple case. More complicated is when
a command is allowed to modify the VG while it is missing a
device.
When a VG is written while a device is missing for one of its PVs,
the VG metadata is written to disk with the MISSING flag on the PV
with the missing device. When the VG is next used, it is treated
as if the PV with the MISSING flag still has a missing device, even
if that device has reappeared.
If all LVs that were using a PV with the MISSING flag are removed
or repaired so that the MISSING PV is no longer used, then the
next time the VG metadata is written, the MISSING flag will be
dropped.
Alternative methods of clearing the MISSING flag are:
vgreduce --removemissing will remove PVs with missing devices,
or PVs with the MISSING flag where the device has reappeared.
vgextend --restoremissing will clear the MISSING flag on PVs
where the device has reappeared, allowing the VG to be used
normally. This must be done with caution since the reappeared
device may have old data that is inconsistent with data on other PVs.
Bad mda repair
--------------
The new command:
vgck --updatemetadata VG
first uses vg_write to repair old metadata, and other basic
issues mentioned above (old metadata, outdated PVs, pv_header
flags, MISSING_PV flags). It will also go further and repair
bad metadata:
. text metadata that has a bad checksum
. text metadata that is not parsable
. corrupt mda_header checksum and version fields
(To keep a clean diff, #if 0 is added around functions that
are replaced by new code. These commented functions are
removed by the following commit.)
2019-05-24 20:04:37 +03:00
log_warn ( " WARNING: metadata on %s at %llu has invalid summary for VG. " ,
2018-02-08 00:20:39 +03:00
dev_name ( dev_area - > dev ) ,
( unsigned long long ) ( dev_area - > start + rlocn - > offset ) ) ;
2018-02-07 00:18:11 +03:00
return 0 ;
}
2007-11-05 20:17:55 +03:00
2018-04-20 18:43:50 +03:00
/* Ignore this entry if the characters aren't permissible */
2018-02-07 00:18:11 +03:00
if ( ! validate_name ( vgsummary - > vgname ) ) {
improve reading and repairing vg metadata
The fact that vg repair is implemented as a part of vg read
has led to a messy and complicated implementation of vg_read,
and limited and uncontrolled repair capability. This splits
read and repair apart.
Summary
-------
- take all kinds of various repairs out of vg_read
- vg_read no longer writes anything
- vg_read now simply reads and returns vg metadata
- vg_read ignores bad or old copies of metadata
- vg_read proceeds with a single good copy of metadata
- improve error checks and handling when reading
- keep track of bad (corrupt) copies of metadata in lvmcache
- keep track of old (seqno) copies of metadata in lvmcache
- keep track of outdated PVs in lvmcache
- vg_write will do basic repairs
- new command vgck --updatemetadata will do all repairs
Details
-------
- In scan, do not delete dev from lvmcache if reading/processing fails;
the dev is still present, and removing it makes it look like the dev
is not there. Records are now kept about the problems with each PV
so they can be fixed/repaired in the appropriate places.
- In scan, record a bad mda on failure, and delete the mda from
mda in use list so it will not be used by vg_read or vg_write,
only by repair.
- In scan, succeed if any good mda on a device is found, instead of
failing if any is bad. The bad/old copies of metadata should not
interfere with normal usage while good copies can be used.
- In scan, add a record of old mdas in lvmcache for later, do not repair
them while reading, and do not let them prevent us from finding and
using a good copy of metadata from elsewhere. One result is that
"inconsistent metadata" is no longer a read error, but instead a
record in lvmcache that can be addressed separate from the read.
- Treat a dev with no good mdas like a dev with no mdas, which is an
existing case we already handle.
- Don't use a fake vg "handle" for returning an error from vg_read,
or the vg_read_error function for getting that error number;
just return null if the vg cannot be read or used, and an error_flags
arg with flags set for the specific kind of error (which can be used
later for determining the kind of repair.)
- Saving an original copy of the vg metadata, for purposes of reverting
a write, is now done explicitly in vg_read instead of being hidden in
the vg_make_handle function.
- When a vg is not accessible due to "access restrictions" but is
otherwise fine, return the vg through the new error_vg arg so that
process_each_pv can skip the PVs in the VG while processing.
(This is a temporary accommodation for the way process_each_pv
tracks which devs have been looked at, and can be dropped later
when process_each_pv implementation dev tracking is changed.)
- vg_read does not try to fix or recover a vg, but now just reads the
metadata, checks access restrictions and returns it.
(Checking access restrictions might be better done outside of vg_read,
but this is a later improvement.)
- _vg_read now simply makes one attempt to read metadata from
each mda, and uses the most recent copy to return to the caller
in the form of a 'vg' struct.
(bad mdas were excluded during the scan and are not retried)
(old mdas were not excluded during scan and are retried here)
- vg_read uses _vg_read to get the latest copy of metadata from mdas,
and then makes various checks against it to produce warnings,
and to check if VG access is allowed (access restrictions include:
writable, foreign, shared, clustered, missing pvs).
- Things that were previously silently/automatically written by vg_read
that are now done by vg_write, based on the records made in lvmcache
during the scan and read:
. clearing the missing flag
. updating old copies of metadata
. clearing outdated pvs
. updating pv header flags
- Bad/corrupt metadata are now repaired; they were not before.
Test changes
------------
- A read command no longer writes the VG to repair it, so add a write
command to do a repair.
(inconsistent-metadata, unlost-pv)
- When a missing PV is removed from a VG, and then the device is
enabled again, vgck --updatemetadata is needed to clear the
outdated PV before it can be used again, where it wasn't before.
(lvconvert-repair-policy, lvconvert-repair-raid, lvconvert-repair,
mirror-vgreduce-removemissing, pv-ext-flags, unlost-pv)
Reading bad/old metadata
------------------------
- "bad metadata": the mda_header or metadata text has invalid fields
or can't be parsed by lvm. This is a form of corruption that would
not be caused by known failure scenarios. A checksum error is
typically included among the errors reported.
- "old metadata": a valid copy of the metadata that has a smaller seqno
than other copies of the metadata. This can happen if the device
failed, or io failed, or lvm failed while committing new metadata
to all the metadata areas. Old metadata on a PV that has been
removed from the VG is the "outdated" case below.
When a VG has some PVs with bad/old metadata, lvm can simply ignore
the bad/old copies, and use a good copy. This is why there are
multiple copies of the metadata -- so it's available even when some
of the copies cannot be used. The bad/old copies do not have to be
repaired before the VG can be used (the repair can happen later.)
A PV with no good copies of the metadata simply falls back to being
treated like a PV with no mdas; a common and harmless configuration.
When bad/old metadata exists, lvm warns the user about it, and
suggests repairing it using a new metadata repair command.
Bad metadata in particular is something that users will want to
investigate and repair themselves, since it should not happen and
may indicate some other problem that needs to be fixed.
PVs with bad/old metadata are not the same as missing devices.
Missing devices will block various kinds of VG modification or
activation, but bad/old metadata will not.
Previously, lvm would attempt to repair bad/old metadata whenever
it was read. This was unnecessary since lvm does not require every
copy of the metadata to be used. It would also hide potential
problems that should be investigated by the user. It was also
dangerous in cases where the VG was on shared storage. The user
is now allowed to investigate potential problems and decide how
and when to repair them.
Repairing bad/old metadata
--------------------------
When label scan sees bad metadata in an mda, that mda is removed
from the lvmcache info->mdas list. This means that vg_read will
skip it, and not attempt to read/process it again. If it was
the only in-use mda on a PV, that PV is treated like a PV with
no mdas. It also means that vg_write will also skip the bad mda,
and not attempt to write new metadata to it. The only way to
repair bad metadata is with the metadata repair command.
When label scan sees old metadata in an mda, that mda is kept
in the lvmcache info->mdas list. This means that vg_read will
read/process it again, and likely see the same mismatch with
the other copies of the metadata. Like the label_scan, the
vg_read will simply ignore the old copy of the metadata and
use the latest copy. If the command is modifying the vg
(e.g. lvcreate), then vg_write, which writes new metadata to
every mda on info->mdas, will write the new metadata to the
mda that had the old version. If successful, this will resolve
the old metadata problem (without needing to run a metadata
repair command.)
Outdated PVs
------------
An outdated PV is a PV that has an old copy of VG metadata
that shows it is a member of the VG, but the latest copy of
the VG metadata does not include this PV. This happens if
the PV is disconnected, vgreduce --removemissing is run to
remove the PV from the VG, then the PV is reconnected.
In this case, the outdated PV needs to have its outdated metadata
removed and the PV used flag needs to be cleared. This repair
will be done by the subsequent repair command. It is also done
if vgremove is run on the VG.
MISSING PVs
-----------
When a device is missing, most commands will refuse to modify
the VG. This is the simple case. More complicated is when
a command is allowed to modify the VG while it is missing a
device.
When a VG is written while a device is missing for one of its PVs,
the VG metadata is written to disk with the MISSING flag on the PV
with the missing device. When the VG is next used, it is treated
as if the PV with the MISSING flag still has a missing device, even
if that device has reappeared.
If all LVs that were using a PV with the MISSING flag are removed
or repaired so that the MISSING PV is no longer used, then the
next time the VG metadata is written, the MISSING flag will be
dropped.
Alternative methods of clearing the MISSING flag are:
vgreduce --removemissing will remove PVs with missing devices,
or PVs with the MISSING flag where the device has reappeared.
vgextend --restoremissing will clear the MISSING flag on PVs
where the device has reappeared, allowing the VG to be used
normally. This must be done with caution since the reappeared
device may have old data that is inconsistent with data on other PVs.
Bad mda repair
--------------
The new command:
vgck --updatemetadata VG
first uses vg_write to repair old metadata, and other basic
issues mentioned above (old metadata, outdated PVs, pv_header
flags, MISSING_PV flags). It will also go further and repair
bad metadata:
. text metadata that has a bad checksum
. text metadata that is not parsable
. corrupt mda_header checksum and version fields
(To keep a clean diff, #if 0 is added around functions that
are replaced by new code. These commented functions are
removed by the following commit.)
2019-05-24 20:04:37 +03:00
log_warn ( " WARNING: metadata on %s at %llu has invalid VG name. " ,
2018-02-08 00:20:39 +03:00
dev_name ( dev_area - > dev ) ,
( unsigned long long ) ( dev_area - > start + rlocn - > offset ) ) ;
2018-02-07 00:18:11 +03:00
return 0 ;
}
2006-04-11 17:55:59 +04:00
2018-02-08 00:20:39 +03:00
log_debug_metadata ( " Found metadata summary on %s at %llu size %llu for VG %s " ,
2018-04-20 18:43:50 +03:00
dev_name ( dev_area - > dev ) ,
2018-02-08 00:20:39 +03:00
( unsigned long long ) ( dev_area - > start + rlocn - > offset ) ,
( unsigned long long ) rlocn - > size ,
2018-02-07 00:18:11 +03:00
vgsummary - > vgname ) ;
2018-01-05 00:13:44 +03:00
2018-04-20 18:43:50 +03:00
if ( mda_free_sectors ) {
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
/*
2018-09-24 23:27:03 +03:00
* Report remaining space given that a single copy of metadata
* can be as large as half the total metadata space , minus 512
* because each copy is rounded to begin on a sector boundary .
metadata: improve write and commit code
The vg_write/vg_commit code was imprecise, uncommented, and
hard to understand. Rewrite it with clearer, cleaner code,
extensive comments, descriptions of how it works, and add
more info in debugging output.
The minor changes in behavior are to things that were
either incorrect or probably unintended:
- vg_write/vg_commit no longer check that the current vgname at
the start of the text metadata matches the vgname being written.
This has already been done at least twice by the time they are
called, and repeating it again against the same cached data has
no use.
- A fragment of old removed code had been left behind that checked
if the old unused alignment policy would wrap. It was still
being checked to decide if the metadata area was full, which
could possibly cause an incorrect full metadata failure.
- vg_remove now clears both the raw_locns in the mda_header that
point to committed metadata (raw_locn slot 0) and precommitted
metadata (raw_locn slot 1). Previously it fully cleared the
committed slot, and would only clear the offset field in the
precommitted slot if it saw a problem with the metadata in the
vg being removed.
- read_metadata_location_summary was wrongly comparing the number
of wrapped bytes with an offset to report an error about the
metadata being too large. This wrong check is removed, it
could have resulted in erroneous errors.
2018-08-30 18:44:53 +03:00
*/
2018-09-24 23:27:03 +03:00
max_size = ( ( mdah - > size - MDA_HEADER_SIZE ) / 2 ) - 512 ;
2018-01-05 00:13:44 +03:00
2018-09-24 22:41:58 +03:00
if ( rlocn - > size > = max_size )
2018-04-20 18:43:50 +03:00
* mda_free_sectors = UINT64_C ( 0 ) ;
else
2018-09-24 22:41:58 +03:00
* mda_free_sectors = ( max_size - rlocn - > size ) > > SECTOR_SHIFT ;
2018-01-05 00:13:44 +03:00
}
2018-04-20 18:43:50 +03:00
return 1 ;
2002-02-22 14:44:56 +03:00
}
2012-02-10 05:28:27 +04:00
/*
 * Argument bundle handed to _write_single_mda() through the opaque
 * "baton" pointer of lvmcache_foreach_mda().
 */
struct _write_single_mda_baton {
	const struct format_type *fmt;	/* format passed on to the mda_header writer */
	struct physical_volume *pv;	/* PV whose metadata areas are being written */
};
static int _write_single_mda ( struct metadata_area * mda , void * baton )
{
struct _write_single_mda_baton * p = baton ;
struct mda_context * mdac ;
2012-06-21 23:19:28 +04:00
char buf [ MDA_HEADER_SIZE ] __attribute__ ( ( aligned ( 8 ) ) ) = { 0 } ;
2012-02-10 05:28:27 +04:00
struct mda_header * mdah = ( struct mda_header * ) buf ;
mdac = mda - > metadata_locn ;
mdah - > size = mdac - > area . size ;
rlocn_set_ignored ( mdah - > raw_locns , mda_is_ignored ( mda ) ) ;
2017-12-07 06:34:59 +03:00
if ( ! _raw_write_mda_header ( p - > fmt , mdac - > area . dev , mda_is_primary ( mda ) ,
2012-02-10 05:28:27 +04:00
mdac - > area . start , mdah ) ) {
return_0 ;
}
return 1 ;
}
2016-02-12 12:59:27 +03:00
/*
 * Bring the cached PV header extension state of info in line with pv.
 *
 * The PV_EXT_USED bit of the cached extension flags is cleared when pv is
 * an orphan and set otherwise; every other cached flag bit is kept as is.
 * The cached extension version is set to PV_HEADER_EXTENSION_VSN.
 *
 * Always returns 1.
 */
static int _set_ext_flags(struct physical_volume *pv, struct lvmcache_info *info)
{
	uint32_t flags = lvmcache_ext_flags(info);

	flags = is_orphan(pv) ? (flags & ~PV_EXT_USED) : (flags | PV_EXT_USED);

	lvmcache_set_ext_version(info, PV_HEADER_EXTENSION_VSN);
	lvmcache_set_ext_flags(info, flags);

	return 1;
}
2015-04-09 23:13:55 +03:00
/* Only for orphans - FIXME That's not true any more */
2011-02-21 15:26:27 +03:00
static int _text_pv_write ( const struct format_type * fmt , struct physical_volume * pv )
2002-11-18 17:04:08 +03:00
{
2011-02-21 15:26:27 +03:00
struct format_instance * fid = pv - > fid ;
2011-02-21 15:31:28 +03:00
const char * pvid = ( const char * ) ( * pv - > old_id . uuid ? & pv - > old_id : & pv - > id ) ;
2002-11-18 17:04:08 +03:00
struct label * label ;
2003-07-05 02:34:56 +04:00
struct lvmcache_info * info ;
2002-11-18 17:04:08 +03:00
struct mda_context * mdac ;
struct metadata_area * mda ;
2012-02-10 05:28:27 +04:00
struct _write_single_mda_baton baton ;
2011-02-21 15:26:27 +03:00
unsigned mda_index ;
2002-11-18 17:04:08 +03:00
2011-02-21 15:26:27 +03:00
/* Add a new cache entry with PV info or update existing one. */
if ( ! ( info = lvmcache_add ( fmt - > labeller , ( const char * ) & pv - > id ,
2019-02-05 22:40:34 +03:00
pv - > dev , pv - > label_sector , pv - > vg_name ,
is_orphan_vg ( pv - > vg_name ) ? pv - > vg_name : pv - > vg ? ( const char * ) & pv - > vg - > id : NULL , 0 , NULL ) ) )
2008-01-30 16:19:47 +03:00
return_0 ;
2011-02-21 15:26:27 +03:00
2019-02-05 22:40:34 +03:00
/* lvmcache_add() creates info and info->label structs for the dev, get info->label. */
2012-02-10 05:28:27 +04:00
label = lvmcache_get_label ( info ) ;
2002-11-18 17:04:08 +03:00
2012-02-10 05:28:27 +04:00
lvmcache_update_pv ( info , pv , fmt ) ;
2002-11-18 17:04:08 +03:00
2011-02-21 15:26:27 +03:00
/* Flush all cached metadata areas, we will reenter new/modified ones. */
2012-02-10 05:28:27 +04:00
lvmcache_del_mdas ( info ) ;
2011-02-21 15:26:27 +03:00
/*
* Add all new or modified metadata areas for this PV stored in
* its format instance . If this PV is not part of a VG yet ,
* pv - > fid will be used . Otherwise pv - > vg - > fid will be used .
* The fid_get_mda_indexed fn can handle that transparently ,
* just pass the right format_instance in .
*/
for ( mda_index = 0 ; mda_index < FMT_TEXT_MAX_MDAS_PER_PV ; mda_index + + ) {
2011-02-21 15:31:28 +03:00
if ( ! ( mda = fid_get_mda_indexed ( fid , pvid , ID_LEN , mda_index ) ) )
2011-02-21 15:26:27 +03:00
continue ;
mdac = ( struct mda_context * ) mda - > metadata_locn ;
2017-12-11 18:32:53 +03:00
log_debug_metadata ( " Creating metadata area on %s at sector "
FMTu64 " size " FMTu64 " sectors " ,
2013-01-08 02:30:29 +04:00
dev_name ( mdac - > area . dev ) ,
mdac - > area . start > > SECTOR_SHIFT ,
mdac - > area . size > > SECTOR_SHIFT ) ;
2012-02-10 05:28:27 +04:00
// if fmt is not the same as info->fmt we are in trouble
2013-11-22 16:25:27 +04:00
if ( ! lvmcache_add_mda ( info , mdac - > area . dev ,
mdac - > area . start , mdac - > area . size ,
2019-02-05 22:24:23 +03:00
mda_is_ignored ( mda ) , NULL ) )
2013-11-22 16:25:27 +04:00
return_0 ;
2002-11-18 17:04:08 +03:00
}
2013-05-28 14:37:22 +04:00
if ( ! lvmcache_update_bas ( info , pv ) )
2013-02-15 14:02:53 +04:00
return_0 ;
2009-07-31 01:15:17 +04:00
/*
2011-02-21 15:26:27 +03:00
* FIXME : Allow writing zero offset / size data area to disk .
* This requires defining a special value since we can ' t
* write offset / size that is 0 / 0 - this is already reserved
* as a delimiter in data / metadata area area list in PV header
* ( needs exploring compatibility with older lvm2 ) .
*/
/*
* We can ' t actually write pe_start = 0 ( a data area offset )
* in PV header now . We need to replace this value here . This can
* happen with vgcfgrestore with redefined pe_start or
* pvcreate - - restorefile . However , we can can have this value in
* metadata which will override the value in the PV header .
2009-07-31 01:15:17 +04:00
*/
2002-11-18 17:04:08 +03:00
2012-02-10 05:28:27 +04:00
if ( ! lvmcache_update_das ( info , pv ) )
2008-01-30 16:19:47 +03:00
return_0 ;
2002-11-18 17:04:08 +03:00
2012-02-10 05:28:27 +04:00
baton . pv = pv ;
baton . fmt = fmt ;
2002-11-18 17:04:08 +03:00
2012-02-10 05:28:27 +04:00
if ( ! lvmcache_foreach_mda ( info , _write_single_mda , & baton ) )
return_0 ;
2016-02-12 12:59:27 +03:00
if ( ! _set_ext_flags ( pv , info ) )
return_0 ;
2012-02-10 05:28:27 +04:00
if ( ! label_write ( pv - > dev , label ) ) {
2012-02-28 14:11:35 +04:00
stack ;
return 0 ;
2008-07-17 01:32:38 +04:00
}
2002-11-18 17:04:08 +03:00
2011-02-21 15:26:27 +03:00
/*
* FIXME : We should probably use the format instance ' s metadata
* areas for label_write and only if it ' s successful ,
* update the cache afterwards ?
*/
2002-11-18 17:04:08 +03:00
return 1 ;
}
2016-02-12 15:53:06 +03:00
static int _text_pv_needs_rewrite ( const struct format_type * fmt , struct physical_volume * pv ,
int * needs_rewrite )
{
struct lvmcache_info * info ;
uint32_t ext_vsn ;
2019-02-06 22:18:45 +03:00
uint32_t ext_flags ;
2016-02-12 15:53:06 +03:00
* needs_rewrite = 0 ;
2016-02-12 14:05:59 +03:00
if ( ! pv - > is_labelled )
return 1 ;
2019-02-06 22:18:45 +03:00
if ( ! pv - > dev )
return 1 ;
2016-06-06 22:04:17 +03:00
if ( ! ( info = lvmcache_info_from_pvid ( ( const char * ) & pv - > id , pv - > dev , 0 ) ) ) {
2016-02-12 15:53:06 +03:00
log_error ( " Failed to find cached info for PV %s. " , pv_dev_name ( pv ) ) ;
return 0 ;
}
ext_vsn = lvmcache_ext_version ( info ) ;
2019-02-06 22:18:45 +03:00
if ( ext_vsn < PV_HEADER_EXTENSION_VSN ) {
log_debug ( " PV %s header needs rewrite for new ext version " , dev_name ( pv - > dev ) ) ;
* needs_rewrite = 1 ;
}
ext_flags = lvmcache_ext_flags ( info ) ;
if ( ! ( ext_flags & PV_EXT_USED ) ) {
log_debug ( " PV %s header needs rewrite to set ext used " , dev_name ( pv - > dev ) ) ;
2016-02-12 15:53:06 +03:00
* needs_rewrite = 1 ;
2019-02-06 22:18:45 +03:00
}
2016-02-12 15:53:06 +03:00
return 1 ;
}
2010-02-02 19:26:34 +03:00
/*
2010-06-29 00:31:59 +04:00
* Copy constructor for a metadata_locn .
2010-02-02 19:26:34 +03:00
*/
2010-06-29 00:31:59 +04:00
static void * _metadata_locn_copy_raw ( struct dm_pool * mem , void * metadata_locn )
2010-02-02 19:26:34 +03:00
{
struct mda_context * mdac , * mdac_new ;
2010-06-29 00:31:59 +04:00
mdac = ( struct mda_context * ) metadata_locn ;
2010-02-02 19:26:34 +03:00
if ( ! ( mdac_new = dm_pool_alloc ( mem , sizeof ( * mdac_new ) ) ) ) {
log_error ( " mda_context allocation failed " ) ;
return NULL ;
}
memcpy ( mdac_new , mdac , sizeof ( * mdac ) ) ;
2010-02-16 02:53:15 +03:00
2010-06-29 00:31:59 +04:00
return mdac_new ;
2010-02-02 19:26:34 +03:00
}
2010-06-30 02:37:32 +04:00
/*
* Return a string description of the metadata location .
*/
2010-06-30 17:51:11 +04:00
static const char * _metadata_locn_name_raw ( void * metadata_locn )
2010-06-30 02:37:32 +04:00
{
2010-06-30 17:51:11 +04:00
struct mda_context * mdac = ( struct mda_context * ) metadata_locn ;
2010-06-30 02:37:32 +04:00
return dev_name ( mdac - > area . dev ) ;
}
2010-08-26 16:22:05 +04:00
static uint64_t _metadata_locn_offset_raw ( void * metadata_locn )
2010-06-30 17:51:11 +04:00
{
struct mda_context * mdac = ( struct mda_context * ) metadata_locn ;
return mdac - > area . start ;
}
2010-02-02 19:26:34 +03:00
2011-02-21 15:20:18 +03:00
static int _text_pv_initialise ( const struct format_type * fmt ,
2016-02-19 00:31:27 +03:00
struct pv_create_args * pva ,
2011-02-21 15:20:18 +03:00
struct physical_volume * pv )
{
Place the first PE at 1 MiB for all defaults
. When using default settings, this commit should change
nothing. The first PE continues to be placed at 1 MiB
resulting in a metadata area size of 1020 KiB (for
4K page sizes; slightly smaller for larger page sizes.)
. When default_data_alignment is disabled in lvm.conf,
align pe_start at 1 MiB, based on a default metadata area
size that adapts to the page size. Previously, disabling
this option would result in mda_size that was too small
for common use, and produced a 64 KiB aligned pe_start.
. Customized pe_start and mda_size values continue to be
set as before in lvm.conf and command line.
. Remove the configure option for setting default_data_alignment
at build time.
. Improve alignment related option descriptions.
. Add section about alignment to pvcreate man page.
Previously, DEFAULT_PVMETADATASIZE was 255 sectors.
However, the fact that the config setting named
"default_data_alignment" has a default value of 1 (MiB)
meant that DEFAULT_PVMETADATASIZE was having no effect.
The metadata area size is the space between the start of
the metadata area (page size offset from the start of the
device) and the first PE (1 MiB by default due to
default_data_alignment 1.) The result is a 1020 KiB metadata
area on machines with 4KiB page size (1024 KiB - 4 KiB),
and smaller on machines with larger page size.
If default_data_alignment was set to 0 (disabled), then
DEFAULT_PVMETADATASIZE 255 would take effect, and produce a
metadata area that was 188 KiB and pe_start of 192 KiB.
This was too small for common use.
This is fixed by making the default metadata area size a
computed value that matches the value produced by
default_data_alignment.
2018-11-14 00:00:11 +03:00
uint64_t data_alignment_sectors = pva - > data_alignment ;
uint64_t data_alignment_offset_sectors = pva - > data_alignment_offset ;
uint64_t adjustment ;
uint64_t final_alignment_sectors = 0 ;
2011-02-21 15:20:18 +03:00
Place the first PE at 1 MiB for all defaults
. When using default settings, this commit should change
nothing. The first PE continues to be placed at 1 MiB
resulting in a metadata area size of 1020 KiB (for
4K page sizes; slightly smaller for larger page sizes.)
. When default_data_alignment is disabled in lvm.conf,
align pe_start at 1 MiB, based on a default metadata area
size that adapts to the page size. Previously, disabling
this option would result in mda_size that was too small
for common use, and produced a 64 KiB aligned pe_start.
. Customized pe_start and mda_size values continue to be
set as before in lvm.conf and command line.
. Remove the configure option for setting default_data_alignment
at build time.
. Improve alignment related option descriptions.
. Add section about alignment to pvcreate man page.
Previously, DEFAULT_PVMETADATASIZE was 255 sectors.
However, the fact that the config setting named
"default_data_alignment" has a default value of 1 (MiB)
meant that DEFAULT_PVMETADATASIZE was having no effect.
The metadata area size is the space between the start of
the metadata area (page size offset from the start of the
device) and the first PE (1 MiB by default due to
default_data_alignment 1.) The result is a 1020 KiB metadata
area on machines with 4KiB page size (1024 KiB - 4 KiB),
and smaller on machines with larger page size.
If default_data_alignment was set to 0 (disabled), then
DEFAULT_PVMETADATASIZE 255 would take effect, and produce a
metadata area that was 188 KiB and pe_start of 192 KiB.
This was too small for common use.
This is fixed by making the default metadata area size a
computed value that matches the value produced by
default_data_alignment.
2018-11-14 00:00:11 +03:00
log_debug ( " PV init requested data_alignment_sectors %llu data_alignment_offset_sectors %llu " ,
( unsigned long long ) data_alignment_sectors , ( unsigned long long ) data_alignment_offset_sectors ) ;
if ( ! data_alignment_sectors ) {
data_alignment_sectors = find_config_tree_int ( pv - > fmt - > cmd , devices_data_alignment_CFG , NULL ) * 2 ;
if ( data_alignment_sectors )
log_debug ( " PV init config data_alignment_sectors %llu " ,
( unsigned long long ) data_alignment_sectors ) ;
2011-02-21 15:20:18 +03:00
}
Place the first PE at 1 MiB for all defaults
. When using default settings, this commit should change
nothing. The first PE continues to be placed at 1 MiB
resulting in a metadata area size of 1020 KiB (for
4K page sizes; slightly smaller for larger page sizes.)
. When default_data_alignment is disabled in lvm.conf,
align pe_start at 1 MiB, based on a default metadata area
size that adapts to the page size. Previously, disabling
this option would result in mda_size that was too small
for common use, and produced a 64 KiB aligned pe_start.
. Customized pe_start and mda_size values continue to be
set as before in lvm.conf and command line.
. Remove the configure option for setting default_data_alignment
at build time.
. Improve alignment related option descriptions.
. Add section about alignment to pvcreate man page.
Previously, DEFAULT_PVMETADATASIZE was 255 sectors.
However, the fact that the config setting named
"default_data_alignment" has a default value of 1 (MiB)
meant that DEFAULT_PVMETADATASIZE was having no effect.
The metadata area size is the space between the start of
the metadata area (page size offset from the start of the
device) and the first PE (1 MiB by default due to
default_data_alignment 1.) The result is a 1020 KiB metadata
area on machines with 4KiB page size (1024 KiB - 4 KiB),
and smaller on machines with larger page size.
If default_data_alignment was set to 0 (disabled), then
DEFAULT_PVMETADATASIZE 255 would take effect, and produce a
metadata area that was 188 KiB and pe_start of 192 KiB.
This was too small for common use.
This is fixed by making the default metadata area size a
computed value that matches the value produced by
default_data_alignment.
2018-11-14 00:00:11 +03:00
/* sets pv->pe_align */
set_pe_align ( pv , data_alignment_sectors ) ;
/* sets pv->pe_align_offset */
set_pe_align_offset ( pv , data_alignment_offset_sectors ) ;
2011-02-21 15:20:18 +03:00
if ( pv - > pe_align < pv - > pe_align_offset ) {
Place the first PE at 1 MiB for all defaults
. When using default settings, this commit should change
nothing. The first PE continues to be placed at 1 MiB
resulting in a metadata area size of 1020 KiB (for
4K page sizes; slightly smaller for larger page sizes.)
. When default_data_alignment is disabled in lvm.conf,
align pe_start at 1 MiB, based on a default metadata area
size that adapts to the page size. Previously, disabling
this option would result in mda_size that was too small
for common use, and produced a 64 KiB aligned pe_start.
. Customized pe_start and mda_size values continue to be
set as before in lvm.conf and command line.
. Remove the configure option for setting default_data_alignment
at build time.
. Improve alignment related option descriptions.
. Add section about alignment to pvcreate man page.
Previously, DEFAULT_PVMETADATASIZE was 255 sectors.
However, the fact that the config setting named
"default_data_alignment" has a default value of 1 (MiB)
meant that DEFAULT_PVMETADATASIZE was having no effect.
The metadata area size is the space between the start of
the metadata area (page size offset from the start of the
device) and the first PE (1 MiB by default due to
default_data_alignment 1.) The result is a 1020 KiB metadata
area on machines with 4KiB page size (1024 KiB - 4 KiB),
and smaller on machines with larger page size.
If default_data_alignment was set to 0 (disabled), then
DEFAULT_PVMETADATASIZE 255 would take effect, and produce a
metadata area that was 188 KiB and pe_start of 192 KiB.
This was too small for common use.
This is fixed by making the default metadata area size a
computed value that matches the value produced by
default_data_alignment.
2018-11-14 00:00:11 +03:00
log_error ( " %s: pe_align (%llu sectors) must not be less than pe_align_offset (%llu sectors) " ,
pv_dev_name ( pv ) , ( unsigned long long ) pv - > pe_align , ( unsigned long long ) pv - > pe_align_offset ) ;
2011-02-21 15:20:18 +03:00
return 0 ;
}
Place the first PE at 1 MiB for all defaults
. When using default settings, this commit should change
nothing. The first PE continues to be placed at 1 MiB
resulting in a metadata area size of 1020 KiB (for
4K page sizes; slightly smaller for larger page sizes.)
. When default_data_alignment is disabled in lvm.conf,
align pe_start at 1 MiB, based on a default metadata area
size that adapts to the page size. Previously, disabling
this option would result in mda_size that was too small
for common use, and produced a 64 KiB aligned pe_start.
. Customized pe_start and mda_size values continue to be
set as before in lvm.conf and command line.
. Remove the configure option for setting default_data_alignment
at build time.
. Improve alignment related option descriptions.
. Add section about alignment to pvcreate man page.
Previously, DEFAULT_PVMETADATASIZE was 255 sectors.
However, the fact that the config setting named
"default_data_alignment" has a default value of 1 (MiB)
meant that DEFAULT_PVMETADATASIZE was having no effect.
The metadata area size is the space between the start of
the metadata area (page size offset from the start of the
device) and the first PE (1 MiB by default due to
default_data_alignment 1.) The result is a 1020 KiB metadata
area on machines with 4KiB page size (1024 KiB - 4 KiB),
and smaller on machines with larger page size.
If default_data_alignment was set to 0 (disabled), then
DEFAULT_PVMETADATASIZE 255 would take effect, and produce a
metadata area that was 188 KiB and pe_start of 192 KiB.
This was too small for common use.
This is fixed by making the default metadata area size a
computed value that matches the value produced by
default_data_alignment.
2018-11-14 00:00:11 +03:00
final_alignment_sectors = pv - > pe_align + pv - > pe_align_offset ;
2013-02-15 14:02:53 +04:00
Place the first PE at 1 MiB for all defaults
. When using default settings, this commit should change
nothing. The first PE continues to be placed at 1 MiB
resulting in a metadata area size of 1020 KiB (for
4K page sizes; slightly smaller for larger page sizes.)
. When default_data_alignment is disabled in lvm.conf,
align pe_start at 1 MiB, based on a default metadata area
size that adapts to the page size. Previously, disabling
this option would result in mda_size that was too small
for common use, and produced a 64 KiB aligned pe_start.
. Customized pe_start and mda_size values continue to be
set as before in lvm.conf and command line.
. Remove the configure option for setting default_data_alignment
at build time.
. Improve alignment related option descriptions.
. Add section about alignment to pvcreate man page.
Previously, DEFAULT_PVMETADATASIZE was 255 sectors.
However, the fact that the config setting named
"default_data_alignment" has a default value of 1 (MiB)
meant that DEFAULT_PVMETADATASIZE was having no effect.
The metadata area size is the space between the start of
the metadata area (page size offset from the start of the
device) and the first PE (1 MiB by default due to
default_data_alignment 1.) The result is a 1020 KiB metadata
area on machines with 4KiB page size (1024 KiB - 4 KiB),
and smaller on machines with larger page size.
If default_data_alignment was set to 0 (disabled), then
DEFAULT_PVMETADATASIZE 255 would take effect, and produce a
metadata area that was 188 KiB and pe_start of 192 KiB.
This was too small for common use.
This is fixed by making the default metadata area size a
computed value that matches the value produced by
default_data_alignment.
2018-11-14 00:00:11 +03:00
log_debug ( " PV init final alignment %llu sectors from align %llu align_offset %llu " ,
( unsigned long long ) final_alignment_sectors ,
( unsigned long long ) pv - > pe_align ,
( unsigned long long ) pv - > pe_align_offset ) ;
if ( pv - > size < final_alignment_sectors ) {
2013-02-21 17:47:49 +04:00
log_error ( " %s: Data alignment must not exceed device size. " ,
pv_dev_name ( pv ) ) ;
return 0 ;
}
Place the first PE at 1 MiB for all defaults
. When using default settings, this commit should change
nothing. The first PE continues to be placed at 1 MiB
resulting in a metadata area size of 1020 KiB (for
4K page sizes; slightly smaller for larger page sizes.)
. When default_data_alignment is disabled in lvm.conf,
align pe_start at 1 MiB, based on a default metadata area
size that adapts to the page size. Previously, disabling
this option would result in mda_size that was too small
for common use, and produced a 64 KiB aligned pe_start.
. Customized pe_start and mda_size values continue to be
set as before in lvm.conf and command line.
. Remove the configure option for setting default_data_alignment
at build time.
. Improve alignment related option descriptions.
. Add section about alignment to pvcreate man page.
Previously, DEFAULT_PVMETADATASIZE was 255 sectors.
However, the fact that the config setting named
"default_data_alignment" has a default value of 1 (MiB)
meant that DEFAULT_PVMETADATASIZE was having no effect.
The metadata area size is the space between the start of
the metadata area (page size offset from the start of the
device) and the first PE (1 MiB by default due to
default_data_alignment 1.) The result is a 1020 KiB metadata
area on machines with 4KiB page size (1024 KiB - 4 KiB),
and smaller on machines with larger page size.
If default_data_alignment was set to 0 (disabled), then
DEFAULT_PVMETADATASIZE 255 would take effect, and produce a
metadata area that was 188 KiB and pe_start of 192 KiB.
This was too small for common use.
This is fixed by making the default metadata area size a
computed value that matches the value produced by
default_data_alignment.
2018-11-14 00:00:11 +03:00
if ( pv - > size < final_alignment_sectors + pva - > ba_size ) {
2013-05-28 14:37:22 +04:00
log_error ( " %s: Bootloader area with data-aligned start must "
2013-02-15 14:02:53 +04:00
" not exceed device size. " , pv_dev_name ( pv ) ) ;
return 0 ;
}
2016-02-19 00:31:27 +03:00
if ( pva - > pe_start = = PV_PE_START_CALC ) {
2013-02-15 14:02:53 +04:00
/*
2013-05-28 14:37:22 +04:00
* Calculate new PE start and bootloader area start value .
2013-02-15 14:02:53 +04:00
* Make sure both are properly aligned !
2014-04-10 16:18:59 +04:00
* If PE start can ' t be aligned because BA is taking
2013-02-15 14:02:53 +04:00
* the whole space , make PE start equal to the PV size
* which effectively disables DA - it will have zero size .
* This needs to be done as we can ' t have a PV without any DA .
2014-04-10 16:18:59 +04:00
* But we still want to support a PV with BA only !
2013-02-15 14:02:53 +04:00
*/
2016-02-19 00:31:27 +03:00
if ( pva - > ba_size ) {
Place the first PE at 1 MiB for all defaults
. When using default settings, this commit should change
nothing. The first PE continues to be placed at 1 MiB
resulting in a metadata area size of 1020 KiB (for
4K page sizes; slightly smaller for larger page sizes.)
. When default_data_alignment is disabled in lvm.conf,
align pe_start at 1 MiB, based on a default metadata area
size that adapts to the page size. Previously, disabling
this option would result in mda_size that was too small
for common use, and produced a 64 KiB aligned pe_start.
. Customized pe_start and mda_size values continue to be
set as before in lvm.conf and command line.
. Remove the configure option for setting default_data_alignment
at build time.
. Improve alignment related option descriptions.
. Add section about alignment to pvcreate man page.
Previously, DEFAULT_PVMETADATASIZE was 255 sectors.
However, the fact that the config setting named
"default_data_alignment" has a default value of 1 (MiB)
meant that DEFAULT_PVMETADATASIZE was having no effect.
The metadata area size is the space between the start of
the metadata area (page size offset from the start of the
device) and the first PE (1 MiB by default due to
default_data_alignment 1.) The result is a 1020 KiB metadata
area on machines with 4KiB page size (1024 KiB - 4 KiB),
and smaller on machines with larger page size.
If default_data_alignment was set to 0 (disabled), then
DEFAULT_PVMETADATASIZE 255 would take effect, and produce a
metadata area that was 188 KiB and pe_start of 192 KiB.
This was too small for common use.
This is fixed by making the default metadata area size a
computed value that matches the value produced by
default_data_alignment.
2018-11-14 00:00:11 +03:00
pv - > ba_start = final_alignment_sectors ;
2016-02-19 00:31:27 +03:00
pv - > ba_size = pva - > ba_size ;
if ( ( adjustment = pva - > ba_size % pv - > pe_align ) )
2013-05-28 14:37:22 +04:00
pv - > ba_size + = pv - > pe_align - adjustment ;
if ( pv - > size < pv - > ba_start + pv - > ba_size )
pv - > ba_size = pv - > size - pv - > ba_start ;
pv - > pe_start = pv - > ba_start + pv - > ba_size ;
Place the first PE at 1 MiB for all defaults
. When using default settings, this commit should change
nothing. The first PE continues to be placed at 1 MiB
resulting in a metadata area size of 1020 KiB (for
4K page sizes; slightly smaller for larger page sizes.)
. When default_data_alignment is disabled in lvm.conf,
align pe_start at 1 MiB, based on a default metadata area
size that adapts to the page size. Previously, disabling
this option would result in mda_size that was too small
for common use, and produced a 64 KiB aligned pe_start.
. Customized pe_start and mda_size values continue to be
set as before in lvm.conf and command line.
. Remove the configure option for setting default_data_alignment
at build time.
. Improve alignment related option descriptions.
. Add section about alignment to pvcreate man page.
Previously, DEFAULT_PVMETADATASIZE was 255 sectors.
However, the fact that the config setting named
"default_data_alignment" has a default value of 1 (MiB)
meant that DEFAULT_PVMETADATASIZE was having no effect.
The metadata area size is the space between the start of
the metadata area (page size offset from the start of the
device) and the first PE (1 MiB by default due to
default_data_alignment 1.) The result is a 1020 KiB metadata
area on machines with 4KiB page size (1024 KiB - 4 KiB),
and smaller on machines with larger page size.
If default_data_alignment was set to 0 (disabled), then
DEFAULT_PVMETADATASIZE 255 would take effect, and produce a
metadata area that was 188 KiB and pe_start of 192 KiB.
This was too small for common use.
This is fixed by making the default metadata area size a
computed value that matches the value produced by
default_data_alignment.
2018-11-14 00:00:11 +03:00
log_debug ( " Setting pe start to %llu sectors after ba start %llu size %llu for %s " ,
( unsigned long long ) pv - > pe_start ,
( unsigned long long ) pv - > ba_start ,
( unsigned long long ) pv - > ba_size ,
pv_dev_name ( pv ) ) ;
} else {
pv - > pe_start = final_alignment_sectors ;
log_debug ( " Setting PE start to %llu sectors for %s " ,
( unsigned long long ) pv - > pe_start , pv_dev_name ( pv ) ) ;
}
2013-02-15 14:02:53 +04:00
} else {
/*
* Try to keep the value of PE start set to a firm value if
* requested . This is useful when restoring existing PE start
2014-04-10 16:18:59 +04:00
* value ( e . g . backups ) . Also , if creating a BA , try to place
2013-02-15 14:02:53 +04:00
* it in between the final alignment and existing PE start
* if possible .
*/
2016-02-19 00:31:27 +03:00
pv - > pe_start = pva - > pe_start ;
Place the first PE at 1 MiB for all defaults
. When using default settings, this commit should change
nothing. The first PE continues to be placed at 1 MiB
resulting in a metadata area size of 1020 KiB (for
4K page sizes; slightly smaller for larger page sizes.)
. When default_data_alignment is disabled in lvm.conf,
align pe_start at 1 MiB, based on a default metadata area
size that adapts to the page size. Previously, disabling
this option would result in mda_size that was too small
for common use, and produced a 64 KiB aligned pe_start.
. Customized pe_start and mda_size values continue to be
set as before in lvm.conf and command line.
. Remove the configure option for setting default_data_alignment
at build time.
. Improve alignment related option descriptions.
. Add section about alignment to pvcreate man page.
Previously, DEFAULT_PVMETADATASIZE was 255 sectors.
However, the fact that the config setting named
"default_data_alignment" has a default value of 1 (MiB)
meant that DEFAULT_PVMETADATASIZE was having no effect.
The metadata area size is the space between the start of
the metadata area (page size offset from the start of the
device) and the first PE (1 MiB by default due to
default_data_alignment 1.) The result is a 1020 KiB metadata
area on machines with 4KiB page size (1024 KiB - 4 KiB),
and smaller on machines with larger page size.
If default_data_alignment was set to 0 (disabled), then
DEFAULT_PVMETADATASIZE 255 would take effect, and produce a
metadata area that was 188 KiB and pe_start of 192 KiB.
This was too small for common use.
This is fixed by making the default metadata area size a
computed value that matches the value produced by
default_data_alignment.
2018-11-14 00:00:11 +03:00
log_debug ( " Setting pe start to requested %llu sectors for %s " ,
( unsigned long long ) pv - > pe_start , pv_dev_name ( pv ) ) ;
2016-02-19 00:31:27 +03:00
if ( pva - > ba_size ) {
if ( ( pva - > ba_start & & pva - > ba_start + pva - > ba_size > pva - > pe_start ) | |
Place the first PE at 1 MiB for all defaults
. When using default settings, this commit should change
nothing. The first PE continues to be placed at 1 MiB
resulting in a metadata area size of 1020 KiB (for
4K page sizes; slightly smaller for larger page sizes.)
. When default_data_alignment is disabled in lvm.conf,
align pe_start at 1 MiB, based on a default metadata area
size that adapts to the page size. Previously, disabling
this option would result in mda_size that was too small
for common use, and produced a 64 KiB aligned pe_start.
. Customized pe_start and mda_size values continue to be
set as before in lvm.conf and command line.
. Remove the configure option for setting default_data_alignment
at build time.
. Improve alignment related option descriptions.
. Add section about alignment to pvcreate man page.
Previously, DEFAULT_PVMETADATASIZE was 255 sectors.
However, the fact that the config setting named
"default_data_alignment" has a default value of 1 (MiB)
meant that DEFAULT_PVMETADATASIZE was having no effect.
The metadata area size is the space between the start of
the metadata area (page size offset from the start of the
device) and the first PE (1 MiB by default due to
default_data_alignment 1.) The result is a 1020 KiB metadata
area on machines with 4KiB page size (1024 KiB - 4 KiB),
and smaller on machines with larger page size.
If default_data_alignment was set to 0 (disabled), then
DEFAULT_PVMETADATASIZE 255 would take effect, and produce a
metadata area that was 188 KiB and pe_start of 192 KiB.
This was too small for common use.
This is fixed by making the default metadata area size a
computed value that matches the value produced by
default_data_alignment.
2018-11-14 00:00:11 +03:00
( pva - > pe_start < = final_alignment_sectors ) | |
( pva - > pe_start - final_alignment_sectors < pva - > ba_size ) ) {
log_error ( " %s: Bootloader area would overlap data area. " , pv_dev_name ( pv ) ) ;
2013-02-15 14:02:53 +04:00
return 0 ;
}
2017-07-19 17:16:12 +03:00
Place the first PE at 1 MiB for all defaults
. When using default settings, this commit should change
nothing. The first PE continues to be placed at 1 MiB
resulting in a metadata area size of 1020 KiB (for
4K page sizes; slightly smaller for larger page sizes.)
. When default_data_alignment is disabled in lvm.conf,
align pe_start at 1 MiB, based on a default metadata area
size that adapts to the page size. Previously, disabling
this option would result in mda_size that was too small
for common use, and produced a 64 KiB aligned pe_start.
. Customized pe_start and mda_size values continue to be
set as before in lvm.conf and command line.
. Remove the configure option for setting default_data_alignment
at build time.
. Improve alignment related option descriptions.
. Add section about alignment to pvcreate man page.
Previously, DEFAULT_PVMETADATASIZE was 255 sectors.
However, the fact that the config setting named
"default_data_alignment" has a default value of 1 (MiB)
meant that DEFAULT_PVMETADATASIZE was having no effect.
The metadata area size is the space between the start of
the metadata area (page size offset from the start of the
device) and the first PE (1 MiB by default due to
default_data_alignment 1.) The result is a 1020 KiB metadata
area on machines with 4KiB page size (1024 KiB - 4 KiB),
and smaller on machines with larger page size.
If default_data_alignment was set to 0 (disabled), then
DEFAULT_PVMETADATASIZE 255 would take effect, and produce a
metadata area that was 188 KiB and pe_start of 192 KiB.
This was too small for common use.
This is fixed by making the default metadata area size a
computed value that matches the value produced by
default_data_alignment.
2018-11-14 00:00:11 +03:00
pv - > ba_start = pva - > ba_start ? : final_alignment_sectors ;
2017-07-19 17:16:12 +03:00
pv - > ba_size = pva - > ba_size ;
2013-02-15 14:02:53 +04:00
}
}
2011-02-21 15:20:18 +03:00
2016-02-19 00:31:27 +03:00
if ( pva - > extent_size )
pv - > pe_size = pva - > extent_size ;
2011-02-21 15:20:18 +03:00
2016-02-19 00:31:27 +03:00
if ( pva - > extent_count )
pv - > pe_count = pva - > extent_count ;
2011-02-21 15:20:18 +03:00
2015-05-07 12:08:49 +03:00
if ( ( pv - > pe_start + pv - > pe_count * ( uint64_t ) pv - > pe_size - 1 ) > pv - > size ) {
2011-02-21 15:20:18 +03:00
log_error ( " Physical extents end beyond end of device %s. " ,
2015-05-07 12:08:49 +03:00
pv_dev_name ( pv ) ) ;
2011-02-21 15:20:18 +03:00
return 0 ;
}
2016-02-19 00:31:27 +03:00
if ( pva - > label_sector ! = - 1 )
pv - > label_sector = pva - > label_sector ;
2011-02-21 15:20:18 +03:00
return 1 ;
}
2011-03-11 17:38:38 +03:00
static void _text_destroy_instance ( struct format_instance * fid )
2001-11-21 12:20:05 +03:00
{
2011-03-11 17:38:38 +03:00
if ( - - fid - > ref_count < = 1 ) {
2012-02-13 03:01:19 +04:00
if ( fid - > metadata_areas_index )
2012-02-10 06:53:03 +04:00
dm_hash_destroy ( fid - > metadata_areas_index ) ;
2011-03-11 17:38:38 +03:00
dm_pool_destroy ( fid - > mem ) ;
}
2002-04-24 22:20:51 +04:00
}
2010-12-20 16:32:49 +03:00
static void _text_destroy ( struct format_type * fmt )
2002-04-24 22:20:51 +04:00
{
2012-02-13 15:03:59 +04:00
if ( fmt - > orphan_vg )
free_orphan_vg ( fmt - > orphan_vg ) ;
2012-02-10 06:53:03 +04:00
2018-06-13 20:25:19 +03:00
if ( fmt - > private )
2018-06-08 15:40:53 +03:00
free ( fmt - > private ) ;
2002-04-24 22:20:51 +04:00
2018-06-08 15:40:53 +03:00
free ( fmt ) ;
2002-04-24 22:20:51 +04:00
}
2002-11-18 17:04:08 +03:00
static struct metadata_area_ops _metadata_text_file_ops = {
2006-05-10 01:23:51 +04:00
. vg_read = _vg_read_file ,
. vg_read_precommit = _vg_read_precommit_file ,
. vg_write = _vg_write_file ,
. vg_remove = _vg_remove_file ,
. vg_commit = _vg_commit_file
2002-11-18 17:04:08 +03:00
} ;
static struct metadata_area_ops _metadata_text_file_backup_ops = {
2006-05-10 01:23:51 +04:00
. vg_read = _vg_read_file ,
. vg_write = _vg_write_file ,
. vg_remove = _vg_remove_file ,
. vg_commit = _vg_commit_file_backup
2002-11-18 17:04:08 +03:00
} ;
static struct metadata_area_ops _metadata_text_raw_ops = {
2006-05-10 01:23:51 +04:00
. vg_read = _vg_read_raw ,
. vg_read_precommit = _vg_read_precommit_raw ,
. vg_write = _vg_write_raw ,
. vg_remove = _vg_remove_raw ,
. vg_precommit = _vg_precommit_raw ,
. vg_commit = _vg_commit_raw ,
2007-03-23 15:43:17 +03:00
. vg_revert = _vg_revert_raw ,
2010-06-29 00:31:59 +04:00
. mda_metadata_locn_copy = _metadata_locn_copy_raw ,
2010-06-30 17:51:11 +04:00
. mda_metadata_locn_name = _metadata_locn_name_raw ,
. mda_metadata_locn_offset = _metadata_locn_offset_raw ,
2007-11-05 20:17:55 +03:00
. mda_free_sectors = _mda_free_sectors_raw ,
2009-01-10 01:44:33 +03:00
. mda_total_sectors = _mda_total_sectors_raw ,
2007-03-23 15:43:17 +03:00
. mda_in_vg = _mda_in_vg_raw ,
2011-06-15 21:45:02 +04:00
. mda_locns_match = _mda_locns_match_raw ,
2012-02-23 17:11:07 +04:00
. mda_get_device = _mda_get_device_raw ,
2002-11-18 17:04:08 +03:00
} ;
2006-04-19 19:33:07 +04:00
static int _text_pv_setup ( const struct format_type * fmt ,
2011-02-21 15:24:15 +03:00
struct physical_volume * pv ,
struct volume_group * vg )
2002-11-18 17:04:08 +03:00
{
2011-02-21 15:24:15 +03:00
struct format_instance * fid = pv - > fid ;
2011-02-21 15:31:28 +03:00
const char * pvid = ( const char * ) ( * pv - > old_id . uuid ? & pv - > old_id : & pv - > id ) ;
2011-02-25 16:59:47 +03:00
struct lvmcache_info * info ;
2011-02-21 15:24:15 +03:00
unsigned mda_index ;
2011-03-02 13:23:29 +03:00
struct metadata_area * pv_mda , * pv_mda_copy ;
2011-02-21 15:24:15 +03:00
struct mda_context * pv_mdac ;
2006-11-10 21:24:11 +03:00
uint64_t pe_count ;
2011-02-21 15:24:15 +03:00
uint64_t size_reduction = 0 ;
2002-11-18 17:04:08 +03:00
2011-02-25 16:59:47 +03:00
/* If PV has its own format instance, add mdas from pv->fid to vg->fid. */
if ( pv - > fid ! = vg - > fid ) {
for ( mda_index = 0 ; mda_index < FMT_TEXT_MAX_MDAS_PER_PV ; mda_index + + ) {
if ( ! ( pv_mda = fid_get_mda_indexed ( fid , pvid , ID_LEN , mda_index ) ) )
continue ;
2002-11-18 17:04:08 +03:00
2011-02-25 16:59:47 +03:00
/* Be sure it's not already in VG's format instance! */
2011-03-02 13:23:29 +03:00
if ( ! fid_get_mda_indexed ( vg - > fid , pvid , ID_LEN , mda_index ) ) {
2012-02-13 15:09:25 +04:00
if ( ! ( pv_mda_copy = mda_copy ( vg - > fid - > mem , pv_mda ) ) )
return_0 ;
2011-03-02 13:23:29 +03:00
fid_add_mda ( vg - > fid , pv_mda_copy , pvid , ID_LEN , mda_index ) ;
}
2011-02-25 16:59:47 +03:00
}
}
/*
* Otherwise , if the PV is already a part of the VG ( pv - > fid = = vg - > fid ) ,
* reread PV mda information from the cache and add it to vg - > fid .
*/
else {
2012-02-23 17:11:07 +04:00
if ( ! pv - > dev | |
2016-06-06 22:04:17 +03:00
! ( info = lvmcache_info_from_pvid ( pv - > dev - > pvid , pv - > dev , 0 ) ) ) {
2011-02-25 16:59:47 +03:00
log_error ( " PV %s missing from cache " , pv_dev_name ( pv ) ) ;
return 0 ;
}
2012-02-10 05:28:27 +04:00
if ( ! lvmcache_check_format ( info , fmt ) )
return_0 ;
2011-02-25 16:59:47 +03:00
2012-02-10 05:28:27 +04:00
if ( ! lvmcache_fid_add_mdas_pv ( info , fid ) )
2011-02-25 16:59:47 +03:00
return_0 ;
2011-02-21 15:24:15 +03:00
}
2002-11-18 17:04:08 +03:00
2011-02-21 15:24:15 +03:00
/* If there's the 2nd mda, we need to reduce
* usable size for further pe_count calculation ! */
if ( ( pv_mda = fid_get_mda_indexed ( fid , pvid , ID_LEN , 1 ) ) & &
( pv_mdac = pv_mda - > metadata_locn ) )
size_reduction = pv_mdac - > area . size > > SECTOR_SHIFT ;
2006-10-06 02:02:52 +04:00
2011-02-21 15:24:15 +03:00
/* From now on, VG format instance will be used. */
2011-03-11 17:50:13 +03:00
pv_set_fid ( pv , vg - > fid ) ;
2009-07-30 21:45:28 +04:00
2011-02-21 15:24:15 +03:00
/* FIXME Cope with genuine pe_count 0 */
2009-07-30 21:45:28 +04:00
2011-02-21 15:24:15 +03:00
/* If missing, estimate pv->size from file-based metadata */
if ( ! pv - > size & & pv - > pe_count )
pv - > size = pv - > pe_count * ( uint64_t ) vg - > extent_size +
pv - > pe_start + size_reduction ;
2009-07-31 01:15:17 +04:00
2011-02-21 15:24:15 +03:00
/* Recalculate number of extents that will fit */
2012-05-09 16:30:56 +04:00
if ( ! pv - > pe_count & & vg - > extent_size ) {
2011-02-21 15:24:15 +03:00
pe_count = ( pv - > size - pv - > pe_start - size_reduction ) /
vg - > extent_size ;
if ( pe_count > UINT32_MAX ) {
log_error ( " PV %s too large for extent size %s. " ,
pv_dev_name ( pv ) ,
display_size ( vg - > cmd , ( uint64_t ) vg - > extent_size ) ) ;
return 0 ;
}
pv - > pe_count = ( uint32_t ) pe_count ;
2002-11-18 17:04:08 +03:00
}
return 1 ;
}
2011-03-11 17:45:17 +03:00
static void * _create_text_context ( struct dm_pool * mem , struct text_context * tc )
{
struct text_context * new_tc ;
const char * path ;
char * tmp ;
if ( ! tc )
return NULL ;
path = tc - > path_live ;
if ( ( tmp = strstr ( path , " .tmp " ) ) & & ( tmp = = path + strlen ( path ) - 4 ) ) {
log_error ( " %s: Volume group filename may not end in .tmp " ,
path ) ;
return NULL ;
}
if ( ! ( new_tc = dm_pool_alloc ( mem , sizeof ( * new_tc ) ) ) )
return_NULL ;
if ( ! ( new_tc - > path_live = dm_pool_strdup ( mem , path ) ) )
goto_bad ;
/* If path_edit not defined, create one from path_live with .tmp suffix. */
if ( ! tc - > path_edit ) {
if ( ! ( tmp = dm_pool_alloc ( mem , strlen ( path ) + 5 ) ) )
goto_bad ;
sprintf ( tmp , " %s.tmp " , path ) ;
new_tc - > path_edit = tmp ;
}
else if ( ! ( new_tc - > path_edit = dm_pool_strdup ( mem , tc - > path_edit ) ) )
goto_bad ;
if ( ! ( new_tc - > desc = tc - > desc ? dm_pool_strdup ( mem , tc - > desc )
: dm_pool_strdup ( mem , " " ) ) )
goto_bad ;
return ( void * ) new_tc ;
bad :
dm_pool_free ( mem , new_tc ) ;
log_error ( " Couldn't allocate text format context object. " ) ;
return NULL ;
}
2011-02-21 15:05:49 +03:00
static int _create_vg_text_instance ( struct format_instance * fid ,
const struct format_instance_ctx * fic )
{
uint32_t type = fic - > type ;
2005-10-23 04:14:48 +04:00
struct text_fid_context * fidtc ;
2010-06-29 00:35:17 +04:00
struct metadata_area * mda ;
2003-07-05 02:34:56 +04:00
struct lvmcache_vginfo * vginfo ;
2011-02-21 15:05:49 +03:00
const char * vg_name , * vg_id ;
2002-04-24 22:20:51 +04:00
2005-10-23 04:14:48 +04:00
if ( ! ( fidtc = ( struct text_fid_context * )
2011-03-11 18:10:16 +03:00
dm_pool_zalloc ( fid - > mem , sizeof ( * fidtc ) ) ) ) {
2005-10-23 04:14:48 +04:00
log_error ( " Couldn't allocate text_fid_context. " ) ;
2011-02-21 15:05:49 +03:00
return 0 ;
2005-10-23 04:14:48 +04:00
}
fid - > private = ( void * ) fidtc ;
2002-04-24 22:20:51 +04:00
2011-02-21 15:05:49 +03:00
if ( type & FMT_INSTANCE_PRIVATE_MDAS ) {
2011-03-11 18:10:16 +03:00
if ( ! ( mda = dm_pool_zalloc ( fid - > mem , sizeof ( * mda ) ) ) )
2011-02-21 15:05:49 +03:00
return_0 ;
2002-11-18 17:04:08 +03:00
mda - > ops = & _metadata_text_file_backup_ops ;
2011-03-11 18:10:16 +03:00
mda - > metadata_locn = _create_text_context ( fid - > mem , fic - > context . private ) ;
2010-10-05 21:34:05 +04:00
mda - > status = 0 ;
2012-02-10 06:53:03 +04:00
fid - > metadata_areas_index = NULL ;
2011-02-21 15:05:49 +03:00
fid_add_mda ( fid , mda , NULL , 0 , 0 ) ;
2002-04-24 22:20:51 +04:00
} else {
2011-02-21 15:05:49 +03:00
vg_name = fic - > context . vg_ref . vg_name ;
vg_id = fic - > context . vg_ref . vg_id ;
2002-04-24 22:20:51 +04:00
2012-02-10 06:53:03 +04:00
if ( ! ( fid - > metadata_areas_index = dm_hash_create ( 128 ) ) ) {
2011-02-21 15:05:49 +03:00
log_error ( " Couldn't create metadata index for format "
" instance of VG %s. " , vg_name ) ;
return 0 ;
2002-04-24 22:20:51 +04:00
}
2002-11-18 17:04:08 +03:00
2011-02-21 15:05:49 +03:00
if ( type & FMT_INSTANCE_MDAS ) {
2012-02-10 05:28:27 +04:00
if ( ! ( vginfo = lvmcache_vginfo_from_vgname ( vg_name , vg_id ) ) )
goto_out ;
if ( ! lvmcache_fid_add_mdas_vg ( vginfo , fid ) )
2011-02-21 15:05:49 +03:00
goto_out ;
2002-11-18 17:04:08 +03:00
}
2002-04-24 22:20:51 +04:00
}
2011-02-21 15:05:49 +03:00
out :
return 1 ;
}
2011-03-02 13:19:14 +03:00
static int _add_metadata_area_to_pv ( struct physical_volume * pv ,
unsigned mda_index ,
uint64_t mda_start ,
uint64_t mda_size ,
unsigned mda_ignored )
2011-02-21 15:17:26 +03:00
{
struct metadata_area * mda ;
struct mda_context * mdac ;
struct mda_lists * mda_lists = ( struct mda_lists * ) pv - > fmt - > private ;
if ( mda_index > = FMT_TEXT_MAX_MDAS_PER_PV ) {
log_error ( INTERNAL_ERROR " can't add metadata area with "
" index %u to PV %s. Metadata "
" layout not supported by %s format. " ,
mda_index , dev_name ( pv - > dev ) ,
pv - > fmt - > name ) ;
}
2011-03-11 18:10:16 +03:00
if ( ! ( mda = dm_pool_zalloc ( pv - > fid - > mem , sizeof ( struct metadata_area ) ) ) ) {
2011-02-21 15:17:26 +03:00
log_error ( " struct metadata_area allocation failed " ) ;
return 0 ;
}
2011-03-11 18:10:16 +03:00
if ( ! ( mdac = dm_pool_zalloc ( pv - > fid - > mem , sizeof ( struct mda_context ) ) ) ) {
2011-02-21 15:17:26 +03:00
log_error ( " struct mda_context allocation failed " ) ;
2018-06-08 15:40:53 +03:00
free ( mda ) ;
2011-02-21 15:17:26 +03:00
return 0 ;
}
mda - > ops = mda_lists - > raw_ops ;
mda - > metadata_locn = mdac ;
mda - > status = 0 ;
mdac - > area . dev = pv - > dev ;
mdac - > area . start = mda_start ;
mdac - > area . size = mda_size ;
mdac - > free_sectors = UINT64_C ( 0 ) ;
memset ( & mdac - > rlocn , 0 , sizeof ( mdac - > rlocn ) ) ;
mda_set_ignored ( mda , mda_ignored ) ;
fid_add_mda ( pv - > fid , mda , ( char * ) & pv - > id , ID_LEN , mda_index ) ;
return 1 ;
}
2012-02-10 06:53:03 +04:00
/*
 * Forward declaration: _text_pv_add_metadata_area() calls this function
 * to drop an existing area before its definition appears further down.
 */
static int _text_pv_remove_metadata_area ( const struct format_type * fmt ,
struct physical_volume * pv ,
unsigned mda_index ) ;
2011-02-21 15:17:26 +03:00
/*
 * Lay out a metadata area on a PV, wipe its beginning and register it.
 *
 * fmt             - format; passed to the removal helper and used for
 *                   its name in the invalid-index message
 * pv              - physical volume receiving the metadata area
 * pe_start_locked - non-zero: pv->pe_start is a fixed limit and is not
 *                   updated; zero: for index 0, pv->pe_start (and
 *                   pv->ba_start when a bootloader area is set) is moved
 *                   to follow the computed area
 * mda_index       - 0 places the area at the start of the device, any
 *                   other valid index at the end; must be below
 *                   FMT_TEXT_MAX_MDAS_PER_PV
 * mda_size        - requested size in sectors (shifted by SECTOR_SHIFT
 *                   below); may be grown for alignment or shrunk to fit
 * mda_ignored     - forwarded to _add_metadata_area_to_pv()
 *
 * An area already registered under mda_index is removed first.  If the
 * resulting size is zero, nothing is wiped or registered and 1 is
 * returned.  A non-zero size below MDA_SIZE_MIN is an error.
 *
 * Returns 1 on success, 0 on failure.
 */
static int _text_pv_add_metadata_area ( const struct format_type * fmt ,
struct physical_volume * pv ,
int pe_start_locked ,
unsigned mda_index ,
uint64_t mda_size ,
unsigned mda_ignored )
{
struct format_instance * fid = pv - > fid ;
2011-02-21 15:31:28 +03:00
const char * pvid = ( const char * ) ( * pv - > old_id . uuid ? & pv - > old_id : & pv - > id ) ;
2017-08-15 14:23:51 +03:00
uint64_t ba_size , pe_start , first_unallocated ;
2011-02-21 15:17:26 +03:00
uint64_t alignment , alignment_offset ;
uint64_t disk_size ;
uint64_t mda_start ;
2011-02-25 16:50:02 +03:00
uint64_t adjustment , limit , tmp_mda_size ;
2011-02-21 15:17:26 +03:00
uint64_t wipe_size = 8 < < SECTOR_SHIFT ;
2018-02-20 00:40:44 +03:00
uint64_t zero_len ;
2011-02-21 15:17:26 +03:00
size_t page_size = lvm_getpagesize ( ) ;
struct metadata_area * mda ;
struct mda_context * mdac ;
2011-02-25 16:50:02 +03:00
const char * limit_name ;
int limit_applied = 0 ;
2011-02-21 15:17:26 +03:00
if ( mda_index > = FMT_TEXT_MAX_MDAS_PER_PV ) {
log_error ( INTERNAL_ERROR " invalid index of value %u used "
" while trying to add metadata area on PV %s. "
" Metadata layout not supported by %s format. " ,
mda_index , pv_dev_name ( pv ) , fmt - > name ) ;
return 0 ;
}
/* From here on every sector quantity is held shifted by SECTOR_SHIFT. */
pe_start = pv - > pe_start < < SECTOR_SHIFT ;
2013-05-28 14:37:22 +04:00
ba_size = pv - > ba_size < < SECTOR_SHIFT ;
2011-02-21 15:17:26 +03:00
alignment = pv - > pe_align < < SECTOR_SHIFT ;
alignment_offset = pv - > pe_align_offset < < SECTOR_SHIFT ;
disk_size = pv - > size < < SECTOR_SHIFT ;
mda_size = mda_size < < SECTOR_SHIFT ;
/* An area already registered under this index is removed before the new one is laid out. */
if ( fid_get_mda_indexed ( fid , pvid , ID_LEN , mda_index ) ) {
2012-02-10 06:53:03 +04:00
if ( ! _text_pv_remove_metadata_area ( fmt , pv , mda_index ) ) {
log_error ( INTERNAL_ERROR " metadata area with index %u already "
" exists on PV %s and removal failed. " ,
mda_index , pv_dev_name ( pv ) ) ;
return 0 ;
}
2011-02-21 15:17:26 +03:00
}
/* First metadata area at the start of the device. */
if ( mda_index = = 0 ) {
/*
* Try to fit MDA0 end within given pe_start limit if its value
* is locked . If it ' s not locked , count with any existing MDA1 .
* If there ' s no MDA1 , just use disk size as the limit .
*/
2011-02-25 16:50:02 +03:00
if ( pe_start_locked ) {
2011-02-21 15:17:26 +03:00
limit = pe_start ;
2011-02-25 16:50:02 +03:00
limit_name = " pe_start " ;
}
2011-02-21 15:17:26 +03:00
else if ( ( mda = fid_get_mda_indexed ( fid , pvid , ID_LEN , 1 ) ) & &
2011-02-25 16:50:02 +03:00
( mdac = mda - > metadata_locn ) ) {
2011-02-21 15:17:26 +03:00
limit = mdac - > area . start ;
2011-02-25 16:50:02 +03:00
limit_name = " MDA1 start " ;
}
else {
2011-02-21 15:17:26 +03:00
limit = disk_size ;
2011-02-25 16:50:02 +03:00
limit_name = " disk size " ;
}
2011-02-21 15:17:26 +03:00
2013-05-28 14:37:22 +04:00
/* Adjust limits for bootloader area if present. */
if ( ba_size ) {
limit - = ba_size ;
limit_name = " ba_start " ;
2013-02-15 14:02:53 +04:00
}
2011-02-21 15:17:26 +03:00
if ( limit > disk_size )
goto bad ;
mda_start = LABEL_SCAN_SIZE ;
/*
 * NOTE(review): limit and mda_start are unsigned, so "limit - mda_start"
 * wraps when limit is below LABEL_SCAN_SIZE; nothing visible here rules
 * that out - confirm callers never pass such a limit.
 */
/* Align MDA0 start with page size if possible. */
if ( limit - mda_start > = MDA_SIZE_MIN ) {
if ( ( adjustment = mda_start % page_size ) )
mda_start + = ( page_size - adjustment ) ;
}
/* Align MDA0 end position with given alignment if possible. */
2011-02-25 16:50:02 +03:00
if ( alignment & &
( adjustment = ( mda_start + mda_size ) % alignment ) ) {
tmp_mda_size = mda_size + alignment - adjustment ;
if ( mda_start + tmp_mda_size < = limit )
mda_size = tmp_mda_size ;
2011-02-21 15:17:26 +03:00
}
/* Align MDA0 end position with given alignment offset if possible. */
2015-11-16 03:04:21 +03:00
if ( alignment & & alignment_offset & &
2011-02-21 15:17:26 +03:00
( ( ( mda_start + mda_size ) % alignment ) = = 0 ) ) {
2011-02-25 16:50:02 +03:00
tmp_mda_size = mda_size + alignment_offset ;
if ( mda_start + tmp_mda_size < = limit )
mda_size = tmp_mda_size ;
2011-02-21 15:17:26 +03:00
}
if ( mda_start + mda_size > limit ) {
/*
* Try to decrease the MDA0 size with twice the
* alignment and then align with given alignment .
* If pe_start is locked , skip this type of
* alignment since it would be useless .
* Check first whether we can apply that !
*/
2013-11-24 22:00:53 +04:00
if ( ! pe_start_locked & & alignment & &
2011-02-21 15:17:26 +03:00
( ( limit - mda_start ) > alignment * 2 ) ) {
mda_size = limit - mda_start - alignment * 2 ;
if ( ( adjustment = ( mda_start + mda_size ) % alignment ) )
mda_size + = ( alignment - adjustment ) ;
/* Still too much? Then there's nothing else to do. */
if ( mda_start + mda_size > limit )
goto bad ;
}
/* Otherwise, give up and take any usable space. */
else
mda_size = limit - mda_start ;
2011-02-25 16:50:02 +03:00
limit_applied = 1 ;
2011-02-21 15:17:26 +03:00
}
/*
* If PV ' s pe_start is not locked , update pe_start value with the
* start of the area that follows the MDA0 we ' ve just calculated .
*/
if ( ! pe_start_locked ) {
2013-05-28 14:37:22 +04:00
if ( ba_size ) {
pv - > ba_start = ( mda_start + mda_size ) > > SECTOR_SHIFT ;
pv - > pe_start = pv - > ba_start + pv - > ba_size ;
2013-02-15 14:02:53 +04:00
} else
pv - > pe_start = ( mda_start + mda_size ) > > SECTOR_SHIFT ;
2011-02-21 15:17:26 +03:00
}
}
/* Second metadata area at the end of the device. */
else {
/*
* Try to fit MDA1 start within given pe_end or pe_start limit
2011-02-25 16:50:02 +03:00
* if defined or locked . If pe_start is not defined yet , count
* with any existing MDA0 . If MDA0 does not exist , just use
* LABEL_SCAN_SIZE .
2017-08-15 14:23:51 +03:00
*
* The first_unallocated here is the first unallocated byte
* beyond existing pe_end if there is any preallocated data area
* reserved already so we can take that as lower limit for our MDA1
* start calculation . If data area is not reserved yet , we set
* first_unallocated to 0 , meaning this is not our limiting factor
* and we will look at other limiting factors if they exist .
* Of course , if we have preallocated data area , we also must
* have pe_start assigned too ( simply , data area needs its start
* and end specification ) .
2011-02-21 15:17:26 +03:00
*/
2017-08-15 14:23:51 +03:00
first_unallocated = pv - > pe_count ? ( pv - > pe_start + pv - > pe_count *
( uint64_t ) pv - > pe_size ) < < SECTOR_SHIFT
: 0 ;
2011-02-25 16:50:02 +03:00
if ( pe_start | | pe_start_locked ) {
2017-08-15 14:23:51 +03:00
limit = first_unallocated ? first_unallocated : pe_start ;
limit_name = first_unallocated ? " pe_end " : " pe_start " ;
2013-02-15 14:02:53 +04:00
} else {
if ( ( mda = fid_get_mda_indexed ( fid , pvid , ID_LEN , 0 ) ) & &
( mdac = mda - > metadata_locn ) ) {
limit = mdac - > area . start + mdac - > area . size ;
limit_name = " MDA0 end " ;
}
else {
limit = LABEL_SCAN_SIZE ;
limit_name = " label scan size " ;
}
2013-05-28 14:37:22 +04:00
/* Adjust limits for bootloader area if present. */
if ( ba_size ) {
limit + = ba_size ;
limit_name = " ba_end " ;
2013-02-15 14:02:53 +04:00
}
2011-02-25 16:50:02 +03:00
}
2011-02-21 15:17:26 +03:00
2017-08-15 14:23:51 +03:00
if ( limit > = disk_size )
2011-02-21 15:17:26 +03:00
goto bad ;
2011-02-25 16:50:02 +03:00
if ( mda_size > disk_size ) {
mda_size = disk_size - limit ;
limit_applied = 1 ;
2011-02-21 15:17:26 +03:00
}
2011-02-25 16:50:02 +03:00
mda_start = disk_size - mda_size ;
/* If MDA1 size is too big, just take any usable space. */
if ( disk_size - mda_size < limit ) {
2011-02-21 15:17:26 +03:00
mda_size = disk_size - limit ;
2011-02-25 16:50:02 +03:00
mda_start = disk_size - mda_size ;
limit_applied = 1 ;
}
/* Otherwise, try to align MDA1 start if possible. */
else if ( alignment & &
( adjustment = mda_start % alignment ) ) {
tmp_mda_size = mda_size + adjustment ;
if ( tmp_mda_size < disk_size & &
disk_size - tmp_mda_size > = limit ) {
mda_size = tmp_mda_size ;
mda_start = disk_size - mda_size ;
}
}
2011-02-21 15:17:26 +03:00
}
2011-02-25 16:50:02 +03:00
if ( limit_applied )
log_very_verbose ( " Using limited metadata area size on %s "
2017-12-11 18:32:53 +03:00
" with value " FMTu64 " (limited by %s of "
2015-07-06 17:09:17 +03:00
FMTu64 " ). " , pv_dev_name ( pv ) ,
2011-02-25 16:50:02 +03:00
mda_size , limit_name , limit ) ;
2011-02-21 15:17:26 +03:00
/* A size of zero skips wiping and registration; the function still returns 1. */
if ( mda_size ) {
2015-10-30 14:02:29 +03:00
if ( mda_size < MDA_SIZE_MIN ) {
2017-12-11 18:32:53 +03:00
log_error ( " Metadata area size too small: " FMTu64 " bytes. "
2015-10-30 14:02:29 +03:00
" It must be at least %u bytes. " , mda_size , MDA_SIZE_MIN ) ;
goto bad ;
}
2011-02-21 15:17:26 +03:00
/* Wipe metadata area with zeroes. */
/* Only the first wipe_size bytes are zeroed, or the whole area when it is smaller. */
2018-02-20 00:40:44 +03:00
zero_len = ( mda_size > wipe_size ) ? wipe_size : mda_size ;
2018-02-27 20:26:04 +03:00
if ( ! dev_write_zeros ( pv - > dev , mda_start , zero_len ) ) {
2018-02-20 00:40:44 +03:00
log_error ( " Failed to wipe new metadata area on %s at %llu len %llu " ,
pv_dev_name ( pv ) ,
( unsigned long long ) mda_start ,
( unsigned long long ) zero_len ) ;
2017-12-05 02:18:56 +03:00
return 0 ;
2011-02-21 15:17:26 +03:00
}
/* Finally, add new metadata area to PV's format instance. */
2011-03-02 13:19:14 +03:00
if ( ! _add_metadata_area_to_pv ( pv , mda_index , mda_start ,
mda_size , mda_ignored ) )
2011-02-21 15:17:26 +03:00
return_0 ;
}
return 1 ;
/* Shared failure exit for every "does not fit" case above. */
bad :
log_error ( " Not enough space available for metadata area "
" with index %u on PV %s. " , mda_index , pv_dev_name ( pv ) ) ;
return 0 ;
}
2011-03-02 13:19:14 +03:00
static int _remove_metadata_area_from_pv ( struct physical_volume * pv ,
unsigned mda_index )
2011-02-21 15:17:54 +03:00
{
if ( mda_index > = FMT_TEXT_MAX_MDAS_PER_PV ) {
log_error ( INTERNAL_ERROR " can't remove metadata area with "
" index %u from PV %s. Metadata "
" layou not supported by %s format. " ,
mda_index , dev_name ( pv - > dev ) ,
pv - > fmt - > name ) ;
return 0 ;
}
return fid_remove_mda ( pv - > fid , NULL , ( const char * ) & pv - > id ,
ID_LEN , mda_index ) ;
}
/*
 * Format handler hook for removing a metadata area from a PV.
 *
 * fmt is not used; the work is done by _remove_metadata_area_from_pv(),
 * whose result is returned unchanged.
 */
static int _text_pv_remove_metadata_area(const struct format_type *fmt,
					 struct physical_volume *pv,
					 unsigned mda_index)
{
	int removed = _remove_metadata_area_from_pv(pv, mda_index);

	return removed;
}
2011-02-21 15:27:26 +03:00
static int _text_pv_resize ( const struct format_type * fmt ,
struct physical_volume * pv ,
struct volume_group * vg ,
uint64_t size )
{
struct format_instance * fid = pv - > fid ;
2011-02-21 15:31:28 +03:00
const char * pvid = ( const char * ) ( * pv - > old_id . uuid ? & pv - > old_id : & pv - > id ) ;
2011-02-21 15:27:26 +03:00
struct metadata_area * mda ;
struct mda_context * mdac ;
uint64_t size_reduction ;
uint64_t mda_size ;
unsigned mda_ignored ;
/*
* First , set the new size and update the cache and reset pe_count .
* ( pe_count must be reset otherwise it would be considered as
* a limiting factor while moving the mda ! )
*/
pv - > size = size ;
pv - > pe_count = 0 ;
/* If there's an mda at the end, move it to a new position. */
if ( ( mda = fid_get_mda_indexed ( fid , pvid , ID_LEN , 1 ) ) & &
( mdac = mda - > metadata_locn ) ) {
/* FIXME: Maybe MDA0 size would be better? */
mda_size = mdac - > area . size > > SECTOR_SHIFT ;
mda_ignored = mda_is_ignored ( mda ) ;
if ( ! _text_pv_remove_metadata_area ( fmt , pv , 1 ) | |
! _text_pv_add_metadata_area ( fmt , pv , 1 , 1 , mda_size ,
mda_ignored ) ) {
log_error ( " Failed to move metadata area with index 1 "
" while resizing PV %s. " , pv_dev_name ( pv ) ) ;
return 0 ;
}
}
/* If there's a VG, reduce size by counting in pe_start and metadata areas. */
2014-01-17 05:12:04 +04:00
if ( vg & & ! is_orphan_vg ( vg - > name ) ) {
2011-02-21 15:27:26 +03:00
size_reduction = pv_pe_start ( pv ) ;
if ( ( mda = fid_get_mda_indexed ( fid , pvid , ID_LEN , 1 ) ) & &
( mdac = mda - > metadata_locn ) )
size_reduction + = mdac - > area . size > > SECTOR_SHIFT ;
pv - > size - = size_reduction ;
}
return 1 ;
}
2011-02-21 15:05:49 +03:00
static struct format_instance * _text_create_text_instance ( const struct format_type * fmt ,
2011-03-11 17:30:27 +03:00
const struct format_instance_ctx * fic )
2011-02-21 15:05:49 +03:00
{
struct format_instance * fid ;
2011-03-11 17:30:27 +03:00
if ( ! ( fid = alloc_fid ( fmt , fic ) ) )
return_NULL ;
2011-02-21 15:05:49 +03:00
2012-12-14 22:43:42 +04:00
if ( ! _create_vg_text_instance ( fid , fic ) ) {
dm_pool_destroy ( fid - > mem ) ;
return_NULL ;
}
2011-02-21 15:05:49 +03:00
2012-12-14 22:43:42 +04:00
return fid ;
2002-04-24 22:20:51 +04:00
}
2001-11-21 12:20:05 +03:00
static struct format_handler _text_handler = {
2011-02-21 15:20:18 +03:00
. pv_initialise = _text_pv_initialise ,
2006-05-10 01:23:51 +04:00
. pv_setup = _text_pv_setup ,
2011-02-21 15:17:26 +03:00
. pv_add_metadata_area = _text_pv_add_metadata_area ,
2011-02-21 15:17:54 +03:00
. pv_remove_metadata_area = _text_pv_remove_metadata_area ,
2011-02-21 15:27:26 +03:00
. pv_resize = _text_pv_resize ,
2006-05-10 01:23:51 +04:00
. pv_write = _text_pv_write ,
2016-02-12 15:53:06 +03:00
. pv_needs_rewrite = _text_pv_needs_rewrite ,
2006-05-10 01:23:51 +04:00
. vg_setup = _text_vg_setup ,
. lv_setup = _text_lv_setup ,
. create_instance = _text_create_text_instance ,
. destroy_instance = _text_destroy_instance ,
. destroy = _text_destroy
2001-11-21 12:20:05 +03:00
} ;
2002-04-24 22:20:51 +04:00
struct format_type * create_text_format ( struct cmd_context * cmd )
{
2012-02-10 06:53:03 +04:00
struct format_instance_ctx fic ;
struct format_instance * fid ;
2002-04-24 22:20:51 +04:00
struct format_type * fmt ;
2002-11-18 17:04:08 +03:00
struct mda_lists * mda_lists ;
2002-04-24 22:20:51 +04:00
2018-06-08 15:40:53 +03:00
if ( ! ( fmt = malloc ( sizeof ( * fmt ) ) ) ) {
2012-02-13 14:56:31 +04:00
log_error ( " Failed to allocate text format type structure. " ) ;
return NULL ;
}
2001-11-21 12:20:05 +03:00
2002-04-24 22:20:51 +04:00
fmt - > cmd = cmd ;
fmt - > ops = & _text_handler ;
fmt - > name = FMT_TEXT_NAME ;
2002-11-18 17:04:08 +03:00
fmt - > alias = FMT_TEXT_ALIAS ;
2008-02-06 18:47:28 +03:00
fmt - > orphan_vg_name = ORPHAN_VG_NAME ( FMT_TEXT_NAME ) ;
2018-04-28 00:22:46 +03:00
fmt - > features = FMT_SEGMENTS | FMT_TAGS | FMT_PRECOMMIT |
2006-04-30 02:08:43 +04:00
FMT_UNLIMITED_VOLS | FMT_RESIZE_PV |
2018-04-28 00:22:46 +03:00
FMT_UNLIMITED_STRIPESIZE | FMT_CONFIG_PROFILE |
2016-02-12 14:58:59 +03:00
FMT_NON_POWER2_EXTENTS | FMT_PV_FLAGS ;
2002-04-24 22:20:51 +04:00
2018-06-08 15:40:53 +03:00
if ( ! ( mda_lists = malloc ( sizeof ( struct mda_lists ) ) ) ) {
2002-04-24 22:20:51 +04:00
log_error ( " Failed to allocate dir_list " ) ;
2018-06-08 15:40:53 +03:00
free ( fmt ) ;
2002-04-24 22:20:51 +04:00
return NULL ;
2002-01-15 20:37:23 +03:00
}
2002-11-18 17:04:08 +03:00
mda_lists - > file_ops = & _metadata_text_file_ops ;
mda_lists - > raw_ops = & _metadata_text_raw_ops ;
fmt - > private = ( void * ) mda_lists ;
2002-01-15 20:37:23 +03:00
2012-02-23 17:11:07 +04:00
dm_list_init ( & fmt - > mda_ops ) ;
dm_list_add ( & fmt - > mda_ops , & _metadata_text_raw_ops . list ) ;
2002-11-18 17:04:08 +03:00
if ( ! ( fmt - > labeller = text_labeller_create ( fmt ) ) ) {
log_error ( " Couldn't create text label handler. " ) ;
2012-02-13 14:56:31 +04:00
goto bad ;
2002-04-24 22:20:51 +04:00
}
2001-11-21 12:20:05 +03:00
2013-07-29 17:58:18 +04:00
if ( ! ( label_register_handler ( fmt - > labeller ) ) ) {
2002-11-18 17:04:08 +03:00
log_error ( " Couldn't register text label handler. " ) ;
2012-02-08 14:49:36 +04:00
fmt - > labeller - > ops - > destroy ( fmt - > labeller ) ;
2012-02-13 14:56:31 +04:00
goto bad ;
2002-11-18 17:04:08 +03:00
}
2012-02-13 14:56:31 +04:00
if ( ! ( fmt - > orphan_vg = alloc_vg ( " text_orphan " , cmd , fmt - > orphan_vg_name ) ) )
goto_bad ;
2012-02-10 06:53:03 +04:00
2012-02-13 03:01:19 +04:00
fic . type = FMT_INSTANCE_AUX_MDAS ;
2012-02-10 06:53:03 +04:00
fic . context . vg_ref . vg_name = fmt - > orphan_vg_name ;
fic . context . vg_ref . vg_id = NULL ;
2012-02-13 14:56:31 +04:00
if ( ! ( fid = _text_create_text_instance ( fmt , & fic ) ) )
goto_bad ;
2012-02-10 06:53:03 +04:00
vg_set_fid ( fmt - > orphan_vg , fid ) ;
2004-09-14 21:37:51 +04:00
log_very_verbose ( " Initialised format: %s " , fmt - > name ) ;
2002-04-24 22:20:51 +04:00
return fmt ;
2012-02-13 14:56:31 +04:00
bad :
2012-02-08 14:49:36 +04:00
_text_destroy ( fmt ) ;
2002-02-08 14:13:47 +03:00
return NULL ;
2001-11-21 12:20:05 +03:00
}
2019-02-06 21:32:26 +03:00
int text_wipe_outdated_pv_mda ( struct cmd_context * cmd , struct device * dev ,
struct metadata_area * mda )
{
struct mda_context * mdac = mda - > metadata_locn ;
uint64_t start_byte = mdac - > area . start ;
struct mda_header * mdab ;
struct raw_locn * rlocn_slot0 ;
struct raw_locn * rlocn_slot1 ;
uint32_t bad_fields = 0 ;
if ( ! ( mdab = raw_read_mda_header ( cmd - > fmt , & mdac - > area , mda_is_primary ( mda ) , 0 , & bad_fields ) ) ) {
log_error ( " Failed to read outdated pv mda header on %s " , dev_name ( dev ) ) ;
return 0 ;
}
rlocn_slot0 = & mdab - > raw_locns [ 0 ] ;
rlocn_slot1 = & mdab - > raw_locns [ 1 ] ;
rlocn_slot0 - > offset = 0 ;
rlocn_slot0 - > size = 0 ;
rlocn_slot0 - > checksum = 0 ;
rlocn_slot1 - > offset = 0 ;
rlocn_slot1 - > size = 0 ;
rlocn_slot1 - > checksum = 0 ;
if ( ! _raw_write_mda_header ( cmd - > fmt , dev , mda_is_primary ( mda ) , start_byte , mdab ) ) {
log_error ( " Failed to write outdated pv mda header on %s " , dev_name ( dev ) ) ;
return 0 ;
}
return 1 ;
}
improve reading and repairing vg metadata
The fact that vg repair is implemented as a part of vg read
has led to a messy and complicated implementation of vg_read,
and limited and uncontrolled repair capability. This splits
read and repair apart.
Summary
-------
- take all kinds of various repairs out of vg_read
- vg_read no longer writes anything
- vg_read now simply reads and returns vg metadata
- vg_read ignores bad or old copies of metadata
- vg_read proceeds with a single good copy of metadata
- improve error checks and handling when reading
- keep track of bad (corrupt) copies of metadata in lvmcache
- keep track of old (seqno) copies of metadata in lvmcache
- keep track of outdated PVs in lvmcache
- vg_write will do basic repairs
- new command vgck --updatemetadata will do all repairs
Details
-------
- In scan, do not delete dev from lvmcache if reading/processing fails;
the dev is still present, and removing it makes it look like the dev
is not there. Records are now kept about the problems with each PV
so they can be fixed/repaired in the appropriate places.
- In scan, record a bad mda on failure, and delete the mda from
mda in use list so it will not be used by vg_read or vg_write,
only by repair.
- In scan, succeed if any good mda on a device is found, instead of
failing if any is bad. The bad/old copies of metadata should not
interfere with normal usage while good copies can be used.
- In scan, add a record of old mdas in lvmcache for later, do not repair
them while reading, and do not let them prevent us from finding and
using a good copy of metadata from elsewhere. One result is that
"inconsistent metadata" is no longer a read error, but instead a
record in lvmcache that can be addressed separate from the read.
- Treat a dev with no good mdas like a dev with no mdas, which is an
existing case we already handle.
- Don't use a fake vg "handle" for returning an error from vg_read,
or the vg_read_error function for getting that error number;
just return null if the vg cannot be read or used, and an error_flags
arg with flags set for the specific kind of error (which can be used
later for determining the kind of repair.)
- Saving an original copy of the vg metadata, for purposes of reverting
a write, is now done explicitly in vg_read instead of being hidden in
the vg_make_handle function.
- When a vg is not accessible due to "access restrictions" but is
otherwise fine, return the vg through the new error_vg arg so that
process_each_pv can skip the PVs in the VG while processing.
(This is a temporary accommodation for the way process_each_pv
tracks which devs have been looked at, and can be dropped later
when process_each_pv implementation dev tracking is changed.)
- vg_read does not try to fix or recover a vg, but now just reads the
metadata, checks access restrictions and returns it.
(Checking access restrictions might be better done outside of vg_read,
but this is a later improvement.)
- _vg_read now simply makes one attempt to read metadata from
each mda, and uses the most recent copy to return to the caller
in the form of a 'vg' struct.
(bad mdas were excluded during the scan and are not retried)
(old mdas were not excluded during scan and are retried here)
- vg_read uses _vg_read to get the latest copy of metadata from mdas,
and then makes various checks against it to produce warnings,
and to check if VG access is allowed (access restrictions include:
writable, foreign, shared, clustered, missing pvs).
- Things that were previously silently/automatically written by vg_read
that are now done by vg_write, based on the records made in lvmcache
during the scan and read:
. clearing the missing flag
. updating old copies of metadata
. clearing outdated pvs
. updating pv header flags
- Bad/corrupt metadata are now repaired; they were not before.
Test changes
------------
- A read command no longer writes the VG to repair it, so add a write
command to do a repair.
(inconsistent-metadata, unlost-pv)
- When a missing PV is removed from a VG, and then the device is
enabled again, vgck --updatemetadata is needed to clear the
outdated PV before it can be used again, where it wasn't before.
(lvconvert-repair-policy, lvconvert-repair-raid, lvconvert-repair,
mirror-vgreduce-removemissing, pv-ext-flags, unlost-pv)
Reading bad/old metadata
------------------------
- "bad metadata": the mda_header or metadata text has invalid fields
or can't be parsed by lvm. This is a form of corruption that would
not be caused by known failure scenarios. A checksum error is
typically included among the errors reported.
- "old metadata": a valid copy of the metadata that has a smaller seqno
than other copies of the metadata. This can happen if the device
failed, or io failed, or lvm failed while committing new metadata
to all the metadata areas. Old metadata on a PV that has been
removed from the VG is the "outdated" case below.
When a VG has some PVs with bad/old metadata, lvm can simply ignore
the bad/old copies, and use a good copy. This is why there are
multiple copies of the metadata -- so it's available even when some
of the copies cannot be used. The bad/old copies do not have to be
repaired before the VG can be used (the repair can happen later.)
A PV with no good copies of the metadata simply falls back to being
treated like a PV with no mdas; a common and harmless configuration.
When bad/old metadata exists, lvm warns the user about it, and
suggests repairing it using a new metadata repair command.
Bad metadata in particular is something that users will want to
investigate and repair themselves, since it should not happen and
may indicate some other problem that needs to be fixed.
PVs with bad/old metadata are not the same as missing devices.
Missing devices will block various kinds of VG modification or
activation, but bad/old metadata will not.
Previously, lvm would attempt to repair bad/old metadata whenever
it was read. This was unnecessary since lvm does not require every
copy of the metadata to be used. It would also hide potential
problems that should be investigated by the user. It was also
dangerous in cases where the VG was on shared storage. The user
is now allowed to investigate potential problems and decide how
and when to repair them.
Repairing bad/old metadata
--------------------------
When label scan sees bad metadata in an mda, that mda is removed
from the lvmcache info->mdas list. This means that vg_read will
skip it, and not attempt to read/process it again. If it was
the only in-use mda on a PV, that PV is treated like a PV with
no mdas. It also means that vg_write will also skip the bad mda,
and not attempt to write new metadata to it. The only way to
repair bad metadata is with the metadata repair command.
When label scan sees old metadata in an mda, that mda is kept
in the lvmcache info->mdas list. This means that vg_read will
read/process it again, and likely see the same mismatch with
the other copies of the metadata. Like the label_scan, the
vg_read will simply ignore the old copy of the metadata and
use the latest copy. If the command is modifying the vg
(e.g. lvcreate), then vg_write, which writes new metadata to
every mda on info->mdas, will write the new metadata to the
mda that had the old version. If successful, this will resolve
the old metadata problem (without needing to run a metadata
repair command.)
Outdated PVs
------------
An outdated PV is a PV that has an old copy of VG metadata
that shows it is a member of the VG, but the latest copy of
the VG metadata does not include this PV. This happens if
the PV is disconnected, vgreduce --removemissing is run to
remove the PV from the VG, then the PV is reconnected.
In this case, the outdated PV needs to have its outdated metadata
removed and the PV used flag needs to be cleared. This repair
will be done by the subsequent repair command. It is also done
if vgremove is run on the VG.
MISSING PVs
-----------
When a device is missing, most commands will refuse to modify
the VG. This is the simple case. More complicated is when
a command is allowed to modify the VG while it is missing a
device.
When a VG is written while a device is missing for one of its PVs,
the VG metadata is written to disk with the MISSING flag on the PV
with the missing device. When the VG is next used, it is treated
as if the PV with the MISSING flag still has a missing device, even
if that device has reappeared.
If all LVs that were using a PV with the MISSING flag are removed
or repaired so that the MISSING PV is no longer used, then the
next time the VG metadata is written, the MISSING flag will be
dropped.
Alternative methods of clearing the MISSING flag are:
vgreduce --removemissing will remove PVs with missing devices,
or PVs with the MISSING flag where the device has reappeared.
vgextend --restoremissing will clear the MISSING flag on PVs
where the device has reappeared, allowing the VG to be used
normally. This must be done with caution since the reappeared
device may have old data that is inconsistent with data on other PVs.
Bad mda repair
--------------
The new command:
vgck --updatemetadata VG
first uses vg_write to repair old metadata, and other basic
issues mentioned above (old metadata, outdated PVs, pv_header
flags, MISSING_PV flags). It will also go further and repair
bad metadata:
. text metadata that has a bad checksum
. text metadata that is not parsable
. corrupt mda_header checksum and version fields
(To keep a clean diff, #if 0 is added around functions that
are replaced by new code. These commented functions are
removed by the following commit.)
2019-05-24 20:04:37 +03:00