2007-07-18 19:38:58 +04:00
/*
2009-09-03 01:39:07 +04:00
* Copyright ( C ) 2001 - 2004 Sistina Software , Inc . All rights reserved .
2018-06-29 12:11:14 +03:00
* Copyright ( C ) 2004 - 2018 Red Hat , Inc . All rights reserved .
2007-07-18 19:38:58 +04:00
*
* This file is part of LVM2 .
*
* This copyrighted material is made available to anyone wishing to use ,
* modify , copy , or redistribute it subject to the terms and conditions
2007-08-21 00:55:30 +04:00
* of the GNU Lesser General Public License v .2 .1 .
2007-07-18 19:38:58 +04:00
*
2007-08-21 00:55:30 +04:00
* You should have received a copy of the GNU Lesser General Public License
2007-07-18 19:38:58 +04:00
* along with this program ; if not , write to the Free Software Foundation ,
2016-01-21 13:49:46 +03:00
* Inc . , 51 Franklin Street , Fifth Floor , Boston , MA 02110 - 1301 USA
2007-07-18 19:38:58 +04:00
*/
/*
* This is the representation of LVM metadata that is being adapted
* for library export .
*/
# ifndef _LVM_METADATA_EXPORTED_H
# define _LVM_METADATA_EXPORTED_H
2018-05-14 12:30:20 +03:00
# include "lib/uuid/uuid.h"
# include "lib/metadata/pv.h"
# include "lib/metadata/vg.h"
# include "lib/metadata/lv.h"
# include "lib/misc/lvm-percent.h"
2021-01-26 21:32:24 +03:00
# include <stdbool.h>
2007-07-18 19:38:58 +04:00
/* Stripe count limit. */
#define MAX_STRIPES	128U
/* log2 of SECTOR_SIZE; shifting by it converts between bytes and sectors. */
#define SECTOR_SHIFT	9L
2010-02-14 06:21:06 +03:00
#define SECTOR_SIZE	(1L << SECTOR_SHIFT)	/* bytes per sector */
2007-07-18 19:38:58 +04:00
/* Smallest stripe size: the page size expressed in sectors. */
#define STRIPE_SIZE_MIN		(((unsigned) lvm_getpagesize()) >> SECTOR_SHIFT)
/* Largest stripe size: 512 KB expressed in sectors. */
#define STRIPE_SIZE_MAX		((512L * 1024L) >> SECTOR_SHIFT)
#define STRIPE_SIZE_LIMIT	((UINT_MAX >> 2) + 1)
/* Referenced by FMT_RESTRICTED_LVIDS. */
#define MAX_RESTRICTED_LVS	255
2011-03-02 23:00:09 +03:00
#define MAX_EXTENT_SIZE	((uint32_t) UINT32_MAX)	/* all 32 bits set */
2014-10-14 21:12:15 +04:00
#define MIN_NON_POWER2_EXTENT_SIZE	(2U * 128U)	/* 128KB expressed in sectors */
2007-07-18 19:38:58 +04:00
2016-03-01 17:20:49 +03:00
#define HISTORICAL_LV_PREFIX	"-"	/* NOTE(review): presumably prepended to names of historical LVs - confirm */
2007-12-22 05:13:00 +03:00
/* Suffix used for the layer */
#define MIRROR_SYNC_LAYER	"_mimagetmp"
2016-02-12 15:20:34 +03:00
/* Flags carried in the PV extension */
#define PV_EXT_USED	UINT32_C(0x00000001)
2007-07-18 19:38:58 +04:00
/* Various flags */
/* Note that the bits no longer necessarily correspond to LVM1 disk format */
2014-09-16 00:33:53 +04:00
#define PARTIAL_VG		UINT64_C(0x0000000000000001)	/* VG flag */
#define EXPORTED_VG		UINT64_C(0x0000000000000002)	/* VG and PV flag */
#define RESIZEABLE_VG		UINT64_C(0x0000000000000004)	/* VG flag */
2011-08-03 02:07:20 +04:00
2007-07-18 19:38:58 +04:00
/* May any free extents on this PV be used or must they be left free? */
2014-09-16 00:33:53 +04:00
#define ALLOCATABLE_PV		UINT64_C(0x0000000000000008)	/* PV flag */
2013-06-30 20:01:19 +04:00
#define ARCHIVED_VG		ALLOCATABLE_PV	/* VG flag; reuses the ALLOCATABLE_PV bit */
2007-07-18 19:38:58 +04:00
2014-09-16 00:33:53 +04:00
//#define SPINDOWN_LV UINT64_C(0x0000000000000010) /* LV */
//#define BADBLOCK_ON UINT64_C(0x0000000000000020) /* LV */
#define VISIBLE_LV		UINT64_C(0x0000000000000040)	/* LV flag */
#define FIXED_MINOR		UINT64_C(0x0000000000000080)	/* LV flag */
2013-06-30 19:06:09 +04:00
2014-09-16 00:33:53 +04:00
#define LVM_READ		UINT64_C(0x0000000000000100)	/* LV and VG flag */
#define LVM_WRITE		UINT64_C(0x0000000000000200)	/* LV and VG flag */
2015-03-09 21:53:22 +03:00
#define LVM_WRITE_LOCKED	UINT64_C(0x0020000000000000)	/* LV and VG flag */
2013-06-30 19:06:09 +04:00
2014-09-16 00:33:53 +04:00
#define CLUSTERED		UINT64_C(0x0000000000000400)	/* VG flag */
//#define SHARED UINT64_C(0x0000000000000800) /* VG */
2013-06-30 19:06:09 +04:00
2007-07-18 19:38:58 +04:00
/* FIXME Remove when metadata restructuring is completed */
2014-09-16 00:33:53 +04:00
#define SNAPSHOT		UINT64_C(0x0000000000001000)	/* LV flag; internal use only */
#define PVMOVE			UINT64_C(0x0000000000002000)	/* VG, LV and SEG flag */
#define LOCKED			UINT64_C(0x0000000000004000)	/* LV flag */
#define MIRRORED		UINT64_C(0x0000000000008000)	/* LV flag; internal use only */
2014-09-18 02:00:41 +04:00
#define VIRTUAL			UINT64_C(0x0000000000010000)	/* LV flag; internal use only */
2014-09-16 03:13:46 +04:00
#define MIRROR			UINT64_C(0x0002000000000000)	/* LV flag; internal use only */
2014-09-16 00:33:53 +04:00
#define MIRROR_LOG		UINT64_C(0x0000000000020000)	/* LV flag; internal use only */
#define MIRROR_IMAGE		UINT64_C(0x0000000000040000)	/* LV flag; internal use only */
#define LV_NOTSYNCED		UINT64_C(0x0000000000080000)	/* LV flag */
#define LV_REBUILD		UINT64_C(0x0000000000100000)	/* LV flag */
//#define PRECOMMITTED UINT64_C(0x0000000000200000) /* VG - internal use only */
#define CONVERTING		UINT64_C(0x0000000000400000)	/* LV flag */
#define MISSING_PV		UINT64_C(0x0000000000800000)	/* PV flag */
2019-11-21 01:07:27 +03:00
#define INTEGRITY		UINT64_C(0x0000000000800000)	/* LV flag; internal use only */
2017-10-06 04:12:42 +03:00
#define PV_MOVED_VG		UINT64_C(0x4000000000000000)	/* PV flag: moved to a new VG */
2014-09-16 00:33:53 +04:00
#define PARTIAL_LV		UINT64_C(0x0000000001000000)	/* LV - derived flag, not
								   written out in metadata */
2020-02-24 22:52:12 +03:00
#define WRITECACHE_ORIGIN	UINT64_C(0x0000000002000000)	/* NOTE(review): scope not annotated; presumably an LV flag - confirm */
2019-11-21 01:07:27 +03:00
#define INTEGRITY_METADATA	UINT64_C(0x0000000004000000)	/* LV flag; internal use only */
2016-01-07 16:30:21 +03:00
#define VIRTUAL_ORIGIN		UINT64_C(0x0000000008000000)	/* LV flag; internal use only */
2011-09-06 22:49:31 +04:00
2014-09-16 00:33:53 +04:00
#define MERGING			UINT64_C(0x0000000010000000)	/* LV and SEG flag */
2011-09-06 22:49:31 +04:00
2014-09-16 00:33:53 +04:00
#define UNLABELLED_PV		UINT64_C(0x0000000080000000)	/* PV flag: no label has been written to this PV yet */
2018-08-27 22:53:09 +03:00
#define WRITECACHE		UINT64_C(0x0000000080000000)	/* LV flag; bit shared with UNLABELLED_PV */
2011-09-06 22:49:31 +04:00
2014-09-16 00:33:53 +04:00
#define RAID			UINT64_C(0x0000000100000000)	/* LV flag; internal use only */
#define RAID_META		UINT64_C(0x0000000200000000)	/* LV flag; internal use only */
#define RAID_IMAGE		UINT64_C(0x0000000400000000)	/* LV flag; internal use only */
2011-09-06 22:49:31 +04:00
2014-09-16 00:33:53 +04:00
#define THIN_VOLUME		UINT64_C(0x0000001000000000)	/* LV flag; internal use only */
#define THIN_POOL		UINT64_C(0x0000002000000000)	/* LV flag; internal use only */
#define THIN_POOL_DATA		UINT64_C(0x0000004000000000)	/* LV flag; internal use only */
#define THIN_POOL_METADATA	UINT64_C(0x0000008000000000)	/* LV flag; internal use only */
#define POOL_METADATA_SPARE	UINT64_C(0x0000010000000000)	/* LV flag; internal use only */
2013-07-05 19:10:47 +04:00
#define LV_WRITEMOSTLY		UINT64_C(0x0000020000000000)	/* LV flag (RAID1) */
RAID: Add writemostly/writebehind support for RAID1
'lvchange' is used to alter a RAID 1 logical volume's write-mostly and
write-behind characteristics. The '--writemostly' parameter takes a
PV as an argument with an optional trailing character to specify whether
to set ('y'), unset ('n'), or toggle ('t') the value. If no trailing
character is given, it will set the flag.
Synopsis:
lvchange [--writemostly <PV>:{t|y|n}] [--writebehind <count>] vg/lv
Example:
lvchange --writemostly /dev/sdb1:y --writebehind 512 vg/raid1_lv
The last character in the 'lv_attr' field is used to show whether a device
has the WriteMostly flag set. It is signified with a 'w'. If the device
has failed, the 'p'artial flag has priority.
Example ("nosync" raid1 with mismatch_cnt and writemostly):
[~]# lvs -a --segment vg
LV VG Attr #Str Type SSize
raid1 vg Rwi---r-m 2 raid1 500.00m
[raid1_rimage_0] vg Iwi---r-- 1 linear 500.00m
[raid1_rimage_1] vg Iwi---r-w 1 linear 500.00m
[raid1_rmeta_0] vg ewi---r-- 1 linear 4.00m
[raid1_rmeta_1] vg ewi---r-- 1 linear 4.00m
Example (raid1 with mismatch_cnt, writemostly - but failed drive):
[~]# lvs -a --segment vg
LV VG Attr #Str Type SSize
raid1 vg rwi---r-p 2 raid1 500.00m
[raid1_rimage_0] vg Iwi---r-- 1 linear 500.00m
[raid1_rimage_1] vg Iwi---r-p 1 linear 500.00m
[raid1_rmeta_0] vg ewi---r-- 1 linear 4.00m
[raid1_rmeta_1] vg ewi---r-p 1 linear 4.00m
A new reportable field has been added for writebehind as well. If
write-behind has not been set or the LV is not RAID1, the field will
be blank.
Example (writebehind is set):
[~]# lvs -a -o name,attr,writebehind vg
LV Attr WBehind
lv rwi-a-r-- 512
[lv_rimage_0] iwi-aor-w
[lv_rimage_1] iwi-aor--
[lv_rmeta_0] ewi-aor--
[lv_rmeta_1] ewi-aor--
Example (writebehind is not set):
[~]# lvs -a -o name,attr,writebehind vg
LV Attr WBehind
lv rwi-a-r--
[lv_rimage_0] iwi-aor-w
[lv_rimage_1] iwi-aor--
[lv_rmeta_0] ewi-aor--
[lv_rmeta_1] ewi-aor--
2013-04-15 22:59:46 +04:00
2013-07-05 19:10:47 +04:00
#define LV_ACTIVATION_SKIP	UINT64_C(0x0000040000000000)	/* LV flag */
activation: flag temporary LVs internally
Add LV_TEMPORARY flag for LVs with limited existence during command
execution. Such LVs are temporary in way that they need to be activated,
some action done and then removed immediately. Such LVs are just like
any normal LV - the only difference is that they are removed during
LVM command execution. This is also the case for LVs representing
future pool metadata spare LVs which we need to initialize by using
the usual LV before they are declared as pool metadata spare.
We can optimize some other parts like udev to do a better job if
it knows that the LV is temporary and any processing on it is just
useless.
This flag is orthogonal to LV_NOSCAN flag introduced recently
as LV_NOSCAN flag is primarily used to mark an LV for the scanning
to be avoided before the zeroing of the device happens. The LV_TEMPORARY
flag makes a difference between a full-fledged LV visible in the system
and the LV just used as a temporary overlay for some action that needs to
be done on underlying PVs.
For example: lvcreate --thinpool POOL --zero n -L 1G vg
- first, the usual LV is created to do a clean up for pool metadata
spare. The LV is activated, zeroed, deactivated.
- between "activated" and "zeroed" stage, the LV_NOSCAN flag is used
to avoid any scanning in udev
- between "zeroed" and "deactivated" stage, we need to avoid the WATCH
udev rule, but since the LV is just a usual LV, we can't make a
difference. The LV_TEMPORARY internal LV flag helps here. If we
create the LV with this flag, the DM_UDEV_DISABLE_DISK_RULES
and DM_UDEV_DISABLE_OTHER_RULES flag are set (just like as it is
with "invisible" and non-top-level LVs) - udev is directed to
skip WATCH rule use.
- if the LV_TEMPORARY flag was not used, there would normally be
a WATCH event generated once the LV is closed after "zeroed"
stage. This will make problems with the immediate deactivation that
follows.
2013-10-23 16:06:39 +04:00
#define LV_NOSCAN		UINT64_C(0x0000080000000000)	/* LV - internal use only - the LV
								   should not be scanned */
#define LV_TEMPORARY		UINT64_C(0x0000100000000000)	/* LV - internal use only - the LV
								   is supposed to be created and
								   removed or reactivated with
								   this flag dropped during single
								   LVM command execution. */
2013-07-10 16:06:50 +04:00
2014-09-16 00:33:53 +04:00
#define CACHE_POOL		UINT64_C(0x0000200000000000)	/* LV flag; internal use only */
#define CACHE_POOL_DATA		UINT64_C(0x0000400000000000)	/* LV flag; internal use only */
#define CACHE_POOL_METADATA	UINT64_C(0x0000800000000000)	/* LV flag; internal use only */
#define CACHE			UINT64_C(0x0001000000000000)	/* LV flag; internal use only */
2014-11-10 12:56:43 +03:00
#define LV_PENDING_DELETE	UINT64_C(0x0004000000000000)	/* LV flag; internal use only */
metadata: process_each_lv_in_vg: get the list of LVs to process first, then do the processing
This avoids a problem in which we're using selection on LV list - we
need to do the selection on initial state and not on any intermediary
state as we process LVs one by one - some of the relations among LVs
can be gone during this processing.
For example, processing one LV can cause the other LVs to lose the
relation to this LV and hence they're not selectable anymore with
the original selection criteria as it would be if we did selection
on initial state. A perfect example is with thin snapshots:
$ lvs -o lv_name,origin,layout,role vg
LV Origin Layout Role
lvol1 thin,sparse public,origin,thinorigin,multithinorigin
lvol2 lvol1 thin,sparse public,snapshot,thinsnapshot
lvol3 lvol1 thin,sparse public,snapshot,thinsnapshot
pool thin,pool private
$ lvremove -ff -S 'lv_name=lvol1 || origin=lvol1'
Logical volume "lvol1" successfully removed
The lvremove command above was supposed to remove lvol1 as well as
all its snapshots which have origin=lvol1. It failed to do so, because
once we removed the origin lvol1, the lvol2 and lvol3 which were
snapshots before are not snapshots anymore - the relations change
as we're processing these LVs one by one.
If we do the selection first and then execute any concrete actions on
these LVs (which is what this patch does), the behaviour is correct
then - the selection is done on the *initial state*:
$ lvremove -ff -S 'lv_name=lvol1 || origin=lvol1'
Logical volume "lvol1" successfully removed
Logical volume "lvol2" successfully removed
Logical volume "lvol3" successfully removed
Similarly for all the other situations in which relations among
LVs are being changed by processing the LVs one by one.
This patch also introduces LV_REMOVED internal LV status flag
to mark removed LVs so they're not processed further when we
iterate over collected list of LVs to be processed.
Previously, when we iterated directly over vg->lvs list to
process the LVs, we relied on the fact that once the LV is removed,
it is also removed from the vg->lvs list we're iterating over.
But that was incorrect as we shouldn't remove LVs from the list
during one iteration while we're iterating over that exact list
(dm_list_iterate_items safe can handle only one removal at
one iteration anyway, so it can't be used here).
2015-03-16 19:10:21 +03:00
#define LV_REMOVED		UINT64_C(0x0040000000000000)	/* LV - Internal use only
								   This flag is used to mark an LV once it has
								   been removed from the VG. It might still
								   be referenced on internal lists of LVs.
								   Any remaining references should check for
								   this flag and ignore the LV if set.
								   FIXME: Remove this flag once we have indexed
									  vg->removed_lvs for quick lookup.
								*/
2015-01-13 17:23:03 +03:00
#define LV_ERROR_WHEN_FULL	UINT64_C(0x0008000000000000)	/* LV flag: error when full */
2015-01-16 15:44:16 +03:00
#define PV_ALLOCATION_PROHIBITED	UINT64_C(0x0010000000000000)	/* PV - internal use only - allocation prohibited
									   e.g. to prohibit allocation of a RAID image
									   on a PV already holding an image of the RAID set */
2015-07-10 01:02:30 +03:00
#define LOCKD_SANLOCK_LV	UINT64_C(0x0080000000000000)	/* LV flag; internal use only */
2017-02-24 02:50:00 +03:00
#define LV_RESHAPE_DELTA_DISKS_PLUS	UINT64_C(0x0100000000000000)	/* LV reshape flag: delta disks plus image(s) */
#define LV_RESHAPE_DELTA_DISKS_MINUS	UINT64_C(0x0200000000000000)	/* LV reshape flag: delta disks minus image(s) */
#define LV_REMOVE_AFTER_RESHAPE		UINT64_C(0x0400000000000000)	/* LV must be removed after a shrinking reshape */
2017-03-01 14:26:56 +03:00
#define LV_METADATA_FORMAT	UINT64_C(0x0800000000000000)	/* the LV has segments with metadata format */
2021-01-12 19:59:29 +03:00
#define LV_CROP_METADATA	UINT64_C(0x0000000000000400)	/* LV flag; same bit as the VG flag CLUSTERED */
2017-06-09 22:31:09 +03:00
#define LV_RESHAPE		UINT64_C(0x1000000000000000)	/* Ongoing reshape (number of stripes, stripesize or raid algorithm change):
								   used as SEGTYPE_FLAG to prevent activation on old runtime */
2017-07-14 16:52:18 +03:00
#define LV_RESHAPE_DATA_OFFSET	UINT64_C(0x2000000000000000)	/* LV reshape flag: data offset (out of place reshaping) */
2018-06-29 12:11:14 +03:00
#define LV_VDO			UINT64_C(0x0000000020000000)	/* LV flag; internal use only */
#define LV_VDO_POOL		UINT64_C(0x0000000040000000)	/* LV flag; internal use only */
#define LV_VDO_POOL_DATA	UINT64_C(0x8000000000000000)	/* LV flag; internal use only */
2019-01-30 18:55:34 +03:00
#define LV_CACHE_VOL		UINT64_C(0x0010000000000000)	/* LV flag; this bit is also a PV flag */
2019-10-14 23:32:13 +03:00
#define LV_CACHE_USES_CACHEVOL	UINT64_C(0x4000000000000000)	/* LV flag; this bit is also a PV flag */
2018-08-17 23:45:52 +03:00
2014-01-28 22:24:51 +04:00
2007-07-18 19:38:58 +04:00
/* Feature flags of a format */
#define FMT_SEGMENTS		0x00000001U	/* Arbitrary segment params? */
2018-04-28 00:22:46 +03:00
// #define FMT_MDAS 0x00000002U /* Proper metadata areas? */
2007-07-18 19:38:58 +04:00
#define FMT_TAGS			0x00000004U	/* Tagging? */
#define FMT_UNLIMITED_VOLS		0x00000008U	/* Unlimited PVs/LVs? */
#define FMT_RESTRICTED_LVIDS		0x00000010U	/* LVID <= 255 */
#define FMT_ORPHAN_ALLOCATABLE		0x00000020U	/* Orphan PV allocatable? */
//#define FMT_PRECOMMIT 0x00000040U /* Supports pre-commit? */
#define FMT_RESIZE_PV			0x00000080U	/* Supports pvresize? */
#define FMT_UNLIMITED_STRIPESIZE	0x00000100U	/* Unlimited stripe size? */
2007-11-09 19:51:54 +03:00
#define FMT_RESTRICTED_READAHEAD	0x00000200U	/* Readahead restricted to 2-120? */
2018-04-28 00:22:46 +03:00
// #define FMT_BAS 0x000000400U /* Supports bootloader areas? */
2013-06-25 14:32:09 +04:00
#define FMT_CONFIG_PROFILE		0x00000800U	/* Supports configuration profiles? */
2018-04-28 00:22:46 +03:00
// #define FMT_OBSOLETE 0x000001000U /* Obsolete format? */
2014-10-14 21:12:15 +04:00
#define FMT_NON_POWER2_EXTENTS		0x00002000U	/* Non-power-of-2 extent sizes? */
2018-04-28 00:22:46 +03:00
// #define FMT_SYSTEMID_ON_PVS 0x000004000U /* System ID is stored on PVs not VG */
2016-02-12 14:58:59 +03:00
#define FMT_PV_FLAGS			0x00008000U	/* Supports PV flags */
2015-02-24 02:03:52 +03:00
2007-12-21 01:37:42 +03:00
/* Flags selecting the mirror conversion type */
#define MIRROR_BY_SEG		0x00000001U	/* segment-by-segment mirror */
2008-01-17 20:17:09 +03:00
#define MIRROR_BY_LV		0x00000002U	/* mirror using whole mimage LVs */
2014-06-19 16:40:47 +04:00
#define MIRROR_BY_SEGMENTED_LV	0x00000004U	/* mirror using whole mimage LVs that
						 * preserve the segment structure */
2008-01-19 01:02:37 +03:00
#define MIRROR_SKIP_INIT_SYNC	0x00000010U	/* skip initial sync */
2007-12-21 01:37:42 +03:00
2009-01-27 01:13:22 +03:00
/* vg_read and vg_read_for_update flags */
2015-10-22 22:56:22 +03:00
#define READ_OK_NOTFOUND	0x00040000U	/* vg_read / vg_read_for_update flag */
2014-10-07 04:04:09 +04:00
#define READ_WARN_INCONSISTENT	0x00080000U	/* vg_read / vg_read_for_update flag */
2019-06-12 00:17:24 +03:00
#define READ_FOR_UPDATE		0x00100000U	/* the command tells vg_read that it plans to write the vg */
#define PROCESS_SKIP_SCAN	0x00200000U	/* process_each_pv skips lvmcache_label_scan */
#define READ_FOR_ACTIVATE	0x00400000U	/* the command tells vg_read that it plans to activate the vg */
2019-08-27 01:07:18 +03:00
#define READ_WITHOUT_LOCK	0x00800000U	/* the caller is responsible for the vg lock */
2009-01-27 01:13:22 +03:00
improve reading and repairing vg metadata
The fact that vg repair is implemented as a part of vg read
has led to a messy and complicated implementation of vg_read,
and limited and uncontrolled repair capability. This splits
read and repair apart.
Summary
-------
- take all kinds of various repairs out of vg_read
- vg_read no longer writes anything
- vg_read now simply reads and returns vg metadata
- vg_read ignores bad or old copies of metadata
- vg_read proceeds with a single good copy of metadata
- improve error checks and handling when reading
- keep track of bad (corrupt) copies of metadata in lvmcache
- keep track of old (seqno) copies of metadata in lvmcache
- keep track of outdated PVs in lvmcache
- vg_write will do basic repairs
- new command vgck --updatemetadata will do all repairs
Details
-------
- In scan, do not delete dev from lvmcache if reading/processing fails;
the dev is still present, and removing it makes it look like the dev
is not there. Records are now kept about the problems with each PV
so they can be fixed/repaired in the appropriate places.
- In scan, record a bad mda on failure, and delete the mda from
mda in use list so it will not be used by vg_read or vg_write,
only by repair.
- In scan, succeed if any good mda on a device is found, instead of
failing if any is bad. The bad/old copies of metadata should not
interfere with normal usage while good copies can be used.
- In scan, add a record of old mdas in lvmcache for later, do not repair
them while reading, and do not let them prevent us from finding and
using a good copy of metadata from elsewhere. One result is that
"inconsistent metadata" is no longer a read error, but instead a
record in lvmcache that can be addressed separate from the read.
- Treat a dev with no good mdas like a dev with no mdas, which is an
existing case we already handle.
- Don't use a fake vg "handle" for returning an error from vg_read,
or the vg_read_error function for getting that error number;
just return null if the vg cannot be read or used, and an error_flags
arg with flags set for the specific kind of error (which can be used
later for determining the kind of repair.)
- Saving an original copy of the vg metadata, for purposes of reverting
a write, is now done explicitly in vg_read instead of being hidden in
the vg_make_handle function.
- When a vg is not accessible due to "access restrictions" but is
otherwise fine, return the vg through the new error_vg arg so that
process_each_pv can skip the PVs in the VG while processing.
(This is a temporary accommodation for the way process_each_pv
tracks which devs have been looked at, and can be dropped later
when process_each_pv implementation dev tracking is changed.)
- vg_read does not try to fix or recover a vg, but now just reads the
metadata, checks access restrictions and returns it.
(Checking access restrictions might be better done outside of vg_read,
but this is a later improvement.)
- _vg_read now simply makes one attempt to read metadata from
each mda, and uses the most recent copy to return to the caller
in the form of a 'vg' struct.
(bad mdas were excluded during the scan and are not retried)
(old mdas were not excluded during scan and are retried here)
- vg_read uses _vg_read to get the latest copy of metadata from mdas,
and then makes various checks against it to produce warnings,
and to check if VG access is allowed (access restrictions include:
writable, foreign, shared, clustered, missing pvs).
- Things that were previously silently/automatically written by vg_read
that are now done by vg_write, based on the records made in lvmcache
during the scan and read:
. clearing the missing flag
. updating old copies of metadata
. clearing outdated pvs
. updating pv header flags
- Bad/corrupt metadata are now repaired; they were not before.
Test changes
------------
- A read command no longer writes the VG to repair it, so add a write
command to do a repair.
(inconsistent-metadata, unlost-pv)
- When a missing PV is removed from a VG, and then the device is
enabled again, vgck --updatemetadata is needed to clear the
outdated PV before it can be used again, where it wasn't before.
(lvconvert-repair-policy, lvconvert-repair-raid, lvconvert-repair,
mirror-vgreduce-removemissing, pv-ext-flags, unlost-pv)
Reading bad/old metadata
------------------------
- "bad metadata": the mda_header or metadata text has invalid fields
or can't be parsed by lvm. This is a form of corruption that would
not be caused by known failure scenarios. A checksum error is
typically included among the errors reported.
- "old metadata": a valid copy of the metadata that has a smaller seqno
than other copies of the metadata. This can happen if the device
failed, or io failed, or lvm failed while committing new metadata
to all the metadata areas. Old metadata on a PV that has been
removed from the VG is the "outdated" case below.
When a VG has some PVs with bad/old metadata, lvm can simply ignore
the bad/old copies, and use a good copy. This is why there are
multiple copies of the metadata -- so it's available even when some
of the copies cannot be used. The bad/old copies do not have to be
repaired before the VG can be used (the repair can happen later.)
A PV with no good copies of the metadata simply falls back to being
treated like a PV with no mdas; a common and harmless configuration.
When bad/old metadata exists, lvm warns the user about it, and
suggests repairing it using a new metadata repair command.
Bad metadata in particular is something that users will want to
investigate and repair themselves, since it should not happen and
may indicate some other problem that needs to be fixed.
PVs with bad/old metadata are not the same as missing devices.
Missing devices will block various kinds of VG modification or
activation, but bad/old metadata will not.
Previously, lvm would attempt to repair bad/old metadata whenever
it was read. This was unnecessary since lvm does not require every
copy of the metadata to be used. It would also hide potential
problems that should be investigated by the user. It was also
dangerous in cases where the VG was on shared storage. The user
is now allowed to investigate potential problems and decide how
and when to repair them.
Repairing bad/old metadata
--------------------------
When label scan sees bad metadata in an mda, that mda is removed
from the lvmcache info->mdas list. This means that vg_read will
skip it, and not attempt to read/process it again. If it was
the only in-use mda on a PV, that PV is treated like a PV with
no mdas. It also means that vg_write will also skip the bad mda,
and not attempt to write new metadata to it. The only way to
repair bad metadata is with the metadata repair command.
When label scan sees old metadata in an mda, that mda is kept
in the lvmcache info->mdas list. This means that vg_read will
read/process it again, and likely see the same mismatch with
the other copies of the metadata. Like the label_scan, the
vg_read will simply ignore the old copy of the metadata and
use the latest copy. If the command is modifying the vg
(e.g. lvcreate), then vg_write, which writes new metadata to
every mda on info->mdas, will write the new metadata to the
mda that had the old version. If successful, this will resolve
the old metadata problem (without needing to run a metadata
repair command.)
Outdated PVs
------------
An outdated PV is a PV that has an old copy of VG metadata
that shows it is a member of the VG, but the latest copy of
the VG metadata does not include this PV. This happens if
the PV is disconnected, vgreduce --removemissing is run to
remove the PV from the VG, then the PV is reconnected.
In this case, the outdated PV needs to have its outdated metadata
removed and the PV used flag needs to be cleared. This repair
will be done by the subsequent repair command. It is also done
if vgremove is run on the VG.
MISSING PVs
-----------
When a device is missing, most commands will refuse to modify
the VG. This is the simple case. More complicated is when
a command is allowed to modify the VG while it is missing a
device.
When a VG is written while a device is missing for one of its PVs,
the VG metadata is written to disk with the MISSING flag on the PV
with the missing device. When the VG is next used, it is treated
as if the PV with the MISSING flag still has a missing device, even
if that device has reappeared.
If all LVs that were using a PV with the MISSING flag are removed
or repaired so that the MISSING PV is no longer used, then the
next time the VG metadata is written, the MISSING flag will be
dropped.
Alternative methods of clearing the MISSING flag are:
vgreduce --removemissing will remove PVs with missing devices,
or PVs with the MISSING flag where the device has reappeared.
vgextend --restoremissing will clear the MISSING flag on PVs
where the device has reappeared, allowing the VG to be used
normally. This must be done with caution since the reappeared
device may have old data that is inconsistent with data on other PVs.
Bad mda repair
--------------
The new command:
vgck --updatemetadata VG
first uses vg_write to repair old metadata, and other basic
issues mentioned above (old metadata, outdated PVs, pv_header
flags, MISSING_PV flags). It will also go further and repair
bad metadata:
. text metadata that has a bad checksum
. text metadata that is not parsable
. corrupt mda_header checksum and version fields
(To keep a clean diff, #if 0 is added around functions that
are replaced by new code. These commented functions are
removed by the following commit.)
2019-05-24 20:04:37 +03:00
/* Values vg_read reports through error_flags */
#define FAILED_NOT_ENABLED	0x00000001U
2009-01-27 01:13:22 +03:00
#define FAILED_LOCKING		0x00000002U
#define FAILED_NOTFOUND		0x00000004U
#define FAILED_READ_ONLY	0x00000008U
#define FAILED_EXPORTED		0x00000010U
#define FAILED_RESIZEABLE	0x00000020U
#define FAILED_CLUSTERED	0x00000040U
#define FAILED_ALLOCATION	0x00000080U
Add vg_lock_newname() library function.
Various tools need to check for existence of a VG before doing something
(vgsplit, vgrename, vgcreate). Currently we don't have an interface to
check for existence, but the existence check is part of the vg_read* call(s).
This patch is an attempt to pull out some of that functionality into a
separate function, and hopefully simplify our vg_read interface, and
move those patches along.
vg_lock_newname() is only concerned about checking whether a vg exists in
the system. Unfortunately, we cannot just scan the system, but we must first
obtain a lock. Since we are reserving a vgname, we take a WRITE lock on
the vgname. Once obtained, we scan the system to ensure the name does
not exist. The return codes and behavior is in the function header.
You might think of this function as similar to an open() call with
O_CREAT and O_EXCL flags (returns failure with -EEXIST if file already
exists).
NOTE: I think including the word "lock" in the function name is important,
as it clearly states the function obtains a lock and makes the code more
readable, especially when it comes to cleanup / unlocking. The ultimate
function name is somewhat open for debate though so later we may rename.
2009-06-09 18:29:10 +04:00
#define FAILED_EXIST		(1U << 8)	/* error_flags bit 8 */
2014-11-14 16:55:57 +03:00
#define FAILED_RECOVERY		(1U << 9)	/* error_flags bit 9 */
2014-10-24 21:29:04 +04:00
#define FAILED_SYSTEMID		(1U << 10)	/* error_flags bit 10 */
system_id: make new VGs read-only for old lvm versions
Previous versions of lvm will not obey the restrictions
imposed by the new system_id, and would allow such a VG
to be written. So, a VG with a new system_id is further
changed to force previous lvm versions to treat it as
read-only. This is done by removing the WRITE flag from
the metadata status line of these VGs, and putting a new
WRITE_LOCKED flag in the flags line of the metadata.
Versions of lvm that recognize WRITE_LOCKED, also obey the
new system_id. For these lvm versions, WRITE_LOCKED is
identical to WRITE, and the rules associated with matching
system_id's are imposed.
A new VG lock_type field is also added that causes the same
WRITE/WRITE_LOCKED transformation when set. A previous
version of lvm will also see a VG with lock_type as read-only.
Versions of lvm that recognize WRITE_LOCKED, must also obey
the lock_type setting. Until the lock_type feature is added,
lvm will fail to read any VG with lock_type set and report an
error about an unsupported lock_type. Once the lock_type
feature is added, lvm will allow VGs with lock_type to be
used according to the rules imposed by the lock_type.
When both system_id and lock_type settings are removed, a VG
is written with the old WRITE status flag, and without the
new WRITE_LOCKED flag. This allows old versions of lvm to
use the VG as before.
2015-03-04 20:30:53 +03:00
#define FAILED_LOCK_TYPE	(1U << 11)	/* error_flags bit 11 */
2015-07-14 19:36:04 +03:00
#define FAILED_LOCK_MODE	(1U << 12)	/* error_flags bit 12 */
improve reading and repairing vg metadata
The fact that vg repair is implemented as a part of vg read
has led to a messy and complicated implementation of vg_read,
and limited and uncontrolled repair capability. This splits
read and repair apart.
Summary
-------
- take all kinds of various repairs out of vg_read
- vg_read no longer writes anything
- vg_read now simply reads and returns vg metadata
- vg_read ignores bad or old copies of metadata
- vg_read proceeds with a single good copy of metadata
- improve error checks and handling when reading
- keep track of bad (corrupt) copies of metadata in lvmcache
- keep track of old (seqno) copies of metadata in lvmcache
- keep track of outdated PVs in lvmcache
- vg_write will do basic repairs
- new command vgck --updatemetadata will do all repairs
Details
-------
- In scan, do not delete dev from lvmcache if reading/processing fails;
the dev is still present, and removing it makes it look like the dev
is not there. Records are now kept about the problems with each PV
so they can be fixed/repaired in the appropriate places.
- In scan, record a bad mda on failure, and delete the mda from
mda in use list so it will not be used by vg_read or vg_write,
only by repair.
- In scan, succeed if any good mda on a device is found, instead of
failing if any is bad. The bad/old copies of metadata should not
interfere with normal usage while good copies can be used.
- In scan, add a record of old mdas in lvmcache for later, do not repair
them while reading, and do not let them prevent us from finding and
using a good copy of metadata from elsewhere. One result is that
"inconsistent metadata" is no longer a read error, but instead a
record in lvmcache that can be addressed separate from the read.
- Treat a dev with no good mdas like a dev with no mdas, which is an
existing case we already handle.
- Don't use a fake vg "handle" for returning an error from vg_read,
or the vg_read_error function for getting that error number;
just return null if the vg cannot be read or used, and an error_flags
arg with flags set for the specific kind of error (which can be used
later for determining the kind of repair.)
- Saving an original copy of the vg metadata, for purposes of reverting
a write, is now done explicitly in vg_read instead of being hidden in
the vg_make_handle function.
- When a vg is not accessible due to "access restrictions" but is
otherwise fine, return the vg through the new error_vg arg so that
process_each_pv can skip the PVs in the VG while processing.
(This is a temporary accommodation for the way process_each_pv
tracks which devs have been looked at, and can be dropped later
when process_each_pv implementation dev tracking is changed.)
- vg_read does not try to fix or recover a vg, but now just reads the
metadata, checks access restrictions and returns it.
(Checking access restrictions might be better done outside of vg_read,
but this is a later improvement.)
- _vg_read now simply makes one attempt to read metadata from
each mda, and uses the most recent copy to return to the caller
in the form of a 'vg' struct.
(bad mdas were excluded during the scan and are not retried)
(old mdas were not excluded during scan and are retried here)
- vg_read uses _vg_read to get the latest copy of metadata from mdas,
and then makes various checks against it to produce warnings,
and to check if VG access is allowed (access restrictions include:
writable, foreign, shared, clustered, missing pvs).
- Things that were previously silently/automatically written by vg_read
that are now done by vg_write, based on the records made in lvmcache
during the scan and read:
. clearing the missing flag
. updating old copies of metadata
. clearing outdated pvs
. updating pv header flags
- Bad/corrupt metadata are now repaired; they were not before.
Test changes
------------
- A read command no longer writes the VG to repair it, so add a write
command to do a repair.
(inconsistent-metadata, unlost-pv)
- When a missing PV is removed from a VG, and then the device is
enabled again, vgck --updatemetadata is needed to clear the
outdated PV before it can be used again, where it wasn't before.
(lvconvert-repair-policy, lvconvert-repair-raid, lvconvert-repair,
mirror-vgreduce-removemissing, pv-ext-flags, unlost-pv)
Reading bad/old metadata
------------------------
- "bad metadata": the mda_header or metadata text has invalid fields
or can't be parsed by lvm. This is a form of corruption that would
not be caused by known failure scenarios. A checksum error is
typically included among the errors reported.
- "old metadata": a valid copy of the metadata that has a smaller seqno
than other copies of the metadata. This can happen if the device
failed, or io failed, or lvm failed while committing new metadata
to all the metadata areas. Old metadata on a PV that has been
removed from the VG is the "outdated" case below.
When a VG has some PVs with bad/old metadata, lvm can simply ignore
the bad/old copies, and use a good copy. This is why there are
multiple copies of the metadata -- so it's available even when some
of the copies cannot be used. The bad/old copies do not have to be
repaired before the VG can be used (the repair can happen later.)
A PV with no good copies of the metadata simply falls back to being
treated like a PV with no mdas; a common and harmless configuration.
When bad/old metadata exists, lvm warns the user about it, and
suggests repairing it using a new metadata repair command.
Bad metadata in particular is something that users will want to
investigate and repair themselves, since it should not happen and
may indicate some other problem that needs to be fixed.
PVs with bad/old metadata are not the same as missing devices.
Missing devices will block various kinds of VG modification or
activation, but bad/old metadata will not.
Previously, lvm would attempt to repair bad/old metadata whenever
it was read. This was unnecessary since lvm does not require every
copy of the metadata to be used. It would also hide potential
problems that should be investigated by the user. It was also
dangerous in cases where the VG was on shared storage. The user
is now allowed to investigate potential problems and decide how
and when to repair them.
Repairing bad/old metadata
--------------------------
When label scan sees bad metadata in an mda, that mda is removed
from the lvmcache info->mdas list. This means that vg_read will
skip it, and not attempt to read/process it again. If it was
the only in-use mda on a PV, that PV is treated like a PV with
no mdas. It also means that vg_write will also skip the bad mda,
and not attempt to write new metadata to it. The only way to
repair bad metadata is with the metadata repair command.
When label scan sees old metadata in an mda, that mda is kept
in the lvmcache info->mdas list. This means that vg_read will
read/process it again, and likely see the same mismatch with
the other copies of the metadata. Like the label_scan, the
vg_read will simply ignore the old copy of the metadata and
use the latest copy. If the command is modifying the vg
(e.g. lvcreate), then vg_write, which writes new metadata to
every mda on info->mdas, will write the new metadata to the
mda that had the old version. If successful, this will resolve
the old metadata problem (without needing to run a metadata
repair command.)
Outdated PVs
------------
An outdated PV is a PV that has an old copy of VG metadata
that shows it is a member of the VG, but the latest copy of
the VG metadata does not include this PV. This happens if
the PV is disconnected, vgreduce --removemissing is run to
remove the PV from the VG, then the PV is reconnected.
In this case, the outdated PV needs to have its outdated metadata
removed and the PV used flag needs to be cleared. This repair
will be done by the subsequent repair command. It is also done
if vgremove is run on the VG.
MISSING PVs
-----------
When a device is missing, most commands will refuse to modify
the VG. This is the simple case. More complicated is when
a command is allowed to modify the VG while it is missing a
device.
When a VG is written while a device is missing for one of its PVs,
the VG metadata is written to disk with the MISSING flag on the PV
with the missing device. When the VG is next used, it is treated
as if the PV with the MISSING flag still has a missing device, even
if that device has reappeared.
If all LVs that were using a PV with the MISSING flag are removed
or repaired so that the MISSING PV is no longer used, then the
next time the VG metadata is written, the MISSING flag will be
dropped.
Alternative methods of clearing the MISSING flag are:
vgreduce --removemissing will remove PVs with missing devices,
or PVs with the MISSING flag where the device has reappeared.
vgextend --restoremissing will clear the MISSING flag on PVs
where the device has reappeared, allowing the VG to be used
normally. This must be done with caution since the reappeared
device may have old data that is inconsistent with data on other PVs.
Bad mda repair
--------------
The new command:
vgck --updatemetadata VG
first uses vg_write to repair old metadata, and other basic
issues mentioned above (old metadata, outdated PVs, pv_header
flags, MISSING_PV flags). It will also go further and repair
bad metadata:
. text metadata that has a bad checksum
. text metadata that is not parsable
. corrupt mda_header checksum and version fields
(To keep a clean diff, #if 0 is added around functions that
are replaced by new code. These commented functions are
removed by the following commit.)
2019-05-24 20:04:37 +03:00
#define FAILED_INTERNAL_ERROR	(1U << 13)	/* error_flags bit 13 */
Add vg_lock_newname() library function.
Various tools need to check for existence of a VG before doing something
(vgsplit, vgrename, vgcreate). Currently we don't have an interface to
check for existence, but the existence check is part of the vg_read* call(s).
This patch is an attempt to pull out some of that functionality into a
separate function, and hopefully simplify our vg_read interface, and
move those patches along.
vg_lock_newname() is only concerned about checking whether a vg exists in
the system. Unfortunately, we cannot just scan the system, but we must first
obtain a lock. Since we are reserving a vgname, we take a WRITE lock on
the vgname. Once obtained, we scan the system to ensure the name does
not exist. The return codes and behavior are described in the function header.
You might think of this function as similar to an open() call with
O_CREAT and O_EXCL flags (returns failure with -EEXIST if file already
exists).
NOTE: I think including the word "lock" in the function name is important,
as it clearly states the function obtains a lock and makes the code more
readable, especially when it comes to cleanup / unlocking. The ultimate
function name is somewhat open for debate though so later we may rename.
2009-06-09 18:29:10 +04:00
#define SUCCESS			0U	/* no bit set */
2009-01-27 01:13:22 +03:00
2010-06-30 23:28:35 +04:00
/*
 * Special VGMETADATACOPIES_* values: ALL is the largest uint32_t value,
 * UNMANAGED is plain 0.
 * NOTE(review): how callers interpret these values is not visible here.
 */
# define VGMETADATACOPIES_ALL UINT32_MAX
# define VGMETADATACOPIES_UNMANAGED 0
2013-06-30 20:01:19 +04:00
/* 1 if ARCHIVED_VG is set in (vg)->status, else 0. */
#define vg_is_archived(vg) ((((vg)->status) & (ARCHIVED_VG)) != 0)
2014-09-16 00:33:53 +04:00
/* 1 if LOCKED is set in (lv)->status, else 0. */
#define lv_is_locked(lv) ((((lv)->status) & (LOCKED)) != 0)
2016-03-02 22:59:03 +03:00
/* 1 if PARTIAL_LV is set in (lv)->status, else 0. */
#define lv_is_partial(lv) ((((lv)->status) & (PARTIAL_LV)) != 0)
2014-09-16 00:33:53 +04:00
/* Each yields 1 if the named bit is set in (lv)->status, else 0. */
#define lv_is_virtual(lv) ((((lv)->status) & (VIRTUAL)) != 0)
#define lv_is_merging(lv) ((((lv)->status) & (MERGING)) != 0)
2016-12-22 23:15:31 +03:00
/* 1 if lv_is_merging(lv) holds and (lv)->snapshot is non-null, else 0. */
#define lv_is_merging_origin(lv) (lv_is_merging(lv) ? ((lv)->snapshot != 0) : 0)
2016-01-07 16:30:21 +03:00
/* 1 if SNAPSHOT is set in (lv)->status, else 0. */
#define lv_is_snapshot(lv) ((((lv)->status) & (SNAPSHOT)) != 0)
2014-09-16 00:33:53 +04:00
/* 1 if CONVERTING is set in (lv)->status, else 0. */
#define lv_is_converting(lv) ((((lv)->status) & (CONVERTING)) != 0)
2013-02-21 13:25:44 +04:00
/* 1 if (lv)->external_count is greater than zero, else 0. */
#define lv_is_external_origin(lv) ((lv)->external_count > 0)
2016-01-07 16:30:21 +03:00
/* 1 if VIRTUAL_ORIGIN is set in (lv)->status, else 0. */
#define lv_is_virtual_origin(lv) ((((lv)->status) & (VIRTUAL_ORIGIN)) != 0)
2014-06-27 02:02:58 +04:00
2014-09-16 00:33:53 +04:00
/* Each yields 1 if the named bit is set in (lv)->status, else 0. */
#define lv_is_thin_volume(lv) ((((lv)->status) & (THIN_VOLUME)) != 0)
#define lv_is_thin_pool(lv) ((((lv)->status) & (THIN_POOL)) != 0)
2014-11-04 12:31:28 +03:00
/* 1 if lv is a thin pool whose first segment has transaction_id 0, else 0. */
#define lv_is_new_thin_pool(lv) (lv_is_thin_pool(lv) && (first_seg(lv)->transaction_id == 0))
2014-09-16 00:33:53 +04:00
/* 1 if lv is a thin pool and its segs_using_this_lv list is not empty. */
#define lv_is_used_thin_pool(lv) (lv_is_thin_pool(lv) && (dm_list_empty(&(lv)->segs_using_this_lv) == 0))
/* Each yields 1 if the named bit is set in (lv)->status, else 0. */
#define lv_is_thin_pool_data(lv) ((((lv)->status) & (THIN_POOL_DATA)) != 0)
#define lv_is_thin_pool_metadata(lv) ((((lv)->status) & (THIN_POOL_METADATA)) != 0)
2014-06-27 02:02:58 +04:00
/* 1 if any of the four thin-related bits is set in (lv)->status, else 0. */
#define lv_is_thin_type(lv) ((((lv)->status) & (THIN_POOL | THIN_VOLUME | THIN_POOL_DATA | THIN_POOL_METADATA)) != 0)
2014-09-16 00:33:53 +04:00
/* Each yields 1 if the named bit is set in (lv)->status, else 0. */
#define lv_is_mirrored(lv) ((((lv)->status) & (MIRRORED)) != 0)
#define lv_is_mirror_image(lv) ((((lv)->status) & (MIRROR_IMAGE)) != 0)
#define lv_is_mirror_log(lv) ((((lv)->status) & (MIRROR_LOG)) != 0)
2014-09-16 03:13:46 +04:00
/* 1 if MIRROR is set in (lv)->status, else 0. */
#define lv_is_mirror(lv) ((((lv)->status) & (MIRROR)) != 0)
/* 1 if any of MIRROR, MIRROR_LOG or MIRROR_IMAGE is set, else 0. */
#define lv_is_mirror_type(lv) ((((lv)->status) & (MIRROR | MIRROR_LOG | MIRROR_IMAGE)) != 0)
2016-07-14 16:21:01 +03:00
/* 1 if LV_NOTSYNCED is set in (lv)->status, else 0. */
#define lv_is_not_synced(lv) ((((lv)->status) & (LV_NOTSYNCED)) != 0)
2014-09-16 00:33:53 +04:00
2014-11-10 12:56:43 +03:00
/* 1 if LV_PENDING_DELETE is set in (lv)->status, else 0. */
#define lv_is_pending_delete(lv) ((((lv)->status) & (LV_PENDING_DELETE)) != 0)
2015-01-15 17:19:00 +03:00
/* 1 if LV_ERROR_WHEN_FULL is set in (lv)->status, else 0. */
#define lv_is_error_when_full(lv) ((((lv)->status) & (LV_ERROR_WHEN_FULL)) != 0)
2014-09-16 00:33:53 +04:00
/* Each yields 1 if the named bit is set in (lv)->status, else 0. */
#define lv_is_pvmove(lv) ((((lv)->status) & (PVMOVE)) != 0)
#define lv_is_raid(lv) ((((lv)->status) & (RAID)) != 0)
#define lv_is_raid_image(lv) ((((lv)->status) & (RAID_IMAGE)) != 0)
2016-12-14 23:47:38 +03:00
/* 1 if lv is a raid image and LVM_WRITE is NOT set in (lv)->status, else 0. */
#define lv_is_raid_image_with_tracking(lv) (lv_is_raid_image(lv) && ((((lv)->status) & (LVM_WRITE)) == 0))
2014-09-16 00:33:53 +04:00
/* 1 if RAID_META is set in (lv)->status, else 0. */
#define lv_is_raid_metadata(lv) ((((lv)->status) & (RAID_META)) != 0)
2013-06-17 21:46:22 +04:00
/* 1 if any of RAID, RAID_IMAGE or RAID_META is set in (lv)->status, else 0. */
#define lv_is_raid_type(lv) ((((lv)->status) & (RAID | RAID_IMAGE | RAID_META)) != 0)
2011-09-09 00:55:39 +04:00
2014-09-16 00:33:53 +04:00
/* Each yields 1 if the named bit is set in (lv)->status, else 0. */
#define lv_is_cache(lv) ((((lv)->status) & (CACHE)) != 0)
#define lv_is_cache_pool(lv) ((((lv)->status) & (CACHE_POOL)) != 0)
2019-01-30 18:55:34 +03:00
/* 1 if LV_CACHE_VOL is set in (lv)->status, else 0. */
#define lv_is_cache_vol(lv) ((((lv)->status) & (LV_CACHE_VOL)) != 0)
2016-05-24 16:19:01 +03:00
/* 1 if lv is a cache pool and its segs_using_this_lv list is not empty. */
#define lv_is_used_cache_pool(lv) (lv_is_cache_pool(lv) && (dm_list_empty(&(lv)->segs_using_this_lv) == 0))
2014-09-16 00:33:53 +04:00
/* Each yields 1 if the named bit is set in (lv)->status, else 0. */
#define lv_is_cache_pool_data(lv) ((((lv)->status) & (CACHE_POOL_DATA)) != 0)
#define lv_is_cache_pool_metadata(lv) ((((lv)->status) & (CACHE_POOL_METADATA)) != 0)
2019-01-30 18:55:34 +03:00
/* 1 if any of the five cache-related bits is set in (lv)->status, else 0. */
#define lv_is_cache_type(lv) ((((lv)->status) & (CACHE | CACHE_POOL | LV_CACHE_VOL | CACHE_POOL_DATA | CACHE_POOL_METADATA)) != 0)
2014-01-28 22:24:51 +04:00
2014-07-08 00:14:08 +04:00
/* 1 if CACHE_POOL or THIN_POOL is set in (lv)->status, else 0. */
#define lv_is_pool(lv) ((((lv)->status) & (CACHE_POOL | THIN_POOL)) != 0)
2014-10-03 20:48:42 +04:00
/* 1 if CACHE_POOL_DATA or THIN_POOL_DATA is set in (lv)->status, else 0. */
#define lv_is_pool_data(lv) ((((lv)->status) & (CACHE_POOL_DATA | THIN_POOL_DATA)) != 0)
2014-07-08 00:14:08 +04:00
/* 1 if CACHE_POOL_METADATA or THIN_POOL_METADATA is set in (lv)->status, else 0. */
#define lv_is_pool_metadata(lv) ((((lv)->status) & (CACHE_POOL_METADATA | THIN_POOL_METADATA)) != 0)
2014-09-16 00:33:53 +04:00
/* 1 if POOL_METADATA_SPARE is set in (lv)->status, else 0. */
#define lv_is_pool_metadata_spare(lv) ((((lv)->status) & (POOL_METADATA_SPARE)) != 0)
2015-03-05 23:00:44 +03:00
/* 1 if LOCKD_SANLOCK_LV is set in (lv)->status, else 0. */
#define lv_is_lockd_sanlock_lv(lv) ((((lv)->status) & (LOCKD_SANLOCK_LV)) != 0)
2018-08-27 22:53:09 +03:00
/* 1 if WRITECACHE is set in (lv)->status, else 0. */
#define lv_is_writecache(lv) ((((lv)->status) & (WRITECACHE)) != 0)
2019-11-21 01:07:27 +03:00
/* Each yields 1 if the named bit is set in (lv)->status, else 0. */
#define lv_is_integrity(lv) ((((lv)->status) & (INTEGRITY)) != 0)
#define lv_is_integrity_metadata(lv) ((((lv)->status) & (INTEGRITY_METADATA)) != 0)
2014-09-16 00:33:53 +04:00
2018-06-29 12:11:14 +03:00
/* Each of the first three yields 1 if the named bit is set in (lv)->status, else 0. */
#define lv_is_vdo(lv) ((((lv)->status) & (LV_VDO)) != 0)
#define lv_is_vdo_pool(lv) ((((lv)->status) & (LV_VDO_POOL)) != 0)
#define lv_is_vdo_pool_data(lv) ((((lv)->status) & (LV_VDO_POOL_DATA)) != 0)
/* 1 if any of the three VDO bits above is set, else 0. */
#define lv_is_vdo_type(lv) ((((lv)->status) & (LV_VDO | LV_VDO_POOL | LV_VDO_POOL_DATA)) != 0)
metadata: process_each_lv_in_vg: get the list of LVs to process first, then do the processing
This avoids a problem in which we're using selection on LV list - we
need to do the selection on initial state and not on any intermediary
state as we process LVs one by one - some of the relations among LVs
can be gone during this processing.
For example, processing one LV can cause the other LVs to lose the
relation to this LV and hence they're not selectable anymore with
the original selection criteria as it would be if we did selection
on initial state. A perfect example is with thin snapshots:
$ lvs -o lv_name,origin,layout,role vg
LV Origin Layout Role
lvol1 thin,sparse public,origin,thinorigin,multithinorigin
lvol2 lvol1 thin,sparse public,snapshot,thinsnapshot
lvol3 lvol1 thin,sparse public,snapshot,thinsnapshot
pool thin,pool private
$ lvremove -ff -S 'lv_name=lvol1 || origin=lvol1'
Logical volume "lvol1" successfully removed
The lvremove command above was supposed to remove lvol1 as well as
all its snapshots which have origin=lvol1. It failed to do so, because
once we removed the origin lvol1, the lvol2 and lvol3 which were
snapshots before are not snapshots anymore - the relations change
as we're processing these LVs one by one.
If we do the selection first and then execute any concrete actions on
these LVs (which is what this patch does), the behaviour is correct
then - the selection is done on the *initial state*:
$ lvremove -ff -S 'lv_name=lvol1 || origin=lvol1'
Logical volume "lvol1" successfully removed
Logical volume "lvol2" successfully removed
Logical volume "lvol3" successfully removed
Similarly for all the other situations in which relations among
LVs are being changed by processing the LVs one by one.
This patch also introduces LV_REMOVED internal LV status flag
to mark removed LVs so they're not processed further when we
iterate over collected list of LVs to be processed.
Previously, when we iterated directly over vg->lvs list to
process the LVs, we relied on the fact that once the LV is removed,
it is also removed from the vg->lvs list we're iterating over.
But that was incorrect as we shouldn't remove LVs from the list
during one iteration while we're iterating over that exact list
(dm_list_iterate_items safe can handle only one removal at
one iteration anyway, so it can't be used here).
2015-03-16 19:10:21 +03:00
/* 1 if LV_REMOVED is set in (lv)->status, else 0. */
#define lv_is_removed(lv) ((((lv)->status) & (LV_REMOVED)) != 0)
2018-02-27 16:13:00 +03:00
/* Recognize component LV (matching lib/misc/lvm-string.c _lvname_has_reserved_component_string()) */
2019-09-20 22:04:18 +03:00
# define lv_is_component(lv) (lv_is_cache_origin(lv) || \
lv_is_writecache_origin ( lv ) | | \
2019-11-21 01:07:27 +03:00
lv_is_integrity_origin ( lv ) | | \
2019-09-20 22:04:18 +03:00
( ( lv ) - > status & ( \
CACHE_POOL_DATA | \
CACHE_POOL_METADATA | \
2019-11-21 01:07:27 +03:00
INTEGRITY_METADATA | \
2019-09-20 22:04:18 +03:00
LV_CACHE_VOL | \
LV_VDO_POOL_DATA | \
MIRROR_IMAGE | \
MIRROR_LOG | \
RAID_IMAGE | \
RAID_META | \
THIN_POOL_DATA | \
THIN_POOL_METADATA ) ) ? 1 : 0 )
2018-02-27 16:13:00 +03:00
2014-08-25 11:07:03 +04:00
/*
 * Declaration only; the definition is not in this header.
 * NOTE(review): presumably stores lists describing lv's layout and role in
 * *layout and *role, allocated from mem, and returns non-zero on success -
 * confirm against the definition.
 */
int lv_layout_and_role ( struct dm_pool * mem , const struct logical_volume * lv ,
struct dm_list * * layout , struct dm_list * * role ) ;
Add lv_layout_and_type fn, lv_layout and lv_type reporting fields.
The lv_layout and lv_type fields together help with LV identification.
We can do basic identification using the lv_attr field which provides
very condensed view. In contrast to that, the new lv_layout and lv_type
fields provide more detailed information on exact layout and type used
for LVs.
For top-level LVs which are pure types not combined with any
other LV types, the lv_layout value is equal to lv_type value.
For non-top-level LVs which may be combined with other types,
the lv_layout describes the underlying layout used, while the
lv_type describes the use/type/usage of the LV.
These two new fields are both string lists so selection (-S/--select)
criteria can be defined using the list operators easily:
[] for strict matching
{} for subset matching.
For example, let's consider this:
$ lvs -a -o name,vg_name,lv_attr,layout,type
LV VG Attr Layout Type
[lvol1_pmspare] vg ewi------- linear metadata,pool,spare
pool vg twi-a-tz-- pool,thin pool,thin
[pool_tdata] vg rwi-aor--- level10,raid data,pool,thin
[pool_tdata_rimage_0] vg iwi-aor--- linear image,raid
[pool_tdata_rimage_1] vg iwi-aor--- linear image,raid
[pool_tdata_rimage_2] vg iwi-aor--- linear image,raid
[pool_tdata_rimage_3] vg iwi-aor--- linear image,raid
[pool_tdata_rmeta_0] vg ewi-aor--- linear metadata,raid
[pool_tdata_rmeta_1] vg ewi-aor--- linear metadata,raid
[pool_tdata_rmeta_2] vg ewi-aor--- linear metadata,raid
[pool_tdata_rmeta_3] vg ewi-aor--- linear metadata,raid
[pool_tmeta] vg ewi-aor--- level1,raid metadata,pool,thin
[pool_tmeta_rimage_0] vg iwi-aor--- linear image,raid
[pool_tmeta_rimage_1] vg iwi-aor--- linear image,raid
[pool_tmeta_rmeta_0] vg ewi-aor--- linear metadata,raid
[pool_tmeta_rmeta_1] vg ewi-aor--- linear metadata,raid
thin_snap1 vg Vwi---tz-k thin snapshot,thin
thin_snap2 vg Vwi---tz-k thin snapshot,thin
thin_vol1 vg Vwi-a-tz-- thin thin
thin_vol2 vg Vwi-a-tz-- thin multiple,origin,thin
Which is a situation with thin pool, thin volumes and thin snapshots.
We can see internal 'pool_tdata' volume that makes up thin pool has
actually a level10 raid layout and the internal 'pool_tmeta' has
level1 raid layout. Also, we can see that 'thin_snap1' and 'thin_snap2'
are both thin snapshots while 'thin_vol2' is thin origin (having
multiple snapshots).
Such reporting scheme provides much better base for selection criteria
in addition to providing more detailed information, for example:
$ lvs -a -o name,vg_name,lv_attr,layout,type -S 'type=metadata'
LV VG Attr Layout Type
[lvol1_pmspare] vg ewi------- linear metadata,pool,spare
[pool_tdata_rmeta_0] vg ewi-aor--- linear metadata,raid
[pool_tdata_rmeta_1] vg ewi-aor--- linear metadata,raid
[pool_tdata_rmeta_2] vg ewi-aor--- linear metadata,raid
[pool_tdata_rmeta_3] vg ewi-aor--- linear metadata,raid
[pool_tmeta] vg ewi-aor--- level1,raid metadata,pool,thin
[pool_tmeta_rmeta_0] vg ewi-aor--- linear metadata,raid
[pool_tmeta_rmeta_1] vg ewi-aor--- linear metadata,raid
(selected all LVs which are related to metadata of any type)
lvs -a -o name,vg_name,lv_attr,layout,type -S 'type={metadata,thin}'
LV VG Attr Layout Type
[pool_tmeta] vg ewi-aor--- level1,raid metadata,pool,thin
(selected all LVs which hold metadata related to thin)
lvs -a -o name,vg_name,lv_attr,layout,type -S 'type={thin,snapshot}'
LV VG Attr Layout Type
thin_snap1 vg Vwi---tz-k thin snapshot,thin
thin_snap2 vg Vwi---tz-k thin snapshot,thin
(selected all LVs which are thin snapshots)
lvs -a -o name,vg_name,lv_attr,layout,type -S 'layout=raid'
LV VG Attr Layout Type
[pool_tdata] vg rwi-aor--- level10,raid data,pool,thin
[pool_tmeta] vg ewi-aor--- level1,raid metadata,pool,thin
(selected all LVs with raid layout, any raid layout)
lvs -a -o name,vg_name,lv_attr,layout,type -S 'layout={raid,level1}'
LV VG Attr Layout Type
[pool_tmeta] vg ewi-aor--- level1,raid metadata,pool,thin
(selected all LVs with raid level1 layout exactly)
And so on...
2014-08-13 12:03:45 +04:00
2007-07-18 19:38:58 +04:00
/*
 * Tag type used by struct lv_segment_area.
 * Ordered list - see lv_manip.c
 */
typedef enum {
	AREA_UNASSIGNED = 0,
	AREA_PV = 1,
	AREA_LV = 2
} area_type_t;
2014-07-08 00:14:08 +04:00
/*
 * Whether or not to force an operation.
 * Values increase from PROMPT (0) to DONT_PROMPT_OVERRIDE (2).
 * NOTE(review): the comments on DONT_PROMPT and DONT_PROMPT_OVERRIDE say
 * "add ... prompts" while the names say "don't prompt" - confirm the
 * intended wording (presumably: more prompts are answered automatically).
 */
2007-08-20 20:16:54 +04:00
typedef enum {
2007-08-21 20:40:33 +04:00
PROMPT = 0 , /* Issue yes/no prompt to confirm operation */
2014-07-08 00:14:08 +04:00
DONT_PROMPT = 1 , /* Add more prompts */
DONT_PROMPT_OVERRIDE = 2 /* Add even more dangerous prompts */
2007-08-20 20:16:54 +04:00
} force_t ;
2014-10-22 23:02:29 +04:00
/* Anonymous enum of MIRROR_LOG_* constants, numbered consecutively from 0. */
enum {
	MIRROR_LOG_CORE = 0,
	MIRROR_LOG_DISK = 1,
	MIRROR_LOG_MIRRORED = 2,
};
2012-06-28 16:47:34 +04:00
/* Tri-state THIN_ZERO_* setting: UNSELECTED is 0, NO is 1, YES is 2. */
typedef enum {
2017-03-03 22:46:13 +03:00
THIN_ZERO_UNSELECTED = 0 ,
THIN_ZERO_NO ,
THIN_ZERO_YES ,
} thin_zero_t ;
/*
 * THIN_DISCARDS_* setting: UNSELECTED is 0, then IGNORE (1),
 * NO_PASSDOWN (2) and PASSDOWN (3).
 */
typedef enum {
THIN_DISCARDS_UNSELECTED = 0 ,
2012-08-08 00:24:41 +04:00
THIN_DISCARDS_IGNORE ,
THIN_DISCARDS_NO_PASSDOWN ,
THIN_DISCARDS_PASSDOWN ,
} thin_discards_t ;
2012-06-28 16:47:34 +04:00
2021-01-12 19:59:29 +03:00
/* Tri-state THIN_CROP_METADATA_* setting; 0 means nothing was selected. */
typedef enum {
	THIN_CROP_METADATA_UNSELECTED = 0,	/* 'auto' selects */
	THIN_CROP_METADATA_NO = 1,
	THIN_CROP_METADATA_YES = 2,
} thin_crop_metadata_t;
2016-04-25 14:39:30 +03:00
/*
 * CACHE_MODE_* setting: UNSELECTED is 0, then WRITETHROUGH (1),
 * WRITEBACK (2) and PASSTHROUGH (3).
 */
typedef enum {
2017-03-03 16:52:32 +03:00
CACHE_MODE_UNSELECTED = 0 ,
2016-04-25 14:39:30 +03:00
CACHE_MODE_WRITETHROUGH ,
CACHE_MODE_WRITEBACK ,
CACHE_MODE_PASSTHROUGH ,
} cache_mode_t ;
2017-02-26 22:19:07 +03:00
/*
 * Metadata format selector; the enumerator value equals the format number.
 * ATM used for cache only.
 */
typedef enum {
	CACHE_METADATA_FORMAT_UNSELECTED = 0,	/* On input means 'auto' */
	CACHE_METADATA_FORMAT_1 = 1,
	CACHE_METADATA_FORMAT_2 = 2,
} cache_metadata_format_t;
2015-03-05 23:00:44 +03:00
/*
 * LOCK_TYPE_* identifiers. LOCK_TYPE_INVALID is the only negative value;
 * the remaining values are numbered 0 to 3.
 */
typedef enum {
	LOCK_TYPE_INVALID = -1,
	LOCK_TYPE_NONE    = 0,
	LOCK_TYPE_CLVM    = 1,
	LOCK_TYPE_DLM     = 2,
	LOCK_TYPE_SANLOCK = 3,
} lock_type_t;
2007-07-18 19:38:58 +04:00
struct cmd_context ;
struct format_handler ;
struct labeller ;
/*
 * Descriptor for one format type: list linkage, the owning command context,
 * the handler ops, a labeller, names, the orphan VG and opaque
 * library/private pointers.
 * NOTE(review): 'features' is presumably a bit mask of FMT_* feature flags -
 * confirm against the flag definitions.
 */
struct format_type {
2008-11-04 01:14:30 +03:00
struct dm_list list ;
2007-07-18 19:38:58 +04:00
struct cmd_context * cmd ;
struct format_handler * ops ;
2012-02-23 17:11:07 +04:00
struct dm_list mda_ops ; /* List of permissible mda ops. */
2007-07-18 19:38:58 +04:00
struct labeller * labeller ;
const char * name ;
const char * alias ;
2008-02-06 18:47:28 +03:00
const char * orphan_vg_name ;
2012-02-10 06:53:03 +04:00
struct volume_group * orphan_vg ; /* Only one ever exists. */
2007-07-18 19:38:58 +04:00
uint32_t features ;
void * library ;
void * private ;
} ;
/*
 * One segment of a PV's data area. lvseg is NULL when the segment is free
 * space; otherwise lv_area is the index of the area within that LV segment.
 * NOTE(review): pe/len are presumably the starting physical extent and the
 * length in extents - confirm.
 */
struct pv_segment {
2008-11-04 01:14:30 +03:00
struct dm_list list ; /* Member of pv->segments: ordered list
2007-07-18 19:38:58 +04:00
* covering entire data area on this PV */
struct physical_volume * pv ;
uint32_t pe ;
uint32_t len ;
struct lv_segment * lvseg ; /* NULL if free space */
uint32_t lv_area ; /* Index to area in LV segment */
} ;
2016-01-15 16:39:43 +03:00
/* 1 if (pvseg)->lvseg is non-null (the segment is not free space), else 0. */
#define pvseg_is_allocated(pvseg) (!!(pvseg)->lvseg)
2008-03-26 19:48:10 +03:00
2011-02-21 15:01:22 +03:00
/*
2012-02-13 03:01:19 +04:00
* Properties of each format instance type .
* The primary role of the format_instance is to temporarily store metadata
* area information we are working with .
2011-02-21 15:01:22 +03:00
*/
2012-05-10 15:03:07 +04:00
/* Include any existing PV ("on-disk") mdas during format_instance initialisation. */
2011-09-06 22:49:31 +04:00
#define FMT_INSTANCE_MDAS		(1U << 1)	/* bit 1 */
2011-02-21 15:01:22 +03:00
2012-05-10 14:37:49 +04:00
/*
* Include any auxiliary mdas during format_instance initialisation .
* Currently , this includes metadata areas as defined by
* metadata / dirs and metadata / raws setting .
*/
2011-09-06 22:49:31 +04:00
#define FMT_INSTANCE_AUX_MDAS		(1U << 2)	/* bit 2 */
2011-02-21 15:01:22 +03:00
2012-05-10 15:03:07 +04:00
/*
* Include any other format - specific mdas during format_instance initialisation .
* For example metadata areas used during backup / restore / archive handling .
*/
2011-09-06 22:49:31 +04:00
#define FMT_INSTANCE_PRIVATE_MDAS	(1U << 3)	/* bit 3 */
2011-02-21 15:01:22 +03:00
2018-02-07 00:18:11 +03:00
/*
* Each VG has its own fid struct . The fid for a VG describes where
* the metadata for that VG can be found . The lists hold mda locations .
*
* label scan finds the metadata locations ( devs and offsets ) for a VG ,
* and saves this info in lvmcache vginfo / info lists .
*
* vg_read ( ) then creates an fid for a given VG , and the mda locations
* from lvmcache are copied onto the fid lists . Those mda locations
* are read again by vg_read ( ) to get VG metadata that is used to
* create the ' vg ' struct .
*/
2007-07-18 19:38:58 +04:00
/*
 * Format instance ("fid"): reference-counted holder of the metadata area
 * lists and index for one format type.
 */
struct format_instance {
2011-04-29 00:29:59 +04:00
unsigned ref_count ; /* Refs to this fid from VG and PV structs */
2011-03-11 17:38:38 +03:00
struct dm_pool * mem ;
2011-02-21 15:01:22 +03:00
uint32_t type ; /* NOTE(review): presumably a mask of FMT_INSTANCE_* flags - confirm */
2007-07-18 19:38:58 +04:00
const struct format_type * fmt ;
2011-02-21 15:01:22 +03:00
2010-06-29 00:33:22 +04:00
/*
 * Each mda in a vg is on exactly one of the below lists.
 * MDAs on the 'in_use' list will be read from/written to
 * disk, while MDAs on the 'ignored' list will not be read
 * or written to.
 */
2011-02-21 15:01:22 +03:00
/* FIXME: Try to use the index only. Remove these lists. */
2010-06-29 00:33:22 +04:00
struct dm_list metadata_areas_in_use ;
struct dm_list metadata_areas_ignored ;
2012-02-10 06:53:03 +04:00
struct dm_hash_table * metadata_areas_index ;
2011-02-21 15:01:22 +03:00
2007-07-18 19:38:58 +04:00
void * private ;
} ;
/*
 * There will be one area for each stripe.
 * 'type' tags the union: u.pv carries a PV segment pointer, u.lv carries an
 * LV pointer plus a logical extent value.
 * NOTE(review): presumably u.pv is valid for AREA_PV and u.lv for AREA_LV -
 * confirm against the accessors.
 */
struct lv_segment_area {
area_type_t type ;
union {
struct {
struct pv_segment * pvseg ;
} pv ;
struct {
struct logical_volume * lv ;
uint32_t le ;
} lv ;
} u ;
} ;
2011-10-17 18:17:09 +04:00
/*
 * One queued thin message. 'type' selects which member of 'u' is
 * meaningful, as noted on each member.
 */
struct lv_thin_message {
struct dm_list list ; /* Chained list of messages */
dm_thin_message_t type ; /* Use dm thin message datatype */
union {
struct logical_volume * lv ; /* For: create_thin, create_snap, trim */
uint32_t delete_id ; /* For delete, needs device_id */
} u ;
} ;
2007-07-18 19:38:58 +04:00
struct segment_type ;
2010-05-21 16:36:30 +04:00
2007-07-18 19:38:58 +04:00
/*
 * One segment of a logical volume.
 *
 * A single struct carries the fields for every segment type (see the FIXME
 * below); the trailing comment on each field names the segment type(s) it
 * is used for. The 'areas' and 'meta_areas' arrays are indexed by the
 * seg_type()/seg_pv()/seg_lv() and seg_metalv()/seg_metatype() macros.
 */
struct lv_segment {
2008-11-04 01:14:30 +03:00
struct dm_list list ; /* NOTE(review): presumably links the segments of one LV -- confirm */
2007-07-18 19:38:58 +04:00
struct logical_volume * lv ; /* NOTE(review): presumably the LV this segment belongs to -- confirm */
const struct segment_type * segtype ;
uint32_t le ; /* NOTE(review): presumably first logical extent of the segment -- confirm */
uint32_t len ; /* NOTE(review): presumably length in extents -- confirm */
2017-02-24 02:50:00 +03:00
uint32_t reshape_len ; /* For RAID: user hidden additional out of place reshaping length off area_len and len */
2007-07-18 19:38:58 +04:00
2009-11-25 01:55:55 +03:00
uint64_t status ;
2007-07-18 19:38:58 +04:00
/* FIXME Fields depend on segment type */
2011-09-06 22:49:31 +04:00
uint32_t stripe_size ; /* For stripe and RAID - in sectors */
RAID: Add writemostly/writebehind support for RAID1
'lvchange' is used to alter a RAID 1 logical volume's write-mostly and
write-behind characteristics. The '--writemostly' parameter takes a
PV as an argument with an optional trailing character to specify whether
to set ('y'), unset ('n'), or toggle ('t') the value. If no trailing
character is given, it will set the flag.
Synopsis:
lvchange [--writemostly <PV>:{t|y|n}] [--writebehind <count>] vg/lv
Example:
lvchange --writemostly /dev/sdb1:y --writebehind 512 vg/raid1_lv
The last character in the 'lv_attr' field is used to show whether a device
has the WriteMostly flag set. It is signified with a 'w'. If the device
has failed, the 'p'artial flag has priority.
Example ("nosync" raid1 with mismatch_cnt and writemostly):
[~]# lvs -a --segment vg
LV VG Attr #Str Type SSize
raid1 vg Rwi---r-m 2 raid1 500.00m
[raid1_rimage_0] vg Iwi---r-- 1 linear 500.00m
[raid1_rimage_1] vg Iwi---r-w 1 linear 500.00m
[raid1_rmeta_0] vg ewi---r-- 1 linear 4.00m
[raid1_rmeta_1] vg ewi---r-- 1 linear 4.00m
Example (raid1 with mismatch_cnt, writemostly - but failed drive):
[~]# lvs -a --segment vg
LV VG Attr #Str Type SSize
raid1 vg rwi---r-p 2 raid1 500.00m
[raid1_rimage_0] vg Iwi---r-- 1 linear 500.00m
[raid1_rimage_1] vg Iwi---r-p 1 linear 500.00m
[raid1_rmeta_0] vg ewi---r-- 1 linear 4.00m
[raid1_rmeta_1] vg ewi---r-p 1 linear 4.00m
A new reportable field has been added for writebehind as well. If
write-behind has not been set or the LV is not RAID1, the field will
be blank.
Example (writebehind is set):
[~]# lvs -a -o name,attr,writebehind vg
LV Attr WBehind
lv rwi-a-r-- 512
[lv_rimage_0] iwi-aor-w
[lv_rimage_1] iwi-aor--
[lv_rmeta_0] ewi-aor--
[lv_rmeta_1] ewi-aor--
Example (writebehind is not set):
[~]# lvs -a -o name,attr,writebehind vg
LV Attr WBehind
lv rwi-a-r--
[lv_rimage_0] iwi-aor-w
[lv_rimage_1] iwi-aor--
[lv_rmeta_0] ewi-aor--
[lv_rmeta_1] ewi-aor--
2013-04-15 22:59:46 +04:00
uint32_t writebehind ; /* For RAID (RAID1 only) */
2013-05-31 20:25:52 +04:00
uint32_t min_recovery_rate ; /* For RAID */
uint32_t max_recovery_rate ; /* For RAID */
2017-02-24 02:50:00 +03:00
uint32_t data_offset ; /* For RAID: data offset in sectors on each data component image */
2007-07-18 19:38:58 +04:00
uint32_t area_count ; /* NOTE(review): presumably the number of entries in 'areas' -- confirm */
uint32_t area_len ;
2012-01-24 04:55:03 +04:00
uint32_t chunk_size ; /* For snapshots/thin_pool. In sectors. */
/* For thin_pool, 128..2097152. */
2011-08-26 21:40:53 +04:00
struct logical_volume * origin ; /* snap and thin */
2016-03-01 17:18:42 +03:00
struct generic_logical_volume * indirect_origin ;
2013-11-29 18:51:28 +04:00
struct logical_volume * merge_lv ; /* thin, merge descendent lv into this ancestor */
2007-07-18 19:38:58 +04:00
struct logical_volume * cow ;
2008-11-04 01:14:30 +03:00
struct dm_list origin_list ;
2018-04-18 13:56:32 +03:00
uint32_t region_size ; /* For raids/mirrors - in sectors */
2017-02-24 02:50:00 +03:00
uint32_t data_copies ; /* For RAID: number of data copies (e.g. 3 for RAID 6) */
2016-07-28 00:09:54 +03:00
uint32_t extents_copied ; /* Number of extents synced for raids/mirrors */
2007-07-18 19:38:58 +04:00
struct logical_volume * log_lv ;
2010-04-08 04:28:57 +04:00
struct lv_segment * pvmove_source_seg ;
2009-10-16 21:41:49 +04:00
void * segtype_private ;
2007-07-18 19:38:58 +04:00
2008-11-04 01:14:30 +03:00
struct dm_list tags ;
2007-07-18 19:38:58 +04:00
struct lv_segment_area * areas ; /* Indexed by seg_type(), seg_pv(), seg_lv() */
2011-09-07 02:43:56 +04:00
struct lv_segment_area * meta_areas ; /* For RAID */
2012-01-19 19:23:50 +04:00
struct logical_volume * metadata_lv ; /* For thin_pool */
2011-10-21 15:38:35 +04:00
uint64_t transaction_id ; /* For thin_pool, thin */
2017-03-03 22:46:13 +03:00
thin_zero_t zero_new_blocks ; /* For thin_pool */
2012-08-08 00:24:41 +04:00
thin_discards_t discards ; /* For thin_pool */
2021-01-12 19:59:29 +03:00
thin_crop_metadata_t crop_metadata ; /* For thin_pool */
2011-10-17 18:17:09 +04:00
struct dm_list thin_messages ; /* For thin_pool */
2013-02-21 13:25:44 +04:00
struct logical_volume * external_lv ; /* For thin */
2014-11-09 22:18:00 +03:00
struct logical_volume * pool_lv ; /* For thin, cache */
2011-09-29 12:56:38 +04:00
uint32_t device_id ; /* For thin, 24bit */
2010-05-21 16:36:30 +04:00
2018-08-17 23:45:52 +03:00
uint64_t metadata_start ; /* For cache */
uint64_t metadata_len ; /* For cache */
uint64_t data_start ; /* For cache */
uint64_t data_len ; /* For cache */
2019-10-16 17:05:51 +03:00
struct id * metadata_id ; /* For cache, when NULL uses CVOL id */
struct id * data_id ; /* For cache, when NULL uses CVOL id */
2018-08-17 23:45:52 +03:00
2017-03-01 14:26:56 +03:00
cache_metadata_format_t cache_metadata_format ; /* For cache_pool */
2016-04-25 14:39:30 +03:00
cache_mode_t cache_mode ; /* For cache_pool */
2014-11-11 01:41:03 +03:00
const char * policy_name ; /* For cache_pool */
struct dm_config_node * policy_settings ; /* For cache_pool */
2014-11-09 22:18:00 +03:00
unsigned cleaner_policy ; /* For cache */
2018-06-29 12:11:14 +03:00
2018-08-27 22:53:09 +03:00
struct logical_volume * writecache ; /* For writecache */
uint32_t writecache_block_size ; /* For writecache */
struct writecache_settings writecache_settings ; /* For writecache */
2019-11-21 01:07:27 +03:00
/* NOTE(review): the four integrity_* fields carry no usage comment; presumably "For integrity" -- confirm */
uint64_t integrity_data_sectors ;
struct logical_volume * integrity_meta_dev ;
struct integrity_settings integrity_settings ;
uint32_t integrity_recalculate ;
2018-06-29 12:11:14 +03:00
struct dm_vdo_target_params vdo_params ; /* For VDO-pool */
uint32_t vdo_pool_header_size ; /* For VDO-pool */
uint32_t vdo_pool_virtual_extents ; /* For VDO-pool */
2007-07-18 19:38:58 +04:00
} ;
/*
 * Accessors for area number 's' of segment 'seg'.
 *
 * seg_type/seg_pv/seg_lv index seg->areas; seg_metalv/seg_metatype index
 * seg->meta_areas. Each expands to a plain member expression (not wrapped
 * in outer parentheses), so the result can be assigned to as well as read.
 * Each argument appears exactly once in the expansion.
 * seg_pv() dereferences u.pv.pvseg, so it must only be used on an area
 * whose pvseg pointer is set.
 */
# define seg_type(seg, s) (seg)->areas[(s)].type
# define seg_pv(seg, s) (seg)->areas[(s)].u.pv.pvseg->pv
# define seg_lv(seg, s) (seg)->areas[(s)].u.lv.lv
2011-08-03 02:07:20 +04:00
# define seg_metalv(seg, s) (seg)->meta_areas[(s)].u.lv.lv
2011-08-11 07:29:51 +04:00
# define seg_metatype(seg, s) (seg)->meta_areas[(s)].type
2007-07-18 19:38:58 +04:00
/* List node describing one range of physical extents (PEs): 'count' PEs beginning at 'start'. */
struct pe_range {
2008-11-04 01:14:30 +03:00
struct dm_list list ;
2007-07-18 19:38:58 +04:00
uint32_t start ; /* PEs */
uint32_t count ; /* PEs */
} ;
/*
 * List node referring to one physical volume, optionally with its
 * metadata areas and a set of PE ranges.
 */
struct pv_list {
2008-11-04 01:14:30 +03:00
struct dm_list list ;
2007-07-18 19:38:58 +04:00
struct physical_volume * pv ;
2008-11-04 01:14:30 +03:00
struct dm_list * mdas ; /* Metadata areas */
struct dm_list * pe_ranges ; /* Ranges of PEs e.g. for allocation */
2007-07-18 19:38:58 +04:00
} ;
/* List node referring to one logical volume. */
struct lv_list {
2008-11-04 01:14:30 +03:00
struct dm_list list ;
2007-07-18 19:38:58 +04:00
struct logical_volume * lv ;
} ;
2016-03-01 17:18:42 +03:00
/* List node referring to one generic_logical_volume. */
struct glv_list {
struct dm_list list ;
struct generic_logical_volume * glv ;
} ;
2013-03-20 01:37:04 +04:00
/* List node referring to one volume group. */
struct vg_list {
struct dm_list list ;
struct volume_group * vg ;
} ;
2015-05-06 00:24:50 +03:00
/* List node carrying a VG name together with its VG id string. */
struct vgnameid_list {
struct dm_list list ;
const char * vg_name ;
const char * vgid ;
} ;
2019-08-27 01:07:18 +03:00
/* List node pairing a device with a PV id stored inline. */
struct device_id_list {
struct dm_list list ;
struct device * dev ;
char pvid [ ID_LEN + 1 ] ; /* ID_LEN chars plus one; NOTE(review): presumably for a terminating NUL -- confirm */
} ;
2013-02-18 15:47:31 +04:00
# define PV_PE_START_CALC ((uint64_t) -1) /* Calculate pe_start value */
2016-02-19 00:31:27 +03:00
/*
* Values used by pv_create ( ) .
*/
/*
 * Argument bundle for pv_create(); embedded in struct pvcreate_params as 'pva'.
 * Size and offset fields are in sectors where the field comment says so.
 */
struct pv_create_args {
Place the first PE at 1 MiB for all defaults
. When using default settings, this commit should change
nothing. The first PE continues to be placed at 1 MiB
resulting in a metadata area size of 1020 KiB (for
4K page sizes; slightly smaller for larger page sizes.)
. When default_data_alignment is disabled in lvm.conf,
align pe_start at 1 MiB, based on a default metadata area
size that adapts to the page size. Previously, disabling
this option would result in mda_size that was too small
for common use, and produced a 64 KiB aligned pe_start.
. Customized pe_start and mda_size values continue to be
set as before in lvm.conf and command line.
. Remove the configure option for setting default_data_alignment
at build time.
. Improve alignment related option descriptions.
. Add section about alignment to pvcreate man page.
Previously, DEFAULT_PVMETADATASIZE was 255 sectors.
However, the fact that the config setting named
"default_data_alignment" has a default value of 1 (MiB)
meant that DEFAULT_PVMETADATASIZE was having no effect.
The metadata area size is the space between the start of
the metadata area (page size offset from the start of the
device) and the first PE (1 MiB by default due to
default_data_alignment 1.) The result is a 1020 KiB metadata
area on machines with 4KiB page size (1024 KiB - 4 KiB),
and smaller on machines with larger page size.
If default_data_alignment was set to 0 (disabled), then
DEFAULT_PVMETADATASIZE 255 would take effect, and produce a
metadata area that was 188 KiB and pe_start of 192 KiB.
This was too small for common use.
This is fixed by making the default metadata area size a
computed value that matches the value produced by
default_data_alignment.
2018-11-14 00:00:11 +03:00
uint64_t size ; /* in sectors */
uint64_t data_alignment ; /* in sectors */
uint64_t data_alignment_offset ; /* in sectors */
2016-02-19 00:31:27 +03:00
uint64_t label_sector ;
int pvmetadatacopies ;
Place the first PE at 1 MiB for all defaults
. When using default settings, this commit should change
nothing. The first PE continues to be placed at 1 MiB
resulting in a metadata area size of 1020 KiB (for
4K page sizes; slightly smaller for larger page sizes.)
. When default_data_alignment is disabled in lvm.conf,
align pe_start at 1 MiB, based on a default metadata area
size that adapts to the page size. Previously, disabling
this option would result in mda_size that was too small
for common use, and produced a 64 KiB aligned pe_start.
. Customized pe_start and mda_size values continue to be
set as before in lvm.conf and command line.
. Remove the configure option for setting default_data_alignment
at build time.
. Improve alignment related option descriptions.
. Add section about alignment to pvcreate man page.
Previously, DEFAULT_PVMETADATASIZE was 255 sectors.
However, the fact that the config setting named
"default_data_alignment" has a default value of 1 (MiB)
meant that DEFAULT_PVMETADATASIZE was having no effect.
The metadata area size is the space between the start of
the metadata area (page size offset from the start of the
device) and the first PE (1 MiB by default due to
default_data_alignment 1.) The result is a 1020 KiB metadata
area on machines with 4KiB page size (1024 KiB - 4 KiB),
and smaller on machines with larger page size.
If default_data_alignment was set to 0 (disabled), then
DEFAULT_PVMETADATASIZE 255 would take effect, and produce a
metadata area that was 188 KiB and pe_start of 192 KiB.
This was too small for common use.
This is fixed by making the default metadata area size a
computed value that matches the value produced by
default_data_alignment.
2018-11-14 00:00:11 +03:00
uint64_t pvmetadatasize ; /* in sectors */
2016-02-19 00:31:27 +03:00
unsigned metadataignore ;
/* used when restoring */
struct id id ;
struct id * idp ;
Place the first PE at 1 MiB for all defaults
. When using default settings, this commit should change
nothing. The first PE continues to be placed at 1 MiB
resulting in a metadata area size of 1020 KiB (for
4K page sizes; slightly smaller for larger page sizes.)
. When default_data_alignment is disabled in lvm.conf,
align pe_start at 1 MiB, based on a default metadata area
size that adapts to the page size. Previously, disabling
this option would result in mda_size that was too small
for common use, and produced a 64 KiB aligned pe_start.
. Customized pe_start and mda_size values continue to be
set as before in lvm.conf and command line.
. Remove the configure option for setting default_data_alignment
at build time.
. Improve alignment related option descriptions.
. Add section about alignment to pvcreate man page.
Previously, DEFAULT_PVMETADATASIZE was 255 sectors.
However, the fact that the config setting named
"default_data_alignment" has a default value of 1 (MiB)
meant that DEFAULT_PVMETADATASIZE was having no effect.
The metadata area size is the space between the start of
the metadata area (page size offset from the start of the
device) and the first PE (1 MiB by default due to
default_data_alignment 1.) The result is a 1020 KiB metadata
area on machines with 4KiB page size (1024 KiB - 4 KiB),
and smaller on machines with larger page size.
If default_data_alignment was set to 0 (disabled), then
DEFAULT_PVMETADATASIZE 255 would take effect, and produce a
metadata area that was 188 KiB and pe_start of 192 KiB.
This was too small for common use.
This is fixed by making the default metadata area size a
computed value that matches the value produced by
default_data_alignment.
2018-11-14 00:00:11 +03:00
uint64_t ba_start ; /* in sectors */
uint64_t ba_size ; /* in sectors */
/* NOTE(review): PV_PE_START_CALC ("Calculate pe_start value") is presumably accepted here -- confirm */
uint64_t pe_start ; /* in sectors */
2013-02-18 13:51:32 +04:00
uint32_t extent_count ;
uint32_t extent_size ;
} ;
2009-07-26 05:53:09 +04:00
/*
 * Parameters and working state for processing a set of PVs named on the
 * command line. The same struct serves both creating and removing PVs
 * (see is_remove and arg_remove).
 */
struct pvcreate_params {
2016-02-16 23:15:24 +03:00
/*
 * From argc and argv.
 */
char * * pv_names ;
uint32_t pv_count ;
/*
 * From command line args.
 */
int zero ;
force_t force ;
unsigned yes ;
/*
 * From recovery-specific command line args.
 */
const char * restorefile ; /* NULL if no --restorefile option */
const char * uuid_str ; /* id in printable format, NULL if no id */
/*
2016-02-19 00:31:27 +03:00
* Values used by pv_create ( ) .
2016-02-16 23:15:24 +03:00
*/
2016-02-19 00:31:27 +03:00
struct pv_create_args pva ;
2016-02-16 23:15:24 +03:00
/*
 * Used for command processing.
 * The trailing comment on each list names the element type it holds.
 */
struct dm_list prompts ; /* pvcreate_prompt */
struct dm_list arg_devices ; /* pvcreate_device, one for each pv_name */
struct dm_list arg_process ; /* pvcreate_device, used for processing */
struct dm_list arg_confirm ; /* pvcreate_device, used for processing */
struct dm_list arg_create ; /* pvcreate_device, used for pvcreate */
2016-02-17 00:00:50 +03:00
struct dm_list arg_remove ; /* pvcreate_device, used for pvremove */
2016-02-16 23:15:24 +03:00
struct dm_list arg_fail ; /* pvcreate_device, failed to create */
struct dm_list pvs ; /* pv_list, created and usable for vgcreate/vgextend */
const char * orphan_vg_name ;
2016-02-17 00:00:50 +03:00
unsigned is_remove : 1 ; /* is removing PVs, not creating */
2016-02-16 23:15:24 +03:00
unsigned preserve_existing : 1 ;
unsigned check_failed : 1 ;
2019-08-01 18:06:47 +03:00
unsigned check_consistent_block_size : 1 ;
2016-02-16 23:15:24 +03:00
} ;
2013-03-14 02:00:29 +04:00
/*
 * Parameters for resizing a logical volume; passed to lv_resize().
 * The first group identifies the target; everything from "Per LV applied
 * parameters" on describes the requested change.
 */
struct lvresize_params {
2016-06-17 14:25:41 +03:00
int argc ;
char * * argv ;
2014-06-30 14:02:05 +04:00
const char * vg_name ; /* only-used when VG is not yet opened (in /tools) */
2013-03-14 02:00:29 +04:00
const char * lv_name ;
const struct segment_type * segtype ;
2016-06-17 14:25:41 +03:00
uint64_t poolmetadata_size ;
sign_t poolmetadata_sign ;
/* Per LV applied parameters */
2013-03-14 02:00:29 +04:00
/* Requested direction of the resize; LV_ANY (0) leaves it unrestricted by name -- NOTE(review): confirm */
enum {
LV_ANY = 0 ,
LV_REDUCE = 1 ,
LV_EXTEND = 2
} resize ;
2016-06-17 14:25:41 +03:00
int use_policies ;
alloc_policy_t alloc ;
2017-06-21 15:02:57 +03:00
int yes ;
2016-06-17 14:25:41 +03:00
int force ;
int nosync ;
2013-03-14 02:00:29 +04:00
int nofsck ;
2016-06-17 14:25:41 +03:00
int resizefs ;
2013-03-14 02:00:29 +04:00
2016-06-17 14:25:41 +03:00
unsigned mirrors ;
uint32_t stripes ;
uint64_t stripe_size ;
uint32_t extents ; /* Unit (PEs or LEs) is given by extents_are_pes below */
uint64_t size ;
sign_t sign ;
percent_type_t percent ;
2013-03-14 02:00:29 +04:00
2016-06-17 14:25:41 +03:00
int approx_alloc ;
int extents_are_pes ; /* Is 'extents' counting PEs or LEs? */
2016-07-13 17:23:25 +03:00
int size_changed ; /* Was there actually a size change */
2019-03-20 21:20:26 +03:00
const char * lockopt ;
char * lockd_lv_refresh_path ; /* set during resize to use for refresh at the end */
char * lockd_lv_refresh_uuid ; /* set during resize to use for refresh at the end */
2013-03-14 02:00:29 +04:00
} ;
2009-11-01 22:51:54 +03:00
void pvcreate_params_set_defaults ( struct pvcreate_params * pp ) ;
2009-07-26 05:53:09 +04:00
2014-10-07 04:04:09 +04:00
/*
 * Flags that indicate which warnings a library function should issue.
 * Each flag is a distinct bit so they can be OR-ed together.
 * NOTE(review): SKIP_RESCAN shares this bit space but, going by its name,
 * is not a warning selector; presumably all three are passed as
 * read_flags to vg_read() -- confirm.
 */
# define WARN_PV_READ 0x00000001
# define WARN_INCONSISTENT 0x00000002
2018-04-24 22:48:30 +03:00
# define SKIP_RESCAN 0x00000004
2014-10-07 04:04:09 +04:00
2007-07-18 19:38:58 +04:00
/*
* Utility functions
*/
int vg_write ( struct volume_group * vg ) ;
int vg_commit ( struct volume_group * vg ) ;
2011-09-27 21:09:42 +04:00
void vg_revert ( struct volume_group * vg ) ;
2013-03-20 01:37:04 +04:00
2009-05-14 01:25:01 +04:00
/*
* Add / remove LV to / from volume group
*/
int link_lv_to_vg ( struct volume_group * vg , struct logical_volume * lv ) ;
int unlink_lv_from_vg ( struct logical_volume * lv ) ;
2009-05-14 01:26:45 +04:00
void lv_set_visible ( struct logical_volume * lv ) ;
2009-05-21 07:04:52 +04:00
void lv_set_hidden ( struct logical_volume * lv ) ;
2009-05-14 01:25:01 +04:00
2011-02-28 16:19:02 +03:00
int pv_write ( struct cmd_context * cmd , struct physical_volume * pv , int allow_non_orphan ) ;
2009-07-14 06:15:21 +04:00
int move_pv ( struct volume_group * vg_from , struct volume_group * vg_to ,
const char * pv_name ) ;
int move_pvs_used_by_lv ( struct volume_group * vg_from ,
struct volume_group * vg_to ,
const char * lv_name ) ;
2007-11-02 16:06:42 +03:00
int is_orphan_vg ( const char * vg_name ) ;
2011-01-12 23:42:50 +03:00
int is_real_vg ( const char * vg_name ) ;
2010-09-30 17:04:55 +04:00
int vg_missing_pv_count ( const struct volume_group * vg ) ;
2008-01-16 22:54:39 +03:00
int vgs_are_compatible ( struct cmd_context * cmd ,
struct volume_group * vg_from ,
struct volume_group * vg_to ) ;
Add vg_lock_newname() library function.
Various tools need to check for existence of a VG before doing something
(vgsplit, vgrename, vgcreate). Currently we don't have an interface to
check for existence, but the existence check is part of the vg_read* call(s).
This patch is an attempt to pull out some of that functionality into a
separate function, and hopefully simplify our vg_read interface, and
move those patches along.
vg_lock_newname() is only concerned about checking whether a vg exists in
the system. Unfortunately, we cannot just scan the system, but we must first
obtain a lock. Since we are reserving a vgname, we take a WRITE lock on
the vgname. Once obtained, we scan the system to ensure the name does
not exist. The return codes and behavior are described in the function header.
You might think of this function as similar to an open() call with
O_CREAT and O_EXCL flags (returns failure with -EEXIST if file already
exists).
NOTE: I think including the word "lock" in the function name is important,
as it clearly states the function obtains a lock and makes the code more
readable, especially when it comes to cleanup / unlocking. The ultimate
function name is somewhat open for debate though so later we may rename.
2009-06-09 18:29:10 +04:00
/*
 * Reserve a VG name that is about to be created.
 * Per the change that introduced this function: takes a WRITE lock on
 * vgname, then scans the system to ensure no VG of that name exists
 * (comparable to open() with O_CREAT and O_EXCL).
 * The individual return codes are documented at the function definition.
 */
uint32_t vg_lock_newname ( struct cmd_context * cmd , const char * vgname ) ;
2007-07-18 19:38:58 +04:00
2016-06-14 16:32:21 +03:00
int lv_resize ( struct logical_volume * lv ,
struct lvresize_params * lp ,
struct dm_list * pvh ) ;
2013-03-14 02:00:29 +04:00
improve reading and repairing vg metadata
The fact that vg repair is implemented as a part of vg read
has led to a messy and complicated implementation of vg_read,
and limited and uncontrolled repair capability. This splits
read and repair apart.
Summary
-------
- take all kinds of various repairs out of vg_read
- vg_read no longer writes anything
- vg_read now simply reads and returns vg metadata
- vg_read ignores bad or old copies of metadata
- vg_read proceeds with a single good copy of metadata
- improve error checks and handling when reading
- keep track of bad (corrupt) copies of metadata in lvmcache
- keep track of old (seqno) copies of metadata in lvmcache
- keep track of outdated PVs in lvmcache
- vg_write will do basic repairs
- new command vgck --updatemetadata will do all repairs
Details
-------
- In scan, do not delete dev from lvmcache if reading/processing fails;
the dev is still present, and removing it makes it look like the dev
is not there. Records are now kept about the problems with each PV
so they can be fixed/repaired in the appropriate places.
- In scan, record a bad mda on failure, and delete the mda from
mda in use list so it will not be used by vg_read or vg_write,
only by repair.
- In scan, succeed if any good mda on a device is found, instead of
failing if any is bad. The bad/old copies of metadata should not
interfere with normal usage while good copies can be used.
- In scan, add a record of old mdas in lvmcache for later, do not repair
them while reading, and do not let them prevent us from finding and
using a good copy of metadata from elsewhere. One result is that
"inconsistent metadata" is no longer a read error, but instead a
record in lvmcache that can be addressed separate from the read.
- Treat a dev with no good mdas like a dev with no mdas, which is an
existing case we already handle.
- Don't use a fake vg "handle" for returning an error from vg_read,
or the vg_read_error function for getting that error number;
just return null if the vg cannot be read or used, and an error_flags
arg with flags set for the specific kind of error (which can be used
later for determining the kind of repair.)
- Saving an original copy of the vg metadata, for purposes of reverting
a write, is now done explicitly in vg_read instead of being hidden in
the vg_make_handle function.
- When a vg is not accessible due to "access restrictions" but is
otherwise fine, return the vg through the new error_vg arg so that
process_each_pv can skip the PVs in the VG while processing.
(This is a temporary accommodation for the way process_each_pv
tracks which devs have been looked at, and can be dropped later
when process_each_pv implementation dev tracking is changed.)
- vg_read does not try to fix or recover a vg, but now just reads the
metadata, checks access restrictions and returns it.
(Checking access restrictions might be better done outside of vg_read,
but this is a later improvement.)
- _vg_read now simply makes one attempt to read metadata from
each mda, and uses the most recent copy to return to the caller
in the form of a 'vg' struct.
(bad mdas were excluded during the scan and are not retried)
(old mdas were not excluded during scan and are retried here)
- vg_read uses _vg_read to get the latest copy of metadata from mdas,
and then makes various checks against it to produce warnings,
and to check if VG access is allowed (access restrictions include:
writable, foreign, shared, clustered, missing pvs).
- Things that were previously silently/automatically written by vg_read
that are now done by vg_write, based on the records made in lvmcache
during the scan and read:
. clearing the missing flag
. updating old copies of metadata
. clearing outdated pvs
. updating pv header flags
- Bad/corrupt metadata are now repaired; they were not before.
Test changes
------------
- A read command no longer writes the VG to repair it, so add a write
command to do a repair.
(inconsistent-metadata, unlost-pv)
- When a missing PV is removed from a VG, and then the device is
enabled again, vgck --updatemetadata is needed to clear the
outdated PV before it can be used again, where it wasn't before.
(lvconvert-repair-policy, lvconvert-repair-raid, lvconvert-repair,
mirror-vgreduce-removemissing, pv-ext-flags, unlost-pv)
Reading bad/old metadata
------------------------
- "bad metadata": the mda_header or metadata text has invalid fields
or can't be parsed by lvm. This is a form of corruption that would
not be caused by known failure scenarios. A checksum error is
typically included among the errors reported.
- "old metadata": a valid copy of the metadata that has a smaller seqno
than other copies of the metadata. This can happen if the device
failed, or io failed, or lvm failed while committing new metadata
to all the metadata areas. Old metadata on a PV that has been
removed from the VG is the "outdated" case below.
When a VG has some PVs with bad/old metadata, lvm can simply ignore
the bad/old copies, and use a good copy. This is why there are
multiple copies of the metadata -- so it's available even when some
of the copies cannot be used. The bad/old copies do not have to be
repaired before the VG can be used (the repair can happen later.)
A PV with no good copies of the metadata simply falls back to being
treated like a PV with no mdas; a common and harmless configuration.
When bad/old metadata exists, lvm warns the user about it, and
suggests repairing it using a new metadata repair command.
Bad metadata in particular is something that users will want to
investigate and repair themselves, since it should not happen and
may indicate some other problem that needs to be fixed.
PVs with bad/old metadata are not the same as missing devices.
Missing devices will block various kinds of VG modification or
activation, but bad/old metadata will not.
Previously, lvm would attempt to repair bad/old metadata whenever
it was read. This was unnecessary since lvm does not require every
copy of the metadata to be used. It would also hide potential
problems that should be investigated by the user. It was also
dangerous in cases where the VG was on shared storage. The user
is now allowed to investigate potential problems and decide how
and when to repair them.
Repairing bad/old metadata
--------------------------
When label scan sees bad metadata in an mda, that mda is removed
from the lvmcache info->mdas list. This means that vg_read will
skip it, and not attempt to read/process it again. If it was
the only in-use mda on a PV, that PV is treated like a PV with
no mdas. It also means that vg_write will also skip the bad mda,
and not attempt to write new metadata to it. The only way to
repair bad metadata is with the metadata repair command.
When label scan sees old metadata in an mda, that mda is kept
in the lvmcache info->mdas list. This means that vg_read will
read/process it again, and likely see the same mismatch with
the other copies of the metadata. Like the label_scan, the
vg_read will simply ignore the old copy of the metadata and
use the latest copy. If the command is modifying the vg
(e.g. lvcreate), then vg_write, which writes new metadata to
every mda on info->mdas, will write the new metadata to the
mda that had the old version. If successful, this will resolve
the old metadata problem (without needing to run a metadata
repair command.)
Outdated PVs
------------
An outdated PV is a PV that has an old copy of VG metadata
that shows it is a member of the VG, but the latest copy of
the VG metadata does not include this PV. This happens if
the PV is disconnected, vgreduce --removemissing is run to
remove the PV from the VG, then the PV is reconnected.
In this case, the outdated PV needs to have its outdated metadata
removed and the PV used flag needs to be cleared. This repair
will be done by the subsequent repair command. It is also done
if vgremove is run on the VG.
MISSING PVs
-----------
When a device is missing, most commands will refuse to modify
the VG. This is the simple case. More complicated is when
a command is allowed to modify the VG while it is missing a
device.
When a VG is written while a device is missing for one of its PVs,
the VG metadata is written to disk with the MISSING flag on the PV
with the missing device. When the VG is next used, it is treated
as if the PV with the MISSING flag still has a missing device, even
if that device has reappeared.
If all LVs that were using a PV with the MISSING flag are removed
or repaired so that the MISSING PV is no longer used, then the
next time the VG metadata is written, the MISSING flag will be
dropped.
Alternative methods of clearing the MISSING flag are:
vgreduce --removemissing will remove PVs with missing devices,
or PVs with the MISSING flag where the device has reappeared.
vgextend --restoremissing will clear the MISSING flag on PVs
where the device has reappeared, allowing the VG to be used
normally. This must be done with caution since the reappeared
device may have old data that is inconsistent with data on other PVs.
Bad mda repair
--------------
The new command:
vgck --updatemetadata VG
first uses vg_write to repair old metadata, and other basic
issues mentioned above (old metadata, outdated PVs, pv_header
flags, MISSING_PV flags). It will also go further and repair
bad metadata:
. text metadata that has a bad checksum
. text metadata that is not parsable
. corrupt mda_header checksum and version fields
(To keep a clean diff, #if 0 is added around functions that
are replaced by new code. These commented functions are
removed by the following commit.)
2019-05-24 20:04:37 +03:00
/*
 * Read the metadata of a VG and return it as a 'vg' struct.
 * Per the change description recorded with this prototype:
 *  - vg_read only reads; it no longer writes or repairs anything.
 *  - Returns NULL if the VG cannot be read or used, with *error_flags
 *    set to flags describing the specific kind of error.
 *  - When the VG is otherwise fine but blocked by access restrictions,
 *    the VG is handed back through *error_vg.
 */
struct volume_group * vg_read ( struct cmd_context * cmd , const char * vg_name , const char * vgid ,
uint32_t read_flags , uint32_t lockd_state ,
uint32_t * error_flags , struct volume_group * * error_vg ) ;
2009-07-29 17:26:01 +04:00
/* NOTE(review): presumably the vg_read variant for callers that intend to modify the VG -- confirm at the definition */
struct volume_group * vg_read_for_update ( struct cmd_context * cmd , const char * vg_name ,
2015-10-22 22:56:22 +03:00
const char * vgid , uint32_t read_flags , uint32_t lockd_state ) ;
improve reading and repairing vg metadata
The fact that vg repair is implemented as a part of vg read
has led to a messy and complicated implementation of vg_read,
and limited and uncontrolled repair capability. This splits
read and repair apart.
Summary
-------
- take all kinds of various repairs out of vg_read
- vg_read no longer writes anything
- vg_read now simply reads and returns vg metadata
- vg_read ignores bad or old copies of metadata
- vg_read proceeds with a single good copy of metadata
- improve error checks and handling when reading
- keep track of bad (corrupt) copies of metadata in lvmcache
- keep track of old (seqno) copies of metadata in lvmcache
- keep track of outdated PVs in lvmcache
- vg_write will do basic repairs
- new command vgck --updatemetadata will do all repairs
Details
-------
- In scan, do not delete dev from lvmcache if reading/processing fails;
the dev is still present, and removing it makes it look like the dev
is not there. Records are now kept about the problems with each PV
so they can be fixed/repaired in the appropriate places.
- In scan, record a bad mda on failure, and delete the mda from
mda in use list so it will not be used by vg_read or vg_write,
only by repair.
- In scan, succeed if any good mda on a device is found, instead of
failing if any is bad. The bad/old copies of metadata should not
interfere with normal usage while good copies can be used.
- In scan, add a record of old mdas in lvmcache for later, do not repair
them while reading, and do not let them prevent us from finding and
using a good copy of metadata from elsewhere. One result is that
"inconsistent metadata" is no longer a read error, but instead a
record in lvmcache that can be addressed separate from the read.
- Treat a dev with no good mdas like a dev with no mdas, which is an
existing case we already handle.
- Don't use a fake vg "handle" for returning an error from vg_read,
or the vg_read_error function for getting that error number;
just return null if the vg cannot be read or used, and an error_flags
arg with flags set for the specific kind of error (which can be used
later for determining the kind of repair.)
- Saving an original copy of the vg metadata, for purposes of reverting
a write, is now done explicitly in vg_read instead of being hidden in
the vg_make_handle function.
- When a vg is not accessible due to "access restrictions" but is
otherwise fine, return the vg through the new error_vg arg so that
process_each_pv can skip the PVs in the VG while processing.
(This is a temporary accommodation for the way process_each_pv
tracks which devs have been looked at, and can be dropped later
when process_each_pv implementation dev tracking is changed.)
- vg_read does not try to fix or recover a vg, but now just reads the
metadata, checks access restrictions and returns it.
(Checking access restrictions might be better done outside of vg_read,
but this is a later improvement.)
- _vg_read now simply makes one attempt to read metadata from
each mda, and uses the most recent copy to return to the caller
in the form of a 'vg' struct.
(bad mdas were excluded during the scan and are not retried)
(old mdas were not excluded during scan and are retried here)
- vg_read uses _vg_read to get the latest copy of metadata from mdas,
and then makes various checks against it to produce warnings,
and to check if VG access is allowed (access restrictions include:
writable, foreign, shared, clustered, missing pvs).
- Things that were previously silently/automatically written by vg_read
that are now done by vg_write, based on the records made in lvmcache
during the scan and read:
. clearing the missing flag
. updating old copies of metadata
. clearing outdated pvs
. updating pv header flags
- Bad/corrupt metadata are now repaired; they were not before.
Test changes
------------
- A read command no longer writes the VG to repair it, so add a write
command to do a repair.
(inconsistent-metadata, unlost-pv)
- When a missing PV is removed from a VG, and then the device is
enabled again, vgck --updatemetadata is needed to clear the
outdated PV before it can be used again, where it wasn't before.
(lvconvert-repair-policy, lvconvert-repair-raid, lvconvert-repair,
mirror-vgreduce-removemissing, pv-ext-flags, unlost-pv)
Reading bad/old metadata
------------------------
- "bad metadata": the mda_header or metadata text has invalid fields
or can't be parsed by lvm. This is a form of corruption that would
not be caused by known failure scenarios. A checksum error is
typically included among the errors reported.
- "old metadata": a valid copy of the metadata that has a smaller seqno
than other copies of the metadata. This can happen if the device
failed, or io failed, or lvm failed while committing new metadata
to all the metadata areas. Old metadata on a PV that has been
removed from the VG is the "outdated" case below.
When a VG has some PVs with bad/old metadata, lvm can simply ignore
the bad/old copies, and use a good copy. This is why there are
multiple copies of the metadata -- so it's available even when some
of the copies cannot be used. The bad/old copies do not have to be
repaired before the VG can be used (the repair can happen later.)
A PV with no good copies of the metadata simply falls back to being
treated like a PV with no mdas; a common and harmless configuration.
When bad/old metadata exists, lvm warns the user about it, and
suggests repairing it using a new metadata repair command.
Bad metadata in particular is something that users will want to
investigate and repair themselves, since it should not happen and
may indicate some other problem that needs to be fixed.
PVs with bad/old metadata are not the same as missing devices.
Missing devices will block various kinds of VG modification or
activation, but bad/old metadata will not.
Previously, lvm would attempt to repair bad/old metadata whenever
it was read. This was unnecessary since lvm does not require every
copy of the metadata to be used. It would also hide potential
problems that should be investigated by the user. It was also
dangerous in cases where the VG was on shared storage. The user
is now allowed to investigate potential problems and decide how
and when to repair them.
Repairing bad/old metadata
--------------------------
When label scan sees bad metadata in an mda, that mda is removed
from the lvmcache info->mdas list. This means that vg_read will
skip it, and not attempt to read/process it again. If it was
the only in-use mda on a PV, that PV is treated like a PV with
no mdas. It also means that vg_write will also skip the bad mda,
and not attempt to write new metadata to it. The only way to
repair bad metadata is with the metadata repair command.
When label scan sees old metadata in an mda, that mda is kept
in the lvmcache info->mdas list. This means that vg_read will
read/process it again, and likely see the same mismatch with
the other copies of the metadata. Like the label_scan, the
vg_read will simply ignore the old copy of the metadata and
use the latest copy. If the command is modifying the vg
(e.g. lvcreate), then vg_write, which writes new metadata to
every mda on info->mdas, will write the new metadata to the
mda that had the old version. If successful, this will resolve
the old metadata problem (without needing to run a metadata
repair command.)
Outdated PVs
------------
An outdated PV is a PV that has an old copy of VG metadata
that shows it is a member of the VG, but the latest copy of
the VG metadata does not include this PV. This happens if
the PV is disconnected, vgreduce --removemissing is run to
remove the PV from the VG, then the PV is reconnected.
In this case, the outdated PV needs to have its outdated metadata
removed and the PV used flag needs to be cleared. This repair
will be done by the subsequent repair command. It is also done
if vgremove is run on the VG.
MISSING PVs
-----------
When a device is missing, most commands will refuse to modify
the VG. This is the simple case. More complicated is when
a command is allowed to modify the VG while it is missing a
device.
When a VG is written while a device is missing for one of its PVs,
the VG metadata is written to disk with the MISSING flag on the PV
with the missing device. When the VG is next used, it is treated
as if the PV with the MISSING flag still has a missing device, even
if that device has reappeared.
If all LVs that were using a PV with the MISSING flag are removed
or repaired so that the MISSING PV is no longer used, then the
next time the VG metadata is written, the MISSING flag will be
dropped.
Alternative methods of clearing the MISSING flag are:
vgreduce --removemissing will remove PVs with missing devices,
or PVs with the MISSING flag where the device has reappeared.
vgextend --restoremissing will clear the MISSING flag on PVs
where the device has reappeared, allowing the VG to be used
normally. This must be done with caution since the reappeared
device may have old data that is inconsistent with data on other PVs.
Bad mda repair
--------------
The new command:
vgck --updatemetadata VG
first uses vg_write to repair old metadata, and other basic
issues mentioned above (old metadata, outdated PVs, pv_header
flags, MISSING_PV flags). It will also go further and repair
bad metadata:
. text metadata that has a bad checksum
. text metadata that is not parsable
. corrupt mda_header checksum and version fields
(To keep a clean diff, #if 0 is added around functions that
are replaced by new code. These commented functions are
removed by the following commit.)
2019-05-24 20:04:37 +03:00
struct volume_group * vg_read_orphans ( struct cmd_context * cmd , const char * orphan_vgname ) ;
2007-07-18 19:38:58 +04:00
/* pe_start and pe_end relate to any existing data so that new metadata
* areas can avoid overlap */
2009-07-29 17:26:01 +04:00
struct physical_volume * pv_create ( const struct cmd_context * cmd ,
2016-02-19 00:31:27 +03:00
struct device * dev , struct pv_create_args * pva ) ;
2013-06-29 01:25:28 +04:00
int pv_resize_single ( struct cmd_context * cmd ,
struct volume_group * vg ,
struct physical_volume * pv ,
2017-04-27 04:36:34 +03:00
const uint64_t new_size ,
int yes ) ;
2013-06-29 01:25:28 +04:00
2018-05-04 01:12:07 +03:00
int pv_analyze ( struct cmd_context * cmd , struct device * dev ,
2007-08-22 18:38:18 +04:00
uint64_t label_sector ) ;
2007-07-18 19:38:58 +04:00
2007-09-21 01:39:08 +04:00
/* FIXME: move internal to library */
2008-11-04 01:14:30 +03:00
uint32_t pv_list_extents_free ( const struct dm_list * pvh ) ;
2007-09-21 01:39:08 +04:00
2013-09-26 20:37:40 +04:00
int validate_new_vg_name ( struct cmd_context * cmd , const char * vg_name ) ;
2013-07-09 06:07:55 +04:00
int vg_validate ( struct volume_group * vg ) ;
2009-07-29 17:26:01 +04:00
struct volume_group * vg_create ( struct cmd_context * cmd , const char * vg_name ) ;
improve reading and repairing vg metadata
The fact that vg repair is implemented as a part of vg read
has led to a messy and complicated implementation of vg_read,
and limited and uncontrolled repair capability. This splits
read and repair apart.
Summary
-------
- take all kinds of various repairs out of vg_read
- vg_read no longer writes anything
- vg_read now simply reads and returns vg metadata
- vg_read ignores bad or old copies of metadata
- vg_read proceeds with a single good copy of metadata
- improve error checks and handling when reading
- keep track of bad (corrupt) copies of metadata in lvmcache
- keep track of old (seqno) copies of metadata in lvmcache
- keep track of outdated PVs in lvmcache
- vg_write will do basic repairs
- new command vgck --updatemetadata will do all repairs
Details
-------
- In scan, do not delete dev from lvmcache if reading/processing fails;
the dev is still present, and removing it makes it look like the dev
is not there. Records are now kept about the problems with each PV
so they can be fixed/repaired in the appropriate places.
- In scan, record a bad mda on failure, and delete the mda from
mda in use list so it will not be used by vg_read or vg_write,
only by repair.
- In scan, succeed if any good mda on a device is found, instead of
failing if any is bad. The bad/old copies of metadata should not
interfere with normal usage while good copies can be used.
- In scan, add a record of old mdas in lvmcache for later, do not repair
them while reading, and do not let them prevent us from finding and
using a good copy of metadata from elsewhere. One result is that
"inconsistent metadata" is no longer a read error, but instead a
record in lvmcache that can be addressed separate from the read.
- Treat a dev with no good mdas like a dev with no mdas, which is an
existing case we already handle.
- Don't use a fake vg "handle" for returning an error from vg_read,
or the vg_read_error function for getting that error number;
just return null if the vg cannot be read or used, and an error_flags
arg with flags set for the specific kind of error (which can be used
later for determining the kind of repair.)
- Saving an original copy of the vg metadata, for purposes of reverting
a write, is now done explicitly in vg_read instead of being hidden in
the vg_make_handle function.
- When a vg is not accessible due to "access restrictions" but is
otherwise fine, return the vg through the new error_vg arg so that
process_each_pv can skip the PVs in the VG while processing.
(This is a temporary accommodation for the way process_each_pv
tracks which devs have been looked at, and can be dropped later
when process_each_pv implementation dev tracking is changed.)
- vg_read does not try to fix or recover a vg, but now just reads the
metadata, checks access restrictions and returns it.
(Checking access restrictions might be better done outside of vg_read,
but this is a later improvement.)
- _vg_read now simply makes one attempt to read metadata from
each mda, and uses the most recent copy to return to the caller
in the form of a 'vg' struct.
(bad mdas were excluded during the scan and are not retried)
(old mdas were not excluded during scan and are retried here)
- vg_read uses _vg_read to get the latest copy of metadata from mdas,
and then makes various checks against it to produce warnings,
and to check if VG access is allowed (access restrictions include:
writable, foreign, shared, clustered, missing pvs).
- Things that were previously silently/automatically written by vg_read
that are now done by vg_write, based on the records made in lvmcache
during the scan and read:
. clearing the missing flag
. updating old copies of metadata
. clearing outdated pvs
. updating pv header flags
- Bad/corrupt metadata are now repaired; they were not before.
Test changes
------------
- A read command no longer writes the VG to repair it, so add a write
command to do a repair.
(inconsistent-metadata, unlost-pv)
- When a missing PV is removed from a VG, and then the device is
enabled again, vgck --updatemetadata is needed to clear the
outdated PV before it can be used again, where it wasn't before.
(lvconvert-repair-policy, lvconvert-repair-raid, lvconvert-repair,
mirror-vgreduce-removemissing, pv-ext-flags, unlost-pv)
Reading bad/old metadata
------------------------
- "bad metadata": the mda_header or metadata text has invalid fields
or can't be parsed by lvm. This is a form of corruption that would
not be caused by known failure scenarios. A checksum error is
typically included among the errors reported.
- "old metadata": a valid copy of the metadata that has a smaller seqno
than other copies of the metadata. This can happen if the device
failed, or io failed, or lvm failed while committing new metadata
to all the metadata areas. Old metadata on a PV that has been
removed from the VG is the "outdated" case below.
When a VG has some PVs with bad/old metadata, lvm can simply ignore
the bad/old copies, and use a good copy. This is why there are
multiple copies of the metadata -- so it's available even when some
of the copies cannot be used. The bad/old copies do not have to be
repaired before the VG can be used (the repair can happen later.)
A PV with no good copies of the metadata simply falls back to being
treated like a PV with no mdas; a common and harmless configuration.
When bad/old metadata exists, lvm warns the user about it, and
suggests repairing it using a new metadata repair command.
Bad metadata in particular is something that users will want to
investigate and repair themselves, since it should not happen and
may indicate some other problem that needs to be fixed.
PVs with bad/old metadata are not the same as missing devices.
Missing devices will block various kinds of VG modification or
activation, but bad/old metadata will not.
Previously, lvm would attempt to repair bad/old metadata whenever
it was read. This was unnecessary since lvm does not require every
copy of the metadata to be used. It would also hide potential
problems that should be investigated by the user. It was also
dangerous in cases where the VG was on shared storage. The user
is now allowed to investigate potential problems and decide how
and when to repair them.
Repairing bad/old metadata
--------------------------
When label scan sees bad metadata in an mda, that mda is removed
from the lvmcache info->mdas list. This means that vg_read will
skip it, and not attempt to read/process it again. If it was
the only in-use mda on a PV, that PV is treated like a PV with
no mdas. It also means that vg_write will also skip the bad mda,
and not attempt to write new metadata to it. The only way to
repair bad metadata is with the metadata repair command.
When label scan sees old metadata in an mda, that mda is kept
in the lvmcache info->mdas list. This means that vg_read will
read/process it again, and likely see the same mismatch with
the other copies of the metadata. Like the label_scan, the
vg_read will simply ignore the old copy of the metadata and
use the latest copy. If the command is modifying the vg
(e.g. lvcreate), then vg_write, which writes new metadata to
every mda on info->mdas, will write the new metadata to the
mda that had the old version. If successful, this will resolve
the old metadata problem (without needing to run a metadata
repair command.)
Outdated PVs
------------
An outdated PV is a PV that has an old copy of VG metadata
that shows it is a member of the VG, but the latest copy of
the VG metadata does not include this PV. This happens if
the PV is disconnected, vgreduce --removemissing is run to
remove the PV from the VG, then the PV is reconnected.
In this case, the outdated PV needs to have its outdated metadata
removed and the PV used flag needs to be cleared. This repair
will be done by the subsequent repair command. It is also done
if vgremove is run on the VG.
MISSING PVs
-----------
When a device is missing, most commands will refuse to modify
the VG. This is the simple case. More complicated is when
a command is allowed to modify the VG while it is missing a
device.
When a VG is written while a device is missing for one of its PVs,
the VG metadata is written to disk with the MISSING flag on the PV
with the missing device. When the VG is next used, it is treated
as if the PV with the MISSING flag still has a missing device, even
if that device has reappeared.
If all LVs that were using a PV with the MISSING flag are removed
or repaired so that the MISSING PV is no longer used, then the
next time the VG metadata is written, the MISSING flag will be
dropped.
Alternative methods of clearing the MISSING flag are:
vgreduce --removemissing will remove PVs with missing devices,
or PVs with the MISSING flag where the device has reappeared.
vgextend --restoremissing will clear the MISSING flag on PVs
where the device has reappeared, allowing the VG to be used
normally. This must be done with caution since the reappeared
device may have old data that is inconsistent with data on other PVs.
Bad mda repair
--------------
The new command:
vgck --updatemetadata VG
first uses vg_write to repair old metadata, and other basic
issues mentioned above (old metadata, outdated PVs, pv_header
flags, MISSING_PV flags). It will also go further and repair
bad metadata:
. text metadata that has a bad checksum
. text metadata that is not parsable
. corrupt mda_header checksum and version fields
(To keep a clean diff, #if 0 is added around functions that
are replaced by new code. These commented functions are
removed by the following commit.)
2019-05-24 20:04:37 +03:00
struct volume_group * vg_lock_and_create ( struct cmd_context * cmd , const char * vg_name , int * exists ) ;
2009-09-03 01:39:07 +04:00
int vg_remove_mdas ( struct volume_group * vg ) ;
2009-09-03 01:39:29 +04:00
int vg_remove_check ( struct volume_group * vg ) ;
2010-06-30 22:03:52 +04:00
void vg_remove_pvs ( struct volume_group * vg ) ;
2015-03-05 23:00:44 +03:00
int vg_remove_direct ( struct volume_group * vg ) ;
2009-09-03 01:39:29 +04:00
int vg_remove ( struct volume_group * vg ) ;
2007-07-18 19:38:58 +04:00
int vg_rename ( struct cmd_context * cmd , struct volume_group * vg ,
const char * new_name ) ;
2016-02-19 00:38:23 +03:00
int vg_extend_each_pv ( struct volume_group * vg , struct pvcreate_params * pp ) ;
2013-09-04 02:31:45 +04:00
int vgreduce_single ( struct cmd_context * cmd , struct volume_group * vg ,
2013-09-04 03:07:43 +04:00
struct physical_volume * pv , int commit ) ;
2013-09-04 02:31:45 +04:00
2010-02-24 21:15:05 +03:00
int vg_change_tag ( struct volume_group * vg , const char * tag , int add_tag ) ;
2007-07-18 19:38:58 +04:00
int vg_split_mdas ( struct cmd_context * cmd , struct volume_group * vg_from ,
struct volume_group * vg_to ) ;
2010-04-13 21:25:44 +04:00
/* FIXME: Investigate refactoring these functions to take a pv instead of a pv_list */
void add_pvl_to_vgs ( struct volume_group * vg , struct pv_list * pvl ) ;
void del_pvl_from_vgs ( struct volume_group * vg , struct pv_list * pvl ) ;
2007-07-18 19:38:58 +04:00
2011-03-11 17:56:56 +03:00
/*
* free_pv_fid ( ) must be called on every struct physical_volume allocated
2013-03-19 17:17:53 +04:00
* by pv_create , pv_read or find_pv_by_name to free it when no longer required .
2011-03-11 17:56:56 +03:00
*/
void free_pv_fid ( struct physical_volume * pv ) ;
2007-07-18 19:38:58 +04:00
/* Manipulate LVs */
2007-10-11 23:20:38 +04:00
struct logical_volume * lv_create_empty ( const char * name ,
2007-07-18 19:38:58 +04:00
union lvid * lvid ,
2009-11-25 01:55:55 +03:00
uint64_t status ,
2007-07-18 19:38:58 +04:00
alloc_policy_t alloc ,
struct volume_group * vg ) ;
2013-11-28 14:22:24 +04:00
/*
 * Options passed (by value) to wipe_lv ( ) describing how an LV is to be
 * zeroed and/or have its signatures wiped.
 */
struct wipe_params {
2013-11-06 19:16:34 +04:00
uint64_t zero_sectors ; /* number of sectors to zero */
2020-09-15 17:40:53 +03:00
uint8_t zero_value ; /* byte value written when zeroing */
int do_zero ; /* should we do zeroing of LV start? */
2013-11-06 19:16:34 +04:00
int do_wipe_signatures ; /* should we wipe known signatures found on LV? */
int yes ; /* answer yes automatically to all questions */
force_t force ; /* force mode */
2020-06-24 13:11:21 +03:00
int is_metadata ; /* the LV being wiped is a metadata LV */
2013-11-06 19:16:34 +04:00
} ;
/* Zero out LV and/or wipe signatures */
2013-11-28 14:22:24 +04:00
int wipe_lv ( struct logical_volume * lv , struct wipe_params params ) ;
2007-12-21 01:37:42 +03:00
2018-11-01 01:05:08 +03:00
/* Wipe any signatures and zero first sector on @lv */
int activate_and_wipe_lv ( struct logical_volume * lv , int commit ) ;
/* Wipe any signatures and zero first sector of LVs listed on @lv_list */
int activate_and_wipe_lvlist ( struct dm_list * lv_list , int commit ) ;
2010-02-24 21:15:49 +03:00
int lv_change_tag ( struct logical_volume * lv , const char * tag , int add_tag ) ;
2007-07-18 19:38:58 +04:00
/* Reduce the size of an LV by extents */
int lv_reduce ( struct logical_volume * lv , uint32_t extents ) ;
/* Empty an LV prior to deleting it */
int lv_empty ( struct logical_volume * lv ) ;
2008-01-17 16:13:54 +03:00
/* Empty an LV and add error segment */
2008-01-17 16:54:05 +03:00
int replace_lv_with_error_segment ( struct logical_volume * lv ) ;
2008-01-17 16:13:54 +03:00
2016-05-20 11:55:05 +03:00
int lv_refresh_suspend_resume ( const struct logical_volume * lv ) ;
2015-06-16 21:38:40 +03:00
2007-07-18 19:38:58 +04:00
/* Entry point for all LV extent allocations */
int lv_extend ( struct logical_volume * lv ,
const struct segment_type * segtype ,
uint32_t stripes , uint32_t stripe_size ,
2011-04-07 01:32:20 +04:00
uint32_t mirrors , uint32_t region_size ,
2014-10-26 10:13:59 +03:00
uint32_t extents ,
2014-02-14 07:10:28 +04:00
struct dm_list * allocatable_pvs , alloc_policy_t alloc ,
int approx_alloc ) ;
2007-07-18 19:38:58 +04:00
/* lv must be part of lv->vg->lvs */
int lv_remove ( struct logical_volume * lv ) ;
2016-03-01 17:26:57 +03:00
/* historical_glv must be part of lv->vg->historical_lvs */
int historical_glv_remove ( struct generic_logical_volume * historical_glv ) ;
2007-08-20 21:04:53 +04:00
int lv_remove_single ( struct cmd_context * cmd , struct logical_volume * lv ,
2013-12-05 15:39:02 +04:00
force_t force , int suppress_remove_message ) ;
2007-08-20 21:04:53 +04:00
2008-08-05 16:05:26 +04:00
int lv_remove_with_dependencies ( struct cmd_context * cmd , struct logical_volume * lv ,
2010-04-23 23:27:10 +04:00
force_t force , unsigned level ) ;
2008-08-05 16:05:26 +04:00
2007-08-04 01:22:10 +04:00
int lv_rename ( struct cmd_context * cmd , struct logical_volume * lv ,
2007-08-06 18:57:48 +04:00
const char * new_name ) ;
2012-09-27 11:48:25 +04:00
int lv_rename_update ( struct cmd_context * cmd , struct logical_volume * lv ,
const char * new_name , int update_mda ) ;
2019-10-21 10:16:45 +03:00
int lv_uniq_rename_update ( struct cmd_context * cmd , struct logical_volume * lv ,
const char * new_name , int update_mda ) ;
2007-08-04 01:22:10 +04:00
2014-09-09 20:47:27 +04:00
/* Updates and reloads metadata for given lv */
int lv_update_and_reload ( struct logical_volume * lv ) ;
int lv_update_and_reload_origin ( struct logical_volume * lv ) ;
2014-10-30 13:38:49 +03:00
uint32_t extents_from_size ( struct cmd_context * cmd , uint64_t size ,
2009-07-26 06:34:09 +04:00
uint32_t extent_size ) ;
2014-10-31 01:43:12 +03:00
uint32_t extents_from_percent_size ( struct volume_group * vg , const struct dm_list * pvh ,
uint32_t extents , int roundup ,
percent_type_t percent , uint64_t size ) ;
2009-07-26 06:34:09 +04:00
2014-01-08 13:27:17 +04:00
struct logical_volume * find_pool_lv ( const struct logical_volume * lv ) ;
2017-07-20 20:13:32 +03:00
int pool_is_active ( const struct logical_volume * lv ) ;
2014-01-29 17:27:13 +04:00
int pool_supports_external_origin ( const struct lv_segment * pool_seg , const struct logical_volume * external_lv ) ;
2017-07-20 20:13:32 +03:00
int thin_pool_feature_supported ( const struct logical_volume * lv , int feature ) ;
2014-07-23 00:20:18 +04:00
int recalculate_pool_chunk_size_with_dev_hints ( struct logical_volume * pool_lv ,
int chunk_size_calc_policy ) ;
2017-03-05 19:41:16 +03:00
int validate_cache_chunk_size ( struct cmd_context * cmd , uint32_t chunk_size ) ;
int validate_thin_pool_chunk_size ( struct cmd_context * cmd , uint32_t chunk_size ) ;
2014-10-06 14:18:57 +04:00
int validate_pool_chunk_size ( struct cmd_context * cmd , const struct segment_type * segtype , uint32_t chunk_size ) ;
2011-11-03 18:53:58 +04:00
int update_pool_lv ( struct logical_volume * lv , int activate ) ;
2017-03-01 13:23:26 +03:00
int get_default_allocation_thin_pool_chunk_size ( struct cmd_context * cmd , struct profile * profile ,
uint32_t * chunk_size , int * chunk_size_calc_method ) ;
2017-03-09 18:24:28 +03:00
int update_thin_pool_params ( struct cmd_context * cmd ,
struct profile * profile ,
uint32_t extent_size ,
const struct segment_type * segtype ,
unsigned attr ,
uint32_t pool_data_extents ,
2014-10-30 15:04:06 +03:00
uint32_t * pool_metadata_extents ,
2021-01-12 19:59:29 +03:00
struct logical_volume * metadata_lv ,
unsigned * crop_metadata ,
2014-02-04 19:58:35 +04:00
int * chunk_size_calc_method , uint32_t * chunk_size ,
2017-03-03 22:46:13 +03:00
thin_discards_t * discards , thin_zero_t * zero_new_blocks ) ;
2020-09-27 02:11:47 +03:00
/*
 * Status of a thin-pool LV: a device-mapper thin-pool status record together
 * with data and metadata usage expressed as dm_percent_t values.
 */
struct lv_status_thin_pool {
struct dm_pool * mem ; /* NOTE(review): presumably the pool the status was allocated from - confirm */
struct dm_status_thin_pool * thin_pool ; /* device-mapper thin-pool status */
dm_percent_t data_usage ; /* data usage as a dm_percent_t */
dm_percent_t metadata_usage ; /* metadata usage as a dm_percent_t */
} ;
/*
 * Status of a thin LV: a device-mapper thin status record together with
 * its usage expressed as a dm_percent_t value.
 */
struct lv_status_thin {
struct dm_pool * mem ; /* NOTE(review): presumably the pool the status was allocated from - confirm */
struct dm_status_thin * thin ; /* device-mapper thin status */
dm_percent_t usage ; /* usage as a dm_percent_t */
} ;
2012-08-08 00:24:41 +04:00
const char * get_pool_discards_name ( thin_discards_t discards ) ;
2014-11-08 03:28:38 +03:00
int set_pool_discards ( thin_discards_t * discards , const char * str ) ;
2013-07-04 13:04:05 +04:00
struct logical_volume * alloc_pool_metadata ( struct logical_volume * pool_lv ,
const char * name , uint32_t read_ahead ,
uint32_t stripes , uint32_t stripe_size ,
2014-10-30 15:04:06 +03:00
uint32_t extents , alloc_policy_t alloc ,
2013-07-04 13:04:05 +04:00
struct dm_list * pvh ) ;
2013-06-25 15:34:31 +04:00
int handle_pool_metadata_spare ( struct volume_group * vg , uint32_t extents ,
struct dm_list * pvh , int poolmetadataspare ) ;
2013-07-05 19:10:47 +04:00
int vg_set_pool_metadata_spare ( struct logical_volume * lv ) ;
int vg_remove_pool_metadata_spare ( struct volume_group * vg ) ;
2011-11-03 18:53:58 +04:00
2013-07-09 05:51:24 +04:00
int attach_thin_external_origin ( struct lv_segment * seg ,
struct logical_volume * external_lv ) ;
int detach_thin_external_origin ( struct lv_segment * seg ) ;
int attach_pool_metadata_lv ( struct lv_segment * pool_seg ,
2017-07-20 20:13:32 +03:00
struct logical_volume * metadata_lv ) ;
2013-07-09 05:51:24 +04:00
int detach_pool_metadata_lv ( struct lv_segment * pool_seg ,
2017-07-20 20:13:32 +03:00
struct logical_volume * * metadata_lv ) ;
2013-07-09 05:51:24 +04:00
int attach_pool_data_lv ( struct lv_segment * pool_seg ,
struct logical_volume * pool_data_lv ) ;
int is_mirror_image_removable ( struct logical_volume * mimage_lv , void * baton ) ;
2011-06-01 23:21:03 +04:00
/*
* Activation options
*/
2012-08-21 17:49:23 +04:00
/*
 * Requested change to an LV's activation state.
 * is_change_activating ( ) treats every value other than CHANGE_AN and
 * CHANGE_ALN as an activating change.
 */
typedef enum activation_change {
2012-06-27 15:48:31 +04:00
CHANGE_AY = 0 , /* activate */
CHANGE_AN = 1 , /* deactivate */
2014-10-18 13:01:29 +04:00
CHANGE_AEY = 2 , /* activate exclusively */
2012-06-27 15:48:31 +04:00
CHANGE_ALY = 3 , /* activate locally */
2012-06-27 16:59:34 +04:00
CHANGE_ALN = 4 , /* deactivate locally */
2015-06-16 18:18:16 +03:00
CHANGE_AAY = 5 , /* automatic activation */
CHANGE_ASY = 6 /* activate shared */
2011-06-01 23:21:03 +04:00
} activation_change_t ;
2013-07-18 18:17:23 +04:00
/*
 * Returns true (non-zero) when the requested change activates the device,
 * i.e. for every activation_change_t value except the two deactivating
 * ones: CHANGE_AN (deactivate) and CHANGE_ALN (deactivate locally).
 * Returns 0 for those two.
 */
static inline int is_change_activating(activation_change_t change)
{
	/* '!=' and '&&' are single tokens; written split they do not compile. */
	return ((change != CHANGE_AN) && (change != CHANGE_ALN));
}
2009-07-26 06:33:35 +04:00
/* FIXME: refactor and reduce the size of this struct! */
struct lvcreate_params {
/* flags */
int snapshot ; /* snap */
2014-10-18 13:01:29 +04:00
int create_pool ; /* pools */
2009-07-26 06:33:35 +04:00
int zero ; /* all */
2013-11-06 19:05:50 +04:00
int wipe_signatures ; /* all */
2014-09-19 19:04:28 +04:00
int32_t major ; /* all */
int32_t minor ; /* all */
2017-03-08 17:13:59 +03:00
int log_count ; /* mirror/RAID */
int nosync ; /* mirror/RAID */
2014-10-07 12:43:47 +04:00
int pool_metadata_spare ; /* pools */
2014-10-24 17:26:41 +04:00
int type ; /* type arg is given */
activation: flag temporary LVs internally
Add LV_TEMPORARY flag for LVs with limited existence during command
execution. Such LVs are temporary in way that they need to be activated,
some action done and then removed immediately. Such LVs are just like
any normal LV - the only difference is that they are removed during
LVM command execution. This is also the case for LVs representing
future pool metadata spare LVs which we need to initialize by using
the usual LV before they are declared as pool metadata spare.
We can optimize some other parts like udev to do a better job if
it knows that the LV is temporary and any processing on it is just
useless.
This flag is orthogonal to LV_NOSCAN flag introduced recently
as LV_NOSCAN flag is primarily used to mark an LV for the scanning
to be avoided before the zeroing of the device happens. The LV_TEMPORARY
flag makes a difference between a full-fledged LV visible in the system
and the LV just used as a temporary overlay for some action that needs to
be done on underlying PVs.
For example: lvcreate --thinpool POOL --zero n -L 1G vg
- first, the usual LV is created to do a clean up for pool metadata
spare. The LV is activated, zeroed, deactivated.
- between "activated" and "zeroed" stage, the LV_NOSCAN flag is used
to avoid any scanning in udev
- between "zeroed" and "deactivated" stage, we need to avoid the WATCH
udev rule, but since the LV is just a usual LV, we can't make a
difference. The LV_TEMPORARY internal LV flag helps here. If we
create the LV with this flag, the DM_UDEV_DISABLE_DISK_RULES
and DM_UDEV_DISABLE_OTHER_RULES flags are set (just as it is
with "invisible" and non-top-level LVs) - udev is directed to
skip WATCH rule use.
- if the LV_TEMPORARY flag was not used, there would normally be
a WATCH event generated once the LV is closed after "zeroed"
stage. This will cause problems with the immediate deactivation that
follows.
2013-10-23 16:06:39 +04:00
int temporary ; /* temporary LV */
2013-07-10 16:06:50 +04:00
# define ACTIVATION_SKIP_SET 0x01 /* request to set LV activation skip flag state */
# define ACTIVATION_SKIP_SET_ENABLED 0x02 /* set the LV activation skip flag state to 'enabled' */
# define ACTIVATION_SKIP_IGNORE 0x04 /* request to ignore LV activation skip flag (if any) */
int activation_skip ; /* activation skip flags */
2011-06-01 23:21:03 +04:00
activation_change_t activate ; /* non-snapshot, non-mirror */
2012-08-09 14:20:47 +04:00
thin_discards_t discards ; /* thin */
2017-03-03 22:46:13 +03:00
thin_zero_t zero_new_blocks ;
2013-10-04 14:30:33 +04:00
# define THIN_CHUNK_SIZE_CALC_METHOD_GENERIC 0x01
2013-09-25 18:00:52 +04:00
# define THIN_CHUNK_SIZE_CALC_METHOD_PERFORMANCE 0x02
2013-10-04 17:32:23 +04:00
int thin_chunk_size_calc_policy ;
2016-07-27 14:36:25 +03:00
unsigned suppress_zero_warn : 1 ;
2015-03-05 23:00:44 +03:00
unsigned needs_lockd_init : 1 ;
lvcreate: new cache or writecache lv with single command
To create a new cache or writecache LV with a single command:
lvcreate --type cache|writecache
-n Name -L Size --cachedevice PVfast VG [PVslow ...]
- A new main linear|striped LV is created as usual, using the
specified -n Name and -L Size, and using the optionally
specified PVslow devices.
- Then, a new cachevol LV is created internally, using PVfast
specified by the cachedevice option.
- Then, the cachevol is attached to the main LV, converting the
main LV to type cache|writecache.
Include --cachesize Size to specify the size of cache|writecache
to create from the specified --cachedevice PVs, otherwise the
entire cachedevice PV is used. The --cachedevice option can be
repeated to create the cache from multiple devices, or the
cachedevice option can contain a tag name specifying a set of PVs
to allocate the cache from.
To create a new cache or writecache LV with a single command
using an existing cachevol LV:
lvcreate --type cache|writecache
-n Name -L Size --cachevol LVfast VG [PVslow ...]
- A new main linear|striped LV is created as usual, using the
specified -n Name and -L Size, and using the optionally
specified PVslow devices.
- Then, the cachevol LVfast is attached to the main LV, converting
the main LV to type cache|writecache.
In cases where more advanced types (for the main LV or cachevol LV)
are needed, they should be created independently and then combined
with lvconvert.
Example
-------
user creates a new VG with one slow device and one fast device:
$ vgcreate vg /dev/slow1 /dev/fast1
user creates a new 8G main LV on /dev/slow1 that uses all of
/dev/fast1 as a writecache:
$ lvcreate --type writecache --cachedevice /dev/fast1
-n main -L 8G vg /dev/slow1
Example
-------
user creates a new VG with two slow devs and two fast devs:
$ vgcreate vg /dev/slow1 /dev/slow2 /dev/fast1 /dev/fast2
user creates a new 8G main LV on /dev/slow1 and /dev/slow2
that uses all of /dev/fast1 and /dev/fast2 as a writecache:
$ lvcreate --type writecache --cachedevice /dev/fast1 --cachedevice /dev/fast2
-n main -L 8G vg /dev/slow1 /dev/slow2
Example
-------
A user has several slow devices and several fast devices in their VG,
the slow devs have tag @slow, the fast devs have tag @fast.
user creates a new 8G main LV on the slow devs with a
2G writecache on the fast devs:
$ lvcreate --type writecache -n main -L 8G
--cachedevice @fast --cachesize 2G vg @slow
2020-04-10 21:17:37 +03:00
unsigned ignore_type : 1 ;
2020-06-24 13:11:21 +03:00
unsigned is_metadata : 1 ; /* created LV will be used as metadata LV (and can be zeroed) */
2009-07-26 06:33:35 +04:00
2014-06-30 14:02:05 +04:00
const char * vg_name ; /* only-used when VG is not yet opened (in /tools) */
2009-07-26 06:33:35 +04:00
const char * lv_name ; /* all */
2014-10-07 12:43:47 +04:00
const char * origin_name ; /* snap */
const char * pool_name ; /* thin */
2009-07-26 06:33:35 +04:00
2015-03-05 23:00:44 +03:00
const char * lock_args ;
2017-03-08 17:13:59 +03:00
uint32_t stripes ; /* striped/RAID */
uint32_t stripe_size ; /* striped/RAID */
2009-07-26 06:33:35 +04:00
uint32_t chunk_size ; /* snapshot */
2017-03-08 17:13:59 +03:00
uint32_t region_size ; /* mirror/RAID */
2009-07-26 06:33:35 +04:00
2017-03-08 17:13:59 +03:00
unsigned stripes_supplied ; /* striped/RAID */
unsigned stripe_size_supplied ; /* striped/RAID */
2016-08-19 15:51:43 +03:00
2017-03-08 17:13:59 +03:00
uint32_t mirrors ; /* mirror/RAID */
2009-07-26 06:33:35 +04:00
2013-05-31 20:25:52 +04:00
uint32_t min_recovery_rate ; /* RAID */
uint32_t max_recovery_rate ; /* RAID */
2017-03-01 14:26:56 +03:00
cache_metadata_format_t cache_metadata_format ; /* cache */
2016-04-25 14:39:30 +03:00
cache_mode_t cache_mode ; /* cache */
2015-07-15 12:06:40 +03:00
const char * policy_name ; /* cache */
struct dm_config_tree * policy_settings ; /* cache */
2014-02-04 21:57:08 +04:00
2009-07-26 06:33:35 +04:00
const struct segment_type * segtype ; /* all */
2012-11-15 13:32:13 +04:00
unsigned target_attr ; /* all */
2009-07-26 06:33:35 +04:00
/* size */
uint32_t extents ; /* all */
2014-10-07 12:43:47 +04:00
uint32_t pool_metadata_extents ; /* pools */
uint64_t pool_metadata_size ; /* pools */
2014-10-18 12:58:30 +04:00
uint32_t pool_data_extents ; /* pools */
uint64_t pool_data_size ; /* pools */
2014-10-31 13:33:19 +03:00
uint32_t virtual_extents ; /* snapshots, thins */
2009-07-26 06:33:35 +04:00
struct dm_list * pvh ; /* all */
2014-10-24 15:07:02 +04:00
uint64_t permission ; /* all */
2015-01-13 17:23:03 +03:00
unsigned error_when_full ; /* when segment supports it */
2021-01-12 19:59:29 +03:00
thin_crop_metadata_t crop_metadata ;
2009-07-26 06:33:35 +04:00
uint32_t read_ahead ; /* all */
2014-02-14 07:10:28 +04:00
int approx_alloc ; /* all */
2009-07-26 06:33:35 +04:00
alloc_policy_t alloc ; /* all */
2018-06-29 12:11:14 +03:00
struct dm_vdo_target_params vdo_params ; /* vdo */
2009-07-26 06:33:35 +04:00
2019-11-21 01:07:27 +03:00
int raidintegrity ;
const char * raidintegritymode ;
struct integrity_settings integrity_settings ;
2010-11-11 20:29:05 +03:00
struct dm_list tags ; /* all */
2013-11-06 19:16:34 +04:00
int yes ;
force_t force ;
2009-07-26 06:33:35 +04:00
} ;
2012-11-13 13:49:32 +04:00
/*
 * Takes a VG and a struct lvcreate_params and returns a logical_volume
 * pointer.
 * NOTE(review): by its name this creates a single LV described by 'lp';
 * presumably NULL signals failure - confirm against the definition.
 */
struct logical_volume * lv_create_single ( struct volume_group * vg ,
struct lvcreate_params * lp ) ;
2009-07-26 06:33:35 +04:00
2013-07-10 16:06:50 +04:00
/*
* The activation can be skipped for selected LVs . Some LVs are skipped
* by default ( e . g . thin snapshots ) , others can be skipped on demand by
* overriding the default behaviour . The flag that causes the activation
* skip on next activations is stored directly in metadata for each LV
* as ACTIVATION_SKIP flag .
*/
2017-07-20 20:13:32 +03:00
/*
 * Setter for the ACTIVATION_SKIP state of 'lv'; returns nothing.
 * NOTE(review): 'override_default' and 'add_skip' look like boolean switches
 * (override the default choice / request that the skip flag be added) -
 * confirm against the definition.
 */
void lv_set_activation_skip ( struct logical_volume * lv , int override_default , int add_skip ) ;
2013-07-10 16:06:50 +04:00
/*
 * NOTE(review): presumably reports (non-zero) that activation of 'lv' should
 * be skipped for the requested 'activate' change, with
 * 'override_lv_skip_flag' letting the caller disregard the flag stored on
 * the LV - confirm against the definition.
 */
int lv_activation_skip ( struct logical_volume * lv , activation_change_t activate ,
2014-02-19 14:02:16 +04:00
int override_lv_skip_flag ) ;
2013-07-10 16:06:50 +04:00
2007-12-20 18:42:55 +03:00
/*
* Functions for layer manipulation
*/
/*
 * NOTE(review): by name, inserts 'layer_lv' beneath those segments of
 * 'lv_where' that sit on the PV given by 'pvl'. 'status' is a 64-bit flag
 * word and 'lvs_changed' is a list, presumably receiving the LVs that were
 * modified - confirm against the definition.
 */
int insert_layer_for_segments_on_pv ( struct cmd_context * cmd ,
struct logical_volume * lv_where ,
struct logical_volume * layer_lv ,
2009-11-25 01:55:55 +03:00
uint64_t status ,
2017-07-20 20:13:32 +03:00
struct pv_list * pvl ,
2008-11-04 01:14:30 +03:00
struct dm_list * lvs_changed ) ;
2007-12-20 18:42:55 +03:00
/*
 * NOTE(review): by name, removes 'layer_lv' from the segments of 'lv' that
 * match the 64-bit 'status_mask'; 'lvs_changed' presumably collects the LVs
 * that were touched - confirm against the definition.
 */
int remove_layers_for_segments ( struct cmd_context * cmd ,
struct logical_volume * lv ,
struct logical_volume * layer_lv ,
2009-11-25 01:55:55 +03:00
uint64_t status_mask , struct dm_list * lvs_changed ) ;
2007-12-20 18:42:55 +03:00
/*
 * Same parameters as remove_layers_for_segments() minus the 'lv' argument.
 * NOTE(review): the "_all" suffix suggests it applies to every LV using
 * 'layer_lv' - confirm against the definition.
 */
int remove_layers_for_segments_all ( struct cmd_context * cmd ,
struct logical_volume * layer_lv ,
2009-11-25 01:55:55 +03:00
uint64_t status_mask ,
2008-11-04 01:14:30 +03:00
struct dm_list * lvs_changed ) ;
2007-12-20 18:42:55 +03:00
/*
 * NOTE(review): by name, splits the segments of the LVs stacked on top of
 * 'layer_lv' - confirm the exact splitting rule against the definition.
 */
int split_parent_segments_for_layer ( struct cmd_context * cmd ,
struct logical_volume * layer_lv ) ;
2007-12-20 21:55:46 +03:00
/*
 * NOTE(review): by name, detaches 'layer_lv' from underneath 'lv' - confirm
 * return convention against the definition.
 */
int remove_layer_from_lv ( struct logical_volume * lv ,
struct logical_volume * layer_lv ) ;
2007-12-20 18:42:55 +03:00
/*
 * Returns a logical_volume pointer.
 * NOTE(review): presumably the newly inserted layer LV, with 'layer_suffix'
 * used to derive its name from 'lv_where' and 'status' giving its flags -
 * confirm against the definition.
 */
struct logical_volume * insert_layer_for_lv ( struct cmd_context * cmd ,
struct logical_volume * lv_where ,
2009-11-25 01:55:55 +03:00
uint64_t status ,
2007-12-20 18:42:55 +03:00
const char * layer_suffix ) ;
2007-07-18 19:38:58 +04:00
/* Find a PV within a given VG */
2008-03-14 01:51:24 +03:00
/*
 * Look up a PV in 'vg' by 'pv_name'; the result is a pv_list entry.
 * NOTE(review): presumably NULL when nothing matches - confirm.
 */
struct pv_list * find_pv_in_vg ( const struct volume_group * vg ,
const char * pv_name ) ;
2010-03-16 18:30:48 +03:00
/*
 * Same lookup, keyed by a struct id instead of a name.
 * NOTE(review): presumably NULL when nothing matches - confirm.
 */
struct pv_list * find_pv_in_vg_by_uuid ( const struct volume_group * vg ,
const struct id * id ) ;
2007-07-18 19:38:58 +04:00
/* Find an LV within a given VG */
2008-03-14 01:51:24 +03:00
/*
 * Look up 'lv_name' in 'vg'; the result is an lv_list entry.
 * NOTE(review): presumably NULL when nothing matches - confirm.
 */
struct lv_list * find_lv_in_vg ( const struct volume_group * vg ,
const char * lv_name ) ;
2007-07-18 19:38:58 +04:00
/* FIXME Merge these functions with ones above */
2008-03-14 01:51:24 +03:00
/*
 * Takes the same arguments as find_lv_in_vg() but returns a logical_volume
 * pointer rather than a list entry.
 */
struct logical_volume * find_lv ( const struct volume_group * vg ,
const char * lv_name ) ;
2016-03-01 17:21:21 +03:00
/*
 * Look up a historical LV by name in 'vg', returning a
 * generic_logical_volume pointer.
 * NOTE(review): 'check_removed_list' looks like a boolean asking for the
 * VG's list of removed entries to be searched too, and 'glvl_found' like an
 * optional out-parameter for the matching list entry - confirm against the
 * definition.
 */
struct generic_logical_volume * find_historical_glv ( const struct volume_group * vg ,
const char * historical_lv_name ,
2016-03-01 17:26:57 +03:00
int check_removed_list ,
2016-03-01 17:21:21 +03:00
struct glv_list * * glvl_found ) ;
2016-03-01 17:31:48 +03:00
/*
 * NOTE(review): presumably returns non-zero when 'name' is already taken in
 * 'vg', writing through 'historical' whether the match was a historical LV -
 * confirm against the definition.
 */
int lv_name_is_used_in_vg ( const struct volume_group * vg , const char * name , int * historical ) ;
RAID: Add writemostly/writebehind support for RAID1
'lvchange' is used to alter a RAID 1 logical volume's write-mostly and
write-behind characteristics. The '--writemostly' parameter takes a
PV as an argument with an optional trailing character to specify whether
to set ('y'), unset ('n'), or toggle ('t') the value. If no trailing
character is given, it will set the flag.
Synopsis:
lvchange [--writemostly <PV>:{t|y|n}] [--writebehind <count>] vg/lv
Example:
lvchange --writemostly /dev/sdb1:y --writebehind 512 vg/raid1_lv
The last character in the 'lv_attr' field is used to show whether a device
has the WriteMostly flag set. It is signified with a 'w'. If the device
has failed, the 'p'artial flag has priority.
Example ("nosync" raid1 with mismatch_cnt and writemostly):
[~]# lvs -a --segment vg
LV VG Attr #Str Type SSize
raid1 vg Rwi---r-m 2 raid1 500.00m
[raid1_rimage_0] vg Iwi---r-- 1 linear 500.00m
[raid1_rimage_1] vg Iwi---r-w 1 linear 500.00m
[raid1_rmeta_0] vg ewi---r-- 1 linear 4.00m
[raid1_rmeta_1] vg ewi---r-- 1 linear 4.00m
Example (raid1 with mismatch_cnt, writemostly - but failed drive):
[~]# lvs -a --segment vg
LV VG Attr #Str Type SSize
raid1 vg rwi---r-p 2 raid1 500.00m
[raid1_rimage_0] vg Iwi---r-- 1 linear 500.00m
[raid1_rimage_1] vg Iwi---r-p 1 linear 500.00m
[raid1_rmeta_0] vg ewi---r-- 1 linear 4.00m
[raid1_rmeta_1] vg ewi---r-p 1 linear 4.00m
A new reportable field has been added for writebehind as well. If
write-behind has not been set or the LV is not RAID1, the field will
be blank.
Example (writebehind is set):
[~]# lvs -a -o name,attr,writebehind vg
LV Attr WBehind
lv rwi-a-r-- 512
[lv_rimage_0] iwi-aor-w
[lv_rimage_1] iwi-aor--
[lv_rmeta_0] ewi-aor--
[lv_rmeta_1] ewi-aor--
Example (writebehind is not set):
[~]# lvs -a -o name,attr,writebehind vg
LV Attr WBehind
lv rwi-a-r--
[lv_rimage_0] iwi-aor-w
[lv_rimage_1] iwi-aor--
[lv_rmeta_0] ewi-aor--
[lv_rmeta_1] ewi-aor--
2013-04-15 22:59:46 +04:00
/*
 * Predicates relating an LV to one PV ('pv') or to a list of PVs ('pvs').
 * NOTE(review): presumably non-zero when 'lv' has data on the given PV / on
 * any PV in the list - confirm the any-vs-all semantics in the definition.
 */
int lv_is_on_pv ( struct logical_volume * lv , struct physical_volume * pv ) ;
int lv_is_on_pvs ( struct logical_volume * lv , struct dm_list * pvs ) ;
2013-08-23 17:40:13 +04:00
/*
 * NOTE(review): by name, fills the list 'pvs' with the PVs used by 'lv',
 * taking any memory it needs from the pool 'mem' - confirm against the
 * definition.
 */
int get_pv_list_for_lv ( struct dm_pool * mem ,
struct logical_volume * lv , struct dm_list * pvs ) ;
RAID: Add writemostly/writebehind support for RAID1
'lvchange' is used to alter a RAID 1 logical volume's write-mostly and
write-behind characteristics. The '--writemostly' parameter takes a
PV as an argument with an optional trailing character to specify whether
to set ('y'), unset ('n'), or toggle ('t') the value. If no trailing
character is given, it will set the flag.
Synopsis:
lvchange [--writemostly <PV>:{t|y|n}] [--writebehind <count>] vg/lv
Example:
lvchange --writemostly /dev/sdb1:y --writebehind 512 vg/raid1_lv
The last character in the 'lv_attr' field is used to show whether a device
has the WriteMostly flag set. It is signified with a 'w'. If the device
has failed, the 'p'artial flag has priority.
Example ("nosync" raid1 with mismatch_cnt and writemostly):
[~]# lvs -a --segment vg
LV VG Attr #Str Type SSize
raid1 vg Rwi---r-m 2 raid1 500.00m
[raid1_rimage_0] vg Iwi---r-- 1 linear 500.00m
[raid1_rimage_1] vg Iwi---r-w 1 linear 500.00m
[raid1_rmeta_0] vg ewi---r-- 1 linear 4.00m
[raid1_rmeta_1] vg ewi---r-- 1 linear 4.00m
Example (raid1 with mismatch_cnt, writemostly - but failed drive):
[~]# lvs -a --segment vg
LV VG Attr #Str Type SSize
raid1 vg rwi---r-p 2 raid1 500.00m
[raid1_rimage_0] vg Iwi---r-- 1 linear 500.00m
[raid1_rimage_1] vg Iwi---r-p 1 linear 500.00m
[raid1_rmeta_0] vg ewi---r-- 1 linear 4.00m
[raid1_rmeta_1] vg ewi---r-p 1 linear 4.00m
A new reportable field has been added for writebehind as well. If
write-behind has not been set or the LV is not RAID1, the field will
be blank.
Example (writebehind is set):
[~]# lvs -a -o name,attr,writebehind vg
LV Attr WBehind
lv rwi-a-r-- 512
[lv_rimage_0] iwi-aor-w
[lv_rimage_1] iwi-aor--
[lv_rmeta_0] ewi-aor--
[lv_rmeta_1] ewi-aor--
Example (writebehind is not set):
[~]# lvs -a -o name,attr,writebehind vg
LV Attr WBehind
lv rwi-a-r--
[lv_rimage_0] iwi-aor-w
[lv_rimage_1] iwi-aor--
[lv_rmeta_0] ewi-aor--
[lv_rmeta_1] ewi-aor--
2013-04-15 22:59:46 +04:00
2007-07-18 19:38:58 +04:00
/*
 * Segment accessors for an LV. None of these takes a logical-extent
 * argument; each receives only the LV and returns an lv_segment pointer.
 * NOTE(review): first_seg/last_seg presumably return the first/last segment
 * of 'lv', and get_only_segment_using_this_lv the single segment (of another
 * LV) that uses 'lv' - confirm against the definitions.
 */
2007-12-20 21:55:46 +03:00
struct lv_segment * first_seg ( const struct logical_volume * lv ) ;
2011-10-29 00:12:54 +04:00
struct lv_segment * last_seg ( const struct logical_volume * lv ) ;
2014-09-17 18:50:24 +04:00
struct lv_segment * get_only_segment_using_this_lv ( const struct logical_volume * lv ) ;
2007-07-18 19:38:58 +04:00
/*
* Useful functions for managing snapshots .
*/
/*
 * Snapshot-origin predicate; also reachable under the alias
 * lv_is_thick_origin defined in this header.
 * NOTE(review): presumably non-zero when 'lv' is a snapshot origin - confirm.
 */
int lv_is_origin ( const struct logical_volume * lv ) ;
commands: new method for defining commands
. Define a prototype for every lvm command.
. Match every user command with one definition.
. Generate help text and man pages from them.
The new file command-lines.in defines a prototype for every
unique lvm command. A unique lvm command is a unique
combination of: command name + required option args +
required positional args. Each of these prototypes also
includes the optional option args and optional positional
args that the command will accept, a description, and a
unique string ID for the definition. Any valid command
will match one of the prototypes.
Here's an example of the lvresize command definitions from
command-lines.in, there are three unique lvresize commands:
lvresize --size SizeMB LV
OO: --alloc Alloc, --autobackup Bool, --force,
--nofsck, --nosync, --noudevsync, --reportformat String, --resizefs,
--stripes Number, --stripesize SizeKB, --poolmetadatasize SizeMB
OP: PV ...
ID: lvresize_by_size
DESC: Resize an LV by a specified size.
lvresize LV PV ...
OO: --alloc Alloc, --autobackup Bool, --force,
--nofsck, --nosync, --noudevsync,
--reportformat String, --resizefs, --stripes Number, --stripesize SizeKB
ID: lvresize_by_pv
DESC: Resize an LV by specified PV extents.
FLAGS: SECONDARY_SYNTAX
lvresize --poolmetadatasize SizeMB LV_thinpool
OO: --alloc Alloc, --autobackup Bool, --force,
--nofsck, --nosync, --noudevsync,
--reportformat String, --stripes Number, --stripesize SizeKB
OP: PV ...
ID: lvresize_pool_metadata_by_size
DESC: Resize a pool metadata SubLV by a specified size.
The three commands have separate definitions because they have
different required parameters. Required parameters are specified
on the first line of the definition. Optional options are
listed after OO, and optional positional args are listed after OP.
This data is used to generate corresponding command definition
structures for lvm in command-lines.h. usage/help output is also
auto generated, so it is always in sync with the definitions.
Every user-entered command is compared against the set of
command structures, and matched with one. An error is
reported if an entered command does not have the required
parameters for any definition. The closest match is printed
as a suggestion, and running lvresize --help will display
the usage for each possible lvresize command.
The prototype syntax used for help/man output includes
required --option and positional args on the first line,
and optional --option and positional args enclosed in [ ]
on subsequent lines.
command_name <required_opt_args> <required_pos_args>
[ <optional_opt_args> ]
[ <optional_pos_args> ]
Command definitions that are not to be advertised/suggested
have the flag SECONDARY_SYNTAX. These commands will not be
printed in the normal help output.
Man page prototypes are also generated from the same original
command definitions, and are always in sync with the code
and help text.
Very early in command execution, a matching command definition
is found. lvm then knows the operation being done, and that
the provided args conform to the definition. This will allow
lots of ad hoc checking/validation to be removed throughout
the code.
Each command definition can also be routed to a specific
function to implement it. The function is associated with
an enum value for the command definition (generated from
the ID string.) These per-command-definition implementation
functions have not yet been created, so all commands
currently fall back to the existing per-command-name
implementation functions.
Using per-command-definition functions will allow lots of
code to be removed which tries to figure out what the
command is meant to do. This is currently based on ad hoc
and complicated option analysis. When using the new
functions, what the command is doing is already known
from the associated command definition.
2016-08-12 23:52:18 +03:00
/* lv_is_thick_origin is a plain alias for lv_is_origin. */
# define lv_is_thick_origin lv_is_origin
2016-04-08 12:36:02 +03:00
/*
 * NOTE(review): presumably non-zero when 'lv' is the origin of thin
 * snapshots, with 'snap_count' an out-parameter for how many - confirm
 * (including whether NULL is accepted) against the definition.
 */
int lv_is_thin_origin ( const struct logical_volume * lv , unsigned * snap_count ) ;
commands: new method for defining commands
. Define a prototype for every lvm command.
. Match every user command with one definition.
. Generate help text and man pages from them.
The new file command-lines.in defines a prototype for every
unique lvm command. A unique lvm command is a unique
combination of: command name + required option args +
required positional args. Each of these prototypes also
includes the optional option args and optional positional
args that the command will accept, a description, and a
unique string ID for the definition. Any valid command
will match one of the prototypes.
Here's an example of the lvresize command definitions from
command-lines.in, there are three unique lvresize commands:
lvresize --size SizeMB LV
OO: --alloc Alloc, --autobackup Bool, --force,
--nofsck, --nosync, --noudevsync, --reportformat String, --resizefs,
--stripes Number, --stripesize SizeKB, --poolmetadatasize SizeMB
OP: PV ...
ID: lvresize_by_size
DESC: Resize an LV by a specified size.
lvresize LV PV ...
OO: --alloc Alloc, --autobackup Bool, --force,
--nofsck, --nosync, --noudevsync,
--reportformat String, --resizefs, --stripes Number, --stripesize SizeKB
ID: lvresize_by_pv
DESC: Resize an LV by specified PV extents.
FLAGS: SECONDARY_SYNTAX
lvresize --poolmetadatasize SizeMB LV_thinpool
OO: --alloc Alloc, --autobackup Bool, --force,
--nofsck, --nosync, --noudevsync,
--reportformat String, --stripes Number, --stripesize SizeKB
OP: PV ...
ID: lvresize_pool_metadata_by_size
DESC: Resize a pool metadata SubLV by a specified size.
The three commands have separate definitions because they have
different required parameters. Required parameters are specified
on the first line of the definition. Optional options are
listed after OO, and optional positional args are listed after OP.
This data is used to generate corresponding command definition
structures for lvm in command-lines.h. usage/help output is also
auto generated, so it is always in sync with the definitions.
Every user-entered command is compared against the set of
command structures, and matched with one. An error is
reported if an entered command does not have the required
parameters for any definition. The closest match is printed
as a suggestion, and running lvresize --help will display
the usage for each possible lvresize command.
The prototype syntax used for help/man output includes
required --option and positional args on the first line,
and optional --option and positional args enclosed in [ ]
on subsequent lines.
command_name <required_opt_args> <required_pos_args>
[ <optional_opt_args> ]
[ <optional_pos_args> ]
Command definitions that are not to be advertised/suggested
have the flag SECONDARY_SYNTAX. These commands will not be
printed in the normal help output.
Man page prototypes are also generated from the same original
command definitions, and are always in sync with the code
and help text.
Very early in command execution, a matching command definition
is found. lvm then knows the operation being done, and that
the provided args conform to the definition. This will allow
lots of ad hoc checking/validation to be removed throughout
the code.
Each command definition can also be routed to a specific
function to implement it. The function is associated with
an enum value for the command definition (generated from
the ID string.) These per-command-definition implementation
functions have not yet been created, so all commands
currently fall back to the existing per-command-name
implementation functions.
Using per-command-definition functions will allow lots of
code to be removed which tries to figure out what the
command is meant to do. This is currently based on ad hoc
and complicated option analysis. When using the new
functions, what the command is doing is already known
from the associated command definition.
2016-08-12 23:52:18 +03:00
/* NOTE(review): presumably non-zero when 'lv' is a thin snapshot - confirm. */
int lv_is_thin_snapshot ( const struct logical_volume * lv ) ;
2007-07-18 19:38:58 +04:00
/*
 * COW (snapshot) predicate; also reachable under the alias
 * lv_is_thick_snapshot defined in this header.
 */
int lv_is_cow ( const struct logical_volume * lv ) ;
commands: new method for defining commands
. Define a prototype for every lvm command.
. Match every user command with one definition.
. Generate help text and man pages from them.
The new file command-lines.in defines a prototype for every
unique lvm command. A unique lvm command is a unique
combination of: command name + required option args +
required positional args. Each of these prototypes also
includes the optional option args and optional positional
args that the command will accept, a description, and a
unique string ID for the definition. Any valid command
will match one of the prototypes.
Here's an example of the lvresize command definitions from
command-lines.in, there are three unique lvresize commands:
lvresize --size SizeMB LV
OO: --alloc Alloc, --autobackup Bool, --force,
--nofsck, --nosync, --noudevsync, --reportformat String, --resizefs,
--stripes Number, --stripesize SizeKB, --poolmetadatasize SizeMB
OP: PV ...
ID: lvresize_by_size
DESC: Resize an LV by a specified size.
lvresize LV PV ...
OO: --alloc Alloc, --autobackup Bool, --force,
--nofsck, --nosync, --noudevsync,
--reportformat String, --resizefs, --stripes Number, --stripesize SizeKB
ID: lvresize_by_pv
DESC: Resize an LV by specified PV extents.
FLAGS: SECONDARY_SYNTAX
lvresize --poolmetadatasize SizeMB LV_thinpool
OO: --alloc Alloc, --autobackup Bool, --force,
--nofsck, --nosync, --noudevsync,
--reportformat String, --stripes Number, --stripesize SizeKB
OP: PV ...
ID: lvresize_pool_metadata_by_size
DESC: Resize a pool metadata SubLV by a specified size.
The three commands have separate definitions because they have
different required parameters. Required parameters are specified
on the first line of the definition. Optional options are
listed after OO, and optional positional args are listed after OP.
This data is used to generate corresponding command definition
structures for lvm in command-lines.h. usage/help output is also
auto generated, so it is always in sync with the definitions.
Every user-entered command is compared against the set of
command structures, and matched with one. An error is
reported if an entered command does not have the required
parameters for any definition. The closest match is printed
as a suggestion, and running lvresize --help will display
the usage for each possible lvresize command.
The prototype syntax used for help/man output includes
required --option and positional args on the first line,
and optional --option and positional args enclosed in [ ]
on subsequent lines.
command_name <required_opt_args> <required_pos_args>
[ <optional_opt_args> ]
[ <optional_pos_args> ]
Command definitions that are not to be advertised/suggested
have the flag SECONDARY_SYNTAX. These commands will not be
printed in the normal help output.
Man page prototypes are also generated from the same original
command definitions, and are always in sync with the code
and help text.
Very early in command execution, a matching command definition
is found. lvm then knows the operation being done, and that
the provided args conform to the definition. This will allow
lots of ad hoc checking/validation to be removed throughout
the code.
Each command definition can also be routed to a specific
function to implement it. The function is associated with
an enum value for the command definition (generated from
the ID string.) These per-command-definition implementation
functions have not yet been created, so all commands
currently fall back to the existing per-command-name
implementation functions.
Using per-command-definition functions will allow lots of
code to be removed which tries to figure out what the
command is meant to do. This is currently based on ad hoc
and complicated option analysis. When using the new
functions, what the command is doing is already known
from the associated command definition.
2016-08-12 23:52:18 +03:00
/* lv_is_thick_snapshot is a plain alias for lv_is_cow. */
# define lv_is_thick_snapshot lv_is_cow
/* NOTE(review): presumably non-zero when 'lv' is the origin of a cache LV - confirm. */
int lv_is_cache_origin ( const struct logical_volume * lv ) ;
2019-09-20 22:04:18 +03:00
/* NOTE(review): presumably non-zero when 'lv' is the origin of a writecache LV - confirm. */
int lv_is_writecache_origin ( const struct logical_volume * lv ) ;
2019-09-20 22:54:00 +03:00
/* NOTE(review): presumably non-zero when 'lv' is a cachevol attached to a writecache - confirm. */
int lv_is_writecache_cachevol ( const struct logical_volume * lv ) ;
2020-02-26 23:47:29 +03:00
/*
 * NOTE(review): by name, renders 'settings' as strings appended to the list
 * 'result', allocating from the pool 'mem' - confirm against the definition.
 */
int writecache_settings_to_str_list ( struct writecache_settings * settings , struct dm_list * result , struct dm_pool * mem ) ;
writecache: use two step detach
When detaching a writecache, use the cleaner setting
by default to writeback data prior to suspending the
lv to detach the writecache. This avoids potentially
blocking for a long period with the device suspended.
Detaching a writecache first sets the cleaner option, waits
for a short period of time (less than a second), and checks
if the writecache has quickly become clean. If so, the
writecache is detached immediately. This optimizes the case
where little writeback is needed.
If the writecache does not quickly become clean, then the
detach command leaves the writecache attached with the
cleaner option set. This leaves the LV in the same state
as if the user had set the cleaner option directly with
lvchange --cachesettings cleaner=1 LV.
After leaving the LV with the cleaner option set, the
detach command will wait and watch the writeback progress,
and will finally detach the writecache when the writeback
is finished. The detach command does not need to wait
during the writeback phase, and can be canceled, in which
case the LV will remain with the writecache attached and
the cleaner option set. When the user runs the detach
command again it will complete the detach.
To detach a writecache directly, without using the cleaner
step (which has been the approach previously), add the
option --cachesettings cleaner=0 to the detach command.
2020-06-11 21:33:40 +03:00
/*
 * Helpers for the two-step writecache detach: set the cleaner option, then
 * check whether the writecache has become clean before detaching.
 * NOTE(review): 'dirty_blocks' looks like an out-parameter reporting the
 * remaining dirty block count, and writecache_cleaner_supported presumably
 * reports whether the running target offers the cleaner option - confirm
 * against the definitions.
 */
int lv_writecache_set_cleaner ( struct logical_volume * lv ) ;
bool lv_writecache_is_clean ( struct cmd_context * cmd , struct logical_volume * lv , uint64_t * dirty_blocks ) ;
bool writecache_cleaner_supported ( struct cmd_context * cmd ) ;
commands: new method for defining commands
. Define a prototype for every lvm command.
. Match every user command with one definition.
. Generate help text and man pages from them.
The new file command-lines.in defines a prototype for every
unique lvm command. A unique lvm command is a unique
combination of: command name + required option args +
required positional args. Each of these prototypes also
includes the optional option args and optional positional
args that the command will accept, a description, and a
unique string ID for the definition. Any valid command
will match one of the prototypes.
Here's an example of the lvresize command definitions from
command-lines.in, there are three unique lvresize commands:
lvresize --size SizeMB LV
OO: --alloc Alloc, --autobackup Bool, --force,
--nofsck, --nosync, --noudevsync, --reportformat String, --resizefs,
--stripes Number, --stripesize SizeKB, --poolmetadatasize SizeMB
OP: PV ...
ID: lvresize_by_size
DESC: Resize an LV by a specified size.
lvresize LV PV ...
OO: --alloc Alloc, --autobackup Bool, --force,
--nofsck, --nosync, --noudevsync,
--reportformat String, --resizefs, --stripes Number, --stripesize SizeKB
ID: lvresize_by_pv
DESC: Resize an LV by specified PV extents.
FLAGS: SECONDARY_SYNTAX
lvresize --poolmetadatasize SizeMB LV_thinpool
OO: --alloc Alloc, --autobackup Bool, --force,
--nofsck, --nosync, --noudevsync,
--reportformat String, --stripes Number, --stripesize SizeKB
OP: PV ...
ID: lvresize_pool_metadata_by_size
DESC: Resize a pool metadata SubLV by a specified size.
The three commands have separate definitions because they have
different required parameters. Required parameters are specified
on the first line of the definition. Optional options are
listed after OO, and optional positional args are listed after OP.
This data is used to generate corresponding command definition
structures for lvm in command-lines.h. usage/help output is also
auto generated, so it is always in sync with the definitions.
Every user-entered command is compared against the set of
command structures, and matched with one. An error is
reported if an entered command does not have the required
parameters for any definition. The closest match is printed
as a suggestion, and running lvresize --help will display
the usage for each possible lvresize command.
The prototype syntax used for help/man output includes
required --option and positional args on the first line,
and optional --option and positional args enclosed in [ ]
on subsequent lines.
command_name <required_opt_args> <required_pos_args>
[ <optional_opt_args> ]
[ <optional_pos_args> ]
Command definitions that are not to be advertised/suggested
have the flag SECONDARY_SYNTAX. These commands will not be
printed in the normal help output.
Man page prototypes are also generated from the same original
command definitions, and are always in sync with the code
and help text.
Very early in command execution, a matching command definition
is found. lvm then knows the operation being done, and that
the provided args conform to the definition. This will allow
lots of ad hoc checking/validation to be removed throughout
the code.
Each command definition can also be routed to a specific
function to implement it. The function is associated with
an enum value for the command definition (generated from
the ID string.) These per-command-definition implementation
functions have not yet been created, so all commands
currently fall back to the existing per-command-name
implementation functions.
Using per-command-definition functions will allow lots of
code to be removed which tries to figure out what the
command is meant to do. This is currently based on ad hoc
and complicated option analysis. When using the new
functions, what the command is doing is already known
from the associated command definition.
2016-08-12 23:52:18 +03:00
2019-11-21 01:07:27 +03:00
/* NOTE(review): presumably non-zero when 'lv' is the origin below an integrity layer - confirm. */
int lv_is_integrity_origin ( const struct logical_volume * lv ) ;
2016-01-07 16:30:21 +03:00
/* NOTE(review): presumably non-zero when the snapshot 'cow' is being merged into its origin - confirm. */
int lv_is_merging_cow ( const struct logical_volume * cow ) ;
2013-05-29 14:44:54 +04:00
/*
 * Returns an extent count (uint32_t) computed from 'origin' and 'chunk_size'.
 * NOTE(review): presumably the largest useful COW size for that origin -
 * confirm units of 'chunk_size' against the definition.
 */
uint32_t cow_max_extents ( const struct logical_volume * origin , uint32_t chunk_size ) ;
2014-03-17 16:04:14 +04:00
/*
 * NOTE(review): presumably non-zero when 'cow_extents' extents of 'vg' hold
 * at least the minimum number of chunks of 'chunk_size' - confirm.
 */
int cow_has_min_chunks ( const struct volume_group * vg , uint32_t cow_extents , uint32_t chunk_size ) ;
2013-05-27 12:18:20 +04:00
/* NOTE(review): presumably non-zero when the COW 'lv' is large enough to cover its whole origin - confirm. */
int lv_is_cow_covering_origin ( const struct logical_volume * lv ) ;
2007-07-18 19:38:58 +04:00
2008-12-04 18:54:26 +03:00
/* Test if given LV is visible from user's perspective */
2009-05-14 01:27:43 +04:00
int lv_is_visible ( const struct logical_volume * lv ) ;
2008-12-04 18:54:26 +03:00
2016-03-01 17:22:36 +03:00
int lv_is_historical ( const struct logical_volume * lv ) ;
2007-07-18 19:38:58 +04:00
int pv_is_in_vg ( struct volume_group * vg , struct physical_volume * pv ) ;
2013-11-29 18:51:28 +04:00
/* Given a cow or thin LV, return the snapshot lv_segment that uses it */
2013-07-03 00:26:03 +04:00
struct lv_segment * find_snapshot ( const struct logical_volume * lv ) ;
2007-07-18 19:38:58 +04:00
/* Given a cow LV, return its origin */
struct logical_volume * origin_from_cow ( const struct logical_volume * lv ) ;
2016-04-21 02:30:17 +03:00
/* Given an internal snapshot LV, return its cow */
struct logical_volume * find_cow ( const struct logical_volume * snap ) ;
2009-05-14 01:21:58 +04:00
void init_snapshot_seg ( struct lv_segment * seg , struct logical_volume * origin ,
2010-01-13 04:35:49 +03:00
struct logical_volume * cow , uint32_t chunk_size , int merge ) ;
2013-11-22 17:52:35 +04:00
void init_snapshot_merge ( struct lv_segment * snap_seg , struct logical_volume * origin ) ;
2009-05-14 01:21:58 +04:00
2010-01-13 04:55:43 +03:00
void clear_snapshot_merge ( struct logical_volume * origin ) ;
2009-05-14 01:21:58 +04:00
int vg_add_snapshot ( struct logical_volume * origin , struct logical_volume * cow ,
2007-07-18 19:38:58 +04:00
union lvid * lvid , uint32_t extent_count ,
uint32_t chunk_size ) ;
int vg_remove_snapshot ( struct logical_volume * cow ) ;
2017-10-27 17:48:57 +03:00
int validate_snapshot_origin ( const struct logical_volume * origin_lv ) ;
2009-11-25 01:55:55 +03:00
int vg_check_status ( const struct volume_group * vg , uint64_t status ) ;
2007-07-18 19:38:58 +04:00
2013-12-12 14:26:35 +04:00
int vg_check_pv_dev_block_sizes ( const struct volume_group * vg ) ;
2009-05-14 01:22:57 +04:00
2009-05-14 01:29:10 +04:00
/*
* Check whether the VG has reached its maximum LV count ( if one is set )
*/
int vg_max_lv_reached ( struct volume_group * vg ) ;
2007-07-18 19:38:58 +04:00
/*
* Mirroring functions
*/
2020-08-28 21:07:34 +03:00
uint32_t get_default_region_size ( struct cmd_context * cmd ) ; /* in lv_manip.c */
2008-01-16 22:00:59 +03:00
struct lv_segment * find_mirror_seg ( struct lv_segment * seg ) ;
2007-12-20 18:42:55 +03:00
int lv_add_mirrors ( struct cmd_context * cmd , struct logical_volume * lv ,
2010-04-09 05:00:10 +04:00
uint32_t mirrors , uint32_t stripes , uint32_t stripe_size ,
2007-12-20 18:42:55 +03:00
uint32_t region_size , uint32_t log_count ,
2008-11-04 01:14:30 +03:00
struct dm_list * pvs , alloc_policy_t alloc , uint32_t flags ) ;
2010-01-09 01:32:35 +03:00
int lv_split_mirror_images ( struct logical_volume * lv , const char * split_lv_name ,
uint32_t split_count , struct dm_list * removable_pvs ) ;
2007-12-20 18:42:55 +03:00
int lv_remove_mirrors ( struct cmd_context * cmd , struct logical_volume * lv ,
uint32_t mirrors , uint32_t log_count ,
2010-05-24 19:32:20 +04:00
int ( * is_removable ) ( struct logical_volume * , void * ) ,
void * removable_baton , uint64_t status_mask ) ;
2014-10-22 23:02:29 +04:00
const char * get_mirror_log_name ( int log_count ) ;
2014-11-08 03:28:38 +03:00
int set_mirror_log_count ( int * log_count , const char * mirrorlog ) ;
2007-12-20 18:42:55 +03:00
2015-01-05 18:45:30 +03:00
int cluster_mirror_is_available ( struct cmd_context * cmd ) ;
2007-12-20 21:55:46 +03:00
int is_temporary_mirror_layer ( const struct logical_volume * lv ) ;
2008-01-16 22:13:51 +03:00
struct logical_volume * find_temporary_mirror ( const struct logical_volume * lv ) ;
2007-12-20 21:55:46 +03:00
uint32_t lv_mirror_count ( const struct logical_volume * lv ) ;
2015-02-25 23:42:15 +03:00
2017-04-20 21:42:21 +03:00
uint32_t adjusted_mirror_region_size ( struct cmd_context * cmd ,
uint32_t extent_size , uint32_t extents ,
2015-02-25 23:42:15 +03:00
uint32_t region_size , int internal , int clustered ) ;
2007-12-20 18:42:55 +03:00
int remove_mirrors_from_segments ( struct logical_volume * lv ,
2009-11-25 01:55:55 +03:00
uint32_t new_mirrors , uint64_t status_mask ) ;
2007-12-20 18:42:55 +03:00
int add_mirrors_to_segments ( struct cmd_context * cmd , struct logical_volume * lv ,
uint32_t mirrors , uint32_t region_size ,
2008-11-04 01:14:30 +03:00
struct dm_list * allocatable_pvs , alloc_policy_t alloc ) ;
2007-07-18 19:38:58 +04:00
2007-12-20 21:55:46 +03:00
int remove_mirror_images ( struct logical_volume * lv , uint32_t num_mirrors ,
2010-05-24 19:32:20 +04:00
int ( * is_removable ) ( struct logical_volume * , void * ) ,
void * removable_baton , unsigned remove_log ) ;
2007-12-20 18:42:55 +03:00
int add_mirror_images ( struct cmd_context * cmd , struct logical_volume * lv ,
2010-04-09 05:00:10 +04:00
uint32_t mirrors , uint32_t stripes , uint32_t stripe_size , uint32_t region_size ,
2008-11-04 01:14:30 +03:00
struct dm_list * allocatable_pvs , alloc_policy_t alloc ,
2007-12-20 18:42:55 +03:00
uint32_t log_count ) ;
2017-07-20 20:13:32 +03:00
struct logical_volume * detach_mirror_log ( struct lv_segment * mirrored_seg ) ;
int attach_mirror_log ( struct lv_segment * seg , struct logical_volume * log_lv ) ;
2007-12-20 18:42:55 +03:00
int remove_mirror_log ( struct cmd_context * cmd , struct logical_volume * lv ,
2010-10-12 20:41:17 +04:00
struct dm_list * removable_pvs , int force ) ;
2016-12-10 21:54:09 +03:00
struct logical_volume * prepare_mirror_log ( struct logical_volume * lv ,
int in_sync , uint32_t region_size ,
struct dm_list * allocatable_pvs ,
alloc_policy_t alloc ) ;
2007-12-20 18:42:55 +03:00
int add_mirror_log ( struct cmd_context * cmd , struct logical_volume * lv ,
uint32_t log_count , uint32_t region_size ,
2008-11-04 01:14:30 +03:00
struct dm_list * allocatable_pvs , alloc_policy_t alloc ) ;
2007-12-20 18:42:55 +03:00
2012-08-15 12:44:19 +04:00
#if 0
/*
 * FIXME: reconfigure_mirror_images: remove this code?
 *
 * The prototype below is excluded from compilation by the enclosing
 * "#if 0", so nothing that includes this header can see it.
 */
2007-07-18 19:38:58 +04:00
int reconfigure_mirror_images ( struct lv_segment * mirrored_seg , uint32_t num_mirrors ,
2008-11-04 01:14:30 +03:00
struct dm_list * removable_pvs , unsigned remove_log ) ;
2012-08-15 12:44:19 +04:00
# endif
2007-12-20 21:55:46 +03:00
int collapse_mirrored_lv ( struct logical_volume * lv ) ;
2008-09-18 23:56:50 +04:00
int shift_mirror_images ( struct lv_segment * mirrored_seg , unsigned mimage ) ;
2007-07-18 19:38:58 +04:00
2011-08-11 22:24:40 +04:00
/* ++ metadata/raid_manip.c */
2011-12-01 04:09:34 +04:00
int lv_is_raid_with_tracking ( const struct logical_volume * lv ) ;
2011-08-11 22:24:40 +04:00
uint32_t lv_raid_image_count ( const struct logical_volume * lv ) ;
int lv_raid_change_image_count ( struct logical_volume * lv ,
2017-03-09 04:39:49 +03:00
int yes ,
2017-02-24 02:50:00 +03:00
uint32_t new_count ,
2017-02-24 07:00:55 +03:00
uint32_t new_region_size ,
2017-02-24 02:50:00 +03:00
struct dm_list * allocate_pvs ) ;
2017-03-09 15:59:47 +03:00
int lv_raid_split ( struct logical_volume * lv , int yes , const char * split_name ,
2011-08-18 23:34:18 +04:00
uint32_t new_count , struct dm_list * splittable_pvs ) ;
2011-08-18 23:38:26 +04:00
int lv_raid_split_and_track ( struct logical_volume * lv ,
2017-03-09 05:22:55 +03:00
int yes ,
2011-08-18 23:38:26 +04:00
struct dm_list * splittable_pvs ) ;
2017-07-20 20:13:32 +03:00
int lv_raid_merge ( struct logical_volume * image_lv ) ;
2016-06-22 20:40:22 +03:00
int lv_raid_convert ( struct logical_volume * lv ,
const struct segment_type * new_segtype ,
int yes , int force ,
2017-07-20 20:13:32 +03:00
const unsigned new_stripes ,
2016-08-05 16:28:14 +03:00
const unsigned new_stripe_size_supplied ,
2016-06-22 20:40:22 +03:00
const unsigned new_stripe_size ,
2016-08-03 01:51:20 +03:00
const uint32_t new_region_size ,
2016-06-22 20:40:22 +03:00
struct dm_list * allocate_pvs ) ;
2016-08-05 16:54:49 +03:00
int lv_raid_rebuild ( struct logical_volume * lv , struct dm_list * rebuild_pvs ) ;
2016-10-28 16:54:27 +03:00
int lv_raid_replace ( struct logical_volume * lv , int force ,
struct dm_list * remove_pvs , struct dm_list * allocate_pvs ) ;
2013-02-21 00:52:46 +04:00
int lv_raid_remove_missing ( struct logical_volume * lv ) ;
2017-07-20 20:13:32 +03:00
int partial_raid_lv_supports_degraded_activation ( const struct logical_volume * clv ) ;
2017-02-10 00:41:28 +03:00
uint32_t raid_rmeta_extents_delta ( struct cmd_context * cmd ,
uint32_t rimage_extents_cur , uint32_t rimage_extents_new ,
uint32_t region_size , uint32_t extent_size ) ;
uint32_t raid_rimage_extents ( const struct segment_type * segtype ,
uint32_t extents , uint32_t stripes , uint32_t data_copies ) ;
2017-02-12 19:47:35 +03:00
uint32_t raid_ensure_min_region_size ( const struct logical_volume * lv , uint64_t raid_size , uint32_t region_size ) ;
2017-02-07 20:52:13 +03:00
int lv_raid_change_region_size ( struct logical_volume * lv ,
int yes , int force , uint32_t new_region_size ) ;
2017-02-23 17:09:29 +03:00
int lv_raid_in_sync ( const struct logical_volume * lv ) ;
2017-02-24 02:50:00 +03:00
uint32_t lv_raid_data_copies ( const struct segment_type * segtype , uint32_t area_count ) ;
2017-03-08 00:05:23 +03:00
int lv_raid_free_reshape_space ( const struct logical_volume * lv ) ;
2018-11-01 01:05:08 +03:00
int lv_raid_clear_lv ( struct logical_volume * lv , int commit ) ;
int lv_raid_has_visible_sublvs ( const struct logical_volume * lv ) ;
2011-08-11 22:24:40 +04:00
/* -- metadata/raid_manip.c */
2014-02-04 17:59:58 +04:00
/* ++ metadata/cache_manip.c */
2014-11-03 14:52:29 +03:00
/*
 * Status record for a cache LV: pairs a pointer to a device-mapper
 * struct dm_status_cache with three dm_percent_t usage figures.
 *
 * NOTE(review): field meanings below are inferred from names and types
 * only - confirm against the code in metadata/cache_manip.c that fills
 * this structure.
 */
struct lv_status_cache {
struct dm_pool * mem ; /* presumably the pool this status was allocated from - TODO confirm */
struct dm_status_cache * cache ; /* device-mapper cache status */
dm_percent_t data_usage ; /* presumably usage of the cache data area - TODO confirm */
dm_percent_t metadata_usage ; /* presumably usage of the cache metadata area - TODO confirm */
dm_percent_t dirty_usage ; /* presumably share of dirty cache blocks - TODO confirm */
} ;
2018-11-05 23:53:52 +03:00
const char * cache_mode_num_to_str ( cache_mode_t mode ) ;
2016-04-25 14:39:30 +03:00
const char * display_cache_mode ( const struct lv_segment * seg ) ;
2018-08-17 23:45:52 +03:00
const char * get_cache_mode_name ( const struct lv_segment * seg ) ;
2016-04-25 14:39:30 +03:00
int set_cache_mode ( cache_mode_t * mode , const char * cache_mode ) ;
2017-07-20 20:13:32 +03:00
int cache_set_cache_mode ( struct lv_segment * seg , cache_mode_t mode ) ;
int cache_set_metadata_format ( struct lv_segment * seg , cache_metadata_format_t format ) ;
int cache_set_policy ( struct lv_segment * seg , const char * name ,
2015-08-11 15:01:12 +03:00
const struct dm_config_tree * settings ) ;
2016-05-05 22:30:15 +03:00
int cache_set_params ( struct lv_segment * seg ,
2017-03-09 17:54:30 +03:00
uint32_t chunk_size ,
2017-02-26 22:18:37 +03:00
cache_metadata_format_t format ,
2016-04-25 14:39:30 +03:00
cache_mode_t mode ,
2016-05-05 22:30:15 +03:00
const char * policy_name ,
2017-03-09 17:54:30 +03:00
const struct dm_config_tree * policy_settings ) ;
2019-01-30 18:55:34 +03:00
int cache_vol_set_params ( struct cmd_context * cmd ,
2018-08-17 23:45:52 +03:00
struct logical_volume * cache_lv ,
struct logical_volume * pool_lv ,
uint64_t poolmetadatasize ,
uint32_t chunk_size ,
cache_metadata_format_t format ,
cache_mode_t mode ,
const char * policy ,
const struct dm_config_tree * settings ) ;
2015-09-10 16:07:59 +03:00
void cache_check_for_warns ( const struct lv_segment * seg ) ;
2017-03-09 18:24:28 +03:00
int update_cache_pool_params ( struct cmd_context * cmd ,
struct profile * profile ,
uint32_t extent_size ,
const struct segment_type * segtype ,
unsigned attr ,
uint32_t pool_data_extents ,
2014-10-30 15:04:06 +03:00
uint32_t * pool_metadata_extents ,
2021-01-30 18:32:51 +03:00
struct logical_volume * metadata_lv ,
2014-07-23 00:20:18 +04:00
int * chunk_size_calc_method , uint32_t * chunk_size ) ;
2016-05-03 22:50:04 +03:00
int validate_lv_cache_chunk_size ( struct logical_volume * pool_lv , uint32_t chunk_size ) ;
2014-10-22 23:01:03 +04:00
int validate_lv_cache_create_pool ( const struct logical_volume * pool_lv ) ;
int validate_lv_cache_create_origin ( const struct logical_volume * origin_lv ) ;
2017-07-20 20:13:32 +03:00
struct logical_volume * lv_cache_create ( struct logical_volume * pool_lv ,
struct logical_volume * origin_lv ) ;
2016-04-26 22:45:48 +03:00
int lv_cache_wait_for_clean ( struct logical_volume * cache_lv , int * is_clean ) ;
2014-02-04 17:59:58 +04:00
int lv_cache_remove ( struct logical_volume * cache_lv ) ;
2019-09-24 23:28:02 +03:00
int lv_detach_writecache_cachevol ( struct logical_volume * cache_lv , int noflush ) ;
2014-11-02 20:36:41 +03:00
int wipe_cache_pool ( struct logical_volume * cache_pool_lv ) ;
2014-02-04 17:59:58 +04:00
/* -- metadata/cache_manip.c */
2018-06-29 12:11:14 +03:00
/* ++ metadata/vdo_manip.c */
2018-07-02 18:20:30 +03:00
/*
 * Status record for a VDO pool LV: pairs a pointer to a device-mapper
 * struct dm_vdo_status with block counters and dm_percent_t figures.
 * A pointer to this structure is the 'status' argument of
 * parse_vdo_pool_status().
 *
 * NOTE(review): the meaning of the percent fields is inferred from their
 * names only - confirm against metadata/vdo_manip.c.
 */
struct lv_status_vdo {
struct dm_pool * mem ; /* presumably the pool this status was allocated from - TODO confirm */
struct dm_vdo_status * vdo ; /* device-mapper VDO status */
uint64_t data_blocks_used ; /* grabbed from /sys/kvdo */
uint64_t logical_blocks_used ; /* grabbed from /sys/kvdo */
dm_percent_t usage ; /* NOTE(review): which quantity this measures is not visible here */
dm_percent_t saving ; /* presumably space saved by VDO - TODO confirm */
2018-07-05 00:17:38 +03:00
dm_percent_t data_usage ; /* presumably usage of the data area - TODO confirm */
2018-07-02 18:20:30 +03:00
} ;
2018-06-29 12:11:14 +03:00
2018-07-02 22:58:18 +03:00
const char * get_vdo_compression_state_name ( enum dm_vdo_compression_state state ) ;
const char * get_vdo_index_state_name ( enum dm_vdo_index_state state ) ;
const char * get_vdo_operating_mode_name ( enum dm_vdo_operating_mode mode ) ;
2019-10-04 15:59:00 +03:00
const char * get_vdo_write_policy_name ( enum dm_vdo_write_policy policy ) ;
2018-06-29 12:11:14 +03:00
uint64_t get_vdo_pool_virtual_size ( const struct lv_segment * vdo_pool_seg ) ;
2019-01-16 17:34:28 +03:00
int update_vdo_pool_virtual_size ( struct lv_segment * vdo_pool_seg ) ;
2018-07-02 18:20:30 +03:00
int parse_vdo_pool_status ( struct dm_pool * mem , const struct logical_volume * vdo_pool_lv ,
const char * params , struct lv_status_vdo * status ) ;
2018-06-29 12:11:14 +03:00
struct logical_volume * convert_vdo_pool_lv ( struct logical_volume * data_lv ,
const struct dm_vdo_target_params * vtp ,
2021-02-16 23:26:35 +03:00
uint32_t * virtual_extents ,
int format ) ;
2019-10-04 15:59:25 +03:00
int set_vdo_write_policy ( enum dm_vdo_write_policy * vwp , const char * policy ) ;
2018-06-29 14:16:08 +03:00
int fill_vdo_target_params ( struct cmd_context * cmd ,
struct dm_vdo_target_params * vtp ,
struct profile * profile ) ;
2018-06-29 12:11:14 +03:00
/* -- metadata/vdo_manip.c */
2007-07-18 19:38:58 +04:00
struct logical_volume * find_pvmove_lv ( struct volume_group * vg ,
2011-09-06 22:49:31 +04:00
struct device * dev , uint64_t lv_type ) ;
2014-09-22 17:50:07 +04:00
const struct logical_volume * find_pvmove_lv_in_lv ( const struct logical_volume * lv ) ;
const char * get_pvmove_pvname_from_lv ( const struct logical_volume * lv ) ;
const char * get_pvmove_pvname_from_lv_mirr ( const struct logical_volume * lv_mirr ) ;
2008-11-04 01:14:30 +03:00
struct dm_list * lvs_using_lv ( struct cmd_context * cmd , struct volume_group * vg ,
2007-07-18 19:38:58 +04:00
struct logical_volume * lv ) ;
uint32_t find_free_lvnum ( struct logical_volume * lv ) ;
2017-07-20 20:13:32 +03:00
dm_percent_t copy_percent ( const struct logical_volume * lv ) ;
2007-07-18 19:38:58 +04:00
char * generate_lv_name ( struct volume_group * vg , const char * format ,
char * buffer , size_t len ) ;
2016-08-15 19:22:32 +03:00
char * top_level_lv_name ( struct volume_group * vg , const char * lv_name ) ;
2007-07-18 19:38:58 +04:00
2016-03-01 17:19:23 +03:00
struct generic_logical_volume * get_or_create_glv ( struct dm_pool * mem , struct logical_volume * lv , int * glv_created ) ;
struct glv_list * get_or_create_glvl ( struct dm_pool * mem , struct logical_volume * lv , int * glv_created ) ;
2007-07-18 19:38:58 +04:00
/*
* Begin skeleton for external LVM library
*/
2017-07-20 20:13:32 +03:00
int pv_change_metadataignore ( struct physical_volume * pv , uint32_t mda_ignored ) ;
2009-07-29 17:26:01 +04:00
system_id: make new VGs read-only for old lvm versions
Previous versions of lvm will not obey the restrictions
imposed by the new system_id, and would allow such a VG
to be written. So, a VG with a new system_id is further
changed to force previous lvm versions to treat it as
read-only. This is done by removing the WRITE flag from
the metadata status line of these VGs, and putting a new
WRITE_LOCKED flag in the flags line of the metadata.
Versions of lvm that recognize WRITE_LOCKED also obey the
new system_id. For these lvm versions, WRITE_LOCKED is
identical to WRITE, and the rules associated with matching
system_ids are imposed.
A new VG lock_type field is also added that causes the same
WRITE/WRITE_LOCKED transformation when set. A previous
version of lvm will also see a VG with lock_type as read-only.
Versions of lvm that recognize WRITE_LOCKED must also obey
the lock_type setting. Until the lock_type feature is added,
lvm will fail to read any VG with lock_type set and report an
error about an unsupported lock_type. Once the lock_type
feature is added, lvm will allow VGs with lock_type to be
used according to the rules imposed by the lock_type.
When both system_id and lock_type settings are removed, a VG
is written with the old WRITE status flag, and without the
new WRITE_LOCKED flag. This allows old versions of lvm to
use the VG as before.
2015-03-04 20:30:53 +03:00
int vg_flag_write_locked ( struct volume_group * vg ) ;
2009-07-29 17:26:01 +04:00
int vg_check_write_mode ( struct volume_group * vg ) ;
2016-01-15 16:39:43 +03:00
/*
 * VG status-flag predicates: each tests one flag bit in vg_status(vg)
 * and evaluates to exactly 1 (int) when the bit is set, 0 otherwise.
 */
# define vg_is_clustered(vg) (((vg_status((vg))) & CLUSTERED) != 0)
# define vg_is_exported(vg) (((vg_status((vg))) & EXPORTED_VG) != 0)
# define vg_is_resizeable(vg) (((vg_status((vg))) & RESIZEABLE_VG) != 0)
2007-07-18 19:38:58 +04:00
2009-10-16 21:41:49 +04:00
int lv_has_unknown_segments ( const struct logical_volume * lv ) ;
int vg_has_unknown_segments ( const struct volume_group * vg ) ;
2011-05-07 17:32:05 +04:00
int vg_mark_partial_lvs ( struct volume_group * vg , int clear ) ;
2008-01-15 00:07:58 +03:00
/*
 * Parameter bundle describing a volume group to be created.
 * A pointer to this structure is the 'vp' argument of
 * vgcreate_params_validate().
 *
 * NOTE(review): units and valid ranges of the fields are not visible in
 * this header - the hedged notes below should be confirmed against the
 * callers that populate the structure.
 */
struct vgcreate_params {
2011-02-18 17:47:28 +03:00
const char * vg_name ; /* name of the VG */
2008-01-15 00:07:58 +03:00
uint32_t extent_size ; /* presumably in sectors - TODO confirm */
size_t max_pv ; /* presumably the PV count limit - TODO confirm */
size_t max_lv ; /* presumably the LV count limit - TODO confirm */
alloc_policy_t alloc ; /* allocation policy */
int clustered ; /* FIXME: put this into a 'status' variable instead? */
2010-07-01 00:03:52 +04:00
uint32_t vgmetadatacopies ; /* presumably the number of metadata copies to keep - TODO confirm */
2014-10-24 21:29:04 +04:00
const char * system_id ; /* system ID string for the VG */
2015-03-05 23:00:44 +03:00
const char * lock_type ; /* lock type string; see is_lockd_type() */
const char * lock_args ; /* NOTE(review): format of the lock arguments is not visible here */
2008-01-15 00:07:58 +03:00
} ;
2014-09-19 16:51:41 +04:00
int validate_major_minor ( const struct cmd_context * cmd ,
const struct format_type * fmt ,
int32_t major , int32_t minor ) ;
2009-11-01 23:05:17 +03:00
int vgcreate_params_validate ( struct cmd_context * cmd ,
struct vgcreate_params * vp ) ;
2008-01-15 00:07:58 +03:00
2008-01-16 01:56:30 +03:00
int validate_vg_rename_params ( struct cmd_context * cmd ,
const char * vg_name_old ,
const char * vg_name_new ) ;
2015-03-05 23:00:44 +03:00
int is_lockd_type ( const char * lock_type ) ;
2018-05-31 18:23:03 +03:00
int vg_is_shared ( const struct volume_group * vg ) ;
2015-03-05 23:00:44 +03:00
2015-11-30 20:46:55 +03:00
int is_system_id_allowed ( struct cmd_context * cmd , const char * system_id ) ;
2016-03-01 17:27:21 +03:00
int vg_strip_outdated_historical_lvs ( struct volume_group * vg ) ;
2018-08-27 22:53:09 +03:00
int lv_on_pmem ( struct logical_volume * lv ) ;
2019-03-06 00:19:05 +03:00
int vg_is_foreign ( struct volume_group * vg ) ;
2019-02-06 22:39:41 +03:00
void vg_write_commit_bad_mdas ( struct cmd_context * cmd , struct volume_group * vg ) ;
2020-01-14 23:12:20 +03:00
struct dm_list * create_pv_list ( struct dm_pool * mem , struct volume_group * vg , int argc ,
char * * argv , int allocatable_only ) ;
struct dm_list * clone_pv_list ( struct dm_pool * mem , struct dm_list * pvsl ) ;
2019-11-21 01:07:27 +03:00
int lv_add_integrity_to_raid ( struct logical_volume * lv , struct integrity_settings * settings , struct dm_list * pvh ,
struct logical_volume * lv_imeta_0 ) ;
int lv_remove_integrity_from_raid ( struct logical_volume * lv ) ;
void lv_clear_integrity_recalculate_metadata ( struct logical_volume * lv ) ;
int lv_has_integrity_recalculate_metadata ( struct logical_volume * lv ) ;
int lv_raid_has_integrity ( struct logical_volume * lv ) ;
int lv_extend_integrity_in_raid ( struct logical_volume * lv , struct dm_list * pvh ) ;
int lv_get_raid_integrity_settings ( struct logical_volume * lv , struct integrity_settings * * isettings ) ;
2020-09-01 20:53:00 +03:00
int integrity_mode_set ( const char * mode , struct integrity_settings * settings ) ;
2020-09-02 00:15:31 +03:00
int lv_integrity_mismatches ( struct cmd_context * cmd , const struct logical_volume * lv , uint64_t * mismatches ) ;
2020-11-12 00:10:15 +03:00
int lv_raid_integrity_total_mismatches ( struct cmd_context * cmd , const struct logical_volume * lv , uint64_t * mismatches ) ;
2019-11-21 01:07:27 +03:00
2007-07-18 19:38:58 +04:00
# endif