mirror of
git://sourceware.org/git/lvm2.git
synced 2025-01-17 06:04:23 +03:00
Change default alignment of pe_start to 1MB.
The new standard in the storage industry is to default alignment of data areas to 1MB. fdisk, parted, and mdadm have all been updated to this default.

Update LVM to align the PV's data area start (pe_start) to 1MB. This provides a more useful default than the previous default of 64K (which generally ended up being a 192K pe_start once the first metadata area was created).

Before this patch:
# pvs -o name,vg_mda_size,pe_start
  PV         VMdaSize  1st PE
  /dev/sdd     188.00k 192.00k

After this patch:
# pvs -o name,vg_mda_size,pe_start
  PV         VMdaSize  1st PE
  /dev/sdd    1020.00k   1.00m

The heuristic for setting the default alignment for LVM data areas is:
- If the default value (1MB) is a multiple of the detected alignment then just use the default.
- Otherwise, use the detected value.

In practice this means we'll almost always use 1MB -- that is unless:
- the alignment was explicitly specified with --dataalignment
- or MD's full stripe width, or the {minimum,optimal}_io_size exceeds 1MB
- or the specified/detected value is not a power-of-2
This commit is contained in:
parent
dff224669d
commit
b123a82d73
@ -1,5 +1,6 @@
|
|||||||
Version 2.02.73 -
|
Version 2.02.73 -
|
||||||
================================
|
================================
|
||||||
|
Change default alignment of pe_start to 1MB.
|
||||||
Add --norestorefile option to pvcreate.
|
Add --norestorefile option to pvcreate.
|
||||||
Require --restorefile when using pvcreate --uuid.
|
Require --restorefile when using pvcreate --uuid.
|
||||||
Recognise and give preference to md device partitions (blkext major).
|
Recognise and give preference to md device partitions (blkext major).
|
||||||
|
@ -113,7 +113,7 @@ devices {
|
|||||||
# Alignment (in KB) of start of data area when creating a new PV.
|
# Alignment (in KB) of start of data area when creating a new PV.
|
||||||
# If a PV is placed directly upon an md device and md_chunk_alignment or
|
# If a PV is placed directly upon an md device and md_chunk_alignment or
|
||||||
# data_alignment_detection is enabled this parameter is ignored.
|
# data_alignment_detection is enabled this parameter is ignored.
|
||||||
# Set to 0 for the default alignment of 64KB or page size, if larger.
|
# Set to 0 for the default alignment of 1MB or page size, if larger.
|
||||||
data_alignment = 0
|
data_alignment = 0
|
||||||
|
|
||||||
# By default, the start of the PV's aligned data area will be shifted by
|
# By default, the start of the PV's aligned data area will be shifted by
|
||||||
|
@ -16,6 +16,8 @@
|
|||||||
#ifndef _LVM_DEFAULTS_H
|
#ifndef _LVM_DEFAULTS_H
|
||||||
#define _LVM_DEFAULTS_H
|
#define _LVM_DEFAULTS_H
|
||||||
|
|
||||||
|
#define DEFAULT_PE_ALIGN 2048
|
||||||
|
|
||||||
#define DEFAULT_ARCHIVE_ENABLED 1
|
#define DEFAULT_ARCHIVE_ENABLED 1
|
||||||
#define DEFAULT_BACKUP_ENABLED 1
|
#define DEFAULT_BACKUP_ENABLED 1
|
||||||
|
|
||||||
|
@ -62,15 +62,23 @@ static uint32_t _vg_bad_status_bits(const struct volume_group *vg,
|
|||||||
const char _really_init[] =
|
const char _really_init[] =
|
||||||
"Really INITIALIZE physical volume \"%s\" of volume group \"%s\" [y/n]? ";
|
"Really INITIALIZE physical volume \"%s\" of volume group \"%s\" [y/n]? ";
|
||||||
|
|
||||||
|
static int _alignment_overrides_default(unsigned long data_alignment)
|
||||||
|
{
|
||||||
|
return data_alignment && (DEFAULT_PE_ALIGN % data_alignment);
|
||||||
|
}
|
||||||
|
|
||||||
unsigned long set_pe_align(struct physical_volume *pv, unsigned long data_alignment)
|
unsigned long set_pe_align(struct physical_volume *pv, unsigned long data_alignment)
|
||||||
{
|
{
|
||||||
|
unsigned long temp_pe_align;
|
||||||
|
|
||||||
if (pv->pe_align)
|
if (pv->pe_align)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
if (data_alignment)
|
if (data_alignment)
|
||||||
pv->pe_align = data_alignment;
|
pv->pe_align = data_alignment;
|
||||||
else
|
else
|
||||||
pv->pe_align = MAX(65536UL, lvm_getpagesize()) >> SECTOR_SHIFT;
|
pv->pe_align = MAX((DEFAULT_PE_ALIGN << SECTOR_SHIFT),
|
||||||
|
lvm_getpagesize()) >> SECTOR_SHIFT;
|
||||||
|
|
||||||
if (!pv->dev)
|
if (!pv->dev)
|
||||||
goto out;
|
goto out;
|
||||||
@ -79,10 +87,11 @@ unsigned long set_pe_align(struct physical_volume *pv, unsigned long data_alignm
|
|||||||
* Align to stripe-width of underlying md device if present
|
* Align to stripe-width of underlying md device if present
|
||||||
*/
|
*/
|
||||||
if (find_config_tree_bool(pv->fmt->cmd, "devices/md_chunk_alignment",
|
if (find_config_tree_bool(pv->fmt->cmd, "devices/md_chunk_alignment",
|
||||||
DEFAULT_MD_CHUNK_ALIGNMENT))
|
DEFAULT_MD_CHUNK_ALIGNMENT)) {
|
||||||
pv->pe_align = MAX(pv->pe_align,
|
temp_pe_align = dev_md_stripe_width(pv->fmt->cmd->sysfs_dir, pv->dev);
|
||||||
dev_md_stripe_width(pv->fmt->cmd->sysfs_dir,
|
if (_alignment_overrides_default(temp_pe_align))
|
||||||
pv->dev));
|
pv->pe_align = temp_pe_align;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Align to topology's minimum_io_size or optimal_io_size if present
|
* Align to topology's minimum_io_size or optimal_io_size if present
|
||||||
@ -94,13 +103,13 @@ unsigned long set_pe_align(struct physical_volume *pv, unsigned long data_alignm
|
|||||||
if (find_config_tree_bool(pv->fmt->cmd,
|
if (find_config_tree_bool(pv->fmt->cmd,
|
||||||
"devices/data_alignment_detection",
|
"devices/data_alignment_detection",
|
||||||
DEFAULT_DATA_ALIGNMENT_DETECTION)) {
|
DEFAULT_DATA_ALIGNMENT_DETECTION)) {
|
||||||
pv->pe_align = MAX(pv->pe_align,
|
temp_pe_align = dev_minimum_io_size(pv->fmt->cmd->sysfs_dir, pv->dev);
|
||||||
dev_minimum_io_size(pv->fmt->cmd->sysfs_dir,
|
if (_alignment_overrides_default(temp_pe_align))
|
||||||
pv->dev));
|
pv->pe_align = temp_pe_align;
|
||||||
|
|
||||||
pv->pe_align = MAX(pv->pe_align,
|
temp_pe_align = dev_optimal_io_size(pv->fmt->cmd->sysfs_dir, pv->dev);
|
||||||
dev_optimal_io_size(pv->fmt->cmd->sysfs_dir,
|
if (_alignment_overrides_default(temp_pe_align))
|
||||||
pv->dev));
|
pv->pe_align = temp_pe_align;
|
||||||
}
|
}
|
||||||
|
|
||||||
log_very_verbose("%s: Setting PE alignment to %lu sectors.",
|
log_very_verbose("%s: Setting PE alignment to %lu sectors.",
|
||||||
|
@ -103,9 +103,10 @@ check_pv_field_()
|
|||||||
local pv=$1;
|
local pv=$1;
|
||||||
local field=$2;
|
local field=$2;
|
||||||
local expected=$3;
|
local expected=$3;
|
||||||
|
local pvs_args=$4; # optional
|
||||||
local actual;
|
local actual;
|
||||||
|
|
||||||
actual=$(trim $(pvs --noheadings -o $field $pv))
|
actual=$(trim $(pvs --noheadings $pvs_args -o $field $pv))
|
||||||
if test "$verbose" = "t"
|
if test "$verbose" = "t"
|
||||||
then
|
then
|
||||||
echo "check_pv_field_ PV=$pv, field=$field, actual=$actual, expected=$expected"
|
echo "check_pv_field_ PV=$pv, field=$field, actual=$actual, expected=$expected"
|
||||||
|
@ -52,14 +52,14 @@ test -b "$mddev" || exit 200
|
|||||||
|
|
||||||
# Test alignment of PV on MD without any MD-aware or topology-aware detection
|
# Test alignment of PV on MD without any MD-aware or topology-aware detection
|
||||||
# - should treat $mddev just like any other block device
|
# - should treat $mddev just like any other block device
|
||||||
pv_align="192.00k"
|
pv_align="1.00m"
|
||||||
pvcreate --metadatasize 128k \
|
pvcreate --metadatasize 128k \
|
||||||
--config 'devices {md_chunk_alignment=0 data_alignment_detection=0 data_alignment_offset_detection=0}' \
|
--config 'devices {md_chunk_alignment=0 data_alignment_detection=0 data_alignment_offset_detection=0}' \
|
||||||
$mddev
|
$mddev
|
||||||
check_pv_field_ $mddev pe_start $pv_align
|
check_pv_field_ $mddev pe_start $pv_align
|
||||||
|
|
||||||
# Test md_chunk_alignment independent of topology-aware detection
|
# Test md_chunk_alignment independent of topology-aware detection
|
||||||
pv_align="256.00k"
|
pv_align="1.00m"
|
||||||
pvcreate --metadatasize 128k \
|
pvcreate --metadatasize 128k \
|
||||||
--config 'devices {data_alignment_detection=0 data_alignment_offset_detection=0}' \
|
--config 'devices {data_alignment_detection=0 data_alignment_offset_detection=0}' \
|
||||||
$mddev
|
$mddev
|
||||||
@ -71,7 +71,8 @@ linux_minor=$(echo `uname -r` | cut -d'.' -f3 | cut -d'-' -f1)
|
|||||||
# Test newer topology-aware alignment detection
|
# Test newer topology-aware alignment detection
|
||||||
# - first added to 2.6.31 but not "reliable" until 2.6.33
|
# - first added to 2.6.31 but not "reliable" until 2.6.33
|
||||||
if [ $linux_minor -ge 33 ]; then
|
if [ $linux_minor -ge 33 ]; then
|
||||||
pv_align="256.00k"
|
pv_align="1.00m"
|
||||||
|
# optimal_io_size=131072, minimum_io_size=65536
|
||||||
pvcreate --metadatasize 128k \
|
pvcreate --metadatasize 128k \
|
||||||
--config 'devices { md_chunk_alignment=0 }' $mddev
|
--config 'devices { md_chunk_alignment=0 }' $mddev
|
||||||
check_pv_field_ $mddev pe_start $pv_align
|
check_pv_field_ $mddev pe_start $pv_align
|
||||||
@ -103,15 +104,9 @@ EOF
|
|||||||
alignment_offset=`cat $sysfs_alignment_offset` || \
|
alignment_offset=`cat $sysfs_alignment_offset` || \
|
||||||
alignment_offset=0
|
alignment_offset=0
|
||||||
|
|
||||||
if [ "$alignment_offset" = "512" ]; then
|
# default alignment is 1M, add alignment_offset
|
||||||
pv_align="256.50k"
|
pv_align=$((1048576+$alignment_offset))B
|
||||||
pvcreate --metadatasize 128k $mddev_p
|
pvcreate --metadatasize 128k $mddev_p
|
||||||
check_pv_field_ $mddev_p pe_start $pv_align
|
check_pv_field_ $mddev_p pe_start $pv_align "--units b"
|
||||||
pvremove $mddev_p
|
|
||||||
elif [ "$alignment_offset" = "2048" ]; then
|
|
||||||
pv_align="258.00k"
|
|
||||||
pvcreate --metadatasize 128k $mddev_p
|
|
||||||
check_pv_field_ $mddev_p pe_start $pv_align
|
|
||||||
pvremove $mddev_p
|
pvremove $mddev_p
|
||||||
fi
|
fi
|
||||||
fi
|
|
||||||
|
@ -119,11 +119,11 @@ check_pv_field_ $dev1 pe_start $pv_align
|
|||||||
pvcreate --metadatasize 128k --metadatacopies 2 --dataalignment 3.5k $dev1
|
pvcreate --metadatasize 128k --metadatacopies 2 --dataalignment 3.5k $dev1
|
||||||
check_pv_field_ $dev1 pe_start $pv_align
|
check_pv_field_ $dev1 pe_start $pv_align
|
||||||
|
|
||||||
# data area is aligned to 64k by default,
|
# data area is aligned to 1M by default,
|
||||||
# data area start is shifted by the specified alignment_offset
|
# data area start is shifted by the specified alignment_offset
|
||||||
pv_align="195.50k"
|
pv_align="1052160B" # 1048576 + (7*512)
|
||||||
pvcreate --metadatasize 128k --dataalignmentoffset 7s $dev1
|
pvcreate --metadatasize 128k --dataalignmentoffset 7s $dev1
|
||||||
check_pv_field_ $dev1 pe_start $pv_align
|
check_pv_field_ $dev1 pe_start $pv_align "--units b"
|
||||||
|
|
||||||
# 2nd metadata area is created without problems when
|
# 2nd metadata area is created without problems when
|
||||||
# data area start is shifted by the specified alignment_offset
|
# data area start is shifted by the specified alignment_offset
|
||||||
|
@ -57,7 +57,7 @@ test_snapshot_mount()
|
|||||||
# FIXME add more topology-specific tests and validation (striped LVs, etc)
|
# FIXME add more topology-specific tests and validation (striped LVs, etc)
|
||||||
|
|
||||||
NUM_DEVS=1
|
NUM_DEVS=1
|
||||||
PER_DEV_SIZE=33
|
PER_DEV_SIZE=34
|
||||||
DEV_SIZE=$(($NUM_DEVS*$PER_DEV_SIZE))
|
DEV_SIZE=$(($NUM_DEVS*$PER_DEV_SIZE))
|
||||||
|
|
||||||
# ---------------------------------------------
|
# ---------------------------------------------
|
||||||
|
@ -130,11 +130,11 @@ check_pv_field_ $dev1 pe_start 200.00k
|
|||||||
vgremove -f $vg
|
vgremove -f $vg
|
||||||
pvremove -f $dev1
|
pvremove -f $dev1
|
||||||
|
|
||||||
# data area is aligned to 64k by default,
|
# data area is aligned to 1M by default,
|
||||||
# data area start is shifted by the specified alignment_offset
|
# data area start is shifted by the specified alignment_offset
|
||||||
pv_align="195.50k"
|
pv_align="1052160B" # 1048576 + (7*512)
|
||||||
vgcreate -c n --metadatasize 128k --dataalignmentoffset 7s $vg $dev1
|
vgcreate -c n --metadatasize 128k --dataalignmentoffset 7s $vg $dev1
|
||||||
check_pv_field_ $dev1 pe_start $pv_align
|
check_pv_field_ $dev1 pe_start $pv_align "--units b"
|
||||||
vgremove -f $vg
|
vgremove -f $vg
|
||||||
pvremove -f $dev1
|
pvremove -f $dev1
|
||||||
|
|
||||||
|
@ -67,11 +67,11 @@ check_pv_field_ $dev1 pe_start 200.00k
|
|||||||
vgreduce $vg $dev1
|
vgreduce $vg $dev1
|
||||||
pvremove -f $dev1
|
pvremove -f $dev1
|
||||||
|
|
||||||
# data area is aligned to 64k by default,
|
# data area is aligned to 1M by default,
|
||||||
# data area start is shifted by the specified alignment_offset
|
# data area start is shifted by the specified alignment_offset
|
||||||
pv_align="195.50k"
|
pv_align="1052160B" # 1048576 + (7*512)
|
||||||
vgextend --metadatasize 128k --dataalignmentoffset 7s $vg $dev1
|
vgextend --metadatasize 128k --dataalignmentoffset 7s $vg $dev1
|
||||||
check_pv_field_ $dev1 pe_start $pv_align
|
check_pv_field_ $dev1 pe_start $pv_align "--units b"
|
||||||
vgremove -f $vg
|
vgremove -f $vg
|
||||||
pvremove -f $dev1
|
pvremove -f $dev1
|
||||||
|
|
||||||
|
@ -17,7 +17,7 @@ COMM() {
|
|||||||
LAST_TEST="$@"
|
LAST_TEST="$@"
|
||||||
}
|
}
|
||||||
|
|
||||||
prepare_pvs 5 257
|
prepare_pvs 5 258
|
||||||
# FIXME: parameterize lvm1 vs lvm2 metadata; most of these tests should run
|
# FIXME: parameterize lvm1 vs lvm2 metadata; most of these tests should run
|
||||||
# fine with lvm1 metadata as well; for now, just add disks 5 and 6 as lvm1
|
# fine with lvm1 metadata as well; for now, just add disks 5 and 6 as lvm1
|
||||||
# metadata
|
# metadata
|
||||||
|
@ -264,7 +264,7 @@ prepare_devs() {
|
|||||||
local n="$1"
|
local n="$1"
|
||||||
test -z "$n" && n=3
|
test -z "$n" && n=3
|
||||||
local devsize="$2"
|
local devsize="$2"
|
||||||
test -z "$devsize" && devsize=33
|
test -z "$devsize" && devsize=34
|
||||||
local pvname="$3"
|
local pvname="$3"
|
||||||
test -z "$pvname" && pvname="pv"
|
test -z "$pvname" && pvname="pv"
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user