From 3b9f78332b320a9950c8c3b183dec2fe51dda4f7 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Mon, 5 Aug 2024 13:20:58 -0500 Subject: [PATCH] integrity: add --integritysettings for kernel tuning The option can be used in multiple ways (like --cachesettings): --integritysettings key=val --integritysettings 'key1=val1 key2=val2' --integritysettings key1=val1 --integritysettings key2=val2 Can be used to tune kernel settings: journal_sectors journal_watermark commit_time bitmap_flush_interval allow_discards Described in the kernel documentation Documentation/admin-guide/device-mapper/dm-integrity.rst Not yet supported: lvs -o integritysettings lvchange --integritysettings lvextend --integritysettings journal_sectors --- device_mapper/all.h | 2 + device_mapper/libdm-deptree.c | 5 ++ lib/integrity/integrity.c | 9 +++ lib/metadata/integrity_manip.c | 32 ++++++++-- tools/args.h | 4 ++ tools/command-lines.in | 4 +- tools/lvconvert.c | 5 ++ tools/lvcreate.c | 7 ++- tools/toollib.c | 112 +++++++++++++++++++++++++++++++++ tools/toollib.h | 2 + 10 files changed, 173 insertions(+), 9 deletions(-) diff --git a/device_mapper/all.h b/device_mapper/all.h index 2bea150af..fb6030419 100644 --- a/device_mapper/all.h +++ b/device_mapper/all.h @@ -1023,6 +1023,7 @@ struct integrity_settings { uint32_t commit_time; uint32_t bitmap_flush_interval; uint64_t sectors_per_bit; + uint32_t allow_discards; unsigned journal_sectors_set:1; unsigned interleave_sectors_set:1; @@ -1031,6 +1032,7 @@ struct integrity_settings { unsigned commit_time_set:1; unsigned bitmap_flush_interval_set:1; unsigned sectors_per_bit_set:1; + unsigned allow_discards_set:1; }; int dm_tree_node_add_integrity_target(struct dm_tree_node *node, diff --git a/device_mapper/libdm-deptree.c b/device_mapper/libdm-deptree.c index 3140bbbf2..a99ac5543 100644 --- a/device_mapper/libdm-deptree.c +++ b/device_mapper/libdm-deptree.c @@ -2868,6 +2868,8 @@ static int _integrity_emit_segment_line(struct dm_task *dmt, count++; if 
(set->sectors_per_bit_set) count++; + if (set->allow_discards_set && set->allow_discards) + count++; EMIT_PARAMS(pos, "%s 0 %u %s %d fix_padding block_size:%u internal_hash:%s", origin_dev, @@ -2904,6 +2906,9 @@ static int _integrity_emit_segment_line(struct dm_task *dmt, if (set->sectors_per_bit_set) EMIT_PARAMS(pos, " sectors_per_bit:%llu", (unsigned long long)set->sectors_per_bit); + if (set->allow_discards_set && set->allow_discards) + EMIT_PARAMS(pos, " allow_discards"); + if (!dm_task_secure_data(dmt)) stack; diff --git a/lib/integrity/integrity.c b/lib/integrity/integrity.c index 1576c856f..b630f8e09 100644 --- a/lib/integrity/integrity.c +++ b/lib/integrity/integrity.c @@ -156,6 +156,12 @@ static int _integrity_text_import(struct lv_segment *seg, set->sectors_per_bit_set = 1; } + if (dm_config_has_node(sn, "allow_discards")) { + if (!dm_config_get_uint32(sn, "allow_discards", &set->allow_discards)) + return SEG_LOG_ERROR("Unknown integrity_setting in"); + set->allow_discards_set = 1; + } + seg->origin = origin_lv; seg->integrity_meta_dev = meta_lv; seg->lv->status |= INTEGRITY; @@ -217,6 +223,9 @@ static int _integrity_text_export(const struct lv_segment *seg, if (set->sectors_per_bit) outf(f, "sectors_per_bit = %llu", (unsigned long long)set->sectors_per_bit); + if (set->allow_discards_set) + outf(f, "allow_discards = %u", set->allow_discards); + return 1; } diff --git a/lib/metadata/integrity_manip.c b/lib/metadata/integrity_manip.c index 4ac517d63..3f3f6f61d 100644 --- a/lib/metadata/integrity_manip.c +++ b/lib/metadata/integrity_manip.c @@ -50,7 +50,8 @@ int lv_is_integrity_origin(const struct logical_volume *lv) * plus some initial space for journals. * (again from trial and error testing.) 
*/ -static uint64_t _lv_size_bytes_to_integrity_meta_bytes(uint64_t lv_size_bytes) +static uint64_t _lv_size_bytes_to_integrity_meta_bytes(uint64_t lv_size_bytes, uint32_t journal_sectors, + uint32_t extent_size) { uint64_t meta_bytes; uint64_t initial_bytes; @@ -58,8 +59,16 @@ static uint64_t _lv_size_bytes_to_integrity_meta_bytes(uint64_t lv_size_bytes) /* Every 500M of data needs 4M of metadata. */ meta_bytes = ((lv_size_bytes / (500 * ONE_MB_IN_BYTES)) + 1) * (4 * ONE_MB_IN_BYTES); + if (journal_sectors) { + /* for calculating the metadata LV size for the specified + journal size, round the specified journal size up to the + nearest extent. extent_size is in sectors. */ + initial_bytes = dm_round_up(journal_sectors, extent_size) * 512; + goto out; + } + /* - * initial space used for journals + * initial space used for journals (when journal size is not specified): * lv_size <= 512M -> 4M * lv_size <= 1G -> 8M * lv_size <= 4G -> 32M @@ -73,7 +82,10 @@ static uint64_t _lv_size_bytes_to_integrity_meta_bytes(uint64_t lv_size_bytes) initial_bytes = 32 * ONE_MB_IN_BYTES; else if (lv_size_bytes > (4ULL * ONE_GB_IN_BYTES)) initial_bytes = 64 * ONE_MB_IN_BYTES; - + out: + log_debug("integrity_meta_bytes %llu from lv_size_bytes %llu meta_bytes %llu initial_bytes %llu journal_sectors %u", + (unsigned long long)(meta_bytes+initial_bytes), (unsigned long long)lv_size_bytes, + (unsigned long long)meta_bytes, (unsigned long long)initial_bytes, journal_sectors); return meta_bytes + initial_bytes; } @@ -84,6 +96,7 @@ static uint64_t _lv_size_bytes_to_integrity_meta_bytes(uint64_t lv_size_bytes) static int _lv_create_integrity_metadata(struct cmd_context *cmd, struct volume_group *vg, struct lvcreate_params *lp, + struct integrity_settings *settings, struct logical_volume **meta_lv) { char metaname[NAME_LEN] = { 0 }; @@ -115,7 +128,7 @@ static int _lv_create_integrity_metadata(struct cmd_context *cmd, lp_meta.pvh = lp->pvh; lv_size_bytes = (uint64_t)lp->extents * 
(uint64_t)vg->extent_size * 512; - meta_bytes = _lv_size_bytes_to_integrity_meta_bytes(lv_size_bytes); + meta_bytes = _lv_size_bytes_to_integrity_meta_bytes(lv_size_bytes, settings->journal_sectors, vg->extent_size); meta_sectors = meta_bytes / 512; lp_meta.extents = meta_sectors / vg->extent_size; @@ -181,13 +194,20 @@ int lv_extend_integrity_in_raid(struct logical_volume *lv, struct dm_list *pvh) } lv_size_bytes = lv_iorig->size * 512; - meta_bytes = _lv_size_bytes_to_integrity_meta_bytes(lv_size_bytes); + meta_bytes = _lv_size_bytes_to_integrity_meta_bytes(lv_size_bytes, 0, 0); meta_sectors = meta_bytes / 512; meta_extents = meta_sectors / vg->extent_size; prev_meta_sectors = lv_imeta->size; prev_meta_extents = prev_meta_sectors / vg->extent_size; + /* + * FIXME: allow --integritysettings journal_sectors to be used + * with lvextend to override the default journal_sectors + * calculation, but only if the LV is inactive (the journal + * sectors cannot be updated in reload.) + */ + if (meta_extents <= prev_meta_extents) { log_debug("extend not needed for imeta LV %s", lv_imeta->name); continue; @@ -597,7 +617,7 @@ int lv_add_integrity_to_raid(struct logical_volume *lv, struct integrity_setting lp.pvh = use_pvh; lp.extents = lv_image->size / vg->extent_size; - if (!_lv_create_integrity_metadata(cmd, vg, &lp, &meta_lv)) + if (!_lv_create_integrity_metadata(cmd, vg, &lp, settings, &meta_lv)) goto_bad; revert_meta_lvs++; diff --git a/tools/args.h b/tools/args.h index ae4fd05a2..7fc225171 100644 --- a/tools/args.h +++ b/tools/args.h @@ -1392,6 +1392,10 @@ arg(ignoreactivationskip_ARG, 'K', "ignoreactivationskip", 0, 0, 0, "Ignore the \"activation skip\" LV flag during activation\n" "to allow LVs with the flag set to be activated.\n") +arg(integritysettings_ARG, '\0', "integritysettings", string_VAL, ARG_GROUPABLE, 0, + "Specifies tunable kernel options for dm-integrity.\n" + "See \\fBlvmraid\\fP(7) for more information.\n") + arg(maps_ARG, 'm', "maps", 0, 0, 0, 
"#lvdisplay\n" "Display the mapping of logical extents to PVs and physical extents.\n" diff --git a/tools/command-lines.in b/tools/command-lines.in index 3ad5d3c46..cf2a66076 100644 --- a/tools/command-lines.in +++ b/tools/command-lines.in @@ -843,7 +843,7 @@ FLAGS: SECONDARY_SYNTAX --- lvconvert --raidintegrity Bool LV_raid -OO: --raidintegritymode String, --raidintegrityblocksize Number, OO_LVCONVERT +OO: --raidintegritymode String, --raidintegrityblocksize Number, --integritysettings String, OO_LVCONVERT OP: PV ... ID: lvconvert_integrity DESC: Add or remove data integrity checksums to raid images. @@ -869,7 +869,7 @@ OO_LVCREATE_VDO: --compression Bool, --deduplication Bool, --vdosettings String OO_LVCREATE_THINPOOL: --discards Discards, --errorwhenfull Bool, --pooldatavdo Bool, OO_LVCREATE_VDO, OO_LVCREATE_POOL OO_LVCREATE_RAID: --regionsize RegionSize, --minrecoveryrate SizeKB, --maxrecoveryrate SizeKB, ---raidintegrity Bool, --raidintegritymode String, --raidintegrityblocksize Number +--raidintegrity Bool, --raidintegritymode String, --raidintegrityblocksize Number, --integritysettings String --- diff --git a/tools/lvconvert.c b/tools/lvconvert.c index 65e0d51cd..c575f823b 100644 --- a/tools/lvconvert.c +++ b/tools/lvconvert.c @@ -6519,6 +6519,11 @@ static int _lvconvert_integrity_single(struct cmd_context *cmd, struct integrity_settings settings = { .tag_size = 0 }; int ret; + if (arg_is_set(cmd, integritysettings_ARG)) { + if (!get_integrity_settings(cmd, &settings)) + return_ECMD_FAILED; + } + if (!integrity_mode_set(arg_str_value(cmd, raidintegritymode_ARG, NULL), &settings)) return_ECMD_FAILED; diff --git a/tools/lvcreate.c b/tools/lvcreate.c index bfe4035a4..3c24c050d 100644 --- a/tools/lvcreate.c +++ b/tools/lvcreate.c @@ -615,6 +615,10 @@ static int _read_raid_params(struct cmd_context *cmd, if (!integrity_mode_set(arg_str_value(cmd, raidintegritymode_ARG, NULL), &lp->integrity_settings)) return_0; } + if (arg_is_set(cmd, integritysettings_ARG)) { + 
if (!get_integrity_settings(cmd, &lp->integrity_settings))
+				return_0;
+		}
 	}
 
 	return 1;
@@ -936,7 +940,8 @@ static int _lvcreate_params(struct cmd_context *cmd,
 	raidminrecoveryrate_ARG, \
 	raidintegrity_ARG, \
 	raidintegritymode_ARG, \
-	raidintegrityblocksize_ARG
+	raidintegrityblocksize_ARG, \
+	integritysettings_ARG
 
 #define SIZE_ARGS \
 	extents_ARG,\
diff --git a/tools/toollib.c b/tools/toollib.c
index 62cc5cf65..5031b2cc2 100644
--- a/tools/toollib.c
+++ b/tools/toollib.c
@@ -1627,6 +1627,166 @@ int get_writecache_settings(struct cmd_context *cmd, struct writecache_settings
 	return 1;
 }
 
+/*
+ * Parse val as an unsigned 32-bit decimal number into *result.
+ * Unlike a bare sscanf("%u"), this rejects a leading '-' (which %u would
+ * silently wrap) and trailing characters such as "10abc".
+ * Returns 1 on success, 0 if val is not a plain unsigned number.
+ */
+static int _get_integrity_uint32(const char *val, uint32_t *result)
+{
+	unsigned int num;
+	char extra;
+
+	if (val[0] == '-')
+		return 0;
+
+	/* a second conversion succeeding means junk follows the number */
+	if (sscanf(val, "%u%c", &num, &extra) != 1)
+		return 0;
+
+	*result = num;
+	return 1;
+}
+
+/*
+ * Apply one "key=val" pair from --integritysettings to settings.
+ *
+ * Recognized keys: journal_sectors, journal_watermark, commit_time,
+ * bitmap_flush_interval, allow_discards.  An accepted key stores its
+ * value and sets the matching *_set flag; settings is only written
+ * after the value has been validated.
+ *
+ * Returns 1 on success, 0 (after logging an error) if the key is not
+ * recognized or the value is malformed or out of range.
+ */
+static int _get_one_integrity_setting(struct cmd_context *cmd, struct integrity_settings *settings,
+				      char *key, char *val)
+{
+	uint32_t num;
+	uint32_t size_mb;
+
+	/*
+	 * Some settings handled by other options:
+	 * settings->mode from --raidintegritymode
+	 * settings->block_size from --raidintegrityblocksize
+	 */
+
+	/* every supported key takes a single unsigned number */
+	if (!_get_integrity_uint32(val, &num))
+		goto_bad;
+
+	/* always set in metadata and on table line */
+
+	if (!strcmp(key, "journal_sectors")) {
+		/* 2048 sectors of 512 bytes per MiB */
+		size_mb = num / 2048;
+		if (size_mb < 4 || size_mb > 1024) {
+			log_error("Invalid raid integrity journal size %u MiB (use 4-1024 MiB).", size_mb);
+			goto_bad;
+		}
+		settings->journal_sectors = num;
+		settings->journal_sectors_set = 1;
+		return 1;
+	}
+
+	/* optional, not included in metadata or table line unless set */
+
+	if (!strcmp(key, "journal_watermark")) {
+		if (num > 100)
+			goto_bad;
+		settings->journal_watermark = num;
+		settings->journal_watermark_set = 1;
+		return 1;
+	}
+
+	if (!strcmp(key, "commit_time")) {
+		settings->commit_time = num;
+		settings->commit_time_set = 1;
+		return 1;
+	}
+
+	if (!strcmp(key, "bitmap_flush_interval")) {
+		settings->bitmap_flush_interval = num;
+		settings->bitmap_flush_interval_set = 1;
+		return 1;
+	}
+
+	if (!strcmp(key, "allow_discards")) {
+		/* boolean: only 0 or 1 */
+		if (num > 1)
+			goto_bad;
+		settings->allow_discards = num;
+		settings->allow_discards_set = 1;
+		return 1;
+	}
+
+	/* an unrecognized key is an error, so typos are not silently dropped */
+ bad:
+	log_error("Invalid setting: %s", key);
+	return 0;
+}
+
+/*
+ * Collect all --integritysettings options from the command line into
+ * settings.
+ *
+ * Returns 1 on success, 0 (after logging an error) at the first pair
+ * that cannot be scanned or that _get_one_integrity_setting() rejects.
+ */
+int get_integrity_settings(struct cmd_context *cmd, struct integrity_settings *settings)
+{
+	struct arg_value_group_list *group;
+	const char *str;
+	char key[64];
+	char val[64];
+	size_t str_len;
+	size_t key_len;
+	size_t pos;
+	int num;
+
+	/*
+	 * "grouped" means that multiple --integritysettings options can be used.
+	 * Each option is also allowed to contain multiple key = val pairs.
+	 */
+
+	dm_list_iterate_items(group, &cmd->arg_value_groups) {
+		if (!grouped_arg_is_set(group->arg_values, integritysettings_ARG))
+			continue;
+
+		if (!(str = grouped_arg_str_value(group->arg_values, integritysettings_ARG, NULL)))
+			break;
+
+		str_len = strlen(str);
+		pos = 0;
+
+		while (pos < str_len) {
+			/* scan for "key1=val1 key2 = val2 key3= val3" */
+
+			memset(key, 0, sizeof(key));
+			memset(val, 0, sizeof(val));
+			num = 0;
+
+			if (sscanf(str + pos, " %63[^=]=%63s %n", key, val, &num) != 2 || num <= 0) {
+				log_error("Invalid setting at: %s", str+pos);
+				return 0;
+			}
+
+			pos += num;
+
+			/* %[^=] also captures blanks preceding '=', drop them */
+			key_len = strlen(key);
+			while (key_len && (key[key_len - 1] == ' ' || key[key_len - 1] == '\t'))
+				key[--key_len] = '\0';
+
+			if (!_get_one_integrity_setting(cmd, settings, key, val))
+				return_0;
+		}
+	}
+
+	return 1;
+}
+
 /* FIXME move to lib */
 static int _pv_change_tag(struct physical_volume *pv, const char *tag, int addtag)
 {
diff --git a/tools/toollib.h b/tools/toollib.h
index abf3726a3..5671229ed 100644
--- a/tools/toollib.h
+++ b/tools/toollib.h
@@ -227,6 +227,8 @@ int get_vdo_settings(struct cmd_context *cmd,
 int get_writecache_settings(struct cmd_context *cmd, struct writecache_settings *settings,
 			    uint32_t *block_size_sectors);
 
+int get_integrity_settings(struct cmd_context *cmd, struct integrity_settings *settings);
+
 int change_tag(struct cmd_context *cmd,
struct volume_group *vg, struct logical_volume *lv, struct physical_volume *pv, int arg);