diff --git a/configure b/configure index 7c6bd48d2..e2299ee91 100755 --- a/configure +++ b/configure @@ -747,6 +747,7 @@ BUILD_DMFILEMAPD BUILD_LOCKDDLM_CONTROL BUILD_LOCKDDLM BUILD_LOCKDSANLOCK +BUILD_LOCKDIDM BUILD_LVMLOCKD BUILD_LVMPOLLD BUILD_LVMDBUSD @@ -782,6 +783,8 @@ LOCKD_DLM_LIBS LOCKD_DLM_CFLAGS LOCKD_SANLOCK_LIBS LOCKD_SANLOCK_CFLAGS +LOCKD_IDM_LIBS +LOCKD_IDM_CFLAGS VALGRIND_LIBS VALGRIND_CFLAGS GENPNG @@ -946,6 +949,7 @@ enable_lvmpolld enable_lvmlockd_sanlock enable_lvmlockd_dlm enable_lvmlockd_dlmcontrol +enable_lvmlockd_idm enable_use_lvmlockd with_lvmlockd_pidfile enable_use_lvmpolld @@ -1019,6 +1023,8 @@ LOCKD_DLM_CFLAGS LOCKD_DLM_LIBS LOCKD_DLM_CONTROL_CFLAGS LOCKD_DLM_CONTROL_LIBS +LOCKD_IDM_CFLAGS +LOCKD_IDM_LIBS NOTIFY_DBUS_CFLAGS NOTIFY_DBUS_LIBS BLKID_CFLAGS @@ -1678,6 +1684,7 @@ Optional Features: --enable-lvmlockd-dlm enable the LVM lock daemon using dlm --enable-lvmlockd-dlmcontrol enable lvmlockd remote refresh using libdlmcontrol + --enable-lvmlockd-idm enable the LVM lock daemon using idm --disable-use-lvmlockd disable usage of LVM lock daemon --disable-use-lvmpolld disable usage of LVM Poll Daemon --enable-dmfilemapd enable the dmstats filemap daemon @@ -1832,6 +1839,10 @@ Some influential environment variables: C compiler flags for LOCKD_DLM_CONTROL, overriding pkg-config LOCKD_DLM_CONTROL_LIBS linker flags for LOCKD_DLM_CONTROL, overriding pkg-config + LOCKD_IDM_CFLAGS + C compiler flags for LOCKD_IDM, overriding pkg-config + LOCKD_IDM_LIBS + linker flags for LOCKD_IDM, overriding pkg-config NOTIFY_DBUS_CFLAGS C compiler flags for NOTIFY_DBUS, overriding pkg-config NOTIFY_DBUS_LIBS @@ -3124,6 +3135,7 @@ case "$host_os" in LOCKDSANLOCK=no LOCKDDLM=no LOCKDDLM_CONTROL=no + LOCKDIDM=no ODIRECT=yes DM_IOCTLS=yes SELINUX=yes @@ -11191,6 +11203,167 @@ $as_echo "#define LOCKDDLM_CONTROL_SUPPORT 1" >>confdefs.h BUILD_LVMLOCKD=yes fi +################################################################################ +{ $as_echo 
"$as_me:${as_lineno-$LINENO}: checking whether to build lvmlockdidm" >&5 +$as_echo_n "checking whether to build lvmlockdidm... " >&6; } +# Check whether --enable-lvmlockd-idm was given. +if test "${enable_lvmlockd_idm+set}" = set; then : + enableval=$enable_lvmlockd_idm; LOCKDIDM=$enableval +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $LOCKDIDM" >&5 +$as_echo "$LOCKDIDM" >&6; } + +BUILD_LOCKDIDM=$LOCKDIDM + +if test "$BUILD_LOCKDIDM" = yes; then + +pkg_failed=no +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for LOCKD_IDM" >&5 +$as_echo_n "checking for LOCKD_IDM... " >&6; } + +if test -n "$LOCKD_IDM_CFLAGS"; then + pkg_cv_LOCKD_IDM_CFLAGS="$LOCKD_IDM_CFLAGS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ + { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libseagate_ilm >= 0.1.0\""; } >&5 + ($PKG_CONFIG --exists --print-errors "libseagate_ilm >= 0.1.0") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_LOCKD_IDM_CFLAGS=`$PKG_CONFIG --cflags "libseagate_ilm >= 0.1.0" 2>/dev/null` + test "x$?" != "x0" && pkg_failed=yes +else + pkg_failed=yes +fi + else + pkg_failed=untried +fi +if test -n "$LOCKD_IDM_LIBS"; then + pkg_cv_LOCKD_IDM_LIBS="$LOCKD_IDM_LIBS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ + { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libseagate_ilm >= 0.1.0\""; } >&5 + ($PKG_CONFIG --exists --print-errors "libseagate_ilm >= 0.1.0") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_LOCKD_IDM_LIBS=`$PKG_CONFIG --libs "libseagate_ilm >= 0.1.0" 2>/dev/null` + test "x$?" 
!= "x0" && pkg_failed=yes +else + pkg_failed=yes +fi + else + pkg_failed=untried +fi + + + +if test $pkg_failed = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + +if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then + _pkg_short_errors_supported=yes +else + _pkg_short_errors_supported=no +fi + if test $_pkg_short_errors_supported = yes; then + LOCKD_IDM_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libseagate_ilm >= 0.1.0" 2>&1` + else + LOCKD_IDM_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libseagate_ilm >= 0.1.0" 2>&1` + fi + # Put the nasty error message in config.log where it belongs + echo "$LOCKD_IDM_PKG_ERRORS" >&5 + + $bailout +elif test $pkg_failed = untried; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + $bailout +else + LOCKD_IDM_CFLAGS=$pkg_cv_LOCKD_IDM_CFLAGS + LOCKD_IDM_LIBS=$pkg_cv_LOCKD_IDM_LIBS + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } +fi + +pkg_failed=no +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for BLKID" >&5 +$as_echo_n "checking for BLKID... " >&6; } + +if test -n "$BLKID_CFLAGS"; then + pkg_cv_BLKID_CFLAGS="$BLKID_CFLAGS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ + { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"blkid >= 2.24\""; } >&5 + ($PKG_CONFIG --exists --print-errors "blkid >= 2.24") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_BLKID_CFLAGS=`$PKG_CONFIG --cflags "blkid >= 2.24" 2>/dev/null` + test "x$?" 
!= "x0" && pkg_failed=yes +else + pkg_failed=yes +fi + else + pkg_failed=untried +fi +if test -n "$BLKID_LIBS"; then + pkg_cv_BLKID_LIBS="$BLKID_LIBS" + elif test -n "$PKG_CONFIG"; then + if test -n "$PKG_CONFIG" && \ + { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"blkid >= 2.24\""; } >&5 + ($PKG_CONFIG --exists --print-errors "blkid >= 2.24") 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; then + pkg_cv_BLKID_LIBS=`$PKG_CONFIG --libs "blkid >= 2.24" 2>/dev/null` + test "x$?" != "x0" && pkg_failed=yes +else + pkg_failed=yes +fi + else + pkg_failed=untried +fi + + + +if test $pkg_failed = yes; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + +if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then + _pkg_short_errors_supported=yes +else + _pkg_short_errors_supported=no +fi + if test $_pkg_short_errors_supported = yes; then + BLKID_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "blkid >= 2.24" 2>&1` + else + BLKID_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "blkid >= 2.24" 2>&1` + fi + # Put the nasty error message in config.log where it belongs + echo "$BLKID_PKG_ERRORS" >&5 + + $bailout +elif test $pkg_failed = untried; then + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + $bailout +else + BLKID_CFLAGS=$pkg_cv_BLKID_CFLAGS + BLKID_LIBS=$pkg_cv_BLKID_LIBS + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + HAVE_LOCKD_IDM=yes +fi + +$as_echo "#define LOCKDIDM_SUPPORT 1" >>confdefs.h + + BUILD_LVMLOCKD=yes +fi + ################################################################################ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build lvmlockd" >&5 $as_echo_n "checking whether to build lvmlockd... 
" >&6; } diff --git a/configure.ac b/configure.ac index 1a49e7fe7..40acc49c2 100644 --- a/configure.ac +++ b/configure.ac @@ -41,6 +41,7 @@ case "$host_os" in LOCKDSANLOCK=no LOCKDDLM=no LOCKDDLM_CONTROL=no + LOCKDIDM=no ODIRECT=yes DM_IOCTLS=yes SELINUX=yes @@ -989,6 +990,25 @@ if test "$BUILD_LOCKDDLM_CONTROL" = yes; then BUILD_LVMLOCKD=yes fi +################################################################################ +dnl -- Build lvmlockdidm +AC_MSG_CHECKING(whether to build lvmlockdidm) +AC_ARG_ENABLE(lvmlockd-idm, + AC_HELP_STRING([--enable-lvmlockd-idm], + [enable the LVM lock daemon using idm]), + LOCKDIDM=$enableval) +AC_MSG_RESULT($LOCKDIDM) + +BUILD_LOCKDIDM=$LOCKDIDM + +dnl -- Look for Seagate IDM libraries +if test "$BUILD_LOCKDIDM" = yes; then + PKG_CHECK_MODULES(LOCKD_IDM, libseagate_ilm >= 0.1.0, [HAVE_LOCKD_IDM=yes], $bailout) + PKG_CHECK_MODULES(BLKID, blkid >= 2.24, [HAVE_LOCKD_IDM=yes], $bailout) + AC_DEFINE([LOCKDIDM_SUPPORT], 1, [Define to 1 to include code that uses lvmlockd IDM option.]) + BUILD_LVMLOCKD=yes +fi + ################################################################################ dnl -- Build lvmlockd AC_MSG_CHECKING(whether to build lvmlockd) diff --git a/daemons/lvmlockd/Makefile.in b/daemons/lvmlockd/Makefile.in index e69ab9127..91beb1ad8 100644 --- a/daemons/lvmlockd/Makefile.in +++ b/daemons/lvmlockd/Makefile.in @@ -30,6 +30,11 @@ ifeq ("@BUILD_LOCKDDLM@", "yes") LOCK_LIBS += -ldlmcontrol endif +ifeq ("@BUILD_LOCKDIDM@", "yes") + SOURCES += lvmlockd-idm.c + LOCK_LIBS += -lseagate_ilm -lblkid +endif + SOURCES2 = lvmlockctl.c TARGETS = lvmlockd lvmlockctl diff --git a/daemons/lvmlockd/lvmlockd-idm.c b/daemons/lvmlockd/lvmlockd-idm.c new file mode 100644 index 000000000..e9f50535c --- /dev/null +++ b/daemons/lvmlockd/lvmlockd-idm.c @@ -0,0 +1,837 @@ +/* + * Copyright (C) 2020-2021 Seagate Ltd. + * + * This file is part of LVM2. 
+ * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU Lesser General Public License v.2.1. + */ + +#define _XOPEN_SOURCE 500 /* pthread */ +#define _ISOC99_SOURCE + +#include "tools/tool.h" + +#include "daemon-server.h" +#include "lib/mm/xlate.h" + +#include "lvmlockd-internal.h" +#include "daemons/lvmlockd/lvmlockd-client.h" + +#include "ilm.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define IDM_TIMEOUT 60000 /* unit: millisecond, 60 seconds */ + +/* + * Each lockspace thread has its own In-Drive Mutex (IDM) lock manager's + * connection. After established socket connection, the lockspace has + * been created in IDM lock manager and afterwards use the socket file + * descriptor to send any requests for lock related operations. + */ + +struct lm_idm { + int sock; /* IDM lock manager connection */ +}; + +struct rd_idm { + struct idm_lock_id id; + struct idm_lock_op op; + uint64_t vb_timestamp; + struct val_blk *vb; +}; + +int lm_data_size_idm(void) +{ + return sizeof(struct rd_idm); +} + +static uint64_t read_utc_us(void) +{ + struct timespec cur_time; + + clock_gettime(CLOCK_REALTIME, &cur_time); + + /* + * Convert to microseconds unit. IDM reserves the MSB in 8 bytes + * and the low 56 bits are used for timestamp; 56 bits can support + * calendar year to 2284, so it has 260 years for overflow. Thus it + * is quite safe for overflow issue when wrote this code. 
/*
 * scandir() filter for the SCSI device directory.
 *
 * Keeps only entries whose name consists of four decimal fields separated
 * by colons (the "x:x:x:x" form, e.g. "1:0:0:0").
 *
 * Returns 1 to select the entry and 0 to skip it.  The entry is also
 * skipped when the pattern fails to compile.
 */
static int lm_idm_scsi_directory_select(const struct dirent *s)
{
	regex_t regex;
	int matched;

	/* Only select directory with the format x:x:x:x */
	if (regcomp(&regex, "^[0-9]+:[0-9]+:[0-9]+:[0-9]+$", REG_EXTENDED))
		return 0;

	/* regexec() returns 0 when the name matches the pattern */
	matched = !regexec(&regex, s->d_name, 0, NULL, 0);

	/* Single release point for the compiled pattern */
	regfree(&regex);

	return matched;
}
*/ + if (!strcmp(s->d_name, ".") || !strcmp(s->d_name, "..")) + return 0; + } + + return 1; +} + +static int lm_idm_scsi_find_block_node(const char *blk_path, char **blk_dev) +{ + struct dirent **dir_list; + int dir_num; + + dir_num = scandir(blk_path, &dir_list, lm_idm_scsi_block_node_select, NULL); + if (dir_num < 0) { + log_error("Cannot find valid directory entry in %s", blk_path); + return -1; + } + + /* + * Should have only one block name under the path, if the dir_num is + * not 1 (e.g. 0 or any number bigger than 1), it must be wrong and + * should never happen. + */ + if (dir_num == 1) + *blk_dev = strdup(dir_list[0]->d_name); + else + *blk_dev = NULL; + + lm_idm_free_dir_list(dir_list, dir_num); + + if (!*blk_dev) + return -1; + + return dir_num; +} + +static int lm_idm_scsi_search_propeller_partition(char *dev) +{ + int i, nparts; + blkid_probe pr; + blkid_partlist ls; + int found = -1; + + pr = blkid_new_probe_from_filename(dev); + if (!pr) { + log_error("%s: failed to create a new libblkid probe", dev); + return -1; + } + + /* Binary interface */ + ls = blkid_probe_get_partitions(pr); + if (!ls) { + log_error("%s: failed to read partitions", dev); + return -1; + } + + /* List partitions */ + nparts = blkid_partlist_numof_partitions(ls); + if (!nparts) + goto done; + + for (i = 0; i < nparts; i++) { + const char *p; + blkid_partition par = blkid_partlist_get_partition(ls, i); + + p = blkid_partition_get_name(par); + if (p) { + log_debug("partition name='%s'", p); + + if (!strcmp(p, "propeller")) + found = blkid_partition_get_partno(par); + } + + if (found >= 0) + break; + } + +done: + blkid_free_probe(pr); + return found; +} + +static char *lm_idm_scsi_get_block_device_node(const char *scsi_path) +{ + char *blk_path = NULL; + char *blk_dev = NULL; + char *dev_node = NULL; + int ret; + + /* + * Locate the "block" directory, such like: + * /sys/bus/scsi/devices/1:0:0:0/block + */ + ret = asprintf(&blk_path, "%s/%s", scsi_path, "block"); + if (ret < 0) { + 
log_error("Fail to allocate block path for %s", scsi_path); + goto fail; + } + + ret = lm_idm_scsi_find_block_dirctory(blk_path); + if (ret < 0) { + log_error("Fail to find block path %s", blk_path); + goto fail; + } + + /* + * Locate the block device name, such like: + * /sys/bus/scsi/devices/1:0:0:0/block/sdb + * + * After return from this function and if it makes success, + * the global variable "blk_dev" points to the block device + * name, in this example it points to string "sdb". + */ + ret = lm_idm_scsi_find_block_node(blk_path, &blk_dev); + if (ret < 0) { + log_error("Fail to find block node"); + goto fail; + } + + ret = asprintf(&dev_node, "/dev/%s", blk_dev); + if (ret < 0) { + log_error("Fail to allocate memory for blk node path"); + goto fail; + } + + ret = lm_idm_scsi_search_propeller_partition(dev_node); + if (ret < 0) + goto fail; + + free(blk_path); + free(blk_dev); + return dev_node; + +fail: + free(blk_path); + free(blk_dev); + free(dev_node); + return NULL; +} + +static int lm_idm_get_gl_lock_pv_list(void) +{ + struct dirent **dir_list; + char scsi_bus_path[PATH_MAX]; + char *drive_path; + int i, dir_num, ret; + + if (glb_lock_op.drive_num) + return 0; + + snprintf(scsi_bus_path, sizeof(scsi_bus_path), "%s%s", + SYSFS_ROOT, BUS_SCSI_DEVS); + + dir_num = scandir(scsi_bus_path, &dir_list, + lm_idm_scsi_directory_select, NULL); + if (dir_num < 0) { /* scsi mid level may not be loaded */ + log_error("Attached devices: none"); + return -1; + } + + for (i = 0; i < dir_num; i++) { + char *scsi_path; + + ret = asprintf(&scsi_path, "%s/%s", scsi_bus_path, + dir_list[i]->d_name); + if (ret < 0) { + log_error("Fail to allocate memory for scsi directory"); + goto failed; + } + + if (glb_lock_op.drive_num >= ILM_DRIVE_MAX_NUM) { + log_error("Global lock: drive number %d exceeds limitation (%d) ?!", + glb_lock_op.drive_num, ILM_DRIVE_MAX_NUM); + free(scsi_path); + goto failed; + } + + drive_path = lm_idm_scsi_get_block_device_node(scsi_path); + if 
(!drive_path) { + free(scsi_path); + continue; + } + + glb_lock_op.drives[glb_lock_op.drive_num] = drive_path; + glb_lock_op.drive_num++; + + free(scsi_path); + } + + lm_idm_free_dir_list(dir_list, dir_num); + return 0; + +failed: + lm_idm_free_dir_list(dir_list, dir_num); + + for (i = 0; i < glb_lock_op.drive_num; i++) { + if (glb_lock_op.drives[i]) { + free(glb_lock_op.drives[i]); + glb_lock_op.drives[i] = NULL; + } + } + + return -1; +} + +static void lm_idm_update_vb_timestamp(uint64_t *vb_timestamp) +{ + uint64_t utc_us = read_utc_us(); + + /* + * It's possible that the multiple nodes have no clock + * synchronization with microsecond prcision and the time + * is going backward. For this case, simply increment the + * existing timestamp and write out to drive. + */ + if (*vb_timestamp >= utc_us) + (*vb_timestamp)++; + else + *vb_timestamp = utc_us; +} + +int lm_prepare_lockspace_idm(struct lockspace *ls) +{ + struct lm_idm *lm = NULL; + + lm = malloc(sizeof(struct lm_idm)); + if (!lm) { + log_error("S %s prepare_lockspace_idm fail to allocate lm_idm for %s", + ls->name, ls->vg_name); + return -ENOMEM; + } + memset(lm, 0x0, sizeof(struct lm_idm)); + + ls->lm_data = lm; + log_debug("S %s prepare_lockspace_idm done", ls->name); + return 0; +} + +int lm_add_lockspace_idm(struct lockspace *ls, int adopt) +{ + char killpath[IDM_FAILURE_PATH_LEN]; + char killargs[IDM_FAILURE_ARGS_LEN]; + struct lm_idm *lmi = (struct lm_idm *)ls->lm_data; + int rv; + + if (daemon_test) + return 0; + + if (!strcmp(ls->name, S_NAME_GL_IDM)) { + /* + * Prepare the pv list for global lock, if the drive contains + * "propeller" partition, then this drive will be considered + * as a member of pv list. 
+ */ + rv = lm_idm_get_gl_lock_pv_list(); + if (rv < 0) { + log_error("S %s add_lockspace_idm fail to get pv list for glb lock", + ls->name); + return -EIO; + } else { + log_error("S %s add_lockspace_idm get pv list for glb lock", + ls->name); + } + } + + /* + * Construct the execution path for command "lvmlockctl" by using the + * path to the lvm binary and appending "lockctl". + */ + memset(killpath, 0, sizeof(killpath)); + snprintf(killpath, IDM_FAILURE_PATH_LEN, "%slockctl", LVM_PATH); + + /* Pass the argument "--kill vg_name" for killpath */ + memset(killargs, 0, sizeof(killargs)); + snprintf(killargs, IDM_FAILURE_ARGS_LEN, "--kill %s", ls->vg_name); + + /* Connect with IDM lock manager per every lockspace. */ + rv = ilm_connect(&lmi->sock); + if (rv < 0) { + log_error("S %s add_lockspace_idm fail to connect the lock manager %d", + ls->name, lmi->sock); + lmi->sock = 0; + rv = -EMANAGER; + goto fail; + } + + rv = ilm_set_killpath(lmi->sock, killpath, killargs); + if (rv < 0) { + log_error("S %s add_lockspace_idm fail to set kill path %d", + ls->name, rv); + rv = -EMANAGER; + goto fail; + } + + log_debug("S %s add_lockspace_idm kill path is: \"%s %s\"", + ls->name, killpath, killargs); + + log_debug("S %s add_lockspace_idm done", ls->name); + return 0; + +fail: + if (lmi && lmi->sock) + close(lmi->sock); + if (lmi) + free(lmi); + return rv; +} + +int lm_rem_lockspace_idm(struct lockspace *ls, int free_vg) +{ + struct lm_idm *lmi = (struct lm_idm *)ls->lm_data; + int i, rv = 0; + + if (daemon_test) + goto out; + + rv = ilm_disconnect(lmi->sock); + if (rv < 0) + log_error("S %s rem_lockspace_idm error %d", ls->name, rv); + + /* Release pv list for global lock */ + if (!strcmp(ls->name, "lvm_global")) { + for (i = 0; i < glb_lock_op.drive_num; i++) { + if (glb_lock_op.drives[i]) { + free(glb_lock_op.drives[i]); + glb_lock_op.drives[i] = NULL; + } + } + } + +out: + free(lmi); + ls->lm_data = NULL; + return rv; +} + +static int lm_add_resource_idm(struct lockspace 
*ls, struct resource *r) +{ + struct rd_idm *rdi = (struct rd_idm *)r->lm_data; + + if (r->type == LD_RT_GL || r->type == LD_RT_VG) { + rdi->vb = zalloc(sizeof(struct val_blk)); + if (!rdi->vb) + return -ENOMEM; + } + + return 0; +} + +int lm_rem_resource_idm(struct lockspace *ls, struct resource *r) +{ + struct rd_idm *rdi = (struct rd_idm *)r->lm_data; + + if (rdi->vb) + free(rdi->vb); + + memset(rdi, 0, sizeof(struct rd_idm)); + r->lm_init = 0; + return 0; +} + +static int to_idm_mode(int ld_mode) +{ + switch (ld_mode) { + case LD_LK_EX: + return IDM_MODE_EXCLUSIVE; + case LD_LK_SH: + return IDM_MODE_SHAREABLE; + default: + break; + }; + + return -1; +} + +int lm_lock_idm(struct lockspace *ls, struct resource *r, int ld_mode, + struct val_blk *vb_out, char *lv_uuid, struct pvs *pvs, + int adopt) +{ + struct lm_idm *lmi = (struct lm_idm *)ls->lm_data; + struct rd_idm *rdi = (struct rd_idm *)r->lm_data; + char **drive_path = NULL; + uint64_t timestamp; + int reset_vb = 0; + int rv, i; + + if (!r->lm_init) { + rv = lm_add_resource_idm(ls, r); + if (rv < 0) + return rv; + r->lm_init = 1; + } + + rdi->op.mode = to_idm_mode(ld_mode); + if (rv < 0) { + log_error("lock_idm invalid mode %d", ld_mode); + return -EINVAL; + } + + log_debug("S %s R %s lock_idm", ls->name, r->name); + + if (daemon_test) { + if (rdi->vb) { + vb_out->version = le16_to_cpu(rdi->vb->version); + vb_out->flags = le16_to_cpu(rdi->vb->flags); + vb_out->r_version = le32_to_cpu(rdi->vb->r_version); + } + return 0; + } + + rdi->op.timeout = IDM_TIMEOUT; + + /* + * Generate the UUID string, for RT_VG, it only needs to generate + * UUID string for VG level, for RT_LV, it needs to generate + * UUID strings for both VG and LV levels. At the end, these IDs + * are used as identifier for IDM in drive firmware. 
+ */ + if (r->type == LD_RT_VG || r->type == LD_RT_LV) + log_debug("S %s R %s VG uuid %s", ls->name, r->name, ls->vg_uuid); + if (r->type == LD_RT_LV) + log_debug("S %s R %s LV uuid %s", ls->name, r->name, lv_uuid); + + memset(&rdi->id, 0x0, sizeof(struct idm_lock_id)); + if (r->type == LD_RT_VG) { + uuid_read_format(rdi->id.vg_uuid, ls->vg_uuid); + } else if (r->type == LD_RT_LV) { + uuid_read_format(rdi->id.vg_uuid, ls->vg_uuid); + uuid_read_format(rdi->id.lv_uuid, lv_uuid); + } + + /* + * Establish the drive path list for lock, since different lock type + * has different drive list; the GL lock uses the global pv list, + * the VG lock uses the pv list spanned for the whole volume group, + * the LV lock uses the pv list for the logical volume. + */ + switch (r->type) { + case LD_RT_GL: + drive_path = glb_lock_op.drives; + rdi->op.drive_num = glb_lock_op.drive_num; + break; + case LD_RT_VG: + drive_path = (char **)ls->pvs.path; + rdi->op.drive_num = ls->pvs.num; + break; + case LD_RT_LV: + drive_path = (char **)pvs->path; + rdi->op.drive_num = pvs->num; + break; + default: + break; + } + + if (!drive_path) { + log_error("S %s R %s cannot find the valid drive path array", + ls->name, r->name); + return -EINVAL; + } + + if (rdi->op.drive_num >= ILM_DRIVE_MAX_NUM) { + log_error("S %s R %s exceeds limitation for drive path array", + ls->name, r->name); + return -EINVAL; + } + + for (i = 0; i < rdi->op.drive_num; i++) + rdi->op.drives[i] = drive_path[i]; + + log_debug("S %s R %s mode %d drive_num %d timeout %d", + ls->name, r->name, rdi->op.mode, + rdi->op.drive_num, rdi->op.timeout); + + for (i = 0; i < rdi->op.drive_num; i++) + log_debug("S %s R %s drive path[%d] %s", + ls->name, r->name, i, rdi->op.drives[i]); + + rv = ilm_lock(lmi->sock, &rdi->id, &rdi->op); + if (rv < 0) { + log_debug("S %s R %s lock_idm acquire mode %d rv %d", + ls->name, r->name, ld_mode, rv); + return -ELOCKIO; + } + + if (rdi->vb) { + rv = ilm_read_lvb(lmi->sock, &rdi->id, (char *)×tamp, + 
sizeof(uint64_t)); + + /* + * If fail to read value block, which might be caused by drive + * failure, notify up layer to invalidate metadata. + */ + if (rv < 0) { + log_error("S %s R %s lock_idm get_lvb error %d", + ls->name, r->name, rv); + reset_vb = 1; + + /* Reset timestamp */ + rdi->vb_timestamp = 0; + + /* + * If the cached timestamp mismatches with the stored value + * in the IDM, this means another host has updated timestamp + * for the new VB. Let's reset VB and notify up layer to + * invalidate metadata. + */ + } else if (rdi->vb_timestamp != timestamp) { + log_debug("S %s R %s lock_idm get lvb timestamp %lu:%lu", + ls->name, r->name, rdi->vb_timestamp, + timestamp); + + rdi->vb_timestamp = timestamp; + reset_vb = 1; + } + + if (reset_vb == 1) { + memset(rdi->vb, 0, sizeof(struct val_blk)); + memset(vb_out, 0, sizeof(struct val_blk)); + + /* + * The lock is still acquired, but the vb values has + * been invalidated. + */ + rv = 0; + goto out; + } + + /* Otherwise, copy the cached VB to up layer */ + memcpy(vb_out, rdi->vb, sizeof(struct val_blk)); + } + +out: + return rv; +} + +int lm_convert_idm(struct lockspace *ls, struct resource *r, + int ld_mode, uint32_t r_version) +{ + struct lm_idm *lmi = (struct lm_idm *)ls->lm_data; + struct rd_idm *rdi = (struct rd_idm *)r->lm_data; + int mode, rv; + + if (rdi->vb && r_version && (r->mode == LD_LK_EX)) { + if (!rdi->vb->version) { + /* first time vb has been written */ + rdi->vb->version = VAL_BLK_VERSION; + } + rdi->vb->r_version = r_version; + + log_debug("S %s R %s convert_idm set r_version %u", + ls->name, r->name, r_version); + + lm_idm_update_vb_timestamp(&rdi->vb_timestamp); + log_debug("S %s R %s convert_idm vb %x %x %u timestamp %lu", + ls->name, r->name, rdi->vb->version, rdi->vb->flags, + rdi->vb->r_version, rdi->vb_timestamp); + } + + mode = to_idm_mode(ld_mode); + if (mode < 0) { + log_error("S %s R %s convert_idm invalid mode %d", + ls->name, r->name, ld_mode); + return -EINVAL; + } + + 
log_debug("S %s R %s convert_idm", ls->name, r->name); + + if (daemon_test) + return 0; + + if (rdi->vb && r_version && (r->mode == LD_LK_EX)) { + rv = ilm_write_lvb(lmi->sock, &rdi->id, + (char *)rdi->vb_timestamp, sizeof(uint64_t)); + if (rv < 0) { + log_error("S %s R %s convert_idm write lvb error %d", + ls->name, r->name, rv); + return -ELMERR; + } + } + + rv = ilm_convert(lmi->sock, &rdi->id, mode); + if (rv < 0) + log_error("S %s R %s convert_idm convert error %d", + ls->name, r->name, rv); + + return rv; +} + +int lm_unlock_idm(struct lockspace *ls, struct resource *r, + uint32_t r_version, uint32_t lmu_flags) +{ + struct lm_idm *lmi = (struct lm_idm *)ls->lm_data; + struct rd_idm *rdi = (struct rd_idm *)r->lm_data; + int rv; + + if (rdi->vb && r_version && (r->mode == LD_LK_EX)) { + if (!rdi->vb->version) { + /* first time vb has been written */ + rdi->vb->version = VAL_BLK_VERSION; + } + if (r_version) + rdi->vb->r_version = r_version; + + lm_idm_update_vb_timestamp(&rdi->vb_timestamp); + log_debug("S %s R %s unlock_idm vb %x %x %u timestamp %lu", + ls->name, r->name, rdi->vb->version, rdi->vb->flags, + rdi->vb->r_version, rdi->vb_timestamp); + } + + log_debug("S %s R %s unlock_idm", ls->name, r->name); + + if (daemon_test) + return 0; + + if (rdi->vb && r_version && (r->mode == LD_LK_EX)) { + rv = ilm_write_lvb(lmi->sock, &rdi->id, + (char *)&rdi->vb_timestamp, sizeof(uint64_t)); + if (rv < 0) { + log_error("S %s R %s unlock_idm set_lvb error %d", + ls->name, r->name, rv); + return -ELMERR; + } + } + + rv = ilm_unlock(lmi->sock, &rdi->id); + if (rv < 0) + log_error("S %s R %s unlock_idm error %d", ls->name, r->name, rv); + + return rv; +} + +int lm_hosts_idm(struct lockspace *ls, int notify) +{ + struct resource *r; + struct lm_idm *lmi = (struct lm_idm *)ls->lm_data; + struct rd_idm *rdi; + int count, self, found_others = 0; + int rv; + + list_for_each_entry(r, &ls->resources, list) { + if (!r->lm_init) + continue; + + rdi = (struct rd_idm *)r->lm_data; 
+ + rv = ilm_get_host_count(lmi->sock, &rdi->id, &rdi->op, + &count, &self); + if (rv < 0) { + log_error("S %s lm_hosts_idm error %d", ls->name, rv); + return rv; + } + + /* Fixup: need to reduce self count */ + if (count > found_others) + found_others = count; + } + + return found_others; +} + +int lm_get_lockspaces_idm(struct list_head *ls_rejoin) +{ + /* TODO: Need to add support for adoption. */ + return -1; +} + +int lm_is_running_idm(void) +{ + int sock, rv; + + if (daemon_test) + return gl_use_idm; + + rv = ilm_connect(&sock); + if (rv < 0) { + log_error("Fail to connect seagate IDM lock manager %d", rv); + return 0; + } + + ilm_disconnect(sock); + return 1; +} diff --git a/daemons/lvmlockd/lvmlockd-internal.h b/daemons/lvmlockd/lvmlockd-internal.h index 14bdfeed0..06bf07eb5 100644 --- a/daemons/lvmlockd/lvmlockd-internal.h +++ b/daemons/lvmlockd/lvmlockd-internal.h @@ -20,6 +20,7 @@ #define R_NAME_GL "GLLK" #define R_NAME_VG "VGLK" #define S_NAME_GL_DLM "lvm_global" +#define S_NAME_GL_IDM "lvm_global" #define LVM_LS_PREFIX "lvm_" /* ls name is prefix + vg_name */ /* global lockspace name for sanlock is a vg name */ @@ -29,6 +30,7 @@ enum { LD_LM_UNUSED = 1, /* place holder so values match lib/locking/lvmlockd.h */ LD_LM_DLM = 2, LD_LM_SANLOCK = 3, + LD_LM_IDM = 4, }; /* operation types */ @@ -118,6 +120,11 @@ struct client { */ #define DEFAULT_MAX_RETRIES 4 +struct pvs { + const char **path; + int num; +}; + struct action { struct list_head list; uint32_t client_id; @@ -140,6 +147,7 @@ struct action { char vg_args[MAX_ARGS+1]; char lv_args[MAX_ARGS+1]; char vg_sysid[MAX_NAME+1]; + struct pvs pvs; /* PV list for idm */ }; struct resource { @@ -184,6 +192,7 @@ struct lockspace { uint64_t free_lock_offset; /* for sanlock, start search for free lock here */ int free_lock_sector_size; /* for sanlock */ int free_lock_align_size; /* for sanlock */ + struct pvs pvs; /* for idm: PV list */ uint32_t start_client_id; /* client_id that started the lockspace */ 
pthread_t thread; /* makes synchronous lock requests */ @@ -325,6 +334,7 @@ static inline int list_empty(const struct list_head *head) EXTERN int gl_type_static; EXTERN int gl_use_dlm; EXTERN int gl_use_sanlock; +EXTERN int gl_use_idm; EXTERN int gl_vg_removed; EXTERN char gl_lsname_dlm[MAX_NAME+1]; EXTERN char gl_lsname_sanlock[MAX_NAME+1]; @@ -619,4 +629,102 @@ static inline int lm_support_sanlock(void) #endif /* sanlock support */ +#ifdef LOCKDIDM_SUPPORT + +int lm_data_size_idm(void); +int lm_init_vg_idm(char *ls_name, char *vg_name, uint32_t flags, char *vg_args); +int lm_prepare_lockspace_idm(struct lockspace *ls); +int lm_add_lockspace_idm(struct lockspace *ls, int adopt); +int lm_rem_lockspace_idm(struct lockspace *ls, int free_vg); +int lm_lock_idm(struct lockspace *ls, struct resource *r, int ld_mode, + struct val_blk *vb_out, char *lv_uuid, struct pvs *pvs, + int adopt); +int lm_convert_idm(struct lockspace *ls, struct resource *r, + int ld_mode, uint32_t r_version); +int lm_unlock_idm(struct lockspace *ls, struct resource *r, + uint32_t r_version, uint32_t lmu_flags); +int lm_hosts_idm(struct lockspace *ls, int notify); +int lm_get_lockspaces_idm(struct list_head *ls_rejoin); +int lm_is_running_idm(void); +int lm_rem_resource_idm(struct lockspace *ls, struct resource *r); + +static inline int lm_support_idm(void) +{ + return 1; +} + +#else + +static inline int lm_data_size_idm(void) +{ + return -1; +} + +static inline int lm_init_vg_idm(char *ls_name, char *vg_name, uint32_t flags, + char *vg_args) +{ + return -1; +} + +static inline int lm_prepare_lockspace_idm(struct lockspace *ls) +{ + return -1; +} + +static inline int lm_add_lockspace_idm(struct lockspace *ls, int adopt) +{ + return -1; +} + +static inline int lm_rem_lockspace_idm(struct lockspace *ls, int free_vg) +{ + return -1; +} + +static inline int lm_lock_idm(struct lockspace *ls, struct resource *r, int ld_mode, + struct val_blk *vb_out, char *lv_uuid, struct pvs *pvs, + int adopt) +{ + 
return -1; +} + +static inline int lm_convert_idm(struct lockspace *ls, struct resource *r, + int ld_mode, uint32_t r_version) +{ + return -1; +} + +static inline int lm_unlock_idm(struct lockspace *ls, struct resource *r, + uint32_t r_version, uint32_t lmu_flags) +{ + return -1; +} + +static inline int lm_hosts_idm(struct lockspace *ls, int notify) +{ + return -1; +} + +static inline int lm_get_lockspaces_idm(struct list_head *ls_rejoin) +{ + return -1; +} + +static inline int lm_is_running_idm(void) +{ + return 0; +} + +static inline int lm_rem_resource_idm(struct lockspace *ls, struct resource *r) +{ + return -1; +} + +static inline int lm_support_idm(void) +{ + return 0; +} + +#endif /* Seagate IDM support */ + #endif /* _LVM_LVMLOCKD_INTERNAL_H */