diff --git a/Makefile.in b/Makefile.in index 3274f174f..a1c87468b 100644 --- a/Makefile.in +++ b/Makefile.in @@ -211,8 +211,7 @@ endif endif include test/unit/Makefile - -include device-mapper/Makefile +include device_mapper/Makefile ifneq ($(shell which ctags),) .PHONY: tags diff --git a/configure b/configure index 3b88dadce..60f51d645 100755 --- a/configure +++ b/configure @@ -15559,7 +15559,7 @@ _ACEOF ################################################################################ -ac_config_files="$ac_config_files Makefile make.tmpl daemons/Makefile daemons/clvmd/Makefile daemons/cmirrord/Makefile daemons/dmeventd/Makefile daemons/dmeventd/libdevmapper-event.pc daemons/dmeventd/plugins/Makefile daemons/dmeventd/plugins/lvm2/Makefile daemons/dmeventd/plugins/raid/Makefile daemons/dmeventd/plugins/mirror/Makefile daemons/dmeventd/plugins/snapshot/Makefile daemons/dmeventd/plugins/thin/Makefile daemons/dmfilemapd/Makefile daemons/lvmdbusd/Makefile daemons/lvmdbusd/lvmdbusd daemons/lvmdbusd/lvmdb.py daemons/lvmdbusd/lvm_shell_proxy.py daemons/lvmdbusd/path.py daemons/lvmetad/Makefile daemons/lvmpolld/Makefile daemons/lvmlockd/Makefile conf/Makefile conf/example.conf conf/lvmlocal.conf conf/command_profile_template.profile conf/metadata_profile_template.profile include/Makefile lib/Makefile lib/locking/Makefile include/lvm-version.h libdaemon/Makefile libdaemon/client/Makefile libdaemon/server/Makefile libdm/Makefile libdm/libdevmapper.pc liblvm/Makefile liblvm/liblvm2app.pc man/Makefile po/Makefile python/Makefile python/setup.py scripts/blkdeactivate.sh scripts/blk_availability_init_red_hat scripts/blk_availability_systemd_red_hat.service scripts/clvmd_init_red_hat scripts/cmirrord_init_red_hat scripts/com.redhat.lvmdbus1.service scripts/dm_event_systemd_red_hat.service scripts/dm_event_systemd_red_hat.socket scripts/lvm2_cluster_activation_red_hat.sh scripts/lvm2_cluster_activation_systemd_red_hat.service scripts/lvm2_clvmd_systemd_red_hat.service scripts/lvm2_cmirrord_systemd_red_hat.service scripts/lvm2_lvmdbusd_systemd_red_hat.service scripts/lvm2_lvmetad_init_red_hat scripts/lvm2_lvmetad_systemd_red_hat.service scripts/lvm2_lvmetad_systemd_red_hat.socket scripts/lvm2_lvmpolld_init_red_hat scripts/lvm2_lvmpolld_systemd_red_hat.service scripts/lvm2_lvmpolld_systemd_red_hat.socket scripts/lvm2_lvmlockd_systemd_red_hat.service scripts/lvm2_lvmlocking_systemd_red_hat.service scripts/lvm2_monitoring_init_red_hat scripts/lvm2_monitoring_systemd_red_hat.service scripts/lvm2_pvscan_systemd_red_hat@.service scripts/lvm2_tmpfiles_red_hat.conf scripts/lvmdump.sh scripts/Makefile test/Makefile test/api/Makefile test/unit/Makefile tools/Makefile udev/Makefile" +ac_config_files="$ac_config_files Makefile make.tmpl libdm/make.tmpl daemons/Makefile daemons/clvmd/Makefile daemons/cmirrord/Makefile daemons/dmeventd/Makefile daemons/dmeventd/libdevmapper-event.pc daemons/dmeventd/plugins/Makefile daemons/dmeventd/plugins/lvm2/Makefile daemons/dmeventd/plugins/raid/Makefile daemons/dmeventd/plugins/mirror/Makefile daemons/dmeventd/plugins/snapshot/Makefile daemons/dmeventd/plugins/thin/Makefile daemons/dmfilemapd/Makefile daemons/lvmdbusd/Makefile daemons/lvmdbusd/lvmdbusd daemons/lvmdbusd/lvmdb.py daemons/lvmdbusd/lvm_shell_proxy.py daemons/lvmdbusd/path.py daemons/lvmetad/Makefile daemons/lvmpolld/Makefile daemons/lvmlockd/Makefile conf/Makefile conf/example.conf conf/lvmlocal.conf conf/command_profile_template.profile conf/metadata_profile_template.profile include/Makefile lib/Makefile 
lib/locking/Makefile include/lvm-version.h libdaemon/Makefile libdaemon/client/Makefile libdaemon/server/Makefile libdm/Makefile libdm/libdevmapper.pc liblvm/Makefile liblvm/liblvm2app.pc man/Makefile po/Makefile python/Makefile python/setup.py scripts/blkdeactivate.sh scripts/blk_availability_init_red_hat scripts/blk_availability_systemd_red_hat.service scripts/clvmd_init_red_hat scripts/cmirrord_init_red_hat scripts/com.redhat.lvmdbus1.service scripts/dm_event_systemd_red_hat.service scripts/dm_event_systemd_red_hat.socket scripts/lvm2_cluster_activation_red_hat.sh scripts/lvm2_cluster_activation_systemd_red_hat.service scripts/lvm2_clvmd_systemd_red_hat.service scripts/lvm2_cmirrord_systemd_red_hat.service scripts/lvm2_lvmdbusd_systemd_red_hat.service scripts/lvm2_lvmetad_init_red_hat scripts/lvm2_lvmetad_systemd_red_hat.service scripts/lvm2_lvmetad_systemd_red_hat.socket scripts/lvm2_lvmpolld_init_red_hat scripts/lvm2_lvmpolld_systemd_red_hat.service scripts/lvm2_lvmpolld_systemd_red_hat.socket scripts/lvm2_lvmlockd_systemd_red_hat.service scripts/lvm2_lvmlocking_systemd_red_hat.service scripts/lvm2_monitoring_init_red_hat scripts/lvm2_monitoring_systemd_red_hat.service scripts/lvm2_pvscan_systemd_red_hat@.service scripts/lvm2_tmpfiles_red_hat.conf scripts/lvmdump.sh scripts/Makefile test/Makefile test/api/Makefile test/unit/Makefile tools/Makefile udev/Makefile" cat >confcache <<\_ACEOF # This file is a shell script that caches the results of configure @@ -16256,6 +16256,7 @@ do "include/configure.h") CONFIG_HEADERS="$CONFIG_HEADERS include/configure.h" ;; "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; "make.tmpl") CONFIG_FILES="$CONFIG_FILES make.tmpl" ;; + "libdm/make.tmpl") CONFIG_FILES="$CONFIG_FILES libdm/make.tmpl" ;; "daemons/Makefile") CONFIG_FILES="$CONFIG_FILES daemons/Makefile" ;; "daemons/clvmd/Makefile") CONFIG_FILES="$CONFIG_FILES daemons/clvmd/Makefile" ;; "daemons/cmirrord/Makefile") CONFIG_FILES="$CONFIG_FILES daemons/cmirrord/Makefile" ;; diff --git a/configure.ac b/configure.ac index a05f051c4..8dc9c189c 100644 --- a/configure.ac +++ b/configure.ac @@ -2088,6 +2088,7 @@ dnl -- keep utility scripts running properly AC_CONFIG_FILES([ Makefile make.tmpl +libdm/make.tmpl daemons/Makefile daemons/clvmd/Makefile daemons/cmirrord/Makefile diff --git a/daemons/clvmd/Makefile.in b/daemons/clvmd/Makefile.in index 83af00e20..47a3411e4 100644 --- a/daemons/clvmd/Makefile.in +++ b/daemons/clvmd/Makefile.in @@ -74,7 +74,7 @@ TARGETS = \ include $(top_builddir)/make.tmpl -LIBS += $(LVMINTERNAL_LIBS) -ldevmapper $(PTHREAD_LIBS) -laio +LIBS += $(LVMINTERNAL_LIBS) $(PTHREAD_LIBS) -laio CFLAGS += -fno-strict-aliasing $(EXTRA_EXEC_CFLAGS) INSTALL_TARGETS = \ diff --git a/daemons/cmirrord/Makefile.in b/daemons/cmirrord/Makefile.in index 96e0db8ce..fc0ef6d5a 100644 --- a/daemons/cmirrord/Makefile.in +++ b/daemons/cmirrord/Makefile.in @@ -26,7 +26,6 @@ TARGETS = cmirrord include $(top_builddir)/make.tmpl -LIBS += -ldevmapper LMLIBS += $(CPG_LIBS) $(SACKPT_LIBS) CFLAGS += $(CPG_CFLAGS) $(SACKPT_CFLAGS) $(EXTRA_EXEC_CFLAGS) LDFLAGS += $(EXTRA_EXEC_LDFLAGS) $(ELDFLAGS) diff --git a/daemons/cmirrord/cluster.h b/daemons/cmirrord/cluster.h index 0efbd64bf..54ddd79ec 100644 --- a/daemons/cmirrord/cluster.h +++ b/daemons/cmirrord/cluster.h @@ -12,8 +12,8 @@ #ifndef _LVM_CLOG_CLUSTER_H #define _LVM_CLOG_CLUSTER_H -#include "libdm/misc/dm-log-userspace.h" -#include "libdm/libdevmapper.h" +#include "device_mapper/misc/dm-log-userspace.h" +#include "device_mapper/libdevmapper.h" #define 
DM_ULOG_RESPONSE 0x1000U /* in last byte of 32-bit value */ #define DM_ULOG_CHECKPOINT_READY 21 diff --git a/daemons/cmirrord/functions.h b/daemons/cmirrord/functions.h index c770459a9..8a7301d01 100644 --- a/daemons/cmirrord/functions.h +++ b/daemons/cmirrord/functions.h @@ -12,7 +12,7 @@ #ifndef _LVM_CLOG_FUNCTIONS_H #define _LVM_CLOG_FUNCTIONS_H -#include "libdm/misc/dm-log-userspace.h" +#include "device_mapper/misc/dm-log-userspace.h" #include "cluster.h" #define LOG_RESUMED 1 diff --git a/daemons/dmeventd/Makefile.in b/daemons/dmeventd/Makefile.in index d5241ebfb..e43bb2b2a 100644 --- a/daemons/dmeventd/Makefile.in +++ b/daemons/dmeventd/Makefile.in @@ -57,13 +57,13 @@ all: device-mapper device-mapper: $(TARGETS) CFLAGS_dmeventd.o += $(EXTRA_EXEC_CFLAGS) -LIBS += -ldevmapper $(PTHREAD_LIBS) +LIBS += $(PTHREAD_LIBS) dmeventd: $(LIB_SHARED) dmeventd.o $(CC) $(CFLAGS) -L. $(LDFLAGS) $(EXTRA_EXEC_LDFLAGS) $(ELDFLAGS) dmeventd.o \ - -o $@ $(DL_LIBS) $(DMEVENT_LIBS) $(LIBS) + -o $@ $(DL_LIBS) $(DMEVENT_LIBS) $(top_builddir)/device_mapper/libdevice-mapper.a $(LIBS) -lm -dmeventd.static: $(LIB_STATIC) dmeventd.o $(interfacebuilddir)/libdevmapper.a +dmeventd.static: $(LIB_STATIC) dmeventd.o $(CC) $(CFLAGS) $(LDFLAGS) -static -L. -L$(interfacebuilddir) dmeventd.o \ -o $@ $(DL_LIBS) $(DMEVENT_LIBS) $(LIBS) $(STATIC_LIBS) @@ -73,7 +73,6 @@ endif ifneq ("$(CFLOW_CMD)", "") CFLOW_SOURCES = $(addprefix $(srcdir)/, $(SOURCES)) --include $(top_builddir)/libdm/libdevmapper.cflow -include $(top_builddir)/lib/liblvm-internal.cflow -include $(top_builddir)/lib/liblvm2cmd.cflow -include $(top_builddir)/daemons/dmeventd/$(LIB_NAME).cflow diff --git a/daemons/dmeventd/dmeventd.c b/daemons/dmeventd/dmeventd.c index 438cf1668..533186b97 100644 --- a/daemons/dmeventd/dmeventd.c +++ b/daemons/dmeventd/dmeventd.c @@ -16,7 +16,7 @@ * dmeventd - dm event daemon to monitor active mapped devices */ -#include "libdm/misc/dm-logging.h" +#include "device_mapper/misc/dm-logging.h" #include "daemons/dmeventd/libdevmapper-event.h" #include "dmeventd.h" diff --git a/daemons/dmeventd/libdevmapper-event.c b/daemons/dmeventd/libdevmapper-event.c index a75924c27..7f0722831 100644 --- a/daemons/dmeventd/libdevmapper-event.c +++ b/daemons/dmeventd/libdevmapper-event.c @@ -12,10 +12,11 @@ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "libdm/misc/dm-logging.h" -#include "libdm/misc/dmlib.h" +#include "device_mapper/misc/dm-logging.h" +#include "device_mapper/misc/dmlib.h" #include "daemons/dmeventd/libdevmapper-event.h" #include "dmeventd.h" +#include "lib/misc/intl.h" #include <fcntl.h> #include <sys/file.h> @@ -25,6 +26,7 @@ #include <arpa/inet.h> /* for htonl, ntohl */ #include <pthread.h> #include <syslog.h> +#include <unistd.h> static int _debug_level = 0; static int _use_syslog = 0; diff --git a/daemons/dmeventd/libdevmapper-event.pc.in b/daemons/dmeventd/libdevmapper-event.pc.in index 839433fb8..fcad5bca4 100644 --- a/daemons/dmeventd/libdevmapper-event.pc.in +++ b/daemons/dmeventd/libdevmapper-event.pc.in @@ -8,4 +8,3 @@ Description: device-mapper event library Version: @DM_LIB_PATCHLEVEL@ Cflags: -I${includedir} Libs: -L${libdir} -ldevmapper-event -Requires.private: devmapper diff --git a/daemons/dmeventd/plugins/lvm2/Makefile.in b/daemons/dmeventd/plugins/lvm2/Makefile.in index 956d31edc..7e4696c68 100644 --- a/daemons/dmeventd/plugins/lvm2/Makefile.in +++ b/daemons/dmeventd/plugins/lvm2/Makefile.in @@ -24,7 +24,7 @@ LIB_VERSION = $(LIB_VERSION_LVM) include $(top_builddir)/make.tmpl -LIBS 
+= @LVM2CMD_LIB@ -ldevmapper $(PTHREAD_LIBS) +LIBS += @LVM2CMD_LIB@ $(PTHREAD_LIBS) install_lvm2: install_lib_shared diff --git a/daemons/dmeventd/plugins/mirror/Makefile.in b/daemons/dmeventd/plugins/mirror/Makefile.in index 1d9666daa..22832ddf0 100644 --- a/daemons/dmeventd/plugins/mirror/Makefile.in +++ b/daemons/dmeventd/plugins/mirror/Makefile.in @@ -30,7 +30,7 @@ CFLOW_LIST_TARGET = $(LIB_NAME).cflow include $(top_builddir)/make.tmpl -LIBS += -ldevmapper-event-lvm2 -ldevmapper +LIBS += -ldevmapper-event-lvm2 install_lvm2: install_dm_plugin diff --git a/daemons/dmeventd/plugins/raid/Makefile.in b/daemons/dmeventd/plugins/raid/Makefile.in index 1bca8b2aa..54343b1ca 100644 --- a/daemons/dmeventd/plugins/raid/Makefile.in +++ b/daemons/dmeventd/plugins/raid/Makefile.in @@ -29,7 +29,7 @@ CFLOW_LIST_TARGET = $(LIB_NAME).cflow include $(top_builddir)/make.tmpl -LIBS += -ldevmapper-event-lvm2 -ldevmapper +LIBS += -ldevmapper-event-lvm2 install_lvm2: install_dm_plugin diff --git a/daemons/dmeventd/plugins/snapshot/Makefile.in b/daemons/dmeventd/plugins/snapshot/Makefile.in index 5eb7a47f1..75f4342ad 100644 --- a/daemons/dmeventd/plugins/snapshot/Makefile.in +++ b/daemons/dmeventd/plugins/snapshot/Makefile.in @@ -26,7 +26,7 @@ LIB_VERSION = $(LIB_VERSION_LVM) include $(top_builddir)/make.tmpl -LIBS += -ldevmapper-event-lvm2 -ldevmapper +LIBS += -ldevmapper-event-lvm2 install_lvm2: install_dm_plugin diff --git a/daemons/dmeventd/plugins/thin/Makefile.in b/daemons/dmeventd/plugins/thin/Makefile.in index f54ee2da5..9f1c2b34b 100644 --- a/daemons/dmeventd/plugins/thin/Makefile.in +++ b/daemons/dmeventd/plugins/thin/Makefile.in @@ -29,7 +29,7 @@ CFLOW_LIST_TARGET = $(LIB_NAME).cflow include $(top_builddir)/make.tmpl -LIBS += -ldevmapper-event-lvm2 -ldevmapper +LIBS += -ldevmapper-event-lvm2 install_lvm2: install_dm_plugin diff --git a/daemons/dmfilemapd/Makefile.in b/daemons/dmfilemapd/Makefile.in index 8a4938b22..1afd6b8ed 100644 --- a/daemons/dmfilemapd/Makefile.in +++ b/daemons/dmfilemapd/Makefile.in @@ -35,13 +35,12 @@ all: device-mapper device-mapper: $(TARGETS) CFLAGS_dmfilemapd.o += $(EXTRA_EXEC_CFLAGS) -LIBS += -ldevmapper dmfilemapd: $(LIB_SHARED) dmfilemapd.o $(CC) $(CFLAGS) $(LDFLAGS) $(EXTRA_EXEC_LDFLAGS) $(ELDFLAGS) \ -o $@ dmfilemapd.o $(DL_LIBS) $(LIBS) -dmfilemapd.static: $(LIB_STATIC) dmfilemapd.o $(interfacebuilddir)/libdevmapper.a +dmfilemapd.static: $(LIB_STATIC) dmfilemapd.o $(CC) $(CFLAGS) $(LDFLAGS) $(ELDFLAGS) -static -L$(interfacebuilddir) \ -o $@ dmfilemapd.o $(DL_LIBS) $(LIBS) $(STATIC_LIBS) diff --git a/daemons/dmfilemapd/dmfilemapd.c b/daemons/dmfilemapd/dmfilemapd.c index 7fc95c8a4..4e048fff3 100644 --- a/daemons/dmfilemapd/dmfilemapd.c +++ b/daemons/dmfilemapd/dmfilemapd.c @@ -16,7 +16,7 @@ #include "tools/tool.h" -#include "libdm/misc/dm-logging.h" +#include "device_mapper/misc/dm-logging.h" #include "lib/config/defaults.h" diff --git a/daemons/lvmetad/Makefile.in b/daemons/lvmetad/Makefile.in index 1d901aabc..f652db8bf 100644 --- a/daemons/lvmetad/Makefile.in +++ b/daemons/lvmetad/Makefile.in @@ -32,15 +32,17 @@ CFLAGS_lvmetactl.o += $(EXTRA_EXEC_CFLAGS) CFLAGS_lvmetad-core.o += $(EXTRA_EXEC_CFLAGS) INCLUDES += -I$(top_srcdir)/libdaemon/server LDFLAGS += -L$(top_builddir)/libdaemon/server $(EXTRA_EXEC_LDFLAGS) $(ELDFLAGS) -LIBS += $(RT_LIBS) $(DAEMON_LIBS) -ldevmapper $(PTHREAD_LIBS) +LIBS += $(RT_LIBS) $(DAEMON_LIBS) $(PTHREAD_LIBS) -lm lvmetad: $(OBJECTS) $(top_builddir)/libdaemon/client/libdaemonclient.a \ - $(top_builddir)/libdaemon/server/libdaemonserver.a - 
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJECTS) -ldaemonserver $(LIBS) + $(top_builddir)/libdaemon/server/libdaemonserver.a \ + $(top_builddir)/device_mapper/libdevice-mapper.a + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJECTS) $(top_builddir)/device_mapper/libdevice-mapper.a -ldaemonserver $(LIBS) lvmetactl: lvmetactl.o $(top_builddir)/libdaemon/client/libdaemonclient.a \ - $(top_builddir)/libdaemon/server/libdaemonserver.a - $(CC) $(CFLAGS) $(LDFLAGS) -o $@ lvmetactl.o $(LIBS) + $(top_builddir)/libdaemon/server/libdaemonserver.a \ + $(top_builddir)/device_mapper/libdevice-mapper.a + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ lvmetactl.o $(top_builddir)/device_mapper/libdevice-mapper.a $(LIBS) CLEAN_TARGETS += lvmetactl.o diff --git a/daemons/lvmlockd/Makefile.in b/daemons/lvmlockd/Makefile.in index 8f16d0652..50463999a 100644 --- a/daemons/lvmlockd/Makefile.in +++ b/daemons/lvmlockd/Makefile.in @@ -36,7 +36,7 @@ include $(top_builddir)/make.tmpl CFLAGS += $(EXTRA_EXEC_CFLAGS) INCLUDES += -I$(top_srcdir)/libdaemon/server LDFLAGS += -L$(top_builddir)/libdaemon/server $(EXTRA_EXEC_LDFLAGS) $(ELDFLAGS) -LIBS += $(RT_LIBS) $(DAEMON_LIBS) -ldevmapper $(PTHREAD_LIBS) +LIBS += $(RT_LIBS) $(DAEMON_LIBS) $(PTHREAD_LIBS) lvmlockd: $(OBJECTS) $(top_builddir)/libdaemon/client/libdaemonclient.a \ $(top_builddir)/libdaemon/server/libdaemonserver.a diff --git a/daemons/lvmlockd/lvmlockd-core.c b/daemons/lvmlockd/lvmlockd-core.c index e75d02bc0..f1a2a2038 100644 --- a/daemons/lvmlockd/lvmlockd-core.c +++ b/daemons/lvmlockd/lvmlockd-core.c @@ -19,7 +19,7 @@ #include "lvm-version.h" #include "daemons/lvmetad/lvmetad-client.h" #include "daemons/lvmlockd/lvmlockd-client.h" -#include "libdm/misc/dm-ioctl.h" +#include "device_mapper/misc/dm-ioctl.h" /* #include <assert.h> */ #include <errno.h> diff --git a/daemons/lvmpolld/Makefile.in b/daemons/lvmpolld/Makefile.in index 483758dcd..69c4a8d86 100644 --- a/daemons/lvmpolld/Makefile.in +++ b/daemons/lvmpolld/Makefile.in @@ -30,7 +30,7 @@ include $(top_builddir)/make.tmpl CFLAGS += $(EXTRA_EXEC_CFLAGS) INCLUDES += -I$(top_srcdir)/libdaemon/server LDFLAGS += -L$(top_builddir)/libdaemon/server $(EXTRA_EXEC_LDFLAGS) $(ELDFLAGS) -LIBS += $(DAEMON_LIBS) -ldaemonserver -ldevmapper $(PTHREAD_LIBS) +LIBS += $(DAEMON_LIBS) -ldaemonserver $(PTHREAD_LIBS) lvmpolld: $(OBJECTS) $(top_builddir)/libdaemon/client/libdaemonclient.a \ $(top_builddir)/libdaemon/server/libdaemonserver.a diff --git a/device-mapper/Makefile b/device-mapper/Makefile deleted file mode 100644 index 76e19f020..000000000 --- a/device-mapper/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (C) 2018 Red Hat, Inc. All rights reserved. -# -# This file is part of LVM2. -# -# This copyrighted material is made available to anyone wishing to use, -# modify, copy, or redistribute it subject to the terms and conditions -# of the GNU General Public License v.2. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software Foundation, -# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - -DM_SOURCE=\ - device-mapper/vdo/status.c - -DM_DEPENDS=$(subst .c,.d,$(DM_SOURCE)) -DM_OBJECTS=$(DM_SOURCE:%.c=%.o) -CLEAN_TARGETS+=$(DM_DEPENDS) $(DM_OBJECTS) - --include $(DM_DEPENDS) diff --git a/device_mapper/Makefile b/device_mapper/Makefile new file mode 100644 index 000000000..999c3babf --- /dev/null +++ b/device_mapper/Makefile @@ -0,0 +1,46 @@ +# Copyright (C) 2018 Red Hat, Inc. All rights reserved. 
+# +# This file is part of the device-mapper userspace tools. +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions +# of the GNU Lesser General Public License v.2.1. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +DEVICE_MAPPER_SOURCE=\ + device_mapper/datastruct/bitset.c \ + device_mapper/datastruct/hash.c \ + device_mapper/datastruct/list.c \ + device_mapper/libdm-common.c \ + device_mapper/libdm-config.c \ + device_mapper/libdm-deptree.c \ + device_mapper/libdm-file.c \ + device_mapper/libdm-report.c \ + device_mapper/libdm-stats.c \ + device_mapper/libdm-string.c \ + device_mapper/libdm-targets.c \ + device_mapper/libdm-timestamp.c \ + device_mapper/mm/dbg_malloc.c \ + device_mapper/mm/pool.c \ + device_mapper/regex/matcher.c \ + device_mapper/regex/parse_rx.c \ + device_mapper/regex/ttree.c \ + device_mapper/ioctl/libdm-iface.c + +DEVICE_MAPPER_DEPENDS=$(subst .c,.d,$(DEVICE_MAPPER_SOURCE)) +DEVICE_MAPPER_OBJECTS=$(subst .c,.o,$(DEVICE_MAPPER_SOURCE)) +CLEAN_TARGETS+=$(DEVICE_MAPPER_DEPENDS) $(DEVICE_MAPPER_OBJECTS) + +-include $(DEVICE_MAPPER_DEPENDS) + +$(DEVICE_MAPPER_OBJECTS): INCLUDES+=-Idevice_mapper/ + +device_mapper/libdevice-mapper.a: $(DEVICE_MAPPER_OBJECTS) + @echo " [AR] $@" + $(Q) $(RM) $@ + $(Q) $(AR) rsv $@ $(DEVICE_MAPPER_OBJECTS) > /dev/null + +CLEAN_TARGETS+=device_mapper/libdevice-mapper.a diff --git a/device_mapper/datastruct/bitset.c b/device_mapper/datastruct/bitset.c new file mode 100644 index 000000000..6ae99d3de --- /dev/null +++ b/device_mapper/datastruct/bitset.c @@ -0,0 +1,258 @@ +/* + * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. + * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. + * + * This file is part of the device-mapper userspace tools. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU Lesser General Public License v.2.1. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "misc/dmlib.h" + +#include <ctype.h> + +/* FIXME: calculate this. */ +#define INT_SHIFT 5 + +dm_bitset_t dm_bitset_create(struct dm_pool *mem, unsigned num_bits) +{ + unsigned n = (num_bits / DM_BITS_PER_INT) + 2; + size_t size = sizeof(int) * n; + dm_bitset_t bs; + + if (mem) + bs = dm_pool_zalloc(mem, size); + else + bs = dm_zalloc(size); + + if (!bs) + return NULL; + + *bs = num_bits; + + return bs; +} + +void dm_bitset_destroy(dm_bitset_t bs) +{ + dm_free(bs); +} + +int dm_bitset_equal(dm_bitset_t in1, dm_bitset_t in2) +{ + int i; + + for (i = (in1[0] / DM_BITS_PER_INT) + 1; i; i--) + if (in1[i] != in2[i]) + return 0; + + return 1; +} + +void dm_bit_and(dm_bitset_t out, dm_bitset_t in1, dm_bitset_t in2) +{ + int i; + + for (i = (in1[0] / DM_BITS_PER_INT) + 1; i; i--) + out[i] = in1[i] & in2[i]; +} +void dm_bit_union(dm_bitset_t out, dm_bitset_t in1, dm_bitset_t in2) +{ + int i; + for (i = (in1[0] / DM_BITS_PER_INT) + 1; i; i--) + out[i] = in1[i] | in2[i]; +} + +static int _test_word(uint32_t test, int bit) +{ + uint32_t tb = test >> bit; + + return (tb ? 
ffs(tb) + bit - 1 : -1); +} + +static int _test_word_rev(uint32_t test, int bit) +{ + uint32_t tb = test << (DM_BITS_PER_INT - 1 - bit); + + return (tb ? bit - clz(tb) : -1); +} + +int dm_bit_get_next(dm_bitset_t bs, int last_bit) +{ + int bit, word; + uint32_t test; + + last_bit++; /* otherwise we'll return the same bit again */ + + /* + * bs[0] holds number of bits + */ + while (last_bit < (int) bs[0]) { + word = last_bit >> INT_SHIFT; + test = bs[word + 1]; + bit = last_bit & (DM_BITS_PER_INT - 1); + + if ((bit = _test_word(test, bit)) >= 0) + return (word * DM_BITS_PER_INT) + bit; + + last_bit = last_bit - (last_bit & (DM_BITS_PER_INT - 1)) + + DM_BITS_PER_INT; + } + + return -1; +} + +int dm_bit_get_prev(dm_bitset_t bs, int last_bit) +{ + int bit, word; + uint32_t test; + + last_bit--; /* otherwise we'll return the same bit again */ + + /* + * bs[0] holds number of bits + */ + while (last_bit >= 0) { + word = last_bit >> INT_SHIFT; + test = bs[word + 1]; + bit = last_bit & (DM_BITS_PER_INT - 1); + + if ((bit = _test_word_rev(test, bit)) >= 0) + return (word * DM_BITS_PER_INT) + bit; + + last_bit = (last_bit & ~(DM_BITS_PER_INT - 1)) - 1; + } + + return -1; +} + +int dm_bit_get_first(dm_bitset_t bs) +{ + return dm_bit_get_next(bs, -1); +} + +int dm_bit_get_last(dm_bitset_t bs) +{ + return dm_bit_get_prev(bs, bs[0] + 1); +} + +/* + * Based on the Linux kernel __bitmap_parselist from lib/bitmap.c + */ +dm_bitset_t dm_bitset_parse_list(const char *str, struct dm_pool *mem, + size_t min_num_bits) +{ + unsigned a, b; + int c, old_c, totaldigits, ndigits, nmaskbits; + int at_start, in_range; + dm_bitset_t mask = NULL; + const char *start = str; + size_t len; + +scan: + len = strlen(str); + totaldigits = c = 0; + nmaskbits = 0; + do { + at_start = 1; + in_range = 0; + a = b = 0; + ndigits = totaldigits; + + /* Get the next value or range of values */ + while (len) { + old_c = c; + c = *str++; + len--; + if (isspace(c)) + continue; + + /* A '\0' or a ',' signal the end of a value or range */ + if (c == '\0' || c == ',') + break; + /* + * Whitespace between digits is not allowed, + * but leading or trailing whitespace is ok. + * When old_c is whitespace and totaldigits == ndigits, + * the whitespace is leading. Trailing whitespace cannot + * reach this point: c would have been ',' or '\0', which + * breaks out of this loop above. + */ + if ((totaldigits != ndigits) && isspace(old_c)) + goto_bad; + + if (c == '-') { + if (at_start || in_range) + goto_bad; + b = 0; + in_range = 1; + at_start = 1; + continue; + } + + if (!isdigit(c)) + goto_bad; + + b = b * 10 + (c - '0'); + if (!in_range) + a = b; + at_start = 0; + totaldigits++; + } + if (ndigits == totaldigits) + continue; + /* if no digit is after '-', it's wrong */ + if (at_start && in_range) + goto_bad; + if (!(a <= b)) + goto_bad; + if (b >= nmaskbits) + nmaskbits = b + 1; + while ((a <= b) && mask) { + dm_bit_set(mask, a); + a++; + } + } while (len && c == ','); + + if (!mask) { + if (min_num_bits && (nmaskbits < min_num_bits)) + nmaskbits = min_num_bits; + + if (!(mask = dm_bitset_create(mem, nmaskbits))) + goto_bad; + str = start; + goto scan; + } + + return mask; +bad: + if (mask) { + if (mem) + dm_pool_free(mem, mask); + else + dm_bitset_destroy(mask); + } + return NULL; +} + +#if defined(__GNUC__) +/* + * Maintain backward compatibility with older versions that did not + * accept a 'min_num_bits' argument to dm_bitset_parse_list().
+ */ +dm_bitset_t dm_bitset_parse_list_v1_02_129(const char *str, struct dm_pool *mem); +dm_bitset_t dm_bitset_parse_list_v1_02_129(const char *str, struct dm_pool *mem) +{ + return dm_bitset_parse_list(str, mem, 0); +} + +#else /* if defined(__GNUC__) */ + +#endif diff --git a/device_mapper/datastruct/hash.c b/device_mapper/datastruct/hash.c new file mode 100644 index 000000000..9b9c939f4 --- /dev/null +++ b/device_mapper/datastruct/hash.c @@ -0,0 +1,392 @@ +/* + * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. + * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. + * + * This file is part of the device-mapper userspace tools. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU Lesser General Public License v.2.1. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "misc/dmlib.h" + +struct dm_hash_node { + struct dm_hash_node *next; + void *data; + unsigned data_len; + unsigned keylen; + char key[0]; +}; + +struct dm_hash_table { + unsigned num_nodes; + unsigned num_slots; + struct dm_hash_node **slots; +}; + +/* Permutation of the Integers 0 through 255 */ +static unsigned char _nums[] = { + 1, 14, 110, 25, 97, 174, 132, 119, 138, 170, 125, 118, 27, 233, 140, 51, + 87, 197, 177, 107, 234, 169, 56, 68, 30, 7, 173, 73, 188, 40, 36, 65, + 49, 213, 104, 190, 57, 211, 148, 223, 48, 115, 15, 2, 67, 186, 210, 28, + 12, 181, 103, 70, 22, 58, 75, 78, 183, 167, 238, 157, 124, 147, 172, + 144, + 176, 161, 141, 86, 60, 66, 128, 83, 156, 241, 79, 46, 168, 198, 41, 254, + 178, 85, 253, 237, 250, 154, 133, 88, 35, 206, 95, 116, 252, 192, 54, + 221, + 102, 218, 255, 240, 82, 106, 158, 201, 61, 3, 89, 9, 42, 155, 159, 93, + 166, 80, 50, 34, 175, 195, 100, 99, 26, 150, 16, 145, 4, 33, 8, 189, + 121, 64, 77, 72, 208, 245, 130, 122, 143, 55, 105, 134, 29, 164, 185, + 194, + 193, 239, 101, 242, 5, 171, 126, 11, 74, 59, 137, 228, 108, 191, 232, + 139, + 6, 24, 81, 20, 127, 17, 91, 92, 251, 151, 225, 207, 21, 98, 113, 112, + 84, 226, 18, 214, 199, 187, 13, 32, 94, 220, 224, 212, 247, 204, 196, + 43, + 249, 236, 45, 244, 111, 182, 153, 136, 129, 90, 217, 202, 19, 165, 231, + 71, + 230, 142, 96, 227, 62, 179, 246, 114, 162, 53, 160, 215, 205, 180, 47, + 109, + 44, 38, 31, 149, 135, 0, 216, 52, 63, 23, 37, 69, 39, 117, 146, 184, + 163, 200, 222, 235, 248, 243, 219, 10, 152, 131, 123, 229, 203, 76, 120, + 209 +}; + +static struct dm_hash_node *_create_node(const char *str, unsigned len) +{ + struct dm_hash_node *n = dm_malloc(sizeof(*n) + len); + + if (n) { + memcpy(n->key, str, len); + n->keylen = len; + } + + return n; +} + +static unsigned long _hash(const char *str, unsigned len) +{ + unsigned long h = 0, g; + unsigned i; + + for (i = 0; i < len; i++) { + h <<= 4; + h += _nums[(unsigned char) *str++]; + g = h & ((unsigned long) 0xf << 16u); + if (g) { + h ^= g >> 16u; + h ^= g >> 5u; + } + } + + return h; +} + +struct dm_hash_table *dm_hash_create(unsigned size_hint) +{ + size_t len; + unsigned new_size = 16u; + struct dm_hash_table *hc = dm_zalloc(sizeof(*hc)); + + if (!hc) + return_0; + + /* round size hint up to a power of two */ + while (new_size < size_hint) + new_size = new_size << 1; + + hc->num_slots = new_size; + len = sizeof(*(hc->slots)) * new_size; + if (!(hc->slots = 
dm_zalloc(len))) + goto_bad; + + return hc; + + bad: + dm_free(hc->slots); + dm_free(hc); + return 0; +} + +static void _free_nodes(struct dm_hash_table *t) +{ + struct dm_hash_node *c, *n; + unsigned i; + + for (i = 0; i < t->num_slots; i++) + for (c = t->slots[i]; c; c = n) { + n = c->next; + dm_free(c); + } +} + +void dm_hash_destroy(struct dm_hash_table *t) +{ + _free_nodes(t); + dm_free(t->slots); + dm_free(t); +} + +static struct dm_hash_node **_find(struct dm_hash_table *t, const void *key, + uint32_t len) +{ + unsigned h = _hash(key, len) & (t->num_slots - 1); + struct dm_hash_node **c; + + for (c = &t->slots[h]; *c; c = &((*c)->next)) { + if ((*c)->keylen != len) + continue; + + if (!memcmp(key, (*c)->key, len)) + break; + } + + return c; +} + +void *dm_hash_lookup_binary(struct dm_hash_table *t, const void *key, + uint32_t len) +{ + struct dm_hash_node **c = _find(t, key, len); + + return *c ? (*c)->data : 0; +} + +int dm_hash_insert_binary(struct dm_hash_table *t, const void *key, + uint32_t len, void *data) +{ + struct dm_hash_node **c = _find(t, key, len); + + if (*c) + (*c)->data = data; + else { + struct dm_hash_node *n = _create_node(key, len); + + if (!n) + return 0; + + n->data = data; + n->next = 0; + *c = n; + t->num_nodes++; + } + + return 1; +} + +void dm_hash_remove_binary(struct dm_hash_table *t, const void *key, + uint32_t len) +{ + struct dm_hash_node **c = _find(t, key, len); + + if (*c) { + struct dm_hash_node *old = *c; + *c = (*c)->next; + dm_free(old); + t->num_nodes--; + } +} + +void *dm_hash_lookup(struct dm_hash_table *t, const char *key) +{ + return dm_hash_lookup_binary(t, key, strlen(key) + 1); +} + +int dm_hash_insert(struct dm_hash_table *t, const char *key, void *data) +{ + return dm_hash_insert_binary(t, key, strlen(key) + 1, data); +} + +void dm_hash_remove(struct dm_hash_table *t, const char *key) +{ + dm_hash_remove_binary(t, key, strlen(key) + 1); +} + +static struct dm_hash_node **_find_str_with_val(struct dm_hash_table *t, + const void *key, const void *val, + uint32_t len, uint32_t val_len) +{ + struct dm_hash_node **c; + unsigned h; + + h = _hash(key, len) & (t->num_slots - 1); + + for (c = &t->slots[h]; *c; c = &((*c)->next)) { + if ((*c)->keylen != len) + continue; + + if (!memcmp(key, (*c)->key, len) && (*c)->data) { + if (((*c)->data_len == val_len) && + !memcmp(val, (*c)->data, val_len)) + return c; + } + } + + return NULL; +} + +int dm_hash_insert_allow_multiple(struct dm_hash_table *t, const char *key, + const void *val, uint32_t val_len) +{ + struct dm_hash_node *n; + struct dm_hash_node *first; + int len = strlen(key) + 1; + unsigned h; + + n = _create_node(key, len); + if (!n) + return 0; + + n->data = (void *)val; + n->data_len = val_len; + + h = _hash(key, len) & (t->num_slots - 1); + + first = t->slots[h]; + + if (first) + n->next = first; + else + n->next = 0; + t->slots[h] = n; + + t->num_nodes++; + return 1; +} + +/* + * Look through multiple entries with the same key for one that has a + * matching val and return that. If none have a matching val, return NULL. + */ +void *dm_hash_lookup_with_val(struct dm_hash_table *t, const char *key, + const void *val, uint32_t val_len) +{ + struct dm_hash_node **c; + + c = _find_str_with_val(t, key, val, strlen(key) + 1, val_len); + + return (c && *c) ? (*c)->data : 0; +} + +/* + * Look through multiple entries with the same key for one that has a + * matching val and remove that.
+ */ +void dm_hash_remove_with_val(struct dm_hash_table *t, const char *key, + const void *val, uint32_t val_len) +{ + struct dm_hash_node **c; + + c = _find_str_with_val(t, key, val, strlen(key) + 1, val_len); + + if (c && *c) { + struct dm_hash_node *old = *c; + *c = (*c)->next; + dm_free(old); + t->num_nodes--; + } +} + +/* + * Look up the value for a key and count how many + * entries have the same key. + * + * If no entries have key, return NULL and set count to 0. + * + * If one entry has the key, the function returns the val, + * and sets count to 1. + * + * If N entries have the key, the function returns the val + * from the first entry, and sets count to N. + */ +void *dm_hash_lookup_with_count(struct dm_hash_table *t, const char *key, int *count) +{ + struct dm_hash_node **c; + struct dm_hash_node **c1 = NULL; + uint32_t len = strlen(key) + 1; + unsigned h; + + *count = 0; + + h = _hash(key, len) & (t->num_slots - 1); + + for (c = &t->slots[h]; *c; c = &((*c)->next)) { + if ((*c)->keylen != len) + continue; + + if (!memcmp(key, (*c)->key, len)) { + (*count)++; + if (!c1) + c1 = c; + } + } + + if (!c1) + return NULL; + else + return *c1 ? (*c1)->data : 0; +} + +unsigned dm_hash_get_num_entries(struct dm_hash_table *t) +{ + return t->num_nodes; +} + +void dm_hash_iter(struct dm_hash_table *t, dm_hash_iterate_fn f) +{ + struct dm_hash_node *c, *n; + unsigned i; + + for (i = 0; i < t->num_slots; i++) + for (c = t->slots[i]; c; c = n) { + n = c->next; + f(c->data); + } +} + +void dm_hash_wipe(struct dm_hash_table *t) +{ + _free_nodes(t); + memset(t->slots, 0, sizeof(struct dm_hash_node *) * t->num_slots); + t->num_nodes = 0u; +} + +char *dm_hash_get_key(struct dm_hash_table *t __attribute__((unused)), + struct dm_hash_node *n) +{ + return n->key; +} + +void *dm_hash_get_data(struct dm_hash_table *t __attribute__((unused)), + struct dm_hash_node *n) +{ + return n->data; +} + +static struct dm_hash_node *_next_slot(struct dm_hash_table *t, unsigned s) +{ + struct dm_hash_node *c = NULL; + unsigned i; + + for (i = s; i < t->num_slots && !c; i++) + c = t->slots[i]; + + return c; +} + +struct dm_hash_node *dm_hash_get_first(struct dm_hash_table *t) +{ + return _next_slot(t, 0); +} + +struct dm_hash_node *dm_hash_get_next(struct dm_hash_table *t, struct dm_hash_node *n) +{ + unsigned h = _hash(n->key, n->keylen) & (t->num_slots - 1); + + return n->next ? n->next : _next_slot(t, h + 1); +} diff --git a/device_mapper/datastruct/list.c b/device_mapper/datastruct/list.c new file mode 100644 index 000000000..86c3e4ef8 --- /dev/null +++ b/device_mapper/datastruct/list.c @@ -0,0 +1,168 @@ +/* + * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. + * Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved. + * + * This file is part of LVM2. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU Lesser General Public License v.2.1. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "misc/dmlib.h" +#include <assert.h> + +/* + * Initialise a list before use. + * The list head's next and previous pointers point back to itself. + */ +void dm_list_init(struct dm_list *head) +{ + head->n = head->p = head; +} + +/* + * Insert an element before 'head'. 
+ * If 'head' is the list head, this adds an element to the end of the list. + */ +void dm_list_add(struct dm_list *head, struct dm_list *elem) +{ + assert(head->n); + + elem->n = head; + elem->p = head->p; + + head->p->n = elem; + head->p = elem; +} + +/* + * Insert an element after 'head'. + * If 'head' is the list head, this adds an element to the front of the list. + */ +void dm_list_add_h(struct dm_list *head, struct dm_list *elem) +{ + assert(head->n); + + elem->n = head->n; + elem->p = head; + + head->n->p = elem; + head->n = elem; +} + +/* + * Delete an element from its list. + * Note that this doesn't change the element itself - it may still be safe + * to follow its pointers. + */ +void dm_list_del(struct dm_list *elem) +{ + elem->n->p = elem->p; + elem->p->n = elem->n; +} + +/* + * Remove an element from existing list and insert before 'head'. + */ +void dm_list_move(struct dm_list *head, struct dm_list *elem) +{ + dm_list_del(elem); + dm_list_add(head, elem); +} + +/* + * Is the list empty? + */ +int dm_list_empty(const struct dm_list *head) +{ + return head->n == head; +} + +/* + * Is this the first element of the list? + */ +int dm_list_start(const struct dm_list *head, const struct dm_list *elem) +{ + return elem->p == head; +} + +/* + * Is this the last element of the list? + */ +int dm_list_end(const struct dm_list *head, const struct dm_list *elem) +{ + return elem->n == head; +} + +/* + * Return first element of the list or NULL if empty + */ +struct dm_list *dm_list_first(const struct dm_list *head) +{ + return (dm_list_empty(head) ? NULL : head->n); +} + +/* + * Return last element of the list or NULL if empty + */ +struct dm_list *dm_list_last(const struct dm_list *head) +{ + return (dm_list_empty(head) ? NULL : head->p); +} + +/* + * Return the previous element of the list, or NULL if we've reached the start. + */ +struct dm_list *dm_list_prev(const struct dm_list *head, const struct dm_list *elem) +{ + return (dm_list_start(head, elem) ? NULL : elem->p); +} + +/* + * Return the next element of the list, or NULL if we've reached the end. + */ +struct dm_list *dm_list_next(const struct dm_list *head, const struct dm_list *elem) +{ + return (dm_list_end(head, elem) ? NULL : elem->n); +} + +/* + * Return the number of elements in a list by walking it. + */ +unsigned int dm_list_size(const struct dm_list *head) +{ + unsigned int s = 0; + const struct dm_list *v; + + dm_list_iterate(v, head) + s++; + + return s; +} + +/* + * Join two lists together. + * This moves all the elements of the list 'head1' to the end of the list + * 'head', leaving 'head1' empty. + */ +void dm_list_splice(struct dm_list *head, struct dm_list *head1) +{ + assert(head->n); + assert(head1->n); + + if (dm_list_empty(head1)) + return; + + head1->p->n = head; + head1->n->p = head->p; + + head->p->n = head1->n; + head->p = head1->p; + + dm_list_init(head1); +} diff --git a/device_mapper/ioctl/libdm-iface.c b/device_mapper/ioctl/libdm-iface.c new file mode 100644 index 000000000..4825f1d17 --- /dev/null +++ b/device_mapper/ioctl/libdm-iface.c @@ -0,0 +1,2196 @@ +/* + * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. + * Copyright (C) 2004-2013 Red Hat, Inc. All rights reserved. + * + * This file is part of the device-mapper userspace tools. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU Lesser General Public License v.2.1. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "misc/dmlib.h" +#include "libdm-targets.h" +#include "libdm-common.h" + +#include <stddef.h> +#include <fcntl.h> +#include <dirent.h> +#include <sys/ioctl.h> +#include <sys/utsname.h> +#include <limits.h> +#include <unistd.h> + +#ifdef __linux__ +# include "misc/kdev_t.h" +# include <linux/limits.h> +#else +# define MAJOR(x) major((x)) +# define MINOR(x) minor((x)) +# define MKDEV(x,y) makedev((x),(y)) +#endif + +#include "misc/dm-ioctl.h" + +/* + * Ensure build compatibility. + * The hard-coded versions here are the highest present + * in the _cmd_data arrays. + */ + +#if !((DM_VERSION_MAJOR == 4 && DM_VERSION_MINOR >= 6)) +#error The version of dm-ioctl.h included is incompatible. +#endif + +/* FIXME This should be exported in device-mapper.h */ +#define DM_NAME "device-mapper" + +#define PROC_MISC "/proc/misc" +#define PROC_DEVICES "/proc/devices" +#define MISC_NAME "misc" + +#define NUMBER_OF_MAJORS 4096 + +/* + * Static minor number assigned since kernel version 2.6.36. + * The original definition is in kernel's include/linux/miscdevice.h. + * This number is also visible in modules.devname exported by depmod + * utility (support included in module-init-tools version >= 3.12). + */ +#define MAPPER_CTRL_MINOR 236 +#define MISC_MAJOR 10 + +/* dm major version no for running kernel */ +static unsigned _dm_version = DM_VERSION_MAJOR; +static unsigned _dm_version_minor = 0; +static unsigned _dm_version_patchlevel = 0; +static int _log_suppress = 0; +static struct dm_timestamp *_dm_ioctl_timestamp = NULL; + +/* + * If the kernel dm driver only supports one major number + * we store it in _dm_device_major. Otherwise we indicate + * which major numbers have been claimed by device-mapper + * in _dm_bitset. 
+ */ +static unsigned _dm_multiple_major_support = 1; +static dm_bitset_t _dm_bitset = NULL; +static uint32_t _dm_device_major = 0; + +static int _control_fd = -1; +static int _hold_control_fd_open = 0; +static int _version_checked = 0; +static int _version_ok = 1; +static unsigned _ioctl_buffer_double_factor = 0; + +const int _dm_compat = 0; + +/* *INDENT-OFF* */ +static struct cmd_data _cmd_data_v4[] = { + {"create", DM_DEV_CREATE, {4, 0, 0}}, + {"reload", DM_TABLE_LOAD, {4, 0, 0}}, + {"remove", DM_DEV_REMOVE, {4, 0, 0}}, + {"remove_all", DM_REMOVE_ALL, {4, 0, 0}}, + {"suspend", DM_DEV_SUSPEND, {4, 0, 0}}, + {"resume", DM_DEV_SUSPEND, {4, 0, 0}}, + {"info", DM_DEV_STATUS, {4, 0, 0}}, + {"deps", DM_TABLE_DEPS, {4, 0, 0}}, + {"rename", DM_DEV_RENAME, {4, 0, 0}}, + {"version", DM_VERSION, {4, 0, 0}}, + {"status", DM_TABLE_STATUS, {4, 0, 0}}, + {"table", DM_TABLE_STATUS, {4, 0, 0}}, + {"waitevent", DM_DEV_WAIT, {4, 0, 0}}, + {"names", DM_LIST_DEVICES, {4, 0, 0}}, + {"clear", DM_TABLE_CLEAR, {4, 0, 0}}, + {"mknodes", DM_DEV_STATUS, {4, 0, 0}}, +#ifdef DM_LIST_VERSIONS + {"versions", DM_LIST_VERSIONS, {4, 1, 0}}, +#endif +#ifdef DM_TARGET_MSG + {"message", DM_TARGET_MSG, {4, 2, 0}}, +#endif +#ifdef DM_DEV_SET_GEOMETRY + {"setgeometry", DM_DEV_SET_GEOMETRY, {4, 6, 0}}, +#endif +}; +/* *INDENT-ON* */ + +#define ALIGNMENT 8 + +/* FIXME Rejig library to record & use errno instead */ +#ifndef DM_EXISTS_FLAG +# define DM_EXISTS_FLAG 0x00000004 +#endif + +static char *_align(char *ptr, unsigned int a) +{ + register unsigned long agn = --a; + + return (char *) (((unsigned long) ptr + agn) & ~agn); +} + +#ifdef DM_IOCTLS +static unsigned _kernel_major = 0; +static unsigned _kernel_minor = 0; +static unsigned _kernel_release = 0; + +static int _uname(void) +{ + static int _uts_set = 0; + struct utsname _uts; + int parts; + + if (_uts_set) + return 1; + + if (uname(&_uts)) { + log_error("uname failed: %s", strerror(errno)); + return 0; + } + + parts = sscanf(_uts.release, "%u.%u.%u", + &_kernel_major, &_kernel_minor, &_kernel_release); + + /* Kernels with a major number of 2 always had 3 parts. */ + if (parts < 1 || (_kernel_major < 3 && parts < 3)) { + log_error("Could not determine kernel version used."); + return 0; + } + + _uts_set = 1; + return 1; +} + +int get_uname_version(unsigned *major, unsigned *minor, unsigned *release) +{ + if (!_uname()) + return_0; + + *major = _kernel_major; + *minor = _kernel_minor; + *release = _kernel_release; + + return 1; +} +/* + * Set number to NULL to populate _dm_bitset - otherwise first + * match is returned. 
+ * Returns: + * 0 - error + * 1 - success - number found + * 2 - success - number not found (only if require_module_loaded=0) + */ +static int _get_proc_number(const char *file, const char *name, + uint32_t *number, int require_module_loaded) +{ + FILE *fl; + char nm[256]; + char *line = NULL; + size_t len; + uint32_t num; + + if (!(fl = fopen(file, "r"))) { + log_sys_error("fopen", file); + return 0; + } + + while (getline(&line, &len, fl) != -1) { + if (sscanf(line, "%d %255s\n", &num, &nm[0]) == 2) { + if (!strcmp(name, nm)) { + if (number) { + *number = num; + if (fclose(fl)) + log_sys_error("fclose", file); + free(line); + return 1; + } + dm_bit_set(_dm_bitset, num); + } + } + } + if (fclose(fl)) + log_sys_error("fclose", file); + free(line); + + if (number) { + if (require_module_loaded) { + log_error("%s: No entry for %s found", file, name); + return 0; + } + + return 2; + } + + return 1; +} + +static int _control_device_number(uint32_t *major, uint32_t *minor) +{ + if (!_get_proc_number(PROC_DEVICES, MISC_NAME, major, 1) || + !_get_proc_number(PROC_MISC, DM_NAME, minor, 1)) { + *major = 0; + return 0; + } + + return 1; +} + +/* + * Returns 1 if it exists on returning; 0 if it doesn't; -1 if it's wrong. + */ +static int _control_exists(const char *control, uint32_t major, uint32_t minor) +{ + struct stat buf; + + if (stat(control, &buf) < 0) { + if (errno != ENOENT) + log_sys_error("stat", control); + return 0; + } + + if (!S_ISCHR(buf.st_mode)) { + log_verbose("%s: Wrong inode type", control); + if (!unlink(control)) + return 0; + log_sys_error("unlink", control); + return -1; + } + + if (major && buf.st_rdev != MKDEV((dev_t)major, (dev_t)minor)) { + log_verbose("%s: Wrong device number: (%u, %u) instead of " + "(%u, %u)", control, + MAJOR(buf.st_mode), MINOR(buf.st_mode), + major, minor); + if (!unlink(control)) + return 0; + log_sys_error("unlink", control); + return -1; + } + + return 1; +} + +static int _create_control(const char *control, uint32_t major, uint32_t minor) +{ + int ret; + mode_t old_umask; + + /* + * Return if the control already exists with intended major/minor + * or there's an error unlinking an apparently incorrect one. + */ + ret = _control_exists(control, major, minor); + if (ret == -1) + return_0; /* Failed to unlink existing incorrect node */ + if (ret) + return 1; /* Already exists and correct */ + + (void) dm_prepare_selinux_context(dm_dir(), S_IFDIR); + old_umask = umask(DM_DEV_DIR_UMASK); + ret = dm_create_dir(dm_dir()); + umask(old_umask); + (void) dm_prepare_selinux_context(NULL, 0); + + if (!ret) + return_0; + + log_verbose("Creating device %s (%u, %u)", control, major, minor); + + (void) dm_prepare_selinux_context(control, S_IFCHR); + old_umask = umask(DM_CONTROL_NODE_UMASK); + if (mknod(control, S_IFCHR | S_IRUSR | S_IWUSR, + MKDEV((dev_t)major, (dev_t)minor)) < 0) { + log_sys_error("mknod", control); + ret = 0; + } + umask(old_umask); + (void) dm_prepare_selinux_context(NULL, 0); + + return ret; +} +#endif + +/* + * FIXME Update bitset in long-running process if dm claims new major numbers. + */ +/* + * If require_module_loaded=0, caller is responsible to check + * whether _dm_device_major or _dm_bitset is really set. If + * it's not, it means the module is not loaded. + */ +static int _create_dm_bitset(int require_module_loaded) +{ + int r; + +#ifdef DM_IOCTLS + if (_dm_bitset || _dm_device_major) + return 1; + + if (!_uname()) + return 0; + + /* + * 2.6 kernels are limited to one major number. + * Assume 2.4 kernels are patched not to. 
+ * FIXME Check _dm_version and _dm_version_minor if 2.6 changes this. + */ + if (KERNEL_VERSION(_kernel_major, _kernel_minor, _kernel_release) >= + KERNEL_VERSION(2, 6, 0)) + _dm_multiple_major_support = 0; + + if (!_dm_multiple_major_support) { + if (!_get_proc_number(PROC_DEVICES, DM_NAME, &_dm_device_major, + require_module_loaded)) + return 0; + return 1; + } + + /* Multiple major numbers supported */ + if (!(_dm_bitset = dm_bitset_create(NULL, NUMBER_OF_MAJORS))) + return 0; + + r = _get_proc_number(PROC_DEVICES, DM_NAME, NULL, require_module_loaded); + if (!r || r == 2) { + dm_bitset_destroy(_dm_bitset); + _dm_bitset = NULL; + /* + * It's not an error if we didn't find anything and we + * didn't require module to be loaded at the same time. + */ + return r == 2; + } + + return 1; +#else + return 0; +#endif +} + +int dm_is_dm_major(uint32_t major) +{ + if (!_create_dm_bitset(0)) + return 0; + + if (_dm_multiple_major_support) { + if (!_dm_bitset) + return 0; + return dm_bit(_dm_bitset, major) ? 1 : 0; + } + + if (!_dm_device_major) + return 0; + + return (major == _dm_device_major) ? 1 : 0; +} + +static void _close_control_fd(void) +{ + if (_control_fd != -1) { + if (close(_control_fd) < 0) + log_sys_error("close", "_control_fd"); + _control_fd = -1; + } +} + +#ifdef DM_IOCTLS +static int _open_and_assign_control_fd(const char *control) +{ + if ((_control_fd = open(control, O_RDWR)) < 0) { + log_sys_error("open", control); + return 0; + } + + return 1; +} +#endif + +static int _open_control(void) +{ +#ifdef DM_IOCTLS + char control[PATH_MAX]; + uint32_t major = MISC_MAJOR; + uint32_t minor = MAPPER_CTRL_MINOR; + + if (_control_fd != -1) + return 1; + + if (!_uname()) + return 0; + + if (dm_snprintf(control, sizeof(control), "%s/%s", dm_dir(), DM_CONTROL_NODE) < 0) + goto_bad; + + /* + * Prior to 2.6.36 the minor number should be looked up in /proc. + */ + if ((KERNEL_VERSION(_kernel_major, _kernel_minor, _kernel_release) < + KERNEL_VERSION(2, 6, 36)) && + !_control_device_number(&major, &minor)) + goto_bad; + + /* + * Create the node with correct major and minor if not already done. + * Udev may already have created /dev/mapper/control + * from the modules.devname file generated by depmod. + */ + if (!_create_control(control, major, minor)) + goto_bad; + + /* + * As of 2.6.36 kernels, the open can trigger autoloading dm-mod. 
*/ + if (!_open_and_assign_control_fd(control)) + goto_bad; + + if (!_create_dm_bitset(1)) { + log_error("Failed to set up list of device-mapper major numbers"); + return 0; + } + + return 1; + +bad: + log_error("Failure to communicate with kernel device-mapper driver."); + if (!geteuid()) + log_error("Check that device-mapper is available in the kernel."); + return 0; +#else + return 1; +#endif +} + +static void _dm_zfree_string(char *string) +{ + if (string) { + memset(string, 0, strlen(string)); + dm_free(string); + } +} + +static void _dm_zfree_dmi(struct dm_ioctl *dmi) +{ + if (dmi) { + memset(dmi, 0, dmi->data_size); + dm_free(dmi); + } +} + +static void _dm_task_free_targets(struct dm_task *dmt) +{ + struct target *t, *n; + + for (t = dmt->head; t; t = n) { + n = t->next; + _dm_zfree_string(t->params); + dm_free(t->type); + dm_free(t); + } + + dmt->head = dmt->tail = NULL; +} + +void dm_task_destroy(struct dm_task *dmt) +{ + _dm_task_free_targets(dmt); + _dm_zfree_dmi(dmt->dmi.v4); + dm_free(dmt->dev_name); + dm_free(dmt->mangled_dev_name); + dm_free(dmt->newname); + dm_free(dmt->message); + dm_free(dmt->geometry); + dm_free(dmt->uuid); + dm_free(dmt->mangled_uuid); + dm_free(dmt); +} + +/* + * Protocol Version 4 functions. + */ + +int dm_task_get_driver_version(struct dm_task *dmt, char *version, size_t size) +{ + unsigned *v; + + if (!dmt->dmi.v4) { + if (version) + version[0] = '\0'; + return 0; + } + + v = dmt->dmi.v4->version; + _dm_version_minor = v[1]; + _dm_version_patchlevel = v[2]; + if (version && + (snprintf(version, size, "%u.%u.%u", v[0], v[1], v[2]) < 0)) { + log_error("Buffer for version is too short."); + if (size > 0) + version[0] = '\0'; + return 0; + } + + return 1; +} + +static int _check_version(char *version, size_t size, int log_suppress) +{ + struct dm_task *task; + int r; + + if (!(task = dm_task_create(DM_DEVICE_VERSION))) { + log_error("Failed to get device-mapper version"); + version[0] = '\0'; + return 0; + } + + if (log_suppress) + _log_suppress = 1; + + r = dm_task_run(task); + if (!dm_task_get_driver_version(task, version, size)) + stack; + dm_task_destroy(task); + _log_suppress = 0; + + return r; +} + +/* + * Find out device-mapper's major version number the first time + * this is called and whether or not we support it. + */ +int dm_check_version(void) +{ + char libversion[64] = "", dmversion[64] = ""; + const char *compat = ""; + + if (_version_checked) + return _version_ok; + + _version_checked = 1; + + if (_check_version(dmversion, sizeof(dmversion), _dm_compat)) + return 1; + + if (!_dm_compat) + goto_bad; + + log_verbose("device-mapper ioctl protocol version %u failed. " + "Trying protocol version 1.", _dm_version); + _dm_version = 1; + if (_check_version(dmversion, sizeof(dmversion), 0)) { + log_verbose("Using device-mapper ioctl protocol version 1"); + return 1; + } + + compat = "(compat)"; + + bad: + dm_get_library_version(libversion, sizeof(libversion)); + + log_error("Incompatible libdevmapper %s%s and kernel driver %s.", + *libversion ? libversion : "(unknown version)", compat, + *dmversion ?
dmversion : "(unknown version)"); + + _version_ok = 0; + return 0; +} + +int dm_cookie_supported(void) +{ + return (dm_check_version() && + _dm_version >= 4 && + _dm_version_minor >= 15); +} + +static int _dm_inactive_supported(void) +{ + int inactive_supported = 0; + + if (dm_check_version() && _dm_version >= 4) { + if (_dm_version_minor >= 16) + inactive_supported = 1; /* upstream */ + else if (_dm_version_minor == 11 && + (_dm_version_patchlevel >= 6 && + _dm_version_patchlevel <= 40)) { + inactive_supported = 1; /* RHEL 5.7 */ + } + } + + return inactive_supported; +} + +int dm_message_supports_precise_timestamps(void) +{ + /* + * 4.32.0 supports "precise_timestamps" and "histogram:" options + * to @stats_create messages but lacks the ability to report + * these properties via a subsequent @stats_list: require at + * least 4.33.0 in order to use these features. + */ + if (dm_check_version() && _dm_version >= 4) + if (_dm_version_minor >= 33) + return 1; + return 0; +} + +void *dm_get_next_target(struct dm_task *dmt, void *next, + uint64_t *start, uint64_t *length, + char **target_type, char **params) +{ + struct target *t = (struct target *) next; + + if (!t) + t = dmt->head; + + if (!t) { + *start = 0; + *length = 0; + *target_type = 0; + *params = 0; + return NULL; + } + + *start = t->start; + *length = t->length; + *target_type = t->type; + *params = t->params; + + return t->next; +} + +/* Unmarshall the target info returned from a status call */ +static int _unmarshal_status(struct dm_task *dmt, struct dm_ioctl *dmi) +{ + char *outbuf = (char *) dmi + dmi->data_start; + char *outptr = outbuf; + uint32_t i; + struct dm_target_spec *spec; + + _dm_task_free_targets(dmt); + + for (i = 0; i < dmi->target_count; i++) { + spec = (struct dm_target_spec *) outptr; + if (!dm_task_add_target(dmt, spec->sector_start, + spec->length, + spec->target_type, + outptr + sizeof(*spec))) { + return 0; + } + + outptr = outbuf + spec->next; + } + + return 1; +} + +int dm_format_dev(char *buf, int bufsize, uint32_t dev_major, + uint32_t dev_minor) +{ + int r; + + if (bufsize < 8) + return 0; + + r = snprintf(buf, (size_t) bufsize, "%u:%u", dev_major, dev_minor); + if (r < 0 || r > bufsize - 1) + return 0; + + return 1; +} + +int dm_task_get_info(struct dm_task *dmt, struct dm_info *info) +{ + if (!dmt->dmi.v4) + return 0; + + memset(info, 0, sizeof(*info)); + + info->exists = dmt->dmi.v4->flags & DM_EXISTS_FLAG ? 1 : 0; + if (!info->exists) + return 1; + + info->suspended = dmt->dmi.v4->flags & DM_SUSPEND_FLAG ? 1 : 0; + info->read_only = dmt->dmi.v4->flags & DM_READONLY_FLAG ? 1 : 0; + info->live_table = dmt->dmi.v4->flags & DM_ACTIVE_PRESENT_FLAG ? 1 : 0; + info->inactive_table = dmt->dmi.v4->flags & DM_INACTIVE_PRESENT_FLAG ? + 1 : 0; + info->deferred_remove = dmt->dmi.v4->flags & DM_DEFERRED_REMOVE; + info->internal_suspend = (dmt->dmi.v4->flags & DM_INTERNAL_SUSPEND_FLAG) ? 
1 : 0; + info->target_count = dmt->dmi.v4->target_count; + info->open_count = dmt->dmi.v4->open_count; + info->event_nr = dmt->dmi.v4->event_nr; + info->major = MAJOR(dmt->dmi.v4->dev); + info->minor = MINOR(dmt->dmi.v4->dev); + + return 1; +} + +uint32_t dm_task_get_read_ahead(const struct dm_task *dmt, uint32_t *read_ahead) +{ + const char *dev_name; + + *read_ahead = 0; + + if (!dmt->dmi.v4 || !(dmt->dmi.v4->flags & DM_EXISTS_FLAG)) + return 0; + + if (*dmt->dmi.v4->name) + dev_name = dmt->dmi.v4->name; + else if (!(dev_name = DEV_NAME(dmt))) { + log_error("Get read ahead request failed: device name unrecorded."); + return 0; + } + + return get_dev_node_read_ahead(dev_name, MAJOR(dmt->dmi.v4->dev), + MINOR(dmt->dmi.v4->dev), read_ahead); +} + +struct dm_deps *dm_task_get_deps(struct dm_task *dmt) +{ + return (struct dm_deps *) (((char *) dmt->dmi.v4) + + dmt->dmi.v4->data_start); +} + +struct dm_names *dm_task_get_names(struct dm_task *dmt) +{ + return (struct dm_names *) (((char *) dmt->dmi.v4) + + dmt->dmi.v4->data_start); +} + +struct dm_versions *dm_task_get_versions(struct dm_task *dmt) +{ + return (struct dm_versions *) (((char *) dmt->dmi.v4) + + dmt->dmi.v4->data_start); +} + +const char *dm_task_get_message_response(struct dm_task *dmt) +{ + const char *start, *end; + + if (!(dmt->dmi.v4->flags & DM_DATA_OUT_FLAG)) + return NULL; + + start = (const char *) dmt->dmi.v4 + dmt->dmi.v4->data_start; + end = (const char *) dmt->dmi.v4 + dmt->dmi.v4->data_size; + + if (end < start) { + log_error(INTERNAL_ERROR "Corrupted message structure returned: start %d > end %d", (int)dmt->dmi.v4->data_start, (int)dmt->dmi.v4->data_size); + return NULL; + } + + if (!memchr(start, 0, end - start)) { + log_error(INTERNAL_ERROR "Message response doesn't contain terminating NUL character"); + return NULL; + } + + return start; +} + +int dm_task_set_ro(struct dm_task *dmt) +{ + dmt->read_only = 1; + return 1; +} + +int dm_task_set_read_ahead(struct dm_task *dmt, uint32_t read_ahead, + uint32_t read_ahead_flags) +{ + dmt->read_ahead = read_ahead; + dmt->read_ahead_flags = read_ahead_flags; + + return 1; +} + +int dm_task_suppress_identical_reload(struct dm_task *dmt) +{ + dmt->suppress_identical_reload = 1; + return 1; +} + +int dm_task_set_add_node(struct dm_task *dmt, dm_add_node_t add_node) +{ + switch (add_node) { + case DM_ADD_NODE_ON_RESUME: + case DM_ADD_NODE_ON_CREATE: + dmt->add_node = add_node; + return 1; + default: + log_error("Unknown add node parameter"); + return 0; + } +} + +int dm_task_set_newuuid(struct dm_task *dmt, const char *newuuid) +{ + dm_string_mangling_t mangling_mode = dm_get_name_mangling_mode(); + char mangled_uuid[DM_UUID_LEN]; + int r = 0; + + if (strlen(newuuid) >= DM_UUID_LEN) { + log_error("Uuid \"%s\" too long", newuuid); + return 0; + } + + if (!check_multiple_mangled_string_allowed(newuuid, "new UUID", mangling_mode)) + return_0; + + if (mangling_mode != DM_STRING_MANGLING_NONE && + (r = mangle_string(newuuid, "new UUID", strlen(newuuid), mangled_uuid, + sizeof(mangled_uuid), mangling_mode)) < 0) { + log_error("Failed to mangle new device UUID \"%s\"", newuuid); + return 0; + } + + if (r) { + log_debug_activation("New device uuid mangled [%s]: %s --> %s", + mangling_mode == DM_STRING_MANGLING_AUTO ? 
"auto" : "hex", + newuuid, mangled_uuid); + newuuid = mangled_uuid; + } + + dm_free(dmt->newname); + if (!(dmt->newname = dm_strdup(newuuid))) { + log_error("dm_task_set_newuuid: strdup(%s) failed", newuuid); + return 0; + } + dmt->new_uuid = 1; + + return 1; +} + +int dm_task_set_message(struct dm_task *dmt, const char *message) +{ + dm_free(dmt->message); + if (!(dmt->message = dm_strdup(message))) { + log_error("dm_task_set_message: strdup failed"); + return 0; + } + + return 1; +} + +int dm_task_set_sector(struct dm_task *dmt, uint64_t sector) +{ + dmt->sector = sector; + + return 1; +} + +int dm_task_set_geometry(struct dm_task *dmt, const char *cylinders, const char *heads, + const char *sectors, const char *start) +{ + dm_free(dmt->geometry); + if (dm_asprintf(&(dmt->geometry), "%s %s %s %s", + cylinders, heads, sectors, start) < 0) { + log_error("dm_task_set_geometry: sprintf failed"); + return 0; + } + + return 1; +} + +int dm_task_no_flush(struct dm_task *dmt) +{ + dmt->no_flush = 1; + + return 1; +} + +int dm_task_no_open_count(struct dm_task *dmt) +{ + dmt->no_open_count = 1; + + return 1; +} + +int dm_task_skip_lockfs(struct dm_task *dmt) +{ + dmt->skip_lockfs = 1; + + return 1; +} + +int dm_task_secure_data(struct dm_task *dmt) +{ + dmt->secure_data = 1; + + return 1; +} + +int dm_task_retry_remove(struct dm_task *dmt) +{ + dmt->retry_remove = 1; + + return 1; +} + +int dm_task_deferred_remove(struct dm_task *dmt) +{ + dmt->deferred_remove = 1; + + return 1; +} + +int dm_task_query_inactive_table(struct dm_task *dmt) +{ + dmt->query_inactive_table = 1; + + return 1; +} + +int dm_task_set_event_nr(struct dm_task *dmt, uint32_t event_nr) +{ + dmt->event_nr = event_nr; + + return 1; +} + +int dm_task_set_record_timestamp(struct dm_task *dmt) +{ + if (!_dm_ioctl_timestamp) + _dm_ioctl_timestamp = dm_timestamp_alloc(); + + if (!_dm_ioctl_timestamp) + return_0; + + dmt->record_timestamp = 1; + + return 1; +} + +struct dm_timestamp *dm_task_get_ioctl_timestamp(struct dm_task *dmt) +{ + return dmt->record_timestamp ? 
_dm_ioctl_timestamp : NULL; +} + +struct target *create_target(uint64_t start, uint64_t len, const char *type, + const char *params) +{ + struct target *t; + + if (strlen(type) >= DM_MAX_TYPE_NAME) { + log_error("Target type name %s is too long.", type); + return NULL; + } + + if (!(t = dm_zalloc(sizeof(*t)))) { + log_error("create_target: malloc(%" PRIsize_t ") failed", + sizeof(*t)); + return NULL; + } + + if (!(t->params = dm_strdup(params))) { + log_error("create_target: strdup(params) failed"); + goto bad; + } + + if (!(t->type = dm_strdup(type))) { + log_error("create_target: strdup(type) failed"); + goto bad; + } + + t->start = start; + t->length = len; + return t; + + bad: + _dm_zfree_string(t->params); + dm_free(t->type); + dm_free(t); + return NULL; +} + +static char *_add_target(struct target *t, char *out, char *end) +{ + char *out_sp = out; + struct dm_target_spec sp; + size_t sp_size = sizeof(struct dm_target_spec); + unsigned int backslash_count = 0; + int len; + char *pt; + + if (strlen(t->type) >= sizeof(sp.target_type)) { + log_error("Target type name %s is too long.", t->type); + return NULL; + } + + sp.status = 0; + sp.sector_start = t->start; + sp.length = t->length; + strncpy(sp.target_type, t->type, sizeof(sp.target_type) - 1); + sp.target_type[sizeof(sp.target_type) - 1] = '\0'; + + out += sp_size; + pt = t->params; + + while (*pt) + if (*pt++ == '\\') + backslash_count++; + len = strlen(t->params) + backslash_count; + + if ((out >= end) || (out + len + 1) >= end) { + log_error("Ran out of memory building ioctl parameter"); + return NULL; + } + + if (backslash_count) { + /* replace "\" with "\\" */ + pt = t->params; + do { + if (*pt == '\\') + *out++ = '\\'; + *out++ = *pt++; + } while (*pt); + *out++ = '\0'; + } + else { + strcpy(out, t->params); + out += len + 1; + } + + /* align next block */ + out = _align(out, ALIGNMENT); + + sp.next = out - out_sp; + memcpy(out_sp, &sp, sp_size); + + return out; +} + +static int _lookup_dev_name(uint64_t dev, char *buf, size_t len) +{ + struct dm_names *names; + unsigned next = 0; + struct dm_task *dmt; + int r = 0; + + if (!(dmt = dm_task_create(DM_DEVICE_LIST))) + return 0; + + if (!dm_task_run(dmt)) + goto out; + + if (!(names = dm_task_get_names(dmt))) + goto out; + + if (!names->dev) + goto out; + + do { + names = (struct dm_names *)((char *) names + next); + if (names->dev == dev) { + strncpy(buf, names->name, len); + r = 1; + break; + } + next = names->next; + } while (next); + + out: + dm_task_destroy(dmt); + return r; +} + +static struct dm_ioctl *_flatten(struct dm_task *dmt, unsigned repeat_count) +{ + const size_t min_size = 16 * 1024; + const int (*version)[3]; + + struct dm_ioctl *dmi; + struct target *t; + struct dm_target_msg *tmsg; + size_t len = sizeof(struct dm_ioctl); + char *b, *e; + int count = 0; + + for (t = dmt->head; t; t = t->next) { + len += sizeof(struct dm_target_spec); + len += strlen(t->params) + 1 + ALIGNMENT; + count++; + } + + if (count && (dmt->sector || dmt->message)) { + log_error("targets and message are incompatible"); + return NULL; + } + + if (count && dmt->newname) { + log_error("targets and rename are incompatible"); + return NULL; + } + + if (count && dmt->geometry) { + log_error("targets and geometry are incompatible"); + return NULL; + } + + if (dmt->newname && (dmt->sector || dmt->message)) { + log_error("message and rename are incompatible"); + return NULL; + } + + if (dmt->newname && dmt->geometry) { + log_error("geometry and rename are incompatible"); + return NULL; + } + + if 
(dmt->geometry && (dmt->sector || dmt->message)) { + log_error("geometry and message are incompatible"); + return NULL; + } + + if (dmt->sector && !dmt->message) { + log_error("message is required with sector"); + return NULL; + } + + if (dmt->newname) + len += strlen(dmt->newname) + 1; + + if (dmt->message) + len += sizeof(struct dm_target_msg) + strlen(dmt->message) + 1; + + if (dmt->geometry) + len += strlen(dmt->geometry) + 1; + + /* + * Give len a minimum size so that we have space to store + * dependencies or status information. + */ + if (len < min_size) + len = min_size; + + /* Increase buffer size if repeating because buffer was too small */ + while (repeat_count--) + len *= 2; + + if (!(dmi = dm_zalloc(len))) + return NULL; + + version = &_cmd_data_v4[dmt->type].version; + + dmi->version[0] = (*version)[0]; + dmi->version[1] = (*version)[1]; + dmi->version[2] = (*version)[2]; + + dmi->data_size = len; + dmi->data_start = sizeof(struct dm_ioctl); + + if (dmt->minor >= 0) { + if (!_dm_multiple_major_support && dmt->allow_default_major_fallback && + dmt->major != (int) _dm_device_major) { + log_verbose("Overriding major number of %d " + "with %u for persistent device.", + dmt->major, _dm_device_major); + dmt->major = _dm_device_major; + } + + if (dmt->major <= 0) { + log_error("Missing major number for persistent device."); + goto bad; + } + + dmi->flags |= DM_PERSISTENT_DEV_FLAG; + dmi->dev = MKDEV((dev_t)dmt->major, (dev_t)dmt->minor); + } + + /* Does driver support device number referencing? */ + if (_dm_version_minor < 3 && !DEV_NAME(dmt) && !DEV_UUID(dmt) && dmi->dev) { + if (!_lookup_dev_name(dmi->dev, dmi->name, sizeof(dmi->name))) { + log_error("Unable to find name for device (%" PRIu32 + ":%" PRIu32 ")", dmt->major, dmt->minor); + goto bad; + } + log_verbose("device (%" PRIu32 ":%" PRIu32 ") is %s " + "for compatibility with old kernel", + dmt->major, dmt->minor, dmi->name); + } + + /* FIXME Until resume ioctl supplies name, use dev_name for readahead */ + if (DEV_NAME(dmt) && (dmt->type != DM_DEVICE_RESUME || dmt->minor < 0 || + dmt->major < 0)) + strncpy(dmi->name, DEV_NAME(dmt), sizeof(dmi->name)); + + if (DEV_UUID(dmt)) + strncpy(dmi->uuid, DEV_UUID(dmt), sizeof(dmi->uuid)); + + if (dmt->type == DM_DEVICE_SUSPEND) + dmi->flags |= DM_SUSPEND_FLAG; + if (dmt->no_flush) { + if (_dm_version_minor < 12) + log_verbose("No flush flag unsupported by kernel. " + "Buffers will be flushed."); + else + dmi->flags |= DM_NOFLUSH_FLAG; + } + if (dmt->read_only) + dmi->flags |= DM_READONLY_FLAG; + if (dmt->skip_lockfs) + dmi->flags |= DM_SKIP_LOCKFS_FLAG; + if (dmt->deferred_remove && (dmt->type == DM_DEVICE_REMOVE || dmt->type == DM_DEVICE_REMOVE_ALL)) + dmi->flags |= DM_DEFERRED_REMOVE; + + if (dmt->secure_data) { + if (_dm_version_minor < 20) + log_verbose("Secure data flag unsupported by kernel. " + "Buffers will not be wiped after use."); + dmi->flags |= DM_SECURE_DATA_FLAG; + } + if (dmt->query_inactive_table) { + if (!_dm_inactive_supported()) + log_warn("WARNING: Inactive table query unsupported " + "by kernel. It will use live table."); + dmi->flags |= DM_QUERY_INACTIVE_TABLE_FLAG; + } + if (dmt->new_uuid) { + if (_dm_version_minor < 19) { + log_error("WARNING: Setting UUID unsupported by " + "kernel. 
Aborting operation."); + goto bad; + } + dmi->flags |= DM_UUID_FLAG; + } + + dmi->target_count = count; + dmi->event_nr = dmt->event_nr; + + b = (char *) (dmi + 1); + e = (char *) dmi + len; + + for (t = dmt->head; t; t = t->next) + if (!(b = _add_target(t, b, e))) + goto_bad; + + if (dmt->newname) + strcpy(b, dmt->newname); + + if (dmt->message) { + tmsg = (struct dm_target_msg *) b; + tmsg->sector = dmt->sector; + strcpy(tmsg->message, dmt->message); + } + + if (dmt->geometry) + strcpy(b, dmt->geometry); + + return dmi; + + bad: + _dm_zfree_dmi(dmi); + return NULL; +} + +static int _process_mapper_dir(struct dm_task *dmt) +{ + struct dirent *dirent; + DIR *d; + const char *dir; + int r = 1; + + dir = dm_dir(); + if (!(d = opendir(dir))) { + log_sys_error("opendir", dir); + return 0; + } + + while ((dirent = readdir(d))) { + if (!strcmp(dirent->d_name, ".") || + !strcmp(dirent->d_name, "..") || + !strcmp(dirent->d_name, "control")) + continue; + if (!dm_task_set_name(dmt, dirent->d_name)) { + r = 0; + stack; + continue; /* try next name */ + } + if (!dm_task_run(dmt)) { + r = 0; + stack; /* keep going */ + } + } + + if (closedir(d)) + log_sys_error("closedir", dir); + + return r; +} + +static int _process_all_v4(struct dm_task *dmt) +{ + struct dm_task *task; + struct dm_names *names; + unsigned next = 0; + int r = 1; + + if (!(task = dm_task_create(DM_DEVICE_LIST))) + return 0; + + if (!dm_task_run(task)) { + r = 0; + goto out; + } + + if (!(names = dm_task_get_names(task))) { + r = 0; + goto out; + } + + if (!names->dev) + goto out; + + do { + names = (struct dm_names *)((char *) names + next); + if (!dm_task_set_name(dmt, names->name)) { + r = 0; + goto out; + } + if (!dm_task_run(dmt)) + r = 0; + next = names->next; + } while (next); + + out: + dm_task_destroy(task); + return r; +} + +static int _mknodes_v4(struct dm_task *dmt) +{ + (void) _process_mapper_dir(dmt); + + return _process_all_v4(dmt); +} + +/* + * If an operation that uses a cookie fails, decrement the + * semaphore instead of udev. + */ +static int _udev_complete(struct dm_task *dmt) +{ + uint16_t base; + + if (dmt->cookie_set && + (base = dmt->event_nr & ~DM_UDEV_FLAGS_MASK)) + /* strip flags from the cookie and use cookie magic instead */ + return dm_udev_complete(base | (DM_COOKIE_MAGIC << + DM_UDEV_FLAGS_SHIFT)); + + return 1; +} + +#ifdef DM_IOCTLS +static int _check_uevent_generated(struct dm_ioctl *dmi) +{ + if (!dm_check_version() || + _dm_version < 4 || + _dm_version_minor < 17) + /* can't check, assume uevent is generated */ + return 1; + + return dmi->flags & DM_UEVENT_GENERATED_FLAG; +} +#endif + +static int _create_and_load_v4(struct dm_task *dmt) +{ + struct dm_task *task; + int r; + uint32_t cookie; + + /* Use new task struct to create the device */ + if (!(task = dm_task_create(DM_DEVICE_CREATE))) { + _udev_complete(dmt); + return_0; + } + + /* Copy across relevant fields */ + if (dmt->dev_name && !dm_task_set_name(task, dmt->dev_name)) + goto_bad; + + if (dmt->uuid && !dm_task_set_uuid(task, dmt->uuid)) + goto_bad; + + task->major = dmt->major; + task->minor = dmt->minor; + task->uid = dmt->uid; + task->gid = dmt->gid; + task->mode = dmt->mode; + /* FIXME: Just for udev_check in dm_task_run. Can we avoid this? 
*/ + task->event_nr = dmt->event_nr & DM_UDEV_FLAGS_MASK; + task->cookie_set = dmt->cookie_set; + task->add_node = dmt->add_node; + + if (!dm_task_run(task)) + goto_bad; + + dm_task_destroy(task); + + /* Next load the table */ + if (!(task = dm_task_create(DM_DEVICE_RELOAD))) { + stack; + _udev_complete(dmt); + goto revert; + } + + /* Copy across relevant fields */ + if (dmt->dev_name && !dm_task_set_name(task, dmt->dev_name)) { + stack; + dm_task_destroy(task); + _udev_complete(dmt); + goto revert; + } + + task->read_only = dmt->read_only; + task->head = dmt->head; + task->tail = dmt->tail; + task->secure_data = dmt->secure_data; + + r = dm_task_run(task); + + task->head = NULL; + task->tail = NULL; + dm_task_destroy(task); + + if (!r) { + stack; + _udev_complete(dmt); + goto revert; + } + + /* Use the original structure last so the info will be correct */ + dmt->type = DM_DEVICE_RESUME; + dm_free(dmt->uuid); + dmt->uuid = NULL; + dm_free(dmt->mangled_uuid); + dmt->mangled_uuid = NULL; + + if (dm_task_run(dmt)) + return 1; + + revert: + dmt->type = DM_DEVICE_REMOVE; + dm_free(dmt->uuid); + dmt->uuid = NULL; + dm_free(dmt->mangled_uuid); + dmt->mangled_uuid = NULL; + + /* + * Also udev-synchronize "remove" dm task that is a part of this revert! + * But only if the original dm task was supposed to be synchronized. + */ + if (dmt->cookie_set) { + cookie = (dmt->event_nr & ~DM_UDEV_FLAGS_MASK) | + (DM_COOKIE_MAGIC << DM_UDEV_FLAGS_SHIFT); + if (!dm_task_set_cookie(dmt, &cookie, + (dmt->event_nr & DM_UDEV_FLAGS_MASK) >> + DM_UDEV_FLAGS_SHIFT)) + stack; /* keep going */ + } + + if (!dm_task_run(dmt)) + log_error("Failed to revert device creation."); + + return 0; + + bad: + dm_task_destroy(task); + _udev_complete(dmt); + + return 0; +} + +uint64_t dm_task_get_existing_table_size(struct dm_task *dmt) +{ + return dmt->existing_table_size; +} + +static int _reload_with_suppression_v4(struct dm_task *dmt) +{ + struct dm_task *task; + struct target *t1, *t2; + size_t len; + int r; + + /* New task to get existing table information */ + if (!(task = dm_task_create(DM_DEVICE_TABLE))) { + log_error("Failed to create device-mapper task struct"); + return 0; + } + + /* Copy across relevant fields */ + if (dmt->dev_name && !dm_task_set_name(task, dmt->dev_name)) { + dm_task_destroy(task); + return 0; + } + + if (dmt->uuid && !dm_task_set_uuid(task, dmt->uuid)) { + dm_task_destroy(task); + return 0; + } + + task->major = dmt->major; + task->minor = dmt->minor; + + r = dm_task_run(task); + + if (!r) { + dm_task_destroy(task); + return r; + } + + /* Store existing table size */ + t2 = task->head; + while (t2 && t2->next) + t2 = t2->next; + dmt->existing_table_size = t2 ? t2->start + t2->length : 0; + + if (((task->dmi.v4->flags & DM_READONLY_FLAG) ? 
1 : 0) != dmt->read_only) + goto no_match; + + t1 = dmt->head; + t2 = task->head; + + while (t1 && t2) { + len = strlen(t2->params); + while (len-- > 0 && t2->params[len] == ' ') + t2->params[len] = '\0'; + if ((t1->start != t2->start) || + (t1->length != t2->length) || + (strcmp(t1->type, t2->type)) || + (strcmp(t1->params, t2->params))) + goto no_match; + t1 = t1->next; + t2 = t2->next; + } + + if (!t1 && !t2) { + dmt->dmi.v4 = task->dmi.v4; + task->dmi.v4 = NULL; + dm_task_destroy(task); + return 1; + } + +no_match: + dm_task_destroy(task); + + /* Now do the original reload */ + dmt->suppress_identical_reload = 0; + r = dm_task_run(dmt); + + return r; +} + +static int _check_children_not_suspended_v4(struct dm_task *dmt, uint64_t device) +{ + struct dm_task *task; + struct dm_info info; + struct dm_deps *deps; + int r = 0; + uint32_t i; + + /* Find dependencies */ + if (!(task = dm_task_create(DM_DEVICE_DEPS))) + return 0; + + /* Copy across or set relevant fields */ + if (device) { + task->major = MAJOR(device); + task->minor = MINOR(device); + } else { + if (dmt->dev_name && !dm_task_set_name(task, dmt->dev_name)) + goto out; + + if (dmt->uuid && !dm_task_set_uuid(task, dmt->uuid)) + goto out; + + task->major = dmt->major; + task->minor = dmt->minor; + } + + task->uid = dmt->uid; + task->gid = dmt->gid; + task->mode = dmt->mode; + /* FIXME: Just for udev_check in dm_task_run. Can we avoid this? */ + task->event_nr = dmt->event_nr & DM_UDEV_FLAGS_MASK; + task->cookie_set = dmt->cookie_set; + task->add_node = dmt->add_node; + + if (!(r = dm_task_run(task))) + goto out; + + if (!dm_task_get_info(task, &info) || !info.exists) + goto out; + + /* + * Warn if any of the devices this device depends upon are already + * suspended: I/O could become trapped between the two devices. + */ + if (info.suspended) { + if (!device) + log_debug_activation("Attempting to suspend a device that is already suspended " + "(%u:%u)", info.major, info.minor); + else + log_error(INTERNAL_ERROR "Attempt to suspend device %s%s%s%.0d%s%.0d%s%s" + "that uses already-suspended device (%u:%u)", + DEV_NAME(dmt) ? : "", DEV_UUID(dmt) ? : "", + dmt->major > 0 ? "(" : "", + dmt->major > 0 ? dmt->major : 0, + dmt->major > 0 ? ":" : "", + dmt->minor > 0 ? dmt->minor : 0, + dmt->major > 0 && dmt->minor == 0 ? "0" : "", + dmt->major > 0 ? ") " : "", + info.major, info.minor); + + /* No need for further recursion */ + r = 1; + goto out; + } + + if (!(deps = dm_task_get_deps(task))) + goto out; + + for (i = 0; i < deps->count; i++) { + /* Only recurse with dm devices */ + if (MAJOR(deps->device[i]) != _dm_device_major) + continue; + + if (!_check_children_not_suspended_v4(task, deps->device[i])) + goto out; + } + + r = 1; + +out: + dm_task_destroy(task); + + return r; +} + +static int _suspend_with_validation_v4(struct dm_task *dmt) +{ + /* Avoid recursion */ + dmt->enable_checks = 0; + + /* + * Ensure we can't leave any I/O trapped between suspended devices. + */ + if (!_check_children_not_suspended_v4(dmt, 0)) + return 0; + + /* Finally, perform the original suspend. */ + return dm_task_run(dmt); +} + +static const char *_sanitise_message(char *message) +{ + const char *sanitised_message = message ?: ""; + + /* FIXME: Check for whitespace variations. */ + /* This traps what cryptsetup sends us. 
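/*
 * Sketch (not part of this patch; the helper name is illustrative) of
 * how a caller reaches _suspend_with_validation_v4() above: opting in
 * with dm_task_enable_checks() makes dm_task_run() verify that no
 * device underneath is already suspended before suspending this one.
 */
static int suspend_checked(const char *name)
{
	struct dm_task *dmt;
	int r = 0;

	if (!(dmt = dm_task_create(DM_DEVICE_SUSPEND)))
		return 0;

	if (dm_task_set_name(dmt, name) &&
	    dm_task_enable_checks(dmt) &&	/* avoid trapping I/O between suspended devices */
	    dm_task_run(dmt))
		r = 1;

	dm_task_destroy(dmt);

	return r;
}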
*/ + if (message && !strncasecmp(message, "key set", 7)) + sanitised_message = "key set"; + + return sanitised_message; +} + +#ifdef DM_IOCTLS +static int _do_dm_ioctl_unmangle_string(char *str, const char *str_name, + char *buf, size_t buf_size, + dm_string_mangling_t mode) +{ + int r; + + if (mode == DM_STRING_MANGLING_NONE) + return 1; + + if (!check_multiple_mangled_string_allowed(str, str_name, mode)) + return_0; + + if ((r = unmangle_string(str, str_name, strlen(str), buf, buf_size, mode)) < 0) { + log_debug_activation("_do_dm_ioctl_unmangle_string: failed to " + "unmangle %s \"%s\"", str_name, str); + return 0; + } + + if (r) + memcpy(str, buf, strlen(buf) + 1); + + return 1; +} + +static int _dm_ioctl_unmangle_names(int type, struct dm_ioctl *dmi) +{ + char buf[DM_NAME_LEN]; + struct dm_names *names; + unsigned next = 0; + char *name; + int r = 1; + + if ((name = dmi->name)) + r = _do_dm_ioctl_unmangle_string(name, "name", buf, sizeof(buf), + dm_get_name_mangling_mode()); + + if (type == DM_DEVICE_LIST && + ((names = ((struct dm_names *) ((char *)dmi + dmi->data_start)))) && + names->dev) { + do { + names = (struct dm_names *)((char *) names + next); + r = _do_dm_ioctl_unmangle_string(names->name, "name", + buf, sizeof(buf), + dm_get_name_mangling_mode()); + next = names->next; + } while (next); + } + + return r; +} + +static int _dm_ioctl_unmangle_uuids(int type, struct dm_ioctl *dmi) +{ + char buf[DM_UUID_LEN]; + char *uuid = dmi->uuid; + + if (uuid) + return _do_dm_ioctl_unmangle_string(uuid, "UUID", buf, sizeof(buf), + dm_get_name_mangling_mode()); + + return 1; +} +#endif + +static struct dm_ioctl *_do_dm_ioctl(struct dm_task *dmt, unsigned command, + unsigned buffer_repeat_count, + unsigned retry_repeat_count, + int *retryable) +{ + struct dm_ioctl *dmi; + int ioctl_with_uevent; + int r; + + dmt->ioctl_errno = 0; + + dmi = _flatten(dmt, buffer_repeat_count); + if (!dmi) { + log_error("Couldn't create ioctl argument."); + return NULL; + } + + if (dmt->type == DM_DEVICE_TABLE) + dmi->flags |= DM_STATUS_TABLE_FLAG; + + dmi->flags |= DM_EXISTS_FLAG; /* FIXME */ + + if (dmt->no_open_count) + dmi->flags |= DM_SKIP_BDGET_FLAG; + + ioctl_with_uevent = dmt->type == DM_DEVICE_RESUME || + dmt->type == DM_DEVICE_REMOVE || + dmt->type == DM_DEVICE_RENAME; + + if (ioctl_with_uevent && dm_cookie_supported()) { + /* + * Always mark events coming from libdevmapper as + * "primary sourced". This is needed to distinguish + * any spurious events so we can act appropriately. + * This needs to be applied even when udev_sync is + * not used because udev flags could be used alone. + */ + dmi->event_nr |= DM_UDEV_PRIMARY_SOURCE_FLAG << + DM_UDEV_FLAGS_SHIFT; + + /* + * Prevent udev vs. libdevmapper race when processing nodes + * and symlinks. This can happen when the udev rules are + * installed and udev synchronisation code is enabled in + * libdevmapper but the software using libdevmapper does not + * make use of it (by not calling dm_task_set_cookie before). + * We need to instruct the udev rules not to be applied at + * all in this situation so we can gracefully fallback to + * libdevmapper's node and symlink creation code. + */ + if (!dmt->cookie_set && dm_udev_get_sync_support()) { + log_debug_activation("Cookie value is not set while trying to call %s " + "ioctl. Please, consider using libdevmapper's udev " + "synchronisation interface or disable it explicitly " + "by calling dm_udev_set_sync_support(0).", + dmt->type == DM_DEVICE_RESUME ? "DM_DEVICE_RESUME" : + dmt->type == DM_DEVICE_REMOVE ? 
"DM_DEVICE_REMOVE" : + "DM_DEVICE_RENAME"); + log_debug_activation("Switching off device-mapper and all subsystem related " + "udev rules. Falling back to libdevmapper node creation."); + /* + * Disable general dm and subsystem rules but keep + * dm disk rules if not flagged out explicitly before. + * We need /dev/disk content for the software that expects it. + */ + dmi->event_nr |= (DM_UDEV_DISABLE_DM_RULES_FLAG | + DM_UDEV_DISABLE_SUBSYSTEM_RULES_FLAG) << + DM_UDEV_FLAGS_SHIFT; + } + } + + log_debug_activation("dm %s %s%s %s%s%s %s%.0d%s%.0d%s" + "%s[ %s%s%s%s%s%s%s%s%s] %.0" PRIu64 " %s [%u] (*%u)", + _cmd_data_v4[dmt->type].name, + dmt->new_uuid ? "UUID " : "", + dmi->name, dmi->uuid, dmt->newname ? " " : "", + dmt->newname ? dmt->newname : "", + dmt->major > 0 ? "(" : "", + dmt->major > 0 ? dmt->major : 0, + dmt->major > 0 ? ":" : "", + dmt->minor > 0 ? dmt->minor : 0, + dmt->major > 0 && dmt->minor == 0 ? "0" : "", + dmt->major > 0 ? ") " : "", + dmt->no_open_count ? "noopencount " : "opencount ", + dmt->no_flush ? "noflush " : "flush ", + dmt->read_only ? "readonly " : "", + dmt->skip_lockfs ? "skiplockfs " : "", + dmt->retry_remove ? "retryremove " : "", + dmt->deferred_remove ? "deferredremove " : "", + dmt->secure_data ? "securedata " : "", + dmt->query_inactive_table ? "inactive " : "", + dmt->enable_checks ? "enablechecks " : "", + dmt->sector, _sanitise_message(dmt->message), + dmi->data_size, retry_repeat_count); +#ifdef DM_IOCTLS + r = ioctl(_control_fd, command, dmi); + + if (dmt->record_timestamp) + if (!dm_timestamp_get(_dm_ioctl_timestamp)) + stack; + + if (r < 0 && dmt->expected_errno != errno) { + dmt->ioctl_errno = errno; + if (dmt->ioctl_errno == ENXIO && ((dmt->type == DM_DEVICE_INFO) || + (dmt->type == DM_DEVICE_MKNODES) || + (dmt->type == DM_DEVICE_STATUS))) + dmi->flags &= ~DM_EXISTS_FLAG; /* FIXME */ + else { + if (_log_suppress || dmt->ioctl_errno == EINTR) + log_verbose("device-mapper: %s ioctl on %s %s%s%.0d%s%.0d%s%s " + "failed: %s", + _cmd_data_v4[dmt->type].name, + dmi->name, dmi->uuid, + dmt->major > 0 ? "(" : "", + dmt->major > 0 ? dmt->major : 0, + dmt->major > 0 ? ":" : "", + dmt->minor > 0 ? dmt->minor : 0, + dmt->major > 0 && dmt->minor == 0 ? "0" : "", + dmt->major > 0 ? ")" : "", + strerror(dmt->ioctl_errno)); + else + log_error("device-mapper: %s ioctl on %s %s%s%.0d%s%.0d%s%s " + "failed: %s", + _cmd_data_v4[dmt->type].name, + dmi->name, dmi->uuid, + dmt->major > 0 ? "(" : "", + dmt->major > 0 ? dmt->major : 0, + dmt->major > 0 ? ":" : "", + dmt->minor > 0 ? dmt->minor : 0, + dmt->major > 0 && dmt->minor == 0 ? "0" : "", + dmt->major > 0 ? ")" : "", + strerror(dmt->ioctl_errno)); + + /* + * It's sometimes worth retrying after EBUSY in case + * it's a transient failure caused by an asynchronous + * process quickly scanning the device. + */ + *retryable = dmt->ioctl_errno == EBUSY; + + goto error; + } + } + + if (ioctl_with_uevent && dm_udev_get_sync_support() && + !_check_uevent_generated(dmi)) { + log_debug_activation("Uevent not generated! 
Calling udev_complete " + "internally to avoid process lock-up."); + _udev_complete(dmt); + } + + if (!_dm_ioctl_unmangle_names(dmt->type, dmi)) + goto error; + + if (dmt->type != DM_DEVICE_REMOVE && + !_dm_ioctl_unmangle_uuids(dmt->type, dmi)) + goto error; + +#else /* Userspace alternative for testing */ + goto error; +#endif + return dmi; + +error: + _dm_zfree_dmi(dmi); + return NULL; +} + +void dm_task_update_nodes(void) +{ + update_devs(); +} + +#define DM_IOCTL_RETRIES 25 +#define DM_RETRY_USLEEP_DELAY 200000 + +int dm_task_get_errno(struct dm_task *dmt) +{ + return dmt->ioctl_errno; +} + +int dm_task_run(struct dm_task *dmt) +{ + struct dm_ioctl *dmi; + unsigned command; + int check_udev; + int rely_on_udev; + int suspended_counter; + unsigned ioctl_retry = 1; + int retryable = 0; + const char *dev_name = DEV_NAME(dmt); + const char *dev_uuid = DEV_UUID(dmt); + + if ((unsigned) dmt->type >= DM_ARRAY_SIZE(_cmd_data_v4)) { + log_error(INTERNAL_ERROR "unknown device-mapper task %d", + dmt->type); + return 0; + } + + command = _cmd_data_v4[dmt->type].cmd; + + /* Old-style creation had a table supplied */ + if (dmt->type == DM_DEVICE_CREATE && dmt->head) + return _create_and_load_v4(dmt); + + if (dmt->type == DM_DEVICE_MKNODES && !dev_name && + !dev_uuid && dmt->major <= 0) + return _mknodes_v4(dmt); + + if ((dmt->type == DM_DEVICE_RELOAD) && dmt->suppress_identical_reload) + return _reload_with_suppression_v4(dmt); + + if ((dmt->type == DM_DEVICE_SUSPEND) && dmt->enable_checks) + return _suspend_with_validation_v4(dmt); + + if (!_open_control()) { + _udev_complete(dmt); + return_0; + } + + if ((suspended_counter = dm_get_suspended_counter()) && + dmt->type == DM_DEVICE_RELOAD) + log_error(INTERNAL_ERROR "Performing unsafe table load while %d device(s) " + "are known to be suspended: " + "%s%s%s %s%.0d%s%.0d%s%s", + suspended_counter, + dev_name ? : "", + dev_uuid ? " UUID " : "", + dev_uuid ? : "", + dmt->major > 0 ? "(" : "", + dmt->major > 0 ? dmt->major : 0, + dmt->major > 0 ? ":" : "", + dmt->minor > 0 ? dmt->minor : 0, + dmt->major > 0 && dmt->minor == 0 ? "0" : "", + dmt->major > 0 ? ") " : ""); + + /* FIXME Detect and warn if cookie set but should not be. */ +repeat_ioctl: + if (!(dmi = _do_dm_ioctl(dmt, command, _ioctl_buffer_double_factor, + ioctl_retry, &retryable))) { + /* + * Async udev rules that scan devices commonly cause transient + * failures. Normally you'd expect the user to have made sure + * nothing was using the device before issuing REMOVE, so it's + * worth retrying in case the failure is indeed transient. + */ + if (retryable && dmt->type == DM_DEVICE_REMOVE && + dmt->retry_remove && ++ioctl_retry <= DM_IOCTL_RETRIES) { + usleep(DM_RETRY_USLEEP_DELAY); + goto repeat_ioctl; + } + + _udev_complete(dmt); + return 0; + } + + if (dmi->flags & DM_BUFFER_FULL_FLAG) { + switch (dmt->type) { + case DM_DEVICE_LIST_VERSIONS: + case DM_DEVICE_LIST: + case DM_DEVICE_DEPS: + case DM_DEVICE_STATUS: + case DM_DEVICE_TABLE: + case DM_DEVICE_WAITEVENT: + case DM_DEVICE_TARGET_MSG: + _ioctl_buffer_double_factor++; + _dm_zfree_dmi(dmi); + goto repeat_ioctl; + default: + log_error("WARNING: libdevmapper buffer too small for data"); + } + } + + /* + * Are we expecting a udev operation to occur that we need to check for? + */ + check_udev = dmt->cookie_set && + !(dmt->event_nr >> DM_UDEV_FLAGS_SHIFT & + DM_UDEV_DISABLE_DM_RULES_FLAG); + + rely_on_udev = dmt->cookie_set ? 
(dmt->event_nr >> DM_UDEV_FLAGS_SHIFT & + DM_UDEV_DISABLE_LIBRARY_FALLBACK) : 0; + + switch (dmt->type) { + case DM_DEVICE_CREATE: + if ((dmt->add_node == DM_ADD_NODE_ON_CREATE) && + dev_name && *dev_name && !rely_on_udev) + add_dev_node(dev_name, MAJOR(dmi->dev), + MINOR(dmi->dev), dmt->uid, dmt->gid, + dmt->mode, check_udev, rely_on_udev); + break; + case DM_DEVICE_REMOVE: + /* FIXME Kernel needs to fill in dmi->name */ + if (dev_name && !rely_on_udev) + rm_dev_node(dev_name, check_udev, rely_on_udev); + break; + + case DM_DEVICE_RENAME: + /* FIXME Kernel needs to fill in dmi->name */ + if (!dmt->new_uuid && dev_name) + rename_dev_node(dev_name, dmt->newname, + check_udev, rely_on_udev); + break; + + case DM_DEVICE_RESUME: + if ((dmt->add_node == DM_ADD_NODE_ON_RESUME) && + dev_name && *dev_name) + add_dev_node(dev_name, MAJOR(dmi->dev), + MINOR(dmi->dev), dmt->uid, dmt->gid, + dmt->mode, check_udev, rely_on_udev); + /* FIXME Kernel needs to fill in dmi->name */ + set_dev_node_read_ahead(dev_name, + MAJOR(dmi->dev), MINOR(dmi->dev), + dmt->read_ahead, dmt->read_ahead_flags); + break; + + case DM_DEVICE_MKNODES: + if (dmi->flags & DM_EXISTS_FLAG) + add_dev_node(dmi->name, MAJOR(dmi->dev), + MINOR(dmi->dev), dmt->uid, + dmt->gid, dmt->mode, 0, rely_on_udev); + else if (dev_name) + rm_dev_node(dev_name, 0, rely_on_udev); + break; + + case DM_DEVICE_STATUS: + case DM_DEVICE_TABLE: + case DM_DEVICE_WAITEVENT: + if (!_unmarshal_status(dmt, dmi)) + goto bad; + break; + } + + /* Was structure reused? */ + _dm_zfree_dmi(dmt->dmi.v4); + dmt->dmi.v4 = dmi; + return 1; + + bad: + _dm_zfree_dmi(dmi); + return 0; +} + +void dm_hold_control_dev(int hold_open) +{ + _hold_control_fd_open = hold_open ? 1 : 0; + + log_debug("Hold of control device is now %sset.", + _hold_control_fd_open ? "" : "un"); +} + +void dm_lib_release(void) +{ + if (!_hold_control_fd_open) + _close_control_fd(); + dm_timestamp_destroy(_dm_ioctl_timestamp); + _dm_ioctl_timestamp = NULL; + update_devs(); +} + +void dm_pools_check_leaks(void); + +void dm_lib_exit(void) +{ + int suspended_counter; + static unsigned _exited = 0; + + if (_exited++) + return; + + if ((suspended_counter = dm_get_suspended_counter())) + log_error("libdevmapper exiting with %d device(s) still suspended.", suspended_counter); + + dm_lib_release(); + selinux_release(); + if (_dm_bitset) + dm_bitset_destroy(_dm_bitset); + _dm_bitset = NULL; + dm_pools_check_leaks(); + dm_dump_memory(); + _version_ok = 1; + _version_checked = 0; +} + +#if defined(__GNUC__) +/* + * Maintain binary backward compatibility. + * Version script mechanism works with 'gcc' compatible compilers only. + */ + +/* + * This following code is here to retain ABI compatibility after adding + * the field deferred_remove to struct dm_info in version 1.02.89. + * + * Binaries linked against version 1.02.88 of libdevmapper or earlier + * will use this function that returns dm_info without the + * deferred_remove field. + * + * Binaries compiled against version 1.02.89 onwards will use + * the new function dm_task_get_info_with_deferred_remove due to the + * #define. + * + * N.B. Keep this function at the end of the file to make sure that + * no code in this file accidentally calls it. 
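/*
 * The ABI pattern described above, restated with invented names (this
 * sketch is not libdevmapper code): when a field is appended to a
 * public struct, binaries built against the old layout can still be
 * served by copying only the struct prefix they know about, delimited
 * by offsetof() of the first field they lack.
 */
#include <stddef.h>
#include <string.h>

struct example_info_v2 {
	int a;
	int b;
	int c;		/* field appended in "v2" of the ABI */
};

int example_get_info_v2(struct example_info_v2 *info);	/* current implementation */

/* Entry point kept for binaries linked before 'c' existed. */
int example_get_info_v1(void *old_info)
{
	struct example_info_v2 new_info;

	if (!example_get_info_v2(&new_info))
		return 0;

	/* Copy only the prefix that made up the old struct. */
	memcpy(old_info, &new_info, offsetof(struct example_info_v2, c));

	return 1;
}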
+ */ + +int dm_task_get_info_base(struct dm_task *dmt, struct dm_info *info); +int dm_task_get_info_base(struct dm_task *dmt, struct dm_info *info) +{ + struct dm_info new_info; + + if (!dm_task_get_info(dmt, &new_info)) + return 0; + + memcpy(info, &new_info, offsetof(struct dm_info, deferred_remove)); + + return 1; +} + +int dm_task_get_info_with_deferred_remove(struct dm_task *dmt, struct dm_info *info); +int dm_task_get_info_with_deferred_remove(struct dm_task *dmt, struct dm_info *info) +{ + struct dm_info new_info; + + if (!dm_task_get_info(dmt, &new_info)) + return 0; + + memcpy(info, &new_info, offsetof(struct dm_info, internal_suspend)); + + return 1; +} +#endif diff --git a/device_mapper/ioctl/libdm-targets.h b/device_mapper/ioctl/libdm-targets.h new file mode 100644 index 000000000..b5b20d5e9 --- /dev/null +++ b/device_mapper/ioctl/libdm-targets.h @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. + * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. + * + * This file is part of the device-mapper userspace tools. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU Lesser General Public License v.2.1. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef LIB_DMTARGETS_H +#define LIB_DMTARGETS_H + +#include <inttypes.h> +#include <sys/types.h> + +struct dm_ioctl; + +struct target { + uint64_t start; + uint64_t length; + char *type; + char *params; + + struct target *next; +}; + +struct dm_task { + int type; + char *dev_name; + char *mangled_dev_name; + + struct target *head, *tail; + + int read_only; + uint32_t event_nr; + int major; + int minor; + int allow_default_major_fallback; + uid_t uid; + gid_t gid; + mode_t mode; + uint32_t read_ahead; + uint32_t read_ahead_flags; + union { + struct dm_ioctl *v4; + } dmi; + char *newname; + char *message; + char *geometry; + uint64_t sector; + int no_flush; + int no_open_count; + int skip_lockfs; + int query_inactive_table; + int suppress_identical_reload; + dm_add_node_t add_node; + uint64_t existing_table_size; + int cookie_set; + int new_uuid; + int secure_data; + int retry_remove; + int deferred_remove; + int enable_checks; + int expected_errno; + int ioctl_errno; + + int record_timestamp; + + char *uuid; + char *mangled_uuid; +}; + +struct cmd_data { + const char *name; + const unsigned cmd; + const int version[3]; +}; + +int dm_check_version(void); +uint64_t dm_task_get_existing_table_size(struct dm_task *dmt); + +#endif diff --git a/device_mapper/libdevmapper.h b/device_mapper/libdevmapper.h new file mode 100644 index 000000000..2438f74c1 --- /dev/null +++ b/device_mapper/libdevmapper.h @@ -0,0 +1,3755 @@ +/* + * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. + * Copyright (C) 2004-2017 Red Hat, Inc. All rights reserved. + * Copyright (C) 2006 Rackable Systems All rights reserved. + * + * This file is part of the device-mapper userspace tools. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU Lesser General Public License v.2.1. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef LIB_DEVICE_MAPPER_H +#define LIB_DEVICE_MAPPER_H + +#include <inttypes.h> +#include <stdarg.h> +#include <sys/types.h> +#include <sys/stat.h> + +#ifdef __linux__ +# include <linux/types.h> +#endif + +#include <limits.h> +#include <string.h> +#include <stdlib.h> +#include <stdio.h> + +#ifndef __GNUC__ +# define __typeof__ typeof +#endif + +/* Macros to make string defines */ +#define DM_TO_STRING_EXP(A) #A +#define DM_TO_STRING(A) DM_TO_STRING_EXP(A) + +#define DM_ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) + +#ifdef __cplusplus +extern "C" { +#endif + +/***************************************************************** + * The first section of this file provides direct access to the + * individual device-mapper ioctls. Since it is quite laborious to + * build the ioctl arguments for the device-mapper, people are + * encouraged to use this library. + ****************************************************************/ + +/* + * The library user may wish to register their own + * logging function. By default errors go to stderr. + * Use dm_log_with_errno_init(NULL) to restore the default log fn. + * Error messages may have a non-zero errno. + * Debug messages may have a non-zero class. + * Aborts on internal error when env DM_ABORT_ON_INTERNAL_ERRORS is 1 + */ + +typedef void (*dm_log_with_errno_fn) (int level, const char *file, int line, + int dm_errno_or_class, const char *f, ...) + __attribute__ ((format(printf, 5, 6))); + +void dm_log_with_errno_init(dm_log_with_errno_fn fn); +void dm_log_init_verbose(int level); + +/* + * Original version of this function. + * dm_errno is set to 0. + * + * Deprecated: Use the _with_errno_ versions above instead. + */ +typedef void (*dm_log_fn) (int level, const char *file, int line, + const char *f, ...) + __attribute__ ((format(printf, 4, 5))); + +void dm_log_init(dm_log_fn fn); +/* + * For backward-compatibility, indicate that dm_log_init() was used + * to set a non-default value of dm_log(). + */ +int dm_log_is_non_default(void); + +/* + * Number of devices currently in suspended state (via the library). + */ +int dm_get_suspended_counter(void); + +enum { + DM_DEVICE_CREATE, + DM_DEVICE_RELOAD, + DM_DEVICE_REMOVE, + DM_DEVICE_REMOVE_ALL, + + DM_DEVICE_SUSPEND, + DM_DEVICE_RESUME, + + DM_DEVICE_INFO, + DM_DEVICE_DEPS, + DM_DEVICE_RENAME, + + DM_DEVICE_VERSION, + + DM_DEVICE_STATUS, + DM_DEVICE_TABLE, + DM_DEVICE_WAITEVENT, + + DM_DEVICE_LIST, + + DM_DEVICE_CLEAR, + + DM_DEVICE_MKNODES, + + DM_DEVICE_LIST_VERSIONS, + + DM_DEVICE_TARGET_MSG, + + DM_DEVICE_SET_GEOMETRY +}; + +/* + * You will need to build a struct dm_task for + * each ioctl command you want to execute. + */ + +struct dm_pool; +struct dm_task; +struct dm_timestamp; + +struct dm_task *dm_task_create(int type); +void dm_task_destroy(struct dm_task *dmt); + +int dm_task_set_name(struct dm_task *dmt, const char *name); +int dm_task_set_uuid(struct dm_task *dmt, const char *uuid); + +/* + * Retrieve attributes after an info.
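/*
 * Caller-side sketch of the info sequence documented here (not part of
 * this header; the helper name is illustrative): create a
 * DM_DEVICE_INFO task, run it, then read the attributes.
 */
static int device_exists(const char *name, struct dm_info *info)
{
	struct dm_task *dmt;
	int r = 0;

	if (!(dmt = dm_task_create(DM_DEVICE_INFO)))
		return 0;

	if (dm_task_set_name(dmt, name) &&
	    dm_task_run(dmt) &&
	    dm_task_get_info(dmt, info))
		r = info->exists;	/* 0 when the device does not exist */

	dm_task_destroy(dmt);

	return r;
}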
+ */ +struct dm_info { + int exists; + int suspended; + int live_table; + int inactive_table; + int32_t open_count; + uint32_t event_nr; + uint32_t major; + uint32_t minor; /* minor device number */ + int read_only; /* 0:read-write; 1:read-only */ + + int32_t target_count; + + int deferred_remove; + int internal_suspend; +}; + +struct dm_deps { + uint32_t count; + uint32_t filler; + uint64_t device[0]; +}; + +struct dm_names { + uint64_t dev; + uint32_t next; /* Offset to next struct from start of this struct */ + char name[0]; +}; + +struct dm_versions { + uint32_t next; /* Offset to next struct from start of this struct */ + uint32_t version[3]; + + char name[0]; +}; + +int dm_get_library_version(char *version, size_t size); +int dm_task_get_driver_version(struct dm_task *dmt, char *version, size_t size); +int dm_task_get_info(struct dm_task *dmt, struct dm_info *dmi); + +/* + * This function returns dm device's UUID based on the value + * of the mangling mode set during preceding dm_task_run call: + * - unmangled UUID for DM_STRING_MANGLING_{AUTO, HEX}, + * - UUID without any changes for DM_STRING_MANGLING_NONE. + * + * To get mangled or unmangled form of the UUID directly, use + * dm_task_get_uuid_mangled or dm_task_get_uuid_unmangled function. + */ +const char *dm_task_get_uuid(const struct dm_task *dmt); + +struct dm_deps *dm_task_get_deps(struct dm_task *dmt); +struct dm_versions *dm_task_get_versions(struct dm_task *dmt); +const char *dm_task_get_message_response(struct dm_task *dmt); + +/* + * These functions return device-mapper names based on the value + * of the mangling mode set during preceding dm_task_run call: + * - unmangled name for DM_STRING_MANGLING_{AUTO, HEX}, + * - name without any changes for DM_STRING_MANGLING_NONE. + * + * To get mangled or unmangled form of the name directly, use + * dm_task_get_name_mangled or dm_task_get_name_unmangled function. + */ +const char *dm_task_get_name(const struct dm_task *dmt); +struct dm_names *dm_task_get_names(struct dm_task *dmt); + +int dm_task_set_ro(struct dm_task *dmt); +int dm_task_set_newname(struct dm_task *dmt, const char *newname); +int dm_task_set_newuuid(struct dm_task *dmt, const char *newuuid); +int dm_task_set_minor(struct dm_task *dmt, int minor); +int dm_task_set_major(struct dm_task *dmt, int major); +int dm_task_set_major_minor(struct dm_task *dmt, int major, int minor, int allow_default_major_fallback); +int dm_task_set_uid(struct dm_task *dmt, uid_t uid); +int dm_task_set_gid(struct dm_task *dmt, gid_t gid); +int dm_task_set_mode(struct dm_task *dmt, mode_t mode); +/* See also description for DM_UDEV_DISABLE_LIBRARY_FALLBACK flag! */ +int dm_task_set_cookie(struct dm_task *dmt, uint32_t *cookie, uint16_t flags); +int dm_task_set_event_nr(struct dm_task *dmt, uint32_t event_nr); +int dm_task_set_geometry(struct dm_task *dmt, const char *cylinders, const char *heads, const char *sectors, const char *start); +int dm_task_set_message(struct dm_task *dmt, const char *message); +int dm_task_set_sector(struct dm_task *dmt, uint64_t sector); +int dm_task_no_flush(struct dm_task *dmt); +int dm_task_no_open_count(struct dm_task *dmt); +int dm_task_skip_lockfs(struct dm_task *dmt); +int dm_task_query_inactive_table(struct dm_task *dmt); +int dm_task_suppress_identical_reload(struct dm_task *dmt); +int dm_task_secure_data(struct dm_task *dmt); +int dm_task_retry_remove(struct dm_task *dmt); +int dm_task_deferred_remove(struct dm_task *dmt); + +/* + * Record timestamp immediately after the ioctl returns. 
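/*
 * Sketch of the timestamp facility declared below (not part of this
 * header; run_timed() is illustrative). The returned object is shared
 * library state: it is overwritten by the next recorded ioctl, so read
 * or copy it before issuing another task.
 */
static struct dm_timestamp *run_timed(struct dm_task *dmt)
{
	if (!dm_task_set_record_timestamp(dmt))
		return NULL;

	if (!dm_task_run(dmt))
		return NULL;

	return dm_task_get_ioctl_timestamp(dmt);
}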
+ */ +int dm_task_set_record_timestamp(struct dm_task *dmt); +struct dm_timestamp *dm_task_get_ioctl_timestamp(struct dm_task *dmt); + +/* + * Enable checks for common mistakes such as issuing ioctls in an unsafe order. + */ +int dm_task_enable_checks(struct dm_task *dmt); + +typedef enum { + DM_ADD_NODE_ON_RESUME, /* add /dev/mapper node with dmsetup resume */ + DM_ADD_NODE_ON_CREATE /* add /dev/mapper node with dmsetup create */ +} dm_add_node_t; +int dm_task_set_add_node(struct dm_task *dmt, dm_add_node_t add_node); + +/* + * Control read_ahead. + */ +#define DM_READ_AHEAD_AUTO UINT32_MAX /* Use kernel default readahead */ +#define DM_READ_AHEAD_NONE 0 /* Disable readahead */ + +#define DM_READ_AHEAD_MINIMUM_FLAG 0x1 /* Value supplied is minimum */ + +/* + * Read ahead is set with DM_DEVICE_CREATE with a table or DM_DEVICE_RESUME. + */ +int dm_task_set_read_ahead(struct dm_task *dmt, uint32_t read_ahead, + uint32_t read_ahead_flags); +uint32_t dm_task_get_read_ahead(const struct dm_task *dmt, + uint32_t *read_ahead); + +/* + * Use these to prepare for a create or reload. + */ +int dm_task_add_target(struct dm_task *dmt, + uint64_t start, + uint64_t size, const char *ttype, const char *params); + +/* + * Format major/minor numbers correctly for input to driver. + */ +#define DM_FORMAT_DEV_BUFSIZE 13 /* Minimum bufsize to handle worst case. */ +int dm_format_dev(char *buf, int bufsize, uint32_t dev_major, uint32_t dev_minor); + +/* Use this to retrieve target information returned from a STATUS call */ +void *dm_get_next_target(struct dm_task *dmt, + void *next, uint64_t *start, uint64_t *length, + char **target_type, char **params); + +/* + * Following dm_get_status_* functions will allocate appropriate status structure + * from passed mempool together with the necessary character arrays. + * Destroying the mempool will release all associated allocation. + */ + +/* Parse params from STATUS call for mirror target */ +typedef enum { + DM_STATUS_MIRROR_ALIVE = 'A',/* No failures */ + DM_STATUS_MIRROR_FLUSH_FAILED = 'F',/* Mirror out-of-sync */ + DM_STATUS_MIRROR_WRITE_FAILED = 'D',/* Mirror out-of-sync */ + DM_STATUS_MIRROR_SYNC_FAILED = 'S',/* Mirror out-of-sync */ + DM_STATUS_MIRROR_READ_FAILED = 'R',/* Mirror data unaffected */ + DM_STATUS_MIRROR_UNCLASSIFIED = 'U' /* Bug */ +} dm_status_mirror_health_t; + +struct dm_status_mirror { + uint64_t total_regions; + uint64_t insync_regions; + uint32_t dev_count; /* # of devs[] elements (<= 8) */ + struct { + dm_status_mirror_health_t health; + uint32_t major; + uint32_t minor; + } *devs; /* array with individual legs */ + const char *log_type; /* core, disk,....
*/ + uint32_t log_count; /* # of logs[] elements */ + struct { + dm_status_mirror_health_t health; + uint32_t major; + uint32_t minor; + } *logs; /* array with individual logs */ +}; + +int dm_get_status_mirror(struct dm_pool *mem, const char *params, + struct dm_status_mirror **status); + +/* Parse params from STATUS call for raid target */ +struct dm_status_raid { + uint64_t reserved; + uint64_t total_regions; /* sectors */ + uint64_t insync_regions; /* sectors */ + uint64_t mismatch_count; + uint32_t dev_count; + char *raid_type; + /* A - alive, a - alive not in-sync, D - dead/failed */ + char *dev_health; + /* idle, frozen, resync, recover, check, repair */ + char *sync_action; + uint64_t data_offset; /* RAID out-of-place reshaping */ +}; + +int dm_get_status_raid(struct dm_pool *mem, const char *params, + struct dm_status_raid **status); + +/* Parse params from STATUS call for cache target */ +struct dm_status_cache { + uint64_t version; /* zero for now */ + + uint32_t metadata_block_size; /* in 512B sectors */ + uint32_t block_size; /* AKA 'chunk_size' */ + + uint64_t metadata_used_blocks; + uint64_t metadata_total_blocks; + + uint64_t used_blocks; + uint64_t dirty_blocks; + uint64_t total_blocks; + + uint64_t read_hits; + uint64_t read_misses; + uint64_t write_hits; + uint64_t write_misses; + + uint64_t demotions; + uint64_t promotions; + + uint64_t feature_flags; /* DM_CACHE_FEATURE_? */ + + int core_argc; + char **core_argv; + + char *policy_name; + int policy_argc; + char **policy_argv; + + unsigned error : 1; /* detected error (switches to fail soon) */ + unsigned fail : 1; /* all I/O fails */ + unsigned needs_check : 1; /* metadata needs check */ + unsigned read_only : 1; /* metadata may not be changed */ + uint32_t reserved : 28; +}; + +int dm_get_status_cache(struct dm_pool *mem, const char *params, + struct dm_status_cache **status); + +/* + * Parse params from STATUS call for snapshot target + * + * Snapshot target's format: + * <= 1.7.0: <used_sectors>/<total_sectors> + * >= 1.8.0: <used_sectors>/<total_sectors> <metadata_sectors> + */ +struct dm_status_snapshot { + uint64_t used_sectors; /* in 512b units */ + uint64_t total_sectors; + uint64_t metadata_sectors; + unsigned has_metadata_sectors : 1; /* set when metadata_sectors is present */ + unsigned invalid : 1; /* set when snapshot is invalidated */ + unsigned merge_failed : 1; /* set when snapshot merge failed */ + unsigned overflow : 1; /* set when snapshot overflows */ +}; + +int dm_get_status_snapshot(struct dm_pool *mem, const char *params, + struct dm_status_snapshot **status); + +/* Parse params from STATUS call for thin_pool target */ +typedef enum { + DM_THIN_DISCARDS_IGNORE, + DM_THIN_DISCARDS_NO_PASSDOWN, + DM_THIN_DISCARDS_PASSDOWN +} dm_thin_discards_t; + +struct dm_status_thin_pool { + uint64_t transaction_id; + uint64_t used_metadata_blocks; + uint64_t total_metadata_blocks; + uint64_t used_data_blocks; + uint64_t total_data_blocks; + uint64_t held_metadata_root; + uint32_t read_only; /* metadata may not be changed */ + dm_thin_discards_t discards; + uint32_t fail : 1; /* all I/O fails */ + uint32_t error_if_no_space : 1; /* otherwise queue_if_no_space */ + uint32_t out_of_data_space : 1; /* metadata may be changed, but data may not be allocated (no rw) */ + uint32_t needs_check : 1; /* metadata needs check */ + uint32_t error : 1; /* detected error (switches to fail soon) */ + uint32_t reserved : 27; +}; + +int dm_get_status_thin_pool(struct dm_pool *mem, const char *params, + struct dm_status_thin_pool 
**status); + +/* Parse params from STATUS call for thin target */ +struct dm_status_thin { + uint64_t mapped_sectors; + uint64_t highest_mapped_sector; + uint32_t fail : 1; /* Thin volume fails I/O */ + uint32_t reserved : 31; +}; + +int dm_get_status_thin(struct dm_pool *mem, const char *params, + struct dm_status_thin **status); + +/* + * device-mapper statistics support + */ + +/* + * Statistics handle. + * + * Operations on dm_stats objects include managing statistics regions + * and obtaining and manipulating current counter values from the + * kernel. Methods are provided to return basic count values and to + * derive time-based metrics when a suitable interval estimate is + * provided. + * + * Internally the dm_stats handle contains a pointer to a table of one + * or more dm_stats_region objects representing the regions registered + * with the dm_stats_create_region() method. These in turn point to a + * table of one or more dm_stats_counters objects containing the + * counter sets for each defined area within the region: + * + * dm_stats->dm_stats_region[nr_regions]->dm_stats_counters[nr_areas] + * + * This structure is private to the library and may change in future + * versions: all users should make use of the public interface and treat + * the dm_stats type as an opaque handle. + * + * Regions and counter sets are stored in order of increasing region_id. + * Depending on region specifications and the sequence of create and + * delete operations this may not correspond to increasing sector + * number: users of the library should not assume that this is the case + * unless region creation is deliberately managed to ensure this (by + * always creating regions in strict order of ascending sector address). + * + * Regions may also overlap so the same sector range may be included in + * more than one region or area: applications should be prepared to deal + * with this or manage regions such that it does not occur. + */ +struct dm_stats; + +/* + * Histogram handle. + * + * A histogram object represents the latency histogram values and bin + * boundaries of the histogram associated with a particular area. + * + * Operations on the handle allow the number of bins, bin boundaries, + * counts and relative proportions to be obtained as well as the + * conversion of a histogram or its bounds to a compact string + * representation. + */ +struct dm_histogram; + +/* + * Allocate a dm_stats handle to use for subsequent device-mapper + * statistics operations. A program_id may be specified and will be + * used by default for subsequent operations on this handle. + * + * If program_id is NULL or the empty string a program_id will be + * automatically set to the value contained in /proc/self/comm. + */ +struct dm_stats *dm_stats_create(const char *program_id); + +/* + * Bind a dm_stats handle to the specified device major and minor + * values. Any previous binding is cleared and any preexisting counter + * data contained in the handle is released. + */ +int dm_stats_bind_devno(struct dm_stats *dms, int major, int minor); + +/* + * Bind a dm_stats handle to the specified device name. + * Any previous binding is cleared and any preexisting counter + * data contained in the handle is released. + */ +int dm_stats_bind_name(struct dm_stats *dms, const char *name); + +/* + * Bind a dm_stats handle to the specified device UUID. + * Any previous binding is cleared and any preexisting counter + * data contained in the handle is released.
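/*
 * Sketch of the dm_get_status_* calling convention described above
 * (not part of this header; parse_tp_status() is illustrative): the
 * parsed structure and its strings live in the caller's mempool and
 * are released together when the pool is destroyed.
 */
static int parse_tp_status(const char *params)
{
	struct dm_pool *mem;
	struct dm_status_thin_pool *tps = NULL;
	int r = 0;

	if (!(mem = dm_pool_create("tp_status", 512)))
		return 0;

	if (dm_get_status_thin_pool(mem, params, &tps))
		r = !tps->fail && !tps->error;	/* pool still healthy? */

	dm_pool_destroy(mem);	/* frees tps and everything it references */

	return r;
}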
+ */ +int dm_stats_bind_uuid(struct dm_stats *dms, const char *uuid); + +/* + * Bind a dm_stats handle to the device backing the file referenced + * by the specified file descriptor. + * + * File descriptor fd must reference a regular file, open for reading, + * in a local file system, backed by a device-mapper device, that + * supports the FIEMAP ioctl, and that returns data describing the + * physical location of extents. + */ +int dm_stats_bind_from_fd(struct dm_stats *dms, int fd); +/* + * Test whether the running kernel supports the precise_timestamps + * feature. Presence of this feature also implies histogram support. + * The library checks this internally and will fail any attempt + * to use nanosecond counters or histograms on kernels that fail to + * meet this check. + */ +int dm_message_supports_precise_timestamps(void); + +/* + * Precise timestamps and histogram support. + * + * Test for the presence of precise_timestamps and histogram support. + */ +int dm_stats_driver_supports_precise(void); +int dm_stats_driver_supports_histogram(void); + +/* + * Returns 1 if the specified region has the precise_timestamps feature + * enabled (i.e. produces nanosecond-precision counter values) or 0 for + * a region using the default millisecond precision. + */ +int dm_stats_get_region_precise_timestamps(const struct dm_stats *dms, + uint64_t region_id); + +/* + * Returns 1 if the region at the current cursor location has the + * precise_timestamps feature enabled (i.e. produces + * nanosecond-precision counter values) or 0 for a region using the + * default millisecond precision. + */ +int dm_stats_get_current_region_precise_timestamps(const struct dm_stats *dms); + +#define DM_STATS_ALL_PROGRAMS "" +/* + * Parse the response from a @stats_list message. dm_stats_list will + * allocate the necessary dm_stats and dm_stats region structures from + * the embedded dm_pool. No counter data will be obtained (the counters + * members of dm_stats_region objects are set to NULL). + * + * A program_id may optionally be supplied; if the argument is non-NULL + * only regions with a matching program_id value will be considered. If + * the argument is NULL then the default program_id associated with the + * dm_stats handle will be used. Passing the special value + * DM_STATS_ALL_PROGRAMS will cause all regions to be queried + * regardless of region program_id. + */ +int dm_stats_list(struct dm_stats *dms, const char *program_id); + +#define DM_STATS_REGIONS_ALL UINT64_MAX +/* + * Populate a dm_stats object with statistics for one or more regions of + * the specified device. + * + * A program_id may optionally be supplied; if the argument is non-NULL + * only regions with a matching program_id value will be considered. If + * the argument is NULL then the default program_id associated with the + * dm_stats handle will be used. Passing the special value + * DM_STATS_ALL_PROGRAMS will cause all regions to be queried + * regardless of region program_id. + * + * Passing the special value DM_STATS_REGIONS_ALL as the region_id + * argument will attempt to retrieve all regions selected by the + * program_id argument. + * + * If region_id is used to request a single region_id to be populated + * the program_id is ignored. + */ +int dm_stats_populate(struct dm_stats *dms, const char *program_id, + uint64_t region_id); + +/* + * Create a new statistics region on the device bound to dms. + * + * start and len specify the region start and length in 512b sectors.
+ * Passing zero for both start and len will create a region spanning + * the entire device. + * + * Step determines how to subdivide the region into discrete counter + * sets: a positive value specifies the size of areas into which the + * region should be split while a negative value will split the region + * into a number of areas equal to the absolute value of step: + * + * - a region with one area spanning the entire device: + * + * dm_stats_create_region(dms, 0, 0, -1, p, a); + * + * - a region with areas of 1MiB: + * + * dm_stats_create_region(dms, 0, 0, 1 << 11, p, a); + * + * - one 1MiB region starting at 1024 sectors with two areas: + * + * dm_stats_create_region(dms, 1024, 1 << 11, -2, p, a); + * + * If precise is non-zero attempt to create a region with nanosecond + * precision counters using the kernel precise_timestamps feature. + * + * precise - A flag to request nanosecond precision counters + * to be used for this region. + * + * histogram_bounds - specify the boundaries of a latency histogram to + * be tracked for the region. The values are expressed as an array of + * uint64_t terminated with a zero. Values must be in order of ascending + * magnitude and specify the upper bounds of successive histogram bins + * in nanoseconds (with an implicit lower bound of zero on the first bin + * and an implicit upper bound of infinity on the final bin). For + * example: + * + * uint64_t bounds_ary[] = { 1000, 2000, 3000, 0 }; + * + * Specifies a histogram with four bins: 0-1000ns, 1000-2000ns, + * 2000-3000ns and >3000ns. + * + * The smallest latency value that can be tracked for a region not using + * precise_timestamps is 1ms: attempting to create a region with + * histogram boundaries < 1ms will cause the precise_timestamps feature + * to be enabled for that region automatically if it was not requested + * explicitly. + * + * program_id is an optional string argument that identifies the + * program creating the region. If program_id is NULL or the empty + * string the default program_id stored in the handle will be used. + * + * user_data is an optional string argument that is added to the + * content of the aux_data field stored with the statistics region by + * the kernel. + * + * The library may also use this space internally, for example, to + * store a group descriptor or other metadata: in this case the + * library will strip any internal data fields from the value before + * it is returned via a call to dm_stats_get_region_aux_data(). + * + * The user data stored is not accessed by the library or kernel and + * may be used to store an arbitrary data word (embedded whitespace is + * not permitted). + * + * An application using both the library and direct access to the + * @stats_list device-mapper message may see the internal values stored + * in this field by the library. In such cases any string up to and + * including the first '#' in the field must be treated as an opaque + * value and preserved across any external modification of aux_data. + * + * The region_id of the newly-created region is returned in *region_id + * if it is non-NULL. + */ +int dm_stats_create_region(struct dm_stats *dms, uint64_t *region_id, + uint64_t start, uint64_t len, int64_t step, + int precise, struct dm_histogram *bounds, + const char *program_id, const char *user_data); + +/* + * Delete the specified statistics region. This will also mark the + * region as not-present and discard any existing statistics data. 
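/*
 * End-to-end sketch of the region lifecycle documented above (not part
 * of this header; stats_example() and the "example_prog" program_id
 * are illustrative): bind a handle, create a whole-device region split
 * into 1MiB areas, populate its counters, then delete it again.
 */
static int stats_example(const char *dm_name)
{
	struct dm_stats *dms;
	uint64_t region_id;
	int r = 0;

	if (!(dms = dm_stats_create("example_prog")))
		return 0;

	if (!dm_stats_bind_name(dms, dm_name))
		goto out;

	/* step = 1 << 11: 1MiB areas expressed in 512-byte sectors */
	if (!dm_stats_create_region(dms, &region_id, 0, 0, 1 << 11,
				    0 /* precise */, NULL /* bounds */,
				    "example_prog", NULL /* user_data */))
		goto out;

	if (dm_stats_populate(dms, "example_prog", region_id))
		r = 1;

	(void) dm_stats_delete_region(dms, region_id);
out:
	dm_stats_destroy(dms);

	return r;
}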
+ +/* + * Delete the specified statistics region. This will also mark the + * region as not-present and discard any existing statistics data. + */ +int dm_stats_delete_region(struct dm_stats *dms, uint64_t region_id); + +/* + * Clear the specified statistics region. This requests the kernel to + * zero all counter values (except in-flight I/O). Note that this + * operation is not atomic with respect to reads of the counters; any IO + * events occurring between the last print operation and the clear will + * be lost. This can be avoided by using the atomic print-and-clear + * function of the dm_stats_print_region() call or by using the higher + * level dm_stats_populate() interface. + */ +int dm_stats_clear_region(struct dm_stats *dms, uint64_t region_id); + +/* + * Print the current counter values for the specified statistics region + * and return them as a string. The memory for the string buffer will + * be allocated from the dm_stats handle's private pool and should be + * returned by calling dm_stats_buffer_destroy() when no longer + * required. The pointer will become invalid following any call that + * clears or reinitializes the handle (destroy, list, populate, bind). + * + * This allows applications that wish to access the raw message response + * to obtain it via a dm_stats handle; no parsing of the textual counter + * data is carried out by this function. + * + * Most users should use the dm_stats_populate() call + * instead since this will automatically parse the statistics data into + * numeric form accessible via the dm_stats_get_*() counter access + * methods. + * + * A subset of the data lines may be requested by setting the + * start_line and num_lines parameters. If both are zero all data + * lines are returned. + * + * If the clear parameter is non-zero the operation will also + * atomically reset all counter values to zero (except in-flight IO). + */ +char *dm_stats_print_region(struct dm_stats *dms, uint64_t region_id, + unsigned start_line, unsigned num_lines, + unsigned clear); + +/* + * Destroy a statistics response buffer obtained from a call to + * dm_stats_print_region(). + */ +void dm_stats_buffer_destroy(struct dm_stats *dms, char *buffer); + +/* + * Determine the number of regions contained in a dm_stats handle + * following a dm_stats_list() or dm_stats_populate() call. + * + * The value returned is the number of registered regions visible with the + * program_id value used for the list or populate operation and may not be + * equal to the highest present region_id (either due to program_id + * filtering or gaps in the sequence of region_id values). + * + * Always returns zero on an empty handle. + */ +uint64_t dm_stats_get_nr_regions(const struct dm_stats *dms); + +/* + * Determine the number of groups contained in a dm_stats handle + * following a dm_stats_list() or dm_stats_populate() call. + * + * The value returned is the number of registered groups visible with the + * program_id value used for the list or populate operation and may not be + * equal to the highest present group_id (either due to program_id + * filtering or gaps in the sequence of group_id values). + * + * Always returns zero on an empty handle. + */ +uint64_t dm_stats_get_nr_groups(const struct dm_stats *dms); + +/* + * Test whether region_id is present in this dm_stats handle. + */ +int dm_stats_region_present(const struct dm_stats *dms, uint64_t region_id);
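Illustrative sketch (editorial addition): raw counter access with dm_stats_print_region() as described above; most applications should prefer dm_stats_populate().

    #include <stdio.h>
    #include "libdevmapper.h"

    void example_print_region(struct dm_stats *dms, uint64_t region_id)
    {
            /* start_line=0, num_lines=0: all lines; clear=0: keep counters. */
            char *buf = dm_stats_print_region(dms, region_id, 0, 0, 0);

            if (!buf)
                    return;
            printf("%s", buf);
            dm_stats_buffer_destroy(dms, buf);
    }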
+ +/* + * Returns the number of areas (counter sets) contained in the specified + * region_id of the supplied dm_stats handle. + */ +uint64_t dm_stats_get_region_nr_areas(const struct dm_stats *dms, + uint64_t region_id); + +/* + * Returns the total number of areas (counter sets) in all regions of the + * given dm_stats object. + */ +uint64_t dm_stats_get_nr_areas(const struct dm_stats *dms); + +/* + * Test whether group_id is present in this dm_stats handle. + */ +int dm_stats_group_present(const struct dm_stats *dms, uint64_t group_id); + +/* + * Return the number of bins in the histogram configuration for the + * specified region or zero if no histogram specification is configured. + * Valid following a dm_stats_list() or dm_stats_populate() operation. + */ +int dm_stats_get_region_nr_histogram_bins(const struct dm_stats *dms, + uint64_t region_id); + +/* + * Parse a histogram string with optional unit suffixes into a + * dm_histogram bounds description. + * + * A histogram string is a string of numbers "n1,n2,n3,..." that + * represent the boundaries of a histogram. The first and final bins + * have implicit lower and upper bounds of zero and infinity + * respectively and boundary values must occur in order of ascending + * magnitude. Unless a unit suffix is given all values are specified in + * nanoseconds. + * + * For example, if bounds_str="300,600,900", the region will be created + * with a histogram containing four bins. Each report will include four + * numbers a:b:c:d. a is the number of requests that took between 0 and + * 300ns to complete, b is the number of requests that took 300-600ns to + * complete, c is the number of requests that took 600-900ns to complete + * and d is the number of requests that took more than 900ns to + * complete. + * + * An optional unit suffix of 's', 'ms', 'us', or 'ns' may be used to + * specify units of seconds, milliseconds, microseconds, or nanoseconds: + * + * bounds_str="1ns,1us,1ms,1s" + * bounds_str="500us,1ms,1500us,2ms" + * bounds_str="200ms,400ms,600ms,800ms,1s" + * + * The smallest valid unit of time for a histogram specification depends + * on whether the region uses precise timestamps: for a region with the + * default millisecond precision the smallest possible histogram boundary + * magnitude is one millisecond: attempting to use a histogram with a + * boundary less than one millisecond when creating a region will cause + * the region to be created with the precise_timestamps feature enabled. + * + * On success a pointer to the struct dm_histogram representing the + * bounds values is returned, or NULL in the case of error. The returned + * pointer should be freed using dm_free() when no longer required. + */ +struct dm_histogram *dm_histogram_bounds_from_string(const char *bounds_str);
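Illustrative sketch (editorial addition): parsing a bounds string with mixed unit suffixes, per the rules above.

    #include "libdevmapper.h"

    struct dm_histogram *example_parse_bounds(void)
    {
            /* Four bins: 0-500us, 500us-1ms, 1ms-2ms and >2ms.
             * The caller frees the result with dm_histogram_bounds_destroy(). */
            return dm_histogram_bounds_from_string("500us,1ms,2ms");
    }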
+ +/* + * Parse a zero terminated array of uint64_t into a dm_histogram bounds + * description. + * + * Each value in the array specifies the upper bound of a bin in the + * latency histogram in nanoseconds. Values must appear in ascending + * order of magnitude. + * + * The smallest valid unit of time for a histogram specification depends + * on whether the region uses precise timestamps: for a region with the + * default millisecond precision the smallest possible histogram boundary + * magnitude is one millisecond: attempting to use a histogram with a + * boundary less than one millisecond when creating a region will cause + * the region to be created with the precise_timestamps feature enabled. + */ +struct dm_histogram *dm_histogram_bounds_from_uint64(const uint64_t *bounds); + +/* + * Destroy the histogram bounds array obtained from a call to + * dm_histogram_bounds_from_string() or dm_histogram_bounds_from_uint64(). + */ +void dm_histogram_bounds_destroy(struct dm_histogram *bounds); + +/* + * Destroy a dm_stats object and all associated regions, counter + * sets and histograms. + */ +void dm_stats_destroy(struct dm_stats *dms); + +/* + * Counter sampling interval + */ + +/* + * Set the sampling interval for counter data to the specified value in + * either nanoseconds or milliseconds. + * + * The interval is used to calculate time-based metrics from the basic + * counter data: an interval must be set before calling any of the + * metric methods. + * + * For best accuracy the duration should be measured and updated at the + * end of each interval. + * + * All values are stored internally with nanosecond precision and are + * converted to or from ms when the millisecond interfaces are used. + */ +void dm_stats_set_sampling_interval_ns(struct dm_stats *dms, + uint64_t interval_ns); + +void dm_stats_set_sampling_interval_ms(struct dm_stats *dms, + uint64_t interval_ms); + +/* + * Retrieve the configured sampling interval in either nanoseconds or + * milliseconds. + */ +uint64_t dm_stats_get_sampling_interval_ns(const struct dm_stats *dms); +uint64_t dm_stats_get_sampling_interval_ms(const struct dm_stats *dms);
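Illustrative sketch (editorial addition): computing a sampling interval from two caller-supplied clock_gettime(CLOCK_MONOTONIC) timestamps and storing it in the handle for use by the metric methods. It assumes now >= last.

    #include <time.h>
    #include "libdevmapper.h"

    void example_set_interval(struct dm_stats *dms,
                              const struct timespec *last,
                              const struct timespec *now)
    {
            /* Whole seconds first, then the (possibly negative)
             * nanosecond remainder; unsigned wrap-around keeps the
             * total correct. */
            uint64_t ns = (uint64_t) (now->tv_sec - last->tv_sec)
                          * UINT64_C(1000000000);

            ns += (uint64_t) now->tv_nsec - (uint64_t) last->tv_nsec;
            dm_stats_set_sampling_interval_ns(dms, ns);
    }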
+ +/* + * Override program_id. This may be used to change the default + * program_id value for an existing handle. If the allow_empty argument + * is non-zero a NULL or empty program_id is permitted. + * + * Use with caution! Most users of the library should set a valid, + * non-NULL program_id for every statistics region created. Failing to + * do so may result in confusing state when multiple programs are + * creating and managing statistics regions. + * + * All users of the library are encouraged to choose an unambiguous, + * unique program_id: this could be based on PID (for programs that + * create, report, and delete regions in a single process), session id, + * executable name, or some other distinguishing string. + * + * Use of the empty string as a program_id does not simplify use of the + * library or the command line tools and use of this value is strongly + * discouraged. + */ +int dm_stats_set_program_id(struct dm_stats *dms, int allow_empty, + const char *program_id); + +/* + * Region properties: size, length & area_len. + * + * Region start and length are returned in units of 512b sectors as + * specified at region creation time. The area_len value gives the size + * of areas into which the region has been subdivided. For regions with + * a single area spanning the range this value is equal to the region + * length. + * + * For regions created with a specified number of areas the value + * represents the size of the areas into which the kernel divided the + * region excluding any rounding of the last area size. The number of + * areas may be obtained using the dm_stats_get_region_nr_areas() call. + * + * All values are returned in units of 512b sectors. + */ +int dm_stats_get_region_start(const struct dm_stats *dms, uint64_t *start, + uint64_t region_id); + +int dm_stats_get_region_len(const struct dm_stats *dms, uint64_t *len, + uint64_t region_id); + +int dm_stats_get_region_area_len(const struct dm_stats *dms, + uint64_t *len, uint64_t region_id); + +/* + * Area properties: start, offset and length. + * + * The area length is always equal to the area length of the region + * that contains it and is obtained from dm_stats_get_region_area_len(). + * + * The start of an area is a function of the area_id and the containing + * region's start and area length: it gives the absolute offset into the + * containing device of the beginning of the area. + * + * The offset expresses the area's relative offset into the current + * region. I.e. the area start minus the start offset of the containing + * region. + * + * All values are returned in units of 512b sectors. + */ +int dm_stats_get_area_start(const struct dm_stats *dms, uint64_t *start, + uint64_t region_id, uint64_t area_id); + +int dm_stats_get_area_offset(const struct dm_stats *dms, uint64_t *offset, + uint64_t region_id, uint64_t area_id); + +/* + * Retrieve program_id and user aux_data for a specific region. + * + * Only valid following a call to dm_stats_list(). + */ + +/* + * Retrieve program_id for the specified region. + * + * The returned pointer does not need to be freed separately from the + * dm_stats handle but will become invalid after a dm_stats_destroy(), + * dm_stats_list(), dm_stats_populate(), or dm_stats_bind*() of the + * handle from which it was obtained. + */ +const char *dm_stats_get_region_program_id(const struct dm_stats *dms, + uint64_t region_id); + +/* + * Retrieve user aux_data set for the specified region. This function + * returns any stored user aux_data as a string. + * + * Any library internal aux_data fields, such as DMS_GROUP descriptors, + * are stripped before the value is returned. + * + * The returned pointer does not need to be freed separately from the + * dm_stats handle but will become invalid after a dm_stats_destroy(), + * dm_stats_list(), dm_stats_populate(), or dm_stats_bind*() of the + * handle from which it was obtained. + */ +const char *dm_stats_get_region_aux_data(const struct dm_stats *dms, + uint64_t region_id); + +typedef enum { + DM_STATS_OBJECT_TYPE_NONE, + DM_STATS_OBJECT_TYPE_AREA, + DM_STATS_OBJECT_TYPE_REGION, + DM_STATS_OBJECT_TYPE_GROUP +} dm_stats_obj_type_t; + +/* + * Statistics cursor + * + * A dm_stats handle maintains an optional cursor into the statistics + * tables that it stores. Iterators are provided to visit each region, + * area, or group in a handle and accessor methods are provided to + * obtain properties and values for the object at the current cursor + * position. + * + * Using the cursor simplifies walking all regions or groups when + * the tables are sparse (i.e. contain some present and some + * non-present region_id or group_id values either due to program_id + * filtering or the ordering of region and group creation and deletion). + * + * Simple macros are provided to visit each area, region, or group + * contained in a handle and applications are encouraged to use these + * where possible. + */ + +/* + * Walk flags are used to initialise a dm_stats handle's cursor control + * and to select region or group aggregation when calling a metric or + * counter property method with immediate group, region, and area ID + * values. + * + * Walk flags are stored in the uppermost word of a uint64_t so that + * a region_id or group_id may be encoded in the lower bits. This + * allows an aggregate region_id or group_id to be specified when + * retrieving counter or metric values.
+ * + * Flags may be ORed together when used to initialise a dm_stats_walk: + * the resulting walk will visit instances of each type specified by + * the flag combination. + */ +#define DM_STATS_WALK_AREA 0x1000000000000ULL +#define DM_STATS_WALK_REGION 0x2000000000000ULL +#define DM_STATS_WALK_GROUP 0x4000000000000ULL + +#define DM_STATS_WALK_ALL 0x7000000000000ULL +#define DM_STATS_WALK_DEFAULT (DM_STATS_WALK_AREA | DM_STATS_WALK_REGION) + +/* + * Skip regions from a DM_STATS_WALK_REGION that contain only a single + * area: in this case the region's aggregate values are identical to + * the values of the single contained area. Setting this flag will + * suppress these duplicate entries during a dm_stats_walk_* with the + * DM_STATS_WALK_REGION flag set. + */ +#define DM_STATS_WALK_SKIP_SINGLE_AREA 0x8000000000000ULL + +/* + * Initialise the cursor control of a dm_stats handle for the specified + * walk type(s). Including a walk flag in the flags argument will cause + * any subsequent walk to visit that type of object (until the next + * call to dm_stats_walk_init()). + */ +int dm_stats_walk_init(struct dm_stats *dms, uint64_t flags); + +/* + * Set the cursor of a dm_stats handle to address the first present + * group, region, or area of the currently configured walk. It is + * valid to attempt to walk a NULL stats handle or a handle containing + * no present regions; in this case any call to dm_stats_walk_next() + * becomes a no-op and all calls to dm_stats_walk_end() return true. + */ +void dm_stats_walk_start(struct dm_stats *dms); + +/* + * Advance the statistics cursor to the next area, or to the next + * present region if at the end of the current region. If the end of + * the region, area, or group tables is reached a subsequent call to + * dm_stats_walk_end() will return 1 and dm_stats_object_type() called + * on the location will return DM_STATS_OBJECT_TYPE_NONE. + */ +void dm_stats_walk_next(struct dm_stats *dms); + +/* + * Force the statistics cursor to advance to the next region. This will + * stop any in-progress area walk (by clearing DM_STATS_WALK_AREA) and + * advance the cursor to the next present region, the first present + * group (if DM_STATS_WALK_GROUP is set), or to the end. In this case a + * subsequent call to dm_stats_walk_end() will return 1 and a call to + * dm_stats_object_type() for the location will return + * DM_STATS_OBJECT_TYPE_NONE. + */ +void dm_stats_walk_next_region(struct dm_stats *dms); + +/* + * Test whether the end of a statistics walk has been reached. + */ +int dm_stats_walk_end(struct dm_stats *dms); + +/* + * Return the type of object at the location specified by region_id + * and area_id. If either region_id or area_id uses one of the special + * values DM_STATS_REGION_CURRENT or DM_STATS_AREA_CURRENT the + * corresponding region or area identifier will be taken from the + * current cursor location. If the cursor location or the value encoded + * by region_id and area_id indicates an aggregate region or group, + * this will be reflected in the value returned. + */ +dm_stats_obj_type_t dm_stats_object_type(const struct dm_stats *dms, + uint64_t region_id, + uint64_t area_id); + +/* + * Return the type of object at the current stats cursor location. + */ +dm_stats_obj_type_t dm_stats_current_object_type(const struct dm_stats *dms); + +/* + * Stats iterators + * + * C 'for' and 'do'/'while' style iterators for dm_stats data. + * + * It is not safe to call any function that modifies the region table + * within the loop body (i.e.
dm_stats_list(), dm_stats_populate(), + dm_stats_init(), or dm_stats_destroy()). + * + * All counter and property (dm_stats_get_*) access methods, as well as + * dm_stats_populate_region(), can be safely called from loops. + * + */ + +/* + * Iterate over the regions table visiting each region. + * + * If the region table is empty or unpopulated the loop body will not be + * executed. + */ +#define dm_stats_foreach_region(dms) \ +for (dm_stats_walk_init((dms), DM_STATS_WALK_REGION), \ + dm_stats_walk_start((dms)); \ + !dm_stats_walk_end((dms)); dm_stats_walk_next_region((dms))) + +/* + * Iterate over the regions table visiting each area. + * + * If the region table is empty or unpopulated the loop body will not + * be executed. + */ +#define dm_stats_foreach_area(dms) \ +for (dm_stats_walk_init((dms), DM_STATS_WALK_AREA), \ + dm_stats_walk_start((dms)); \ + !dm_stats_walk_end((dms)); dm_stats_walk_next((dms))) + +/* + * Iterate over the regions table visiting each group. Metric and + * counter methods will return values for the group. + * + * If the group table is empty or unpopulated the loop body will not + * be executed. + */ +#define dm_stats_foreach_group(dms) \ +for (dm_stats_walk_init((dms), DM_STATS_WALK_GROUP), \ + dm_stats_walk_start((dms)); \ + !dm_stats_walk_end((dms)); \ + dm_stats_walk_next((dms))) + +/* + * Start a walk iterating over the regions contained in dm_stats handle + * 'dms'. + * + * The body of the loop should call dm_stats_walk_next() or + * dm_stats_walk_next_region() to advance to the next element. + * + * The loop body is executed at least once even if the stats handle is + * empty. + */ +#define dm_stats_walk_do(dms) \ +do { \ + dm_stats_walk_start((dms)); \ + do + +/* + * Start a 'while' style loop or end a 'do..while' loop iterating over the + * regions contained in dm_stats handle 'dms'. + */ +#define dm_stats_walk_while(dms) \ + while (!dm_stats_walk_end((dms))); \ +} while (0) + +/* + * Cursor relative property methods + * + * Calls with the prefix dm_stats_get_current_* operate relative to the + * current cursor location, returning properties for the current region + * or area of the supplied dm_stats handle. + * + */ + +/* + * Returns the number of areas (counter sets) contained in the current + * region of the supplied dm_stats handle. + */ +uint64_t dm_stats_get_current_nr_areas(const struct dm_stats *dms); + +/* + * Retrieve the current values of the stats cursor. + */ +uint64_t dm_stats_get_current_region(const struct dm_stats *dms); +uint64_t dm_stats_get_current_area(const struct dm_stats *dms); + +/* + * Current region properties: size, length & area_len. + * + * See the comments for the equivalent dm_stats_get_* versions for a + * complete description of these methods. + * + * All values are returned in units of 512b sectors. + */ +int dm_stats_get_current_region_start(const struct dm_stats *dms, + uint64_t *start); + +int dm_stats_get_current_region_len(const struct dm_stats *dms, + uint64_t *len); + +int dm_stats_get_current_region_area_len(const struct dm_stats *dms, + uint64_t *area_len);
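Illustrative sketch (editorial addition): combining the region iterator with the cursor-relative property calls declared above.

    #include <stdio.h>
    #include <inttypes.h>
    #include "libdevmapper.h"

    void example_walk_regions(struct dm_stats *dms)
    {
            uint64_t start, len;

            dm_stats_foreach_region(dms) {
                    if (!dm_stats_get_current_region_start(dms, &start)
                        || !dm_stats_get_current_region_len(dms, &len))
                            continue;
                    printf("region %" PRIu64 ": %" PRIu64 "+%" PRIu64 "\n",
                           dm_stats_get_current_region(dms), start, len);
            }
    }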
+ +/* + * Current area properties: start and length. + * + * See the comments for the equivalent dm_stats_get_* versions for a + * complete description of these methods. + * + * All values are returned in units of 512b sectors. + */ +int dm_stats_get_current_area_start(const struct dm_stats *dms, + uint64_t *start); + +int dm_stats_get_current_area_offset(const struct dm_stats *dms, + uint64_t *offset); + +int dm_stats_get_current_area_len(const struct dm_stats *dms, + uint64_t *len); + +/* + * Return a pointer to the program_id string for the region at the + * current cursor location. + */ +const char *dm_stats_get_current_region_program_id(const struct dm_stats *dms); + +/* + * Return a pointer to the user aux_data string for the region at the + * current cursor location. + */ +const char *dm_stats_get_current_region_aux_data(const struct dm_stats *dms); + +/* + * Statistics groups and data aggregation. + */ + +/* + * Create a new group in stats handle dms from the group descriptor + * passed in group. The group descriptor is a string containing a list + * of region_id values that will be included in the group. The first + * region_id found will be the group leader. Ranges of identifiers may + * be expressed as "M-N", where M and N are the start and end region_id + * values for the range. + */ +int dm_stats_create_group(struct dm_stats *dms, const char *group, + const char *alias, uint64_t *group_id); + +/* + * Remove the specified group_id. If the remove argument is zero the + * group will be removed but the regions that it contained will remain. + * If remove is non-zero then all regions that belong to the group will + * also be removed. + */ +int dm_stats_delete_group(struct dm_stats *dms, uint64_t group_id, int remove); + +/* + * Set an alias for this group or region. The alias will be returned + * instead of the normal dm-stats name for this region or group. + */ +int dm_stats_set_alias(struct dm_stats *dms, uint64_t group_id, + const char *alias); + +/* + * Returns a pointer to the currently configured alias for id, or the + * name of the dm device the handle is bound to if no alias has been + * set. The pointer will be freed automatically when a new alias is set + * or when the stats handle is cleared. + */ +const char *dm_stats_get_alias(const struct dm_stats *dms, uint64_t id); + +#define DM_STATS_GROUP_NONE UINT64_MAX +/* + * Return the group_id that the specified region_id belongs to, or the + * special value DM_STATS_GROUP_NONE if the region does not belong + * to any group. + */ +uint64_t dm_stats_get_group_id(const struct dm_stats *dms, uint64_t region_id); + +/* + * Store a pointer to a string describing the regions that are members + * of the group specified by group_id in the memory pointed to by buf. + * The string is in the same format as the 'group' argument to + * dm_stats_create_group(). + * + * The pointer does not need to be freed explicitly by the caller: it + * will become invalid following a subsequent dm_stats_list(), + * dm_stats_populate() or dm_stats_destroy() of the corresponding + * dm_stats handle. + */ +int dm_stats_get_group_descriptor(const struct dm_stats *dms, + uint64_t group_id, char **buf);
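Illustrative sketch (editorial addition): grouping regions 0..3 with a descriptor string in the format described above. The alias is a placeholder and the four regions are assumed to exist.

    #include "libdevmapper.h"

    int example_group_regions(struct dm_stats *dms)
    {
            uint64_t group_id;

            /* Group regions 0..3; region 0 becomes the group leader. */
            if (!dm_stats_create_group(dms, "0-3", "example_group", &group_id))
                    return 0;
            return dm_stats_group_present(dms, group_id);
    }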
+ +/* + * Create regions that correspond to the extents of a file in the + * filesystem and optionally place them into a group. + * + * File descriptor fd must reference a regular file, open for reading, + * in a local file system that supports the FIEMAP ioctl, and that + * returns data describing the physical location of extents. + * + * The file descriptor can be closed by the caller following the call + * to dm_stats_create_regions_from_fd(). + * + * If the group argument is non-zero the regions will be placed into a + * group and the group alias set to the value supplied (if alias is + * NULL no group alias will be assigned). + * + * On success the function returns a pointer to an array of uint64_t + * containing the IDs of the newly created regions. The region_id + * array is terminated by the value DM_STATS_REGION_NOT_PRESENT and + * should be freed using dm_free() when no longer required. + * + * On error NULL is returned. + * + * Following a call to dm_stats_create_regions_from_fd() the handle + * is guaranteed to be in a listed state, and to contain any region + * and group identifiers created by the operation. + * + * The group_id for the new group is equal to the region_id value in + * the first array element. + */ +uint64_t *dm_stats_create_regions_from_fd(struct dm_stats *dms, int fd, + int group, int precise, + struct dm_histogram *bounds, + const char *alias); +/* + * Update a group of regions that correspond to the extents of a file + * in the filesystem, adding and removing regions to account for + * allocation changes in the underlying file. + * + * File descriptor fd must reference a regular file, open for reading, + * in a local file system that supports the FIEMAP ioctl, and that + * returns data describing the physical location of extents. + * + * The file descriptor can be closed by the caller following the call + * to dm_stats_update_regions_from_fd(). + * + * On success the function returns a pointer to an array of uint64_t + * containing the IDs of the updated regions (including any existing + * regions that were not modified by the call). + * + * The region_id array is terminated by the special value + * DM_STATS_REGION_NOT_PRESENT and should be freed using dm_free() + * when no longer required. + * + * On error NULL is returned. + * + * Following a call to dm_stats_update_regions_from_fd() the handle + * is guaranteed to be in a listed state, and to contain any region + * and group identifiers created by the operation. + * + * This function cannot be used with file mapped regions that are + * not members of a group: either group the regions, or remove them + * and re-map them with dm_stats_create_regions_from_fd(). + */ +uint64_t *dm_stats_update_regions_from_fd(struct dm_stats *dms, int fd, + uint64_t group_id); + + +/* + * The file map monitoring daemon can monitor files in two distinct + * ways: the mode affects the behaviour of the daemon when a file + * under monitoring is renamed or unlinked, and the conditions which + * cause the daemon to terminate. + * + * In both modes, the daemon will always shut down when the group + * being monitored is deleted. + * + * Follow inode: + * The daemon follows the inode of the file, as it was at the time the + * daemon started. The file descriptor referencing the file is kept + * open at all times, and the daemon will exit when it detects that + * the file has been unlinked and it is the last holder of a reference + * to the file. + * + * This mode is useful if the file is expected to be renamed, or moved + * within the file system, while it is being monitored. + * + * Follow path: + * The daemon follows the path that was given on the daemon command + * line. The file descriptor referencing the file is re-opened on each + * iteration of the daemon, and the daemon will exit if no file exists + * at this location (a tolerance is allowed so that a brief delay + * between unlink() and creat() is permitted).
+ * + * This mode is useful if the file is updated by unlinking the original + * and placing a new file at the same path. + */ + +typedef enum { + DM_FILEMAPD_FOLLOW_INODE, + DM_FILEMAPD_FOLLOW_PATH, + DM_FILEMAPD_FOLLOW_NONE +} dm_filemapd_mode_t; + +/* + * Parse a string representation of a dmfilemapd mode. + * + * Returns a valid dm_filemapd_mode_t value on success, or + * DM_FILEMAPD_FOLLOW_NONE on error. + */ +dm_filemapd_mode_t dm_filemapd_mode_from_string(const char *mode_str); + +/* + * Start the dmfilemapd filemap monitoring daemon for the specified + * file descriptor, group, and file system path. The daemon will + * monitor the file for allocation changes, and when a change is + * detected, call dm_stats_update_regions_from_fd() to update the + * mapped regions for the file. + * + * The path provided to dm_stats_start_filemapd() must be an absolute + * path, and should reflect the path of 'fd' at the time that it was + * opened. + * + * The mode parameter controls the behaviour of the daemon when the + * file being monitored is unlinked or moved: see the comments for + * dm_filemapd_mode_t for a full description and possible values. + * + * The daemon can be stopped at any time by sending SIGTERM to the + * daemon pid. + */ +int dm_stats_start_filemapd(int fd, uint64_t group_id, const char *path, + dm_filemapd_mode_t mode, unsigned foreground, + unsigned verbose); + +/* + * Call this to actually run the ioctl. + */ +int dm_task_run(struct dm_task *dmt); + +/* + * The errno from the last device-mapper ioctl performed by dm_task_run. + */ +int dm_task_get_errno(struct dm_task *dmt); + +/* + * Call this to make or remove the device nodes associated with previously + * issued commands. + */ +void dm_task_update_nodes(void); + +/* + * Mangling support + * + * Character whitelist: 0-9, A-Z, a-z, #+-.:=@_ + * HEX mangling format: \xNN, NN being the hex value of the character. + * (whitelist and format supported by udev) +*/ +typedef enum { + DM_STRING_MANGLING_NONE, /* do not mangle at all */ + DM_STRING_MANGLING_AUTO, /* mangle only if not already mangled with hex, error when mixed */ + DM_STRING_MANGLING_HEX /* always mangle with hex encoding, no matter what the input is */ +} dm_string_mangling_t; + +/* + * Set/get mangling mode used for device-mapper names and uuids. + */ +int dm_set_name_mangling_mode(dm_string_mangling_t name_mangling); +dm_string_mangling_t dm_get_name_mangling_mode(void); + +/* + * Get mangled/unmangled form of the device-mapper name or uuid + * irrespective of the global setting (set by dm_set_name_mangling_mode). + * The name or uuid returned needs to be freed after use by calling dm_free! + */ +char *dm_task_get_name_mangled(const struct dm_task *dmt); +char *dm_task_get_name_unmangled(const struct dm_task *dmt); +char *dm_task_get_uuid_mangled(const struct dm_task *dmt); +char *dm_task_get_uuid_unmangled(const struct dm_task *dmt); + +/* + * Configure the device-mapper directory + */ +int dm_set_dev_dir(const char *dir); +const char *dm_dir(void); + +/* + * Configure sysfs directory, /sys by default + */ +int dm_set_sysfs_dir(const char *dir); +const char *dm_sysfs_dir(void); + +/* + * Configure default UUID prefix string. + * Conventionally this is a short capitalised prefix indicating the subsystem + * that is managing the devices, e.g. "LVM-" or "MPATH-". + * To support stacks of devices from different subsystems, recursive functions + * stop recursing if they reach a device with a different prefix. 
+ */ +int dm_set_uuid_prefix(const char *uuid_prefix); +const char *dm_uuid_prefix(void); + +/* + * Determine whether a major number belongs to device-mapper or not. + */ +int dm_is_dm_major(uint32_t major); + +/* + * Get the associated device name for a given major and minor number by + * reading sysfs. If this is a dm device, the associated dm name is + * returned, the one that appears in /dev/mapper. DM names can be + * resolved this way only with kernels >= 2.6.29; with older kernels the + * kernel name (e.g. dm-0) is returned instead. + * If prefer_kernel_name is set, the kernel name is always preferred over + * the device-mapper name for dm devices no matter what the kernel + * version is. + * For non-dm devices, the associated kernel name is always returned, + * e.g. sda, md0 etc. + * Returns 0 on error or if sysfs is not used (or configured incorrectly), + * otherwise returns 1 and the supplied buffer holds the device name. + */ +int dm_device_get_name(uint32_t major, uint32_t minor, + int prefer_kernel_name, + char *buf, size_t buf_size); + +/* + * Determine whether a device has any holders (devices + * using this device). If sysfs is not used (or configured + * incorrectly), returns 0. + */ +int dm_device_has_holders(uint32_t major, uint32_t minor); + +/* + * Determine whether a device contains a mounted filesystem. + * If sysfs is not used (or configured incorrectly), returns 0. + */ +int dm_device_has_mounted_fs(uint32_t major, uint32_t minor); + + +/* + * The callback is invoked for individual mountinfo lines; + * minor, major and mount target are parsed and unmangled. + */ +typedef int (*dm_mountinfo_line_callback_fn) (char *line, unsigned maj, unsigned min, + char *target, void *cb_data); + +/* + * Read all lines from /proc/self/mountinfo and + * call the read_fn callback for each line. + */ +int dm_mountinfo_read(dm_mountinfo_line_callback_fn read_fn, void *cb_data); + +/* + * Initialise library + */ +void dm_lib_init(void) __attribute__((constructor)); + +/* + * Release library resources + */ +void dm_lib_release(void); +void dm_lib_exit(void) __attribute__((destructor)); + +/* An optimisation for clients making repeated calls involving dm ioctls */ +void dm_hold_control_dev(int hold_open); + +/* + * Use NULL for all devices. + */ +int dm_mknodes(const char *name); +int dm_driver_version(char *version, size_t size); + +/****************************************************** + * Functions to build and manipulate trees of devices * + ******************************************************/ +struct dm_tree; +struct dm_tree_node; + +/* + * Initialise an empty dependency tree. + * + * The tree consists of a root node together with one node for each mapped + * device which has child nodes for each device referenced in its table. + * + * Every node in the tree has one or more children and one or more parents. + * + * The root node is the parent/child of every node that doesn't have other + * parents/children. + */ +struct dm_tree *dm_tree_create(void); +void dm_tree_free(struct dm_tree *tree); + +/* + * List of suffixes to be ignored when matching uuids against existing devices. + */ +void dm_tree_set_optional_uuid_suffixes(struct dm_tree *dtree, const char **optional_uuid_suffixes); + +/* + * Add nodes to the tree for a given device and all the devices it uses. + */ +int dm_tree_add_dev(struct dm_tree *tree, uint32_t major, uint32_t minor); +int dm_tree_add_dev_with_udev_flags(struct dm_tree *tree, uint32_t major, + uint32_t minor, uint16_t udev_flags);
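Illustrative sketch (editorial addition): building a dependency tree for one device from its major/minor numbers.

    #include "libdevmapper.h"

    struct dm_tree *example_build_tree(uint32_t major, uint32_t minor)
    {
            struct dm_tree *tree = dm_tree_create();

            if (!tree)
                    return NULL;
            /* Adds the device and everything its table references. */
            if (!dm_tree_add_dev(tree, major, minor)) {
                    dm_tree_free(tree);
                    return NULL;
            }
            return tree;
    }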
+ +/* + * Add a new node to the tree if it doesn't already exist. + */ +struct dm_tree_node *dm_tree_add_new_dev(struct dm_tree *tree, + const char *name, + const char *uuid, + uint32_t major, uint32_t minor, + int read_only, + int clear_inactive, + void *context); +struct dm_tree_node *dm_tree_add_new_dev_with_udev_flags(struct dm_tree *tree, + const char *name, + const char *uuid, + uint32_t major, + uint32_t minor, + int read_only, + int clear_inactive, + void *context, + uint16_t udev_flags); + +/* + * Search for a node in the tree. + * Set major and minor to 0 or uuid to NULL to get the root node. + */ +struct dm_tree_node *dm_tree_find_node(struct dm_tree *tree, + uint32_t major, + uint32_t minor); +struct dm_tree_node *dm_tree_find_node_by_uuid(struct dm_tree *tree, + const char *uuid); + +/* + * Use this to walk through all children of a given node. + * Set handle to NULL in first call. + * Returns NULL after the last child. + * Set inverted to use inverted tree. + */ +struct dm_tree_node *dm_tree_next_child(void **handle, + const struct dm_tree_node *parent, + uint32_t inverted); + +/* + * Get properties of a node. + */ +const char *dm_tree_node_get_name(const struct dm_tree_node *node); +const char *dm_tree_node_get_uuid(const struct dm_tree_node *node); +const struct dm_info *dm_tree_node_get_info(const struct dm_tree_node *node); +void *dm_tree_node_get_context(const struct dm_tree_node *node); +/* + * Returns 0 when the size of the node and its children is unchanged. + * Returns 1 when the node or any of its children has increased in size. + * Returns -1 when the node or any of its children has reduced in size. + */ +int dm_tree_node_size_changed(const struct dm_tree_node *dnode); + +/* + * Returns the number of children of the given node (excluding the root node). + * Set inverted for the number of parents. + */ +int dm_tree_node_num_children(const struct dm_tree_node *node, uint32_t inverted); + +/* + * Deactivate a device plus all dependencies. + * Ignores devices that don't have a uuid starting with uuid_prefix. + */ +int dm_tree_deactivate_children(struct dm_tree_node *dnode, + const char *uuid_prefix, + size_t uuid_prefix_len); +/* + * Preload/create a device plus all dependencies. + * Ignores devices that don't have a uuid starting with uuid_prefix. + */ +int dm_tree_preload_children(struct dm_tree_node *dnode, + const char *uuid_prefix, + size_t uuid_prefix_len); + +/* + * Resume a device plus all dependencies. + * Ignores devices that don't have a uuid starting with uuid_prefix. + */ +int dm_tree_activate_children(struct dm_tree_node *dnode, + const char *uuid_prefix, + size_t uuid_prefix_len); + +/* + * Suspend a device plus all dependencies. + * Ignores devices that don't have a uuid starting with uuid_prefix. + */ +int dm_tree_suspend_children(struct dm_tree_node *dnode, + const char *uuid_prefix, + size_t uuid_prefix_len); + +/* + * Skip the filesystem sync when suspending. + * Does nothing with other functions. + * Use this when no snapshots are involved. + */ +void dm_tree_skip_lockfs(struct dm_tree_node *dnode); + +/* + * Set the 'noflush' flag when suspending devices. + * If the kernel supports it, instead of erroring outstanding I/O that + * cannot be completed, the I/O is queued and resubmitted when the + * device is resumed. This affects multipath devices when all paths + * have failed and queue_if_no_path is set, and mirror devices when + * block_on_error is set and the mirror log has failed. + */ +void dm_tree_use_no_flush_suspend(struct dm_tree_node *dnode);
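Illustrative sketch (editorial addition): visiting the children of a node with dm_tree_next_child() as described above; the iteration handle starts as NULL.

    #include <stdio.h>
    #include "libdevmapper.h"

    void example_list_children(const struct dm_tree_node *parent)
    {
            void *handle = NULL;
            struct dm_tree_node *child;

            /* inverted=0: walk the normal (non-inverted) tree. */
            while ((child = dm_tree_next_child(&handle, parent, 0)))
                    printf("%s\n", dm_tree_node_get_name(child));
    }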
+ +/* + * Retry removal of each device if not successful. + */ +void dm_tree_retry_remove(struct dm_tree_node *dnode); + +/* + * Is the uuid prefix present in the tree? + * Only returns 0 if every node was checked successfully. + * Returns 1 if the tree walk has to be aborted. + */ +int dm_tree_children_use_uuid(struct dm_tree_node *dnode, + const char *uuid_prefix, + size_t uuid_prefix_len); + +/* + * Construct tables for new nodes before activating them. + */ +int dm_tree_node_add_snapshot_origin_target(struct dm_tree_node *dnode, + uint64_t size, + const char *origin_uuid); +int dm_tree_node_add_snapshot_target(struct dm_tree_node *node, + uint64_t size, + const char *origin_uuid, + const char *cow_uuid, + int persistent, + uint32_t chunk_size); +int dm_tree_node_add_snapshot_merge_target(struct dm_tree_node *node, + uint64_t size, + const char *origin_uuid, + const char *cow_uuid, + const char *merge_uuid, + uint32_t chunk_size); +int dm_tree_node_add_error_target(struct dm_tree_node *node, + uint64_t size); +int dm_tree_node_add_zero_target(struct dm_tree_node *node, + uint64_t size); +int dm_tree_node_add_linear_target(struct dm_tree_node *node, + uint64_t size); +int dm_tree_node_add_striped_target(struct dm_tree_node *node, + uint64_t size, + uint32_t stripe_size); + +#define DM_CRYPT_IV_DEFAULT UINT64_C(-1) /* iv_offset == seg offset */ +/* + * The function accepts either a single string with the complete cipher + * specification (in which case chainmode and iv should be NULL, since + * they are included in the cipher string), or separate chainmode and iv + * arguments which will be joined with the cipher to form + * "cipher-chainmode-iv". + */ +int dm_tree_node_add_crypt_target(struct dm_tree_node *node, + uint64_t size, + const char *cipher, + const char *chainmode, + const char *iv, + uint64_t iv_offset, + const char *key); +int dm_tree_node_add_mirror_target(struct dm_tree_node *node, + uint64_t size); + +/* Mirror log flags */ +#define DM_NOSYNC 0x00000001 /* Known already in sync */ +#define DM_FORCESYNC 0x00000002 /* Force resync */ +#define DM_BLOCK_ON_ERROR 0x00000004 /* On error, suspend I/O */ +#define DM_CORELOG 0x00000008 /* In-memory log */ + +int dm_tree_node_add_mirror_target_log(struct dm_tree_node *node, + uint32_t region_size, + unsigned clustered, + const char *log_uuid, + unsigned area_count, + uint32_t flags); + +int dm_tree_node_add_raid_target(struct dm_tree_node *node, + uint64_t size, + const char *raid_type, + uint32_t region_size, + uint32_t stripe_size, + uint64_t rebuilds, + uint64_t flags); + +/* + * Defines below are based on kernel's dm-cache.c defines + * DM_CACHE_MIN_DATA_BLOCK_SIZE (32 * 1024 >> SECTOR_SHIFT) + * DM_CACHE_MAX_DATA_BLOCK_SIZE (1024 * 1024 * 1024 >> SECTOR_SHIFT) + */ +#define DM_CACHE_MIN_DATA_BLOCK_SIZE (UINT32_C(64)) +#define DM_CACHE_MAX_DATA_BLOCK_SIZE (UINT32_C(2097152)) +/* + * Max supported size for cache pool metadata device. + * Limitation is hardcoded into the kernel and bigger device sizes + * are not accepted. + * + * Limit defined in drivers/md/dm-cache-metadata.h + */ +#define DM_CACHE_METADATA_MAX_SECTORS DM_THIN_METADATA_MAX_SECTORS + +/* + * Define the number of elements in the rebuild and writemostly arrays + * of struct dm_tree_node_raid_params. + */ + +struct dm_tree_node_raid_params { + const char *raid_type; + + uint32_t stripes; + uint32_t mirrors; + uint32_t region_size; + uint32_t stripe_size; + + /* + * 'rebuilds' and 'writemostly' are bitfields that signify + * which devices in the array are to be rebuilt or marked + * writemostly. The kernel supports up to 253 legs. + * We limit ourselves by choosing a lower value + * for DEFAULT_RAID{1}_MAX_IMAGES in defaults.h.
+ */ + uint64_t rebuilds; + uint64_t writemostly; + uint32_t writebehind; /* I/Os (kernel default COUNTER_MAX / 2) */ + uint32_t sync_daemon_sleep; /* ms (kernel default = 5sec) */ + uint32_t max_recovery_rate; /* kB/sec/disk */ + uint32_t min_recovery_rate; /* kB/sec/disk */ + uint32_t stripe_cache; /* sectors */ + + uint64_t flags; /* [no]sync */ + uint32_t reserved2; +}; + +/* + * Version 2 of the node raid params struct above, to keep API + * compatibility. + * + * Extended for more than 64 legs (max 253 in the MD kernel runtime!), + * delta_disks for disk add/remove reshaping, + * data_offset for out-of-place reshaping + * and data_copies for odd number of raid10 legs. + */ +#define RAID_BITMAP_SIZE 4 /* 4 * 64 bit elements in rebuilds/writemostly arrays */ +struct dm_tree_node_raid_params_v2 { + const char *raid_type; + + uint32_t stripes; + uint32_t mirrors; + uint32_t region_size; + uint32_t stripe_size; + + int delta_disks; /* +/- number of disks to add/remove (reshaping) */ + int data_offset; /* data offset to set (out-of-place reshaping) */ + + /* + * 'rebuilds' and 'writemostly' are bitfields that signify + * which devices in the array are to be rebuilt or marked + * writemostly. The kernel supports up to 253 legs. + * We limit ourselves by choosing a lower value + * for DEFAULT_RAID_MAX_IMAGES. + */ + uint64_t rebuilds[RAID_BITMAP_SIZE]; + uint64_t writemostly[RAID_BITMAP_SIZE]; + uint32_t writebehind; /* I/Os (kernel default COUNTER_MAX / 2) */ + uint32_t data_copies; /* RAID # of data copies */ + uint32_t sync_daemon_sleep; /* ms (kernel default = 5sec) */ + uint32_t max_recovery_rate; /* kB/sec/disk */ + uint32_t min_recovery_rate; /* kB/sec/disk */ + uint32_t stripe_cache; /* sectors */ + + uint64_t flags; /* [no]sync */ +}; + +int dm_tree_node_add_raid_target_with_params(struct dm_tree_node *node, + uint64_t size, + const struct dm_tree_node_raid_params *p); + +/* Version 2 API function taking dm_tree_node_raid_params_v2 for aforementioned extensions. */ +int dm_tree_node_add_raid_target_with_params_v2(struct dm_tree_node *node, + uint64_t size, + const struct dm_tree_node_raid_params_v2 *p);
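Illustrative sketch (editorial addition): filling in dm_tree_node_raid_params_v2 for a simple two-leg raid1 mapping. The field values are only examples; all other fields are left zeroed by the designated initializer.

    #include "libdevmapper.h"

    int example_add_raid1(struct dm_tree_node *node, uint64_t size)
    {
            struct dm_tree_node_raid_params_v2 params = {
                    .raid_type = "raid1",
                    .mirrors = 2,
                    .region_size = 1024,    /* sectors; example value */
            };

            return dm_tree_node_add_raid_target_with_params_v2(node, size,
                                                               &params);
    }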
+ +/* Cache feature_flags */ +#define DM_CACHE_FEATURE_WRITEBACK 0x00000001 +#define DM_CACHE_FEATURE_WRITETHROUGH 0x00000002 +#define DM_CACHE_FEATURE_PASSTHROUGH 0x00000004 +#define DM_CACHE_FEATURE_METADATA2 0x00000008 /* cache v1.10 */ + +struct dm_config_node; +/* + * Use for passing the cache policy and all its arguments, e.g.: + * + * policy_settings { + * migration_threshold=2048 + * sequential_threshold=100 + * ... + * } + * + * For a policy without any parameters use NULL. + */ +int dm_tree_node_add_cache_target(struct dm_tree_node *node, + uint64_t size, + uint64_t feature_flags, /* DM_CACHE_FEATURE_* */ + const char *metadata_uuid, + const char *data_uuid, + const char *origin_uuid, + const char *policy_name, + const struct dm_config_node *policy_settings, + uint32_t data_block_size); + +/* + * FIXME Add individual cache policy pairs <key> = value, like: + * int dm_tree_node_add_cache_policy_arg(struct dm_tree_node *dnode, + * const char *key, uint64_t value); + */ + +/* + * Replicator operation mode + * Note: API for Replicator is not yet stable + */ +typedef enum { + DM_REPLICATOR_SYNC, /* Synchronous replication */ + DM_REPLICATOR_ASYNC_WARN, /* Warn if async replicator is slow */ + DM_REPLICATOR_ASYNC_STALL, /* Stall replicator if not fast enough */ + DM_REPLICATOR_ASYNC_DROP, /* Drop sites out of sync */ + DM_REPLICATOR_ASYNC_FAIL, /* Fail replicator if slow */ + NUM_DM_REPLICATOR_MODES +} dm_replicator_mode_t; + +int dm_tree_node_add_replicator_target(struct dm_tree_node *node, + uint64_t size, + const char *rlog_uuid, + const char *rlog_type, + unsigned rsite_index, + dm_replicator_mode_t mode, + uint32_t async_timeout, + uint64_t fall_behind_data, + uint32_t fall_behind_ios); + +int dm_tree_node_add_replicator_dev_target(struct dm_tree_node *node, + uint64_t size, + const char *replicator_uuid, /* Replicator control device */ + uint64_t rdevice_index, + const char *rdev_uuid, /* Rimage device name/uuid */ + unsigned rsite_index, + const char *slog_uuid, + uint32_t slog_flags, /* Mirror log flags */ + uint32_t slog_region_size); +/* End of Replicator API */ + +/* + * FIXME: Defines below are based on kernel's dm-thin.c defines + * DATA_DEV_BLOCK_SIZE_MIN_SECTORS (64 * 1024 >> SECTOR_SHIFT) + * DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT) + */ +#define DM_THIN_MIN_DATA_BLOCK_SIZE (UINT32_C(128)) +#define DM_THIN_MAX_DATA_BLOCK_SIZE (UINT32_C(2097152)) +/* + * Max supported size for thin pool metadata device (17112760320 bytes) + * Limitation is hardcoded into the kernel and bigger device size + * is not accepted. + * drivers/md/dm-thin-metadata.h THIN_METADATA_MAX_SECTORS + */ +#define DM_THIN_MAX_METADATA_SIZE (UINT64_C(255) * (1 << 14) * (4096 / (1 << 9)) - 256 * 1024) + +int dm_tree_node_add_thin_pool_target(struct dm_tree_node *node, + uint64_t size, + uint64_t transaction_id, + const char *metadata_uuid, + const char *pool_uuid, + uint32_t data_block_size, + uint64_t low_water_mark, + unsigned skip_block_zeroing); + +/* Supported messages for thin provision target */ +typedef enum { + DM_THIN_MESSAGE_CREATE_SNAP, /* device_id, origin_id */ + DM_THIN_MESSAGE_CREATE_THIN, /* device_id */ + DM_THIN_MESSAGE_DELETE, /* device_id */ + DM_THIN_MESSAGE_SET_TRANSACTION_ID, /* current_id, new_id */ + DM_THIN_MESSAGE_RESERVE_METADATA_SNAP, /* target version >= 1.1 */ + DM_THIN_MESSAGE_RELEASE_METADATA_SNAP, /* target version >= 1.1 */ +} dm_thin_message_t; + +int dm_tree_node_add_thin_pool_message(struct dm_tree_node *node, + dm_thin_message_t type, + uint64_t id1, uint64_t id2); + +/* + * Set thin pool discard features + * ignore - Disable support for discards + * no_passdown - Don't pass discards down to underlying data device, + * just remove the mapping + * Feature is available since version 1.1 of the thin target. + */ +int dm_tree_node_set_thin_pool_discard(struct dm_tree_node *node, + unsigned ignore, + unsigned no_passdown);
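Illustrative sketch (editorial addition): queueing a thin-pool message that creates a new thin device. Device id 0 is an example value; id2 is unused for this message type.

    #include "libdevmapper.h"

    int example_create_thin(struct dm_tree_node *pool_node)
    {
            /* DM_THIN_MESSAGE_CREATE_THIN takes a single device_id. */
            return dm_tree_node_add_thin_pool_message(pool_node,
                                                      DM_THIN_MESSAGE_CREATE_THIN,
                                                      0, 0);
    }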
+ +/* + * Return errors, rather than queueing I/O, when the thin pool runs out + * of space. + */ +int dm_tree_node_set_thin_pool_error_if_no_space(struct dm_tree_node *node, + unsigned error_if_no_space); +/* Start thin pool with metadata in read-only mode */ +int dm_tree_node_set_thin_pool_read_only(struct dm_tree_node *node, + unsigned read_only); +/* + * FIXME: Defines below are based on kernel's dm-thin.c defines + * MAX_DEV_ID ((1 << 24) - 1) + */ +#define DM_THIN_MAX_DEVICE_ID (UINT32_C((1 << 24) - 1)) +int dm_tree_node_add_thin_target(struct dm_tree_node *node, + uint64_t size, + const char *pool_uuid, + uint32_t device_id); + +int dm_tree_node_set_thin_external_origin(struct dm_tree_node *node, + const char *external_uuid); + +void dm_tree_node_set_udev_flags(struct dm_tree_node *node, uint16_t udev_flags); + +void dm_tree_node_set_presuspend_node(struct dm_tree_node *node, + struct dm_tree_node *presuspend_node); + +int dm_tree_node_add_target_area(struct dm_tree_node *node, + const char *dev_name, + const char *dlid, + uint64_t offset); + +/* + * Only for temporarily-missing raid devices where changes are tracked. + */ +int dm_tree_node_add_null_area(struct dm_tree_node *node, uint64_t offset); + +/* + * Set readahead (in sectors) after loading the node. + */ +void dm_tree_node_set_read_ahead(struct dm_tree_node *dnode, + uint32_t read_ahead, + uint32_t read_ahead_flags); + +/* + * Set node callback hook before de/activation. + * Callback is called before 'activation' of node for activation tree, + * or 'deactivation' of node for deactivation tree. + */ +typedef enum { + DM_NODE_CALLBACK_PRELOADED, /* Node has preload deps */ + DM_NODE_CALLBACK_DEACTIVATED, /* Node is deactivated */ +} dm_node_callback_t; +typedef int (*dm_node_callback_fn) (struct dm_tree_node *node, + dm_node_callback_t type, void *cb_data); +void dm_tree_node_set_callback(struct dm_tree_node *node, + dm_node_callback_fn cb, void *cb_data); + +void dm_tree_set_cookie(struct dm_tree_node *node, uint32_t cookie); +uint32_t dm_tree_get_cookie(struct dm_tree_node *node); + +/***************************************************************************** + * Library functions + *****************************************************************************/ + +/******************* + * Memory management + *******************/ + +/* + * Never use these functions directly - use the macros following instead.
+ */ +void *dm_malloc_wrapper(size_t s, const char *file, int line) + __attribute__((__malloc__)) __attribute__((__warn_unused_result__)); +void *dm_malloc_aligned_wrapper(size_t s, size_t a, const char *file, int line) + __attribute__((__malloc__)) __attribute__((__warn_unused_result__)); +void *dm_zalloc_wrapper(size_t s, const char *file, int line) + __attribute__((__malloc__)) __attribute__((__warn_unused_result__)); +void *dm_realloc_wrapper(void *p, unsigned int s, const char *file, int line) + __attribute__((__warn_unused_result__)); +void dm_free_wrapper(void *ptr); +char *dm_strdup_wrapper(const char *s, const char *file, int line) + __attribute__((__warn_unused_result__)); +int dm_dump_memory_wrapper(void); +void dm_bounds_check_wrapper(void); + +#define dm_malloc(s) dm_malloc_wrapper((s), __FILE__, __LINE__) +#define dm_malloc_aligned(s, a) dm_malloc_aligned_wrapper((s), (a), __FILE__, __LINE__) +#define dm_zalloc(s) dm_zalloc_wrapper((s), __FILE__, __LINE__) +#define dm_strdup(s) dm_strdup_wrapper((s), __FILE__, __LINE__) +#define dm_free(p) dm_free_wrapper(p) +#define dm_realloc(p, s) dm_realloc_wrapper((p), (s), __FILE__, __LINE__) +#define dm_dump_memory() dm_dump_memory_wrapper() +#define dm_bounds_check() dm_bounds_check_wrapper() + +/* + * The pool allocator is useful when you are going to allocate + * lots of memory, use the memory for a bit, and then free the + * memory in one go. A surprising amount of code has this usage + * profile. + * + * You should think of the pool as an infinite, contiguous chunk + * of memory. The front of this chunk of memory contains + * allocated objects; the rest is free. dm_pool_alloc grabs + * the next 'size' bytes from the free part, in effect moving them + * into the allocated part. This operation is very efficient. + * + * dm_pool_free frees the allocated object *and* all objects + * allocated after it. It is important to note this semantic + * difference from malloc/free. This is also extremely + * efficient, since a single dm_pool_free can dispose of a large + * complex object. + * + * dm_pool_destroy frees all allocated memory. + * + * For example, if you are building a binary tree in your program and + * know that you are only ever going to insert into the tree and never + * delete (e.g. maintaining a symbol table for a compiler), you can + * create yourself a pool, allocate the nodes from it, and when the + * tree becomes redundant call dm_pool_destroy (no nasty iterating + * through the tree to free nodes). + * + * On the other hand, if you wanted to repeatedly insert and + * remove objects into the tree, you would be better off + * allocating the nodes from a free list; you cannot free a + * single arbitrary node with a pool. + */ + +struct dm_pool; + +/* constructor and destructor */ +struct dm_pool *dm_pool_create(const char *name, size_t chunk_hint) + __attribute__((__warn_unused_result__)); +void dm_pool_destroy(struct dm_pool *p); + +/* simple allocation/free routines */ +void *dm_pool_alloc(struct dm_pool *p, size_t s) + __attribute__((__warn_unused_result__)); +void *dm_pool_alloc_aligned(struct dm_pool *p, size_t s, unsigned alignment) + __attribute__((__warn_unused_result__)); +void dm_pool_empty(struct dm_pool *p); +void dm_pool_free(struct dm_pool *p, void *ptr);
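Illustrative sketch (editorial addition): the allocate-use-discard pattern the pool allocator is designed for; dm_pool_strdup() is declared just below.

    #include "libdevmapper.h"

    void example_pool_usage(void)
    {
            struct dm_pool *mem = dm_pool_create("example", 1024);
            char *s;

            if (!mem)
                    return;
            s = dm_pool_strdup(mem, "transient data");
            if (s) {
                    /* ... use s and any further pool allocations ... */
            }
            /* One call releases every object allocated from the pool. */
            dm_pool_destroy(mem);
    }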
+ +/* + * To aid debugging, a pool can be locked. Any modifications made + * to the content of the pool while it is locked can be detected. + * By default a CRC checksum is used to detect modifications; when + * compiled with DEBUG_ENFORCE_POOL_LOCKING, pool locking additionally + * uses mprotect to enforce memory protection. + */ +/* query pool lock status */ +int dm_pool_locked(struct dm_pool *p); +/* mark pool as locked */ +int dm_pool_lock(struct dm_pool *p, int crc) + __attribute__((__warn_unused_result__)); +/* mark pool as unlocked */ +int dm_pool_unlock(struct dm_pool *p, int crc) + __attribute__((__warn_unused_result__)); + +/* + * Object building routines: + * + * These allow you to 'grow' an object, useful for + * building strings, or filling in dynamic + * arrays. + * + * It's probably best explained with an example: + * + * char *build_string(struct dm_pool *mem) + * { + * int i; + * char buffer[16]; + * + * if (!dm_pool_begin_object(mem, 128)) + * return NULL; + * + * for (i = 0; i < 50; i++) { + * snprintf(buffer, sizeof(buffer), "%d, ", i); + * if (!dm_pool_grow_object(mem, buffer, 0)) + * goto bad; + * } + * + * // add null + * if (!dm_pool_grow_object(mem, "\0", 1)) + * goto bad; + * + * return dm_pool_end_object(mem); + * + * bad: + * + * dm_pool_abandon_object(mem); + * return NULL; + *} + * + * So start an object by calling dm_pool_begin_object + * with a guess at the final object size - if in + * doubt make the guess too small. + * + * Then append chunks of data to your object with + * dm_pool_grow_object. Finally get your object with + * a call to dm_pool_end_object. + * + * Setting delta to 0 means it will use strlen(extra). + */ +int dm_pool_begin_object(struct dm_pool *p, size_t hint); +int dm_pool_grow_object(struct dm_pool *p, const void *extra, size_t delta); +void *dm_pool_end_object(struct dm_pool *p); +void dm_pool_abandon_object(struct dm_pool *p); + +/* utilities */ +char *dm_pool_strdup(struct dm_pool *p, const char *str) + __attribute__((__warn_unused_result__)); +char *dm_pool_strndup(struct dm_pool *p, const char *str, size_t n) + __attribute__((__warn_unused_result__)); +void *dm_pool_zalloc(struct dm_pool *p, size_t s) + __attribute__((__warn_unused_result__)); + +/****************** + * bitset functions + ******************/ + +typedef uint32_t *dm_bitset_t; + +dm_bitset_t dm_bitset_create(struct dm_pool *mem, unsigned num_bits); +void dm_bitset_destroy(dm_bitset_t bs); + +int dm_bitset_equal(dm_bitset_t in1, dm_bitset_t in2); + +void dm_bit_and(dm_bitset_t out, dm_bitset_t in1, dm_bitset_t in2); +void dm_bit_union(dm_bitset_t out, dm_bitset_t in1, dm_bitset_t in2); +int dm_bit_get_first(dm_bitset_t bs); +int dm_bit_get_next(dm_bitset_t bs, int last_bit); +int dm_bit_get_last(dm_bitset_t bs); +int dm_bit_get_prev(dm_bitset_t bs, int last_bit); + +#define DM_BITS_PER_INT (sizeof(int) * CHAR_BIT) + +#define dm_bit(bs, i) \ + ((bs)[((i) / DM_BITS_PER_INT) + 1] & (0x1 << ((i) & (DM_BITS_PER_INT - 1)))) + +#define dm_bit_set(bs, i) \ + ((bs)[((i) / DM_BITS_PER_INT) + 1] |= (0x1 << ((i) & (DM_BITS_PER_INT - 1)))) + +#define dm_bit_clear(bs, i) \ + ((bs)[((i) / DM_BITS_PER_INT) + 1] &= ~(0x1 << ((i) & (DM_BITS_PER_INT - 1)))) + +#define dm_bit_set_all(bs) \ + memset((bs) + 1, -1, ((*(bs) / DM_BITS_PER_INT) + 1) * sizeof(int)) + +#define dm_bit_clear_all(bs) \ + memset((bs) + 1, 0, ((*(bs) / DM_BITS_PER_INT) + 1) * sizeof(int)) + +#define dm_bit_copy(bs1, bs2) \ + memcpy((bs1) + 1, (bs2) + 1, ((*(bs2) / DM_BITS_PER_INT) + 1) * sizeof(int))
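Illustrative sketch (editorial addition): basic bitset usage with the macros above. Passing a NULL pool is assumed to fall back to dm_malloc(), as documented for dm_bitset_parse_list() below.

    #include "libdevmapper.h"

    int example_bitset(void)
    {
            /* NULL pool: the bitset is allocated with dm_malloc(). */
            dm_bitset_t bs = dm_bitset_create(NULL, 64);
            int ret;

            if (!bs)
                    return 0;
            dm_bit_set(bs, 0);
            ret = dm_bit(bs, 0) ? 1 : 0;
            dm_bitset_destroy(bs);
            return ret;
    }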
+ * If the mem parameter is NULL, memory for the bitset will be allocated
+ * using dm_malloc(). Otherwise the bitset will be allocated using the
+ * supplied dm_pool.
+ */
+dm_bitset_t dm_bitset_parse_list(const char *str, struct dm_pool *mem,
+				 size_t min_num_bits);
+
+/* Returns number of set bits */
+static inline unsigned hweight32(uint32_t i)
+{
+	unsigned r = (i & 0x55555555) + ((i >> 1) & 0x55555555);
+
+	r = (r & 0x33333333) + ((r >> 2) & 0x33333333);
+	r = (r & 0x0F0F0F0F) + ((r >> 4) & 0x0F0F0F0F);
+	r = (r & 0x00FF00FF) + ((r >> 8) & 0x00FF00FF);
+	return (r & 0x0000FFFF) + ((r >> 16) & 0x0000FFFF);
+}
+
+/****************
+ * hash functions
+ ****************/
+
+struct dm_hash_table;
+struct dm_hash_node;
+
+typedef void (*dm_hash_iterate_fn) (void *data);
+
+struct dm_hash_table *dm_hash_create(unsigned size_hint)
+	__attribute__((__warn_unused_result__));
+void dm_hash_destroy(struct dm_hash_table *t);
+void dm_hash_wipe(struct dm_hash_table *t);
+
+void *dm_hash_lookup(struct dm_hash_table *t, const char *key);
+int dm_hash_insert(struct dm_hash_table *t, const char *key, void *data);
+void dm_hash_remove(struct dm_hash_table *t, const char *key);
+
+void *dm_hash_lookup_binary(struct dm_hash_table *t, const void *key, uint32_t len);
+int dm_hash_insert_binary(struct dm_hash_table *t, const void *key, uint32_t len,
+			  void *data);
+void dm_hash_remove_binary(struct dm_hash_table *t, const void *key, uint32_t len);
+
+unsigned dm_hash_get_num_entries(struct dm_hash_table *t);
+void dm_hash_iter(struct dm_hash_table *t, dm_hash_iterate_fn f);
+
+char *dm_hash_get_key(struct dm_hash_table *t, struct dm_hash_node *n);
+void *dm_hash_get_data(struct dm_hash_table *t, struct dm_hash_node *n);
+struct dm_hash_node *dm_hash_get_first(struct dm_hash_table *t);
+struct dm_hash_node *dm_hash_get_next(struct dm_hash_table *t, struct dm_hash_node *n);
+
+/*
+ * dm_hash_insert() replaces the value of an existing
+ * entry with a matching key if one exists.  Otherwise
+ * it adds a new entry.
+ *
+ * dm_hash_insert_allow_multiple() inserts a new entry even if
+ * another entry with the same key already exists.
+ * val_len is the size of the data being inserted.
+ *
+ * If two entries with the same key exist
+ * (added using dm_hash_insert_allow_multiple), then:
+ * . dm_hash_lookup() returns the first one it finds, and
+ *   dm_hash_lookup_with_val() returns the one with a matching
+ *   val_len/val.
+ * . dm_hash_remove() removes the first one it finds, and
+ *   dm_hash_remove_with_val() removes the one with a matching
+ *   val_len/val.
+ *
+ * If a single entry with a given key exists, and it has
+ * zero val_len, then:
+ * . dm_hash_lookup() returns it
+ * . dm_hash_lookup_with_val(val_len=0) returns it
+ * . dm_hash_remove() removes it
+ * . dm_hash_remove_with_val(val_len=0) removes it
+ *
+ * dm_hash_lookup_with_count() is a single call that will
+ * both look up a key's value and check if there is more
+ * than one entry with the given key.
+ *
+ * (It is not meant to retrieve all the entries with the
+ * given key.  In the common case where a single entry exists
+ * for the key, it is useful to have a single call that will
+ * both look up the value and indicate if multiple values
+ * exist for the key.)
+ *
+ * dm_hash_lookup_with_count:
+ * . If no entries exist, the function returns NULL, and
+ *   the count is set to 0.
+ * . If only one entry exists, the value of that entry is
+ *   returned and count is set to 1.
+ * . If N entries exist, the value of the first entry is
+ *   returned and count is set to N.
+ */
+
+void *dm_hash_lookup_with_val(struct dm_hash_table *t, const char *key,
+			      const void *val, uint32_t val_len);
+void dm_hash_remove_with_val(struct dm_hash_table *t, const char *key,
+			     const void *val, uint32_t val_len);
+int dm_hash_insert_allow_multiple(struct dm_hash_table *t, const char *key,
+				  const void *val, uint32_t val_len);
+void *dm_hash_lookup_with_count(struct dm_hash_table *t, const char *key, int *count);
+
+
+#define dm_hash_iterate(v, h) \
+	for (v = dm_hash_get_first((h)); v; \
+	     v = dm_hash_get_next((h), v))
+
+/****************
+ * list functions
+ ****************/
+
+/*
+ * A list consists of a list head plus elements.
+ * Each element has 'next' and 'previous' pointers.
+ * The list head's pointers point to the first and the last element.
+ */
+
+struct dm_list {
+	struct dm_list *n, *p;
+};
+
+/*
+ * String list.
+ */
+struct dm_str_list {
+	struct dm_list list;
+	const char *str;
+};
+
+/*
+ * Initialise a list before use.
+ * The list head's next and previous pointers point back to itself.
+ */
+#define DM_LIST_HEAD_INIT(name)	{ &(name), &(name) }
+#define DM_LIST_INIT(name)	struct dm_list name = DM_LIST_HEAD_INIT(name)
+void dm_list_init(struct dm_list *head);
+
+/*
+ * Insert an element before 'head'.
+ * If 'head' is the list head, this adds an element to the end of the list.
+ */
+void dm_list_add(struct dm_list *head, struct dm_list *elem);
+
+/*
+ * Insert an element after 'head'.
+ * If 'head' is the list head, this adds an element to the front of the list.
+ */
+void dm_list_add_h(struct dm_list *head, struct dm_list *elem);
+
+/*
+ * Delete an element from its list.
+ * Note that this doesn't change the element itself - it may still be safe
+ * to follow its pointers.
+ */
+void dm_list_del(struct dm_list *elem);
+
+/*
+ * Remove an element from an existing list and insert it before 'head'.
+ */
+void dm_list_move(struct dm_list *head, struct dm_list *elem);
+
+/*
+ * Join 'head1' to the end of 'head'.
+ */
+void dm_list_splice(struct dm_list *head, struct dm_list *head1);
+
+/*
+ * Is the list empty?
+ */
+int dm_list_empty(const struct dm_list *head);
+
+/*
+ * Is this the first element of the list?
+ */
+int dm_list_start(const struct dm_list *head, const struct dm_list *elem);
+
+/*
+ * Is this the last element of the list?
+ */
+int dm_list_end(const struct dm_list *head, const struct dm_list *elem);
+
+/*
+ * Return the first element of the list or NULL if empty.
+ */
+struct dm_list *dm_list_first(const struct dm_list *head);
+
+/*
+ * Return the last element of the list or NULL if empty.
+ */
+struct dm_list *dm_list_last(const struct dm_list *head);
+
+/*
+ * Return the previous element of the list, or NULL if we've reached the start.
+ */
+struct dm_list *dm_list_prev(const struct dm_list *head, const struct dm_list *elem);
+
+/*
+ * Return the next element of the list, or NULL if we've reached the end.
+ */
+struct dm_list *dm_list_next(const struct dm_list *head, const struct dm_list *elem);
+
+/*
+ * Given the address v of an instance of 'struct dm_list' called 'head'
+ * contained in a structure of type t, return the containing structure.
+ */
+#define dm_list_struct_base(v, t, head) \
+    ((t *)((const char *)(v) - (const char *)&((t *) 0)->head))
+
+/*
+ * Given the address v of an instance of 'struct dm_list list' contained in
+ * a structure of type t, return the containing structure.
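+ *
+ * A minimal sketch ('struct lv_list' and 'head' here are hypothetical):
+ *
+ *      struct lv_list {
+ *              struct dm_list list;
+ *              const char *name;
+ *      };
+ *
+ *      struct dm_list *lvh = dm_list_first(head);
+ *      struct lv_list *lvl = lvh ? dm_list_item(lvh, struct lv_list) : NULL;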
+ */ +#define dm_list_item(v, t) dm_list_struct_base((v), t, list) + +/* + * Given the address v of one known element e in a known structure of type t, + * return another element f. + */ +#define dm_struct_field(v, t, e, f) \ + (((t *)((uintptr_t)(v) - (uintptr_t)&((t *) 0)->e))->f) + +/* + * Given the address v of a known element e in a known structure of type t, + * return the list head 'list' + */ +#define dm_list_head(v, t, e) dm_struct_field(v, t, e, list) + +/* + * Set v to each element of a list in turn. + */ +#define dm_list_iterate(v, head) \ + for (v = (head)->n; v != head; v = v->n) + +/* + * Set v to each element in a list in turn, starting from the element + * in front of 'start'. + * You can use this to 'unwind' a list_iterate and back out actions on + * already-processed elements. + * If 'start' is 'head' it walks the list backwards. + */ +#define dm_list_uniterate(v, head, start) \ + for (v = (start)->p; v != head; v = v->p) + +/* + * A safe way to walk a list and delete and free some elements along + * the way. + * t must be defined as a temporary variable of the same type as v. + */ +#define dm_list_iterate_safe(v, t, head) \ + for (v = (head)->n, t = v->n; v != head; v = t, t = v->n) + +/* + * Walk a list, setting 'v' in turn to the containing structure of each item. + * The containing structure should be the same type as 'v'. + * The 'struct dm_list' variable within the containing structure is 'field'. + */ +#define dm_list_iterate_items_gen(v, head, field) \ + for (v = dm_list_struct_base((head)->n, __typeof__(*v), field); \ + &v->field != (head); \ + v = dm_list_struct_base(v->field.n, __typeof__(*v), field)) + +/* + * Walk a list, setting 'v' in turn to the containing structure of each item. + * The containing structure should be the same type as 'v'. + * The list should be 'struct dm_list list' within the containing structure. + */ +#define dm_list_iterate_items(v, head) dm_list_iterate_items_gen(v, (head), list) + +/* + * Walk a list, setting 'v' in turn to the containing structure of each item. + * The containing structure should be the same type as 'v'. + * The 'struct dm_list' variable within the containing structure is 'field'. + * t must be defined as a temporary variable of the same type as v. + */ +#define dm_list_iterate_items_gen_safe(v, t, head, field) \ + for (v = dm_list_struct_base((head)->n, __typeof__(*v), field), \ + t = dm_list_struct_base(v->field.n, __typeof__(*v), field); \ + &v->field != (head); \ + v = t, t = dm_list_struct_base(v->field.n, __typeof__(*v), field)) +/* + * Walk a list, setting 'v' in turn to the containing structure of each item. + * The containing structure should be the same type as 'v'. + * The list should be 'struct dm_list list' within the containing structure. + * t must be defined as a temporary variable of the same type as v. + */ +#define dm_list_iterate_items_safe(v, t, head) \ + dm_list_iterate_items_gen_safe(v, t, (head), list) + +/* + * Walk a list backwards, setting 'v' in turn to the containing structure + * of each item. + * The containing structure should be the same type as 'v'. + * The 'struct dm_list' variable within the containing structure is 'field'. + */ +#define dm_list_iterate_back_items_gen(v, head, field) \ + for (v = dm_list_struct_base((head)->p, __typeof__(*v), field); \ + &v->field != (head); \ + v = dm_list_struct_base(v->field.p, __typeof__(*v), field)) + +/* + * Walk a list backwards, setting 'v' in turn to the containing structure + * of each item. 
+ * The containing structure should be the same type as 'v'.
+ * The list should be 'struct dm_list list' within the containing structure.
+ */
+#define dm_list_iterate_back_items(v, head) dm_list_iterate_back_items_gen(v, (head), list)
+
+/*
+ * Return the number of elements in a list by walking it.
+ */
+unsigned int dm_list_size(const struct dm_list *head);
+
+/*********
+ * selinux
+ *********/
+
+/*
+ * Obtain the SELinux security context assigned for the path and set this
+ * context for creating a new file system object. This security context
+ * is global and it is used until reset to the default policy behaviour
+ * by calling 'dm_prepare_selinux_context(NULL, 0)'.
+ */
+int dm_prepare_selinux_context(const char *path, mode_t mode);
+/*
+ * Set the SELinux context for an existing file system object.
+ */
+int dm_set_selinux_context(const char *path, mode_t mode);
+
+/*********************
+ * string manipulation
+ *********************/
+
+/*
+ * Break up the name of a mapped device into its constituent
+ * Volume Group, Logical Volume and Layer (if present).
+ * If mem is supplied, the result is allocated from the mempool.
+ * Otherwise the strings are changed in situ.
+ */
+int dm_split_lvm_name(struct dm_pool *mem, const char *dmname,
+		      char **vgname, char **lvname, char **layer);
+
+/*
+ * Destructively split buffer into NUL-separated words in argv.
+ * Returns number of words.
+ */
+int dm_split_words(char *buffer, unsigned max,
+		   unsigned ignore_comments, /* Not implemented */
+		   char **argv);
+
+/*
+ * Returns -1 if the buffer is too small.
+ */
+int dm_snprintf(char *buf, size_t bufsize, const char *format, ...)
+    __attribute__ ((format(printf, 3, 4)));
+
+/*
+ * Returns pointer to the last component of the path.
+ */
+const char *dm_basename(const char *path);
+
+/*
+ * Returns number of occurrences of 'c' in 'str' of length 'len'.
+ */
+unsigned dm_count_chars(const char *str, size_t len, const int c);
+
+/*
+ * Length of string after escaping double quotes and backslashes.
+ */
+size_t dm_escaped_len(const char *str);
+
+/*
+ * <vg>-<lv>-<layer> or if !layer just <vg>-<lv>.
+ */
+char *dm_build_dm_name(struct dm_pool *mem, const char *vgname,
+		       const char *lvname, const char *layer);
+char *dm_build_dm_uuid(struct dm_pool *mem, const char *prefix, const char *lvid, const char *layer);
+
+/*
+ * Copies a string, quoting double quotes with backslashes.
+ */
+char *dm_escape_double_quotes(char *out, const char *src);
+
+/*
+ * Undo quoting in situ.
+ */
+void dm_unescape_double_quotes(char *src);
+
+/*
+ * Unescape colons and "at" signs in situ and save the substrings
+ * starting at the position of the first unescaped colon and the
+ * first unescaped "at" sign. This is normally used to unescape
+ * device names used as PVs.
+ */
+void dm_unescape_colons_and_at_signs(char *src,
+				     char **substr_first_unquoted_colon,
+				     char **substr_first_unquoted_at_sign);
+
+/*
+ * Replacement for the strncpy() function.
+ *
+ * Copies no more than n bytes from the string pointed to by src to the
+ * buffer pointed to by dest and ensures the string is terminated with '\0'.
+ * Returns 0 if the whole string does not fit.
+ */
+int dm_strncpy(char *dest, const char *src, size_t n);
+
+/*
+ * Recognize a unit specifier in the 'units' arg and return a factor
+ * representing that unit. If 'units' contains a prefix with digits,
+ * it is considered to be a custom unit.
+ *
+ * Also, set the 'unit_type' output arg to the character that represents
+ * the unit specified.
+ * For canonical units, the 'unit_type' character equals the unit
+ * character itself as recognized in the 'units' arg.
+ * Otherwise, the 'unit_type' character is set to 'U' for a custom unit.
+ *
+ * An example for k/K canonical units and 8k/8K custom units:
+ *
+ *      units    unit_type    return value (factor)
+ *      k        k            1024
+ *      K        K            1000
+ *      8k       U            1024*8
+ *      8K       U            1000*8
+ *      etc...
+ *
+ * Recognized units:
+ *
+ *      h/H - human readable (returns 1 for both)
+ *      b/B - byte (returns 1 for both)
+ *      s/S - sector (returns 512 for both)
+ *      k/K - kilo (returns 1024/1000 respectively)
+ *      m/M - mega (returns 1024^2/1000^2 respectively)
+ *      g/G - giga (returns 1024^3/1000^3 respectively)
+ *      t/T - tera (returns 1024^4/1000^4 respectively)
+ *      p/P - peta (returns 1024^5/1000^5 respectively)
+ *      e/E - exa (returns 1024^6/1000^6 respectively)
+ *
+ * Only one units character is allowed in the 'units' arg
+ * if strict mode is enabled by the 'strict' arg.
+ *
+ * The 'endptr' output arg, if not NULL, saves the pointer
+ * into the 'units' string just past the unit specifier
+ * recognized (IOW the position where the parsing of the
+ * unit specifier stopped).
+ *
+ * Returns the unit factor or 0 if no unit is recognized.
+ */
+uint64_t dm_units_to_factor(const char *units, char *unit_type,
+			    int strict, const char **endptr);
+
+/*
+ * Type of unit specifier used by dm_size_to_string().
+ */
+typedef enum {
+	DM_SIZE_LONG = 0,	/* Megabyte */
+	DM_SIZE_SHORT = 1,	/* MB or MiB */
+	DM_SIZE_UNIT = 2	/* M or m */
+} dm_size_suffix_t;
+
+/*
+ * Convert a size (in 512-byte sectors) into a printable string using units of unit_type.
+ * An upper-case unit_type indicates output units based on powers of 1000 are
+ * required; a lower-case unit_type indicates powers of 1024.
+ * For correct operation, unit_factor must be one of:
+ *	0 - the correct value will be calculated internally;
+ *	or the output from dm_units_to_factor() corresponding to unit_type;
+ *	or, if unit_type is 'u' or 'U', an arbitrary number of bytes to use
+ *	as the power base.
+ * Set include_suffix to 1 to include a suffix of suffix_type.
+ * Set use_si_units to 0 for suffixes that don't distinguish between 1000 and 1024.
+ * Set use_si_units to 1 for a suffix that does distinguish.
+ */
+const char *dm_size_to_string(struct dm_pool *mem, uint64_t size,
+			      char unit_type, int use_si_units,
+			      uint64_t unit_factor, int include_suffix,
+			      dm_size_suffix_t suffix_type);
+
+/**************************
+ * file/stream manipulation
+ **************************/
+
+/*
+ * Create a directory (with parent directories if necessary).
+ * Returns 1 on success, 0 on failure.
+ */
+int dm_create_dir(const char *dir);
+
+int dm_is_empty_dir(const char *dir);
+
+/*
+ * Close a stream, with nicer error checking than fclose's.
+ * Derived from gnulib's close-stream.c.
+ *
+ * Close "stream".  Return 0 if successful, and EOF (setting errno)
+ * otherwise.  Upon failure, set errno to 0 if the error number
+ * cannot be determined.  Useful mainly for writable streams.
+ */
+int dm_fclose(FILE *stream);
+
+/*
+ * Returns size of a buffer which is allocated with dm_malloc.
+ * Pointer to the buffer is stored in *buf.
+ * Returns -1 on failure leaving buf undefined.
+ */
+int dm_asprintf(char **buf, const char *format, ...)
+    __attribute__ ((format(printf, 2, 3)));
+int dm_vasprintf(char **buf, const char *format, va_list ap)
+    __attribute__ ((format(printf, 2, 0)));
+
+/*
+ * create lockfile (pidfile) - create and lock a lock file
+ * @lockfile: location of lock file
+ *
+ * Returns: 1 on success, 0 otherwise, errno is handled internally
+ */
+int dm_create_lockfile(const char* lockfile);
+
+/*
+ * Query whether a daemon is running based on its lockfile
+ *
+ * Returns: 1 if running, 0 if not
+ */
+int dm_daemon_is_running(const char* lockfile);
+
+/*********************
+ * regular expressions
+ *********************/
+struct dm_regex;
+
+/*
+ * Initialise an array of num_patterns patterns for matching.
+ * Uses memory from mem.
+ */
+struct dm_regex *dm_regex_create(struct dm_pool *mem, const char * const *patterns,
+				 unsigned num_patterns);
+
+/*
+ * Match string s against the patterns.
+ * Returns the index of the highest pattern in the array that matches,
+ * or -1 if none match.
+ */
+int dm_regex_match(struct dm_regex *regex, const char *s);
+
+/*
+ * This is useful for regression testing only.  The idea is that if two
+ * fingerprints are different, then the two dfas are certainly not
+ * isomorphic.  If two fingerprints _are_ the same, then it's very likely
+ * that the dfas are isomorphic.
+ *
+ * This function must be called before any matching is done.
+ */
+uint32_t dm_regex_fingerprint(struct dm_regex *regex);
+
+/******************
+ * percent handling
+ ******************/
+/*
+ * A fixed-point representation of percent values. One percent equals
+ * DM_PERCENT_1 as defined below. Values that are not multiples of DM_PERCENT_1
+ * represent fractions, with a precision of 1/1000000 of a percent. See
+ * dm_percent_to_float for a conversion to a floating-point representation.
+ *
+ * You should always use dm_make_percent when building dm_percent_t values. The
+ * implementation of dm_make_percent is biased towards the middle: it ensures that
+ * the result is DM_PERCENT_0 or DM_PERCENT_100 if and only if this is the actual
+ * value -- it never rounds any intermediate value (> 0 or < 100) to either 0
+ * or 100.
+ */
+#define DM_PERCENT_CHAR '%'
+
+typedef enum {
+	DM_PERCENT_0 = 0,
+	DM_PERCENT_1 = 1000000,
+	DM_PERCENT_100 = 100 * DM_PERCENT_1,
+	DM_PERCENT_INVALID = -1,
+	DM_PERCENT_FAILED = -2
+} dm_percent_range_t;
+
+typedef int32_t dm_percent_t;
+
+float dm_percent_to_float(dm_percent_t percent);
+/*
+ * Return an adjusted/rounded float for better percent value printing.
+ * The function ensures, for the given precision of digits:
+ *   100.0% is returned only when the value is DM_PERCENT_100;
+ *     close smaller values are rounded to the nearest smaller value.
+ *   0.0% is returned only for the value DM_PERCENT_0;
+ *     close bigger values are rounded to the nearest bigger value.
+ * In all other cases it returns the same value as dm_percent_to_float().
+ */
+float dm_percent_to_round_float(dm_percent_t percent, unsigned digits);
+dm_percent_t dm_make_percent(uint64_t numerator, uint64_t denominator);
+
+/********************
+ * timestamp handling
+ ********************/
+
+/*
+ * Create a dm_timestamp object to use with dm_timestamp_get.
+ */
+struct dm_timestamp *dm_timestamp_alloc(void);
+
+/*
+ * Update dm_timestamp object to represent the current time.
+ */
+int dm_timestamp_get(struct dm_timestamp *ts);
+
+/*
+ * Copy a timestamp from ts_old to ts_new.
+ */
+void dm_timestamp_copy(struct dm_timestamp *ts_new, struct dm_timestamp *ts_old);
+
+/*
+ * Compare two timestamps.
+ * + * Return: -1 if ts1 is less than ts2 + * 0 if ts1 is equal to ts2 + * 1 if ts1 is greater than ts2 + */ +int dm_timestamp_compare(struct dm_timestamp *ts1, struct dm_timestamp *ts2); + +/* + * Return the absolute difference in nanoseconds between + * the dm_timestamp objects ts1 and ts2. + * + * Callers that need to know whether ts1 is before, equal to, or after ts2 + * in addition to the magnitude should use dm_timestamp_compare. + */ +uint64_t dm_timestamp_delta(struct dm_timestamp *ts1, struct dm_timestamp *ts2); + +/* + * Destroy a dm_timestamp object. + */ +void dm_timestamp_destroy(struct dm_timestamp *ts); + +/********************* + * reporting functions + *********************/ + +struct dm_report_object_type { + uint32_t id; /* Powers of 2 */ + const char *desc; + const char *prefix; /* field id string prefix (optional) */ + /* FIXME: convert to proper usage of const pointers here */ + void *(*data_fn)(void *object); /* callback from report_object() */ +}; + +struct dm_report_field; + +/* + * dm_report_field_type flags + */ +#define DM_REPORT_FIELD_MASK 0x00000FFF +#define DM_REPORT_FIELD_ALIGN_MASK 0x0000000F +#define DM_REPORT_FIELD_ALIGN_LEFT 0x00000001 +#define DM_REPORT_FIELD_ALIGN_RIGHT 0x00000002 +#define DM_REPORT_FIELD_TYPE_MASK 0x00000FF0 +#define DM_REPORT_FIELD_TYPE_NONE 0x00000000 +#define DM_REPORT_FIELD_TYPE_STRING 0x00000010 +#define DM_REPORT_FIELD_TYPE_NUMBER 0x00000020 +#define DM_REPORT_FIELD_TYPE_SIZE 0x00000040 +#define DM_REPORT_FIELD_TYPE_PERCENT 0x00000080 +#define DM_REPORT_FIELD_TYPE_STRING_LIST 0x00000100 +#define DM_REPORT_FIELD_TYPE_TIME 0x00000200 + +/* For use with reserved values only! */ +#define DM_REPORT_FIELD_RESERVED_VALUE_MASK 0x0000000F +#define DM_REPORT_FIELD_RESERVED_VALUE_NAMED 0x00000001 /* only named value, less strict form of reservation */ +#define DM_REPORT_FIELD_RESERVED_VALUE_RANGE 0x00000002 /* value is range - low and high value defined */ +#define DM_REPORT_FIELD_RESERVED_VALUE_DYNAMIC_VALUE 0x00000004 /* value is computed in runtime */ +#define DM_REPORT_FIELD_RESERVED_VALUE_FUZZY_NAMES 0x00000008 /* value names are recognized in runtime */ + +#define DM_REPORT_FIELD_TYPE_ID_LEN 32 +#define DM_REPORT_FIELD_TYPE_HEADING_LEN 32 + +struct dm_report; +struct dm_report_field_type { + uint32_t type; /* object type id */ + uint32_t flags; /* DM_REPORT_FIELD_* */ + uint32_t offset; /* byte offset in the object */ + int32_t width; /* default width */ + /* string used to specify the field */ + const char id[DM_REPORT_FIELD_TYPE_ID_LEN]; + /* string printed in header */ + const char heading[DM_REPORT_FIELD_TYPE_HEADING_LEN]; + int (*report_fn)(struct dm_report *rh, struct dm_pool *mem, + struct dm_report_field *field, const void *data, + void *private_data); + const char *desc; /* description of the field */ +}; + +/* + * Per-field reserved value. + */ +struct dm_report_field_reserved_value { + /* field_num is the position of the field in 'fields' + array passed to dm_report_init_with_selection */ + uint32_t field_num; + /* the value is of the same type as the field + identified by field_num */ + const void *value; +}; + +/* + * Reserved value is a 'value' that is used directly if any of the 'names' is hit + * or in case of fuzzy names, if such fuzzy name matches. + * + * If type is any of DM_REPORT_FIELD_TYPE_*, the reserved value is recognized + * for all fields of that type. 
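+ *
+ * For instance, a hypothetical entry reserving the name "unknown" for all
+ * size fields, where _unknown_size is a uint64_t and _unknown_names is a
+ * NULL-terminated array of strings:
+ *
+ *      { DM_REPORT_FIELD_TYPE_SIZE, &_unknown_size, _unknown_names,
+ *        "Size is unknown." },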
+ * + * If type is DM_REPORT_FIELD_TYPE_NONE, the reserved value is recognized + * for the exact field specified - hence the type of the value is automatically + * the same as the type of the field itself. + * + * The array of reserved values is used to initialize reporting with + * selection enabled (see also dm_report_init_with_selection function). + */ +struct dm_report_reserved_value { + const uint32_t type; /* DM_REPORT_FIELD_RESERVED_VALUE_* and DM_REPORT_FIELD_TYPE_* */ + const void *value; /* reserved value: + uint64_t for DM_REPORT_FIELD_TYPE_NUMBER + uint64_t for DM_REPORT_FIELD_TYPE_SIZE (number of 512-byte sectors) + uint64_t for DM_REPORT_FIELD_TYPE_PERCENT + const char* for DM_REPORT_FIELD_TYPE_STRING + struct dm_report_field_reserved_value for DM_REPORT_FIELD_TYPE_NONE + dm_report_reserved_handler* if DM_REPORT_FIELD_RESERVED_VALUE_{DYNAMIC_VALUE,FUZZY_NAMES} is used */ + const char **names; /* null-terminated array of static names for this reserved value */ + const char *description; /* description of the reserved value */ +}; + +/* + * Available actions for dm_report_reserved_value_handler. + */ +typedef enum { + DM_REPORT_RESERVED_PARSE_FUZZY_NAME, + DM_REPORT_RESERVED_GET_DYNAMIC_VALUE, +} dm_report_reserved_action_t; + +/* + * Generic reserved value handler to process reserved value names and/or values. + * + * Actions and their input/output: + * + * DM_REPORT_RESERVED_PARSE_FUZZY_NAME + * data_in: const char *fuzzy_name + * data_out: const char *canonical_name, NULL if fuzzy_name not recognized + * + * DM_REPORT_RESERVED_GET_DYNAMIC_VALUE + * data_in: const char *canonical_name + * data_out: void *value, NULL if canonical_name not recognized + * + * All actions return: + * + * -1 if action not implemented + * 0 on error + * 1 on success + */ +typedef int (*dm_report_reserved_handler) (struct dm_report *rh, + struct dm_pool *mem, + uint32_t field_num, + dm_report_reserved_action_t action, + const void *data_in, + const void **data_out); + +/* + * The dm_report_value_cache_{set,get} are helper functions to store and retrieve + * various values used during reporting (dm_report_field_type.report_fn) and/or + * selection processing (dm_report_reserved_handler instances) to avoid + * recalculation of these values or to share values among calls. 
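+ *
+ * A hypothetical sketch of caching an expensive value between calls
+ * (_read_kernel_version is an illustrative helper, not part of libdm):
+ *
+ *      if (!(cached = dm_report_value_cache_get(rh, "kernel_version"))) {
+ *              cached = _read_kernel_version(mem);
+ *              if (!dm_report_value_cache_set(rh, "kernel_version", cached))
+ *                      return 0;
+ *      }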
+ */ +int dm_report_value_cache_set(struct dm_report *rh, const char *name, const void *data); +const void *dm_report_value_cache_get(struct dm_report *rh, const char *name); +/* + * dm_report_init output_flags + */ +#define DM_REPORT_OUTPUT_MASK 0x000000FF +#define DM_REPORT_OUTPUT_ALIGNED 0x00000001 +#define DM_REPORT_OUTPUT_BUFFERED 0x00000002 +#define DM_REPORT_OUTPUT_HEADINGS 0x00000004 +#define DM_REPORT_OUTPUT_FIELD_NAME_PREFIX 0x00000008 +#define DM_REPORT_OUTPUT_FIELD_UNQUOTED 0x00000010 +#define DM_REPORT_OUTPUT_COLUMNS_AS_ROWS 0x00000020 +#define DM_REPORT_OUTPUT_MULTIPLE_TIMES 0x00000040 + +struct dm_report *dm_report_init(uint32_t *report_types, + const struct dm_report_object_type *types, + const struct dm_report_field_type *fields, + const char *output_fields, + const char *output_separator, + uint32_t output_flags, + const char *sort_keys, + void *private_data); +struct dm_report *dm_report_init_with_selection(uint32_t *report_types, + const struct dm_report_object_type *types, + const struct dm_report_field_type *fields, + const char *output_fields, + const char *output_separator, + uint32_t output_flags, + const char *sort_keys, + const char *selection, + const struct dm_report_reserved_value reserved_values[], + void *private_data); +/* + * Report an object, pass it through the selection criteria if they + * are present and display the result on output if it passes the criteria. + */ +int dm_report_object(struct dm_report *rh, void *object); +/* + * The same as dm_report_object, but display the result on output only if + * 'do_output' arg is set. Also, save the result of selection in 'selected' + * arg if it's not NULL (either 1 if the object passes, otherwise 0). + */ +int dm_report_object_is_selected(struct dm_report *rh, void *object, int do_output, int *selected); + +/* + * Compact report output so that if field value is empty for all rows in + * the report, drop the field from output completely (including headers). + * Compact output is applicable only if report is buffered, otherwise + * this function has no effect. + */ +int dm_report_compact_fields(struct dm_report *rh); + +/* + * The same as dm_report_compact_fields, but for selected fields only. + * The "fields" arg is comma separated list of field names (the same format + * as used for "output_fields" arg in dm_report_init fn). + */ +int dm_report_compact_given_fields(struct dm_report *rh, const char *fields); + +/* + * Returns 1 if there is no data waiting to be output. + */ +int dm_report_is_empty(struct dm_report *rh); + +/* + * Destroy report content without doing output. + */ +void dm_report_destroy_rows(struct dm_report *rh); + +int dm_report_output(struct dm_report *rh); + +/* + * Output the report headings for a columns-based report, even if they + * have already been shown. Useful for repeating reports that wish to + * issue a periodic reminder of the column headings. + */ +int dm_report_column_headings(struct dm_report *rh); + +void dm_report_free(struct dm_report *rh); + +/* + * Prefix added to each field name with DM_REPORT_OUTPUT_FIELD_NAME_PREFIX + */ +int dm_report_set_output_field_name_prefix(struct dm_report *rh, + const char *report_prefix); + +int dm_report_set_selection(struct dm_report *rh, const char *selection); + +/* + * Report functions are provided for simple data types. + * They take care of allocating copies of the data. 
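+ *
+ * A minimal sketch of a field's report_fn using one of these helpers,
+ * assuming 'data' points at a uint64_t within the reported object:
+ *
+ *      static int _size_disp(struct dm_report *rh, struct dm_pool *mem,
+ *                            struct dm_report_field *field,
+ *                            const void *data, void *private_data)
+ *      {
+ *              return dm_report_field_uint64(rh, field, data);
+ *      }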
+ */ +int dm_report_field_string(struct dm_report *rh, struct dm_report_field *field, + const char *const *data); +int dm_report_field_string_list(struct dm_report *rh, struct dm_report_field *field, + const struct dm_list *data, const char *delimiter); +int dm_report_field_string_list_unsorted(struct dm_report *rh, struct dm_report_field *field, + const struct dm_list *data, const char *delimiter); +int dm_report_field_int32(struct dm_report *rh, struct dm_report_field *field, + const int32_t *data); +int dm_report_field_uint32(struct dm_report *rh, struct dm_report_field *field, + const uint32_t *data); +int dm_report_field_int(struct dm_report *rh, struct dm_report_field *field, + const int *data); +int dm_report_field_uint64(struct dm_report *rh, struct dm_report_field *field, + const uint64_t *data); +int dm_report_field_percent(struct dm_report *rh, struct dm_report_field *field, + const dm_percent_t *data); + +/* + * For custom fields, allocate the data in 'mem' and use + * dm_report_field_set_value(). + * 'sortvalue' may be NULL if it matches 'value' + */ +void dm_report_field_set_value(struct dm_report_field *field, const void *value, + const void *sortvalue); + +/* + * Report group support. + */ +struct dm_report_group; + +typedef enum { + DM_REPORT_GROUP_SINGLE, + DM_REPORT_GROUP_BASIC, + DM_REPORT_GROUP_JSON +} dm_report_group_type_t; + +struct dm_report_group *dm_report_group_create(dm_report_group_type_t type, void *data); +int dm_report_group_push(struct dm_report_group *group, struct dm_report *report, void *data); +int dm_report_group_pop(struct dm_report_group *group); +int dm_report_group_output_and_pop_all(struct dm_report_group *group); +int dm_report_group_destroy(struct dm_report_group *group); + +/* + * Stats counter access methods + * + * Each method returns the corresponding stats counter value from the + * supplied dm_stats handle for the specified region_id and area_id. + * If either region_id or area_id uses one of the special values + * DM_STATS_REGION_CURRENT or DM_STATS_AREA_CURRENT then the region + * or area is selected according to the current state of the dm_stats + * handle's embedded cursor. + * + * Two methods are provided to access counter values: a named function + * for each available counter field and a single function that accepts + * an enum value specifying the required field. New code is encouraged + * to use the enum based interface as calls to the named functions are + * implemented using the enum method internally. 
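+ *
+ * For example, the following two calls return the same value:
+ *
+ *      dm_stats_get_reads(dms, region_id, area_id);
+ *      dm_stats_get_counter(dms, DM_STATS_READS_COUNT, region_id, area_id);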
+ * + * See the kernel documentation for complete descriptions of each + * counter field: + * + * Documentation/device-mapper/statistics.txt + * Documentation/iostats.txt + * + * reads: the number of reads completed + * reads_merged: the number of reads merged + * read_sectors: the number of sectors read + * read_nsecs: the number of nanoseconds spent reading + * writes: the number of writes completed + * writes_merged: the number of writes merged + * write_sectors: the number of sectors written + * write_nsecs: the number of nanoseconds spent writing + * io_in_progress: the number of I/Os currently in progress + * io_nsecs: the number of nanoseconds spent doing I/Os + * weighted_io_nsecs: the weighted number of nanoseconds spent doing I/Os + * total_read_nsecs: the total time spent reading in nanoseconds + * total_write_nsecs: the total time spent writing in nanoseconds + */ + +#define DM_STATS_REGION_CURRENT UINT64_MAX +#define DM_STATS_AREA_CURRENT UINT64_MAX + +typedef enum { + DM_STATS_READS_COUNT, + DM_STATS_READS_MERGED_COUNT, + DM_STATS_READ_SECTORS_COUNT, + DM_STATS_READ_NSECS, + DM_STATS_WRITES_COUNT, + DM_STATS_WRITES_MERGED_COUNT, + DM_STATS_WRITE_SECTORS_COUNT, + DM_STATS_WRITE_NSECS, + DM_STATS_IO_IN_PROGRESS_COUNT, + DM_STATS_IO_NSECS, + DM_STATS_WEIGHTED_IO_NSECS, + DM_STATS_TOTAL_READ_NSECS, + DM_STATS_TOTAL_WRITE_NSECS, + DM_STATS_NR_COUNTERS +} dm_stats_counter_t; + +uint64_t dm_stats_get_counter(const struct dm_stats *dms, + dm_stats_counter_t counter, + uint64_t region_id, uint64_t area_id); + +uint64_t dm_stats_get_reads(const struct dm_stats *dms, + uint64_t region_id, uint64_t area_id); + +uint64_t dm_stats_get_reads_merged(const struct dm_stats *dms, + uint64_t region_id, uint64_t area_id); + +uint64_t dm_stats_get_read_sectors(const struct dm_stats *dms, + uint64_t region_id, uint64_t area_id); + +uint64_t dm_stats_get_read_nsecs(const struct dm_stats *dms, + uint64_t region_id, uint64_t area_id); + +uint64_t dm_stats_get_writes(const struct dm_stats *dms, + uint64_t region_id, uint64_t area_id); + +uint64_t dm_stats_get_writes_merged(const struct dm_stats *dms, + uint64_t region_id, uint64_t area_id); + +uint64_t dm_stats_get_write_sectors(const struct dm_stats *dms, + uint64_t region_id, uint64_t area_id); + +uint64_t dm_stats_get_write_nsecs(const struct dm_stats *dms, + uint64_t region_id, uint64_t area_id); + +uint64_t dm_stats_get_io_in_progress(const struct dm_stats *dms, + uint64_t region_id, uint64_t area_id); + +uint64_t dm_stats_get_io_nsecs(const struct dm_stats *dms, + uint64_t region_id, uint64_t area_id); + +uint64_t dm_stats_get_weighted_io_nsecs(const struct dm_stats *dms, + uint64_t region_id, uint64_t area_id); + +uint64_t dm_stats_get_total_read_nsecs(const struct dm_stats *dms, + uint64_t region_id, uint64_t area_id); + +uint64_t dm_stats_get_total_write_nsecs(const struct dm_stats *dms, + uint64_t region_id, uint64_t area_id); + +/* + * Derived statistics access methods + * + * Each method returns the corresponding value calculated from the + * counters stored in the supplied dm_stats handle for the specified + * region_id and area_id. If either region_id or area_id uses one of the + * special values DM_STATS_REGION_CURRENT or DM_STATS_AREA_CURRENT then + * the region or area is selected according to the current state of the + * dm_stats handle's embedded cursor. + * + * The set of metrics is based on the fields provided by the Linux + * iostats program. 
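+ *
+ * An illustrative sketch reading one derived metric:
+ *
+ *      double await;
+ *
+ *      if (dm_stats_get_metric(dms, DM_STATS_AVERAGE_WAIT_TIME,
+ *                              region_id, area_id, &await))
+ *              ... await now holds the average wait time in nanoseconds ...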
+ * + * rd_merges_per_sec: the number of reads merged per second + * wr_merges_per_sec: the number of writes merged per second + * reads_per_sec: the number of reads completed per second + * writes_per_sec: the number of writes completed per second + * read_sectors_per_sec: the number of sectors read per second + * write_sectors_per_sec: the number of sectors written per second + * average_request_size: the average size of requests submitted + * service_time: the average service time (in ns) for requests issued + * average_queue_size: the average queue length + * average_wait_time: the average time for requests to be served (in ns) + * average_rd_wait_time: the average read wait time + * average_wr_wait_time: the average write wait time + */ + +typedef enum { + DM_STATS_RD_MERGES_PER_SEC, + DM_STATS_WR_MERGES_PER_SEC, + DM_STATS_READS_PER_SEC, + DM_STATS_WRITES_PER_SEC, + DM_STATS_READ_SECTORS_PER_SEC, + DM_STATS_WRITE_SECTORS_PER_SEC, + DM_STATS_AVERAGE_REQUEST_SIZE, + DM_STATS_AVERAGE_QUEUE_SIZE, + DM_STATS_AVERAGE_WAIT_TIME, + DM_STATS_AVERAGE_RD_WAIT_TIME, + DM_STATS_AVERAGE_WR_WAIT_TIME, + DM_STATS_SERVICE_TIME, + DM_STATS_THROUGHPUT, + DM_STATS_UTILIZATION, + DM_STATS_NR_METRICS +} dm_stats_metric_t; + +int dm_stats_get_metric(const struct dm_stats *dms, int metric, + uint64_t region_id, uint64_t area_id, double *value); + +int dm_stats_get_rd_merges_per_sec(const struct dm_stats *dms, double *rrqm, + uint64_t region_id, uint64_t area_id); + +int dm_stats_get_wr_merges_per_sec(const struct dm_stats *dms, double *rrqm, + uint64_t region_id, uint64_t area_id); + +int dm_stats_get_reads_per_sec(const struct dm_stats *dms, double *rd_s, + uint64_t region_id, uint64_t area_id); + +int dm_stats_get_writes_per_sec(const struct dm_stats *dms, double *wr_s, + uint64_t region_id, uint64_t area_id); + +int dm_stats_get_read_sectors_per_sec(const struct dm_stats *dms, + double *rsec_s, uint64_t region_id, + uint64_t area_id); + +int dm_stats_get_write_sectors_per_sec(const struct dm_stats *dms, + double *wr_s, uint64_t region_id, + uint64_t area_id); + +int dm_stats_get_average_request_size(const struct dm_stats *dms, + double *arqsz, uint64_t region_id, + uint64_t area_id); + +int dm_stats_get_service_time(const struct dm_stats *dms, double *svctm, + uint64_t region_id, uint64_t area_id); + +int dm_stats_get_average_queue_size(const struct dm_stats *dms, double *qusz, + uint64_t region_id, uint64_t area_id); + +int dm_stats_get_average_wait_time(const struct dm_stats *dms, double *await, + uint64_t region_id, uint64_t area_id); + +int dm_stats_get_average_rd_wait_time(const struct dm_stats *dms, + double *await, uint64_t region_id, + uint64_t area_id); + +int dm_stats_get_average_wr_wait_time(const struct dm_stats *dms, + double *await, uint64_t region_id, + uint64_t area_id); + +int dm_stats_get_throughput(const struct dm_stats *dms, double *tput, + uint64_t region_id, uint64_t area_id); + +int dm_stats_get_utilization(const struct dm_stats *dms, dm_percent_t *util, + uint64_t region_id, uint64_t area_id); + +/* + * Statistics histogram access methods. + * + * Methods to access latency histograms for regions that have them + * enabled. Each histogram contains a configurable number of bins + * spanning a user defined latency interval. + * + * The bin count, upper and lower bin bounds, and bin values are + * made available via the following area methods. + * + * Methods to obtain a simple string representation of the histogram + * and its bounds are also provided. 
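+ *
+ * An illustrative sketch of walking the bins of one area's histogram:
+ *
+ *      struct dm_histogram *dmh;
+ *      int bin;
+ *
+ *      if ((dmh = dm_stats_get_histogram(dms, region_id, area_id)))
+ *              for (bin = 0; bin < dm_histogram_get_nr_bins(dmh); bin++)
+ *                      ... dm_histogram_get_bin_count(dmh, bin) ...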
+ */ + +/* + * Retrieve a pointer to the histogram associated with the specified + * area. If the area does not have a histogram configured this function + * returns NULL. + * + * The pointer does not need to be freed explicitly by the caller: it + * will become invalid following a subsequent dm_stats_list(), + * dm_stats_populate() or dm_stats_destroy() of the corresponding + * dm_stats handle. + * + * If region_id or area_id is one of the special values + * DM_STATS_REGION_CURRENT or DM_STATS_AREA_CURRENT the current cursor + * value is used to select the region or area. + */ +struct dm_histogram *dm_stats_get_histogram(const struct dm_stats *dms, + uint64_t region_id, + uint64_t area_id); + +/* + * Return the number of bins in the specified histogram handle. + */ +int dm_histogram_get_nr_bins(const struct dm_histogram *dmh); + +/* + * Get the lower bound of the specified bin of the histogram for the + * area specified by region_id and area_id. The value is returned in + * nanoseconds. + */ +uint64_t dm_histogram_get_bin_lower(const struct dm_histogram *dmh, int bin); + +/* + * Get the upper bound of the specified bin of the histogram for the + * area specified by region_id and area_id. The value is returned in + * nanoseconds. + */ +uint64_t dm_histogram_get_bin_upper(const struct dm_histogram *dmh, int bin); + +/* + * Get the width of the specified bin of the histogram for the area + * specified by region_id and area_id. The width is equal to the bin + * upper bound minus the lower bound and yields the range of latency + * values covered by this bin. The value is returned in nanoseconds. + */ +uint64_t dm_histogram_get_bin_width(const struct dm_histogram *dmh, int bin); + +/* + * Get the value of the specified bin of the histogram for the area + * specified by region_id and area_id. + */ +uint64_t dm_histogram_get_bin_count(const struct dm_histogram *dmh, int bin); + +/* + * Get the percentage (relative frequency) of the specified bin of the + * histogram for the area specified by region_id and area_id. + */ +dm_percent_t dm_histogram_get_bin_percent(const struct dm_histogram *dmh, + int bin); + +/* + * Return the total observations (sum of bin counts) for the histogram + * of the area specified by region_id and area_id. + */ +uint64_t dm_histogram_get_sum(const struct dm_histogram *dmh); + +/* + * Histogram formatting flags. + */ +#define DM_HISTOGRAM_SUFFIX 0x1 +#define DM_HISTOGRAM_VALUES 0x2 +#define DM_HISTOGRAM_PERCENT 0X4 +#define DM_HISTOGRAM_BOUNDS_LOWER 0x10 +#define DM_HISTOGRAM_BOUNDS_UPPER 0x20 +#define DM_HISTOGRAM_BOUNDS_RANGE 0x30 + +/* + * Return a string representation of the supplied histogram's values and + * bin boundaries. + * + * The bin argument selects the bin to format. If this argument is less + * than zero all bins will be included in the resulting string. + * + * width specifies a minimum width for the field in characters; if it is + * zero the width will be determined automatically based on the options + * selected for formatting. A value less than zero disables field width + * control: bin boundaries and values will be output with a minimum + * amount of whitespace. + * + * flags is a collection of flag arguments that control the string format: + * + * DM_HISTOGRAM_VALUES - Include bin values in the string. + * DM_HISTOGRAM_SUFFIX - Include time unit suffixes when printing bounds. + * DM_HISTOGRAM_PERCENT - Format bin values as a percentage. + * + * DM_HISTOGRAM_BOUNDS_LOWER - Include the lower bound of each bin. 
+ * DM_HISTOGRAM_BOUNDS_UPPER - Include the upper bound of each bin.
+ * DM_HISTOGRAM_BOUNDS_RANGE - Show the span of each bin as "lo-up".
+ *
+ * The returned pointer does not need to be freed explicitly by the
+ * caller: it will become invalid following a subsequent
+ * dm_stats_list(), dm_stats_populate() or dm_stats_destroy() of the
+ * corresponding dm_stats handle.
+ */
+const char *dm_histogram_to_string(const struct dm_histogram *dmh, int bin,
+				   int width, int flags);
+
+/*************************
+ * config file parse/print
+ *************************/
+typedef enum {
+	DM_CFG_INT,
+	DM_CFG_FLOAT,
+	DM_CFG_STRING,
+	DM_CFG_EMPTY_ARRAY
+} dm_config_value_type_t;
+
+struct dm_config_value {
+	dm_config_value_type_t type;
+
+	union {
+		int64_t i;
+		float f;
+		double d;	/* Unused. */
+		const char *str;
+	} v;
+
+	struct dm_config_value *next;	/* For arrays */
+	uint32_t format_flags;
+};
+
+struct dm_config_node {
+	const char *key;
+	struct dm_config_node *parent, *sib, *child;
+	struct dm_config_value *v;
+	int id;
+};
+
+struct dm_config_tree {
+	struct dm_config_node *root;
+	struct dm_config_tree *cascade;
+	struct dm_pool *mem;
+	void *custom;
+};
+
+struct dm_config_tree *dm_config_create(void);
+struct dm_config_tree *dm_config_from_string(const char *config_settings);
+int dm_config_parse(struct dm_config_tree *cft, const char *start, const char *end);
+int dm_config_parse_without_dup_node_check(struct dm_config_tree *cft, const char *start, const char *end);
+
+void *dm_config_get_custom(struct dm_config_tree *cft);
+void dm_config_set_custom(struct dm_config_tree *cft, void *custom);
+
+/*
+ * When searching, first_cft is checked before second_cft.
+ */
+struct dm_config_tree *dm_config_insert_cascaded_tree(struct dm_config_tree *first_cft, struct dm_config_tree *second_cft);
+
+/*
+ * If there's a cascaded dm_config_tree, remove the top layer
+ * and return the layer below.  Otherwise return NULL.
+ */
+struct dm_config_tree *dm_config_remove_cascaded_tree(struct dm_config_tree *cft);
+
+/*
+ * Create a new, uncascaded config tree equivalent to the input cascade.
+ */
+struct dm_config_tree *dm_config_flatten(struct dm_config_tree *cft);
+
+void dm_config_destroy(struct dm_config_tree *cft);
+
+/* Simple output line by line. */
+typedef int (*dm_putline_fn)(const char *line, void *baton);
+/* More advanced output with config node reference. */
+typedef int (*dm_config_node_out_fn)(const struct dm_config_node *cn, const char *line, void *baton);
+
+/*
+ * Specification for advanced config node output.
+ */
+struct dm_config_node_out_spec {
+	dm_config_node_out_fn prefix_fn;	/* called before processing config node lines */
+	dm_config_node_out_fn line_fn;		/* called for each config node line */
+	dm_config_node_out_fn suffix_fn;	/* called after processing config node lines */
+};
+
+/* Write the node and any subsequent siblings it has. */
+int dm_config_write_node(const struct dm_config_node *cn, dm_putline_fn putline, void *baton);
+int dm_config_write_node_out(const struct dm_config_node *cn, const struct dm_config_node_out_spec *out_spec, void *baton);
+
+/* Write the given node only, without subsequent siblings. */
+int dm_config_write_one_node(const struct dm_config_node *cn, dm_putline_fn putline, void *baton);
+int dm_config_write_one_node_out(const struct dm_config_node *cn, const struct dm_config_node_out_spec *out_spec, void *baton);
+
+struct dm_config_node *dm_config_find_node(const struct dm_config_node *cn, const char *path);
+int dm_config_has_node(const struct dm_config_node *cn, const char *path);
+int dm_config_remove_node(struct dm_config_node *parent, struct dm_config_node *remove);
+const char *dm_config_find_str(const struct dm_config_node *cn, const char *path, const char *fail);
+const char *dm_config_find_str_allow_empty(const struct dm_config_node *cn, const char *path, const char *fail);
+int dm_config_find_int(const struct dm_config_node *cn, const char *path, int fail);
+int64_t dm_config_find_int64(const struct dm_config_node *cn, const char *path, int64_t fail);
+float dm_config_find_float(const struct dm_config_node *cn, const char *path, float fail);
+
+const struct dm_config_node *dm_config_tree_find_node(const struct dm_config_tree *cft, const char *path);
+const char *dm_config_tree_find_str(const struct dm_config_tree *cft, const char *path, const char *fail);
+const char *dm_config_tree_find_str_allow_empty(const struct dm_config_tree *cft, const char *path, const char *fail);
+int dm_config_tree_find_int(const struct dm_config_tree *cft, const char *path, int fail);
+int64_t dm_config_tree_find_int64(const struct dm_config_tree *cft, const char *path, int64_t fail);
+float dm_config_tree_find_float(const struct dm_config_tree *cft, const char *path, float fail);
+int dm_config_tree_find_bool(const struct dm_config_tree *cft, const char *path, int fail);
+
+/*
+ * Understands (0, ~0), (y, n), (yes, no), (on,
+ * off), (true, false).
+ */
+int dm_config_find_bool(const struct dm_config_node *cn, const char *path, int fail);
+int dm_config_value_is_bool(const struct dm_config_value *v);
+
+int dm_config_get_uint32(const struct dm_config_node *cn, const char *path, uint32_t *result);
+int dm_config_get_uint64(const struct dm_config_node *cn, const char *path, uint64_t *result);
+int dm_config_get_str(const struct dm_config_node *cn, const char *path, const char **result);
+int dm_config_get_list(const struct dm_config_node *cn, const char *path, const struct dm_config_value **result);
+int dm_config_get_section(const struct dm_config_node *cn, const char *path, const struct dm_config_node **result);
+
+unsigned dm_config_maybe_section(const char *str, unsigned len);
+
+const char *dm_config_parent_name(const struct dm_config_node *n);
+
+struct dm_config_node *dm_config_clone_node_with_mem(struct dm_pool *mem, const struct dm_config_node *node, int siblings);
+struct dm_config_node *dm_config_create_node(struct dm_config_tree *cft, const char *key);
+struct dm_config_value *dm_config_create_value(struct dm_config_tree *cft);
+struct dm_config_node *dm_config_clone_node(struct dm_config_tree *cft, const struct dm_config_node *cn, int siblings);
+
+/*
+ * Common formatting flags applicable to all config node types (lower 16 bits).
+ */
+#define DM_CONFIG_VALUE_FMT_COMMON_ARRAY         0x00000001 /* value is array */
+#define DM_CONFIG_VALUE_FMT_COMMON_EXTRA_SPACES  0x00000002 /* add spaces in "key = value" pairs in contrast to "key=value" for better readability */
+
+/*
+ * Type-related config node formatting flags (higher 16 bits).
+ */
+/* int-related formatting flags */
+#define DM_CONFIG_VALUE_FMT_INT_OCTAL            0x00010000 /* print number in octal form */
+
+/* string-related formatting flags */
+#define DM_CONFIG_VALUE_FMT_STRING_NO_QUOTES     0x00010000 /* do not print quotes around string value */
+
+void dm_config_value_set_format_flags(struct dm_config_value *cv, uint32_t format_flags);
+uint32_t dm_config_value_get_format_flags(struct dm_config_value *cv);
+
+struct dm_pool *dm_config_memory(struct dm_config_tree *cft);
+
+/* Udev device directory. */
+#define DM_UDEV_DEV_DIR "/dev/"
+
+/* Cookie prefixes.
+ *
+ * The cookie value consists of a prefix (16 bits) and a base (16 bits).
+ * We can use the prefix to store the flags. These flags are sent to the
+ * kernel within a given dm task. When returned back to userspace in the
+ * DM_COOKIE udev environment variable, we can control several aspects of
+ * the udev rules we use by decoding the cookie prefix. When doing the
+ * notification, we replace the cookie prefix with DM_COOKIE_MAGIC,
+ * so we notify the right semaphore.
+ *
+ * It is still possible to use cookies for passing the flags to udev
+ * rules even when udev_sync is disabled. The base part of the cookie
+ * will be zero (there's no notification semaphore) and only the prefix
+ * will be set. However, having udev_sync enabled is highly recommended.
+ */
+#define DM_COOKIE_MAGIC 0x0D4D
+#define DM_UDEV_FLAGS_MASK 0xFFFF0000
+#define DM_UDEV_FLAGS_SHIFT 16
+
+/*
+ * DM_UDEV_DISABLE_DM_RULES_FLAG is set in case we need to disable
+ * the basic device-mapper udev rules that create symlinks in the
+ * /dev/<DM_DIR> directory. However, we can't reliably prevent udev
+ * from creating default nodes (commonly /dev/dm-X, where X is a number).
+ */
+#define DM_UDEV_DISABLE_DM_RULES_FLAG 0x0001
+/*
+ * DM_UDEV_DISABLE_SUBSYSTEM_RULES_FLAG is set in case we need to disable
+ * subsystem udev rules, but we still need the general DM udev rules to
+ * be applied (to create the nodes and symlinks under /dev and /dev/disk).
+ */
+#define DM_UDEV_DISABLE_SUBSYSTEM_RULES_FLAG 0x0002
+/*
+ * DM_UDEV_DISABLE_DISK_RULES_FLAG is set in case we need to disable
+ * the general DM rules that set symlinks in the /dev/disk directory.
+ */
+#define DM_UDEV_DISABLE_DISK_RULES_FLAG 0x0004
+/*
+ * DM_UDEV_DISABLE_OTHER_RULES_FLAG is set in case we need to disable
+ * all the other rules that are not general device-mapper nor subsystem
+ * related (the rules belong to other software or packages). All foreign
+ * rules should check this flag directly and ignore further rule
+ * processing for such an event.
+ */
+#define DM_UDEV_DISABLE_OTHER_RULES_FLAG 0x0008
+/*
+ * DM_UDEV_LOW_PRIORITY_FLAG is set in case we need to instruct the
+ * udev rules to give low priority to the device that is currently
+ * processed. For example, this provides a way to select which symlinks
+ * could be overwritten by high priority ones if their names are equal.
+ * A common situation is a name based on an FS UUID while using origin
+ * and snapshot devices.
+ */
+#define DM_UDEV_LOW_PRIORITY_FLAG 0x0010
+/*
+ * DM_UDEV_DISABLE_LIBRARY_FALLBACK is set in case we need to disable
+ * libdevmapper's node management. We will rely on udev completely
+ * and there will be no fallback action provided by libdevmapper if
+ * udev does something improperly. Using the library fallback code has
+ * a consequence that you need to take into account: any device node
+ * or symlink created without udev is not recorded in the udev database,
+ * which other applications may read to get a complete list of devices.
+ * For this reason, use of DM_UDEV_DISABLE_LIBRARY_FALLBACK is
+ * recommended on systems where udev is used. Keep the library fallback
+ * enabled just for exceptional cases where you need to debug udev-related
+ * problems. If you hit such problems, please contact us through the
+ * upstream LVM2 development mailing list (see also the README file).
+ * This flag is currently not set by default in libdevmapper, so you need
+ * to set it explicitly if you're sure that udev is behaving correctly
+ * on your setup.
+ */
+#define DM_UDEV_DISABLE_LIBRARY_FALLBACK 0x0020
+/*
+ * DM_UDEV_PRIMARY_SOURCE_FLAG is automatically appended by
+ * libdevmapper for all ioctls generating udev uevents. Once used in
+ * udev rules, we know if this is a real "primary sourced" event or not.
+ * We need to distinguish real events originating in libdevmapper from
+ * any spurious events to gather all missing information (e.g. events
+ * generated as a result of the "udevadm trigger" command or as a result
+ * of the "watch" udev rule).
+ */
+#define DM_UDEV_PRIMARY_SOURCE_FLAG 0x0040
+
+/*
+ * Udev flags reserved for use by any device-mapper subsystem.
+ */
+#define DM_SUBSYSTEM_UDEV_FLAG0 0x0100
+#define DM_SUBSYSTEM_UDEV_FLAG1 0x0200
+#define DM_SUBSYSTEM_UDEV_FLAG2 0x0400
+#define DM_SUBSYSTEM_UDEV_FLAG3 0x0800
+#define DM_SUBSYSTEM_UDEV_FLAG4 0x1000
+#define DM_SUBSYSTEM_UDEV_FLAG5 0x2000
+#define DM_SUBSYSTEM_UDEV_FLAG6 0x4000
+#define DM_SUBSYSTEM_UDEV_FLAG7 0x8000
+
+int dm_cookie_supported(void);
+
+/*
+ * Udev synchronisation functions.
+ */
+void dm_udev_set_sync_support(int sync_with_udev);
+int dm_udev_get_sync_support(void);
+void dm_udev_set_checking(int checking);
+int dm_udev_get_checking(void);
+
+/*
+ * Default value to get a new auto-generated cookie created.
+ */
+#define DM_COOKIE_AUTO_CREATE 0
+int dm_udev_create_cookie(uint32_t *cookie);
+int dm_udev_complete(uint32_t cookie);
+int dm_udev_wait(uint32_t cookie);
+
+/*
+ * dm_udev_wait_immediate
+ * If *ready is 1 on return, the wait is complete.
+ * If *ready is 0 on return, the wait is incomplete and either
+ * this function or dm_udev_wait() must be called again.
+ * Returns 0 on error, when neither function should be called again.
+ */
+int dm_udev_wait_immediate(uint32_t cookie, int *ready);
+
+#define DM_DEV_DIR_UMASK 0022
+#define DM_CONTROL_NODE_UMASK 0177
+
+#ifdef __cplusplus
+}
+#endif
+#endif				/* LIB_DEVICE_MAPPER_H */
diff --git a/device_mapper/libdm-common.c b/device_mapper/libdm-common.c
new file mode 100644
index 000000000..bcf12cbdf
--- /dev/null
+++ b/device_mapper/libdm-common.c
@@ -0,0 +1,2691 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2012 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "device_mapper/misc/dmlib.h" +#include "ioctl/libdm-targets.h" +#include "libdm-common.h" +#include "misc/kdev_t.h" +#include "misc/dm-ioctl.h" + +#include <stdarg.h> +#include <sys/param.h> +#include <sys/ioctl.h> +#include <fcntl.h> +#include <dirent.h> +#include <unistd.h> + +#ifdef UDEV_SYNC_SUPPORT +# include <sys/types.h> +# include <sys/ipc.h> +# include <sys/sem.h> +# include <libudev.h> +#endif + +#ifdef __linux__ +# include <linux/fs.h> +#endif + +#ifdef HAVE_SELINUX +# include <selinux/selinux.h> +#endif +#ifdef HAVE_SELINUX_LABEL_H +# include <selinux/label.h> +#endif + +#define DM_DEFAULT_NAME_MANGLING_MODE_ENV_VAR_NAME "DM_DEFAULT_NAME_MANGLING_MODE" + +#define DEV_DIR "/dev/" + +#ifdef UDEV_SYNC_SUPPORT +#ifdef _SEM_SEMUN_UNDEFINED +union semun +{ + int val; /* value for SETVAL */ + struct semid_ds *buf; /* buffer for IPC_STAT & IPC_SET */ + unsigned short int *array; /* array for GETALL & SETALL */ + struct seminfo *__buf; /* buffer for IPC_INFO */ +}; +#endif +#endif + +static char _dm_dir[PATH_MAX] = DEV_DIR DM_DIR; +static char _sysfs_dir[PATH_MAX] = "/sys/"; +static char _path0[PATH_MAX]; /* path buffer, safe 4kB on stack */ +static const char _mountinfo[] = "/proc/self/mountinfo"; + +#define DM_MAX_UUID_PREFIX_LEN 15 +static char _default_uuid_prefix[DM_MAX_UUID_PREFIX_LEN + 1] = "LVM-"; + +static int _verbose = 0; +static int _suspended_dev_counter = 0; +static dm_string_mangling_t _name_mangling_mode = DEFAULT_DM_NAME_MANGLING; + +#ifdef HAVE_SELINUX_LABEL_H +static struct selabel_handle *_selabel_handle = NULL; +#endif + +static int _udev_disabled = 0; + +#ifdef UDEV_SYNC_SUPPORT +static int _semaphore_supported = -1; +static int _udev_running = -1; +static int _sync_with_udev = 1; +static int _udev_checking = 1; +#endif + +void dm_lib_init(void) +{ + const char *env; + + if (getenv("DM_DISABLE_UDEV")) + _udev_disabled = 1; + + _name_mangling_mode = DEFAULT_DM_NAME_MANGLING; + if ((env = getenv(DM_DEFAULT_NAME_MANGLING_MODE_ENV_VAR_NAME))) { + if (!strcasecmp(env, "none")) + _name_mangling_mode = DM_STRING_MANGLING_NONE; + else if (!strcasecmp(env, "auto")) + _name_mangling_mode = DM_STRING_MANGLING_AUTO; + else if (!strcasecmp(env, "hex")) + _name_mangling_mode = DM_STRING_MANGLING_HEX; + } +} + +/* + * Library users can provide their own logging + * function. + */ + +__attribute__((format(printf, 5, 0))) +static void _default_log_line(int level, const char *file, + int line, int dm_errno_or_class, + const char *f, va_list ap) +{ + static int _abort_on_internal_errors = -1; + static int _debug_with_line_numbers = -1; + FILE *out = log_stderr(level) ? stderr : stdout; + + level = log_level(level); + + if (level <= _LOG_WARN || _verbose) { + if (level < _LOG_WARN) + out = stderr; + + if (_debug_with_line_numbers < 0) + /* Set when env DM_DEBUG_WITH_LINE_NUMBERS is not "0" */ + _debug_with_line_numbers = + strcmp(getenv("DM_DEBUG_WITH_LINE_NUMBERS") ? : "0", "0"); + + if (_debug_with_line_numbers) + fprintf(out, "%s:%d ", file, line); + + vfprintf(out, f, ap); + fputc('\n', out); + } + + if (_abort_on_internal_errors < 0) + /* Set when env DM_ABORT_ON_INTERNAL_ERRORS is not "0" */ + _abort_on_internal_errors = + strcmp(getenv("DM_ABORT_ON_INTERNAL_ERRORS") ? 
: "0", "0"); + + if (_abort_on_internal_errors && + !strncmp(f, INTERNAL_ERROR, sizeof(INTERNAL_ERROR) - 1)) + abort(); +} + +__attribute__((format(printf, 5, 6))) +static void _default_log_with_errno(int level, + const char *file, int line, int dm_errno_or_class, + const char *f, ...) +{ + va_list ap; + + va_start(ap, f); + _default_log_line(level, file, line, dm_errno_or_class, f, ap); + va_end(ap); +} + +__attribute__((format(printf, 4, 5))) +static void _default_log(int level, const char *file, + int line, const char *f, ...) +{ + va_list ap; + + va_start(ap, f); + _default_log_line(level, file, line, 0, f, ap); + va_end(ap); +} + +dm_log_fn dm_log = _default_log; +dm_log_with_errno_fn dm_log_with_errno = _default_log_with_errno; + +/* + * Wrapper function to reformat new messages to and + * old style logging which had not used errno parameter + * + * As we cannot simply pass '...' to old function we + * need to process arg list locally and just pass '%s' + buffer + */ +__attribute__((format(printf, 5, 6))) +static void _log_to_default_log(int level, + const char *file, int line, int dm_errno_or_class, + const char *f, ...) +{ + int n; + va_list ap; + char buf[2 * PATH_MAX + 256]; /* big enough for most messages */ + + va_start(ap, f); + n = vsnprintf(buf, sizeof(buf), f, ap); + va_end(ap); + + if (n > 0) /* Could be truncated */ + dm_log(level, file, line, "%s", buf); +} + +/* + * Wrapper function take 'old' style message without errno + * and log it via new logging function with errno arg + * + * This minor case may happen if new libdm is used with old + * recompiled tool that would decided to use new logging, + * but still would like to use old binary plugins. + */ +__attribute__((format(printf, 4, 5))) +static void _log_to_default_log_with_errno(int level, + const char *file, int line, const char *f, ...) +{ + int n; + va_list ap; + char buf[2 * PATH_MAX + 256]; /* big enough for most messages */ + + va_start(ap, f); + n = vsnprintf(buf, sizeof(buf), f, ap); + va_end(ap); + + if (n > 0) /* Could be truncated */ + dm_log_with_errno(level, file, line, 0, "%s", buf); +} + +void dm_log_init(dm_log_fn fn) +{ + if (fn) { + dm_log = fn; + dm_log_with_errno = _log_to_default_log; + } else { + dm_log = _default_log; + dm_log_with_errno = _default_log_with_errno; + } +} + +int dm_log_is_non_default(void) +{ + return (dm_log == _default_log && dm_log_with_errno == _default_log_with_errno) ? 0 : 1; +} + +void dm_log_with_errno_init(dm_log_with_errno_fn fn) +{ + if (fn) { + dm_log = _log_to_default_log_with_errno; + dm_log_with_errno = fn; + } else { + dm_log = _default_log; + dm_log_with_errno = _default_log_with_errno; + } +} + +void dm_log_init_verbose(int level) +{ + _verbose = level; +} + +static int _build_dev_path(char *buffer, size_t len, const char *dev_name) +{ + int r; + + /* If there's a /, assume caller knows what they're doing */ + if (strchr(dev_name, '/')) + r = dm_strncpy(buffer, dev_name, len); + else + r = (dm_snprintf(buffer, len, "%s/%s", + _dm_dir, dev_name) < 0) ? 
0 : 1; + if (!r) + log_error("Failed to build dev path for \"%s\".", dev_name); + + return r; +} + +int dm_get_library_version(char *version, size_t size) +{ + return dm_strncpy(version, DM_LIB_VERSION, size); +} + +void inc_suspended(void) +{ + _suspended_dev_counter++; + log_debug_activation("Suspended device counter increased to %d", _suspended_dev_counter); +} + +void dec_suspended(void) +{ + if (!_suspended_dev_counter) { + log_error("Attempted to decrement suspended device counter below zero."); + return; + } + + _suspended_dev_counter--; + log_debug_activation("Suspended device counter reduced to %d", _suspended_dev_counter); +} + +int dm_get_suspended_counter(void) +{ + return _suspended_dev_counter; +} + +int dm_set_name_mangling_mode(dm_string_mangling_t name_mangling_mode) +{ + _name_mangling_mode = name_mangling_mode; + + return 1; +} + +dm_string_mangling_t dm_get_name_mangling_mode(void) +{ + return _name_mangling_mode; +} + +struct dm_task *dm_task_create(int type) +{ + struct dm_task *dmt = dm_zalloc(sizeof(*dmt)); + + if (!dmt) { + log_error("dm_task_create: malloc(%" PRIsize_t ") failed", + sizeof(*dmt)); + return NULL; + } + + if (!dm_check_version()) { + dm_free(dmt); + return_NULL; + } + + dmt->type = type; + dmt->minor = -1; + dmt->major = -1; + dmt->allow_default_major_fallback = 1; + dmt->uid = DM_DEVICE_UID; + dmt->gid = DM_DEVICE_GID; + dmt->mode = DM_DEVICE_MODE; + dmt->no_open_count = 0; + dmt->read_ahead = DM_READ_AHEAD_AUTO; + dmt->read_ahead_flags = 0; + dmt->event_nr = 0; + dmt->cookie_set = 0; + dmt->query_inactive_table = 0; + dmt->new_uuid = 0; + dmt->secure_data = 0; + dmt->record_timestamp = 0; + + return dmt; +} + +/* + * Find the name associated with a given device number by scanning _dm_dir. + */ +static int _find_dm_name_of_device(dev_t st_rdev, char *buf, size_t buf_len) +{ + const char *name; + char path[PATH_MAX]; + struct dirent *dirent; + DIR *d; + struct stat st; + int r = 0; + + if (!(d = opendir(_dm_dir))) { + log_sys_error("opendir", _dm_dir); + return 0; + } + + while ((dirent = readdir(d))) { + name = dirent->d_name; + + if (!strcmp(name, ".") || !strcmp(name, "..")) + continue; + + if (dm_snprintf(path, sizeof(path), "%s/%s", _dm_dir, + name) == -1) { + log_error("Couldn't create path for %s", name); + continue; + } + + if (stat(path, &st)) + continue; + + if (st.st_rdev == st_rdev) { + strncpy(buf, name, buf_len); + r = 1; + break; + } + } + + if (closedir(d)) + log_sys_error("closedir", _dm_dir); + + return r; +} + +static int _is_whitelisted_char(char c) +{ + /* + * Actually, DM supports any character in a device name. + * This whitelist is just for proper integration with udev. + */ + if ((c >= '0' && c <= '9') || + (c >= 'A' && c <= 'Z') || + (c >= 'a' && c <= 'z') || + strchr("#+-.:=@_", c) != NULL) + return 1; + + return 0; +} + +int check_multiple_mangled_string_allowed(const char *str, const char *str_name, + dm_string_mangling_t mode) +{ + if (mode == DM_STRING_MANGLING_AUTO && strstr(str, "\\x5cx")) { + log_error("The %s \"%s\" seems to be mangled more than once. " + "This is not allowed in auto mode.", str_name, str); + return 0; + } + + return 1; +} + +/* + * Mangle all characters in the input string which are not on a whitelist + * with '\xNN' format where NN is the hex value of the character. 
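+ *
+ * An illustrative example (hypothetical names, not taken from the code
+ * below): "a b" mangles to "a\x20b" because ' ' is not on the whitelist,
+ * while "a-b" is left untouched because '-' is whitelisted.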
+ */
+int mangle_string(const char *str, const char *str_name, size_t len,
+		  char *buf, size_t buf_len, dm_string_mangling_t mode)
+{
+	int need_mangling = -1; /* -1 don't know yet, 0 no, 1 yes */
+	size_t i, j;
+
+	if (!str || !buf)
+		return -1;
+
+	/* Is there anything to do at all? */
+	if (!*str || !len)
+		return 0;
+
+	if (buf_len < DM_NAME_LEN) {
+		log_error(INTERNAL_ERROR "mangle_string: supplied buffer too small");
+		return -1;
+	}
+
+	if (mode == DM_STRING_MANGLING_NONE)
+		mode = DM_STRING_MANGLING_AUTO;
+
+	for (i = 0, j = 0; str[i]; i++) {
+		if (mode == DM_STRING_MANGLING_AUTO) {
+			/*
+			 * Detect already mangled part of the string and keep it.
+			 * Return error on mixture of mangled/not mangled!
+			 */
+			if (str[i] == '\\' && str[i+1] == 'x') {
+				if ((len - i < 4) || (need_mangling == 1))
+					goto bad1;
+				if (buf_len - j < 4)
+					goto bad2;
+
+				memcpy(&buf[j], &str[i], 4);
+				i += 3; j += 4;
+
+				need_mangling = 0;
+				continue;
+			}
+		}
+
+		if (_is_whitelisted_char(str[i])) {
+			/* whitelisted, keep it. */
+			if (buf_len - j < 1)
+				goto bad2;
+			buf[j] = str[i];
+			j++;
+		} else {
+			/*
+			 * Not on the whitelist, mangle it.
+			 * Return error on a mixture of mangled/not mangled
+			 * unless DM_STRING_MANGLING_HEX is used!
+			 */
+			if ((mode != DM_STRING_MANGLING_HEX) && (need_mangling == 0))
+				goto bad1;
+			if (buf_len - j < 4)
+				goto bad2;
+
+			sprintf(&buf[j], "\\x%02x", (unsigned char) str[i]);
+			j += 4;
+
+			need_mangling = 1;
+		}
+	}
+
+	if (buf_len - j < 1)
+		goto bad2;
+	buf[j] = '\0';
+
+	/* All chars in the string whitelisted? */
+	if (need_mangling == -1)
+		need_mangling = 0;
+
+	return need_mangling;
+
+bad1:
+	log_error("The %s \"%s\" contains mixed mangled and unmangled "
+		  "characters or it's already mangled improperly.", str_name, str);
+	return -1;
+bad2:
+	log_error("Mangled form of the %s too long for \"%s\".", str_name, str);
+	return -1;
+}
+
+/*
+ * Try to unmangle the supplied string.
+ * Return value: -1 on error, 0 when no unmangling needed, 1 when unmangling applied
+ */
+int unmangle_string(const char *str, const char *str_name, size_t len,
+		    char *buf, size_t buf_len, dm_string_mangling_t mode)
+{
+	int strict = mode != DM_STRING_MANGLING_NONE;
+	char str_rest[DM_NAME_LEN];
+	size_t i, j;
+	int code;
+	int r = 0;
+
+	if (!str || !buf)
+		return -1;
+
+	/* Is there anything to do at all? */
+	if (!*str || !len)
+		return 0;
+
+	if (buf_len < DM_NAME_LEN) {
+		log_error(INTERNAL_ERROR "unmangle_string: supplied buffer too small");
+		return -1;
+	}
+
+	for (i = 0, j = 0; str[i]; i++, j++) {
+		if (strict && !(_is_whitelisted_char(str[i]) || str[i] == '\\')) {
+			log_error("The %s \"%s\" should be mangled but "
+				  "it contains blacklisted characters.", str_name, str);
+			j = 0; r = -1;
+			goto out;
+		}
+
+		if (str[i] == '\\' && str[i+1] == 'x') {
+			if (!sscanf(&str[i+2], "%2x%s", &code, str_rest)) {
+				log_debug_activation("Hex encoding mismatch detected in %s \"%s\" "
+						     "while trying to unmangle it.", str_name, str);
+				goto out;
+			}
+			buf[j] = (unsigned char) code;
+
+			/* skip the encoded part we've just decoded!
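+			 * ('\xNN' is four input characters but only one
+			 * output byte, so i is advanced three extra places)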
*/ + i+= 3; + + /* unmangling applied */ + r = 1; + } else + buf[j] = str[i]; + } + +out: + buf[j] = '\0'; + return r; +} + +static int _dm_task_set_name(struct dm_task *dmt, const char *name, + dm_string_mangling_t mangling_mode) +{ + char mangled_name[DM_NAME_LEN]; + int r = 0; + + dm_free(dmt->dev_name); + dmt->dev_name = NULL; + dm_free(dmt->mangled_dev_name); + dmt->mangled_dev_name = NULL; + + if (strlen(name) >= DM_NAME_LEN) { + log_error("Name \"%s\" too long.", name); + return 0; + } + + if (!check_multiple_mangled_string_allowed(name, "name", mangling_mode)) + return_0; + + if (mangling_mode != DM_STRING_MANGLING_NONE && + (r = mangle_string(name, "name", strlen(name), mangled_name, + sizeof(mangled_name), mangling_mode)) < 0) { + log_error("Failed to mangle device name \"%s\".", name); + return 0; + } + + /* Store mangled_dev_name only if it differs from dev_name! */ + if (r) { + log_debug_activation("Device name mangled [%s]: %s --> %s", + mangling_mode == DM_STRING_MANGLING_AUTO ? "auto" : "hex", + name, mangled_name); + if (!(dmt->mangled_dev_name = dm_strdup(mangled_name))) { + log_error("_dm_task_set_name: dm_strdup(%s) failed", mangled_name); + return 0; + } + } + + if (!(dmt->dev_name = dm_strdup(name))) { + log_error("_dm_task_set_name: strdup(%s) failed", name); + return 0; + } + + return 1; +} + +static int _dm_task_set_name_from_path(struct dm_task *dmt, const char *path, + const char *name) +{ + char buf[PATH_MAX]; + struct stat st1, st2; + const char *final_name = NULL; + size_t len; + + if (dmt->type == DM_DEVICE_CREATE) { + log_error("Name \"%s\" invalid. It contains \"/\".", path); + return 0; + } + + if (!stat(path, &st1)) { + /* + * Found directly. + * If supplied path points to same device as last component + * under /dev/mapper, use that name directly. + */ + if (dm_snprintf(buf, sizeof(buf), "%s/%s", _dm_dir, name) == -1) { + log_error("Couldn't create path for %s", name); + return 0; + } + + if (!stat(buf, &st2) && (st1.st_rdev == st2.st_rdev)) + final_name = name; + } else { + /* Not found. */ + /* If there is exactly one '/' try a prefix of /dev */ + if ((len = strlen(path)) < 3 || path[0] == '/' || + dm_count_chars(path, len, '/') != 1) { + log_error("Device %s not found", path); + return 0; + } + if (dm_snprintf(buf, sizeof(buf), "%s/../%s", _dm_dir, path) == -1) { + log_error("Couldn't create /dev path for %s", path); + return 0; + } + if (stat(buf, &st1)) { + log_error("Device %s not found", path); + return 0; + } + /* Found */ + } + + /* + * If we don't have the dm name yet, Call _find_dm_name_of_device() to + * scan _dm_dir for a match. + */ + if (!final_name) { + if (_find_dm_name_of_device(st1.st_rdev, buf, sizeof(buf))) + final_name = buf; + else { + log_error("Device %s not found", name); + return 0; + } + } + + /* This is an already existing path - do not mangle! */ + return _dm_task_set_name(dmt, final_name, DM_STRING_MANGLING_NONE); +} + +int dm_task_set_name(struct dm_task *dmt, const char *name) +{ + char *pos; + + /* Path supplied for existing device? 
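+	 * Any name containing '/' is treated as a path to an existing
+	 * device (e.g. a hypothetical "/dev/mapper/foo"); plain names
+	 * skip this branch and are mangled as usual.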
*/ + if ((pos = strrchr(name, '/'))) + return _dm_task_set_name_from_path(dmt, name, pos + 1); + + return _dm_task_set_name(dmt, name, dm_get_name_mangling_mode()); +} + +const char *dm_task_get_name(const struct dm_task *dmt) +{ + return (dmt->dmi.v4->name); +} + +static char *_task_get_string_mangled(const char *str, const char *str_name, + char *buf, size_t buf_size, + dm_string_mangling_t mode) +{ + char *rs; + int r; + + if ((r = mangle_string(str, str_name, strlen(str), buf, buf_size, mode)) < 0) + return NULL; + + if (!(rs = r ? dm_strdup(buf) : dm_strdup(str))) + log_error("_task_get_string_mangled: dm_strdup failed"); + + return rs; +} + +static char *_task_get_string_unmangled(const char *str, const char *str_name, + char *buf, size_t buf_size, + dm_string_mangling_t mode) +{ + char *rs; + int r = 0; + + /* + * Unless the mode used is 'none', the string + * is *already* unmangled on ioctl return! + */ + if (mode == DM_STRING_MANGLING_NONE && + (r = unmangle_string(str, str_name, strlen(str), buf, buf_size, mode)) < 0) + return NULL; + + if (!(rs = r ? dm_strdup(buf) : dm_strdup(str))) + log_error("_task_get_string_unmangled: dm_strdup failed"); + + return rs; +} + +char *dm_task_get_name_mangled(const struct dm_task *dmt) +{ + const char *s = dm_task_get_name(dmt); + char buf[DM_NAME_LEN]; + char *rs; + + if (!(rs = _task_get_string_mangled(s, "name", buf, sizeof(buf), dm_get_name_mangling_mode()))) + log_error("Failed to mangle device name \"%s\".", s); + + return rs; +} + +char *dm_task_get_name_unmangled(const struct dm_task *dmt) +{ + const char *s = dm_task_get_name(dmt); + char buf[DM_NAME_LEN]; + char *rs; + + if (!(rs = _task_get_string_unmangled(s, "name", buf, sizeof(buf), dm_get_name_mangling_mode()))) + log_error("Failed to unmangle device name \"%s\".", s); + + return rs; +} + +const char *dm_task_get_uuid(const struct dm_task *dmt) +{ + return (dmt->dmi.v4->uuid); +} + +char *dm_task_get_uuid_mangled(const struct dm_task *dmt) +{ + const char *s = dm_task_get_uuid(dmt); + char buf[DM_UUID_LEN]; + char *rs; + + if (!(rs = _task_get_string_mangled(s, "UUID", buf, sizeof(buf), dm_get_name_mangling_mode()))) + log_error("Failed to mangle device uuid \"%s\".", s); + + return rs; +} + +char *dm_task_get_uuid_unmangled(const struct dm_task *dmt) +{ + const char *s = dm_task_get_uuid(dmt); + char buf[DM_UUID_LEN]; + char *rs; + + if (!(rs = _task_get_string_unmangled(s, "UUID", buf, sizeof(buf), dm_get_name_mangling_mode()))) + log_error("Failed to unmangle device uuid \"%s\".", s); + + return rs; +} + +int dm_task_set_newname(struct dm_task *dmt, const char *newname) +{ + dm_string_mangling_t mangling_mode = dm_get_name_mangling_mode(); + char mangled_name[DM_NAME_LEN]; + int r = 0; + + if (strchr(newname, '/')) { + log_error("Name \"%s\" invalid. It contains \"/\".", newname); + return 0; + } + + if (strlen(newname) >= DM_NAME_LEN) { + log_error("Name \"%s\" too long", newname); + return 0; + } + + if (!*newname) { + log_error("Non empty new name is required."); + return 0; + } + + if (!check_multiple_mangled_string_allowed(newname, "new name", mangling_mode)) + return_0; + + if (mangling_mode != DM_STRING_MANGLING_NONE && + (r = mangle_string(newname, "new name", strlen(newname), mangled_name, + sizeof(mangled_name), mangling_mode)) < 0) { + log_error("Failed to mangle new device name \"%s\"", newname); + return 0; + } + + if (r) { + log_debug_activation("New device name mangled [%s]: %s --> %s", + mangling_mode == DM_STRING_MANGLING_AUTO ? 
"auto" : "hex", + newname, mangled_name); + newname = mangled_name; + } + + dm_free(dmt->newname); + if (!(dmt->newname = dm_strdup(newname))) { + log_error("dm_task_set_newname: strdup(%s) failed", newname); + return 0; + } + + dmt->new_uuid = 0; + + return 1; +} + +int dm_task_set_uuid(struct dm_task *dmt, const char *uuid) +{ + char mangled_uuid[DM_UUID_LEN]; + dm_string_mangling_t mangling_mode = dm_get_name_mangling_mode(); + int r = 0; + + dm_free(dmt->uuid); + dmt->uuid = NULL; + dm_free(dmt->mangled_uuid); + dmt->mangled_uuid = NULL; + + if (!check_multiple_mangled_string_allowed(uuid, "UUID", mangling_mode)) + return_0; + + if (mangling_mode != DM_STRING_MANGLING_NONE && + (r = mangle_string(uuid, "UUID", strlen(uuid), mangled_uuid, + sizeof(mangled_uuid), mangling_mode)) < 0) { + log_error("Failed to mangle device uuid \"%s\".", uuid); + return 0; + } + + if (r) { + log_debug_activation("Device uuid mangled [%s]: %s --> %s", + mangling_mode == DM_STRING_MANGLING_AUTO ? "auto" : "hex", + uuid, mangled_uuid); + + if (!(dmt->mangled_uuid = dm_strdup(mangled_uuid))) { + log_error("dm_task_set_uuid: dm_strdup(%s) failed", mangled_uuid); + return 0; + } + } + + if (!(dmt->uuid = dm_strdup(uuid))) { + log_error("dm_task_set_uuid: strdup(%s) failed", uuid); + return 0; + } + + return 1; +} + +int dm_task_set_major(struct dm_task *dmt, int major) +{ + dmt->major = major; + dmt->allow_default_major_fallback = 0; + + return 1; +} + +int dm_task_set_minor(struct dm_task *dmt, int minor) +{ + dmt->minor = minor; + + return 1; +} + +int dm_task_set_major_minor(struct dm_task *dmt, int major, int minor, + int allow_default_major_fallback) +{ + dmt->major = major; + dmt->minor = minor; + dmt->allow_default_major_fallback = allow_default_major_fallback; + + return 1; +} + +int dm_task_set_uid(struct dm_task *dmt, uid_t uid) +{ + dmt->uid = uid; + + return 1; +} + +int dm_task_set_gid(struct dm_task *dmt, gid_t gid) +{ + dmt->gid = gid; + + return 1; +} + +int dm_task_set_mode(struct dm_task *dmt, mode_t mode) +{ + dmt->mode = mode; + + return 1; +} + +int dm_task_enable_checks(struct dm_task *dmt) +{ + dmt->enable_checks = 1; + + return 1; +} + +int dm_task_add_target(struct dm_task *dmt, uint64_t start, uint64_t size, + const char *ttype, const char *params) +{ + struct target *t = create_target(start, size, ttype, params); + if (!t) + return_0; + + if (!dmt->head) + dmt->head = dmt->tail = t; + else { + dmt->tail->next = t; + dmt->tail = t; + } + + return 1; +} + +#ifdef HAVE_SELINUX +static int _selabel_lookup(const char *path, mode_t mode, + security_context_t *scontext) +{ +#ifdef HAVE_SELINUX_LABEL_H + if (!_selabel_handle && + !(_selabel_handle = selabel_open(SELABEL_CTX_FILE, NULL, 0))) { + log_error("selabel_open failed: %s", strerror(errno)); + return 0; + } + + if (selabel_lookup(_selabel_handle, scontext, path, mode)) { + log_debug_activation("selabel_lookup failed for %s: %s", + path, strerror(errno)); + return 0; + } +#else + if (matchpathcon(path, mode, scontext)) { + log_debug_activation("matchpathcon failed for %s: %s", + path, strerror(errno)); + return 0; + } +#endif + return 1; +} +#endif + +#ifdef HAVE_SELINUX +static int _is_selinux_enabled(void) +{ + static int _tested = 0; + static int _enabled; + + if (!_tested) { + _tested = 1; + _enabled = is_selinux_enabled(); + } + + return _enabled; +} +#endif + +int dm_prepare_selinux_context(const char *path, mode_t mode) +{ +#ifdef HAVE_SELINUX + security_context_t scontext = NULL; + + if (_is_selinux_enabled() <= 0) + return 1; + 
+ if (path) { + if (!_selabel_lookup(path, mode, &scontext)) + return_0; + + log_debug_activation("Preparing SELinux context for %s to %s.", path, scontext); + } + else + log_debug_activation("Resetting SELinux context to default value."); + + if (setfscreatecon(scontext) < 0) { + log_sys_error("setfscreatecon", (path ? : "SELinux context reset")); + freecon(scontext); + return 0; + } + + freecon(scontext); +#endif + return 1; +} + +int dm_set_selinux_context(const char *path, mode_t mode) +{ +#ifdef HAVE_SELINUX + security_context_t scontext = NULL; + + if (_is_selinux_enabled() <= 0) + return 1; + + if (!_selabel_lookup(path, mode, &scontext)) + return_0; + + log_debug_activation("Setting SELinux context for %s to %s.", path, scontext); + + if ((lsetfilecon(path, scontext) < 0) && (errno != ENOTSUP)) { + log_sys_error("lsetfilecon", path); + freecon(scontext); + return 0; + } + + freecon(scontext); +#endif + return 1; +} + +void selinux_release(void) +{ +#ifdef HAVE_SELINUX_LABEL_H + if (_selabel_handle) + selabel_close(_selabel_handle); + _selabel_handle = NULL; +#endif +} + +static int _warn_if_op_needed(int warn_if_udev_failed) +{ + return warn_if_udev_failed && dm_udev_get_sync_support() && dm_udev_get_checking(); +} + +static int _add_dev_node(const char *dev_name, uint32_t major, uint32_t minor, + uid_t uid, gid_t gid, mode_t mode, int warn_if_udev_failed) +{ + char path[PATH_MAX]; + struct stat info; + dev_t dev = MKDEV((dev_t)major, (dev_t)minor); + mode_t old_mask; + + if (!_build_dev_path(path, sizeof(path), dev_name)) + return_0; + + if (stat(path, &info) >= 0) { + if (!S_ISBLK(info.st_mode)) { + log_error("A non-block device file at '%s' " + "is already present", path); + return 0; + } + + /* If right inode already exists we don't touch uid etc. */ + if (info.st_rdev == dev) + return 1; + + if (unlink(path) < 0) { + log_error("Unable to unlink device node for '%s'", + dev_name); + return 0; + } + } else if (_warn_if_op_needed(warn_if_udev_failed)) + log_warn("%s not set up by udev: Falling back to direct " + "node creation.", path); + + (void) dm_prepare_selinux_context(path, S_IFBLK); + old_mask = umask(0); + + /* The node may already have been created by udev. So ignore EEXIST. */ + if (mknod(path, S_IFBLK | mode, dev) < 0 && errno != EEXIST) { + log_error("%s: mknod for %s failed: %s", path, dev_name, strerror(errno)); + umask(old_mask); + (void) dm_prepare_selinux_context(NULL, 0); + return 0; + } + umask(old_mask); + (void) dm_prepare_selinux_context(NULL, 0); + + if (chown(path, uid, gid) < 0) { + log_sys_error("chown", path); + return 0; + } + + log_debug_activation("Created %s", path); + + return 1; +} + +static int _rm_dev_node(const char *dev_name, int warn_if_udev_failed) +{ + char path[PATH_MAX]; + struct stat info; + + if (!_build_dev_path(path, sizeof(path), dev_name)) + return_0; + if (lstat(path, &info) < 0) + return 1; + else if (_warn_if_op_needed(warn_if_udev_failed)) + log_warn("Node %s was not removed by udev. " + "Falling back to direct node removal.", path); + + /* udev may already have deleted the node. Ignore ENOENT. 
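+	 * (this mirrors _add_dev_node() above, which ignores EEXIST when
+	 * udev has already created the node)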
*/
+	if (unlink(path) < 0 && errno != ENOENT) {
+		log_error("Unable to unlink device node for '%s'", dev_name);
+		return 0;
+	}
+
+	log_debug_activation("Removed %s", path);
+
+	return 1;
+}
+
+static int _rename_dev_node(const char *old_name, const char *new_name,
+			    int warn_if_udev_failed)
+{
+	char oldpath[PATH_MAX];
+	char newpath[PATH_MAX];
+	struct stat info, info2;
+	struct stat *info_block_dev;
+
+	if (!_build_dev_path(oldpath, sizeof(oldpath), old_name) ||
+	    !_build_dev_path(newpath, sizeof(newpath), new_name))
+		return_0;
+
+	if (lstat(newpath, &info) == 0) {
+		if (S_ISLNK(info.st_mode)) {
+			if (stat(newpath, &info2) == 0)
+				info_block_dev = &info2;
+			else {
+				log_sys_error("stat", newpath);
+				return 0;
+			}
+		} else
+			info_block_dev = &info;
+
+		if (!S_ISBLK(info_block_dev->st_mode)) {
+			log_error("A non-block device file at '%s' "
+				  "is already present", newpath);
+			return 0;
+		}
+		else if (_warn_if_op_needed(warn_if_udev_failed)) {
+			if (lstat(oldpath, &info) < 0 &&
+			    errno == ENOENT)
+				/* assume udev already deleted this */
+				return 1;
+
+			log_warn("The node %s should have been renamed to %s "
+				 "by udev but old node is still present. "
+				 "Falling back to direct old node removal.",
+				 oldpath, newpath);
+			return _rm_dev_node(old_name, 0);
+		}
+
+		if (unlink(newpath) < 0) {
+			if (errno == EPERM) {
+				/* devfs, entry has already been renamed */
+				return 1;
+			}
+			log_error("Unable to unlink device node for '%s'",
+				  new_name);
+			return 0;
+		}
+	}
+	else if (_warn_if_op_needed(warn_if_udev_failed))
+		log_warn("The node %s should have been renamed to %s "
+			 "by udev but new node is not present. "
+			 "Falling back to direct node rename.",
+			 oldpath, newpath);
+
+	/* udev may already have renamed the node. Ignore ENOENT. */
+	/* FIXME: when renaming to target mangling mode "none" with udev
+	 * while there are some blacklisted characters in the node name,
+	 * udev will remove the old_node, but fails to properly rename
+	 * to new_node. The libdevmapper code tries to call
+	 * rename(old_node,new_node), but that won't do anything
+	 * since the old node is already removed by udev.
+	 * For example, renaming 'a\x20b' to 'a b':
+	 *   - udev removes 'a\x20b'
+	 *   - udev creates 'a' and 'b' (since it considers the ' ' a delimiter)
+	 *   - libdevmapper checks udev has done the rename properly
+	 *   - libdevmapper calls stat(new_node) and it does not see it
+	 *   - libdevmapper calls rename(old_node,new_node)
+	 *   - the rename is a NOP since the old_node does not exist anymore
+	 *
+	 * However, this situation is very rare - why would anyone need
+	 * to rename to an unsupported mode? So a fix for this would be
+	 * just for completeness.
+	 */
+	if (rename(oldpath, newpath) < 0 && errno != ENOENT) {
+		log_error("Unable to rename device node from '%s' to '%s'",
+			  old_name, new_name);
+		return 0;
+	}
+
+	log_debug_activation("Renamed %s to %s", oldpath, newpath);
+
+	return 1;
+}
+
+#ifdef __linux__
+static int _open_dev_node(const char *dev_name)
+{
+	int fd = -1;
+	char path[PATH_MAX];
+
+	if (!_build_dev_path(path, sizeof(path), dev_name))
+		return fd;
+
+	if ((fd = open(path, O_RDONLY, 0)) < 0)
+		log_sys_error("open", path);
+
+	return fd;
+}
+
+int get_dev_node_read_ahead(const char *dev_name, uint32_t major, uint32_t minor,
+			    uint32_t *read_ahead)
+{
+	char buf[24];
+	int len;
+	int r = 1;
+	int fd;
+	long read_ahead_long;
+
+	/*
+	 * If we know the device number, use sysfs if we can.
+	 * Otherwise use the BLKRAGET ioctl.
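+	 * Note the units: sysfs read_ahead_kb is in kilobytes while the
+	 * BLKRAGET/BLKRASET ioctls use 512-byte sectors, hence the
+	 * "* 2" and "(x + 1) / 2" conversions in this code.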
+	 */
+	if (*_sysfs_dir && major != 0) {
+		if (dm_snprintf(_path0, sizeof(_path0), "%sdev/block/%" PRIu32
+				":%" PRIu32 "/bdi/read_ahead_kb", _sysfs_dir,
+				major, minor) < 0) {
+			log_error("Failed to build sysfs_path.");
+			return 0;
+		}
+
+		if ((fd = open(_path0, O_RDONLY, 0)) != -1) {
+			/* Reading from sysfs, expecting number\n */
+			if ((len = read(fd, buf, sizeof(buf) - 1)) < 1) {
+				log_sys_error("read", _path0);
+				r = 0;
+			} else {
+				buf[len] = 0; /* kill \n and ensure \0 */
+				*read_ahead = atoi(buf) * 2;
+				log_debug_activation("%s (%d:%d): read ahead is %" PRIu32,
+						     dev_name, major, minor, *read_ahead);
+			}
+
+			if (close(fd))
+				log_sys_debug("close", _path0);
+
+			return r;
+		}
+
+		log_sys_debug("open", _path0);
+		/* Fall back to using dev_name */
+	}
+
+	/*
+	 * Open/close of dev_name may block the process
+	 * (e.g. an overfilled thin pool volume)
+	 */
+	if (!*dev_name) {
+		log_error("Empty device name passed to BLKRAGET");
+		return 0;
+	}
+
+	if ((fd = _open_dev_node(dev_name)) < 0)
+		return_0;
+
+	if (ioctl(fd, BLKRAGET, &read_ahead_long)) {
+		log_sys_error("BLKRAGET", dev_name);
+		*read_ahead = 0;
+		r = 0;
+	} else {
+		*read_ahead = (uint32_t) read_ahead_long;
+		log_debug_activation("%s: read ahead is %" PRIu32, dev_name, *read_ahead);
+	}
+
+	if (close(fd))
+		log_sys_debug("close", dev_name);
+
+	return r;
+}
+
+static int _set_read_ahead(const char *dev_name, uint32_t major, uint32_t minor,
+			   uint32_t read_ahead)
+{
+	char buf[24];
+	int len;
+	int r = 1;
+	int fd;
+	long read_ahead_long = (long) read_ahead;
+
+	log_debug_activation("%s (%d:%d): Setting read ahead to %" PRIu32, dev_name,
+			     major, minor, read_ahead);
+
+	/*
+	 * If we know the device number, use sysfs if we can.
+	 * Otherwise use the BLKRASET ioctl. RA is set after resume.
+	 */
+	if (*_sysfs_dir && major != 0) {
+		if (dm_snprintf(_path0, sizeof(_path0), "%sdev/block/%" PRIu32
+				":%" PRIu32 "/bdi/read_ahead_kb",
+				_sysfs_dir, major, minor) < 0) {
+			log_error("Failed to build sysfs_path.");
+			return 0;
+		}
+
+		/* Sysfs is kB based, round up to kB */
+		if ((len = dm_snprintf(buf, sizeof(buf), FMTu32,
+				       (read_ahead + 1) / 2)) < 0) {
+			log_error("Failed to build size in kB.");
+			return 0;
+		}
+
+		if ((fd = open(_path0, O_WRONLY, 0)) != -1) {
+			if (write(fd, buf, len) < len) {
+				log_sys_error("write", _path0);
+				r = 0;
+			}
+
+			if (close(fd))
+				log_sys_debug("close", _path0);
+
+			return r;
+		}
+
+		log_sys_debug("open", _path0);
+		/* Fall back to using dev_name */
+	}
+
+	if (!*dev_name) {
+		log_error("Empty device name passed to BLKRASET");
+		return 0;
+	}
+
+	if ((fd = _open_dev_node(dev_name)) < 0)
+		return_0;
+
+	if (ioctl(fd, BLKRASET, read_ahead_long)) {
+		log_sys_error("BLKRASET", dev_name);
+		r = 0;
+	}
+
+	if (close(fd))
+		log_sys_debug("close", dev_name);
+
+	return r;
+}
+
+static int _set_dev_node_read_ahead(const char *dev_name,
+				    uint32_t major, uint32_t minor,
+				    uint32_t read_ahead, uint32_t read_ahead_flags)
+{
+	uint32_t current_read_ahead;
+
+	if (read_ahead == DM_READ_AHEAD_AUTO)
+		return 1;
+
+	if (read_ahead == DM_READ_AHEAD_NONE)
+		read_ahead = 0;
+
+	if (read_ahead_flags & DM_READ_AHEAD_MINIMUM_FLAG) {
+		if (!get_dev_node_read_ahead(dev_name, major, minor, &current_read_ahead))
+			return_0;
+
+		if (current_read_ahead >= read_ahead) {
+			log_debug_activation("%s: retaining kernel read ahead of %" PRIu32
+					     " (requested %" PRIu32 ")",
+					     dev_name, current_read_ahead, read_ahead);
+			return 1;
+		}
+	}
+
+	return _set_read_ahead(dev_name, major, minor, read_ahead);
+}
+
+#else
+
+int get_dev_node_read_ahead(const char *dev_name, uint32_t major, uint32_t minor,
+			    uint32_t *read_ahead)
+{
+	*read_ahead = 0;
+
+	return 1;
+}
+
+static int _set_dev_node_read_ahead(const char *dev_name,
+				    uint32_t major, uint32_t minor,
+				    uint32_t read_ahead, uint32_t read_ahead_flags)
+{
+	return 1;
+}
+#endif
+
+typedef enum {
+	NODE_ADD,
+	NODE_DEL,
+	NODE_RENAME,
+	NODE_READ_AHEAD,
+	NUM_NODES
+} node_op_t;
+
+static int _do_node_op(node_op_t type, const char *dev_name, uint32_t major,
+		       uint32_t minor, uid_t uid, gid_t gid, mode_t mode,
+		       const char *old_name, uint32_t read_ahead,
+		       uint32_t read_ahead_flags, int warn_if_udev_failed)
+{
+	switch (type) {
+	case NODE_ADD:
+		return _add_dev_node(dev_name, major, minor, uid, gid,
+				     mode, warn_if_udev_failed);
+	case NODE_DEL:
+		return _rm_dev_node(dev_name, warn_if_udev_failed);
+	case NODE_RENAME:
+		return _rename_dev_node(old_name, dev_name, warn_if_udev_failed);
+	case NODE_READ_AHEAD:
+		return _set_dev_node_read_ahead(dev_name, major, minor,
+						read_ahead, read_ahead_flags);
+	default:
+		; /* NOTREACHED */
+	}
+
+	return 1;
+}
+
+static DM_LIST_INIT(_node_ops);
+static int _count_node_ops[NUM_NODES];
+
+struct node_op_parms {
+	struct dm_list list;
+	node_op_t type;
+	char *dev_name;
+	uint32_t major;
+	uint32_t minor;
+	uid_t uid;
+	gid_t gid;
+	mode_t mode;
+	uint32_t read_ahead;
+	uint32_t read_ahead_flags;
+	char *old_name;
+	int warn_if_udev_failed;
+	unsigned rely_on_udev;
+	char names[0];
+};
+
+static void _store_str(char **pos, char **ptr, const char *str)
+{
+	strcpy(*pos, str);
+	*ptr = *pos;
+	*pos += strlen(*ptr) + 1;
+}
+
+static void _del_node_op(struct node_op_parms *nop)
+{
+	_count_node_ops[nop->type]--;
+	dm_list_del(&nop->list);
+	dm_free(nop);
+}
+
+/* Check if any other type of node operation is stacked */
+static int _other_node_ops(node_op_t type)
+{
+	unsigned i;
+
+	for (i = 0; i < NUM_NODES; i++)
+		if (type != i && _count_node_ops[i])
+			return 1;
+	return 0;
+}
+
+static void _log_node_op(const char *action_str, struct node_op_parms *nop)
+{
+	const char *rely = nop->rely_on_udev ? " [trust_udev]" : "";
+	const char *verify = nop->warn_if_udev_failed ? " [verify_udev]" : "";
+
+	switch (nop->type) {
+	case NODE_ADD:
+		log_debug_activation("%s: %s NODE_ADD (%" PRIu32 ",%" PRIu32 ") %u:%u 0%o%s%s",
+				     nop->dev_name, action_str, nop->major, nop->minor, nop->uid, nop->gid, nop->mode,
+				     rely, verify);
+		break;
+	case NODE_DEL:
+		log_debug_activation("%s: %s NODE_DEL%s%s", nop->dev_name, action_str, rely, verify);
+		break;
+	case NODE_RENAME:
+		log_debug_activation("%s: %s NODE_RENAME to %s%s%s", nop->old_name, action_str, nop->dev_name, rely, verify);
+		break;
+	case NODE_READ_AHEAD:
+		log_debug_activation("%s: %s NODE_READ_AHEAD %" PRIu32 " (flags=%" PRIu32 ")%s%s",
+				     nop->dev_name, action_str, nop->read_ahead, nop->read_ahead_flags, rely, verify);
+		break;
+	default:
+		; /* NOTREACHED */
+	}
+}
+
+static int _stack_node_op(node_op_t type, const char *dev_name, uint32_t major,
+			  uint32_t minor, uid_t uid, gid_t gid, mode_t mode,
+			  const char *old_name, uint32_t read_ahead,
+			  uint32_t read_ahead_flags, int warn_if_udev_failed,
+			  unsigned rely_on_udev)
+{
+	struct node_op_parms *nop;
+	struct dm_list *noph, *nopht;
+	size_t len = strlen(dev_name) + strlen(old_name) + 2;
+	char *pos;
+
+	/*
+	 * Note: warn_if_udev_failed must have valid content
+	 */
+	if ((type == NODE_DEL) && _other_node_ops(type))
+		/*
+		 * Ignore any outstanding operations on the node if deleting it.
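+		 * A stacked ADD, RENAME or READ_AHEAD for this node would
+		 * only create state that the DEL immediately destroys again,
+		 * so those entries can simply be dropped.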
+		 */
+		dm_list_iterate_safe(noph, nopht, &_node_ops) {
+			nop = dm_list_item(noph, struct node_op_parms);
+			if (!strcmp(dev_name, nop->dev_name)) {
+				_log_node_op("Unstacking", nop);
+				_del_node_op(nop);
+				if (!_other_node_ops(type))
+					break; /* no other non DEL ops */
+			}
+		}
+	else if ((type == NODE_ADD) && _count_node_ops[NODE_DEL])
+		/*
+		 * Ignore a previous DEL operation on the added node.
+		 * (No operations other than DEL could be stacked for this device here.)
+		 */
+		dm_list_iterate_safe(noph, nopht, &_node_ops) {
+			nop = dm_list_item(noph, struct node_op_parms);
+			if ((nop->type == NODE_DEL) &&
+			    !strcmp(dev_name, nop->dev_name)) {
+				_log_node_op("Unstacking", nop);
+				_del_node_op(nop);
+				break; /* no other DEL ops */
+			}
+		}
+	else if (type == NODE_RENAME)
+		/*
+		 * Ignore any outstanding operations if renaming the node.
+		 *
+		 * Currently the RENAME operation happens through 'suspend -> resume'.
+		 * On 'resume' the device is added with its read_ahead settings, so it
+		 * is safe to remove any stacked ADD, RENAME or READ_AHEAD operation.
+		 * There cannot be any DEL operation on the renamed device.
+		 */
+		dm_list_iterate_safe(noph, nopht, &_node_ops) {
+			nop = dm_list_item(noph, struct node_op_parms);
+			if (!strcmp(old_name, nop->dev_name)) {
+				_log_node_op("Unstacking", nop);
+				_del_node_op(nop);
+			}
+		}
+	else if (type == NODE_READ_AHEAD) {
+		/* udev doesn't process readahead */
+		rely_on_udev = 0;
+		warn_if_udev_failed = 0;
+	}
+
+	if (!(nop = dm_malloc(sizeof(*nop) + len))) {
+		log_error("Insufficient memory to stack mknod operation");
+		return 0;
+	}
+
+	pos = nop->names;
+	nop->type = type;
+	nop->major = major;
+	nop->minor = minor;
+	nop->uid = uid;
+	nop->gid = gid;
+	nop->mode = mode;
+	nop->read_ahead = read_ahead;
+	nop->read_ahead_flags = read_ahead_flags;
+	nop->rely_on_udev = rely_on_udev;
+
+	/*
+	 * Clear warn_if_udev_failed if rely_on_udev is set. It doesn't get
+	 * checked in this case - this just removes the flag from log messages.
+	 */
+	nop->warn_if_udev_failed = rely_on_udev ?
0 : warn_if_udev_failed; + + _store_str(&pos, &nop->dev_name, dev_name); + _store_str(&pos, &nop->old_name, old_name); + + _count_node_ops[type]++; + dm_list_add(&_node_ops, &nop->list); + + _log_node_op("Stacking", nop); + + return 1; +} + +static void _pop_node_ops(void) +{ + struct dm_list *noph, *nopht; + struct node_op_parms *nop; + + dm_list_iterate_safe(noph, nopht, &_node_ops) { + nop = dm_list_item(noph, struct node_op_parms); + if (!nop->rely_on_udev) { + _log_node_op("Processing", nop); + _do_node_op(nop->type, nop->dev_name, nop->major, nop->minor, + nop->uid, nop->gid, nop->mode, nop->old_name, + nop->read_ahead, nop->read_ahead_flags, + nop->warn_if_udev_failed); + } else + _log_node_op("Skipping", nop); + _del_node_op(nop); + } +} + +int add_dev_node(const char *dev_name, uint32_t major, uint32_t minor, + uid_t uid, gid_t gid, mode_t mode, int check_udev, unsigned rely_on_udev) +{ + return _stack_node_op(NODE_ADD, dev_name, major, minor, uid, + gid, mode, "", 0, 0, check_udev, rely_on_udev); +} + +int rename_dev_node(const char *old_name, const char *new_name, int check_udev, unsigned rely_on_udev) +{ + return _stack_node_op(NODE_RENAME, new_name, 0, 0, 0, + 0, 0, old_name, 0, 0, check_udev, rely_on_udev); +} + +int rm_dev_node(const char *dev_name, int check_udev, unsigned rely_on_udev) +{ + return _stack_node_op(NODE_DEL, dev_name, 0, 0, 0, + 0, 0, "", 0, 0, check_udev, rely_on_udev); +} + +int set_dev_node_read_ahead(const char *dev_name, + uint32_t major, uint32_t minor, + uint32_t read_ahead, uint32_t read_ahead_flags) +{ + if (read_ahead == DM_READ_AHEAD_AUTO) + return 1; + + return _stack_node_op(NODE_READ_AHEAD, dev_name, major, minor, 0, 0, + 0, "", read_ahead, read_ahead_flags, 0, 0); +} + +void update_devs(void) +{ + _pop_node_ops(); +} + +static int _canonicalize_and_set_dir(const char *src, const char *suffix, size_t max_len, char *dir) +{ + size_t len; + const char *slash; + + if (*src != '/') { + log_debug_activation("Invalid directory value, %s: " + "not an absolute name.", src); + return 0; + } + + len = strlen(src); + slash = src[len-1] == '/' ? "" : "/"; + + if (dm_snprintf(dir, max_len, "%s%s%s", src, slash, suffix ? suffix : "") < 0) { + log_debug_activation("Invalid directory value, %s: name too long.", src); + return 0; + } + + return 1; +} + +int dm_set_dev_dir(const char *dev_dir) +{ + return _canonicalize_and_set_dir(dev_dir, DM_DIR, sizeof _dm_dir, _dm_dir); +} + +const char *dm_dir(void) +{ + return _dm_dir; +} + +int dm_set_sysfs_dir(const char *sysfs_dir) +{ + if (!sysfs_dir || !*sysfs_dir) { + _sysfs_dir[0] = '\0'; + return 1; + } + + return _canonicalize_and_set_dir(sysfs_dir, NULL, sizeof _sysfs_dir, _sysfs_dir); +} + +const char *dm_sysfs_dir(void) +{ + return _sysfs_dir; +} + +/* + * Replace existing uuid_prefix provided it isn't too long. 
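+ * The built-in default prefix is "LVM-"; replacements of up to
+ * DM_MAX_UUID_PREFIX_LEN (15) characters are accepted.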
+ */
+int dm_set_uuid_prefix(const char *uuid_prefix)
+{
+	if (!uuid_prefix)
+		return_0;
+
+	if (strlen(uuid_prefix) > DM_MAX_UUID_PREFIX_LEN) {
+		log_error("New uuid prefix %s too long.", uuid_prefix);
+		return 0;
+	}
+
+	strcpy(_default_uuid_prefix, uuid_prefix);
+
+	return 1;
+}
+
+const char *dm_uuid_prefix(void)
+{
+	return _default_uuid_prefix;
+}
+
+static int _is_octal(int a)
+{
+	return (((a) & ~7) == '0');
+}
+
+/* Convert a mangled mountinfo string into a normal ASCII string */
+static void _unmangle_mountinfo_string(const char *src, char *buf)
+{
+	while (*src) {
+		if ((*src == '\\') &&
+		    _is_octal(src[1]) && _is_octal(src[2]) && _is_octal(src[3])) {
+			*buf++ = 64 * (src[1] & 7) + 8 * (src[2] & 7) + (src[3] & 7);
+			src += 4;
+		} else
+			*buf++ = *src++;
+	}
+	*buf = '\0';
+}
+
+/* Parse one line of mountinfo and unmangle the target line */
+static int _mountinfo_parse_line(const char *line, unsigned *maj, unsigned *min, char *buf)
+{
+	char root[PATH_MAX + 1]; /* sscanf needs extra '\0' */
+	char target[PATH_MAX + 1];
+	char *devmapper;
+	struct dm_task *dmt;
+	struct dm_info info;
+	unsigned i;
+
+	/* TODO: maybe detect availability of %ms glibc support ? */
+	if (sscanf(line, "%*u %*u %u:%u %" DM_TO_STRING(PATH_MAX)
+		   "s %" DM_TO_STRING(PATH_MAX) "s",
+		   maj, min, root, target) < 4) {
+		log_error("Failed to parse mountinfo line.");
+		return 0;
+	}
+
+	/* btrfs fakes device numbers, but there is still a /dev/mapper name
+	 * placed in mountinfo, so try to detect the proper major:minor via this */
+	if (*maj == 0 && (devmapper = strstr(line, "/dev/mapper/"))) {
+		if (!(dmt = dm_task_create(DM_DEVICE_INFO))) {
+			log_error("Mount info task creation failed.");
+			return 0;
+		}
+		devmapper += 12; /* skip fixed prefix */
+		for (i = 0; devmapper[i] && devmapper[i] != ' ' && i < sizeof(root); ++i)
+			root[i] = devmapper[i];
+		root[i] = 0;
+		_unmangle_mountinfo_string(root, buf);
+		buf[DM_NAME_LEN] = 0; /* cut away */
+
+		if (dm_task_set_name(dmt, buf) &&
+		    dm_task_no_open_count(dmt) &&
+		    dm_task_run(dmt) &&
+		    dm_task_get_info(dmt, &info)) {
+			log_debug("Replacing mountinfo device (%u:%u) with matching DM device %s (%u:%u).",
+				  *maj, *min, buf, info.major, info.minor);
+			*maj = info.major;
+			*min = info.minor;
+		}
+		dm_task_destroy(dmt);
+	}
+
+	_unmangle_mountinfo_string(target, buf);
+
+	return 1;
+}
+
+/*
+ * Read mountinfo line by line and call read_fn() on each line;
+ * major, minor and the mount target are parsed and unmangled first.
+ */
+int dm_mountinfo_read(dm_mountinfo_line_callback_fn read_fn, void *cb_data)
+{
+	FILE *minfo;
+	char buffer[2 * PATH_MAX];
+	char target[PATH_MAX];
+	unsigned maj, min;
+	int r = 1;
+
+	if (!(minfo = fopen(_mountinfo, "r"))) {
+		if (errno != ENOENT)
+			log_sys_error("fopen", _mountinfo);
+		else
+			log_sys_debug("fopen", _mountinfo);
+		return 0;
+	}
+
+	while (!feof(minfo) && fgets(buffer, sizeof(buffer), minfo))
+		if (!_mountinfo_parse_line(buffer, &maj, &min, target) ||
+		    !read_fn(buffer, maj, min, target, cb_data)) {
+			stack;
+			r = 0;
+			break;
+		}
+
+	if (fclose(minfo))
+		log_sys_error("fclose", _mountinfo);
+
+	return r;
+}
+
+static int _sysfs_get_dm_name(uint32_t major, uint32_t minor, char *buf, size_t buf_size)
+{
+	char *sysfs_path, *temp_buf = NULL;
+	FILE *fp = NULL;
+	int r = 0;
+	size_t len;
+
+	if (!(sysfs_path = dm_malloc(PATH_MAX)) ||
+	    !(temp_buf = dm_malloc(PATH_MAX))) {
+		log_error("_sysfs_get_dm_name: failed to allocate temporary buffers");
+		goto bad;
+	}
+
+	if (dm_snprintf(sysfs_path, PATH_MAX, "%sdev/block/%" PRIu32 ":%" PRIu32
+			"/dm/name", _sysfs_dir, major, minor) <
0) { + log_error("_sysfs_get_dm_name: dm_snprintf failed"); + goto bad; + } + + if (!(fp = fopen(sysfs_path, "r"))) { + if (errno != ENOENT) + log_sys_error("fopen", sysfs_path); + else + log_sys_debug("fopen", sysfs_path); + goto bad; + } + + if (!fgets(temp_buf, PATH_MAX, fp)) { + log_sys_error("fgets", sysfs_path); + goto bad; + } + + len = strlen(temp_buf); + + if (len > buf_size) { + log_error("_sysfs_get_dm_name: supplied buffer too small"); + goto bad; + } + + temp_buf[len ? len - 1 : 0] = '\0'; /* \n */ + strcpy(buf, temp_buf); + r = 1; +bad: + if (fp && fclose(fp)) + log_sys_error("fclose", sysfs_path); + + dm_free(temp_buf); + dm_free(sysfs_path); + + return r; +} + +static int _sysfs_get_kernel_name(uint32_t major, uint32_t minor, char *buf, size_t buf_size) +{ + char *name, *sysfs_path, *temp_buf = NULL; + ssize_t size; + size_t len; + int r = 0; + + if (!(sysfs_path = dm_malloc(PATH_MAX)) || + !(temp_buf = dm_malloc(PATH_MAX))) { + log_error("_sysfs_get_kernel_name: failed to allocate temporary buffers"); + goto bad; + } + + if (dm_snprintf(sysfs_path, PATH_MAX, "%sdev/block/%" PRIu32 ":%" PRIu32, + _sysfs_dir, major, minor) < 0) { + log_error("_sysfs_get_kernel_name: dm_snprintf failed"); + goto bad; + } + + if ((size = readlink(sysfs_path, temp_buf, PATH_MAX - 1)) < 0) { + if (errno != ENOENT) + log_sys_error("readlink", sysfs_path); + else + log_sys_debug("readlink", sysfs_path); + goto bad; + } + temp_buf[size] = '\0'; + + if (!(name = strrchr(temp_buf, '/'))) { + log_error("Could not locate device kernel name in sysfs path %s", temp_buf); + goto bad; + } + name += 1; + len = size - (name - temp_buf) + 1; + + if (len > buf_size) { + log_error("_sysfs_get_kernel_name: output buffer too small"); + goto bad; + } + + strcpy(buf, name); + r = 1; +bad: + dm_free(temp_buf); + dm_free(sysfs_path); + + return r; +} + +int dm_device_get_name(uint32_t major, uint32_t minor, int prefer_kernel_name, + char *buf, size_t buf_size) +{ + if (!*_sysfs_dir) + return 0; + + /* + * device-mapper devices and prefer_kernel_name = 0 + * get dm name by reading /sys/dev/block/major:minor/dm/name, + * fallback to _sysfs_get_kernel_name if not successful + */ + if (dm_is_dm_major(major) && !prefer_kernel_name) { + if (_sysfs_get_dm_name(major, minor, buf, buf_size)) + return 1; + else + stack; + } + + /* + * non-device-mapper devices or prefer_kernel_name = 1 + * get kernel name using readlink /sys/dev/block/major:minor -> .../dm-X + */ + return _sysfs_get_kernel_name(major, minor, buf, buf_size); +} + +int dm_device_has_holders(uint32_t major, uint32_t minor) +{ + char sysfs_path[PATH_MAX]; + struct stat st; + + if (!*_sysfs_dir) + return 0; + + if (dm_snprintf(sysfs_path, PATH_MAX, "%sdev/block/%" PRIu32 + ":%" PRIu32 "/holders", _sysfs_dir, major, minor) < 0) { + log_warn("WARNING: sysfs_path dm_snprintf failed."); + return 0; + } + + if (stat(sysfs_path, &st)) { + if (errno != ENOENT) + log_sys_debug("stat", sysfs_path); + return 0; + } + + return !dm_is_empty_dir(sysfs_path); +} + +static int _mounted_fs_on_device(const char *kernel_dev_name) +{ + char sysfs_path[PATH_MAX]; + struct dirent *dirent; + DIR *d; + struct stat st; + int r = 0; + + if (dm_snprintf(sysfs_path, PATH_MAX, "%sfs", _sysfs_dir) < 0) { + log_warn("WARNING: sysfs_path dm_snprintf failed."); + return 0; + } + + if (!(d = opendir(sysfs_path))) { + if (errno != ENOENT) + log_sys_debug("opendir", sysfs_path); + return 0; + } + + while ((dirent = readdir(d))) { + if (!strcmp(dirent->d_name, ".") || !strcmp(dirent->d_name, "..")) + 
continue; + + if (dm_snprintf(sysfs_path, PATH_MAX, "%sfs/%s/%s", + _sysfs_dir, dirent->d_name, kernel_dev_name) < 0) { + log_warn("WARNING: sysfs_path dm_snprintf failed."); + break; + } + + if (!stat(sysfs_path, &st)) { + /* found! */ + r = 1; + break; + } + else if (errno != ENOENT) { + log_sys_debug("stat", sysfs_path); + break; + } + } + + if (closedir(d)) + log_sys_debug("closedir", kernel_dev_name); + + return r; +} + +struct mountinfo_s { + unsigned maj; + unsigned min; + int mounted; +}; + +static int _device_has_mounted_fs(char *buffer, unsigned major, unsigned minor, + char *target, void *cb_data) +{ + struct mountinfo_s *data = cb_data; + char kernel_dev_name[PATH_MAX]; + + if ((major == data->maj) && (minor == data->min)) { + if (!dm_device_get_name(major, minor, 1, kernel_dev_name, + sizeof(kernel_dev_name))) { + stack; + *kernel_dev_name = '\0'; + } + log_verbose("Device %s (%u:%u) appears to be mounted on %s.", + kernel_dev_name, major, minor, target); + data->mounted = 1; + } + + return 1; +} + +int dm_device_has_mounted_fs(uint32_t major, uint32_t minor) +{ + char kernel_dev_name[PATH_MAX]; + struct mountinfo_s data = { + .maj = major, + .min = minor, + }; + + if (!dm_mountinfo_read(_device_has_mounted_fs, &data)) + stack; + + if (data.mounted) + return 1; + /* + * TODO: Verify dm_mountinfo_read() is superset + * and remove sysfs check (namespaces) + */ + /* Get kernel device name first */ + if (!dm_device_get_name(major, minor, 1, kernel_dev_name, PATH_MAX)) + return 0; + + /* Check /sys/fs/<fs_name>/<kernel_dev_name> presence */ + return _mounted_fs_on_device(kernel_dev_name); +} + +int dm_mknodes(const char *name) +{ + struct dm_task *dmt; + int r = 0; + + if (!(dmt = dm_task_create(DM_DEVICE_MKNODES))) + return_0; + + if (name && !dm_task_set_name(dmt, name)) + goto out; + + if (!dm_task_no_open_count(dmt)) + goto out; + + r = dm_task_run(dmt); + +out: + dm_task_destroy(dmt); + return r; +} + +int dm_driver_version(char *version, size_t size) +{ + struct dm_task *dmt; + int r = 0; + + if (!(dmt = dm_task_create(DM_DEVICE_VERSION))) + return_0; + + if (!dm_task_run(dmt)) + log_error("Failed to get driver version"); + + if (!dm_task_get_driver_version(dmt, version, size)) + goto out; + + r = 1; + +out: + dm_task_destroy(dmt); + return r; +} + +static void _set_cookie_flags(struct dm_task *dmt, uint16_t flags) +{ + if (!dm_cookie_supported()) + return; + + if (_udev_disabled) { + /* + * If udev is disabled, hardcode this functionality: + * - we want libdm to create the nodes + * - we don't want the /dev/mapper and any subsystem + * related content to be created by udev if udev + * rules are installed + */ + flags &= ~DM_UDEV_DISABLE_LIBRARY_FALLBACK; + flags |= DM_UDEV_DISABLE_DM_RULES_FLAG | DM_UDEV_DISABLE_SUBSYSTEM_RULES_FLAG; + } + + dmt->event_nr = flags << DM_UDEV_FLAGS_SHIFT; +} + +#ifndef UDEV_SYNC_SUPPORT +void dm_udev_set_sync_support(int sync_with_udev) +{ +} + +int dm_udev_get_sync_support(void) +{ + return 0; +} + +void dm_udev_set_checking(int checking) +{ +} + +int dm_udev_get_checking(void) +{ + return 0; +} + +int dm_task_set_cookie(struct dm_task *dmt, uint32_t *cookie, uint16_t flags) +{ + _set_cookie_flags(dmt, flags); + + *cookie = 0; + dmt->cookie_set = 1; + + return 1; +} + +int dm_udev_complete(uint32_t cookie) +{ + return 1; +} + +int dm_udev_wait(uint32_t cookie) +{ + update_devs(); + + return 1; +} + +int dm_udev_wait_immediate(uint32_t cookie, int *ready) +{ + update_devs(); + *ready = 1; + + return 1; +} + +#else /* UDEV_SYNC_SUPPORT */ + 
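+/*
+ * How the udev synchronisation below works, in short: each cookie is
+ * backed by a System V semaphore whose key is the cookie value itself
+ * (with DM_COOKIE_MAGIC in the upper 16 bits). The library bumps the
+ * semaphore before the ioctl generates a uevent, the udev rules drop
+ * it again (via dm_udev_complete()) once they have finished, and
+ * dm_udev_wait() releases its own reference, sleeps until the count
+ * reaches zero and then destroys the semaphore.
+ *
+ * A minimal caller-side sketch (illustrative only - "foo" is a made-up
+ * device name and all error handling is omitted):
+ *
+ *	uint32_t cookie = DM_COOKIE_AUTO_CREATE;
+ *	struct dm_task *dmt = dm_task_create(DM_DEVICE_REMOVE);
+ *
+ *	dm_task_set_name(dmt, "foo");
+ *	dm_task_set_cookie(dmt, &cookie, 0);	// semaphore created and incremented
+ *	dm_task_run(dmt);			// uevent sent with the cookie attached
+ *	dm_udev_wait(cookie);			// blocks until udev has finished
+ *	dm_task_destroy(dmt);
+ */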
+static int _check_semaphore_is_supported(void) +{ + int maxid; + union semun arg; + struct seminfo seminfo; + + arg.__buf = &seminfo; + maxid = semctl(0, 0, SEM_INFO, arg); + + if (maxid < 0) { + log_warn("Kernel not configured for semaphores (System V IPC). " + "Not using udev synchronisation code."); + return 0; + } + + return 1; +} + +static int _check_udev_is_running(void) +{ + struct udev *udev; + struct udev_queue *udev_queue; + int r; + + if (!(udev = udev_new())) + goto_bad; + + if (!(udev_queue = udev_queue_new(udev))) { + udev_unref(udev); + goto_bad; + } + + if (!(r = udev_queue_get_udev_is_active(udev_queue))) + log_debug_activation("Udev is not running. " + "Not using udev synchronisation code."); + + udev_queue_unref(udev_queue); + udev_unref(udev); + + return r; + +bad: + log_error("Could not get udev state. Assuming udev is not running."); + return 0; +} + +static void _check_udev_sync_requirements_once(void) +{ + if (_semaphore_supported < 0) + _semaphore_supported = _check_semaphore_is_supported(); + + if (_udev_running < 0) { + _udev_running = _check_udev_is_running(); + if (_udev_disabled && _udev_running) + log_warn("Udev is running and DM_DISABLE_UDEV environment variable is set. " + "Bypassing udev, device-mapper library will manage device " + "nodes in device directory."); + } +} + +void dm_udev_set_sync_support(int sync_with_udev) +{ + _check_udev_sync_requirements_once(); + _sync_with_udev = sync_with_udev; +} + +int dm_udev_get_sync_support(void) +{ + _check_udev_sync_requirements_once(); + + return !_udev_disabled && _semaphore_supported && + dm_cookie_supported() &&_udev_running && _sync_with_udev; +} + +void dm_udev_set_checking(int checking) +{ + if ((_udev_checking = checking)) + log_debug_activation("DM udev checking enabled"); + else + log_debug_activation("DM udev checking disabled"); +} + +int dm_udev_get_checking(void) +{ + return _udev_checking; +} + +static int _get_cookie_sem(uint32_t cookie, int *semid) +{ + if (cookie >> 16 != DM_COOKIE_MAGIC) { + log_error("Could not continue to access notification " + "semaphore identified by cookie value %" + PRIu32 " (0x%x). 
Incorrect cookie prefix.",
+			  cookie, cookie);
+		return 0;
+	}
+
+	if ((*semid = semget((key_t) cookie, 1, 0)) >= 0)
+		return 1;
+
+	switch (errno) {
+		case ENOENT:
+			log_error("Could not find notification "
+				  "semaphore identified by cookie "
+				  "value %" PRIu32 " (0x%x)",
+				  cookie, cookie);
+			break;
+		case EACCES:
+			log_error("No permission to access "
+				  "notification semaphore identified "
+				  "by cookie value %" PRIu32 " (0x%x)",
+				  cookie, cookie);
+			break;
+		default:
+			log_error("Failed to access notification "
+				  "semaphore identified by cookie "
+				  "value %" PRIu32 " (0x%x): %s",
+				  cookie, cookie, strerror(errno));
+			break;
+	}
+
+	return 0;
+}
+
+static int _udev_notify_sem_inc(uint32_t cookie, int semid)
+{
+	struct sembuf sb = {0, 1, 0};
+	int val;
+
+	if (semop(semid, &sb, 1) < 0) {
+		log_error("semid %d: semop failed for cookie 0x%" PRIx32 ": %s",
+			  semid, cookie, strerror(errno));
+		return 0;
+	}
+
+	if ((val = semctl(semid, 0, GETVAL)) < 0) {
+		log_error("semid %d: sem_ctl GETVAL failed for "
+			  "cookie 0x%" PRIx32 ": %s",
+			  semid, cookie, strerror(errno));
+		return 0;
+	}
+
+	log_debug_activation("Udev cookie 0x%" PRIx32 " (semid %d) incremented to %d",
+			     cookie, semid, val);
+
+	return 1;
+}
+
+static int _udev_notify_sem_dec(uint32_t cookie, int semid)
+{
+	struct sembuf sb = {0, -1, IPC_NOWAIT};
+	int val;
+
+	if ((val = semctl(semid, 0, GETVAL)) < 0) {
+		log_error("semid %d: sem_ctl GETVAL failed for "
+			  "cookie 0x%" PRIx32 ": %s",
+			  semid, cookie, strerror(errno));
+		return 0;
+	}
+
+	if (semop(semid, &sb, 1) < 0) {
+		switch (errno) {
+			case EAGAIN:
+				log_error("semid %d: semop failed for cookie "
+					  "0x%" PRIx32 ": "
+					  "incorrect semaphore state",
+					  semid, cookie);
+				break;
+			default:
+				log_error("semid %d: semop failed for cookie "
+					  "0x%" PRIx32 ": %s",
+					  semid, cookie, strerror(errno));
+				break;
+		}
+		return 0;
+	}
+
+	log_debug_activation("Udev cookie 0x%" PRIx32 " (semid %d) decremented to %d",
+			     cookie, semid, val - 1);
+
+	return 1;
+}
+
+static int _udev_notify_sem_destroy(uint32_t cookie, int semid)
+{
+	if (semctl(semid, 0, IPC_RMID, 0) < 0) {
+		log_error("Could not clean up notification semaphore "
+			  "identified by cookie value %" PRIu32 " (0x%x): %s",
+			  cookie, cookie, strerror(errno));
+		return 0;
+	}
+
+	log_debug_activation("Udev cookie 0x%" PRIx32 " (semid %d) destroyed", cookie,
+			     semid);
+
+	return 1;
+}
+
+static int _udev_notify_sem_create(uint32_t *cookie, int *semid)
+{
+	int fd;
+	int gen_semid;
+	int val;
+	uint16_t base_cookie;
+	uint32_t gen_cookie;
+	union semun sem_arg;
+
+	if ((fd = open("/dev/urandom", O_RDONLY)) < 0) {
+		log_error("Failed to open /dev/urandom "
+			  "to create a random cookie value");
+		*cookie = 0;
+		return 0;
+	}
+
+	/* Generate a random cookie value. Be sure it is unique and non-zero. */
+	do {
+		/* FIXME Handle non-error returns from read(). Move _io() into libdm? */
+		if (read(fd, &base_cookie, sizeof(base_cookie)) != sizeof(base_cookie)) {
+			log_error("Failed to initialize notification cookie");
+			goto bad;
+		}
+
+		gen_cookie = DM_COOKIE_MAGIC << 16 | base_cookie;
+
+		if (base_cookie && (gen_semid = semget((key_t) gen_cookie,
+				    1, 0600 | IPC_CREAT | IPC_EXCL)) < 0) {
+			switch (errno) {
+				case EEXIST:
+					/* if the semaphore key exists, we
+					 * simply generate another random one */
+					base_cookie = 0;
+					break;
+				case ENOMEM:
+					log_error("Not enough memory to create "
+						  "notification semaphore");
+					goto bad;
+				case ENOSPC:
+					log_error("Limit for the maximum number "
+						  "of semaphores reached.
You can " + "check and set the limits in " + "/proc/sys/kernel/sem."); + goto bad; + default: + log_error("Failed to create notification " + "semaphore: %s", strerror(errno)); + goto bad; + } + } + } while (!base_cookie); + + log_debug_activation("Udev cookie 0x%" PRIx32 " (semid %d) created", + gen_cookie, gen_semid); + + sem_arg.val = 1; + + if (semctl(gen_semid, 0, SETVAL, sem_arg) < 0) { + log_error("semid %d: semctl failed: %s", gen_semid, strerror(errno)); + /* We have to destroy just created semaphore + * so it won't stay in the system. */ + (void) _udev_notify_sem_destroy(gen_cookie, gen_semid); + goto bad; + } + + if ((val = semctl(gen_semid, 0, GETVAL)) < 0) { + log_error("semid %d: sem_ctl GETVAL failed for " + "cookie 0x%" PRIx32 ": %s", + gen_semid, gen_cookie, strerror(errno)); + goto bad; + } + + log_debug_activation("Udev cookie 0x%" PRIx32 " (semid %d) incremented to %d", + gen_cookie, gen_semid, val); + + if (close(fd)) + stack; + + *semid = gen_semid; + *cookie = gen_cookie; + + return 1; + +bad: + if (close(fd)) + stack; + + *cookie = 0; + + return 0; +} + +int dm_udev_create_cookie(uint32_t *cookie) +{ + int semid; + + if (!dm_udev_get_sync_support()) { + *cookie = 0; + return 1; + } + + return _udev_notify_sem_create(cookie, &semid); +} + +static const char *_task_type_disp(int type) +{ + switch(type) { + case DM_DEVICE_CREATE: + return "CREATE"; + case DM_DEVICE_RELOAD: + return "RELOAD"; + case DM_DEVICE_REMOVE: + return "REMOVE"; + case DM_DEVICE_REMOVE_ALL: + return "REMOVE_ALL"; + case DM_DEVICE_SUSPEND: + return "SUSPEND"; + case DM_DEVICE_RESUME: + return "RESUME"; + case DM_DEVICE_INFO: + return "INFO"; + case DM_DEVICE_DEPS: + return "DEPS"; + case DM_DEVICE_RENAME: + return "RENAME"; + case DM_DEVICE_VERSION: + return "VERSION"; + case DM_DEVICE_STATUS: + return "STATUS"; + case DM_DEVICE_TABLE: + return "TABLE"; + case DM_DEVICE_WAITEVENT: + return "WAITEVENT"; + case DM_DEVICE_LIST: + return "LIST"; + case DM_DEVICE_CLEAR: + return "CLEAR"; + case DM_DEVICE_MKNODES: + return "MKNODES"; + case DM_DEVICE_LIST_VERSIONS: + return "LIST_VERSIONS"; + case DM_DEVICE_TARGET_MSG: + return "TARGET_MSG"; + case DM_DEVICE_SET_GEOMETRY: + return "SET_GEOMETRY"; + } + return "unknown"; +} + +int dm_task_set_cookie(struct dm_task *dmt, uint32_t *cookie, uint16_t flags) +{ + int semid; + + _set_cookie_flags(dmt, flags); + + if (!dm_udev_get_sync_support()) { + *cookie = 0; + dmt->cookie_set = 1; + return 1; + } + + if (*cookie) { + if (!_get_cookie_sem(*cookie, &semid)) + goto_bad; + } else if (!_udev_notify_sem_create(cookie, &semid)) + goto_bad; + + if (!_udev_notify_sem_inc(*cookie, semid)) { + log_error("Could not set notification semaphore " + "identified by cookie value %" PRIu32 " (0x%x)", + *cookie, *cookie); + goto bad; + } + + dmt->event_nr |= ~DM_UDEV_FLAGS_MASK & *cookie; + dmt->cookie_set = 1; + + log_debug_activation("Udev cookie 0x%" PRIx32 " (semid %d) assigned to " + "%s task(%d) with flags%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s (0x%" PRIx16 ")", + *cookie, semid, _task_type_disp(dmt->type), dmt->type, + (flags & DM_UDEV_DISABLE_DM_RULES_FLAG) ? " DISABLE_DM_RULES" : "", + (flags & DM_UDEV_DISABLE_SUBSYSTEM_RULES_FLAG) ? " DISABLE_SUBSYSTEM_RULES" : "", + (flags & DM_UDEV_DISABLE_DISK_RULES_FLAG) ? " DISABLE_DISK_RULES" : "", + (flags & DM_UDEV_DISABLE_OTHER_RULES_FLAG) ? " DISABLE_OTHER_RULES" : "", + (flags & DM_UDEV_LOW_PRIORITY_FLAG) ? " LOW_PRIORITY" : "", + (flags & DM_UDEV_DISABLE_LIBRARY_FALLBACK) ? 
" DISABLE_LIBRARY_FALLBACK" : "", + (flags & DM_UDEV_PRIMARY_SOURCE_FLAG) ? " PRIMARY_SOURCE" : "", + (flags & DM_SUBSYSTEM_UDEV_FLAG0) ? " SUBSYSTEM_0" : " ", + (flags & DM_SUBSYSTEM_UDEV_FLAG1) ? " SUBSYSTEM_1" : " ", + (flags & DM_SUBSYSTEM_UDEV_FLAG2) ? " SUBSYSTEM_2" : " ", + (flags & DM_SUBSYSTEM_UDEV_FLAG3) ? " SUBSYSTEM_3" : " ", + (flags & DM_SUBSYSTEM_UDEV_FLAG4) ? " SUBSYSTEM_4" : " ", + (flags & DM_SUBSYSTEM_UDEV_FLAG5) ? " SUBSYSTEM_5" : " ", + (flags & DM_SUBSYSTEM_UDEV_FLAG6) ? " SUBSYSTEM_6" : " ", + (flags & DM_SUBSYSTEM_UDEV_FLAG7) ? " SUBSYSTEM_7" : " ", + flags); + + return 1; + +bad: + dmt->event_nr = 0; + return 0; +} + +int dm_udev_complete(uint32_t cookie) +{ + int semid; + + if (!cookie || !dm_udev_get_sync_support()) + return 1; + + if (!_get_cookie_sem(cookie, &semid)) + return_0; + + if (!_udev_notify_sem_dec(cookie, semid)) { + log_error("Could not signal waiting process using notification " + "semaphore identified by cookie value %" PRIu32 " (0x%x)", + cookie, cookie); + return 0; + } + + return 1; +} + +/* + * If *nowait is set, return immediately leaving it set if the semaphore + * is not ready to be decremented to 0. *nowait is cleared if the wait + * succeeds. + */ +static int _udev_wait(uint32_t cookie, int *nowait) +{ + int semid; + struct sembuf sb = {0, 0, 0}; + int val; + + if (!cookie || !dm_udev_get_sync_support()) + return 1; + + if (!_get_cookie_sem(cookie, &semid)) + return_0; + + /* Return immediately if the semaphore value exceeds 1? */ + if (*nowait) { + if ((val = semctl(semid, 0, GETVAL)) < 0) { + log_error("semid %d: sem_ctl GETVAL failed for " + "cookie 0x%" PRIx32 ": %s", + semid, cookie, strerror(errno)); + return 0; + } + + if (val > 1) + return 1; + + *nowait = 0; + } + + if (!_udev_notify_sem_dec(cookie, semid)) { + log_error("Failed to set a proper state for notification " + "semaphore identified by cookie value %" PRIu32 " (0x%x) " + "to initialize waiting for incoming notifications.", + cookie, cookie); + (void) _udev_notify_sem_destroy(cookie, semid); + return 0; + } + + log_debug_activation("Udev cookie 0x%" PRIx32 " (semid %d) waiting for zero", + cookie, semid); + +repeat_wait: + if (semop(semid, &sb, 1) < 0) { + if (errno == EINTR) + goto repeat_wait; + else if (errno == EIDRM) + return 1; + + log_error("Could not set wait state for notification semaphore " + "identified by cookie value %" PRIu32 " (0x%x): %s", + cookie, cookie, strerror(errno)); + (void) _udev_notify_sem_destroy(cookie, semid); + return 0; + } + + return _udev_notify_sem_destroy(cookie, semid); +} + +int dm_udev_wait(uint32_t cookie) +{ + int nowait = 0; + int r = _udev_wait(cookie, &nowait); + + update_devs(); + + return r; +} + +int dm_udev_wait_immediate(uint32_t cookie, int *ready) +{ + int nowait = 1; + int r = _udev_wait(cookie, &nowait); + + if (r && nowait) { + *ready = 0; + return 1; + } + + update_devs(); + *ready = 1; + + return r; +} +#endif /* UDEV_SYNC_SUPPORT */ diff --git a/device_mapper/libdm-common.h b/device_mapper/libdm-common.h new file mode 100644 index 000000000..010d87674 --- /dev/null +++ b/device_mapper/libdm-common.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. + * Copyright (C) 2004-2012 Red Hat, Inc. All rights reserved. + * + * This file is part of the device-mapper userspace tools. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU Lesser General Public License v.2.1. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef LIB_DMCOMMON_H +#define LIB_DMCOMMON_H + +#include "libdevmapper.h" + +#define DM_DEFAULT_NAME_MANGLING_MODE_ENV_VAR_NAME "DM_DEFAULT_NAME_MANGLING_MODE" + +#define DEV_NAME(dmt) (dmt->mangled_dev_name ? : dmt->dev_name) +#define DEV_UUID(DMT) (dmt->mangled_uuid ? : dmt->uuid) + +int mangle_string(const char *str, const char *str_name, size_t len, + char *buf, size_t buf_len, dm_string_mangling_t mode); + +int unmangle_string(const char *str, const char *str_name, size_t len, + char *buf, size_t buf_len, dm_string_mangling_t mode); + +int check_multiple_mangled_string_allowed(const char *str, const char *str_name, + dm_string_mangling_t mode); + +struct target *create_target(uint64_t start, + uint64_t len, + const char *type, const char *params); + +int add_dev_node(const char *dev_name, uint32_t minor, uint32_t major, + uid_t uid, gid_t gid, mode_t mode, int check_udev, unsigned rely_on_udev); +int rm_dev_node(const char *dev_name, int check_udev, unsigned rely_on_udev); +int rename_dev_node(const char *old_name, const char *new_name, + int check_udev, unsigned rely_on_udev); +int get_dev_node_read_ahead(const char *dev_name, uint32_t major, uint32_t minor, + uint32_t *read_ahead); +int set_dev_node_read_ahead(const char *dev_name, uint32_t major, uint32_t minor, + uint32_t read_ahead, uint32_t read_ahead_flags); +void update_devs(void); +void selinux_release(void); + +void inc_suspended(void); +void dec_suspended(void); + +int parse_thin_pool_status(const char *params, struct dm_status_thin_pool *s); + +int get_uname_version(unsigned *major, unsigned *minor, unsigned *release); + +#endif diff --git a/device_mapper/libdm-config.c b/device_mapper/libdm-config.c new file mode 100644 index 000000000..fd4d929ec --- /dev/null +++ b/device_mapper/libdm-config.c @@ -0,0 +1,1486 @@ +/* + * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. + * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. + * + * This file is part of LVM2. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU Lesser General Public License v.2.1. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "misc/dmlib.h" + +#include <sys/stat.h> +#include <sys/mman.h> +#include <unistd.h> +#include <fcntl.h> +#include <ctype.h> +#include <stdarg.h> + +#define SECTION_B_CHAR '{' +#define SECTION_E_CHAR '}' + +enum { + TOK_INT, + TOK_FLOAT, + TOK_STRING, /* Single quotes */ + TOK_STRING_ESCAPED, /* Double quotes */ + TOK_STRING_BARE, /* No quotes */ + TOK_EQ, + TOK_SECTION_B, + TOK_SECTION_E, + TOK_ARRAY_B, + TOK_ARRAY_E, + TOK_IDENTIFIER, + TOK_COMMA, + TOK_EOF +}; + +struct parser { + const char *fb, *fe; /* file limits */ + + int t; /* token limits and type */ + const char *tb, *te; + + int line; /* line number we are on */ + + struct dm_pool *mem; + int no_dup_node_check; /* whether to disable dup node checking */ +}; + +struct config_output { + struct dm_pool *mem; + dm_putline_fn putline; + const struct dm_config_node_out_spec *spec; + void *baton; +}; + +static void _get_token(struct parser *p, int tok_prev); +static void _eat_space(struct parser *p); +static struct dm_config_node *_file(struct parser *p); +static struct dm_config_node *_section(struct parser *p, struct dm_config_node *parent); +static struct dm_config_value *_value(struct parser *p); +static struct dm_config_value *_type(struct parser *p); +static int _match_aux(struct parser *p, int t); +static struct dm_config_value *_create_value(struct dm_pool *mem); +static struct dm_config_node *_create_node(struct dm_pool *mem); +static char *_dup_tok(struct parser *p); +static char *_dup_token(struct dm_pool *mem, const char *b, const char *e); + +static const int _sep = '/'; + +#define MAX_INDENT 32 + +#define match(t) do {\ + if (!_match_aux(p, (t))) {\ + log_error("Parse error at byte %" PRIptrdiff_t " (line %d): unexpected token", \ + p->tb - p->fb + 1, p->line); \ + return 0;\ + } \ +} while(0) + +static int _tok_match(const char *str, const char *b, const char *e) +{ + while (*str && (b != e)) { + if (*str++ != *b++) + return 0; + } + + return !(*str || (b != e)); +} + +struct dm_config_tree *dm_config_create(void) +{ + struct dm_config_tree *cft; + struct dm_pool *mem = dm_pool_create("config", 10 * 1024); + + if (!mem) { + log_error("Failed to allocate config pool."); + return 0; + } + + if (!(cft = dm_pool_zalloc(mem, sizeof(*cft)))) { + log_error("Failed to allocate config tree."); + dm_pool_destroy(mem); + return 0; + } + cft->mem = mem; + + return cft; +} + +void dm_config_set_custom(struct dm_config_tree *cft, void *custom) +{ + cft->custom = custom; +} + +void *dm_config_get_custom(struct dm_config_tree *cft) +{ + return cft->custom; +} + +void dm_config_destroy(struct dm_config_tree *cft) +{ + dm_pool_destroy(cft->mem); +} + +/* + * If there's a cascaded dm_config_tree, remove and return it, otherwise + * return NULL. + */ +struct dm_config_tree *dm_config_remove_cascaded_tree(struct dm_config_tree *cft) +{ + struct dm_config_tree *second_cft; + + if (!cft) + return NULL; + + second_cft = cft->cascade; + cft->cascade = NULL; + + return second_cft; +} + +/* + * When searching, first_cft is checked before second_cft. 
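+ *
+ * A minimal usage sketch (the tree contents and key names here are
+ * hypothetical):
+ *
+ *     struct dm_config_tree *local = dm_config_from_string("level = 7\n");
+ *     struct dm_config_tree *defaults =
+ *             dm_config_from_string("level = 3\nname = \"pool\"\n");
+ *     struct dm_config_tree *cft =
+ *             dm_config_insert_cascaded_tree(local, defaults);
+ *
+ *     dm_config_tree_find_int(cft, "level", 0) returns 7 from 'local';
+ *     dm_config_tree_find_str(cft, "name", NULL) falls through to
+ *     'defaults' and returns "pool".
+ *
+ *     dm_config_destroy(dm_config_remove_cascaded_tree(cft));
+ *     dm_config_destroy(cft);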
+ */ +struct dm_config_tree *dm_config_insert_cascaded_tree(struct dm_config_tree *first_cft, struct dm_config_tree *second_cft) +{ + first_cft->cascade = second_cft; + + return first_cft; +} + +static struct dm_config_node *_config_reverse(struct dm_config_node *head) +{ + struct dm_config_node *left = head, *middle = NULL, *right = NULL; + + while (left) { + right = middle; + middle = left; + left = left->sib; + middle->sib = right; + middle->child = _config_reverse(middle->child); + } + + return middle; +} + +static int _do_dm_config_parse(struct dm_config_tree *cft, const char *start, const char *end, int no_dup_node_check) +{ + /* TODO? if (start == end) return 1; */ + + struct parser *p; + if (!(p = dm_pool_alloc(cft->mem, sizeof(*p)))) + return_0; + + p->mem = cft->mem; + p->fb = start; + p->fe = end; + p->tb = p->te = p->fb; + p->line = 1; + p->no_dup_node_check = no_dup_node_check; + + _get_token(p, TOK_SECTION_E); + if (!(cft->root = _file(p))) + return_0; + + cft->root = _config_reverse(cft->root); + + return 1; +} + +int dm_config_parse(struct dm_config_tree *cft, const char *start, const char *end) +{ + return _do_dm_config_parse(cft, start, end, 0); +} + +int dm_config_parse_without_dup_node_check(struct dm_config_tree *cft, const char *start, const char *end) +{ + return _do_dm_config_parse(cft, start, end, 1); +} + +struct dm_config_tree *dm_config_from_string(const char *config_settings) +{ + struct dm_config_tree *cft; + + if (!(cft = dm_config_create())) + return_NULL; + + if (!dm_config_parse(cft, config_settings, config_settings + strlen(config_settings))) { + dm_config_destroy(cft); + return_NULL; + } + + return cft; +} + +static int _line_start(struct config_output *out) +{ + if (!dm_pool_begin_object(out->mem, 128)) { + log_error("dm_pool_begin_object failed for config line"); + return 0; + } + + return 1; +} + +__attribute__ ((format(printf, 2, 3))) +static int _line_append(struct config_output *out, const char *fmt, ...) +{ + char buf[4096]; + char *dyn_buf = NULL; + va_list ap; + int n; + + /* + * We should be fine with the 4096 char buffer 99% of the time, + * but if we need to go beyond that, allocate the buffer dynamically. + */ + + va_start(ap, fmt); + n = vsnprintf(buf, sizeof(buf), fmt, ap); + va_end(ap); + + if (n < 0) { + log_error("vsnprintf failed for config line"); + return 0; + } + + if (n > (int) sizeof buf - 1) { + /* + * Fixed size buffer with sizeof buf is not enough, + * so try dynamically allocated buffer now... + */ + va_start(ap, fmt); + n = dm_vasprintf(&dyn_buf, fmt, ap); + va_end(ap); + + if (n < 0) { + log_error("dm_vasprintf failed for config line"); + return 0; + } + } + + if (!dm_pool_grow_object(out->mem, dyn_buf ? : buf, 0)) { + log_error("dm_pool_grow_object failed for config line"); + dm_free(dyn_buf); + return 0; + } + + dm_free(dyn_buf); + + return 1; +} + +#define line_append(args...) 
do {if (!_line_append(out, args)) {return_0;}} while (0) + +static int _line_end(const struct dm_config_node *cn, struct config_output *out) +{ + const char *line; + + if (!dm_pool_grow_object(out->mem, "\0", 1)) { + log_error("dm_pool_grow_object failed for config line"); + return 0; + } + + line = dm_pool_end_object(out->mem); + + if (!out->putline && !out->spec) + return 0; + + if (out->putline) + out->putline(line, out->baton); + + if (out->spec && out->spec->line_fn) + out->spec->line_fn(cn, line, out->baton); + + return 1; +} + +static int _write_value(struct config_output *out, const struct dm_config_value *v) +{ + char *buf; + const char *s; + + switch (v->type) { + case DM_CFG_STRING: + buf = alloca(dm_escaped_len(v->v.str)); + s = (v->format_flags & DM_CONFIG_VALUE_FMT_STRING_NO_QUOTES) ? "" : "\""; + line_append("%s%s%s", s, dm_escape_double_quotes(buf, v->v.str), s); + break; + + case DM_CFG_FLOAT: + line_append("%f", v->v.f); + break; + + case DM_CFG_INT: + if (v->format_flags & DM_CONFIG_VALUE_FMT_INT_OCTAL) + line_append("0%" PRIo64, v->v.i); + else + line_append(FMTd64, v->v.i); + break; + + case DM_CFG_EMPTY_ARRAY: + s = (v->format_flags & DM_CONFIG_VALUE_FMT_COMMON_EXTRA_SPACES) ? " " : ""; + line_append("[%s]", s); + break; + + default: + log_error("_write_value: Unknown value type: %d", v->type); + + } + + return 1; +} + +static int _write_config(const struct dm_config_node *n, int only_one, + struct config_output *out, int level) +{ + const char *extra_space; + int format_array; + char space[MAX_INDENT + 1]; + int l = (level < MAX_INDENT) ? level : MAX_INDENT; + int i; + char *escaped_key = NULL; + + if (!n) + return 1; + + for (i = 0; i < l; i++) + space[i] = '\t'; + space[i] = '\0'; + + do { + extra_space = (n->v && (n->v->format_flags & DM_CONFIG_VALUE_FMT_COMMON_EXTRA_SPACES)) ? " " : ""; + format_array = (n->v && (n->v->format_flags & DM_CONFIG_VALUE_FMT_COMMON_ARRAY)); + + if (out->spec && out->spec->prefix_fn) + out->spec->prefix_fn(n, space, out->baton); + + if (!_line_start(out)) + return_0; + if (strchr(n->key, '#') || strchr(n->key, '"') || strchr(n->key, '!')) { + escaped_key = alloca(dm_escaped_len(n->key) + 2); + *escaped_key = '"'; + dm_escape_double_quotes(escaped_key + 1, n->key); + strcat(escaped_key, "\""); + } + line_append("%s%s", space, escaped_key ? 
escaped_key : n->key); + escaped_key = NULL; + if (!n->v) { + /* it's a sub section */ + line_append(" {"); + if (!_line_end(n, out)) + return_0; + if (!_write_config(n->child, 0, out, level + 1)) + return_0; + if (!_line_start(out)) + return_0; + line_append("%s}", space); + } else { + /* it's a value */ + const struct dm_config_value *v = n->v; + line_append("%s=%s", extra_space, extra_space); + if (v->next) { + line_append("[%s", extra_space); + while (v && v->type != DM_CFG_EMPTY_ARRAY) { + if (!_write_value(out, v)) + return_0; + v = v->next; + if (v && v->type != DM_CFG_EMPTY_ARRAY) + line_append(",%s", extra_space); + } + line_append("%s]", extra_space); + } else { + if (format_array && (v->type != DM_CFG_EMPTY_ARRAY)) + line_append("[%s", extra_space); + if (!_write_value(out, v)) + return_0; + if (format_array && (v->type != DM_CFG_EMPTY_ARRAY)) + line_append("%s]", extra_space); + } + } + if (!_line_end(n, out)) + return_0; + + if (out->spec && out->spec->suffix_fn) + out->spec->suffix_fn(n, space, out->baton); + + n = n->sib; + } while (n && !only_one); + /* FIXME: add error checking */ + return 1; +} + +static int _write_node(const struct dm_config_node *cn, int only_one, + dm_putline_fn putline, + const struct dm_config_node_out_spec *out_spec, + void *baton) +{ + struct config_output out = { + .mem = dm_pool_create("config_output", 1024), + .putline = putline, + .spec = out_spec, + .baton = baton + }; + + if (!out.mem) + return_0; + + if (!_write_config(cn, only_one, &out, 0)) { + dm_pool_destroy(out.mem); + return_0; + } + dm_pool_destroy(out.mem); + return 1; +} + +int dm_config_write_one_node(const struct dm_config_node *cn, dm_putline_fn putline, void *baton) +{ + return _write_node(cn, 1, putline, NULL, baton); +} + +int dm_config_write_node(const struct dm_config_node *cn, dm_putline_fn putline, void *baton) +{ + return _write_node(cn, 0, putline, NULL, baton); +} + +int dm_config_write_one_node_out(const struct dm_config_node *cn, + const struct dm_config_node_out_spec *out_spec, + void *baton) +{ + return _write_node(cn, 1, NULL, out_spec, baton); +} + +int dm_config_write_node_out(const struct dm_config_node *cn, + const struct dm_config_node_out_spec *out_spec, + void *baton) +{ + return _write_node(cn, 0, NULL, out_spec, baton); +} + +/* + * parser + */ +static char *_dup_string_tok(struct parser *p) +{ + char *str; + + p->tb++, p->te--; /* strip "'s */ + + if (p->te < p->tb) { + log_error("Parse error at byte %" PRIptrdiff_t " (line %d): " + "expected a string token.", + p->tb - p->fb + 1, p->line); + return NULL; + } + + if (!(str = _dup_tok(p))) + return_NULL; + + p->te++; + + return str; +} + +static struct dm_config_node *_file(struct parser *p) +{ + struct dm_config_node root = { 0 }; + root.key = "<root>"; + + while (p->t != TOK_EOF) + if (!_section(p, &root)) + return_NULL; + return root.child; +} + +static struct dm_config_node *_make_node(struct dm_pool *mem, + const char *key_b, const char *key_e, + struct dm_config_node *parent) +{ + struct dm_config_node *n; + + if (!(n = _create_node(mem))) + return_NULL; + + n->key = _dup_token(mem, key_b, key_e); + if (parent) { + n->parent = parent; + n->sib = parent->child; + parent->child = n; + } + return n; +} + +/* when mem is not NULL, we create the path if it doesn't exist yet */ +static struct dm_config_node *_find_or_make_node(struct dm_pool *mem, + struct dm_config_node *parent, + const char *path, + int no_dup_node_check) +{ + const char *e; + struct dm_config_node *cn = parent ? 
parent->child : NULL; + struct dm_config_node *cn_found = NULL; + + while (cn || mem) { + /* trim any leading slashes */ + while (*path && (*path == _sep)) + path++; + + /* find the end of this segment */ + for (e = path; *e && (*e != _sep); e++) ; + + /* hunt for the node */ + cn_found = NULL; + + if (!no_dup_node_check) { + while (cn) { + if (_tok_match(cn->key, path, e)) { + /* Inefficient */ + if (!cn_found) + cn_found = cn; + else + log_warn("WARNING: Ignoring duplicate" + " config node: %s (" + "seeking %s)", cn->key, path); + } + + cn = cn->sib; + } + } + + if (!cn_found && mem) { + if (!(cn_found = _make_node(mem, path, e, parent))) + return_NULL; + } + + if (cn_found && *e) { + parent = cn_found; + cn = cn_found->child; + } else + return cn_found; + path = e; + } + + return NULL; +} + +static struct dm_config_node *_section(struct parser *p, struct dm_config_node *parent) +{ + /* IDENTIFIER SECTION_B_CHAR VALUE* SECTION_E_CHAR */ + + struct dm_config_node *root; + struct dm_config_value *value; + char *str; + + if (p->t == TOK_STRING_ESCAPED) { + if (!(str = _dup_string_tok(p))) + return_NULL; + dm_unescape_double_quotes(str); + + match(TOK_STRING_ESCAPED); + } else if (p->t == TOK_STRING) { + if (!(str = _dup_string_tok(p))) + return_NULL; + + match(TOK_STRING); + } else { + if (!(str = _dup_tok(p))) + return_NULL; + + match(TOK_IDENTIFIER); + } + + if (!strlen(str)) { + log_error("Parse error at byte %" PRIptrdiff_t " (line %d): empty section identifier", + p->tb - p->fb + 1, p->line); + return NULL; + } + + if (!(root = _find_or_make_node(p->mem, parent, str, p->no_dup_node_check))) + return_NULL; + + if (p->t == TOK_SECTION_B) { + match(TOK_SECTION_B); + while (p->t != TOK_SECTION_E) { + if (!(_section(p, root))) + return_NULL; + } + match(TOK_SECTION_E); + } else { + match(TOK_EQ); + if (!(value = _value(p))) + return_NULL; + if (root->v) + log_warn("WARNING: Ignoring duplicate" + " config value: %s", str); + root->v = value; + } + + return root; +} + +static struct dm_config_value *_value(struct parser *p) +{ + /* '[' TYPE* ']' | TYPE */ + struct dm_config_value *h = NULL, *l, *ll = NULL; + if (p->t == TOK_ARRAY_B) { + match(TOK_ARRAY_B); + while (p->t != TOK_ARRAY_E) { + if (!(l = _type(p))) + return_NULL; + + if (!h) + h = l; + else + ll->next = l; + ll = l; + + if (p->t == TOK_COMMA) + match(TOK_COMMA); + } + match(TOK_ARRAY_E); + /* + * Special case for an empty array. 
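+ * An empty array in the input, e.g. (key name hypothetical)
+ *
+ *     tags = [ ]
+ *
+ * is therefore represented by a single value of type DM_CFG_EMPTY_ARRAY,
+ * so callers such as dm_config_get_list() still receive a non-NULL
+ * value whose type they can test.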
+ */ + if (!h) { + if (!(h = _create_value(p->mem))) { + log_error("Failed to allocate value"); + return NULL; + } + + h->type = DM_CFG_EMPTY_ARRAY; + } + + } else + if (!(h = _type(p))) + return_NULL; + + return h; +} + +static struct dm_config_value *_type(struct parser *p) +{ + /* [+-]{0,1}[0-9]+ | [0-9]*\.[0-9]* | ".*" */ + struct dm_config_value *v = _create_value(p->mem); + char *str; + + if (!v) { + log_error("Failed to allocate type value"); + return NULL; + } + + switch (p->t) { + case TOK_INT: + v->type = DM_CFG_INT; + errno = 0; + v->v.i = strtoll(p->tb, NULL, 0); /* FIXME: check error */ + if (errno) { + log_error("Failed to read int token."); + return NULL; + } + match(TOK_INT); + break; + + case TOK_FLOAT: + v->type = DM_CFG_FLOAT; + errno = 0; + v->v.f = strtod(p->tb, NULL); /* FIXME: check error */ + if (errno) { + log_error("Failed to read float token."); + return NULL; + } + match(TOK_FLOAT); + break; + + case TOK_STRING: + v->type = DM_CFG_STRING; + + if (!(v->v.str = _dup_string_tok(p))) + return_NULL; + + match(TOK_STRING); + break; + + case TOK_STRING_BARE: + v->type = DM_CFG_STRING; + + if (!(v->v.str = _dup_tok(p))) + return_NULL; + + match(TOK_STRING_BARE); + break; + + case TOK_STRING_ESCAPED: + v->type = DM_CFG_STRING; + + if (!(str = _dup_string_tok(p))) + return_NULL; + dm_unescape_double_quotes(str); + v->v.str = str; + match(TOK_STRING_ESCAPED); + break; + + default: + log_error("Parse error at byte %" PRIptrdiff_t " (line %d): expected a value", + p->tb - p->fb + 1, p->line); + return NULL; + } + return v; +} + +static int _match_aux(struct parser *p, int t) +{ + if (p->t != t) + return 0; + + _get_token(p, t); + return 1; +} + +/* + * tokeniser + */ +static void _get_token(struct parser *p, int tok_prev) +{ + int values_allowed = 0; + + const char *te; + + p->tb = p->te; + _eat_space(p); + if (p->tb == p->fe || !*p->tb) { + p->t = TOK_EOF; + return; + } + + /* Should next token be interpreted as value instead of identifier? 
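+ For example, given the hypothetical input
+
+     policy = mq
+
+ 'policy' is scanned as TOK_IDENTIFIER, while 'mq' follows TOK_EQ,
+ so values_allowed is set and it is scanned as TOK_STRING_BARE,
+ i.e. an unquoted string value.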
*/ + if (tok_prev == TOK_EQ || tok_prev == TOK_ARRAY_B || + tok_prev == TOK_COMMA) + values_allowed = 1; + + p->t = TOK_INT; /* fudge so the fall through for + floats works */ + + te = p->te; + switch (*te) { + case SECTION_B_CHAR: + p->t = TOK_SECTION_B; + te++; + break; + + case SECTION_E_CHAR: + p->t = TOK_SECTION_E; + te++; + break; + + case '[': + p->t = TOK_ARRAY_B; + te++; + break; + + case ']': + p->t = TOK_ARRAY_E; + te++; + break; + + case ',': + p->t = TOK_COMMA; + te++; + break; + + case '=': + p->t = TOK_EQ; + te++; + break; + + case '"': + p->t = TOK_STRING_ESCAPED; + te++; + while ((te != p->fe) && (*te) && (*te != '"')) { + if ((*te == '\\') && (te + 1 != p->fe) && + *(te + 1)) + te++; + te++; + } + + if ((te != p->fe) && (*te)) + te++; + break; + + case '\'': + p->t = TOK_STRING; + te++; + while ((te != p->fe) && (*te) && (*te != '\'')) + te++; + + if ((te != p->fe) && (*te)) + te++; + break; + + case '.': + p->t = TOK_FLOAT; + /* Fall through */ + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '+': + case '-': + if (values_allowed) { + while (++te != p->fe) { + if (!isdigit((int) *te)) { + if (*te == '.') { + if (p->t != TOK_FLOAT) { + p->t = TOK_FLOAT; + continue; + } + } + break; + } + } + break; + } + /* fall through */ + + default: + p->t = TOK_IDENTIFIER; + while ((te != p->fe) && (*te) && !isspace(*te) && + (*te != '#') && (*te != '=') && + (*te != SECTION_B_CHAR) && + (*te != SECTION_E_CHAR)) + te++; + if (values_allowed) + p->t = TOK_STRING_BARE; + break; + } + + p->te = te; +} + +static void _eat_space(struct parser *p) +{ + while (p->tb != p->fe) { + if (*p->te == '#') + while ((p->te != p->fe) && (*p->te != '\n') && (*p->te)) + ++p->te; + + else if (!isspace(*p->te)) + break; + + while ((p->te != p->fe) && isspace(*p->te)) { + if (*p->te == '\n') + ++p->line; + ++p->te; + } + + p->tb = p->te; + } +} + +/* + * memory management + */ +static struct dm_config_value *_create_value(struct dm_pool *mem) +{ + return dm_pool_zalloc(mem, sizeof(struct dm_config_value)); +} + +static struct dm_config_node *_create_node(struct dm_pool *mem) +{ + return dm_pool_zalloc(mem, sizeof(struct dm_config_node)); +} + +static char *_dup_token(struct dm_pool *mem, const char *b, const char *e) +{ + size_t len = e - b; + char *str = dm_pool_alloc(mem, len + 1); + if (!str) { + log_error("Failed to duplicate token."); + return 0; + } + memcpy(str, b, len); + str[len] = '\0'; + return str; +} + +static char *_dup_tok(struct parser *p) +{ + return _dup_token(p->mem, p->tb, p->te); +} + +/* + * Utility functions + */ + +/* + * node_lookup_fn is either: + * _find_config_node to perform a lookup starting from a given config_node + * in a config_tree; + * or + * _find_first_config_node to find the first config_node in a set of + * cascaded trees. 
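+ *
+ * Both lookups take a '/'-separated path. A short sketch (the config
+ * contents are hypothetical):
+ *
+ *     log {
+ *             level = 6
+ *     }
+ *
+ * dm_config_find_int(cn, "log/level", 0) searches only the subtree
+ * rooted at 'cn', while dm_config_tree_find_int(cft, "log/level", 0)
+ * also walks cft->cascade until the path is found.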
+ */ +typedef const struct dm_config_node *node_lookup_fn(const void *start, const char *path); + +static const struct dm_config_node *_find_config_node(const void *start, const char *path) { + struct dm_config_node dummy = { .child = (void *) start }; + return _find_or_make_node(NULL, &dummy, path, 0); +} + +static const struct dm_config_node *_find_first_config_node(const void *start, const char *path) +{ + const struct dm_config_tree *cft = start; + const struct dm_config_node *cn = NULL; + + while (cft) { + if ((cn = _find_config_node(cft->root, path))) + return cn; + cft = cft->cascade; + } + + return NULL; +} + +static const char *_find_config_str(const void *start, node_lookup_fn find_fn, + const char *path, const char *fail, int allow_empty) +{ + const struct dm_config_node *n = find_fn(start, path); + + /* Empty strings are ignored if allow_empty is set */ + if (n && n->v) { + if ((n->v->type == DM_CFG_STRING) && + (allow_empty || (*n->v->v.str))) { + /* log_very_verbose("Setting %s to %s", path, n->v->v.str); */ + return n->v->v.str; + } + if ((n->v->type != DM_CFG_STRING) || (!allow_empty && fail)) + log_warn("WARNING: Ignoring unsupported value for %s.", path); + } + + if (fail) + log_very_verbose("%s not found in config: defaulting to %s", + path, fail); + return fail; +} + +const char *dm_config_find_str(const struct dm_config_node *cn, + const char *path, const char *fail) +{ + return _find_config_str(cn, _find_config_node, path, fail, 0); +} + +const char *dm_config_find_str_allow_empty(const struct dm_config_node *cn, + const char *path, const char *fail) +{ + return _find_config_str(cn, _find_config_node, path, fail, 1); +} + +static int64_t _find_config_int64(const void *start, node_lookup_fn find, + const char *path, int64_t fail) +{ + const struct dm_config_node *n = find(start, path); + + if (n && n->v && n->v->type == DM_CFG_INT) { + /* log_very_verbose("Setting %s to %" PRId64, path, n->v->v.i); */ + return n->v->v.i; + } + + log_very_verbose("%s not found in config: defaulting to %" PRId64, + path, fail); + return fail; +} + +static float _find_config_float(const void *start, node_lookup_fn find, + const char *path, float fail) +{ + const struct dm_config_node *n = find(start, path); + + if (n && n->v && n->v->type == DM_CFG_FLOAT) { + /* log_very_verbose("Setting %s to %f", path, n->v->v.f); */ + return n->v->v.f; + } + + log_very_verbose("%s not found in config: defaulting to %f", + path, fail); + + return fail; + +} + +static int _str_in_array(const char *str, const char * const values[]) +{ + int i; + + for (i = 0; values[i]; i++) + if (!strcasecmp(str, values[i])) + return 1; + + return 0; +} + +static int _str_to_bool(const char *str, int fail) +{ + const char * const _true_values[] = { "y", "yes", "on", "true", NULL }; + const char * const _false_values[] = { "n", "no", "off", "false", NULL }; + + if (_str_in_array(str, _true_values)) + return 1; + + if (_str_in_array(str, _false_values)) + return 0; + + return fail; +} + +static int _find_config_bool(const void *start, node_lookup_fn find, + const char *path, int fail) +{ + const struct dm_config_node *n = find(start, path); + const struct dm_config_value *v; + int b; + + if (n) { + v = n->v; + + switch (v->type) { + case DM_CFG_INT: + b = v->v.i ? 
1 : 0; + /* log_very_verbose("Setting %s to %d", path, b); */ + return b; + + case DM_CFG_STRING: + b = _str_to_bool(v->v.str, fail); + /* log_very_verbose("Setting %s to %d", path, b); */ + return b; + default: + ; + } + } + + log_very_verbose("%s not found in config: defaulting to %d", + path, fail); + + return fail; +} + +/*********************************** + * node-based lookup + **/ + +struct dm_config_node *dm_config_find_node(const struct dm_config_node *cn, + const char *path) +{ + return (struct dm_config_node *) _find_config_node(cn, path); +} + +int dm_config_find_int(const struct dm_config_node *cn, const char *path, int fail) +{ + /* FIXME Add log_error message on overflow */ + return (int) _find_config_int64(cn, _find_config_node, path, (int64_t) fail); +} + +int64_t dm_config_find_int64(const struct dm_config_node *cn, const char *path, int64_t fail) +{ + return _find_config_int64(cn, _find_config_node, path, fail); +} + +float dm_config_find_float(const struct dm_config_node *cn, const char *path, + float fail) +{ + return _find_config_float(cn, _find_config_node, path, fail); +} + +int dm_config_find_bool(const struct dm_config_node *cn, const char *path, int fail) +{ + return _find_config_bool(cn, _find_config_node, path, fail); +} + +int dm_config_value_is_bool(const struct dm_config_value *v) { + if (!v) + return 0; + + switch(v->type) { + case DM_CFG_INT: + return 1; + case DM_CFG_STRING: + return _str_to_bool(v->v.str, -1) != -1; + default: + return 0; + } +} + +/*********************************** + * tree-based lookup + **/ + +const struct dm_config_node *dm_config_tree_find_node(const struct dm_config_tree *cft, + const char *path) +{ + return _find_first_config_node(cft, path); +} + +const char *dm_config_tree_find_str(const struct dm_config_tree *cft, const char *path, + const char *fail) +{ + return _find_config_str(cft, _find_first_config_node, path, fail, 0); +} + +const char *dm_config_tree_find_str_allow_empty(const struct dm_config_tree *cft, const char *path, + const char *fail) +{ + return _find_config_str(cft, _find_first_config_node, path, fail, 1); +} + +int dm_config_tree_find_int(const struct dm_config_tree *cft, const char *path, int fail) +{ + /* FIXME Add log_error message on overflow */ + return (int) _find_config_int64(cft, _find_first_config_node, path, (int64_t) fail); +} + +int64_t dm_config_tree_find_int64(const struct dm_config_tree *cft, const char *path, int64_t fail) +{ + return _find_config_int64(cft, _find_first_config_node, path, fail); +} + +float dm_config_tree_find_float(const struct dm_config_tree *cft, const char *path, + float fail) +{ + return _find_config_float(cft, _find_first_config_node, path, fail); +} + +int dm_config_tree_find_bool(const struct dm_config_tree *cft, const char *path, int fail) +{ + return _find_config_bool(cft, _find_first_config_node, path, fail); +} + +/************************************/ + + +int dm_config_get_uint32(const struct dm_config_node *cn, const char *path, + uint32_t *result) +{ + const struct dm_config_node *n; + + n = _find_config_node(cn, path); + + if (!n || !n->v || n->v->type != DM_CFG_INT) + return 0; + + if (result) + *result = n->v->v.i; + return 1; +} + +int dm_config_get_uint64(const struct dm_config_node *cn, const char *path, + uint64_t *result) +{ + const struct dm_config_node *n; + + n = _find_config_node(cn, path); + + if (!n || !n->v || n->v->type != DM_CFG_INT) + return 0; + + if (result) + *result = (uint64_t) n->v->v.i; + return 1; +} + +int dm_config_get_str(const 
struct dm_config_node *cn, const char *path, + const char **result) +{ + const struct dm_config_node *n; + + n = _find_config_node(cn, path); + + if (!n || !n->v || n->v->type != DM_CFG_STRING) + return 0; + + if (result) + *result = n->v->v.str; + return 1; +} + +int dm_config_get_list(const struct dm_config_node *cn, const char *path, + const struct dm_config_value **result) +{ + const struct dm_config_node *n; + + n = _find_config_node(cn, path); + /* TODO when we represent single-item lists consistently, add a check + * for n->v->next != NULL */ + if (!n || !n->v) + return 0; + + if (result) + *result = n->v; + return 1; +} + +int dm_config_get_section(const struct dm_config_node *cn, const char *path, + const struct dm_config_node **result) +{ + const struct dm_config_node *n; + + n = _find_config_node(cn, path); + if (!n || n->v) + return 0; + + if (result) + *result = n; + return 1; +} + +int dm_config_has_node(const struct dm_config_node *cn, const char *path) +{ + return _find_config_node(cn, path) ? 1 : 0; +} + +/* + * Convert a token type to the char it represents. + */ +static char _token_type_to_char(int type) +{ + switch (type) { + case TOK_SECTION_B: + return SECTION_B_CHAR; + case TOK_SECTION_E: + return SECTION_E_CHAR; + default: + return 0; + } +} + +/* + * Returns: + * # of 'type' tokens in 'str'. + */ +static unsigned _count_tokens(const char *str, unsigned len, int type) +{ + char c; + + c = _token_type_to_char(type); + + return dm_count_chars(str, len, c); +} + +const char *dm_config_parent_name(const struct dm_config_node *n) +{ + return (n->parent ? n->parent->key : "(root)"); +} +/* + * Heuristic function to make a quick guess as to whether a text + * region probably contains a valid config "section". (Useful for + * scanning areas of the disk for old metadata.) + * Config sections contain various tokens, may contain other sections + * and strings, and are delimited by begin (type 'TOK_SECTION_B') and + * end (type 'TOK_SECTION_E') tokens. As a quick heuristic, we just + * count the number of begin and end tokens, and see if they are + * non-zero and the counts match. + * Full validation of the section should be done with another function + * (for example, read_config_fd). 
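+ * For example, the fragment "activation { udev_sync = 1 }" contains
+ * one begin and one end token, so dm_config_maybe_section() returns 1
+ * for it, but returns 0 for the unbalanced "activation { udev_sync"
+ * and for plain text containing no braces at all (section and key
+ * names here are hypothetical).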
+ * + * Returns: + * 0 - probably is not a valid config section + * 1 - probably _is_ a valid config section + */ +unsigned dm_config_maybe_section(const char *str, unsigned len) +{ + int begin_count; + int end_count; + + begin_count = _count_tokens(str, len, TOK_SECTION_B); + end_count = _count_tokens(str, len, TOK_SECTION_E); + + if (begin_count && end_count && (begin_count == end_count)) + return 1; + else + return 0; +} + +__attribute__((nonnull(1, 2))) +static struct dm_config_value *_clone_config_value(struct dm_pool *mem, + const struct dm_config_value *v) +{ + struct dm_config_value *new_cv; + + if (!(new_cv = _create_value(mem))) { + log_error("Failed to clone config value."); + return NULL; + } + + new_cv->type = v->type; + if (v->type == DM_CFG_STRING) { + if (!(new_cv->v.str = dm_pool_strdup(mem, v->v.str))) { + log_error("Failed to clone config string value."); + return NULL; + } + } else + new_cv->v = v->v; + + if (v->next && !(new_cv->next = _clone_config_value(mem, v->next))) + return_NULL; + + return new_cv; +} + +struct dm_config_node *dm_config_clone_node_with_mem(struct dm_pool *mem, const struct dm_config_node *cn, int siblings) +{ + struct dm_config_node *new_cn; + + if (!cn) { + log_error("Cannot clone NULL config node."); + return NULL; + } + + if (!(new_cn = _create_node(mem))) { + log_error("Failed to clone config node."); + return NULL; + } + + if ((cn->key && !(new_cn->key = dm_pool_strdup(mem, cn->key)))) { + log_error("Failed to clone config node key."); + return NULL; + } + + new_cn->id = cn->id; + + if ((cn->v && !(new_cn->v = _clone_config_value(mem, cn->v))) || + (cn->child && !(new_cn->child = dm_config_clone_node_with_mem(mem, cn->child, 1))) || + (siblings && cn->sib && !(new_cn->sib = dm_config_clone_node_with_mem(mem, cn->sib, siblings)))) + return_NULL; /* 'new_cn' released with mem pool */ + + return new_cn; +} + +struct dm_config_node *dm_config_clone_node(struct dm_config_tree *cft, const struct dm_config_node *node, int sib) +{ + return dm_config_clone_node_with_mem(cft->mem, node, sib); +} + +struct dm_config_node *dm_config_create_node(struct dm_config_tree *cft, const char *key) +{ + struct dm_config_node *cn; + + if (!(cn = _create_node(cft->mem))) { + log_error("Failed to create config node."); + return NULL; + } + if (!(cn->key = dm_pool_strdup(cft->mem, key))) { + log_error("Failed to create config node's key."); + return NULL; + } + cn->parent = NULL; + cn->v = NULL; + + return cn; +} + +struct dm_config_value *dm_config_create_value(struct dm_config_tree *cft) +{ + return _create_value(cft->mem); +} + +void dm_config_value_set_format_flags(struct dm_config_value *cv, uint32_t format_flags) +{ + if (!cv) + return; + + cv->format_flags = format_flags; +} + +uint32_t dm_config_value_get_format_flags(struct dm_config_value *cv) +{ + if (!cv) + return 0; + + return cv->format_flags; +} + +struct dm_pool *dm_config_memory(struct dm_config_tree *cft) +{ + return cft->mem; +} + +static int _override_path(const char *path, struct dm_config_node *node, void *baton) +{ + struct dm_config_tree *cft = baton; + struct dm_config_node dummy, *target; + dummy.child = cft->root; + if (!(target = _find_or_make_node(cft->mem, &dummy, path, 0))) + return_0; + if (!(target->v = _clone_config_value(cft->mem, node->v))) + return_0; + cft->root = dummy.child; + return 1; +} + +static int _enumerate(const char *path, struct dm_config_node *cn, int (*cb)(const char *, struct dm_config_node *, void *), void *baton) +{ + char *sub = NULL; + + while (cn) { + if 
(dm_asprintf(&sub, "%s/%s", path, cn->key) < 0) + return_0; + if (cn->child) { + if (!_enumerate(sub, cn->child, cb, baton)) + goto_bad; + } else + if (!cb(sub, cn, baton)) + goto_bad; + dm_free(sub); + cn = cn->sib; + } + return 1; +bad: + dm_free(sub); + return 0; +} + +struct dm_config_tree *dm_config_flatten(struct dm_config_tree *cft) +{ + struct dm_config_tree *res = dm_config_create(), *done = NULL, *current = NULL; + + if (!res) + return_NULL; + + while (done != cft) { + current = cft; + while (current->cascade != done) + current = current->cascade; + _enumerate("", current->root, _override_path, res); + done = current; + } + + return res; +} + +int dm_config_remove_node(struct dm_config_node *parent, struct dm_config_node *rem_node) +{ + struct dm_config_node *cn = parent->child, *last = NULL; + while (cn) { + if (cn == rem_node) { + if (last) + last->sib = cn->sib; + else + parent->child = cn->sib; + return 1; + } + last = cn; + cn = cn->sib; + } + return 0; +} diff --git a/device_mapper/libdm-deptree.c b/device_mapper/libdm-deptree.c new file mode 100644 index 000000000..fab3e0662 --- /dev/null +++ b/device_mapper/libdm-deptree.c @@ -0,0 +1,3853 @@ +/* + * Copyright (C) 2005-2017 Red Hat, Inc. All rights reserved. + * + * This file is part of the device-mapper userspace tools. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU Lesser General Public License v.2.1. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "misc/dmlib.h" +#include "ioctl/libdm-targets.h" +#include "libdm-common.h" +#include "misc/kdev_t.h" +#include "misc/dm-ioctl.h" + +#include <stdarg.h> +#include <sys/param.h> +#include <sys/utsname.h> + +#define MAX_TARGET_PARAMSIZE 500000 + +/* Supported segment types */ +enum { + SEG_CACHE, + SEG_CRYPT, + SEG_ERROR, + SEG_LINEAR, + SEG_MIRRORED, + SEG_SNAPSHOT, + SEG_SNAPSHOT_ORIGIN, + SEG_SNAPSHOT_MERGE, + SEG_STRIPED, + SEG_ZERO, + SEG_THIN_POOL, + SEG_THIN, + SEG_RAID0, + SEG_RAID0_META, + SEG_RAID1, + SEG_RAID10, + SEG_RAID4, + SEG_RAID5_N, + SEG_RAID5_LA, + SEG_RAID5_RA, + SEG_RAID5_LS, + SEG_RAID5_RS, + SEG_RAID6_N_6, + SEG_RAID6_ZR, + SEG_RAID6_NR, + SEG_RAID6_NC, + SEG_RAID6_LS_6, + SEG_RAID6_RS_6, + SEG_RAID6_LA_6, + SEG_RAID6_RA_6, +}; + +/* FIXME Add crypt and multipath support */ + +static const struct { + unsigned type; + const char target[16]; +} _dm_segtypes[] = { + { SEG_CACHE, "cache" }, + { SEG_CRYPT, "crypt" }, + { SEG_ERROR, "error" }, + { SEG_LINEAR, "linear" }, + { SEG_MIRRORED, "mirror" }, + { SEG_SNAPSHOT, "snapshot" }, + { SEG_SNAPSHOT_ORIGIN, "snapshot-origin" }, + { SEG_SNAPSHOT_MERGE, "snapshot-merge" }, + { SEG_STRIPED, "striped" }, + { SEG_ZERO, "zero"}, + { SEG_THIN_POOL, "thin-pool"}, + { SEG_THIN, "thin"}, + { SEG_RAID0, "raid0"}, + { SEG_RAID0_META, "raid0_meta"}, + { SEG_RAID1, "raid1"}, + { SEG_RAID10, "raid10"}, + { SEG_RAID4, "raid4"}, + { SEG_RAID5_N, "raid5_n"}, + { SEG_RAID5_LA, "raid5_la"}, + { SEG_RAID5_RA, "raid5_ra"}, + { SEG_RAID5_LS, "raid5_ls"}, + { SEG_RAID5_RS, "raid5_rs"}, + { SEG_RAID6_N_6,"raid6_n_6"}, + { SEG_RAID6_ZR, "raid6_zr"}, + { SEG_RAID6_NR, "raid6_nr"}, + { SEG_RAID6_NC, "raid6_nc"}, + { SEG_RAID6_LS_6, "raid6_ls_6"}, + { SEG_RAID6_RS_6, "raid6_rs_6"}, + { SEG_RAID6_LA_6, "raid6_la_6"}, + { 
SEG_RAID6_RA_6, "raid6_ra_6"}, + + + /* + * WARNING: Since 'raid' target overloads this 1:1 mapping table + * for search do not add new enum elements past them! + */ + { SEG_RAID5_LS, "raid5"}, /* same as "raid5_ls" (default for MD also) */ + { SEG_RAID6_ZR, "raid6"}, /* same as "raid6_zr" */ + { SEG_RAID10, "raid10_near"}, /* same as "raid10" */ +}; + +/* Some segment types have a list of areas of other devices attached */ +struct seg_area { + struct dm_list list; + + struct dm_tree_node *dev_node; + + uint64_t offset; +}; + +struct dm_thin_message { + dm_thin_message_t type; + union { + struct { + uint32_t device_id; + uint32_t origin_id; + } m_create_snap; + struct { + uint32_t device_id; + } m_create_thin; + struct { + uint32_t device_id; + } m_delete; + struct { + uint64_t current_id; + uint64_t new_id; + } m_set_transaction_id; + } u; +}; + +struct thin_message { + struct dm_list list; + struct dm_thin_message message; + int expected_errno; +}; + +/* Per-segment properties */ +struct load_segment { + struct dm_list list; + + unsigned type; + + uint64_t size; + + unsigned area_count; /* Linear + Striped + Mirrored + Crypt */ + struct dm_list areas; /* Linear + Striped + Mirrored + Crypt */ + + uint32_t stripe_size; /* Striped + raid */ + + int persistent; /* Snapshot */ + uint32_t chunk_size; /* Snapshot */ + struct dm_tree_node *cow; /* Snapshot */ + struct dm_tree_node *origin; /* Snapshot + Snapshot origin + Cache */ + struct dm_tree_node *merge; /* Snapshot */ + + struct dm_tree_node *log; /* Mirror */ + uint32_t region_size; /* Mirror + raid */ + unsigned clustered; /* Mirror */ + unsigned mirror_area_count; /* Mirror */ + uint32_t flags; /* Mirror + raid + Cache */ + char *uuid; /* Clustered mirror log */ + + const char *policy_name; /* Cache */ + unsigned policy_argc; /* Cache */ + struct dm_config_node *policy_settings; /* Cache */ + + const char *cipher; /* Crypt */ + const char *chainmode; /* Crypt */ + const char *iv; /* Crypt */ + uint64_t iv_offset; /* Crypt */ + const char *key; /* Crypt */ + + int delta_disks; /* raid reshape number of disks */ + int data_offset; /* raid reshape data offset on disk to set */ + uint64_t rebuilds[RAID_BITMAP_SIZE]; /* raid */ + uint64_t writemostly[RAID_BITMAP_SIZE]; /* raid */ + uint32_t writebehind; /* raid */ + uint32_t max_recovery_rate; /* raid kB/sec/disk */ + uint32_t min_recovery_rate; /* raid kB/sec/disk */ + uint32_t data_copies; /* raid10 data_copies */ + + struct dm_tree_node *metadata; /* Thin_pool + Cache */ + struct dm_tree_node *pool; /* Thin_pool, Thin */ + struct dm_tree_node *external; /* Thin */ + struct dm_list thin_messages; /* Thin_pool */ + uint64_t transaction_id; /* Thin_pool */ + uint64_t low_water_mark; /* Thin_pool */ + uint32_t data_block_size; /* Thin_pool + cache */ + unsigned skip_block_zeroing; /* Thin_pool */ + unsigned ignore_discard; /* Thin_pool target vsn 1.1 */ + unsigned no_discard_passdown; /* Thin_pool target vsn 1.1 */ + unsigned error_if_no_space; /* Thin pool target vsn 1.10 */ + unsigned read_only; /* Thin pool target vsn 1.3 */ + uint32_t device_id; /* Thin */ + +}; + +/* Per-device properties */ +struct load_properties { + int read_only; + uint32_t major; + uint32_t minor; + + uint32_t read_ahead; + uint32_t read_ahead_flags; + + unsigned segment_count; + int size_changed; + struct dm_list segs; + + const char *new_name; + + /* If immediate_dev_node is set to 1, try to create the dev node + * as soon as possible (e.g. in preload stage even during traversal + * and processing of dm tree). 
This will also flush all stacked dev + node operations, synchronizing with udev. + */ + unsigned immediate_dev_node; + + /* + * If the device size changed from zero and this is set, + * don't resume the device immediately, even if the device + * has parents. This works provided the parents do not + * validate the device size and is required by pvmove to + * avoid starting the mirror resync operation too early. + */ + unsigned delay_resume_if_new; + + /* + * Preload normally only loads tables and does not resume, but there is + * an automatic resume when a target is extended, as it's believed + * there can be no i/o in flight to this 'new' extended space + * from any device above. The reason is that a preloaded target above + * may actually need to see its bigger subdevice before it + * gets suspended. As long as devices are simple linears + * there is no problem resuming the bigger device in preload (before commit). + * However, complex targets like thin-pool (raid, cache...) + * shall not be resumed before their commit. + */ + unsigned delay_resume_if_extended; + + /* + * Call node_send_messages(); set to 2 if there are messages. + * When != 0, it validates a matching transaction id, thus thin-pools + * where transaction_id is passed as 0 are never validated; this + * allows external management of the thin-pool TID. + */ + unsigned send_messages; + /* Skip suspending node's children, used when sending messages to thin-pool */ + int skip_suspend; +}; + +/* Two of these are used to join two nodes with uses and used_by. */ +struct dm_tree_link { + struct dm_list list; + struct dm_tree_node *node; +}; + +struct dm_tree_node { + struct dm_tree *dtree; + + const char *name; + const char *uuid; + struct dm_info info; + + struct dm_list uses; /* Nodes this node uses */ + struct dm_list used_by; /* Nodes that use this node */ + + int activation_priority; /* 0 gets activated first */ + int implicit_deps; /* 1 device only implicitly referenced */ + + uint16_t udev_flags; /* Udev control flags */ + + void *context; /* External supplied context */ + + struct load_properties props; /* For creation/table (re)load */ + + /* + * If presuspend of child node is needed + * Note: only direct child is allowed + */ + struct dm_tree_node *presuspend_node; + + /* Callback */ + dm_node_callback_fn callback; + void *callback_data; + + /* + * TODO: + * Add advanced code which keeps track of sent ioctls and their + * proper revert operations for more advanced recovery. + * Current code mostly serves recovery when the + * thin pool metadata check fails and the command would + * have left active thin data and metadata subvolumes. + */ + struct dm_list activated; /* Head of activated nodes for preload revert */ + struct dm_list activated_list; /* List of activated nodes for preload revert */ +}; + +struct dm_tree { + struct dm_pool *mem; + struct dm_hash_table *devs; + struct dm_hash_table *uuids; + struct dm_tree_node root; + int skip_lockfs; /* 1 skips lockfs (for non-snapshots) */ + int no_flush; /* 1 sets noflush (mirrors/multipath) */ + int retry_remove; /* 1 retries remove if not successful */ + uint32_t cookie; + char buf[DM_NAME_LEN + 32]; /* print buffer for device_name (major:minor) */ + const char **optional_uuid_suffixes; /* uuid suffixes ignored when matching */ +}; + +/* + * Tree functions. 
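+ *
+ * A typical lifecycle, sketched below (dm_tree_add_dev() belongs to
+ * the same public API but is defined later in this file; the device
+ * number 253:0 and do_something() are hypothetical):
+ *
+ *     struct dm_tree *dtree;
+ *     struct dm_tree_node *root, *child;
+ *     void *handle = NULL;
+ *
+ *     if (!(dtree = dm_tree_create()))
+ *             return 0;
+ *     if (dm_tree_add_dev(dtree, 253, 0)) {
+ *             root = dm_tree_find_node(dtree, 0, 0);
+ *             while ((child = dm_tree_next_child(&handle, root, 0)))
+ *                     do_something(child);
+ *     }
+ *     dm_tree_free(dtree);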
+ */ +struct dm_tree *dm_tree_create(void) +{ + struct dm_pool *dmem; + struct dm_tree *dtree; + + if (!(dmem = dm_pool_create("dtree", 1024)) || + !(dtree = dm_pool_zalloc(dmem, sizeof(*dtree)))) { + log_error("Failed to allocate dtree."); + if (dmem) + dm_pool_destroy(dmem); + return NULL; + } + + dtree->root.dtree = dtree; + dm_list_init(&dtree->root.uses); + dm_list_init(&dtree->root.used_by); + dm_list_init(&dtree->root.activated); + dtree->skip_lockfs = 0; + dtree->no_flush = 0; + dtree->mem = dmem; + dtree->optional_uuid_suffixes = NULL; + + if (!(dtree->devs = dm_hash_create(8))) { + log_error("dtree hash creation failed"); + dm_pool_destroy(dtree->mem); + return NULL; + } + + if (!(dtree->uuids = dm_hash_create(32))) { + log_error("dtree uuid hash creation failed"); + dm_hash_destroy(dtree->devs); + dm_pool_destroy(dtree->mem); + return NULL; + } + + return dtree; +} + +void dm_tree_free(struct dm_tree *dtree) +{ + if (!dtree) + return; + + dm_hash_destroy(dtree->uuids); + dm_hash_destroy(dtree->devs); + dm_pool_destroy(dtree->mem); +} + +void dm_tree_set_cookie(struct dm_tree_node *node, uint32_t cookie) +{ + node->dtree->cookie = cookie; +} + +uint32_t dm_tree_get_cookie(struct dm_tree_node *node) +{ + return node->dtree->cookie; +} + +void dm_tree_skip_lockfs(struct dm_tree_node *dnode) +{ + dnode->dtree->skip_lockfs = 1; +} + +void dm_tree_use_no_flush_suspend(struct dm_tree_node *dnode) +{ + dnode->dtree->no_flush = 1; +} + +void dm_tree_retry_remove(struct dm_tree_node *dnode) +{ + dnode->dtree->retry_remove = 1; +} + +/* + * Node functions. + */ +static int _nodes_are_linked(const struct dm_tree_node *parent, + const struct dm_tree_node *child) +{ + struct dm_tree_link *dlink; + + dm_list_iterate_items(dlink, &parent->uses) + if (dlink->node == child) + return 1; + + return 0; +} + +static int _link(struct dm_list *list, struct dm_tree_node *node) +{ + struct dm_tree_link *dlink; + + if (!(dlink = dm_pool_alloc(node->dtree->mem, sizeof(*dlink)))) { + log_error("dtree link allocation failed"); + return 0; + } + + dlink->node = node; + dm_list_add(list, &dlink->list); + + return 1; +} + +static int _link_nodes(struct dm_tree_node *parent, + struct dm_tree_node *child) +{ + if (_nodes_are_linked(parent, child)) + return 1; + + if (!_link(&parent->uses, child)) + return 0; + + if (!_link(&child->used_by, parent)) + return 0; + + return 1; +} + +static void _unlink(struct dm_list *list, struct dm_tree_node *node) +{ + struct dm_tree_link *dlink; + + dm_list_iterate_items(dlink, list) + if (dlink->node == node) { + dm_list_del(&dlink->list); + break; + } +} + +static void _unlink_nodes(struct dm_tree_node *parent, + struct dm_tree_node *child) +{ + if (!_nodes_are_linked(parent, child)) + return; + + _unlink(&parent->uses, child); + _unlink(&child->used_by, parent); +} + +static int _add_to_toplevel(struct dm_tree_node *node) +{ + return _link_nodes(&node->dtree->root, node); +} + +static void _remove_from_toplevel(struct dm_tree_node *node) +{ + _unlink_nodes(&node->dtree->root, node); +} + +static int _add_to_bottomlevel(struct dm_tree_node *node) +{ + return _link_nodes(node, &node->dtree->root); +} + +static void _remove_from_bottomlevel(struct dm_tree_node *node) +{ + _unlink_nodes(node, &node->dtree->root); +} + +static int _link_tree_nodes(struct dm_tree_node *parent, struct dm_tree_node *child) +{ + /* Don't link to root node if child already has a parent */ + if (parent == &parent->dtree->root) { + if (dm_tree_node_num_children(child, 1)) + return 1; + } else + 
_remove_from_toplevel(child); + + if (child == &child->dtree->root) { + if (dm_tree_node_num_children(parent, 0)) + return 1; + } else + _remove_from_bottomlevel(parent); + + return _link_nodes(parent, child); +} + +static struct dm_tree_node *_create_dm_tree_node(struct dm_tree *dtree, + const char *name, + const char *uuid, + struct dm_info *info, + void *context, + uint16_t udev_flags) +{ + struct dm_tree_node *node; + dev_t dev; + + if (!(node = dm_pool_zalloc(dtree->mem, sizeof(*node))) || + !(node->name = dm_pool_strdup(dtree->mem, name)) || + !(node->uuid = dm_pool_strdup(dtree->mem, uuid))) { + log_error("_create_dm_tree_node alloc failed."); + return NULL; + } + + node->dtree = dtree; + node->info = *info; + node->context = context; + node->udev_flags = udev_flags; + + dm_list_init(&node->uses); + dm_list_init(&node->used_by); + dm_list_init(&node->activated); + dm_list_init(&node->props.segs); + + dev = MKDEV((dev_t)info->major, (dev_t)info->minor); + + if (!dm_hash_insert_binary(dtree->devs, (const char *) &dev, + sizeof(dev), node)) { + log_error("dtree node hash insertion failed"); + dm_pool_free(dtree->mem, node); + return NULL; + } + + if (*uuid && !dm_hash_insert(dtree->uuids, uuid, node)) { + log_error("dtree uuid hash insertion failed"); + dm_hash_remove_binary(dtree->devs, (const char *) &dev, + sizeof(dev)); + dm_pool_free(dtree->mem, node); + return NULL; + } + + return node; +} + +static struct dm_tree_node *_find_dm_tree_node(struct dm_tree *dtree, + uint32_t major, uint32_t minor) +{ + dev_t dev = MKDEV((dev_t)major, (dev_t)minor); + + return dm_hash_lookup_binary(dtree->devs, (const char *) &dev, + sizeof(dev)); +} + +void dm_tree_set_optional_uuid_suffixes(struct dm_tree *dtree, const char **optional_uuid_suffixes) +{ + dtree->optional_uuid_suffixes = optional_uuid_suffixes; +} + +static struct dm_tree_node *_find_dm_tree_node_by_uuid(struct dm_tree *dtree, + const char *uuid) +{ + struct dm_tree_node *node; + const char *default_uuid_prefix; + size_t default_uuid_prefix_len; + const char *suffix, *suffix_position; + char uuid_without_suffix[DM_UUID_LEN]; + unsigned i = 0; + const char **suffix_list = dtree->optional_uuid_suffixes; + + if ((node = dm_hash_lookup(dtree->uuids, uuid))) { + log_debug("Matched uuid %s in deptree.", uuid); + return node; + } + + default_uuid_prefix = dm_uuid_prefix(); + default_uuid_prefix_len = strlen(default_uuid_prefix); + + if (suffix_list && (suffix_position = rindex(uuid, '-'))) { + while ((suffix = suffix_list[i++])) { + if (strcmp(suffix_position + 1, suffix)) + continue; + + (void) strncpy(uuid_without_suffix, uuid, sizeof(uuid_without_suffix)); + uuid_without_suffix[suffix_position - uuid] = '\0'; + + if ((node = dm_hash_lookup(dtree->uuids, uuid_without_suffix))) { + log_debug("Matched uuid %s (missing suffix -%s) in deptree.", uuid_without_suffix, suffix); + return node; + } + + break; + }; + } + + if (strncmp(uuid, default_uuid_prefix, default_uuid_prefix_len)) + return NULL; + + if ((node = dm_hash_lookup(dtree->uuids, uuid + default_uuid_prefix_len))) { + log_debug("Matched uuid %s (missing prefix) in deptree.", uuid + default_uuid_prefix_len); + return node; + } + + log_debug("Not matched uuid %s in deptree.", uuid); + return NULL; +} + +/* Return node's device_name (major:minor) for debug messages */ +static const char *_node_name(struct dm_tree_node *dnode) +{ + if (dm_snprintf(dnode->dtree->buf, sizeof(dnode->dtree->buf), + "%s (" FMTu32 ":" FMTu32 ")", + dnode->name ? 
dnode->name : "", + dnode->info.major, dnode->info.minor) < 0) { + stack; + return dnode->name; + } + + return dnode->dtree->buf; +} + +void dm_tree_node_set_udev_flags(struct dm_tree_node *dnode, uint16_t udev_flags) + +{ + if (udev_flags != dnode->udev_flags) + log_debug_activation("Resetting %s udev_flags from 0x%x to 0x%x.", + _node_name(dnode), + dnode->udev_flags, udev_flags); + dnode->udev_flags = udev_flags; +} + +void dm_tree_node_set_read_ahead(struct dm_tree_node *dnode, + uint32_t read_ahead, + uint32_t read_ahead_flags) +{ + dnode->props.read_ahead = read_ahead; + dnode->props.read_ahead_flags = read_ahead_flags; +} + +void dm_tree_node_set_presuspend_node(struct dm_tree_node *node, + struct dm_tree_node *presuspend_node) +{ + node->presuspend_node = presuspend_node; +} + +const char *dm_tree_node_get_name(const struct dm_tree_node *node) +{ + return node->info.exists ? node->name : ""; +} + +const char *dm_tree_node_get_uuid(const struct dm_tree_node *node) +{ + return node->info.exists ? node->uuid : ""; +} + +const struct dm_info *dm_tree_node_get_info(const struct dm_tree_node *node) +{ + return &node->info; +} + +void *dm_tree_node_get_context(const struct dm_tree_node *node) +{ + return node->context; +} + +int dm_tree_node_size_changed(const struct dm_tree_node *dnode) +{ + return dnode->props.size_changed; +} + +int dm_tree_node_num_children(const struct dm_tree_node *node, uint32_t inverted) +{ + if (inverted) { + if (_nodes_are_linked(&node->dtree->root, node)) + return 0; + return dm_list_size(&node->used_by); + } + + if (_nodes_are_linked(node, &node->dtree->root)) + return 0; + + return dm_list_size(&node->uses); +} + +/* + * Returns 1 if no prefix supplied + */ +static int _uuid_prefix_matches(const char *uuid, const char *uuid_prefix, size_t uuid_prefix_len) +{ + const char *default_uuid_prefix = dm_uuid_prefix(); + size_t default_uuid_prefix_len = strlen(default_uuid_prefix); + + if (!uuid_prefix) + return 1; + + if (!strncmp(uuid, uuid_prefix, uuid_prefix_len)) + return 1; + + /* Handle transition: active device uuids might be missing the prefix */ + if (uuid_prefix_len <= 4) + return 0; + + if (!strncmp(uuid, default_uuid_prefix, default_uuid_prefix_len)) + return 0; + + if (strncmp(uuid_prefix, default_uuid_prefix, default_uuid_prefix_len)) + return 0; + + if (!strncmp(uuid, uuid_prefix + default_uuid_prefix_len, uuid_prefix_len - default_uuid_prefix_len)) + return 1; + + return 0; +} + +/* + * Returns 1 if no children. + */ +static int _children_suspended(struct dm_tree_node *node, + uint32_t inverted, + const char *uuid_prefix, + size_t uuid_prefix_len) +{ + struct dm_list *list; + struct dm_tree_link *dlink; + const struct dm_info *dinfo; + const char *uuid; + + if (inverted) { + if (_nodes_are_linked(&node->dtree->root, node)) + return 1; + list = &node->used_by; + } else { + if (_nodes_are_linked(node, &node->dtree->root)) + return 1; + list = &node->uses; + } + + dm_list_iterate_items(dlink, list) { + if (!(uuid = dm_tree_node_get_uuid(dlink->node))) { + stack; + continue; + } + + /* Ignore if it doesn't belong to this VG */ + if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len)) + continue; + + /* Ignore if parent node wants to presuspend this node */ + if (dlink->node->presuspend_node == node) + continue; + + if (!(dinfo = dm_tree_node_get_info(dlink->node))) + return_0; /* FIXME Is this normal? */ + + if (!dinfo->suspended) + return 0; + } + + return 1; +} + +/* + * Set major and minor to zero for root of tree. 
+ */ +struct dm_tree_node *dm_tree_find_node(struct dm_tree *dtree, + uint32_t major, + uint32_t minor) +{ + if (!major && !minor) + return &dtree->root; + + return _find_dm_tree_node(dtree, major, minor); +} + +/* + * Set uuid to NULL for root of tree. + */ +struct dm_tree_node *dm_tree_find_node_by_uuid(struct dm_tree *dtree, + const char *uuid) +{ + if (!uuid || !*uuid) + return &dtree->root; + + return _find_dm_tree_node_by_uuid(dtree, uuid); +} + +/* + * First time set *handle to NULL. + * Set inverted to invert the tree. + */ +struct dm_tree_node *dm_tree_next_child(void **handle, + const struct dm_tree_node *parent, + uint32_t inverted) +{ + struct dm_list **dlink = (struct dm_list **) handle; + const struct dm_list *use_list; + + if (inverted) + use_list = &parent->used_by; + else + use_list = &parent->uses; + + if (!*dlink) + *dlink = dm_list_first(use_list); + else + *dlink = dm_list_next(use_list, *dlink); + + return (*dlink) ? dm_list_item(*dlink, struct dm_tree_link)->node : NULL; +} + +static int _deps(struct dm_task **dmt, struct dm_pool *mem, uint32_t major, uint32_t minor, + const char **name, const char **uuid, unsigned inactive_table, + struct dm_info *info, struct dm_deps **deps) +{ + memset(info, 0, sizeof(*info)); + *name = ""; + *uuid = ""; + *deps = NULL; + + if (!dm_is_dm_major(major)) { + info->major = major; + info->minor = minor; + return 1; + } + + if (!(*dmt = dm_task_create(DM_DEVICE_DEPS))) + return_0; + + if (!dm_task_set_major(*dmt, major) || !dm_task_set_minor(*dmt, minor)) { + log_error("_deps: failed to set major:minor for (" FMTu32 ":" FMTu32 ").", + major, minor); + goto failed; + } + + if (inactive_table && !dm_task_query_inactive_table(*dmt)) { + log_error("_deps: failed to set inactive table for (%" PRIu32 ":%" PRIu32 ")", + major, minor); + goto failed; + } + + if (!dm_task_run(*dmt)) { + log_error("_deps: task run failed for (%" PRIu32 ":%" PRIu32 ")", + major, minor); + goto failed; + } + + if (!dm_task_get_info(*dmt, info)) { + log_error("_deps: failed to get info for (%" PRIu32 ":%" PRIu32 ")", + major, minor); + goto failed; + } + + if (info->exists) { + if (info->major != major) { + log_error("Inconsistent dtree major number: %u != %u", + major, info->major); + goto failed; + } + if (info->minor != minor) { + log_error("Inconsistent dtree minor number: %u != %u", + minor, info->minor); + goto failed; + } + *name = dm_task_get_name(*dmt); + *uuid = dm_task_get_uuid(*dmt); + *deps = dm_task_get_deps(*dmt); + } + + return 1; + +failed: + dm_task_destroy(*dmt); + *dmt = NULL; + + return 0; +} + +/* + * Deactivate a device with its dependencies if the uuid prefix matches. 
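+ * (Note: _info_by_dev below only runs DM_DEVICE_INFO to refresh info/name/uuid; the deactivation helpers follow it.)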
+ */
+static int _info_by_dev(uint32_t major, uint32_t minor, int with_open_count,
+			struct dm_info *info, struct dm_pool *mem,
+			const char **name, const char **uuid)
+{
+	struct dm_task *dmt;
+	int r = 0;
+
+	if (!(dmt = dm_task_create(DM_DEVICE_INFO)))
+		return_0;
+
+	if (!dm_task_set_major(dmt, major) || !dm_task_set_minor(dmt, minor)) {
+		log_error("_info_by_dev: Failed to set device number.");
+		goto out;
+	}
+
+	if (!with_open_count && !dm_task_no_open_count(dmt))
+		log_warn("WARNING: Failed to disable open_count.");
+
+	if (!dm_task_run(dmt))
+		goto_out;
+
+	if (!dm_task_get_info(dmt, info))
+		goto_out;
+
+	if (name && !(*name = dm_pool_strdup(mem, dm_task_get_name(dmt)))) {
+		log_error("name pool_strdup failed");
+		goto out;
+	}
+
+	if (uuid && !(*uuid = dm_pool_strdup(mem, dm_task_get_uuid(dmt)))) {
+		log_error("uuid pool_strdup failed");
+		goto out;
+	}
+
+	r = 1;
+out:
+	dm_task_destroy(dmt);
+
+	return r;
+}
+
+static int _check_device_not_in_use(const char *name, struct dm_info *info)
+{
+	const char *reason;
+
+	if (!info->exists)
+		return 1;
+
+	/* If sysfs is not used, use open_count information only. */
+	if (!*dm_sysfs_dir()) {
+		if (!info->open_count)
+			return 1;
+		reason = "in use";
+	} else if (dm_device_has_holders(info->major, info->minor))
+		reason = "is used by another device";
+	else if (dm_device_has_mounted_fs(info->major, info->minor))
+		reason = "contains a filesystem in use";
+	else
+		return 1;
+
+	log_error("Device %s (" FMTu32 ":" FMTu32 ") %s.",
+		  name, info->major, info->minor, reason);
+	return 0;
+}
+
+/* Check if all parent nodes of given node have open_count == 0 */
+static int _node_has_closed_parents(struct dm_tree_node *node,
+				    const char *uuid_prefix,
+				    size_t uuid_prefix_len)
+{
+	struct dm_tree_link *dlink;
+	const struct dm_info *dinfo;
+	struct dm_info info;
+	const char *uuid;
+
+	/* Iterate through parents of this node */
+	dm_list_iterate_items(dlink, &node->used_by) {
+		if (!(uuid = dm_tree_node_get_uuid(dlink->node))) {
+			stack;
+			continue;
+		}
+
+		/* Ignore if it doesn't belong to this VG */
+		if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len))
+			continue;
+
+		if (!(dinfo = dm_tree_node_get_info(dlink->node)))
+			return_0; /* FIXME Is this normal?
*/ + + /* Refresh open_count */ + if (!_info_by_dev(dinfo->major, dinfo->minor, 1, &info, NULL, NULL, NULL)) + return_0; + + if (!info.exists) + continue; + + if (info.open_count) { + log_debug_activation("Node %s %d:%d has open_count %d", uuid_prefix, + dinfo->major, dinfo->minor, info.open_count); + return 0; + } + } + + return 1; +} + +static int _deactivate_node(const char *name, uint32_t major, uint32_t minor, + uint32_t *cookie, uint16_t udev_flags, int retry) +{ + struct dm_task *dmt; + int r = 0; + + log_verbose("Removing %s (%" PRIu32 ":%" PRIu32 ")", name, major, minor); + + if (!(dmt = dm_task_create(DM_DEVICE_REMOVE))) { + log_error("Deactivation dm_task creation failed for %s", name); + return 0; + } + + if (!dm_task_set_major(dmt, major) || !dm_task_set_minor(dmt, minor)) { + log_error("Failed to set device number for %s deactivation", name); + goto out; + } + + if (!dm_task_no_open_count(dmt)) + log_warn("WARNING: Failed to disable open_count."); + + if (cookie) + if (!dm_task_set_cookie(dmt, cookie, udev_flags)) + goto out; + + if (retry) + dm_task_retry_remove(dmt); + + r = dm_task_run(dmt); + + /* FIXME Until kernel returns actual name so dm-iface.c can handle it */ + rm_dev_node(name, dmt->cookie_set && !(udev_flags & DM_UDEV_DISABLE_DM_RULES_FLAG), + dmt->cookie_set && (udev_flags & DM_UDEV_DISABLE_LIBRARY_FALLBACK)); + + /* FIXME Remove node from tree or mark invalid? */ + +out: + dm_task_destroy(dmt); + + return r; +} + +static int _node_clear_table(struct dm_tree_node *dnode, uint16_t udev_flags) +{ + struct dm_task *dmt = NULL, *deps_dmt = NULL; + struct dm_info *info = &dnode->info, deps_info; + struct dm_deps *deps = NULL; + const char *name, *uuid, *depname, *depuuid; + const char *default_uuid_prefix; + size_t default_uuid_prefix_len; + uint32_t i; + int r = 0; + + if (!(name = dm_tree_node_get_name(dnode))) { + log_error("_node_clear_table failed: missing name"); + return 0; + } + + /* Is there a table? */ + if (!info->exists || !info->inactive_table) + return 1; + + /* Get devices used by inactive table that's about to be deleted. */ + if (!_deps(&deps_dmt, dnode->dtree->mem, info->major, info->minor, &depname, &depuuid, 1, info, &deps)) { + log_error("Failed to obtain dependencies for %s before clearing table.", name); + return 0; + } + + log_verbose("Clearing inactive table %s (%" PRIu32 ":%" PRIu32 ")", + name, info->major, info->minor); + + if (!(dmt = dm_task_create(DM_DEVICE_CLEAR))) { + log_error("Table clear dm_task creation failed for %s", name); + goto out; + } + + if (!dm_task_set_major(dmt, info->major) || + !dm_task_set_minor(dmt, info->minor)) { + log_error("Failed to set device number for %s table clear", name); + goto out; + } + + r = dm_task_run(dmt); + + if (!dm_task_get_info(dmt, info)) { + log_error("_node_clear_table failed: info missing after running task for %s", name); + r = 0; + } + + if (!r || !deps) + goto_out; + + /* + * Remove (incomplete) devices that the inactive table referred to but + * which are not in the tree, no longer referenced and don't have a live + * table. 
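+ * (An 'orphan' here means the device exists but has no live table and a zero open_count, as checked below.)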
+ */ + default_uuid_prefix = dm_uuid_prefix(); + default_uuid_prefix_len = strlen(default_uuid_prefix); + + for (i = 0; i < deps->count; i++) { + /* If already in tree, assume it's under control */ + if (_find_dm_tree_node(dnode->dtree, MAJOR(deps->device[i]), MINOR(deps->device[i]))) + continue; + + if (!_info_by_dev(MAJOR(deps->device[i]), MINOR(deps->device[i]), 1, + &deps_info, dnode->dtree->mem, &name, &uuid)) + goto_out; + + /* Proceed if device is an 'orphan' - unreferenced and without a live table. */ + if (!deps_info.exists || deps_info.live_table || deps_info.open_count) + continue; + + if (strncmp(uuid, default_uuid_prefix, default_uuid_prefix_len)) + continue; + + /* Remove device. */ + if (!_deactivate_node(name, deps_info.major, deps_info.minor, &dnode->dtree->cookie, udev_flags, 0)) { + log_error("Failed to deactivate no-longer-used device %s (%" + PRIu32 ":%" PRIu32 ")", name, deps_info.major, deps_info.minor); + } else if (deps_info.suspended) + dec_suspended(); + } + +out: + if (dmt) + dm_task_destroy(dmt); + + if (deps_dmt) + dm_task_destroy(deps_dmt); + + return r; +} + +struct dm_tree_node *dm_tree_add_new_dev_with_udev_flags(struct dm_tree *dtree, + const char *name, + const char *uuid, + uint32_t major, + uint32_t minor, + int read_only, + int clear_inactive, + void *context, + uint16_t udev_flags) +{ + struct dm_tree_node *dnode; + struct dm_info info = { 0 }; + + if (!name || !uuid) { + log_error("Cannot add device without name and uuid."); + return NULL; + } + + /* Do we need to add node to tree? */ + if (!(dnode = dm_tree_find_node_by_uuid(dtree, uuid))) { + if (!(dnode = _create_dm_tree_node(dtree, name, uuid, &info, + context, 0))) + return_NULL; + + /* Attach to root node until a table is supplied */ + if (!_add_to_toplevel(dnode) || !_add_to_bottomlevel(dnode)) + return_NULL; + + dnode->props.major = major; + dnode->props.minor = minor; + } else if (strcmp(name, dnode->name)) { + /* Do we need to rename node? */ + if (!(dnode->props.new_name = dm_pool_strdup(dtree->mem, name))) { + log_error("name pool_strdup failed"); + return NULL; + } + } + + dnode->props.read_only = read_only ? 1 : 0; + dnode->props.read_ahead = DM_READ_AHEAD_AUTO; + dnode->props.read_ahead_flags = 0; + + if (clear_inactive && !_node_clear_table(dnode, udev_flags)) + return_NULL; + + dnode->context = context; + dnode->udev_flags = udev_flags; + + return dnode; +} + +struct dm_tree_node *dm_tree_add_new_dev(struct dm_tree *dtree, const char *name, + const char *uuid, uint32_t major, uint32_t minor, + int read_only, int clear_inactive, void *context) +{ + return dm_tree_add_new_dev_with_udev_flags(dtree, name, uuid, major, minor, + read_only, clear_inactive, context, 0); +} + +static struct dm_tree_node *_add_dev(struct dm_tree *dtree, + struct dm_tree_node *parent, + uint32_t major, uint32_t minor, + uint16_t udev_flags, + int implicit_deps) +{ + struct dm_task *dmt = NULL; + struct dm_info info; + struct dm_deps *deps = NULL; + const char *name = NULL; + const char *uuid = NULL; + struct dm_tree_node *node = NULL; + uint32_t i; + int new = 0; + + /* Already in tree? 
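+	   If not, query its deps, create a node for it and recurse over the deps below.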
*/ + if (!(node = _find_dm_tree_node(dtree, major, minor))) { + if (!_deps(&dmt, dtree->mem, major, minor, &name, &uuid, 0, &info, &deps)) + return_NULL; + + if (!(node = _create_dm_tree_node(dtree, name, uuid, &info, + NULL, udev_flags))) + goto_out; + new = 1; + node->implicit_deps = implicit_deps; + } else if (!implicit_deps && node->implicit_deps) { + node->udev_flags = udev_flags; + node->implicit_deps = 0; + } + + if (!_link_tree_nodes(parent, node)) { + node = NULL; + goto_out; + } + + /* If node was already in tree, no need to recurse. */ + if (!new) + goto out; + + /* Can't recurse if not a mapped device or there are no dependencies */ + if (!node->info.exists || !deps || !deps->count) { + if (!_add_to_bottomlevel(node)) { + stack; + node = NULL; + } + goto out; + } + + /* Add dependencies to tree */ + for (i = 0; i < deps->count; i++) + /* Implicit devices are by default temporary */ + if (!_add_dev(dtree, node, MAJOR(deps->device[i]), + MINOR(deps->device[i]), udev_flags | + DM_UDEV_DISABLE_SUBSYSTEM_RULES_FLAG | + DM_UDEV_DISABLE_DISK_RULES_FLAG | + DM_UDEV_DISABLE_OTHER_RULES_FLAG, 1)) { + node = NULL; + goto_out; + } + +out: + if (dmt) + dm_task_destroy(dmt); + + return node; +} + +int dm_tree_add_dev(struct dm_tree *dtree, uint32_t major, uint32_t minor) +{ + return _add_dev(dtree, &dtree->root, major, minor, 0, 0) ? 1 : 0; +} + +int dm_tree_add_dev_with_udev_flags(struct dm_tree *dtree, uint32_t major, + uint32_t minor, uint16_t udev_flags) +{ + return _add_dev(dtree, &dtree->root, major, minor, udev_flags, 0) ? 1 : 0; +} + +static int _rename_node(const char *old_name, const char *new_name, uint32_t major, + uint32_t minor, uint32_t *cookie, uint16_t udev_flags) +{ + struct dm_task *dmt; + int r = 0; + + log_verbose("Renaming %s (%" PRIu32 ":%" PRIu32 ") to %s", old_name, major, minor, new_name); + + if (!(dmt = dm_task_create(DM_DEVICE_RENAME))) { + log_error("Rename dm_task creation failed for %s", old_name); + return 0; + } + + if (!dm_task_set_name(dmt, old_name)) { + log_error("Failed to set name for %s rename.", old_name); + goto out; + } + + if (!dm_task_set_newname(dmt, new_name)) + goto_out; + + if (!dm_task_no_open_count(dmt)) + log_warn("WARNING: Failed to disable open_count."); + + if (!dm_task_set_cookie(dmt, cookie, udev_flags)) + goto out; + + r = dm_task_run(dmt); + +out: + dm_task_destroy(dmt); + + return r; +} + +/* FIXME Merge with _suspend_node? 
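+   (Both build a nearly identical dm_task; they differ only in DM_DEVICE_SUSPEND vs DM_DEVICE_RESUME and a few flags.)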
 */
+static int _resume_node(const char *name, uint32_t major, uint32_t minor,
+			uint32_t read_ahead, uint32_t read_ahead_flags,
+			struct dm_info *newinfo, uint32_t *cookie,
+			uint16_t udev_flags, int already_suspended)
+{
+	struct dm_task *dmt;
+	int r = 0;
+
+	log_verbose("Resuming %s (" FMTu32 ":" FMTu32 ").", name, major, minor);
+
+	if (!(dmt = dm_task_create(DM_DEVICE_RESUME))) {
+		log_debug_activation("Resume dm_task creation failed for %s.", name);
+		return 0;
+	}
+
+	/* FIXME Kernel should fill in name on return instead */
+	if (!dm_task_set_name(dmt, name)) {
+		log_debug_activation("Failed to set device name for %s resumption.", name);
+		goto out;
+	}
+
+	if (!dm_task_set_major(dmt, major) || !dm_task_set_minor(dmt, minor)) {
+		log_error("Failed to set device number for %s resumption.", name);
+		goto out;
+	}
+
+	if (!dm_task_no_open_count(dmt))
+		log_warn("WARNING: Failed to disable open_count.");
+
+	if (!dm_task_set_read_ahead(dmt, read_ahead, read_ahead_flags))
+		log_warn("WARNING: Failed to set read ahead.");
+
+	if (!dm_task_set_cookie(dmt, cookie, udev_flags))
+		goto_out;
+
+	if (!(r = dm_task_run(dmt)))
+		goto_out;
+
+	if (already_suspended)
+		dec_suspended();
+
+	if (!(r = dm_task_get_info(dmt, newinfo)))
+		stack;
+
+out:
+	dm_task_destroy(dmt);
+
+	return r;
+}
+
+static int _suspend_node(const char *name, uint32_t major, uint32_t minor,
+			 int skip_lockfs, int no_flush, struct dm_info *newinfo)
+{
+	struct dm_task *dmt;
+	int r = 0;
+
+	log_verbose("Suspending %s (%" PRIu32 ":%" PRIu32 ")%s%s",
+		    name, major, minor,
+		    skip_lockfs ? "" : " with filesystem sync",
+		    no_flush ? "" : " with device flush");
+
+	if (!(dmt = dm_task_create(DM_DEVICE_SUSPEND))) {
+		log_error("Suspend dm_task creation failed for %s", name);
+		return 0;
+	}
+
+	if (!dm_task_set_major(dmt, major) || !dm_task_set_minor(dmt, minor)) {
+		log_error("Failed to set device number for %s suspension.", name);
+		goto out;
+	}
+
+	if (!dm_task_no_open_count(dmt))
+		log_warn("WARNING: Failed to disable open_count.");
+
+	if (skip_lockfs && !dm_task_skip_lockfs(dmt))
+		log_warn("WARNING: Failed to set skip_lockfs flag.");
+
+	if (no_flush && !dm_task_no_flush(dmt))
+		log_warn("WARNING: Failed to set no_flush flag.");
+
+	if ((r = dm_task_run(dmt))) {
+		inc_suspended();
+		r = dm_task_get_info(dmt, newinfo);
+	}
+out:
+	dm_task_destroy(dmt);
+
+	return r;
+}
+
+static int _thin_pool_get_status(struct dm_tree_node *dnode,
+				 struct dm_status_thin_pool *s)
+{
+	struct dm_task *dmt;
+	int r = 0;
+	uint64_t start, length;
+	char *type = NULL;
+	char *params = NULL;
+
+	if (!(dmt = dm_task_create(DM_DEVICE_STATUS)))
+		return_0;
+
+	if (!dm_task_set_major(dmt, dnode->info.major) ||
+	    !dm_task_set_minor(dmt, dnode->info.minor)) {
+		log_error("Failed to set major minor.");
+		goto out;
+	}
+
+	if (!dm_task_no_flush(dmt))
+		log_warn("WARNING: Can't set no_flush flag."); /* Non fatal */
+
+	if (!dm_task_run(dmt))
+		goto_out;
+
+	dm_get_next_target(dmt, NULL, &start, &length, &type, &params);
+
+	if (!type || (strcmp(type, "thin-pool") != 0)) {
+		log_error("Expected thin-pool target for %s and got %s.",
+			  _node_name(dnode), type ?
: "no target"); + goto out; + } + + if (!parse_thin_pool_status(params, s)) + goto_out; + + log_debug_activation("Found transaction id %" PRIu64 " for thin pool %s " + "with status line: %s.", + s->transaction_id, _node_name(dnode), params); + + r = 1; +out: + dm_task_destroy(dmt); + + return r; +} + +static int _thin_pool_node_message(struct dm_tree_node *dnode, struct thin_message *tm) +{ + struct dm_task *dmt; + struct dm_thin_message *m = &tm->message; + char buf[64]; + int r; + + switch (m->type) { + case DM_THIN_MESSAGE_CREATE_SNAP: + r = dm_snprintf(buf, sizeof(buf), "create_snap %u %u", + m->u.m_create_snap.device_id, + m->u.m_create_snap.origin_id); + break; + case DM_THIN_MESSAGE_CREATE_THIN: + r = dm_snprintf(buf, sizeof(buf), "create_thin %u", + m->u.m_create_thin.device_id); + break; + case DM_THIN_MESSAGE_DELETE: + r = dm_snprintf(buf, sizeof(buf), "delete %u", + m->u.m_delete.device_id); + break; + case DM_THIN_MESSAGE_SET_TRANSACTION_ID: + r = dm_snprintf(buf, sizeof(buf), + "set_transaction_id %" PRIu64 " %" PRIu64, + m->u.m_set_transaction_id.current_id, + m->u.m_set_transaction_id.new_id); + break; + case DM_THIN_MESSAGE_RESERVE_METADATA_SNAP: /* target vsn 1.1 */ + r = dm_snprintf(buf, sizeof(buf), "reserve_metadata_snap"); + break; + case DM_THIN_MESSAGE_RELEASE_METADATA_SNAP: /* target vsn 1.1 */ + r = dm_snprintf(buf, sizeof(buf), "release_metadata_snap"); + break; + default: + r = -1; + } + + if (r < 0) { + log_error("Failed to prepare message."); + return 0; + } + + r = 0; + + if (!(dmt = dm_task_create(DM_DEVICE_TARGET_MSG))) + return_0; + + if (!dm_task_set_major(dmt, dnode->info.major) || + !dm_task_set_minor(dmt, dnode->info.minor)) { + log_error("Failed to set message major minor."); + goto out; + } + + if (!dm_task_set_message(dmt, buf)) + goto_out; + + /* Internal functionality of dm_task */ + dmt->expected_errno = tm->expected_errno; + + if (!dm_task_run(dmt)) { + log_error("Failed to process thin pool message \"%s\".", buf); + goto out; + } + + r = 1; +out: + dm_task_destroy(dmt); + + return r; +} + +static struct load_segment *_get_last_load_segment(struct dm_tree_node *node) +{ + if (dm_list_empty(&node->props.segs)) { + log_error("Node %s is missing a segment.", _node_name(node)); + return NULL; + } + + return dm_list_item(dm_list_last(&node->props.segs), struct load_segment); +} + +/* For preload pass only validate pool's transaction_id */ +static int _node_send_messages(struct dm_tree_node *dnode, + const char *uuid_prefix, + size_t uuid_prefix_len, + int send) +{ + struct load_segment *seg; + struct thin_message *tmsg; + struct dm_status_thin_pool stp; + const char *uuid; + int have_messages; + + if (!dnode->info.exists) + return 1; + + if (!(seg = _get_last_load_segment(dnode))) + return_0; + + if (seg->type != SEG_THIN_POOL) + return 1; + + if (!(uuid = dm_tree_node_get_uuid(dnode))) + return_0; + + if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len)) { + log_debug_activation("UUID \"%s\" does not match.", uuid); + return 1; + } + + if (!_thin_pool_get_status(dnode, &stp)) + return_0; + + have_messages = !dm_list_empty(&seg->thin_messages) ? 
1 : 0; + if (stp.transaction_id == seg->transaction_id) { + dnode->props.send_messages = 0; /* messages already committed */ + if (have_messages) + log_debug_activation("Thin pool %s transaction_id matches %" + PRIu64 ", skipping messages.", + _node_name(dnode), stp.transaction_id); + return 1; + } + + /* Error if there are no stacked messages or id mismatches */ + if ((stp.transaction_id + 1) != seg->transaction_id) { + log_error("Thin pool %s transaction_id is %" PRIu64 ", while expected %" PRIu64 ".", + _node_name(dnode), stp.transaction_id, seg->transaction_id - have_messages); + return 0; + } + + if (!have_messages || !send) + return 1; /* transaction_id is matching */ + + dm_list_iterate_items(tmsg, &seg->thin_messages) { + if (!(_thin_pool_node_message(dnode, tmsg))) + return_0; + if (tmsg->message.type == DM_THIN_MESSAGE_SET_TRANSACTION_ID) { + if (!_thin_pool_get_status(dnode, &stp)) + return_0; + if (stp.transaction_id != tmsg->message.u.m_set_transaction_id.new_id) { + log_error("Thin pool %s transaction_id is %" PRIu64 + " and does not match expected %" PRIu64 ".", + _node_name(dnode), stp.transaction_id, + tmsg->message.u.m_set_transaction_id.new_id); + return 0; + } + } + } + + dnode->props.send_messages = 0; /* messages posted */ + + return 1; +} + +/* + * FIXME Don't attempt to deactivate known internal dependencies. + */ +static int _dm_tree_deactivate_children(struct dm_tree_node *dnode, + const char *uuid_prefix, + size_t uuid_prefix_len, + unsigned level) +{ + int r = 1; + void *handle = NULL; + struct dm_tree_node *child = dnode; + struct dm_info info; + const struct dm_info *dinfo; + const char *name; + const char *uuid; + + while ((child = dm_tree_next_child(&handle, dnode, 0))) { + if (!(dinfo = dm_tree_node_get_info(child))) { + stack; + continue; + } + + if (!(name = dm_tree_node_get_name(child))) { + stack; + continue; + } + + if (!(uuid = dm_tree_node_get_uuid(child))) { + stack; + continue; + } + + /* Ignore if it doesn't belong to this VG */ + if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len)) + continue; + + /* Refresh open_count */ + if (!_info_by_dev(dinfo->major, dinfo->minor, 1, &info, NULL, NULL, NULL)) + return_0; + + if (!info.exists) + continue; + + if (info.open_count) { + /* Skip internal non-toplevel opened nodes */ + if (level) + continue; + + /* When retry is not allowed, error */ + if (!child->dtree->retry_remove) { + log_error("Unable to deactivate open %s (" FMTu32 ":" + FMTu32 ").", name, info.major, info.minor); + r = 0; + continue; + } + + /* Check toplevel node for holders/mounted fs */ + if (!_check_device_not_in_use(name, &info)) { + stack; + r = 0; + continue; + } + /* Go on with retry */ + } + + /* Also checking open_count in parent nodes of presuspend_node */ + if ((child->presuspend_node && + !_node_has_closed_parents(child->presuspend_node, + uuid_prefix, uuid_prefix_len))) { + /* Only report error from (likely non-internal) dependency at top level */ + if (!level) { + log_error("Unable to deactivate open %s (" FMTu32 ":" + FMTu32 ").", name, info.major, info.minor); + r = 0; + } + continue; + } + + /* Suspend child node first if requested */ + if (child->presuspend_node && + !dm_tree_suspend_children(child, uuid_prefix, uuid_prefix_len)) + continue; + + if (!_deactivate_node(name, info.major, info.minor, + &child->dtree->cookie, child->udev_flags, + (level == 0) ? 
child->dtree->retry_remove : 0)) { + log_error("Unable to deactivate %s (" FMTu32 ":" + FMTu32 ").", name, info.major, info.minor); + r = 0; + continue; + } + + if (info.suspended && info.live_table) + dec_suspended(); + + if (child->callback && + !child->callback(child, DM_NODE_CALLBACK_DEACTIVATED, + child->callback_data)) + stack; + /* FIXME Deactivation must currently ignore failure + * here so that lvremove can continue: we need an + * alternative way to handle this state without + * setting r=0. Or better, skip calling thin_check + * entirely if the device is about to be removed. */ + + if (dm_tree_node_num_children(child, 0) && + !_dm_tree_deactivate_children(child, uuid_prefix, uuid_prefix_len, level + 1)) + return_0; + } + + return r; +} + +int dm_tree_deactivate_children(struct dm_tree_node *dnode, + const char *uuid_prefix, + size_t uuid_prefix_len) +{ + return _dm_tree_deactivate_children(dnode, uuid_prefix, uuid_prefix_len, 0); +} + +int dm_tree_suspend_children(struct dm_tree_node *dnode, + const char *uuid_prefix, + size_t uuid_prefix_len) +{ + int r = 1; + void *handle = NULL; + struct dm_tree_node *child = dnode; + struct dm_info info, newinfo; + const struct dm_info *dinfo; + const char *name; + const char *uuid; + + /* Suspend nodes at this level of the tree */ + while ((child = dm_tree_next_child(&handle, dnode, 0))) { + if (!(dinfo = dm_tree_node_get_info(child))) { + stack; + continue; + } + + if (!(name = dm_tree_node_get_name(child))) { + stack; + continue; + } + + if (!(uuid = dm_tree_node_get_uuid(child))) { + stack; + continue; + } + + /* Ignore if it doesn't belong to this VG */ + if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len)) + continue; + + /* Ensure immediate parents are already suspended */ + if (!_children_suspended(child, 1, uuid_prefix, uuid_prefix_len)) + continue; + + if (!_info_by_dev(dinfo->major, dinfo->minor, 0, &info, NULL, NULL, NULL)) + return_0; + + if (!info.exists || info.suspended) + continue; + + /* If child has some real messages send them */ + if ((child->props.send_messages > 1) && r) { + if (!(r = _node_send_messages(child, uuid_prefix, uuid_prefix_len, 1))) + stack; + else { + log_debug_activation("Sent messages to thin-pool %s and " + "skipping suspend of its children.", + _node_name(child)); + child->props.skip_suspend++; + } + continue; + } + + if (!_suspend_node(name, info.major, info.minor, + child->dtree->skip_lockfs, + child->dtree->no_flush, &newinfo)) { + log_error("Unable to suspend %s (" FMTu32 ":" + FMTu32 ")", name, info.major, info.minor); + r = 0; + continue; + } + + /* Update cached info */ + child->info = newinfo; + } + + /* Then suspend any child nodes */ + handle = NULL; + + while ((child = dm_tree_next_child(&handle, dnode, 0))) { + if (child->props.skip_suspend) + continue; + + if (!(uuid = dm_tree_node_get_uuid(child))) { + stack; + continue; + } + + /* Ignore if it doesn't belong to this VG */ + if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len)) + continue; + + if (dm_tree_node_num_children(child, 0)) + if (!dm_tree_suspend_children(child, uuid_prefix, uuid_prefix_len)) + return_0; + } + + return r; +} + +/* + * _rename_conflict_exists + * @dnode + * @node + * @resolvable + * + * Check if there is a rename conflict with existing peers in + * this tree. 'resolvable' is set if the conflicting node will + * also be undergoing a rename. (Allowing that node to rename + * first would clear the conflict.) 
+ *
+ * Returns: 1 if conflict, 0 otherwise
+ */
+static int _rename_conflict_exists(struct dm_tree_node *parent,
+				   struct dm_tree_node *node,
+				   int *resolvable)
+{
+	void *handle = NULL;
+	const char *name = dm_tree_node_get_name(node);
+	const char *sibling_name;
+	struct dm_tree_node *sibling;
+
+	*resolvable = 0;
+
+	if (!name)
+		return_0;
+
+	while ((sibling = dm_tree_next_child(&handle, parent, 0))) {
+		if (sibling == node)
+			continue;
+
+		if (!(sibling_name = dm_tree_node_get_name(sibling))) {
+			stack;
+			continue;
+		}
+
+		if (!strcmp(node->props.new_name, sibling_name)) {
+			if (sibling->props.new_name)
+				*resolvable = 1;
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+int dm_tree_activate_children(struct dm_tree_node *dnode,
+			      const char *uuid_prefix,
+			      size_t uuid_prefix_len)
+{
+	int r = 1;
+	int resolvable_name_conflict, awaiting_peer_rename = 0;
+	void *handle = NULL;
+	struct dm_tree_node *child = dnode;
+	const char *name;
+	const char *uuid;
+	int priority;
+
+	/* Activate children first */
+	while ((child = dm_tree_next_child(&handle, dnode, 0))) {
+		if (!(uuid = dm_tree_node_get_uuid(child))) {
+			stack;
+			continue;
+		}
+
+		if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len))
+			continue;
+
+		if (dm_tree_node_num_children(child, 0))
+			if (!dm_tree_activate_children(child, uuid_prefix, uuid_prefix_len))
+				return_0;
+	}
+
+	handle = NULL;
+
+	for (priority = 0; priority < 3; priority++) {
+		awaiting_peer_rename = 0;
+		while ((child = dm_tree_next_child(&handle, dnode, 0))) {
+			if (priority != child->activation_priority)
+				continue;
+
+			if (!(uuid = dm_tree_node_get_uuid(child))) {
+				stack;
+				continue;
+			}
+
+			if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len))
+				continue;
+
+			if (!(name = dm_tree_node_get_name(child))) {
+				stack;
+				continue;
+			}
+
+			/* Rename? */
+			if (child->props.new_name) {
+				if (_rename_conflict_exists(dnode, child, &resolvable_name_conflict) &&
+				    resolvable_name_conflict) {
+					awaiting_peer_rename++;
+					continue;
+				}
+				if (!_rename_node(name, child->props.new_name, child->info.major,
+						  child->info.minor, &child->dtree->cookie,
+						  child->udev_flags)) {
+					log_error("Failed to rename %s (%" PRIu32
+						  ":%" PRIu32 ") to %s", name, child->info.major,
+						  child->info.minor, child->props.new_name);
+					return 0;
+				}
+				child->name = child->props.new_name;
+				child->props.new_name = NULL;
+			}
+
+			if (!child->info.inactive_table && !child->info.suspended)
+				continue;
+
+			if (!_resume_node(child->name, child->info.major, child->info.minor,
+					  child->props.read_ahead, child->props.read_ahead_flags,
+					  &child->info, &child->dtree->cookie, child->udev_flags, child->info.suspended)) {
+				log_error("Unable to resume %s.", _node_name(child));
+				r = 0;
+				continue;
+			}
+		}
+		if (awaiting_peer_rename)
+			priority--; /* redo priority level */
+	}
+
+	/*
+	 * FIXME: Implement delayed error reporting:
+	 * activation should be stopped only if the submission of a
+	 * transaction_id message fails; resume should continue further,
+	 * and just the whole command has to report failure.
+ */
+	if (r && (dnode->props.send_messages > 1) &&
+	    !(r = _node_send_messages(dnode, uuid_prefix, uuid_prefix_len, 1)))
+		stack;
+
+	return r;
+}
+
+static int _create_node(struct dm_tree_node *dnode)
+{
+	int r = 0;
+	struct dm_task *dmt;
+
+	log_verbose("Creating %s", dnode->name);
+
+	if (!(dmt = dm_task_create(DM_DEVICE_CREATE))) {
+		log_error("Create dm_task creation failed for %s", dnode->name);
+		return 0;
+	}
+
+	if (!dm_task_set_name(dmt, dnode->name)) {
+		log_error("Failed to set device name for %s", dnode->name);
+		goto out;
+	}
+
+	if (!dm_task_set_uuid(dmt, dnode->uuid)) {
+		log_error("Failed to set uuid for %s", dnode->name);
+		goto out;
+	}
+
+	if (dnode->props.major &&
+	    (!dm_task_set_major(dmt, dnode->props.major) ||
+	     !dm_task_set_minor(dmt, dnode->props.minor))) {
+		log_error("Failed to set device number for %s creation.", dnode->name);
+		goto out;
+	}
+
+	if (dnode->props.read_only && !dm_task_set_ro(dmt)) {
+		log_error("Failed to set read only flag for %s", dnode->name);
+		goto out;
+	}
+
+	if (!dm_task_no_open_count(dmt))
+		log_warn("WARNING: Failed to disable open_count.");
+
+	if ((r = dm_task_run(dmt))) {
+		if (!(r = dm_task_get_info(dmt, &dnode->info)))
+			/*
+			 * This should never occur. However, we print an
+			 * error message anyway for the more absurd cases
+			 * (e.g. memory corruption) so there is never any
+			 * question as to which one failed.
+			 */
+			log_error(INTERNAL_ERROR
+				  "Unable to get DM task info for %s.",
+				  dnode->name);
+	}
+out:
+	dm_task_destroy(dmt);
+
+	return r;
+}
+
+/*
+ * _remove_node
+ *
+ * This function is only used to remove a DM device that has failed
+ * to load any table.
+ */
+static int _remove_node(struct dm_tree_node *dnode)
+{
+	if (!dnode->info.exists)
+		return 1;
+
+	if (dnode->info.live_table || dnode->info.inactive_table) {
+		log_error(INTERNAL_ERROR
+			  "_remove_node called on device with loaded table(s).");
+		return 0;
+	}
+
+	if (!_deactivate_node(dnode->name, dnode->info.major, dnode->info.minor,
+			      &dnode->dtree->cookie, dnode->udev_flags, 0)) {
+		log_error("Failed to clean up device with no table: %s.",
+			  _node_name(dnode));
+		return 0;
+	}
+	return 1;
+}
+
+static int _build_dev_string(char *devbuf, size_t bufsize, struct dm_tree_node *node)
+{
+	if (!dm_format_dev(devbuf, bufsize, node->info.major, node->info.minor)) {
+		log_error("Failed to format %s device number for %s as dm "
+			  "target (%u,%u)",
+			  node->name, node->uuid, node->info.major, node->info.minor);
+		return 0;
+	}
+
+	return 1;
+}
+
+/* simplify string emitting code */
+#define EMIT_PARAMS(p, str...)\
+do {\
+	int w;\
+	if ((w = dm_snprintf(params + p, paramsize - (size_t) p, str)) < 0) {\
+		stack; /* Out of space */\
+		return -1;\
+	}\
+	p += w;\
+} while (0)
+
+/*
+ * _emit_areas_line
+ *
+ * Returns: 1 on success, 0 on failure
+ */
+static int _emit_areas_line(struct dm_task *dmt __attribute__((unused)),
+			    struct load_segment *seg, char *params,
+			    size_t paramsize, int *pos)
+{
+	struct seg_area *area;
+	char devbuf[DM_FORMAT_DEV_BUFSIZE];
+	unsigned first_time = 1;
+
+	dm_list_iterate_items(area, &seg->areas) {
+		switch (seg->type) {
+		case SEG_RAID0:
+		case SEG_RAID0_META:
+		case SEG_RAID1:
+		case SEG_RAID10:
+		case SEG_RAID4:
+		case SEG_RAID5_N:
+		case SEG_RAID5_LA:
+		case SEG_RAID5_RA:
+		case SEG_RAID5_LS:
+		case SEG_RAID5_RS:
+		case SEG_RAID6_N_6:
+		case SEG_RAID6_ZR:
+		case SEG_RAID6_NR:
+		case SEG_RAID6_NC:
+		case SEG_RAID6_LS_6:
+		case SEG_RAID6_RS_6:
+		case SEG_RAID6_LA_6:
+		case SEG_RAID6_RA_6:
+			if (!area->dev_node) {
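+				/* Missing raid leg: dm-raid accepts '-' as a placeholder for an absent device. */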
EMIT_PARAMS(*pos, " -"); + break; + } + if (!_build_dev_string(devbuf, sizeof(devbuf), area->dev_node)) + return_0; + + EMIT_PARAMS(*pos, " %s", devbuf); + break; + default: + if (!_build_dev_string(devbuf, sizeof(devbuf), area->dev_node)) + return_0; + + EMIT_PARAMS(*pos, "%s%s %" PRIu64, first_time ? "" : " ", + devbuf, area->offset); + } + + first_time = 0; + } + + return 1; +} + +/* + * Returns: 1 on success, 0 on failure + */ +static int _mirror_emit_segment_line(struct dm_task *dmt, struct load_segment *seg, + char *params, size_t paramsize) +{ + int block_on_error = 0; + int handle_errors = 0; + int dm_log_userspace = 0; + unsigned log_parm_count; + int pos = 0; + char logbuf[DM_FORMAT_DEV_BUFSIZE]; + const char *logtype; + unsigned kmaj = 0, kmin = 0, krel = 0; + + if (!get_uname_version(&kmaj, &kmin, &krel)) + return_0; + + if ((seg->flags & DM_BLOCK_ON_ERROR)) { + /* + * Originally, block_on_error was an argument to the log + * portion of the mirror CTR table. It was renamed to + * "handle_errors" and now resides in the 'features' + * section of the mirror CTR table (i.e. at the end). + * + * We can identify whether to use "block_on_error" or + * "handle_errors" by the dm-mirror module's version + * number (>= 1.12) or by the kernel version (>= 2.6.22). + */ + if (KERNEL_VERSION(kmaj, kmin, krel) >= KERNEL_VERSION(2, 6, 22)) + handle_errors = 1; + else + block_on_error = 1; + } + + if (seg->clustered) { + /* Cluster mirrors require a UUID */ + if (!seg->uuid) + return_0; + + /* + * Cluster mirrors used to have their own log + * types. Now they are accessed through the + * userspace log type. + * + * The dm-log-userspace module was added to the + * 2.6.31 kernel. + */ + if (KERNEL_VERSION(kmaj, kmin, krel) >= KERNEL_VERSION(2, 6, 31)) + dm_log_userspace = 1; + } + + /* Region size */ + log_parm_count = 1; + + /* [no]sync, block_on_error etc. */ + log_parm_count += hweight32(seg->flags); + + /* "handle_errors" is a feature arg now */ + if (handle_errors) + log_parm_count--; + + /* DM_CORELOG does not count in the param list */ + if (seg->flags & DM_CORELOG) + log_parm_count--; + + if (seg->clustered) { + log_parm_count++; /* For UUID */ + + if (!dm_log_userspace) + EMIT_PARAMS(pos, "clustered-"); + else + /* For clustered-* type field inserted later */ + log_parm_count++; + } + + if (!seg->log) + logtype = "core"; + else { + logtype = "disk"; + log_parm_count++; + if (!_build_dev_string(logbuf, sizeof(logbuf), seg->log)) + return_0; + } + + if (dm_log_userspace) + EMIT_PARAMS(pos, "userspace %u %s clustered-%s", + log_parm_count, seg->uuid, logtype); + else + EMIT_PARAMS(pos, "%s %u", logtype, log_parm_count); + + if (seg->log) + EMIT_PARAMS(pos, " %s", logbuf); + + EMIT_PARAMS(pos, " %u", seg->region_size); + + if (seg->clustered && !dm_log_userspace) + EMIT_PARAMS(pos, " %s", seg->uuid); + + if ((seg->flags & DM_NOSYNC)) + EMIT_PARAMS(pos, " nosync"); + else if ((seg->flags & DM_FORCESYNC)) + EMIT_PARAMS(pos, " sync"); + + if (block_on_error) + EMIT_PARAMS(pos, " block_on_error"); + + EMIT_PARAMS(pos, " %u ", seg->mirror_area_count); + + if (_emit_areas_line(dmt, seg, params, paramsize, &pos) <= 0) + return_0; + + if (handle_errors) + EMIT_PARAMS(pos, " 1 handle_errors"); + + return 1; +} + +static int _2_if_value(unsigned p) +{ + return p ? 
2 : 0; +} + +/* Return number of bits passed in @bits assuming 2 * 64 bit size */ +static int _get_params_count(const uint64_t *bits) +{ + int r = 0; + int i = RAID_BITMAP_SIZE; + + while (i--) { + r += 2 * hweight32(bits[i] & 0xFFFFFFFF); + r += 2 * hweight32(bits[i] >> 32); + } + + return r; +} + +/* + * Get target version (major, minor and patchlevel) for @target_name + * + * FIXME: this function is derived from liblvm. + * Integrate with move of liblvm functions + * to libdm in future library layer purge + * (e.g. expose as API dm_target_version()?) + */ +static int _target_version(const char *target_name, uint32_t *maj, + uint32_t *min, uint32_t *patchlevel) +{ + int r = 0; + struct dm_task *dmt; + struct dm_versions *target, *last_target = NULL; + + log_very_verbose("Getting target version for %s", target_name); + if (!(dmt = dm_task_create(DM_DEVICE_LIST_VERSIONS))) + return_0; + + if (!dm_task_run(dmt)) { + log_debug_activation("Failed to get %s target versions", target_name); + /* Assume this was because LIST_VERSIONS isn't supported */ + *maj = *min = *patchlevel = 0; + r = 1; + } else + for (target = dm_task_get_versions(dmt); + target != last_target; + last_target = target, target = (struct dm_versions *)((char *) target + target->next)) + if (!strcmp(target_name, target->name)) { + *maj = target->version[0]; + *min = target->version[1]; + *patchlevel = target->version[2]; + log_very_verbose("Found %s target " + "v%" PRIu32 ".%" PRIu32 ".%" PRIu32 ".", + target_name, *maj, *min, *patchlevel); + r = 1; + break; + } + + dm_task_destroy(dmt); + + return r; +} + +static int _raid_emit_segment_line(struct dm_task *dmt, uint32_t major, + uint32_t minor, struct load_segment *seg, + uint64_t *seg_start, char *params, + size_t paramsize) +{ + uint32_t i; + uint32_t area_count = seg->area_count / 2; + uint32_t maj, min, patchlevel; + int param_count = 1; /* mandatory 'chunk size'/'stripe size' arg */ + int pos = 0; + unsigned type; + + if (seg->area_count % 2) + return 0; + + if ((seg->flags & DM_NOSYNC) || (seg->flags & DM_FORCESYNC)) + param_count++; + + param_count += _2_if_value(seg->data_offset) + + _2_if_value(seg->delta_disks) + + _2_if_value(seg->region_size) + + _2_if_value(seg->writebehind) + + _2_if_value(seg->min_recovery_rate) + + _2_if_value(seg->max_recovery_rate) + + _2_if_value(seg->data_copies > 1); + + /* rebuilds and writemostly are BITMAP_SIZE * 64 bits */ + param_count += _get_params_count(seg->rebuilds); + param_count += _get_params_count(seg->writemostly); + + if ((seg->type == SEG_RAID1) && seg->stripe_size) + log_info("WARNING: Ignoring RAID1 stripe size"); + + /* Kernel only expects "raid0", not "raid0_meta" */ + type = seg->type; + if (type == SEG_RAID0_META) + type = SEG_RAID0; + + EMIT_PARAMS(pos, "%s %d %u", + type == SEG_RAID10 ? 
"raid10" : _dm_segtypes[type].target, + param_count, seg->stripe_size); + + if (!_target_version("raid", &maj, &min, &patchlevel)) + return_0; + + /* + * Target version prior to 1.9.0 and >= 1.11.0 emit + * order of parameters as of kernel target documentation + */ + if (maj > 1 || (maj == 1 && (min < 9 || min >= 11))) { + if (seg->flags & DM_NOSYNC) + EMIT_PARAMS(pos, " nosync"); + else if (seg->flags & DM_FORCESYNC) + EMIT_PARAMS(pos, " sync"); + + for (i = 0; i < area_count; i++) + if (seg->rebuilds[i/64] & (1ULL << (i%64))) + EMIT_PARAMS(pos, " rebuild %u", i); + + if (seg->min_recovery_rate) + EMIT_PARAMS(pos, " min_recovery_rate %u", + seg->min_recovery_rate); + + if (seg->max_recovery_rate) + EMIT_PARAMS(pos, " max_recovery_rate %u", + seg->max_recovery_rate); + + for (i = 0; i < area_count; i++) + if (seg->writemostly[i/64] & (1ULL << (i%64))) + EMIT_PARAMS(pos, " write_mostly %u", i); + + if (seg->writebehind) + EMIT_PARAMS(pos, " max_write_behind %u", seg->writebehind); + + if (seg->region_size) + EMIT_PARAMS(pos, " region_size %u", seg->region_size); + + if (seg->data_copies > 1 && type == SEG_RAID10) + EMIT_PARAMS(pos, " raid10_copies %u", seg->data_copies); + + if (seg->delta_disks) + EMIT_PARAMS(pos, " delta_disks %d", seg->delta_disks); + + /* If seg-data_offset == 1, kernel needs a zero offset to adjust to it */ + if (seg->data_offset) + EMIT_PARAMS(pos, " data_offset %d", seg->data_offset == 1 ? 0 : seg->data_offset); + + /* Target version >= 1.9.0 && < 1.11.0 had a table line parameter ordering flaw */ + } else { + if (seg->data_copies > 1 && type == SEG_RAID10) + EMIT_PARAMS(pos, " raid10_copies %u", seg->data_copies); + + if (seg->flags & DM_NOSYNC) + EMIT_PARAMS(pos, " nosync"); + else if (seg->flags & DM_FORCESYNC) + EMIT_PARAMS(pos, " sync"); + + if (seg->region_size) + EMIT_PARAMS(pos, " region_size %u", seg->region_size); + + /* If seg-data_offset == 1, kernel needs a zero offset to adjust to it */ + if (seg->data_offset) + EMIT_PARAMS(pos, " data_offset %d", seg->data_offset == 1 ? 
0 : seg->data_offset); + + if (seg->delta_disks) + EMIT_PARAMS(pos, " delta_disks %d", seg->delta_disks); + + for (i = 0; i < area_count; i++) + if (seg->rebuilds[i/64] & (1ULL << (i%64))) + EMIT_PARAMS(pos, " rebuild %u", i); + + for (i = 0; i < area_count; i++) + if (seg->writemostly[i/64] & (1ULL << (i%64))) + EMIT_PARAMS(pos, " write_mostly %u", i); + + if (seg->writebehind) + EMIT_PARAMS(pos, " max_write_behind %u", seg->writebehind); + + if (seg->max_recovery_rate) + EMIT_PARAMS(pos, " max_recovery_rate %u", + seg->max_recovery_rate); + + if (seg->min_recovery_rate) + EMIT_PARAMS(pos, " min_recovery_rate %u", + seg->min_recovery_rate); + } + + /* Print number of metadata/data device pairs */ + EMIT_PARAMS(pos, " %u", area_count); + + if (_emit_areas_line(dmt, seg, params, paramsize, &pos) <= 0) + return_0; + + return 1; +} + +static int _cache_emit_segment_line(struct dm_task *dmt, + struct load_segment *seg, + char *params, size_t paramsize) +{ + int pos = 0; + /* unsigned feature_count; */ + char data[DM_FORMAT_DEV_BUFSIZE]; + char metadata[DM_FORMAT_DEV_BUFSIZE]; + char origin[DM_FORMAT_DEV_BUFSIZE]; + const char *name; + struct dm_config_node *cn; + + /* Cache Dev */ + if (!_build_dev_string(data, sizeof(data), seg->pool)) + return_0; + + /* Metadata Dev */ + if (!_build_dev_string(metadata, sizeof(metadata), seg->metadata)) + return_0; + + /* Origin Dev */ + if (!_build_dev_string(origin, sizeof(origin), seg->origin)) + return_0; + + EMIT_PARAMS(pos, "%s %s %s", metadata, data, origin); + + /* Data block size */ + EMIT_PARAMS(pos, " %u", seg->data_block_size); + + /* Features */ + /* feature_count = hweight32(seg->flags); */ + /* EMIT_PARAMS(pos, " %u", feature_count); */ + if (seg->flags & DM_CACHE_FEATURE_METADATA2) + EMIT_PARAMS(pos, " 2 metadata2 "); + else + EMIT_PARAMS(pos, " 1 "); + + if (seg->flags & DM_CACHE_FEATURE_PASSTHROUGH) + EMIT_PARAMS(pos, "passthrough"); + else if (seg->flags & DM_CACHE_FEATURE_WRITEBACK) + EMIT_PARAMS(pos, "writeback"); + else + EMIT_PARAMS(pos, "writethrough"); + + /* Cache Policy */ + name = seg->policy_name ? : "default"; + + EMIT_PARAMS(pos, " %s", name); + + EMIT_PARAMS(pos, " %u", seg->policy_argc * 2); + if (seg->policy_settings) + for (cn = seg->policy_settings->child; cn; cn = cn->sib) + EMIT_PARAMS(pos, " %s %" PRIu64, cn->key, cn->v->v.i); + + return 1; +} + +static int _thin_pool_emit_segment_line(struct dm_task *dmt, + struct load_segment *seg, + char *params, size_t paramsize) +{ + int pos = 0; + char pool[DM_FORMAT_DEV_BUFSIZE], metadata[DM_FORMAT_DEV_BUFSIZE]; + int features = (seg->error_if_no_space ? 1 : 0) + + (seg->read_only ? 1 : 0) + + (seg->ignore_discard ? 1 : 0) + + (seg->no_discard_passdown ? 1 : 0) + + (seg->skip_block_zeroing ? 1 : 0); + + if (!_build_dev_string(metadata, sizeof(metadata), seg->metadata)) + return_0; + + if (!_build_dev_string(pool, sizeof(pool), seg->pool)) + return_0; + + EMIT_PARAMS(pos, "%s %s %d %" PRIu64 " %d%s%s%s%s%s", metadata, pool, + seg->data_block_size, seg->low_water_mark, features, + seg->skip_block_zeroing ? " skip_block_zeroing" : "", + seg->ignore_discard ? " ignore_discard" : "", + seg->no_discard_passdown ? " no_discard_passdown" : "", + seg->error_if_no_space ? " error_if_no_space" : "", + seg->read_only ? 
" read_only" : "" + ); + + return 1; +} + +static int _thin_emit_segment_line(struct dm_task *dmt, + struct load_segment *seg, + char *params, size_t paramsize) +{ + int pos = 0; + char pool[DM_FORMAT_DEV_BUFSIZE]; + char external[DM_FORMAT_DEV_BUFSIZE + 1]; + + if (!_build_dev_string(pool, sizeof(pool), seg->pool)) + return_0; + + if (!seg->external) + *external = 0; + else { + *external = ' '; + if (!_build_dev_string(external + 1, sizeof(external) - 1, + seg->external)) + return_0; + } + + EMIT_PARAMS(pos, "%s %d%s", pool, seg->device_id, external); + + return 1; +} + +static int _emit_segment_line(struct dm_task *dmt, uint32_t major, + uint32_t minor, struct load_segment *seg, + uint64_t *seg_start, char *params, + size_t paramsize) +{ + int pos = 0; + int r; + int target_type_is_raid = 0; + char originbuf[DM_FORMAT_DEV_BUFSIZE], cowbuf[DM_FORMAT_DEV_BUFSIZE]; + + switch(seg->type) { + case SEG_ERROR: + case SEG_ZERO: + case SEG_LINEAR: + break; + case SEG_MIRRORED: + /* Mirrors are pretty complicated - now in separate function */ + r = _mirror_emit_segment_line(dmt, seg, params, paramsize); + if (!r) + return_0; + break; + case SEG_SNAPSHOT: + case SEG_SNAPSHOT_MERGE: + if (!_build_dev_string(originbuf, sizeof(originbuf), seg->origin)) + return_0; + if (!_build_dev_string(cowbuf, sizeof(cowbuf), seg->cow)) + return_0; + EMIT_PARAMS(pos, "%s %s %c %d", originbuf, cowbuf, + seg->persistent ? 'P' : 'N', seg->chunk_size); + break; + case SEG_SNAPSHOT_ORIGIN: + if (!_build_dev_string(originbuf, sizeof(originbuf), seg->origin)) + return_0; + EMIT_PARAMS(pos, "%s", originbuf); + break; + case SEG_STRIPED: + EMIT_PARAMS(pos, "%u %u ", seg->area_count, seg->stripe_size); + break; + case SEG_CRYPT: + EMIT_PARAMS(pos, "%s%s%s%s%s %s %" PRIu64 " ", seg->cipher, + seg->chainmode ? "-" : "", seg->chainmode ?: "", + seg->iv ? "-" : "", seg->iv ?: "", seg->key, + seg->iv_offset != DM_CRYPT_IV_DEFAULT ? + seg->iv_offset : *seg_start); + break; + case SEG_RAID0: + case SEG_RAID0_META: + case SEG_RAID1: + case SEG_RAID10: + case SEG_RAID4: + case SEG_RAID5_N: + case SEG_RAID5_LA: + case SEG_RAID5_RA: + case SEG_RAID5_LS: + case SEG_RAID5_RS: + case SEG_RAID6_N_6: + case SEG_RAID6_ZR: + case SEG_RAID6_NR: + case SEG_RAID6_NC: + case SEG_RAID6_LS_6: + case SEG_RAID6_RS_6: + case SEG_RAID6_LA_6: + case SEG_RAID6_RA_6: + target_type_is_raid = 1; + r = _raid_emit_segment_line(dmt, major, minor, seg, seg_start, + params, paramsize); + if (!r) + return_0; + + break; + case SEG_THIN_POOL: + if (!_thin_pool_emit_segment_line(dmt, seg, params, paramsize)) + return_0; + break; + case SEG_THIN: + if (!_thin_emit_segment_line(dmt, seg, params, paramsize)) + return_0; + break; + case SEG_CACHE: + if (!_cache_emit_segment_line(dmt, seg, params, paramsize)) + return_0; + break; + } + + switch(seg->type) { + case SEG_ERROR: + case SEG_SNAPSHOT: + case SEG_SNAPSHOT_ORIGIN: + case SEG_SNAPSHOT_MERGE: + case SEG_ZERO: + case SEG_THIN_POOL: + case SEG_THIN: + case SEG_CACHE: + break; + case SEG_CRYPT: + case SEG_LINEAR: + case SEG_STRIPED: + if ((r = _emit_areas_line(dmt, seg, params, paramsize, &pos)) <= 0) { + stack; + return r; + } + if (!params[0]) { + log_error("No parameters supplied for %s target " + "%u:%u.", _dm_segtypes[seg->type].target, + major, minor); + return 0; + } + break; + } + + log_debug_activation("Adding target to (%" PRIu32 ":%" PRIu32 "): %" PRIu64 + " %" PRIu64 " %s %s", major, minor, + *seg_start, seg->size, target_type_is_raid ? 
"raid" : + _dm_segtypes[seg->type].target, params); + + if (!dm_task_add_target(dmt, *seg_start, seg->size, + target_type_is_raid ? "raid" : + _dm_segtypes[seg->type].target, params)) + return_0; + + *seg_start += seg->size; + + return 1; +} + +#undef EMIT_PARAMS + +static int _emit_segment(struct dm_task *dmt, uint32_t major, uint32_t minor, + struct load_segment *seg, uint64_t *seg_start) +{ + char *params; + size_t paramsize = 4096; /* FIXME: too small for long RAID lines when > 64 devices supported */ + int ret; + + do { + if (!(params = dm_malloc(paramsize))) { + log_error("Insufficient space for target parameters."); + return 0; + } + + params[0] = '\0'; + ret = _emit_segment_line(dmt, major, minor, seg, seg_start, + params, paramsize); + dm_free(params); + + if (!ret) + stack; + + if (ret >= 0) + return ret; + + log_debug_activation("Insufficient space in params[%" PRIsize_t + "] for target parameters.", paramsize); + + paramsize *= 2; + } while (paramsize < MAX_TARGET_PARAMSIZE); + + log_error("Target parameter size too big. Aborting."); + return 0; +} + +static int _load_node(struct dm_tree_node *dnode) +{ + int r = 0; + struct dm_task *dmt; + struct load_segment *seg; + uint64_t seg_start = 0, existing_table_size; + + log_verbose("Loading table for %s.", _node_name(dnode)); + + if (!(dmt = dm_task_create(DM_DEVICE_RELOAD))) { + log_error("Reload dm_task creation failed for %s.", _node_name(dnode)); + return 0; + } + + if (!dm_task_set_major(dmt, dnode->info.major) || + !dm_task_set_minor(dmt, dnode->info.minor)) { + log_error("Failed to set device number for %s reload.", _node_name(dnode)); + goto out; + } + + if (dnode->props.read_only && !dm_task_set_ro(dmt)) { + log_error("Failed to set read only flag for %s.", _node_name(dnode)); + goto out; + } + + if (!dm_task_no_open_count(dmt)) + log_warn("WARNING: Failed to disable open_count."); + + dm_list_iterate_items(seg, &dnode->props.segs) + if (!_emit_segment(dmt, dnode->info.major, dnode->info.minor, + seg, &seg_start)) + goto_out; + + if (!dm_task_suppress_identical_reload(dmt)) + log_warn("WARNING: Failed to suppress reload of identical tables."); + + if ((r = dm_task_run(dmt))) { + r = dm_task_get_info(dmt, &dnode->info); + if (r && !dnode->info.inactive_table) + log_verbose("Suppressed %s identical table reload.", + _node_name(dnode)); + + existing_table_size = dm_task_get_existing_table_size(dmt); + if ((dnode->props.size_changed = + (existing_table_size == seg_start) ? 0 : + (existing_table_size > seg_start) ? -1 : 1)) { + /* + * Kernel usually skips size validation on zero-length devices + * now so no need to preload them. + */ + /* FIXME In which kernel version did this begin? */ + if (!existing_table_size && dnode->props.delay_resume_if_new) + dnode->props.size_changed = 0; + + log_debug_activation("Table size changed from %" PRIu64 " to %" + PRIu64 " for %s.%s", existing_table_size, + seg_start, _node_name(dnode), + dnode->props.size_changed ? "" : " (Ignoring.)"); + + /* + * FIXME: code here has known design problem. + * LVM2 does NOT resize thin-pool on top of other LV in 2 steps - + * where raid would be resized with 1st. transaction + * followed by 2nd. 
thin-pool resize - RHBZ #1285063
+			 */
+			if (existing_table_size && dnode->props.delay_resume_if_extended) {
+				log_debug_activation("Resume of table of extended device %s delayed.",
+						     _node_name(dnode));
+				dnode->props.size_changed = 0;
+			}
+		}
+	}
+
+	dnode->props.segment_count = 0;
+
+out:
+	dm_task_destroy(dmt);
+
+	return r;
+}
+
+/*
+ * Currently try to deactivate only nodes created during preload.
+ * A new node is always attached to the front of activated_list.
+ */
+static int _dm_tree_revert_activated(struct dm_tree_node *parent)
+{
+	struct dm_tree_node *child;
+
+	dm_list_iterate_items_gen(child, &parent->activated, activated_list) {
+		log_debug_activation("Reverting %s.", _node_name(child));
+		if (child->callback) {
+			log_debug_activation("Dropping callback for %s.", _node_name(child));
+			child->callback = NULL;
+		}
+		if (!_deactivate_node(child->name, child->info.major, child->info.minor,
+				      &child->dtree->cookie, child->udev_flags, 0)) {
+			log_error("Unable to deactivate %s.", _node_name(child));
+			return 0;
+		}
+		if (!_dm_tree_revert_activated(child))
+			return_0;
+	}
+
+	return 1;
+}
+
+int dm_tree_preload_children(struct dm_tree_node *dnode,
+			     const char *uuid_prefix,
+			     size_t uuid_prefix_len)
+{
+	int r = 1, node_created = 0;
+	void *handle = NULL;
+	struct dm_tree_node *child;
+	int update_devs_flag = 0;
+
+	/* Preload children first */
+	while ((child = dm_tree_next_child(&handle, dnode, 0))) {
+		/* Propagate delay of resume from parent node */
+		if (dnode->props.delay_resume_if_new > 1)
+			child->props.delay_resume_if_new = dnode->props.delay_resume_if_new;
+
+		/* Skip existing non-device-mapper devices */
+		if (!child->info.exists && child->info.major)
+			continue;
+
+		/* Ignore if it doesn't belong to this VG */
+		if (child->info.exists &&
+		    !_uuid_prefix_matches(child->uuid, uuid_prefix, uuid_prefix_len))
+			continue;
+
+		if (dm_tree_node_num_children(child, 0))
+			if (!dm_tree_preload_children(child, uuid_prefix, uuid_prefix_len))
+				return_0;
+
+		/* FIXME Cope if name exists with no uuid? */
+		if (!child->info.exists && !(node_created = _create_node(child)))
+			return_0;
+
+		/* Propagate delayed resume from extended child node */
+		if (child->props.delay_resume_if_extended)
+			dnode->props.delay_resume_if_extended = 1;
+
+		if (!child->info.inactive_table &&
+		    child->props.segment_count &&
+		    !_load_node(child)) {
+			/*
+			 * If the table load does not succeed, we remove the
+			 * device in the kernel that would otherwise have an
+			 * empty table. This makes the create + load of the
+			 * device atomic. However, if other dependencies have
+			 * already been created and loaded, this code is
+			 * insufficient to remove those - only the node
+			 * encountering the table load failure is removed.
+			 */
+			if (node_created && !_remove_node(child))
+				return_0;
+			return_0;
+		}
+
+		/* No resume for a device without parents or with unchanged or smaller size */
+		if (!dm_tree_node_num_children(child, 1) || (child->props.size_changed <= 0))
+			continue;
+
+		if (!child->info.inactive_table && !child->info.suspended)
+			continue;
+
+		if (!_resume_node(child->name, child->info.major, child->info.minor,
+				  child->props.read_ahead, child->props.read_ahead_flags,
+				  &child->info, &child->dtree->cookie, child->udev_flags,
+				  child->info.suspended)) {
+			log_error("Unable to resume %s.", _node_name(child));
+			/* If the device was not previously active, we might as well remove this node.
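+			   (Without a live table it would otherwise linger as an empty device.)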
*/ + if (!child->info.live_table && + !_deactivate_node(child->name, child->info.major, child->info.minor, + &child->dtree->cookie, child->udev_flags, 0)) + log_error("Unable to deactivate %s.", _node_name(child)); + r = 0; + /* Each child is handled independently */ + continue; + } + + if (node_created) { + /* Collect newly introduced devices for revert */ + dm_list_add_h(&dnode->activated, &child->activated_list); + + /* When creating new node also check transaction_id. */ + if (child->props.send_messages && + !_node_send_messages(child, uuid_prefix, uuid_prefix_len, 0)) { + stack; + if (!dm_udev_wait(dm_tree_get_cookie(dnode))) + stack; + dm_tree_set_cookie(dnode, 0); + (void) _dm_tree_revert_activated(dnode); + r = 0; + continue; + } + } + + /* + * Prepare for immediate synchronization with udev and flush all stacked + * dev node operations if requested by immediate_dev_node property. But + * finish processing current level in the tree first. + */ + if (child->props.immediate_dev_node) + update_devs_flag = 1; + } + + if (update_devs_flag || + (r && !dnode->info.exists && dnode->callback)) { + if (!dm_udev_wait(dm_tree_get_cookie(dnode))) + stack; + dm_tree_set_cookie(dnode, 0); + + if (r && !dnode->info.exists && dnode->callback && + !dnode->callback(dnode, DM_NODE_CALLBACK_PRELOADED, + dnode->callback_data)) + { + /* Try to deactivate what has been activated in preload phase */ + (void) _dm_tree_revert_activated(dnode); + return_0; + } + } + + return r; +} + +/* + * Returns 1 if unsure. + */ +int dm_tree_children_use_uuid(struct dm_tree_node *dnode, + const char *uuid_prefix, + size_t uuid_prefix_len) +{ + void *handle = NULL; + struct dm_tree_node *child = dnode; + const char *uuid; + + while ((child = dm_tree_next_child(&handle, dnode, 0))) { + if (!(uuid = dm_tree_node_get_uuid(child))) { + log_warn("WARNING: Failed to get uuid for dtree node %s.", + _node_name(child)); + return 1; + } + + if (_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len)) + return 1; + + if (dm_tree_node_num_children(child, 0)) + dm_tree_children_use_uuid(child, uuid_prefix, uuid_prefix_len); + } + + return 0; +} + +/* + * Target functions + */ +static struct load_segment *_add_segment(struct dm_tree_node *dnode, unsigned type, uint64_t size) +{ + struct load_segment *seg; + + if (!(seg = dm_pool_zalloc(dnode->dtree->mem, sizeof(*seg)))) { + log_error("dtree node segment allocation failed"); + return NULL; + } + + seg->type = type; + seg->size = size; + dm_list_init(&seg->areas); + dm_list_add(&dnode->props.segs, &seg->list); + dnode->props.segment_count++; + + return seg; +} + +int dm_tree_node_add_snapshot_origin_target(struct dm_tree_node *dnode, + uint64_t size, + const char *origin_uuid) +{ + struct load_segment *seg; + struct dm_tree_node *origin_node; + + if (!(seg = _add_segment(dnode, SEG_SNAPSHOT_ORIGIN, size))) + return_0; + + if (!(origin_node = dm_tree_find_node_by_uuid(dnode->dtree, origin_uuid))) { + log_error("Couldn't find snapshot origin uuid %s.", origin_uuid); + return 0; + } + + seg->origin = origin_node; + if (!_link_tree_nodes(dnode, origin_node)) + return_0; + + /* Resume snapshot origins after new snapshots */ + dnode->activation_priority = 1; + + /* + * Don't resume the origin immediately in case it is a non-trivial + * target that must not be active more than once concurrently! 
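+ * (The delay_resume_if_new flag set below is honoured during preload, which skips the early resume of such nodes.)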
+ */ + origin_node->props.delay_resume_if_new = 1; + + return 1; +} + +static int _add_snapshot_target(struct dm_tree_node *node, + uint64_t size, + const char *origin_uuid, + const char *cow_uuid, + const char *merge_uuid, + int persistent, + uint32_t chunk_size) +{ + struct load_segment *seg; + struct dm_tree_node *origin_node, *cow_node, *merge_node; + unsigned seg_type; + + seg_type = !merge_uuid ? SEG_SNAPSHOT : SEG_SNAPSHOT_MERGE; + + if (!(seg = _add_segment(node, seg_type, size))) + return_0; + + if (!(origin_node = dm_tree_find_node_by_uuid(node->dtree, origin_uuid))) { + log_error("Couldn't find snapshot origin uuid %s.", origin_uuid); + return 0; + } + + seg->origin = origin_node; + if (!_link_tree_nodes(node, origin_node)) + return_0; + + if (!(cow_node = dm_tree_find_node_by_uuid(node->dtree, cow_uuid))) { + log_error("Couldn't find snapshot COW device uuid %s.", cow_uuid); + return 0; + } + + seg->cow = cow_node; + if (!_link_tree_nodes(node, cow_node)) + return_0; + + seg->persistent = persistent ? 1 : 0; + seg->chunk_size = chunk_size; + + if (merge_uuid) { + if (!(merge_node = dm_tree_find_node_by_uuid(node->dtree, merge_uuid))) { + /* not a pure error, merging snapshot may have been deactivated */ + log_verbose("Couldn't find merging snapshot uuid %s.", merge_uuid); + } else { + seg->merge = merge_node; + /* must not link merging snapshot, would undermine activation_priority below */ + } + + /* Resume snapshot-merge (acting origin) after other snapshots */ + node->activation_priority = 1; + if (seg->merge) { + /* Resume merging snapshot after snapshot-merge */ + seg->merge->activation_priority = 2; + } + } + + return 1; +} + + +int dm_tree_node_add_snapshot_target(struct dm_tree_node *node, + uint64_t size, + const char *origin_uuid, + const char *cow_uuid, + int persistent, + uint32_t chunk_size) +{ + return _add_snapshot_target(node, size, origin_uuid, cow_uuid, + NULL, persistent, chunk_size); +} + +int dm_tree_node_add_snapshot_merge_target(struct dm_tree_node *node, + uint64_t size, + const char *origin_uuid, + const char *cow_uuid, + const char *merge_uuid, + uint32_t chunk_size) +{ + return _add_snapshot_target(node, size, origin_uuid, cow_uuid, + merge_uuid, 1, chunk_size); +} + +int dm_tree_node_add_error_target(struct dm_tree_node *node, + uint64_t size) +{ + if (!_add_segment(node, SEG_ERROR, size)) + return_0; + + return 1; +} + +int dm_tree_node_add_zero_target(struct dm_tree_node *node, + uint64_t size) +{ + if (!_add_segment(node, SEG_ZERO, size)) + return_0; + + return 1; +} + +int dm_tree_node_add_linear_target(struct dm_tree_node *node, + uint64_t size) +{ + if (!_add_segment(node, SEG_LINEAR, size)) + return_0; + + return 1; +} + +int dm_tree_node_add_striped_target(struct dm_tree_node *node, + uint64_t size, + uint32_t stripe_size) +{ + struct load_segment *seg; + + if (!(seg = _add_segment(node, SEG_STRIPED, size))) + return_0; + + seg->stripe_size = stripe_size; + + return 1; +} + +int dm_tree_node_add_crypt_target(struct dm_tree_node *node, + uint64_t size, + const char *cipher, + const char *chainmode, + const char *iv, + uint64_t iv_offset, + const char *key) +{ + struct load_segment *seg; + + if (!(seg = _add_segment(node, SEG_CRYPT, size))) + return_0; + + seg->cipher = cipher; + seg->chainmode = chainmode; + seg->iv = iv; + seg->iv_offset = iv_offset; + seg->key = key; + + return 1; +} + +int dm_tree_node_add_mirror_target_log(struct dm_tree_node *node, + uint32_t region_size, + unsigned clustered, + const char *log_uuid, + unsigned area_count, 
+ uint32_t flags) +{ + struct dm_tree_node *log_node = NULL; + struct load_segment *seg; + + if (!(seg = _get_last_load_segment(node))) + return_0; + + if (log_uuid) { + if (!(seg->uuid = dm_pool_strdup(node->dtree->mem, log_uuid))) { + log_error("log uuid pool_strdup failed"); + return 0; + } + if ((flags & DM_CORELOG)) + /* For pvmove: immediate resume (for size validation) isn't needed. */ + /* pvmove flag passed via unused UUID and its suffix */ + node->props.delay_resume_if_new = strstr(log_uuid, "pvmove") ? 2 : 1; + else { + if (!(log_node = dm_tree_find_node_by_uuid(node->dtree, log_uuid))) { + log_error("Couldn't find mirror log uuid %s.", log_uuid); + return 0; + } + + if (clustered) + log_node->props.immediate_dev_node = 1; + + /* The kernel validates the size of disk logs. */ + /* FIXME Propagate to any devices below */ + log_node->props.delay_resume_if_new = 0; + + if (!_link_tree_nodes(node, log_node)) + return_0; + } + } + + seg->log = log_node; + seg->region_size = region_size; + seg->clustered = clustered; + seg->mirror_area_count = area_count; + seg->flags = flags; + + return 1; +} + +int dm_tree_node_add_mirror_target(struct dm_tree_node *node, + uint64_t size) +{ + if (!_add_segment(node, SEG_MIRRORED, size)) + return_0; + + return 1; +} + +int dm_tree_node_add_raid_target_with_params(struct dm_tree_node *node, + uint64_t size, + const struct dm_tree_node_raid_params *p) +{ + unsigned i; + struct load_segment *seg = NULL; + + for (i = 0; i < DM_ARRAY_SIZE(_dm_segtypes) && !seg; ++i) + if (!strcmp(p->raid_type, _dm_segtypes[i].target)) + if (!(seg = _add_segment(node, + _dm_segtypes[i].type, size))) + return_0; + if (!seg) { + log_error("Unsupported raid type %s.", p->raid_type); + return 0; + } + + seg->region_size = p->region_size; + seg->stripe_size = p->stripe_size; + seg->area_count = 0; + memset(seg->rebuilds, 0, sizeof(seg->rebuilds)); + seg->rebuilds[0] = p->rebuilds; + memset(seg->writemostly, 0, sizeof(seg->writemostly)); + seg->writemostly[0] = p->writemostly; + seg->writebehind = p->writebehind; + seg->min_recovery_rate = p->min_recovery_rate; + seg->max_recovery_rate = p->max_recovery_rate; + seg->flags = p->flags; + + return 1; +} + +int dm_tree_node_add_raid_target(struct dm_tree_node *node, + uint64_t size, + const char *raid_type, + uint32_t region_size, + uint32_t stripe_size, + uint64_t rebuilds, + uint64_t flags) +{ + struct dm_tree_node_raid_params params = { + .raid_type = raid_type, + .region_size = region_size, + .stripe_size = stripe_size, + .rebuilds = rebuilds, + .flags = flags + }; + + return dm_tree_node_add_raid_target_with_params(node, size, &params); +} + +/* + * Version 2 of dm_tree_node_add_raid_target() allowing for: + * + * - maximum 253 legs in a raid set (MD kernel limitation) + * - delta_disks for disk add/remove reshaping + * - data_offset for out-of-place reshaping + * - data_copies to cope with odd numbers of raid10 disks + */ +int dm_tree_node_add_raid_target_with_params_v2(struct dm_tree_node *node, + uint64_t size, + const struct dm_tree_node_raid_params_v2 *p) +{ + unsigned i; + struct load_segment *seg = NULL; + + for (i = 0; i < DM_ARRAY_SIZE(_dm_segtypes) && !seg; ++i) + if (!strcmp(p->raid_type, _dm_segtypes[i].target)) + if (!(seg = _add_segment(node, + _dm_segtypes[i].type, size))) + return_0; + if (!seg) { + log_error("Unsupported raid type %s.", p->raid_type); + return 0; + } + + seg->region_size = p->region_size; + seg->stripe_size = p->stripe_size; + seg->area_count = 0; + seg->delta_disks = p->delta_disks; +
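/* Reshape state from the v2 params: delta_disks covers disk add/remove reshaping, data_offset covers out-of-place reshaping (see the list above). */ +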
seg->data_offset = p->data_offset; + memcpy(seg->rebuilds, p->rebuilds, sizeof(seg->rebuilds)); + memcpy(seg->writemostly, p->writemostly, sizeof(seg->writemostly)); + seg->writebehind = p->writebehind; + seg->data_copies = p->data_copies; + seg->min_recovery_rate = p->min_recovery_rate; + seg->max_recovery_rate = p->max_recovery_rate; + seg->flags = p->flags; + + return 1; +} + +int dm_tree_node_add_cache_target(struct dm_tree_node *node, + uint64_t size, + uint64_t feature_flags, /* DM_CACHE_FEATURE_* */ + const char *metadata_uuid, + const char *data_uuid, + const char *origin_uuid, + const char *policy_name, + const struct dm_config_node *policy_settings, + uint32_t data_block_size) +{ + struct dm_config_node *cn; + struct load_segment *seg; + static const uint64_t _modemask = + DM_CACHE_FEATURE_PASSTHROUGH | + DM_CACHE_FEATURE_WRITETHROUGH | + DM_CACHE_FEATURE_WRITEBACK; + + /* Detect unknown (bigger) feature bit */ + if (feature_flags >= (DM_CACHE_FEATURE_METADATA2 * 2)) { + log_error("Unsupported cache feature flags set " FMTu64 ".", + feature_flags); + return 0; + } + + switch (feature_flags & _modemask) { + case DM_CACHE_FEATURE_PASSTHROUGH: + case DM_CACHE_FEATURE_WRITEBACK: + if (strcmp(policy_name, "cleaner") == 0) { + /* Enforce writethrough mode for cleaner policy: clear mode bits first */ + feature_flags &= ~_modemask; + feature_flags |= DM_CACHE_FEATURE_WRITETHROUGH; + } + /* Fall through */ + case DM_CACHE_FEATURE_WRITETHROUGH: + break; + default: + log_error("Invalid cache feature flag " FMTu64 ".", + feature_flags); + return 0; + } + + if (data_block_size < DM_CACHE_MIN_DATA_BLOCK_SIZE) { + log_error("Data block size %u is lower than %u sectors.", + data_block_size, DM_CACHE_MIN_DATA_BLOCK_SIZE); + return 0; + } + + if (data_block_size > DM_CACHE_MAX_DATA_BLOCK_SIZE) { + log_error("Data block size %u is higher than %u sectors.", + data_block_size, DM_CACHE_MAX_DATA_BLOCK_SIZE); + return 0; + } + + if (!(seg = _add_segment(node, SEG_CACHE, size))) + return_0; + + if (!(seg->pool = dm_tree_find_node_by_uuid(node->dtree, + data_uuid))) { + log_error("Missing cache data uuid %s.", + data_uuid); + return 0; + } + if (!_link_tree_nodes(node, seg->pool)) + return_0; + + if (!(seg->metadata = dm_tree_find_node_by_uuid(node->dtree, + metadata_uuid))) { + log_error("Missing cache metadata uuid %s.", + metadata_uuid); + return 0; + } + if (!_link_tree_nodes(node, seg->metadata)) + return_0; + + if (!(seg->origin = dm_tree_find_node_by_uuid(node->dtree, + origin_uuid))) { + log_error("Missing cache origin uuid %s.", + origin_uuid); + return 0; + } + if (!_link_tree_nodes(node, seg->origin)) + return_0; + + seg->data_block_size = data_block_size; + seg->flags = feature_flags; + seg->policy_name = policy_name; + + /* FIXME: better validation missing */ + if (policy_settings) { + if (!(seg->policy_settings = dm_config_clone_node_with_mem(node->dtree->mem, policy_settings, 0))) + return_0; + + for (cn = seg->policy_settings->child; cn; cn = cn->sib) { + if (!cn->v || (cn->v->type != DM_CFG_INT)) { + /* For now only <key> = <int> pairs are supported */ + log_error("Cache policy parameter %s has no integer value.", cn->key); + return 0; + } + seg->policy_argc++; + } + } + + return 1; +} + +int dm_tree_node_add_replicator_target(struct dm_tree_node *node, + uint64_t size, + const char *rlog_uuid, + const char *rlog_type, + unsigned rsite_index, + dm_replicator_mode_t mode, + uint32_t async_timeout, + uint64_t fall_behind_data, + uint32_t fall_behind_ios) +{ + log_error("Replicator segment is
unsupported."); + return 0; +} + +/* Appends device node to Replicator */ +int dm_tree_node_add_replicator_dev_target(struct dm_tree_node *node, + uint64_t size, + const char *replicator_uuid, + uint64_t rdevice_index, + const char *rdev_uuid, + unsigned rsite_index, + const char *slog_uuid, + uint32_t slog_flags, + uint32_t slog_region_size) +{ + log_error("Replicator target is unsupported."); + return 0; +} + +static struct load_segment *_get_single_load_segment(struct dm_tree_node *node, + unsigned type) +{ + struct load_segment *seg; + + if (!(seg = _get_last_load_segment(node))) + return_NULL; + + /* Never used past _load_node(), so can test segment_count */ + if (node->props.segment_count != 1) { + log_error("Node %s must have only one segment.", + _dm_segtypes[type].target); + return NULL; + } + + if (seg->type != type) { + log_error("Node %s has segment type %s.", + _dm_segtypes[type].target, + _dm_segtypes[seg->type].target); + return NULL; + } + + return seg; +} + +static int _thin_validate_device_id(uint32_t device_id) +{ + if (device_id > DM_THIN_MAX_DEVICE_ID) { + log_error("Device id %u is higher than %u.", + device_id, DM_THIN_MAX_DEVICE_ID); + return 0; + } + + return 1; +} + +int dm_tree_node_add_thin_pool_target(struct dm_tree_node *node, + uint64_t size, + uint64_t transaction_id, + const char *metadata_uuid, + const char *pool_uuid, + uint32_t data_block_size, + uint64_t low_water_mark, + unsigned skip_block_zeroing) +{ + struct load_segment *seg, *mseg; + uint64_t devsize = 0; + + if (data_block_size < DM_THIN_MIN_DATA_BLOCK_SIZE) { + log_error("Data block size %u is lower than %u sectors.", + data_block_size, DM_THIN_MIN_DATA_BLOCK_SIZE); + return 0; + } + + if (data_block_size > DM_THIN_MAX_DATA_BLOCK_SIZE) { + log_error("Data block size %u is higher than %u sectors.", + data_block_size, DM_THIN_MAX_DATA_BLOCK_SIZE); + return 0; + } + + if (!(seg = _add_segment(node, SEG_THIN_POOL, size))) + return_0; + + if (!(seg->metadata = dm_tree_find_node_by_uuid(node->dtree, metadata_uuid))) { + log_error("Missing metadata uuid %s.", metadata_uuid); + return 0; + } + + if (!_link_tree_nodes(node, seg->metadata)) + return_0; + + /* FIXME: more complex target may need more tweaks */ + dm_list_iterate_items(mseg, &seg->metadata->props.segs) { + devsize += mseg->size; + if (devsize > DM_THIN_MAX_METADATA_SIZE) { + log_debug_activation("Ignoring %" PRIu64 " sectors of device.", + devsize - DM_THIN_MAX_METADATA_SIZE); + mseg->size -= (devsize - DM_THIN_MAX_METADATA_SIZE); + devsize = DM_THIN_MAX_METADATA_SIZE; + /* FIXME: drop remaining segs */ + } + } + + if (!(seg->pool = dm_tree_find_node_by_uuid(node->dtree, pool_uuid))) { + log_error("Missing pool uuid %s.", pool_uuid); + return 0; + } + + if (!_link_tree_nodes(node, seg->pool)) + return_0; + + /* Clear flag delay_resume_if_new - so corelog gets resumed */ + seg->metadata->props.delay_resume_if_new = 0; + seg->pool->props.delay_resume_if_new = 0; + + /* Preload must not resume extended running thin-pool before it's committed */ + node->props.delay_resume_if_extended = 1; + + /* Validate only transaction_id > 0 when activating thin-pool */ + node->props.send_messages = transaction_id ?
1 : 0; + seg->transaction_id = transaction_id; + seg->low_water_mark = low_water_mark; + seg->data_block_size = data_block_size; + seg->skip_block_zeroing = skip_block_zeroing; + dm_list_init(&seg->thin_messages); + + return 1; +} + +int dm_tree_node_add_thin_pool_message(struct dm_tree_node *node, + dm_thin_message_t type, + uint64_t id1, uint64_t id2) +{ + struct thin_message *tm; + struct load_segment *seg; + + if (!(seg = _get_single_load_segment(node, SEG_THIN_POOL))) + return_0; + + if (!(tm = dm_pool_zalloc(node->dtree->mem, sizeof (*tm)))) { + log_error("Failed to allocate thin message."); + return 0; + } + + switch (type) { + case DM_THIN_MESSAGE_CREATE_SNAP: + /* If the thin origin is active, it must be suspended first! */ + if (id1 == id2) { + log_error("Cannot use same device id for origin and its snapshot."); + return 0; + } + if (!_thin_validate_device_id(id1) || + !_thin_validate_device_id(id2)) + return_0; + tm->message.u.m_create_snap.device_id = id1; + tm->message.u.m_create_snap.origin_id = id2; + break; + case DM_THIN_MESSAGE_CREATE_THIN: + if (!_thin_validate_device_id(id1)) + return_0; + tm->message.u.m_create_thin.device_id = id1; + tm->expected_errno = EEXIST; + break; + case DM_THIN_MESSAGE_DELETE: + if (!_thin_validate_device_id(id1)) + return_0; + tm->message.u.m_delete.device_id = id1; + tm->expected_errno = ENODATA; + break; + case DM_THIN_MESSAGE_SET_TRANSACTION_ID: + if ((id1 + 1) != id2) { + log_error("New transaction id must be sequential."); + return 0; /* FIXME: Maybe too strict here? */ + } + if (id2 != seg->transaction_id) { + log_error("Current transaction id is different from thin pool."); + return 0; /* FIXME: Maybe too strict here? */ + } + tm->message.u.m_set_transaction_id.current_id = id1; + tm->message.u.m_set_transaction_id.new_id = id2; + break; + default: + log_error("Unsupported message type %d.", (int) type); + return 0; + } + + tm->message.type = type; + dm_list_add(&seg->thin_messages, &tm->list); + /* A value >1 indicates there really are messages to send */ + node->props.send_messages = 2; + + return 1; +} + +int dm_tree_node_set_thin_pool_discard(struct dm_tree_node *node, + unsigned ignore, + unsigned no_passdown) +{ + struct load_segment *seg; + + if (!(seg = _get_single_load_segment(node, SEG_THIN_POOL))) + return_0; + + seg->ignore_discard = ignore; + seg->no_discard_passdown = no_passdown; + + return 1; +} + +int dm_tree_node_set_thin_pool_error_if_no_space(struct dm_tree_node *node, + unsigned error_if_no_space) +{ + struct load_segment *seg; + + if (!(seg = _get_single_load_segment(node, SEG_THIN_POOL))) + return_0; + + seg->error_if_no_space = error_if_no_space; + + return 1; +} + +int dm_tree_node_set_thin_pool_read_only(struct dm_tree_node *node, + unsigned read_only) +{ + struct load_segment *seg; + + if (!(seg = _get_single_load_segment(node, SEG_THIN_POOL))) + return_0; + + seg->read_only = read_only; + + return 1; +} + +int dm_tree_node_add_thin_target(struct dm_tree_node *node, + uint64_t size, + const char *pool_uuid, + uint32_t device_id) +{ + struct dm_tree_node *pool; + struct load_segment *seg; + + if (!(pool = dm_tree_find_node_by_uuid(node->dtree, pool_uuid))) { + log_error("Missing thin pool uuid %s.", pool_uuid); + return 0; + } + + if (!_link_tree_nodes(node, pool)) + return_0; + + if (!_thin_validate_device_id(device_id)) + return_0; + + if (!(seg = _add_segment(node, SEG_THIN, size))) + return_0; + + seg->pool = pool; + seg->device_id = device_id; + + return 1; +} + +int
dm_tree_node_set_thin_external_origin(struct dm_tree_node *node, + const char *external_uuid) +{ + struct dm_tree_node *external; + struct load_segment *seg; + + if (!(seg = _get_single_load_segment(node, SEG_THIN))) + return_0; + + if (!(external = dm_tree_find_node_by_uuid(node->dtree, + external_uuid))) { + log_error("Missing thin external origin uuid %s.", + external_uuid); + return 0; + } + + if (!_link_tree_nodes(node, external)) + return_0; + + seg->external = external; + + return 1; +} + +static int _add_area(struct dm_tree_node *node, struct load_segment *seg, struct dm_tree_node *dev_node, uint64_t offset) +{ + struct seg_area *area; + + if (!(area = dm_pool_zalloc(node->dtree->mem, sizeof (*area)))) { + log_error("Failed to allocate target segment area."); + return 0; + } + + area->dev_node = dev_node; + area->offset = offset; + + dm_list_add(&seg->areas, &area->list); + seg->area_count++; + + return 1; +} + +int dm_tree_node_add_target_area(struct dm_tree_node *node, + const char *dev_name, + const char *uuid, + uint64_t offset) +{ + struct load_segment *seg; + struct stat info; + struct dm_tree_node *dev_node; + + if ((!dev_name || !*dev_name) && (!uuid || !*uuid)) { + log_error("dm_tree_node_add_target_area called without device"); + return 0; + } + + if (uuid) { + if (!(dev_node = dm_tree_find_node_by_uuid(node->dtree, uuid))) { + log_error("Couldn't find area uuid %s.", uuid); + return 0; + } + if (!_link_tree_nodes(node, dev_node)) + return_0; + } else { + if (stat(dev_name, &info) < 0) { + log_error("Device %s not found.", dev_name); + return 0; + } + + if (!S_ISBLK(info.st_mode)) { + log_error("Device %s is not a block device.", dev_name); + return 0; + } + + /* FIXME Check correct macro use */ + if (!(dev_node = _add_dev(node->dtree, node, MAJOR(info.st_rdev), + MINOR(info.st_rdev), 0, 0))) + return_0; + } + + if (!(seg = _get_last_load_segment(node))) + return_0; + + if (!_add_area(node, seg, dev_node, offset)) + return_0; + + return 1; +} + +int dm_tree_node_add_null_area(struct dm_tree_node *node, uint64_t offset) +{ + struct load_segment *seg; + + if (!(seg = _get_last_load_segment(node))) + return_0; + + switch (seg->type) { + case SEG_RAID0: + case SEG_RAID0_META: + case SEG_RAID1: + case SEG_RAID4: + case SEG_RAID5_N: + case SEG_RAID5_LA: + case SEG_RAID5_RA: + case SEG_RAID5_LS: + case SEG_RAID5_RS: + case SEG_RAID6_N_6: + case SEG_RAID6_ZR: + case SEG_RAID6_NR: + case SEG_RAID6_NC: + case SEG_RAID6_LS_6: + case SEG_RAID6_RS_6: + case SEG_RAID6_LA_6: + case SEG_RAID6_RA_6: + break; + default: + log_error("dm_tree_node_add_null_area() called on an unsupported segment type"); + return 0; + } + + if (!_add_area(node, seg, NULL, offset)) + return_0; + + return 1; +} + +void dm_tree_node_set_callback(struct dm_tree_node *dnode, + dm_node_callback_fn cb, void *data) +{ + dnode->callback = cb; + dnode->callback_data = data; +} + +#if defined(__GNUC__) +/* + * Backward compatible implementations. + * + * Keep these at the end of the file to make sure that + * no code in this file accidentally calls it. + */ + +/* Backward compatible dm_tree_node_size_changed() implementations. */ +int dm_tree_node_size_changed_base(const struct dm_tree_node *dnode); +int dm_tree_node_size_changed_base(const struct dm_tree_node *dnode) +{ + /* Base does not make difference between smaller and bigger */ + return dm_tree_node_size_changed(dnode) ? 1 : 0; +} + +/* + * Retain ABI compatibility after adding the DM_CACHE_FEATURE_METADATA2 + * in version 1.02.138. 
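+ * (Before that release, unknown feature bits were silently ignored, hence the masking in the _base variant below.)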
+ * + * Binaries compiled against version 1.02.138 onwards will use + * the new function dm_tree_node_add_cache_target which detects unknown + * feature flags and returns an error for them. + */ +int dm_tree_node_add_cache_target_base(struct dm_tree_node *node, + uint64_t size, + uint64_t feature_flags, /* DM_CACHE_FEATURE_* */ + const char *metadata_uuid, + const char *data_uuid, + const char *origin_uuid, + const char *policy_name, + const struct dm_config_node *policy_settings, + uint32_t data_block_size); +int dm_tree_node_add_cache_target_base(struct dm_tree_node *node, + uint64_t size, + uint64_t feature_flags, + const char *metadata_uuid, + const char *data_uuid, + const char *origin_uuid, + const char *policy_name, + const struct dm_config_node *policy_settings, + uint32_t data_block_size) +{ + /* The old version supported only these FEATURE bits; others were ignored, so mask them here */ + static const uint64_t _mask = + DM_CACHE_FEATURE_WRITEBACK | + DM_CACHE_FEATURE_WRITETHROUGH | + DM_CACHE_FEATURE_PASSTHROUGH; + + return dm_tree_node_add_cache_target(node, size, feature_flags & _mask, + metadata_uuid, data_uuid, origin_uuid, + policy_name, policy_settings, data_block_size); +} +#endif diff --git a/device_mapper/libdm-file.c b/device_mapper/libdm-file.c new file mode 100644 index 000000000..5c6d2232d --- /dev/null +++ b/device_mapper/libdm-file.c @@ -0,0 +1,261 @@ +/* + * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. + * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. + * + * This file is part of the device-mapper userspace tools. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU Lesser General Public License v.2.1.
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "misc/dmlib.h" + +#include <sys/file.h> +#include <fcntl.h> +#include <dirent.h> +#include <unistd.h> + +static int _is_dir(const char *path) +{ + struct stat st; + + if (stat(path, &st) < 0) { + log_sys_error("stat", path); + return 0; + } + + if (!S_ISDIR(st.st_mode)) { + log_error("Existing path %s is not " + "a directory.", path); + return 0; + } + + return 1; +} + +static int _create_dir_recursive(const char *dir) +{ + char *orig, *s; + int rc, r = 0; + + log_verbose("Creating directory \"%s\"", dir); + /* Create parent directories */ + orig = s = dm_strdup(dir); + if (!s) { + log_error("Failed to duplicate directory name."); + return 0; + } + + while ((s = strchr(s, '/')) != NULL) { + *s = '\0'; + if (*orig) { + rc = mkdir(orig, 0777); + if (rc < 0) { + if (errno == EEXIST) { + if (!_is_dir(orig)) + goto_out; + } else { + if (errno != EROFS) + log_sys_error("mkdir", orig); + goto out; + } + } + } + *s++ = '/'; + } + + /* Create final directory */ + rc = mkdir(dir, 0777); + if (rc < 0) { + if (errno == EEXIST) { + if (!_is_dir(dir)) + goto_out; + } else { + if (errno != EROFS) + log_sys_error("mkdir", orig); + goto out; + } + } + + r = 1; +out: + dm_free(orig); + return r; +} + +int dm_create_dir(const char *dir) +{ + struct stat info; + + if (!*dir) + return 1; + + if (stat(dir, &info) == 0 && S_ISDIR(info.st_mode)) + return 1; + + if (!_create_dir_recursive(dir)) + return_0; + + return 1; +} + +int dm_is_empty_dir(const char *dir) +{ + struct dirent *dirent; + DIR *d; + + if (!(d = opendir(dir))) { + log_sys_error("opendir", dir); + return 0; + } + + while ((dirent = readdir(d))) + if (strcmp(dirent->d_name, ".") && strcmp(dirent->d_name, "..")) + break; + + if (closedir(d)) + log_sys_error("closedir", dir); + + return dirent ? 0 : 1; +} + +int dm_fclose(FILE *stream) +{ + int prev_fail = ferror(stream); + int fclose_fail = fclose(stream); + + /* If there was a previous failure, but fclose succeeded, + clear errno, since ferror does not set it, and its value + may be unrelated to the ferror-reported failure. */ + if (prev_fail && !fclose_fail) + errno = 0; + + return prev_fail || fclose_fail ? 
EOF : 0; +} + +int dm_create_lockfile(const char *lockfile) +{ + int fd, value; + size_t bufferlen; + ssize_t write_out; + struct flock lock; + char buffer[50]; + int retries = 0; + + if ((fd = open(lockfile, O_CREAT | O_WRONLY, + (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH))) < 0) { + log_error("Cannot open lockfile [%s], error was [%s]", + lockfile, strerror(errno)); + return 0; + } + + lock.l_type = F_WRLCK; + lock.l_start = 0; + lock.l_whence = SEEK_SET; + lock.l_len = 0; +retry_fcntl: + if (fcntl(fd, F_SETLK, &lock) < 0) { + switch (errno) { + case EINTR: + goto retry_fcntl; + case EACCES: + case EAGAIN: + if (retries == 20) { + log_error("Cannot lock lockfile [%s], error was [%s]", + lockfile, strerror(errno)); + break; + } else { + ++retries; + usleep(1000); + goto retry_fcntl; + } + default: + log_error("process is already running"); + } + + goto fail_close; + } + + if (ftruncate(fd, 0) < 0) { + log_error("Cannot truncate pidfile [%s], error was [%s]", + lockfile, strerror(errno)); + + goto fail_close_unlink; + } + + snprintf(buffer, sizeof(buffer), "%u\n", getpid()); + + bufferlen = strlen(buffer); + write_out = write(fd, buffer, bufferlen); + + if ((write_out < 0) || (write_out == 0 && errno)) { + log_error("Cannot write pid to pidfile [%s], error was [%s]", + lockfile, strerror(errno)); + + goto fail_close_unlink; + } + + if ((write_out == 0) || ((size_t)write_out < bufferlen)) { + log_error("Cannot write pid to pidfile [%s], short write of " + "[%" PRIsize_t "] bytes, expected [%" PRIsize_t "]\n", + lockfile, write_out, bufferlen); + + goto fail_close_unlink; + } + + if ((value = fcntl(fd, F_GETFD, 0)) < 0) { + log_error("Cannot get close-on-exec flag from pidfile [%s], " + "error was [%s]", lockfile, strerror(errno)); + + goto fail_close_unlink; + } + value |= FD_CLOEXEC; + if (fcntl(fd, F_SETFD, value) < 0) { + log_error("Cannot set close-on-exec flag from pidfile [%s], " + "error was [%s]", lockfile, strerror(errno)); + + goto fail_close_unlink; + } + + return 1; + +fail_close_unlink: + if (unlink(lockfile)) + log_sys_debug("unlink", lockfile); +fail_close: + if (close(fd)) + log_sys_debug("close", lockfile); + + return 0; +} + +int dm_daemon_is_running(const char* lockfile) +{ + int fd; + struct flock lock; + + if ((fd = open(lockfile, O_RDONLY)) < 0) + return 0; + + lock.l_type = F_WRLCK; + lock.l_start = 0; + lock.l_whence = SEEK_SET; + lock.l_len = 0; + if (fcntl(fd, F_GETLK, &lock) < 0) { + log_error("Cannot check lock status of lockfile [%s], error was [%s]", + lockfile, strerror(errno)); + if (close(fd)) + stack; + return 0; + } + + if (close(fd)) + stack; + + return (lock.l_type == F_UNLCK) ? 0 : 1; +} diff --git a/device_mapper/libdm-report.c b/device_mapper/libdm-report.c new file mode 100644 index 000000000..3a48c3f46 --- /dev/null +++ b/device_mapper/libdm-report.c @@ -0,0 +1,5104 @@ +/* + * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved. + * Copyright (C) 2004-2015 Red Hat, Inc. All rights reserved. + * + * This file is part of the device-mapper userspace tools. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU Lesser General Public License v.2.1.
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "misc/dmlib.h" + +#include <ctype.h> +#include <math.h> /* fabs() */ +#include <float.h> /* DBL_EPSILON */ +#include <time.h> + +/* + * Internal flags + */ +#define RH_SORT_REQUIRED 0x00000100 +#define RH_HEADINGS_PRINTED 0x00000200 +#define RH_FIELD_CALC_NEEDED 0x00000400 +#define RH_ALREADY_REPORTED 0x00000800 + +struct selection { + struct dm_pool *mem; + struct selection_node *selection_root; + int add_new_fields; +}; + +struct report_group_item; + +struct dm_report { + struct dm_pool *mem; + + /** + * Cache the first row allocated so that all rows and fields + * can be disposed of in a single dm_pool_free() call. + */ + struct row *first_row; + + /* To report all available types */ +#define REPORT_TYPES_ALL UINT32_MAX + uint32_t report_types; + const char *output_field_name_prefix; + const char *field_prefix; + uint32_t flags; + const char *separator; + + uint32_t keys_count; + + /* Ordered list of fields needed for this report */ + struct dm_list field_props; + + /* Rows of report data */ + struct dm_list rows; + + /* Array of field definitions */ + const struct dm_report_field_type *fields; + const char **canonical_field_ids; + const struct dm_report_object_type *types; + + /* To store caller private data */ + void *private; + + /* Selection handle */ + struct selection *selection; + + /* Null-terminated array of reserved values */ + const struct dm_report_reserved_value *reserved_values; + struct dm_hash_table *value_cache; + + struct report_group_item *group_item; +}; + +struct dm_report_group { + dm_report_group_type_t type; + struct dm_pool *mem; + struct dm_list items; + int indent; +}; + +struct report_group_item { + struct dm_list list; + struct dm_report_group *group; + struct dm_report *report; + union { + uint32_t orig_report_flags; + uint32_t finished_count; + } store; + struct report_group_item *parent; + unsigned output_done:1; + unsigned needs_closing:1; + void *data; +}; + +/* + * Internal per-field flags + */ +#define FLD_HIDDEN 0x00001000 +#define FLD_SORT_KEY 0x00002000 +#define FLD_ASCENDING 0x00004000 +#define FLD_DESCENDING 0x00008000 +#define FLD_COMPACTED 0x00010000 +#define FLD_COMPACT_ONE 0x00020000 + +struct field_properties { + struct dm_list list; + uint32_t field_num; + uint32_t sort_posn; + int32_t initial_width; + int32_t width; /* current width: adjusted by dm_report_object() */ + const struct dm_report_object_type *type; + uint32_t flags; + int implicit; +}; + +/* + * Report selection + */ +struct op_def { + const char *string; + uint32_t flags; + const char *desc; +}; + +#define FLD_CMP_MASK 0x0FF00000 +#define FLD_CMP_UNCOMPARABLE 0x00100000 +#define FLD_CMP_EQUAL 0x00200000 +#define FLD_CMP_NOT 0x00400000 +#define FLD_CMP_GT 0x00800000 +#define FLD_CMP_LT 0x01000000 +#define FLD_CMP_REGEX 0x02000000 +#define FLD_CMP_NUMBER 0x04000000 +#define FLD_CMP_TIME 0x08000000 +/* + * #define FLD_CMP_STRING 0x10000000 + * We could define FLD_CMP_STRING here for completeness here, + * but it's not needed - we can check operator compatibility with + * field type by using FLD_CMP_REGEX, FLD_CMP_NUMBER and + * FLD_CMP_TIME flags only. + */ + +/* + * When defining operators, always define longer one before + * shorter one if one is a prefix of another! + * (e.g. 
=~ comes before =) +*/ +static struct op_def _op_cmp[] = { + { "=~", FLD_CMP_REGEX, "Matching regular expression. [regex]" }, + { "!~", FLD_CMP_REGEX|FLD_CMP_NOT, "Not matching regular expression. [regex]" }, + { "=", FLD_CMP_EQUAL, "Equal to. [number, size, percent, string, string list, time]" }, + { "!=", FLD_CMP_NOT|FLD_CMP_EQUAL, "Not equal to. [number, size, percent, string, string_list, time]" }, + { ">=", FLD_CMP_NUMBER|FLD_CMP_TIME|FLD_CMP_GT|FLD_CMP_EQUAL, "Greater than or equal to. [number, size, percent, time]" }, + { ">", FLD_CMP_NUMBER|FLD_CMP_TIME|FLD_CMP_GT, "Greater than. [number, size, percent, time]" }, + { "<=", FLD_CMP_NUMBER|FLD_CMP_TIME|FLD_CMP_LT|FLD_CMP_EQUAL, "Less than or equal to. [number, size, percent, time]" }, + { "<", FLD_CMP_NUMBER|FLD_CMP_TIME|FLD_CMP_LT, "Less than. [number, size, percent, time]" }, + { "since", FLD_CMP_TIME|FLD_CMP_GT|FLD_CMP_EQUAL, "Since specified time (same as '>='). [time]" }, + { "after", FLD_CMP_TIME|FLD_CMP_GT, "After specified time (same as '>'). [time]"}, + { "until", FLD_CMP_TIME|FLD_CMP_LT|FLD_CMP_EQUAL, "Until specified time (same as '<='). [time]"}, + { "before", FLD_CMP_TIME|FLD_CMP_LT, "Before specified time (same as '<'). [time]"}, + { NULL, 0, NULL } +}; + +#define SEL_MASK 0x000000FF +#define SEL_ITEM 0x00000001 +#define SEL_AND 0x00000002 +#define SEL_OR 0x00000004 + +#define SEL_MODIFIER_MASK 0x00000F00 +#define SEL_MODIFIER_NOT 0x00000100 + +#define SEL_PRECEDENCE_MASK 0x0000F000 +#define SEL_PRECEDENCE_PS 0x00001000 +#define SEL_PRECEDENCE_PE 0x00002000 + +#define SEL_LIST_MASK 0x000F0000 +#define SEL_LIST_LS 0x00010000 +#define SEL_LIST_LE 0x00020000 +#define SEL_LIST_SUBSET_LS 0x00040000 +#define SEL_LIST_SUBSET_LE 0x00080000 + +static struct op_def _op_log[] = { + { "&&", SEL_AND, "All fields must match" }, + { ",", SEL_AND, "All fields must match" }, + { "||", SEL_OR, "At least one field must match" }, + { "#", SEL_OR, "At least one field must match" }, + { "!", SEL_MODIFIER_NOT, "Logical negation" }, + { "(", SEL_PRECEDENCE_PS, "Left parenthesis" }, + { ")", SEL_PRECEDENCE_PE, "Right parenthesis" }, + { "[", SEL_LIST_LS, "List start" }, + { "]", SEL_LIST_LE, "List end"}, + { "{", SEL_LIST_SUBSET_LS, "List subset start"}, + { "}", SEL_LIST_SUBSET_LE, "List subset end"}, + { NULL, 0, NULL}, +}; + +struct selection_str_list { + struct dm_str_list str_list; + unsigned type; /* either SEL_AND or SEL_OR */ +}; + +struct field_selection_value { + union { + const char *s; + uint64_t i; + time_t t; + double d; + struct dm_regex *r; + struct selection_str_list *l; + } v; + struct field_selection_value *next; +}; + +struct field_selection { + struct field_properties *fp; + uint32_t flags; + struct field_selection_value *value; +}; + +struct selection_node { + struct dm_list list; + uint32_t type; + union { + struct field_selection *item; + struct dm_list set; + } selection; +}; + +struct reserved_value_wrapper { + const char *matched_name; + const struct dm_report_reserved_value *reserved; + const void *value; +}; + +/* + * Report data field + */ +struct dm_report_field { + struct dm_list list; + struct field_properties *props; + + const char *report_string; /* Formatted ready for display */ + const void *sort_value; /* Raw value for sorting */ +}; + +struct row { + struct dm_list list; + struct dm_report *rh; + struct dm_list fields; /* Fields in display order */ + struct dm_report_field *(*sort_fields)[]; /* Fields in sort order */ + int selected; + struct dm_report_field *field_sel_status; +}; + +/* + * Implicit report 
types and fields. + */ +#define SPECIAL_REPORT_TYPE 0x80000000 +#define SPECIAL_FIELD_SELECTED_ID "selected" +#define SPECIAL_FIELD_HELP_ID "help" +#define SPECIAL_FIELD_HELP_ALT_ID "?" + +static void *_null_returning_fn(void *obj __attribute__((unused))) +{ + return NULL; +} + +static int _no_report_fn(struct dm_report *rh __attribute__((unused)), + struct dm_pool *mem __attribute__((unused)), + struct dm_report_field *field __attribute__((unused)), + const void *data __attribute__((unused)), + void *private __attribute__((unused))) +{ + return 1; +} + +static int _selected_disp(struct dm_report *rh, + struct dm_pool *mem __attribute__((unused)), + struct dm_report_field *field, + const void *data, + void *private __attribute__((unused))) +{ + const struct row *row = (const struct row *)data; + return dm_report_field_int(rh, field, &row->selected); +} + +static const struct dm_report_object_type _implicit_special_report_types[] = { + { SPECIAL_REPORT_TYPE, "Special", "special_", _null_returning_fn }, + { 0, "", "", NULL } +}; + +static const struct dm_report_field_type _implicit_special_report_fields[] = { + { SPECIAL_REPORT_TYPE, DM_REPORT_FIELD_TYPE_NUMBER | FLD_CMP_UNCOMPARABLE , 0, 8, SPECIAL_FIELD_HELP_ID, "Help", _no_report_fn, "Show help." }, + { SPECIAL_REPORT_TYPE, DM_REPORT_FIELD_TYPE_NUMBER | FLD_CMP_UNCOMPARABLE , 0, 8, SPECIAL_FIELD_HELP_ALT_ID, "Help", _no_report_fn, "Show help." }, + { 0, 0, 0, 0, "", "", 0, 0} +}; + +static const struct dm_report_field_type _implicit_special_report_fields_with_selection[] = { + { SPECIAL_REPORT_TYPE, DM_REPORT_FIELD_TYPE_NUMBER, 0, 8, SPECIAL_FIELD_SELECTED_ID, "Selected", _selected_disp, "Set if item passes selection criteria." }, + { SPECIAL_REPORT_TYPE, DM_REPORT_FIELD_TYPE_NUMBER | FLD_CMP_UNCOMPARABLE , 0, 8, SPECIAL_FIELD_HELP_ID, "Help", _no_report_fn, "Show help." }, + { SPECIAL_REPORT_TYPE, DM_REPORT_FIELD_TYPE_NUMBER | FLD_CMP_UNCOMPARABLE , 0, 8, SPECIAL_FIELD_HELP_ALT_ID, "Help", _no_report_fn, "Show help." 
}, + { 0, 0, 0, 0, "", "", 0, 0} +}; + +static const struct dm_report_object_type *_implicit_report_types = _implicit_special_report_types; +static const struct dm_report_field_type *_implicit_report_fields = _implicit_special_report_fields; + +static const struct dm_report_object_type *_find_type(struct dm_report *rh, + uint32_t report_type) +{ + const struct dm_report_object_type *t; + + for (t = _implicit_report_types; t->data_fn; t++) + if (t->id == report_type) + return t; + + for (t = rh->types; t->data_fn; t++) + if (t->id == report_type) + return t; + + return NULL; +} + +/* + * Data-munging functions to prepare each data type for display and sorting + */ + +int dm_report_field_string(struct dm_report *rh, + struct dm_report_field *field, const char *const *data) +{ + char *repstr; + + if (!(repstr = dm_pool_strdup(rh->mem, *data))) { + log_error("dm_report_field_string: dm_pool_strdup failed"); + return 0; + } + + field->report_string = repstr; + field->sort_value = (const void *) field->report_string; + + return 1; +} + +int dm_report_field_percent(struct dm_report *rh, + struct dm_report_field *field, + const dm_percent_t *data) +{ + char *repstr; + uint64_t *sortval; + + if (!(sortval = dm_pool_alloc(rh->mem, sizeof(uint64_t)))) { + log_error("dm_report_field_percent: dm_pool_alloc failed for sort_value."); + return 0; + } + + *sortval = (uint64_t)(*data); + + if (*data == DM_PERCENT_INVALID) { + dm_report_field_set_value(field, "", sortval); + return 1; + } + + if (!(repstr = dm_pool_alloc(rh->mem, 8))) { + dm_pool_free(rh->mem, sortval); + log_error("dm_report_field_percent: dm_pool_alloc failed for percent report string."); + return 0; + } + + if (dm_snprintf(repstr, 7, "%.2f", dm_percent_to_round_float(*data, 2)) < 0) { + dm_pool_free(rh->mem, sortval); + log_error("dm_report_field_percent: percentage too large."); + return 0; + } + + dm_report_field_set_value(field, repstr, sortval); + return 1; +} + +struct str_list_sort_value_item { + unsigned pos; + size_t len; +}; + +struct str_list_sort_value { + const char *value; + struct str_list_sort_value_item *items; +}; + +struct str_list_sort_item { + const char *str; + struct str_list_sort_value_item item; +}; + +static int _str_list_sort_item_cmp(const void *a, const void *b) +{ + const struct str_list_sort_item *slsi_a = (const struct str_list_sort_item *) a; + const struct str_list_sort_item *slsi_b = (const struct str_list_sort_item *) b; + + return strcmp(slsi_a->str, slsi_b->str); +} + +static int _report_field_string_list(struct dm_report *rh, + struct dm_report_field *field, + const struct dm_list *data, + const char *delimiter, + int sort) +{ + static const char _string_list_grow_object_failed_msg[] = "dm_report_field_string_list: dm_pool_grow_object failed"; + struct str_list_sort_value *sort_value = NULL; + unsigned int list_size, pos, i; + struct str_list_sort_item *arr = NULL; + struct dm_str_list *sl; + size_t delimiter_len, len; + void *object; + int r = 0; + + if (!(sort_value = dm_pool_zalloc(rh->mem, sizeof(struct str_list_sort_value)))) { + log_error("dm_report_field_string_list: dm_pool_zalloc failed for sort_value"); + return 0; + } + + list_size = dm_list_size(data); + + /* + * Sort value stores the pointer to the report_string and then + * position and length for each list element within the report_string. + * The first element stores the number of elements in 'len' (therefore + * list_size + 1 is used below for the extra element).
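+ * Each item is a {pos,len} pair: the byte offset of the element within report_string and its length.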
+ * For example, with this input: + * sort = 0; (we don't want to report sorted) + * report_string = "abc,xy,defgh"; (this is reported) + * + * ...we end up with: + * sort_value->value = report_string; (we'll use the original report_string for indices) + * sort_value->items[0] = {0,3}; (we have 3 items) + * sort_value->items[1] = {0,3}; ("abc") + * sort_value->items[2] = {7,5}; ("defgh") + * sort_value->items[3] = {4,2}; ("xy") + * + * The items alone are always sorted while in report_string they can be + * sorted or not (based on "sort" arg) - it depends on how we prefer to + * display the list. Having items sorted internally helps with searching + * through them. + */ + if (!(sort_value->items = dm_pool_zalloc(rh->mem, (list_size + 1) * sizeof(struct str_list_sort_value_item)))) { + log_error("dm_report_field_string_list: dm_pool_zalloc failed for sort value items"); + goto out; + } + sort_value->items[0].len = list_size; + + /* zero items */ + if (!list_size) { + sort_value->value = field->report_string = ""; + field->sort_value = sort_value; + return 1; + } + + /* one item */ + if (list_size == 1) { + sl = (struct dm_str_list *) dm_list_first(data); + if (!sl || + !(sort_value->value = field->report_string = dm_pool_strdup(rh->mem, sl->str))) { + log_error("dm_report_field_string_list: dm_pool_strdup failed"); + goto out; + } + sort_value->items[1].pos = 0; + sort_value->items[1].len = strlen(sl->str); + field->sort_value = sort_value; + return 1; + } + + /* more than one item - sort the list */ + if (!(arr = dm_malloc(sizeof(struct str_list_sort_item) * list_size))) { + log_error("dm_report_field_string_list: dm_malloc failed"); + goto out; + } + + if (!(dm_pool_begin_object(rh->mem, 256))) { + log_error(_string_list_grow_object_failed_msg); + goto out; + } + + if (!delimiter) + delimiter = ","; + delimiter_len = strlen(delimiter); + + i = pos = len = 0; + dm_list_iterate_items(sl, data) { + arr[i].str = sl->str; + if (!sort) { + /* sorted output not required - report the list as it is */ + len = strlen(sl->str); + if (!dm_pool_grow_object(rh->mem, arr[i].str, len) || + (i+1 != list_size && !dm_pool_grow_object(rh->mem, delimiter, delimiter_len))) { + log_error(_string_list_grow_object_failed_msg); + goto out; + } + arr[i].item.pos = pos; + arr[i].item.len = len; + pos = i+1 == list_size ? pos+len : pos+len+delimiter_len; + } + i++; + } + + qsort(arr, i, sizeof(struct str_list_sort_item), _str_list_sort_item_cmp); + + for (i = 0, pos = 0; i < list_size; i++) { + if (sort) { + /* sorted output required - report the list as sorted */ + len = strlen(arr[i].str); + if (!dm_pool_grow_object(rh->mem, arr[i].str, len) || + (i+1 != list_size && !dm_pool_grow_object(rh->mem, delimiter, delimiter_len))) { + log_error(_string_list_grow_object_failed_msg); + goto out; + } + /* + * Save position and length of the string + * element in report_string for sort_value. + * Use i+1 here since items[0] stores list size!!! + */ + sort_value->items[i+1].pos = pos; + sort_value->items[i+1].len = len; + pos = i+1 == list_size ?
pos+len : pos+len+delimiter_len; + } else { + sort_value->items[i+1].pos = arr[i].item.pos; + sort_value->items[i+1].len = arr[i].item.len; + } + } + + if (!dm_pool_grow_object(rh->mem, "\0", 1)) { + log_error(_string_list_grow_object_failed_msg); + goto out; + } + + object = dm_pool_end_object(rh->mem); + sort_value->value = object; + field->sort_value = sort_value; + field->report_string = object; + r = 1; +out: + if (!r && sort_value) + dm_pool_free(rh->mem, sort_value); + dm_free(arr); + + return r; +} + +int dm_report_field_string_list(struct dm_report *rh, + struct dm_report_field *field, + const struct dm_list *data, + const char *delimiter) +{ + return _report_field_string_list(rh, field, data, delimiter, 1); +} + +int dm_report_field_string_list_unsorted(struct dm_report *rh, + struct dm_report_field *field, + const struct dm_list *data, + const char *delimiter) +{ + /* + * The raw value is always sorted, just the string reported is unsorted. + * Having the raw value always sorted helps when matching selection list + * with selection criteria. + */ + return _report_field_string_list(rh, field, data, delimiter, 0); +} + +int dm_report_field_int(struct dm_report *rh, + struct dm_report_field *field, const int *data) +{ + const int value = *data; + uint64_t *sortval; + char *repstr; + + if (!(repstr = dm_pool_zalloc(rh->mem, 13))) { + log_error("dm_report_field_int: dm_pool_alloc failed"); + return 0; + } + + if (!(sortval = dm_pool_alloc(rh->mem, sizeof(int64_t)))) { + log_error("dm_report_field_int: dm_pool_alloc failed"); + return 0; + } + + if (dm_snprintf(repstr, 12, "%d", value) < 0) { + log_error("dm_report_field_int: int too big: %d", value); + return 0; + } + + *sortval = (uint64_t) value; + field->sort_value = sortval; + field->report_string = repstr; + + return 1; +} + +int dm_report_field_uint32(struct dm_report *rh, + struct dm_report_field *field, const uint32_t *data) +{ + const uint32_t value = *data; + uint64_t *sortval; + char *repstr; + + if (!(repstr = dm_pool_zalloc(rh->mem, 12))) { + log_error("dm_report_field_uint32: dm_pool_alloc failed"); + return 0; + } + + if (!(sortval = dm_pool_alloc(rh->mem, sizeof(uint64_t)))) { + log_error("dm_report_field_uint32: dm_pool_alloc failed"); + return 0; + } + + if (dm_snprintf(repstr, 11, "%u", value) < 0) { + log_error("dm_report_field_uint32: uint32 too big: %u", value); + return 0; + } + + *sortval = (uint64_t) value; + field->sort_value = sortval; + field->report_string = repstr; + + return 1; +} + +int dm_report_field_int32(struct dm_report *rh, + struct dm_report_field *field, const int32_t *data) +{ + const int32_t value = *data; + uint64_t *sortval; + char *repstr; + + if (!(repstr = dm_pool_zalloc(rh->mem, 13))) { + log_error("dm_report_field_int32: dm_pool_alloc failed"); + return 0; + } + + if (!(sortval = dm_pool_alloc(rh->mem, sizeof(int64_t)))) { + log_error("dm_report_field_int32: dm_pool_alloc failed"); + return 0; + } + + if (dm_snprintf(repstr, 12, "%d", value) < 0) { + log_error("dm_report_field_int32: int32 too big: %d", value); + return 0; + } + + *sortval = (uint64_t) value; + field->sort_value = sortval; + field->report_string = repstr; + + return 1; +} + +int dm_report_field_uint64(struct dm_report *rh, + struct dm_report_field *field, const uint64_t *data) +{ + const uint64_t value = *data; + uint64_t *sortval; + char *repstr; + + if (!(repstr = dm_pool_zalloc(rh->mem, 22))) { + log_error("dm_report_field_uint64: dm_pool_alloc failed"); + return 0; + } + + if (!(sortval = dm_pool_alloc(rh->mem, 
sizeof(uint64_t)))) { + log_error("dm_report_field_uint64: dm_pool_alloc failed"); + return 0; + } + + if (dm_snprintf(repstr, 21, FMTu64 , value) < 0) { + log_error("dm_report_field_uint64: uint64 too big: %" PRIu64, value); + return 0; + } + + *sortval = value; + field->sort_value = sortval; + field->report_string = repstr; + + return 1; +} + +/* + * Helper functions for custom report functions + */ +void dm_report_field_set_value(struct dm_report_field *field, const void *value, const void *sortvalue) +{ + field->report_string = (const char *) value; + field->sort_value = sortvalue ? : value; + + if ((field->sort_value == value) && + (field->props->flags & DM_REPORT_FIELD_TYPE_NUMBER)) + log_warn(INTERNAL_ERROR "Using string as sort value for numerical field."); +} + +static const char *_get_field_type_name(unsigned field_type) +{ + switch (field_type) { + case DM_REPORT_FIELD_TYPE_STRING: return "string"; + case DM_REPORT_FIELD_TYPE_NUMBER: return "number"; + case DM_REPORT_FIELD_TYPE_SIZE: return "size"; + case DM_REPORT_FIELD_TYPE_PERCENT: return "percent"; + case DM_REPORT_FIELD_TYPE_TIME: return "time"; + case DM_REPORT_FIELD_TYPE_STRING_LIST: return "string list"; + default: return "unknown"; + } +} + +/* + * show help message + */ +static size_t _get_longest_field_id_len(const struct dm_report_field_type *fields) +{ + uint32_t f; + size_t id_len = 0; + + for (f = 0; fields[f].report_fn; f++) + if (strlen(fields[f].id) > id_len) + id_len = strlen(fields[f].id); + + return id_len; +} + +static void _display_fields_more(struct dm_report *rh, + const struct dm_report_field_type *fields, + size_t id_len, int display_all_fields_item, + int display_field_types) +{ + uint32_t f; + const struct dm_report_object_type *type; + const char *desc, *last_desc = ""; + + for (f = 0; fields[f].report_fn; f++) + if (strlen(fields[f].id) > id_len) + id_len = strlen(fields[f].id); + + for (type = rh->types; type->data_fn; type++) + if (strlen(type->prefix) + 3 > id_len) + id_len = strlen(type->prefix) + 3; + + for (f = 0; fields[f].report_fn; f++) { + if ((type = _find_type(rh, fields[f].type)) && type->desc) + desc = type->desc; + else + desc = " "; + if (desc != last_desc) { + if (*last_desc) + log_warn(" "); + log_warn("%s Fields", desc); + log_warn("%*.*s", (int) strlen(desc) + 7, + (int) strlen(desc) + 7, + "-------------------------------------------------------------------------------"); + if (display_all_fields_item && type->id != SPECIAL_REPORT_TYPE) + log_warn(" %sall%-*s - %s", type->prefix, + (int) (id_len - 3 - strlen(type->prefix)), "", + "All fields in this section."); + } + /* FIXME Add line-wrapping at terminal width (or 80 cols) */ + log_warn(" %-*s - %s%s%s%s%s", (int) id_len, fields[f].id, fields[f].desc, + display_field_types ? " [" : "", + display_field_types ? fields[f].flags & FLD_CMP_UNCOMPARABLE ? "unselectable " : "" : "", + display_field_types ? _get_field_type_name(fields[f].flags & DM_REPORT_FIELD_TYPE_MASK) : "", + display_field_types ? 
"]" : ""); + last_desc = desc; + } +} + +/* + * show help message + */ +static void _display_fields(struct dm_report *rh, int display_all_fields_item, + int display_field_types) +{ + size_t tmp, id_len = 0; + + if ((tmp = _get_longest_field_id_len(_implicit_report_fields)) > id_len) + id_len = tmp; + if ((tmp = _get_longest_field_id_len(rh->fields)) > id_len) + id_len = tmp; + + _display_fields_more(rh, rh->fields, id_len, display_all_fields_item, + display_field_types); + log_warn(" "); + _display_fields_more(rh, _implicit_report_fields, id_len, + display_all_fields_item, display_field_types); + +} + +/* + * Initialise report handle + */ +static int _copy_field(struct dm_report *rh, struct field_properties *dest, + uint32_t field_num, int implicit) +{ + const struct dm_report_field_type *fields = implicit ? _implicit_report_fields + : rh->fields; + + dest->field_num = field_num; + dest->initial_width = fields[field_num].width; + dest->width = fields[field_num].width; /* adjusted in _do_report_object() */ + dest->flags = fields[field_num].flags & DM_REPORT_FIELD_MASK; + dest->implicit = implicit; + + /* set object type method */ + dest->type = _find_type(rh, fields[field_num].type); + if (!dest->type) { + log_error("dm_report: field not match: %s", + fields[field_num].id); + return 0; + } + + return 1; +} + +static struct field_properties * _add_field(struct dm_report *rh, + uint32_t field_num, int implicit, + uint32_t flags) +{ + struct field_properties *fp; + + if (!(fp = dm_pool_zalloc(rh->mem, sizeof(*fp)))) { + log_error("dm_report: struct field_properties allocation " + "failed"); + return NULL; + } + + if (!_copy_field(rh, fp, field_num, implicit)) { + stack; + dm_pool_free(rh->mem, fp); + return NULL; + } + + fp->flags |= flags; + + /* + * Place hidden fields at the front so dm_list_end() will + * tell us when we've reached the last visible field. + */ + if (fp->flags & FLD_HIDDEN) + dm_list_add_h(&rh->field_props, &fp->list); + else + dm_list_add(&rh->field_props, &fp->list); + + return fp; +} + +static int _get_canonical_field_name(const char *field, + size_t flen, + char *canonical_field, + size_t fcanonical_len, + int *differs) +{ + size_t i; + int diff = 0; + + for (i = 0; *field && flen; field++, flen--) { + if (*field == '_') { + diff = 1; + continue; + } + if ((i + 1) >= fcanonical_len) { + canonical_field[0] = '\0'; + log_error("%s: field name too long.", field); + return 0; + } + canonical_field[i++] = *field; + } + + canonical_field[i] = '\0'; + if (differs) + *differs = diff; + return 1; +} + +/* + * Compare canonical_name1 against canonical_name2 or prefix + * plus canonical_name2. Canonical name is a name where all + * superfluous characters are removed (underscores for now). + * Both names are always null-terminated. + */ +static int _is_same_field(const char *canonical_name1, const char *canonical_name2, + const char *prefix) +{ + size_t prefix_len; + + /* Exact match? */ + if (!strcasecmp(canonical_name1, canonical_name2)) + return 1; + + /* Match including prefix? */ + prefix_len = strlen(prefix) - 1; + if (!strncasecmp(prefix, canonical_name1, prefix_len) && + !strcasecmp(canonical_name1 + prefix_len, canonical_name2)) + return 1; + + return 0; +} + +/* + * Check for a report type prefix + "all" match. 
+ */ +static void _all_match_combine(const struct dm_report_object_type *types, + unsigned unprefixed_all_matched, + const char *field, size_t flen, + uint32_t *report_types) +{ + char field_canon[DM_REPORT_FIELD_TYPE_ID_LEN]; + const struct dm_report_object_type *t; + size_t prefix_len; + + if (!_get_canonical_field_name(field, flen, field_canon, sizeof(field_canon), NULL)) + return; + flen = strlen(field_canon); + + for (t = types; t->data_fn; t++) { + prefix_len = strlen(t->prefix) - 1; + + if (!strncasecmp(t->prefix, field_canon, prefix_len) && + ((unprefixed_all_matched && (flen == prefix_len)) || + (!strncasecmp(field_canon + prefix_len, "all", 3) && + (flen == prefix_len + 3)))) + *report_types |= t->id; + } +} + +static uint32_t _all_match(struct dm_report *rh, const char *field, size_t flen) +{ + uint32_t report_types = 0; + unsigned unprefixed_all_matched = 0; + + if (!strncasecmp(field, "all", 3) && flen == 3) { + /* If there's no report prefix, match all report types */ + if (!(flen = strlen(rh->field_prefix))) + return rh->report_types ? : REPORT_TYPES_ALL; + + /* otherwise include all fields beginning with the report prefix. */ + unprefixed_all_matched = 1; + field = rh->field_prefix; + report_types = rh->report_types; + } + + /* Combine all report types that have a matching prefix. */ + _all_match_combine(rh->types, unprefixed_all_matched, field, flen, &report_types); + + return report_types; +} + +/* + * Add all fields with a matching type. + */ +static int _add_all_fields(struct dm_report *rh, uint32_t type) +{ + uint32_t f; + + for (f = 0; rh->fields[f].report_fn; f++) + if ((rh->fields[f].type & type) && !_add_field(rh, f, 0, 0)) + return 0; + + return 1; +} + +static int _get_field(struct dm_report *rh, const char *field, size_t flen, + uint32_t *f_ret, int *implicit) +{ + char field_canon[DM_REPORT_FIELD_TYPE_ID_LEN]; + uint32_t f; + + if (!flen) + return 0; + + if (!_get_canonical_field_name(field, flen, field_canon, sizeof(field_canon), NULL)) + return_0; + + for (f = 0; _implicit_report_fields[f].report_fn; f++) { + if (_is_same_field(_implicit_report_fields[f].id, field_canon, rh->field_prefix)) { + *f_ret = f; + *implicit = 1; + return 1; + } + } + + for (f = 0; rh->fields[f].report_fn; f++) { + if (_is_same_field(rh->canonical_field_ids[f], field_canon, rh->field_prefix)) { + *f_ret = f; + *implicit = 0; + return 1; + } + } + + return 0; +} + +static int _field_match(struct dm_report *rh, const char *field, size_t flen, + unsigned report_type_only) +{ + uint32_t f, type; + int implicit; + + if (!flen) + return 0; + + if ((_get_field(rh, field, flen, &f, &implicit))) { + if (report_type_only) { + rh->report_types |= implicit ? _implicit_report_fields[f].type + : rh->fields[f].type; + return 1; + } + + return _add_field(rh, f, implicit, 0) ? 1 : 0; + } + + if ((type = _all_match(rh, field, flen))) { + if (report_type_only) { + rh->report_types |= type; + return 1; + } + + return _add_all_fields(rh, type); + } + + return 0; +} + +static int _add_sort_key(struct dm_report *rh, uint32_t field_num, int implicit, + uint32_t flags, unsigned report_type_only) +{ + struct field_properties *fp, *found = NULL; + const struct dm_report_field_type *fields = implicit ? 
_implicit_report_fields + : rh->fields; + + dm_list_iterate_items(fp, &rh->field_props) { + if ((fp->implicit == implicit) && (fp->field_num == field_num)) { + found = fp; + break; + } + } + + if (!found) { + if (report_type_only) + rh->report_types |= fields[field_num].type; + else if (!(found = _add_field(rh, field_num, implicit, FLD_HIDDEN))) + return_0; + } + + if (report_type_only) + return 1; + + if (found->flags & FLD_SORT_KEY) { + log_warn("dm_report: Ignoring duplicate sort field: %s.", + fields[field_num].id); + return 1; + } + + found->flags |= FLD_SORT_KEY; + found->sort_posn = rh->keys_count++; + found->flags |= flags; + + return 1; +} + +static int _key_match(struct dm_report *rh, const char *key, size_t len, + unsigned report_type_only) +{ + char key_canon[DM_REPORT_FIELD_TYPE_ID_LEN]; + uint32_t f; + uint32_t flags; + + if (!len) + return 0; + + if (*key == '+') { + key++; + len--; + flags = FLD_ASCENDING; + } else if (*key == '-') { + key++; + len--; + flags = FLD_DESCENDING; + } else + flags = FLD_ASCENDING; + + if (!len) { + log_error("dm_report: Missing sort field name"); + return 0; + } + + if (!_get_canonical_field_name(key, len, key_canon, sizeof(key_canon), NULL)) + return_0; + + for (f = 0; _implicit_report_fields[f].report_fn; f++) + if (_is_same_field(_implicit_report_fields[f].id, key_canon, rh->field_prefix)) + return _add_sort_key(rh, f, 1, flags, report_type_only); + + for (f = 0; rh->fields[f].report_fn; f++) + if (_is_same_field(rh->canonical_field_ids[f], key_canon, rh->field_prefix)) + return _add_sort_key(rh, f, 0, flags, report_type_only); + + return 0; +} + +static int _parse_fields(struct dm_report *rh, const char *format, + unsigned report_type_only) +{ + const char *ws; /* Word start */ + const char *we = format; /* Word end */ + + while (*we) { + /* Allow consecutive commas */ + while (*we && *we == ',') + we++; + + /* start of the field name */ + ws = we; + while (*we && *we != ',') + we++; + + if (!_field_match(rh, ws, (size_t) (we - ws), report_type_only)) { + _display_fields(rh, 1, 0); + log_warn(" "); + log_error("Unrecognised field: %.*s", (int) (we - ws), ws); + return 0; + } + } + + return 1; +} + +static int _parse_keys(struct dm_report *rh, const char *keys, + unsigned report_type_only) +{ + const char *ws; /* Word start */ + const char *we = keys; /* Word end */ + + if (!keys) + return 1; + + while (*we) { + /* Allow consecutive commas */ + while (*we && *we == ',') + we++; + ws = we; + while (*we && *we != ',') + we++; + if (!_key_match(rh, ws, (size_t) (we - ws), report_type_only)) { + _display_fields(rh, 1, 0); + log_warn(" "); + log_error("dm_report: Unrecognised field: %.*s", (int) (we - ws), ws); + return 0; + } + } + + return 1; +} + +static int _contains_reserved_report_type(const struct dm_report_object_type *types) +{ + const struct dm_report_object_type *type, *implicit_type; + + for (implicit_type = _implicit_report_types; implicit_type->data_fn; implicit_type++) { + for (type = types; type->data_fn; type++) { + if (implicit_type->id & type->id) { + log_error(INTERNAL_ERROR "dm_report_init: definition of report " + "types given contains reserved identifier"); + return 1; + } + } + } + + return 0; +} + +static void _dm_report_init_update_types(struct dm_report *rh, uint32_t *report_types) +{ + const struct dm_report_object_type *type; + + if (!report_types) + return; + + *report_types = rh->report_types; + /* + * Do not include implicit types as these are not understood by + * dm_report_init caller - the caller doesn't know 
how to check
+ * these types anyway.
+ */
+	for (type = _implicit_report_types; type->data_fn; type++)
+		*report_types &= ~type->id;
+}
+
+static int _help_requested(struct dm_report *rh)
+{
+	struct field_properties *fp;
+
+	dm_list_iterate_items(fp, &rh->field_props) {
+		if (fp->implicit &&
+		    (!strcmp(_implicit_report_fields[fp->field_num].id, SPECIAL_FIELD_HELP_ID) ||
+		     !strcmp(_implicit_report_fields[fp->field_num].id, SPECIAL_FIELD_HELP_ALT_ID)))
+			return 1;
+	}
+
+	return 0;
+}
+
+static int _canonicalize_field_ids(struct dm_report *rh)
+{
+	size_t registered_field_count = 0, i;
+	char canonical_field[DM_REPORT_FIELD_TYPE_ID_LEN];
+	char *canonical_field_dup;
+	int differs;
+
+	while (*rh->fields[registered_field_count].id)
+		registered_field_count++;
+
+	if (!(rh->canonical_field_ids = dm_pool_alloc(rh->mem, registered_field_count * sizeof(const char *)))) {
+		log_error("_canonicalize_field_ids: dm_pool_alloc failed");
+		return 0;
+	}
+
+	for (i = 0; i < registered_field_count; i++) {
+		if (!_get_canonical_field_name(rh->fields[i].id, strlen(rh->fields[i].id),
+					       canonical_field, sizeof(canonical_field), &differs))
+			return_0;
+
+		if (differs) {
+			if (!(canonical_field_dup = dm_pool_strdup(rh->mem, canonical_field))) {
+				log_error("_canonicalize_field_ids: dm_pool_strdup failed.");
+				return 0;
+			}
+			rh->canonical_field_ids[i] = canonical_field_dup;
+		} else
+			rh->canonical_field_ids[i] = rh->fields[i].id;
+	}
+
+	return 1;
+}
+
+struct dm_report *dm_report_init(uint32_t *report_types,
+				 const struct dm_report_object_type *types,
+				 const struct dm_report_field_type *fields,
+				 const char *output_fields,
+				 const char *output_separator,
+				 uint32_t output_flags,
+				 const char *sort_keys,
+				 void *private_data)
+{
+	struct dm_report *rh;
+	const struct dm_report_object_type *type;
+
+	if (_contains_reserved_report_type(types))
+		return_NULL;
+
+	if (!(rh = dm_zalloc(sizeof(*rh)))) {
+		log_error("dm_report_init: dm_zalloc failed");
+		return NULL;
+	}
+
+	/*
+	 * rh->report_types is updated in _parse_fields() and _parse_keys()
+	 * to contain all types corresponding to the fields specified by
+	 * fields or keys.
+	 */
+	if (report_types)
+		rh->report_types = *report_types;
+
+	rh->separator = output_separator;
+	rh->fields = fields;
+	rh->types = types;
+	rh->private = private_data;
+
+	rh->flags |= output_flags & DM_REPORT_OUTPUT_MASK;
+
+	/* With columns_as_rows we must buffer and not align. */
+	if (output_flags & DM_REPORT_OUTPUT_COLUMNS_AS_ROWS) {
+		if (!(output_flags & DM_REPORT_OUTPUT_BUFFERED))
+			rh->flags |= DM_REPORT_OUTPUT_BUFFERED;
+		if (output_flags & DM_REPORT_OUTPUT_ALIGNED)
+			rh->flags &= ~DM_REPORT_OUTPUT_ALIGNED;
+	}
+
+	if (output_flags & DM_REPORT_OUTPUT_BUFFERED)
+		rh->flags |= RH_SORT_REQUIRED;
+
+	rh->flags |= RH_FIELD_CALC_NEEDED;
+
+	dm_list_init(&rh->field_props);
+	dm_list_init(&rh->rows);
+
+	if ((type = _find_type(rh, rh->report_types)) && type->prefix)
+		rh->field_prefix = type->prefix;
+	else
+		rh->field_prefix = "";
+
+	if (!(rh->mem = dm_pool_create("report", 10 * 1024))) {
+		log_error("dm_report_init: allocation of memory pool failed");
+		dm_free(rh);
+		return NULL;
+	}
+
+	if (!_canonicalize_field_ids(rh)) {
+		dm_report_free(rh);
+		return NULL;
+	}
+
+	/*
+	 * To keep the code needed to add the "all" field to a minimum, we parse
+	 * the field lists twice. The first time we only update the report type.
+	 * FIXME Use one pass instead and expand the "all" field afterwards.
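+	 *
+	 * Editor's note (not part of the original patch): with report_type_only
+	 * set, _parse_fields() and _parse_keys() only accumulate field types
+	 * into rh->report_types; the second pass then creates the actual
+	 * field_properties entries. As far as this editor reads it, the reason
+	 * is that _all_match() consults rh->report_types, so an unprefixed
+	 * "all" must not be expanded before the types of all other requested
+	 * fields and sort keys are known.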
+	 */
+	if (!_parse_fields(rh, output_fields, 1) ||
+	    !_parse_keys(rh, sort_keys, 1)) {
+		dm_report_free(rh);
+		return NULL;
+	}
+
+	/* Generate list of fields for output based on format string & flags */
+	if (!_parse_fields(rh, output_fields, 0) ||
+	    !_parse_keys(rh, sort_keys, 0)) {
+		dm_report_free(rh);
+		return NULL;
+	}
+
+	/*
+	 * Return updated types value for further compatibility check by caller.
+	 */
+	_dm_report_init_update_types(rh, report_types);
+
+	if (_help_requested(rh)) {
+		_display_fields(rh, 1, 0);
+		log_warn(" ");
+		rh->flags |= RH_ALREADY_REPORTED;
+	}
+
+	return rh;
+}
+
+void dm_report_free(struct dm_report *rh)
+{
+	if (rh->selection)
+		dm_pool_destroy(rh->selection->mem);
+	if (rh->value_cache)
+		dm_hash_destroy(rh->value_cache);
+	dm_pool_destroy(rh->mem);
+	dm_free(rh);
+}
+
+static char *_toupperstr(char *str)
+{
+	char *u = str;
+
+	do
+		*u = toupper(*u);
+	while (*u++);
+
+	return str;
+}
+
+int dm_report_set_output_field_name_prefix(struct dm_report *rh, const char *output_field_name_prefix)
+{
+	char *prefix;
+
+	if (!(prefix = dm_pool_strdup(rh->mem, output_field_name_prefix))) {
+		log_error("dm_report_set_output_field_name_prefix: dm_pool_strdup failed");
+		return 0;
+	}
+
+	rh->output_field_name_prefix = _toupperstr(prefix);
+
+	return 1;
+}
+
+/*
+ * Create a row of data for an object
+ */
+static void *_report_get_field_data(struct dm_report *rh,
+				    struct field_properties *fp, void *object)
+{
+	const struct dm_report_field_type *fields = fp->implicit ? _implicit_report_fields
+								 : rh->fields;
+
+	char *ret = fp->type->data_fn(object);
+
+	if (!ret)
+		return NULL;
+
+	return (void *)(ret + fields[fp->field_num].offset);
+}
+
+static void *_report_get_implicit_field_data(struct dm_report *rh __attribute__((unused)),
+					     struct field_properties *fp, struct row *row)
+{
+	if (!strcmp(_implicit_report_fields[fp->field_num].id, SPECIAL_FIELD_SELECTED_ID))
+		return row;
+
+	return NULL;
+}
+
+static int _dbl_equal(double d1, double d2)
+{
+	return fabs(d1 - d2) < DBL_EPSILON;
+}
+
+static int _dbl_greater(double d1, double d2)
+{
+	return (d1 > d2) && !_dbl_equal(d1, d2);
+}
+
+static int _dbl_less(double d1, double d2)
+{
+	return (d1 < d2) && !_dbl_equal(d1, d2);
+}
+
+static int _dbl_greater_or_equal(double d1, double d2)
+{
+	return _dbl_greater(d1, d2) || _dbl_equal(d1, d2);
+}
+
+static int _dbl_less_or_equal(double d1, double d2)
+{
+	return _dbl_less(d1, d2) || _dbl_equal(d1, d2);
+}
+
+#define _uint64 *(const uint64_t *)
+#define _uint64arr(var,index) ((const uint64_t *)(var))[(index)]
+#define _str (const char *)
+#define _dbl *(const double *)
+#define _dblarr(var,index) ((const double *)(var))[(index)]
+
+static int _do_check_value_is_strictly_reserved(unsigned type, const void *res_val, int res_range,
+						const void *val, struct field_selection *fs)
+{
+	int sel_range = fs ?
fs->value->next != NULL : 0; + + switch (type & DM_REPORT_FIELD_TYPE_MASK) { + case DM_REPORT_FIELD_TYPE_NUMBER: + if (res_range && sel_range) { + /* both reserved value and selection value are ranges */ + if (((_uint64 val >= _uint64arr(res_val,0)) && (_uint64 val <= _uint64arr(res_val,1))) || + (fs && ((fs->value->v.i == _uint64arr(res_val,0)) && (fs->value->next->v.i == _uint64arr(res_val,1))))) + return 1; + } else if (res_range) { + /* only reserved value is a range */ + if (((_uint64 val >= _uint64arr(res_val,0)) && (_uint64 val <= _uint64arr(res_val,1))) || + (fs && ((fs->value->v.i >= _uint64arr(res_val,0)) && (fs->value->v.i <= _uint64arr(res_val,1))))) + return 1; + } else if (sel_range) { + /* only selection value is a range */ + if (((_uint64 val >= _uint64 res_val) && (_uint64 val <= _uint64 res_val)) || + (fs && ((fs->value->v.i >= _uint64 res_val) && (fs->value->next->v.i <= _uint64 res_val)))) + return 1; + } else { + /* neither selection value nor reserved value is a range */ + if ((_uint64 val == _uint64 res_val) || + (fs && (fs->value->v.i == _uint64 res_val))) + return 1; + } + break; + + case DM_REPORT_FIELD_TYPE_STRING: + /* there are no ranges for string type yet */ + if ((!strcmp(_str val, _str res_val)) || + (fs && (!strcmp(fs->value->v.s, _str res_val)))) + return 1; + break; + + case DM_REPORT_FIELD_TYPE_SIZE: + if (res_range && sel_range) { + /* both reserved value and selection value are ranges */ + if ((_dbl_greater_or_equal(_dbl val, _dblarr(res_val,0)) && _dbl_less_or_equal(_dbl val, _dblarr(res_val,1))) || + (fs && (_dbl_equal(fs->value->v.d, _dblarr(res_val,0)) && (_dbl_equal(fs->value->next->v.d, _dblarr(res_val,1)))))) + return 1; + } else if (res_range) { + /* only reserved value is a range */ + if ((_dbl_greater_or_equal(_dbl val, _dblarr(res_val,0)) && _dbl_less_or_equal(_dbl val, _dblarr(res_val,1))) || + (fs && (_dbl_greater_or_equal(fs->value->v.d, _dblarr(res_val,0)) && _dbl_less_or_equal(fs->value->v.d, _dblarr(res_val,1))))) + return 1; + } else if (sel_range) { + /* only selection value is a range */ + if ((_dbl_greater_or_equal(_dbl val, _dbl res_val) && (_dbl_less_or_equal(_dbl val, _dbl res_val))) || + (fs && (_dbl_greater_or_equal(fs->value->v.d, _dbl res_val) && _dbl_less_or_equal(fs->value->next->v.d, _dbl res_val)))) + return 1; + } else { + /* neither selection value nor reserved value is a range */ + if ((_dbl_equal(_dbl val, _dbl res_val)) || + (fs && (_dbl_equal(fs->value->v.d, _dbl res_val)))) + return 1; + } + break; + + case DM_REPORT_FIELD_TYPE_STRING_LIST: + /* FIXME Add comparison for string list */ + break; + case DM_REPORT_FIELD_TYPE_TIME: + /* FIXME Add comparison for time */ + break; + } + + return 0; +} + +/* + * Used to check whether a value of certain type used in selection is reserved. + */ +static int _check_value_is_strictly_reserved(struct dm_report *rh, uint32_t field_num, unsigned type, + const void *val, struct field_selection *fs) +{ + const struct dm_report_reserved_value *iter = rh->reserved_values; + const struct dm_report_field_reserved_value *frv; + int res_range; + + if (!iter) + return 0; + + while (iter->value) { + /* Only check strict reserved values, not the weaker form ("named" reserved value). 
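+		 *
+		 * Editor's note (not part of the patch): "strict" means the value
+		 * was declared without DM_REPORT_FIELD_RESERVED_VALUE_NAMED.
+		 * Callers such as _cmp_field_int() use this check to refuse
+		 * ordering comparisons (<, >, <=, >=) against reserved values,
+		 * so a selection like "field > <reserved name>" never matches.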
*/ + if (!(iter->type & DM_REPORT_FIELD_RESERVED_VALUE_NAMED)) { + res_range = iter->type & DM_REPORT_FIELD_RESERVED_VALUE_RANGE; + if ((iter->type & DM_REPORT_FIELD_TYPE_MASK) == DM_REPORT_FIELD_TYPE_NONE) { + frv = (const struct dm_report_field_reserved_value *) iter->value; + if (frv->field_num == field_num && _do_check_value_is_strictly_reserved(type, frv->value, res_range, val, fs)) + return 1; + } else if (iter->type & type && _do_check_value_is_strictly_reserved(type, iter->value, res_range, val, fs)) + return 1; + } + iter++; + } + + return 0; +} + +static int _cmp_field_int(struct dm_report *rh, uint32_t field_num, const char *field_id, + uint64_t val, struct field_selection *fs) +{ + int range = fs->value->next != NULL; + const uint64_t sel1 = fs->value->v.i; + const uint64_t sel2 = range ? fs->value->next->v.i : 0; + + switch(fs->flags & FLD_CMP_MASK) { + case FLD_CMP_EQUAL: + return range ? ((val >= sel1) && (val <= sel2)) : val == sel1; + + case FLD_CMP_NOT|FLD_CMP_EQUAL: + return range ? !((val >= sel1) && (val <= sel2)) : val != sel1; + + case FLD_CMP_NUMBER|FLD_CMP_GT: + if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_NUMBER, &val, fs)) + return 0; + return range ? val > sel2 : val > sel1; + + case FLD_CMP_NUMBER|FLD_CMP_GT|FLD_CMP_EQUAL: + if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_NUMBER, &val, fs)) + return 0; + return val >= sel1; + + case FLD_CMP_NUMBER|FLD_CMP_LT: + if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_NUMBER, &val, fs)) + return 0; + return val < sel1; + + case FLD_CMP_NUMBER|FLD_CMP_LT|FLD_CMP_EQUAL: + if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_NUMBER, &val, fs)) + return 0; + return range ? val <= sel2 : val <= sel1; + + default: + log_error(INTERNAL_ERROR "_cmp_field_int: unsupported number " + "comparison type for field %s", field_id); + } + + return 0; +} + +static int _cmp_field_double(struct dm_report *rh, uint32_t field_num, const char *field_id, + double val, struct field_selection *fs) +{ + int range = fs->value->next != NULL; + double sel1 = fs->value->v.d; + double sel2 = range ? fs->value->next->v.d : 0; + + switch(fs->flags & FLD_CMP_MASK) { + case FLD_CMP_EQUAL: + return range ? (_dbl_greater_or_equal(val, sel1) && _dbl_less_or_equal(val, sel2)) + : _dbl_equal(val, sel1); + + case FLD_CMP_NOT|FLD_CMP_EQUAL: + return range ? !(_dbl_greater_or_equal(val, sel1) && _dbl_less_or_equal(val, sel2)) + : !_dbl_equal(val, sel1); + + case FLD_CMP_NUMBER|FLD_CMP_GT: + if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_SIZE, &val, fs)) + return 0; + return range ? _dbl_greater(val, sel2) + : _dbl_greater(val, sel1); + + case FLD_CMP_NUMBER|FLD_CMP_GT|FLD_CMP_EQUAL: + if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_SIZE, &val, fs)) + return 0; + return _dbl_greater_or_equal(val, sel1); + + case FLD_CMP_NUMBER|FLD_CMP_LT: + if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_SIZE, &val, fs)) + return 0; + return _dbl_less(val, sel1); + + case FLD_CMP_NUMBER|FLD_CMP_LT|FLD_CMP_EQUAL: + if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_SIZE, &val, fs)) + return 0; + return range ? 
_dbl_less_or_equal(val, sel2) : _dbl_less_or_equal(val, sel1);
+
+	default:
+		log_error(INTERNAL_ERROR "_cmp_field_double: unsupported number "
+			  "comparison type for selection field %s", field_id);
+	}
+
+	return 0;
+}
+
+static int _cmp_field_string(struct dm_report *rh __attribute__((unused)),
+			     uint32_t field_num, const char *field_id,
+			     const char *val, struct field_selection *fs)
+{
+	const char *sel = fs->value->v.s;
+
+	switch (fs->flags & FLD_CMP_MASK) {
+	case FLD_CMP_EQUAL:
+		return !strcmp(val, sel);
+	case FLD_CMP_NOT|FLD_CMP_EQUAL:
+		return strcmp(val, sel);
+	default:
+		log_error(INTERNAL_ERROR "_cmp_field_string: unsupported string "
+			  "comparison type for selection field %s", field_id);
+	}
+
+	return 0;
+}
+
+static int _cmp_field_time(struct dm_report *rh,
+			   uint32_t field_num, const char *field_id,
+			   time_t val, struct field_selection *fs)
+{
+	int range = fs->value->next != NULL;
+	time_t sel1 = fs->value->v.t;
+	time_t sel2 = range ? fs->value->next->v.t : 0;
+
+	switch(fs->flags & FLD_CMP_MASK) {
+	case FLD_CMP_EQUAL:
+		return range ? ((val >= sel1) && (val <= sel2)) : val == sel1;
+	case FLD_CMP_NOT|FLD_CMP_EQUAL:
+		return range ? !((val >= sel1) && (val <= sel2)) : val != sel1;
+	case FLD_CMP_TIME|FLD_CMP_GT:
+		if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_TIME, &val, fs))
+			return 0;
+		return range ? val > sel2 : val > sel1;
+	case FLD_CMP_TIME|FLD_CMP_GT|FLD_CMP_EQUAL:
+		if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_TIME, &val, fs))
+			return 0;
+		return val >= sel1;
+	case FLD_CMP_TIME|FLD_CMP_LT:
+		if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_TIME, &val, fs))
+			return 0;
+		return val < sel1;
+	case FLD_CMP_TIME|FLD_CMP_LT|FLD_CMP_EQUAL:
+		if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_TIME, &val, fs))
+			return 0;
+		return range ? val <= sel2 : val <= sel1;
+	default:
+		log_error(INTERNAL_ERROR "_cmp_field_time: unsupported time "
+			  "comparison type for field %s", field_id);
+	}
+
+	return 0;
+}
+
+/* Matches if all items from selection string list match list value strictly 1:1. */
+static int _cmp_field_string_list_strict_all(const struct str_list_sort_value *val,
+					     const struct selection_str_list *sel)
+{
+	unsigned int sel_list_size = dm_list_size(&sel->str_list.list);
+	struct dm_str_list *sel_item;
+	unsigned int i = 1;
+
+	if (!val->items[0].len) {
+		if (sel_list_size == 1) {
+			/* match blank string list with selection defined as blank string only */
+			sel_item = dm_list_item(dm_list_first(&sel->str_list.list), struct dm_str_list);
+			return !strcmp(sel_item->str, "");
+		}
+		return 0;
+	}
+
+	/* if item count differs, it's clear the lists do not match */
+	if (val->items[0].len != sel_list_size)
+		return 0;
+
+	/* both lists are sorted so they either match 1:1 or not */
+	dm_list_iterate_items(sel_item, &sel->str_list.list) {
+		if ((strlen(sel_item->str) != val->items[i].len) ||
+		    strncmp(sel_item->str, val->value + val->items[i].pos, val->items[i].len))
+			return 0;
+		i++;
+	}
+
+	return 1;
+}
+
+/* Matches if all items from selection string list match a subset of list value.
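+ *
+ * Editor's worked example (not part of the patch): for a field value
+ * ["a","b","c"], the subset selection {a,c} matches here because every
+ * selected item occurs in the value, while the strict form [a,c] handled
+ * above fails immediately since the item counts differ (2 vs 3).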
*/ +static int _cmp_field_string_list_subset_all(const struct str_list_sort_value *val, + const struct selection_str_list *sel) +{ + unsigned int sel_list_size = dm_list_size(&sel->str_list.list); + struct dm_str_list *sel_item; + unsigned int i, last_found = 1; + int r = 0; + + if (!val->items[0].len) { + if (sel_list_size == 1) { + /* match blank string list with selection defined as blank string only */ + sel_item = dm_list_item(dm_list_first(&sel->str_list.list), struct dm_str_list); + return !strcmp(sel_item->str, ""); + } + return 0; + } + + /* check selection is a subset of the value */ + dm_list_iterate_items(sel_item, &sel->str_list.list) { + r = 0; + for (i = last_found; i <= val->items[0].len; i++) { + if ((strlen(sel_item->str) == val->items[i].len) && + !strncmp(sel_item->str, val->value + val->items[i].pos, val->items[i].len)) { + last_found = i; + r = 1; + } + } + if (!r) + break; + } + + return r; +} + +/* Matches if any item from selection string list matches list value. */ +static int _cmp_field_string_list_any(const struct str_list_sort_value *val, + const struct selection_str_list *sel) +{ + struct dm_str_list *sel_item; + unsigned int i; + + /* match blank string list with selection that contains blank string */ + if (!val->items[0].len) { + dm_list_iterate_items(sel_item, &sel->str_list.list) { + if (!strcmp(sel_item->str, "")) + return 1; + } + return 0; + } + + dm_list_iterate_items(sel_item, &sel->str_list.list) { + /* + * TODO: Optimize this so we don't need to compare the whole lists' content. + * Make use of the fact that the lists are sorted! + */ + for (i = 1; i <= val->items[0].len; i++) { + if ((strlen(sel_item->str) == val->items[i].len) && + !strncmp(sel_item->str, val->value + val->items[i].pos, val->items[i].len)) + return 1; + } + } + + return 0; +} + +static int _cmp_field_string_list(struct dm_report *rh __attribute__((unused)), + uint32_t field_num, const char *field_id, + const struct str_list_sort_value *val, + struct field_selection *fs) +{ + const struct selection_str_list *sel = fs->value->v.l; + int subset, r; + + switch (sel->type & SEL_LIST_MASK) { + case SEL_LIST_LS: + subset = 0; + break; + case SEL_LIST_SUBSET_LS: + subset = 1; + break; + default: + log_error(INTERNAL_ERROR "_cmp_field_string_list: unknown list type"); + return 0; + } + + switch (sel->type & SEL_MASK) { + case SEL_AND: + r = subset ? _cmp_field_string_list_subset_all(val, sel) + : _cmp_field_string_list_strict_all(val, sel); + break; + case SEL_OR: + r = _cmp_field_string_list_any(val, sel); + break; + default: + log_error(INTERNAL_ERROR "_cmp_field_string_list: unsupported string " + "list type found, expecting either AND or OR list for " + "selection field %s", field_id); + return 0; + } + + return fs->flags & FLD_CMP_NOT ? !r : r; +} + +static int _cmp_field_regex(const char *s, struct field_selection *fs) +{ + int match = dm_regex_match(fs->value->v.r, s) >= 0; + return fs->flags & FLD_CMP_NOT ? !match : match; +} + +static int _compare_selection_field(struct dm_report *rh, + struct dm_report_field *f, + struct field_selection *fs) +{ + const struct dm_report_field_type *fields = f->props->implicit ? 
_implicit_report_fields + : rh->fields; + const char *field_id = fields[f->props->field_num].id; + int r = 0; + + if (!f->sort_value) { + log_error("_compare_selection_field: field without value :%d", + f->props->field_num); + return 0; + } + + if (fs->flags & FLD_CMP_REGEX) + r = _cmp_field_regex((const char *) f->sort_value, fs); + else { + switch(f->props->flags & DM_REPORT_FIELD_TYPE_MASK) { + case DM_REPORT_FIELD_TYPE_PERCENT: + /* + * Check against real percent values only. + * That means DM_PERCENT_0 <= percent <= DM_PERCENT_100. + */ + if (*(const uint64_t *) f->sort_value > DM_PERCENT_100) + return 0; + /* fall through */ + case DM_REPORT_FIELD_TYPE_NUMBER: + r = _cmp_field_int(rh, f->props->field_num, field_id, *(const uint64_t *) f->sort_value, fs); + break; + case DM_REPORT_FIELD_TYPE_SIZE: + r = _cmp_field_double(rh, f->props->field_num, field_id, *(const double *) f->sort_value, fs); + break; + case DM_REPORT_FIELD_TYPE_STRING: + r = _cmp_field_string(rh, f->props->field_num, field_id, (const char *) f->sort_value, fs); + break; + case DM_REPORT_FIELD_TYPE_STRING_LIST: + r = _cmp_field_string_list(rh, f->props->field_num, field_id, (const struct str_list_sort_value *) f->sort_value, fs); + break; + case DM_REPORT_FIELD_TYPE_TIME: + r = _cmp_field_time(rh, f->props->field_num, field_id, *(const time_t *) f->sort_value, fs); + break; + default: + log_error(INTERNAL_ERROR "_compare_selection_field: unknown field type for field %s", field_id); + } + } + + return r; +} + +static int _check_selection(struct dm_report *rh, struct selection_node *sn, + struct dm_list *fields) +{ + int r; + struct selection_node *iter_n; + struct dm_report_field *f; + + switch (sn->type & SEL_MASK) { + case SEL_ITEM: + r = 1; + dm_list_iterate_items(f, fields) { + if (sn->selection.item->fp != f->props) + continue; + if (!_compare_selection_field(rh, f, sn->selection.item)) + r = 0; + } + break; + case SEL_OR: + r = 0; + dm_list_iterate_items(iter_n, &sn->selection.set) + if ((r |= _check_selection(rh, iter_n, fields))) + break; + break; + case SEL_AND: + r = 1; + dm_list_iterate_items(iter_n, &sn->selection.set) + if (!(r &= _check_selection(rh, iter_n, fields))) + break; + break; + default: + log_error("Unsupported selection type"); + return 0; + } + + return (sn->type & SEL_MODIFIER_NOT) ? 
!r : r; +} + +static int _check_report_selection(struct dm_report *rh, struct dm_list *fields) +{ + if (!rh->selection || !rh->selection->selection_root) + return 1; + + return _check_selection(rh, rh->selection->selection_root, fields); +} + +static int _do_report_object(struct dm_report *rh, void *object, int do_output, int *selected) +{ + const struct dm_report_field_type *fields; + struct field_properties *fp; + struct row *row = NULL; + struct dm_report_field *field; + void *data = NULL; + int r = 0; + + if (!rh) { + log_error(INTERNAL_ERROR "_do_report_object: dm_report handler is NULL."); + return 0; + } + + if (!do_output && !selected) { + log_error(INTERNAL_ERROR "_do_report_object: output not requested and " + "selected output variable is NULL too."); + return 0; + } + + if (rh->flags & RH_ALREADY_REPORTED) + return 1; + + if (!(row = dm_pool_zalloc(rh->mem, sizeof(*row)))) { + log_error("_do_report_object: struct row allocation failed"); + return 0; + } + + if (!rh->first_row) + rh->first_row = row; + + row->rh = rh; + + if ((rh->flags & RH_SORT_REQUIRED) && + !(row->sort_fields = + dm_pool_zalloc(rh->mem, sizeof(struct dm_report_field *) * + rh->keys_count))) { + log_error("_do_report_object: " + "row sort value structure allocation failed"); + goto out; + } + + dm_list_init(&row->fields); + row->selected = 1; + + /* For each field to be displayed, call its report_fn */ + dm_list_iterate_items(fp, &rh->field_props) { + if (!(field = dm_pool_zalloc(rh->mem, sizeof(*field)))) { + log_error("_do_report_object: " + "struct dm_report_field allocation failed"); + goto out; + } + + if (fp->implicit) { + fields = _implicit_report_fields; + if (!strcmp(fields[fp->field_num].id, SPECIAL_FIELD_SELECTED_ID)) + row->field_sel_status = field; + } else + fields = rh->fields; + + field->props = fp; + + data = fp->implicit ? _report_get_implicit_field_data(rh, fp, row) + : _report_get_field_data(rh, fp, object); + if (!data) { + log_error("_do_report_object: " + "no data assigned to field %s", + fields[fp->field_num].id); + goto out; + } + + if (!fields[fp->field_num].report_fn(rh, rh->mem, + field, data, + rh->private)) { + log_error("_do_report_object: " + "report function failed for field %s", + fields[fp->field_num].id); + goto out; + } + + dm_list_add(&row->fields, &field->list); + } + + r = 1; + + if (!_check_report_selection(rh, &row->fields)) { + row->selected = 0; + + /* + * If the row is not selected, we still keep it for output if either: + * - we're displaying special "selected" field in the row, + * - or the report is supposed to be on output multiple times + * where each output can have a new selection defined. + */ + if (!row->field_sel_status && !(rh->flags & DM_REPORT_OUTPUT_MULTIPLE_TIMES)) + goto out; + + if (row->field_sel_status) { + /* + * If field with id "selected" is reported, + * report the row although it does not pass + * the selection criteria. + * The "selected" field reports the result + * of the selection. + */ + _implicit_report_fields[row->field_sel_status->props->field_num].report_fn(rh, + rh->mem, row->field_sel_status, row, rh->private); + /* + * If the "selected" field is not displayed, e.g. + * because it is part of the sort field list, + * skip the display of the row as usual unless + * we plan to do the output multiple times. 
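+			 *
+			 * Editor's note (illustrative, not part of the patch):
+			 * this is what lets a command such as
+			 * "lvs -o name,selected -S 'size>1g'" print every row
+			 * with a 0/1 "selected" column, whereas the same
+			 * selection without that field drops non-matching rows.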
+			 */
+			if ((row->field_sel_status->props->flags & FLD_HIDDEN) &&
+			    !(rh->flags & DM_REPORT_OUTPUT_MULTIPLE_TIMES))
+				goto out;
+		}
+	}
+
+	if (!do_output)
+		goto out;
+
+	dm_list_add(&rh->rows, &row->list);
+
+	if (!(rh->flags & DM_REPORT_OUTPUT_BUFFERED))
+		return dm_report_output(rh);
+out:
+	if (selected)
+		*selected = row->selected;
+	if (!do_output || !r)
+		dm_pool_free(rh->mem, row);
+	return r;
+}
+
+static int _do_report_compact_fields(struct dm_report *rh, int global)
+{
+	struct dm_report_field *field;
+	struct field_properties *fp;
+	struct row *row;
+
+	if (!rh) {
+		log_error("_do_report_compact_fields: dm_report handler is NULL.");
+		return 0;
+	}
+
+	if (!(rh->flags & DM_REPORT_OUTPUT_BUFFERED) ||
+	    dm_list_empty(&rh->rows))
+		return 1;
+
+	/*
+	 * First, mark all fields with the FLD_HIDDEN flag. Also mark
+	 * fields with the FLD_COMPACTED flag, but only the ones that
+	 * didn't have FLD_HIDDEN set before. This prevents losing the
+	 * original FLD_HIDDEN flag in the next step...
+	 */
+	dm_list_iterate_items(fp, &rh->field_props) {
+		if (fp->flags & FLD_HIDDEN)
+			continue;
+		if (global || (fp->flags & FLD_COMPACT_ONE))
+			fp->flags |= (FLD_COMPACTED | FLD_HIDDEN);
+	}
+
+	/*
+	 * ...then check each field in every row: if its report value
+	 * is not empty and the FLD_COMPACTED flag is set, drop both the
+	 * FLD_COMPACTED and FLD_HIDDEN flags. It's important to keep the
+	 * FLD_HIDDEN flag for the fields that were already marked with
+	 * FLD_HIDDEN before - these don't have FLD_COMPACTED set - hence
+	 * the extra condition!
+	 */
+	dm_list_iterate_items(row, &rh->rows) {
+		dm_list_iterate_items(field, &row->fields) {
+			if ((field->report_string && *field->report_string) &&
+			    field->props->flags & FLD_COMPACTED)
+				field->props->flags &= ~(FLD_COMPACTED | FLD_HIDDEN);
+		}
+	}
+
+	/*
+	 * The fields left with the FLD_COMPACTED and FLD_HIDDEN flags are
+	 * the ones which have a blank value in all rows. The FLD_HIDDEN
+	 * flag causes such fields not to be reported on output at all.
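+	 *
+	 * Editor's example (hypothetical names, not part of the patch): in a
+	 * buffered report such as
+	 *
+	 *     name  tags  size
+	 *     lv0         4m
+	 *     lv1         8m
+	 *
+	 * the "tags" column is blank in every row, so it keeps FLD_HIDDEN and
+	 * the compacted output contains only "name" and "size".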
+ */ + + return 1; +} + +int dm_report_compact_fields(struct dm_report *rh) +{ + return _do_report_compact_fields(rh, 1); +} + +static int _field_to_compact_match(struct dm_report *rh, const char *field, size_t flen) +{ + struct field_properties *fp; + uint32_t f; + int implicit; + + if ((_get_field(rh, field, flen, &f, &implicit))) { + dm_list_iterate_items(fp, &rh->field_props) { + if ((fp->implicit == implicit) && (fp->field_num == f)) { + fp->flags |= FLD_COMPACT_ONE; + break; + } + } + return 1; + } + + return 0; +} + +static int _parse_fields_to_compact(struct dm_report *rh, const char *fields) +{ + const char *ws; /* Word start */ + const char *we = fields; /* Word end */ + + if (!fields) + return 1; + + while (*we) { + while (*we && *we == ',') + we++; + ws = we; + while (*we && *we != ',') + we++; + if (!_field_to_compact_match(rh, ws, (size_t) (we - ws))) { + log_error("dm_report: Unrecognized field: %.*s", (int) (we - ws), ws); + return 0; + } + } + + return 1; +} + +int dm_report_compact_given_fields(struct dm_report *rh, const char *fields) +{ + if (!_parse_fields_to_compact(rh, fields)) + return_0; + + return _do_report_compact_fields(rh, 0); +} + +int dm_report_object(struct dm_report *rh, void *object) +{ + return _do_report_object(rh, object, 1, NULL); +} + +int dm_report_object_is_selected(struct dm_report *rh, void *object, int do_output, int *selected) +{ + return _do_report_object(rh, object, do_output, selected); +} + +/* + * Selection parsing + */ + +/* + * Other tokens (FIELD, VALUE, STRING, NUMBER, REGEX) + * FIELD := <strings of alphabet, number and '_'> + * VALUE := NUMBER | STRING + * REGEX := <strings quoted by '"', '\'', '(', '{', '[' or unquoted> + * NUMBER := <strings of [0-9]> (because sort_value is unsigned) + * STRING := <strings quoted by '"', '\'' or unquoted> + */ + +static const char * _skip_space(const char *s) +{ + while (*s && isspace(*s)) + s++; + return s; +} + +static int _tok_op(struct op_def *t, const char *s, const char **end, + uint32_t expect) +{ + size_t len; + + s = _skip_space(s); + + for (; t->string; t++) { + if (expect && !(t->flags & expect)) + continue; + + len = strlen(t->string); + if (!strncmp(s, t->string, len)) { + if (end) + *end = s + len; + return t->flags; + } + } + + if (end) + *end = s; + return 0; +} + +static int _tok_op_log(const char *s, const char **end, uint32_t expect) +{ + return _tok_op(_op_log, s, end, expect); +} + +static int _tok_op_cmp(const char *s, const char **end) +{ + return _tok_op(_op_cmp, s, end, 0); +} + +static char _get_and_skip_quote_char(char const **s) +{ + char c = 0; + + if (**s == '"' || **s == '\'') { + c = **s; + (*s)++; + } + + return c; +} + + /* + * + * Input: + * s - a pointer to the parsed string + * Output: + * begin - a pointer to the beginning of the token + * end - a pointer to the end of the token + 1 + * or undefined if return value is NULL + * return value - a starting point of the next parsing or + * NULL if 's' doesn't match with token type + * (the parsing should be terminated) + */ +static const char *_tok_value_number(const char *s, + const char **begin, const char **end) + +{ + int is_float = 0; + + *begin = s; + while ((!is_float && (*s == '.') && ++is_float) || isdigit(*s)) + s++; + *end = s; + + if (*begin == *end) + return NULL; + + return s; +} + +/* + * Input: + * s - a pointer to the parsed string + * endchar - terminating character + * end_op_flags - terminating operator flags (see _op_log) + * (if endchar is non-zero then endflags is ignored) + * Output: + * 
begin - a pointer to the beginning of the token
+ * end - a pointer to the end of the token + 1
+ * end_op_flag_hit - the flag from end_op_flags hit during parsing
+ * return value - a starting point of the next parsing
+ */
+static const char *_tok_value_string(const char *s,
+				     const char **begin, const char **end,
+				     const char endchar, uint32_t end_op_flags,
+				     uint32_t *end_op_flag_hit)
+{
+	uint32_t flag_hit = 0;
+
+	*begin = s;
+
+	/*
+	 * If endchar is defined, scan the string till
+	 * the endchar or the end of string is hit.
+	 * This is in case the string is quoted and we
+	 * know the exact character that is the stopper.
+	 */
+	if (endchar) {
+		while (*s && *s != endchar)
+			s++;
+		if (*s != endchar) {
+			log_error("Missing end quote.");
+			return NULL;
+		}
+		*end = s;
+		s++;
+	} else {
+		/*
+		 * If endchar is not defined then the stopper is one of the
+		 * operators as defined by the 'end_op_flags' arg or a space
+		 * character. This is in case the string is not quoted and
+		 * we don't know which character is the exact stopper.
+		 */
+		while (*s) {
+			if ((flag_hit = _tok_op(_op_log, s, NULL, end_op_flags)) || *s == ' ')
+				break;
+			s++;
+		}
+		*end = s;
+		/*
+		 * If we hit one of the strings as defined by 'end_op_flags'
+		 * and if the 'end_op_flag_hit' arg is provided, save the
+		 * exact string flag that was hit.
+		 */
+		if (end_op_flag_hit)
+			*end_op_flag_hit = flag_hit;
+	}
+
+	return s;
+}
+
+static const char *_reserved_name(struct dm_report *rh,
+				  const struct dm_report_reserved_value *reserved,
+				  const struct dm_report_field_reserved_value *frv,
+				  uint32_t field_num, const char *s, size_t len)
+{
+	dm_report_reserved_handler handler;
+	const char *canonical_name;
+	const char **name;
+	char *tmp_s;
+	char c;
+	int r;
+
+	name = reserved->names;
+	while (*name) {
+		if ((strlen(*name) == len) && !strncmp(*name, s, len))
+			return *name;
+		name++;
+	}
+
+	if (reserved->type & DM_REPORT_FIELD_RESERVED_VALUE_FUZZY_NAMES) {
+		handler = (dm_report_reserved_handler) (frv ? frv->value : reserved->value);
+		c = s[len];
+		tmp_s = (char *) s;
+		tmp_s[len] = '\0';
+		if ((r = handler(rh, rh->selection->mem, field_num,
+				 DM_REPORT_RESERVED_PARSE_FUZZY_NAME,
+				 tmp_s, (const void **) &canonical_name)) <= 0) {
+			if (r == -1)
+				log_error(INTERNAL_ERROR "%s reserved value handler for field %s has missing "
+					  "implementation of DM_REPORT_RESERVED_PARSE_FUZZY_NAME action",
+					  (reserved->type & DM_REPORT_FIELD_TYPE_MASK) ? "type-specific" : "field-specific",
+					  rh->fields[field_num].id);
+			else
+				log_error("Error occurred while processing %s reserved value handler for field %s",
+					  (reserved->type & DM_REPORT_FIELD_TYPE_MASK) ? "type-specific" : "field-specific",
+					  rh->fields[field_num].id);
+		}
+		tmp_s[len] = c;
+		if (r && canonical_name)
+			return canonical_name;
+	}
+
+	return NULL;
+}
+
+/*
+ * Used to replace a string representation of the reserved value
+ * found in selection with the exact reserved value of certain type.
+ */
+static const char *_get_reserved(struct dm_report *rh, unsigned type,
+				 uint32_t field_num, int implicit,
+				 const char *s, const char **begin, const char **end,
+				 struct reserved_value_wrapper *rvw)
+{
+	const struct dm_report_reserved_value *iter = implicit ?
NULL : rh->reserved_values; + const struct dm_report_field_reserved_value *frv; + const char *tmp_begin, *tmp_end, *tmp_s = s; + const char *name = NULL; + char c; + + rvw->reserved = NULL; + + if (!iter) + return s; + + c = _get_and_skip_quote_char(&tmp_s); + if (!(tmp_s = _tok_value_string(tmp_s, &tmp_begin, &tmp_end, c, SEL_AND | SEL_OR | SEL_PRECEDENCE_PE, NULL))) + return s; + + while (iter->value) { + if (!(iter->type & DM_REPORT_FIELD_TYPE_MASK)) { + /* DM_REPORT_FIELD_TYPE_NONE - per-field reserved value */ + frv = (const struct dm_report_field_reserved_value *) iter->value; + if ((frv->field_num == field_num) && (name = _reserved_name(rh, iter, frv, field_num, + tmp_begin, tmp_end - tmp_begin))) + break; + } else if (iter->type & type) { + /* DM_REPORT_FIELD_TYPE_* - per-type reserved value */ + if ((name = _reserved_name(rh, iter, NULL, field_num, + tmp_begin, tmp_end - tmp_begin))) + break; + } + iter++; + } + + if (name) { + /* found! */ + *begin = tmp_begin; + *end = tmp_end; + s = tmp_s; + rvw->reserved = iter; + rvw->matched_name = name; + } + + return s; +} + +float dm_percent_to_float(dm_percent_t percent) +{ + /* Add 0.f to prevent returning -0.00 */ + return (float) percent / DM_PERCENT_1 + 0.f; +} + +float dm_percent_to_round_float(dm_percent_t percent, unsigned digits) +{ + static const float power10[] = { + 1.f, .1f, .01f, .001f, .0001f, .00001f, .000001f, + .0000001f, .00000001f, .000000001f, + .0000000001f + }; + float r; + float f = dm_percent_to_float(percent); + + if (digits >= DM_ARRAY_SIZE(power10)) + digits = DM_ARRAY_SIZE(power10) - 1; /* no better precision */ + + r = DM_PERCENT_1 * power10[digits]; + + if ((percent < r) && (percent > DM_PERCENT_0)) + f = power10[digits]; + else if ((percent > (DM_PERCENT_100 - r)) && (percent < DM_PERCENT_100)) + f = (float) (DM_PERCENT_100 - r) / DM_PERCENT_1; + + return f; +} + +dm_percent_t dm_make_percent(uint64_t numerator, uint64_t denominator) +{ + dm_percent_t percent; + + if (!denominator) + return DM_PERCENT_100; /* FIXME? */ + if (!numerator) + return DM_PERCENT_0; + if (numerator == denominator) + return DM_PERCENT_100; + switch (percent = DM_PERCENT_100 * ((double) numerator / (double) denominator)) { + case DM_PERCENT_100: + return DM_PERCENT_100 - 1; + case DM_PERCENT_0: + return DM_PERCENT_0 + 1; + default: + return percent; + } +} + +int dm_report_value_cache_set(struct dm_report *rh, const char *name, const void *data) +{ + if (!rh->value_cache && (!(rh->value_cache = dm_hash_create(64)))) { + log_error("Failed to create cache for values used during reporting."); + return 0; + } + + return dm_hash_insert(rh->value_cache, name, (void *) data); +} + +const void *dm_report_value_cache_get(struct dm_report *rh, const char *name) +{ + return (rh->value_cache) ? dm_hash_lookup(rh->value_cache, name) : NULL; +} + +/* + * Used to check whether the reserved_values definition passed to + * dm_report_init_with_selection contains only supported reserved value types. 
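+ *
+ * Editor's note (not part of the patch): per the masks below, plain reserved
+ * values are accepted for NUMBER, SIZE, PERCENT, STRING and TIME fields,
+ * while DM_REPORT_FIELD_RESERVED_VALUE_RANGE is additionally restricted to
+ * the number-like types - a STRING reserved value cannot form a range.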
+ */ +static int _check_reserved_values_supported(const struct dm_report_field_type fields[], + const struct dm_report_reserved_value reserved_values[]) +{ + const struct dm_report_reserved_value *iter; + const struct dm_report_field_reserved_value *field_res; + const struct dm_report_field_type *field; + static uint32_t supported_reserved_types = DM_REPORT_FIELD_TYPE_NUMBER | + DM_REPORT_FIELD_TYPE_SIZE | + DM_REPORT_FIELD_TYPE_PERCENT | + DM_REPORT_FIELD_TYPE_STRING | + DM_REPORT_FIELD_TYPE_TIME; + static uint32_t supported_reserved_types_with_range = DM_REPORT_FIELD_RESERVED_VALUE_RANGE | + DM_REPORT_FIELD_TYPE_NUMBER | + DM_REPORT_FIELD_TYPE_SIZE | + DM_REPORT_FIELD_TYPE_PERCENT | + DM_REPORT_FIELD_TYPE_TIME; + + + if (!reserved_values) + return 1; + + iter = reserved_values; + + while (iter->value) { + if (iter->type & DM_REPORT_FIELD_TYPE_MASK) { + if (!(iter->type & supported_reserved_types) || + ((iter->type & DM_REPORT_FIELD_RESERVED_VALUE_RANGE) && + !(iter->type & supported_reserved_types_with_range))) { + log_error(INTERNAL_ERROR "_check_reserved_values_supported: " + "global reserved value for type 0x%x not supported", + iter->type); + return 0; + } + } else { + field_res = (const struct dm_report_field_reserved_value *) iter->value; + field = &fields[field_res->field_num]; + if (!(field->flags & supported_reserved_types) || + ((iter->type & DM_REPORT_FIELD_RESERVED_VALUE_RANGE) && + !(iter->type & supported_reserved_types_with_range))) { + log_error(INTERNAL_ERROR "_check_reserved_values_supported: " + "field-specific reserved value of type 0x%x for " + "field %s not supported", + field->flags & DM_REPORT_FIELD_TYPE_MASK, field->id); + return 0; + } + } + iter++; + } + + return 1; +} + +/* + * Input: + * ft - field type for which the value is parsed + * s - a pointer to the parsed string + * Output: + * begin - a pointer to the beginning of the token + * end - a pointer to the end of the token + 1 + * flags - parsing flags + */ +static const char *_tok_value_regex(struct dm_report *rh, + const struct dm_report_field_type *ft, + const char *s, const char **begin, + const char **end, uint32_t *flags, + struct reserved_value_wrapper *rvw) +{ + char c; + rvw->reserved = NULL; + + s = _skip_space(s); + + if (!*s) { + log_error("Regular expression expected for selection field %s", ft->id); + return NULL; + } + + switch (*s) { + case '(': c = ')'; break; + case '{': c = '}'; break; + case '[': c = ']'; break; + case '"': /* fall through */ + case '\'': c = *s; break; + default: c = 0; + } + + if (!(s = _tok_value_string(c ? s + 1 : s, begin, end, c, SEL_AND | SEL_OR | SEL_PRECEDENCE_PE, NULL))) { + log_error("Failed to parse regex value for selection field %s.", ft->id); + return NULL; + } + + *flags |= DM_REPORT_FIELD_TYPE_STRING; + return s; +} + +static int _str_list_item_cmp(const void *a, const void *b) +{ + const struct dm_str_list * const *item_a = (const struct dm_str_list * const *) a; + const struct dm_str_list * const *item_b = (const struct dm_str_list * const *) b; + + return strcmp((*item_a)->str, (*item_b)->str); +} + +static int _add_item_to_string_list(struct dm_pool *mem, const char *begin, + const char *end, struct dm_list *list) +{ + struct dm_str_list *item; + + if (!(item = dm_pool_zalloc(mem, sizeof(*item))) || + !(item->str = begin == end ? 
"" : dm_pool_strndup(mem, begin, end - begin))) { + log_error("_add_item_to_string_list: memory allocation failed for string list item"); + return 0; + } + dm_list_add(list, &item->list); + + return 1; +} + +/* + * Input: + * ft - field type for which the value is parsed + * mem - memory pool to allocate from + * s - a pointer to the parsed string + * Output: + * begin - a pointer to the beginning of the token (whole list) + * end - a pointer to the end of the token + 1 (whole list) + * sel_str_list - the list of strings parsed + */ +static const char *_tok_value_string_list(const struct dm_report_field_type *ft, + struct dm_pool *mem, const char *s, + const char **begin, const char **end, + struct selection_str_list **sel_str_list) +{ + static const char _str_list_item_parsing_failed[] = "Failed to parse string list value " + "for selection field %s."; + struct selection_str_list *ssl = NULL; + struct dm_str_list *item; + const char *begin_item = NULL, *end_item = NULL, *tmp; + uint32_t op_flags, end_op_flag_expected, end_op_flag_hit = 0; + struct dm_str_list **arr; + size_t list_size; + unsigned int i; + int list_end = 0; + char c; + + if (!(ssl = dm_pool_alloc(mem, sizeof(*ssl)))) { + log_error("_tok_value_string_list: memory allocation failed for selection list"); + goto bad; + } + dm_list_init(&ssl->str_list.list); + ssl->type = 0; + *begin = s; + + if (!(op_flags = _tok_op_log(s, &tmp, SEL_LIST_LS | SEL_LIST_SUBSET_LS))) { + /* Only one item - SEL_LIST_{SUBSET_}LS and SEL_LIST_{SUBSET_}LE not used */ + c = _get_and_skip_quote_char(&s); + if (!(s = _tok_value_string(s, &begin_item, &end_item, c, SEL_AND | SEL_OR | SEL_PRECEDENCE_PE, NULL))) { + log_error(_str_list_item_parsing_failed, ft->id); + goto bad; + } + if (!_add_item_to_string_list(mem, begin_item, end_item, &ssl->str_list.list)) + goto_bad; + ssl->type = SEL_OR | SEL_LIST_LS; + goto out; + } + + /* More than one item - items enclosed in SEL_LIST_LS and SEL_LIST_LE + * or SEL_LIST_SUBSET_LS and SEL_LIST_SUBSET_LE. + * Each element is terminated by AND or OR operator or 'list end'. + * The first operator hit is then the one allowed for the whole list, + * no mixing allowed! + */ + + /* Are we using [] or {} for the list? */ + end_op_flag_expected = (op_flags == SEL_LIST_LS) ? SEL_LIST_LE : SEL_LIST_SUBSET_LE; + + op_flags = SEL_LIST_LE | SEL_LIST_SUBSET_LE | SEL_AND | SEL_OR; + s++; + while (*s) { + s = _skip_space(s); + c = _get_and_skip_quote_char(&s); + if (!(s = _tok_value_string(s, &begin_item, &end_item, c, op_flags, NULL))) { + log_error(_str_list_item_parsing_failed, ft->id); + goto bad; + } + s = _skip_space(s); + + if (!(end_op_flag_hit = _tok_op_log(s, &tmp, op_flags))) { + log_error("Invalid operator in selection list."); + goto bad; + } + + if (end_op_flag_hit & (SEL_LIST_LE | SEL_LIST_SUBSET_LE)) { + list_end = 1; + if (end_op_flag_hit != end_op_flag_expected) { + for (i = 0; _op_log[i].string; i++) + if (_op_log[i].flags == end_op_flag_expected) + break; + log_error("List ended with incorrect character, " + "expecting \'%s\'.", _op_log[i].string); + goto bad; + } + } + + if (ssl->type) { + if (!list_end && !(ssl->type & end_op_flag_hit)) { + log_error("Only one type of logical operator allowed " + "in selection list at a time."); + goto bad; + } + } else { + if (list_end) + ssl->type = end_op_flag_expected == SEL_LIST_LE ? 
SEL_AND : SEL_OR; + else + ssl->type = end_op_flag_hit; + } + + if (!_add_item_to_string_list(mem, begin_item, end_item, &ssl->str_list.list)) + goto_bad; + + s = tmp; + + if (list_end) + break; + } + + if (!(end_op_flag_hit & (SEL_LIST_LE | SEL_LIST_SUBSET_LE))) { + log_error("Missing list end for selection field %s", ft->id); + goto bad; + } + + /* Store information whether [] or {} was used. */ + if (end_op_flag_expected == SEL_LIST_LE) + ssl->type |= SEL_LIST_LS; + else + ssl->type |= SEL_LIST_SUBSET_LS; + + /* Sort the list. */ + if (!(list_size = dm_list_size(&ssl->str_list.list))) { + log_error(INTERNAL_ERROR "_tok_value_string_list: list has no items"); + goto bad; + } else if (list_size == 1) + goto out; + if (!(arr = dm_malloc(sizeof(item) * list_size))) { + log_error("_tok_value_string_list: memory allocation failed for sort array"); + goto bad; + } + + i = 0; + dm_list_iterate_items(item, &ssl->str_list.list) + arr[i++] = item; + qsort(arr, list_size, sizeof(item), _str_list_item_cmp); + dm_list_init(&ssl->str_list.list); + for (i = 0; i < list_size; i++) + dm_list_add(&ssl->str_list.list, &arr[i]->list); + + dm_free(arr); +out: + *end = s; + if (sel_str_list) + *sel_str_list = ssl; + + return s; +bad: + *end = s; + if (ssl) + dm_pool_free(mem, ssl); + if (sel_str_list) + *sel_str_list = NULL; + return s; +} + +struct time_value { + int range; + time_t t1; + time_t t2; +}; + +static const char *_out_of_range_msg = "Field selection value %s out of supported range for field %s."; + +/* + * Standard formatted date and time - ISO8601. + * + * date time timezone + * + * date: + * YYYY-MM-DD (or shortly YYYYMMDD) + * YYYY-MM (shortly YYYYMM), auto DD=1 + * YYYY, auto MM=01 and DD=01 + * + * time: + * hh:mm:ss (or shortly hhmmss) + * hh:mm (or shortly hhmm), auto ss=0 + * hh (or shortly hh), auto mm=0, auto ss=0 + * + * timezone: + * +hh:mm or -hh:mm (or shortly +hhmm or -hhmm) + * +hh or -hh +*/ + +#define DELIM_DATE '-' +#define DELIM_TIME ':' + +static int _days_in_month[12] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}; + +static int _is_leap_year(long year) +{ + return (((year % 4==0) && (year % 100 != 0)) || (year % 400 == 0)); +} + +static int _get_days_in_month(long month, long year) +{ + return (month == 2 && _is_leap_year(year)) ? 
_days_in_month[month-1] + 1 + : _days_in_month[month-1]; +} + +typedef enum { + RANGE_NONE, + RANGE_SECOND, + RANGE_MINUTE, + RANGE_HOUR, + RANGE_DAY, + RANGE_MONTH, + RANGE_YEAR +} time_range_t; + +static char *_get_date(char *str, struct tm *tm, time_range_t *range) +{ + static const char incorrect_date_format_msg[] = "Incorrect date format."; + time_range_t tmp_range = RANGE_NONE; + long n1, n2 = -1, n3 = -1; + char *s = str, *end; + size_t len = 0; + + if (!isdigit(*s)) + /* we need a year at least */ + return NULL; + + n1 = strtol(s, &end, 10); + if (*end == DELIM_DATE) { + len += (4 - (end - s)); /* diff in length from standard YYYY */ + s = end + 1; + if (isdigit(*s)) { + n2 = strtol(s, &end, 10); + len += (2 - (end - s)); /* diff in length from standard MM */ + if (*end == DELIM_DATE) { + s = end + 1; + n3 = strtol(s, &end, 10); + len += (2 - (end - s)); /* diff in length from standard DD */ + } + } + } + + len = len + end - str; + + /* variations from standard YYYY-MM-DD */ + if (n3 == -1) { + if (n2 == -1) { + if (len == 4) { + /* YYYY */ + tmp_range = RANGE_YEAR; + n3 = n2 = 1; + } else if (len == 6) { + /* YYYYMM */ + tmp_range = RANGE_MONTH; + n3 = 1; + n2 = n1 % 100; + n1 = n1 / 100; + } else if (len == 8) { + tmp_range = RANGE_DAY; + /* YYYYMMDD */ + n3 = n1 % 100; + n2 = (n1 / 100) % 100; + n1 = n1 / 10000; + } else { + log_error(incorrect_date_format_msg); + return NULL; + } + } else { + if (len == 7) { + tmp_range = RANGE_MONTH; + /* YYYY-MM */ + n3 = 1; + } else { + log_error(incorrect_date_format_msg); + return NULL; + } + } + } + + if (n2 < 1 || n2 > 12) { + log_error("Specified month out of range."); + return NULL; + } + + if (n3 < 1 || n3 > _get_days_in_month(n2, n1)) { + log_error("Specified day out of range."); + return NULL; + } + + if (tmp_range == RANGE_NONE) + tmp_range = RANGE_DAY; + + tm->tm_year = n1 - 1900; + tm->tm_mon = n2 - 1; + tm->tm_mday = n3; + *range = tmp_range; + + return (char *) _skip_space(end); +} + +static char *_get_time(char *str, struct tm *tm, time_range_t *range) +{ + static const char incorrect_time_format_msg[] = "Incorrect time format."; + time_range_t tmp_range = RANGE_NONE; + long n1, n2 = -1, n3 = -1; + char *s = str, *end; + size_t len = 0; + + if (!isdigit(*s)) { + /* time is not compulsory */ + tm->tm_hour = tm->tm_min = tm->tm_sec = 0; + return (char *) _skip_space(s); + } + + n1 = strtol(s, &end, 10); + if (*end == DELIM_TIME) { + len += (2 - (end - s)); /* diff in length from standard HH */ + s = end + 1; + if (isdigit(*s)) { + n2 = strtol(s, &end, 10); + len += (2 - (end - s)); /* diff in length from standard MM */ + if (*end == DELIM_TIME) { + s = end + 1; + n3 = strtol(s, &end, 10); + len += (2 - (end - s)); /* diff in length from standard SS */ + } + } + } + + len = len + end - str; + + /* variations from standard HH:MM:SS */ + if (n3 == -1) { + if (n2 == -1) { + if (len == 2) { + /* HH */ + tmp_range = RANGE_HOUR; + n3 = n2 = 0; + } else if (len == 4) { + /* HHMM */ + tmp_range = RANGE_MINUTE; + n3 = 0; + n2 = n1 % 100; + n1 = n1 / 100; + } else if (len == 6) { + /* HHMMSS */ + tmp_range = RANGE_SECOND; + n3 = n1 % 100; + n2 = (n1 / 100) % 100; + n1 = n1 / 10000; + } else { + log_error(incorrect_time_format_msg); + return NULL; + } + } else { + if (len == 5) { + /* HH:MM */ + tmp_range = RANGE_MINUTE; + n3 = 0; + } else { + log_error(incorrect_time_format_msg); + return NULL; + } + } + } + + if (n1 < 0 || n1 > 23) { + log_error("Specified hours out of range."); + return NULL; + } + + if (n2 < 0 || n2 > 60) { + 
log_error("Specified minutes out of range."); + return NULL; + } + + if (n3 < 0 || n3 > 60) { + log_error("Specified seconds out of range."); + return NULL; + } + + /* Just time without exact date is incomplete! */ + if (*range != RANGE_DAY) { + log_error("Full date specification needed."); + return NULL; + } + + tm->tm_hour = n1; + tm->tm_min = n2; + tm->tm_sec = n3; + *range = tmp_range; + + return (char *) _skip_space(end); +} + +/* The offset is always an absolute offset against GMT! */ +static char *_get_tz(char *str, int *tz_supplied, int *offset) +{ + long n1, n2 = -1; + char *s = str, *end; + int sign = 1; /* +HH:MM by default */ + size_t len = 0; + + *tz_supplied = 0; + *offset = 0; + + if (!isdigit(*s)) { + if (*s == '+') { + sign = 1; + s = s + 1; + } else if (*s == '-') { + sign = -1; + s = s + 1; + } else + return (char *) _skip_space(s); + } + + n1 = strtol(s, &end, 10); + if (*end == DELIM_TIME) { + len = (2 - (end - s)); /* diff in length from standard HH */ + s = end + 1; + if (isdigit(*s)) { + n2 = strtol(s, &end, 10); + len = (2 - (end - s)); /* diff in length from standard MM */ + } + } + + len = len + end - s; + + /* variations from standard HH:MM */ + if (n2 == -1) { + if (len == 2) { + /* HH */ + n2 = 0; + } else if (len == 4) { + /* HHMM */ + n2 = n1 % 100; + n1 = n1 / 100; + } else + return NULL; + } + + if (n2 < 0 || n2 > 60) + return NULL; + + if (n1 < 0 || n1 > 14) + return NULL; + + /* timezone offset in seconds */ + *offset = sign * ((n1 * 3600) + (n2 * 60)); + *tz_supplied = 1; + return (char *) _skip_space(end); +} + +static int _local_tz_offset(time_t t_local) +{ + struct tm tm_gmt; + time_t t_gmt; + + gmtime_r(&t_local, &tm_gmt); + t_gmt = mktime(&tm_gmt); + + /* + * gmtime returns time that is adjusted + * for DST.Subtract this adjustment back + * to give us proper *absolute* offset + * for our local timezone. + */ + if (tm_gmt.tm_isdst) + t_gmt -= 3600; + + return t_local - t_gmt; +} + +static void _get_final_time(time_range_t range, struct tm *tm, + int tz_supplied, int offset, + struct time_value *tval) +{ + + struct tm tm_up = *tm; + + switch (range) { + case RANGE_SECOND: + if (tm_up.tm_sec < 59) { + tm_up.tm_sec += 1; + break; + } + /* fall through */ + case RANGE_MINUTE: + if (tm_up.tm_min < 59) { + tm_up.tm_min += 1; + break; + } + /* fall through */ + case RANGE_HOUR: + if (tm_up.tm_hour < 23) { + tm_up.tm_hour += 1; + break; + } + /* fall through */ + case RANGE_DAY: + if (tm_up.tm_mday < _get_days_in_month(tm_up.tm_mon, tm_up.tm_year)) { + tm_up.tm_mday += 1; + break; + } + /* fall through */ + case RANGE_MONTH: + if (tm_up.tm_mon < 11) { + tm_up.tm_mon += 1; + break; + } + /* fall through */ + case RANGE_YEAR: + tm_up.tm_year += 1; + break; + case RANGE_NONE: + /* nothing to do here */ + break; + } + + tval->range = (range != RANGE_NONE); + tval->t1 = mktime(tm); + tval->t2 = mktime(&tm_up) - 1; + + if (tz_supplied) { + /* + * The 'offset' is with respect to the GMT. + * Calculate what the offset is with respect + * to our local timezone and adjust times + * so they represent time in our local timezone. 
+		 */
+		offset -= _local_tz_offset(tval->t1);
+		tval->t1 -= offset;
+		tval->t2 -= offset;
+	}
+}
+
+static int _parse_formatted_date_time(char *str, struct time_value *tval)
+{
+	time_range_t range = RANGE_NONE;
+	struct tm tm = {0};
+	int gmt_offset;
+	int tz_supplied;
+
+	tm.tm_year = tm.tm_mday = tm.tm_mon = -1;
+	tm.tm_hour = tm.tm_min = tm.tm_sec = -1;
+	tm.tm_isdst = tm.tm_wday = tm.tm_yday = -1;
+
+	if (!(str = _get_date(str, &tm, &range)))
+		return 0;
+
+	if (!(str = _get_time(str, &tm, &range)))
+		return 0;
+
+	if (!(str = _get_tz(str, &tz_supplied, &gmt_offset)))
+		return 0;
+
+	if (*str)
+		return 0;
+
+	_get_final_time(range, &tm, tz_supplied, gmt_offset, tval);
+
+	return 1;
+}
+
+static const char *_tok_value_time(const struct dm_report_field_type *ft,
+				   struct dm_pool *mem, const char *s,
+				   const char **begin, const char **end,
+				   struct time_value *tval)
+{
+	char *time_str = NULL;
+	const char *r = NULL;
+	uint64_t t;
+	char c;
+
+	s = _skip_space(s);
+
+	if (*s == '@') {
+		/* Absolute time value in number of seconds since epoch. */
+		if (!(s = _tok_value_number(s+1, begin, end)))
+			goto_out;
+
+		if (!(time_str = dm_pool_strndup(mem, *begin, *end - *begin))) {
+			log_error("_tok_value_time: dm_pool_strndup failed");
+			goto out;
+		}
+
+		errno = 0;
+		if (((t = strtoull(time_str, NULL, 10)) == ULLONG_MAX) && errno == ERANGE) {
+			log_error(_out_of_range_msg, time_str, ft->id);
+			goto out;
+		}
+
+		tval->range = 0;
+		tval->t1 = (time_t) t;
+		tval->t2 = 0;
+		r = s;
+	} else {
+		c = _get_and_skip_quote_char(&s);
+		if (!(s = _tok_value_string(s, begin, end, c, SEL_AND | SEL_OR | SEL_PRECEDENCE_PE, NULL)))
+			goto_out;
+
+		if (!(time_str = dm_pool_strndup(mem, *begin, *end - *begin))) {
+			log_error("_tok_value_time: dm_pool_strndup failed");
+			goto out;
+		}
+
+		if (!_parse_formatted_date_time(time_str, tval))
+			goto_out;
+		r = s;
+	}
+out:
+	if (time_str)
+		dm_pool_free(mem, time_str);
+	return r;
+}
+
+/*
+ * Input:
+ * ft - field type for which the value is parsed
+ * s - a pointer to the parsed string
+ * mem - memory pool to allocate from
+ * Output:
+ * begin - a pointer to the beginning of the token
+ * end - a pointer to the end of the token + 1
+ * flags - parsing flags
+ * custom - custom data specific to token type
+ * (e.g. size unit factor)
+ */
+static const char *_tok_value(struct dm_report *rh,
+			      const struct dm_report_field_type *ft,
+			      uint32_t field_num, int implicit,
+			      const char *s,
+			      const char **begin, const char **end,
+			      uint32_t *flags,
+			      struct reserved_value_wrapper *rvw,
+			      struct dm_pool *mem, void *custom)
+{
+	int expected_type = ft->flags & DM_REPORT_FIELD_TYPE_MASK;
+	struct selection_str_list **str_list;
+	struct time_value *tval;
+	uint64_t *factor;
+	const char *tmp;
+	char c;
+
+	s = _skip_space(s);
+
+	s = _get_reserved(rh, expected_type, field_num, implicit, s, begin, end, rvw);
+	if (rvw->reserved) {
+		/*
+		 * FLD_CMP_NUMBER shares operators with FLD_CMP_TIME,
+		 * so adjust flags here based on expected type.
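+		 *
+		 * Editor's note (not part of the patch): operators such as ">"
+		 * or ">=" carry both candidate flags, since the token alone
+		 * cannot distinguish a numeric comparison from a time
+		 * comparison; only once the field's expected type is known can
+		 * the wrong interpretation be masked out, as done here and in
+		 * the NUMBER and TIME branches below.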
+ */ + if (expected_type == DM_REPORT_FIELD_TYPE_TIME) + *flags &= ~FLD_CMP_NUMBER; + else if (expected_type == DM_REPORT_FIELD_TYPE_NUMBER) + *flags &= ~FLD_CMP_TIME; + *flags |= expected_type; + return s; + } + + switch (expected_type) { + + case DM_REPORT_FIELD_TYPE_STRING: + c = _get_and_skip_quote_char(&s); + if (!(s = _tok_value_string(s, begin, end, c, SEL_AND | SEL_OR | SEL_PRECEDENCE_PE, NULL))) { + log_error("Failed to parse string value " + "for selection field %s.", ft->id); + return NULL; + } + *flags |= DM_REPORT_FIELD_TYPE_STRING; + break; + + case DM_REPORT_FIELD_TYPE_STRING_LIST: + if (!(str_list = (struct selection_str_list **) custom)) + goto_bad; + + s = _tok_value_string_list(ft, mem, s, begin, end, str_list); + if (!(*str_list)) { + log_error("Failed to parse string list value " + "for selection field %s.", ft->id); + return NULL; + } + *flags |= DM_REPORT_FIELD_TYPE_STRING_LIST; + break; + + case DM_REPORT_FIELD_TYPE_NUMBER: + /* fall through */ + case DM_REPORT_FIELD_TYPE_SIZE: + /* fall through */ + case DM_REPORT_FIELD_TYPE_PERCENT: + if (!(s = _tok_value_number(s, begin, end))) { + log_error("Failed to parse numeric value " + "for selection field %s.", ft->id); + return NULL; + } + + if (*s == DM_PERCENT_CHAR) { + s++; + c = DM_PERCENT_CHAR; + if (expected_type != DM_REPORT_FIELD_TYPE_PERCENT) { + log_error("Found percent value but %s value " + "expected for selection field %s.", + expected_type == DM_REPORT_FIELD_TYPE_NUMBER ? + "numeric" : "size", ft->id); + return NULL; + } + } else { + if (!(factor = (uint64_t *) custom)) + goto_bad; + + if ((*factor = dm_units_to_factor(s, &c, 0, &tmp))) { + s = tmp; + if (expected_type != DM_REPORT_FIELD_TYPE_SIZE) { + log_error("Found size unit specifier " + "but %s value expected for " + "selection field %s.", + expected_type == DM_REPORT_FIELD_TYPE_NUMBER ? + "numeric" : "percent", ft->id); + return NULL; + } + } else if (expected_type == DM_REPORT_FIELD_TYPE_SIZE) { + /* + * If size unit is not defined in the selection + * and the type expected is size, use use 'm' + * (1 MiB) for the unit by default. This is the + * same behaviour as seen in lvcreate -L <size>. + */ + *factor = 1024*1024; + } + } + + *flags |= expected_type; + /* + * FLD_CMP_NUMBER shares operators with FLD_CMP_TIME, + * but we have NUMBER here, so remove FLD_CMP_TIME. + */ + *flags &= ~FLD_CMP_TIME; + break; + + case DM_REPORT_FIELD_TYPE_TIME: + if (!(tval = (struct time_value *) custom)) + goto_bad; + + if (!(s = _tok_value_time(ft, mem, s, begin, end, tval))) { + log_error("Failed to parse time value " + "for selection field %s.", ft->id); + return NULL; + } + + *flags |= DM_REPORT_FIELD_TYPE_TIME; + /* + * FLD_CMP_TIME shares operators with FLD_CMP_NUMBER, + * but we have TIME here, so remove FLD_CMP_NUMBER. 
+ */
+ *flags &= ~FLD_CMP_NUMBER;
+ break;
+ }
+
+ return s;
+bad:
+ log_error(INTERNAL_ERROR "Forbidden NULL custom detected.");
+
+ return NULL;
+}
+
+/*
+ * Input:
+ * s - a pointer to the parsed string
+ * Output:
+ * begin - a pointer to the beginning of the token
+ * end - a pointer to the end of the token + 1
+ */
+static const char *_tok_field_name(const char *s,
+ const char **begin, const char **end)
+{
+ char c;
+ s = _skip_space(s);
+
+ *begin = s;
+ while ((c = *s) &&
+ (isalnum(c) || c == '_' || c == '-'))
+ s++;
+ *end = s;
+
+ if (*begin == *end)
+ return NULL;
+
+ return s;
+}
+
+static int _get_reserved_value(struct dm_report *rh, uint32_t field_num,
+ struct reserved_value_wrapper *rvw)
+{
+ const void *tmp_value;
+ dm_report_reserved_handler handler;
+ int r;
+
+ if (!rvw->reserved) {
+ rvw->value = NULL;
+ return 1;
+ }
+
+ if (rvw->reserved->type & DM_REPORT_FIELD_TYPE_MASK)
+ /* type reserved value */
+ tmp_value = rvw->reserved->value;
+ else
+ /* per-field reserved value */
+ tmp_value = ((const struct dm_report_field_reserved_value *) rvw->reserved->value)->value;
+
+ if (rvw->reserved->type & (DM_REPORT_FIELD_RESERVED_VALUE_DYNAMIC_VALUE | DM_REPORT_FIELD_RESERVED_VALUE_FUZZY_NAMES)) {
+ handler = (dm_report_reserved_handler) tmp_value;
+ if ((r = handler(rh, rh->selection->mem, field_num,
+ DM_REPORT_RESERVED_GET_DYNAMIC_VALUE,
+ rvw->matched_name, &tmp_value)) <= 0) {
+ if (r == -1)
+ log_error(INTERNAL_ERROR "%s reserved value handler for field %s has missing "
+ "implementation of DM_REPORT_RESERVED_GET_DYNAMIC_VALUE action",
+ (rvw->reserved->type) & DM_REPORT_FIELD_TYPE_MASK ? "type-specific" : "field-specific",
+ rh->fields[field_num].id);
+ else
+ log_error("Error occurred while processing %s reserved value handler for field %s",
+ (rvw->reserved->type) & DM_REPORT_FIELD_TYPE_MASK ? "type-specific" : "field-specific",
+ rh->fields[field_num].id);
+ return 0;
+ }
+ }
+
+ rvw->value = tmp_value;
+ return 1;
+}
+
+static struct field_selection *_create_field_selection(struct dm_report *rh,
+ uint32_t field_num,
+ int implicit,
+ const char *v,
+ size_t len,
+ uint32_t flags,
+ struct reserved_value_wrapper *rvw,
+ void *custom)
+{
+ static const char *_field_selection_value_alloc_failed_msg = "dm_report: struct field_selection_value allocation failed for selection field %s";
+ const struct dm_report_field_type *fields = implicit ? _implicit_report_fields
+ : rh->fields;
+ struct field_properties *fp, *found = NULL;
+ struct field_selection *fs;
+ const char *field_id;
+ struct time_value *tval;
+ uint64_t factor;
+ char *s;
+
+ dm_list_iterate_items(fp, &rh->field_props) {
+ if ((fp->implicit == implicit) && (fp->field_num == field_num)) {
+ found = fp;
+ break;
+ }
+ }
+
+ /* The field is neither used in display options nor sort keys. */
+ if (!found) {
+ if (rh->selection->add_new_fields) {
+ if (!(found = _add_field(rh, field_num, implicit, FLD_HIDDEN)))
+ return NULL;
+ rh->report_types |= fields[field_num].type;
+ } else {
+ log_error("Unable to create selection with field \'%s\' "
+ "which is not included in current report.",
+ implicit ?
_implicit_report_fields[field_num].id
+ : rh->fields[field_num].id);
+ return NULL;
+ }
+ }
+
+ field_id = fields[found->field_num].id;
+
+ if (!(found->flags & flags & DM_REPORT_FIELD_TYPE_MASK)) {
+ log_error("dm_report: incompatible comparison "
+ "type for selection field %s", field_id);
+ return NULL;
+ }
+
+ /* set up selection */
+ if (!(fs = dm_pool_zalloc(rh->selection->mem, sizeof(struct field_selection)))) {
+ log_error("dm_report: struct field_selection "
+ "allocation failed for selection field %s", field_id);
+ return NULL;
+ }
+
+ if (!(fs->value = dm_pool_zalloc(rh->selection->mem, sizeof(struct field_selection_value)))) {
+ log_error(_field_selection_value_alloc_failed_msg, field_id);
+ goto error;
+ }
+
+ if (((rvw->reserved && (rvw->reserved->type & DM_REPORT_FIELD_RESERVED_VALUE_RANGE)) ||
+ (((flags & DM_REPORT_FIELD_TYPE_MASK) == DM_REPORT_FIELD_TYPE_TIME) &&
+ custom && ((struct time_value *) custom)->range))
+ &&
+ !(fs->value->next = dm_pool_zalloc(rh->selection->mem, sizeof(struct field_selection_value)))) {
+ log_error(_field_selection_value_alloc_failed_msg, field_id);
+ goto error;
+ }
+
+ fs->fp = found;
+ fs->flags = flags;
+
+ if (!_get_reserved_value(rh, field_num, rvw)) {
+ log_error("dm_report: could not get reserved value "
+ "while processing selection field %s", field_id);
+ goto error;
+ }
+
+ /* store comparison operand */
+ if (flags & FLD_CMP_REGEX) {
+ /* REGEX */
+ if (!(s = dm_malloc(len + 1))) {
+ log_error("dm_report: dm_malloc failed to store "
+ "regex value for selection field %s", field_id);
+ goto error;
+ }
+ memcpy(s, v, len);
+ s[len] = '\0';
+
+ fs->value->v.r = dm_regex_create(rh->selection->mem, (const char * const *) &s, 1);
+ dm_free(s);
+ if (!fs->value->v.r) {
+ log_error("dm_report: failed to create regex "
+ "matcher for selection field %s", field_id);
+ goto error;
+ }
+ } else {
+ /* STRING, NUMBER, SIZE, PERCENT, STRING_LIST, TIME */
+ if (!(s = dm_pool_strndup(rh->selection->mem, v, len))) {
+ log_error("dm_report: dm_pool_strndup failed for value "
+ "of selection field %s", field_id);
+ goto error;
+ }
+
+ switch (flags & DM_REPORT_FIELD_TYPE_MASK) {
+ case DM_REPORT_FIELD_TYPE_STRING:
+ if (rvw->value) {
+ fs->value->v.s = (const char *) rvw->value;
+ if (rvw->reserved->type & DM_REPORT_FIELD_RESERVED_VALUE_RANGE)
+ fs->value->next->v.s = (((const char * const *) rvw->value)[1]);
+ dm_pool_free(rh->selection->mem, s);
+ } else {
+ fs->value->v.s = s;
+ if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_STRING, fs->value->v.s, NULL)) {
+ log_error("String value %s found in selection is reserved.", fs->value->v.s);
+ goto error;
+ }
+ }
+ break;
+ case DM_REPORT_FIELD_TYPE_NUMBER:
+ if (rvw->value) {
+ fs->value->v.i = *(const uint64_t *) rvw->value;
+ if (rvw->reserved->type & DM_REPORT_FIELD_RESERVED_VALUE_RANGE)
+ fs->value->next->v.i = (((const uint64_t *) rvw->value)[1]);
+ } else {
+ errno = 0;
+ if (((fs->value->v.i = strtoull(s, NULL, 10)) == ULLONG_MAX) &&
+ (errno == ERANGE)) {
+ log_error(_out_of_range_msg, s, field_id);
+ goto error;
+ }
+ if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_NUMBER, &fs->value->v.i, NULL)) {
+ log_error("Numeric value %" PRIu64 " found in selection is reserved.", fs->value->v.i);
+ goto error;
+ }
+ }
+ dm_pool_free(rh->selection->mem, s);
+ break;
+ case DM_REPORT_FIELD_TYPE_SIZE:
+ if (rvw->value) {
+ fs->value->v.d = *(const double *) rvw->value;
+ if (rvw->reserved->type & DM_REPORT_FIELD_RESERVED_VALUE_RANGE)
+ fs->value->next->v.d =
(((const double *) rvw->value)[1]);
+ } else {
+ errno = 0;
+ fs->value->v.d = strtod(s, NULL);
+ if (errno == ERANGE) {
+ log_error(_out_of_range_msg, s, field_id);
+ goto error;
+ }
+ if (custom && (factor = *((const uint64_t *)custom)))
+ fs->value->v.d *= factor;
+ fs->value->v.d /= 512; /* store size in sectors! */
+ if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_SIZE, &fs->value->v.d, NULL)) {
+ log_error("Size value %f found in selection is reserved.", fs->value->v.d);
+ goto error;
+ }
+ }
+ dm_pool_free(rh->selection->mem, s);
+ break;
+ case DM_REPORT_FIELD_TYPE_PERCENT:
+ if (rvw->value) {
+ fs->value->v.i = *(const uint64_t *) rvw->value;
+ if (rvw->reserved->type & DM_REPORT_FIELD_RESERVED_VALUE_RANGE)
+ fs->value->next->v.i = (((const uint64_t *) rvw->value)[1]);
+ } else {
+ errno = 0;
+ fs->value->v.d = strtod(s, NULL);
+ if ((errno == ERANGE) || (fs->value->v.d < 0) || (fs->value->v.d > 100)) {
+ log_error(_out_of_range_msg, s, field_id);
+ goto error;
+ }
+
+ fs->value->v.i = (dm_percent_t) (DM_PERCENT_1 * fs->value->v.d);
+
+ if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_PERCENT, &fs->value->v.i, NULL)) {
+ log_error("Percent value %s found in selection is reserved.", s);
+ goto error;
+ }
+ }
+ break;
+ case DM_REPORT_FIELD_TYPE_STRING_LIST:
+ if (!custom)
+ goto_bad;
+ fs->value->v.l = *(struct selection_str_list **)custom;
+ if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_STRING_LIST, fs->value->v.l, NULL)) {
+ log_error("String list value found in selection is reserved.");
+ goto error;
+ }
+ break;
+ case DM_REPORT_FIELD_TYPE_TIME:
+ if (rvw->value) {
+ fs->value->v.t = *(const time_t *) rvw->value;
+ if (rvw->reserved->type & DM_REPORT_FIELD_RESERVED_VALUE_RANGE)
+ fs->value->next->v.t = (((const time_t *) rvw->value)[1]);
+ } else {
+ if (!(tval = (struct time_value *) custom))
+ goto_bad;
+ fs->value->v.t = tval->t1;
+ if (tval->range)
+ fs->value->next->v.t = tval->t2;
+ if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_TIME, &fs->value->v.t, NULL)) {
+ log_error("Time value found in selection is reserved.");
+ goto error;
+ }
+ }
+ break;
+ default:
+ log_error(INTERNAL_ERROR "_create_field_selection: "
+ "unknown type of selection field %s", field_id);
+ goto error;
+ }
+ }
+
+ return fs;
+bad:
+ log_error(INTERNAL_ERROR "Forbidden NULL custom detected.");
+error:
+ dm_pool_free(rh->selection->mem, fs);
+
+ return NULL;
+}
+
+static struct selection_node *_alloc_selection_node(struct dm_pool *mem, uint32_t type)
+{
+ struct selection_node *sn;
+
+ if (!(sn = dm_pool_zalloc(mem, sizeof(struct selection_node)))) {
+ log_error("dm_report: struct selection_node allocation failed");
+ return NULL;
+ }
+
+ dm_list_init(&sn->list);
+ sn->type = type;
+ if (!(type & SEL_ITEM))
+ dm_list_init(&sn->selection.set);
+
+ return sn;
+}
+
+static void _display_selection_help(struct dm_report *rh)
+{
+ static const char _grow_object_failed_msg[] = "_display_selection_help: dm_pool_grow_object failed";
+ struct op_def *t;
+ const struct dm_report_reserved_value *rv;
+ size_t len_all, len_final = 0;
+ const char **rvs;
+ char *rvs_all;
+
+ log_warn("Selection operands");
+ log_warn("------------------");
+ log_warn(" field - Reporting field.");
+ log_warn(" number - Non-negative integer value.");
+ log_warn(" size - Floating point value with units, 'm' unit used by default if not specified.");
+ log_warn(" percent - Non-negative integer with or without %% suffix.");
+
log_warn(" string - Characters quoted by \' or \" or unquoted."); + log_warn(" string list - Strings enclosed by [ ] or { } and elements delimited by either"); + log_warn(" \"all items must match\" or \"at least one item must match\" operator."); + log_warn(" regular expression - Characters quoted by \' or \" or unquoted."); + log_warn(" "); + if (rh->reserved_values) { + log_warn("Reserved values"); + log_warn("---------------"); + + for (rv = rh->reserved_values; rv->type; rv++) { + for (len_all = 0, rvs = rv->names; *rvs; rvs++) + len_all += strlen(*rvs) + 2; + if (len_all > len_final) + len_final = len_all; + } + + for (rv = rh->reserved_values; rv->type; rv++) { + if (!dm_pool_begin_object(rh->mem, 256)) { + log_error("_display_selection_help: dm_pool_begin_object failed"); + break; + } + for (rvs = rv->names; *rvs; rvs++) { + if (((rvs != rv->names) && !dm_pool_grow_object(rh->mem, ", ", 2)) || + !dm_pool_grow_object(rh->mem, *rvs, strlen(*rvs))) { + log_error(_grow_object_failed_msg); + goto out_reserved_values; + } + } + if (!dm_pool_grow_object(rh->mem, "\0", 1)) { + log_error(_grow_object_failed_msg); + goto out_reserved_values; + } + rvs_all = dm_pool_end_object(rh->mem); + + log_warn(" %-*s - %s [%s]", (int) len_final, rvs_all, rv->description, + _get_field_type_name(rv->type)); + dm_pool_free(rh->mem, rvs_all); + } + log_warn(" "); + } +out_reserved_values: + log_warn("Selection operators"); + log_warn("-------------------"); + log_warn(" Comparison operators:"); + t = _op_cmp; + for (; t->string; t++) + log_warn(" %6s - %s", t->string, t->desc); + log_warn(" "); + log_warn(" Logical and grouping operators:"); + t = _op_log; + for (; t->string; t++) + log_warn(" %4s - %s", t->string, t->desc); + log_warn(" "); +} + +static const char _sel_syntax_error_at_msg[] = "Selection syntax error at '%s'."; +static const char _sel_help_ref_msg[] = "Use \'help\' for selection to get more help."; + +/* + * Selection parser + * + * _parse_* functions + * + * Input: + * s - a pointer to the parsed string + * Output: + * next - a pointer used for next _parse_*'s input, + * next == s if return value is NULL + * return value - a filter node pointer, + * NULL if s doesn't match + */ + +/* + * SELECTION := FIELD_NAME OP_CMP STRING | + * FIELD_NAME OP_CMP NUMBER | + * FIELD_NAME OP_REGEX REGEX + */ +static struct selection_node *_parse_selection(struct dm_report *rh, + const char *s, + const char **next) +{ + struct field_selection *fs; + struct selection_node *sn; + const char *ws, *we; /* field name */ + const char *vs, *ve; /* value */ + const char *last; + uint32_t flags, field_num; + int implicit; + const struct dm_report_field_type *ft; + struct selection_str_list *str_list; + struct reserved_value_wrapper rvw = {0}; + struct time_value tval; + uint64_t factor; + void *custom = NULL; + char *tmp; + char c; + + /* field name */ + if (!(last = _tok_field_name(s, &ws, &we))) { + log_error("Expecting field name"); + goto bad; + } + + /* check if the field with given name exists */ + if (!_get_field(rh, ws, (size_t) (we - ws), &field_num, &implicit)) { + c = we[0]; + tmp = (char *) we; + tmp[0] = '\0'; + _display_fields(rh, 0, 1); + log_warn(" "); + log_error("Unrecognised selection field: %s", ws); + tmp[0] = c; + goto bad; + } + + if (implicit) { + ft = &_implicit_report_fields[field_num]; + if (ft->flags & FLD_CMP_UNCOMPARABLE) { + c = we[0]; + tmp = (char *) we; + tmp[0] = '\0'; + _display_fields(rh, 0, 1); + log_warn(" "); + log_error("Selection field is uncomparable: %s.", ws); + tmp[0] = 
c; + goto bad; + } + } else + ft = &rh->fields[field_num]; + + /* comparison operator */ + if (!(flags = _tok_op_cmp(we, &last))) { + _display_selection_help(rh); + log_error("Unrecognised comparison operator: %s", we); + goto bad; + } + if (!last) { + _display_selection_help(rh); + log_error("Missing value after operator"); + goto bad; + } + + /* comparison value */ + if (flags & FLD_CMP_REGEX) { + /* + * REGEX value + */ + if (!(last = _tok_value_regex(rh, ft, last, &vs, &ve, &flags, &rvw))) + goto_bad; + } else { + /* + * STRING, NUMBER, SIZE, PERCENT, STRING_LIST, TIME value + */ + if (flags & FLD_CMP_NUMBER) { + if (!(ft->flags & (DM_REPORT_FIELD_TYPE_NUMBER | + DM_REPORT_FIELD_TYPE_SIZE | + DM_REPORT_FIELD_TYPE_PERCENT | + DM_REPORT_FIELD_TYPE_TIME))) { + _display_selection_help(rh); + log_error("Operator can be used only with number, size, time or percent fields: %s", ws); + goto bad; + } + } else if (flags & FLD_CMP_TIME) { + if (!(ft->flags & DM_REPORT_FIELD_TYPE_TIME)) { + _display_selection_help(rh); + log_error("Operator can be used only with time fields: %s", ws); + goto bad; + } + } + + if (ft->flags == DM_REPORT_FIELD_TYPE_SIZE || + ft->flags == DM_REPORT_FIELD_TYPE_NUMBER || + ft->flags == DM_REPORT_FIELD_TYPE_PERCENT) + custom = &factor; + else if (ft->flags & DM_REPORT_FIELD_TYPE_TIME) + custom = &tval; + else if (ft->flags == DM_REPORT_FIELD_TYPE_STRING_LIST) + custom = &str_list; + else + custom = NULL; + if (!(last = _tok_value(rh, ft, field_num, implicit, + last, &vs, &ve, &flags, + &rvw, rh->selection->mem, custom))) + goto_bad; + } + + *next = _skip_space(last); + + /* create selection */ + if (!(fs = _create_field_selection(rh, field_num, implicit, vs, (size_t) (ve - vs), flags, &rvw, custom))) + return_NULL; + + /* create selection node */ + if (!(sn = _alloc_selection_node(rh->selection->mem, SEL_ITEM))) + return_NULL; + + /* add selection to selection node */ + sn->selection.item = fs; + + return sn; +bad: + log_error(_sel_syntax_error_at_msg, s); + log_error(_sel_help_ref_msg); + *next = s; + return NULL; +} + +static struct selection_node *_parse_or_ex(struct dm_report *rh, + const char *s, + const char **next, + struct selection_node *or_sn); + +static struct selection_node *_parse_ex(struct dm_report *rh, + const char *s, + const char **next) +{ + static const char _ps_expected_msg[] = "Syntax error: left parenthesis expected at \'%s\'"; + static const char _pe_expected_msg[] = "Syntax error: right parenthesis expected at \'%s\'"; + struct selection_node *sn = NULL; + uint32_t t; + const char *tmp; + + t = _tok_op_log(s, next, SEL_MODIFIER_NOT | SEL_PRECEDENCE_PS); + if (t == SEL_MODIFIER_NOT) { + /* '!' 
'(' EXPRESSION ')' */
+ if (!_tok_op_log(*next, &tmp, SEL_PRECEDENCE_PS)) {
+ log_error(_ps_expected_msg, *next);
+ goto error;
+ }
+ if (!(sn = _parse_or_ex(rh, tmp, next, NULL)))
+ goto error;
+ sn->type |= SEL_MODIFIER_NOT;
+ if (!_tok_op_log(*next, &tmp, SEL_PRECEDENCE_PE)) {
+ log_error(_pe_expected_msg, *next);
+ goto error;
+ }
+ *next = tmp;
+ } else if (t == SEL_PRECEDENCE_PS) {
+ /* '(' EXPRESSION ')' */
+ if (!(sn = _parse_or_ex(rh, *next, &tmp, NULL)))
+ goto error;
+ if (!_tok_op_log(tmp, next, SEL_PRECEDENCE_PE)) {
+ log_error(_pe_expected_msg, *next);
+ goto error;
+ }
+ } else if ((s = _skip_space(s))) {
+ /* SELECTION */
+ sn = _parse_selection(rh, s, next);
+ } else {
+ sn = NULL;
+ *next = s;
+ }
+
+ return sn;
+error:
+ *next = s;
+ return NULL;
+}
+
+/* AND_EXPRESSION := EX (AND_OP AND_EXPRESSION) */
+static struct selection_node *_parse_and_ex(struct dm_report *rh,
+ const char *s,
+ const char **next,
+ struct selection_node *and_sn)
+{
+ struct selection_node *n;
+ const char *tmp;
+
+ n = _parse_ex(rh, s, next);
+ if (!n)
+ goto error;
+
+ if (!_tok_op_log(*next, &tmp, SEL_AND)) {
+ if (!and_sn)
+ return n;
+ dm_list_add(&and_sn->selection.set, &n->list);
+ return and_sn;
+ }
+
+ if (!and_sn) {
+ if (!(and_sn = _alloc_selection_node(rh->selection->mem, SEL_AND)))
+ goto error;
+ }
+ dm_list_add(&and_sn->selection.set, &n->list);
+
+ return _parse_and_ex(rh, tmp, next, and_sn);
+error:
+ *next = s;
+ return NULL;
+}
+
+/* OR_EXPRESSION := AND_EXPRESSION (OR_OP OR_EXPRESSION) */
+static struct selection_node *_parse_or_ex(struct dm_report *rh,
+ const char *s,
+ const char **next,
+ struct selection_node *or_sn)
+{
+ struct selection_node *n;
+ const char *tmp;
+
+ n = _parse_and_ex(rh, s, next, NULL);
+ if (!n)
+ goto error;
+
+ if (!_tok_op_log(*next, &tmp, SEL_OR)) {
+ if (!or_sn)
+ return n;
+ dm_list_add(&or_sn->selection.set, &n->list);
+ return or_sn;
+ }
+
+ if (!or_sn) {
+ if (!(or_sn = _alloc_selection_node(rh->selection->mem, SEL_OR)))
+ goto error;
+ }
+ dm_list_add(&or_sn->selection.set, &n->list);
+
+ return _parse_or_ex(rh, tmp, next, or_sn);
+error:
+ *next = s;
+ return NULL;
+}
+
+static int _alloc_rh_selection(struct dm_report *rh)
+{
+ if (!(rh->selection = dm_pool_zalloc(rh->mem, sizeof(struct selection))) ||
+ !(rh->selection->mem = dm_pool_create("report selection", 10 * 1024))) {
+ log_error("Failed to allocate report selection structure.");
+ if (rh->selection)
+ dm_pool_free(rh->mem, rh->selection);
+ return 0;
+ }
+
+ return 1;
+}
+
+#define SPECIAL_SELECTION_ALL "all"
+
+static int _report_set_selection(struct dm_report *rh, const char *selection, int add_new_fields)
+{
+ struct selection_node *root = NULL;
+ const char *fin, *next;
+
+ if (rh->selection) {
+ if (rh->selection->selection_root)
+ /* Trash any previous selection.
*/ + dm_pool_free(rh->selection->mem, rh->selection->selection_root); + rh->selection->selection_root = NULL; + } else { + if (!_alloc_rh_selection(rh)) + goto_bad; + } + + if (!selection || !selection[0] || !strcasecmp(selection, SPECIAL_SELECTION_ALL)) + return 1; + + rh->selection->add_new_fields = add_new_fields; + + if (!(root = _alloc_selection_node(rh->selection->mem, SEL_OR))) + return 0; + + if (!_parse_or_ex(rh, selection, &fin, root)) + goto_bad; + + next = _skip_space(fin); + if (*next) { + log_error("Expecting logical operator"); + log_error(_sel_syntax_error_at_msg, next); + log_error(_sel_help_ref_msg); + goto bad; + } + + rh->selection->selection_root = root; + return 1; +bad: + dm_pool_free(rh->selection->mem, root); + return 0; +} + +static void _reset_field_props(struct dm_report *rh) +{ + struct field_properties *fp; + dm_list_iterate_items(fp, &rh->field_props) + fp->width = fp->initial_width; + rh->flags |= RH_FIELD_CALC_NEEDED; +} + +int dm_report_set_selection(struct dm_report *rh, const char *selection) +{ + struct row *row; + + if (!_report_set_selection(rh, selection, 0)) + return_0; + + _reset_field_props(rh); + + dm_list_iterate_items(row, &rh->rows) { + row->selected = _check_report_selection(rh, &row->fields); + if (row->field_sel_status) + _implicit_report_fields[row->field_sel_status->props->field_num].report_fn(rh, + rh->mem, row->field_sel_status, row, rh->private); + } + + return 1; +} + +struct dm_report *dm_report_init_with_selection(uint32_t *report_types, + const struct dm_report_object_type *types, + const struct dm_report_field_type *fields, + const char *output_fields, + const char *output_separator, + uint32_t output_flags, + const char *sort_keys, + const char *selection, + const struct dm_report_reserved_value reserved_values[], + void *private_data) +{ + struct dm_report *rh; + + _implicit_report_fields = _implicit_special_report_fields_with_selection; + + if (!(rh = dm_report_init(report_types, types, fields, output_fields, + output_separator, output_flags, sort_keys, private_data))) + return NULL; + + if (!selection || !selection[0]) { + rh->selection = NULL; + return rh; + } + + if (!_check_reserved_values_supported(fields, reserved_values)) { + log_error(INTERNAL_ERROR "dm_report_init_with_selection: " + "trying to register unsupported reserved value type, " + "skipping report selection"); + return rh; + } + rh->reserved_values = reserved_values; + + if (!strcasecmp(selection, SPECIAL_FIELD_HELP_ID) || + !strcmp(selection, SPECIAL_FIELD_HELP_ALT_ID)) { + _display_fields(rh, 0, 1); + log_warn(" "); + _display_selection_help(rh); + rh->flags |= RH_ALREADY_REPORTED; + return rh; + } + + if (!_report_set_selection(rh, selection, 1)) + goto_bad; + + _dm_report_init_update_types(rh, report_types); + + return rh; +bad: + dm_report_free(rh); + return NULL; +} + +/* + * Print row of headings + */ +static int _report_headings(struct dm_report *rh) +{ + const struct dm_report_field_type *fields; + struct field_properties *fp; + const char *heading; + char *buf = NULL; + size_t buf_size = 0; + + rh->flags |= RH_HEADINGS_PRINTED; + + if (!(rh->flags & DM_REPORT_OUTPUT_HEADINGS)) + return 1; + + if (!dm_pool_begin_object(rh->mem, 128)) { + log_error("dm_report: " + "dm_pool_begin_object failed for headings"); + return 0; + } + + dm_list_iterate_items(fp, &rh->field_props) { + if ((int) buf_size < fp->width) + buf_size = (size_t) fp->width; + } + /* Including trailing '\0'! 
*/ + buf_size++; + + if (!(buf = dm_malloc(buf_size))) { + log_error("dm_report: Could not allocate memory for heading buffer."); + goto bad; + } + + /* First heading line */ + dm_list_iterate_items(fp, &rh->field_props) { + if (fp->flags & FLD_HIDDEN) + continue; + + fields = fp->implicit ? _implicit_report_fields : rh->fields; + + heading = fields[fp->field_num].heading; + if (rh->flags & DM_REPORT_OUTPUT_ALIGNED) { + if (dm_snprintf(buf, buf_size, "%-*.*s", + fp->width, fp->width, heading) < 0) { + log_error("dm_report: snprintf heading failed"); + goto bad; + } + if (!dm_pool_grow_object(rh->mem, buf, fp->width)) { + log_error("dm_report: Failed to generate report headings for printing"); + goto bad; + } + } else if (!dm_pool_grow_object(rh->mem, heading, 0)) { + log_error("dm_report: Failed to generate report headings for printing"); + goto bad; + } + + if (!dm_list_end(&rh->field_props, &fp->list)) + if (!dm_pool_grow_object(rh->mem, rh->separator, 0)) { + log_error("dm_report: Failed to generate report headings for printing"); + goto bad; + } + } + if (!dm_pool_grow_object(rh->mem, "\0", 1)) { + log_error("dm_report: Failed to generate report headings for printing"); + goto bad; + } + + /* print all headings */ + heading = (char *) dm_pool_end_object(rh->mem); + log_print("%s", heading); + + dm_pool_free(rh->mem, (void *)heading); + dm_free(buf); + + return 1; + + bad: + dm_free(buf); + dm_pool_abandon_object(rh->mem); + return 0; +} + +static int _should_display_row(struct row *row) +{ + return row->field_sel_status || row->selected; +} + +static void _recalculate_fields(struct dm_report *rh) +{ + struct row *row; + struct dm_report_field *field; + int len; + + dm_list_iterate_items(row, &rh->rows) { + dm_list_iterate_items(field, &row->fields) { + if ((rh->flags & RH_SORT_REQUIRED) && + (field->props->flags & FLD_SORT_KEY)) { + (*row->sort_fields)[field->props->sort_posn] = field; + } + + if (_should_display_row(row)) { + len = (int) strlen(field->report_string); + if ((len > field->props->width)) + field->props->width = len; + + } + } + } + + rh->flags &= ~RH_FIELD_CALC_NEEDED; +} + +int dm_report_column_headings(struct dm_report *rh) +{ + /* Columns-as-rows does not use _report_headings. */ + if (rh->flags & DM_REPORT_OUTPUT_COLUMNS_AS_ROWS) + return 1; + + if (rh->flags & RH_FIELD_CALC_NEEDED) + _recalculate_fields(rh); + + return _report_headings(rh); +} + +/* + * Sort rows of data + */ +static int _row_compare(const void *a, const void *b) +{ + const struct row *rowa = *(const struct row * const *) a; + const struct row *rowb = *(const struct row * const *) b; + const struct dm_report_field *sfa, *sfb; + uint32_t cnt; + + for (cnt = 0; cnt < rowa->rh->keys_count; cnt++) { + sfa = (*rowa->sort_fields)[cnt]; + sfb = (*rowb->sort_fields)[cnt]; + if ((sfa->props->flags & DM_REPORT_FIELD_TYPE_NUMBER) || + (sfa->props->flags & DM_REPORT_FIELD_TYPE_SIZE) || + (sfa->props->flags & DM_REPORT_FIELD_TYPE_TIME)) { + const uint64_t numa = + *(const uint64_t *) sfa->sort_value; + const uint64_t numb = + *(const uint64_t *) sfb->sort_value; + + if (numa == numb) + continue; + + if (sfa->props->flags & FLD_ASCENDING) { + return (numa > numb) ? 1 : -1; + } else { /* FLD_DESCENDING */ + return (numa < numb) ? 
1 : -1; + } + } else { + /* DM_REPORT_FIELD_TYPE_STRING + * DM_REPORT_FIELD_TYPE_STRING_LIST */ + const char *stra = (const char *) sfa->sort_value; + const char *strb = (const char *) sfb->sort_value; + int cmp = strcmp(stra, strb); + + if (!cmp) + continue; + + if (sfa->props->flags & FLD_ASCENDING) { + return (cmp > 0) ? 1 : -1; + } else { /* FLD_DESCENDING */ + return (cmp < 0) ? 1 : -1; + } + } + } + + return 0; /* Identical */ +} + +static int _sort_rows(struct dm_report *rh) +{ + struct row *(*rows)[]; + uint32_t count = 0; + struct row *row; + + if (!(rows = dm_pool_alloc(rh->mem, sizeof(**rows) * + dm_list_size(&rh->rows)))) { + log_error("dm_report: sort array allocation failed"); + return 0; + } + + dm_list_iterate_items(row, &rh->rows) + (*rows)[count++] = row; + + qsort(rows, count, sizeof(**rows), _row_compare); + + dm_list_init(&rh->rows); + while (count--) + dm_list_add_h(&rh->rows, &(*rows)[count]->list); + + return 1; +} + +#define STANDARD_QUOTE "\'" +#define STANDARD_PAIR "=" + +#define JSON_INDENT_UNIT 4 +#define JSON_SPACE " " +#define JSON_QUOTE "\"" +#define JSON_PAIR ":" +#define JSON_SEPARATOR "," +#define JSON_OBJECT_START "{" +#define JSON_OBJECT_END "}" +#define JSON_ARRAY_START "[" +#define JSON_ARRAY_END "]" +#define JSON_ESCAPE_CHAR "\\" + +#define UNABLE_TO_EXTEND_OUTPUT_LINE_MSG "dm_report: Unable to extend output line" + +static int _is_basic_report(struct dm_report *rh) +{ + return rh->group_item && + (rh->group_item->group->type == DM_REPORT_GROUP_BASIC); +} + +static int _is_json_report(struct dm_report *rh) +{ + return rh->group_item && + (rh->group_item->group->type == DM_REPORT_GROUP_JSON); +} + +/* + * Produce report output + */ +static int _output_field(struct dm_report *rh, struct dm_report_field *field) +{ + const struct dm_report_field_type *fields = field->props->implicit ? _implicit_report_fields + : rh->fields; + char *field_id; + int32_t width; + uint32_t align; + const char *repstr; + const char *p1_repstr, *p2_repstr; + char *buf = NULL; + size_t buf_size = 0; + + if (_is_json_report(rh)) { + if (!dm_pool_grow_object(rh->mem, JSON_QUOTE, 1) || + !dm_pool_grow_object(rh->mem, fields[field->props->field_num].id, 0) || + !dm_pool_grow_object(rh->mem, JSON_QUOTE, 1) || + !dm_pool_grow_object(rh->mem, JSON_PAIR, 1) || + !dm_pool_grow_object(rh->mem, JSON_QUOTE, 1)) { + log_error("dm_report: Unable to extend output line"); + return 0; + } + } else if (rh->flags & DM_REPORT_OUTPUT_FIELD_NAME_PREFIX) { + if (!(field_id = dm_strdup(fields[field->props->field_num].id))) { + log_error("dm_report: Failed to copy field name"); + return 0; + } + + if (!dm_pool_grow_object(rh->mem, rh->output_field_name_prefix, 0)) { + log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG); + dm_free(field_id); + return 0; + } + + if (!dm_pool_grow_object(rh->mem, _toupperstr(field_id), 0)) { + log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG); + dm_free(field_id); + return 0; + } + + dm_free(field_id); + + if (!dm_pool_grow_object(rh->mem, STANDARD_PAIR, 1)) { + log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG); + return 0; + } + + if (!(rh->flags & DM_REPORT_OUTPUT_FIELD_UNQUOTED) && + !dm_pool_grow_object(rh->mem, STANDARD_QUOTE, 1)) { + log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG); + return 0; + } + } + + repstr = field->report_string; + width = field->props->width; + if (!(rh->flags & DM_REPORT_OUTPUT_ALIGNED)) { + if (_is_json_report(rh)) { + /* Escape any JSON_QUOTE that may appear in reported string. 
*/ + p1_repstr = repstr; + while ((p2_repstr = strstr(p1_repstr, JSON_QUOTE))) { + if (p2_repstr > p1_repstr) { + if (!dm_pool_grow_object(rh->mem, p1_repstr, p2_repstr - p1_repstr)) { + log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG); + return 0; + } + } + if (!dm_pool_grow_object(rh->mem, JSON_ESCAPE_CHAR, 1) || + !dm_pool_grow_object(rh->mem, JSON_QUOTE, 1)) { + log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG); + return 0; + } + p1_repstr = p2_repstr + 1; + } + + if (!dm_pool_grow_object(rh->mem, p1_repstr, 0)) { + log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG); + return 0; + } + } else { + if (!dm_pool_grow_object(rh->mem, repstr, 0)) { + log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG); + return 0; + } + } + } else { + if (!(align = field->props->flags & DM_REPORT_FIELD_ALIGN_MASK)) + align = ((field->props->flags & DM_REPORT_FIELD_TYPE_NUMBER) || + (field->props->flags & DM_REPORT_FIELD_TYPE_SIZE)) ? + DM_REPORT_FIELD_ALIGN_RIGHT : DM_REPORT_FIELD_ALIGN_LEFT; + + /* Including trailing '\0'! */ + buf_size = width + 1; + if (!(buf = dm_malloc(buf_size))) { + log_error("dm_report: Could not allocate memory for output line buffer."); + return 0; + } + + if (align & DM_REPORT_FIELD_ALIGN_LEFT) { + if (dm_snprintf(buf, buf_size, "%-*.*s", + width, width, repstr) < 0) { + log_error("dm_report: left-aligned snprintf() failed"); + goto bad; + } + if (!dm_pool_grow_object(rh->mem, buf, width)) { + log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG); + goto bad; + } + } else if (align & DM_REPORT_FIELD_ALIGN_RIGHT) { + if (dm_snprintf(buf, buf_size, "%*.*s", + width, width, repstr) < 0) { + log_error("dm_report: right-aligned snprintf() failed"); + goto bad; + } + if (!dm_pool_grow_object(rh->mem, buf, width)) { + log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG); + goto bad; + } + } + } + + if (rh->flags & DM_REPORT_OUTPUT_FIELD_NAME_PREFIX) { + if (!(rh->flags & DM_REPORT_OUTPUT_FIELD_UNQUOTED)) { + if (!dm_pool_grow_object(rh->mem, STANDARD_QUOTE, 1)) { + log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG); + goto bad; + } + } + } else if (_is_json_report(rh)) { + if (!dm_pool_grow_object(rh->mem, JSON_QUOTE, 1)) { + log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG); + goto bad; + } + } + + dm_free(buf); + return 1; + +bad: + dm_free(buf); + return 0; +} + +static void _destroy_rows(struct dm_report *rh) +{ + /* + * free the first row allocated to this report: since this is a + * pool allocation this will also free all subsequently allocated + * rows from the report and any associated string data. + */ + if (rh->first_row) + dm_pool_free(rh->mem, rh->first_row); + rh->first_row = NULL; + dm_list_init(&rh->rows); + + /* Reset field widths to original values. */ + _reset_field_props(rh); +} + +static int _output_as_rows(struct dm_report *rh) +{ + const struct dm_report_field_type *fields; + struct field_properties *fp; + struct dm_report_field *field; + struct row *row; + + dm_list_iterate_items(fp, &rh->field_props) { + if (fp->flags & FLD_HIDDEN) { + dm_list_iterate_items(row, &rh->rows) { + field = dm_list_item(dm_list_first(&row->fields), struct dm_report_field); + dm_list_del(&field->list); + } + continue; + } + + fields = fp->implicit ? 
_implicit_report_fields : rh->fields; + + if (!dm_pool_begin_object(rh->mem, 512)) { + log_error("dm_report: Unable to allocate output line"); + return 0; + } + + if ((rh->flags & DM_REPORT_OUTPUT_HEADINGS)) { + if (!dm_pool_grow_object(rh->mem, fields[fp->field_num].heading, 0)) { + log_error("dm_report: Failed to extend row for field name"); + goto bad; + } + if (!dm_pool_grow_object(rh->mem, rh->separator, 0)) { + log_error("dm_report: Failed to extend row with separator"); + goto bad; + } + } + + dm_list_iterate_items(row, &rh->rows) { + if ((field = dm_list_item(dm_list_first(&row->fields), struct dm_report_field))) { + if (!_output_field(rh, field)) + goto bad; + dm_list_del(&field->list); + } + + if (!dm_list_end(&rh->rows, &row->list)) + if (!dm_pool_grow_object(rh->mem, rh->separator, 0)) { + log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG); + goto bad; + } + } + + if (!dm_pool_grow_object(rh->mem, "\0", 1)) { + log_error("dm_report: Failed to terminate row"); + goto bad; + } + log_print("%s", (char *) dm_pool_end_object(rh->mem)); + } + + _destroy_rows(rh); + + return 1; + + bad: + dm_pool_abandon_object(rh->mem); + return 0; +} + +static int _output_as_columns(struct dm_report *rh) +{ + struct dm_list *fh, *rowh, *ftmp, *rtmp; + struct row *row = NULL; + struct dm_report_field *field; + struct dm_list *last_row; + int do_field_delim; + char *line; + + /* If headings not printed yet, calculate field widths and print them */ + if (!(rh->flags & RH_HEADINGS_PRINTED)) + _report_headings(rh); + + /* Print and clear buffer */ + last_row = dm_list_last(&rh->rows); + dm_list_iterate_safe(rowh, rtmp, &rh->rows) { + row = dm_list_item(rowh, struct row); + + if (!_should_display_row(row)) + continue; + + if (!dm_pool_begin_object(rh->mem, 512)) { + log_error("dm_report: Unable to allocate output line"); + return 0; + } + + if (_is_json_report(rh)) { + if (!dm_pool_grow_object(rh->mem, JSON_OBJECT_START, 0)) { + log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG); + goto bad; + } + } + + do_field_delim = 0; + + dm_list_iterate_safe(fh, ftmp, &row->fields) { + field = dm_list_item(fh, struct dm_report_field); + if (field->props->flags & FLD_HIDDEN) + continue; + + if (do_field_delim) { + if (_is_json_report(rh)) { + if (!dm_pool_grow_object(rh->mem, JSON_SEPARATOR, 0) || + !dm_pool_grow_object(rh->mem, JSON_SPACE, 0)) { + log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG); + goto bad; + } + } else { + if (!dm_pool_grow_object(rh->mem, rh->separator, 0)) { + log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG); + goto bad; + } + } + } else + do_field_delim = 1; + + if (!_output_field(rh, field)) + goto bad; + + if (!(rh->flags & DM_REPORT_OUTPUT_MULTIPLE_TIMES)) + dm_list_del(&field->list); + } + + if (_is_json_report(rh)) { + if (!dm_pool_grow_object(rh->mem, JSON_OBJECT_END, 0)) { + log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG); + goto bad; + } + if (rowh != last_row && + !dm_pool_grow_object(rh->mem, JSON_SEPARATOR, 0)) { + log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG); + goto bad; + } + } + + if (!dm_pool_grow_object(rh->mem, "\0", 1)) { + log_error("dm_report: Unable to terminate output line"); + goto bad; + } + + line = (char *) dm_pool_end_object(rh->mem); + log_print("%*s", rh->group_item ? 
rh->group_item->group->indent + (int) strlen(line) : 0, line); + if (!(rh->flags & DM_REPORT_OUTPUT_MULTIPLE_TIMES)) + dm_list_del(&row->list); + } + + if (!(rh->flags & DM_REPORT_OUTPUT_MULTIPLE_TIMES)) + _destroy_rows(rh); + + return 1; + + bad: + dm_pool_abandon_object(rh->mem); + return 0; +} + +int dm_report_is_empty(struct dm_report *rh) +{ + return dm_list_empty(&rh->rows) ? 1 : 0; +} + +static struct report_group_item *_get_topmost_report_group_item(struct dm_report_group *group) +{ + struct report_group_item *item; + + if (group && !dm_list_empty(&group->items)) + item = dm_list_item(dm_list_first(&group->items), struct report_group_item); + else + item = NULL; + + return item; +} + +static void _json_output_start(struct dm_report_group *group) +{ + if (!group->indent) { + log_print(JSON_OBJECT_START); + group->indent += JSON_INDENT_UNIT; + } +} + +static int _json_output_array_start(struct dm_pool *mem, struct report_group_item *item) +{ + const char *name = (const char *) item->data; + char *output; + + if (!dm_pool_begin_object(mem, 32)) { + log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG); + return 0; + } + + if (!dm_pool_grow_object(mem, JSON_QUOTE, 1) || + !dm_pool_grow_object(mem, name, 0) || + !dm_pool_grow_object(mem, JSON_QUOTE JSON_PAIR JSON_SPACE JSON_ARRAY_START, 0) || + !dm_pool_grow_object(mem, "\0", 1) || + !(output = dm_pool_end_object(mem))) { + log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG); + goto bad; + } + + if (item->parent->store.finished_count > 0) + log_print("%*s", item->group->indent + (int) sizeof(JSON_SEPARATOR) - 1, JSON_SEPARATOR); + + if (item->parent->parent && item->parent->data) { + log_print("%*s", item->group->indent + (int) sizeof(JSON_OBJECT_START) - 1, JSON_OBJECT_START); + item->group->indent += JSON_INDENT_UNIT; + } + + log_print("%*s", item->group->indent + (int) strlen(output), output); + item->group->indent += JSON_INDENT_UNIT; + + dm_pool_free(mem, output); + return 1; +bad: + dm_pool_abandon_object(mem); + return 0; +} + +static int _prepare_json_report_output(struct dm_report *rh) +{ + _json_output_start(rh->group_item->group); + + if (rh->group_item->output_done && dm_list_empty(&rh->rows)) + return 1; + + /* + * If this report is in JSON group, it must be at the + * top of the stack of reports so the output from + * different reports do not interleave with each other. 
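+ * (Illustrative: if report A is pushed and then report B, B sits
+ * on top of the stack and must be output and popped before A may
+ * emit further rows into the JSON document.)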
+ */
+ if (_get_topmost_report_group_item(rh->group_item->group) != rh->group_item) {
+ log_error("dm_report: dm_report_output: interleaved reports detected for JSON output");
+ return 0;
+ }
+
+ if (rh->group_item->needs_closing) {
+ log_error("dm_report: dm_report_output: unfinished JSON output detected");
+ return 0;
+ }
+
+ if (!_json_output_array_start(rh->mem, rh->group_item))
+ return_0;
+
+ rh->group_item->needs_closing = 1;
+ return 1;
+}
+
+static int _print_basic_report_header(struct dm_report *rh)
+{
+ const char *report_name = (const char *) rh->group_item->data;
+ size_t len = strlen(report_name);
+ char *underline;
+
+ if (!(underline = dm_pool_zalloc(rh->mem, len + 1)))
+ return_0;
+
+ memset(underline, '=', len);
+
+ if (rh->group_item->parent->store.finished_count > 0)
+ log_print("%s", "");
+ log_print("%s", report_name);
+ log_print("%s", underline);
+
+ dm_pool_free(rh->mem, underline);
+ return 1;
+}
+
+int dm_report_output(struct dm_report *rh)
+{
+ int r = 0;
+
+ if (_is_json_report(rh) &&
+ !_prepare_json_report_output(rh))
+ return_0;
+
+ if (dm_list_empty(&rh->rows)) {
+ r = 1;
+ goto out;
+ }
+
+ if (rh->flags & RH_FIELD_CALC_NEEDED)
+ _recalculate_fields(rh);
+
+ if ((rh->flags & RH_SORT_REQUIRED))
+ _sort_rows(rh);
+
+ if (_is_basic_report(rh) && !_print_basic_report_header(rh))
+ goto_out;
+
+ if ((rh->flags & DM_REPORT_OUTPUT_COLUMNS_AS_ROWS))
+ r = _output_as_rows(rh);
+ else
+ r = _output_as_columns(rh);
+out:
+ if (r && rh->group_item)
+ rh->group_item->output_done = 1;
+ return r;
+}
+
+void dm_report_destroy_rows(struct dm_report *rh)
+{
+ _destroy_rows(rh);
+}
+
+struct dm_report_group *dm_report_group_create(dm_report_group_type_t type, void *data)
+{
+ struct dm_report_group *group;
+ struct dm_pool *mem;
+ struct report_group_item *item;
+
+ if (!(mem = dm_pool_create("report_group", 1024))) {
+ log_error("dm_report: dm_report_group_create: failed to allocate mem pool");
+ return NULL;
+ }
+
+ if (!(group = dm_pool_zalloc(mem, sizeof(*group)))) {
+ log_error("dm_report: failed to allocate report group structure");
+ goto bad;
+ }
+
+ group->mem = mem;
+ group->type = type;
+ dm_list_init(&group->items);
+
+ if (!(item = dm_pool_zalloc(mem, sizeof(*item)))) {
+ log_error("dm_report: failed to allocate root report group item");
+ goto bad;
+ }
+
+ dm_list_add_h(&group->items, &item->list);
+
+ return group;
+bad:
+ dm_pool_destroy(mem);
+ return NULL;
+}
+
+static int _report_group_push_single(struct report_group_item *item, void *data)
+{
+ struct report_group_item *item_iter;
+ unsigned count = 0;
+
+ dm_list_iterate_items(item_iter, &item->group->items) {
+ if (item_iter->report)
+ count++;
+ }
+
+ if (count > 1) {
+ log_error("dm_report: unable to add more than one report "
+ "to current report group");
+ return 0;
+ }
+
+ return 1;
+}
+
+static int _report_group_push_basic(struct report_group_item *item, const char *name)
+{
+ if (item->report) {
+ if (!(item->report->flags & DM_REPORT_OUTPUT_BUFFERED))
+ item->report->flags &= ~(DM_REPORT_OUTPUT_MULTIPLE_TIMES);
+ } else {
+ if (!name && item->parent->store.finished_count > 0)
+ log_print("%s", "");
+ }
+
+ return 1;
+}
+
+static int _report_group_push_json(struct report_group_item *item, const char *name)
+{
+ if (name && !(item->data = dm_pool_strdup(item->group->mem, name))) {
+ log_error("dm_report: failed to duplicate json item name");
+ return 0;
+ }
+
+ if (item->report) {
+ item->report->flags &= ~(DM_REPORT_OUTPUT_ALIGNED |
+ DM_REPORT_OUTPUT_HEADINGS |
+ DM_REPORT_OUTPUT_COLUMNS_AS_ROWS);
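+ /*
+ * JSON output is machine-readable: alignment, headings and
+ * transposed layout are dropped, while buffering is forced so
+ * rows are emitted only after the enclosing JSON array opens.
+ */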
+ item->report->flags |= DM_REPORT_OUTPUT_BUFFERED; + } else { + _json_output_start(item->group); + if (name) { + if (!_json_output_array_start(item->group->mem, item)) + return_0; + } else { + if (!item->parent->parent) { + log_error("dm_report: can't use unnamed object at top level of JSON output"); + return 0; + } + if (item->parent->store.finished_count > 0) + log_print("%*s", item->group->indent + (int) sizeof(JSON_SEPARATOR) - 1, JSON_SEPARATOR); + log_print("%*s", item->group->indent + (int) sizeof(JSON_OBJECT_START) - 1, JSON_OBJECT_START); + item->group->indent += JSON_INDENT_UNIT; + } + + item->output_done = 1; + item->needs_closing = 1; + } + + return 1; +} + +int dm_report_group_push(struct dm_report_group *group, struct dm_report *report, void *data) +{ + struct report_group_item *item, *tmp_item; + + if (!group) + return 1; + + if (!(item = dm_pool_zalloc(group->mem, sizeof(*item)))) { + log_error("dm_report: dm_report_group_push: group item allocation failed"); + return 0; + } + + if ((item->report = report)) { + item->store.orig_report_flags = report->flags; + report->group_item = item; + } + + item->group = group; + item->data = data; + + dm_list_iterate_items(tmp_item, &group->items) { + if (!tmp_item->report) { + item->parent = tmp_item; + break; + } + } + + dm_list_add_h(&group->items, &item->list); + + switch (group->type) { + case DM_REPORT_GROUP_SINGLE: + if (!_report_group_push_single(item, data)) + goto_bad; + break; + case DM_REPORT_GROUP_BASIC: + if (!_report_group_push_basic(item, data)) + goto_bad; + break; + case DM_REPORT_GROUP_JSON: + if (!_report_group_push_json(item, data)) + goto_bad; + break; + default: + goto_bad; + } + + return 1; +bad: + dm_list_del(&item->list); + dm_pool_free(group->mem, item); + return 0; +} + +static int _report_group_pop_single(struct report_group_item *item) +{ + return 1; +} + +static int _report_group_pop_basic(struct report_group_item *item) +{ + return 1; +} + +static int _report_group_pop_json(struct report_group_item *item) +{ + if (item->output_done && item->needs_closing) { + if (item->data) { + item->group->indent -= JSON_INDENT_UNIT; + log_print("%*s", item->group->indent + (int) sizeof(JSON_ARRAY_END) - 1, JSON_ARRAY_END); + } + if (item->parent->data && item->parent->parent) { + item->group->indent -= JSON_INDENT_UNIT; + log_print("%*s", item->group->indent + (int) sizeof(JSON_OBJECT_END) - 1, JSON_OBJECT_END); + } + item->needs_closing = 0; + } + + return 1; +} + +int dm_report_group_pop(struct dm_report_group *group) +{ + struct report_group_item *item; + + if (!group) + return 1; + + if (!(item = _get_topmost_report_group_item(group))) { + log_error("dm_report: dm_report_group_pop: group has no items"); + return 0; + } + + switch (group->type) { + case DM_REPORT_GROUP_SINGLE: + if (!_report_group_pop_single(item)) + return_0; + break; + case DM_REPORT_GROUP_BASIC: + if (!_report_group_pop_basic(item)) + return_0; + break; + case DM_REPORT_GROUP_JSON: + if (!_report_group_pop_json(item)) + return_0; + break; + default: + return 0; + } + + dm_list_del(&item->list); + + if (item->report) { + item->report->flags = item->store.orig_report_flags; + item->report->group_item = NULL; + } + + if (item->parent) + item->parent->store.finished_count++; + + dm_pool_free(group->mem, item); + return 1; +} + +int dm_report_group_output_and_pop_all(struct dm_report_group *group) +{ + struct report_group_item *item, *tmp_item; + + dm_list_iterate_items_safe(item, tmp_item, &group->items) { + if (!item->parent) { + 
item->store.finished_count = 0; + continue; + } + if (item->report && !dm_report_output(item->report)) + return_0; + if (!dm_report_group_pop(group)) + return_0; + } + + if (group->type == DM_REPORT_GROUP_JSON) { + _json_output_start(group); + log_print(JSON_OBJECT_END); + group->indent -= JSON_INDENT_UNIT; + } + + return 1; +} + +int dm_report_group_destroy(struct dm_report_group *group) +{ + int r = 1; + + if (!group) + return 1; + + if (!dm_report_group_output_and_pop_all(group)) + r = 0; + + dm_pool_destroy(group->mem); + return r; +} diff --git a/device_mapper/libdm-stats.c b/device_mapper/libdm-stats.c new file mode 100644 index 000000000..6cd08a773 --- /dev/null +++ b/device_mapper/libdm-stats.c @@ -0,0 +1,5095 @@ +/* + * Copyright (C) 2016 Red Hat, Inc. All rights reserved. + * + * _stats_get_extents_for_file() based in part on filefrag_fiemap() from + * e2fsprogs/misc/filefrag.c. Copyright 2003 by Theodore Ts'o. + * + * This file is part of the device-mapper userspace tools. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU Lesser General Public License v.2.1. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "misc/dmlib.h" +#include "misc/kdev_t.h" + +#include "math.h" /* log10() */ + +#include <sys/sysmacros.h> +#include <sys/ioctl.h> +#include <sys/vfs.h> /* fstatfs */ +#include <unistd.h> + +#ifdef __linux__ + #include <linux/fs.h> /* FS_IOC_FIEMAP */ +#endif + +#ifdef HAVE_LINUX_FIEMAP_H + #include <linux/fiemap.h> /* fiemap */ +#endif + +#ifdef HAVE_LINUX_MAGIC_H + #include <linux/magic.h> /* BTRFS_SUPER_MAGIC */ +#endif + +#define DM_STATS_REGION_NOT_PRESENT UINT64_MAX +#define DM_STATS_GROUP_NOT_PRESENT DM_STATS_GROUP_NONE + +#define NSEC_PER_USEC 1000L +#define NSEC_PER_MSEC 1000000L +#define NSEC_PER_SEC 1000000000L + +#define PRECISE_ARG "precise_timestamps" +#define HISTOGRAM_ARG "histogram:" + +#define STATS_ROW_BUF_LEN 4096 +#define STATS_MSG_BUF_LEN 1024 +#define STATS_FIE_BUF_LEN 2048 + +#define SECTOR_SHIFT 9L + +/* Histogram bin */ +struct dm_histogram_bin { + uint64_t upper; /* Upper bound on this bin. */ + uint64_t count; /* Count value for this bin. */ +}; + +struct dm_histogram { + /* The stats handle this histogram belongs to. */ + const struct dm_stats *dms; + /* The region this histogram belongs to. */ + const struct dm_stats_region *region; + uint64_t sum; /* Sum of histogram bin counts. */ + int nr_bins; /* Number of histogram bins assigned. */ + struct dm_histogram_bin bins[0]; +}; + +/* + * See Documentation/device-mapper/statistics.txt for full descriptions + * of the device-mapper statistics counter fields. 
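+ * (The *_nsecs counters are interpreted together with the
+ * per-region timescale multiplier, since the kernel reports
+ * them in milliseconds unless precise_timestamps was requested.)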
+ */ +struct dm_stats_counters { + uint64_t reads; /* Num reads completed */ + uint64_t reads_merged; /* Num reads merged */ + uint64_t read_sectors; /* Num sectors read */ + uint64_t read_nsecs; /* Num milliseconds spent reading */ + uint64_t writes; /* Num writes completed */ + uint64_t writes_merged; /* Num writes merged */ + uint64_t write_sectors; /* Num sectors written */ + uint64_t write_nsecs; /* Num milliseconds spent writing */ + uint64_t io_in_progress; /* Num I/Os currently in progress */ + uint64_t io_nsecs; /* Num milliseconds spent doing I/Os */ + uint64_t weighted_io_nsecs; /* Weighted num milliseconds doing I/Os */ + uint64_t total_read_nsecs; /* Total time spent reading in milliseconds */ + uint64_t total_write_nsecs; /* Total time spent writing in milliseconds */ + struct dm_histogram *histogram; /* Histogram. */ +}; + +struct dm_stats_region { + uint64_t region_id; /* as returned by @stats_list */ + uint64_t group_id; + uint64_t start; + uint64_t len; + uint64_t step; + char *program_id; + char *aux_data; + uint64_t timescale; /* precise_timestamps is per-region */ + struct dm_histogram *bounds; /* histogram configuration */ + struct dm_histogram *histogram; /* aggregate cache */ + struct dm_stats_counters *counters; +}; + +struct dm_stats_group { + uint64_t group_id; + const char *alias; + dm_bitset_t regions; + struct dm_histogram *histogram; +}; + +struct dm_stats { + /* device binding */ + int bind_major; /* device major that this dm_stats object is bound to */ + int bind_minor; /* device minor that this dm_stats object is bound to */ + char *bind_name; /* device-mapper device name */ + char *bind_uuid; /* device-mapper UUID */ + char *program_id; /* default program_id for this handle */ + const char *name; /* cached device_name used for reporting */ + struct dm_pool *mem; /* memory pool for region and counter tables */ + struct dm_pool *hist_mem; /* separate pool for histogram tables */ + struct dm_pool *group_mem; /* separate pool for group tables */ + uint64_t nr_regions; /* total number of present regions */ + uint64_t max_region; /* size of the regions table */ + uint64_t interval_ns; /* sampling interval in nanoseconds */ + uint64_t timescale; /* default sample value multiplier */ + int precise; /* use precise_timestamps when creating regions */ + struct dm_stats_region *regions; + struct dm_stats_group *groups; + /* statistics cursor */ + uint64_t walk_flags; /* walk control flags */ + uint64_t cur_flags; + uint64_t cur_group; + uint64_t cur_region; + uint64_t cur_area; +}; + +#define PROC_SELF_COMM "/proc/self/comm" +static char *_program_id_from_proc(void) +{ + FILE *comm = NULL; + char buf[STATS_ROW_BUF_LEN]; + + if (!(comm = fopen(PROC_SELF_COMM, "r"))) + return_NULL; + + if (!fgets(buf, sizeof(buf), comm)) { + log_error("Could not read from %s", PROC_SELF_COMM); + if (fclose(comm)) + stack; + return NULL; + } + + if (fclose(comm)) + stack; + + return dm_strdup(buf); +} + +static uint64_t _nr_areas(uint64_t len, uint64_t step) +{ + /* Default is one area. */ + if (!len || !step) + return 1; + /* + * drivers/md/dm-stats.c::message_stats_create() + * A region may be sub-divided into areas with their own counters. + * Any partial area at the end of the region is treated as an + * additional complete area. 
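+ * For example, len = 100 and step = 30 give (100 + 30 - 1) / 30 = 4
+ * areas: three full 30-sector areas plus one partial area of 10.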
+ */ + return (len + step - 1) / step; +} + +static uint64_t _nr_areas_region(struct dm_stats_region *region) +{ + return _nr_areas(region->len, region->step); +} + +struct dm_stats *dm_stats_create(const char *program_id) +{ + size_t hist_hint = sizeof(struct dm_histogram_bin); + size_t group_hint = sizeof(struct dm_stats_group); + struct dm_stats *dms = NULL; + + if (!(dms = dm_zalloc(sizeof(*dms)))) + return_NULL; + + /* FIXME: better hint. */ + if (!(dms->mem = dm_pool_create("stats_pool", 4096))) { + dm_free(dms); + return_NULL; + } + + if (!(dms->hist_mem = dm_pool_create("histogram_pool", hist_hint))) + goto_bad; + + if (!(dms->group_mem = dm_pool_create("group_pool", group_hint))) + goto_bad; + + if (!program_id || !strlen(program_id)) + dms->program_id = _program_id_from_proc(); + else + dms->program_id = dm_strdup(program_id); + + if (!dms->program_id) { + log_error("Could not allocate memory for program_id"); + goto bad; + } + + dms->bind_major = -1; + dms->bind_minor = -1; + dms->bind_name = NULL; + dms->bind_uuid = NULL; + + dms->name = NULL; + + /* by default all regions use msec precision */ + dms->timescale = NSEC_PER_MSEC; + dms->precise = 0; + + dms->nr_regions = DM_STATS_REGION_NOT_PRESENT; + dms->max_region = DM_STATS_REGION_NOT_PRESENT; + dms->regions = NULL; + + /* maintain compatibility with earlier walk version */ + dms->walk_flags = dms->cur_flags = DM_STATS_WALK_DEFAULT; + + return dms; + +bad: + dm_pool_destroy(dms->mem); + if (dms->hist_mem) + dm_pool_destroy(dms->hist_mem); + if (dms->group_mem) + dm_pool_destroy(dms->group_mem); + dm_free(dms); + return NULL; +} + +/* + * Test whether the stats region pointed to by region is present. + */ +static int _stats_region_present(const struct dm_stats_region *region) +{ + return !(region->region_id == DM_STATS_REGION_NOT_PRESENT); +} + +/* + * Test whether the stats group pointed to by group is present. + */ +static int _stats_group_present(const struct dm_stats_group *group) +{ + return !(group->group_id == DM_STATS_GROUP_NOT_PRESENT); +} + +/* + * Test whether a stats group id is present. + */ +static int _stats_group_id_present(const struct dm_stats *dms, uint64_t id) +{ + struct dm_stats_group *group = NULL; + + if (id == DM_STATS_GROUP_NOT_PRESENT) + return 0; + + if (!dms) + return_0; + + if (!dms->regions) + return 0; + + if (id > dms->max_region) + return 0; + + group = &dms->groups[id]; + + return _stats_group_present(group); +} + +/* + * Test whether the given region_id is a member of any group. + */ +static uint64_t _stats_region_is_grouped(const struct dm_stats* dms, + uint64_t region_id) +{ + uint64_t group_id; + + if (region_id == DM_STATS_GROUP_NOT_PRESENT) + return 0; + + if (!_stats_region_present(&dms->regions[region_id])) + return 0; + + group_id = dms->regions[region_id].group_id; + + return group_id != DM_STATS_GROUP_NOT_PRESENT; +} + +static void _stats_histograms_destroy(struct dm_pool *mem, + struct dm_stats_region *region) +{ + /* Unpopulated handle. */ + if (!region->counters) + return; + + /* + * Free everything in the pool back to the first histogram. + */ + if (region->counters[0].histogram) + dm_pool_free(mem, region->counters[0].histogram); +} + +static void _stats_region_destroy(struct dm_stats_region *region) +{ + if (!_stats_region_present(region)) + return; + + region->start = region->len = region->step = 0; + region->timescale = 0; + + /* + * Don't free counters and histogram bounds here: they are + * dropped from the pool along with the corresponding + * regions table. 
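+ * (So counters and bounds are only reset to NULL below; their
+ * memory is reclaimed when the whole regions table is freed.)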
+	 *
+	 * The following objects are all allocated with dm_malloc.
+	 */
+
+	region->counters = NULL;
+	region->bounds = NULL;
+
+	dm_free(region->program_id);
+	region->program_id = NULL;
+	dm_free(region->aux_data);
+	region->aux_data = NULL;
+	region->region_id = DM_STATS_REGION_NOT_PRESENT;
+}
+
+static void _stats_regions_destroy(struct dm_stats *dms)
+{
+	struct dm_pool *mem = dms->mem;
+	uint64_t i;
+
+	if (!dms->regions)
+		return;
+
+	/* walk backwards to obey pool order */
+	for (i = dms->max_region; (i != DM_STATS_REGION_NOT_PRESENT); i--) {
+		_stats_histograms_destroy(dms->hist_mem, &dms->regions[i]);
+		_stats_region_destroy(&dms->regions[i]);
+	}
+
+	dm_pool_free(mem, dms->regions);
+	dms->regions = NULL;
+}
+
+static void _stats_group_destroy(struct dm_stats_group *group)
+{
+	if (!_stats_group_present(group))
+		return;
+
+	group->histogram = NULL;
+
+	if (group->alias) {
+		dm_free((char *) group->alias);
+		group->alias = NULL;
+	}
+	if (group->regions) {
+		dm_bitset_destroy(group->regions);
+		group->regions = NULL;
+	}
+	group->group_id = DM_STATS_GROUP_NOT_PRESENT;
+}
+
+static void _stats_groups_destroy(struct dm_stats *dms)
+{
+	uint64_t i;
+
+	if (!dms->groups)
+		return;
+
+	for (i = dms->max_region; (i != DM_STATS_REGION_NOT_PRESENT); i--)
+		_stats_group_destroy(&dms->groups[i]);
+	dm_pool_free(dms->group_mem, dms->groups);
+	dms->groups = NULL;
+}
+
+static int _set_stats_device(struct dm_stats *dms, struct dm_task *dmt)
+{
+	if (dms->bind_name)
+		return dm_task_set_name(dmt, dms->bind_name);
+	if (dms->bind_uuid)
+		return dm_task_set_uuid(dmt, dms->bind_uuid);
+	if (dms->bind_major > 0)
+		return dm_task_set_major(dmt, dms->bind_major)
+			&& dm_task_set_minor(dmt, dms->bind_minor);
+	return_0;
+}
+
+static int _stats_bound(const struct dm_stats *dms)
+{
+	if (dms->bind_major > 0 || dms->bind_name || dms->bind_uuid)
+		return 1;
+	/* %p format specifier expects a void pointer. */
+	log_error("Stats handle at %p is not bound.", (void *) dms);
+	return 0;
+}
+
+static void _stats_clear_binding(struct dm_stats *dms)
+{
+	if (dms->bind_name)
+		dm_pool_free(dms->mem, dms->bind_name);
+	if (dms->bind_uuid)
+		dm_pool_free(dms->mem, dms->bind_uuid);
+	dm_free((char *) dms->name);
+
+	dms->bind_name = dms->bind_uuid = NULL;
+	dms->bind_major = dms->bind_minor = -1;
+	dms->name = NULL;
+}
+
+int dm_stats_bind_devno(struct dm_stats *dms, int major, int minor)
+{
+	_stats_clear_binding(dms);
+	_stats_regions_destroy(dms);
+	_stats_groups_destroy(dms);
+
+	dms->bind_major = major;
+	dms->bind_minor = minor;
+
+	return 1;
+}
+
+int dm_stats_bind_name(struct dm_stats *dms, const char *name)
+{
+	_stats_clear_binding(dms);
+	_stats_regions_destroy(dms);
+	_stats_groups_destroy(dms);
+
+	if (!(dms->bind_name = dm_pool_strdup(dms->mem, name)))
+		return_0;
+
+	return 1;
+}
+
+int dm_stats_bind_uuid(struct dm_stats *dms, const char *uuid)
+{
+	_stats_clear_binding(dms);
+	_stats_regions_destroy(dms);
+	_stats_groups_destroy(dms);
+
+	if (!(dms->bind_uuid = dm_pool_strdup(dms->mem, uuid)))
+		return_0;
+
+	return 1;
+}
+
+int dm_stats_bind_from_fd(struct dm_stats *dms, int fd)
+{
+	int major, minor;
+	struct stat buf;
+
+	if (fstat(fd, &buf)) {
+		log_error("fstat failed for fd %d.", fd);
+		return 0;
+	}
+
+	major = (int) MAJOR(buf.st_dev);
+	minor = (int) MINOR(buf.st_dev);
+
+	if (!dm_stats_bind_devno(dms, major, minor))
+		return_0;
+	return 1;
+}
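+
+/*
+ * Minimal binding sketch for the calls above. Illustrative only: the
+ * device name "vg0-lv0" is an example and error handling is abbreviated.
+ * Passing a NULL program_id makes dm_stats_create() read one from
+ * /proc/self/comm.
+ *
+ *	struct dm_stats *dms;
+ *
+ *	if (!(dms = dm_stats_create(NULL)))
+ *		return 0;
+ *	if (!dm_stats_bind_name(dms, "vg0-lv0")) {
+ *		dm_stats_destroy(dms);
+ *		return 0;
+ *	}
+ *	... use the bound handle ...
+ *	dm_stats_destroy(dms);
+ */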
+
+static int _stats_check_precise_timestamps(const struct dm_stats *dms)
+{
+	/* Already checked? */
+	if (dms && dms->precise)
+		return 1;
+
+	return dm_message_supports_precise_timestamps();
+}
+
+int dm_stats_driver_supports_precise(void)
+{
+	return _stats_check_precise_timestamps(NULL);
+}
+
+int dm_stats_driver_supports_histogram(void)
+{
+	return _stats_check_precise_timestamps(NULL);
+}
+
+static int _fill_hist_arg(char *hist_arg, size_t hist_len, uint64_t scale,
+			  struct dm_histogram *bounds)
+{
+	int i, l, len = 0, nr_bins;
+	char *arg = hist_arg;
+	uint64_t value;
+
+	nr_bins = bounds->nr_bins;
+
+	for (i = 0; i < nr_bins; i++) {
+		value = bounds->bins[i].upper / scale;
+		if ((l = dm_snprintf(arg, hist_len - len, FMTu64"%s", value,
+				     (i == (nr_bins - 1)) ? "" : ",")) < 0)
+			return_0;
+		len += l;
+		arg += l;
+	}
+	return 1;
+}
+
+static void *_get_hist_arg(struct dm_histogram *bounds, uint64_t scale,
+			   size_t *len)
+{
+	struct dm_histogram_bin *entry, *bins;
+	size_t hist_len = 1; /* terminating '\0' */
+	double value;
+
+	entry = bins = bounds->bins;
+
+	entry += bounds->nr_bins - 1;
+	while (entry >= bins) {
+		value = (double) (entry--)->upper;
+		/* Use lround to avoid size_t -> double cast warning. */
+		hist_len += 1 + (size_t) lround(log10(value / scale));
+		if (entry != bins)
+			hist_len++; /* ',' */
+	}
+
+	*len = hist_len;
+
+	return dm_zalloc(hist_len);
+}
+
+static char *_build_histogram_arg(struct dm_histogram *bounds, int *precise)
+{
+	struct dm_histogram_bin *entry, *bins;
+	size_t hist_len;
+	char *hist_arg;
+	uint64_t scale;
+
+	entry = bins = bounds->bins;
+
+	/* Empty histogram is invalid. */
+	if (!bounds->nr_bins) {
+		log_error("Cannot format empty histogram description.");
+		return NULL;
+	}
+
+	/* Validate entries and set *precise if precision < 1ms. */
+	entry += bounds->nr_bins - 1;
+	while (entry >= bins) {
+		if (entry != bins) {
+			if (entry->upper < (entry - 1)->upper) {
+				log_error("Histogram boundaries must be in "
+					  "order of increasing magnitude.");
+				return NULL;
+			}
+		}
+
+		/*
+		 * Only enable precise_timestamps automatically if any
+		 * value in the histogram bounds uses precision < 1ms.
+		 */
+		if (((entry--)->upper % NSEC_PER_MSEC) && !*precise)
+			*precise = 1;
+	}
+
+	scale = (*precise) ? 1 : NSEC_PER_MSEC;
+
+	/* Calculate hist_len and allocate a character buffer. */
+	if (!(hist_arg = _get_hist_arg(bounds, scale, &hist_len))) {
+		log_error("Could not allocate memory for histogram argument.");
+		return NULL;
+	}
+
+	/* Fill hist_arg with boundary strings. */
+	if (!_fill_hist_arg(hist_arg, hist_len, scale, bounds))
+		goto_bad;
+
+	return hist_arg;
+
+bad:
+	log_error("Could not build histogram arguments.");
+	dm_free(hist_arg);
+
+	return NULL;
+}
+
+static struct dm_task *_stats_send_message(struct dm_stats *dms, char *msg)
+{
+	struct dm_task *dmt;
+
+	if (!(dmt = dm_task_create(DM_DEVICE_TARGET_MSG)))
+		return_NULL;
+
+	if (!_set_stats_device(dms, dmt))
+		goto_bad;
+
+	if (!dm_task_set_message(dmt, msg))
+		goto_bad;
+
+	if (!dm_task_run(dmt))
+		goto_bad;
+
+	return dmt;
+
+bad:
+	dm_task_destroy(dmt);
+	return NULL;
+}
+
+/*
+ * Cache the dm device_name for the device bound to dms.
+ */ +static int _stats_set_name_cache(struct dm_stats *dms) +{ + struct dm_task *dmt; + + if (dms->name) + return 1; + + if (!(dmt = dm_task_create(DM_DEVICE_INFO))) + return_0; + + if (!_set_stats_device(dms, dmt)) + goto_bad; + + if (!dm_task_run(dmt)) + goto_bad; + + if (!(dms->name = dm_strdup(dm_task_get_name(dmt)))) + goto_bad; + + dm_task_destroy(dmt); + + return 1; + +bad: + log_error("Could not retrieve device-mapper name for device."); + dm_task_destroy(dmt); + return 0; +} + +/* + * update region group_id values + */ +static void _stats_update_groups(struct dm_stats *dms) +{ + struct dm_stats_group *group; + uint64_t group_id, i; + + for (group_id = 0; group_id < dms->max_region + 1; group_id++) { + if (!_stats_group_id_present(dms, group_id)) + continue; + + group = &dms->groups[group_id]; + + for (i = dm_bit_get_first(group->regions); + i != DM_STATS_GROUP_NOT_PRESENT; + i = dm_bit_get_next(group->regions, i)) + dms->regions[i].group_id = group_id; + } +} + +static void _check_group_regions_present(struct dm_stats *dms, + struct dm_stats_group *group) +{ + dm_bitset_t regions = group->regions; + int64_t i, group_id; + + group_id = i = dm_bit_get_first(regions); + + for (; i > 0; i = dm_bit_get_next(regions, i)) + if (!_stats_region_present(&dms->regions[i])) { + log_warn("Group descriptor " FMTd64 " contains " + "non-existent region_id " FMTd64 ".", + group_id, i); + dm_bit_clear(regions, i); + } +} + +/* + * Parse a DMS_GROUP group descriptor embedded in a region's aux_data. + * + * DMS_GROUP="ALIAS:MEMBERS" + * + * ALIAS: group alias + * MEMBERS: list of group member region ids. + * + */ +#define DMS_GROUP_TAG "DMS_GROUP=" +#define DMS_GROUP_TAG_LEN (sizeof(DMS_GROUP_TAG) - 1) +#define DMS_GROUP_SEP ':' +#define DMS_AUX_SEP "#" + +static int _parse_aux_data_group(struct dm_stats *dms, + struct dm_stats_region *region, + struct dm_stats_group *group) +{ + char *alias, *c, *end; + dm_bitset_t regions; + + memset(group, 0, sizeof(*group)); + group->group_id = DM_STATS_GROUP_NOT_PRESENT; + + /* find start of group tag */ + c = strstr(region->aux_data, DMS_GROUP_TAG); + if (!c) + return 1; /* no group is not an error */ + + alias = c + strlen(DMS_GROUP_TAG); + + c = strchr(c, DMS_GROUP_SEP); + + if (!c) { + log_error("Found malformed group tag while reading aux_data"); + return 0; + } + + /* terminate alias and advance to members */ + *(c++) = '\0'; + + log_debug("Read alias '%s' from aux_data", alias); + + if (!c) { + log_error("Found malformed group descriptor while " + "reading aux_data, expected '%c'", DMS_GROUP_SEP); + return 0; + } + + /* if user aux_data follows make sure we have a terminated + * string to pass to dm_bitset_parse_list(). 
+	 */
+	end = strstr(c, DMS_AUX_SEP);
+	if (!end)
+		end = c + strlen(c);
+	*(end++) = '\0';
+
+	if (!(regions = dm_bitset_parse_list(c, NULL, 0))) {
+		log_error("Could not parse member list while "
+			  "reading group aux_data");
+		return 0;
+	}
+
+	group->group_id = dm_bit_get_first(regions);
+	if (group->group_id != region->region_id) {
+		log_error("Found invalid group descriptor in region " FMTu64
+			  " aux_data.", region->region_id);
+		group->group_id = DM_STATS_GROUP_NOT_PRESENT;
+		goto bad;
+	}
+
+	group->regions = regions;
+	group->alias = NULL;
+	if (strlen(alias)) {
+		group->alias = dm_strdup(alias);
+		if (!group->alias) {
+			log_error("Could not allocate memory for group alias");
+			goto bad;
+		}
+	}
+
+	/* separate group tag from user aux_data */
+	if ((strlen(end) > 1) || strncmp(end, "-", 1))
+		c = dm_strdup(end);
+	else
+		c = dm_strdup("");
+
+	if (!c) {
+		log_error("Could not allocate memory for user aux_data");
+		goto bad_alias;
+	}
+
+	dm_free(region->aux_data);
+	region->aux_data = c;
+
+	log_debug("Found group_id " FMTu64 ": alias=\"%s\"", group->group_id,
+		  (group->alias) ? group->alias : "");
+
+	return 1;
+
+bad_alias:
+	dm_free((char *) group->alias);
+bad:
+	dm_bitset_destroy(regions);
+	return 0;
+}
+
+/*
+ * Parse a histogram specification returned by the kernel in a
+ * @stats_list response.
+ */
+static int _stats_parse_histogram_spec(struct dm_stats *dms,
+				       struct dm_stats_region *region,
+				       const char *histogram)
+{
+	static const char _valid_chars[] = "0123456789,";
+	uint64_t scale = region->timescale, this_val = 0;
+	struct dm_pool *mem = dms->hist_mem;
+	struct dm_histogram_bin cur;
+	struct dm_histogram hist;
+	int nr_bins = 1;
+	const char *c, *v, *val_start;
+	char *p, *endptr = NULL;
+
+	/* Advance past "histogram:". */
+	histogram = strchr(histogram, ':');
+	if (!histogram) {
+		log_error("Could not parse histogram description.");
+		return 0;
+	}
+	histogram++;
+
+	/* @stats_list rows are newline terminated. */
+	if ((p = strchr(histogram, '\n')))
+		*p = '\0';
+
+	if (!dm_pool_begin_object(mem, sizeof(cur)))
+		return_0;
+
+	memset(&hist, 0, sizeof(hist));
+
+	hist.nr_bins = 0; /* fix later */
+	hist.region = region;
+	hist.dms = dms;
+
+	if (!dm_pool_grow_object(mem, &hist, sizeof(hist)))
+		goto_bad;
+
+	c = histogram;
+	do {
+		for (v = _valid_chars; *v; v++)
+			if (*c == *v)
+				break;
+		if (!*v) {
+			stack;
+			goto badchar;
+		}
+
+		if (*c == ',') {
+			log_error("Invalid histogram description: %s",
+				  histogram);
+			goto bad;
+		} else {
+			val_start = c;
+			endptr = NULL;
+
+			errno = 0;
+			this_val = strtoull(val_start, &endptr, 10);
+			if (errno || !endptr) {
+				log_error("Could not parse histogram boundary.");
+				goto bad;
+			}
+
+			c = endptr; /* Advance to units, comma, or end. */
+
+			if (*c == ',')
+				c++;
+			else if (*c && (*c != ' ')) { /* Expected ',', ' ' or NUL. */
+				stack;
+				goto badchar;
+			}
+
+			if (*c == ',')
+				c++;
+
+			cur.upper = scale * this_val;
+			cur.count = 0;
+
+			if (!dm_pool_grow_object(mem, &cur, sizeof(cur)))
+				goto_bad;
+
+			nr_bins++;
+		}
+	} while (*c && (*c != ' '));
+
+	/* final upper bound.
*/ + cur.upper = UINT64_MAX; + if (!dm_pool_grow_object(mem, &cur, sizeof(cur))) + goto_bad; + + region->bounds = dm_pool_end_object(mem); + + if (!region->bounds) + return_0; + + region->bounds->nr_bins = nr_bins; + + log_debug("Added region histogram spec with %d entries.", nr_bins); + return 1; + +badchar: + log_error("Invalid character in histogram: '%c' (0x%x)", *c, *c); +bad: + dm_pool_abandon_object(mem); + return 0; +} + +static int _stats_parse_list_region(struct dm_stats *dms, + struct dm_stats_region *region, char *line) +{ + char *p = NULL, string_data[STATS_ROW_BUF_LEN]; + char *program_id, *aux_data, *stats_args; + char *empty_string = (char *) ""; + int r; + + memset(string_data, 0, sizeof(string_data)); + + /* + * Parse fixed fields, line format: + * + * <region_id>: <start_sector>+<length> <step> <string data> + * + * Maximum string data size is 4096 - 1 bytes. + */ + r = sscanf(line, FMTu64 ": " FMTu64 "+" FMTu64 " " FMTu64 " %4095c", + ®ion->region_id, ®ion->start, ®ion->len, + ®ion->step, string_data); + + if (r != 5) + return_0; + + /* program_id is guaranteed to be first. */ + program_id = string_data; + + /* + * FIXME: support embedded '\ ' in string data: + * s/strchr/_find_unescaped_space()/ + */ + if ((p = strchr(string_data, ' '))) { + /* terminate program_id string. */ + *p = '\0'; + if (!strncmp(program_id, "-", 1)) + program_id = empty_string; + aux_data = p + 1; + if ((p = strchr(aux_data, ' '))) { + /* terminate aux_data string. */ + *p = '\0'; + stats_args = p + 1; + } else + stats_args = empty_string; + + /* no aux_data? */ + if (!strncmp(aux_data, "-", 1)) + aux_data = empty_string; + else + /* remove trailing newline */ + aux_data[strlen(aux_data) - 1] = '\0'; + } else + aux_data = stats_args = empty_string; + + if (strstr(stats_args, PRECISE_ARG)) + region->timescale = 1; + else + region->timescale = NSEC_PER_MSEC; + + if ((p = strstr(stats_args, HISTOGRAM_ARG))) { + if (!_stats_parse_histogram_spec(dms, region, p)) + return_0; + } else + region->bounds = NULL; + + /* clear aggregate cache */ + region->histogram = NULL; + + region->group_id = DM_STATS_GROUP_NOT_PRESENT; + + if (!(region->program_id = dm_strdup(program_id))) + return_0; + if (!(region->aux_data = dm_strdup(aux_data))) { + dm_free(region->program_id); + return_0; + } + + region->counters = NULL; + return 1; +} + +static int _stats_parse_list(struct dm_stats *dms, const char *resp) +{ + uint64_t max_region = 0, nr_regions = 0; + struct dm_stats_region cur, fill; + struct dm_stats_group cur_group; + struct dm_pool *mem = dms->mem, *group_mem = dms->group_mem; + char line[STATS_ROW_BUF_LEN]; + FILE *list_rows; + + if (!resp) { + log_error("Could not parse NULL @stats_list response."); + return 0; + } + + _stats_regions_destroy(dms); + _stats_groups_destroy(dms); + + /* no regions */ + if (!strlen(resp)) { + dms->nr_regions = dms->max_region = 0; + dms->regions = NULL; + return 1; + } + + /* + * dm_task_get_message_response() returns a 'const char *' but + * since fmemopen also permits "w" it expects a 'char *'. 
+	 */
+	if (!(list_rows = fmemopen((char *)resp, strlen(resp), "r")))
+		return_0;
+
+	/* begin region table */
+	if (!dm_pool_begin_object(mem, 1024))
+		goto_bad;
+
+	/* begin group table */
+	if (!dm_pool_begin_object(group_mem, 32))
+		goto_bad;
+
+	while (fgets(line, sizeof(line), list_rows)) {
+
+		cur_group.group_id = DM_STATS_GROUP_NOT_PRESENT;
+		cur_group.regions = NULL;
+		cur_group.alias = NULL;
+
+		if (!_stats_parse_list_region(dms, &cur, line))
+			goto_bad;
+
+		/* handle holes in the list of region_ids */
+		if (cur.region_id > max_region) {
+			memset(&fill, 0, sizeof(fill));
+			memset(&cur_group, 0, sizeof(cur_group));
+			fill.region_id = DM_STATS_REGION_NOT_PRESENT;
+			cur_group.group_id = DM_STATS_GROUP_NOT_PRESENT;
+			do {
+				if (!dm_pool_grow_object(mem, &fill, sizeof(fill)))
+					goto_bad;
+				if (!dm_pool_grow_object(group_mem, &cur_group,
+							 sizeof(cur_group)))
+					goto_bad;
+			} while (max_region++ < (cur.region_id - 1));
+		}
+
+		if (cur.aux_data)
+			if (!_parse_aux_data_group(dms, &cur, &cur_group))
+				log_error("Failed to parse group descriptor "
+					  "from region_id " FMTu64 " aux_data: "
+					  "'%s'", cur.region_id, cur.aux_data);
+				/* continue */
+
+		if (!dm_pool_grow_object(mem, &cur, sizeof(cur)))
+			goto_bad;
+
+		if (!dm_pool_grow_object(group_mem, &cur_group,
+					 sizeof(cur_group)))
+			goto_bad;
+
+		max_region++;
+		nr_regions++;
+	}
+
+	if (!nr_regions)
+		/* no region data read from @stats_list */
+		goto bad;
+
+	dms->nr_regions = nr_regions;
+	dms->max_region = max_region - 1;
+	dms->regions = dm_pool_end_object(mem);
+	dms->groups = dm_pool_end_object(group_mem);
+
+	dm_stats_foreach_group(dms)
+		_check_group_regions_present(dms, &dms->groups[dms->cur_group]);
+
+	_stats_update_groups(dms);
+
+	if (fclose(list_rows))
+		stack;
+
+	return 1;
+
+bad:
+	if (fclose(list_rows))
+		stack;
+	dm_pool_abandon_object(mem);
+	dm_pool_abandon_object(group_mem);
+
+	return 0;
+}
+
+int dm_stats_list(struct dm_stats *dms, const char *program_id)
+{
+	char msg[STATS_MSG_BUF_LEN];
+	struct dm_task *dmt;
+	int r;
+
+	if (!_stats_bound(dms))
+		return_0;
+
+	/* allow zero-length program_id for list */
+	if (!program_id)
+		program_id = dms->program_id;
+
+	if (!_stats_set_name_cache(dms))
+		return_0;
+
+	if (dms->regions)
+		_stats_regions_destroy(dms);
+
+	r = dm_snprintf(msg, sizeof(msg), "@stats_list %s", program_id);
+
+	if (r < 0) {
+		log_error("Failed to prepare stats message.");
+		return 0;
+	}
+
+	if (!(dmt = _stats_send_message(dms, msg)))
+		return_0;
+
+	if (!_stats_parse_list(dms, dm_task_get_message_response(dmt))) {
+		log_error("Could not parse @stats_list response.");
+		goto bad;
+	}
+
+	dm_task_destroy(dmt);
+	return 1;
+
+bad:
+	dm_task_destroy(dmt);
+	return 0;
+}
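+
+/*
+ * Typical read cycle built on dm_stats_list() above (a sketch with
+ * error handling abbreviated): list the regions, populate the handle
+ * with counter data, then walk it with the cursor functions defined
+ * later in this file.
+ *
+ *	uint64_t reads;
+ *
+ *	if (!dm_stats_list(dms, NULL))
+ *		return 0;
+ *	if (!dm_stats_populate(dms, NULL, DM_STATS_REGIONS_ALL))
+ *		return 0;
+ *	dm_stats_walk_start(dms);
+ *	do {
+ *		reads = dm_stats_get_counter(dms, DM_STATS_READS_COUNT,
+ *					     DM_STATS_REGION_CURRENT,
+ *					     DM_STATS_AREA_CURRENT);
+ *		... report reads ...
+ *		dm_stats_walk_next(dms);
+ *	} while (!dm_stats_walk_end(dms));
+ */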
+
+/*
+ * Parse histogram data returned from a @stats_print operation.
+ */
+static int _stats_parse_histogram(struct dm_pool *mem, char *hist_str,
+				  struct dm_histogram **histogram,
+				  struct dm_stats_region *region)
+{
+	static const char _valid_chars[] = "0123456789:";
+	struct dm_histogram *bounds = region->bounds;
+	struct dm_histogram hist = {
+		.nr_bins = region->bounds->nr_bins
+	};
+	const char *c, *v, *val_start;
+	struct dm_histogram_bin cur;
+	uint64_t sum = 0, this_val;
+	char *endptr = NULL;
+	int bin = 0;
+
+	c = hist_str;
+
+	if (!dm_pool_begin_object(mem, sizeof(cur)))
+		return_0;
+
+	if (!dm_pool_grow_object(mem, &hist, sizeof(hist)))
+		goto_bad;
+
+	do {
+		memset(&cur, 0, sizeof(cur));
+		for (v = _valid_chars; *v; v++)
+			if (*c == *v)
+				break;
+		if (!*v)
+			goto badchar;
+
+		if (*c == ',')
+			goto badchar;
+		else {
+			val_start = c;
+			endptr = NULL;
+
+			errno = 0;
+			this_val = strtoull(val_start, &endptr, 10);
+			if (errno || !endptr) {
+				log_error("Could not parse histogram value.");
+				goto bad;
+			}
+			c = endptr; /* Advance to colon, or end. */
+
+			if (*c == ':')
+				c++;
+			else if (*c && (*c != '\n'))
+				/* Expected ':', '\n', or NUL. */
+				goto badchar;
+
+			if (*c == ':')
+				c++;
+
+			cur.upper = bounds->bins[bin].upper;
+			cur.count = this_val;
+			sum += this_val;
+
+			if (!dm_pool_grow_object(mem, &cur, sizeof(cur)))
+				goto_bad;
+
+			bin++;
+		}
+	} while (*c && (*c != '\n'));
+
+	log_debug("Added region histogram data with %d entries.", hist.nr_bins);
+
+	*histogram = dm_pool_end_object(mem);
+	(*histogram)->sum = sum;
+
+	return 1;
+
+badchar:
+	log_error("Invalid character in histogram data: '%c' (0x%x)", *c, *c);
+bad:
+	dm_pool_abandon_object(mem);
+	return 0;
+}
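+
+/*
+ * An illustrative @stats_print row as consumed by _stats_parse_region()
+ * below (the counter values are invented; the colon-separated histogram
+ * field is only present for regions created with histogram bounds):
+ *
+ *	0+1024 177 31 4096 523 166 12 2048 411 0 934 1422 523 411 12:7:0
+ */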
+
+static int _stats_parse_region(struct dm_stats *dms, const char *resp,
+			       struct dm_stats_region *region,
+			       uint64_t timescale)
+{
+	struct dm_histogram *hist = NULL;
+	struct dm_pool *mem = dms->mem;
+	struct dm_stats_counters cur;
+	FILE *stats_rows = NULL;
+	uint64_t start = 0, len = 0;
+	char row[STATS_ROW_BUF_LEN];
+	int r;
+
+	if (!resp) {
+		log_error("Could not parse empty @stats_print response.");
+		return 0;
+	}
+
+	region->start = UINT64_MAX;
+
+	if (!dm_pool_begin_object(mem, 512))
+		goto_bad;
+
+	/*
+	 * dm_task_get_message_response() returns a 'const char *' but
+	 * since fmemopen also permits "w" it expects a 'char *'.
+	 */
+	stats_rows = fmemopen((char *)resp, strlen(resp), "r");
+	if (!stats_rows)
+		goto_bad;
+
+	/*
+	 * Output format for each step-sized area of a region:
+	 *
+	 * <start_sector>+<length> counters
+	 *
+	 * The first 11 counters have the same meaning as
+	 * /sys/block/ * /stat or /proc/diskstats.
+	 *
+	 * Please refer to Documentation/iostats.txt for details.
+	 *
+	 * 1. the number of reads completed
+	 * 2. the number of reads merged
+	 * 3. the number of sectors read
+	 * 4. the number of milliseconds spent reading
+	 * 5. the number of writes completed
+	 * 6. the number of writes merged
+	 * 7. the number of sectors written
+	 * 8. the number of milliseconds spent writing
+	 * 9. the number of I/Os currently in progress
+	 * 10. the number of milliseconds spent doing I/Os
+	 * 11. the weighted number of milliseconds spent doing I/Os
+	 *
+	 * Additional counters:
+	 * 12. the total time spent reading in milliseconds
+	 * 13. the total time spent writing in milliseconds
+	 *
+	 */
+	while (fgets(row, sizeof(row), stats_rows)) {
+		r = sscanf(row, FMTu64 "+" FMTu64 /* start+len */
+			   /* reads */
+			   FMTu64 " " FMTu64 " " FMTu64 " " FMTu64 " "
+			   /* writes */
+			   FMTu64 " " FMTu64 " " FMTu64 " " FMTu64 " "
+			   /* in flight & io nsecs */
+			   FMTu64 " " FMTu64 " " FMTu64 " "
+			   /* tot read/write nsecs */
+			   FMTu64 " " FMTu64, &start, &len,
+			   &cur.reads, &cur.reads_merged, &cur.read_sectors,
+			   &cur.read_nsecs,
+			   &cur.writes, &cur.writes_merged, &cur.write_sectors,
+			   &cur.write_nsecs,
+			   &cur.io_in_progress,
+			   &cur.io_nsecs, &cur.weighted_io_nsecs,
+			   &cur.total_read_nsecs, &cur.total_write_nsecs);
+		if (r != 15) {
+			log_error("Could not parse @stats_print row.");
+			goto bad;
+		}
+
+		/* scale time values up if needed */
+		if (timescale != 1) {
+			cur.read_nsecs *= timescale;
+			cur.write_nsecs *= timescale;
+			cur.io_nsecs *= timescale;
+			cur.weighted_io_nsecs *= timescale;
+			cur.total_read_nsecs *= timescale;
+			cur.total_write_nsecs *= timescale;
+		}
+
+		if (region->bounds) {
+			/* Find first histogram separator. */
+			char *hist_str = strchr(row, ':');
+			if (!hist_str) {
+				log_error("Could not parse histogram value.");
+				goto bad;
+			}
+			/* Find space preceding histogram. */
+			while (hist_str && *(hist_str - 1) != ' ')
+				hist_str--;
+
+			/* Use a separate pool for histogram objects since we
+			 * are growing the area table and each area's histogram
+			 * table simultaneously.
+			 */
+			if (!_stats_parse_histogram(dms->hist_mem, hist_str,
+						    &hist, region))
+				goto_bad;
+			hist->dms = dms;
+			hist->region = region;
+		}
+
+		cur.histogram = hist;
+
+		if (!dm_pool_grow_object(mem, &cur, sizeof(cur)))
+			goto_bad;
+
+		if (region->start == UINT64_MAX) {
+			region->start = start;
+			region->step = len; /* area size is always uniform. */
+		}
+	}
+
+	if (region->start == UINT64_MAX)
+		/* no area data read from @stats_print */
+		goto bad;
+
+	region->len = (start + len) - region->start;
+	region->timescale = timescale;
+	region->counters = dm_pool_end_object(mem);
+
+	if (fclose(stats_rows))
+		stack;
+
+	return 1;
+
+bad:
+	if (stats_rows)
+		if (fclose(stats_rows))
+			stack;
+	dm_pool_abandon_object(mem);
+
+	return 0;
+}
+
+static void _stats_walk_next_present(const struct dm_stats *dms,
+				     uint64_t *flags,
+				     uint64_t *cur_r, uint64_t *cur_a,
+				     uint64_t *cur_g)
+{
+	struct dm_stats_region *cur = NULL;
+
+	/* start of walk: region loop advances *cur_r to 0. */
+	if (*cur_r != DM_STATS_REGION_NOT_PRESENT)
+		cur = &dms->regions[*cur_r];
+
+	/* within current region?
*/ + if (cur && (*flags & DM_STATS_WALK_AREA)) { + if (++(*cur_a) < _nr_areas_region(cur)) + return; + else + *cur_a = 0; + } + + /* advance to next present, non-skipped region or end */ + while (++(*cur_r) <= dms->max_region) { + cur = &dms->regions[*cur_r]; + if (!_stats_region_present(cur)) + continue; + if ((*flags & DM_STATS_WALK_SKIP_SINGLE_AREA)) + if (!(*flags & DM_STATS_WALK_AREA)) + if (_nr_areas_region(cur) < 2) + continue; + /* matching region found */ + break; + } + return; +} + +static void _stats_walk_next(const struct dm_stats *dms, uint64_t *flags, + uint64_t *cur_r, uint64_t *cur_a, uint64_t *cur_g) +{ + if (!dms || !dms->regions) + return; + + if (*flags & DM_STATS_WALK_AREA) { + /* advance to next area, region, or end */ + _stats_walk_next_present(dms, flags, cur_r, cur_a, cur_g); + return; + } + + if (*flags & DM_STATS_WALK_REGION) { + /* enable region aggregation */ + *cur_a = DM_STATS_WALK_REGION; + _stats_walk_next_present(dms, flags, cur_r, cur_a, cur_g); + return; + } + + if (*flags & DM_STATS_WALK_GROUP) { + /* enable group aggregation */ + *cur_r = *cur_a = DM_STATS_WALK_GROUP; + while (!_stats_group_id_present(dms, ++(*cur_g)) + && (*cur_g) < dms->max_region + 1) + ; /* advance to next present group or end */ + return; + } + + log_error("stats_walk_next called with empty walk flags"); +} + +static void _group_walk_start(const struct dm_stats *dms, uint64_t *flags, + uint64_t *cur_r, uint64_t *cur_a, uint64_t *cur_g) +{ + if (!(*flags & DM_STATS_WALK_GROUP)) + return; + + *cur_a = *cur_r = DM_STATS_WALK_GROUP; + *cur_g = 0; + + /* advance to next present group or end */ + while ((*cur_g) <= dms->max_region) { + if (_stats_region_is_grouped(dms, *cur_g)) + break; + (*cur_g)++; + } + + if (*cur_g > dms->max_region) + /* no groups to walk */ + *flags &= ~DM_STATS_WALK_GROUP; +} + +static void _stats_walk_start(const struct dm_stats *dms, uint64_t *flags, + uint64_t *cur_r, uint64_t *cur_a, + uint64_t *cur_g) +{ + log_debug("starting stats walk with %s %s %s %s", + (*flags & DM_STATS_WALK_AREA) ? "AREA" : "", + (*flags & DM_STATS_WALK_REGION) ? "REGION" : "", + (*flags & DM_STATS_WALK_GROUP) ? "GROUP" : "", + (*flags & DM_STATS_WALK_SKIP_SINGLE_AREA) ? 
"SKIP" : ""); + + if (!dms->regions) + return; + + if (!(*flags & (DM_STATS_WALK_AREA | DM_STATS_WALK_REGION))) + return _group_walk_start(dms, flags, cur_r, cur_a, cur_g); + + /* initialise cursor state */ + *cur_a = 0; + *cur_r = DM_STATS_REGION_NOT_PRESENT; + *cur_g = DM_STATS_GROUP_NOT_PRESENT; + + if (!(*flags & DM_STATS_WALK_AREA)) + *cur_a = DM_STATS_WALK_REGION; + + /* advance to first present, non-skipped region */ + _stats_walk_next_present(dms, flags, cur_r, cur_a, cur_g); +} + +#define DM_STATS_WALK_MASK (DM_STATS_WALK_AREA \ + | DM_STATS_WALK_REGION \ + | DM_STATS_WALK_GROUP \ + | DM_STATS_WALK_SKIP_SINGLE_AREA) + +int dm_stats_walk_init(struct dm_stats *dms, uint64_t flags) +{ + if (!dms) + return_0; + + if (flags & ~DM_STATS_WALK_MASK) { + log_error("Unknown value in walk flags: 0x" FMTx64, + (uint64_t) (flags & ~DM_STATS_WALK_MASK)); + return 0; + } + dms->walk_flags = flags; + log_debug("dm_stats_walk_init: initialised flags to " FMTx64, flags); + return 1; +} + +void dm_stats_walk_start(struct dm_stats *dms) +{ + if (!dms || !dms->regions) + return; + + dms->cur_flags = dms->walk_flags; + + _stats_walk_start(dms, &dms->cur_flags, + &dms->cur_region, &dms->cur_area, + &dms->cur_group); +} + +void dm_stats_walk_next(struct dm_stats *dms) +{ + _stats_walk_next(dms, &dms->cur_flags, + &dms->cur_region, &dms->cur_area, + &dms->cur_group); +} + +void dm_stats_walk_next_region(struct dm_stats *dms) +{ + dms->cur_flags &= ~DM_STATS_WALK_AREA; + _stats_walk_next(dms, &dms->cur_flags, + &dms->cur_region, &dms->cur_area, + &dms->cur_group); +} + +/* + * Return 1 if any regions remain that are present and not skipped + * by the current walk flags or 0 otherwise. + */ +static uint64_t _stats_walk_any_unskipped(const struct dm_stats *dms, + uint64_t *flags, + uint64_t *cur_r, uint64_t *cur_a) +{ + struct dm_stats_region *region; + uint64_t i; + + if (*cur_r > dms->max_region) + return 0; + + for (i = *cur_r; i <= dms->max_region; i++) { + region = &dms->regions[i]; + if (!_stats_region_present(region)) + continue; + if ((*flags & DM_STATS_WALK_SKIP_SINGLE_AREA) + && !(*flags & DM_STATS_WALK_AREA)) + if (_nr_areas_region(region) < 2) + continue; + return 1; + } + return 0; +} + +static void _stats_walk_end_areas(const struct dm_stats *dms, uint64_t *flags, + uint64_t *cur_r, uint64_t *cur_a, + uint64_t *cur_g) +{ + int end = !_stats_walk_any_unskipped(dms, flags, cur_r, cur_a); + + if (!(*flags & DM_STATS_WALK_AREA)) + return; + + if (!end) + return; + + *flags &= ~DM_STATS_WALK_AREA; + if (*flags & DM_STATS_WALK_REGION) { + /* start region walk */ + *cur_a = DM_STATS_WALK_REGION; + *cur_r = DM_STATS_REGION_NOT_PRESENT; + _stats_walk_next_present(dms, flags, cur_r, cur_a, cur_g); + if (!_stats_walk_any_unskipped(dms, flags, cur_r, cur_a)) { + /* no more regions */ + *flags &= ~DM_STATS_WALK_REGION; + if (!(*flags & DM_STATS_WALK_GROUP)) + *cur_r = dms->max_region; + } + } + + if (*flags & DM_STATS_WALK_REGION) + return; + + if (*flags & DM_STATS_WALK_GROUP) + _group_walk_start(dms, flags, cur_r, cur_a, cur_g); +} + +static int _stats_walk_end(const struct dm_stats *dms, uint64_t *flags, + uint64_t *cur_r, uint64_t *cur_a, uint64_t *cur_g) +{ + if (*flags & DM_STATS_WALK_AREA) { + _stats_walk_end_areas(dms, flags, cur_r, cur_a, cur_g); + goto out; + } + + if (*flags & DM_STATS_WALK_REGION) { + if (!_stats_walk_any_unskipped(dms, flags, cur_r, cur_a)) { + *flags &= ~DM_STATS_WALK_REGION; + _group_walk_start(dms, flags, cur_r, cur_a, cur_g); + } + goto out; + } + + if (*flags & 
DM_STATS_WALK_GROUP) { + if (*cur_g <= dms->max_region) + goto out; + *flags &= ~DM_STATS_WALK_GROUP; + } +out: + return !(*flags & ~DM_STATS_WALK_SKIP_SINGLE_AREA); +} + +int dm_stats_walk_end(struct dm_stats *dms) +{ + if (!dms) + return 1; + + if (_stats_walk_end(dms, &dms->cur_flags, + &dms->cur_region, &dms->cur_area, + &dms->cur_group)) { + dms->cur_flags = dms->walk_flags; + return 1; + } + return 0; +} + +dm_stats_obj_type_t dm_stats_object_type(const struct dm_stats *dms, + uint64_t region_id, + uint64_t area_id) +{ + uint64_t group_id; + + region_id = (region_id == DM_STATS_REGION_CURRENT) + ? dms->cur_region : region_id ; + area_id = (area_id == DM_STATS_AREA_CURRENT) + ? dms->cur_area : area_id ; + + if (region_id == DM_STATS_REGION_NOT_PRESENT) + /* no region */ + return DM_STATS_OBJECT_TYPE_NONE; + + if (region_id & DM_STATS_WALK_GROUP) { + if (region_id == DM_STATS_WALK_GROUP) + /* indirect group_id from cursor */ + group_id = dms->cur_group; + else + /* immediate group_id encoded in region_id */ + group_id = region_id & ~DM_STATS_WALK_GROUP; + if (!_stats_group_id_present(dms, group_id)) + return DM_STATS_OBJECT_TYPE_NONE; + return DM_STATS_OBJECT_TYPE_GROUP; + } + + if (region_id > dms->max_region) + /* end of table */ + return DM_STATS_OBJECT_TYPE_NONE; + + if (area_id & DM_STATS_WALK_REGION) + /* aggregate region */ + return DM_STATS_OBJECT_TYPE_REGION; + + /* plain region_id and area_id */ + return DM_STATS_OBJECT_TYPE_AREA; +} + +dm_stats_obj_type_t dm_stats_current_object_type(const struct dm_stats *dms) +{ + /* dm_stats_object_type will decode region/area */ + return dm_stats_object_type(dms, + DM_STATS_REGION_CURRENT, + DM_STATS_AREA_CURRENT); +} + +uint64_t dm_stats_get_region_nr_areas(const struct dm_stats *dms, + uint64_t region_id) +{ + struct dm_stats_region *region = NULL; + + /* groups or aggregate regions cannot be subdivided */ + if (region_id & DM_STATS_WALK_GROUP) + return 1; + + region = &dms->regions[region_id]; + return _nr_areas_region(region); +} + +uint64_t dm_stats_get_current_nr_areas(const struct dm_stats *dms) +{ + /* groups or aggregate regions cannot be subdivided */ + if (dms->cur_region & DM_STATS_WALK_GROUP) + return 1; + + return dm_stats_get_region_nr_areas(dms, dms->cur_region); +} + +uint64_t dm_stats_get_nr_areas(const struct dm_stats *dms) +{ + uint64_t nr_areas = 0, flags = DM_STATS_WALK_AREA; + /* use a separate cursor */ + uint64_t cur_region = 0, cur_area = 0, cur_group = 0; + + /* no regions to visit? */ + if (!dms->regions) + return 0; + + flags = DM_STATS_WALK_AREA; + _stats_walk_start(dms, &flags, &cur_region, &cur_area, &cur_group); + do { + nr_areas += dm_stats_get_current_nr_areas(dms); + _stats_walk_next(dms, &flags, + &cur_region, &cur_area, + &cur_group); + } while (!_stats_walk_end(dms, &flags, + &cur_region, &cur_area, + &cur_group)); + return nr_areas; +} + +int dm_stats_group_present(const struct dm_stats *dms, uint64_t group_id) +{ + return _stats_group_id_present(dms, group_id); +} + +int dm_stats_get_region_nr_histogram_bins(const struct dm_stats *dms, + uint64_t region_id) +{ + region_id = (region_id == DM_STATS_REGION_CURRENT) + ? dms->cur_region : region_id ; + + /* FIXME: support group histograms if all region bounds match */ + if (region_id & DM_STATS_WALK_GROUP) + return 0; + + if (!dms->regions[region_id].bounds) + return 0; + + return dms->regions[region_id].bounds->nr_bins; +} + +/* + * Fill buf with a list of set regions in the regions bitmap. 
Consecutive + * ranges of set region IDs are output using "M-N" range notation. + * + * The number of bytes consumed is returned or zero on error. + */ +static size_t _stats_group_tag_fill(const struct dm_stats *dms, + dm_bitset_t regions, + char *buf, size_t buflen) +{ + int i, j, r, next, last = 0; + size_t used = 0; + + last = dm_bit_get_last(regions); + + i = dm_bit_get_first(regions); + for(; i >= 0; i = dm_bit_get_next(regions, i)) { + /* find range end */ + j = i; + do + next = j + 1; + while ((j = dm_bit_get_next(regions, j)) == next); + + /* set to last set bit */ + j = next - 1; + + /* handle range vs. single region */ + if (i != j) + r = dm_snprintf(buf, buflen, FMTu64 "-" FMTu64 "%s", + (uint64_t) i, (uint64_t) j, + (j == last) ? "" : ","); + else + r = dm_snprintf(buf, buflen, FMTu64 "%s", (uint64_t) i, + (i == last) ? "" : ","); + if (r < 0) + goto_bad; + + i = next; /* skip handled bits if in range */ + + buf += r; + used += r; + } + + return used; +bad: + log_error("Could not format group list."); + return 0; +} + +/* + * Calculate the space required to hold a string description of the group + * described by the regions bitset using comma separated list in range + * notation ("A,B,C,M-N"). + */ +static size_t _stats_group_tag_len(const struct dm_stats *dms, + dm_bitset_t regions) +{ + int64_t i, j, next, nr_regions = 0; + size_t buflen = 0, id_len = 0; + + /* check region ids and find last set bit */ + i = dm_bit_get_first(regions); + for (; i >= 0; i = dm_bit_get_next(regions, i)) { + /* length of region_id or range start in characters */ + id_len = (i) ? 1 + (size_t) log10(i) : 1; + buflen += id_len; + j = i; + do + next = j + 1; + while ((j = dm_bit_get_next(regions, j)) == next); + + /* set to last set bit */ + j = next - 1; + + nr_regions += j - i + 1; + + /* handle range */ + if (i != j) { + /* j is always > i, which is always >= 0 */ + id_len = 1 + (size_t) log10(j); + buflen += id_len + 1; /* range end plus "-" */ + } + buflen++; + i = next; /* skip bits if handling range */ + } + return buflen; +} + +/* + * Build a DMS_GROUP="..." tag for the group specified by group_id, + * to be stored in the corresponding region's aux_data field. + */ +static char *_build_group_tag(struct dm_stats *dms, uint64_t group_id) +{ + char *aux_string, *buf; + dm_bitset_t regions; + const char *alias; + size_t buflen = 0; + int r; + + regions = dms->groups[group_id].regions; + alias = dms->groups[group_id].alias; + + buflen = _stats_group_tag_len(dms, regions); + + if (!buflen) + return_0; + + buflen += DMS_GROUP_TAG_LEN; + buflen += 1 + (alias ? strlen(alias) : 0); /* 'alias:' */ + + buf = aux_string = dm_malloc(buflen); + if (!buf) { + log_error("Could not allocate memory for aux_data string."); + return NULL; + } + + if (!dm_strncpy(buf, DMS_GROUP_TAG, DMS_GROUP_TAG_LEN + 1)) + goto_bad; + + buf += DMS_GROUP_TAG_LEN; + buflen -= DMS_GROUP_TAG_LEN; + + r = dm_snprintf(buf, buflen, "%s%c", alias ? alias : "", DMS_GROUP_SEP); + if (r < 0) + goto_bad; + + buf += r; + buflen -= r; + + r = _stats_group_tag_fill(dms, regions, buf, buflen); + if (!r) + goto_bad; + + return aux_string; +bad: + log_error("Could not format group aux_data."); + dm_free(aux_string); + return NULL; +} + +/* + * Store updated aux_data for a region. The aux_data is passed to the + * kernel using the @stats_set_aux message. Any required group tag is + * generated from the current group table and included in the message. 
+ */
+static int _stats_set_aux(struct dm_stats *dms,
+			  uint64_t region_id, const char *aux_data)
+{
+	const char *group_tag = NULL;
+	struct dm_task *dmt = NULL;
+	char msg[STATS_MSG_BUF_LEN];
+
+	/* group data required? */
+	if (_stats_group_id_present(dms, region_id)) {
+		group_tag = _build_group_tag(dms, region_id);
+		if (!group_tag) {
+			log_error("Could not build group descriptor for "
+				  "region ID " FMTu64, region_id);
+			goto bad;
+		}
+	}
+
+	if (dm_snprintf(msg, sizeof(msg), "@stats_set_aux " FMTu64 " %s%s%s ",
+			region_id, (group_tag) ? group_tag : "",
+			(group_tag) ? DMS_AUX_SEP : "",
+			(strlen(aux_data)) ? aux_data : "-") < 0) {
+		log_error("Could not prepare @stats_set_aux message");
+		goto bad;
+	}
+
+	if (!(dmt = _stats_send_message(dms, msg)))
+		goto_bad;
+
+	dm_free((char *) group_tag);
+
+	/* no response to a @stats_set_aux message */
+	dm_task_destroy(dmt);
+
+	return 1;
+bad:
+	dm_free((char *) group_tag);
+	return 0;
+}
+
+/*
+ * Maximum length of a "start+end" range string:
+ * Two 20 digit uint64_t, '+', and NULL.
+ */
+#define RANGE_LEN 42
+static int _stats_create_region(struct dm_stats *dms, uint64_t *region_id,
+				uint64_t start, uint64_t len, int64_t step,
+				int precise, const char *hist_arg,
+				const char *program_id, const char *aux_data)
+{
+	char msg[STATS_MSG_BUF_LEN], range[RANGE_LEN], *endptr = NULL;
+	const char *err_fmt = "Could not prepare @stats_create %s.";
+	const char *precise_str = PRECISE_ARG;
+	const char *resp, *opt_args = NULL;
+	struct dm_task *dmt = NULL;
+	int r = 0, nr_opt = 0;
+
+	if (!_stats_bound(dms))
+		return_0;
+
+	if (!program_id || !strlen(program_id))
+		program_id = dms->program_id;
+
+	if (start || len) {
+		if (dm_snprintf(range, sizeof(range), FMTu64 "+" FMTu64,
+				start, len) < 0) {
+			log_error(err_fmt, "range");
+			return 0;
+		}
+	}
+
+	if (precise < 0)
+		precise = dms->precise;
+
+	if (precise)
+		nr_opt++;
+	else
+		precise_str = "";
+
+	if (hist_arg)
+		nr_opt++;
+	else
+		hist_arg = "";
+
+	if (nr_opt) {
+		if ((dm_asprintf((char **)&opt_args, "%d %s %s%s", nr_opt,
+				 precise_str,
+				 (strlen(hist_arg)) ? HISTOGRAM_ARG : "",
+				 hist_arg)) < 0) {
+			log_error(err_fmt, PRECISE_ARG " option.");
+			return 0;
+		}
+	} else
+		opt_args = dm_strdup("");
+
+	if (dm_snprintf(msg, sizeof(msg), "@stats_create %s %s" FMTu64
+			" %s %s %s", (start || len) ? range : "-",
+			(step < 0) ? "/" : "",
+			(uint64_t)llabs(step),
+			opt_args, program_id, aux_data) < 0) {
+		log_error(err_fmt, "message");
+		dm_free((void *) opt_args);
+		return 0;
+	}
+
+	if (!(dmt = _stats_send_message(dms, msg)))
+		goto_out;
+
+	resp = dm_task_get_message_response(dmt);
+	if (!resp) {
+		log_error("Could not parse empty @stats_create response.");
+		goto out;
+	}
+
+	if (region_id) {
+		errno = 0;
+		*region_id = strtoull(resp, &endptr, 10);
+		if (errno || resp == endptr)
+			goto_out;
+	}
+
+	r = 1;
+
+out:
+	if (dmt)
+		dm_task_destroy(dmt);
+	dm_free((void *) opt_args);
+
+	return r;
+}
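+
+/*
+ * An illustrative message as assembled by _stats_create_region() above:
+ * a whole-device region ("-"), sub-divided into 16 areas ("/16"), with
+ * two optional arguments enabling precise timestamps and a pair of
+ * (nanosecond) histogram bounds, program_id "dmstats" and no user data:
+ *
+ *	@stats_create - /16 2 precise_timestamps histogram:1000000,2000000 dmstats -
+ */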
+
+int dm_stats_create_region(struct dm_stats *dms, uint64_t *region_id,
+			   uint64_t start, uint64_t len, int64_t step,
+			   int precise, struct dm_histogram *bounds,
+			   const char *program_id, const char *user_data)
+{
+	char *hist_arg = NULL;
+	int r = 0;
+
+	/* Nanosecond counters and histograms both need precise_timestamps. */
+	if ((precise || bounds) && !_stats_check_precise_timestamps(dms))
+		return_0;
+
+	if (bounds) {
+		/* _build_histogram_arg enables precise if vals < 1ms. */
+		if (!(hist_arg = _build_histogram_arg(bounds, &precise)))
+			goto_out;
+	}
+
+	r = _stats_create_region(dms, region_id, start, len, step,
+				 precise, hist_arg, program_id, user_data);
+	dm_free(hist_arg);
+
+out:
+	return r;
+}
+
+static void _stats_clear_group_regions(struct dm_stats *dms, uint64_t group_id)
+{
+	struct dm_stats_group *group;
+	uint64_t i;
+
+	group = &dms->groups[group_id];
+	for (i = dm_bit_get_first(group->regions);
+	     i != DM_STATS_GROUP_NOT_PRESENT;
+	     i = dm_bit_get_next(group->regions, i))
+		dms->regions[i].group_id = DM_STATS_GROUP_NOT_PRESENT;
+}
+
+static int _stats_remove_region_id_from_group(struct dm_stats *dms,
+					      uint64_t region_id)
+{
+	struct dm_stats_region *region = &dms->regions[region_id];
+	uint64_t group_id = region->group_id;
+	dm_bitset_t regions = dms->groups[group_id].regions;
+
+	if (!_stats_region_is_grouped(dms, region_id))
+		return_0;
+
+	dm_bit_clear(regions, region_id);
+
+	/* removing group leader? */
+	if (region_id == group_id) {
+		_stats_clear_group_regions(dms, group_id);
+		_stats_group_destroy(&dms->groups[group_id]);
+	}
+
+	return _stats_set_aux(dms, group_id, dms->regions[group_id].aux_data);
+}
+
+static int _stats_delete_region(struct dm_stats *dms, uint64_t region_id)
+{
+	char msg[STATS_MSG_BUF_LEN];
+	struct dm_task *dmt;
+
+	if (_stats_region_is_grouped(dms, region_id))
+		if (!_stats_remove_region_id_from_group(dms, region_id)) {
+			log_error("Could not remove region ID " FMTu64 " from "
+				  "group ID " FMTu64,
+				  region_id, dms->regions[region_id].group_id);
+			return 0;
+		}
+
+	if (dm_snprintf(msg, sizeof(msg), "@stats_delete " FMTu64, region_id) < 0) {
+		log_error("Could not prepare @stats_delete message.");
+		return 0;
+	}
+
+	dmt = _stats_send_message(dms, msg);
+	if (!dmt)
+		return_0;
+	dm_task_destroy(dmt);
+
+	return 1;
+}
+
+int dm_stats_delete_region(struct dm_stats *dms, uint64_t region_id)
+{
+	int listed = 0;
+
+	if (!_stats_bound(dms))
+		return_0;
+
+	/*
+	 * To correctly delete a region, that may be part of a group, a
+	 * listed handle is required, since the region may need to be
+	 * removed from another region's group descriptor; earlier
+	 * versions of the region deletion interface do not have this
+	 * requirement since there are no dependencies between regions.
+	 *
+	 * Listing a previously unlisted handle has numerous
+	 * side-effects on other calls and operations (e.g. stats
+	 * walks), especially when returning to a function that depends
+	 * on the state of the region table, or statistics cursor.
+	 *
+	 * To avoid changing the semantics of the API, and the need for
+	 * a versioned symbol, maintain a flag indicating when a listing
+	 * has been carried out, and drop the region table before
+	 * returning.
+	 *
+	 * This ensures compatibility with programs compiled against
+	 * earlier versions of libdm.
+	 */
+	if (!dms->regions && !(listed = dm_stats_list(dms, dms->program_id))) {
+		log_error("Could not obtain region list while deleting "
+			  "region ID " FMTu64, region_id);
+		goto bad;
+	}
+
+	if (!dm_stats_get_nr_regions(dms)) {
+		log_error("Could not delete region ID " FMTu64 ": "
+			  "no regions found", region_id);
+		goto bad;
+	}
+
+	/* includes invalid and special region_id values */
+	if (!dm_stats_region_present(dms, region_id)) {
+		log_error("Region ID " FMTu64 " does not exist", region_id);
+		goto bad;
+	}
+
+	if (!_stats_delete_region(dms, region_id))
+		goto bad;
+
+	if (!listed)
+		/* wipe region and mark as not present */
+		_stats_region_destroy(&dms->regions[region_id]);
+	else
+		/* return handle to prior state */
+		_stats_regions_destroy(dms);
+
+	return 1;
+bad:
+	if (listed)
+		_stats_regions_destroy(dms);
+
+	return 0;
+}
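+
+/*
+ * Deletion sketch (illustrative; region_id refers to an existing region
+ * on the bound device). The table is listed first so that group
+ * membership can be checked as described above:
+ *
+ *	if (!dm_stats_list(dms, NULL))
+ *		return 0;
+ *	if (dm_stats_region_present(dms, region_id)
+ *	    && !dm_stats_delete_region(dms, region_id))
+ *		log_error("Failed to delete region.");
+ */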
+
+int dm_stats_clear_region(struct dm_stats *dms, uint64_t region_id)
+{
+	char msg[STATS_MSG_BUF_LEN];
+	struct dm_task *dmt;
+
+	if (!_stats_bound(dms))
+		return_0;
+
+	if (dm_snprintf(msg, sizeof(msg), "@stats_clear " FMTu64, region_id) < 0) {
+		log_error("Could not prepare @stats_clear message.");
+		return 0;
+	}
+
+	dmt = _stats_send_message(dms, msg);
+
+	if (!dmt)
+		return_0;
+
+	dm_task_destroy(dmt);
+
+	return 1;
+}
+
+static struct dm_task *_stats_print_region(struct dm_stats *dms,
+					   uint64_t region_id, unsigned start_line,
+					   unsigned num_lines, unsigned clear)
+{
+	/* @stats_print[_clear] <region_id> [<start_line> <num_lines>] */
+	const char *err_fmt = "Could not prepare @stats_print %s.";
+	char msg[STATS_MSG_BUF_LEN], lines[RANGE_LEN];
+	struct dm_task *dmt = NULL;
+
+	if (start_line || num_lines)
+		if (dm_snprintf(lines, sizeof(lines),
+				"%u %u", start_line, num_lines) < 0) {
+			log_error(err_fmt, "row specification");
+			return NULL;
+		}
+
+	if (dm_snprintf(msg, sizeof(msg), "@stats_print%s " FMTu64 " %s",
+			(clear) ? "_clear" : "",
+			region_id, (start_line || num_lines) ? lines : "") < 0) {
+		log_error(err_fmt, "message");
+		return NULL;
+	}
+
+	if (!(dmt = _stats_send_message(dms, msg)))
+		return_NULL;
+
+	return dmt;
+}
+
+char *dm_stats_print_region(struct dm_stats *dms, uint64_t region_id,
+			    unsigned start_line, unsigned num_lines,
+			    unsigned clear)
+{
+	char *resp = NULL;
+	struct dm_task *dmt = NULL;
+	const char *response;
+
+	if (!_stats_bound(dms))
+		return_NULL;
+
+	/*
+	 * FIXME: 'print' can be emulated for groups or aggregate regions
+	 * by populating the handle and emitting aggregate counter data
+	 * in the kernel print format.
+	 */
+	if (region_id == DM_STATS_WALK_GROUP)
+		return_NULL;
+
+	dmt = _stats_print_region(dms, region_id,
+				  start_line, num_lines, clear);
+
+	if (!dmt)
+		return_NULL;
+
+	if (!(response = dm_task_get_message_response(dmt)))
+		goto_out;
+
+	if (!(resp = dm_pool_strdup(dms->mem, response)))
+		log_error("Could not allocate memory for response buffer.");
+out:
+	dm_task_destroy(dmt);
+
+	return resp;
+}
+
+void dm_stats_buffer_destroy(struct dm_stats *dms, char *buffer)
+{
+	dm_pool_free(dms->mem, buffer);
+}
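+
+/*
+ * Usage sketch for the print interface above (illustrative; region 0 is
+ * assumed to exist). The returned buffer belongs to the handle's pool
+ * and must be released with dm_stats_buffer_destroy():
+ *
+ *	char *buf;
+ *
+ *	if ((buf = dm_stats_print_region(dms, 0, 0, 0, 0))) {
+ *		... parse or log the raw counter rows in buf ...
+ *		dm_stats_buffer_destroy(dms, buf);
+ *	}
+ */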
+
+uint64_t dm_stats_get_nr_regions(const struct dm_stats *dms)
+{
+	if (!dms)
+		return_0;
+
+	if (!dms->regions)
+		return 0;
+
+	return dms->nr_regions;
+}
+
+uint64_t dm_stats_get_nr_groups(const struct dm_stats *dms)
+{
+	uint64_t group_id, nr_groups = 0;
+
+	if (!dms)
+		return_0;
+
+	/* no regions or groups? */
+	if (!dms->regions || !dms->groups)
+		return 0;
+
+	for (group_id = 0; group_id <= dms->max_region; group_id++)
+		if (dms->groups[group_id].group_id
+		    != DM_STATS_GROUP_NOT_PRESENT)
+			nr_groups++;
+
+	return nr_groups;
+}
+
+/**
+ * Test whether region_id is present in this set of stats data.
+ */
+int dm_stats_region_present(const struct dm_stats *dms, uint64_t region_id)
+{
+	if (!dms->regions)
+		return_0;
+
+	if (region_id > dms->max_region)
+		return 0;
+
+	return _stats_region_present(&dms->regions[region_id]);
+}
+
+static int _dm_stats_populate_region(struct dm_stats *dms, uint64_t region_id,
+				     const char *resp)
+{
+	struct dm_stats_region *region;
+
+	if (!_stats_bound(dms))
+		return_0;
+
+	if (!dms->regions) {
+		log_error("Cannot populate empty handle before dm_stats_list().");
+		return 0;
+	}
+	region = &dms->regions[region_id];
+	if (!_stats_parse_region(dms, resp, region, region->timescale)) {
+		log_error("Could not parse @stats_print message response.");
+		return 0;
+	}
+	region->region_id = region_id;
+	return 1;
+}
+
+int dm_stats_populate(struct dm_stats *dms, const char *program_id,
+		      uint64_t region_id)
+{
+	int all_regions = (region_id == DM_STATS_REGIONS_ALL);
+	struct dm_task *dmt = NULL; /* @stats_print task */
+	uint64_t saved_flags; /* saved walk flags */
+	const char *resp;
+
+	/*
+	 * We are about to destroy and re-create the region table, so it
+	 * is safe to use the cursor embedded in the stats handle: just
+	 * save a copy of the current walk_flags to restore later.
+	 */
+	saved_flags = dms->walk_flags;
+
+	if (!_stats_bound(dms))
+		return_0;
+
+	if ((!all_regions) && (region_id & DM_STATS_WALK_GROUP)) {
+		log_error("Invalid region_id for dm_stats_populate: "
+			  "DM_STATS_WALK_GROUP");
+		return 0;
+	}
+
+	if (!dms->nr_regions) {
+		log_error("No regions registered.");
+		return 0;
+	}
+
+	/* allow zero-length program_id for populate */
+	if (!program_id)
+		program_id = dms->program_id;
+
+	if (all_regions && !dm_stats_list(dms, program_id)) {
+		log_error("Could not parse @stats_list response.");
+		goto bad;
+	} else if (!_stats_set_name_cache(dms)) {
+		goto_bad;
+	}
+
+	dms->walk_flags = DM_STATS_WALK_REGION;
+	dm_stats_walk_start(dms);
+	do {
+		region_id = (all_regions)
+			? dm_stats_get_current_region(dms) : region_id;
+
+		/* obtain all lines and clear counter values */
+		if (!(dmt = _stats_print_region(dms, region_id, 0, 0, 1)))
+			goto_bad;
+
+		resp = dm_task_get_message_response(dmt);
+		if (!_dm_stats_populate_region(dms, region_id, resp)) {
+			dm_task_destroy(dmt);
+			goto_bad;
+		}
+
+		dm_task_destroy(dmt);
+		dm_stats_walk_next(dms);
+
+	} while (all_regions && !dm_stats_walk_end(dms));
+
+	dms->walk_flags = saved_flags;
+	return 1;
+
+bad:
+	dms->walk_flags = saved_flags;
+	_stats_regions_destroy(dms);
+	dms->regions = NULL;
+	return 0;
+}
+
+/**
+ * destroy a dm_stats object and all associated regions and counter sets.
+ */
+void dm_stats_destroy(struct dm_stats *dms)
+{
+	if (!dms)
+		return;
+
+	_stats_regions_destroy(dms);
+	_stats_groups_destroy(dms);
+	_stats_clear_binding(dms);
+	dm_pool_destroy(dms->mem);
+	dm_pool_destroy(dms->hist_mem);
+	dm_pool_destroy(dms->group_mem);
+	dm_free(dms->program_id);
+	dm_free((char *) dms->name);
+	dm_free(dms);
+}
+
+/*
+ * Walk each area that is a member of region_id rid.
+ * i is a variable of type int that holds the current area_id.
+ */
+#define _foreach_region_area(dms, rid, i) \
+for ((i) = 0; (i) < _nr_areas_region(&dms->regions[(rid)]); (i)++) \
+
+/*
+ * Walk each region that is a member of group_id gid.
+ * i is a variable of type int that holds the current region_id. + */ +#define _foreach_group_region(dms, gid, i) \ +for ((i) = dm_bit_get_first((dms)->groups[(gid)].regions); \ + (i) != DM_STATS_GROUP_NOT_PRESENT; \ + (i) = dm_bit_get_next((dms)->groups[(gid)].regions, (i))) \ + +/* + * Walk each region that is a member of group_id gid visiting each + * area within the region. + * i is a variable of type int that holds the current region_id. + * j is a variable of type int variable that holds the current area_id. + */ +#define _foreach_group_area(dms, gid, i, j) \ +_foreach_group_region(dms, gid, i) \ + _foreach_region_area(dms, i, j) + +static uint64_t _stats_get_counter(const struct dm_stats *dms, + const struct dm_stats_counters *area, + dm_stats_counter_t counter) +{ + switch(counter) { + case DM_STATS_READS_COUNT: + return area->reads; + case DM_STATS_READS_MERGED_COUNT: + return area->reads_merged; + case DM_STATS_READ_SECTORS_COUNT: + return area->read_sectors; + case DM_STATS_READ_NSECS: + return area->read_nsecs; + case DM_STATS_WRITES_COUNT: + return area->writes; + case DM_STATS_WRITES_MERGED_COUNT: + return area->writes_merged; + case DM_STATS_WRITE_SECTORS_COUNT: + return area->write_sectors; + case DM_STATS_WRITE_NSECS: + return area->write_nsecs; + case DM_STATS_IO_IN_PROGRESS_COUNT: + return area->io_in_progress; + case DM_STATS_IO_NSECS: + return area->io_nsecs; + case DM_STATS_WEIGHTED_IO_NSECS: + return area->weighted_io_nsecs; + case DM_STATS_TOTAL_READ_NSECS: + return area->total_read_nsecs; + case DM_STATS_TOTAL_WRITE_NSECS: + return area->total_write_nsecs; + case DM_STATS_NR_COUNTERS: + default: + log_error("Attempt to read invalid counter: %d", counter); + } + return 0; +} + +uint64_t dm_stats_get_counter(const struct dm_stats *dms, + dm_stats_counter_t counter, + uint64_t region_id, uint64_t area_id) +{ + uint64_t i, j, sum = 0; /* aggregation */ + int sum_regions = 0; + struct dm_stats_region *region; + struct dm_stats_counters *area; + + region_id = (region_id == DM_STATS_REGION_CURRENT) + ? dms->cur_region : region_id ; + area_id = (area_id == DM_STATS_REGION_CURRENT) + ? dms->cur_area : area_id ; + + sum_regions = !!(region_id & DM_STATS_WALK_GROUP); + + if (region_id == DM_STATS_WALK_GROUP) + /* group walk using the cursor */ + region_id = dms->cur_group; + else if (region_id & DM_STATS_WALK_GROUP) + /* group walk using immediate group_id */ + region_id &= ~DM_STATS_WALK_GROUP; + region = &dms->regions[region_id]; + + /* + * All statistics aggregation takes place here: aggregate metrics + * are calculated as normal using the aggregated counter values + * returned for the region or group specified. + */ + + if (_stats_region_is_grouped(dms, region_id) && (sum_regions)) { + /* group */ + if (area_id & DM_STATS_WALK_GROUP) + _foreach_group_area(dms, region->group_id, i, j) { + area = &dms->regions[i].counters[j]; + sum += _stats_get_counter(dms, area, counter); + } + else + _foreach_group_region(dms, region->group_id, i) { + area = &dms->regions[i].counters[area_id]; + sum += _stats_get_counter(dms, area, counter); + } + } else if (area_id == DM_STATS_WALK_REGION) { + /* aggregate region */ + _foreach_region_area(dms, region_id, j) { + area = &dms->regions[region_id].counters[j]; + sum += _stats_get_counter(dms, area, counter); + } + } else { + /* plain region / area */ + area = ®ion->counters[area_id]; + sum = _stats_get_counter(dms, area, counter); + } + + return sum; +} + +/* + * Methods for accessing named counter fields. 
All methods share the + * following naming scheme and prototype: + * + * uint64_t dm_stats_get_COUNTER(const struct dm_stats *, uint64_t, uint64_t) + * + * Where the two integer arguments are the region_id and area_id + * respectively. + * + * name is the name of the counter (lower case) + * counter is the part of the enum name following DM_STATS_ (upper case) + */ +#define MK_STATS_GET_COUNTER_FN(name, counter) \ +uint64_t dm_stats_get_ ## name(const struct dm_stats *dms, \ + uint64_t region_id, uint64_t area_id) \ +{ \ + return dm_stats_get_counter(dms, DM_STATS_ ## counter, \ + region_id, area_id); \ +} + +MK_STATS_GET_COUNTER_FN(reads, READS_COUNT) +MK_STATS_GET_COUNTER_FN(reads_merged, READS_MERGED_COUNT) +MK_STATS_GET_COUNTER_FN(read_sectors, READ_SECTORS_COUNT) +MK_STATS_GET_COUNTER_FN(read_nsecs, READ_NSECS) +MK_STATS_GET_COUNTER_FN(writes, WRITES_COUNT) +MK_STATS_GET_COUNTER_FN(writes_merged, WRITES_MERGED_COUNT) +MK_STATS_GET_COUNTER_FN(write_sectors, WRITE_SECTORS_COUNT) +MK_STATS_GET_COUNTER_FN(write_nsecs, WRITE_NSECS) +MK_STATS_GET_COUNTER_FN(io_in_progress, IO_IN_PROGRESS_COUNT) +MK_STATS_GET_COUNTER_FN(io_nsecs, IO_NSECS) +MK_STATS_GET_COUNTER_FN(weighted_io_nsecs, WEIGHTED_IO_NSECS) +MK_STATS_GET_COUNTER_FN(total_read_nsecs, TOTAL_READ_NSECS) +MK_STATS_GET_COUNTER_FN(total_write_nsecs, TOTAL_WRITE_NSECS) +#undef MK_STATS_GET_COUNTER_FN + +/* + * Floating point stats metric functions + * + * Called from dm_stats_get_metric() to calculate the value of + * the requested metric. + * + * int _metric_name(const struct dm_stats *dms, + * struct dm_stats_counters *c, + * double *value); + * + * Calculate a metric value from the counter data for the given + * identifiers and store it in the memory pointed to by value, + * applying group or region aggregation if enabled. + * + * Return one on success or zero on failure. + * + * To add a new metric: + * + * o Add a new name to the dm_stats_metric_t enum. + * o Create a _metric_fn() to calculate the new metric. + * o Add _metric_fn to the _metrics function table + * (entries in enum order). + * o Do not add a new named public function for the metric - + * users of new metrics are encouraged to convert to the enum + * based metric interface. 
+ * + */ + +static int _rd_merges_per_sec(const struct dm_stats *dms, double *rrqm, + uint64_t region_id, uint64_t area_id) +{ + double mrgs; + mrgs = (double) dm_stats_get_counter(dms, DM_STATS_READS_MERGED_COUNT, + region_id, area_id); + + *rrqm = mrgs / (double) dms->interval_ns; + + return 1; +} + +static int _wr_merges_per_sec(const struct dm_stats *dms, double *wrqm, + uint64_t region_id, uint64_t area_id) +{ + double mrgs; + mrgs = (double) dm_stats_get_counter(dms, DM_STATS_WRITES_MERGED_COUNT, + region_id, area_id); + + *wrqm = mrgs / (double) dms->interval_ns; + + return 1; +} + +static int _reads_per_sec(const struct dm_stats *dms, double *rd_s, + uint64_t region_id, uint64_t area_id) +{ + double reads; + reads = (double) dm_stats_get_counter(dms, DM_STATS_READS_COUNT, + region_id, area_id); + + *rd_s = (reads * NSEC_PER_SEC) / (double) dms->interval_ns; + + return 1; +} + +static int _writes_per_sec(const struct dm_stats *dms, double *wr_s, + uint64_t region_id, uint64_t area_id) +{ + double writes; + writes = (double) dm_stats_get_counter(dms, DM_STATS_WRITES_COUNT, + region_id, area_id); + + *wr_s = (writes * NSEC_PER_SEC) / (double) dms->interval_ns; + + return 1; +} + +static int _read_sectors_per_sec(const struct dm_stats *dms, double *rsec_s, + uint64_t region_id, uint64_t area_id) +{ + double sect; + sect = (double) dm_stats_get_counter(dms, DM_STATS_READ_SECTORS_COUNT, + region_id, area_id); + + *rsec_s = (sect * (double) NSEC_PER_SEC) / (double) dms->interval_ns; + + return 1; +} + +static int _write_sectors_per_sec(const struct dm_stats *dms, double *wsec_s, + uint64_t region_id, uint64_t area_id) +{ + double sect; + sect = (double) dm_stats_get_counter(dms, DM_STATS_WRITE_SECTORS_COUNT, + region_id, area_id); + + *wsec_s = (sect * (double) NSEC_PER_SEC) / (double) dms->interval_ns; + + return 1; +} + +static int _average_request_size(const struct dm_stats *dms, double *arqsz, + uint64_t region_id, uint64_t area_id) +{ + double ios, sectors; + + ios = (double) (dm_stats_get_counter(dms, DM_STATS_READS_COUNT, + region_id, area_id) + + dm_stats_get_counter(dms, DM_STATS_WRITES_COUNT, + region_id, area_id)); + sectors = (double) (dm_stats_get_counter(dms, DM_STATS_READ_SECTORS_COUNT, + region_id, area_id) + + dm_stats_get_counter(dms, DM_STATS_WRITE_SECTORS_COUNT, + region_id, area_id)); + + if (ios > 0.0) + *arqsz = sectors / ios; + else + *arqsz = 0.0; + + return 1; +} + +static int _average_queue_size(const struct dm_stats *dms, double *qusz, + uint64_t region_id, uint64_t area_id) +{ + double io_ticks; + io_ticks = (double) dm_stats_get_counter(dms, DM_STATS_WEIGHTED_IO_NSECS, + region_id, area_id); + + if (io_ticks > 0.0) + *qusz = io_ticks / (double) dms->interval_ns; + else + *qusz = 0.0; + + return 1; +} + +static int _average_wait_time(const struct dm_stats *dms, double *await, + uint64_t region_id, uint64_t area_id) +{ + uint64_t io_ticks, nr_ios; + + io_ticks = dm_stats_get_counter(dms, DM_STATS_READ_NSECS, + region_id, area_id); + io_ticks += dm_stats_get_counter(dms, DM_STATS_WRITE_NSECS, + region_id, area_id); + + nr_ios = dm_stats_get_counter(dms, DM_STATS_READS_COUNT, + region_id, area_id); + nr_ios += dm_stats_get_counter(dms, DM_STATS_WRITES_COUNT, + region_id, area_id); + + if (nr_ios > 0) + *await = (double) io_ticks / (double) nr_ios; + else + *await = 0.0; + + return 1; +} + +static int _average_rd_wait_time(const struct dm_stats *dms, double *await, + uint64_t region_id, uint64_t area_id) +{ + uint64_t rd_io_ticks, nr_rd_ios; + + rd_io_ticks = 
dm_stats_get_counter(dms, DM_STATS_READ_NSECS, + region_id, area_id); + nr_rd_ios = dm_stats_get_counter(dms, DM_STATS_READS_COUNT, + region_id, area_id); + + /* + * If rd_io_ticks is > 0 this should imply that nr_rd_ios is + * also > 0 (unless a kernel bug exists). Test for both here + * before using the IO count as a divisor (Coverity). + */ + if (rd_io_ticks > 0 && nr_rd_ios > 0) + *await = (double) rd_io_ticks / (double) nr_rd_ios; + else + *await = 0.0; + + return 1; +} + +static int _average_wr_wait_time(const struct dm_stats *dms, double *await, + uint64_t region_id, uint64_t area_id) +{ + uint64_t wr_io_ticks, nr_wr_ios; + + wr_io_ticks = dm_stats_get_counter(dms, DM_STATS_WRITE_NSECS, + region_id, area_id); + nr_wr_ios = dm_stats_get_counter(dms, DM_STATS_WRITES_COUNT, + region_id, area_id); + + /* + * If wr_io_ticks is > 0 this should imply that nr_wr_ios is + * also > 0 (unless a kernel bug exists). Test for both here + * before using the IO count as a divisor (Coverity). + */ + if (wr_io_ticks > 0 && nr_wr_ios > 0) + *await = (double) wr_io_ticks / (double) nr_wr_ios; + else + *await = 0.0; + + return 1; +} + +static int _throughput(const struct dm_stats *dms, double *tput, + uint64_t region_id, uint64_t area_id) +{ + uint64_t nr_ios; + + nr_ios = dm_stats_get_counter(dms, DM_STATS_READS_COUNT, + region_id, area_id); + nr_ios += dm_stats_get_counter(dms, DM_STATS_WRITES_COUNT, + region_id, area_id); + + *tput = ((double) NSEC_PER_SEC * (double) nr_ios) + / (double) (dms->interval_ns); + + return 1; +} + +static int _utilization(const struct dm_stats *dms, double *util, + uint64_t region_id, uint64_t area_id) +{ + uint64_t io_nsecs, interval_ns = dms->interval_ns; + + /** + * If io_nsec > interval_ns there is something wrong with the clock + * for the last interval; do not allow a value > 100% utilization + * to be passed to a dm_make_percent() call. We expect to see these + * at startup if counters have not been cleared before the first read. + * + * A zero interval_ns is also an error since metrics cannot be + * calculated without a defined interval - return zero and emit a + * backtrace in this case. + */ + io_nsecs = dm_stats_get_counter(dms, DM_STATS_IO_NSECS, + region_id, area_id); + + if (!interval_ns) { + *util = 0.0; + return_0; + } + + io_nsecs = ((io_nsecs < interval_ns) ? 
io_nsecs : interval_ns);
+
+	*util = (double) io_nsecs / (double) interval_ns;
+
+	return 1;
+}
+
+static int _service_time(const struct dm_stats *dms, double *svctm,
+			 uint64_t region_id, uint64_t area_id)
+{
+	double tput, util;
+
+	if (!_throughput(dms, &tput, region_id, area_id))
+		return 0;
+
+	if (!_utilization(dms, &util, region_id, area_id))
+		return 0;
+
+	util *= 100;
+
+	/* avoid NAN with zero counter values */
+	if ((uint64_t) tput == 0 || (uint64_t) util == 0) {
+		*svctm = 0.0;
+		return 1;
+	}
+
+	*svctm = ((double) NSEC_PER_SEC * dm_percent_to_float(util))
+		  / (100.0 * tput);
+
+	return 1;
+}
+
+/*
+ * Table in enum order:
+ *   DM_STATS_RD_MERGES_PER_SEC,
+ *   DM_STATS_WR_MERGES_PER_SEC,
+ *   DM_STATS_READS_PER_SEC,
+ *   DM_STATS_WRITES_PER_SEC,
+ *   DM_STATS_READ_SECTORS_PER_SEC,
+ *   DM_STATS_WRITE_SECTORS_PER_SEC,
+ *   DM_STATS_AVERAGE_REQUEST_SIZE,
+ *   DM_STATS_AVERAGE_QUEUE_SIZE,
+ *   DM_STATS_AVERAGE_WAIT_TIME,
+ *   DM_STATS_AVERAGE_RD_WAIT_TIME,
+ *   DM_STATS_AVERAGE_WR_WAIT_TIME,
+ *   DM_STATS_SERVICE_TIME,
+ *   DM_STATS_THROUGHPUT,
+ *   DM_STATS_UTILIZATION
+ */
+
+typedef int (*_metric_fn_t)(const struct dm_stats *, double *,
+			    uint64_t, uint64_t);
+
+_metric_fn_t _metrics[DM_STATS_NR_METRICS] = {
+	_rd_merges_per_sec,
+	_wr_merges_per_sec,
+	_reads_per_sec,
+	_writes_per_sec,
+	_read_sectors_per_sec,
+	_write_sectors_per_sec,
+	_average_request_size,
+	_average_queue_size,
+	_average_wait_time,
+	_average_rd_wait_time,
+	_average_wr_wait_time,
+	_service_time,
+	_throughput,
+	_utilization
+};
+
+int dm_stats_get_metric(const struct dm_stats *dms, int metric,
+			uint64_t region_id, uint64_t area_id, double *value)
+{
+	if (!dms->interval_ns)
+		return_0;
+
+	/*
+	 * Decode DM_STATS_{REGION,AREA}_CURRENT here; counters will then
+	 * be returned for the actual current region and area.
+	 *
+	 * DM_STATS_WALK_GROUP is passed through to the counter methods -
+	 * aggregates for the group are returned and used to calculate
+	 * the metric for the group totals.
+	 */
+	region_id = (region_id == DM_STATS_REGION_CURRENT)
+		     ? dms->cur_region : region_id;
+	area_id = (area_id == DM_STATS_AREA_CURRENT)
+		   ? dms->cur_area : area_id;
+
+	if (metric < 0 || metric >= DM_STATS_NR_METRICS) {
+		log_error("Attempt to read invalid metric: %d", metric);
+		return 0;
+	}
+
+	return _metrics[metric](dms, value, region_id, area_id);
+}
+
+/**
+ * Methods for accessing stats metrics. All methods share the
+ * following naming scheme and prototype:
+ *
+ * int dm_stats_get_NAME(const struct dm_stats *dms, double *value,
+ *			 uint64_t region_id, uint64_t area_id)
+ *
+ * Where the two integer arguments are the region_id and area_id
+ * respectively.
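+ *
+ * For example, a caller-side sketch (an illustration, not part of the
+ * library; it assumes a bound handle that has been populated with
+ * dm_stats_populate() and a sampling interval set with
+ * dm_stats_set_sampling_interval_ns()):
+ *
+ *	double tput;
+ *
+ *	if (dm_stats_get_metric(dms, DM_STATS_THROUGHPUT,
+ *				DM_STATS_REGION_CURRENT,
+ *				DM_STATS_AREA_CURRENT, &tput))
+ *		printf("%.2f IOs/s\n", tput);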
+ * + * name is the name of the metric (lower case) + * metric is the part of the enum name following DM_STATS_ (upper case) + */ +#define MK_STATS_GET_METRIC_FN(name, metric, meta) \ +int dm_stats_get_ ## name(const struct dm_stats *dms, double *meta, \ + uint64_t region_id, uint64_t area_id) \ +{ \ + return dm_stats_get_metric(dms, DM_STATS_ ## metric, \ + region_id, area_id, meta); \ +} + +MK_STATS_GET_METRIC_FN(rd_merges_per_sec, RD_MERGES_PER_SEC, rrqm) +MK_STATS_GET_METRIC_FN(wr_merges_per_sec, WR_MERGES_PER_SEC, wrqm) +MK_STATS_GET_METRIC_FN(reads_per_sec, READS_PER_SEC, rd_s) +MK_STATS_GET_METRIC_FN(writes_per_sec, WRITES_PER_SEC, wr_s) +MK_STATS_GET_METRIC_FN(read_sectors_per_sec, READ_SECTORS_PER_SEC, rsec_s) +MK_STATS_GET_METRIC_FN(write_sectors_per_sec, WRITE_SECTORS_PER_SEC, wsec_s) +MK_STATS_GET_METRIC_FN(average_request_size, AVERAGE_REQUEST_SIZE, arqsz) +MK_STATS_GET_METRIC_FN(average_queue_size, AVERAGE_QUEUE_SIZE, qusz) +MK_STATS_GET_METRIC_FN(average_wait_time, AVERAGE_WAIT_TIME, await) +MK_STATS_GET_METRIC_FN(average_rd_wait_time, AVERAGE_RD_WAIT_TIME, await) +MK_STATS_GET_METRIC_FN(average_wr_wait_time, AVERAGE_WR_WAIT_TIME, await) +MK_STATS_GET_METRIC_FN(service_time, SERVICE_TIME, svctm) +MK_STATS_GET_METRIC_FN(throughput, THROUGHPUT, tput) + +/* + * Utilization is an exception since it used the dm_percent_t type in the + * original named function based interface: preserve this behaviour for + * backwards compatibility with existing users. + * + * The same metric may be accessed as a double via the enum based metric + * interface. + */ +int dm_stats_get_utilization(const struct dm_stats *dms, dm_percent_t *util, + uint64_t region_id, uint64_t area_id) +{ + double _util; + + if (!dm_stats_get_metric(dms, DM_STATS_UTILIZATION, + region_id, area_id, &_util)) + return_0; + /* scale up utilization value in the range [0.00..1.00] */ + *util = dm_make_percent(DM_PERCENT_1 * _util, DM_PERCENT_1); + return 1; +} + +void dm_stats_set_sampling_interval_ms(struct dm_stats *dms, uint64_t interval_ms) +{ + /* All times use nsecs internally. */ + dms->interval_ns = interval_ms * NSEC_PER_MSEC; +} + +void dm_stats_set_sampling_interval_ns(struct dm_stats *dms, uint64_t interval_ns) +{ + dms->interval_ns = interval_ns; +} + +uint64_t dm_stats_get_sampling_interval_ms(const struct dm_stats *dms) +{ + /* All times use nsecs internally. */ + return (dms->interval_ns / NSEC_PER_MSEC); +} + +uint64_t dm_stats_get_sampling_interval_ns(const struct dm_stats *dms) +{ + /* All times use nsecs internally. 
*/ + return (dms->interval_ns); +} + +int dm_stats_set_program_id(struct dm_stats *dms, int allow_empty, + const char *program_id) +{ + if (!allow_empty && (!program_id || !strlen(program_id))) { + log_error("Empty program_id not permitted without " + "allow_empty=1"); + return 0; + } + + if (!program_id) + program_id = ""; + + dm_free(dms->program_id); + + if (!(dms->program_id = dm_strdup(program_id))) + return_0; + + return 1; +} + +uint64_t dm_stats_get_current_region(const struct dm_stats *dms) +{ + return dms->cur_region; +} + +uint64_t dm_stats_get_current_area(const struct dm_stats *dms) +{ + return dms->cur_area & ~DM_STATS_WALK_ALL; +} + +int dm_stats_get_region_start(const struct dm_stats *dms, uint64_t *start, + uint64_t region_id) +{ + if (!dms || !dms->regions) + return_0; + + /* start is unchanged when aggregating areas */ + if (region_id & DM_STATS_WALK_REGION) + region_id &= ~DM_STATS_WALK_REGION; + + /* use start of first region as group start */ + if (region_id & DM_STATS_WALK_GROUP) { + if (region_id == DM_STATS_WALK_GROUP) + region_id = dms->cur_group; + else + region_id &= ~DM_STATS_WALK_GROUP; + } + + *start = dms->regions[region_id].start; + return 1; +} + +int dm_stats_get_region_len(const struct dm_stats *dms, uint64_t *len, + uint64_t region_id) +{ + uint64_t i; + if (!dms || !dms->regions) + return_0; + + *len = 0; + + /* length is unchanged when aggregating areas */ + if (region_id & DM_STATS_WALK_REGION) + region_id &= ~DM_STATS_WALK_REGION; + + if (region_id & DM_STATS_WALK_GROUP) { + /* decode region / group ID */ + if (region_id == DM_STATS_WALK_GROUP) + region_id = dms->cur_group; + else + region_id &= ~DM_STATS_WALK_GROUP; + + /* use sum of region sizes as group size */ + if (_stats_region_is_grouped(dms, region_id)) + _foreach_group_region(dms, dms->cur_group, i) + *len += dms->regions[i].len; + else { + log_error("Group ID " FMTu64 " does not exist", + region_id); + return 0; + } + } else + *len = dms->regions[region_id].len; + + return 1; +} + +int dm_stats_get_region_area_len(const struct dm_stats *dms, uint64_t *len, + uint64_t region_id) +{ + if (!dms || !dms->regions) + return_0; + + /* groups are not subdivided - area size equals group size */ + if (region_id & (DM_STATS_WALK_GROUP | DM_STATS_WALK_REGION)) + /* get_region_len will decode region_id */ + return dm_stats_get_region_len(dms, len, region_id); + + *len = dms->regions[region_id].step; + return 1; +} + +int dm_stats_get_current_region_start(const struct dm_stats *dms, + uint64_t *start) +{ + return dm_stats_get_region_start(dms, start, dms->cur_region); +} + +int dm_stats_get_current_region_len(const struct dm_stats *dms, + uint64_t *len) +{ + return dm_stats_get_region_len(dms, len, dms->cur_region); +} + +int dm_stats_get_current_region_area_len(const struct dm_stats *dms, + uint64_t *step) +{ + return dm_stats_get_region_area_len(dms, step, dms->cur_region); +} + +int dm_stats_get_area_start(const struct dm_stats *dms, uint64_t *start, + uint64_t region_id, uint64_t area_id) +{ + struct dm_stats_region *region; + if (!dms || !dms->regions) + return_0; + + /* group or region area start equals region start */ + if (region_id & (DM_STATS_WALK_GROUP | DM_STATS_WALK_REGION)) + return dm_stats_get_region_start(dms, start, region_id); + + region = &dms->regions[region_id]; + *start = region->start + region->step * area_id; + return 1; +} + +int dm_stats_get_area_offset(const struct dm_stats *dms, uint64_t *offset, + uint64_t region_id, uint64_t area_id) +{ + if (!dms || !dms->regions) + 
return_0; + + /* no areas for groups or aggregate regions */ + if (region_id & (DM_STATS_WALK_GROUP | DM_STATS_WALK_REGION)) + *offset = 0; + else + *offset = dms->regions[region_id].step * area_id; + + return 1; +} + +int dm_stats_get_current_area_start(const struct dm_stats *dms, + uint64_t *start) +{ + return dm_stats_get_area_start(dms, start, + dms->cur_region, dms->cur_area); +} + +int dm_stats_get_current_area_offset(const struct dm_stats *dms, + uint64_t *offset) +{ + return dm_stats_get_area_offset(dms, offset, + dms->cur_region, dms->cur_area); +} + +int dm_stats_get_current_area_len(const struct dm_stats *dms, + uint64_t *len) +{ + return dm_stats_get_region_area_len(dms, len, dms->cur_region); +} + +const char *dm_stats_get_region_program_id(const struct dm_stats *dms, + uint64_t region_id) +{ + const char *program_id = NULL; + + if (region_id & DM_STATS_WALK_GROUP) + return dms->program_id; + + if (region_id & DM_STATS_WALK_REGION) + region_id &= ~DM_STATS_WALK_REGION; + + program_id = dms->regions[region_id].program_id; + return (program_id) ? program_id : ""; +} + +const char *dm_stats_get_region_aux_data(const struct dm_stats *dms, + uint64_t region_id) +{ + const char *aux_data = NULL; + + if (region_id & DM_STATS_WALK_GROUP) + return ""; + + if (region_id & DM_STATS_WALK_REGION) + region_id &= ~DM_STATS_WALK_REGION; + + aux_data = dms->regions[region_id].aux_data; + return (aux_data) ? aux_data : "" ; +} + +int dm_stats_set_alias(struct dm_stats *dms, uint64_t group_id, const char *alias) +{ + struct dm_stats_group *group = NULL; + const char *old_alias = NULL; + + if (!dms->regions || !dms->groups || !alias) + return_0; + + if (!_stats_region_is_grouped(dms, group_id)) { + log_error("Cannot set alias for ungrouped region ID " + FMTu64, group_id); + return 0; + } + + if (group_id & DM_STATS_WALK_GROUP) { + if (group_id == DM_STATS_WALK_GROUP) + group_id = dms->cur_group; + else + group_id &= ~DM_STATS_WALK_GROUP; + } + + if (group_id != dms->regions[group_id].group_id) { + /* dm_stats_set_alias() must be called on the group ID. */ + log_error("Cannot set alias for group member " FMTu64 ".", + group_id); + return 0; + } + + group = &dms->groups[group_id]; + old_alias = group->alias; + + group->alias = dm_strdup(alias); + if (!group->alias) { + log_error("Could not allocate memory for alias."); + goto bad; + } + + if (!_stats_set_aux(dms, group_id, dms->regions[group_id].aux_data)) { + log_error("Could not set new aux_data"); + goto bad; + } + + dm_free((char *) old_alias); + + return 1; + +bad: + dm_free((char *) group->alias); + group->alias = old_alias; + return 0; +} + +const char *dm_stats_get_alias(const struct dm_stats *dms, uint64_t id) +{ + const struct dm_stats_region *region; + + id = (id == DM_STATS_REGION_CURRENT) ? 
dms->cur_region : id; + + if (id & DM_STATS_WALK_GROUP) { + if (id == DM_STATS_WALK_GROUP) + id = dms->cur_group; + else + id &= ~DM_STATS_WALK_GROUP; + } + + region = &dms->regions[id]; + if (!_stats_region_is_grouped(dms, id) + || !dms->groups[region->group_id].alias) + return dms->name; + + return dms->groups[region->group_id].alias; +} + +const char *dm_stats_get_current_region_program_id(const struct dm_stats *dms) +{ + return dm_stats_get_region_program_id(dms, dms->cur_region); +} + +const char *dm_stats_get_current_region_aux_data(const struct dm_stats *dms) +{ + return dm_stats_get_region_aux_data(dms, dms->cur_region); +} + +int dm_stats_get_region_precise_timestamps(const struct dm_stats *dms, + uint64_t region_id) +{ + struct dm_stats_region *region; + + if (region_id == DM_STATS_REGION_CURRENT) + region_id = dms->cur_region; + + if (region_id == DM_STATS_WALK_GROUP) + region_id = dms->cur_group; + else if (region_id & DM_STATS_WALK_GROUP) + region_id &= ~DM_STATS_WALK_GROUP; + + region = &dms->regions[region_id]; + return region->timescale == 1; +} + +int dm_stats_get_current_region_precise_timestamps(const struct dm_stats *dms) +{ + return dm_stats_get_region_precise_timestamps(dms, + DM_STATS_REGION_CURRENT); +} + +/* + * Histogram access methods. + */ + +static void _sum_histogram_bins(const struct dm_stats *dms, + struct dm_histogram *dmh_aggr, + uint64_t region_id, uint64_t area_id) +{ + struct dm_stats_region *region; + struct dm_histogram_bin *bins; + struct dm_histogram *dmh_cur; + int bin; + + region = &dms->regions[region_id]; + dmh_cur = region->counters[area_id].histogram; + bins = dmh_aggr->bins; + + for (bin = 0; bin < dmh_aggr->nr_bins; bin++) + bins[bin].count += dmh_cur->bins[bin].count; +} + +/* + * Create an aggregate histogram for a sub-divided region or a group. 
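+ *
+ * The aggregate is built by summing the per-area histogram bins and
+ * is cached in the owning region or group: subsequent calls for the
+ * same identifiers return the cached histogram rather than re-summing
+ * the bins.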
+ */ +static struct dm_histogram *_aggregate_histogram(const struct dm_stats *dms, + uint64_t region_id, + uint64_t area_id) +{ + struct dm_histogram *dmh_aggr, *dmh_cur, **dmh_cachep; + uint64_t group_id = DM_STATS_GROUP_NOT_PRESENT; + int bin, nr_bins, group = 1; + size_t hist_size; + + if (area_id == DM_STATS_WALK_REGION) { + /* region aggregation */ + group = 0; + if (!_stats_region_present(&dms->regions[region_id])) + return_NULL; + + if (!dms->regions[region_id].bounds) + return_NULL; + + if (!dms->regions[region_id].counters) + return dms->regions[region_id].bounds; + + if (dms->regions[region_id].histogram) + return dms->regions[region_id].histogram; + + dmh_cur = dms->regions[region_id].counters[0].histogram; + dmh_cachep = &dms->regions[region_id].histogram; + nr_bins = dms->regions[region_id].bounds->nr_bins; + } else { + /* group aggregation */ + group_id = region_id; + area_id = DM_STATS_WALK_GROUP; + if (!_stats_group_id_present(dms, group_id)) + return_NULL; + + if (!dms->regions[group_id].bounds) + return_NULL; + + if (!dms->regions[group_id].counters) + return dms->regions[group_id].bounds; + + if (dms->groups[group_id].histogram) + return dms->groups[group_id].histogram; + + dmh_cur = dms->regions[group_id].counters[0].histogram; + dmh_cachep = &dms->groups[group_id].histogram; + nr_bins = dms->regions[group_id].bounds->nr_bins; + } + + hist_size = sizeof(*dmh_aggr) + + nr_bins * sizeof(struct dm_histogram_bin); + + if (!(dmh_aggr = dm_pool_zalloc(dms->hist_mem, hist_size))) { + log_error("Could not allocate group histogram"); + return 0; + } + + dmh_aggr->nr_bins = dmh_cur->nr_bins; + dmh_aggr->dms = dms; + + if (!group) + _foreach_region_area(dms, region_id, area_id) { + _sum_histogram_bins(dms, dmh_aggr, region_id, area_id); + } + else { + _foreach_group_area(dms, group_id, region_id, area_id) { + _sum_histogram_bins(dms, dmh_aggr, region_id, area_id); + } + } + + for (bin = 0; bin < nr_bins; bin++) { + dmh_aggr->sum += dmh_aggr->bins[bin].count; + dmh_aggr->bins[bin].upper = dmh_cur->bins[bin].upper; + } + + /* cache aggregate histogram for subsequent access */ + *dmh_cachep = dmh_aggr; + + return dmh_aggr; +} + +struct dm_histogram *dm_stats_get_histogram(const struct dm_stats *dms, + uint64_t region_id, + uint64_t area_id) +{ + int aggr = 0; + + if (region_id == DM_STATS_REGION_CURRENT) { + region_id = dms->cur_region; + if (region_id & DM_STATS_WALK_GROUP) { + region_id = dms->cur_group; + aggr = 1; + } + } else if (region_id & DM_STATS_WALK_GROUP) { + region_id &= ~DM_STATS_WALK_GROUP; + aggr = 1; + } + + area_id = (area_id == DM_STATS_AREA_CURRENT) + ? dms->cur_area : area_id ; + + if (area_id == DM_STATS_WALK_REGION) + aggr = 1; + + if (aggr) + return _aggregate_histogram(dms, region_id, area_id); + + if (region_id & DM_STATS_WALK_REGION) + region_id &= ~DM_STATS_WALK_REGION; + + if (!dms->regions[region_id].counters) + return dms->regions[region_id].bounds; + + return dms->regions[region_id].counters[area_id].histogram; +} + +int dm_histogram_get_nr_bins(const struct dm_histogram *dmh) +{ + return dmh->nr_bins; +} + +uint64_t dm_histogram_get_bin_lower(const struct dm_histogram *dmh, int bin) +{ + return (!bin) ? 
0 : dmh->bins[bin - 1].upper;
+}
+
+uint64_t dm_histogram_get_bin_upper(const struct dm_histogram *dmh, int bin)
+{
+	return dmh->bins[bin].upper;
+}
+
+uint64_t dm_histogram_get_bin_width(const struct dm_histogram *dmh, int bin)
+{
+	uint64_t upper, lower;
+	upper = dm_histogram_get_bin_upper(dmh, bin);
+	lower = dm_histogram_get_bin_lower(dmh, bin);
+	return (upper - lower);
+}
+
+uint64_t dm_histogram_get_bin_count(const struct dm_histogram *dmh, int bin)
+{
+	return dmh->bins[bin].count;
+}
+
+uint64_t dm_histogram_get_sum(const struct dm_histogram *dmh)
+{
+	return dmh->sum;
+}
+
+dm_percent_t dm_histogram_get_bin_percent(const struct dm_histogram *dmh,
+					  int bin)
+{
+	uint64_t value = dm_histogram_get_bin_count(dmh, bin);
+	uint64_t width = dm_histogram_get_bin_width(dmh, bin);
+	uint64_t total = dm_histogram_get_sum(dmh);
+
+	double val = (double) value;
+
+	if (!total || !value || !width)
+		return DM_PERCENT_0;
+
+	return dm_make_percent((uint64_t) val, total);
+}
+
+/*
+ * Histogram string helper functions: used to construct histogram and
+ * bin boundary strings from numeric data.
+ */
+
+/*
+ * Allocate an unbound histogram object with nr_bins bins. Only used
+ * for histograms that hold bounds values as arguments for calls to
+ * dm_stats_create_region().
+ */
+static struct dm_histogram *_alloc_dm_histogram(int nr_bins)
+{
+	/* Allocate space for dm_histogram + nr_entries. */
+	size_t size = sizeof(struct dm_histogram) +
+		      (unsigned) nr_bins * sizeof(struct dm_histogram_bin);
+	return dm_zalloc(size);
+}
+
+/*
+ * Parse a histogram bounds string supplied by the user. The string
+ * consists of a list of numbers, "n1,n2,n3,..." with optional 'ns',
+ * 'us', 'ms', or 's' unit suffixes (for example "10ms,20ms,30ms").
+ * A value with no suffix is interpreted as a count of nanoseconds.
+ *
+ * On success a pointer to a new struct dm_histogram is returned; its
+ * bins contain the parsed bounds values expressed in units of
+ * nanoseconds.
+ */
+struct dm_histogram *dm_histogram_bounds_from_string(const char *bounds_str)
+{
+	static const char _valid_chars[] = "0123456789,muns";
+	uint64_t this_val = 0, mult = 1;
+	const char *c, *v, *val_start;
+	struct dm_histogram_bin *cur;
+	struct dm_histogram *dmh;
+	int nr_entries = 1;
+	char *endptr;
+
+	c = bounds_str;
+
+	/* Count number of bounds entries. */
+	while (*c)
+		if (*(c++) == ',')
+			nr_entries++;
+
+	c = bounds_str;
+
+	if (!(dmh = _alloc_dm_histogram(nr_entries)))
+		return_NULL;
+
+	dmh->nr_bins = nr_entries;
+
+	cur = dmh->bins;
+
+	do {
+		for (v = _valid_chars; *v; v++)
+			if (*c == *v)
+				break;
+
+		if (!*v) {
+			stack;
+			goto badchar;
+		}
+
+		if (*c == ',') {
+			log_error("Empty histogram bin not allowed: %s",
+				  bounds_str);
+			goto bad;
+		} else {
+			val_start = c;
+			endptr = NULL;
+
+			this_val = strtoull(val_start, &endptr, 10);
+			if (!endptr) {
+				log_error("Could not parse histogram bound.");
+				goto bad;
+			}
+			c = endptr; /* Advance to units, comma, or end. */
+
+			if (*c == 's') {
+				mult = NSEC_PER_SEC;
+				c++; /* Advance over 's'. */
+			} else if (*(c + 1) == 's') {
+				if (*c == 'm')
+					mult = NSEC_PER_MSEC;
+				else if (*c == 'u')
+					mult = NSEC_PER_USEC;
+				else if (*c == 'n')
+					mult = 1;
+				else {
+					stack;
+					goto badchar;
+				}
+				c += 2; /* Advance over 'ms', 'us', or 'ns'. */
+			} else if (*c == ',')
+				c++;
+			else if (*c) { /* Expected ',' or NUL. */
+				stack;
+				goto badchar;
+			}
+
+			if (*c == ',')
+				c++;
+			this_val *= mult;
+			(cur++)->upper = this_val;
+		}
+	} while (*c);
+
+	/* Bounds histograms have no owner.
*/ + dmh->dms = NULL; + dmh->region = NULL; + + return dmh; + +badchar: + log_error("Invalid character in histogram: %c", *c); +bad: + dm_free(dmh); + return NULL; +} + +struct dm_histogram *dm_histogram_bounds_from_uint64(const uint64_t *bounds) +{ + const uint64_t *entry = bounds; + struct dm_histogram_bin *cur; + struct dm_histogram *dmh; + int nr_entries = 1; + + if (!bounds || !bounds[0]) { + log_error("Could not parse empty histogram bounds array"); + return 0; + } + + /* Count number of bounds entries. */ + while(*entry) + if (*(++entry)) + nr_entries++; + + entry = bounds; + + if (!(dmh = _alloc_dm_histogram(nr_entries))) + return_0; + + dmh->nr_bins = nr_entries; + + cur = dmh->bins; + + while (*entry) + (cur++)->upper = *(entry++); + + /* Bounds histograms have no owner. */ + dmh->dms = NULL; + dmh->region = NULL; + + return dmh; +} + +void dm_histogram_bounds_destroy(struct dm_histogram *bounds) +{ + if (!bounds) + return; + + /* Bounds histograms are not bound to any handle or region. */ + if (bounds->dms || bounds->region) { + log_error("Freeing invalid histogram bounds pointer %p.", + (void *) bounds); + stack; + } + /* dm_free() expects a (void *). */ + dm_free((void *) bounds); +} + +/* + * Scale a bounds value down from nanoseconds to the largest possible + * whole unit suffix. + */ +static void _scale_bound_value_to_suffix(uint64_t *bound, const char **suffix) +{ + *suffix = "ns"; + if (!(*bound % NSEC_PER_SEC)) { + *bound /= NSEC_PER_SEC; + *suffix = "s"; + } else if (!(*bound % NSEC_PER_MSEC)) { + *bound /= NSEC_PER_MSEC; + *suffix = "ms"; + } else if (!(*bound % NSEC_PER_USEC)) { + *bound /= NSEC_PER_USEC; + *suffix = "us"; + } +} + +#define DM_HISTOGRAM_BOUNDS_MASK 0x30 +#define BOUNDS_LEN 64 + +static int _make_bounds_string(char *buf, size_t size, uint64_t lower, + uint64_t upper, int flags, int width) +{ + char bound_buf[BOUNDS_LEN]; + const char *l_suff = NULL; + const char *u_suff = NULL; + const char *sep = ""; + int bounds = flags & DM_HISTOGRAM_BOUNDS_MASK; + + if (!bounds) + return_0; + + *buf = '\0'; + + if (flags & DM_HISTOGRAM_SUFFIX) { + _scale_bound_value_to_suffix(&lower, &l_suff); + _scale_bound_value_to_suffix(&upper, &u_suff); + } else + l_suff = u_suff = ""; + + if (flags & DM_HISTOGRAM_VALUES) + sep = ":"; + + if (bounds > DM_HISTOGRAM_BOUNDS_LOWER) { + /* Handle infinite uppermost bound. */ + if (upper == UINT64_MAX) { + if (dm_snprintf(bound_buf, sizeof(bound_buf), + ">" FMTu64 "%s", lower, l_suff) < 0) + goto_out; + /* Only display an 'upper' string for final bin. 
*/ + bounds = DM_HISTOGRAM_BOUNDS_UPPER; + } else { + if (dm_snprintf(bound_buf, sizeof(bound_buf), + FMTu64 "%s", upper, u_suff) < 0) + goto_out; + } + } else if (bounds == DM_HISTOGRAM_BOUNDS_LOWER) { + if ((dm_snprintf(bound_buf, sizeof(bound_buf), FMTu64 "%s", + lower, l_suff)) < 0) + goto_out; + } + + switch (bounds) { + case DM_HISTOGRAM_BOUNDS_LOWER: + case DM_HISTOGRAM_BOUNDS_UPPER: + return dm_snprintf(buf, size, "%*s%s", width, bound_buf, sep); + case DM_HISTOGRAM_BOUNDS_RANGE: + return dm_snprintf(buf, size, FMTu64 "%s-%s%s", + lower, l_suff, bound_buf, sep); + } +out: + return 0; +} + +#define BOUND_WIDTH_NOSUFFIX 10 /* 999999999 nsecs */ +#define BOUND_WIDTH 6 /* bounds string up to 9999xs */ +#define COUNT_WIDTH 6 /* count string: up to 9999 */ +#define PERCENT_WIDTH 6 /* percent string : 0.00-100.00% */ +#define DM_HISTOGRAM_VALUES_MASK 0x06 + +const char *dm_histogram_to_string(const struct dm_histogram *dmh, int bin, + int width, int flags) +{ + char buf[BOUNDS_LEN], bounds_buf[BOUNDS_LEN]; + int minwidth, bounds, values, start, last; + uint64_t lower, upper, val_u64; /* bounds of the current bin. */ + /* Use the histogram pool for string building. */ + struct dm_pool *mem = dmh->dms->hist_mem; + const char *sep = ""; + int bounds_width; + ssize_t len = 0; + float val_flt; + + bounds = flags & DM_HISTOGRAM_BOUNDS_MASK; + values = flags & DM_HISTOGRAM_VALUES; + + if (bin < 0) { + start = 0; + last = dmh->nr_bins - 1; + } else + start = last = bin; + + minwidth = width; + + if (width < 0 || !values) + width = minwidth = 0; /* no padding */ + else if (flags & DM_HISTOGRAM_PERCENT) + width = minwidth = (width) ? : PERCENT_WIDTH; + else if (flags & DM_HISTOGRAM_VALUES) + width = minwidth = (width) ? : COUNT_WIDTH; + + if (values && !width) + sep = ":"; + + /* Set bounds string to the empty string. */ + bounds_buf[0] = '\0'; + + if (!dm_pool_begin_object(mem, 64)) + return_0; + + for (bin = start; bin <= last; bin++) { + if (bounds) { + /* Default bounds width depends on time suffixes. */ + bounds_width = (!(flags & DM_HISTOGRAM_SUFFIX)) + ? BOUND_WIDTH_NOSUFFIX + : BOUND_WIDTH ; + + bounds_width = (!width) ? width : bounds_width; + + lower = dm_histogram_get_bin_lower(dmh, bin); + upper = dm_histogram_get_bin_upper(dmh, bin); + + len = sizeof(bounds_buf); + len = _make_bounds_string(bounds_buf, len, + lower, upper, flags, + bounds_width); + /* + * Comma separates "bounds: value" pairs unless + * --noheadings is used. + */ + sep = (width || !values) ? "," : ":"; + + /* Adjust width by real bounds length if set. */ + width -= (width) ? (len - (bounds_width + 1)) : 0; + + /* -ve width indicates specified width was overrun. */ + width = (width > 0) ? width : 0; + } + + if (bin == last) + sep = ""; + + if (flags & DM_HISTOGRAM_PERCENT) { + dm_percent_t pr; + pr = dm_histogram_get_bin_percent(dmh, bin); + val_flt = dm_percent_to_float(pr); + len = dm_snprintf(buf, sizeof(buf), "%s%*.2f%%%s", + bounds_buf, width, val_flt, sep); + } else if (values) { + val_u64 = dmh->bins[bin].count; + len = dm_snprintf(buf, sizeof(buf), "%s%*"PRIu64"%s", + bounds_buf, width, val_u64, sep); + } else if (bounds) + len = dm_snprintf(buf, sizeof(buf), "%s%s", bounds_buf, + sep); + else { + *buf = '\0'; + len = 0; + } + + if (len < 0) + goto_bad; + + width = minwidth; /* re-set histogram column width. 
*/
+		if (!dm_pool_grow_object(mem, buf, (size_t) len))
+			goto_bad;
+	}
+
+	if (!dm_pool_grow_object(mem, "\0", 1))
+		goto_bad;
+
+	return (const char *) dm_pool_end_object(mem);
+
+bad:
+	dm_pool_abandon_object(mem);
+	return NULL;
+}
+
+/*
+ * A lightweight representation of an extent (region, area, file
+ * system block or extent etc.). A table of extents can be used
+ * to sort and to efficiently find holes or overlaps among a set
+ * of tuples of the form (id, start, len).
+ */
+struct _extent {
+	struct dm_list list;
+	uint64_t id;
+	uint64_t start;
+	uint64_t len;
+};
+
+/* last address in an extent */
+#define _extent_end(a) ((a)->start + (a)->len - 1)
+
+/* a and b must be sorted by increasing start sector */
+#define _extents_overlap(a, b) (_extent_end(a) > (b)->start)
+
+/*
+ * Comparison function to sort extents in ascending start order.
+ */
+static int _extent_start_compare(const void *p1, const void *p2)
+{
+	const struct _extent *r1, *r2;
+	r1 = (const struct _extent *) p1;
+	r2 = (const struct _extent *) p2;
+
+	if (r1->start < r2->start)
+		return -1;
+	else if (r1->start == r2->start)
+		return 0;
+	return 1;
+}
+
+static int _stats_create_group(struct dm_stats *dms, dm_bitset_t regions,
+			       const char *alias, uint64_t *group_id)
+{
+	struct dm_stats_group *group;
+	*group_id = dm_bit_get_first(regions);
+
+	/* group has no regions? */
+	if (*group_id == DM_STATS_GROUP_NOT_PRESENT)
+		return_0;
+
+	group = &dms->groups[*group_id];
+
+	if (group->regions) {
+		log_error(INTERNAL_ERROR "Unexpected group state while "
+			  "creating group ID bitmap " FMTu64, *group_id);
+		return 0;
+	}
+
+	group->group_id = *group_id;
+	group->regions = regions;
+
+	if (alias)
+		group->alias = dm_strdup(alias);
+	else
+		group->alias = NULL;
+
+	/* force an update of the group tag stored in aux_data */
+	if (!_stats_set_aux(dms, *group_id, dms->regions[*group_id].aux_data))
+		return 0;
+
+	return 1;
+}
+
+static int _stats_group_check_overlap(const struct dm_stats *dms,
+				      dm_bitset_t regions, int count)
+{
+	struct dm_list ext_list = DM_LIST_HEAD_INIT(ext_list);
+	struct _extent *ext, *tmp, *next, *map = NULL;
+	size_t map_size = (dms->max_region + 1) * sizeof(*map);
+	int i = 0, id, overlap, merged;
+
+	map = dm_pool_alloc(dms->mem, map_size);
+	if (!map) {
+		log_error("Could not allocate memory for region map");
+		return 0;
+	}
+
+	/* build a table of extents in order of region_id */
+	for (id = dm_bit_get_first(regions); id >= 0;
+	     id = dm_bit_get_next(regions, id)) {
+		dm_list_init(&map[i].list);
+		map[i].id = id;
+		map[i].start = dms->regions[id].start;
+		map[i].len = dms->regions[id].len;
+		i++;
+	}
+
+	/* A single region cannot overlap itself. */
+	if (i == 1) {
+		dm_pool_free(dms->mem, map);
+		return 1;
+	}
+
+	/* sort by extent.start */
+	qsort(map, count, sizeof(*map), _extent_start_compare);
+
+	for (i = 0; i < count; i++)
+		dm_list_add(&ext_list, &map[i].list);
+
+	overlap = 0;
+merge:
+	merged = 0;
+	dm_list_iterate_items_safe(ext, tmp, &ext_list) {
+		next = dm_list_item(dm_list_next(&ext_list, &ext->list),
+				    struct _extent);
+		if (!next)
+			continue;
+
+		if (_extents_overlap(ext, next)) {
+			log_warn("WARNING: region IDs " FMTu64 " and "
+				 FMTu64 " overlap. Some events will be "
+				 "counted twice.", ext->id, next->id);
+			/* merge larger extent into smaller */
+			if (_extent_end(ext) > _extent_end(next)) {
+				next->id = ext->id;
+				next->len = ext->len;
+			}
+			if (ext->start < next->start)
+				next->start = ext->start;
+			dm_list_del(&ext->list);
+			overlap = merged = 1;
+		}
+	}
+	/* continue until no merge candidates remain */
+	if (merged)
+		goto merge;
+
+	dm_pool_free(dms->mem, map);
+	return (overlap == 0);
+}
+
+static void _stats_copy_histogram_bounds(struct dm_histogram *to,
+					 struct dm_histogram *from)
+{
+	int i;
+
+	to->nr_bins = from->nr_bins;
+
+	for (i = 0; i < to->nr_bins; i++)
+		to->bins[i].upper = from->bins[i].upper;
+}
+
+/*
+ * Compare histogram bounds h1 and h2, and return 1 if they match (i.e.
+ * have the same number of bins and identical bin boundary values), or 0
+ * otherwise.
+ */
+static int _stats_check_histogram_bounds(struct dm_histogram *h1,
+					 struct dm_histogram *h2)
+{
+	int i;
+
+	if (!h1 || !h2)
+		return 0;
+
+	if (h1->nr_bins != h2->nr_bins)
+		return 0;
+
+	for (i = 0; i < h1->nr_bins; i++)
+		if (h1->bins[i].upper != h2->bins[i].upper)
+			return 0;
+	return 1;
+}
+
+/*
+ * Create a new group in stats handle dms from the group description
+ * passed in members.
+ */
+int dm_stats_create_group(struct dm_stats *dms, const char *members,
+			  const char *alias, uint64_t *group_id)
+{
+	struct dm_histogram *check = NULL, *bounds;
+	int i, count = 0, precise = 0;
+	dm_bitset_t regions;
+
+	if (!dms->regions || !dms->groups) {
+		log_error("Could not create group: no regions found.");
+		return 0;
+	}
+
+	if (!(regions = dm_bitset_parse_list(members, NULL, 0))) {
+		log_error("Could not parse list: '%s'", members);
+		return 0;
+	}
+
+	if (!(check = dm_pool_zalloc(dms->hist_mem, sizeof(*check)))) {
+		log_error("Could not allocate memory for bounds check");
+		goto bad;
+	}
+
+	/* too many bits? */
+	if ((*regions - 1) > dms->max_region) {
+		log_error("Invalid region ID: %d", *regions - 1);
+		goto bad;
+	}
+
+	/*
+	 * Check that each region_id in the bitmap meets the group
+	 * constraints: present, not already grouped, and if any
+	 * histogram is present that they all have the same bounds.
+	 */
+	for (i = dm_bit_get_first(regions); i >= 0;
+	     i = dm_bit_get_next(regions, i)) {
+		if (!dm_stats_region_present(dms, i)) {
+			log_error("Region ID %d does not exist", i);
+			goto bad;
+		}
+		if (_stats_region_is_grouped(dms, i)) {
+			log_error("Region ID %d already a member of group ID "
+				  FMTu64, i, dms->regions[i].group_id);
+			goto bad;
+		}
+		if (dms->regions[i].timescale == 1)
+			precise++;
+
+		/* check for matching histogram bounds */
+		bounds = dms->regions[i].bounds;
+		if (bounds && !check->nr_bins)
+			_stats_copy_histogram_bounds(check, bounds);
+		else if (bounds) {
+			if (!_stats_check_histogram_bounds(check, bounds)) {
+				log_error("All region histogram bounds "
+					  "must match exactly");
+				goto bad;
+			}
+		}
+		count++;
+	}
+
+	if (precise && (precise != count))
+		log_warn("WARNING: Grouping regions with different clock resolution: "
+			 "precision may be lost.");
+
+	if (!_stats_group_check_overlap(dms, regions, count))
+		log_very_verbose("Creating group with overlapping regions.");
+
+	if (!_stats_create_group(dms, regions, alias, group_id))
+		goto bad;
+
+	dm_pool_free(dms->hist_mem, check);
+	return 1;
+
+bad:
+	dm_pool_free(dms->hist_mem, check);
+	dm_bitset_destroy(regions);
+	return 0;
+}
+
+/*
+ * Remove the specified group_id.
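+ *
+ * If remove_regions is non-zero the member regions (including the
+ * group leader) are deleted along with the group; otherwise only the
+ * grouping is dissolved and the group tag is cleared from the
+ * leader's aux_data.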
+ */ +int dm_stats_delete_group(struct dm_stats *dms, uint64_t group_id, + int remove_regions) +{ + struct dm_stats_region *leader; + dm_bitset_t regions; + uint64_t i; + + if (group_id > dms->max_region) { + log_error("Invalid group ID: " FMTu64, group_id); + return 0; + } + + if (!_stats_group_id_present(dms, group_id)) { + log_error("Group ID " FMTu64 " does not exist", group_id); + return 0; + } + + regions = dms->groups[group_id].regions; + leader = &dms->regions[group_id]; + + /* delete all but the group leader */ + for (i = (*regions - 1); i > leader->region_id; i--) { + if (dm_bit(regions, i)) { + dm_bit_clear(regions, i); + if (remove_regions && !dm_stats_delete_region(dms, i)) + log_warn("WARNING: Failed to delete region " + FMTu64 " on %s.", i, dms->name); + } + } + + /* clear group and mark as not present */ + _stats_clear_group_regions(dms, group_id); + _stats_group_destroy(&dms->groups[group_id]); + + /* delete leader or clear aux_data */ + if (remove_regions) + return dm_stats_delete_region(dms, group_id); + else if (!_stats_set_aux(dms, group_id, leader->aux_data)) + return 0; + + return 1; +} + +uint64_t dm_stats_get_group_id(const struct dm_stats *dms, uint64_t region_id) +{ + region_id = (region_id == DM_STATS_REGION_CURRENT) + ? dms->cur_region : region_id; + + if (region_id & DM_STATS_WALK_GROUP) { + if (region_id == DM_STATS_WALK_GROUP) + return dms->cur_group; + else + return region_id & ~DM_STATS_WALK_GROUP; + } + + if (region_id & DM_STATS_WALK_REGION) + region_id &= ~DM_STATS_WALK_REGION; + + return dms->regions[region_id].group_id; +} + +int dm_stats_get_group_descriptor(const struct dm_stats *dms, + uint64_t group_id, char **buf) +{ + dm_bitset_t regions = dms->groups[group_id].regions; + size_t buflen; + + buflen = _stats_group_tag_len(dms, regions); + + *buf = dm_pool_alloc(dms->mem, buflen); + if (!*buf) { + log_error("Could not allocate memory for regions string"); + return 0; + } + + if (!_stats_group_tag_fill(dms, regions, *buf, buflen)) + return 0; + + return 1; +} + +#ifdef HAVE_LINUX_FIEMAP_H +/* + * Resize the group bitmap corresponding to group_id so that it can + * contain at least num_regions members. + */ +static int _stats_resize_group(struct dm_stats_group *group, + uint64_t num_regions) +{ + uint64_t last_bit = dm_bit_get_last(group->regions); + dm_bitset_t new, old; + + if (last_bit >= num_regions) { + log_error("Cannot resize group bitmap to " FMTu64 + " with bit " FMTu64 " set.", num_regions, last_bit); + return 0; + } + + log_very_verbose("Resizing group bitmap from " FMTu32 " to " FMTu64 + " (last_bit: " FMTu64 ").", group->regions[0], + num_regions, last_bit); + + new = dm_bitset_create(NULL, (unsigned) num_regions); + if (!new) { + log_error("Could not allocate memory for new group bitmap."); + return 0; + } + + old = group->regions; + dm_bit_copy(new, old); + group->regions = new; + dm_bitset_destroy(old); + return 1; +} + +/* + * Group a table of region_ids corresponding to the extents of a file. 
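+ *
+ * The regions are collected into a single group carrying the optional
+ * alias, with the group tag stored in the leader's aux_data as for
+ * any group created via _stats_create_group().
+ */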
+ */ +static int _stats_group_file_regions(struct dm_stats *dms, uint64_t *region_ids, + uint64_t count, const char *alias) +{ + dm_bitset_t regions = dm_bitset_create(NULL, dms->nr_regions); + uint64_t i, group_id = DM_STATS_GROUP_NOT_PRESENT; + char *members = NULL; + size_t buflen; + + if (!regions) { + log_error("Cannot map file: failed to allocate group bitmap."); + return 0; + } + + for (i = 0; i < count; i++) + dm_bit_set(regions, region_ids[i]); + + buflen = _stats_group_tag_len(dms, regions); + members = dm_malloc(buflen); + + if (!members) { + log_error("Cannot map file: failed to allocate group " + "descriptor."); + dm_bitset_destroy(regions); + return 0; + } + + if (!_stats_group_tag_fill(dms, regions, members, buflen)) + goto bad; + + /* + * overlaps should not be possible: overlapping file extents + * returned by FIEMAP imply a kernel bug or a corrupt fs. + */ + if (!_stats_group_check_overlap(dms, regions, count)) + log_very_verbose("Creating group with overlapping regions."); + + if (!_stats_create_group(dms, regions, alias, &group_id)) + goto bad; + + dm_free(members); + return 1; +bad: + dm_bitset_destroy(regions); + dm_free(members); + return 0; +} + +static int _stats_add_file_extent(int fd, struct dm_pool *mem, uint64_t id, + struct fiemap_extent *fm_ext) +{ + struct _extent extent; + + /* final address of list is unknown */ + memset(&extent.list, 0, sizeof(extent.list)); + + /* convert bytes to dm (512b) sectors */ + extent.start = fm_ext->fe_physical >> SECTOR_SHIFT; + extent.len = fm_ext->fe_length >> SECTOR_SHIFT; + extent.id = id; + + log_very_verbose("Extent " FMTu64 " on fd %d at " FMTu64 "+" + FMTu64, extent.id, fd, extent.start, extent.len); + + if (!dm_pool_grow_object(mem, &extent, + sizeof(extent))) { + log_error("Cannot map file: failed to grow extent map."); + return 0; + } + return 1; +} + +/* test for the boundary of an extent */ +#define ext_boundary(ext, exp) \ +((ext).fe_logical != 0) && \ +((ext).fe_physical != (exp)) + +/* + * Copy fields from fiemap_extent 'from' to the fiemap_extent + * pointed to by 'to'. + */ +#define ext_copy(to, from) \ +do { \ + *(to) = *(from); \ +} while (0) + +static uint64_t _stats_map_extents(int fd, struct dm_pool *mem, + struct fiemap *fiemap, + struct fiemap_extent *fm_ext, + struct fiemap_extent *fm_last, + struct fiemap_extent *fm_pending, + uint64_t next_extent, + int *eof) +{ + uint64_t expected = 0, nr_extents = next_extent; + unsigned int i; + + /* + * Loop over the returned extents adding the fm_pending extent + * to the table of extents each time a discontinuity (or eof) + * is detected. + * + * We use a pointer to fm_pending in the caller since it is + * possible that logical extents comprising a single physical + * extent are returned by successive FIEMAP calls. + */ + for (i = 0; i < fiemap->fm_mapped_extents; i++) { + expected = fm_last->fe_physical + fm_last->fe_length; + + if (fm_ext[i].fe_flags & FIEMAP_EXTENT_LAST) + *eof = 1; + + /* cannot map extents that are not yet allocated. */ + if (fm_ext[i].fe_flags + & (FIEMAP_EXTENT_UNKNOWN | FIEMAP_EXTENT_DELALLOC)) + continue; + + /* + * Begin a new extent if the current physical address differs + * from the expected address yielded by fm_last.fe_physical + + * fm_last.fe_length. + * + * A logical discontinuity is seen at the start of the file if + * unwritten space exists before the first extent: do not add + * any extent record until we have accumulated a non-zero length + * in fm_pending. 
+ */ + if (fm_pending->fe_length && + ext_boundary(fm_ext[i], expected)) { + if (!_stats_add_file_extent(fd, mem, nr_extents, + fm_pending)) + goto_bad; + nr_extents++; + /* Begin a new pending extent. */ + ext_copy(fm_pending, fm_ext + i); + } else { + expected = 0; + /* Begin a new pending extent for extent 0. If there is + * a hole at the start of the file, the first allocated + * extent will have a non-zero fe_logical. Detect this + * case by testing fm_pending->fe_length: if no length + * has been accumulated we are handling the first + * physical extent of the file. + */ + if (!fm_pending->fe_length || fm_ext[i].fe_logical == 0) + ext_copy(fm_pending, fm_ext + i); + else + /* accumulate this logical extent's length */ + fm_pending->fe_length += fm_ext[i].fe_length; + } + *fm_last = fm_ext[i]; + } + + /* + * If the file only has a single extent, no boundary is ever + * detected to trigger addition of the first extent. + */ + if (*eof || (fm_ext[i - 1].fe_logical == 0)) { + _stats_add_file_extent(fd, mem, nr_extents, fm_pending); + nr_extents++; + } + + fiemap->fm_start = (fm_ext[i - 1].fe_logical + + fm_ext[i - 1].fe_length); + + /* return the number of extents found in this call. */ + return nr_extents - next_extent; +bad: + /* signal mapping error to caller */ + *eof = -1; + return 0; +} + +/* + * Read the extents of an open file descriptor into a table of struct _extent. + * + * Based on e2fsprogs/misc/filefrag.c::filefrag_fiemap(). + * + * Copyright 2003 by Theodore Ts'o. + * + */ +static struct _extent *_stats_get_extents_for_file(struct dm_pool *mem, int fd, + uint64_t *count) +{ + struct fiemap_extent fm_last = {0}, fm_pending = {0}, *fm_ext = NULL; + struct fiemap *fiemap = NULL; + int eof = 0, nr_extents = 0; + struct _extent *extents; + unsigned long flags = 0; + uint64_t *buf; + + /* grow temporary extent table in the pool */ + if (!dm_pool_begin_object(mem, sizeof(*extents))) + return NULL; + + buf = dm_zalloc(STATS_FIE_BUF_LEN); + if (!buf) { + log_error("Could not allocate memory for FIEMAP buffer."); + goto bad; + } + + /* initialise pointers into the ioctl buffer. */ + fiemap = (struct fiemap *) buf; + fm_ext = &fiemap->fm_extents[0]; + + /* space available per ioctl */ + *count = (STATS_FIE_BUF_LEN - sizeof(*fiemap)) + / sizeof(struct fiemap_extent); + + flags = FIEMAP_FLAG_SYNC; + + do { + /* start of ioctl loop - zero size and set count to bufsize */ + fiemap->fm_length = ~0ULL; + fiemap->fm_flags = flags; + fiemap->fm_extent_count = *count; + + /* get count-sized chunk of extents */ + if (ioctl(fd, FS_IOC_FIEMAP, (unsigned long) fiemap) < 0) { + if (errno == EBADR) + log_err_once("FIEMAP failed with unknown " + "flags %x.", fiemap->fm_flags); + goto bad; + } + + /* If 0 extents are returned, more ioctls are not needed */ + if (fiemap->fm_mapped_extents == 0) + break; + + nr_extents += _stats_map_extents(fd, mem, fiemap, fm_ext, + &fm_last, &fm_pending, + nr_extents, &eof); + + /* check for extent mapping error */ + if (eof < 0) + goto bad; + + } while (eof == 0); + + if (!nr_extents) { + log_error("Cannot map file: no allocated extents."); + goto bad; + } + + /* return total number of extents */ + *count = nr_extents; + extents = dm_pool_end_object(mem); + + /* free FIEMAP buffer. 
*/ + dm_free(buf); + + return extents; + +bad: + *count = 0; + dm_pool_abandon_object(mem); + dm_free(buf); + return NULL; +} + +#define MATCH_EXTENT(e, s, l) \ +(((e).start == (s)) && ((e).len == (l))) + +static struct _extent *_find_extent(uint64_t nr_extents, struct _extent *extents, + uint64_t start, uint64_t len) +{ + size_t i; + for (i = 0; i < nr_extents; i++) + if (MATCH_EXTENT(extents[i], start, len)) + return extents + i; + return NULL; +} + +/* + * Clean up a table of region_id values that were created during a + * failed dm_stats_create_regions_from_fd, or dm_stats_update_regions_from_fd + * operation. + */ +static void _stats_cleanup_region_ids(struct dm_stats *dms, uint64_t *regions, + uint64_t nr_regions) +{ + uint64_t i; + + for (i = 0; i < nr_regions; i++) + if (!_stats_delete_region(dms, regions[i])) + log_error("Could not delete region " FMTu64 ".", i); +} + +/* + * First update pass: prune no-longer-allocated extents from the group + * and build a table of the remaining extents so that their creation + * can be skipped in the second pass. + */ +static int _stats_unmap_regions(struct dm_stats *dms, uint64_t group_id, + struct dm_pool *mem, struct _extent *extents, + struct _extent **old_extents, uint64_t *count, + int *regroup) +{ + struct dm_stats_region *region = NULL; + struct dm_stats_group *group = NULL; + uint64_t nr_kept, nr_old; + struct _extent ext; + int64_t i; + + group = &dms->groups[group_id]; + + log_very_verbose("Checking for changed file extents in group ID " + FMTu64, group_id); + + if (!dm_pool_begin_object(mem, sizeof(**old_extents))) { + log_error("Could not allocate extent table."); + return 0; + } + + nr_kept = nr_old = 0; /* counts of old and retained extents */ + + /* + * First pass: delete de-allocated extents and set regroup=1 if + * deleting the current group leader. + */ + i = dm_bit_get_last(group->regions); + for (; i >= 0; i = dm_bit_get_prev(group->regions, i)) { + region = &dms->regions[i]; + nr_old++; + + if (extents && _find_extent(*count, extents, + region->start, region->len)) { + ext.start = region->start; + ext.len = region->len; + ext.id = i; + nr_kept++; + + if (!dm_pool_grow_object(mem, &ext, sizeof(ext))) + goto out; + + log_very_verbose("Kept region " FMTu64, i); + } else { + + if (i == group_id) + *regroup = 1; + + if (!_stats_delete_region(dms, i)) { + log_error("Could not remove region ID " FMTu64, + i); + goto out; + } + + log_very_verbose("Deleted region " FMTu64, i); + } + } + + *old_extents = dm_pool_end_object(mem); + if (!*old_extents) { + log_error("Could not finalize region extent table."); + goto out; + } + log_very_verbose("Kept " FMTd64 " of " FMTd64 " old extents", + nr_kept, nr_old); + log_very_verbose("Found " FMTu64 " new extents", + *count - nr_kept); + + return (int) nr_kept; +out: + dm_pool_abandon_object(mem); + return -1; +} + +/* + * Create or update a set of regions representing the extents of a file + * and return a table of uint64_t region_id values. The number of regions + * created is returned in the memory pointed to by count (which must be + * non-NULL). + * + * If group_id is not equal to DM_STATS_GROUP_NOT_PRESENT, it is assumed + * that group_id corresponds to a group containing existing regions that + * were mapped to this file at an earlier time: regions will be added or + * removed to reflect the current status of the file. 
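+ *
+ * The returned table is terminated with a DM_STATS_REGION_NOT_PRESENT
+ * sentinel entry so that callers can detect the end of the list
+ * without a separate count.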
+ */ +static uint64_t *_stats_map_file_regions(struct dm_stats *dms, int fd, + struct dm_histogram *bounds, + int precise, uint64_t group_id, + uint64_t *count, int *regroup) +{ + struct _extent *extents = NULL, *old_extents = NULL; + uint64_t *regions = NULL, fail_region, i, num_bits; + struct dm_stats_group *group = NULL; + struct dm_pool *extent_mem = NULL; + struct _extent *old_ext; + char *hist_arg = NULL; + struct statfs fsbuf; + int64_t nr_kept = 0; + struct stat buf; + int update; + + *count = 0; + update = _stats_group_id_present(dms, group_id); + +#ifdef BTRFS_SUPER_MAGIC + if (fstatfs(fd, &fsbuf)) { + log_error("fstatfs failed for fd %d", fd); + return 0; + } + + if (fsbuf.f_type == BTRFS_SUPER_MAGIC) { + log_error("Cannot map file: btrfs does not provide " + "physical FIEMAP extent data."); + return 0; + } +#endif + + if (fstat(fd, &buf)) { + log_error("fstat failed for fd %d", fd); + return 0; + } + + if (!(buf.st_mode & S_IFREG)) { + log_error("Not a regular file"); + return 0; + } + + if (!dm_is_dm_major(major(buf.st_dev))) { + log_error("Cannot map file: not a device-mapper device."); + return 0; + } + + /* + * If regroup is set here, we are creating a new filemap: otherwise + * we are updating a group with a valid group identifier in group_id. + */ + if (update) + log_very_verbose("Updating extents from fd %d with group ID " + FMTu64 " on (%d:%d)", fd, group_id, + major(buf.st_dev), minor(buf.st_dev)); + else + log_very_verbose("Mapping extents from fd %d on (%d:%d)", + fd, major(buf.st_dev), minor(buf.st_dev)); + + /* Use a temporary, private pool for the extent table. This avoids + * hijacking the dms->mem (region table) pool which would lead to + * interleaving temporary allocations with dm_stats_list() data, + * causing complications in the error path. + */ + if (!(extent_mem = dm_pool_create("extents", sizeof(*extents)))) + return_NULL; + + if (!(extents = _stats_get_extents_for_file(extent_mem, fd, count))) { + log_very_verbose("No extents found in fd %d", fd); + if (!update) + goto out; + } + + if (update) { + group = &dms->groups[group_id]; + if ((nr_kept = _stats_unmap_regions(dms, group_id, extent_mem, + extents, &old_extents, + count, regroup)) < 0) + goto_out; + } + + if (bounds) + if (!(hist_arg = _build_histogram_arg(bounds, &precise))) + goto_out; + + /* make space for end-of-table marker */ + if (!(regions = dm_malloc((1 + *count) * sizeof(*regions)))) { + log_error("Could not allocate memory for region IDs."); + goto_out; + } + + /* + * Second pass (first for non-update case): create regions for + * all extents not retained from the prior mapping, and insert + * retained regions into the table of region_id values. + * + * If a regroup is not scheduled, set group bits for newly + * created regions in the group leader bitmap. 
+ */ + for (i = 0; i < *count; i++) { + if (update) { + if ((old_ext = _find_extent((uint64_t) nr_kept, + old_extents, + extents[i].start, + extents[i].len))) { + regions[i] = old_ext->id; + continue; + } + } + if (!_stats_create_region(dms, regions + i, extents[i].start, + extents[i].len, -1, precise, hist_arg, + dms->program_id, "")) { + log_error("Failed to create region " FMTu64 " of " + FMTu64 " at " FMTu64 ".", i, *count, + extents[i].start); + goto out_remove; + } + + log_very_verbose("Created new region mapping " FMTu64 "+" FMTu64 + " with region ID " FMTu64, extents[i].start, + extents[i].len, regions[i]); + + if (!*regroup && update) { + /* expand group bitmap */ + if (regions[i] > (group->regions[0] - 1)) { + num_bits = regions[i] + *count; + if (!_stats_resize_group(group, num_bits)) { + log_error("Failed to resize group " + "bitmap."); + goto out_remove; + } + } + dm_bit_set(group->regions, regions[i]); + } + + } + regions[*count] = DM_STATS_REGION_NOT_PRESENT; + + /* Update group leader aux_data for new group members. */ + if (!*regroup && update) + if (!_stats_set_aux(dms, group_id, + dms->regions[group_id].aux_data)) + log_error("Failed to update group aux_data."); + + if (bounds) + dm_free(hist_arg); + + /* the extent table will be empty if the file has been truncated. */ + if (extents) + dm_pool_free(extent_mem, extents); + + dm_pool_destroy(extent_mem); + + return regions; + +out_remove: + /* New region creation may begin to fail part-way through creating + * a set of file mapped regions: in this case we need to roll back + * the regions that were already created and return the handle to + * a consistent state. A listed handle is required for this: use a + * single list operation and call _stats_delete_region() directly + * to avoid a @stats_list ioctl and list parsing for each region. 
+ */ + if (!dm_stats_list(dms, NULL)) + goto out; + + fail_region = i; + _stats_cleanup_region_ids(dms, regions, fail_region); + *count = 0; + +out: + dm_pool_destroy(extent_mem); + dm_free(hist_arg); + dm_free(regions); + return NULL; +} + +uint64_t *dm_stats_create_regions_from_fd(struct dm_stats *dms, int fd, + int group, int precise, + struct dm_histogram *bounds, + const char *alias) +{ + uint64_t *regions, count; + int regroup = 1; + + if (alias && !group) { + log_error("Cannot set alias without grouping regions."); + return NULL; + } + + if (!(regions = _stats_map_file_regions(dms, fd, bounds, precise, + DM_STATS_GROUP_NOT_PRESENT, + &count, ®roup))) + return NULL; + + if (!group) + return regions; + + /* refresh handle */ + if (!dm_stats_list(dms, NULL)) + goto_out; + + if (!_stats_group_file_regions(dms, regions, count, alias)) + goto_out; + + return regions; +out: + _stats_cleanup_region_ids(dms, regions, count); + dm_free(regions); + return NULL; +} + +uint64_t *dm_stats_update_regions_from_fd(struct dm_stats *dms, int fd, + uint64_t group_id) +{ + struct dm_histogram *bounds = NULL; + int nr_bins, precise, regroup; + uint64_t *regions, count = 0; + const char *alias = NULL; + + if (!dms->regions || !dm_stats_group_present(dms, group_id)) { + if (!dm_stats_list(dms, dms->program_id)) { + log_error("Could not obtain region list while " + "updating group " FMTu64 ".", group_id); + return NULL; + } + } + + if (!dm_stats_group_present(dms, group_id)) { + log_error("Group ID " FMTu64 " does not exist.", group_id); + return NULL; + } + + /* + * If the extent corresponding to the group leader's region has been + * deallocated, _stats_map_file_regions() will remove the region and + * the group. In this case, regroup will be set by the call and the + * group will be re-created using saved values. + */ + regroup = 0; + + /* + * A copy of the alias is needed to re-create the group when regroup=1. + */ + if (dms->groups[group_id].alias) { + alias = dm_strdup(dms->groups[group_id].alias); + if (!alias) { + log_error("Failed to allocate group alias string."); + return NULL; + } + } + + if (dms->regions[group_id].bounds) { + /* + * A copy of the histogram bounds must be passed to + * _stats_map_file_regions() to be used when creating new + * regions: it is not safe to use the copy in the current group + * leader since it may be destroyed during the first group + * update pass. 
+ */ + nr_bins = dms->regions[group_id].bounds->nr_bins; + bounds = _alloc_dm_histogram(nr_bins); + if (!bounds) { + log_error("Could not allocate memory for group " + "histogram bounds."); + goto out; + } + _stats_copy_histogram_bounds(bounds, + dms->regions[group_id].bounds); + } + + precise = (dms->regions[group_id].timescale == 1); + + regions = _stats_map_file_regions(dms, fd, bounds, precise, + group_id, &count, ®roup); + + if (!regions) + goto bad; + + if (!dm_stats_list(dms, NULL)) + goto bad; + + /* regroup if there are regions to group */ + if (regroup && (*regions != DM_STATS_REGION_NOT_PRESENT)) + if (!_stats_group_file_regions(dms, regions, count, alias)) + goto bad; + + dm_free(bounds); + dm_free((char *) alias); + return regions; +bad: + _stats_cleanup_region_ids(dms, regions, count); + dm_free(bounds); + dm_free(regions); +out: + dm_free((char *) alias); + return NULL; +} +#else /* !HAVE_LINUX_FIEMAP */ +uint64_t *dm_stats_create_regions_from_fd(struct dm_stats *dms, int fd, + int group, int precise, + struct dm_histogram *bounds, + const char *alias) +{ + log_error("File mapping requires FIEMAP ioctl support."); + return 0; +} + +uint64_t *dm_stats_update_regions_from_fd(struct dm_stats *dms, int fd, + uint64_t group_id) +{ + log_error("File mapping requires FIEMAP ioctl support."); + return 0; +} +#endif /* HAVE_LINUX_FIEMAP */ + +#ifdef DMFILEMAPD +static const char *_filemapd_mode_names[] = { + "inode", + "path", + NULL +}; + +dm_filemapd_mode_t dm_filemapd_mode_from_string(const char *mode_str) +{ + dm_filemapd_mode_t mode = DM_FILEMAPD_FOLLOW_INODE; + const char **mode_name; + + if (mode_str) { + for (mode_name = _filemapd_mode_names; *mode_name; mode_name++) + if (!strcmp(*mode_name, mode_str)) + break; + if (*mode_name) + mode = DM_FILEMAPD_FOLLOW_INODE + + (mode_name - _filemapd_mode_names); + else { + log_error("Could not parse dmfilemapd mode: %s", + mode_str); + return DM_FILEMAPD_FOLLOW_NONE; + } + } + return mode; +} + +#define DM_FILEMAPD "dmfilemapd" +#define NR_FILEMAPD_ARGS 7 /* includes argv[0] */ +/* + * Start dmfilemapd to monitor the specified file descriptor, and to + * update the group given by 'group_id' when the file's allocation + * changes. + * + * usage: dmfilemapd <fd> <group_id> <mode> [<foreground>[<log_level>]] + */ +int dm_stats_start_filemapd(int fd, uint64_t group_id, const char *path, + dm_filemapd_mode_t mode, unsigned foreground, + unsigned verbose) +{ + char fd_str[8], group_str[8], fg_str[2], verb_str[2]; + const char *mode_str = _filemapd_mode_names[mode]; + char *args[NR_FILEMAPD_ARGS + 1]; + pid_t pid = 0; + int argc = 0; + + if (fd < 0) { + log_error("dmfilemapd file descriptor must be " + "non-negative: %d", fd); + return 0; + } + + if (path[0] != '/') { + log_error("Path argument must specify an absolute path."); + return 0; + } + + if (mode > DM_FILEMAPD_FOLLOW_PATH) { + log_error("Invalid dmfilemapd mode argument: " + "Must be DM_FILEMAPD_FOLLOW_INODE or " + "DM_FILEMAPD_FOLLOW_PATH"); + return 0; + } + + if (foreground > 1) { + log_error("Invalid dmfilemapd foreground argument. " + "Must be 0 or 1: %d.", foreground); + return 0; + } + + if (verbose > 3) { + log_error("Invalid dmfilemapd verbose argument. 
" + "Must be 0..3: %d.", verbose); + return 0; + } + + /* set argv[0] */ + args[argc++] = (char *) DM_FILEMAPD; + + /* set <fd> */ + if ((dm_snprintf(fd_str, sizeof(fd_str), "%d", fd)) < 0) { + log_error("Could not format fd argument."); + return 0; + } + args[argc++] = fd_str; + + /* set <group_id> */ + if ((dm_snprintf(group_str, sizeof(group_str), FMTu64, group_id)) < 0) { + log_error("Could not format group_id argument."); + return 0; + } + args[argc++] = group_str; + + /* set <path> */ + args[argc++] = (char *) path; + + /* set <mode> */ + args[argc++] = (char *) mode_str; + + /* set <foreground> */ + if ((dm_snprintf(fg_str, sizeof(fg_str), "%u", foreground)) < 0) { + log_error("Could not format foreground argument."); + return 0; + } + args[argc++] = fg_str; + + /* set <verbose> */ + if ((dm_snprintf(verb_str, sizeof(verb_str), "%u", verbose)) < 0) { + log_error("Could not format verbose argument."); + return 0; + } + args[argc++] = verb_str; + + /* terminate args[argc] */ + args[argc] = NULL; + + log_very_verbose("Spawning daemon as '%s %d " FMTu64 " %s %s %u %u'", + *args, fd, group_id, path, mode_str, + foreground, verbose); + + if (!foreground && ((pid = fork()) < 0)) { + log_error("Failed to fork dmfilemapd process."); + return 0; + } + + if (pid > 0) { + log_very_verbose("Forked dmfilemapd process as pid %d", pid); + return 1; + } + + execvp(args[0], args); + log_sys_error("execvp", args[0]); + if (!foreground) + _exit(127); + return 0; +} +# else /* !DMFILEMAPD */ +dm_filemapd_mode_t dm_filemapd_mode_from_string(const char *mode_str) +{ + return 0; +}; + +int dm_stats_start_filemapd(int fd, uint64_t group_id, const char *path, + dm_filemapd_mode_t mode, unsigned foreground, + unsigned verbose) +{ + log_error("dmfilemapd support disabled."); + return 0; +} +#endif /* DMFILEMAPD */ + +/* + * Backward compatible dm_stats_create_region() implementations. + * + * Keep these at the end of the file to avoid adding clutter around the + * current dm_stats_create_region() version. + */ + +#if defined(__GNUC__) +int dm_stats_create_region_v1_02_106(struct dm_stats *dms, uint64_t *region_id, + uint64_t start, uint64_t len, int64_t step, + int precise, const char *program_id, + const char *aux_data); +int dm_stats_create_region_v1_02_106(struct dm_stats *dms, uint64_t *region_id, + uint64_t start, uint64_t len, int64_t step, + int precise, const char *program_id, + const char *aux_data) +{ + /* 1.02.106 lacks histogram argument. */ + return _stats_create_region(dms, region_id, start, len, step, precise, + NULL, program_id, aux_data); +} + +int dm_stats_create_region_v1_02_104(struct dm_stats *dms, uint64_t *region_id, + uint64_t start, uint64_t len, int64_t step, + const char *program_id, const char *aux_data); +int dm_stats_create_region_v1_02_104(struct dm_stats *dms, uint64_t *region_id, + uint64_t start, uint64_t len, int64_t step, + const char *program_id, const char *aux_data) +{ + /* 1.02.104 lacks histogram and precise arguments. */ + return _stats_create_region(dms, region_id, start, len, step, 0, NULL, + program_id, aux_data); +} +#endif diff --git a/device_mapper/libdm-string.c b/device_mapper/libdm-string.c new file mode 100644 index 000000000..8bd6c2d15 --- /dev/null +++ b/device_mapper/libdm-string.c @@ -0,0 +1,718 @@ +/* + * Copyright (C) 2006-2015 Red Hat, Inc. All rights reserved. + * + * This file is part of the device-mapper userspace tools. 
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "misc/dmlib.h"
+
+#include <ctype.h>
+#include <stdarg.h>
+#include <math.h>  /* fabs() */
+#include <float.h> /* DBL_EPSILON */
+
+/*
+ * Consume characters while they match the predicate function.
+ */
+static char *_consume(char *buffer, int (*fn) (int))
+{
+	while (*buffer && fn(*buffer))
+		buffer++;
+
+	return buffer;
+}
+
+static int _isword(int c)
+{
+	return !isspace(c);
+}
+
+/*
+ * Split buffer into NULL-separated words in argv.
+ * Returns number of words.
+ */
+int dm_split_words(char *buffer, unsigned max,
+		   unsigned ignore_comments __attribute__((unused)),
+		   char **argv)
+{
+	unsigned arg;
+
+	for (arg = 0; arg < max; arg++) {
+		buffer = _consume(buffer, isspace);
+		if (!*buffer)
+			break;
+
+		argv[arg] = buffer;
+		buffer = _consume(buffer, _isword);
+
+		if (*buffer) {
+			*buffer = '\0';
+			buffer++;
+		}
+	}
+
+	return arg;
+}
+
+/*
+ * Remove hyphen quoting from a component of a name.
+ * NULL-terminates the component and returns start of next component.
+ */
+static char *_unquote(char *component)
+{
+	char *c = component;
+	char *o = c;
+	char *r;
+
+	while (*c) {
+		if (*(c + 1)) {
+			if (*c == '-') {
+				if (*(c + 1) == '-')
+					c++;
+				else
+					break;
+			}
+		}
+		*o = *c;
+		o++;
+		c++;
+	}
+
+	r = (*c) ? c + 1 : c;
+	*o = '\0';
+
+	return r;
+}
+
+int dm_split_lvm_name(struct dm_pool *mem, const char *dmname,
+		      char **vgname, char **lvname, char **layer)
+{
+	if (!vgname || !lvname || !layer) {
+		log_error(INTERNAL_ERROR "dm_split_lvm_name: Forbidden NULL parameter detected.");
+		return 0;
+	}
+
+	if (mem && (!dmname || !(*vgname = dm_pool_strdup(mem, dmname)))) {
+		log_error("Failed to duplicate lvm name.");
+		return 0;
+	} else if (!*vgname) {
+		log_error("Missing lvm name for split.");
+		return 0;
+	}
+
+	_unquote(*layer = _unquote(*lvname = _unquote(*vgname)));
+
+	return 1;
+}
+
+/*
+ * On error, up to glibc 2.0.6, snprintf returned -1 if the buffer was too
+ * small; from glibc 2.1 it returns the number of chars (excl. trailing null)
+ * that would have been written had there been room.
+ *
+ * dm_snprintf reverts to the old behaviour.
+ */
+int dm_snprintf(char *buf, size_t bufsize, const char *format, ...)
+{
+	int n;
+	va_list ap;
+
+	va_start(ap, format);
+	n = vsnprintf(buf, bufsize, format, ap);
+	va_end(ap);
+
+	if (n < 0 || ((unsigned) n >= bufsize))
+		return -1;
+
+	return n;
+}
+
+const char *dm_basename(const char *path)
+{
+	const char *p = strrchr(path, '/');
+
+	return p ? p + 1 : path;
+}
+
+int dm_vasprintf(char **result, const char *format, va_list aq)
+{
+	int i, n, size = 16;
+	va_list ap;
+	char *buf = dm_malloc(size);
+
+	*result = 0;
+
+	if (!buf)
+		return -1;
+
+	for (i = 0;; i++) {
+		va_copy(ap, aq);
+		n = vsnprintf(buf, size, format, ap);
+		va_end(ap);
+
+		if (0 <= n && n < size)
+			break;
+
+		dm_free(buf);
+		/* Up to glibc 2.0.6 returns -1 */
+		size = (n < 0) ? size * 2 : n + 1;
+		if (!(buf = dm_malloc(size)))
+			return -1;
+	}
+
+	if (i > 1) {
+		/* Reallocating more than once? */
+		if (!(*result = dm_strdup(buf))) {
+			dm_free(buf);
+			return -1;
+		}
+		dm_free(buf);
+	} else
+		*result = buf;
+
+	return n + 1;
+}
+
+int dm_asprintf(char **result, const char *format, ...)
+{
+	int r;
+	va_list ap;
+	va_start(ap, format);
+	r = dm_vasprintf(result, format, ap);
+	va_end(ap);
+	return r;
+}
+
+/*
+ * Count occurrences of 'c1' and 'c2' in 'str' until we reach a null char.
+ *
+ * Returns:
+ *   len - incremented for each char we encounter.
+ *   count - number of occurrences of 'c1' and 'c2'.
+ */
+static void _count_chars(const char *str, size_t *len, int *count,
+			 const int c1, const int c2)
+{
+	const char *ptr;
+
+	for (ptr = str; *ptr; ptr++, (*len)++)
+		if (*ptr == c1 || *ptr == c2)
+			(*count)++;
+}
+
+/*
+ * Count occurrences of 'c' in 'str' of length 'len'.
+ *
+ * Returns:
+ *   Number of occurrences of 'c'
+ */
+unsigned dm_count_chars(const char *str, size_t len, const int c)
+{
+	size_t i;
+	unsigned count = 0;
+
+	for (i = 0; i < len; i++)
+		if (str[i] == c)
+			count++;
+
+	return count;
+}
+
+/*
+ * Length of string after escaping double quotes and backslashes.
+ */
+size_t dm_escaped_len(const char *str)
+{
+	size_t len = 1;
+	int count = 0;
+
+	_count_chars(str, &len, &count, '\"', '\\');
+
+	return count + len;
+}
+
+/*
+ * Copies a string, quoting orig_char with quote_char.
+ * Optionally also quote quote_char.
+ */
+static void _quote_characters(char **out, const char *src,
+			      const int orig_char, const int quote_char,
+			      int quote_quote_char)
+{
+	while (*src) {
+		if (*src == orig_char ||
+		    (*src == quote_char && quote_quote_char))
+			*(*out)++ = quote_char;
+
+		*(*out)++ = *src++;
+	}
+}
+
+static void _unquote_one_character(char *src, const char orig_char,
+				   const char quote_char)
+{
+	char *out;
+	char s, n;
+
+	/* Optimise for the common case where no changes are needed. */
+	while ((s = *src++)) {
+		if (s == quote_char &&
+		    ((n = *src) == orig_char || n == quote_char)) {
+			out = src++;
+			*(out - 1) = n;
+
+			while ((s = *src++)) {
+				if (s == quote_char &&
+				    ((n = *src) == orig_char || n == quote_char)) {
+					s = n;
+					src++;
+				}
+				*out = s;
+				out++;
+			}
+
+			*out = '\0';
+			return;
+		}
+	}
+}
+
+/*
+ * Unquote each character given in the orig_chars array and unquote quote_char
+ * as well. Also save the first occurrence of each character from orig_chars
+ * that was found unquoted in the arr_substr_first_unquoted array. This way we
+ * can process several characters in one go.
+ */
+static void _unquote_characters(char *src, const char *orig_chars,
+				size_t num_orig_chars,
+				const char quote_char,
+				char *arr_substr_first_unquoted[])
+{
+	char *out = src;
+	char c, s, n;
+	unsigned i;
+
+	while ((s = *src++)) {
+		for (i = 0; i < num_orig_chars; i++) {
+			c = orig_chars[i];
+			if (s == quote_char &&
+			    ((n = *src) == c || n == quote_char)) {
+				s = n;
+				src++;
+				break;
+			}
+			if (arr_substr_first_unquoted && (s == c) &&
+			    !arr_substr_first_unquoted[i])
+				arr_substr_first_unquoted[i] = out;
+		}
+		*out++ = s;
+	}
+
+	*out = '\0';
+}
+
+/*
+ * Copies a string, quoting hyphens with hyphens.
+ */
+static void _quote_hyphens(char **out, const char *src)
+{
+	_quote_characters(out, src, '-', '-', 0);
+}
+
+/*
+ * <vg>-<lv>-<layer> or if !layer just <vg>-<lv>.
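To make the hyphen quoting above concrete, a minimal sketch of `dm_build_dm_name` (defined just below), assuming the in-tree header path `device_mapper/libdevmapper.h` from this series:

```c
#include <stdio.h>

#include "device_mapper/libdevmapper.h"	/* header path assumed from this patch */

int main(void)
{
	struct dm_pool *mem;
	char *name;

	if (!(mem = dm_pool_create("example", 256)))
		return 1;

	/* hyphens inside a component are doubled, so the single '-'
	 * separators between components remain unambiguous */
	if ((name = dm_build_dm_name(mem, "vg-00", "lv-data", NULL)))
		printf("%s\n", name);	/* prints: vg--00-lv--data */

	dm_pool_destroy(mem);
	return 0;
}
```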
+ */ +char *dm_build_dm_name(struct dm_pool *mem, const char *vgname, + const char *lvname, const char *layer) +{ + size_t len = 1; + int hyphens = 1; + char *r, *out; + + _count_chars(vgname, &len, &hyphens, '-', 0); + _count_chars(lvname, &len, &hyphens, '-', 0); + + if (layer && *layer) { + _count_chars(layer, &len, &hyphens, '-', 0); + hyphens++; + } + + len += hyphens; + + if (!(r = dm_pool_alloc(mem, len))) { + log_error("build_dm_name: Allocation failed for %" PRIsize_t + " for %s %s %s.", len, vgname, lvname, layer); + return NULL; + } + + out = r; + _quote_hyphens(&out, vgname); + *out++ = '-'; + _quote_hyphens(&out, lvname); + + if (layer && *layer) { + /* No hyphen if the layer begins with _ e.g. _mlog */ + if (*layer != '_') + *out++ = '-'; + _quote_hyphens(&out, layer); + } + *out = '\0'; + + return r; +} + +char *dm_build_dm_uuid(struct dm_pool *mem, const char *uuid_prefix, const char *lvid, const char *layer) +{ + char *dmuuid; + size_t len; + + if (!layer) + layer = ""; + + len = strlen(uuid_prefix) + strlen(lvid) + strlen(layer) + 2; + + if (!(dmuuid = dm_pool_alloc(mem, len))) { + log_error("build_dm_name: Allocation failed for %" PRIsize_t + " %s %s.", len, lvid, layer); + return NULL; + } + + sprintf(dmuuid, "%s%s%s%s", uuid_prefix, lvid, (*layer) ? "-" : "", layer); + + return dmuuid; +} + +/* + * Copies a string, quoting double quotes with backslashes. + */ +char *dm_escape_double_quotes(char *out, const char *src) +{ + char *buf = out; + + _quote_characters(&buf, src, '\"', '\\', 1); + *buf = '\0'; + + return out; +} + +/* + * Undo quoting in situ. + */ +void dm_unescape_double_quotes(char *src) +{ + _unquote_one_character(src, '\"', '\\'); +} + +/* + * Unescape colons and "at" signs in situ and save the substrings + * starting at the position of the first unescaped colon and the + * first unescaped "at" sign. This is normally used to unescape + * device names used as PVs. 
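A small sketch of the unescaping behaviour described above, using an invented device name; the header path is assumed from this patch. Only characters that appear unescaped are reported back:

```c
#include <stdio.h>

#include "device_mapper/libdevmapper.h"	/* header path assumed from this patch */

int main(void)
{
	/* "\:" stays a literal colon; only unescaped ':' and '@' are reported */
	char pvname[] = "/dev/disk/by-path/ip-10.0.0.1\\:3260@0";
	char *colon = NULL, *at = NULL;

	dm_unescape_colons_and_at_signs(pvname, &colon, &at);

	printf("unescaped: %s\n", pvname);	/* .../ip-10.0.0.1:3260@0 */
	printf("first unescaped ':': %s\n", colon ? colon : "(none)");
	printf("first unescaped '@': %s\n", at ? at : "(none)");	/* @0 */
	return 0;
}
```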
+ */ +void dm_unescape_colons_and_at_signs(char *src, + char **substr_first_unquoted_colon, + char **substr_first_unquoted_at_sign) +{ + const char *orig_chars = ":@"; + char *arr_substr_first_unquoted[] = {NULL, NULL, NULL}; + + _unquote_characters(src, orig_chars, 2, '\\', arr_substr_first_unquoted); + + if (substr_first_unquoted_colon) + *substr_first_unquoted_colon = arr_substr_first_unquoted[0]; + + if (substr_first_unquoted_at_sign) + *substr_first_unquoted_at_sign = arr_substr_first_unquoted[1]; +} + +int dm_strncpy(char *dest, const char *src, size_t n) +{ + if (memccpy(dest, src, 0, n)) + return 1; + + if (n > 0) + dest[n - 1] = '\0'; + + return 0; +} + +/* Test if the doubles are close enough to be considered equal */ +static int _close_enough(double d1, double d2) +{ + return fabs(d1 - d2) < DBL_EPSILON; +} + +#define BASE_UNKNOWN 0 +#define BASE_SHARED 1 +#define BASE_1024 8 +#define BASE_1000 15 +#define BASE_SPECIAL 21 +#define NUM_UNIT_PREFIXES 6 +#define NUM_SPECIAL 3 + +#define SIZE_BUF 128 + +const char *dm_size_to_string(struct dm_pool *mem, uint64_t size, + char unit_type, int use_si_units, + uint64_t unit_factor, int include_suffix, + dm_size_suffix_t suffix_type) +{ + unsigned base = BASE_UNKNOWN; + unsigned s; + int precision; + double d; + uint64_t byte = UINT64_C(0); + uint64_t units = UINT64_C(1024); + char *size_buf = NULL; + char new_unit_type = '\0', unit_type_buf[2]; + const char *prefix = ""; + const char * const size_str[][3] = { + /* BASE_UNKNOWN */ + {" ", " ", " "}, /* [0] */ + + /* BASE_SHARED - Used if use_si_units = 0 */ + {" Exabyte", " EB", "E"}, /* [1] */ + {" Petabyte", " PB", "P"}, /* [2] */ + {" Terabyte", " TB", "T"}, /* [3] */ + {" Gigabyte", " GB", "G"}, /* [4] */ + {" Megabyte", " MB", "M"}, /* [5] */ + {" Kilobyte", " KB", "K"}, /* [6] */ + {" Byte ", " B", "B"}, /* [7] */ + + /* BASE_1024 - Used if use_si_units = 1 */ + {" Exbibyte", " EiB", "e"}, /* [8] */ + {" Pebibyte", " PiB", "p"}, /* [9] */ + {" Tebibyte", " TiB", "t"}, /* [10] */ + {" Gibibyte", " GiB", "g"}, /* [11] */ + {" Mebibyte", " MiB", "m"}, /* [12] */ + {" Kibibyte", " KiB", "k"}, /* [13] */ + {" Byte ", " B", "b"}, /* [14] */ + + /* BASE_1000 - Used if use_si_units = 1 */ + {" Exabyte", " EB", "E"}, /* [15] */ + {" Petabyte", " PB", "P"}, /* [16] */ + {" Terabyte", " TB", "T"}, /* [17] */ + {" Gigabyte", " GB", "G"}, /* [18] */ + {" Megabyte", " MB", "M"}, /* [19] */ + {" Kilobyte", " kB", "K"}, /* [20] */ + + /* BASE_SPECIAL */ + {" Byte ", " B ", "B"}, /* [21] (shared with BASE_1000) */ + {" Units ", " Un", "U"}, /* [22] */ + {" Sectors ", " Se", "S"}, /* [23] */ + }; + + if (!(size_buf = dm_pool_alloc(mem, SIZE_BUF))) { + log_error("no memory for size display buffer"); + return ""; + } + + if (!use_si_units) { + /* Case-independent match */ + for (s = 0; s < NUM_UNIT_PREFIXES; s++) + if (toupper((int) unit_type) == + *size_str[BASE_SHARED + s][2]) { + base = BASE_SHARED; + break; + } + } else { + /* Case-dependent match for powers of 1000 */ + for (s = 0; s < NUM_UNIT_PREFIXES; s++) + if (unit_type == *size_str[BASE_1000 + s][2]) { + base = BASE_1000; + break; + } + + /* Case-dependent match for powers of 1024 */ + if (base == BASE_UNKNOWN) + for (s = 0; s < NUM_UNIT_PREFIXES; s++) + if (unit_type == *size_str[BASE_1024 + s][2]) { + base = BASE_1024; + break; + } + } + + if (base == BASE_UNKNOWN) + /* Check for special units - s, b or u */ + for (s = 0; s < NUM_SPECIAL; s++) + if (toupper((int) unit_type) == + *size_str[BASE_SPECIAL + s][2]) { + base = BASE_SPECIAL; + 
break;
+			}
+
+	if (size == UINT64_C(0)) {
+		if (base == BASE_UNKNOWN)
+			s = 0;
+		sprintf(size_buf, "0%s", include_suffix ? size_str[base + s][suffix_type] : "");
+		return size_buf;
+	}
+
+	size *= UINT64_C(512);
+
+	if (base != BASE_UNKNOWN) {
+		if (!unit_factor) {
+			unit_type_buf[0] = unit_type;
+			unit_type_buf[1] = '\0';
+			if (!(unit_factor = dm_units_to_factor(&unit_type_buf[0], &new_unit_type, 1, NULL)) ||
+			    unit_type != new_unit_type) {
+				/* The two functions should match (and unrecognised units get treated like 'h'). */
+				log_error(INTERNAL_ERROR "Inconsistent units: %c and %c.", unit_type, new_unit_type);
+				return "";
+			}
+		}
+		byte = unit_factor;
+	} else {
+		/* Human-readable style */
+		if (unit_type == 'H' || unit_type == 'R') {
+			units = UINT64_C(1000);
+			base = BASE_1000;
+		} else {
+			units = UINT64_C(1024);
+			base = BASE_1024;
+		}
+
+		if (!use_si_units)
+			base = BASE_SHARED;
+
+		byte = units * units * units * units * units * units;
+
+		for (s = 0; s < NUM_UNIT_PREFIXES && size < byte; s++)
+			byte /= units;
+
+		if ((s < NUM_UNIT_PREFIXES) &&
+		    ((unit_type == 'R') || (unit_type == 'r'))) {
+			/* When rounding would cause a difference, add a '<' prefix,
+			 * i.e. 2043M is more than 1.9949G and prints <2.00G.
+			 * This version uses 2-digit fixed precision. */
+			d = 100. * (double) size / byte;
+			if (!_close_enough(floorl(d), nearbyintl(d)))
+				prefix = "<";
+		}
+
+		include_suffix = 1;
+	}
+
+	/* FIXME Make precision configurable */
+	switch (toupper(*size_str[base + s][DM_SIZE_UNIT])) {
+	case 'B':
+	case 'S':
+		precision = 0;
+		break;
+	default:
+		precision = 2;
+	}
+
+	snprintf(size_buf, SIZE_BUF, "%s%.*f%s", prefix, precision,
+		 (double) size / byte, include_suffix ? size_str[base + s][suffix_type] : "");
+
+	return size_buf;
+}
+
+uint64_t dm_units_to_factor(const char *units, char *unit_type,
+			    int strict, const char **endptr)
+{
+	char *ptr = NULL;
+	uint64_t v;
+	double custom_value = 0;
+	uint64_t multiplier;
+
+	if (endptr)
+		*endptr = units;
+
+	if (isdigit(*units)) {
+		custom_value = strtod(units, &ptr);
+		if (ptr == units)
+			return 0;
+		v = (uint64_t) strtoull(units, NULL, 10);
+		if (_close_enough((double) v, custom_value))
+			custom_value = 0;	/* Use integer arithmetic */
+		units = ptr;
+	} else
+		v = 1;
+
+	/* Only one units char permitted in strict mode.
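A few worked values for `dm_units_to_factor` as implemented above (lowercase suffixes are binary, uppercase are SI, 's' means 512-byte sectors, and a leading number scales the factor); header path assumed from this patch:

```c
#include <inttypes.h>
#include <stdio.h>

#include "device_mapper/libdevmapper.h"	/* header path assumed from this patch */

int main(void)
{
	char t;

	printf("m  -> %" PRIu64 "\n", dm_units_to_factor("m", &t, 1, NULL));	/* 1048576 */
	printf("M  -> %" PRIu64 "\n", dm_units_to_factor("M", &t, 1, NULL));	/* 1000000 */
	printf("s  -> %" PRIu64 "\n", dm_units_to_factor("s", &t, 1, NULL));	/* 512 */

	/* a leading number scales the factor and unit_type becomes 'U' */
	printf("2k -> %" PRIu64 " (%c)\n",
	       dm_units_to_factor("2k", &t, 1, NULL), t);	/* 2048 (U) */
	return 0;
}
```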
*/ + if (strict && units[0] && units[1]) + return 0; + + if (v == 1) + *unit_type = *units; + else + *unit_type = 'U'; + + switch (*units) { + case 'h': + case 'H': + case 'r': + case 'R': + multiplier = v = UINT64_C(1); + *unit_type = *units; + break; + case 'b': + case 'B': + multiplier = UINT64_C(1); + break; +#define KILO UINT64_C(1024) + case 's': + case 'S': + multiplier = (KILO/2); + break; + case 'k': + multiplier = KILO; + break; + case 'm': + multiplier = KILO * KILO; + break; + case 'g': + multiplier = KILO * KILO * KILO; + break; + case 't': + multiplier = KILO * KILO * KILO * KILO; + break; + case 'p': + multiplier = KILO * KILO * KILO * KILO * KILO; + break; + case 'e': + multiplier = KILO * KILO * KILO * KILO * KILO * KILO; + break; +#undef KILO +#define KILO UINT64_C(1000) + case 'K': + multiplier = KILO; + break; + case 'M': + multiplier = KILO * KILO; + break; + case 'G': + multiplier = KILO * KILO * KILO; + break; + case 'T': + multiplier = KILO * KILO * KILO * KILO; + break; + case 'P': + multiplier = KILO * KILO * KILO * KILO * KILO; + break; + case 'E': + multiplier = KILO * KILO * KILO * KILO * KILO * KILO; + break; +#undef KILO + default: + return 0; + } + + if (endptr) + *endptr = units + 1; + + if (_close_enough(custom_value, 0.)) + return v * multiplier; /* Use integer arithmetic */ + else + return (uint64_t) (custom_value * multiplier); +} diff --git a/device_mapper/libdm-targets.c b/device_mapper/libdm-targets.c new file mode 100644 index 000000000..5ab4701bb --- /dev/null +++ b/device_mapper/libdm-targets.c @@ -0,0 +1,565 @@ +/* + * Copyright (C) 2005-2015 Red Hat, Inc. All rights reserved. + * + * This file is part of the device-mapper userspace tools. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU Lesser General Public License v.2.1. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "misc/dmlib.h" +#include "libdm-common.h" + +int dm_get_status_snapshot(struct dm_pool *mem, const char *params, + struct dm_status_snapshot **status) +{ + struct dm_status_snapshot *s; + int r; + + if (!params) { + log_error("Failed to parse invalid snapshot params."); + return 0; + } + + if (!(s = dm_pool_zalloc(mem, sizeof(*s)))) { + log_error("Failed to allocate snapshot status structure."); + return 0; + } + + r = sscanf(params, FMTu64 "/" FMTu64 " " FMTu64, + &s->used_sectors, &s->total_sectors, + &s->metadata_sectors); + + if (r == 3 || r == 2) + s->has_metadata_sectors = (r == 3); + else if (!strcmp(params, "Invalid")) + s->invalid = 1; + else if (!strcmp(params, "Merge failed")) + s->merge_failed = 1; + else if (!strcmp(params, "Overflow")) + s->overflow = 1; + else { + dm_pool_free(mem, s); + log_error("Failed to parse snapshot params: %s.", params); + return 0; + } + + *status = s; + + return 1; +} + +/* + * Skip nr fields each delimited by a single space. + * FIXME Don't assume single space. + */ +static const char *_skip_fields(const char *p, unsigned nr) +{ + while (p && nr-- && (p = strchr(p, ' '))) + p++; + + return p; +} + +/* + * Count number of single-space delimited fields. + * Number of fields is number of spaces plus one. 
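A standalone illustration of the field-walking helpers described here (reimplemented locally, since `_skip_fields`/`_count_fields` are static): count single-space delimited fields in a status line and jump to a given field.

```c
#include <stdio.h>
#include <string.h>

/* Same logic as _skip_fields above: advance past nr space-delimited fields. */
static const char *skip_fields(const char *p, unsigned nr)
{
	while (p && nr-- && (p = strchr(p, ' ')))
		p++;
	return p;
}

int main(void)
{
	const char *params = "raid1 2 AA 100/100 idle 0 0";
	const char *p = params;
	unsigned nr = 1;	/* number of fields is number of spaces plus one */

	while ((p = skip_fields(p, 1)))
		nr++;

	printf("%u fields; field 5 starts at '%s'\n", nr, skip_fields(params, 4));
	return 0;
}
```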
+ */ +static unsigned _count_fields(const char *p) +{ + unsigned nr = 1; + + if (!p || !*p) + return 0; + + while ((p = _skip_fields(p, 1))) + nr++; + + return nr; +} + +/* + * Various RAID status versions include: + * Versions < 1.5.0 (4 fields): + * <raid_type> <#devs> <health_str> <sync_ratio> + * Versions 1.5.0+ (6 fields): + * <raid_type> <#devs> <health_str> <sync_ratio> <sync_action> <mismatch_cnt> + * Versions 1.9.0+ (7 fields): + * <raid_type> <#devs> <health_str> <sync_ratio> <sync_action> <mismatch_cnt> <data_offset> + */ +int dm_get_status_raid(struct dm_pool *mem, const char *params, + struct dm_status_raid **status) +{ + int i; + unsigned num_fields; + const char *p, *pp, *msg_fields = ""; + struct dm_status_raid *s = NULL; + unsigned a = 0; + + if ((num_fields = _count_fields(params)) < 4) + goto_bad; + + /* Second field holds the device count */ + msg_fields = "<#devs> "; + if (!(p = _skip_fields(params, 1)) || (sscanf(p, "%d", &i) != 1)) + goto_bad; + + msg_fields = ""; + if (!(s = dm_pool_zalloc(mem, sizeof(struct dm_status_raid)))) + goto_bad; + + if (!(s->raid_type = dm_pool_zalloc(mem, p - params))) + goto_bad; /* memory is freed when pool is destroyed */ + + if (!(s->dev_health = dm_pool_zalloc(mem, i + 1))) /* Space for health chars */ + goto_bad; + + msg_fields = "<raid_type> <#devices> <health_chars> and <sync_ratio> "; + if (sscanf(params, "%s %u %s " FMTu64 "/" FMTu64, + s->raid_type, + &s->dev_count, + s->dev_health, + &s->insync_regions, + &s->total_regions) != 5) + goto_bad; + + /* + * All pre-1.5.0 version parameters are read. Now we check + * for additional 1.5.0+ parameters (i.e. num_fields at least 6). + * + * Note that 'sync_action' will be NULL (and mismatch_count + * will be 0) if the kernel returns a pre-1.5.0 status. + */ + if (num_fields < 6) + goto out; + + msg_fields = "<sync_action> and <mismatch_cnt> "; + + /* Skip pre-1.5.0 params */ + if (!(p = _skip_fields(params, 4)) || !(pp = _skip_fields(p, 1))) + goto_bad; + + if (!(s->sync_action = dm_pool_zalloc(mem, pp - p))) + goto_bad; + + if (sscanf(p, "%s " FMTu64, s->sync_action, &s->mismatch_count) != 2) + goto_bad; + + if (num_fields < 7) + goto out; + + /* + * All pre-1.9.0 version parameters are read. Now we check + * for additional 1.9.0+ parameters (i.e. nr_fields at least 7). + * + * Note that data_offset will be 0 if the + * kernel returns a pre-1.9.0 status. 
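As a caller-side sketch of the finished parser, feeding a sample 6-field (dm-raid >= 1.5.0) status line to `dm_get_status_raid`; the pool name and values are illustrative, header path assumed from this patch:

```c
#include <inttypes.h>
#include <stdio.h>

#include "device_mapper/libdevmapper.h"	/* header path assumed from this patch */

int main(void)
{
	/* sample status: raid1, 2 devices, both healthy, fully in sync */
	const char *params = "raid1 2 AA 100/100 idle 0";
	struct dm_pool *mem;
	struct dm_status_raid *st;

	if (!(mem = dm_pool_create("raid-status", 512)))
		return 1;

	if (dm_get_status_raid(mem, params, &st))
		printf("%s: %u devices (%s), %" PRIu64 "/%" PRIu64
		       " in sync, action %s\n",
		       st->raid_type, st->dev_count, st->dev_health,
		       st->insync_regions, st->total_regions,
		       st->sync_action);

	dm_pool_destroy(mem);	/* frees st and its strings as well */
	return 0;
}
```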
+ */
+	msg_fields = "<data_offset>";
+	if (!(p = _skip_fields(params, 6))) /* skip pre-1.9.0 params */
+		goto bad;
+	if (sscanf(p, FMTu64, &s->data_offset) != 1)
+		goto bad;
+
+out:
+	*status = s;
+
+	if (s->insync_regions == s->total_regions) {
+		/* FIXME: kernel gives misleading info here.
+		 * Try to recognize the true state. */
+		while (i-- > 0)
+			if (s->dev_health[i] == 'a')
+				a++; /* Count number of 'a' */
+
+		if (a && a < s->dev_count) {
+			/* SOME legs are in 'a' */
+			if (!strcasecmp(s->sync_action, "recover")
+			    || !strcasecmp(s->sync_action, "idle"))
+				/* The kernel may start some action in the
+				 * near future, so do not report 100%. */
+				s->insync_regions--;
+		}
+	}
+
+	return 1;
+
+bad:
+	log_error("Failed to parse %sraid params: %s", msg_fields, params);
+
+	if (s)
+		dm_pool_free(mem, s);
+
+	*status = NULL;
+
+	return 0;
+}
+
+/*
+ * <metadata block size> <#used metadata blocks>/<#total metadata blocks>
+ * <cache block size> <#used cache blocks>/<#total cache blocks>
+ * <#read hits> <#read misses> <#write hits> <#write misses>
+ * <#demotions> <#promotions> <#dirty> <#features> <features>*
+ * <#core args> <core args>* <policy name> <#policy args> <policy args>*
+ *
+ * metadata block size	  : Fixed block size for each metadata block in
+ *			    sectors
+ * #used metadata blocks  : Number of metadata blocks used
+ * #total metadata blocks : Total number of metadata blocks
+ * cache block size	  : Configurable block size for the cache device
+ *			    in sectors
+ * #used cache blocks	  : Number of blocks resident in the cache
+ * #total cache blocks	  : Total number of cache blocks
+ * #read hits		  : Number of times a READ bio has been mapped
+ *			    to the cache
+ * #read misses		  : Number of times a READ bio has been mapped
+ *			    to the origin
+ * #write hits		  : Number of times a WRITE bio has been mapped
+ *			    to the cache
+ * #write misses	  : Number of times a WRITE bio has been
+ *			    mapped to the origin
+ * #demotions		  : Number of times a block has been removed
+ *			    from the cache
+ * #promotions		  : Number of times a block has been moved to
+ *			    the cache
+ * #dirty		  : Number of blocks in the cache that differ
+ *			    from the origin
+ * #feature args	  : Number of feature args to follow
+ * feature args		  : 'writethrough' (optional)
+ * #core args		  : Number of core arguments (must be even)
+ * core args		  : Key/value pairs for tuning the core
+ *			    e.g. migration_threshold
+ * policy name		  : Name of the policy
+ * #policy args		  : Number of policy arguments to follow (must be even)
+ * policy args		  : Key/value pairs
+ *			    e.g.
sequential_threshold + */ +int dm_get_status_cache(struct dm_pool *mem, const char *params, + struct dm_status_cache **status) +{ + int i, feature_argc; + char *str; + const char *p, *pp; + struct dm_status_cache *s; + + if (!(s = dm_pool_zalloc(mem, sizeof(struct dm_status_cache)))) + return_0; + + if (strstr(params, "Error")) { + s->error = 1; + s->fail = 1; /* This is also I/O fail state */ + goto out; + } + + if (strstr(params, "Fail")) { + s->fail = 1; + goto out; + } + + /* Read in args that have definitive placement */ + if (sscanf(params, + " " FMTu32 + " " FMTu64 "/" FMTu64 + " " FMTu32 + " " FMTu64 "/" FMTu64 + " " FMTu64 " " FMTu64 + " " FMTu64 " " FMTu64 + " " FMTu64 " " FMTu64 + " " FMTu64 + " %d", + &s->metadata_block_size, + &s->metadata_used_blocks, &s->metadata_total_blocks, + &s->block_size, /* AKA, chunk_size */ + &s->used_blocks, &s->total_blocks, + &s->read_hits, &s->read_misses, + &s->write_hits, &s->write_misses, + &s->demotions, &s->promotions, + &s->dirty_blocks, + &feature_argc) != 14) + goto bad; + + /* Now jump to "features" section */ + if (!(p = _skip_fields(params, 12))) + goto bad; + + /* Read in features */ + for (i = 0; i < feature_argc; i++) { + if (!strncmp(p, "writethrough ", 13)) + s->feature_flags |= DM_CACHE_FEATURE_WRITETHROUGH; + else if (!strncmp(p, "writeback ", 10)) + s->feature_flags |= DM_CACHE_FEATURE_WRITEBACK; + else if (!strncmp(p, "passthrough ", 12)) + s->feature_flags |= DM_CACHE_FEATURE_PASSTHROUGH; + else if (!strncmp(p, "metadata2 ", 10)) + s->feature_flags |= DM_CACHE_FEATURE_METADATA2; + else + log_error("Unknown feature in status: %s", params); + + if (!(p = _skip_fields(p, 1))) + goto bad; + } + + /* Read in core_args. */ + if (sscanf(p, "%d ", &s->core_argc) != 1) + goto bad; + if ((s->core_argc > 0) && + (!(s->core_argv = dm_pool_zalloc(mem, sizeof(char *) * s->core_argc)) || + !(p = _skip_fields(p, 1)) || + !(str = dm_pool_strdup(mem, p)) || + !(p = _skip_fields(p, (unsigned) s->core_argc)) || + (dm_split_words(str, s->core_argc, 0, s->core_argv) != s->core_argc))) + goto bad; + + /* Read in policy args */ + pp = p; + if (!(p = _skip_fields(p, 1)) || + !(s->policy_name = dm_pool_zalloc(mem, (p - pp)))) + goto bad; + if (sscanf(pp, "%s %d", s->policy_name, &s->policy_argc) != 2) + goto bad; + if (s->policy_argc && + (!(s->policy_argv = dm_pool_zalloc(mem, sizeof(char *) * s->policy_argc)) || + !(p = _skip_fields(p, 1)) || + !(str = dm_pool_strdup(mem, p)) || + (dm_split_words(str, s->policy_argc, 0, s->policy_argv) != s->policy_argc))) + goto bad; + + /* TODO: improve this parser */ + if (strstr(p, " ro")) + s->read_only = 1; + + if (strstr(p, " needs_check")) + s->needs_check = 1; +out: + *status = s; + return 1; + +bad: + log_error("Failed to parse cache params: %s", params); + dm_pool_free(mem, s); + *status = NULL; + + return 0; +} + +int parse_thin_pool_status(const char *params, struct dm_status_thin_pool *s) +{ + int pos; + + memset(s, 0, sizeof(*s)); + + if (!params) { + log_error("Failed to parse invalid thin params."); + return 0; + } + + if (strstr(params, "Error")) { + s->error = 1; + s->fail = 1; /* This is also I/O fail state */ + return 1; + } + + if (strstr(params, "Fail")) { + s->fail = 1; + return 1; + } + + /* FIXME: add support for held metadata root */ + if (sscanf(params, FMTu64 " " FMTu64 "/" FMTu64 " " FMTu64 "/" FMTu64 "%n", + &s->transaction_id, + &s->used_metadata_blocks, + &s->total_metadata_blocks, + &s->used_data_blocks, + &s->total_data_blocks, &pos) < 5) { + log_error("Failed to parse thin pool 
params: %s.", params);
+		return 0;
+	}
+
+	/* New status flags */
+	if (strstr(params + pos, "no_discard_passdown"))
+		s->discards = DM_THIN_DISCARDS_NO_PASSDOWN;
+	else if (strstr(params + pos, "ignore_discard"))
+		s->discards = DM_THIN_DISCARDS_IGNORE;
+	else /* default discard_passdown */
+		s->discards = DM_THIN_DISCARDS_PASSDOWN;
+
+	/* Default is 'writable' (rw) data */
+	if (strstr(params + pos, "out_of_data_space"))
+		s->out_of_data_space = 1;
+	else if (strstr(params + pos, "ro "))
+		s->read_only = 1;
+
+	/* Default is 'queue_if_no_space' */
+	if (strstr(params + pos, "error_if_no_space"))
+		s->error_if_no_space = 1;
+
+	if (strstr(params + pos, "needs_check"))
+		s->needs_check = 1;
+
+	return 1;
+}
+
+int dm_get_status_thin_pool(struct dm_pool *mem, const char *params,
+			    struct dm_status_thin_pool **status)
+{
+	struct dm_status_thin_pool *s;
+
+	if (!(s = dm_pool_alloc(mem, sizeof(struct dm_status_thin_pool)))) {
+		log_error("Failed to allocate thin_pool status structure.");
+		return 0;
+	}
+
+	if (!parse_thin_pool_status(params, s)) {
+		dm_pool_free(mem, s);
+		return_0;
+	}
+
+	*status = s;
+
+	return 1;
+}
+
+int dm_get_status_thin(struct dm_pool *mem, const char *params,
+		       struct dm_status_thin **status)
+{
+	struct dm_status_thin *s;
+
+	if (!(s = dm_pool_zalloc(mem, sizeof(struct dm_status_thin)))) {
+		log_error("Failed to allocate thin status structure.");
+		return 0;
+	}
+
+	if (strchr(params, '-')) {
+		/* nothing to parse */
+	} else if (strstr(params, "Fail")) {
+		s->fail = 1;
+	} else if (sscanf(params, FMTu64 " " FMTu64,
+		   &s->mapped_sectors,
+		   &s->highest_mapped_sector) != 2) {
+		dm_pool_free(mem, s);
+		log_error("Failed to parse thin params: %s.", params);
+		return 0;
+	}
+
+	*status = s;
+
+	return 1;
+}
+
+/*
+ * dm core parms:	     0 409600 mirror
+ * Mirror core parms:	     2 253:4 253:5 400/400
+ * New-style failure params: 1 AA
+ * New-style log params:     3 cluster 253:3 A
+ *			  or 3 disk 253:3 A
+ *			  or 1 core
+ */
+#define DM_MIRROR_MAX_IMAGES 8 /* limited by kernel DM_KCOPYD_MAX_REGIONS */
+
+int dm_get_status_mirror(struct dm_pool *mem, const char *params,
+			 struct dm_status_mirror **status)
+{
+	struct dm_status_mirror *s;
+	const char *p, *pos = params;
+	unsigned num_devs, argc, i;
+	int used;
+
+	if (!(s = dm_pool_zalloc(mem, sizeof(*s)))) {
+		log_error("Failed to alloc mem pool to parse mirror status.");
+		return 0;
+	}
+
+	if (sscanf(pos, "%u %n", &num_devs, &used) != 1)
+		goto_out;
+	pos += used;
+
+	if (num_devs > DM_MIRROR_MAX_IMAGES) {
+		log_error(INTERNAL_ERROR "More than " DM_TO_STRING(DM_MIRROR_MAX_IMAGES)
+			  " device images reported in mirror status.");
+		goto out;
+	}
+
+	if (!(s->devs = dm_pool_alloc(mem, num_devs * sizeof(*(s->devs))))) {
+		log_error("Allocation of devs failed.");
+		goto out;
+	}
+
+	for (i = 0; i < num_devs; ++i, pos += used)
+		if (sscanf(pos, "%u:%u %n",
+			   &(s->devs[i].major), &(s->devs[i].minor), &used) != 2)
+			goto_out;
+
+	if (sscanf(pos, FMTu64 "/" FMTu64 "%n",
+		   &s->insync_regions, &s->total_regions, &used) != 2)
+		goto_out;
+	pos += used;
+
+	if (sscanf(pos, "%u %n", &argc, &used) != 1)
+		goto_out;
+	pos += used;
+
+	for (i = 0; i < num_devs ; ++i)
+		s->devs[i].health = pos[i];
+
+	if (!(pos = _skip_fields(pos, argc)))
+		goto_out;
+
+	if (strncmp(pos, "userspace", 9) == 0) {
+		pos += 9;
+		/* FIXME: support status of userspace mirror implementation */
+	}
+
+	if (sscanf(pos, "%u %n", &argc, &used) != 1)
+		goto_out;
+	pos += used;
+
+	if (argc == 1) {
+		/* core, cluster-core */
+		if (!(s->log_type = dm_pool_strdup(mem, pos))) {
+			log_error("Allocation of log type string failed.");
+			goto out;
+		}
+	} else {
+		if (!(p = _skip_fields(pos, 1)))
+			goto_out;
+
+		/* disk, cluster-disk */
+		if (!(s->log_type = dm_pool_strndup(mem, pos, p - pos - 1))) {
+			log_error("Allocation of log type string failed.");
+			goto out;
+		}
+		pos = p;
+
+		if ((argc > 2) && !strcmp(s->log_type, "disk")) {
+			s->log_count = argc - 2;
+
+			if (!(s->logs = dm_pool_alloc(mem, s->log_count * sizeof(*(s->logs))))) {
+				log_error("Allocation of logs failed.");
+				goto out;
+			}
+
+			for (i = 0; i < s->log_count; ++i, pos += used)
+				if (sscanf(pos, "%u:%u %n",
+					   &s->logs[i].major, &s->logs[i].minor, &used) != 2)
+					goto_out;
+
+			for (i = 0; i < s->log_count; ++i)
+				s->logs[i].health = pos[i];
+		}
+	}
+
+	s->dev_count = num_devs;
+	*status = s;
+
+	return 1;
+out:
+	log_error("Failed to parse mirror status %s.", params);
+	dm_pool_free(mem, s);
+	*status = NULL;
+
+	return 0;
+}
diff --git a/device_mapper/libdm-timestamp.c b/device_mapper/libdm-timestamp.c
new file mode 100644
index 000000000..c2d0ad8d2
--- /dev/null
+++ b/device_mapper/libdm-timestamp.c
@@ -0,0 +1,178 @@
+/*
+ * Copyright (C) 2006 Rackable Systems All rights reserved.
+ * Copyright (C) 2015 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * Abstract out the time methods used so they can be adjusted later -
+ * the results of these routines should stay in-core.
+ */
+
+#include "misc/dmlib.h"
+
+#include <stdlib.h>
+
+#define NSEC_PER_USEC	UINT64_C(1000)
+#define NSEC_PER_MSEC	UINT64_C(1000000)
+#define NSEC_PER_SEC	UINT64_C(1000000000)
+
+/*
+ * The realtime section uses clock_gettime with the CLOCK_MONOTONIC
+ * parameter to prevent issues with time warps.
+ * This implementation requires librt.
+ */
+#ifdef HAVE_REALTIME
+
+#include <time.h>
+
+struct dm_timestamp {
+	struct timespec t;
+};
+
+static uint64_t _timestamp_to_uint64(struct dm_timestamp *ts)
+{
+	uint64_t stamp = 0;
+
+	stamp += (uint64_t) ts->t.tv_sec * NSEC_PER_SEC;
+	stamp += (uint64_t) ts->t.tv_nsec;
+
+	return stamp;
+}
+
+struct dm_timestamp *dm_timestamp_alloc(void)
+{
+	struct dm_timestamp *ts = NULL;
+
+	if (!(ts = dm_zalloc(sizeof(*ts))))
+		stack;
+
+	return ts;
+}
+
+int dm_timestamp_get(struct dm_timestamp *ts)
+{
+	if (!ts)
+		return 0;
+
+	if (clock_gettime(CLOCK_MONOTONIC, &ts->t)) {
+		log_sys_error("clock_gettime", "get_timestamp");
+		ts->t.tv_sec = 0;
+		ts->t.tv_nsec = 0;
+		return 0;
+	}
+
+	return 1;
+}
+
+#else /* ! HAVE_REALTIME */
+
+/*
+ * The !realtime section just uses gettimeofday and is therefore subject
+ * to ntp-type time warps - not sure if we should allow that.
+ */
+
+#include <sys/time.h>
+
+struct dm_timestamp {
+	struct timeval t;
+};
+
+static uint64_t _timestamp_to_uint64(struct dm_timestamp *ts)
+{
+	uint64_t stamp = 0;
+
+	stamp += ts->t.tv_sec * NSEC_PER_SEC;
+	stamp += ts->t.tv_usec * NSEC_PER_USEC;
+
+	return stamp;
+}
+
+struct dm_timestamp *dm_timestamp_alloc(void)
+{
+	struct dm_timestamp *ts;
+
+	if (!(ts = dm_malloc(sizeof(*ts))))
+		stack;
+
+	return ts;
+}
+
+int dm_timestamp_get(struct dm_timestamp *ts)
+{
+	if (!ts)
+		return 0;
+
+	if (gettimeofday(&ts->t, NULL)) {
+		log_sys_error("gettimeofday", "get_timestamp");
+		ts->t.tv_sec = 0;
+		ts->t.tv_usec = 0;
+		return 0;
+	}
+
+	return 1;
+}
+
+#endif /* HAVE_REALTIME */
+
+/*
+ * Compare two timestamps.
+ *
+ * Return: -1 if ts1 is less than ts2
+ *	    0 if ts1 is equal to ts2
+ *	    1 if ts1 is greater than ts2
+ */
+int dm_timestamp_compare(struct dm_timestamp *ts1, struct dm_timestamp *ts2)
+{
+	uint64_t t1, t2;
+
+	t1 = _timestamp_to_uint64(ts1);
+	t2 = _timestamp_to_uint64(ts2);
+
+	if (t2 < t1)
+		return 1;
+
+	if (t1 < t2)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Return the absolute difference in nanoseconds between
+ * the dm_timestamp objects ts1 and ts2.
+ *
+ * Callers that need to know whether ts1 is before, equal to, or after ts2
+ * in addition to the magnitude should use dm_timestamp_compare.
+ */
+uint64_t dm_timestamp_delta(struct dm_timestamp *ts1, struct dm_timestamp *ts2)
+{
+	uint64_t t1, t2;
+
+	t1 = _timestamp_to_uint64(ts1);
+	t2 = _timestamp_to_uint64(ts2);
+
+	if (t1 > t2)
+		return t1 - t2;
+
+	return t2 - t1;
+}
+
+void dm_timestamp_copy(struct dm_timestamp *ts_new, struct dm_timestamp *ts_old)
+{
+	*ts_new = *ts_old;
+}
+
+void dm_timestamp_destroy(struct dm_timestamp *ts)
+{
+	dm_free(ts);
+}
diff --git a/device_mapper/misc/dm-ioctl.h b/device_mapper/misc/dm-ioctl.h
new file mode 100644
index 000000000..79f574cd9
--- /dev/null
+++ b/device_mapper/misc/dm-ioctl.h
@@ -0,0 +1,364 @@
+/*
+ * Copyright (C) 2001 - 2003 Sistina Software (UK) Limited.
+ * Copyright (C) 2004 - 2017 Red Hat, Inc. All rights reserved.
+ *
+ * This file is released under the LGPL.
+ */
+
+#ifndef _LINUX_DM_IOCTL_V4_H
+#define _LINUX_DM_IOCTL_V4_H
+
+#ifdef __linux__
+#  include <linux/types.h>
+#endif
+
+#define DM_DIR "mapper"		/* Slashes not supported */
+#define DM_CONTROL_NODE "control"
+#define DM_MAX_TYPE_NAME 16
+#define DM_NAME_LEN 128
+#define DM_UUID_LEN 129
+
+/*
+ * A traditional ioctl interface for the device mapper.
+ *
+ * Each device can have two tables associated with it, an
+ * 'active' table which is the one currently used by io passing
+ * through the device, and an 'inactive' one which is a table
+ * that is being prepared as a replacement for the 'active' one.
+ *
+ * DM_VERSION:
+ * Just get the version information for the ioctl interface.
+ *
+ * DM_REMOVE_ALL:
+ * Remove all dm devices, destroy all tables.  Only really used
+ * for debug.
+ *
+ * DM_LIST_DEVICES:
+ * Get a list of all the dm device names.
+ *
+ * DM_DEV_CREATE:
+ * Create a new device; neither the 'active' nor the 'inactive' table
+ * slot will be filled.  The device will be in suspended state
+ * after creation, however any io to the device will get errored
+ * since it will be out-of-bounds.
+ *
+ * DM_DEV_REMOVE:
+ * Remove a device, destroy any tables.
+ *
+ * DM_DEV_RENAME:
+ * Rename a device or set its uuid if none was previously supplied.
+ *
+ * DM_SUSPEND:
+ * This performs both suspend and resume, depending on which flag is
+ * passed in.
+ * Suspend: This command will not return until all pending io to + * the device has completed. Further io will be deferred until + * the device is resumed. + * Resume: It is no longer an error to issue this command on an + * unsuspended device. If a table is present in the 'inactive' + * slot, it will be moved to the active slot, then the old table + * from the active slot will be _destroyed_. Finally the device + * is resumed. + * + * DM_DEV_STATUS: + * Retrieves the status for the table in the 'active' slot. + * + * DM_DEV_WAIT: + * Wait for a significant event to occur to the device. This + * could either be caused by an event triggered by one of the + * targets of the table in the 'active' slot, or a table change. + * + * DM_TABLE_LOAD: + * Load a table into the 'inactive' slot for the device. The + * device does _not_ need to be suspended prior to this command. + * + * DM_TABLE_CLEAR: + * Destroy any table in the 'inactive' slot (ie. abort). + * + * DM_TABLE_DEPS: + * Return a set of device dependencies for the 'active' table. + * + * DM_TABLE_STATUS: + * Return the targets status for the 'active' table. + * + * DM_TARGET_MSG: + * Pass a message string to the target at a specific offset of a device. + * + * DM_DEV_SET_GEOMETRY: + * Set the geometry of a device by passing in a string in this format: + * + * "cylinders heads sectors_per_track start_sector" + * + * Beware that CHS geometry is nearly obsolete and only provided + * for compatibility with dm devices that can be booted by a PC + * BIOS. See struct hd_geometry for range limits. Also note that + * the geometry is erased if the device size changes. + */ + +/* + * All ioctl arguments consist of a single chunk of memory, with + * this structure at the start. If a uuid is specified any + * lookup (eg. for a DM_INFO) will be done on that, *not* the + * name. + */ +struct dm_ioctl { + /* + * The version number is made up of three parts: + * major - no backward or forward compatibility, + * minor - only backwards compatible, + * patch - both backwards and forwards compatible. + * + * All clients of the ioctl interface should fill in the + * version number of the interface that they were + * compiled with. + * + * All recognised ioctl commands (ie. those that don't + * return -ENOTTY) fill out this field, even if the + * command failed. + */ + uint32_t version[3]; /* in/out */ + uint32_t data_size; /* total size of data passed in + * including this struct */ + + uint32_t data_start; /* offset to start of data + * relative to start of this struct */ + + uint32_t target_count; /* in/out */ + int32_t open_count; /* out */ + uint32_t flags; /* in/out */ + + /* + * event_nr holds either the event number (input and output) or the + * udev cookie value (input only). + * The DM_DEV_WAIT ioctl takes an event number as input. + * The DM_SUSPEND, DM_DEV_REMOVE and DM_DEV_RENAME ioctls + * use the field as a cookie to return in the DM_COOKIE + * variable with the uevents they issue. + * For output, the ioctls return the event number, not the cookie. + */ + uint32_t event_nr; /* in/out */ + uint32_t padding; + + uint64_t dev; /* in/out */ + + char name[DM_NAME_LEN]; /* device name */ + char uuid[DM_UUID_LEN]; /* unique identifier for + * the block device */ + char data[7]; /* padding or data */ +}; + +/* + * Used to specify tables. These structures appear after the + * dm_ioctl. 
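To make the version handshake above concrete, a minimal sketch of issuing DM_VERSION against the control node; it only uses the struct fields and macros defined in this header, and needs the device-mapper module loaded plus sufficient privileges:

```c
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

#include "device_mapper/misc/dm-ioctl.h"	/* header path assumed from this patch */

int main(void)
{
	struct dm_ioctl dmi;
	int fd;

	if ((fd = open("/dev/" DM_DIR "/" DM_CONTROL_NODE, O_RDWR)) < 0)
		return 1;

	memset(&dmi, 0, sizeof(dmi));

	/* announce the interface version this program was compiled against */
	dmi.version[0] = DM_VERSION_MAJOR;
	dmi.version[1] = DM_VERSION_MINOR;
	dmi.version[2] = DM_VERSION_PATCHLEVEL;
	dmi.data_size = sizeof(dmi);
	dmi.data_start = sizeof(dmi);

	if (ioctl(fd, DM_VERSION, &dmi) < 0) {
		perror("DM_VERSION");
		(void) close(fd);
		return 1;
	}

	/* the kernel writes back the version it actually implements */
	printf("dm-ioctl interface %u.%u.%u\n",
	       dmi.version[0], dmi.version[1], dmi.version[2]);

	(void) close(fd);
	return 0;
}
```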
+ */ +struct dm_target_spec { + uint64_t sector_start; + uint64_t length; + int32_t status; /* used when reading from kernel only */ + + /* + * Location of the next dm_target_spec. + * - When specifying targets on a DM_TABLE_LOAD command, this value is + * the number of bytes from the start of the "current" dm_target_spec + * to the start of the "next" dm_target_spec. + * - When retrieving targets on a DM_TABLE_STATUS command, this value + * is the number of bytes from the start of the first dm_target_spec + * (that follows the dm_ioctl struct) to the start of the "next" + * dm_target_spec. + */ + uint32_t next; + + char target_type[DM_MAX_TYPE_NAME]; + + /* + * Parameter string starts immediately after this object. + * Be careful to add padding after string to ensure correct + * alignment of subsequent dm_target_spec. + */ +}; + +/* + * Used to retrieve the target dependencies. + */ +struct dm_target_deps { + uint32_t count; /* Array size */ + uint32_t padding; /* unused */ + uint64_t dev[0]; /* out */ +}; + +/* + * Used to get a list of all dm devices. + */ +struct dm_name_list { + uint64_t dev; + uint32_t next; /* offset to the next record from + the _start_ of this */ + char name[0]; +}; + +/* + * Used to retrieve the target versions + */ +struct dm_target_versions { + uint32_t next; + uint32_t version[3]; + + char name[0]; +}; + +/* + * Used to pass message to a target + */ +struct dm_target_msg { + uint64_t sector; /* Device sector */ + + char message[0]; +}; + +/* + * If you change this make sure you make the corresponding change + * to dm-ioctl.c:lookup_ioctl() + */ +enum { + /* Top level cmds */ + DM_VERSION_CMD = 0, + DM_REMOVE_ALL_CMD, + DM_LIST_DEVICES_CMD, + + /* device level cmds */ + DM_DEV_CREATE_CMD, + DM_DEV_REMOVE_CMD, + DM_DEV_RENAME_CMD, + DM_DEV_SUSPEND_CMD, + DM_DEV_STATUS_CMD, + DM_DEV_WAIT_CMD, + + /* Table level cmds */ + DM_TABLE_LOAD_CMD, + DM_TABLE_CLEAR_CMD, + DM_TABLE_DEPS_CMD, + DM_TABLE_STATUS_CMD, + + /* Added later */ + DM_LIST_VERSIONS_CMD, + DM_TARGET_MSG_CMD, + DM_DEV_SET_GEOMETRY_CMD, + DM_DEV_ARM_POLL_CMD, +}; + +#define DM_IOCTL 0xfd + +#define DM_VERSION _IOWR(DM_IOCTL, DM_VERSION_CMD, struct dm_ioctl) +#define DM_REMOVE_ALL _IOWR(DM_IOCTL, DM_REMOVE_ALL_CMD, struct dm_ioctl) +#define DM_LIST_DEVICES _IOWR(DM_IOCTL, DM_LIST_DEVICES_CMD, struct dm_ioctl) + +#define DM_DEV_CREATE _IOWR(DM_IOCTL, DM_DEV_CREATE_CMD, struct dm_ioctl) +#define DM_DEV_REMOVE _IOWR(DM_IOCTL, DM_DEV_REMOVE_CMD, struct dm_ioctl) +#define DM_DEV_RENAME _IOWR(DM_IOCTL, DM_DEV_RENAME_CMD, struct dm_ioctl) +#define DM_DEV_SUSPEND _IOWR(DM_IOCTL, DM_DEV_SUSPEND_CMD, struct dm_ioctl) +#define DM_DEV_STATUS _IOWR(DM_IOCTL, DM_DEV_STATUS_CMD, struct dm_ioctl) +#define DM_DEV_WAIT _IOWR(DM_IOCTL, DM_DEV_WAIT_CMD, struct dm_ioctl) +#define DM_DEV_ARM_POLL _IOWR(DM_IOCTL, DM_DEV_ARM_POLL_CMD, struct dm_ioctl) + +#define DM_TABLE_LOAD _IOWR(DM_IOCTL, DM_TABLE_LOAD_CMD, struct dm_ioctl) +#define DM_TABLE_CLEAR _IOWR(DM_IOCTL, DM_TABLE_CLEAR_CMD, struct dm_ioctl) +#define DM_TABLE_DEPS _IOWR(DM_IOCTL, DM_TABLE_DEPS_CMD, struct dm_ioctl) +#define DM_TABLE_STATUS _IOWR(DM_IOCTL, DM_TABLE_STATUS_CMD, struct dm_ioctl) + +#define DM_LIST_VERSIONS _IOWR(DM_IOCTL, DM_LIST_VERSIONS_CMD, struct dm_ioctl) + +#define DM_TARGET_MSG _IOWR(DM_IOCTL, DM_TARGET_MSG_CMD, struct dm_ioctl) +#define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) + +#define DM_VERSION_MAJOR 4 +#define DM_VERSION_MINOR 36 +#define DM_VERSION_PATCHLEVEL 0 +#define DM_VERSION_EXTRA "-ioctl 
(2017-06-09)" + +/* Status bits */ +#define DM_READONLY_FLAG (1 << 0) /* In/Out */ +#define DM_SUSPEND_FLAG (1 << 1) /* In/Out */ +#define DM_PERSISTENT_DEV_FLAG (1 << 3) /* In */ + +/* + * Flag passed into ioctl STATUS command to get table information + * rather than current status. + */ +#define DM_STATUS_TABLE_FLAG (1 << 4) /* In */ + +/* + * Flags that indicate whether a table is present in either of + * the two table slots that a device has. + */ +#define DM_ACTIVE_PRESENT_FLAG (1 << 5) /* Out */ +#define DM_INACTIVE_PRESENT_FLAG (1 << 6) /* Out */ + +/* + * Indicates that the buffer passed in wasn't big enough for the + * results. + */ +#define DM_BUFFER_FULL_FLAG (1 << 8) /* Out */ + +/* + * This flag is now ignored. + */ +#define DM_SKIP_BDGET_FLAG (1 << 9) /* In */ + +/* + * Set this to avoid attempting to freeze any filesystem when suspending. + */ +#define DM_SKIP_LOCKFS_FLAG (1 << 10) /* In */ + +/* + * Set this to suspend without flushing queued ios. + * Also disables flushing uncommitted changes in the thin target before + * generating statistics for DM_TABLE_STATUS and DM_DEV_WAIT. + */ +#define DM_NOFLUSH_FLAG (1 << 11) /* In */ + +/* + * If set, any table information returned will relate to the inactive + * table instead of the live one. Always check DM_INACTIVE_PRESENT_FLAG + * is set before using the data returned. + */ +#define DM_QUERY_INACTIVE_TABLE_FLAG (1 << 12) /* In */ + +/* + * If set, a uevent was generated for which the caller may need to wait. + */ +#define DM_UEVENT_GENERATED_FLAG (1 << 13) /* Out */ + +/* + * If set, rename changes the uuid not the name. Only permitted + * if no uuid was previously supplied: an existing uuid cannot be changed. + */ +#define DM_UUID_FLAG (1 << 14) /* In */ + +/* + * If set, all buffers are wiped after use. Use when sending + * or requesting sensitive data such as an encryption key. + */ +#define DM_SECURE_DATA_FLAG (1 << 15) /* In */ + +/* + * If set, a message generated output data. + */ +#define DM_DATA_OUT_FLAG (1 << 16) /* Out */ + +/* + * If set with DM_DEV_REMOVE or DM_REMOVE_ALL this indicates that if + * the device cannot be removed immediately because it is still in use + * it should instead be scheduled for removal when it gets closed. + * + * On return from DM_DEV_REMOVE, DM_DEV_STATUS or other ioctls, this + * flag indicates that the device is scheduled to be removed when it + * gets closed. + */ +#define DM_DEFERRED_REMOVE (1 << 17) /* In/Out */ + +/* + * If set, the device is suspended internally. + */ +#define DM_INTERNAL_SUSPEND_FLAG (1 << 18) /* Out */ + +#endif /* _LINUX_DM_IOCTL_H */ diff --git a/device_mapper/misc/dm-log-userspace.h b/device_mapper/misc/dm-log-userspace.h new file mode 100644 index 000000000..a770ae62e --- /dev/null +++ b/device_mapper/misc/dm-log-userspace.h @@ -0,0 +1,418 @@ +/* + * Copyright (C) 2006-2009 Red Hat, Inc. + * + * This file is released under the LGPL. + */ + +#ifndef __DM_LOG_USERSPACE_H__ +#define __DM_LOG_USERSPACE_H__ + +#include <inttypes.h> + +#include "dm-ioctl.h" /* For DM_UUID_LEN */ + +/* + * The device-mapper userspace log module consists of a kernel component and + * a user-space component. The kernel component implements the API defined + * in dm-dirty-log.h. Its purpose is simply to pass the parameters and + * return values of those API functions between kernel and user-space. + * + * Below are defined the 'request_types' - DM_ULOG_CTR, DM_ULOG_DTR, etc. + * These request types represent the different functions in the device-mapper + * dirty log API. 
Each of these is described in more detail below.
+ *
+ * The user-space program must listen for requests from the kernel
+ * (representing the various API functions) and process them.
+ *
+ * User-space begins by setting up the communication link (error checking
+ * removed for clarity):
+ *	fd = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
+ *	addr.nl_family = AF_NETLINK;
+ *	addr.nl_groups = CN_IDX_DM;
+ *	addr.nl_pid = 0;
+ *	r = bind(fd, (struct sockaddr *) &addr, sizeof(addr));
+ *	opt = addr.nl_groups;
+ *	setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &opt, sizeof(opt));
+ *
+ * User-space will then wait to receive requests from the kernel, which it
+ * will process as described below.  The requests are received in the form,
+ * ((struct dm_ulog_request) + (additional data)).  Depending on the request
+ * type, there may or may not be 'additional data'.  In the descriptions below,
+ * you will see 'Payload-to-userspace' and 'Payload-to-kernel'.  The
+ * 'Payload-to-userspace' is what the kernel sends in 'additional data' as
+ * necessary parameters to complete the request.  The 'Payload-to-kernel' is
+ * the 'additional data' returned to the kernel that contains the necessary
+ * results of the request.  The 'data_size' field in the dm_ulog_request
+ * structure denotes the availability and amount of payload data.
+ */
+
+/*
+ * DM_ULOG_CTR corresponds to (found in dm-dirty-log.h):
+ * int (*ctr)(struct dm_dirty_log *log, struct dm_target *ti,
+ *	      unsigned argc, char **argv);
+ *
+ * Payload-to-userspace:
+ *	A single string containing all the argv arguments separated by ' 's
+ * Payload-to-kernel:
+ *	The name of the device that is used as the backing store for the log
+ *	data.  'dm_get_device' will be called on this device.  ('dm_put_device'
+ *	will be called on this device automatically after calling DM_ULOG_DTR.)
+ *	If there is no device needed for log data, 'data_size' in the
+ *	dm_ulog_request struct should be 0.
+ *
+ * The UUID contained in the dm_ulog_request structure is the reference that
+ * will be used by all request types to a specific log.  The constructor must
+ * record this association with the instance created.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field, filling the
+ * data field with the log device if necessary, and setting 'data_size'
+ * appropriately.
+ */
+#define DM_ULOG_CTR			1
+
+/*
+ * DM_ULOG_DTR corresponds to (found in dm-dirty-log.h):
+ * void (*dtr)(struct dm_dirty_log *log);
+ *
+ * Payload-to-userspace:
+ *	A single string containing all the argv arguments separated by ' 's
+ * Payload-to-kernel:
+ *	None.  ('data_size' in the dm_ulog_request struct should be 0.)
+ *
+ * The UUID contained in the dm_ulog_request structure is all that is
+ * necessary to identify the log instance being destroyed.  There is no
+ * payload data.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and clearing
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_DTR			2
+
+/*
+ * DM_ULOG_PRESUSPEND corresponds to (found in dm-dirty-log.h):
+ * int (*presuspend)(struct dm_dirty_log *log);
+ *
+ * Payload-to-userspace:
+ *	None.
+ * Payload-to-kernel:
+ *	None.
+ *
+ * The UUID contained in the dm_ulog_request structure is all that is
+ * necessary to identify the log instance being presuspended.  There is no
+ * payload data.
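A skeleton of the user-space side of this protocol, dispatching on the request type and answering every request; `my_log_create`/`my_log_destroy` are hypothetical stand-ins for a real log implementation, the header path is assumed, and the `luid`/`data` fields are those of struct dm_ulog_request at the end of this header:

```c
#include <errno.h>
#include <stdint.h>

#include "device_mapper/misc/dm-log-userspace.h"	/* header path assumed */

/* hypothetical stand-ins for a real log implementation */
static int my_log_create(const char *argv_str, uint64_t luid)
{ (void) argv_str; (void) luid; return 1; }
static int my_log_destroy(uint64_t luid)
{ (void) luid; return 1; }

/* Fill in 'error' and 'data_size' before the request goes back to the
 * kernel; unhandled request types must still be answered. */
static void process_request(struct dm_ulog_request *rq)
{
	switch (DM_ULOG_REQUEST_TYPE(rq->request_type)) {
	case DM_ULOG_CTR:
		/* rq->data carries the ctr argv string on the way in */
		rq->error = my_log_create(rq->data, rq->luid) ? 0 : -EINVAL;
		rq->data_size = 0;	/* no backing log device returned */
		break;
	case DM_ULOG_DTR:
		rq->error = my_log_destroy(rq->luid) ? 0 : -EINVAL;
		rq->data_size = 0;
		break;
	default:
		rq->error = -ENOSYS;
		rq->data_size = 0;
	}
}
```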
+ * + * When the request has been processed, user-space must return the + * dm_ulog_request to the kernel - setting the 'error' field and + * 'data_size' appropriately. + */ +#define DM_ULOG_PRESUSPEND 3 + +/* + * DM_ULOG_POSTSUSPEND corresponds to (found in dm-dirty-log.h): + * int (*postsuspend)(struct dm_dirty_log *log); + * + * Payload-to-userspace: + * None. + * Payload-to-kernel: + * None. + * + * The UUID contained in the dm_ulog_request structure is all that is + * necessary to identify the log instance being postsuspended. There is no + * payload data. + * + * When the request has been processed, user-space must return the + * dm_ulog_request to the kernel - setting the 'error' field and + * 'data_size' appropriately. + */ +#define DM_ULOG_POSTSUSPEND 4 + +/* + * DM_ULOG_RESUME corresponds to (found in dm-dirty-log.h): + * int (*resume)(struct dm_dirty_log *log); + * + * Payload-to-userspace: + * None. + * Payload-to-kernel: + * None. + * + * The UUID contained in the dm_ulog_request structure is all that is + * necessary to identify the log instance being resumed. There is no + * payload data. + * + * When the request has been processed, user-space must return the + * dm_ulog_request to the kernel - setting the 'error' field and + * 'data_size' appropriately. + */ +#define DM_ULOG_RESUME 5 + +/* + * DM_ULOG_GET_REGION_SIZE corresponds to (found in dm-dirty-log.h): + * uint32_t (*get_region_size)(struct dm_dirty_log *log); + * + * Payload-to-userspace: + * None. + * Payload-to-kernel: + * uint64_t - contains the region size + * + * The region size is something that was determined at constructor time. + * It is returned in the payload area and 'data_size' is set to + * reflect this. + * + * When the request has been processed, user-space must return the + * dm_ulog_request to the kernel - setting the 'error' field appropriately. + */ +#define DM_ULOG_GET_REGION_SIZE 6 + +/* + * DM_ULOG_IS_CLEAN corresponds to (found in dm-dirty-log.h): + * int (*is_clean)(struct dm_dirty_log *log, region_t region); + * + * Payload-to-userspace: + * uint64_t - the region to get clean status on + * Payload-to-kernel: + * int64_t - 1 if clean, 0 otherwise + * + * Payload is sizeof(uint64_t) and contains the region for which the clean + * status is being made. + * + * When the request has been processed, user-space must return the + * dm_ulog_request to the kernel - filling the payload with 0 (not clean) or + * 1 (clean), setting 'data_size' and 'error' appropriately. + */ +#define DM_ULOG_IS_CLEAN 7 + +/* + * DM_ULOG_IN_SYNC corresponds to (found in dm-dirty-log.h): + * int (*in_sync)(struct dm_dirty_log *log, region_t region, + * int can_block); + * + * Payload-to-userspace: + * uint64_t - the region to get sync status on + * Payload-to-kernel: + * int64_t - 1 if in-sync, 0 otherwise + * + * Exactly the same as 'is_clean' above, except this time asking "has the + * region been recovered?" vs. "is the region not being modified?" + */ +#define DM_ULOG_IN_SYNC 8 + +/* + * DM_ULOG_FLUSH corresponds to (found in dm-dirty-log.h): + * int (*flush)(struct dm_dirty_log *log); + * + * Payload-to-userspace: + * None. + * Payload-to-kernel: + * None. + * + * No incoming or outgoing payload. Simply flush log state to disk. + * + * When the request has been processed, user-space must return the + * dm_ulog_request to the kernel - setting the 'error' field and clearing + * 'data_size' appropriately. 
+ */ +#define DM_ULOG_FLUSH 9 + +/* + * DM_ULOG_MARK_REGION corresponds to (found in dm-dirty-log.h): + * void (*mark_region)(struct dm_dirty_log *log, region_t region); + * + * Payload-to-userspace: + * uint64_t [] - region(s) to mark + * Payload-to-kernel: + * None. + * + * Incoming payload contains one or more regions to mark dirty. + * The number of regions contained in the payload can be determined from + * 'data_size/sizeof(uint64_t)'. + * + * When the request has been processed, user-space must return the + * dm_ulog_request to the kernel - setting the 'error' field and clearing + * 'data_size' appropriately. + */ +#define DM_ULOG_MARK_REGION 10 + +/* + * DM_ULOG_CLEAR_REGION corresponds to (found in dm-dirty-log.h): + * void (*clear_region)(struct dm_dirty_log *log, region_t region); + * + * Payload-to-userspace: + * uint64_t [] - region(s) to clear + * Payload-to-kernel: + * None. + * + * Incoming payload contains one or more regions to mark clean. + * The number of regions contained in the payload can be determined from + * 'data_size/sizeof(uint64_t)'. + * + * When the request has been processed, user-space must return the + * dm_ulog_request to the kernel - setting the 'error' field and clearing + * 'data_size' appropriately. + */ +#define DM_ULOG_CLEAR_REGION 11 + +/* + * DM_ULOG_GET_RESYNC_WORK corresponds to (found in dm-dirty-log.h): + * int (*get_resync_work)(struct dm_dirty_log *log, region_t *region); + * + * Payload-to-userspace: + * None. + * Payload-to-kernel: + * { + * int64_t i; -- 1 if recovery necessary, 0 otherwise + * uint64_t r; -- The region to recover if i=1 + * } + * 'data_size' should be set appropriately. + * + * When the request has been processed, user-space must return the + * dm_ulog_request to the kernel - setting the 'error' field appropriately. + */ +#define DM_ULOG_GET_RESYNC_WORK 12 + +/* + * DM_ULOG_SET_REGION_SYNC corresponds to (found in dm-dirty-log.h): + * void (*set_region_sync)(struct dm_dirty_log *log, + * region_t region, int in_sync); + * + * Payload-to-userspace: + * { + * uint64_t - region to set sync state on + * int64_t - 0 if not-in-sync, 1 if in-sync + * } + * Payload-to-kernel: + * None. + * + * When the request has been processed, user-space must return the + * dm_ulog_request to the kernel - setting the 'error' field and clearing + * 'data_size' appropriately. + */ +#define DM_ULOG_SET_REGION_SYNC 13 + +/* + * DM_ULOG_GET_SYNC_COUNT corresponds to (found in dm-dirty-log.h): + * region_t (*get_sync_count)(struct dm_dirty_log *log); + * + * Payload-to-userspace: + * None. + * Payload-to-kernel: + * uint64_t - the number of in-sync regions + * + * No incoming payload. Kernel-bound payload contains the number of + * regions that are in-sync (as a uint64_t). + * + * When the request has been processed, user-space must return the + * dm_ulog_request to the kernel - setting the 'error' field and + * 'data_size' appropriately. + */ +#define DM_ULOG_GET_SYNC_COUNT 14 + +/* + * DM_ULOG_STATUS_INFO corresponds to (found in dm-dirty-log.h): + * int (*status)(struct dm_dirty_log *log, STATUSTYPE_INFO, + * char *result, unsigned maxlen); + * + * Payload-to-userspace: + * None. + * Payload-to-kernel: + * Character string containing STATUSTYPE_INFO + * + * When the request has been processed, user-space must return the + * dm_ulog_request to the kernel - setting the 'error' field and + * 'data_size' appropriately.
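+ * + * For example (sketch only, names hypothetical), the reply could be + * assembled with: + * rq->data_size = sprintf(rq->data, "%s", status_info_str) + 1; + * rq->error = 0; + * so the kernel receives a NUL-terminated status string.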
+ */ +#define DM_ULOG_STATUS_INFO 15 + +/* + * DM_ULOG_STATUS_TABLE corresponds to (found in dm-dirty-log.h): + * int (*status)(struct dm_dirty_log *log, STATUSTYPE_TABLE, + * char *result, unsigned maxlen); + * + * Payload-to-userspace: + * None. + * Payload-to-kernel: + * Character string containing STATUSTYPE_TABLE + * + * When the request has been processed, user-space must return the + * dm_ulog_request to the kernel - setting the 'error' field and + * 'data_size' appropriately. + */ +#define DM_ULOG_STATUS_TABLE 16 + +/* + * DM_ULOG_IS_REMOTE_RECOVERING corresponds to (found in dm-dirty-log.h): + * int (*is_remote_recovering)(struct dm_dirty_log *log, region_t region); + * + * Payload-to-userspace: + * uint64_t - region to determine recovery status on + * Payload-to-kernel: + * { + * int64_t is_recovering; -- 0 if no, 1 if yes + * uint64_t in_sync_hint; -- lowest region still needing resync + * } + * + * When the request has been processed, user-space must return the + * dm_ulog_request to the kernel - setting the 'error' field and + * 'data_size' appropriately. + */ +#define DM_ULOG_IS_REMOTE_RECOVERING 17 + +/* + * (DM_ULOG_REQUEST_MASK & request_type) to get the request type + * + * We are reserving 8 bits of the 32-bit 'request_type' field for the + * various request types above. The remaining 24 bits are currently + * set to zero and are reserved for future use and compatibility concerns. + * + * User-space should always use DM_ULOG_REQUEST_TYPE to acquire the + * request type from the 'request_type' field to maintain forward compatibility. + */ +#define DM_ULOG_REQUEST_MASK 0xFF +#define DM_ULOG_REQUEST_TYPE(request_type) \ + (DM_ULOG_REQUEST_MASK & (request_type)) + +/* + * DM_ULOG_REQUEST_VERSION is incremented when there is a + * change to the way information is passed between kernel + * and userspace. This could be a structure change of + * dm_ulog_request or a change in the way requests are + * issued/handled. Changes are outlined here: + * version 1: Initial implementation + * version 2: DM_ULOG_CTR allowed to return a string containing a + * device name that is to be registered with DM via + * 'dm_get_device'. + */ +#define DM_ULOG_REQUEST_VERSION 2 + +struct dm_ulog_request { + /* + * The local unique identifier (luid) and the universally unique + * identifier (uuid) are used to tie a request to a specific + * mirror log. A single machine log could probably make do with + * just the 'luid', but a cluster-aware log must use the 'uuid' and + * the 'luid'. The uuid is what is required for node to node + * communication concerning a particular log, but the 'luid' helps + * differentiate between logs that are being swapped and have the + * same 'uuid'. (Think "live" and "inactive" device-mapper tables.)
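+ * + * (Illustration, not part of the original comment: a daemon would + * typically key its table of active logs on this pair, e.g. + * struct log *lg = find_log(rq->uuid, rq->luid); + * where find_log() is a hypothetical lookup helper.)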
+ */ + uint64_t luid; + char uuid[DM_UUID_LEN]; + char padding[3]; /* Padding because DM_UUID_LEN = 129 */ + + uint32_t version; /* See DM_ULOG_REQUEST_VERSION */ + int32_t error; /* Used to report back processing errors */ + + uint32_t seq; /* Sequence number for request */ + uint32_t request_type; /* DM_ULOG_* defined above */ + uint32_t data_size; /* How much data (not including this struct) */ + + char data[]; +}; + +#endif /* __DM_LOG_USERSPACE_H__ */ diff --git a/device_mapper/misc/dm-logging.h b/device_mapper/misc/dm-logging.h new file mode 100644 index 000000000..a35480e36 --- /dev/null +++ b/device_mapper/misc/dm-logging.h @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. + * Copyright (C) 2004-2016 Red Hat, Inc. All rights reserved. + * + * This file is part of LVM2. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU Lesser General Public License v.2.1. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _DM_LOGGING_H +#define _DM_LOGGING_H + +#include "libdevmapper.h" + +extern dm_log_with_errno_fn dm_log_with_errno; + +#define LOG_MESG(l, f, ln, e, x...) \ + dm_log_with_errno(l, f, ln, e, ## x) + +#define LOG_LINE(l, x...) LOG_MESG(l, __FILE__, __LINE__, 0, ## x) +#define LOG_LINE_WITH_ERRNO(l, e, x...) LOG_MESG(l, __FILE__, __LINE__, e, ## x) + +/* Debug messages may have a type instead of an errno */ +#define LOG_LINE_WITH_CLASS(l, c, x...) LOG_MESG(l, __FILE__, __LINE__, c, ## x) + +#include "lib/log/log.h" + +#endif diff --git a/device_mapper/misc/dmlib.h b/device_mapper/misc/dmlib.h new file mode 100644 index 000000000..ba376bcff --- /dev/null +++ b/device_mapper/misc/dmlib.h @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. + * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. + * + * This file is part of LVM2. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU Lesser General Public License v.2.1. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * This file must be included first by every device-mapper library source file. + */ +#ifndef _DM_LIB_H +#define _DM_LIB_H + +// FIXME: get rid of this whole file + +#include "configure.h" + +#define _REENTRANT +#define _GNU_SOURCE + +#include "libdevmapper.h" +#include "lib/misc/util.h" +#include "dm-logging.h" + +#endif diff --git a/device_mapper/misc/kdev_t.h b/device_mapper/misc/kdev_t.h new file mode 100644 index 000000000..f88bb0ab6 --- /dev/null +++ b/device_mapper/misc/kdev_t.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. + * + * This file is part of LVM2. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU Lesser General Public License v.2.1. 
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _LIBDM_KDEV_H +#define _LIBDM_KDEV_H + +#define MAJOR(dev) ((dev & 0xfff00) >> 8) +#define MINOR(dev) ((dev & 0xff) | ((dev >> 12) & 0xfff00)) +#define MKDEV(ma,mi) ((mi & 0xff) | (ma << 8) | ((mi & ~0xff) << 12)) + +#endif diff --git a/device_mapper/mm/dbg_malloc.c b/device_mapper/mm/dbg_malloc.c new file mode 100644 index 000000000..a17203c48 --- /dev/null +++ b/device_mapper/mm/dbg_malloc.c @@ -0,0 +1,413 @@ +/* + * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. + * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. + * + * This file is part of the device-mapper userspace tools. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU Lesser General Public License v.2.1. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "misc/dmlib.h" + +#ifdef VALGRIND_POOL +#include "memcheck.h" +#endif +#include <assert.h> +#include <stdarg.h> +#include <unistd.h> + +void *dm_malloc_aux(size_t s, const char *file, int line) + __attribute__((__malloc__)) __attribute__((__warn_unused_result__)); +void *dm_malloc_aux_debug(size_t s, const char *file, int line) + __attribute__((__malloc__)) __attribute__((__warn_unused_result__)); +static void *_dm_malloc_aligned_aux(size_t s, size_t a, const char *file, int line) + __attribute__((__malloc__)) __attribute__((__warn_unused_result__)); +void *dm_zalloc_aux(size_t s, const char *file, int line) + __attribute__((__malloc__)) __attribute__((__warn_unused_result__)); +void *dm_zalloc_aux_debug(size_t s, const char *file, int line) + __attribute__((__malloc__)) __attribute__((__warn_unused_result__)); +void *dm_realloc_aux(void *p, unsigned int s, const char *file, int line) + __attribute__((__warn_unused_result__)); +void dm_free_aux(void *p); +char *dm_strdup_aux(const char *str, const char *file, int line) + __attribute__((__warn_unused_result__)); +int dm_dump_memory_debug(void); +void dm_bounds_check_debug(void); + +char *dm_strdup_aux(const char *str, const char *file, int line) +{ + char *ret; + + if (!str) { + log_error(INTERNAL_ERROR "dm_strdup called with NULL pointer"); + return NULL; + } + + if ((ret = dm_malloc_aux_debug(strlen(str) + 1, file, line))) + strcpy(ret, str); + + return ret; +} + +struct memblock { + struct memblock *prev, *next; /* All allocated blocks are linked */ + size_t length; /* Size of the requested block */ + int id; /* Index of the block */ + const char *file; /* File that allocated */ + int line; /* Line that allocated */ + void *magic; /* Address of this block */ +} __attribute__((aligned(8))); + +static struct { + unsigned block_serialno;/* Non-decreasing serialno of block */ + unsigned blocks_allocated; /* Current number of blocks allocated */ + unsigned blocks_max; /* Max no of concurrently-allocated blocks */ + unsigned int bytes, mbytes; + +} _mem_stats = { +0, 0, 0, 0, 0}; + +static struct memblock *_head = 0; +static struct memblock *_tail = 0; + +void *dm_malloc_aux_debug(size_t s, const char *file, int line) +{ + struct memblock *nb; + size_t tsize = s 
+ sizeof(*nb) + sizeof(unsigned long); + + if (s > 50000000) { + log_error("Huge memory allocation (size %" PRIsize_t + ") rejected - metadata corruption?", s); + return 0; + } + + if (!(nb = malloc(tsize))) { + log_error("couldn't allocate any memory, size = %" PRIsize_t, + s); + return 0; + } + + /* set up the file and line info */ + nb->file = file; + nb->line = line; + + dm_bounds_check(); + + /* setup fields */ + nb->magic = nb + 1; + nb->length = s; + nb->id = ++_mem_stats.block_serialno; + nb->next = 0; + + /* stomp a pretty pattern across the new memory + and fill in the boundary bytes */ + { + char *ptr = (char *) (nb + 1); + size_t i; + for (i = 0; i < s; i++) + *ptr++ = i & 0x1 ? (char) 0xba : (char) 0xbe; + + for (i = 0; i < sizeof(unsigned long); i++) + *ptr++ = (char) nb->id; + } + + nb->prev = _tail; + + /* link to tail of the list */ + if (!_head) + _head = _tail = nb; + else { + _tail->next = nb; + _tail = nb; + } + + _mem_stats.blocks_allocated++; + if (_mem_stats.blocks_allocated > _mem_stats.blocks_max) + _mem_stats.blocks_max = _mem_stats.blocks_allocated; + + _mem_stats.bytes += s; + if (_mem_stats.bytes > _mem_stats.mbytes) + _mem_stats.mbytes = _mem_stats.bytes; + + /* log_debug_mem("Allocated: %u %u %u", nb->id, _mem_stats.blocks_allocated, + _mem_stats.bytes); */ +#ifdef VALGRIND_POOL + VALGRIND_MAKE_MEM_UNDEFINED(nb + 1, s); +#endif + return nb + 1; +} + +void *dm_zalloc_aux_debug(size_t s, const char *file, int line) +{ + void *ptr = dm_malloc_aux_debug(s, file, line); + + if (ptr) + memset(ptr, 0, s); + + return ptr; +} + +void dm_free_aux(void *p) +{ + char *ptr; + size_t i; + struct memblock *mb = ((struct memblock *) p) - 1; + if (!p) + return; + + dm_bounds_check(); + + /* sanity check */ + assert(mb->magic == p); +#ifdef VALGRIND_POOL + VALGRIND_MAKE_MEM_DEFINED(p, mb->length); +#endif + /* check data at the far boundary */ + ptr = (char *) p + mb->length; + for (i = 0; i < sizeof(unsigned long); i++) + if (ptr[i] != (char) mb->id) + assert(!"Damage at far end of block"); + + /* have we freed this before ? */ + assert(mb->id != 0); + + /* unlink */ + if (mb->prev) + mb->prev->next = mb->next; + else + _head = mb->next; + + if (mb->next) + mb->next->prev = mb->prev; + else + _tail = mb->prev; + + mb->id = 0; + + /* stomp a different pattern across the memory */ + ptr = p; + for (i = 0; i < mb->length; i++) + ptr[i] = i & 1 ? (char) 0xde : (char) 0xad; + + assert(_mem_stats.blocks_allocated); + _mem_stats.blocks_allocated--; + _mem_stats.bytes -= mb->length; + + /* free the memory */ + free(mb); +} + +void *dm_realloc_aux(void *p, unsigned int s, const char *file, int line) +{ + void *r; + struct memblock *mb = ((struct memblock *) p) - 1; + + r = dm_malloc_aux_debug(s, file, line); + + if (r && p) { + memcpy(r, p, mb->length); + dm_free_aux(p); + } + + return r; +} + +int dm_dump_memory_debug(void) +{ + unsigned long tot = 0; + struct memblock *mb; + char str[32]; + + if (_head) + log_very_verbose("You have a memory leak:"); + + for (mb = _head; mb; mb = mb->next) { +#ifdef VALGRIND_POOL + /* + * We can't look at the memory in case it has had + * VALGRIND_MAKE_MEM_NOACCESS called on it. 
+ */ + str[0] = '\0'; +#else + size_t c; + + for (c = 0; c < sizeof(str) - 1; c++) { + if (c >= mb->length) + str[c] = ' '; + else if (((char *)mb->magic)[c] == '\0') + str[c] = '\0'; + else if (((char *)mb->magic)[c] < ' ') + str[c] = '?'; + else + str[c] = ((char *)mb->magic)[c]; + } + str[sizeof(str) - 1] = '\0'; +#endif + + LOG_MESG(_LOG_INFO, mb->file, mb->line, 0, + "block %d at %p, size %" PRIsize_t "\t [%s]", + mb->id, mb->magic, mb->length, str); + tot += mb->length; + } + + if (_head) + log_very_verbose("%lu bytes leaked in total", tot); + + return 1; +} + +void dm_bounds_check_debug(void) +{ + struct memblock *mb = _head; + while (mb) { + size_t i; + char *ptr = ((char *) (mb + 1)) + mb->length; + for (i = 0; i < sizeof(unsigned long); i++) + if (*ptr++ != (char) mb->id) + assert(!"Memory smash"); + + mb = mb->next; + } +} + +void *dm_malloc_aux(size_t s, const char *file __attribute__((unused)), + int line __attribute__((unused))) +{ + if (s > 50000000) { + log_error("Huge memory allocation (size %" PRIsize_t + ") rejected - metadata corruption?", s); + return 0; + } + + return malloc(s); +} + +/* Allocate size s with alignment a (or page size if 0) */ +static void *_dm_malloc_aligned_aux(size_t s, size_t a, const char *file __attribute__((unused)), + int line __attribute__((unused))) +{ + void *memptr; + int r; + + if (!a) + a = getpagesize(); + + if (s > 50000000) { + log_error("Huge memory allocation (size %" PRIsize_t + ") rejected - metadata corruption?", s); + return 0; + } + + if ((r = posix_memalign(&memptr, a, s))) { + log_error("Failed to allocate %" PRIsize_t " bytes aligned to %" PRIsize_t ": %s", s, a, strerror(r)); + return 0; + } + + return memptr; +} + +void *dm_zalloc_aux(size_t s, const char *file, int line) +{ + void *ptr = dm_malloc_aux(s, file, line); + + if (ptr) + memset(ptr, 0, s); + + return ptr; +} + +#ifdef DEBUG_MEM + +void *dm_malloc_wrapper(size_t s, const char *file, int line) +{ + return dm_malloc_aux_debug(s, file, line); +} + +void *dm_malloc_aligned_wrapper(size_t s, size_t a, const char *file, int line) +{ + /* FIXME Implement alignment when debugging - currently just ignored */ + return dm_malloc_aux_debug(s, file, line); +} + +void *dm_zalloc_wrapper(size_t s, const char *file, int line) +{ + return dm_zalloc_aux_debug(s, file, line); +} + +char *dm_strdup_wrapper(const char *str, const char *file, int line) +{ + return dm_strdup_aux(str, file, line); +} + +void dm_free_wrapper(void *ptr) +{ + dm_free_aux(ptr); +} + +void *dm_realloc_wrapper(void *p, unsigned int s, const char *file, int line) +{ + return dm_realloc_aux(p, s, file, line); +} + +int dm_dump_memory_wrapper(void) +{ + return dm_dump_memory_debug(); +} + +void dm_bounds_check_wrapper(void) +{ + dm_bounds_check_debug(); +} + +#else /* !DEBUG_MEM */ + +void *dm_malloc_wrapper(size_t s, const char *file, int line) +{ + return dm_malloc_aux(s, file, line); +} + +void *dm_malloc_aligned_wrapper(size_t s, size_t a, const char *file, int line) +{ + return _dm_malloc_aligned_aux(s, a, file, line); +} + +void *dm_zalloc_wrapper(size_t s, const char *file, int line) +{ + return dm_zalloc_aux(s, file, line); +} + +char *dm_strdup_wrapper(const char *str, + const char *file __attribute__((unused)), + int line __attribute__((unused))) +{ + return strdup(str); +} + +void dm_free_wrapper(void *ptr) +{ + free(ptr); +} + +void *dm_realloc_wrapper(void *p, unsigned int s, + const char *file __attribute__((unused)), + int line __attribute__((unused))) +{ + return realloc(p, s); +} + +int
dm_dump_memory_wrapper(void) +{ + return 1; +} + +void dm_bounds_check_wrapper(void) +{ +} + +#endif /* DEBUG_MEM */ diff --git a/device_mapper/mm/pool-debug.c b/device_mapper/mm/pool-debug.c new file mode 100644 index 000000000..c5232386f --- /dev/null +++ b/device_mapper/mm/pool-debug.c @@ -0,0 +1,292 @@ +/* + * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. + * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * + * This file is part of the device-mapper userspace tools. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU Lesser General Public License v.2.1. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "dmlib.h" +#include <assert.h> + +struct block { + struct block *next; + size_t size; + void *data; +}; + +typedef struct { + unsigned block_serialno; /* Non-decreasing serialno of block */ + unsigned blocks_allocated; /* Current number of blocks allocated */ + unsigned blocks_max; /* Max no of concurrently-allocated blocks */ + unsigned int bytes, maxbytes; +} pool_stats; + +struct dm_pool { + struct dm_list list; + const char *name; + void *orig_pool; /* to pair it with first allocation call */ + unsigned locked; + long crc; + + int begun; + struct block *object; + + struct block *blocks; + struct block *tail; + + pool_stats stats; +}; + +/* by default things come out aligned for doubles */ +#define DEFAULT_ALIGNMENT __alignof__ (double) + +struct dm_pool *dm_pool_create(const char *name, size_t chunk_hint) +{ + struct dm_pool *mem = dm_zalloc(sizeof(*mem)); + + if (!mem) { + log_error("Couldn't create memory pool %s (size %" + PRIsize_t ")", name, sizeof(*mem)); + return NULL; + } + + mem->name = name; + mem->orig_pool = mem; + +#ifdef DEBUG_POOL + log_debug_mem("Created mempool %s at %p", name, mem); +#endif + + dm_list_add(&_dm_pools, &mem->list); + return mem; +} + +static void _free_blocks(struct dm_pool *p, struct block *b) +{ + struct block *n; + + if (p->locked) + log_error(INTERNAL_ERROR "_free_blocks from locked pool %s", + p->name); + + while (b) { + p->stats.bytes -= b->size; + p->stats.blocks_allocated--; + + n = b->next; + dm_free(b->data); + dm_free(b); + b = n; + } +} + +static void _pool_stats(struct dm_pool *p, const char *action) +{ +#ifdef DEBUG_POOL + log_debug_mem("%s mempool %s at %p: %u/%u bytes, %u/%u blocks, " + "%u allocations)", action, p->name, p, p->stats.bytes, + p->stats.maxbytes, p->stats.blocks_allocated, + p->stats.blocks_max, p->stats.block_serialno); +#else + ; +#endif +} + +void dm_pool_destroy(struct dm_pool *p) +{ + _pool_stats(p, "Destroying"); + _free_blocks(p, p->blocks); + dm_list_del(&p->list); + dm_free(p); +} + +void *dm_pool_alloc(struct dm_pool *p, size_t s) +{ + return dm_pool_alloc_aligned(p, s, DEFAULT_ALIGNMENT); +} + +static void _append_block(struct dm_pool *p, struct block *b) +{ + if (p->locked) + log_error(INTERNAL_ERROR "_append_blocks to locked pool %s", + p->name); + + if (p->tail) { + p->tail->next = b; + p->tail = b; + } else + p->blocks = p->tail = b; + + p->stats.block_serialno++; + p->stats.blocks_allocated++; + if (p->stats.blocks_allocated > p->stats.blocks_max) + p->stats.blocks_max = p->stats.blocks_allocated; + + p->stats.bytes += b->size; + if (p->stats.bytes > 
p->stats.maxbytes) + p->stats.maxbytes = p->stats.bytes; +} + +static struct block *_new_block(size_t s, unsigned alignment) +{ + /* FIXME: I'm currently ignoring the alignment arg. */ + size_t len = sizeof(struct block) + s; + struct block *b = dm_malloc(len); + + /* + * Too lazy to implement alignment for debug version, and + * I don't think LVM will use anything but default + * align. + */ + assert(alignment <= DEFAULT_ALIGNMENT); + + if (!b) { + log_error("Out of memory"); + return NULL; + } + + if (!(b->data = dm_malloc(s))) { + log_error("Out of memory"); + dm_free(b); + return NULL; + } + + b->next = NULL; + b->size = s; + + return b; +} + +void *dm_pool_alloc_aligned(struct dm_pool *p, size_t s, unsigned alignment) +{ + struct block *b = _new_block(s, alignment); + + if (!b) + return_NULL; + + _append_block(p, b); + + return b->data; +} + +void dm_pool_empty(struct dm_pool *p) +{ + _pool_stats(p, "Emptying"); + _free_blocks(p, p->blocks); + p->blocks = p->tail = NULL; +} + +void dm_pool_free(struct dm_pool *p, void *ptr) +{ + struct block *b, *prev = NULL; + + _pool_stats(p, "Freeing (before)"); + + for (b = p->blocks; b; b = b->next) { + if (b->data == ptr) + break; + prev = b; + } + + /* + * If this fires then you tried to free a + * pointer that either wasn't from this + * pool, or isn't the start of a block. + */ + assert(b); + + _free_blocks(p, b); + + if (prev) { + p->tail = prev; + prev->next = NULL; + } else + p->blocks = p->tail = NULL; + + _pool_stats(p, "Freeing (after)"); +} + +int dm_pool_begin_object(struct dm_pool *p, size_t init_size) +{ + assert(!p->begun); + p->begun = 1; + return 1; +} + +int dm_pool_grow_object(struct dm_pool *p, const void *extra, size_t delta) +{ + struct block *new; + size_t new_size; + + if (p->locked) + log_error(INTERNAL_ERROR "Grow objects in locked pool %s", + p->name); + + if (!delta) + delta = strlen(extra); + + assert(p->begun); + + if (p->object) + new_size = delta + p->object->size; + else + new_size = delta; + + if (!(new = _new_block(new_size, DEFAULT_ALIGNMENT))) { + log_error("Couldn't extend object."); + return 0; + } + + if (p->object) { + memcpy(new->data, p->object->data, p->object->size); + dm_free(p->object->data); + dm_free(p->object); + } + p->object = new; + + memcpy((char*)new->data + new_size - delta, extra, delta); + + return 1; +} + +void *dm_pool_end_object(struct dm_pool *p) +{ + assert(p->begun); + _append_block(p, p->object); + + p->begun = 0; + p->object = NULL; + return p->tail->data; +} + +void dm_pool_abandon_object(struct dm_pool *p) +{ + assert(p->begun); + dm_free(p->object); + p->begun = 0; + p->object = NULL; +} + +static long _pool_crc(const struct dm_pool *p) +{ +#ifndef DEBUG_ENFORCE_POOL_LOCKING +#warning pool crc not implemented with pool debug +#endif + return 0; +} + +static int _pool_protect(struct dm_pool *p, int prot) +{ +#ifdef DEBUG_ENFORCE_POOL_LOCKING +#warning pool mprotect not implemented with pool debug +#endif + return 1; +} diff --git a/device_mapper/mm/pool-fast.c b/device_mapper/mm/pool-fast.c new file mode 100644 index 000000000..895872e4b --- /dev/null +++ b/device_mapper/mm/pool-fast.c @@ -0,0 +1,363 @@ +/* + * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. + * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. + * + * This file is part of the device-mapper userspace tools. 
+ * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU Lesser General Public License v.2.1. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifdef VALGRIND_POOL +#include "memcheck.h" +#endif + +#include "misc/dmlib.h" +#include <stddef.h> /* For musl libc */ +#include <malloc.h> + +struct chunk { + char *begin, *end; + struct chunk *prev; +} __attribute__((aligned(8))); + +struct dm_pool { + struct dm_list list; + struct chunk *chunk, *spare_chunk; /* spare_chunk is a one entry free + list to stop 'bobbling' */ + const char *name; + size_t chunk_size; + size_t object_len; + unsigned object_alignment; + int locked; + long crc; +}; + +static void _align_chunk(struct chunk *c, unsigned alignment); +static struct chunk *_new_chunk(struct dm_pool *p, size_t s); +static void _free_chunk(struct chunk *c); + +/* by default things come out aligned for doubles */ +#define DEFAULT_ALIGNMENT __alignof__ (double) + +struct dm_pool *dm_pool_create(const char *name, size_t chunk_hint) +{ + size_t new_size = 1024; + struct dm_pool *p = dm_zalloc(sizeof(*p)); + + if (!p) { + log_error("Couldn't create memory pool %s (size %" + PRIsize_t ")", name, sizeof(*p)); + return 0; + } + + p->name = name; + /* round chunk_hint up to the next power of 2 */ + p->chunk_size = chunk_hint + sizeof(struct chunk); + while (new_size < p->chunk_size) + new_size <<= 1; + p->chunk_size = new_size; + pthread_mutex_lock(&_dm_pools_mutex); + dm_list_add(&_dm_pools, &p->list); + pthread_mutex_unlock(&_dm_pools_mutex); + return p; +} + +void dm_pool_destroy(struct dm_pool *p) +{ + struct chunk *c, *pr; + _free_chunk(p->spare_chunk); + c = p->chunk; + while (c) { + pr = c->prev; + _free_chunk(c); + c = pr; + } + + pthread_mutex_lock(&_dm_pools_mutex); + dm_list_del(&p->list); + pthread_mutex_unlock(&_dm_pools_mutex); + dm_free(p); +} + +void *dm_pool_alloc(struct dm_pool *p, size_t s) +{ + return dm_pool_alloc_aligned(p, s, DEFAULT_ALIGNMENT); +} + +void *dm_pool_alloc_aligned(struct dm_pool *p, size_t s, unsigned alignment) +{ + struct chunk *c = p->chunk; + void *r; + + /* realign begin */ + if (c) + _align_chunk(c, alignment); + + /* have we got room ? */ + if (!c || (c->begin > c->end) || ((c->end - c->begin) < (int) s)) { + /* allocate new chunk */ + size_t needed = s + alignment + sizeof(struct chunk); + c = _new_chunk(p, (needed > p->chunk_size) ? 
+ needed : p->chunk_size); + + if (!c) + return_NULL; + + _align_chunk(c, alignment); + } + + r = c->begin; + c->begin += s; + +#ifdef VALGRIND_POOL + VALGRIND_MAKE_MEM_UNDEFINED(r, s); +#endif + + return r; +} + +void dm_pool_empty(struct dm_pool *p) +{ + struct chunk *c; + + for (c = p->chunk; c && c->prev; c = c->prev) + ; + + if (c) + dm_pool_free(p, (char *) (c + 1)); +} + +void dm_pool_free(struct dm_pool *p, void *ptr) +{ + struct chunk *c = p->chunk; + + while (c) { + if (((char *) c < (char *) ptr) && + ((char *) c->end > (char *) ptr)) { + c->begin = ptr; +#ifdef VALGRIND_POOL + VALGRIND_MAKE_MEM_NOACCESS(c->begin, c->end - c->begin); +#endif + break; + } + + if (p->spare_chunk) + _free_chunk(p->spare_chunk); + + c->begin = (char *) (c + 1); +#ifdef VALGRIND_POOL + VALGRIND_MAKE_MEM_NOACCESS(c->begin, c->end - c->begin); +#endif + + p->spare_chunk = c; + c = c->prev; + } + + if (!c) + log_error(INTERNAL_ERROR "pool_free asked to free pointer " + "not in pool"); + else + p->chunk = c; +} + +int dm_pool_begin_object(struct dm_pool *p, size_t hint) +{ + struct chunk *c = p->chunk; + const size_t align = DEFAULT_ALIGNMENT; + + p->object_len = 0; + p->object_alignment = align; + + if (c) + _align_chunk(c, align); + + if (!c || (c->begin > c->end) || ((c->end - c->begin) < (int) hint)) { + /* allocate a new chunk */ + c = _new_chunk(p, + hint > (p->chunk_size - sizeof(struct chunk)) ? + hint + sizeof(struct chunk) + align : + p->chunk_size); + + if (!c) + return 0; + + _align_chunk(c, align); + } + + return 1; +} + +int dm_pool_grow_object(struct dm_pool *p, const void *extra, size_t delta) +{ + struct chunk *c = p->chunk, *nc; + + if (!delta) + delta = strlen(extra); + + if ((c->end - (c->begin + p->object_len)) < (int) delta) { + /* move into a new chunk */ + if (p->object_len + delta > (p->chunk_size / 2)) + nc = _new_chunk(p, (p->object_len + delta) * 2); + else + nc = _new_chunk(p, p->chunk_size); + + if (!nc) + return 0; + + _align_chunk(p->chunk, p->object_alignment); + +#ifdef VALGRIND_POOL + VALGRIND_MAKE_MEM_UNDEFINED(p->chunk->begin, p->object_len); +#endif + + memcpy(p->chunk->begin, c->begin, p->object_len); + +#ifdef VALGRIND_POOL + VALGRIND_MAKE_MEM_NOACCESS(c->begin, p->object_len); +#endif + + c = p->chunk; + } + +#ifdef VALGRIND_POOL + VALGRIND_MAKE_MEM_UNDEFINED(p->chunk->begin + p->object_len, delta); +#endif + + memcpy(c->begin + p->object_len, extra, delta); + p->object_len += delta; + return 1; +} + +void *dm_pool_end_object(struct dm_pool *p) +{ + struct chunk *c = p->chunk; + void *r = c->begin; + c->begin += p->object_len; + p->object_len = 0u; + p->object_alignment = DEFAULT_ALIGNMENT; + return r; +} + +void dm_pool_abandon_object(struct dm_pool *p) +{ +#ifdef VALGRIND_POOL + VALGRIND_MAKE_MEM_NOACCESS(p->chunk, p->object_len); +#endif + p->object_len = 0; + p->object_alignment = DEFAULT_ALIGNMENT; +} + +static void _align_chunk(struct chunk *c, unsigned alignment) +{ + c->begin += alignment - ((unsigned long) c->begin & (alignment - 1)); +} + +static struct chunk *_new_chunk(struct dm_pool *p, size_t s) +{ + struct chunk *c; + + if (p->spare_chunk && + ((p->spare_chunk->end - p->spare_chunk->begin) >= (ptrdiff_t)s)) { + /* reuse old chunk */ + c = p->spare_chunk; + p->spare_chunk = 0; + } else { +#ifdef DEBUG_ENFORCE_POOL_LOCKING + if (!_pagesize) { + _pagesize = getpagesize(); /* lvm_pagesize(); */ + _pagesize_mask = _pagesize - 1; + } + /* + * Allocate page aligned size so malloc could work. 
+ * Otherwise page fault would happen from pool unrelated + * memory writes of internal malloc pointers. + */ +# define aligned_malloc(s) (posix_memalign((void**)&c, _pagesize, \ + ALIGN_ON_PAGE(s)) == 0) +#else +# define aligned_malloc(s) (c = dm_malloc(s)) +#endif /* DEBUG_ENFORCE_POOL_LOCKING */ + if (!aligned_malloc(s)) { +#undef aligned_malloc + log_error("Out of memory. Requested %" PRIsize_t + " bytes.", s); + return NULL; + } + + c->begin = (char *) (c + 1); + c->end = (char *) c + s; + +#ifdef VALGRIND_POOL + VALGRIND_MAKE_MEM_NOACCESS(c->begin, c->end - c->begin); +#endif + } + + c->prev = p->chunk; + p->chunk = c; + return c; +} + +static void _free_chunk(struct chunk *c) +{ +#ifdef VALGRIND_POOL +# ifdef DEBUG_MEM + if (c) + VALGRIND_MAKE_MEM_UNDEFINED(c + 1, c->end - (char *) (c + 1)); +# endif +#endif +#ifdef DEBUG_ENFORCE_POOL_LOCKING + /* since DEBUG_MEM is using own memory list */ + free(c); /* for posix_memalign() */ +#else + dm_free(c); +#endif +} + + +/** + * Calc crc/hash from pool's memory chunks with internal pointers + */ +static long _pool_crc(const struct dm_pool *p) +{ + long crc_hash = 0; +#ifndef DEBUG_ENFORCE_POOL_LOCKING + const struct chunk *c; + const long *ptr, *end; + + for (c = p->chunk; c; c = c->prev) { + end = (const long *) (c->begin < c->end ? (long) c->begin & ~7: (long) c->end); + ptr = (const long *) c; +#ifdef VALGRIND_POOL + VALGRIND_MAKE_MEM_DEFINED(ptr, (end - ptr) * sizeof(*end)); +#endif + while (ptr < end) { + crc_hash += *ptr++; + crc_hash += (crc_hash << 10); + crc_hash ^= (crc_hash >> 6); + } + } +#endif /* DEBUG_ENFORCE_POOL_LOCKING */ + + return crc_hash; +} + +static int _pool_protect(struct dm_pool *p, int prot) +{ +#ifdef DEBUG_ENFORCE_POOL_LOCKING + struct chunk *c; + + for (c = p->chunk; c; c = c->prev) { + if (mprotect(c, (size_t) ((c->end - (char *) c) - 1), prot) != 0) { + log_sys_error("mprotect", ""); + return 0; + } + } +#endif + return 1; +} diff --git a/device_mapper/mm/pool.c b/device_mapper/mm/pool.c new file mode 100644 index 000000000..a710704ae --- /dev/null +++ b/device_mapper/mm/pool.c @@ -0,0 +1,189 @@ +/* + * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. + * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved. + * + * This file is part of the device-mapper userspace tools. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU Lesser General Public License v.2.1. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "misc/dmlib.h" +#include <sys/mman.h> +#include <pthread.h> + +static DM_LIST_INIT(_dm_pools); +static pthread_mutex_t _dm_pools_mutex = PTHREAD_MUTEX_INITIALIZER; +void dm_pools_check_leaks(void); + +#ifdef DEBUG_ENFORCE_POOL_LOCKING +#ifdef DEBUG_POOL +#error Do not use DEBUG_POOL with DEBUG_ENFORCE_POOL_LOCKING +#endif + +/* + * Use mprotect system call to ensure all locked pages are not writable. + * Generates segmentation fault with write access to the locked pool. + * + * - Implementation is using posix_memalign() to get page aligned + * memory blocks (could be implemented also through malloc). + * - Only pool-fast is properly handled for now. + * - Checksum is slower compared to mprotect. 
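+ * + * Intended usage is sketched below (assuming a pool 'p' obtained from + * dm_pool_create() and this debug option compiled in): + * dm_pool_lock(p, 1); /* chunks mprotect'ed read-only, crc stored */ + * /* ... reads are fine; any write into the pool now faults ... */ + * dm_pool_unlock(p, 1); /* write access restored, crc re-checked */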
+ */ +static size_t _pagesize = 0; +static size_t _pagesize_mask = 0; +#define ALIGN_ON_PAGE(size) (((size) + (_pagesize_mask)) & ~(_pagesize_mask)) +#endif + +#ifdef DEBUG_POOL +#include "pool-debug.c" +#else +#include "pool-fast.c" +#endif + +char *dm_pool_strdup(struct dm_pool *p, const char *str) +{ + size_t len = strlen(str) + 1; + char *ret = dm_pool_alloc(p, len); + + if (ret) + memcpy(ret, str, len); + + return ret; +} + +char *dm_pool_strndup(struct dm_pool *p, const char *str, size_t n) +{ + char *ret = dm_pool_alloc(p, n + 1); + + if (ret) { + strncpy(ret, str, n); + ret[n] = '\0'; + } + + return ret; +} + +void *dm_pool_zalloc(struct dm_pool *p, size_t s) +{ + void *ptr = dm_pool_alloc(p, s); + + if (ptr) + memset(ptr, 0, s); + + return ptr; +} + +void dm_pools_check_leaks(void) +{ + struct dm_pool *p; + + pthread_mutex_lock(&_dm_pools_mutex); + if (dm_list_empty(&_dm_pools)) { + pthread_mutex_unlock(&_dm_pools_mutex); + return; + } + + log_error("You have a memory leak (not released memory pool):"); + dm_list_iterate_items(p, &_dm_pools) { +#ifdef DEBUG_POOL + log_error(" [%p] %s (%u bytes)", + p->orig_pool, + p->name, p->stats.bytes); +#else + log_error(" [%p] %s", p, p->name); +#endif + } + pthread_mutex_unlock(&_dm_pools_mutex); + log_error(INTERNAL_ERROR "Unreleased memory pool(s) found."); +} + +/** + * Status of locked pool. + * + * \param p + * Pool to be tested for lock status. + * + * \return + * 1 when the pool is locked, 0 otherwise. + */ +int dm_pool_locked(struct dm_pool *p) +{ + return p->locked; +} + +/** + * Lock memory pool. + * + * \param p + * Pool to be locked. + * + * \param crc + * Bool specifies whether to store the pool crc/hash checksum. + * + * \return + * 1 (success) when the pool was properly locked, 0 otherwise. + */ +int dm_pool_lock(struct dm_pool *p, int crc) +{ + if (p->locked) { + log_error(INTERNAL_ERROR "Pool %s is already locked.", + p->name); + return 0; + } + + if (crc) + p->crc = _pool_crc(p); /* Get crc for pool */ + + if (!_pool_protect(p, PROT_READ)) { + _pool_protect(p, PROT_READ | PROT_WRITE); + return_0; + } + + p->locked = 1; + + log_debug_mem("Pool %s is locked.", p->name); + + return 1; +} + +/** + * Unlock memory pool. + * + * \param p + * Pool to be unlocked. + * + * \param crc + * Bool enables comparison of the pool crc/hash with the stored value + * at pool lock. The pool is not properly unlocked if there is a mismatch. + * + * \return + * 1 (success) when the pool was properly unlocked, 0 otherwise. + */ +int dm_pool_unlock(struct dm_pool *p, int crc) +{ + if (!p->locked) { + log_error(INTERNAL_ERROR "Pool %s is already unlocked.", + p->name); + return 0; + } + + p->locked = 0; + + if (!_pool_protect(p, PROT_READ | PROT_WRITE)) + return_0; + + log_debug_mem("Pool %s is unlocked.", p->name); + + if (crc && (p->crc != _pool_crc(p))) { + log_error(INTERNAL_ERROR "Pool %s crc mismatch.", p->name); + return 0; + } + + return 1; +} diff --git a/device_mapper/regex/matcher.c b/device_mapper/regex/matcher.c new file mode 100644 index 000000000..375c1abdc --- /dev/null +++ b/device_mapper/regex/matcher.c @@ -0,0 +1,575 @@ +/* + * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. + * Copyright (C) 2004-2012 Red Hat, Inc. All rights reserved. + * + * This file is part of the device-mapper userspace tools. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU Lesser General Public License v.2.1.
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "misc/dmlib.h" +#include "parse_rx.h" +#include "ttree.h" +#include "assert.h" + +struct dfa_state { + struct dfa_state *next; + int final; + dm_bitset_t bits; + struct dfa_state *lookup[256]; +}; + +struct dm_regex { /* Instance variables for the lexer */ + struct dfa_state *start; + unsigned num_nodes; + unsigned num_charsets; + int nodes_entered; + struct rx_node **nodes; + int charsets_entered; + struct rx_node **charsets; + struct dm_pool *scratch, *mem; + + /* stuff for on the fly dfa calculation */ + dm_bitset_t charmap[256]; + dm_bitset_t dfa_copy; + struct ttree *tt; + dm_bitset_t bs; + struct dfa_state *h, *t; +}; + +static int _count_nodes(struct rx_node *rx) +{ + int r = 1; + + if (rx->left) + r += _count_nodes(rx->left); + + if (rx->right) + r += _count_nodes(rx->right); + + return r; +} + +static unsigned _count_charsets(struct rx_node *rx) +{ + if (rx->type == CHARSET) + return 1; + + return (rx->left ? _count_charsets(rx->left) : 0) + + (rx->right ? _count_charsets(rx->right) : 0); +} + +static void _enumerate_charsets_internal(struct rx_node *rx, unsigned *i) +{ + if (rx->type == CHARSET) + rx->charset_index = (*i)++; + else { + if (rx->left) + _enumerate_charsets_internal(rx->left, i); + if (rx->right) + _enumerate_charsets_internal(rx->right, i); + } +} + +static void _enumerate_charsets(struct rx_node *rx) +{ + unsigned i = 0; + _enumerate_charsets_internal(rx, &i); +} + +static void _fill_table(struct dm_regex *m, struct rx_node *rx) +{ + assert((rx->type != OR) || (rx->left && rx->right)); + + if (rx->left) + _fill_table(m, rx->left); + + if (rx->right) + _fill_table(m, rx->right); + + m->nodes[m->nodes_entered++] = rx; + if (rx->type == CHARSET) + m->charsets[m->charsets_entered++] = rx; +} + +static int _create_bitsets(struct dm_regex *m) +{ + unsigned i; + struct rx_node *n; + + for (i = 0; i < m->num_nodes; i++) { + n = m->nodes[i]; + if (!(n->firstpos = dm_bitset_create(m->scratch, m->num_charsets))) + return_0; + if (!(n->lastpos = dm_bitset_create(m->scratch, m->num_charsets))) + return_0; + if (!(n->followpos = dm_bitset_create(m->scratch, m->num_charsets))) + return_0; + } + + return 1; +} + +static void _calc_functions(struct dm_regex *m) +{ + unsigned i, j, final = 1; + struct rx_node *rx, *c1, *c2; + + for (i = 0; i < m->num_nodes; i++) { + rx = m->nodes[i]; + c1 = rx->left; + c2 = rx->right; + + if (rx->type == CHARSET && dm_bit(rx->charset, TARGET_TRANS)) + rx->final = final++; + + switch (rx->type) { + case CAT: + if (c1->nullable) + dm_bit_union(rx->firstpos, + c1->firstpos, c2->firstpos); + else + dm_bit_copy(rx->firstpos, c1->firstpos); + + if (c2->nullable) + dm_bit_union(rx->lastpos, + c1->lastpos, c2->lastpos); + else + dm_bit_copy(rx->lastpos, c2->lastpos); + + rx->nullable = c1->nullable && c2->nullable; + break; + + case PLUS: + dm_bit_copy(rx->firstpos, c1->firstpos); + dm_bit_copy(rx->lastpos, c1->lastpos); + rx->nullable = c1->nullable; + break; + + case OR: + dm_bit_union(rx->firstpos, c1->firstpos, c2->firstpos); + dm_bit_union(rx->lastpos, c1->lastpos, c2->lastpos); + rx->nullable = c1->nullable || c2->nullable; + break; + + case QUEST: + case STAR: + dm_bit_copy(rx->firstpos, c1->firstpos); + dm_bit_copy(rx->lastpos, c1->lastpos); + rx->nullable = 1; + break; + + case CHARSET: + 
dm_bit_set(rx->firstpos, rx->charset_index); + dm_bit_set(rx->lastpos, rx->charset_index); + rx->nullable = 0; + break; + + default: + log_error(INTERNAL_ERROR "Unknown calc node type"); + } + + /* + * followpos has its own switch + * because PLUS and STAR do the + * same thing. + */ + switch (rx->type) { + case CAT: + for (j = 0; j < m->num_charsets; j++) { + struct rx_node *n = m->charsets[j]; + if (dm_bit(c1->lastpos, j)) + dm_bit_union(n->followpos, + n->followpos, c2->firstpos); + } + break; + + case PLUS: + case STAR: + for (j = 0; j < m->num_charsets; j++) { + struct rx_node *n = m->charsets[j]; + if (dm_bit(rx->lastpos, j)) + dm_bit_union(n->followpos, + n->followpos, rx->firstpos); + } + break; + } + } +} + +static struct dfa_state *_create_dfa_state(struct dm_pool *mem) +{ + return dm_pool_zalloc(mem, sizeof(struct dfa_state)); +} + +static struct dfa_state *_create_state_queue(struct dm_pool *mem, + struct dfa_state *dfa, + dm_bitset_t bits) +{ + if (!(dfa->bits = dm_bitset_create(mem, bits[0]))) /* first element is the size */ + return_NULL; + + dm_bit_copy(dfa->bits, bits); + dfa->next = 0; + dfa->final = -1; + + return dfa; +} + +static int _calc_state(struct dm_regex *m, struct dfa_state *dfa, int a) +{ + int set_bits = 0, i; + dm_bitset_t dfa_bits = dfa->bits; + dm_bit_and(m->dfa_copy, m->charmap[a], dfa_bits); + + /* iterate through all the states in firstpos */ + for (i = dm_bit_get_first(m->dfa_copy); i >= 0; i = dm_bit_get_next(m->dfa_copy, i)) { + if (a == TARGET_TRANS) + dfa->final = m->charsets[i]->final; + + dm_bit_union(m->bs, m->bs, m->charsets[i]->followpos); + set_bits = 1; + } + + if (set_bits) { + struct dfa_state *tmp; + struct dfa_state *ldfa = ttree_lookup(m->tt, m->bs + 1); + if (!ldfa) { + /* push */ + if (!(ldfa = _create_dfa_state(m->mem))) + return_0; + + ttree_insert(m->tt, m->bs + 1, ldfa); + if (!(tmp = _create_state_queue(m->scratch, ldfa, m->bs))) + return_0; + if (!m->h) + m->h = m->t = tmp; + else { + m->t->next = tmp; + m->t = tmp; + } + } + + dfa->lookup[a] = ldfa; + dm_bit_clear_all(m->bs); + } + + return 1; +} + +static int _calc_states(struct dm_regex *m, struct rx_node *rx) +{ + unsigned iwidth = (m->num_charsets / DM_BITS_PER_INT) + 1; + struct dfa_state *dfa; + struct rx_node *n; + unsigned i; + int a; + + if (!(m->tt = ttree_create(m->scratch, iwidth))) + return_0; + + if (!(m->bs = dm_bitset_create(m->scratch, m->num_charsets))) + return_0; + + /* build some char maps */ + for (a = 0; a < 256; a++) + if (!(m->charmap[a] = dm_bitset_create(m->scratch, m->num_charsets))) + return_0; + + for (i = 0; i < m->num_nodes; i++) { + n = m->nodes[i]; + if (n->type == CHARSET) { + for (a = dm_bit_get_first(n->charset); + a >= 0; a = dm_bit_get_next(n->charset, a)) + dm_bit_set(m->charmap[a], n->charset_index); + } + } + + /* create first state */ + if (!(dfa = _create_dfa_state(m->mem))) + return_0; + + m->start = dfa; + ttree_insert(m->tt, rx->firstpos + 1, dfa); + + /* prime the queue */ + if (!(m->h = m->t = _create_state_queue(m->scratch, dfa, rx->firstpos))) + return_0; + + if (!(m->dfa_copy = dm_bitset_create(m->scratch, m->num_charsets))) + return_0; + + return 1; +} + +/* + * Forces all the dfa states to be calculated up front, i.e. what + * _calc_states() used to do before we switched to calculating on demand.
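+ * + * (For illustration: dm_regex_match() below keeps building states lazily + * through _calc_state() as new input bytes are seen; only + * dm_regex_fingerprint() needs the complete transition table, so it + * calls this function first.)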
+ */ +static int _force_states(struct dm_regex *m) +{ + int a; + + /* keep processing until there's nothing in the queue */ + struct dfa_state *s; + while ((s = m->h)) { + /* pop state off front of the queue */ + m->h = m->h->next; + + /* iterate through all the inputs for this state */ + dm_bit_clear_all(m->bs); + for (a = 0; a < 256; a++) + if (!_calc_state(m, s, a)) + return_0; + } + + return 1; +} + +struct dm_regex *dm_regex_create(struct dm_pool *mem, const char * const *patterns, + unsigned num_patterns) +{ + char *all, *ptr; + unsigned i; + size_t len = 0; + struct rx_node *rx; + struct dm_regex *m; + struct dm_pool *scratch = mem; + + if (!(m = dm_pool_zalloc(mem, sizeof(*m)))) + return_NULL; + + /* join the regexps together, delimiting with zero */ + for (i = 0; i < num_patterns; i++) + len += strlen(patterns[i]) + 8; + + ptr = all = dm_pool_alloc(scratch, len + 1); + + if (!all) + goto_bad; + + for (i = 0; i < num_patterns; i++) { + ptr += sprintf(ptr, "(.*(%s)%c)", patterns[i], TARGET_TRANS); + if (i < (num_patterns - 1)) + *ptr++ = '|'; + } + + /* parse this expression */ + if (!(rx = rx_parse_tok(scratch, all, ptr))) { + log_error("Couldn't parse regex"); + goto bad; + } + + m->mem = mem; + m->scratch = scratch; + m->num_nodes = _count_nodes(rx); + m->num_charsets = _count_charsets(rx); + _enumerate_charsets(rx); + if (!(m->nodes = dm_pool_alloc(scratch, sizeof(*m->nodes) * m->num_nodes))) + goto_bad; + + if (!(m->charsets = dm_pool_alloc(scratch, sizeof(*m->charsets) * m->num_charsets))) + goto_bad; + + _fill_table(m, rx); + + if (!_create_bitsets(m)) + goto_bad; + + _calc_functions(m); + + if (!_calc_states(m, rx)) + goto_bad; + + return m; + + bad: + dm_pool_free(mem, m); + + return NULL; +} + +static struct dfa_state *_step_matcher(struct dm_regex *m, int c, struct dfa_state *cs, int *r) +{ + struct dfa_state *ns; + + if (!(ns = cs->lookup[(unsigned char) c])) { + if (!_calc_state(m, cs, (unsigned char) c)) + return_NULL; + + if (!(ns = cs->lookup[(unsigned char) c])) + return NULL; + } + + // yuck, we have to special case the target trans + if ((ns->final == -1) && + !_calc_state(m, ns, TARGET_TRANS)) + return_NULL; + + if (ns->final && (ns->final > *r)) + *r = ns->final; + + return ns; +} + +int dm_regex_match(struct dm_regex *regex, const char *s) +{ + struct dfa_state *cs = regex->start; + int r = 0; + + dm_bit_clear_all(regex->bs); + if (!(cs = _step_matcher(regex, HAT_CHAR, cs, &r))) + goto out; + + for (; *s; s++) + if (!(cs = _step_matcher(regex, *s, cs, &r))) + goto out; + + _step_matcher(regex, DOLLAR_CHAR, cs, &r); + + out: + /* subtract 1 to get back to zero index */ + return r - 1; +} + +/* + * The next block of code concerns calculating a fingerprint for the dfa. + * + * We're not calculating a minimal dfa in _calculate_state (maybe a future + * improvement). As such it's possible that two non-isomorphic dfas + * recognise the same language. This can only really happen if you start + * with equivalent, but different regexes (for example the simplifier in + * parse_rx.c may have changed). + * + * The code is inefficient; repeatedly searching a singly linked list for + * previously seen nodes. Not worried since this is test code. 
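+ * + * Rough usage sketch (hypothetical test code): + * struct dm_regex *rx = dm_regex_create(mem, patterns, num_patterns); + * uint32_t fp = dm_regex_fingerprint(rx); + * A unit test can compare 'fp' against a known-good constant to detect + * unintended changes in the constructed dfa.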
+ */ +struct node_list { + unsigned node_id; + struct dfa_state *node; + struct node_list *next; +}; + +struct printer { + struct dm_pool *mem; + struct node_list *pending; + struct node_list *processed; + unsigned next_index; +}; + +static uint32_t _randomise(uint32_t n) +{ + /* 2^32 - 5 */ + uint32_t const prime = (~0) - 4; + return n * prime; +} + +static int _seen(struct node_list *n, struct dfa_state *node, uint32_t *i) +{ + while (n) { + if (n->node == node) { + *i = n->node_id; + return 1; + } + n = n->next; + } + + return 0; +} + +/* + * Push node if it's not been seen before, returning a unique index. + */ +static uint32_t _push_node(struct printer *p, struct dfa_state *node) +{ + uint32_t i; + struct node_list *n; + + if (_seen(p->pending, node, &i) || + _seen(p->processed, node, &i)) + return i; + + if (!(n = dm_pool_alloc(p->mem, sizeof(*n)))) + return_0; + + n->node_id = ++p->next_index; /* start from 1, keep 0 as error code */ + n->node = node; + n->next = p->pending; + p->pending = n; + + return n->node_id; +} + +/* + * Pop the front node, and fill out its previously assigned index. + */ +static struct dfa_state *_pop_node(struct printer *p) +{ + struct dfa_state *node = NULL; + struct node_list *n; + + if (p->pending) { + n = p->pending; + p->pending = n->next; + n->next = p->processed; + p->processed = n; + + node = n->node; + } + + return node; +} + +static uint32_t _combine(uint32_t n1, uint32_t n2) +{ + return ((n1 << 8) | (n1 >> 24)) ^ _randomise(n2); +} + +static uint32_t _fingerprint(struct printer *p) +{ + int c; + uint32_t result = 0; + struct dfa_state *node; + + while ((node = _pop_node(p))) { + result = _combine(result, (node->final < 0) ? 0 : node->final); + for (c = 0; c < 256; c++) + result = _combine(result, + _push_node(p, node->lookup[c])); + } + + return result; +} + +uint32_t dm_regex_fingerprint(struct dm_regex *regex) +{ + struct printer p; + uint32_t result = 0; + struct dm_pool *mem = dm_pool_create("regex fingerprint", 1024); + + if (!mem) + return_0; + + if (!_force_states(regex)) + goto_out; + + p.mem = mem; + p.pending = NULL; + p.processed = NULL; + p.next_index = 0; + + if (!_push_node(&p, regex->start)) + goto_out; + + result = _fingerprint(&p); +out: + dm_pool_destroy(mem); + + return result; +} diff --git a/device_mapper/regex/parse_rx.c b/device_mapper/regex/parse_rx.c new file mode 100644 index 000000000..cc83bfe35 --- /dev/null +++ b/device_mapper/regex/parse_rx.c @@ -0,0 +1,667 @@ +/* + * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. + * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. + * + * This file is part of the device-mapper userspace tools. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU Lesser General Public License v.2.1.
+ * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "misc/dmlib.h" +#include "parse_rx.h" + +#ifdef DEBUG +#include <ctype.h> + +__attribute__ ((__unused__)) +static void _regex_print(struct rx_node *rx, int depth, unsigned show_nodes) +{ + int i, numchars; + + if (rx->left) { + if (rx->left->type != CHARSET && (show_nodes || (!((rx->type == CAT || rx->type == OR) && rx->left->type == CAT)))) + printf("("); + + _regex_print(rx->left, depth + 1, show_nodes); + + if (rx->left->type != CHARSET && (show_nodes || (!((rx->type == CAT || rx->type == OR) && rx->left->type == CAT)))) + printf(")"); + } + + /* display info about the node */ + switch (rx->type) { + case CAT: + break; + + case OR: + printf("|"); + break; + + case STAR: + printf("*"); + break; + + case PLUS: + printf("+"); + break; + + case QUEST: + printf("?"); + break; + + case CHARSET: + numchars = 0; + for (i = 0; i < 256; i++) + if (dm_bit(rx->charset, i) && (isprint(i) || i == HAT_CHAR || i == DOLLAR_CHAR)) + numchars++; + if (numchars == 97) { + printf("."); + break; + } + if (numchars > 1) + printf("["); + for (i = 0; i < 256; i++) + if (dm_bit(rx->charset, i)) { + if (isprint(i)) + printf("%c", (char) i); + else if (i == HAT_CHAR) + printf("^"); + else if (i == DOLLAR_CHAR) + printf("$"); + } + if (numchars > 1) + printf("]"); + break; + + default: + fprintf(stderr, "Unknown type"); + } + + if (rx->right) { + if (rx->right->type != CHARSET && (show_nodes || (!(rx->type == CAT && rx->right->type == CAT) && rx->right->right))) + printf("("); + _regex_print(rx->right, depth + 1, show_nodes); + if (rx->right->type != CHARSET && (show_nodes || (!(rx->type == CAT && rx->right->type == CAT) && rx->right->right))) + printf(")"); + } + + if (!depth) + printf("\n"); +} +#endif /* DEBUG */ + +struct parse_sp { /* scratch pad for the parsing process */ + struct dm_pool *mem; + int type; /* token type, 0 indicates a charset */ + dm_bitset_t charset; /* The current charset */ + const char *cursor; /* where we are in the regex */ + const char *rx_end; /* 1pte for the expression being parsed */ +}; + +static struct rx_node *_or_term(struct parse_sp *ps); + +static void _single_char(struct parse_sp *ps, unsigned int c, const char *ptr) +{ + ps->type = 0; + ps->cursor = ptr + 1; + dm_bit_clear_all(ps->charset); + dm_bit_set(ps->charset, c); +} + +/* + * Get the next token from the regular expression. + * Returns: 1 success, 0 end of input, -1 error. + */ +static int _rx_get_token(struct parse_sp *ps) +{ + int neg = 0, range = 0; + char c, lc = 0; + const char *ptr = ps->cursor; + if (ptr == ps->rx_end) { /* end of input ? 
*/ + ps->type = -1; + return 0; + } + + switch (*ptr) { + /* charsets and ncharsets */ + case '[': + ptr++; + if (*ptr == '^') { + dm_bit_set_all(ps->charset); + + /* never transition on zero */ + dm_bit_clear(ps->charset, 0); + neg = 1; + ptr++; + + } else + dm_bit_clear_all(ps->charset); + + while ((ptr < ps->rx_end) && (*ptr != ']')) { + if (*ptr == '\\') { + /* an escaped character */ + ptr++; + switch (*ptr) { + case 'n': + c = '\n'; + break; + case 'r': + c = '\r'; + break; + case 't': + c = '\t'; + break; + default: + c = *ptr; + } + } else if (*ptr == '-' && lc) { + /* we've got a range on our hands */ + range = 1; + ptr++; + if (ptr == ps->rx_end) { + log_error("Incomplete range " + "specification"); + return -1; + } + c = *ptr; + } else + c = *ptr; + + if (range) { + /* add lc - c into the bitset */ + if (lc > c) { + char tmp = c; + c = lc; + lc = tmp; + } + + for (; lc <= c; lc++) { + if (neg) + dm_bit_clear(ps->charset, lc); + else + dm_bit_set(ps->charset, lc); + } + range = 0; + } else { + /* add c into the bitset */ + if (neg) + dm_bit_clear(ps->charset, c); + else + dm_bit_set(ps->charset, c); + } + ptr++; + lc = c; + } + + if (ptr >= ps->rx_end) { + ps->type = -1; + return -1; + } + + ps->type = 0; + ps->cursor = ptr + 1; + break; + + /* These characters are special, we just return their ASCII + codes as the type. Sorted into ascending order to help the + compiler */ + case '(': + case ')': + case '*': + case '+': + case '?': + case '|': + ps->type = (int) *ptr; + ps->cursor = ptr + 1; + break; + + case '^': + _single_char(ps, HAT_CHAR, ptr); + break; + + case '$': + _single_char(ps, DOLLAR_CHAR, ptr); + break; + + case '.': + /* The 'all but newline' character set */ + ps->type = 0; + ps->cursor = ptr + 1; + dm_bit_set_all(ps->charset); + dm_bit_clear(ps->charset, (int) '\n'); + dm_bit_clear(ps->charset, (int) '\r'); + dm_bit_clear(ps->charset, 0); + break; + + case '\\': + /* escaped character */ + ptr++; + if (ptr >= ps->rx_end) { + log_error("Badly quoted character at end " + "of expression"); + ps->type = -1; + return -1; + } + + ps->type = 0; + ps->cursor = ptr + 1; + dm_bit_clear_all(ps->charset); + switch (*ptr) { + case 'n': + dm_bit_set(ps->charset, (int) '\n'); + break; + case 'r': + dm_bit_set(ps->charset, (int) '\r'); + break; + case 't': + dm_bit_set(ps->charset, (int) '\t'); + break; + default: + dm_bit_set(ps->charset, (int) *ptr); + } + break; + + default: + /* add a single character to the bitset */ + ps->type = 0; + ps->cursor = ptr + 1; + dm_bit_clear_all(ps->charset); + dm_bit_set(ps->charset, (int) (unsigned char) *ptr); + break; + } + + return 1; +} + +static struct rx_node *_node(struct dm_pool *mem, int type, + struct rx_node *l, struct rx_node *r) +{ + struct rx_node *n = dm_pool_zalloc(mem, sizeof(*n)); + + if (n) { + if (type == CHARSET && !(n->charset = dm_bitset_create(mem, 256))) { + dm_pool_free(mem, n); + return NULL; + } + + n->type = type; + n->left = l; + n->right = r; + } + + return n; +} + +static struct rx_node *_term(struct parse_sp *ps) +{ + struct rx_node *n; + + switch (ps->type) { + case 0: + if (!(n = _node(ps->mem, CHARSET, NULL, NULL))) + return_NULL; + + dm_bit_copy(n->charset, ps->charset); + _rx_get_token(ps); /* match charset */ + break; + + case '(': + _rx_get_token(ps); /* match '(' */ + n = _or_term(ps); + if (ps->type != ')') { + log_error("missing ')' in regular expression"); + return 0; + } + _rx_get_token(ps); /* match ')' */ + break; + + default: + n = 0; + } + + return n; +} + +static struct rx_node
*_closure_term(struct parse_sp *ps) +{ + struct rx_node *l, *n; + + if (!(l = _term(ps))) + return NULL; + + for (;;) { + switch (ps->type) { + case '*': + n = _node(ps->mem, STAR, l, NULL); + break; + + case '+': + n = _node(ps->mem, PLUS, l, NULL); + break; + + case '?': + n = _node(ps->mem, QUEST, l, NULL); + break; + + default: + return l; + } + + if (!n) + return_NULL; + + _rx_get_token(ps); + l = n; + } + + return n; +} + +static struct rx_node *_cat_term(struct parse_sp *ps) +{ + struct rx_node *l, *r, *n; + + if (!(l = _closure_term(ps))) + return NULL; + + if (ps->type == '|') + return l; + + if (!(r = _cat_term(ps))) + return l; + + if (!(n = _node(ps->mem, CAT, l, r))) + stack; + + return n; +} + +static struct rx_node *_or_term(struct parse_sp *ps) +{ + struct rx_node *l, *r, *n; + + if (!(l = _cat_term(ps))) + return NULL; + + if (ps->type != '|') + return l; + + _rx_get_token(ps); /* match '|' */ + + if (!(r = _or_term(ps))) { + log_error("Badly formed 'or' expression"); + return NULL; + } + + if (!(n = _node(ps->mem, OR, l, r))) + stack; + + return n; +} + +/*----------------------------------------------------------------*/ + +/* Macros for left and right nodes. Inverted if 'leftmost' is set. */ +#define LEFT(a) (leftmost ? (a)->left : (a)->right) +#define RIGHT(a) (leftmost ? (a)->right : (a)->left) + +/* + * The optimiser spots common prefixes on either side of an 'or' node, and + * lifts them outside the 'or' with a 'cat'. + */ +static unsigned _depth(struct rx_node *r, unsigned leftmost) +{ + int count = 1; + + while (r->type != CHARSET && LEFT(r) && (leftmost || r->type != OR)) { + count++; + r = LEFT(r); + } + + return count; +} + +/* + * FIXME: a unique key could be built up as part of the parse, to make the + * comparison quick. Alternatively we could use cons-hashing, and then + * this would simply be a pointer comparison. + */ +static int _nodes_equal(struct rx_node *l, struct rx_node *r) +{ + if (l->type != r->type) + return 0; + + switch (l->type) { + case CAT: + case OR: + return _nodes_equal(l->left, r->left) && + _nodes_equal(l->right, r->right); + + case STAR: + case PLUS: + case QUEST: + return _nodes_equal(l->left, r->left); + + case CHARSET: + /* + * Never change anything containing TARGET_TRANS + * used by matcher as boundary marker between concatenated + * expressions. 
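+		 *
+		 * Returning 0 for any charset holding TARGET_TRANS keeps
+		 * the optimiser below from ever treating a boundary marker
+		 * as a common prefix it could hoist out of an 'or'.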
+ */ + return (!dm_bit(l->charset, TARGET_TRANS) && dm_bitset_equal(l->charset, r->charset)); + } + + /* NOTREACHED */ + return_0; +} + +static int _find_leftmost_common(struct rx_node *or, + struct rx_node **l, + struct rx_node **r, + unsigned leftmost) +{ + struct rx_node *left = or->left, *right = or->right; + unsigned left_depth = _depth(left, leftmost); + unsigned right_depth = _depth(right, leftmost); + + while (left_depth > right_depth && left->type != OR) { + left = LEFT(left); + left_depth--; + } + + while (right_depth > left_depth && right->type != OR) { + right = LEFT(right); + right_depth--; + } + + if (left_depth != right_depth) + return 0; + + while (left_depth) { + if (left->type == CAT && right->type == CAT) { + if (_nodes_equal(LEFT(left), LEFT(right))) { + *l = left; + *r = right; + return 1; + } + } + if (left->type == OR || right->type == OR) + break; + left = LEFT(left); + right = LEFT(right); + left_depth--; + } + + return 0; +} + +/* If top node is OR, rotate (leftmost example) from ((ab)|((ac)|d)) to (((ab)|(ac))|d) */ +static int _rotate_ors(struct rx_node *r, unsigned leftmost) +{ + struct rx_node *old_node; + + if (r->type != OR || RIGHT(r)->type != OR) + return 0; + + old_node = RIGHT(r); + + if (leftmost) { + r->right = RIGHT(old_node); + old_node->right = LEFT(old_node); + old_node->left = LEFT(r); + r->left = old_node; + } else { + r->left = RIGHT(old_node); + old_node->left = LEFT(old_node); + old_node->right = LEFT(r); + r->right = old_node; + } + + return 1; +} + +static struct rx_node *_exchange_nodes(struct dm_pool *mem, struct rx_node *r, + struct rx_node *left_cat, struct rx_node *right_cat, + unsigned leftmost) +{ + struct rx_node *new_r; + + if (leftmost) + new_r = _node(mem, CAT, LEFT(left_cat), r); + else + new_r = _node(mem, CAT, r, LEFT(right_cat)); + + if (!new_r) + return_NULL; + + memcpy(left_cat, RIGHT(left_cat), sizeof(*left_cat)); + memcpy(right_cat, RIGHT(right_cat), sizeof(*right_cat)); + + return new_r; +} + +static struct rx_node *_pass(struct dm_pool *mem, + struct rx_node *r, + int *changed) +{ + struct rx_node *left, *right; + + /* + * walk the tree, optimising every 'or' node. + */ + switch (r->type) { + case CAT: + if (!(r->left = _pass(mem, r->left, changed))) + return_NULL; + + if (!(r->right = _pass(mem, r->right, changed))) + return_NULL; + + break; + + case STAR: + case PLUS: + case QUEST: + if (!(r->left = _pass(mem, r->left, changed))) + return_NULL; + + break; + case OR: + /* It's important we optimise sub nodes first */ + if (!(r->left = _pass(mem, r->left, changed))) + return_NULL; + + if (!(r->right = _pass(mem, r->right, changed))) + return_NULL; + /* + * If rotate_ors changes the tree, left and right are stale, + * so just set 'changed' to repeat the search. + * + * FIXME Check we can't 'bounce' between left and right rotations here. + */ + if (_find_leftmost_common(r, &left, &right, 1)) { + if (!_rotate_ors(r, 1)) + r = _exchange_nodes(mem, r, left, right, 1); + *changed = 1; + } else if (_find_leftmost_common(r, &left, &right, 0)) { + if (!_rotate_ors(r, 0)) + r = _exchange_nodes(mem, r, left, right, 0); + *changed = 1; + } + break; + + case CHARSET: + break; + } + + return r; +} + +static struct rx_node *_optimise(struct dm_pool *mem, struct rx_node *r) +{ + /* + * We're looking for (or (... (cat <foo> a)) (... (cat <foo> b))) + * and want to turn it into (cat <foo> (or (... a) (... b))) + * + * (fa)|(fb) becomes f(a|b) + */ + + /* + * Initially done as an inefficient multipass algorithm. 
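+	 *
+	 * Each pass walks the whole tree once; the loop below repeats
+	 * until a pass completes without setting 'changed'.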
+ */ + int changed; + + do { + changed = 0; + r = _pass(mem, r, &changed); + } while (r && changed); + + return r; +} + +/*----------------------------------------------------------------*/ + +struct rx_node *rx_parse_tok(struct dm_pool *mem, + const char *begin, const char *end) +{ + struct rx_node *r; + struct parse_sp *ps = dm_pool_zalloc(mem, sizeof(*ps)); + + if (!ps) + return_NULL; + + ps->mem = mem; + if (!(ps->charset = dm_bitset_create(mem, 256))) { + log_error("Regex charset allocation failed"); + dm_pool_free(mem, ps); + return NULL; + } + ps->cursor = begin; + ps->rx_end = end; + _rx_get_token(ps); /* load the first token */ + + if (!(r = _or_term(ps))) { + log_error("Parse error in regex"); + dm_pool_free(mem, ps); + return NULL; + } + + if (!(r = _optimise(mem, r))) { + log_error("Regex optimisation error"); + dm_pool_free(mem, ps); + return NULL; + } + + return r; +} + +struct rx_node *rx_parse_str(struct dm_pool *mem, const char *str) +{ + return rx_parse_tok(mem, str, str + strlen(str)); +} diff --git a/device_mapper/regex/parse_rx.h b/device_mapper/regex/parse_rx.h new file mode 100644 index 000000000..08970605d --- /dev/null +++ b/device_mapper/regex/parse_rx.h @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. + * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. + * + * This file is part of the device-mapper userspace tools. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU Lesser General Public License v.2.1. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef _DM_PARSE_REGEX_H +#define _DM_PARSE_REGEX_H + +enum { + CAT, + STAR, + PLUS, + OR, + QUEST, + CHARSET +}; + +/* + * We're never going to be running the regex on non-printable + * chars, so we can use a couple of these chars to represent the + * start and end of a string. + */ +#define HAT_CHAR 0x2 +#define DOLLAR_CHAR 0x3 + +#define TARGET_TRANS '\0' + +struct rx_node { + int type; + dm_bitset_t charset; + struct rx_node *left, *right; + + /* used to build the dfa for the toker */ + unsigned charset_index; + int nullable, final; + dm_bitset_t firstpos; + dm_bitset_t lastpos; + dm_bitset_t followpos; +}; + +struct rx_node *rx_parse_str(struct dm_pool *mem, const char *str); +struct rx_node *rx_parse_tok(struct dm_pool *mem, + const char *begin, const char *end); + +#endif diff --git a/device_mapper/regex/ttree.c b/device_mapper/regex/ttree.c new file mode 100644 index 000000000..62c5bf786 --- /dev/null +++ b/device_mapper/regex/ttree.c @@ -0,0 +1,114 @@ +/* + * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. + * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. + * + * This file is part of the device-mapper userspace tools. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU Lesser General Public License v.2.1. 
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "misc/dmlib.h"
+#include "ttree.h"
+
+/*
+ * A ternary search tree mapping fixed-length keys to data pointers.
+ * A key is an array of tt->klen unsigned ints: each element is found
+ * in a small binary tree (the 'l'/'r' links), and on a match the 'm'
+ * (middle) link descends to the tree for the next key element.
+ */
+struct node {
+	unsigned k;
+	struct node *l, *m, *r;
+	void *data;
+};
+
+struct ttree {
+	int klen;
+	struct dm_pool *mem;
+	struct node *root;
+};
+
+__attribute__((nonnull(1)))
+static struct node **_lookup_single(struct node **c, unsigned int k)
+{
+	while (*c) {
+		if (k < (*c)->k)
+			c = &((*c)->l);
+
+		else if (k > (*c)->k)
+			c = &((*c)->r);
+
+		else {
+			c = &((*c)->m);
+			break;
+		}
+	}
+
+	return c;
+}
+
+void *ttree_lookup(struct ttree *tt, unsigned *key)
+{
+	struct node **c = &tt->root;
+	int count = tt->klen;
+
+	while (*c && count) {
+		c = _lookup_single(c, *key++);
+		count--;
+	}
+
+	return *c ? (*c)->data : NULL;
+}
+
+static struct node *_tree_node(struct dm_pool *mem, unsigned int k)
+{
+	struct node *n = dm_pool_zalloc(mem, sizeof(*n));
+
+	if (n)
+		n->k = k;
+
+	return n;
+}
+
+/* Insert 'data' under 'key', creating any missing nodes on the way. */
+int ttree_insert(struct ttree *tt, unsigned int *key, void *data)
+{
+	struct node **c = &tt->root;
+	int count = tt->klen;
+	unsigned int k;
+
+	do {
+		k = *key++;
+		c = _lookup_single(c, k);
+		count--;
+
+	} while (*c && count);
+
+	if (!*c) {
+		count++;
+
+		while (count--) {
+			if (!(*c = _tree_node(tt->mem, k)))
+				return_0;
+
+			if (count) {
+				k = *key++;
+				c = &((*c)->m);
+			}
+		}
+	}
+	(*c)->data = data;
+
+	return 1;
+}
+
+struct ttree *ttree_create(struct dm_pool *mem, unsigned int klen)
+{
+	struct ttree *tt;
+
+	if (!(tt = dm_pool_zalloc(mem, sizeof(*tt))))
+		return_NULL;
+
+	tt->klen = klen;
+	tt->mem = mem;
+	return tt;
+}
diff --git a/device_mapper/regex/ttree.h b/device_mapper/regex/ttree.h
new file mode 100644
index 000000000..8b62181f4
--- /dev/null
+++ b/device_mapper/regex/ttree.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef _DM_TTREE_H
+#define _DM_TTREE_H
+
+struct ttree;
+
+struct ttree *ttree_create(struct dm_pool *mem, unsigned int klen);
+
+void *ttree_lookup(struct ttree *tt, unsigned *key);
+int ttree_insert(struct ttree *tt, unsigned *key, void *data);
+
+#endif
diff --git a/device-mapper/vdo/status.c b/device_mapper/vdo/status.c
similarity index 99%
rename from device-mapper/vdo/status.c
rename to device_mapper/vdo/status.c
index 1739a7e70..4bd15e462 100644
--- a/device-mapper/vdo/status.c
+++ b/device_mapper/vdo/status.c
@@ -1,7 +1,7 @@
 #include "target.h"	// For DM_ARRAY_SIZE!
-#include "libdm/libdevmapper.h" +#include "device_mapper/libdevmapper.h" #include <ctype.h> #include <stdlib.h> diff --git a/device-mapper/vdo/target.h b/device_mapper/vdo/target.h similarity index 100% rename from device-mapper/vdo/target.h rename to device_mapper/vdo/target.h diff --git a/lib/config/config.h b/lib/config/config.h index b797c78d8..146aae152 100644 --- a/lib/config/config.h +++ b/lib/config/config.h @@ -16,7 +16,7 @@ #ifndef _LVM_CONFIG_H #define _LVM_CONFIG_H -#include "libdm/libdevmapper.h" +#include "device_mapper/libdevmapper.h" #include "lib/device/device.h" /* 16 bits: 3 bits for major, 4 bits for minor, 9 bits for patchlevel */ diff --git a/lib/device/bcache.c b/lib/device/bcache.c index aa6bb7ace..d213758e7 100644 --- a/lib/device/bcache.c +++ b/lib/device/bcache.c @@ -15,7 +15,7 @@ #define _GNU_SOURCE #include "lib/device/bcache.h" -#include "libdm/misc/dm-logging.h" +#include "device_mapper/misc/dm-logging.h" #include "lib/log/log.h" #include <errno.h> diff --git a/lib/device/bcache.h b/lib/device/bcache.h index cf3d6688d..8f328c76c 100644 --- a/lib/device/bcache.h +++ b/lib/device/bcache.h @@ -15,7 +15,7 @@ #ifndef BCACHE_H #define BCACHE_H -#include "libdm/libdevmapper.h" +#include "device_mapper/libdevmapper.h" #include <linux/fs.h> #include <stdint.h> diff --git a/lib/device/dev-cache.c b/lib/device/dev-cache.c index f3c23260d..38026a381 100644 --- a/lib/device/dev-cache.c +++ b/lib/device/dev-cache.c @@ -17,7 +17,7 @@ #include "lib/datastruct/btree.h" #include "lib/config/config.h" #include "lib/commands/toolcontext.h" -#include "libdm/misc/dm-ioctl.h" +#include "device_mapper/misc/dm-ioctl.h" #include "lib/misc/lvm-string.h" #ifdef UDEV_SYNC_SUPPORT diff --git a/lib/metadata/pv.h b/lib/metadata/pv.h index 23d2bd759..d5d91ce0f 100644 --- a/lib/metadata/pv.h +++ b/lib/metadata/pv.h @@ -16,7 +16,7 @@ #define _LVM_PV_H #include "lib/uuid/uuid.h" -#include "libdm/libdevmapper.h" +#include "device_mapper/libdevmapper.h" struct device; struct format_type; diff --git a/lib/metadata/vg.h b/lib/metadata/vg.h index b37c01d83..3d24bba7d 100644 --- a/lib/metadata/vg.h +++ b/lib/metadata/vg.h @@ -16,7 +16,7 @@ #define _LVM_VG_H #include "lib/uuid/uuid.h" -#include "libdm/libdevmapper.h" +#include "device_mapper/libdevmapper.h" struct cmd_context; struct format_instance; diff --git a/lib/misc/lib.h b/lib/misc/lib.h index 13c7110f7..3ae3aacc0 100644 --- a/lib/misc/lib.h +++ b/lib/misc/lib.h @@ -79,7 +79,7 @@ #include "lib/misc/intl.h" -#include "libdm/libdevmapper.h" +#include "device_mapper/libdevmapper.h" #include "lib/misc/util.h" #ifdef DM diff --git a/lib/report/properties.h b/lib/report/properties.h index f52705e9a..38b61110c 100644 --- a/lib/report/properties.h +++ b/lib/report/properties.h @@ -14,7 +14,7 @@ #ifndef _LVM_PROPERTIES_H #define _LVM_PROPERTIES_H -#include "libdm/libdevmapper.h" +#include "device_mapper/libdevmapper.h" #include "lib/metadata/metadata.h" #include "lib/report/report.h" #include "lib/properties/prop_common.h" diff --git a/libdaemon/client/config-util.c b/libdaemon/client/config-util.c index 12baa998b..f8526d75c 100644 --- a/libdaemon/client/config-util.c +++ b/libdaemon/client/config-util.c @@ -17,7 +17,7 @@ #include "tools/tool.h" #include "libdaemon/client/daemon-io.h" -#include "libdm/misc/dm-logging.h" +#include "device_mapper/misc/dm-logging.h" #include <math.h> /* fabs() */ #include <float.h> /* DBL_EPSILON */ diff --git a/libdaemon/client/daemon-client.c b/libdaemon/client/daemon-client.c index b5dfbf130..28d7c04a3 100644 --- 
a/libdaemon/client/daemon-client.c +++ b/libdaemon/client/daemon-client.c @@ -18,7 +18,7 @@ #include "libdaemon/client/daemon-io.h" #include "libdaemon/client/daemon-client.h" -#include "libdm/misc/dm-logging.h" +#include "device_mapper/misc/dm-logging.h" #include <sys/un.h> #include <sys/socket.h> diff --git a/libdm/Makefile.in b/libdm/Makefile.in index 66ec39513..eeef0801d 100644 --- a/libdm/Makefile.in +++ b/libdm/Makefile.in @@ -52,7 +52,7 @@ CFLOW_LIST_TARGET = libdevmapper.cflow EXPORTED_HEADER = $(srcdir)/libdevmapper.h EXPORTED_FN_PREFIX = dm -include $(top_builddir)/make.tmpl +include $(top_builddir)/libdm/make.tmpl PROGS_CFLAGS = $(UDEV_CFLAGS) diff --git a/libdm/make.tmpl.in b/libdm/make.tmpl.in new file mode 100644 index 000000000..7e3f4a481 --- /dev/null +++ b/libdm/make.tmpl.in @@ -0,0 +1,578 @@ +# @configure_input@ +# +# Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. +# Copyright (C) 2004-2014 Red Hat, Inc. All rights reserved. +# +# This file is part of LVM2. +# +# This copyrighted material is made available to anyone wishing to use, +# modify, copy, or redistribute it subject to the terms and conditions +# of the GNU General Public License v.2. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software Foundation, +# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +ifeq ($(V),1) + Q= +else + Q=@ +endif + +SHELL = @SHELL@ + +@SET_MAKE@ + +# Allow environment to override any built-in default value for CC. +# If there is a built-in default, CC is NOT set to @CC@ here. +CC ?= @CC@ + +# If $(CC) holds the usual built-in default value of 'cc' then replace it with +# the configured value. +# (To avoid this and force the use of 'cc' from the environment, supply its +# full path.) +ifeq ($(CC), cc) + CC = @CC@ +endif + +RANLIB = @RANLIB@ +INSTALL = @INSTALL@ +MKDIR_P = @MKDIR_P@ +MSGFMT = @MSGFMT@ +LCOV = @LCOV@ +GENHTML = @GENHTML@ +LN_S = @LN_S@ +SED = @SED@ +CFLOW_CMD = @CFLOW_CMD@ +AWK = @AWK@ +CHMOD = @CHMOD@ +EGREP = @EGREP@ +GREP = @GREP@ +SORT = @SORT@ +WC = @WC@ +AR = @AR@ +RM = rm -f + +PYTHON2 = @PYTHON2@ +PYTHON3 = @PYTHON3@ +PYCOMPILE = $(top_srcdir)/autoconf/py-compile + +LIBS = @LIBS@ +# Extra libraries always linked with static binaries +STATIC_LIBS = $(SELINUX_LIBS) $(UDEV_LIBS) $(BLKID_LIBS) +DEFS += @DEFS@ +# FIXME set this only where it's needed, not globally? 
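+# '?=' keeps any CFLAGS/LDFLAGS supplied in the environment or on the make
+# command line; the configure-time defaults below apply only when nothing
+# else set them.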
+CFLAGS ?= @COPTIMISE_FLAG@ @CFLAGS@ +LDFLAGS ?= @LDFLAGS@ +CLDFLAGS += @CLDFLAGS@ +ELDFLAGS += @ELDFLAGS@ +LDDEPS += @LDDEPS@ +LIB_SUFFIX = @LIB_SUFFIX@ +LVMINTERNAL_LIBS = -llvm-internal $(DMEVENT_LIBS) $(DAEMON_LIBS) $(SYSTEMD_LIBS) $(UDEV_LIBS) $(DL_LIBS) $(BLKID_LIBS) +DL_LIBS = @DL_LIBS@ +RT_LIBS = @RT_LIBS@ +M_LIBS = @M_LIBS@ +PTHREAD_LIBS = @PTHREAD_LIBS@ +READLINE_LIBS = @READLINE_LIBS@ +SELINUX_LIBS = @SELINUX_LIBS@ +UDEV_CFLAGS = @UDEV_CFLAGS@ +UDEV_LIBS = @UDEV_LIBS@ +BLKID_CFLAGS = @BLKID_CFLAGS@ +BLKID_LIBS = @BLKID_LIBS@ +SYSTEMD_LIBS = @SYSTEMD_LIBS@ +VALGRIND_CFLAGS = @VALGRIND_CFLAGS@ + +# Setup directory variables +prefix = @prefix@ +exec_prefix = @exec_prefix@ +udev_prefix = @udev_prefix@ +sysconfdir = @sysconfdir@ +rootdir = $(DESTDIR)/ +bindir = $(DESTDIR)@bindir@ +confdir = $(DESTDIR)@CONFDIR@/lvm +profiledir = $(confdir)/@DEFAULT_PROFILE_SUBDIR@ +includedir = $(DESTDIR)@includedir@ +libdir = $(DESTDIR)@libdir@ +libexecdir = $(DESTDIR)@libexecdir@ +usrlibdir = $(DESTDIR)@usrlibdir@ +sbindir = $(DESTDIR)@sbindir@ +usrsbindir = $(DESTDIR)@usrsbindir@ +datarootdir = @datarootdir@ +datadir = $(DESTDIR)@datadir@ +infodir = $(DESTDIR)@infodir@ +mandir = $(DESTDIR)@mandir@ +localedir = $(DESTDIR)@localedir@ +staticdir = $(DESTDIR)@STATICDIR@ +udevdir = $(DESTDIR)@udevdir@ +pkgconfigdir = $(usrlibdir)/pkgconfig +initdir = $(DESTDIR)$(sysconfdir)/rc.d/init.d +dbusconfdir = $(DESTDIR)$(sysconfdir)/dbus-1/system.d +dbusservicedir = $(datadir)/dbus-1/system-services +systemd_unit_dir = $(DESTDIR)@systemdsystemunitdir@ +systemd_generator_dir = $(DESTDIR)$(SYSTEMD_GENERATOR_DIR) +systemd_dir = $(DESTDIR)@systemdutildir@ +tmpfiles_dir = $(DESTDIR)@tmpfilesdir@ +ocf_scriptdir = $(DESTDIR)@OCFDIR@ +pythonprefix = $(DESTDIR)$(prefix) + +# N.B. No $(DESTDIR) prefix here. 
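+# ($(DESTDIR) is already folded into pythonprefix above, so repeating it
+# in the python install dirs would apply the staging prefix twice.)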
+python2dir = @PYTHON2DIR@ +python3dir = @PYTHON3DIR@ + +USRLIB_RELPATH = $(shell echo $(abspath $(usrlibdir) $(libdir)) | \ + $(AWK) -f $(top_srcdir)/scripts/relpath.awk) + +SYSTEMD_GENERATOR_DIR = @systemdutildir@/system-generators +DEFAULT_SYS_DIR = @DEFAULT_SYS_DIR@ +DEFAULT_ARCHIVE_DIR = $(DEFAULT_SYS_DIR)/@DEFAULT_ARCHIVE_SUBDIR@ +DEFAULT_BACKUP_DIR = $(DEFAULT_SYS_DIR)/@DEFAULT_BACKUP_SUBDIR@ +DEFAULT_CACHE_DIR = $(DEFAULT_SYS_DIR)/@DEFAULT_CACHE_SUBDIR@ +DEFAULT_PROFILE_DIR = $(DEFAULT_SYS_DIR)/@DEFAULT_PROFILE_SUBDIR@ +DEFAULT_LOCK_DIR = @DEFAULT_LOCK_DIR@ +DEFAULT_RUN_DIR = @DEFAULT_RUN_DIR@ +DEFAULT_PID_DIR = @DEFAULT_PID_DIR@ +DEFAULT_MANGLING = @MANGLING@ + +# Setup vpath search paths for some suffixes +vpath %.c $(srcdir) +vpath %.cpp $(srcdir) +vpath %.in $(srcdir) +vpath %.po $(srcdir) +vpath %.exported_symbols $(srcdir) + +interface = @interface@ +interfacebuilddir = $(top_builddir)/libdm/$(interface) +rpmbuilddir = $(abs_top_builddir)/build + +# The number of jobs to run, if blank, defaults to the make standard +ifndef MAKEFLAGS +MAKEFLAGS = @JOBS@ +endif + +# Handle installation of files +ifeq ("@WRITE_INSTALL@", "yes") +# leaving defaults +M_INSTALL_SCRIPT = +M_INSTALL_DATA = -m 644 +else +M_INSTALL_PROGRAM = -m 555 +M_INSTALL_DATA = -m 444 +endif +INSTALL_PROGRAM = $(INSTALL) $(M_INSTALL_PROGRAM) $(STRIP) +INSTALL_DATA = $(INSTALL) -p $(M_INSTALL_DATA) +INSTALL_WDATA = $(INSTALL) -p -m 644 + +INSTALL_DIR = $(INSTALL) -m 755 -d +INSTALL_ROOT_DIR = $(INSTALL) -m 700 -d +INSTALL_ROOT_DATA = $(INSTALL) -m 600 +INSTALL_SCRIPT = $(INSTALL) -p $(M_INSTALL_PROGRAM) + +.SUFFIXES: +.SUFFIXES: .c .cpp .d .o .so .a .po .pot .mo .dylib + +ifeq ("$(notdir $(CC))", "gcc") +WFLAGS +=\ + -Wall\ + -Wcast-align\ + -Wfloat-equal\ + -Wformat-security\ + -Winline\ + -Wmissing-format-attribute\ + -Wmissing-include-dirs\ + -Wmissing-noreturn\ + -Wpointer-arith\ + -Wredundant-decls\ + -Wshadow\ + -Wundef\ + -Wwrite-strings + +WCFLAGS +=\ + -Wmissing-declarations\ + -Wmissing-prototypes\ + -Wnested-externs\ + -Wold-style-definition\ + -Wstrict-prototypes\ + -Wuninitialized + +ifeq ("@HAVE_WJUMP@", "yes") +WCFLAGS += -Wjump-misses-init +endif + +ifeq ("@HAVE_WCLOBBERED@", "yes") +WFLAGS +=\ + -Wclobbered\ + -Wempty-body\ + -Wignored-qualifiers\ + -Wlogical-op\ + -Wtype-limits + +WCFLAGS +=\ + -Wmissing-parameter-type\ + -Wold-style-declaration\ + -Woverride-init +endif + +ifeq ("@HAVE_WSYNCNAND@", "yes") +WFLAGS += -Wsync-nand +endif +endif + +ifneq ("@STATIC_LINK@", "yes") +ifeq ("@HAVE_PIE@", "yes") +ifeq ("@HAVE_FULL_RELRO@", "yes") + EXTRA_EXEC_CFLAGS += -fPIE + EXTRA_EXEC_LDFLAGS += -Wl,-z,relro,-z,now -pie -fPIE + CLDFLAGS += -Wl,-z,relro +endif +endif +endif + +#WFLAGS += -W -Wno-sign-compare -Wno-unused-parameter -Wno-missing-field-initializers +#WFLAGS += -Wsign-compare -Wunused-parameter -Wmissing-field-initializers +#WFLAGS += -Wconversion -Wbad-function-cast -Wcast-qual -Waggregate-return -Wpacked +#WFLAGS += -pedantic -std=gnu99 +#DEFS += -DDEBUG_CRC32 + +# +# Avoid recursive extension of CFLAGS +# by checking whether CFLAGS already has fPIC string +# +ifeq (,$(findstring fPIC,$(CFLAGS))) + +CFLAGS += -fPIC + +ifeq ("@DEBUG@", "yes") +ifeq (,$(findstring -g,$(CFLAGS))) + CFLAGS += -g +endif + CFLAGS += -fno-omit-frame-pointer + DEFS += -DDEBUG + # memory debugging is not thread-safe yet + ifneq ("@BUILD_DMEVENTD@", "yes") + ifneq ("@BUILD_DMFILEMAPD@", "yes") + ifneq ("@BUILD_LVMLOCKD@", "yes") + ifneq ("@BUILD_LVMPOLLD@", "yes") + ifneq ("@BUILD_LVMETAD@", "yes") + ifeq ("@CLVMD@", 
"none") + DEFS += -DDEBUG_MEM + endif + endif + endif + endif + endif + endif +endif + +# end of fPIC protection +endif + +DEFS += -D_BUILDING_LVM + +LDFLAGS += -L$(top_builddir)/libdm -L$(top_builddir)/lib +CLDFLAGS += -L$(top_builddir)/libdm -L$(top_builddir)/lib + +DAEMON_LIBS = -ldaemonclient +LDFLAGS += -L$(top_builddir)/libdaemon/client +CLDFLAGS += -L$(top_builddir)/libdaemon/client + +ifeq ("@BUILD_DMEVENTD@", "yes") + DMEVENT_LIBS = -ldevmapper-event + LDFLAGS += -L$(top_builddir)/daemons/dmeventd + CLDFLAGS += -L$(top_builddir)/daemons/dmeventd +endif + +# Combination of DEBUG_POOL and DEBUG_ENFORCE_POOL_LOCKING is not suppored. +#DEFS += -DDEBUG_POOL +# Default pool locking is using the crc checksum. With mprotect memory +# enforcing compilation faulty memory write could be easily found. +#DEFS += -DDEBUG_ENFORCE_POOL_LOCKING +#DEFS += -DBOUNDS_CHECK + +# LVM is not supposed to use mmap while devices are suspended. +# This code causes a core dump if gets called. +#DEFS += -DDEBUG_MEMLOCK + +#CFLAGS += -pg +#LDFLAGS += -pg + +STRIP= +#STRIP = -s + +LVM_VERSION := $(shell cat $(top_srcdir)/VERSION) + +LIB_VERSION_LVM := $(shell $(AWK) -F '.' '{printf "%s.%s",$$1,$$2}' $(top_srcdir)/VERSION) + +LIB_VERSION_DM := $(shell $(AWK) -F '.' '{printf "%s.%s",$$1,$$2}' $(top_srcdir)/VERSION_DM) + +LIB_VERSION_APP := $(shell $(AWK) -F '[(). ]' '{printf "%s.%s",$$1,$$4}' $(top_srcdir)/VERSION) + +INCLUDES += -I$(top_srcdir) -I$(srcdir) -I$(top_builddir)/include + +DEPS = $(top_builddir)/make.tmpl $(top_srcdir)/VERSION \ + $(top_builddir)/Makefile + +OBJECTS = $(SOURCES:%.c=%.o) $(CXXSOURCES:%.cpp=%.o) +POTFILES = $(SOURCES:%.c=%.pot) + +.PHONY: all pofile distclean clean cleandir cflow device-mapper +.PHONY: install install_cluster install_device-mapper install_lvm2 +.PHONY: install_dbus_service +.PHONY: install_lib_shared install_dm_plugin install_lvm2_plugin +.PHONY: install_ocf install_systemd_generators install_all_man all_man man help +.PHONY: python_bindings install_python_bindings +.PHONY: $(SUBDIRS) $(SUBDIRS.install) $(SUBDIRS.clean) $(SUBDIRS.distclean) +.PHONY: $(SUBDIRS.pofile) $(SUBDIRS.install_cluster) $(SUBDIRS.cflow) +.PHONY: $(SUBDIRS.device-mapper) $(SUBDIRS.install-device-mapper) +.PHONY: $(SUBDIRS.generate) generate + +SUBDIRS.device-mapper := $(SUBDIRS:=.device-mapper) +SUBDIRS.install := $(SUBDIRS:=.install) +SUBDIRS.install_cluster := $(SUBDIRS:=.install_cluster) +SUBDIRS.install_device-mapper := $(SUBDIRS:=.install_device-mapper) +SUBDIRS.install_lvm2 := $(SUBDIRS:=.install_lvm2) +SUBDIRS.install_ocf := $(SUBDIRS:=.install_ocf) +SUBDIRS.pofile := $(SUBDIRS:=.pofile) +SUBDIRS.cflow := $(SUBDIRS:=.cflow) +SUBDIRS.clean := $(SUBDIRS:=.clean) +SUBDIRS.distclean := $(SUBDIRS:=.distclean) + +TARGETS += $(LIB_SHARED) $(LIB_STATIC) + +all: $(SUBDIRS) $(TARGETS) + +install: all $(SUBDIRS.install) +install_cluster: all $(SUBDIRS.install_cluster) +install_device-mapper: $(SUBDIRS.install_device-mapper) +install_lvm2: $(SUBDIRS.install_lvm2) +install_ocf: $(SUBDIRS.install_ocf) +cflow: $(SUBDIRS.cflow) + +$(SUBDIRS): $(SUBDIRS.device-mapper) + $(MAKE) -C $@ + +$(SUBDIRS.device-mapper): + $(MAKE) -C $(@:.device-mapper=) device-mapper + +$(SUBDIRS.install): $(SUBDIRS) + $(MAKE) -C $(@:.install=) install + +$(SUBDIRS.install_cluster): $(SUBDIRS) + $(MAKE) -C $(@:.install_cluster=) install_cluster + +$(SUBDIRS.install_device-mapper): device-mapper + $(MAKE) -C $(@:.install_device-mapper=) install_device-mapper + +$(SUBDIRS.install_lvm2): $(SUBDIRS) + $(MAKE) -C $(@:.install_lvm2=) 
install_lvm2 + +$(SUBDIRS.install_ocf): + $(MAKE) -C $(@:.install_ocf=) install_ocf + +$(SUBDIRS.clean): + -$(MAKE) -C $(@:.clean=) clean + +$(SUBDIRS.distclean): + -$(MAKE) -C $(@:.distclean=) distclean + +$(SUBDIRS.cflow): + $(MAKE) -C $(@:.cflow=) cflow + +ifeq ("@INTL@", "yes") +pofile: $(SUBDIRS.pofile) $(POTFILES) + +$(SUBDIRS.pofile): + $(MAKE) -C $(@:.pofile=) pofile +endif + +$(SUBDIRS.generate): + $(MAKE) -C $(@:.generate=) generate + +ifneq ("$(CFLOW_LIST_TARGET)", "") +CLEAN_CFLOW += $(CFLOW_LIST_TARGET) +$(CFLOW_LIST_TARGET): $(CFLOW_LIST) + echo "CFLOW_SOURCES += $(addprefix \ + \$$(top_srcdir)$(subst $(top_srcdir),,$(srcdir))/, $(CFLOW_LIST))" > $@ +cflow: $(CFLOW_LIST_TARGET) +endif + +ifneq ("$(CFLOW_TARGET)", "") +CLEAN_CFLOW += \ + $(CFLOW_TARGET).cflow \ + $(CFLOW_TARGET).xref \ + $(CFLOW_TARGET).tree \ + $(CFLOW_TARGET).rtree \ + $(CFLOW_TARGET).rxref + +ifneq ("$(CFLOW_CMD)", "") +CFLOW_FLAGS +=\ + --cpp="$(CC) -E" \ + --symbol _ISbit:wrapper \ + --symbol __attribute__:wrapper \ + --symbol __const__:wrapper \ + --symbol __const:type \ + --symbol __restrict:type \ + --symbol __extension__:wrapper \ + --symbol __nonnull:wrapper \ + --symbol __nothrow__:wrapper \ + --symbol __pure__:wrapper \ + --symbol __REDIRECT:wrapper \ + --symbol __REDIRECT_NTH:wrapper \ + --symbol __wur:wrapper \ + -I$(top_srcdir)/libdm \ + -I$(top_srcdir)/libdm/ioctl \ + -I$(top_srcdir)/daemons/dmeventd/plugins/lvm2/ \ + $(INCLUDES) $(DEFS) + +$(CFLOW_TARGET).cflow: $(CFLOW_SOURCES) + $(CFLOW_CMD) -o$@ $(CFLOW_FLAGS) $(CFLOW_SOURCES) +$(CFLOW_TARGET).rxref: $(CFLOW_SOURCES) + $(CFLOW_CMD) -o$@ $(CFLOW_FLAGS) -r --omit-arguments $(CFLOW_SOURCES) +$(CFLOW_TARGET).tree: $(CFLOW_SOURCES) + $(CFLOW_CMD) -o$@ $(CFLOW_FLAGS) --omit-arguments -T -b $(CFLOW_SOURCES) +$(CFLOW_TARGET).xref: $(CFLOW_SOURCES) + $(CFLOW_CMD) -o$@ $(CFLOW_FLAGS) --omit-arguments -x $(CFLOW_SOURCES) +#$(CFLOW_TARGET).rtree: $(CFLOW_SOURCES) +# $(CFLOW_CMD) -o$@ $(CFLOW_FLAGS) -r --omit-arguments -T -b $(CFLOW_SOURCES) +cflow: $(CFLOW_TARGET).cflow $(CFLOW_TARGET).tree $(CFLOW_TARGET).rxref $(CFLOW_TARGET).xref +#$(CFLOW_TARGET).rtree +endif +endif + +.LIBPATTERNS = lib%.so lib%.a + +DEPFLAGS=-MT $@ -MMD -MP -MF $*.d + +# still needed in 2018 for 32bit builds +DEFS+=-D_FILE_OFFSET_BITS=64 + +%.o: %.c + @echo " [CC] $<" + $(Q) $(CC) $(DEPFLAGS) -c $(INCLUDES) $(VALGRIND_CFLAGS) $(PROGS_CFLAGS) $(DEFS) $(DEFS_$@) $(WFLAGS) $(WCFLAGS) $(CFLAGS) $(CFLAGS_$@) $< -o $@ + +%.o: %.cpp + @echo " [CXX] $<" + $(Q) $(CXX) -c $(INCLUDES) $(VALGRIND_CFLAGS) $(DEFS) $(DEFS_$@) $(WFLAGS) $(CXXFLAGS) $(CXXFLAGS_$@) $< -o $@ + +%.pot: %.c Makefile + @echo " [CC] $@" + $(Q) $(CC) -E $(INCLUDES) $(VALGRIND_CFLAGS) $(PROGS_CFLAGS) -include $(top_builddir)/include/pogen.h $(DEFS) $(WFLAGS) $(CFLAGS) $< >$@ + +%.so: %.o + @echo " [CC] $<" + $(Q) $(CC) -c $(CFLAGS) $(CLDFLAGS) $< $(LIBS) -o $@ + +ifneq (,$(LIB_SHARED)) + +TARGETS += $(LIB_SHARED).$(LIB_VERSION) +$(LIB_SHARED).$(LIB_VERSION): $(OBJECTS) $(LDDEPS) + @echo " [CC] $@" +ifeq ("@LIB_SUFFIX@","so") + $(Q) $(CC) -shared -Wl,-soname,$(notdir $@) \ + $(CFLAGS) $(CLDFLAGS) $(OBJECTS) $(LIBS) -o $@ +endif +ifeq ("@LIB_SUFFIX@","dylib") + $(Q) $(CC) -dynamiclib -dylib_current_version,$(LIB_VERSION) \ + $(CFLAGS) $(CLDFLAGS) $(OBJECTS) $(LIBS) -o $@ +endif + +$(LIB_SHARED): $(LIB_SHARED).$(LIB_VERSION) + @echo " [LN] $<" + $(Q) $(LN_S) -f $(<F) $@ + +CLEAN_TARGETS += $(LDDEPS) .exported_symbols_generated + +install_lib_shared: $(LIB_SHARED) + @echo " [INSTALL] $<" + $(Q) $(INSTALL_PROGRAM) -D $< 
$(libdir)/$(<F).$(LIB_VERSION) + $(Q) $(INSTALL_DIR) $(usrlibdir) + $(Q) $(LN_S) -f $(USRLIB_RELPATH)$(<F).$(LIB_VERSION) $(usrlibdir)/$(<F) + +# FIXME: plugins are installed to subdirs +# and for compatibility links in libdir are created +# when the code is fixed links could be removed. +install_dm_plugin: $(LIB_SHARED) + @echo " [INSTALL] $<" + $(Q) $(INSTALL_PROGRAM) -D $< $(libdir)/device-mapper/$(<F) + $(Q) $(LN_S) -f device-mapper/$(<F) $(libdir)/$(<F) + +install_lvm2_plugin: $(LIB_SHARED) + @echo " [INSTALL] $<" + $(Q) $(INSTALL_PROGRAM) -D $< $(libdir)/lvm2/$(<F) + $(Q) $(LN_S) -f lvm2/$(<F) $(libdir)/$(<F) + $(Q) $(LN_S) -f $(<F) $(libdir)/$(<F).$(LIB_VERSION) +endif + +$(LIB_STATIC): $(OBJECTS) + @echo " [AR] $@" + $(Q) $(RM) $@ + $(Q) $(AR) rsv $@ $(OBJECTS) > /dev/null + +%.d: +.PRECIOUS: %.d + +%.mo: %.po + @echo " [MSGFMT] $<" + $(Q) $(MSGFMT) -o $@ $< + +CLEAN_TARGETS += \ + $(SOURCES:%.c=%.d) $(SOURCES:%.c=%.gcno) $(SOURCES:%.c=%.gcda) \ + $(SOURCES2:%.c=%.o) $(SOURCES2:%.c=%.d) $(SOURCES2:%.c=%.gcno) $(SOURCES2:%.c=%.gcda) \ + $(POTFILES) $(CLEAN_CFLOW) + +cleandir: +ifneq (,$(firstword $(CLEAN_DIRS))) + $(RM) -r $(CLEAN_DIRS) +endif + $(RM) $(OBJECTS) $(TARGETS) $(CLEAN_TARGETS) core + +clean: $(SUBDIRS.clean) cleandir + +distclean: cleandir $(SUBDIRS.distclean) +ifneq (,$(firstword $(DISTCLEAN_DIRS))) + $(RM) -r $(DISTCLEAN_DIRS) +endif + $(RM) $(DISTCLEAN_TARGETS) Makefile + +.exported_symbols_generated: $(EXPORTED_HEADER) .exported_symbols $(DEPS) + $(Q) set -e; \ + ( cat $(srcdir)/.exported_symbols; \ + if test -n "$(EXPORTED_HEADER)"; then \ + $(CC) -E -P $(INCLUDES) $(DEFS) $(EXPORTED_HEADER) | \ + $(SED) -ne "/^typedef|}/!s/.*[ *]\($(EXPORTED_FN_PREFIX)_[a-z0-9_]*\)(.*/\1/p"; \ + fi \ + ) > $@ + +EXPORTED_UC := $(shell echo $(EXPORTED_FN_PREFIX) | tr '[a-z]' '[A-Z]') +EXPORTED_SYMBOLS := $(wildcard $(srcdir)/.exported_symbols.Base $(srcdir)/.exported_symbols.$(EXPORTED_UC)_[0-9_]*[0-9]) + +.export.sym: .exported_symbols_generated $(EXPORTED_SYMBOLS) +ifeq (,$(firstword $(EXPORTED_SYMBOLS))) + $(Q) set -e; (echo "Base {"; echo " global:";\ + $(SED) "s/^/ /;s/$$/;/" $<;\ + echo "};";\ + echo "Local {"; echo " local:"; echo " *;"; echo "};";\ + ) > $@ +else + $(Q) set -e;\ + R=$$($(SORT) $^ | uniq -u);\ + test -z "$$R" || { echo "Mismatch between symbols in shared library and lists in .exported_symbols.* files: $$R"; false; } ;\ + ( for i in $$(echo $(EXPORTED_SYMBOLS) | tr ' ' '\n' | $(SORT) -rnt_ -k5 ); do\ + echo "$${i##*.} {"; echo " global:";\ + $(SED) "s/^/ /;s/$$/;/" $$i;\ + echo "};";\ + done;\ + echo "Local {"; echo " local:"; echo " *;"; echo "};";\ + ) > $@ +endif + +ifeq ("@USE_TRACKING@","yes") +ifeq (,$(findstring $(MAKECMDGOALS),cscope.out cflow clean distclean lcov \ + help check check_local check_cluster check_lvmetad check_lvmpolld)) + ifdef SOURCES + -include $(SOURCES:.c=.d) $(CXXSOURCES:.cpp=.d) + endif + ifdef SOURCES2 + -include $(SOURCES2:.c=.d) + endif +endif +endif diff --git a/liblvm/Makefile.in b/liblvm/Makefile.in index be3049a9e..210ee3760 100644 --- a/liblvm/Makefile.in +++ b/liblvm/Makefile.in @@ -43,7 +43,7 @@ LDDEPS += $(top_builddir)/lib/liblvm-internal.a include $(top_builddir)/make.tmpl LDFLAGS += -L$(top_builddir)/lib -L$(top_builddir)/daemons/dmeventd -LIBS += $(LVMINTERNAL_LIBS) -ldevmapper -laio +LIBS += $(LVMINTERNAL_LIBS) -laio .PHONY: install_dynamic install_static install_include install_pkgconfig diff --git a/liblvm/lvm_misc.h b/liblvm/lvm_misc.h index 62f91ce50..b83a44305 100644 --- a/liblvm/lvm_misc.h +++ 
b/liblvm/lvm_misc.h @@ -14,7 +14,7 @@ #ifndef _LVM2APP_MISC_H #define _LVM2APP_MISC_H -#include "libdm/libdevmapper.h" +#include "device_mapper/libdevmapper.h" #include "liblvm/lvm2app.h" #include "lib/metadata/metadata-exported.h" #include "lib/commands/toolcontext.h" diff --git a/liblvm/lvm_prop.c b/liblvm/lvm_prop.c index 199759667..5d274a601 100644 --- a/liblvm/lvm_prop.c +++ b/liblvm/lvm_prop.c @@ -13,7 +13,7 @@ */ #include "lvm_prop.h" -#include "libdm/libdevmapper.h" +#include "device_mapper/libdevmapper.h" #include "lib/metadata/metadata.h" /* lv create parameters */ diff --git a/make.tmpl.in b/make.tmpl.in index 7e3f4a481..c4d9d583f 100644 --- a/make.tmpl.in +++ b/make.tmpl.in @@ -68,7 +68,9 @@ CLDFLAGS += @CLDFLAGS@ ELDFLAGS += @ELDFLAGS@ LDDEPS += @LDDEPS@ LIB_SUFFIX = @LIB_SUFFIX@ -LVMINTERNAL_LIBS = -llvm-internal $(DMEVENT_LIBS) $(DAEMON_LIBS) $(SYSTEMD_LIBS) $(UDEV_LIBS) $(DL_LIBS) $(BLKID_LIBS) +LVMINTERNAL_LIBS=\ + -llvm-internal \ + $(DMEVENT_LIBS) $(DAEMON_LIBS) $(SYSTEMD_LIBS) $(UDEV_LIBS) $(DL_LIBS) $(BLKID_LIBS) DL_LIBS = @DL_LIBS@ RT_LIBS = @RT_LIBS@ M_LIBS = @M_LIBS@ @@ -338,7 +340,7 @@ SUBDIRS.distclean := $(SUBDIRS:=.distclean) TARGETS += $(LIB_SHARED) $(LIB_STATIC) -all: $(SUBDIRS) $(TARGETS) +all: $(top_builddir)/device_mapper/libdevice-mapper.a $(SUBDIRS) $(TARGETS) install: all $(SUBDIRS.install) install_cluster: all $(SUBDIRS.install_cluster) @@ -347,7 +349,7 @@ install_lvm2: $(SUBDIRS.install_lvm2) install_ocf: $(SUBDIRS.install_ocf) cflow: $(SUBDIRS.cflow) -$(SUBDIRS): $(SUBDIRS.device-mapper) +$(SUBDIRS): $(SUBDIRS.device-mapper) $(top_builddir)/device_mapper/libdevice-mapper.a $(MAKE) -C $@ $(SUBDIRS.device-mapper): diff --git a/scripts/Makefile.in b/scripts/Makefile.in index 720ae9f0e..eeac88156 100644 --- a/scripts/Makefile.in +++ b/scripts/Makefile.in @@ -23,12 +23,12 @@ endif include $(top_builddir)/make.tmpl ifeq ("@APPLIB@", "yes") - DEPLIBS += $(top_builddir)/liblvm/liblvm2app.so $(top_builddir)/libdm/libdevmapper.so + DEPLIBS += $(top_builddir)/liblvm/liblvm2app.so LDFLAGS += -L$(top_builddir)/liblvm ifeq ("@BUILD_DMEVENTD@", "yes") LDFLAGS += -Wl,-rpath-link,$(top_builddir)/daemons/dmeventd endif - LVMLIBS = @LVM2APP_LIB@ -ldevmapper -laio + LVMLIBS = @LVM2APP_LIB@ -laio endif LVM_SCRIPTS = lvmdump.sh lvmconf.sh diff --git a/test/api/Makefile.in b/test/api/Makefile.in index e953675a0..6661149d9 100644 --- a/test/api/Makefile.in +++ b/test/api/Makefile.in @@ -41,9 +41,12 @@ endif include $(top_builddir)/make.tmpl DEFS += -D_REENTRANT -DEPLIBS += $(top_builddir)/liblvm/liblvm2app.so $(top_builddir)/libdm/libdevmapper.so -LDFLAGS += -L$(top_builddir)/liblvm -L$(top_builddir)/daemons/dmeventd -LIBS += @LVM2APP_LIB@ $(DMEVENT_LIBS) -ldevmapper +DEPLIBS += $(top_builddir)/liblvm/liblvm2app.so +LDFLAGS+=\ + -L$(top_builddir)/liblvm \ + -L$(top_builddir)/daemons/dmeventd \ + -L$(top_builddir)/device_mapper/libdevice-mapper.a +LIBS += @LVM2APP_LIB@ $(DMEVENT_LIBS) %.t: %.o $(DEPLIBS) $(CC) -o $@ $(<) $(CFLAGS) $(LDFLAGS) $(ELDFLAGS) $(LIBS) diff --git a/test/unit/Makefile.in b/test/unit/Makefile.in index 9d1860882..54b7d723c 100644 --- a/test/unit/Makefile.in +++ b/test/unit/Makefile.in @@ -12,7 +12,7 @@ UNIT_SOURCE=\ base/data-struct/radix-tree.c \ - device-mapper/vdo/status.c \ + device_mapper/vdo/status.c \ \ test/unit/bcache_t.c \ test/unit/bcache_utils_t.c \ @@ -32,9 +32,9 @@ UNIT_SOURCE=\ UNIT_DEPENDS=$(subst .c,.d,$(UNIT_SOURCE)) UNIT_OBJECTS=$(UNIT_SOURCE:%.c=%.o) CLEAN_TARGETS+=$(UNIT_DEPENDS) $(UNIT_OBJECTS) -UNIT_LDLIBS += $(LVMINTERNAL_LIBS) 
-ldevmapper -laio +UNIT_LDLIBS += $(LVMINTERNAL_LIBS) -laio -test/unit/unit-test: $(UNIT_OBJECTS) libdm/libdevmapper.$(LIB_SUFFIX) lib/liblvm-internal.a +test/unit/unit-test: $(UNIT_OBJECTS) device_mapper/libdevice-mapper.a lib/liblvm-internal.a @echo " [LD] $@" $(Q) $(CC) $(CFLAGS) $(LDFLAGS) $(EXTRA_EXEC_LDFLAGS) -L$(top_builddir)/libdm \ -o $@ $(UNIT_OBJECTS) $(UNIT_LDLIBS) diff --git a/test/unit/bitset_t.c b/test/unit/bitset_t.c index 66e0d321d..9b18fcbf0 100644 --- a/test/unit/bitset_t.c +++ b/test/unit/bitset_t.c @@ -13,7 +13,7 @@ */ #include "units.h" -#include "libdm/libdevmapper.h" +#include "device_mapper/libdevmapper.h" enum { NR_BITS = 137 diff --git a/test/unit/config_t.c b/test/unit/config_t.c index e988706d2..21af55146 100644 --- a/test/unit/config_t.c +++ b/test/unit/config_t.c @@ -13,7 +13,7 @@ */ #include "units.h" -#include "libdm/libdevmapper.h" +#include "device_mapper/libdevmapper.h" static void *_mem_init(void) { diff --git a/test/unit/dmlist_t.c b/test/unit/dmlist_t.c index 5a4951e1a..8a9948f72 100644 --- a/test/unit/dmlist_t.c +++ b/test/unit/dmlist_t.c @@ -13,7 +13,7 @@ */ #include "units.h" -#include "libdm/libdevmapper.h" +#include "device_mapper/libdevmapper.h" static void test_dmlist_splice(void *fixture) { diff --git a/test/unit/dmstatus_t.c b/test/unit/dmstatus_t.c index 43fb0bf82..f50dd75c4 100644 --- a/test/unit/dmstatus_t.c +++ b/test/unit/dmstatus_t.c @@ -13,7 +13,7 @@ */ #include "units.h" -#include "libdm/libdevmapper.h" +#include "device_mapper/libdevmapper.h" static void *_mem_init(void) { diff --git a/test/unit/framework.h b/test/unit/framework.h index a90a6e2bd..0a8a5f27b 100644 --- a/test/unit/framework.h +++ b/test/unit/framework.h @@ -1,7 +1,7 @@ #ifndef TEST_UNIT_FRAMEWORK_H #define TEST_UNIT_FRAMEWORK_H -#include "libdm/libdevmapper.h" +#include "device_mapper/libdevmapper.h" #include <stdbool.h> #include <stdint.h> diff --git a/test/unit/matcher_t.c b/test/unit/matcher_t.c index 8405a347f..296c78ad1 100644 --- a/test/unit/matcher_t.c +++ b/test/unit/matcher_t.c @@ -14,7 +14,7 @@ */ #include "units.h" -#include "libdm/libdevmapper.h" +#include "device_mapper/libdevmapper.h" #include "matcher_data.h" diff --git a/test/unit/percent_t.c b/test/unit/percent_t.c index fc168d4e1..43414809a 100644 --- a/test/unit/percent_t.c +++ b/test/unit/percent_t.c @@ -13,7 +13,7 @@ */ #include "units.h" -#include "libdm/libdevmapper.h" +#include "device_mapper/libdevmapper.h" #include <stdio.h> #include <string.h> diff --git a/test/unit/string_t.c b/test/unit/string_t.c index 74886f0bd..3557247e8 100644 --- a/test/unit/string_t.c +++ b/test/unit/string_t.c @@ -13,7 +13,7 @@ */ #include "units.h" -#include "libdm/libdevmapper.h" +#include "device_mapper/libdevmapper.h" #include <stdio.h> #include <string.h> diff --git a/test/unit/vdo_t.c b/test/unit/vdo_t.c index 21ecd1ad8..9c41887ee 100644 --- a/test/unit/vdo_t.c +++ b/test/unit/vdo_t.c @@ -12,7 +12,7 @@ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "device-mapper/vdo/target.h" +#include "device_mapper/vdo/target.h" #include "framework.h" #include "units.h" diff --git a/tools/Makefile.in b/tools/Makefile.in index 103b76732..d4514a289 100644 --- a/tools/Makefile.in +++ b/tools/Makefile.in @@ -95,7 +95,7 @@ ifeq ("@STATIC_LINK@", "yes") INSTALL_CMDLIB_TARGETS += install_cmdlib_static endif -LVMLIBS = $(LVMINTERNAL_LIBS) -ldevmapper -laio +LVMLIBS = $(LVMINTERNAL_LIBS) -laio LIB_VERSION = $(LIB_VERSION_LVM) CLEAN_TARGETS = liblvm2cmd.$(LIB_SUFFIX) $(TARGETS_DM) \ @@ -122,15 +122,15 
@@ device-mapper: $(TARGETS_DM) CFLAGS_dmsetup.o += $(UDEV_CFLAGS) $(EXTRA_EXEC_CFLAGS) -dmsetup: dmsetup.o $(top_builddir)/libdm/libdevmapper.$(LIB_SUFFIX) +dmsetup: dmsetup.o $(top_builddir)/device_mapper/libdevice-mapper.a @echo " [CC] $@" $(Q) $(CC) $(CFLAGS) $(LDFLAGS) $(EXTRA_EXEC_LDFLAGS) $(ELDFLAGS) \ - -o $@ dmsetup.o -ldevmapper $(LIBS) + -o $@ $+ $(LIBS) -lm -dmsetup.static: dmsetup.o $(interfacebuilddir)/libdevmapper.a +dmsetup.static: dmsetup.o $(top_builddir)/device_mapper/libdevice-mapper.a @echo " [CC] $@" $(Q) $(CC) $(CFLAGS) $(LDFLAGS) -static -L$(interfacebuilddir) \ - -o $@ dmsetup.o -ldevmapper $(M_LIBS) $(PTHREAD_LIBS) $(STATIC_LIBS) $(LIBS) + -o $@ $+ $(M_LIBS) $(PTHREAD_LIBS) $(STATIC_LIBS) $(LIBS) all: device-mapper @@ -138,10 +138,10 @@ CFLAGS_lvm.o += $(EXTRA_EXEC_CFLAGS) INCLUDES += -I$(top_builddir)/tools -lvm: $(OBJECTS) lvm.o $(top_builddir)/lib/liblvm-internal.a +lvm: $(OBJECTS) lvm.o $(top_builddir)/lib/liblvm-internal.a $(top_builddir)/device_mapper/libdevice-mapper.a @echo " [CC] $@" - $(Q) $(CC) $(CFLAGS) $(LDFLAGS) $(EXTRA_EXEC_LDFLAGS) $(ELDFLAGS) -o $@ $(OBJECTS) lvm.o \ - $(LVMLIBS) $(READLINE_LIBS) $(LIBS) + $(Q) $(CC) $(CFLAGS) $(LDFLAGS) $(EXTRA_EXEC_LDFLAGS) $(ELDFLAGS) -o $@ $+ \ + $(LVMLIBS) $(READLINE_LIBS) $(LIBS) -lm DEFS_man-generator.o += -DMAN_PAGE_GENERATOR @@ -157,7 +157,7 @@ ifeq ("@BUILD_LVMETAD@", "yes") lvm: $(top_builddir)/libdaemon/client/libdaemonclient.a endif -lvm.static: $(OBJECTS) lvm-static.o $(top_builddir)/lib/liblvm-internal.a $(interfacebuilddir)/libdevmapper.a +lvm.static: $(OBJECTS) lvm-static.o $(top_builddir)/lib/liblvm-internal.a $(top_builddir)/device_mapper/libdevice-mapper.a @echo " [CC] $@" $(Q) $(CC) $(CFLAGS) $(LDFLAGS) -static -L$(interfacebuilddir) -o $@ \ $(OBJECTS) lvm-static.o $(LVMLIBS) $(STATIC_LIBS) $(LIBS) @@ -222,7 +222,6 @@ $(SOURCES:%.c=%.o) $(SOURCES2:%.c=%.o): command-lines-input.h command-count.h cm ifneq ("$(CFLOW_CMD)", "") CFLOW_SOURCES = $(addprefix $(srcdir)/, $(SOURCES)) --include $(top_builddir)/libdm/libdevmapper.cflow -include $(top_builddir)/lib/liblvm-internal.cflow endif diff --git a/tools/dmsetup.c b/tools/dmsetup.c index d3080bba8..5486ed38a 100644 --- a/tools/dmsetup.c +++ b/tools/dmsetup.c @@ -17,7 +17,7 @@ */ #include "tools/tool.h" -#include "libdm/misc/dm-logging.h" +#include "device_mapper/misc/dm-logging.h" #include <ctype.h> #include <dirent.h> diff --git a/tools/tool.h b/tools/tool.h index 656234c34..51d530c76 100644 --- a/tools/tool.h +++ b/tools/tool.h @@ -24,7 +24,7 @@ #include <unistd.h> -#include "libdm/libdevmapper.h" +#include "device_mapper/libdevmapper.h" #include "lib/misc/util.h" #endif /* _LVM_TOOL_H */