summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile.in3
-rwxr-xr-xconfigure3
-rw-r--r--configure.ac1
-rw-r--r--daemons/clvmd/Makefile.in2
-rw-r--r--daemons/cmirrord/Makefile.in1
-rw-r--r--daemons/cmirrord/cluster.h4
-rw-r--r--daemons/cmirrord/functions.h2
-rw-r--r--daemons/dmeventd/Makefile.in7
-rw-r--r--daemons/dmeventd/dmeventd.c2
-rw-r--r--daemons/dmeventd/libdevmapper-event.c6
-rw-r--r--daemons/dmeventd/libdevmapper-event.pc.in1
-rw-r--r--daemons/dmeventd/plugins/lvm2/Makefile.in2
-rw-r--r--daemons/dmeventd/plugins/mirror/Makefile.in2
-rw-r--r--daemons/dmeventd/plugins/raid/Makefile.in2
-rw-r--r--daemons/dmeventd/plugins/snapshot/Makefile.in2
-rw-r--r--daemons/dmeventd/plugins/thin/Makefile.in2
-rw-r--r--daemons/dmfilemapd/Makefile.in3
-rw-r--r--daemons/dmfilemapd/dmfilemapd.c2
-rw-r--r--daemons/lvmetad/Makefile.in12
-rw-r--r--daemons/lvmlockd/Makefile.in2
-rw-r--r--daemons/lvmlockd/lvmlockd-core.c2
-rw-r--r--daemons/lvmpolld/Makefile.in2
-rw-r--r--device-mapper/Makefile20
-rw-r--r--device_mapper/Makefile46
-rw-r--r--device_mapper/datastruct/bitset.c258
-rw-r--r--device_mapper/datastruct/hash.c392
-rw-r--r--device_mapper/datastruct/list.c168
-rw-r--r--device_mapper/ioctl/libdm-iface.c2196
-rw-r--r--device_mapper/ioctl/libdm-targets.h88
-rw-r--r--device_mapper/libdevmapper.h3755
-rw-r--r--device_mapper/libdm-common.c2691
-rw-r--r--device_mapper/libdm-common.h58
-rw-r--r--device_mapper/libdm-config.c1486
-rw-r--r--device_mapper/libdm-deptree.c3853
-rw-r--r--device_mapper/libdm-file.c261
-rw-r--r--device_mapper/libdm-report.c5104
-rw-r--r--device_mapper/libdm-stats.c5095
-rw-r--r--device_mapper/libdm-string.c718
-rw-r--r--device_mapper/libdm-targets.c565
-rw-r--r--device_mapper/libdm-timestamp.c178
-rw-r--r--device_mapper/misc/dm-ioctl.h364
-rw-r--r--device_mapper/misc/dm-log-userspace.h418
-rw-r--r--device_mapper/misc/dm-logging.h34
-rw-r--r--device_mapper/misc/dmlib.h33
-rw-r--r--device_mapper/misc/kdev_t.h22
-rw-r--r--device_mapper/mm/dbg_malloc.c413
-rw-r--r--device_mapper/mm/pool-debug.c292
-rw-r--r--device_mapper/mm/pool-fast.c363
-rw-r--r--device_mapper/mm/pool.c189
-rw-r--r--device_mapper/regex/matcher.c575
-rw-r--r--device_mapper/regex/parse_rx.c667
-rw-r--r--device_mapper/regex/parse_rx.h55
-rw-r--r--device_mapper/regex/ttree.c114
-rw-r--r--device_mapper/regex/ttree.h26
-rw-r--r--device_mapper/vdo/status.c (renamed from device-mapper/vdo/status.c)2
-rw-r--r--device_mapper/vdo/target.h (renamed from device-mapper/vdo/target.h)0
-rw-r--r--lib/config/config.h2
-rw-r--r--lib/device/bcache.c2
-rw-r--r--lib/device/bcache.h2
-rw-r--r--lib/device/dev-cache.c2
-rw-r--r--lib/metadata/pv.h2
-rw-r--r--lib/metadata/vg.h2
-rw-r--r--lib/misc/lib.h2
-rw-r--r--lib/report/properties.h2
-rw-r--r--libdaemon/client/config-util.c2
-rw-r--r--libdaemon/client/daemon-client.c2
-rw-r--r--libdm/Makefile.in2
-rw-r--r--libdm/make.tmpl.in578
-rw-r--r--liblvm/Makefile.in2
-rw-r--r--liblvm/lvm_misc.h2
-rw-r--r--liblvm/lvm_prop.c2
-rw-r--r--make.tmpl.in8
-rw-r--r--scripts/Makefile.in4
-rw-r--r--test/api/Makefile.in9
-rw-r--r--test/unit/Makefile.in6
-rw-r--r--test/unit/bitset_t.c2
-rw-r--r--test/unit/config_t.c2
-rw-r--r--test/unit/dmlist_t.c2
-rw-r--r--test/unit/dmstatus_t.c2
-rw-r--r--test/unit/framework.h2
-rw-r--r--test/unit/matcher_t.c2
-rw-r--r--test/unit/percent_t.c2
-rw-r--r--test/unit/string_t.c2
-rw-r--r--test/unit/vdo_t.c2
-rw-r--r--tools/Makefile.in19
-rw-r--r--tools/dmsetup.c2
-rw-r--r--tools/tool.h2
87 files changed, 31139 insertions, 99 deletions
diff --git a/Makefile.in b/Makefile.in
index 3274f174f..a1c87468b 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -211,8 +211,7 @@ endif
endif
include test/unit/Makefile
-
-include device-mapper/Makefile
+include device_mapper/Makefile
ifneq ($(shell which ctags),)
.PHONY: tags
diff --git a/configure b/configure
index 3b88dadce..60f51d645 100755
--- a/configure
+++ b/configure
@@ -15559,7 +15559,7 @@ _ACEOF
################################################################################
-ac_config_files="$ac_config_files Makefile make.tmpl daemons/Makefile daemons/clvmd/Makefile daemons/cmirrord/Makefile daemons/dmeventd/Makefile daemons/dmeventd/libdevmapper-event.pc daemons/dmeventd/plugins/Makefile daemons/dmeventd/plugins/lvm2/Makefile daemons/dmeventd/plugins/raid/Makefile daemons/dmeventd/plugins/mirror/Makefile daemons/dmeventd/plugins/snapshot/Makefile daemons/dmeventd/plugins/thin/Makefile daemons/dmfilemapd/Makefile daemons/lvmdbusd/Makefile daemons/lvmdbusd/lvmdbusd daemons/lvmdbusd/lvmdb.py daemons/lvmdbusd/lvm_shell_proxy.py daemons/lvmdbusd/path.py daemons/lvmetad/Makefile daemons/lvmpolld/Makefile daemons/lvmlockd/Makefile conf/Makefile conf/example.conf conf/lvmlocal.conf conf/command_profile_template.profile conf/metadata_profile_template.profile include/Makefile lib/Makefile lib/locking/Makefile include/lvm-version.h libdaemon/Makefile libdaemon/client/Makefile libdaemon/server/Makefile libdm/Makefile libdm/libdevmapper.pc liblvm/Makefile liblvm/liblvm2app.pc man/Makefile po/Makefile python/Makefile python/setup.py scripts/blkdeactivate.sh scripts/blk_availability_init_red_hat scripts/blk_availability_systemd_red_hat.service scripts/clvmd_init_red_hat scripts/cmirrord_init_red_hat scripts/com.redhat.lvmdbus1.service scripts/dm_event_systemd_red_hat.service scripts/dm_event_systemd_red_hat.socket scripts/lvm2_cluster_activation_red_hat.sh scripts/lvm2_cluster_activation_systemd_red_hat.service scripts/lvm2_clvmd_systemd_red_hat.service scripts/lvm2_cmirrord_systemd_red_hat.service scripts/lvm2_lvmdbusd_systemd_red_hat.service scripts/lvm2_lvmetad_init_red_hat scripts/lvm2_lvmetad_systemd_red_hat.service scripts/lvm2_lvmetad_systemd_red_hat.socket scripts/lvm2_lvmpolld_init_red_hat scripts/lvm2_lvmpolld_systemd_red_hat.service scripts/lvm2_lvmpolld_systemd_red_hat.socket scripts/lvm2_lvmlockd_systemd_red_hat.service scripts/lvm2_lvmlocking_systemd_red_hat.service scripts/lvm2_monitoring_init_red_hat 
scripts/lvm2_monitoring_systemd_red_hat.service scripts/lvm2_pvscan_systemd_red_hat@.service scripts/lvm2_tmpfiles_red_hat.conf scripts/lvmdump.sh scripts/Makefile test/Makefile test/api/Makefile test/unit/Makefile tools/Makefile udev/Makefile"
+ac_config_files="$ac_config_files Makefile make.tmpl libdm/make.tmpl daemons/Makefile daemons/clvmd/Makefile daemons/cmirrord/Makefile daemons/dmeventd/Makefile daemons/dmeventd/libdevmapper-event.pc daemons/dmeventd/plugins/Makefile daemons/dmeventd/plugins/lvm2/Makefile daemons/dmeventd/plugins/raid/Makefile daemons/dmeventd/plugins/mirror/Makefile daemons/dmeventd/plugins/snapshot/Makefile daemons/dmeventd/plugins/thin/Makefile daemons/dmfilemapd/Makefile daemons/lvmdbusd/Makefile daemons/lvmdbusd/lvmdbusd daemons/lvmdbusd/lvmdb.py daemons/lvmdbusd/lvm_shell_proxy.py daemons/lvmdbusd/path.py daemons/lvmetad/Makefile daemons/lvmpolld/Makefile daemons/lvmlockd/Makefile conf/Makefile conf/example.conf conf/lvmlocal.conf conf/command_profile_template.profile conf/metadata_profile_template.profile include/Makefile lib/Makefile lib/locking/Makefile include/lvm-version.h libdaemon/Makefile libdaemon/client/Makefile libdaemon/server/Makefile libdm/Makefile libdm/libdevmapper.pc liblvm/Makefile liblvm/liblvm2app.pc man/Makefile po/Makefile python/Makefile python/setup.py scripts/blkdeactivate.sh scripts/blk_availability_init_red_hat scripts/blk_availability_systemd_red_hat.service scripts/clvmd_init_red_hat scripts/cmirrord_init_red_hat scripts/com.redhat.lvmdbus1.service scripts/dm_event_systemd_red_hat.service scripts/dm_event_systemd_red_hat.socket scripts/lvm2_cluster_activation_red_hat.sh scripts/lvm2_cluster_activation_systemd_red_hat.service scripts/lvm2_clvmd_systemd_red_hat.service scripts/lvm2_cmirrord_systemd_red_hat.service scripts/lvm2_lvmdbusd_systemd_red_hat.service scripts/lvm2_lvmetad_init_red_hat scripts/lvm2_lvmetad_systemd_red_hat.service scripts/lvm2_lvmetad_systemd_red_hat.socket scripts/lvm2_lvmpolld_init_red_hat scripts/lvm2_lvmpolld_systemd_red_hat.service scripts/lvm2_lvmpolld_systemd_red_hat.socket scripts/lvm2_lvmlockd_systemd_red_hat.service scripts/lvm2_lvmlocking_systemd_red_hat.service scripts/lvm2_monitoring_init_red_hat 
scripts/lvm2_monitoring_systemd_red_hat.service scripts/lvm2_pvscan_systemd_red_hat@.service scripts/lvm2_tmpfiles_red_hat.conf scripts/lvmdump.sh scripts/Makefile test/Makefile test/api/Makefile test/unit/Makefile tools/Makefile udev/Makefile"
cat >confcache <<\_ACEOF
# This file is a shell script that caches the results of configure
@@ -16256,6 +16256,7 @@ do
"include/configure.h") CONFIG_HEADERS="$CONFIG_HEADERS include/configure.h" ;;
"Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;;
"make.tmpl") CONFIG_FILES="$CONFIG_FILES make.tmpl" ;;
+ "libdm/make.tmpl") CONFIG_FILES="$CONFIG_FILES libdm/make.tmpl" ;;
"daemons/Makefile") CONFIG_FILES="$CONFIG_FILES daemons/Makefile" ;;
"daemons/clvmd/Makefile") CONFIG_FILES="$CONFIG_FILES daemons/clvmd/Makefile" ;;
"daemons/cmirrord/Makefile") CONFIG_FILES="$CONFIG_FILES daemons/cmirrord/Makefile" ;;
diff --git a/configure.ac b/configure.ac
index a05f051c4..8dc9c189c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2088,6 +2088,7 @@ dnl -- keep utility scripts running properly
AC_CONFIG_FILES([
Makefile
make.tmpl
+libdm/make.tmpl
daemons/Makefile
daemons/clvmd/Makefile
daemons/cmirrord/Makefile
diff --git a/daemons/clvmd/Makefile.in b/daemons/clvmd/Makefile.in
index 83af00e20..47a3411e4 100644
--- a/daemons/clvmd/Makefile.in
+++ b/daemons/clvmd/Makefile.in
@@ -74,7 +74,7 @@ TARGETS = \
include $(top_builddir)/make.tmpl
-LIBS += $(LVMINTERNAL_LIBS) -ldevmapper $(PTHREAD_LIBS) -laio
+LIBS += $(LVMINTERNAL_LIBS) $(PTHREAD_LIBS) -laio
CFLAGS += -fno-strict-aliasing $(EXTRA_EXEC_CFLAGS)
INSTALL_TARGETS = \
diff --git a/daemons/cmirrord/Makefile.in b/daemons/cmirrord/Makefile.in
index 96e0db8ce..fc0ef6d5a 100644
--- a/daemons/cmirrord/Makefile.in
+++ b/daemons/cmirrord/Makefile.in
@@ -26,7 +26,6 @@ TARGETS = cmirrord
include $(top_builddir)/make.tmpl
-LIBS += -ldevmapper
LMLIBS += $(CPG_LIBS) $(SACKPT_LIBS)
CFLAGS += $(CPG_CFLAGS) $(SACKPT_CFLAGS) $(EXTRA_EXEC_CFLAGS)
LDFLAGS += $(EXTRA_EXEC_LDFLAGS) $(ELDFLAGS)
diff --git a/daemons/cmirrord/cluster.h b/daemons/cmirrord/cluster.h
index 0efbd64bf..54ddd79ec 100644
--- a/daemons/cmirrord/cluster.h
+++ b/daemons/cmirrord/cluster.h
@@ -12,8 +12,8 @@
#ifndef _LVM_CLOG_CLUSTER_H
#define _LVM_CLOG_CLUSTER_H
-#include "libdm/misc/dm-log-userspace.h"
-#include "libdm/libdevmapper.h"
+#include "device_mapper/misc/dm-log-userspace.h"
+#include "device_mapper/libdevmapper.h"
#define DM_ULOG_RESPONSE 0x1000U /* in last byte of 32-bit value */
#define DM_ULOG_CHECKPOINT_READY 21
diff --git a/daemons/cmirrord/functions.h b/daemons/cmirrord/functions.h
index c770459a9..8a7301d01 100644
--- a/daemons/cmirrord/functions.h
+++ b/daemons/cmirrord/functions.h
@@ -12,7 +12,7 @@
#ifndef _LVM_CLOG_FUNCTIONS_H
#define _LVM_CLOG_FUNCTIONS_H
-#include "libdm/misc/dm-log-userspace.h"
+#include "device_mapper/misc/dm-log-userspace.h"
#include "cluster.h"
#define LOG_RESUMED 1
diff --git a/daemons/dmeventd/Makefile.in b/daemons/dmeventd/Makefile.in
index d5241ebfb..e43bb2b2a 100644
--- a/daemons/dmeventd/Makefile.in
+++ b/daemons/dmeventd/Makefile.in
@@ -57,13 +57,13 @@ all: device-mapper
device-mapper: $(TARGETS)
CFLAGS_dmeventd.o += $(EXTRA_EXEC_CFLAGS)
-LIBS += -ldevmapper $(PTHREAD_LIBS)
+LIBS += $(PTHREAD_LIBS)
dmeventd: $(LIB_SHARED) dmeventd.o
$(CC) $(CFLAGS) -L. $(LDFLAGS) $(EXTRA_EXEC_LDFLAGS) $(ELDFLAGS) dmeventd.o \
- -o $@ $(DL_LIBS) $(DMEVENT_LIBS) $(LIBS)
+ -o $@ $(DL_LIBS) $(DMEVENT_LIBS) $(top_builddir)/device_mapper/libdevice-mapper.a $(LIBS) -lm
-dmeventd.static: $(LIB_STATIC) dmeventd.o $(interfacebuilddir)/libdevmapper.a
+dmeventd.static: $(LIB_STATIC) dmeventd.o
$(CC) $(CFLAGS) $(LDFLAGS) -static -L. -L$(interfacebuilddir) dmeventd.o \
-o $@ $(DL_LIBS) $(DMEVENT_LIBS) $(LIBS) $(STATIC_LIBS)
@@ -73,7 +73,6 @@ endif
ifneq ("$(CFLOW_CMD)", "")
CFLOW_SOURCES = $(addprefix $(srcdir)/, $(SOURCES))
--include $(top_builddir)/libdm/libdevmapper.cflow
-include $(top_builddir)/lib/liblvm-internal.cflow
-include $(top_builddir)/lib/liblvm2cmd.cflow
-include $(top_builddir)/daemons/dmeventd/$(LIB_NAME).cflow
diff --git a/daemons/dmeventd/dmeventd.c b/daemons/dmeventd/dmeventd.c
index 438cf1668..533186b97 100644
--- a/daemons/dmeventd/dmeventd.c
+++ b/daemons/dmeventd/dmeventd.c
@@ -16,7 +16,7 @@
* dmeventd - dm event daemon to monitor active mapped devices
*/
-#include "libdm/misc/dm-logging.h"
+#include "device_mapper/misc/dm-logging.h"
#include "daemons/dmeventd/libdevmapper-event.h"
#include "dmeventd.h"
diff --git a/daemons/dmeventd/libdevmapper-event.c b/daemons/dmeventd/libdevmapper-event.c
index a75924c27..7f0722831 100644
--- a/daemons/dmeventd/libdevmapper-event.c
+++ b/daemons/dmeventd/libdevmapper-event.c
@@ -12,10 +12,11 @@
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#include "libdm/misc/dm-logging.h"
-#include "libdm/misc/dmlib.h"
+#include "device_mapper/misc/dm-logging.h"
+#include "device_mapper/misc/dmlib.h"
#include "daemons/dmeventd/libdevmapper-event.h"
#include "dmeventd.h"
+#include "lib/misc/intl.h"
#include <fcntl.h>
#include <sys/file.h>
@@ -25,6 +26,7 @@
#include <arpa/inet.h> /* for htonl, ntohl */
#include <pthread.h>
#include <syslog.h>
+#include <unistd.h>
static int _debug_level = 0;
static int _use_syslog = 0;
diff --git a/daemons/dmeventd/libdevmapper-event.pc.in b/daemons/dmeventd/libdevmapper-event.pc.in
index 839433fb8..fcad5bca4 100644
--- a/daemons/dmeventd/libdevmapper-event.pc.in
+++ b/daemons/dmeventd/libdevmapper-event.pc.in
@@ -8,4 +8,3 @@ Description: device-mapper event library
Version: @DM_LIB_PATCHLEVEL@
Cflags: -I${includedir}
Libs: -L${libdir} -ldevmapper-event
-Requires.private: devmapper
diff --git a/daemons/dmeventd/plugins/lvm2/Makefile.in b/daemons/dmeventd/plugins/lvm2/Makefile.in
index 956d31edc..7e4696c68 100644
--- a/daemons/dmeventd/plugins/lvm2/Makefile.in
+++ b/daemons/dmeventd/plugins/lvm2/Makefile.in
@@ -24,7 +24,7 @@ LIB_VERSION = $(LIB_VERSION_LVM)
include $(top_builddir)/make.tmpl
-LIBS += @LVM2CMD_LIB@ -ldevmapper $(PTHREAD_LIBS)
+LIBS += @LVM2CMD_LIB@ $(PTHREAD_LIBS)
install_lvm2: install_lib_shared
diff --git a/daemons/dmeventd/plugins/mirror/Makefile.in b/daemons/dmeventd/plugins/mirror/Makefile.in
index 1d9666daa..22832ddf0 100644
--- a/daemons/dmeventd/plugins/mirror/Makefile.in
+++ b/daemons/dmeventd/plugins/mirror/Makefile.in
@@ -30,7 +30,7 @@ CFLOW_LIST_TARGET = $(LIB_NAME).cflow
include $(top_builddir)/make.tmpl
-LIBS += -ldevmapper-event-lvm2 -ldevmapper
+LIBS += -ldevmapper-event-lvm2
install_lvm2: install_dm_plugin
diff --git a/daemons/dmeventd/plugins/raid/Makefile.in b/daemons/dmeventd/plugins/raid/Makefile.in
index 1bca8b2aa..54343b1ca 100644
--- a/daemons/dmeventd/plugins/raid/Makefile.in
+++ b/daemons/dmeventd/plugins/raid/Makefile.in
@@ -29,7 +29,7 @@ CFLOW_LIST_TARGET = $(LIB_NAME).cflow
include $(top_builddir)/make.tmpl
-LIBS += -ldevmapper-event-lvm2 -ldevmapper
+LIBS += -ldevmapper-event-lvm2
install_lvm2: install_dm_plugin
diff --git a/daemons/dmeventd/plugins/snapshot/Makefile.in b/daemons/dmeventd/plugins/snapshot/Makefile.in
index 5eb7a47f1..75f4342ad 100644
--- a/daemons/dmeventd/plugins/snapshot/Makefile.in
+++ b/daemons/dmeventd/plugins/snapshot/Makefile.in
@@ -26,7 +26,7 @@ LIB_VERSION = $(LIB_VERSION_LVM)
include $(top_builddir)/make.tmpl
-LIBS += -ldevmapper-event-lvm2 -ldevmapper
+LIBS += -ldevmapper-event-lvm2
install_lvm2: install_dm_plugin
diff --git a/daemons/dmeventd/plugins/thin/Makefile.in b/daemons/dmeventd/plugins/thin/Makefile.in
index f54ee2da5..9f1c2b34b 100644
--- a/daemons/dmeventd/plugins/thin/Makefile.in
+++ b/daemons/dmeventd/plugins/thin/Makefile.in
@@ -29,7 +29,7 @@ CFLOW_LIST_TARGET = $(LIB_NAME).cflow
include $(top_builddir)/make.tmpl
-LIBS += -ldevmapper-event-lvm2 -ldevmapper
+LIBS += -ldevmapper-event-lvm2
install_lvm2: install_dm_plugin
diff --git a/daemons/dmfilemapd/Makefile.in b/daemons/dmfilemapd/Makefile.in
index 8a4938b22..1afd6b8ed 100644
--- a/daemons/dmfilemapd/Makefile.in
+++ b/daemons/dmfilemapd/Makefile.in
@@ -35,13 +35,12 @@ all: device-mapper
device-mapper: $(TARGETS)
CFLAGS_dmfilemapd.o += $(EXTRA_EXEC_CFLAGS)
-LIBS += -ldevmapper
dmfilemapd: $(LIB_SHARED) dmfilemapd.o
$(CC) $(CFLAGS) $(LDFLAGS) $(EXTRA_EXEC_LDFLAGS) $(ELDFLAGS) \
-o $@ dmfilemapd.o $(DL_LIBS) $(LIBS)
-dmfilemapd.static: $(LIB_STATIC) dmfilemapd.o $(interfacebuilddir)/libdevmapper.a
+dmfilemapd.static: $(LIB_STATIC) dmfilemapd.o
$(CC) $(CFLAGS) $(LDFLAGS) $(ELDFLAGS) -static -L$(interfacebuilddir) \
-o $@ dmfilemapd.o $(DL_LIBS) $(LIBS) $(STATIC_LIBS)
diff --git a/daemons/dmfilemapd/dmfilemapd.c b/daemons/dmfilemapd/dmfilemapd.c
index 7fc95c8a4..4e048fff3 100644
--- a/daemons/dmfilemapd/dmfilemapd.c
+++ b/daemons/dmfilemapd/dmfilemapd.c
@@ -16,7 +16,7 @@
#include "tools/tool.h"
-#include "libdm/misc/dm-logging.h"
+#include "device_mapper/misc/dm-logging.h"
#include "lib/config/defaults.h"
diff --git a/daemons/lvmetad/Makefile.in b/daemons/lvmetad/Makefile.in
index 1d901aabc..f652db8bf 100644
--- a/daemons/lvmetad/Makefile.in
+++ b/daemons/lvmetad/Makefile.in
@@ -32,15 +32,17 @@ CFLAGS_lvmetactl.o += $(EXTRA_EXEC_CFLAGS)
CFLAGS_lvmetad-core.o += $(EXTRA_EXEC_CFLAGS)
INCLUDES += -I$(top_srcdir)/libdaemon/server
LDFLAGS += -L$(top_builddir)/libdaemon/server $(EXTRA_EXEC_LDFLAGS) $(ELDFLAGS)
-LIBS += $(RT_LIBS) $(DAEMON_LIBS) -ldevmapper $(PTHREAD_LIBS)
+LIBS += $(RT_LIBS) $(DAEMON_LIBS) $(PTHREAD_LIBS) -lm
lvmetad: $(OBJECTS) $(top_builddir)/libdaemon/client/libdaemonclient.a \
- $(top_builddir)/libdaemon/server/libdaemonserver.a
- $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJECTS) -ldaemonserver $(LIBS)
+ $(top_builddir)/libdaemon/server/libdaemonserver.a \
+ $(top_builddir)/device_mapper/libdevice-mapper.a
+ $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJECTS) $(top_builddir)/device_mapper/libdevice-mapper.a -ldaemonserver $(LIBS)
lvmetactl: lvmetactl.o $(top_builddir)/libdaemon/client/libdaemonclient.a \
- $(top_builddir)/libdaemon/server/libdaemonserver.a
- $(CC) $(CFLAGS) $(LDFLAGS) -o $@ lvmetactl.o $(LIBS)
+ $(top_builddir)/libdaemon/server/libdaemonserver.a \
+ $(top_builddir)/device_mapper/libdevice-mapper.a
+ $(CC) $(CFLAGS) $(LDFLAGS) -o $@ lvmetactl.o $(top_builddir)/device_mapper/libdevice-mapper.a $(LIBS)
CLEAN_TARGETS += lvmetactl.o
diff --git a/daemons/lvmlockd/Makefile.in b/daemons/lvmlockd/Makefile.in
index 8f16d0652..50463999a 100644
--- a/daemons/lvmlockd/Makefile.in
+++ b/daemons/lvmlockd/Makefile.in
@@ -36,7 +36,7 @@ include $(top_builddir)/make.tmpl
CFLAGS += $(EXTRA_EXEC_CFLAGS)
INCLUDES += -I$(top_srcdir)/libdaemon/server
LDFLAGS += -L$(top_builddir)/libdaemon/server $(EXTRA_EXEC_LDFLAGS) $(ELDFLAGS)
-LIBS += $(RT_LIBS) $(DAEMON_LIBS) -ldevmapper $(PTHREAD_LIBS)
+LIBS += $(RT_LIBS) $(DAEMON_LIBS) $(PTHREAD_LIBS)
lvmlockd: $(OBJECTS) $(top_builddir)/libdaemon/client/libdaemonclient.a \
$(top_builddir)/libdaemon/server/libdaemonserver.a
diff --git a/daemons/lvmlockd/lvmlockd-core.c b/daemons/lvmlockd/lvmlockd-core.c
index e75d02bc0..f1a2a2038 100644
--- a/daemons/lvmlockd/lvmlockd-core.c
+++ b/daemons/lvmlockd/lvmlockd-core.c
@@ -19,7 +19,7 @@
#include "lvm-version.h"
#include "daemons/lvmetad/lvmetad-client.h"
#include "daemons/lvmlockd/lvmlockd-client.h"
-#include "libdm/misc/dm-ioctl.h"
+#include "device_mapper/misc/dm-ioctl.h"
/* #include <assert.h> */
#include <errno.h>
diff --git a/daemons/lvmpolld/Makefile.in b/daemons/lvmpolld/Makefile.in
index 483758dcd..69c4a8d86 100644
--- a/daemons/lvmpolld/Makefile.in
+++ b/daemons/lvmpolld/Makefile.in
@@ -30,7 +30,7 @@ include $(top_builddir)/make.tmpl
CFLAGS += $(EXTRA_EXEC_CFLAGS)
INCLUDES += -I$(top_srcdir)/libdaemon/server
LDFLAGS += -L$(top_builddir)/libdaemon/server $(EXTRA_EXEC_LDFLAGS) $(ELDFLAGS)
-LIBS += $(DAEMON_LIBS) -ldaemonserver -ldevmapper $(PTHREAD_LIBS)
+LIBS += $(DAEMON_LIBS) -ldaemonserver $(PTHREAD_LIBS)
lvmpolld: $(OBJECTS) $(top_builddir)/libdaemon/client/libdaemonclient.a \
$(top_builddir)/libdaemon/server/libdaemonserver.a
diff --git a/device-mapper/Makefile b/device-mapper/Makefile
deleted file mode 100644
index 76e19f020..000000000
--- a/device-mapper/Makefile
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright (C) 2018 Red Hat, Inc. All rights reserved.
-#
-# This file is part of LVM2.
-#
-# This copyrighted material is made available to anyone wishing to use,
-# modify, copy, or redistribute it subject to the terms and conditions
-# of the GNU General Public License v.2.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software Foundation,
-# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-DM_SOURCE=\
- device-mapper/vdo/status.c
-
-DM_DEPENDS=$(subst .c,.d,$(DM_SOURCE))
-DM_OBJECTS=$(DM_SOURCE:%.c=%.o)
-CLEAN_TARGETS+=$(DM_DEPENDS) $(DM_OBJECTS)
-
--include $(DM_DEPENDS)
diff --git a/device_mapper/Makefile b/device_mapper/Makefile
new file mode 100644
index 000000000..999c3babf
--- /dev/null
+++ b/device_mapper/Makefile
@@ -0,0 +1,46 @@
+# Copyright (C) 2018 Red Hat, Inc. All rights reserved.
+#
+# This file is part of the device-mapper userspace tools.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU Lesser General Public License v.2.1.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+DEVICE_MAPPER_SOURCE=\
+ device_mapper/datastruct/bitset.c \
+ device_mapper/datastruct/hash.c \
+ device_mapper/datastruct/list.c \
+ device_mapper/libdm-common.c \
+ device_mapper/libdm-config.c \
+ device_mapper/libdm-deptree.c \
+ device_mapper/libdm-file.c \
+ device_mapper/libdm-report.c \
+ device_mapper/libdm-stats.c \
+ device_mapper/libdm-string.c \
+ device_mapper/libdm-targets.c \
+ device_mapper/libdm-timestamp.c \
+ device_mapper/mm/dbg_malloc.c \
+ device_mapper/mm/pool.c \
+ device_mapper/regex/matcher.c \
+ device_mapper/regex/parse_rx.c \
+ device_mapper/regex/ttree.c \
+ device_mapper/ioctl/libdm-iface.c
+
+DEVICE_MAPPER_DEPENDS=$(subst .c,.d,$(DEVICE_MAPPER_SOURCE))
+DEVICE_MAPPER_OBJECTS=$(subst .c,.o,$(DEVICE_MAPPER_SOURCE))
+CLEAN_TARGETS+=$(DEVICE_MAPPER_DEPENDS) $(DEVICE_MAPPER_OBJECTS)
+
+-include $(DEVICE_MAPPER_DEPENDS)
+
+$(DEVICE_MAPPER_OBJECTS): INCLUDES+=-Idevice_mapper/
+
+device_mapper/libdevice-mapper.a: $(DEVICE_MAPPER_OBJECTS)
+ @echo " [AR] $@"
+ $(Q) $(RM) $@
+ $(Q) $(AR) rsv $@ $(DEVICE_MAPPER_OBJECTS) > /dev/null
+
+CLEAN_TARGETS+=device_mapper/libdevice-mapper.a
diff --git a/device_mapper/datastruct/bitset.c b/device_mapper/datastruct/bitset.c
new file mode 100644
index 000000000..6ae99d3de
--- /dev/null
+++ b/device_mapper/datastruct/bitset.c
@@ -0,0 +1,258 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "misc/dmlib.h"
+
+#include <ctype.h>
+
+/* FIXME: calculate this. */
+#define INT_SHIFT 5
+
+dm_bitset_t dm_bitset_create(struct dm_pool *mem, unsigned num_bits)
+{
+ unsigned n = (num_bits / DM_BITS_PER_INT) + 2;
+ size_t size = sizeof(int) * n;
+ dm_bitset_t bs;
+
+ if (mem)
+ bs = dm_pool_zalloc(mem, size);
+ else
+ bs = dm_zalloc(size);
+
+ if (!bs)
+ return NULL;
+
+ *bs = num_bits;
+
+ return bs;
+}
+
+void dm_bitset_destroy(dm_bitset_t bs)
+{
+ dm_free(bs);
+}
+
+int dm_bitset_equal(dm_bitset_t in1, dm_bitset_t in2)
+{
+ int i;
+
+ for (i = (in1[0] / DM_BITS_PER_INT) + 1; i; i--)
+ if (in1[i] != in2[i])
+ return 0;
+
+ return 1;
+}
+
+void dm_bit_and(dm_bitset_t out, dm_bitset_t in1, dm_bitset_t in2)
+{
+ int i;
+
+ for (i = (in1[0] / DM_BITS_PER_INT) + 1; i; i--)
+ out[i] = in1[i] & in2[i];
+}
+void dm_bit_union(dm_bitset_t out, dm_bitset_t in1, dm_bitset_t in2)
+{
+ int i;
+ for (i = (in1[0] / DM_BITS_PER_INT) + 1; i; i--)
+ out[i] = in1[i] | in2[i];
+}
+
+static int _test_word(uint32_t test, int bit)
+{
+ uint32_t tb = test >> bit;
+
+ return (tb ? ffs(tb) + bit - 1 : -1);
+}
+
+static int _test_word_rev(uint32_t test, int bit)
+{
+ uint32_t tb = test << (DM_BITS_PER_INT - 1 - bit);
+
+ return (tb ? bit - clz(tb) : -1);
+}
+
+int dm_bit_get_next(dm_bitset_t bs, int last_bit)
+{
+ int bit, word;
+ uint32_t test;
+
+ last_bit++; /* otherwise we'll return the same bit again */
+
+ /*
+ * bs[0] holds number of bits
+ */
+ while (last_bit < (int) bs[0]) {
+ word = last_bit >> INT_SHIFT;
+ test = bs[word + 1];
+ bit = last_bit & (DM_BITS_PER_INT - 1);
+
+ if ((bit = _test_word(test, bit)) >= 0)
+ return (word * DM_BITS_PER_INT) + bit;
+
+ last_bit = last_bit - (last_bit & (DM_BITS_PER_INT - 1)) +
+ DM_BITS_PER_INT;
+ }
+
+ return -1;
+}
+
+int dm_bit_get_prev(dm_bitset_t bs, int last_bit)
+{
+ int bit, word;
+ uint32_t test;
+
+ last_bit--; /* otherwise we'll return the same bit again */
+
+ /*
+ * bs[0] holds number of bits
+ */
+ while (last_bit >= 0) {
+ word = last_bit >> INT_SHIFT;
+ test = bs[word + 1];
+ bit = last_bit & (DM_BITS_PER_INT - 1);
+
+ if ((bit = _test_word_rev(test, bit)) >= 0)
+ return (word * DM_BITS_PER_INT) + bit;
+
+ last_bit = (last_bit & ~(DM_BITS_PER_INT - 1)) - 1;
+ }
+
+ return -1;
+}
+
+int dm_bit_get_first(dm_bitset_t bs)
+{
+ return dm_bit_get_next(bs, -1);
+}
+
+int dm_bit_get_last(dm_bitset_t bs)
+{
+ return dm_bit_get_prev(bs, bs[0] + 1);
+}
+
+/*
+ * Based on the Linux kernel __bitmap_parselist from lib/bitmap.c
+ */
+dm_bitset_t dm_bitset_parse_list(const char *str, struct dm_pool *mem,
+ size_t min_num_bits)
+{
+ unsigned a, b;
+ int c, old_c, totaldigits, ndigits, nmaskbits;
+ int at_start, in_range;
+ dm_bitset_t mask = NULL;
+ const char *start = str;
+ size_t len;
+
+scan:
+ len = strlen(str);
+ totaldigits = c = 0;
+ nmaskbits = 0;
+ do {
+ at_start = 1;
+ in_range = 0;
+ a = b = 0;
+ ndigits = totaldigits;
+
+ /* Get the next value or range of values */
+ while (len) {
+ old_c = c;
+ c = *str++;
+ len--;
+ if (isspace(c))
+ continue;
+
+ /* A '\0' or a ',' signal the end of a value or range */
+ if (c == '\0' || c == ',')
+ break;
+ /*
+ * whitespaces between digits are not allowed,
+ * but it's ok if whitespaces are on head or tail.
+ * when old_c is whilespace,
+ * if totaldigits == ndigits, whitespace is on head.
+ * if whitespace is on tail, it should not run here.
+ * as c was ',' or '\0',
+ * the last code line has broken the current loop.
+ */
+ if ((totaldigits != ndigits) && isspace(old_c))
+ goto_bad;
+
+ if (c == '-') {
+ if (at_start || in_range)
+ goto_bad;
+ b = 0;
+ in_range = 1;
+ at_start = 1;
+ continue;
+ }
+
+ if (!isdigit(c))
+ goto_bad;
+
+ b = b * 10 + (c - '0');
+ if (!in_range)
+ a = b;
+ at_start = 0;
+ totaldigits++;
+ }
+ if (ndigits == totaldigits)
+ continue;
+ /* if no digit is after '-', it's wrong */
+ if (at_start && in_range)
+ goto_bad;
+ if (!(a <= b))
+ goto_bad;
+ if (b >= nmaskbits)
+ nmaskbits = b + 1;
+ while ((a <= b) && mask) {
+ dm_bit_set(mask, a);
+ a++;
+ }
+ } while (len && c == ',');
+
+ if (!mask) {
+ if (min_num_bits && (nmaskbits < min_num_bits))
+ nmaskbits = min_num_bits;
+
+ if (!(mask = dm_bitset_create(mem, nmaskbits)))
+ goto_bad;
+ str = start;
+ goto scan;
+ }
+
+ return mask;
+bad:
+ if (mask) {
+ if (mem)
+ dm_pool_free(mem, mask);
+ else
+ dm_bitset_destroy(mask);
+ }
+ return NULL;
+}
+
+#if defined(__GNUC__)
+/*
+ * Maintain backward compatibility with older versions that did not
+ * accept a 'min_num_bits' argument to dm_bitset_parse_list().
+ */
+dm_bitset_t dm_bitset_parse_list_v1_02_129(const char *str, struct dm_pool *mem);
+dm_bitset_t dm_bitset_parse_list_v1_02_129(const char *str, struct dm_pool *mem)
+{
+ return dm_bitset_parse_list(str, mem, 0);
+}
+
+#else /* if defined(__GNUC__) */
+
+#endif
diff --git a/device_mapper/datastruct/hash.c b/device_mapper/datastruct/hash.c
new file mode 100644
index 000000000..9b9c939f4
--- /dev/null
+++ b/device_mapper/datastruct/hash.c
@@ -0,0 +1,392 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "misc/dmlib.h"
+
+/*
+ * Chain entry for one key/value pair.  The key bytes are stored inline
+ * at the end of the allocation (C89-style zero-length array member).
+ */
+struct dm_hash_node {
+ struct dm_hash_node *next; /* next entry in this slot's chain */
+ void *data; /* caller-owned value pointer */
+ unsigned data_len; /* only used by the *_with_val variants */
+ unsigned keylen; /* number of key[] bytes */
+ char key[0];
+};
+
+/* Hash table: power-of-two array of singly-linked chains. */
+struct dm_hash_table {
+ unsigned num_nodes; /* total entries across all chains */
+ unsigned num_slots; /* always a power of two (see dm_hash_create) */
+ struct dm_hash_node **slots;
+};
+
+/* Permutation of the Integers 0 through 255 */
+/* Used by _hash() to scramble each input byte before mixing. */
+static unsigned char _nums[] = {
+ 1, 14, 110, 25, 97, 174, 132, 119, 138, 170, 125, 118, 27, 233, 140, 51,
+ 87, 197, 177, 107, 234, 169, 56, 68, 30, 7, 173, 73, 188, 40, 36, 65,
+ 49, 213, 104, 190, 57, 211, 148, 223, 48, 115, 15, 2, 67, 186, 210, 28,
+ 12, 181, 103, 70, 22, 58, 75, 78, 183, 167, 238, 157, 124, 147, 172,
+ 144,
+ 176, 161, 141, 86, 60, 66, 128, 83, 156, 241, 79, 46, 168, 198, 41, 254,
+ 178, 85, 253, 237, 250, 154, 133, 88, 35, 206, 95, 116, 252, 192, 54,
+ 221,
+ 102, 218, 255, 240, 82, 106, 158, 201, 61, 3, 89, 9, 42, 155, 159, 93,
+ 166, 80, 50, 34, 175, 195, 100, 99, 26, 150, 16, 145, 4, 33, 8, 189,
+ 121, 64, 77, 72, 208, 245, 130, 122, 143, 55, 105, 134, 29, 164, 185,
+ 194,
+ 193, 239, 101, 242, 5, 171, 126, 11, 74, 59, 137, 228, 108, 191, 232,
+ 139,
+ 6, 24, 81, 20, 127, 17, 91, 92, 251, 151, 225, 207, 21, 98, 113, 112,
+ 84, 226, 18, 214, 199, 187, 13, 32, 94, 220, 224, 212, 247, 204, 196,
+ 43,
+ 249, 236, 45, 244, 111, 182, 153, 136, 129, 90, 217, 202, 19, 165, 231,
+ 71,
+ 230, 142, 96, 227, 62, 179, 246, 114, 162, 53, 160, 215, 205, 180, 47,
+ 109,
+ 44, 38, 31, 149, 135, 0, 216, 52, 63, 23, 37, 69, 39, 117, 146, 184,
+ 163, 200, 222, 235, 248, 243, 219, 10, 152, 131, 123, 229, 203, 76, 120,
+ 209
+};
+
+/*
+ * Allocate a chain node and copy 'len' key bytes into it.
+ * Returns NULL on allocation failure.
+ * NOTE: next/data/data_len are left for the caller to initialise.
+ */
+static struct dm_hash_node *_create_node(const char *str, unsigned len)
+{
+ struct dm_hash_node *n = dm_malloc(sizeof(*n) + len);
+
+ if (n) {
+ memcpy(n->key, str, len);
+ n->keylen = len;
+ }
+
+ return n;
+}
+
+/*
+ * Hash 'len' key bytes: each byte is scrambled through _nums[] and
+ * accumulated with a shift/xor scheme that folds overflow bits back in
+ * (resembles PJW/ELF-style hashing).
+ */
+static unsigned long _hash(const char *str, unsigned len)
+{
+ unsigned long h = 0, g;
+ unsigned i;
+
+ for (i = 0; i < len; i++) {
+ h <<= 4;
+ h += _nums[(unsigned char) *str++];
+ g = h & ((unsigned long) 0xf << 16u);
+ if (g) {
+ h ^= g >> 16u;
+ h ^= g >> 5u;
+ }
+ }
+
+ return h;
+}
+
+/*
+ * Create a hash table.  'size_hint' is rounded up to the next power of
+ * two (minimum 16) so that slot selection can use a simple bit mask.
+ * Returns NULL on allocation failure.
+ */
+struct dm_hash_table *dm_hash_create(unsigned size_hint)
+{
+ size_t len;
+ unsigned new_size = 16u;
+ struct dm_hash_table *hc = dm_zalloc(sizeof(*hc));
+
+ if (!hc)
+ return_0;
+
+ /* round size hint up to a power of two */
+ /* NOTE(review): a size_hint above the largest unsigned power of two
+ * would wrap new_size to 0 and loop forever - callers pass small hints. */
+ while (new_size < size_hint)
+ new_size = new_size << 1;
+
+ hc->num_slots = new_size;
+ len = sizeof(*(hc->slots)) * new_size;
+ if (!(hc->slots = dm_zalloc(len)))
+ goto_bad;
+
+ return hc;
+
+ bad:
+ dm_free(hc->slots);
+ dm_free(hc);
+ return 0;
+}
+
+/* Free every chained node in every slot; the slot array is untouched. */
+static void _free_nodes(struct dm_hash_table *t)
+{
+ struct dm_hash_node *c, *n;
+ unsigned i;
+
+ for (i = 0; i < t->num_slots; i++)
+ for (c = t->slots[i]; c; c = n) {
+ n = c->next; /* save link before freeing c */
+ dm_free(c);
+ }
+}
+
+/*
+ * Destroy the table: all nodes, the slot array, then the table itself.
+ * Stored 'data' pointers belong to the caller and are not freed here.
+ */
+void dm_hash_destroy(struct dm_hash_table *t)
+{
+ _free_nodes(t);
+ dm_free(t->slots);
+ dm_free(t);
+}
+
+/*
+ * Return the address of the link that points at the node with this key
+ * (so callers can overwrite or unlink in place), or the address of the
+ * terminating NULL link if the key is not present.
+ */
+static struct dm_hash_node **_find(struct dm_hash_table *t, const void *key,
+ uint32_t len)
+{
+ /* num_slots is a power of two, so masking selects the slot. */
+ unsigned h = _hash(key, len) & (t->num_slots - 1);
+ struct dm_hash_node **c;
+
+ for (c = &t->slots[h]; *c; c = &((*c)->next)) {
+ if ((*c)->keylen != len)
+ continue;
+
+ if (!memcmp(key, (*c)->key, len))
+ break;
+ }
+
+ return c;
+}
+
+/* Look up the value stored under a binary key; NULL if absent. */
+void *dm_hash_lookup_binary(struct dm_hash_table *t, const void *key,
+ uint32_t len)
+{
+ struct dm_hash_node **c = _find(t, key, len);
+
+ return *c ? (*c)->data : 0;
+}
+
+/*
+ * Insert, or overwrite the value of an existing entry with this key.
+ * Returns 1 on success, 0 on allocation failure.
+ */
+int dm_hash_insert_binary(struct dm_hash_table *t, const void *key,
+ uint32_t len, void *data)
+{
+ struct dm_hash_node **c = _find(t, key, len);
+
+ if (*c)
+ (*c)->data = data;
+ else {
+ struct dm_hash_node *n = _create_node(key, len);
+
+ if (!n)
+ return 0;
+
+ n->data = data;
+ n->next = 0;
+ *c = n;
+ t->num_nodes++;
+ }
+
+ return 1;
+}
+
+/*
+ * Remove the entry with this key if present (no-op otherwise).
+ * The stored data pointer itself is not freed.
+ */
+void dm_hash_remove_binary(struct dm_hash_table *t, const void *key,
+ uint32_t len)
+{
+ struct dm_hash_node **c = _find(t, key, len);
+
+ if (*c) {
+ struct dm_hash_node *old = *c;
+ *c = (*c)->next;
+ dm_free(old);
+ t->num_nodes--;
+ }
+}
+
+/* String-keyed wrappers: the terminating NUL is included in the key. */
+void *dm_hash_lookup(struct dm_hash_table *t, const char *key)
+{
+ return dm_hash_lookup_binary(t, key, strlen(key) + 1);
+}
+
+int dm_hash_insert(struct dm_hash_table *t, const char *key, void *data)
+{
+ return dm_hash_insert_binary(t, key, strlen(key) + 1, data);
+}
+
+void dm_hash_remove(struct dm_hash_table *t, const char *key)
+{
+ dm_hash_remove_binary(t, key, strlen(key) + 1);
+}
+
+/*
+ * Like _find(), but only match a node whose key AND stored value both
+ * match; used by the allow-multiple API below.  Unlike _find(), this
+ * returns NULL (not the tail link) when no such node exists.
+ */
+static struct dm_hash_node **_find_str_with_val(struct dm_hash_table *t,
+ const void *key, const void *val,
+ uint32_t len, uint32_t val_len)
+{
+ struct dm_hash_node **c;
+ unsigned h;
+
+ h = _hash(key, len) & (t->num_slots - 1);
+
+ for (c = &t->slots[h]; *c; c = &((*c)->next)) {
+ if ((*c)->keylen != len)
+ continue;
+
+ if (!memcmp(key, (*c)->key, len) && (*c)->data) {
+ if (((*c)->data_len == val_len) &&
+ !memcmp(val, (*c)->data, val_len))
+ return c;
+ }
+ }
+
+ return NULL;
+}
+
+/*
+ * Insert without checking for an existing entry with the same key, so
+ * multiple (key, val) pairs may coexist.  Returns 1 on success, 0 on
+ * allocation failure.
+ */
+int dm_hash_insert_allow_multiple(struct dm_hash_table *t, const char *key,
+ const void *val, uint32_t val_len)
+{
+ struct dm_hash_node *n;
+ struct dm_hash_node *first;
+ int len = strlen(key) + 1;
+ unsigned h;
+
+ n = _create_node(key, len);
+ if (!n)
+ return 0;
+
+ n->data = (void *)val;
+ n->data_len = val_len;
+
+ h = _hash(key, len) & (t->num_slots - 1);
+
+ /* Prepend, so the most recent insertion is found first. */
+ first = t->slots[h];
+
+ if (first)
+ n->next = first;
+ else
+ n->next = 0;
+ t->slots[h] = n;
+
+ t->num_nodes++;
+ return 1;
+}
+
+/*
+ * Look through multiple entries with the same key for one that has a
+ * matching val and return that. If none have matching val, return NULL.
+ */
+void *dm_hash_lookup_with_val(struct dm_hash_table *t, const char *key,
+ const void *val, uint32_t val_len)
+{
+ struct dm_hash_node **c;
+
+ c = _find_str_with_val(t, key, val, strlen(key) + 1, val_len);
+
+ return (c && *c) ? (*c)->data : 0;
+}
+
+/*
+ * Look through multiple entries with the same key for one that has a
+ * matching val and remove that.  The stored data pointer is not freed.
+ */
+void dm_hash_remove_with_val(struct dm_hash_table *t, const char *key,
+ const void *val, uint32_t val_len)
+{
+ struct dm_hash_node **c;
+
+ c = _find_str_with_val(t, key, val, strlen(key) + 1, val_len);
+
+ if (c && *c) {
+ struct dm_hash_node *old = *c;
+ *c = (*c)->next;
+ dm_free(old);
+ t->num_nodes--;
+ }
+}
+
+/*
+ * Look up the value for a key and count how many
+ * entries have the same key.
+ *
+ * If no entries have key, return NULL and set count to 0.
+ *
+ * If one entry has the key, the function returns the val,
+ * and sets count to 1.
+ *
+ * If N entries have the key, the function returns the val
+ * from the first entry, and sets count to N.
+ */
+void *dm_hash_lookup_with_count(struct dm_hash_table *t, const char *key, int *count)
+{
+ struct dm_hash_node **c;
+ struct dm_hash_node **c1 = NULL;
+ uint32_t len = strlen(key) + 1;
+ unsigned h;
+
+ *count = 0;
+
+ h = _hash(key, len) & (t->num_slots - 1);
+
+ for (c = &t->slots[h]; *c; c = &((*c)->next)) {
+ if ((*c)->keylen != len)
+ continue;
+
+ if (!memcmp(key, (*c)->key, len)) {
+ (*count)++;
+ /* Remember the first matching entry only. */
+ if (!c1)
+ c1 = c;
+ }
+ }
+
+ if (!c1)
+ return NULL;
+ else
+ /* *c1 is non-NULL whenever c1 was set; test is belt-and-braces. */
+ return *c1 ? (*c1)->data : 0;
+}
+
+/* Number of entries currently stored (duplicate keys counted). */
+unsigned dm_hash_get_num_entries(struct dm_hash_table *t)
+{
+ return t->num_nodes;
+}
+
+/* Apply f to the data pointer of every entry, slot by slot. */
+void dm_hash_iter(struct dm_hash_table *t, dm_hash_iterate_fn f)
+{
+ struct dm_hash_node *c, *n;
+ unsigned i;
+
+ for (i = 0; i < t->num_slots; i++)
+ for (c = t->slots[i]; c; c = n) {
+ n = c->next; /* save link in case f disturbs c */
+ f(c->data);
+ }
+}
+
+/* Empty the table but keep the slot array for reuse. */
+void dm_hash_wipe(struct dm_hash_table *t)
+{
+ _free_nodes(t);
+ memset(t->slots, 0, sizeof(struct dm_hash_node *) * t->num_slots);
+ t->num_nodes = 0u;
+}
+
+/* Accessors for iteration via dm_hash_get_first/dm_hash_get_next. */
+char *dm_hash_get_key(struct dm_hash_table *t __attribute__((unused)),
+ struct dm_hash_node *n)
+{
+ return n->key;
+}
+
+void *dm_hash_get_data(struct dm_hash_table *t __attribute__((unused)),
+ struct dm_hash_node *n)
+{
+ return n->data;
+}
+
+/* First chain head at or after slot s, or NULL if none remain. */
+static struct dm_hash_node *_next_slot(struct dm_hash_table *t, unsigned s)
+{
+ struct dm_hash_node *c = NULL;
+ unsigned i;
+
+ for (i = s; i < t->num_slots && !c; i++)
+ c = t->slots[i];
+
+ return c;
+}
+
+struct dm_hash_node *dm_hash_get_first(struct dm_hash_table *t)
+{
+ return _next_slot(t, 0);
+}
+
+/* Walk the current chain, then move on to the next non-empty slot. */
+struct dm_hash_node *dm_hash_get_next(struct dm_hash_table *t, struct dm_hash_node *n)
+{
+ unsigned h = _hash(n->key, n->keylen) & (t->num_slots - 1);
+
+ return n->next ? n->next : _next_slot(t, h + 1);
+}
diff --git a/device_mapper/datastruct/list.c b/device_mapper/datastruct/list.c
new file mode 100644
index 000000000..86c3e4ef8
--- /dev/null
+++ b/device_mapper/datastruct/list.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "misc/dmlib.h"
+#include <assert.h>
+
+/*
+ * Initialise a list before use.
+ * The list head's next and previous pointers point back to itself.
+ */
+void dm_list_init(struct dm_list *head)
+{
+ head->n = head->p = head;
+}
+
+/*
+ * Insert an element before 'head'.
+ * If 'head' is the list head, this adds an element to the end of the list.
+ */
+void dm_list_add(struct dm_list *head, struct dm_list *elem)
+{
+ /* Catch use of an uninitialised list head. */
+ assert(head->n);
+
+ elem->n = head;
+ elem->p = head->p;
+
+ head->p->n = elem;
+ head->p = elem;
+}
+
+/*
+ * Insert an element after 'head'.
+ * If 'head' is the list head, this adds an element to the front of the list.
+ */
+void dm_list_add_h(struct dm_list *head, struct dm_list *elem)
+{
+ /* Catch use of an uninitialised list head. */
+ assert(head->n);
+
+ elem->n = head->n;
+ elem->p = head;
+
+ head->n->p = elem;
+ head->n = elem;
+}
+
+/*
+ * Delete an element from its list.
+ * Note that this doesn't change the element itself - it may still be safe
+ * to follow its pointers.
+ */
+void dm_list_del(struct dm_list *elem)
+{
+ elem->n->p = elem->p;
+ elem->p->n = elem->n;
+}
+
+/*
+ * Remove an element from existing list and insert before 'head'.
+ */
+void dm_list_move(struct dm_list *head, struct dm_list *elem)
+{
+ dm_list_del(elem);
+ dm_list_add(head, elem);
+}
+
+/*
+ * Is the list empty?
+ */
+int dm_list_empty(const struct dm_list *head)
+{
+ return head->n == head;
+}
+
+/*
+ * Is this the first element of the list?
+ */
+int dm_list_start(const struct dm_list *head, const struct dm_list *elem)
+{
+ return elem->p == head;
+}
+
+/*
+ * Is this the last element of the list?
+ */
+int dm_list_end(const struct dm_list *head, const struct dm_list *elem)
+{
+ return elem->n == head;
+}
+
+/*
+ * Return first element of the list or NULL if empty
+ */
+struct dm_list *dm_list_first(const struct dm_list *head)
+{
+ return (dm_list_empty(head) ? NULL : head->n);
+}
+
+/*
+ * Return last element of the list or NULL if empty
+ */
+struct dm_list *dm_list_last(const struct dm_list *head)
+{
+ return (dm_list_empty(head) ? NULL : head->p);
+}
+
+/*
+ * Return the previous element of the list, or NULL if we've reached the start.
+ */
+struct dm_list *dm_list_prev(const struct dm_list *head, const struct dm_list *elem)
+{
+ return (dm_list_start(head, elem) ? NULL : elem->p);
+}
+
+/*
+ * Return the next element of the list, or NULL if we've reached the end.
+ */
+struct dm_list *dm_list_next(const struct dm_list *head, const struct dm_list *elem)
+{
+ return (dm_list_end(head, elem) ? NULL : elem->n);
+}
+
+/*
+ * Return the number of elements in a list by walking it.
+ * O(n) - the size is not cached anywhere.
+ */
+unsigned int dm_list_size(const struct dm_list *head)
+{
+ unsigned int s = 0;
+ const struct dm_list *v;
+
+ dm_list_iterate(v, head)
+ s++;
+
+ return s;
+}
+
+/*
+ * Join two lists together.
+ * This moves all the elements of the list 'head1' to the end of the list
+ * 'head', leaving 'head1' empty.
+ * Both heads must already be initialised.
+ */
+void dm_list_splice(struct dm_list *head, struct dm_list *head1)
+{
+ assert(head->n);
+ assert(head1->n);
+
+ if (dm_list_empty(head1))
+ return;
+
+ head1->p->n = head;
+ head1->n->p = head->p;
+
+ head->p->n = head1->n;
+ head->p = head1->p;
+
+ dm_list_init(head1);
+}
diff --git a/device_mapper/ioctl/libdm-iface.c b/device_mapper/ioctl/libdm-iface.c
new file mode 100644
index 000000000..4825f1d17
--- /dev/null
+++ b/device_mapper/ioctl/libdm-iface.c
@@ -0,0 +1,2196 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2013 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "misc/dmlib.h"
+#include "libdm-targets.h"
+#include "libdm-common.h"
+
+#include <stddef.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <sys/ioctl.h>
+#include <sys/utsname.h>
+#include <limits.h>
+#include <unistd.h>
+
+#ifdef __linux__
+# include "misc/kdev_t.h"
+# include <linux/limits.h>
+#else
+# define MAJOR(x) major((x))
+# define MINOR(x) minor((x))
+# define MKDEV(x,y) makedev((x),(y))
+#endif
+
+#include "misc/dm-ioctl.h"
+
+/*
+ * Ensure build compatibility.
+ * The hard-coded versions here are the highest present
+ * in the _cmd_data arrays.
+ */
+
+#if !((DM_VERSION_MAJOR == 4 && DM_VERSION_MINOR >= 6))
+#error The version of dm-ioctl.h included is incompatible.
+#endif
+
+/* FIXME This should be exported in device-mapper.h */
+#define DM_NAME "device-mapper"
+
+#define PROC_MISC "/proc/misc"
+#define PROC_DEVICES "/proc/devices"
+#define MISC_NAME "misc"
+
+#define NUMBER_OF_MAJORS 4096
+
+/*
+ * Static minor number assigned since kernel version 2.6.36.
+ * The original definition is in kernel's include/linux/miscdevice.h.
+ * This number is also visible in modules.devname exported by depmod
+ * utility (support included in module-init-tools version >= 3.12).
+ */
+#define MAPPER_CTRL_MINOR 236
+#define MISC_MAJOR 10
+
+/* dm major version no for running kernel */
+static unsigned _dm_version = DM_VERSION_MAJOR;
+static unsigned _dm_version_minor = 0;
+static unsigned _dm_version_patchlevel = 0;
+static int _log_suppress = 0;
+static struct dm_timestamp *_dm_ioctl_timestamp = NULL;
+
+/*
+ * If the kernel dm driver only supports one major number
+ * we store it in _dm_device_major. Otherwise we indicate
+ * which major numbers have been claimed by device-mapper
+ * in _dm_bitset.
+ */
+static unsigned _dm_multiple_major_support = 1;
+static dm_bitset_t _dm_bitset = NULL;
+static uint32_t _dm_device_major = 0;
+
+static int _control_fd = -1;
+static int _hold_control_fd_open = 0;
+static int _version_checked = 0;
+static int _version_ok = 1;
+static unsigned _ioctl_buffer_double_factor = 0;
+
+const int _dm_compat = 0;
+
+/* *INDENT-OFF* */
+static struct cmd_data _cmd_data_v4[] = {
+ {"create", DM_DEV_CREATE, {4, 0, 0}},
+ {"reload", DM_TABLE_LOAD, {4, 0, 0}},
+ {"remove", DM_DEV_REMOVE, {4, 0, 0}},
+ {"remove_all", DM_REMOVE_ALL, {4, 0, 0}},
+ {"suspend", DM_DEV_SUSPEND, {4, 0, 0}},
+ {"resume", DM_DEV_SUSPEND, {4, 0, 0}},
+ {"info", DM_DEV_STATUS, {4, 0, 0}},
+ {"deps", DM_TABLE_DEPS, {4, 0, 0}},
+ {"rename", DM_DEV_RENAME, {4, 0, 0}},
+ {"version", DM_VERSION, {4, 0, 0}},
+ {"status", DM_TABLE_STATUS, {4, 0, 0}},
+ {"table", DM_TABLE_STATUS, {4, 0, 0}},
+ {"waitevent", DM_DEV_WAIT, {4, 0, 0}},
+ {"names", DM_LIST_DEVICES, {4, 0, 0}},
+ {"clear", DM_TABLE_CLEAR, {4, 0, 0}},
+ {"mknodes", DM_DEV_STATUS, {4, 0, 0}},
+#ifdef DM_LIST_VERSIONS
+ {"versions", DM_LIST_VERSIONS, {4, 1, 0}},
+#endif
+#ifdef DM_TARGET_MSG
+ {"message", DM_TARGET_MSG, {4, 2, 0}},
+#endif
+#ifdef DM_DEV_SET_GEOMETRY
+ {"setgeometry", DM_DEV_SET_GEOMETRY, {4, 6, 0}},
+#endif
+};
+/* *INDENT-ON* */
+
+#define ALIGNMENT 8
+
+/* FIXME Rejig library to record & use errno instead */
+#ifndef DM_EXISTS_FLAG
+# define DM_EXISTS_FLAG 0x00000004
+#endif
+
+/* Round 'ptr' up to the next multiple of 'a' ('a' must be a power of two). */
+static char *_align(char *ptr, unsigned int a)
+{
+ register unsigned long agn = --a;
+
+ return (char *) (((unsigned long) ptr + agn) & ~agn);
+}
+
+#ifdef DM_IOCTLS
+static unsigned _kernel_major = 0;
+static unsigned _kernel_minor = 0;
+static unsigned _kernel_release = 0;
+
+/*
+ * Parse the uname() release string into _kernel_{major,minor,release};
+ * the result is cached after the first successful call.
+ * Returns 1 on success, 0 on failure.
+ */
+static int _uname(void)
+{
+ static int _uts_set = 0;
+ struct utsname _uts;
+ int parts;
+
+ if (_uts_set)
+ return 1;
+
+ if (uname(&_uts)) {
+ log_error("uname failed: %s", strerror(errno));
+ return 0;
+ }
+
+ parts = sscanf(_uts.release, "%u.%u.%u",
+ &_kernel_major, &_kernel_minor, &_kernel_release);
+
+ /* Kernels with a major number of 2 always had 3 parts. */
+ if (parts < 1 || (_kernel_major < 3 && parts < 3)) {
+ log_error("Could not determine kernel version used.");
+ return 0;
+ }
+
+ _uts_set = 1;
+ return 1;
+}
+
+/* Public accessor for the cached kernel version numbers. */
+int get_uname_version(unsigned *major, unsigned *minor, unsigned *release)
+{
+ if (!_uname())
+ return_0;
+
+ *major = _kernel_major;
+ *minor = _kernel_minor;
+ *release = _kernel_release;
+
+ return 1;
+}
+/*
+ * Scan a /proc table ("<number> <name>" per line) for 'name'.
+ * Set number to NULL to populate _dm_bitset - otherwise first
+ * match is returned.
+ * Returns:
+ * 0 - error
+ * 1 - success - number found
+ * 2 - success - number not found (only if require_module_loaded=0)
+ */
+static int _get_proc_number(const char *file, const char *name,
+ uint32_t *number, int require_module_loaded)
+{
+ FILE *fl;
+ char nm[256];
+ char *line = NULL;
+ size_t len = 0; /* getline() requires an initialised buffer size */
+ uint32_t num;
+
+ if (!(fl = fopen(file, "r"))) {
+ log_sys_error("fopen", file);
+ return 0;
+ }
+
+ while (getline(&line, &len, fl) != -1) {
+ /* "%u" - num is uint32_t; "%d" would mismatch the type. */
+ if (sscanf(line, "%u %255s\n", &num, &nm[0]) == 2) {
+ if (!strcmp(name, nm)) {
+ if (number) {
+ *number = num;
+ if (fclose(fl))
+ log_sys_error("fclose", file);
+ free(line);
+ return 1;
+ }
+ /* No specific number wanted: record every match. */
+ dm_bit_set(_dm_bitset, num);
+ }
+ }
+ }
+ if (fclose(fl))
+ log_sys_error("fclose", file);
+ free(line);
+
+ if (number) {
+ if (require_module_loaded) {
+ log_error("%s: No entry for %s found", file, name);
+ return 0;
+ }
+
+ return 2;
+ }
+
+ return 1;
+}
+
+/*
+ * Look up the misc char device major and the device-mapper misc minor
+ * from /proc; both lookups must succeed.  Returns 1 on success.
+ */
+static int _control_device_number(uint32_t *major, uint32_t *minor)
+{
+ if (!_get_proc_number(PROC_DEVICES, MISC_NAME, major, 1) ||
+ !_get_proc_number(PROC_MISC, DM_NAME, minor, 1)) {
+ *major = 0;
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * Check whether the control node exists and is the expected character
+ * device; an apparently incorrect node is unlinked so it can be recreated.
+ * Returns 1 if it exists on returning; 0 if it doesn't; -1 if it's wrong.
+ */
+static int _control_exists(const char *control, uint32_t major, uint32_t minor)
+{
+ struct stat buf;
+
+ if (stat(control, &buf) < 0) {
+ if (errno != ENOENT)
+ log_sys_error("stat", control);
+ return 0;
+ }
+
+ if (!S_ISCHR(buf.st_mode)) {
+ log_verbose("%s: Wrong inode type", control);
+ if (!unlink(control))
+ return 0; /* removed; caller will recreate it */
+ log_sys_error("unlink", control);
+ return -1;
+ }
+
+ /* major == 0 means "don't verify the device number". */
+ if (major && buf.st_rdev != MKDEV((dev_t)major, (dev_t)minor)) {
+ /* Report st_rdev - the device number actually compared above
+ * (the old code wrongly printed MAJOR/MINOR of st_mode). */
+ log_verbose("%s: Wrong device number: (%u, %u) instead of "
+ "(%u, %u)", control,
+ MAJOR(buf.st_rdev), MINOR(buf.st_rdev),
+ major, minor);
+ if (!unlink(control))
+ return 0;
+ log_sys_error("unlink", control);
+ return -1;
+ }
+
+ return 1;
+}
+
+/*
+ * Ensure the control character device node exists with the given
+ * major/minor, creating the dm directory and node if necessary.
+ * Returns 1 on success.
+ */
+static int _create_control(const char *control, uint32_t major, uint32_t minor)
+{
+ int ret;
+ mode_t old_umask;
+
+ /*
+ * Return if the control already exists with intended major/minor
+ * or there's an error unlinking an apparently incorrect one.
+ */
+ ret = _control_exists(control, major, minor);
+ if (ret == -1)
+ return_0; /* Failed to unlink existing incorrect node */
+ if (ret)
+ return 1; /* Already exists and correct */
+
+ (void) dm_prepare_selinux_context(dm_dir(), S_IFDIR);
+ old_umask = umask(DM_DEV_DIR_UMASK);
+ ret = dm_create_dir(dm_dir());
+ umask(old_umask);
+ (void) dm_prepare_selinux_context(NULL, 0);
+
+ if (!ret)
+ return_0;
+
+ log_verbose("Creating device %s (%u, %u)", control, major, minor);
+
+ (void) dm_prepare_selinux_context(control, S_IFCHR);
+ old_umask = umask(DM_CONTROL_NODE_UMASK);
+ if (mknod(control, S_IFCHR | S_IRUSR | S_IWUSR,
+ MKDEV((dev_t)major, (dev_t)minor)) < 0) {
+ log_sys_error("mknod", control);
+ ret = 0;
+ }
+ umask(old_umask);
+ (void) dm_prepare_selinux_context(NULL, 0);
+
+ return ret;
+}
+#endif
+
+/*
+ * FIXME Update bitset in long-running process if dm claims new major numbers.
+ */
+/*
+ * If require_module_loaded=0, caller is responsible to check
+ * whether _dm_device_major or _dm_bitset is really set. If
+ * it's not, it means the module is not loaded.
+ */
+static int _create_dm_bitset(int require_module_loaded)
+{
+ int r;
+
+#ifdef DM_IOCTLS
+ /* Already initialised? */
+ if (_dm_bitset || _dm_device_major)
+ return 1;
+
+ if (!_uname())
+ return 0;
+
+ /*
+ * 2.6 kernels are limited to one major number.
+ * Assume 2.4 kernels are patched not to.
+ * FIXME Check _dm_version and _dm_version_minor if 2.6 changes this.
+ */
+ if (KERNEL_VERSION(_kernel_major, _kernel_minor, _kernel_release) >=
+ KERNEL_VERSION(2, 6, 0))
+ _dm_multiple_major_support = 0;
+
+ if (!_dm_multiple_major_support) {
+ if (!_get_proc_number(PROC_DEVICES, DM_NAME, &_dm_device_major,
+ require_module_loaded))
+ return 0;
+ return 1;
+ }
+
+ /* Multiple major numbers supported */
+ if (!(_dm_bitset = dm_bitset_create(NULL, NUMBER_OF_MAJORS)))
+ return 0;
+
+ /* Passing NULL for the number populates _dm_bitset instead. */
+ r = _get_proc_number(PROC_DEVICES, DM_NAME, NULL, require_module_loaded);
+ if (!r || r == 2) {
+ dm_bitset_destroy(_dm_bitset);
+ _dm_bitset = NULL;
+ /*
+ * It's not an error if we didn't find anything and we
+ * didn't require module to be loaded at the same time.
+ */
+ return r == 2;
+ }
+
+ return 1;
+#else
+ return 0;
+#endif
+}
+
+/* Is 'major' one of the running kernel's device-mapper major numbers? */
+int dm_is_dm_major(uint32_t major)
+{
+ if (!_create_dm_bitset(0))
+ return 0;
+
+ if (_dm_multiple_major_support) {
+ if (!_dm_bitset)
+ return 0;
+ return dm_bit(_dm_bitset, major) ? 1 : 0;
+ }
+
+ if (!_dm_device_major)
+ return 0;
+
+ return (major == _dm_device_major) ? 1 : 0;
+}
+
+/* Close the cached control fd (if open) and reset it to -1. */
+static void _close_control_fd(void)
+{
+ if (_control_fd != -1) {
+ if (close(_control_fd) < 0)
+ log_sys_error("close", "_control_fd");
+ _control_fd = -1;
+ }
+}
+
+#ifdef DM_IOCTLS
+/* Open the control node read/write and cache the fd in _control_fd. */
+static int _open_and_assign_control_fd(const char *control)
+{
+ if ((_control_fd = open(control, O_RDWR)) < 0) {
+ log_sys_error("open", control);
+ return 0;
+ }
+
+ return 1;
+}
+#endif
+
+/*
+ * Ensure the control device node exists, open it, and build the list
+ * of device-mapper major numbers.  Returns 1 on success (including
+ * when compiled without DM_IOCTLS support).
+ */
+static int _open_control(void)
+{
+#ifdef DM_IOCTLS
+ char control[PATH_MAX];
+ uint32_t major = MISC_MAJOR;
+ uint32_t minor = MAPPER_CTRL_MINOR;
+
+ if (_control_fd != -1)
+ return 1; /* already open */
+
+ if (!_uname())
+ return 0;
+
+ if (dm_snprintf(control, sizeof(control), "%s/%s", dm_dir(), DM_CONTROL_NODE) < 0)
+ goto_bad;
+
+ /*
+ * Prior to 2.6.36 the minor number should be looked up in /proc.
+ */
+ if ((KERNEL_VERSION(_kernel_major, _kernel_minor, _kernel_release) <
+ KERNEL_VERSION(2, 6, 36)) &&
+ !_control_device_number(&major, &minor))
+ goto_bad;
+
+ /*
+ * Create the node with correct major and minor if not already done.
+ * Udev may already have created /dev/mapper/control
+ * from the modules.devname file generated by depmod.
+ */
+ if (!_create_control(control, major, minor))
+ goto_bad;
+
+ /*
+ * As of 2.6.36 kernels, the open can trigger autoloading dm-mod.
+ */
+ if (!_open_and_assign_control_fd(control))
+ goto_bad;
+
+ if (!_create_dm_bitset(1)) {
+ log_error("Failed to set up list of device-mapper major numbers");
+ return 0;
+ }
+
+ return 1;
+
+bad:
+ log_error("Failure to communicate with kernel device-mapper driver.");
+ if (!geteuid())
+ log_error("Check that device-mapper is available in the kernel.");
+ return 0;
+#else
+ return 1;
+#endif
+}
+
+/* Zero a string's contents before freeing it - presumably because
+ * target params can carry sensitive data; confirm against callers. */
+static void _dm_zfree_string(char *string)
+{
+ if (string) {
+ memset(string, 0, strlen(string));
+ dm_free(string);
+ }
+}
+
+/* Zero a dm_ioctl result buffer (dmi->data_size bytes) before freeing. */
+static void _dm_zfree_dmi(struct dm_ioctl *dmi)
+{
+ if (dmi) {
+ memset(dmi, 0, dmi->data_size);
+ dm_free(dmi);
+ }
+}
+
+/* Free the task's target list, wiping each target's params string. */
+static void _dm_task_free_targets(struct dm_task *dmt)
+{
+ struct target *t, *n;
+
+ for (t = dmt->head; t; t = n) {
+ n = t->next; /* save link before freeing t */
+ _dm_zfree_string(t->params);
+ dm_free(t->type);
+ dm_free(t);
+ }
+
+ dmt->head = dmt->tail = NULL;
+}
+
+/* Release a dm_task and everything it owns. */
+void dm_task_destroy(struct dm_task *dmt)
+{
+ _dm_task_free_targets(dmt);
+ _dm_zfree_dmi(dmt->dmi.v4);
+ dm_free(dmt->dev_name);
+ dm_free(dmt->mangled_dev_name);
+ dm_free(dmt->newname);
+ dm_free(dmt->message);
+ dm_free(dmt->geometry);
+ dm_free(dmt->uuid);
+ dm_free(dmt->mangled_uuid);
+ dm_free(dmt);
+}
+
+/*
+ * Protocol Version 4 functions.
+ */
+
+/*
+ * Format the driver version from the last ioctl into 'version' and
+ * cache the minor/patchlevel for later feature checks.
+ * Returns 1 on success; 0 if no ioctl data is present or the buffer
+ * is too small (in which case 'version' is set to the empty string).
+ */
+int dm_task_get_driver_version(struct dm_task *dmt, char *version, size_t size)
+{
+ unsigned *v;
+ int n;
+
+ if (!dmt->dmi.v4) {
+ if (version)
+ version[0] = '\0';
+ return 0;
+ }
+
+ v = dmt->dmi.v4->version;
+ _dm_version_minor = v[1];
+ _dm_version_patchlevel = v[2];
+ if (version) {
+ /* snprintf() returns < 0 on error and >= size on truncation;
+ * the old "< 0" test missed the truncation case entirely. */
+ n = snprintf(version, size, "%u.%u.%u", v[0], v[1], v[2]);
+ if (n < 0 || (size_t) n >= size) {
+ log_error("Buffer for version is too short.");
+ if (size > 0)
+ version[0] = '\0';
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+/*
+ * Run a DM_DEVICE_VERSION task to obtain the driver version string.
+ * 'log_suppress' temporarily silences error logging during the probe.
+ */
+static int _check_version(char *version, size_t size, int log_suppress)
+{
+ struct dm_task *task;
+ int r;
+
+ if (!(task = dm_task_create(DM_DEVICE_VERSION))) {
+ log_error("Failed to get device-mapper version");
+ version[0] = '\0';
+ return 0;
+ }
+
+ if (log_suppress)
+ _log_suppress = 1;
+
+ r = dm_task_run(task);
+ /* Best effort: extract whatever version data came back. */
+ if (!dm_task_get_driver_version(task, version, size))
+ stack;
+ dm_task_destroy(task);
+ _log_suppress = 0;
+
+ return r;
+}
+
+/*
+ * Find out device-mapper's major version number the first time
+ * this is called and whether or not we support it.
+ */
+int dm_check_version(void)
+{
+ char libversion[64] = "", dmversion[64] = "";
+ const char *compat = "";
+
+ if (_version_checked)
+ return _version_ok;
+
+ _version_checked = 1;
+
+ if (_check_version(dmversion, sizeof(dmversion), _dm_compat))
+ return 1;
+
+ /* _dm_compat is const 0 above, so the v1 fallback below is dead code. */
+ if (!_dm_compat)
+ goto_bad;
+
+ log_verbose("device-mapper ioctl protocol version %u failed. "
+ "Trying protocol version 1.", _dm_version);
+ _dm_version = 1;
+ if (_check_version(dmversion, sizeof(dmversion), 0)) {
+ log_verbose("Using device-mapper ioctl protocol version 1");
+ return 1;
+ }
+
+ compat = "(compat)";
+
+ bad:
+ dm_get_library_version(libversion, sizeof(libversion));
+
+ log_error("Incompatible libdevmapper %s%s and kernel driver %s.",
+ *libversion ? libversion : "(unknown version)", compat,
+ *dmversion ? dmversion : "(unknown version)");
+
+ _version_ok = 0;
+ return 0;
+}
+
+/* Udev cookie support needs kernel dm ioctl version >= 4.15. */
+int dm_cookie_supported(void)
+{
+ return (dm_check_version() &&
+ _dm_version >= 4 &&
+ _dm_version_minor >= 15);
+}
+
+/* Inactive-table queries need dm >= 4.16, or the RHEL 5.7 backport. */
+static int _dm_inactive_supported(void)
+{
+ int inactive_supported = 0;
+
+ if (dm_check_version() && _dm_version >= 4) {
+ if (_dm_version_minor >= 16)
+ inactive_supported = 1; /* upstream */
+ else if (_dm_version_minor == 11 &&
+ (_dm_version_patchlevel >= 6 &&
+ _dm_version_patchlevel <= 40)) {
+ inactive_supported = 1; /* RHEL 5.7 */
+ }
+ }
+
+ return inactive_supported;
+}
+
+int dm_message_supports_precise_timestamps(void)
+{
+ /*
+ * 4.32.0 supports "precise_timestamps" and "histogram:" options
+ * to @stats_create messages but lacks the ability to report
+ * these properties via a subsequent @stats_list: require at
+ * least 4.33.0 in order to use these features.
+ */
+ if (dm_check_version() && _dm_version >= 4)
+ if (_dm_version_minor >= 33)
+ return 1;
+ return 0;
+}
+
+/*
+ * Iterate the task's target list.  Pass NULL as 'next' to start;
+ * returns an opaque cursor for the following call, or NULL after the
+ * last target.  The out-parameters describe the current target (all
+ * zeroed/NULL when the list is empty).
+ */
+void *dm_get_next_target(struct dm_task *dmt, void *next,
+ uint64_t *start, uint64_t *length,
+ char **target_type, char **params)
+{
+ struct target *t = (struct target *) next;
+
+ if (!t)
+ t = dmt->head;
+
+ if (!t) {
+ *start = 0;
+ *length = 0;
+ *target_type = 0;
+ *params = 0;
+ return NULL;
+ }
+
+ *start = t->start;
+ *length = t->length;
+ *target_type = t->type;
+ *params = t->params;
+
+ return t->next;
+}
+
+/* Unmarshall the target info returned from a status call */
+static int _unmarshal_status(struct dm_task *dmt, struct dm_ioctl *dmi)
+{
+ char *outbuf = (char *) dmi + dmi->data_start;
+ char *outptr = outbuf;
+ uint32_t i;
+ struct dm_target_spec *spec;
+
+ /* Drop any targets recorded by a previous run. */
+ _dm_task_free_targets(dmt);
+
+ for (i = 0; i < dmi->target_count; i++) {
+ spec = (struct dm_target_spec *) outptr;
+ /* The params string follows the spec header in the buffer. */
+ if (!dm_task_add_target(dmt, spec->sector_start,
+ spec->length,
+ spec->target_type,
+ outptr + sizeof(*spec))) {
+ return 0;
+ }
+
+ /* spec->next is an offset from the start of the data area. */
+ outptr = outbuf + spec->next;
+ }
+
+ return 1;
+}
+
+/*
+ * Format "major:minor" into buf.  Returns 1 on success, 0 if the
+ * buffer is too small (a minimum of 8 bytes is required).
+ */
+int dm_format_dev(char *buf, int bufsize, uint32_t dev_major,
+ uint32_t dev_minor)
+{
+ int r;
+
+ if (bufsize < 8)
+ return 0;
+
+ r = snprintf(buf, (size_t) bufsize, "%u:%u", dev_major, dev_minor);
+ if (r < 0 || r > bufsize - 1)
+ return 0; /* encoding error or truncated */
+
+ return 1;
+}
+
+/*
+ * Fill 'info' from the last ioctl result.  Returns 0 if no ioctl data
+ * is available; otherwise returns 1 with info->exists indicating
+ * whether the device is present (remaining fields valid only if so).
+ */
+int dm_task_get_info(struct dm_task *dmt, struct dm_info *info)
+{
+ if (!dmt->dmi.v4)
+ return 0;
+
+ memset(info, 0, sizeof(*info));
+
+ info->exists = dmt->dmi.v4->flags & DM_EXISTS_FLAG ? 1 : 0;
+ if (!info->exists)
+ return 1;
+
+ info->suspended = dmt->dmi.v4->flags & DM_SUSPEND_FLAG ? 1 : 0;
+ info->read_only = dmt->dmi.v4->flags & DM_READONLY_FLAG ? 1 : 0;
+ info->live_table = dmt->dmi.v4->flags & DM_ACTIVE_PRESENT_FLAG ? 1 : 0;
+ info->inactive_table = dmt->dmi.v4->flags & DM_INACTIVE_PRESENT_FLAG ?
+ 1 : 0;
+ /* NOTE(review): unlike its siblings this is not normalised to 0/1 -
+ * callers should treat it as a boolean, not compare against 1. */
+ info->deferred_remove = dmt->dmi.v4->flags & DM_DEFERRED_REMOVE;
+ info->internal_suspend = (dmt->dmi.v4->flags & DM_INTERNAL_SUSPEND_FLAG) ? 1 : 0;
+ info->target_count = dmt->dmi.v4->target_count;
+ info->open_count = dmt->dmi.v4->open_count;
+ info->event_nr = dmt->dmi.v4->event_nr;
+ info->major = MAJOR(dmt->dmi.v4->dev);
+ info->minor = MINOR(dmt->dmi.v4->dev);
+
+ return 1;
+}
+
+/* Query the device's read-ahead setting via its device node. */
+uint32_t dm_task_get_read_ahead(const struct dm_task *dmt, uint32_t *read_ahead)
+{
+ const char *dev_name;
+
+ *read_ahead = 0;
+
+ if (!dmt->dmi.v4 || !(dmt->dmi.v4->flags & DM_EXISTS_FLAG))
+ return 0;
+
+ /* Prefer the name returned by the kernel; fall back to the task's. */
+ if (*dmt->dmi.v4->name)
+ dev_name = dmt->dmi.v4->name;
+ else if (!(dev_name = DEV_NAME(dmt))) {
+ log_error("Get read ahead request failed: device name unrecorded.");
+ return 0;
+ }
+
+ return get_dev_node_read_ahead(dev_name, MAJOR(dmt->dmi.v4->dev),
+ MINOR(dmt->dmi.v4->dev), read_ahead);
+}
+
+/* The following three getters return pointers into the raw ioctl
+ * result buffer; they are valid only until the task is destroyed. */
+struct dm_deps *dm_task_get_deps(struct dm_task *dmt)
+{
+ return (struct dm_deps *) (((char *) dmt->dmi.v4) +
+ dmt->dmi.v4->data_start);
+}
+
+struct dm_names *dm_task_get_names(struct dm_task *dmt)
+{
+ return (struct dm_names *) (((char *) dmt->dmi.v4) +
+ dmt->dmi.v4->data_start);
+}
+
+struct dm_versions *dm_task_get_versions(struct dm_task *dmt)
+{
+ return (struct dm_versions *) (((char *) dmt->dmi.v4) +
+ dmt->dmi.v4->data_start);
+}
+
+/*
+ * Return the NUL-terminated message response held in the ioctl data
+ * area, or NULL if there is none or the structure is inconsistent.
+ * The pointer is valid only until the task is destroyed.
+ */
+const char *dm_task_get_message_response(struct dm_task *dmt)
+{
+ const char *start, *end;
+
+ if (!(dmt->dmi.v4->flags & DM_DATA_OUT_FLAG))
+ return NULL;
+
+ start = (const char *) dmt->dmi.v4 + dmt->dmi.v4->data_start;
+ end = (const char *) dmt->dmi.v4 + dmt->dmi.v4->data_size;
+
+ if (end < start) {
+ log_error(INTERNAL_ERROR "Corrupted message structure returned: start %d > end %d", (int)dmt->dmi.v4->data_start, (int)dmt->dmi.v4->data_size);
+ return NULL;
+ }
+
+ /* Insist on a terminating NUL so callers may treat it as a string. */
+ if (!memchr(start, 0, end - start)) {
+ log_error(INTERNAL_ERROR "Message response doesn't contain terminating NUL character");
+ return NULL;
+ }
+
+ return start;
+}
+
+/* Mark the task read-only.  Always succeeds. */
+int dm_task_set_ro(struct dm_task *dmt)
+{
+ dmt->read_only = 1;
+ return 1;
+}
+
+/* Record the read-ahead value and flags on the task.  Always succeeds. */
+int dm_task_set_read_ahead(struct dm_task *dmt, uint32_t read_ahead,
+ uint32_t read_ahead_flags)
+{
+ dmt->read_ahead = read_ahead;
+ dmt->read_ahead_flags = read_ahead_flags;
+
+ return 1;
+}
+
+/* Request that a reload identical to the live table be suppressed
+ * (flag consumed elsewhere).  Always succeeds. */
+int dm_task_suppress_identical_reload(struct dm_task *dmt)
+{
+ dmt->suppress_identical_reload = 1;
+ return 1;
+}
+
+/* Choose when the /dev node is created; rejects unknown values. */
+int dm_task_set_add_node(struct dm_task *dmt, dm_add_node_t add_node)
+{
+ switch (add_node) {
+ case DM_ADD_NODE_ON_RESUME:
+ case DM_ADD_NODE_ON_CREATE:
+ dmt->add_node = add_node;
+ return 1;
+ default:
+ log_error("Unknown add node parameter");
+ return 0;
+ }
+}
+
+/*
+ * Set a new UUID for a rename-style operation.  The UUID may be mangled
+ * (hex-escaped) first, depending on the global name-mangling mode.
+ * NOTE: the value is stored in dmt->newname (shared with rename) and
+ * flagged via dmt->new_uuid.  Returns 0 on length/mangling/alloc failure.
+ */
+int dm_task_set_newuuid(struct dm_task *dmt, const char *newuuid)
+{
+ dm_string_mangling_t mangling_mode = dm_get_name_mangling_mode();
+ char mangled_uuid[DM_UUID_LEN];
+ int r = 0;
+
+ if (strlen(newuuid) >= DM_UUID_LEN) {
+ log_error("Uuid \"%s\" too long", newuuid);
+ return 0;
+ }
+
+ if (!check_multiple_mangled_string_allowed(newuuid, "new UUID", mangling_mode))
+ return_0;
+
+ /* r > 0 means mangling actually changed the string. */
+ if (mangling_mode != DM_STRING_MANGLING_NONE &&
+ (r = mangle_string(newuuid, "new UUID", strlen(newuuid), mangled_uuid,
+ sizeof(mangled_uuid), mangling_mode)) < 0) {
+ log_error("Failed to mangle new device UUID \"%s\"", newuuid);
+ return 0;
+ }
+
+ if (r) {
+ log_debug_activation("New device uuid mangled [%s]: %s --> %s",
+ mangling_mode == DM_STRING_MANGLING_AUTO ? "auto" : "hex",
+ newuuid, mangled_uuid);
+ newuuid = mangled_uuid;
+ }
+
+ /* Replace any previously set rename target. */
+ dm_free(dmt->newname);
+ if (!(dmt->newname = dm_strdup(newuuid))) {
+ log_error("dm_task_set_newuuid: strdup(%s) failed", newuuid);
+ return 0;
+ }
+ dmt->new_uuid = 1;
+
+ return 1;
+}
+
+/* Store a target message string (copied).  Returns 0 on alloc failure. */
+int dm_task_set_message(struct dm_task *dmt, const char *message)
+{
+ dm_free(dmt->message);
+ if (!(dmt->message = dm_strdup(message))) {
+ log_error("dm_task_set_message: strdup failed");
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Sector a target message is addressed to (0 = whole device). */
+int dm_task_set_sector(struct dm_task *dmt, uint64_t sector)
+{
+ dmt->sector = sector;
+
+ return 1;
+}
+
+/*
+ * Store device geometry as a single space-separated string
+ * "cylinders heads sectors start".  Returns 0 on alloc failure.
+ */
+int dm_task_set_geometry(struct dm_task *dmt, const char *cylinders, const char *heads,
+ const char *sectors, const char *start)
+{
+ dm_free(dmt->geometry);
+ if (dm_asprintf(&(dmt->geometry), "%s %s %s %s",
+ cylinders, heads, sectors, start) < 0) {
+ log_error("dm_task_set_geometry: sprintf failed");
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * Simple boolean option setters.  Each records a flag on the task that
+ * is translated into the corresponding DM_* ioctl flag by _flatten()
+ * when the task runs.  All always return 1.
+ */
+
+/* Suspend without flushing outstanding I/O first. */
+int dm_task_no_flush(struct dm_task *dmt)
+{
+ dmt->no_flush = 1;
+
+ return 1;
+}
+
+/* Skip bdget/open-count bookkeeping (DM_SKIP_BDGET_FLAG). */
+int dm_task_no_open_count(struct dm_task *dmt)
+{
+ dmt->no_open_count = 1;
+
+ return 1;
+}
+
+/* Do not attempt a filesystem sync/lock before suspending. */
+int dm_task_skip_lockfs(struct dm_task *dmt)
+{
+ dmt->skip_lockfs = 1;
+
+ return 1;
+}
+
+/* Ask the kernel to wipe ioctl buffers after use (key material etc.). */
+int dm_task_secure_data(struct dm_task *dmt)
+{
+ dmt->secure_data = 1;
+
+ return 1;
+}
+
+/* Retry DM_DEVICE_REMOVE a few times on transient EBUSY. */
+int dm_task_retry_remove(struct dm_task *dmt)
+{
+ dmt->retry_remove = 1;
+
+ return 1;
+}
+
+/* Allow the kernel to defer removal until the last opener closes. */
+int dm_task_deferred_remove(struct dm_task *dmt)
+{
+ dmt->deferred_remove = 1;
+
+ return 1;
+}
+
+/* Query the inactive (not yet resumed) table slot instead of the live one. */
+int dm_task_query_inactive_table(struct dm_task *dmt)
+{
+ dmt->query_inactive_table = 1;
+
+ return 1;
+}
+
+/* Event number / udev cookie value passed through in the ioctl. */
+int dm_task_set_event_nr(struct dm_task *dmt, uint32_t event_nr)
+{
+ dmt->event_nr = event_nr;
+
+ return 1;
+}
+
+/*
+ * Request that the time of the next ioctl be recorded.  Lazily allocates
+ * the single, shared _dm_ioctl_timestamp object (freed in dm_lib_release).
+ * Returns 0 only if that allocation fails.
+ */
+int dm_task_set_record_timestamp(struct dm_task *dmt)
+{
+ if (!_dm_ioctl_timestamp)
+ _dm_ioctl_timestamp = dm_timestamp_alloc();
+
+ if (!_dm_ioctl_timestamp)
+ return_0;
+
+ dmt->record_timestamp = 1;
+
+ return 1;
+}
+
+/* Return the shared ioctl timestamp, or NULL if recording wasn't enabled. */
+struct dm_timestamp *dm_task_get_ioctl_timestamp(struct dm_task *dmt)
+{
+ return dmt->record_timestamp ? _dm_ioctl_timestamp : NULL;
+}
+
+/*
+ * Allocate a target-table entry covering [start, start+len) sectors with
+ * the given target type and parameter string (both copied).
+ * Returns NULL on over-long type name or allocation failure.
+ */
+struct target *create_target(uint64_t start, uint64_t len, const char *type,
+ const char *params)
+{
+ struct target *t;
+
+ if (strlen(type) >= DM_MAX_TYPE_NAME) {
+ log_error("Target type name %s is too long.", type);
+ return NULL;
+ }
+
+ if (!(t = dm_zalloc(sizeof(*t)))) {
+ log_error("create_target: malloc(%" PRIsize_t ") failed",
+ sizeof(*t));
+ return NULL;
+ }
+
+ if (!(t->params = dm_strdup(params))) {
+ log_error("create_target: strdup(params) failed");
+ goto bad;
+ }
+
+ if (!(t->type = dm_strdup(type))) {
+ log_error("create_target: strdup(type) failed");
+ goto bad;
+ }
+
+ t->start = start;
+ t->length = len;
+ return t;
+
+ bad:
+ /* params may hold key material, so zero it before freeing. */
+ _dm_zfree_string(t->params);
+ dm_free(t->type);
+ dm_free(t);
+ return NULL;
+}
+
+/*
+ * Serialise one target into the flattened ioctl buffer at 'out'
+ * (buffer ends at 'end').  Writes a dm_target_spec header followed by
+ * the parameter string with every '\' doubled, then aligns for the
+ * next entry.  Returns the new write position, or NULL if the buffer
+ * is too small or the type name is over-long.
+ */
+static char *_add_target(struct target *t, char *out, char *end)
+{
+ char *out_sp = out;
+ struct dm_target_spec sp;
+ size_t sp_size = sizeof(struct dm_target_spec);
+ unsigned int backslash_count = 0;
+ int len;
+ char *pt;
+
+ if (strlen(t->type) >= sizeof(sp.target_type)) {
+ log_error("Target type name %s is too long.", t->type);
+ return NULL;
+ }
+
+ sp.status = 0;
+ sp.sector_start = t->start;
+ sp.length = t->length;
+ strncpy(sp.target_type, t->type, sizeof(sp.target_type) - 1);
+ sp.target_type[sizeof(sp.target_type) - 1] = '\0';
+
+ /* Reserve room for the spec header; it is copied in at the end
+ * once sp.next (offset of the following entry) is known. */
+ out += sp_size;
+ pt = t->params;
+
+ /* Each '\' expands to "\\" below, so count them for the length check. */
+ while (*pt)
+ if (*pt++ == '\\')
+ backslash_count++;
+ len = strlen(t->params) + backslash_count;
+
+ if ((out >= end) || (out + len + 1) >= end) {
+ log_error("Ran out of memory building ioctl parameter");
+ return NULL;
+ }
+
+ if (backslash_count) {
+ /* replace "\" with "\\" */
+ pt = t->params;
+ do {
+ if (*pt == '\\')
+ *out++ = '\\';
+ *out++ = *pt++;
+ } while (*pt);
+ *out++ = '\0';
+ }
+ else {
+ strcpy(out, t->params);
+ out += len + 1;
+ }
+
+ /* align next block */
+ out = _align(out, ALIGNMENT);
+
+ sp.next = out - out_sp;
+ memcpy(out_sp, &sp, sp_size);
+
+ return out;
+}
+
+/*
+ * Resolve a kernel device number to its dm name by listing all devices
+ * (DM_DEVICE_LIST) and scanning the result.  Copies the name into buf
+ * (up to len bytes) and returns 1 on success, 0 otherwise.
+ * NOTE(review): strncpy may leave buf unterminated if the name is
+ * exactly len bytes — callers pass the fixed-size dmi->name field.
+ */
+static int _lookup_dev_name(uint64_t dev, char *buf, size_t len)
+{
+ struct dm_names *names;
+ unsigned next = 0;
+ struct dm_task *dmt;
+ int r = 0;
+
+ if (!(dmt = dm_task_create(DM_DEVICE_LIST)))
+ return 0;
+
+ if (!dm_task_run(dmt))
+ goto out;
+
+ if (!(names = dm_task_get_names(dmt)))
+ goto out;
+
+ /* dev == 0 marks an empty list. */
+ if (!names->dev)
+ goto out;
+
+ /* Entries are chained by byte offset in names->next (0 terminates). */
+ do {
+ names = (struct dm_names *)((char *) names + next);
+ if (names->dev == dev) {
+ strncpy(buf, names->name, len);
+ r = 1;
+ break;
+ }
+ next = names->next;
+ } while (next);
+
+ out:
+ dm_task_destroy(dmt);
+ return r;
+}
+
+/*
+ * Build the single contiguous dm_ioctl buffer the kernel expects from
+ * the pieces stored on the task: header, flags, then targets / rename
+ * string / target message / geometry (mutually exclusive payloads).
+ * repeat_count doubles the buffer size for each previous attempt that
+ * came back with DM_BUFFER_FULL_FLAG.  Returns a dm_zalloc'd buffer the
+ * caller must free with _dm_zfree_dmi(), or NULL on error.
+ */
+static struct dm_ioctl *_flatten(struct dm_task *dmt, unsigned repeat_count)
+{
+ const size_t min_size = 16 * 1024;
+ const int (*version)[3];
+
+ struct dm_ioctl *dmi;
+ struct target *t;
+ struct dm_target_msg *tmsg;
+ size_t len = sizeof(struct dm_ioctl);
+ char *b, *e;
+ int count = 0;
+
+ /* Worst-case size for the target specs (ALIGNMENT covers padding). */
+ for (t = dmt->head; t; t = t->next) {
+ len += sizeof(struct dm_target_spec);
+ len += strlen(t->params) + 1 + ALIGNMENT;
+ count++;
+ }
+
+ /* The payload area can carry only one kind of data per ioctl. */
+ if (count && (dmt->sector || dmt->message)) {
+ log_error("targets and message are incompatible");
+ return NULL;
+ }
+
+ if (count && dmt->newname) {
+ log_error("targets and rename are incompatible");
+ return NULL;
+ }
+
+ if (count && dmt->geometry) {
+ log_error("targets and geometry are incompatible");
+ return NULL;
+ }
+
+ if (dmt->newname && (dmt->sector || dmt->message)) {
+ log_error("message and rename are incompatible");
+ return NULL;
+ }
+
+ if (dmt->newname && dmt->geometry) {
+ log_error("geometry and rename are incompatible");
+ return NULL;
+ }
+
+ if (dmt->geometry && (dmt->sector || dmt->message)) {
+ log_error("geometry and message are incompatible");
+ return NULL;
+ }
+
+ if (dmt->sector && !dmt->message) {
+ log_error("message is required with sector");
+ return NULL;
+ }
+
+ if (dmt->newname)
+ len += strlen(dmt->newname) + 1;
+
+ if (dmt->message)
+ len += sizeof(struct dm_target_msg) + strlen(dmt->message) + 1;
+
+ if (dmt->geometry)
+ len += strlen(dmt->geometry) + 1;
+
+ /*
+ * Give len a minimum size so that we have space to store
+ * dependencies or status information.
+ */
+ if (len < min_size)
+ len = min_size;
+
+ /* Increase buffer size if repeating because buffer was too small */
+ while (repeat_count--)
+ len *= 2;
+
+ if (!(dmi = dm_zalloc(len)))
+ return NULL;
+
+ version = &_cmd_data_v4[dmt->type].version;
+
+ dmi->version[0] = (*version)[0];
+ dmi->version[1] = (*version)[1];
+ dmi->version[2] = (*version)[2];
+
+ dmi->data_size = len;
+ dmi->data_start = sizeof(struct dm_ioctl);
+
+ if (dmt->minor >= 0) {
+ if (!_dm_multiple_major_support && dmt->allow_default_major_fallback &&
+ dmt->major != (int) _dm_device_major) {
+ log_verbose("Overriding major number of %d "
+ "with %u for persistent device.",
+ dmt->major, _dm_device_major);
+ dmt->major = _dm_device_major;
+ }
+
+ if (dmt->major <= 0) {
+ log_error("Missing major number for persistent device.");
+ goto bad;
+ }
+
+ dmi->flags |= DM_PERSISTENT_DEV_FLAG;
+ dmi->dev = MKDEV((dev_t)dmt->major, (dev_t)dmt->minor);
+ }
+
+ /* Does driver support device number referencing? */
+ if (_dm_version_minor < 3 && !DEV_NAME(dmt) && !DEV_UUID(dmt) && dmi->dev) {
+ if (!_lookup_dev_name(dmi->dev, dmi->name, sizeof(dmi->name))) {
+ log_error("Unable to find name for device (%" PRIu32
+ ":%" PRIu32 ")", dmt->major, dmt->minor);
+ goto bad;
+ }
+ log_verbose("device (%" PRIu32 ":%" PRIu32 ") is %s "
+ "for compatibility with old kernel",
+ dmt->major, dmt->minor, dmi->name);
+ }
+
+ /* FIXME Until resume ioctl supplies name, use dev_name for readahead */
+ if (DEV_NAME(dmt) && (dmt->type != DM_DEVICE_RESUME || dmt->minor < 0 ||
+ dmt->major < 0))
+ strncpy(dmi->name, DEV_NAME(dmt), sizeof(dmi->name));
+
+ if (DEV_UUID(dmt))
+ strncpy(dmi->uuid, DEV_UUID(dmt), sizeof(dmi->uuid));
+
+ /* Translate the task's boolean options into DM_* ioctl flags. */
+ if (dmt->type == DM_DEVICE_SUSPEND)
+ dmi->flags |= DM_SUSPEND_FLAG;
+ if (dmt->no_flush) {
+ if (_dm_version_minor < 12)
+ log_verbose("No flush flag unsupported by kernel. "
+ "Buffers will be flushed.");
+ else
+ dmi->flags |= DM_NOFLUSH_FLAG;
+ }
+ if (dmt->read_only)
+ dmi->flags |= DM_READONLY_FLAG;
+ if (dmt->skip_lockfs)
+ dmi->flags |= DM_SKIP_LOCKFS_FLAG;
+ if (dmt->deferred_remove && (dmt->type == DM_DEVICE_REMOVE || dmt->type == DM_DEVICE_REMOVE_ALL))
+ dmi->flags |= DM_DEFERRED_REMOVE;
+
+ if (dmt->secure_data) {
+ if (_dm_version_minor < 20)
+ log_verbose("Secure data flag unsupported by kernel. "
+ "Buffers will not be wiped after use.");
+ dmi->flags |= DM_SECURE_DATA_FLAG;
+ }
+ if (dmt->query_inactive_table) {
+ if (!_dm_inactive_supported())
+ log_warn("WARNING: Inactive table query unsupported "
+ "by kernel. It will use live table.");
+ dmi->flags |= DM_QUERY_INACTIVE_TABLE_FLAG;
+ }
+ if (dmt->new_uuid) {
+ if (_dm_version_minor < 19) {
+ log_error("WARNING: Setting UUID unsupported by "
+ "kernel. Aborting operation.");
+ goto bad;
+ }
+ dmi->flags |= DM_UUID_FLAG;
+ }
+
+ dmi->target_count = count;
+ dmi->event_nr = dmt->event_nr;
+
+ /* Payload area starts directly after the header. */
+ b = (char *) (dmi + 1);
+ e = (char *) dmi + len;
+
+ for (t = dmt->head; t; t = t->next)
+ if (!(b = _add_target(t, b, e)))
+ goto_bad;
+
+ if (dmt->newname)
+ strcpy(b, dmt->newname);
+
+ if (dmt->message) {
+ tmsg = (struct dm_target_msg *) b;
+ tmsg->sector = dmt->sector;
+ strcpy(tmsg->message, dmt->message);
+ }
+
+ if (dmt->geometry)
+ strcpy(b, dmt->geometry);
+
+ return dmi;
+
+ bad:
+ _dm_zfree_dmi(dmi);
+ return NULL;
+}
+
+/*
+ * Run the task once per entry in the /dev/mapper directory (skipping
+ * ".", ".." and the control node).  Continues past per-device failures;
+ * returns 0 if any of them failed, 1 if all succeeded.
+ */
+static int _process_mapper_dir(struct dm_task *dmt)
+{
+ struct dirent *dirent;
+ DIR *d;
+ const char *dir;
+ int r = 1;
+
+ dir = dm_dir();
+ if (!(d = opendir(dir))) {
+ log_sys_error("opendir", dir);
+ return 0;
+ }
+
+ while ((dirent = readdir(d))) {
+ if (!strcmp(dirent->d_name, ".") ||
+ !strcmp(dirent->d_name, "..") ||
+ !strcmp(dirent->d_name, "control"))
+ continue;
+ if (!dm_task_set_name(dmt, dirent->d_name)) {
+ r = 0;
+ stack;
+ continue; /* try next name */
+ }
+ if (!dm_task_run(dmt)) {
+ r = 0;
+ stack; /* keep going */
+ }
+ }
+
+ if (closedir(d))
+ log_sys_error("closedir", dir);
+
+ return r;
+}
+
+/*
+ * Run the task once per device known to the kernel, using a fresh
+ * DM_DEVICE_LIST task to enumerate them.  Returns 0 if listing or any
+ * per-device run fails (dm_task_run failures do not stop iteration).
+ */
+static int _process_all_v4(struct dm_task *dmt)
+{
+ struct dm_task *task;
+ struct dm_names *names;
+ unsigned next = 0;
+ int r = 1;
+
+ if (!(task = dm_task_create(DM_DEVICE_LIST)))
+ return 0;
+
+ if (!dm_task_run(task)) {
+ r = 0;
+ goto out;
+ }
+
+ if (!(names = dm_task_get_names(task))) {
+ r = 0;
+ goto out;
+ }
+
+ /* dev == 0 marks an empty list: nothing to do, report success. */
+ if (!names->dev)
+ goto out;
+
+ /* Entries are chained by byte offset in names->next (0 terminates). */
+ do {
+ names = (struct dm_names *)((char *) names + next);
+ if (!dm_task_set_name(dmt, names->name)) {
+ r = 0;
+ goto out;
+ }
+ if (!dm_task_run(dmt))
+ r = 0;
+ next = names->next;
+ } while (next);
+
+ out:
+ dm_task_destroy(task);
+ return r;
+}
+
+/*
+ * MKNODES with no specific device: first clean up stale /dev/mapper
+ * entries, then recreate nodes for every live device.
+ */
+static int _mknodes_v4(struct dm_task *dmt)
+{
+ (void) _process_mapper_dir(dmt);
+
+ return _process_all_v4(dmt);
+}
+
+/*
+ * If an operation that uses a cookie fails, decrement the
+ * semaphore instead of udev.
+ */
+static int _udev_complete(struct dm_task *dmt)
+{
+ uint16_t base;
+
+ if (dmt->cookie_set &&
+ (base = dmt->event_nr & ~DM_UDEV_FLAGS_MASK))
+ /* strip flags from the cookie and use cookie magic instead */
+ return dm_udev_complete(base | (DM_COOKIE_MAGIC <<
+ DM_UDEV_FLAGS_SHIFT));
+
+ return 1;
+}
+
+#ifdef DM_IOCTLS
+/*
+ * Did the last ioctl cause the kernel to emit a uevent?
+ * Only ioctl interface >= 4.17 reports this; on older kernels we
+ * conservatively assume one was generated.
+ */
+static int _check_uevent_generated(struct dm_ioctl *dmi)
+{
+ if (!dm_check_version() ||
+ _dm_version < 4 ||
+ _dm_version_minor < 17)
+ /* can't check, assume uevent is generated */
+ return 1;
+
+ return dmi->flags & DM_UEVENT_GENERATED_FLAG;
+}
+#endif
+
+/*
+ * Old-style create: the caller supplied a table with DM_DEVICE_CREATE,
+ * so perform the three-step sequence create -> load table -> resume,
+ * reusing the caller's task for the final resume so its dmi/info
+ * reflect the resumed device.  On any failure after the create
+ * succeeds, the device is removed again ("revert"), keeping the udev
+ * cookie semantics consistent.  Returns 1 on full success.
+ */
+static int _create_and_load_v4(struct dm_task *dmt)
+{
+ struct dm_task *task;
+ int r;
+ uint32_t cookie;
+
+ /* Use new task struct to create the device */
+ if (!(task = dm_task_create(DM_DEVICE_CREATE))) {
+ _udev_complete(dmt);
+ return_0;
+ }
+
+ /* Copy across relevant fields */
+ if (dmt->dev_name && !dm_task_set_name(task, dmt->dev_name))
+ goto_bad;
+
+ if (dmt->uuid && !dm_task_set_uuid(task, dmt->uuid))
+ goto_bad;
+
+ task->major = dmt->major;
+ task->minor = dmt->minor;
+ task->uid = dmt->uid;
+ task->gid = dmt->gid;
+ task->mode = dmt->mode;
+ /* FIXME: Just for udev_check in dm_task_run. Can we avoid this? */
+ task->event_nr = dmt->event_nr & DM_UDEV_FLAGS_MASK;
+ task->cookie_set = dmt->cookie_set;
+ task->add_node = dmt->add_node;
+
+ if (!dm_task_run(task))
+ goto_bad;
+
+ dm_task_destroy(task);
+
+ /* Next load the table */
+ if (!(task = dm_task_create(DM_DEVICE_RELOAD))) {
+ stack;
+ _udev_complete(dmt);
+ goto revert;
+ }
+
+ /* Copy across relevant fields */
+ if (dmt->dev_name && !dm_task_set_name(task, dmt->dev_name)) {
+ stack;
+ dm_task_destroy(task);
+ _udev_complete(dmt);
+ goto revert;
+ }
+
+ task->read_only = dmt->read_only;
+ /* Borrow the caller's target list for the reload... */
+ task->head = dmt->head;
+ task->tail = dmt->tail;
+ task->secure_data = dmt->secure_data;
+
+ r = dm_task_run(task);
+
+ /* ...and detach it again so destroy doesn't free the caller's list. */
+ task->head = NULL;
+ task->tail = NULL;
+ dm_task_destroy(task);
+
+ if (!r) {
+ stack;
+ _udev_complete(dmt);
+ goto revert;
+ }
+
+ /* Use the original structure last so the info will be correct */
+ dmt->type = DM_DEVICE_RESUME;
+ /* Resume addresses the device by name; drop the UUID copies. */
+ dm_free(dmt->uuid);
+ dmt->uuid = NULL;
+ dm_free(dmt->mangled_uuid);
+ dmt->mangled_uuid = NULL;
+
+ if (dm_task_run(dmt))
+ return 1;
+
+ revert:
+ dmt->type = DM_DEVICE_REMOVE;
+ dm_free(dmt->uuid);
+ dmt->uuid = NULL;
+ dm_free(dmt->mangled_uuid);
+ dmt->mangled_uuid = NULL;
+
+ /*
+ * Also udev-synchronize "remove" dm task that is a part of this revert!
+ * But only if the original dm task was supposed to be synchronized.
+ */
+ if (dmt->cookie_set) {
+ cookie = (dmt->event_nr & ~DM_UDEV_FLAGS_MASK) |
+ (DM_COOKIE_MAGIC << DM_UDEV_FLAGS_SHIFT);
+ if (!dm_task_set_cookie(dmt, &cookie,
+ (dmt->event_nr & DM_UDEV_FLAGS_MASK) >>
+ DM_UDEV_FLAGS_SHIFT))
+ stack; /* keep going */
+ }
+
+ if (!dm_task_run(dmt))
+ log_error("Failed to revert device creation.");
+
+ return 0;
+
+ bad:
+ dm_task_destroy(task);
+ _udev_complete(dmt);
+
+ return 0;
+}
+
+/*
+ * Size in sectors of the live table found during a suppressed reload
+ * (set by _reload_with_suppression_v4; 0 if the device had no table).
+ */
+uint64_t dm_task_get_existing_table_size(struct dm_task *dmt)
+{
+ return dmt->existing_table_size;
+}
+
+/*
+ * Reload that first fetches the device's existing table and, if the
+ * new table is byte-for-byte identical (same targets, same read-only
+ * flag), skips the kernel reload entirely — the fetched dmi is adopted
+ * so the task still carries valid status.  Also records the existing
+ * table size for dm_task_get_existing_table_size().
+ */
+static int _reload_with_suppression_v4(struct dm_task *dmt)
+{
+ struct dm_task *task;
+ struct target *t1, *t2;
+ size_t len;
+ int r;
+
+ /* New task to get existing table information */
+ if (!(task = dm_task_create(DM_DEVICE_TABLE))) {
+ log_error("Failed to create device-mapper task struct");
+ return 0;
+ }
+
+ /* Copy across relevant fields */
+ if (dmt->dev_name && !dm_task_set_name(task, dmt->dev_name)) {
+ dm_task_destroy(task);
+ return 0;
+ }
+
+ if (dmt->uuid && !dm_task_set_uuid(task, dmt->uuid)) {
+ dm_task_destroy(task);
+ return 0;
+ }
+
+ task->major = dmt->major;
+ task->minor = dmt->minor;
+
+ r = dm_task_run(task);
+
+ if (!r) {
+ dm_task_destroy(task);
+ return r;
+ }
+
+ /* Store existing table size */
+ t2 = task->head;
+ while (t2 && t2->next)
+ t2 = t2->next;
+ dmt->existing_table_size = t2 ? t2->start + t2->length : 0;
+
+ /* Read-only state must match too, not just the targets. */
+ if (((task->dmi.v4->flags & DM_READONLY_FLAG) ? 1 : 0) != dmt->read_only)
+ goto no_match;
+
+ t1 = dmt->head;
+ t2 = task->head;
+
+ while (t1 && t2) {
+ /* Strip trailing spaces the kernel may append to params. */
+ len = strlen(t2->params);
+ while (len-- > 0 && t2->params[len] == ' ')
+ t2->params[len] = '\0';
+ if ((t1->start != t2->start) ||
+ (t1->length != t2->length) ||
+ (strcmp(t1->type, t2->type)) ||
+ (strcmp(t1->params, t2->params)))
+ goto no_match;
+ t1 = t1->next;
+ t2 = t2->next;
+ }
+
+ /* Both lists exhausted together => tables identical: skip reload. */
+ if (!t1 && !t2) {
+ dmt->dmi.v4 = task->dmi.v4;
+ task->dmi.v4 = NULL;
+ dm_task_destroy(task);
+ return 1;
+ }
+
+no_match:
+ dm_task_destroy(task);
+
+ /* Now do the original reload */
+ dmt->suppress_identical_reload = 0;
+ r = dm_task_run(dmt);
+
+ return r;
+}
+
+/*
+ * Recursively walk the dependency tree of the device about to be
+ * suspended and complain if any dm device underneath it is already
+ * suspended (I/O could become trapped between the two layers).
+ * device == 0 means "start from the device addressed by dmt"; on
+ * recursion, device carries the child's kernel dev number.
+ * Returns 1 on success/warning-only, 0 on hard failure.
+ */
+static int _check_children_not_suspended_v4(struct dm_task *dmt, uint64_t device)
+{
+ struct dm_task *task;
+ struct dm_info info;
+ struct dm_deps *deps;
+ int r = 0;
+ uint32_t i;
+
+ /* Find dependencies */
+ if (!(task = dm_task_create(DM_DEVICE_DEPS)))
+ return 0;
+
+ /* Copy across or set relevant fields */
+ if (device) {
+ task->major = MAJOR(device);
+ task->minor = MINOR(device);
+ } else {
+ if (dmt->dev_name && !dm_task_set_name(task, dmt->dev_name))
+ goto out;
+
+ if (dmt->uuid && !dm_task_set_uuid(task, dmt->uuid))
+ goto out;
+
+ task->major = dmt->major;
+ task->minor = dmt->minor;
+ }
+
+ task->uid = dmt->uid;
+ task->gid = dmt->gid;
+ task->mode = dmt->mode;
+ /* FIXME: Just for udev_check in dm_task_run. Can we avoid this? */
+ task->event_nr = dmt->event_nr & DM_UDEV_FLAGS_MASK;
+ task->cookie_set = dmt->cookie_set;
+ task->add_node = dmt->add_node;
+
+ if (!(r = dm_task_run(task)))
+ goto out;
+
+ /* A vanished device is not an error for this check. */
+ if (!dm_task_get_info(task, &info) || !info.exists)
+ goto out;
+
+ /*
+ * Warn if any of the devices this device depends upon are already
+ * suspended: I/O could become trapped between the two devices.
+ */
+ if (info.suspended) {
+ if (!device)
+ log_debug_activation("Attempting to suspend a device that is already suspended "
+ "(%u:%u)", info.major, info.minor);
+ else
+ log_error(INTERNAL_ERROR "Attempt to suspend device %s%s%s%.0d%s%.0d%s%s"
+ "that uses already-suspended device (%u:%u)",
+ DEV_NAME(dmt) ? : "", DEV_UUID(dmt) ? : "",
+ dmt->major > 0 ? "(" : "",
+ dmt->major > 0 ? dmt->major : 0,
+ dmt->major > 0 ? ":" : "",
+ dmt->minor > 0 ? dmt->minor : 0,
+ dmt->major > 0 && dmt->minor == 0 ? "0" : "",
+ dmt->major > 0 ? ") " : "",
+ info.major, info.minor);
+
+ /* No need for further recursion */
+ r = 1;
+ goto out;
+ }
+
+ if (!(deps = dm_task_get_deps(task)))
+ goto out;
+
+ for (i = 0; i < deps->count; i++) {
+ /* Only recurse with dm devices */
+ if (MAJOR(deps->device[i]) != _dm_device_major)
+ continue;
+
+ if (!_check_children_not_suspended_v4(task, deps->device[i]))
+ goto out;
+ }
+
+ r = 1;
+
+out:
+ dm_task_destroy(task);
+
+ return r;
+}
+
+/*
+ * Suspend preceded by a dependency-tree check for already-suspended
+ * children.  Clears enable_checks first so the dm_task_run() below
+ * does not re-enter this function.
+ */
+static int _suspend_with_validation_v4(struct dm_task *dmt)
+{
+ /* Avoid recursion */
+ dmt->enable_checks = 0;
+
+ /*
+ * Ensure we can't leave any I/O trapped between suspended devices.
+ */
+ if (!_check_children_not_suspended_v4(dmt, 0))
+ return 0;
+
+ /* Finally, perform the original suspend. */
+ return dm_task_run(dmt);
+}
+
+/*
+ * Return a version of the message safe to log (secrets redacted).
+ * NULL maps to "".
+ */
+static const char *_sanitise_message(char *message)
+{
+ const char *sanitised_message = message ?: "";
+
+ /* FIXME: Check for whitespace variations. */
+ /* This traps what cryptsetup sends us. */
+ if (message && !strncasecmp(message, "key set", 7))
+ sanitised_message = "key set";
+
+ return sanitised_message;
+}
+
+#ifdef DM_IOCTLS
+/*
+ * Undo hex-mangling of a string in place (str must have room for the
+ * unmangled form; buf is scratch space of buf_size bytes).
+ * No-op when mangling mode is NONE.  Returns 1 on success.
+ */
+static int _do_dm_ioctl_unmangle_string(char *str, const char *str_name,
+ char *buf, size_t buf_size,
+ dm_string_mangling_t mode)
+{
+ int r;
+
+ if (mode == DM_STRING_MANGLING_NONE)
+ return 1;
+
+ if (!check_multiple_mangled_string_allowed(str, str_name, mode))
+ return_0;
+
+ if ((r = unmangle_string(str, str_name, strlen(str), buf, buf_size, mode)) < 0) {
+ log_debug_activation("_do_dm_ioctl_unmangle_string: failed to "
+ "unmangle %s \"%s\"", str_name, str);
+ return 0;
+ }
+
+ /* r > 0 means unmangling changed the string; copy the result back. */
+ if (r)
+ memcpy(str, buf, strlen(buf) + 1);
+
+ return 1;
+}
+
+/*
+ * Unmangle the device name in an ioctl result and, for DM_DEVICE_LIST,
+ * every name in the returned list.
+ * NOTE(review): inside the list loop r is overwritten each iteration,
+ * so only the last entry's result is reported.
+ */
+static int _dm_ioctl_unmangle_names(int type, struct dm_ioctl *dmi)
+{
+ char buf[DM_NAME_LEN];
+ struct dm_names *names;
+ unsigned next = 0;
+ char *name;
+ int r = 1;
+
+ if ((name = dmi->name))
+ r = _do_dm_ioctl_unmangle_string(name, "name", buf, sizeof(buf),
+ dm_get_name_mangling_mode());
+
+ if (type == DM_DEVICE_LIST &&
+ ((names = ((struct dm_names *) ((char *)dmi + dmi->data_start)))) &&
+ names->dev) {
+ do {
+ names = (struct dm_names *)((char *) names + next);
+ r = _do_dm_ioctl_unmangle_string(names->name, "name",
+ buf, sizeof(buf),
+ dm_get_name_mangling_mode());
+ next = names->next;
+ } while (next);
+ }
+
+ return r;
+}
+
+/* Unmangle the UUID in an ioctl result, if one was returned. */
+static int _dm_ioctl_unmangle_uuids(int type, struct dm_ioctl *dmi)
+{
+ char buf[DM_UUID_LEN];
+ char *uuid = dmi->uuid;
+
+ if (uuid)
+ return _do_dm_ioctl_unmangle_string(uuid, "UUID", buf, sizeof(buf),
+ dm_get_name_mangling_mode());
+
+ return 1;
+}
+#endif
+
+/*
+ * Flatten the task into an ioctl buffer, apply udev-synchronisation
+ * flags, issue the ioctl on the control device and unmangle the
+ * returned names/UUIDs.  buffer_repeat_count sizes the buffer (doubled
+ * per previous DM_BUFFER_FULL retry); retry_repeat_count is only used
+ * in the debug trace.  On failure returns NULL and sets *retryable
+ * when the errno (EBUSY) suggests a retry might succeed.
+ * Caller owns the returned buffer (_dm_zfree_dmi).
+ */
+static struct dm_ioctl *_do_dm_ioctl(struct dm_task *dmt, unsigned command,
+ unsigned buffer_repeat_count,
+ unsigned retry_repeat_count,
+ int *retryable)
+{
+ struct dm_ioctl *dmi;
+ int ioctl_with_uevent;
+ int r;
+
+ dmt->ioctl_errno = 0;
+
+ dmi = _flatten(dmt, buffer_repeat_count);
+ if (!dmi) {
+ log_error("Couldn't create ioctl argument.");
+ return NULL;
+ }
+
+ if (dmt->type == DM_DEVICE_TABLE)
+ dmi->flags |= DM_STATUS_TABLE_FLAG;
+
+ dmi->flags |= DM_EXISTS_FLAG; /* FIXME */
+
+ if (dmt->no_open_count)
+ dmi->flags |= DM_SKIP_BDGET_FLAG;
+
+ /* Only these three operations make the kernel emit a uevent. */
+ ioctl_with_uevent = dmt->type == DM_DEVICE_RESUME ||
+ dmt->type == DM_DEVICE_REMOVE ||
+ dmt->type == DM_DEVICE_RENAME;
+
+ if (ioctl_with_uevent && dm_cookie_supported()) {
+ /*
+ * Always mark events coming from libdevmapper as
+ * "primary sourced". This is needed to distinguish
+ * any spurious events so we can act appropriately.
+ * This needs to be applied even when udev_sync is
+ * not used because udev flags could be used alone.
+ */
+ dmi->event_nr |= DM_UDEV_PRIMARY_SOURCE_FLAG <<
+ DM_UDEV_FLAGS_SHIFT;
+
+ /*
+ * Prevent udev vs. libdevmapper race when processing nodes
+ * and symlinks. This can happen when the udev rules are
+ * installed and udev synchronisation code is enabled in
+ * libdevmapper but the software using libdevmapper does not
+ * make use of it (by not calling dm_task_set_cookie before).
+ * We need to instruct the udev rules not to be applied at
+ * all in this situation so we can gracefully fallback to
+ * libdevmapper's node and symlink creation code.
+ */
+ if (!dmt->cookie_set && dm_udev_get_sync_support()) {
+ log_debug_activation("Cookie value is not set while trying to call %s "
+ "ioctl. Please, consider using libdevmapper's udev "
+ "synchronisation interface or disable it explicitly "
+ "by calling dm_udev_set_sync_support(0).",
+ dmt->type == DM_DEVICE_RESUME ? "DM_DEVICE_RESUME" :
+ dmt->type == DM_DEVICE_REMOVE ? "DM_DEVICE_REMOVE" :
+ "DM_DEVICE_RENAME");
+ log_debug_activation("Switching off device-mapper and all subsystem related "
+ "udev rules. Falling back to libdevmapper node creation.");
+ /*
+ * Disable general dm and subsystem rules but keep
+ * dm disk rules if not flagged out explicitly before.
+ * We need /dev/disk content for the software that expects it.
+ */
+ dmi->event_nr |= (DM_UDEV_DISABLE_DM_RULES_FLAG |
+ DM_UDEV_DISABLE_SUBSYSTEM_RULES_FLAG) <<
+ DM_UDEV_FLAGS_SHIFT;
+ }
+ }
+
+ /* One-line trace of the full request before issuing it. */
+ log_debug_activation("dm %s %s%s %s%s%s %s%.0d%s%.0d%s"
+ "%s[ %s%s%s%s%s%s%s%s%s] %.0" PRIu64 " %s [%u] (*%u)",
+ _cmd_data_v4[dmt->type].name,
+ dmt->new_uuid ? "UUID " : "",
+ dmi->name, dmi->uuid, dmt->newname ? " " : "",
+ dmt->newname ? dmt->newname : "",
+ dmt->major > 0 ? "(" : "",
+ dmt->major > 0 ? dmt->major : 0,
+ dmt->major > 0 ? ":" : "",
+ dmt->minor > 0 ? dmt->minor : 0,
+ dmt->major > 0 && dmt->minor == 0 ? "0" : "",
+ dmt->major > 0 ? ") " : "",
+ dmt->no_open_count ? "noopencount " : "opencount ",
+ dmt->no_flush ? "noflush " : "flush ",
+ dmt->read_only ? "readonly " : "",
+ dmt->skip_lockfs ? "skiplockfs " : "",
+ dmt->retry_remove ? "retryremove " : "",
+ dmt->deferred_remove ? "deferredremove " : "",
+ dmt->secure_data ? "securedata " : "",
+ dmt->query_inactive_table ? "inactive " : "",
+ dmt->enable_checks ? "enablechecks " : "",
+ dmt->sector, _sanitise_message(dmt->message),
+ dmi->data_size, retry_repeat_count);
+#ifdef DM_IOCTLS
+ r = ioctl(_control_fd, command, dmi);
+
+ if (dmt->record_timestamp)
+ if (!dm_timestamp_get(_dm_ioctl_timestamp))
+ stack;
+
+ if (r < 0 && dmt->expected_errno != errno) {
+ dmt->ioctl_errno = errno;
+ /* ENXIO on info-style queries just means "no such device". */
+ if (dmt->ioctl_errno == ENXIO && ((dmt->type == DM_DEVICE_INFO) ||
+ (dmt->type == DM_DEVICE_MKNODES) ||
+ (dmt->type == DM_DEVICE_STATUS)))
+ dmi->flags &= ~DM_EXISTS_FLAG; /* FIXME */
+ else {
+ if (_log_suppress || dmt->ioctl_errno == EINTR)
+ log_verbose("device-mapper: %s ioctl on %s %s%s%.0d%s%.0d%s%s "
+ "failed: %s",
+ _cmd_data_v4[dmt->type].name,
+ dmi->name, dmi->uuid,
+ dmt->major > 0 ? "(" : "",
+ dmt->major > 0 ? dmt->major : 0,
+ dmt->major > 0 ? ":" : "",
+ dmt->minor > 0 ? dmt->minor : 0,
+ dmt->major > 0 && dmt->minor == 0 ? "0" : "",
+ dmt->major > 0 ? ")" : "",
+ strerror(dmt->ioctl_errno));
+ else
+ log_error("device-mapper: %s ioctl on %s %s%s%.0d%s%.0d%s%s "
+ "failed: %s",
+ _cmd_data_v4[dmt->type].name,
+ dmi->name, dmi->uuid,
+ dmt->major > 0 ? "(" : "",
+ dmt->major > 0 ? dmt->major : 0,
+ dmt->major > 0 ? ":" : "",
+ dmt->minor > 0 ? dmt->minor : 0,
+ dmt->major > 0 && dmt->minor == 0 ? "0" : "",
+ dmt->major > 0 ? ")" : "",
+ strerror(dmt->ioctl_errno));
+
+ /*
+ * It's sometimes worth retrying after EBUSY in case
+ * it's a transient failure caused by an asynchronous
+ * process quickly scanning the device.
+ */
+ *retryable = dmt->ioctl_errno == EBUSY;
+
+ goto error;
+ }
+ }
+
+ if (ioctl_with_uevent && dm_udev_get_sync_support() &&
+ !_check_uevent_generated(dmi)) {
+ log_debug_activation("Uevent not generated! Calling udev_complete "
+ "internally to avoid process lock-up.");
+ _udev_complete(dmt);
+ }
+
+ if (!_dm_ioctl_unmangle_names(dmt->type, dmi))
+ goto error;
+
+ if (dmt->type != DM_DEVICE_REMOVE &&
+ !_dm_ioctl_unmangle_uuids(dmt->type, dmi))
+ goto error;
+
+#else /* Userspace alternative for testing */
+ goto error;
+#endif
+ return dmi;
+
+error:
+ _dm_zfree_dmi(dmi);
+ return NULL;
+}
+
+/* Flush any queued /dev node add/remove/rename operations. */
+void dm_task_update_nodes(void)
+{
+ update_devs();
+}
+
+/* Max attempts and delay between attempts for retried REMOVE ioctls. */
+#define DM_IOCTL_RETRIES 25
+#define DM_RETRY_USLEEP_DELAY 200000
+
+/* errno of the last failed ioctl on this task (0 if none). */
+int dm_task_get_errno(struct dm_task *dmt)
+{
+ return dmt->ioctl_errno;
+}
+
+/*
+ * Execute the task: dispatch composite operations (create+load,
+ * mknodes-all, suppressed reload, validated suspend) to their helpers,
+ * otherwise issue the ioctl — growing the buffer and repeating on
+ * DM_BUFFER_FULL, retrying REMOVE on transient EBUSY — then perform
+ * the matching /dev node maintenance and keep the result buffer on
+ * the task.  Returns 1 on success.
+ */
+int dm_task_run(struct dm_task *dmt)
+{
+ struct dm_ioctl *dmi;
+ unsigned command;
+ int check_udev;
+ int rely_on_udev;
+ int suspended_counter;
+ unsigned ioctl_retry = 1;
+ int retryable = 0;
+ const char *dev_name = DEV_NAME(dmt);
+ const char *dev_uuid = DEV_UUID(dmt);
+
+ if ((unsigned) dmt->type >= DM_ARRAY_SIZE(_cmd_data_v4)) {
+ log_error(INTERNAL_ERROR "unknown device-mapper task %d",
+ dmt->type);
+ return 0;
+ }
+
+ command = _cmd_data_v4[dmt->type].cmd;
+
+ /* Old-style creation had a table supplied */
+ if (dmt->type == DM_DEVICE_CREATE && dmt->head)
+ return _create_and_load_v4(dmt);
+
+ if (dmt->type == DM_DEVICE_MKNODES && !dev_name &&
+ !dev_uuid && dmt->major <= 0)
+ return _mknodes_v4(dmt);
+
+ if ((dmt->type == DM_DEVICE_RELOAD) && dmt->suppress_identical_reload)
+ return _reload_with_suppression_v4(dmt);
+
+ if ((dmt->type == DM_DEVICE_SUSPEND) && dmt->enable_checks)
+ return _suspend_with_validation_v4(dmt);
+
+ if (!_open_control()) {
+ _udev_complete(dmt);
+ return_0;
+ }
+
+ /* Loading a table while other devices are suspended risks deadlock. */
+ if ((suspended_counter = dm_get_suspended_counter()) &&
+ dmt->type == DM_DEVICE_RELOAD)
+ log_error(INTERNAL_ERROR "Performing unsafe table load while %d device(s) "
+ "are known to be suspended: "
+ "%s%s%s %s%.0d%s%.0d%s%s",
+ suspended_counter,
+ dev_name ? : "",
+ dev_uuid ? " UUID " : "",
+ dev_uuid ? : "",
+ dmt->major > 0 ? "(" : "",
+ dmt->major > 0 ? dmt->major : 0,
+ dmt->major > 0 ? ":" : "",
+ dmt->minor > 0 ? dmt->minor : 0,
+ dmt->major > 0 && dmt->minor == 0 ? "0" : "",
+ dmt->major > 0 ? ") " : "");
+
+ /* FIXME Detect and warn if cookie set but should not be. */
+repeat_ioctl:
+ if (!(dmi = _do_dm_ioctl(dmt, command, _ioctl_buffer_double_factor,
+ ioctl_retry, &retryable))) {
+ /*
+ * Async udev rules that scan devices commonly cause transient
+ * failures. Normally you'd expect the user to have made sure
+ * nothing was using the device before issuing REMOVE, so it's
+ * worth retrying in case the failure is indeed transient.
+ */
+ if (retryable && dmt->type == DM_DEVICE_REMOVE &&
+ dmt->retry_remove && ++ioctl_retry <= DM_IOCTL_RETRIES) {
+ usleep(DM_RETRY_USLEEP_DELAY);
+ goto repeat_ioctl;
+ }
+
+ _udev_complete(dmt);
+ return 0;
+ }
+
+ /* Result truncated: double the buffer and repeat for query ioctls. */
+ if (dmi->flags & DM_BUFFER_FULL_FLAG) {
+ switch (dmt->type) {
+ case DM_DEVICE_LIST_VERSIONS:
+ case DM_DEVICE_LIST:
+ case DM_DEVICE_DEPS:
+ case DM_DEVICE_STATUS:
+ case DM_DEVICE_TABLE:
+ case DM_DEVICE_WAITEVENT:
+ case DM_DEVICE_TARGET_MSG:
+ _ioctl_buffer_double_factor++;
+ _dm_zfree_dmi(dmi);
+ goto repeat_ioctl;
+ default:
+ log_error("WARNING: libdevmapper buffer too small for data");
+ }
+ }
+
+ /*
+ * Are we expecting a udev operation to occur that we need to check for?
+ */
+ check_udev = dmt->cookie_set &&
+ !(dmt->event_nr >> DM_UDEV_FLAGS_SHIFT &
+ DM_UDEV_DISABLE_DM_RULES_FLAG);
+
+ rely_on_udev = dmt->cookie_set ? (dmt->event_nr >> DM_UDEV_FLAGS_SHIFT &
+ DM_UDEV_DISABLE_LIBRARY_FALLBACK) : 0;
+
+ /* Per-operation /dev node maintenance. */
+ switch (dmt->type) {
+ case DM_DEVICE_CREATE:
+ if ((dmt->add_node == DM_ADD_NODE_ON_CREATE) &&
+ dev_name && *dev_name && !rely_on_udev)
+ add_dev_node(dev_name, MAJOR(dmi->dev),
+ MINOR(dmi->dev), dmt->uid, dmt->gid,
+ dmt->mode, check_udev, rely_on_udev);
+ break;
+ case DM_DEVICE_REMOVE:
+ /* FIXME Kernel needs to fill in dmi->name */
+ if (dev_name && !rely_on_udev)
+ rm_dev_node(dev_name, check_udev, rely_on_udev);
+ break;
+
+ case DM_DEVICE_RENAME:
+ /* FIXME Kernel needs to fill in dmi->name */
+ if (!dmt->new_uuid && dev_name)
+ rename_dev_node(dev_name, dmt->newname,
+ check_udev, rely_on_udev);
+ break;
+
+ case DM_DEVICE_RESUME:
+ if ((dmt->add_node == DM_ADD_NODE_ON_RESUME) &&
+ dev_name && *dev_name)
+ add_dev_node(dev_name, MAJOR(dmi->dev),
+ MINOR(dmi->dev), dmt->uid, dmt->gid,
+ dmt->mode, check_udev, rely_on_udev);
+ /* FIXME Kernel needs to fill in dmi->name */
+ set_dev_node_read_ahead(dev_name,
+ MAJOR(dmi->dev), MINOR(dmi->dev),
+ dmt->read_ahead, dmt->read_ahead_flags);
+ break;
+
+ case DM_DEVICE_MKNODES:
+ if (dmi->flags & DM_EXISTS_FLAG)
+ add_dev_node(dmi->name, MAJOR(dmi->dev),
+ MINOR(dmi->dev), dmt->uid,
+ dmt->gid, dmt->mode, 0, rely_on_udev);
+ else if (dev_name)
+ rm_dev_node(dev_name, 0, rely_on_udev);
+ break;
+
+ case DM_DEVICE_STATUS:
+ case DM_DEVICE_TABLE:
+ case DM_DEVICE_WAITEVENT:
+ if (!_unmarshal_status(dmt, dmi))
+ goto bad;
+ break;
+ }
+
+ /* Was structure reused? */
+ _dm_zfree_dmi(dmt->dmi.v4);
+ dmt->dmi.v4 = dmi;
+ return 1;
+
+ bad:
+ _dm_zfree_dmi(dmi);
+ return 0;
+}
+
+/*
+ * Control whether the /dev/mapper/control fd is kept open across
+ * dm_lib_release() calls (avoids repeated open/close in daemons).
+ */
+void dm_hold_control_dev(int hold_open)
+{
+ _hold_control_fd_open = hold_open ? 1 : 0;
+
+ log_debug("Hold of control device is now %sset.",
+ _hold_control_fd_open ? "" : "un");
+}
+
+/*
+ * Release per-operation library resources: close the control fd
+ * (unless held open), drop the shared ioctl timestamp and flush
+ * pending /dev node updates.
+ */
+void dm_lib_release(void)
+{
+ if (!_hold_control_fd_open)
+ _close_control_fd();
+ dm_timestamp_destroy(_dm_ioctl_timestamp);
+ _dm_ioctl_timestamp = NULL;
+ update_devs();
+}
+
+void dm_pools_check_leaks(void);
+
+/*
+ * Final library teardown: release resources, selinux state and the
+ * shared bitset, then run leak/memory diagnostics.  Idempotent —
+ * only the first call does the work.
+ */
+void dm_lib_exit(void)
+{
+ int suspended_counter;
+ static unsigned _exited = 0;
+
+ if (_exited++)
+ return;
+
+ if ((suspended_counter = dm_get_suspended_counter()))
+ log_error("libdevmapper exiting with %d device(s) still suspended.", suspended_counter);
+
+ dm_lib_release();
+ selinux_release();
+ if (_dm_bitset)
+ dm_bitset_destroy(_dm_bitset);
+ _dm_bitset = NULL;
+ dm_pools_check_leaks();
+ dm_dump_memory();
+ /* Reset version cache so a re-initialised process re-checks it. */
+ _version_ok = 1;
+ _version_checked = 0;
+}
+
+#if defined(__GNUC__)
+/*
+ * Maintain binary backward compatibility.
+ * Version script mechanism works with 'gcc' compatible compilers only.
+ */
+
+/*
+ * This following code is here to retain ABI compatibility after adding
+ * the field deferred_remove to struct dm_info in version 1.02.89.
+ *
+ * Binaries linked against version 1.02.88 of libdevmapper or earlier
+ * will use this function that returns dm_info without the
+ * deferred_remove field.
+ *
+ * Binaries compiled against version 1.02.89 onwards will use
+ * the new function dm_task_get_info_with_deferred_remove due to the
+ * #define.
+ *
+ * N.B. Keep this function at the end of the file to make sure that
+ * no code in this file accidentally calls it.
+ */
+
+int dm_task_get_info_base(struct dm_task *dmt, struct dm_info *info);
+int dm_task_get_info_base(struct dm_task *dmt, struct dm_info *info)
+{
+ struct dm_info new_info;
+
+ /* Copy only the fields that existed before deferred_remove. */
+ if (!dm_task_get_info(dmt, &new_info))
+ return 0;
+
+ memcpy(info, &new_info, offsetof(struct dm_info, deferred_remove));
+
+ return 1;
+}
+
+int dm_task_get_info_with_deferred_remove(struct dm_task *dmt, struct dm_info *info);
+int dm_task_get_info_with_deferred_remove(struct dm_task *dmt, struct dm_info *info)
+{
+ struct dm_info new_info;
+
+ /* Copy only the fields that existed before internal_suspend. */
+ if (!dm_task_get_info(dmt, &new_info))
+ return 0;
+
+ memcpy(info, &new_info, offsetof(struct dm_info, internal_suspend));
+
+ return 1;
+}
+#endif
diff --git a/device_mapper/ioctl/libdm-targets.h b/device_mapper/ioctl/libdm-targets.h
new file mode 100644
index 000000000..b5b20d5e9
--- /dev/null
+++ b/device_mapper/ioctl/libdm-targets.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef LIB_DMTARGETS_H
+#define LIB_DMTARGETS_H
+
+#include <inttypes.h>
+#include <sys/types.h>
+
+struct dm_ioctl;
+
+struct target {
+ uint64_t start;
+ uint64_t length;
+ char *type;
+ char *params;
+
+ struct target *next;
+};
+
+struct dm_task {
+ int type;
+ char *dev_name;
+ char *mangled_dev_name;
+
+ struct target *head, *tail;
+
+ int read_only;
+ uint32_t event_nr;
+ int major;
+ int minor;
+ int allow_default_major_fallback;
+ uid_t uid;
+ gid_t gid;
+ mode_t mode;
+ uint32_t read_ahead;
+ uint32_t read_ahead_flags;
+ union {
+ struct dm_ioctl *v4;
+ } dmi;
+ char *newname;
+ char *message;
+ char *geometry;
+ uint64_t sector;
+ int no_flush;
+ int no_open_count;
+ int skip_lockfs;
+ int query_inactive_table;
+ int suppress_identical_reload;
+ dm_add_node_t add_node;
+ uint64_t existing_table_size;
+ int cookie_set;
+ int new_uuid;
+ int secure_data;
+ int retry_remove;
+ int deferred_remove;
+ int enable_checks;
+ int expected_errno;
+ int ioctl_errno;
+
+ int record_timestamp;
+
+ char *uuid;
+ char *mangled_uuid;
+};
+
+struct cmd_data {
+ const char *name;
+ const unsigned cmd;
+ const int version[3];
+};
+
+int dm_check_version(void);
+uint64_t dm_task_get_existing_table_size(struct dm_task *dmt);
+
+#endif
diff --git a/device_mapper/libdevmapper.h b/device_mapper/libdevmapper.h
new file mode 100644
index 000000000..2438f74c1
--- /dev/null
+++ b/device_mapper/libdevmapper.h
@@ -0,0 +1,3755 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2017 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2006 Rackable Systems All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef LIB_DEVICE_MAPPER_H
+#define LIB_DEVICE_MAPPER_H
+
+#include <inttypes.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#ifdef __linux__
+# include <linux/types.h>
+#endif
+
+#include <limits.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#ifndef __GNUC__
+# define __typeof__ typeof
+#endif
+
+/* Macros to make string defines */
+#define DM_TO_STRING_EXP(A) #A
+#define DM_TO_STRING(A) DM_TO_STRING_EXP(A)
+
+#define DM_ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*****************************************************************
+ * The first section of this file provides direct access to the
+ * individual device-mapper ioctls. Since it is quite laborious to
+ * build the ioctl arguments for the device-mapper, people are
+ * encouraged to use this library.
+ ****************************************************************/
+
+/*
+ * The library user may wish to register their own
+ * logging function. By default errors go to stderr.
+ * Use dm_log_with_errno_init(NULL) to restore the default log fn.
+ * Error messages may have a non-zero errno.
+ * Debug messages may have a non-zero class.
+ * Aborts on internal error when env DM_ABORT_ON_INTERNAL_ERRORS is 1
+ */
+
+typedef void (*dm_log_with_errno_fn) (int level, const char *file, int line,
+ int dm_errno_or_class, const char *f, ...)
+ __attribute__ ((format(printf, 5, 6)));
+
+void dm_log_with_errno_init(dm_log_with_errno_fn fn);
+void dm_log_init_verbose(int level);
+
+/*
+ * Original version of this function.
+ * dm_errno is set to 0.
+ *
+ * Deprecated: Use the _with_errno_ versions above instead.
+ */
+typedef void (*dm_log_fn) (int level, const char *file, int line,
+ const char *f, ...)
+ __attribute__ ((format(printf, 4, 5)));
+
+void dm_log_init(dm_log_fn fn);
+/*
+ * For backward-compatibility, indicate that dm_log_init() was used
+ * to set a non-default value of dm_log().
+ */
+int dm_log_is_non_default(void);
+
+/*
+ * Number of devices currently in suspended state (via the library).
+ */
+int dm_get_suspended_counter(void);
+
+enum {
+ DM_DEVICE_CREATE,
+ DM_DEVICE_RELOAD,
+ DM_DEVICE_REMOVE,
+ DM_DEVICE_REMOVE_ALL,
+
+ DM_DEVICE_SUSPEND,
+ DM_DEVICE_RESUME,
+
+ DM_DEVICE_INFO,
+ DM_DEVICE_DEPS,
+ DM_DEVICE_RENAME,
+
+ DM_DEVICE_VERSION,
+
+ DM_DEVICE_STATUS,
+ DM_DEVICE_TABLE,
+ DM_DEVICE_WAITEVENT,
+
+ DM_DEVICE_LIST,
+
+ DM_DEVICE_CLEAR,
+
+ DM_DEVICE_MKNODES,
+
+ DM_DEVICE_LIST_VERSIONS,
+
+ DM_DEVICE_TARGET_MSG,
+
+ DM_DEVICE_SET_GEOMETRY
+};
+
+/*
+ * You will need to build a struct dm_task for
+ * each ioctl command you want to execute.
+ */
+
+struct dm_pool;
+struct dm_task;
+struct dm_timestamp;
+
+struct dm_task *dm_task_create(int type);
+void dm_task_destroy(struct dm_task *dmt);
+
+int dm_task_set_name(struct dm_task *dmt, const char *name);
+int dm_task_set_uuid(struct dm_task *dmt, const char *uuid);
+
+/*
+ * Retrieve attributes after an info.
+ */
+struct dm_info {
+ int exists;
+ int suspended;
+ int live_table;
+ int inactive_table;
+ int32_t open_count;
+ uint32_t event_nr;
+ uint32_t major;
+ uint32_t minor; /* minor device number */
+ int read_only; /* 0:read-write; 1:read-only */
+
+ int32_t target_count;
+
+ int deferred_remove;
+ int internal_suspend;
+};
+
+struct dm_deps {
+ uint32_t count;
+ uint32_t filler;
+ uint64_t device[0];
+};
+
+struct dm_names {
+ uint64_t dev;
+ uint32_t next; /* Offset to next struct from start of this struct */
+ char name[0];
+};
+
+struct dm_versions {
+ uint32_t next; /* Offset to next struct from start of this struct */
+ uint32_t version[3];
+
+ char name[0];
+};
+
+int dm_get_library_version(char *version, size_t size);
+int dm_task_get_driver_version(struct dm_task *dmt, char *version, size_t size);
+int dm_task_get_info(struct dm_task *dmt, struct dm_info *dmi);
+
+/*
+ * This function returns dm device's UUID based on the value
+ * of the mangling mode set during preceding dm_task_run call:
+ * - unmangled UUID for DM_STRING_MANGLING_{AUTO, HEX},
+ * - UUID without any changes for DM_STRING_MANGLING_NONE.
+ *
+ * To get mangled or unmangled form of the UUID directly, use
+ * dm_task_get_uuid_mangled or dm_task_get_uuid_unmangled function.
+ */
+const char *dm_task_get_uuid(const struct dm_task *dmt);
+
+struct dm_deps *dm_task_get_deps(struct dm_task *dmt);
+struct dm_versions *dm_task_get_versions(struct dm_task *dmt);
+const char *dm_task_get_message_response(struct dm_task *dmt);
+
+/*
+ * These functions return device-mapper names based on the value
+ * of the mangling mode set during preceding dm_task_run call:
+ * - unmangled name for DM_STRING_MANGLING_{AUTO, HEX},
+ * - name without any changes for DM_STRING_MANGLING_NONE.
+ *
+ * To get mangled or unmangled form of the name directly, use
+ * dm_task_get_name_mangled or dm_task_get_name_unmangled function.
+ */
+const char *dm_task_get_name(const struct dm_task *dmt);
+struct dm_names *dm_task_get_names(struct dm_task *dmt);
+
+int dm_task_set_ro(struct dm_task *dmt);
+int dm_task_set_newname(struct dm_task *dmt, const char *newname);
+int dm_task_set_newuuid(struct dm_task *dmt, const char *newuuid);
+int dm_task_set_minor(struct dm_task *dmt, int minor);
+int dm_task_set_major(struct dm_task *dmt, int major);
+int dm_task_set_major_minor(struct dm_task *dmt, int major, int minor, int allow_default_major_fallback);
+int dm_task_set_uid(struct dm_task *dmt, uid_t uid);
+int dm_task_set_gid(struct dm_task *dmt, gid_t gid);
+int dm_task_set_mode(struct dm_task *dmt, mode_t mode);
+/* See also description for DM_UDEV_DISABLE_LIBRARY_FALLBACK flag! */
+int dm_task_set_cookie(struct dm_task *dmt, uint32_t *cookie, uint16_t flags);
+int dm_task_set_event_nr(struct dm_task *dmt, uint32_t event_nr);
+int dm_task_set_geometry(struct dm_task *dmt, const char *cylinders, const char *heads, const char *sectors, const char *start);
+int dm_task_set_message(struct dm_task *dmt, const char *message);
+int dm_task_set_sector(struct dm_task *dmt, uint64_t sector);
+int dm_task_no_flush(struct dm_task *dmt);
+int dm_task_no_open_count(struct dm_task *dmt);
+int dm_task_skip_lockfs(struct dm_task *dmt);
+int dm_task_query_inactive_table(struct dm_task *dmt);
+int dm_task_suppress_identical_reload(struct dm_task *dmt);
+int dm_task_secure_data(struct dm_task *dmt);
+int dm_task_retry_remove(struct dm_task *dmt);
+int dm_task_deferred_remove(struct dm_task *dmt);
+
+/*
+ * Record timestamp immediately after the ioctl returns.
+ */
+int dm_task_set_record_timestamp(struct dm_task *dmt);
+struct dm_timestamp *dm_task_get_ioctl_timestamp(struct dm_task *dmt);
+
+/*
+ * Enable checks for common mistakes such as issuing ioctls in an unsafe order.
+ */
+int dm_task_enable_checks(struct dm_task *dmt);
+
+typedef enum {
+ DM_ADD_NODE_ON_RESUME, /* add /dev/mapper node with dmsetup resume */
+ DM_ADD_NODE_ON_CREATE /* add /dev/mapper node with dmsetup create */
+} dm_add_node_t;
+int dm_task_set_add_node(struct dm_task *dmt, dm_add_node_t add_node);
+
+/*
+ * Control read_ahead.
+ */
+#define DM_READ_AHEAD_AUTO UINT32_MAX /* Use kernel default readahead */
+#define DM_READ_AHEAD_NONE 0 /* Disable readahead */
+
+#define DM_READ_AHEAD_MINIMUM_FLAG 0x1 /* Value supplied is minimum */
+
+/*
+ * Read ahead is set with DM_DEVICE_CREATE with a table or DM_DEVICE_RESUME.
+ */
+int dm_task_set_read_ahead(struct dm_task *dmt, uint32_t read_ahead,
+ uint32_t read_ahead_flags);
+uint32_t dm_task_get_read_ahead(const struct dm_task *dmt,
+ uint32_t *read_ahead);
+
+/*
+ * Use these to prepare for a create or reload.
+ */
+int dm_task_add_target(struct dm_task *dmt,
+ uint64_t start,
+ uint64_t size, const char *ttype, const char *params);
+
+/*
+ * Format major/minor numbers correctly for input to driver.
+ */
+#define DM_FORMAT_DEV_BUFSIZE 13 /* Minimum bufsize to handle worst case. */
+int dm_format_dev(char *buf, int bufsize, uint32_t dev_major, uint32_t dev_minor);
+
+/* Use this to retrieve target information returned from a STATUS call */
+void *dm_get_next_target(struct dm_task *dmt,
+ void *next, uint64_t *start, uint64_t *length,
+ char **target_type, char **params);
+
+/*
+ * Following dm_get_status_* functions will allocate appropriate status structure
+ * from passed mempool together with the necessary character arrays.
+ * Destroying the mempool will release all associated allocation.
+ */
+
+/* Parse params from STATUS call for mirror target */
+typedef enum {
+ DM_STATUS_MIRROR_ALIVE = 'A',/* No failures */
+ DM_STATUS_MIRROR_FLUSH_FAILED = 'F',/* Mirror out-of-sync */
+ DM_STATUS_MIRROR_WRITE_FAILED = 'D',/* Mirror out-of-sync */
+ DM_STATUS_MIRROR_SYNC_FAILED = 'S',/* Mirror out-of-sync */
+ DM_STATUS_MIRROR_READ_FAILED = 'R',/* Mirror data unaffected */
+ DM_STATUS_MIRROR_UNCLASSIFIED = 'U' /* Bug */
+} dm_status_mirror_health_t;
+
+struct dm_status_mirror {
+ uint64_t total_regions;
+ uint64_t insync_regions;
+ uint32_t dev_count; /* # of devs[] elements (<= 8) */
+ struct {
+ dm_status_mirror_health_t health;
+ uint32_t major;
+ uint32_t minor;
+ } *devs; /* array with individual legs */
+ const char *log_type; /* core, disk,.... */
+ uint32_t log_count; /* # of logs[] elements */
+ struct {
+ dm_status_mirror_health_t health;
+ uint32_t major;
+ uint32_t minor;
+ } *logs; /* array with individual logs */
+};
+
+int dm_get_status_mirror(struct dm_pool *mem, const char *params,
+ struct dm_status_mirror **status);
+
+/* Parse params from STATUS call for raid target */
+struct dm_status_raid {
+ uint64_t reserved;
+ uint64_t total_regions; /* sectors */
+ uint64_t insync_regions; /* sectors */
+ uint64_t mismatch_count;
+ uint32_t dev_count;
+ char *raid_type;
+ /* A - alive, a - alive not in-sync, D - dead/failed */
+ char *dev_health;
+ /* idle, frozen, resync, recover, check, repair */
+ char *sync_action;
+ uint64_t data_offset; /* RAID out-of-place reshaping */
+};
+
+int dm_get_status_raid(struct dm_pool *mem, const char *params,
+ struct dm_status_raid **status);
+
+/* Parse params from STATUS call for cache target */
+struct dm_status_cache {
+ uint64_t version; /* zero for now */
+
+ uint32_t metadata_block_size; /* in 512B sectors */
+ uint32_t block_size; /* AKA 'chunk_size' */
+
+ uint64_t metadata_used_blocks;
+ uint64_t metadata_total_blocks;
+
+ uint64_t used_blocks;
+ uint64_t dirty_blocks;
+ uint64_t total_blocks;
+
+ uint64_t read_hits;
+ uint64_t read_misses;
+ uint64_t write_hits;
+ uint64_t write_misses;
+
+ uint64_t demotions;
+ uint64_t promotions;
+
+ uint64_t feature_flags; /* DM_CACHE_FEATURE_? */
+
+ int core_argc;
+ char **core_argv;
+
+ char *policy_name;
+ int policy_argc;
+ char **policy_argv;
+
+ unsigned error : 1; /* detected error (switches to fail soon) */
+ unsigned fail : 1; /* all I/O fails */
+ unsigned needs_check : 1; /* metadata needs check */
+ unsigned read_only : 1; /* metadata may not be changed */
+ uint32_t reserved : 28;
+};
+
+int dm_get_status_cache(struct dm_pool *mem, const char *params,
+ struct dm_status_cache **status);
+
+/*
+ * Parse params from STATUS call for snapshot target
+ *
+ * Snapshot target's format:
+ * <= 1.7.0: <used_sectors>/<total_sectors>
+ * >= 1.8.0: <used_sectors>/<total_sectors> <metadata_sectors>
+ */
+struct dm_status_snapshot {
+ uint64_t used_sectors; /* in 512b units */
+ uint64_t total_sectors;
+ uint64_t metadata_sectors;
+ unsigned has_metadata_sectors : 1; /* set when metadata_sectors is present */
+ unsigned invalid : 1; /* set when snapshot is invalidated */
+ unsigned merge_failed : 1; /* set when snapshot merge failed */
+ unsigned overflow : 1; /* set when snapshot overflows */
+};
+
+int dm_get_status_snapshot(struct dm_pool *mem, const char *params,
+ struct dm_status_snapshot **status);
+
+/* Parse params from STATUS call for thin_pool target */
+typedef enum {
+ DM_THIN_DISCARDS_IGNORE,
+ DM_THIN_DISCARDS_NO_PASSDOWN,
+ DM_THIN_DISCARDS_PASSDOWN
+} dm_thin_discards_t;
+
+struct dm_status_thin_pool {
+ uint64_t transaction_id;
+ uint64_t used_metadata_blocks;
+ uint64_t total_metadata_blocks;
+ uint64_t used_data_blocks;
+ uint64_t total_data_blocks;
+ uint64_t held_metadata_root;
+ uint32_t read_only; /* metadata may not be changed */
+ dm_thin_discards_t discards;
+ uint32_t fail : 1; /* all I/O fails */
+ uint32_t error_if_no_space : 1; /* otherwise queue_if_no_space */
+ uint32_t out_of_data_space : 1; /* metadata may be changed, but data may not be allocated (no rw) */
+ uint32_t needs_check : 1; /* metadata needs check */
+ uint32_t error : 1; /* detected error (switches to fail soon) */
+ uint32_t reserved : 27;
+};
+
+int dm_get_status_thin_pool(struct dm_pool *mem, const char *params,
+ struct dm_status_thin_pool **status);
+
+/* Parse params from STATUS call for thin target */
+struct dm_status_thin {
+ uint64_t mapped_sectors;
+ uint64_t highest_mapped_sector;
+ uint32_t fail : 1; /* Thin volume fails I/O */
+ uint32_t reserved : 31;
+};
+
+int dm_get_status_thin(struct dm_pool *mem, const char *params,
+ struct dm_status_thin **status);
+
+/*
+ * device-mapper statistics support
+ */
+
+/*
+ * Statistics handle.
+ *
+ * Operations on dm_stats objects include managing statistics regions
+ * and obtaining and manipulating current counter values from the
+ * kernel. Methods are provided to return basic count values and to
+ * derive time-based metrics when a suitable interval estimate is
+ * provided.
+ *
+ * Internally the dm_stats handle contains a pointer to a table of one
+ * or more dm_stats_region objects representing the regions registered
+ * with the dm_stats_create_region() method. These in turn point to a
+ * table of one or more dm_stats_counters objects containing the
+ * counter sets for each defined area within the region:
+ *
+ * dm_stats->dm_stats_region[nr_regions]->dm_stats_counters[nr_areas]
+ *
+ * This structure is private to the library and may change in future
+ * versions: all users should make use of the public interface and treat
+ * the dm_stats type as an opaque handle.
+ *
+ * Regions and counter sets are stored in order of increasing region_id.
+ * Depending on region specifications and the sequence of create and
+ * delete operations this may not correspond to increasing sector
+ * number: users of the library should not assume that this is the case
+ * unless region creation is deliberately managed to ensure this (by
+ * always creating regions in strict order of ascending sector address).
+ *
+ * Regions may also overlap so the same sector range may be included in
+ * more than one region or area: applications should be prepared to deal
+ * with this or manage regions such that it does not occur.
+ */
+struct dm_stats;
+
+/*
+ * Histogram handle.
+ *
+ * A histogram object represents the latency histogram values and bin
+ * boundaries of the histogram associated with a particular area.
+ *
+ * Operations on the handle allow the number of bins, bin boundaries,
+ * counts and relative proportions to be obtained as well as the
+ * conversion of a histogram or its bounds to a compact string
+ * representation.
+ */
+struct dm_histogram;
+
+/*
+ * Allocate a dm_stats handle to use for subsequent device-mapper
+ * statistics operations. A program_id may be specified and will be
+ * used by default for subsequent operations on this handle.
+ *
+ * If program_id is NULL or the empty string a program_id will be
+ * automatically set to the value contained in /proc/self/comm.
+ */
+struct dm_stats *dm_stats_create(const char *program_id);
+
+/*
+ * Bind a dm_stats handle to the specified device major and minor
+ * values. Any previous binding is cleared and any preexisting counter
+ * data contained in the handle is released.
+ */
+int dm_stats_bind_devno(struct dm_stats *dms, int major, int minor);
+
+/*
+ * Bind a dm_stats handle to the specified device name.
+ * Any previous binding is cleared and any preexisting counter
+ * data contained in the handle is released.
+ */
+int dm_stats_bind_name(struct dm_stats *dms, const char *name);
+
+/*
+ * Bind a dm_stats handle to the specified device UUID.
+ * Any previous binding is cleared and any preexisting counter
+ * data contained in the handle is released.
+ */
+int dm_stats_bind_uuid(struct dm_stats *dms, const char *uuid);
+
+/*
+ * Bind a dm_stats handle to the device backing the file referenced
+ * by the specified file descriptor.
+ *
+ * File descriptor fd must reference a regular file, open for reading,
+ * in a local file system, backed by a device-mapper device, that
+ * supports the FIEMAP ioctl, and that returns data describing the
+ * physical location of extents.
+ */
+int dm_stats_bind_from_fd(struct dm_stats *dms, int fd);
+/*
+ * Test whether the running kernel supports the precise_timestamps
+ * feature. Presence of this feature also implies histogram support.
+ * The library will check this call internally and fails any attempt
+ * to use nanosecond counters or histograms on kernels that fail to
+ * meet this check.
+ */
+int dm_message_supports_precise_timestamps(void);
+
+/*
+ * Precise timestamps and histogram support.
+ *
+ * Test for the presence of precise_timestamps and histogram support.
+ */
+int dm_stats_driver_supports_precise(void);
+int dm_stats_driver_supports_histogram(void);
+
+/*
+ * Returns 1 if the specified region has the precise_timestamps feature
+ * enabled (i.e. produces nanosecond-precision counter values) or 0 for
+ * a region using the default millisecond precision.
+ */
+int dm_stats_get_region_precise_timestamps(const struct dm_stats *dms,
+ uint64_t region_id);
+
+/*
+ * Returns 1 if the region at the current cursor location has the
+ * precise_timestamps feature enabled (i.e. produces
+ * nanosecond-precision counter values) or 0 for a region using the
+ * default millisecond precision.
+ */
+int dm_stats_get_current_region_precise_timestamps(const struct dm_stats *dms);
+
+#define DM_STATS_ALL_PROGRAMS ""
+/*
+ * Parse the response from a @stats_list message. dm_stats_list will
+ * allocate the necessary dm_stats and dm_stats region structures from
+ * the embedded dm_pool. No counter data will be obtained (the counters
+ * members of dm_stats_region objects are set to NULL).
+ *
+ * A program_id may optionally be supplied; if the argument is non-NULL
+ * only regions with a matching program_id value will be considered. If
+ * the argument is NULL then the default program_id associated with the
+ * dm_stats handle will be used. Passing the special value
+ * DM_STATS_ALL_PROGRAMS will cause all regions to be queried
+ * regardless of region program_id.
+ */
+int dm_stats_list(struct dm_stats *dms, const char *program_id);
+
+#define DM_STATS_REGIONS_ALL UINT64_MAX
+/*
+ * Populate a dm_stats object with statistics for one or more regions of
+ * the specified device.
+ *
+ * A program_id may optionally be supplied; if the argument is non-NULL
+ * only regions with a matching program_id value will be considered. If
+ * the argument is NULL then the default program_id associated with the
+ * dm_stats handle will be used. Passing the special value
+ * DM_STATS_ALL_PROGRAMS will cause all regions to be queried
+ * regardless of region program_id.
+ *
+ * Passing the special value DM_STATS_REGIONS_ALL as the region_id
+ * argument will attempt to retrieve all regions selected by the
+ * program_id argument.
+ *
+ * If region_id is used to request a single region_id to be populated
+ * the program_id is ignored.
+ */
+int dm_stats_populate(struct dm_stats *dms, const char *program_id,
+ uint64_t region_id);
+
+/*
+ * Create a new statistics region on the device bound to dms.
+ *
+ * start and len specify the region start and length in 512b sectors.
+ * Passing zero for both start and len will create a region spanning
+ * the entire device.
+ *
+ * Step determines how to subdivide the region into discrete counter
+ * sets: a positive value specifies the size of areas into which the
+ * region should be split while a negative value will split the region
+ * into a number of areas equal to the absolute value of step:
+ *
+ * - a region with one area spanning the entire device:
+ *
+ * dm_stats_create_region(dms, 0, 0, -1, p, a);
+ *
+ * - a region with areas of 1MiB:
+ *
+ * dm_stats_create_region(dms, 0, 0, 1 << 11, p, a);
+ *
+ * - one 1MiB region starting at 1024 sectors with two areas:
+ *
+ * dm_stats_create_region(dms, 1024, 1 << 11, -2, p, a);
+ *
+ * If precise is non-zero attempt to create a region with nanosecond
+ * precision counters using the kernel precise_timestamps feature.
+ *
+ * precise - A flag to request nanosecond precision counters
+ * to be used for this region.
+ *
+ * histogram_bounds - specify the boundaries of a latency histogram to
+ * be tracked for the region. The values are expressed as an array of
+ * uint64_t terminated with a zero. Values must be in order of ascending
+ * magnitude and specify the upper bounds of successive histogram bins
+ * in nanoseconds (with an implicit lower bound of zero on the first bin
+ * and an implicit upper bound of infinity on the final bin). For
+ * example:
+ *
+ * uint64_t bounds_ary[] = { 1000, 2000, 3000, 0 };
+ *
+ * Specifies a histogram with four bins: 0-1000ns, 1000-2000ns,
+ * 2000-3000ns and >3000ns.
+ *
+ * The smallest latency value that can be tracked for a region not using
+ * precise_timestamps is 1ms: attempting to create a region with
+ * histogram boundaries < 1ms will cause the precise_timestamps feature
+ * to be enabled for that region automatically if it was not requested
+ * explicitly.
+ *
+ * program_id is an optional string argument that identifies the
+ * program creating the region. If program_id is NULL or the empty
+ * string the default program_id stored in the handle will be used.
+ *
+ * user_data is an optional string argument that is added to the
+ * content of the aux_data field stored with the statistics region by
+ * the kernel.
+ *
+ * The library may also use this space internally, for example, to
+ * store a group descriptor or other metadata: in this case the
+ * library will strip any internal data fields from the value before
+ * it is returned via a call to dm_stats_get_region_aux_data().
+ *
+ * The user data stored is not accessed by the library or kernel and
+ * may be used to store an arbitrary data word (embedded whitespace is
+ * not permitted).
+ *
+ * An application using both the library and direct access to the
+ * @stats_list device-mapper message may see the internal values stored
+ * in this field by the library. In such cases any string up to and
+ * including the first '#' in the field must be treated as an opaque
+ * value and preserved across any external modification of aux_data.
+ *
+ * The region_id of the newly-created region is returned in *region_id
+ * if it is non-NULL.
+ */
+int dm_stats_create_region(struct dm_stats *dms, uint64_t *region_id,
+ uint64_t start, uint64_t len, int64_t step,
+ int precise, struct dm_histogram *bounds,
+ const char *program_id, const char *user_data);
+
+/*
+ * Delete the specified statistics region. This will also mark the
+ * region as not-present and discard any existing statistics data.
+ */
+int dm_stats_delete_region(struct dm_stats *dms, uint64_t region_id);
+
+/*
+ * Clear the specified statistics region. This requests the kernel to
+ * zero all counter values (except in-flight I/O). Note that this
+ * operation is not atomic with respect to reads of the counters; any IO
+ * events occurring between the last print operation and the clear will
+ * be lost. This can be avoided by using the atomic print-and-clear
+ * function of the dm_stats_print_region() call or by using the higher
+ * level dm_stats_populate() interface.
+ */
+int dm_stats_clear_region(struct dm_stats *dms, uint64_t region_id);
+
+/*
+ * Print the current counter values for the specified statistics region
+ * and return them as a string. The memory for the string buffer will
+ * be allocated from the dm_stats handle's private pool and should be
+ * returned by calling dm_stats_buffer_destroy() when no longer
+ * required. The pointer will become invalid following any call that
+ * clears or reinitializes the handle (destroy, list, populate, bind).
+ *
+ * This allows applications that wish to access the raw message response
+ * to obtain it via a dm_stats handle; no parsing of the textual counter
+ * data is carried out by this function.
+ *
+ * Most users are recommended to use the dm_stats_populate() call
+ * instead since this will automatically parse the statistics data into
+ * numeric form accessible via the dm_stats_get_*() counter access
+ * methods.
+ *
+ * A subset of the data lines may be requested by setting the
+ * start_line and num_lines parameters. If both are zero all data
+ * lines are returned.
+ *
+ * If the clear parameter is non-zero the operation will also
+ * atomically reset all counter values to zero (except in-flight IO).
+ */
+char *dm_stats_print_region(struct dm_stats *dms, uint64_t region_id,
+ unsigned start_line, unsigned num_lines,
+ unsigned clear);
+
+/*
+ * Destroy a statistics response buffer obtained from a call to
+ * dm_stats_print_region().
+ */
+void dm_stats_buffer_destroy(struct dm_stats *dms, char *buffer);
+
+/*
+ * Determine the number of regions contained in a dm_stats handle
+ * following a dm_stats_list() or dm_stats_populate() call.
+ *
+ * The value returned is the number of registered regions visible with the
+ * program_id value used for the list or populate operation and may not be
+ * equal to the highest present region_id (either due to program_id
+ * filtering or gaps in the sequence of region_id values).
+ *
+ * Always returns zero on an empty handle.
+ */
+uint64_t dm_stats_get_nr_regions(const struct dm_stats *dms);
+
+/*
+ * Determine the number of groups contained in a dm_stats handle
+ * following a dm_stats_list() or dm_stats_populate() call.
+ *
+ * The value returned is the number of registered groups visible with the
+ * program_id value used for the list or populate operation and may not be
+ * equal to the highest present group_id (either due to program_id
+ * filtering or gaps in the sequence of group_id values).
+ *
+ * Always returns zero on an empty handle.
+ */
+uint64_t dm_stats_get_nr_groups(const struct dm_stats *dms);
+
+/*
+ * Test whether region_id is present in this dm_stats handle.
+ */
+int dm_stats_region_present(const struct dm_stats *dms, uint64_t region_id);
+
+/*
+ * Returns the number of areas (counter sets) contained in the specified
+ * region_id of the supplied dm_stats handle.
+ */
+uint64_t dm_stats_get_region_nr_areas(const struct dm_stats *dms,
+ uint64_t region_id);
+
+/*
+ * Returns the total number of areas (counter sets) in all regions of the
+ * given dm_stats object.
+ */
+uint64_t dm_stats_get_nr_areas(const struct dm_stats *dms);
+
+/*
+ * Test whether group_id is present in this dm_stats handle.
+ */
+int dm_stats_group_present(const struct dm_stats *dms, uint64_t group_id);
+
+/*
+ * Return the number of bins in the histogram configuration for the
+ * specified region or zero if no histogram specification is configured.
+ * Valid following a dm_stats_list() or dm_stats_populate() operation.
+ */
+int dm_stats_get_region_nr_histogram_bins(const struct dm_stats *dms,
+ uint64_t region_id);
+
+/*
+ * Parse a histogram string with optional unit suffixes into a
+ * dm_histogram bounds description.
+ *
+ * A histogram string is a string of numbers "n1,n2,n3,..." that
+ * represent the boundaries of a histogram. The first and final bins
+ * have implicit lower and upper bounds of zero and infinity
+ * respectively and boundary values must occur in order of ascending
+ * magnitude. Unless a unit suffix is given all values are specified in
+ * nanoseconds.
+ *
+ * For example, if bounds_str="300,600,900", the region will be created
+ * with a histogram containing four bins. Each report will include four
+ * numbers a:b:c:d. a is the number of requests that took between 0 and
+ * 300ns to complete, b is the number of requests that took 300-600ns to
+ * complete, c is the number of requests that took 600-900ns to complete
+ * and d is the number of requests that took more than 900ns to
+ * complete.
+ *
+ * An optional unit suffix of 's', 'ms', 'us', or 'ns' may be used to
+ * specify units of seconds, milliseconds, microseconds, or nanoseconds:
+ *
+ * bounds_str="1ns,1us,1ms,1s"
+ * bounds_str="500us,1ms,1500us,2ms"
+ * bounds_str="200ms,400ms,600ms,800ms,1s"
+ *
+ * The smallest valid unit of time for a histogram specification depends
+ * on whether the region uses precise timestamps: for a region with the
+ * default millisecond precision the smallest possible histogram boundary
+ * magnitude is one millisecond: attempting to use a histogram with a
+ * boundary less than one millisecond when creating a region will cause
+ * the region to be created with the precise_timestamps feature enabled.
+ *
+ * On success a pointer to the struct dm_histogram representing the
+ * bounds values is returned, or NULL in the case of error. The returned
+ * pointer should be freed using dm_free() when no longer required.
+ */
+struct dm_histogram *dm_histogram_bounds_from_string(const char *bounds_str);
+
+/*
+ * Parse a zero terminated array of uint64_t into a dm_histogram bounds
+ * description.
+ *
+ * Each value in the array specifies the upper bound of a bin in the
+ * latency histogram in nanoseconds. Values must appear in ascending
+ * order of magnitude.
+ *
+ * The smallest valid unit of time for a histogram specification depends
+ * on whether the region uses precise timestamps: for a region with the
+ * default millisecond precision the smallest possible histogram boundary
+ * magnitude is one millisecond: attempting to use a histogram with a
+ * boundary less than one millisecond when creating a region will cause
+ * the region to be created with the precise_timestamps feature enabled.
+ */
+struct dm_histogram *dm_histogram_bounds_from_uint64(const uint64_t *bounds);
+
+/*
+ * Destroy the histogram bounds array obtained from a call to
+ * dm_histogram_bounds_from_string().
+ */
+void dm_histogram_bounds_destroy(struct dm_histogram *bounds);
+
+/*
+ * Destroy a dm_stats object and all associated regions, counter
+ * sets and histograms.
+ */
+void dm_stats_destroy(struct dm_stats *dms);
+
+/*
+ * Counter sampling interval
+ */
+
+/*
+ * Set the sampling interval for counter data to the specified value in
+ * either nanoseconds or milliseconds.
+ *
+ * The interval is used to calculate time-based metrics from the basic
+ * counter data: an interval must be set before calling any of the
+ * metric methods.
+ *
+ * For best accuracy the duration should be measured and updated at the
+ * end of each interval.
+ *
+ * All values are stored internally with nanosecond precision and are
+ * converted to or from ms when the millisecond interfaces are used.
+ */
+void dm_stats_set_sampling_interval_ns(struct dm_stats *dms,
+ uint64_t interval_ns);
+
+void dm_stats_set_sampling_interval_ms(struct dm_stats *dms,
+ uint64_t interval_ms);
+
+/*
+ * Retrieve the configured sampling interval in either nanoseconds or
+ * milliseconds.
+ */
+uint64_t dm_stats_get_sampling_interval_ns(const struct dm_stats *dms);
+uint64_t dm_stats_get_sampling_interval_ms(const struct dm_stats *dms);
+
+/*
+ * Override program_id. This may be used to change the default
+ * program_id value for an existing handle. If the allow_empty argument
+ * is non-zero a NULL or empty program_id is permitted.
+ *
+ * Use with caution! Most users of the library should set a valid,
+ * non-NULL program_id for every statistics region created. Failing to
+ * do so may result in confusing state when multiple programs are
+ * creating and managing statistics regions.
+ *
+ * All users of the library are encouraged to choose an unambiguous,
+ * unique program_id: this could be based on PID (for programs that
+ * create, report, and delete regions in a single process), session id,
+ * executable name, or some other distinguishing string.
+ *
+ * Use of the empty string as a program_id does not simplify use of the
+ * library or the command line tools and use of this value is strongly
+ * discouraged.
+ */
+int dm_stats_set_program_id(struct dm_stats *dms, int allow_empty,
+ const char *program_id);
+
+/*
+ * Region properties: start, length & area_len.
+ *
+ * Region start and length are returned in units of 512b as specified
+ * at region creation time. The area_len value gives the size of areas
+ * into which the region has been subdivided. For regions with a single
+ * area spanning the range this value is equal to the region length.
+ *
+ * For regions created with a specified number of areas the value
+ * represents the size of the areas into which the kernel divided the
+ * region excluding any rounding of the last area size. The number of
+ * areas may be obtained using the dm_stats_nr_areas_region() call.
+ *
+ * All values are returned in units of 512b sectors.
+ */
+int dm_stats_get_region_start(const struct dm_stats *dms, uint64_t *start,
+ uint64_t region_id);
+
+int dm_stats_get_region_len(const struct dm_stats *dms, uint64_t *len,
+ uint64_t region_id);
+
+int dm_stats_get_region_area_len(const struct dm_stats *dms,
+ uint64_t *len, uint64_t region_id);
+
+/*
+ * Area properties: start, offset and length.
+ *
+ * The area length is always equal to the area length of the region
+ * that contains it and is obtained from dm_stats_get_region_area_len().
+ *
+ * The start of an area is a function of the area_id and the containing
+ * region's start and area length: it gives the absolute offset into the
+ * containing device of the beginning of the area.
+ *
+ * The offset expresses the area's relative offset into the current
+ * region. I.e. the area start minus the start offset of the containing
+ * region.
+ *
+ * All values are returned in units of 512b sectors.
+ */
+int dm_stats_get_area_start(const struct dm_stats *dms, uint64_t *start,
+ uint64_t region_id, uint64_t area_id);
+
+int dm_stats_get_area_offset(const struct dm_stats *dms, uint64_t *offset,
+ uint64_t region_id, uint64_t area_id);
+
+/*
+ * Retrieve program_id and user aux_data for a specific region.
+ *
+ * Only valid following a call to dm_stats_list().
+ */
+
+/*
+ * Retrieve program_id for the specified region.
+ *
+ * The returned pointer does not need to be freed separately from the
+ * dm_stats handle but will become invalid after a dm_stats_destroy(),
+ * dm_stats_list(), dm_stats_populate(), or dm_stats_bind*() of the
+ * handle from which it was obtained.
+ */
+const char *dm_stats_get_region_program_id(const struct dm_stats *dms,
+ uint64_t region_id);
+
+/*
+ * Retrieve user aux_data set for the specified region. This function
+ * will return any stored user aux_data as a string in the memory
+ * pointed to by the aux_data argument.
+ *
+ * Any library internal aux_data fields, such as DMS_GROUP descriptors,
+ * are stripped before the value is returned.
+ *
+ * The returned pointer does not need to be freed separately from the
+ * dm_stats handle but will become invalid after a dm_stats_destroy(),
+ * dm_stats_list(), dm_stats_populate(), or dm_stats_bind*() of the
+ * handle from which it was obtained.
+ */
+const char *dm_stats_get_region_aux_data(const struct dm_stats *dms,
+ uint64_t region_id);
+
+typedef enum {
+ DM_STATS_OBJECT_TYPE_NONE,
+ DM_STATS_OBJECT_TYPE_AREA,
+ DM_STATS_OBJECT_TYPE_REGION,
+ DM_STATS_OBJECT_TYPE_GROUP
+} dm_stats_obj_type_t;
+
+/*
+ * Statistics cursor
+ *
+ * A dm_stats handle maintains an optional cursor into the statistics
+ * tables that it stores. Iterators are provided to visit each region,
+ * area, or group in a handle and accessor methods are provided to
+ * obtain properties and values for the object at the current cursor
+ * position.
+ *
+ * Using the cursor simplifies walking all regions or groups when
+ * the tables are sparse (i.e. contain some present and some
+ * non-present region_id or group_id values either due to program_id
+ * filtering or the ordering of region and group creation and deletion).
+ *
+ * Simple macros are provided to visit each area, region, or group,
+ * contained in a handle and applications are encouraged to use these
+ * where possible.
+ */
+
+/*
+ * Walk flags are used to initialise a dm_stats handle's cursor control
+ * and to select region or group aggregation when calling a metric or
+ * counter property method with immediate group, region, and area ID
+ * values.
+ *
+ * Walk flags are stored in the uppermost word of a uint64_t so that
+ * a region_id or group_id may be encoded in the lower bits. This
+ * allows an aggregate region_id or group_id to be specified when
+ * retrieving counter or metric values.
+ *
+ * Flags may be ORed together when used to initialise a dm_stats_walk:
+ * the resulting walk will visit instances of each type specified by
+ * the flag combination.
+ */
+#define DM_STATS_WALK_AREA 0x1000000000000ULL
+#define DM_STATS_WALK_REGION 0x2000000000000ULL
+#define DM_STATS_WALK_GROUP 0x4000000000000ULL
+
+#define DM_STATS_WALK_ALL 0x7000000000000ULL
+#define DM_STATS_WALK_DEFAULT (DM_STATS_WALK_AREA | DM_STATS_WALK_REGION)
+
+/*
+ * Skip regions from a DM_STATS_WALK_REGION that contain only a single
+ * area: in this case the region's aggregate values are identical to
+ * the values of the single contained area. Setting this flag will
+ * suppress these duplicate entries during a dm_stats_walk_* with the
+ * DM_STATS_WALK_REGION flag set.
+ */
+#define DM_STATS_WALK_SKIP_SINGLE_AREA 0x8000000000000ULL
+
+/*
+ * Initialise the cursor control of a dm_stats handle for the specified
+ * walk type(s). Including a walk flag in the flags argument will cause
+ * any subsequent walk to visit that type of object (until the next
+ * call to dm_stats_walk_init()).
+ */
+int dm_stats_walk_init(struct dm_stats *dms, uint64_t flags);
+
+/*
+ * Set the cursor of a dm_stats handle to address the first present
+ * group, region, or area of the currently configured walk. It is
+ * valid to attempt to walk a NULL stats handle or a handle containing
+ * no present regions; in this case any call to dm_stats_walk_next()
+ * becomes a no-op and all calls to dm_stats_walk_end() return true.
+ */
+void dm_stats_walk_start(struct dm_stats *dms);
+
+/*
+ * Advance the statistics cursor to the next area, or to the next
+ * present region if at the end of the current region. If the end of
+ * the region, area, or group tables is reached a subsequent call to
+ * dm_stats_walk_end() will return 1 and dm_stats_object_type() called
+ * on the location will return DM_STATS_OBJECT_TYPE_NONE.
+ */
+void dm_stats_walk_next(struct dm_stats *dms);
+
+/*
+ * Force the statistics cursor to advance to the next region. This will
+ * stop any in-progress area walk (by clearing DM_STATS_WALK_AREA) and
+ * advance the cursor to the next present region, the first present
+ * group (if DM_STATS_WALK_GROUP is set), or to the end. In this case a
+ * subsequent call to dm_stats_walk_end() will return 1 and a call to
+ * dm_stats_object_type() for the location will return
+ * DM_STATS_OBJECT_TYPE_NONE.
+ */
+void dm_stats_walk_next_region(struct dm_stats *dms);
+
+/*
+ * Test whether the end of a statistics walk has been reached.
+ */
+int dm_stats_walk_end(struct dm_stats *dms);
+
+/*
+ * Return the type of object at the location specified by region_id
+ * and area_id. If either region_id or area_id uses one of the special
+ * values DM_STATS_REGION_CURRENT or DM_STATS_AREA_CURRENT the
+ * corresponding region or area identifier will be taken from the
+ * current cursor location. If the cursor location or the value encoded
+ * by region_id and area_id indicates an aggregate region or group,
+ * this will be reflected in the value returned.
+ */
+dm_stats_obj_type_t dm_stats_object_type(const struct dm_stats *dms,
+ uint64_t region_id,
+ uint64_t area_id);
+
+/*
+ * Return the type of object at the current stats cursor location.
+ */
+dm_stats_obj_type_t dm_stats_current_object_type(const struct dm_stats *dms);
+
+/*
+ * Stats iterators
+ *
+ * C 'for' and 'do'/'while' style iterators for dm_stats data.
+ *
+ * It is not safe to call any function that modifies the region table
+ * within the loop body (i.e. dm_stats_list(), dm_stats_populate(),
+ * dm_stats_init(), or dm_stats_destroy()).
+ *
+ * All counter and property (dm_stats_get_*) access methods, as well as
+ * dm_stats_populate_region() can be safely called from loops.
+ *
+ */
+
+/*
+ * Iterate over the regions table visiting each region.
+ *
+ * If the region table is empty or unpopulated the loop body will not be
+ * executed.
+ */
+#define dm_stats_foreach_region(dms) \
+for (dm_stats_walk_init((dms), DM_STATS_WALK_REGION), \
+ dm_stats_walk_start((dms)); \
+ !dm_stats_walk_end((dms)); dm_stats_walk_next_region((dms)))
+
+/*
+ * Iterate over the regions table visiting each area.
+ *
+ * If the region table is empty or unpopulated the loop body will not
+ * be executed.
+ */
+#define dm_stats_foreach_area(dms) \
+for (dm_stats_walk_init((dms), DM_STATS_WALK_AREA), \
+ dm_stats_walk_start((dms)); \
+ !dm_stats_walk_end((dms)); dm_stats_walk_next((dms)))
+
+/*
+ * Iterate over the regions table visiting each group. Metric and
+ * counter methods will return values for the group.
+ *
+ * If the group table is empty or unpopulated the loop body will not
+ * be executed.
+ */
+#define dm_stats_foreach_group(dms) \
+for (dm_stats_walk_init((dms), DM_STATS_WALK_GROUP), \
+ dm_stats_walk_start(dms); \
+ !dm_stats_walk_end(dms); \
+ dm_stats_walk_next(dms))
+
+/*
+ * Start a walk iterating over the regions contained in dm_stats handle
+ * 'dms'.
+ *
+ * The body of the loop should call dm_stats_walk_next() or
+ * dm_stats_walk_next_region() to advance to the next element.
+ *
+ * The loop body is executed at least once even if the stats handle is
+ * empty.
+ */
+#define dm_stats_walk_do(dms) \
+do { \
+ dm_stats_walk_start((dms)); \
+ do
+
+/*
+ * Start a 'while' style loop or end a 'do..while' loop iterating over the
+ * regions contained in dm_stats handle 'dms'.
+ */
+#define dm_stats_walk_while(dms) \
+ while(!dm_stats_walk_end((dms))); \
+} while (0)
+
+/*
+ * Cursor relative property methods
+ *
+ * Calls with the prefix dm_stats_get_current_* operate relative to the
+ * current cursor location, returning properties for the current region
+ * or area of the supplied dm_stats handle.
+ *
+ */
+
+/*
+ * Returns the number of areas (counter sets) contained in the current
+ * region of the supplied dm_stats handle.
+ */
+uint64_t dm_stats_get_current_nr_areas(const struct dm_stats *dms);
+
+/*
+ * Retrieve the current values of the stats cursor.
+ */
+uint64_t dm_stats_get_current_region(const struct dm_stats *dms);
+uint64_t dm_stats_get_current_area(const struct dm_stats *dms);
+
+/*
+ * Current region properties: size, length & area_len.
+ *
+ * See the comments for the equivalent dm_stats_get_* versions for a
+ * complete description of these methods.
+ *
+ * All values are returned in units of 512b sectors.
+ */
+int dm_stats_get_current_region_start(const struct dm_stats *dms,
+ uint64_t *start);
+
+int dm_stats_get_current_region_len(const struct dm_stats *dms,
+ uint64_t *len);
+
+int dm_stats_get_current_region_area_len(const struct dm_stats *dms,
+ uint64_t *area_len);
+
+/*
+ * Current area properties: start and length.
+ *
+ * See the comments for the equivalent dm_stats_get_* versions for a
+ * complete description of these methods.
+ *
+ * All values are returned in units of 512b sectors.
+ */
+int dm_stats_get_current_area_start(const struct dm_stats *dms,
+ uint64_t *start);
+
+int dm_stats_get_current_area_offset(const struct dm_stats *dms,
+ uint64_t *offset);
+
+int dm_stats_get_current_area_len(const struct dm_stats *dms,
+ uint64_t *start);
+
+/*
+ * Return a pointer to the program_id string for region at the current
+ * cursor location.
+ */
+const char *dm_stats_get_current_region_program_id(const struct dm_stats *dms);
+
+/*
+ * Return a pointer to the user aux_data string for the region at the
+ * current cursor location.
+ */
+const char *dm_stats_get_current_region_aux_data(const struct dm_stats *dms);
+
+/*
+ * Statistics groups and data aggregation.
+ */
+
+/*
+ * Create a new group in stats handle dms from the group descriptor
+ * passed in group. The group descriptor is a string containing a list
+ * of region_id values that will be included in the group. The first
+ * region_id found will be the group leader. Ranges of identifiers may
+ * be expressed as "M-N", where M and N are the start and end region_id
+ * values for the range.
+ */
+int dm_stats_create_group(struct dm_stats *dms, const char *group,
+ const char *alias, uint64_t *group_id);
+
+/*
+ * Remove the specified group_id. If the remove argument is zero the
+ * group will be removed but the regions that it contained will remain.
+ * If remove is non-zero then all regions that belong to the group will
+ * also be removed.
+ */
+int dm_stats_delete_group(struct dm_stats *dms, uint64_t group_id, int remove);
+
+/*
+ * Set an alias for this group or region. The alias will be returned
+ * instead of the normal dm-stats name for this region or group.
+ */
+int dm_stats_set_alias(struct dm_stats *dms, uint64_t group_id,
+ const char *alias);
+
+/*
+ * Returns a pointer to the currently configured alias for id, or the
+ * name of the dm device the handle is bound to if no alias has been
+ * set. The pointer will be freed automatically when a new alias is set
+ * or when the stats handle is cleared.
+ */
+const char *dm_stats_get_alias(const struct dm_stats *dms, uint64_t id);
+
+#define DM_STATS_GROUP_NONE UINT64_MAX
+/*
+ * Return the group_id that the specified region_id belongs to, or the
+ * special value DM_STATS_GROUP_NONE if the region does not belong
+ * to any group.
+ */
+uint64_t dm_stats_get_group_id(const struct dm_stats *dms, uint64_t region_id);
+
+/*
+ * Store a pointer to a string describing the regions that are members
+ * of the group specified by group_id in the memory pointed to by buf.
+ * The string is in the same format as the 'group' argument to
+ * dm_stats_create_group().
+ *
+ * The pointer does not need to be freed explicitly by the caller: it
+ * will become invalid following a subsequent dm_stats_list(),
+ * dm_stats_populate() or dm_stats_destroy() of the corresponding
+ * dm_stats handle.
+ */
+int dm_stats_get_group_descriptor(const struct dm_stats *dms,
+ uint64_t group_id, char **buf);
+
+/*
+ * Create regions that correspond to the extents of a file in the
+ * filesystem and optionally place them into a group.
+ *
+ * File descriptor fd must reference a regular file, open for reading,
+ * in a local file system that supports the FIEMAP ioctl, and that
+ * returns data describing the physical location of extents.
+ *
+ * The file descriptor can be closed by the caller following the call
+ * to dm_stats_create_regions_from_fd().
+ *
+ * Unless nogroup is non-zero the regions will be placed into a group
+ * and the group alias set to the value supplied (if alias is NULL no
+ * group alias will be assigned).
+ *
+ * On success the function returns a pointer to an array of uint64_t
+ * containing the IDs of the newly created regions. The region_id
+ * array is terminated by the value DM_STATS_REGION_NOT_PRESENT and
+ * should be freed using dm_free() when no longer required.
+ *
+ * On error NULL is returned.
+ *
+ * Following a call to dm_stats_create_regions_from_fd() the handle
+ * is guaranteed to be in a listed state, and to contain any region
+ * and group identifiers created by the operation.
+ *
+ * The group_id for the new group is equal to the region_id value in
+ * the first array element.
+ */
+uint64_t *dm_stats_create_regions_from_fd(struct dm_stats *dms, int fd,
+ int group, int precise,
+ struct dm_histogram *bounds,
+ const char *alias);
+/*
+ * Update a group of regions that correspond to the extents of a file
+ * in the filesystem, adding and removing regions to account for
+ * allocation changes in the underlying file.
+ *
+ * File descriptor fd must reference a regular file, open for reading,
+ * in a local file system that supports the FIEMAP ioctl, and that
+ * returns data describing the physical location of extents.
+ *
+ * The file descriptor can be closed by the caller following the call
+ * to dm_stats_update_regions_from_fd().
+ *
+ * On success the function returns a pointer to an array of uint64_t
+ * containing the IDs of the updated regions (including any existing
+ * regions that were not modified by the call).
+ *
+ * The region_id array is terminated by the special value
+ * DM_STATS_REGION_NOT_PRESENT and should be freed using dm_free()
+ * when no longer required.
+ *
+ * On error NULL is returned.
+ *
+ * Following a call to dm_stats_update_regions_from_fd() the handle
+ * is guaranteed to be in a listed state, and to contain any region
+ * and group identifiers created by the operation.
+ *
+ * This function cannot be used with file mapped regions that are
+ * not members of a group: either group the regions, or remove them
+ * and re-map them with dm_stats_create_regions_from_fd().
+ */
+uint64_t *dm_stats_update_regions_from_fd(struct dm_stats *dms, int fd,
+ uint64_t group_id);
+
+
+/*
+ * The file map monitoring daemon can monitor files in two distinct
+ * ways: the mode affects the behaviour of the daemon when a file
+ * under monitoring is renamed or unlinked, and the conditions which
+ * cause the daemon to terminate.
+ *
+ * In both modes, the daemon will always shut down when the group
+ * being monitored is deleted.
+ *
+ * Follow inode:
+ * The daemon follows the inode of the file, as it was at the time the
+ * daemon started. The file descriptor referencing the file is kept
+ * open at all times, and the daemon will exit when it detects that
+ * the file has been unlinked and it is the last holder of a reference
+ * to the file.
+ *
+ * This mode is useful if the file is expected to be renamed, or moved
+ * within the file system, while it is being monitored.
+ *
+ * Follow path:
+ * The daemon follows the path that was given on the daemon command
+ * line. The file descriptor referencing the file is re-opened on each
+ * iteration of the daemon, and the daemon will exit if no file exists
+ * at this location (a tolerance is allowed so that a brief delay
+ * between unlink() and creat() is permitted).
+ *
+ * This mode is useful if the file is updated by unlinking the original
+ * and placing a new file at the same path.
+ */
+
+typedef enum {
+ DM_FILEMAPD_FOLLOW_INODE,
+ DM_FILEMAPD_FOLLOW_PATH,
+ DM_FILEMAPD_FOLLOW_NONE
+} dm_filemapd_mode_t;
+
+/*
+ * Parse a string representation of a dmfilemapd mode.
+ *
+ * Returns a valid dm_filemapd_mode_t value on success, or
+ * DM_FILEMAPD_FOLLOW_NONE on error.
+ */
+dm_filemapd_mode_t dm_filemapd_mode_from_string(const char *mode_str);
+
+/*
+ * Start the dmfilemapd filemap monitoring daemon for the specified
+ * file descriptor, group, and file system path. The daemon will
+ * monitor the file for allocation changes, and when a change is
+ * detected, call dm_stats_update_regions_from_fd() to update the
+ * mapped regions for the file.
+ *
+ * The path provided to dm_stats_start_filemapd() must be an absolute
+ * path, and should reflect the path of 'fd' at the time that it was
+ * opened.
+ *
+ * The mode parameter controls the behaviour of the daemon when the
+ * file being monitored is unlinked or moved: see the comments for
+ * dm_filemapd_mode_t for a full description and possible values.
+ *
+ * The daemon can be stopped at any time by sending SIGTERM to the
+ * daemon pid.
+ */
+int dm_stats_start_filemapd(int fd, uint64_t group_id, const char *path,
+ dm_filemapd_mode_t mode, unsigned foreground,
+ unsigned verbose);
+
+/*
+ * Call this to actually run the ioctl.
+ */
+int dm_task_run(struct dm_task *dmt);
+
+/*
+ * The errno from the last device-mapper ioctl performed by dm_task_run.
+ */
+int dm_task_get_errno(struct dm_task *dmt);
+
+/*
+ * Call this to make or remove the device nodes associated with previously
+ * issued commands.
+ */
+void dm_task_update_nodes(void);
+
+/*
+ * Mangling support
+ *
+ * Character whitelist: 0-9, A-Z, a-z, #+-.:=@_
+ * HEX mangling format: \xNN, NN being the hex value of the character.
+ * (whitelist and format supported by udev)
+*/
+typedef enum {
+ DM_STRING_MANGLING_NONE, /* do not mangle at all */
+ DM_STRING_MANGLING_AUTO, /* mangle only if not already mangled with hex, error when mixed */
+ DM_STRING_MANGLING_HEX /* always mangle with hex encoding, no matter what the input is */
+} dm_string_mangling_t;
+
+/*
+ * Set/get mangling mode used for device-mapper names and uuids.
+ */
+int dm_set_name_mangling_mode(dm_string_mangling_t name_mangling);
+dm_string_mangling_t dm_get_name_mangling_mode(void);
+
+/*
+ * Get mangled/unmangled form of the device-mapper name or uuid
+ * irrespective of the global setting (set by dm_set_name_mangling_mode).
+ * The name or uuid returned needs to be freed after use by calling dm_free!
+ */
+char *dm_task_get_name_mangled(const struct dm_task *dmt);
+char *dm_task_get_name_unmangled(const struct dm_task *dmt);
+char *dm_task_get_uuid_mangled(const struct dm_task *dmt);
+char *dm_task_get_uuid_unmangled(const struct dm_task *dmt);
+
+/*
+ * Configure the device-mapper directory
+ */
+int dm_set_dev_dir(const char *dir);
+const char *dm_dir(void);
+
+/*
+ * Configure sysfs directory, /sys by default
+ */
+int dm_set_sysfs_dir(const char *dir);
+const char *dm_sysfs_dir(void);
+
+/*
+ * Configure default UUID prefix string.
+ * Conventionally this is a short capitalised prefix indicating the subsystem
+ * that is managing the devices, e.g. "LVM-" or "MPATH-".
+ * To support stacks of devices from different subsystems, recursive functions
+ * stop recursing if they reach a device with a different prefix.
+ */
+int dm_set_uuid_prefix(const char *uuid_prefix);
+const char *dm_uuid_prefix(void);
+
+/*
+ * Determine whether a major number belongs to device-mapper or not.
+ */
+int dm_is_dm_major(uint32_t major);
+
+/*
+ * Get associated device name for given major and minor number by reading
+ * the sysfs content. If this is a dm device, get associated dm name, the one
+ * that appears in /dev/mapper. DM names could be resolved this way only if
+ * kernel used >= 2.6.29, kernel name is found otherwise (e.g. dm-0).
+ * If prefer_kernel_name is set, the kernel name is always preferred over
+ * device-mapper name for dm devices no matter what the kernel version is.
+ * For non-dm devices, we always get associated kernel name, e.g. sda, md0 etc.
+ * Returns 0 on error or if sysfs is not used (or configured incorrectly),
+ * otherwise returns 1 and the supplied buffer holds the device name.
+ */
+int dm_device_get_name(uint32_t major, uint32_t minor,
+ int prefer_kernel_name,
+ char *buf, size_t buf_size);
+
+/*
+ * Determine whether a device has any holders (devices
+ * using this device). If sysfs is not used (or configured
+ * incorrectly), returns 0.
+ */
+int dm_device_has_holders(uint32_t major, uint32_t minor);
+
+/*
+ * Determine whether a device contains mounted filesystem.
+ * If sysfs is not used (or configured incorrectly), returns 0.
+ */
+int dm_device_has_mounted_fs(uint32_t major, uint32_t minor);
+
+
+/*
+ * Callback is invoked for individual mountinfo lines,
+ * minor, major and mount target are parsed and unmangled.
+ */
+typedef int (*dm_mountinfo_line_callback_fn) (char *line, unsigned maj, unsigned min,
+ char *target, void *cb_data);
+
+/*
+ * Read all lines from /proc/self/mountinfo,
+ * for each line calls read_fn callback.
+ */
+int dm_mountinfo_read(dm_mountinfo_line_callback_fn read_fn, void *cb_data);
+
+/*
+ * Initialise library
+ */
+void dm_lib_init(void) __attribute__((constructor));
+
+/*
+ * Release library resources
+ */
+void dm_lib_release(void);
+void dm_lib_exit(void) __attribute__((destructor));
+
+/* An optimisation for clients making repeated calls involving dm ioctls */
+void dm_hold_control_dev(int hold_open);
+
+/*
+ * Use NULL for all devices.
+ */
+int dm_mknodes(const char *name);
+int dm_driver_version(char *version, size_t size);
+
+/******************************************************
+ * Functions to build and manipulate trees of devices *
+ ******************************************************/
+struct dm_tree;
+struct dm_tree_node;
+
+/*
+ * Initialise an empty dependency tree.
+ *
+ * The tree consists of a root node together with one node for each mapped
+ * device which has child nodes for each device referenced in its table.
+ *
+ * Every node in the tree has one or more children and one or more parents.
+ *
+ * The root node is the parent/child of every node that doesn't have other
+ * parents/children.
+ */
+struct dm_tree *dm_tree_create(void);
+void dm_tree_free(struct dm_tree *tree);
+
+/*
+ * List of suffixes to be ignored when matching uuids against existing devices.
+ */
+void dm_tree_set_optional_uuid_suffixes(struct dm_tree *dtree, const char **optional_uuid_suffixes);
+
+/*
+ * Add nodes to the tree for a given device and all the devices it uses.
+ */
+int dm_tree_add_dev(struct dm_tree *tree, uint32_t major, uint32_t minor);
+int dm_tree_add_dev_with_udev_flags(struct dm_tree *tree, uint32_t major,
+ uint32_t minor, uint16_t udev_flags);
+
+/*
+ * Add a new node to the tree if it doesn't already exist.
+ */
+struct dm_tree_node *dm_tree_add_new_dev(struct dm_tree *tree,
+ const char *name,
+ const char *uuid,
+ uint32_t major, uint32_t minor,
+ int read_only,
+ int clear_inactive,
+ void *context);
+struct dm_tree_node *dm_tree_add_new_dev_with_udev_flags(struct dm_tree *tree,
+ const char *name,
+ const char *uuid,
+ uint32_t major,
+ uint32_t minor,
+ int read_only,
+ int clear_inactive,
+ void *context,
+ uint16_t udev_flags);
+
+/*
+ * Search for a node in the tree.
+ * Set major and minor to 0 or uuid to NULL to get the root node.
+ */
+struct dm_tree_node *dm_tree_find_node(struct dm_tree *tree,
+ uint32_t major,
+ uint32_t minor);
+struct dm_tree_node *dm_tree_find_node_by_uuid(struct dm_tree *tree,
+ const char *uuid);
+
+/*
+ * Use this to walk through all children of a given node.
+ * Set handle to NULL in first call.
+ * Returns NULL after the last child.
+ * Set inverted to use inverted tree.
+ */
+struct dm_tree_node *dm_tree_next_child(void **handle,
+ const struct dm_tree_node *parent,
+ uint32_t inverted);
+
+/*
+ * Get properties of a node.
+ */
+const char *dm_tree_node_get_name(const struct dm_tree_node *node);
+const char *dm_tree_node_get_uuid(const struct dm_tree_node *node);
+const struct dm_info *dm_tree_node_get_info(const struct dm_tree_node *node);
+void *dm_tree_node_get_context(const struct dm_tree_node *node);
+/*
+ * Returns 0 when the size of the node and its children is unchanged.
+ * Returns 1 when the node or any of its children has increased size.
+ * Returns -1 when the node or any of its children has reduced size.
+ */
+int dm_tree_node_size_changed(const struct dm_tree_node *dnode);
+
+/*
+ * Returns the number of children of the given node (excluding the root node).
+ * Set inverted for the number of parents.
+ */
+int dm_tree_node_num_children(const struct dm_tree_node *node, uint32_t inverted);
+
+/*
+ * Deactivate a device plus all dependencies.
+ * Ignores devices that don't have a uuid starting with uuid_prefix.
+ */
+int dm_tree_deactivate_children(struct dm_tree_node *dnode,
+ const char *uuid_prefix,
+ size_t uuid_prefix_len);
+/*
+ * Preload/create a device plus all dependencies.
+ * Ignores devices that don't have a uuid starting with uuid_prefix.
+ */
+int dm_tree_preload_children(struct dm_tree_node *dnode,
+ const char *uuid_prefix,
+ size_t uuid_prefix_len);
+
+/*
+ * Resume a device plus all dependencies.
+ * Ignores devices that don't have a uuid starting with uuid_prefix.
+ */
+int dm_tree_activate_children(struct dm_tree_node *dnode,
+ const char *uuid_prefix,
+ size_t uuid_prefix_len);
+
+/*
+ * Suspend a device plus all dependencies.
+ * Ignores devices that don't have a uuid starting with uuid_prefix.
+ */
+int dm_tree_suspend_children(struct dm_tree_node *dnode,
+ const char *uuid_prefix,
+ size_t uuid_prefix_len);
+
+/*
+ * Skip the filesystem sync when suspending.
+ * Does nothing with other functions.
+ * Use this when no snapshots are involved.
+ */
+void dm_tree_skip_lockfs(struct dm_tree_node *dnode);
+
+/*
+ * Set the 'noflush' flag when suspending devices.
+ * If the kernel supports it, instead of erroring outstanding I/O that
+ * cannot be completed, the I/O is queued and resubmitted when the
+ * device is resumed. This affects multipath devices when all paths
+ * have failed and queue_if_no_path is set, and mirror devices when
+ * block_on_error is set and the mirror log has failed.
+ */
+void dm_tree_use_no_flush_suspend(struct dm_tree_node *dnode);
+
+/*
+ * Retry removal of each device if not successful.
+ */
+void dm_tree_retry_remove(struct dm_tree_node *dnode);
+
+/*
+ * Is the uuid prefix present in the tree?
+ * Only returns 0 if every node was checked successfully.
+ * Returns 1 if the tree walk has to be aborted.
+ */
+int dm_tree_children_use_uuid(struct dm_tree_node *dnode,
+ const char *uuid_prefix,
+ size_t uuid_prefix_len);
+
+/*
+ * Construct tables for new nodes before activating them.
+ */
+int dm_tree_node_add_snapshot_origin_target(struct dm_tree_node *dnode,
+ uint64_t size,
+ const char *origin_uuid);
+int dm_tree_node_add_snapshot_target(struct dm_tree_node *node,
+ uint64_t size,
+ const char *origin_uuid,
+ const char *cow_uuid,
+ int persistent,
+ uint32_t chunk_size);
+int dm_tree_node_add_snapshot_merge_target(struct dm_tree_node *node,
+ uint64_t size,
+ const char *origin_uuid,
+ const char *cow_uuid,
+ const char *merge_uuid,
+ uint32_t chunk_size);
+int dm_tree_node_add_error_target(struct dm_tree_node *node,
+ uint64_t size);
+int dm_tree_node_add_zero_target(struct dm_tree_node *node,
+ uint64_t size);
+int dm_tree_node_add_linear_target(struct dm_tree_node *node,
+ uint64_t size);
+int dm_tree_node_add_striped_target(struct dm_tree_node *node,
+ uint64_t size,
+ uint32_t stripe_size);
+
+#define DM_CRYPT_IV_DEFAULT UINT64_C(-1) /* iv_offset == seg offset */
+/*
+ * Function accepts one string in cipher specification
+ * (chainmode and iv should be NULL because included in cipher string)
+ * or
+ * separate arguments which will be joined to "cipher-chainmode-iv"
+ */
+int dm_tree_node_add_crypt_target(struct dm_tree_node *node,
+ uint64_t size,
+ const char *cipher,
+ const char *chainmode,
+ const char *iv,
+ uint64_t iv_offset,
+ const char *key);
+int dm_tree_node_add_mirror_target(struct dm_tree_node *node,
+ uint64_t size);
+
+/* Mirror log flags */
+#define DM_NOSYNC 0x00000001 /* Known already in sync */
+#define DM_FORCESYNC 0x00000002 /* Force resync */
+#define DM_BLOCK_ON_ERROR 0x00000004 /* On error, suspend I/O */
+#define DM_CORELOG 0x00000008 /* In-memory log */
+
+int dm_tree_node_add_mirror_target_log(struct dm_tree_node *node,
+ uint32_t region_size,
+ unsigned clustered,
+ const char *log_uuid,
+ unsigned area_count,
+ uint32_t flags);
+
+int dm_tree_node_add_raid_target(struct dm_tree_node *node,
+ uint64_t size,
+ const char *raid_type,
+ uint32_t region_size,
+ uint32_t stripe_size,
+ uint64_t rebuilds,
+ uint64_t flags);
+
+/*
+ * Defines below are based on kernel's dm-cache.c defines
+ * DM_CACHE_MIN_DATA_BLOCK_SIZE (32 * 1024 >> SECTOR_SHIFT)
+ * DM_CACHE_MAX_DATA_BLOCK_SIZE (1024 * 1024 * 1024 >> SECTOR_SHIFT)
+ */
+#define DM_CACHE_MIN_DATA_BLOCK_SIZE (UINT32_C(64))
+#define DM_CACHE_MAX_DATA_BLOCK_SIZE (UINT32_C(2097152))
+/*
+ * Max supported size for cache pool metadata device.
+ * Limitation is hardcoded into the kernel and bigger device sizes
+ * are not accepted.
+ *
+ * Limit defined in drivers/md/dm-cache-metadata.h
+ */
+#define DM_CACHE_METADATA_MAX_SECTORS DM_THIN_METADATA_MAX_SECTORS
+
+/*
+ * Define number of elements in rebuild and writemostly arrays
+ * 'of struct dm_tree_node_raid_params'.
+ */
+
+/* Parameter block for dm_tree_node_add_raid_target_with_params() (below). */
+struct dm_tree_node_raid_params {
+	const char *raid_type;
+
+	uint32_t stripes;
+	uint32_t mirrors;
+	uint32_t region_size;
+	uint32_t stripe_size;
+
+	/*
+	 * 'rebuilds' and 'writemostly' are bitfields that signify
+	 * which devices in the array are to be rebuilt or marked
+	 * writemostly.  The kernel supports up to 253 legs.
+	 * We limit ourselves by choosing a lower value
+	 * for DEFAULT_RAID{1}_MAX_IMAGES in defaults.h.
+	 */
+	uint64_t rebuilds;
+	uint64_t writemostly;
+	uint32_t writebehind;	    /* I/Os (kernel default COUNTER_MAX / 2) */
+	uint32_t sync_daemon_sleep; /* ms (kernel default = 5sec) */
+	uint32_t max_recovery_rate; /* kB/sec/disk */
+	uint32_t min_recovery_rate; /* kB/sec/disk */
+	uint32_t stripe_cache;      /* sectors */
+
+	uint64_t flags;             /* [no]sync */
+	uint32_t reserved2;         /* NOTE(review): presumably reserved padding — confirm */
+};
+
+/*
+ * Version 2 of the above node raid params struct to keep API compatibility.
+ *
+ * Extended for more than 64 legs (max 253 in the MD kernel runtime!),
+ * delta_disks for disk add/remove reshaping,
+ * data_offset for out-of-place reshaping
+ * and data_copies for odd number of raid10 legs.
+ */
+#define RAID_BITMAP_SIZE 4 /* 4 * 64 bit elements in rebuilds/writemostly arrays */
+struct dm_tree_node_raid_params_v2 {
+	const char *raid_type;
+
+	uint32_t stripes;
+	uint32_t mirrors;
+	uint32_t region_size;
+	uint32_t stripe_size;
+
+	int delta_disks; /* +/- number of disks to add/remove (reshaping) */
+	int data_offset; /* data offset to set (out-of-place reshaping) */
+
+	/*
+	 * 'rebuilds' and 'writemostly' are bitfields that signify
+	 * which devices in the array are to be rebuilt or marked
+	 * writemostly.  The kernel supports up to 253 legs.
+	 * We limit ourselves by choosing a lower value
+	 * for DEFAULT_RAID_MAX_IMAGES.
+	 */
+	uint64_t rebuilds[RAID_BITMAP_SIZE];
+	uint64_t writemostly[RAID_BITMAP_SIZE];
+	uint32_t writebehind;	    /* I/Os (kernel default COUNTER_MAX / 2) */
+	uint32_t data_copies;	    /* RAID # of data copies */
+	uint32_t sync_daemon_sleep; /* ms (kernel default = 5sec) */
+	uint32_t max_recovery_rate; /* kB/sec/disk */
+	uint32_t min_recovery_rate; /* kB/sec/disk */
+	uint32_t stripe_cache;      /* sectors */
+
+	uint64_t flags;             /* [no]sync */
+};
+
+int dm_tree_node_add_raid_target_with_params(struct dm_tree_node *node,
+ uint64_t size,
+ const struct dm_tree_node_raid_params *p);
+
+/* Version 2 API function taking dm_tree_node_raid_params_v2 for aforementioned extensions. */
+int dm_tree_node_add_raid_target_with_params_v2(struct dm_tree_node *node,
+ uint64_t size,
+ const struct dm_tree_node_raid_params_v2 *p);
+
+/* Cache feature_flags */
+#define DM_CACHE_FEATURE_WRITEBACK 0x00000001
+#define DM_CACHE_FEATURE_WRITETHROUGH 0x00000002
+#define DM_CACHE_FEATURE_PASSTHROUGH 0x00000004
+#define DM_CACHE_FEATURE_METADATA2 0x00000008 /* cache v1.10 */
+
+struct dm_config_node;
+/*
+ * Use for passing cache policy and all its args e.g.:
+ *
+ * policy_settings {
+ * migration_threshold=2048
+ *      sequential_threshold=100
+ * ...
+ * }
+ *
+ * For policy without any parameters use NULL.
+ */
+int dm_tree_node_add_cache_target(struct dm_tree_node *node,
+ uint64_t size,
+ uint64_t feature_flags, /* DM_CACHE_FEATURE_* */
+ const char *metadata_uuid,
+ const char *data_uuid,
+ const char *origin_uuid,
+ const char *policy_name,
+ const struct dm_config_node *policy_settings,
+ uint32_t data_block_size);
+
+/*
+ * FIXME Add individual cache policy pairs <key> = value, like:
+ * int dm_tree_node_add_cache_policy_arg(struct dm_tree_node *dnode,
+ * const char *key, uint64_t value);
+ */
+
+/*
+ * Replicator operation mode
+ * Note: API for Replicator is not yet stable
+ */
+typedef enum {
+	DM_REPLICATOR_SYNC,			/* Synchronous replication */
+	DM_REPLICATOR_ASYNC_WARN,		/* Warn if async replicator is slow */
+	DM_REPLICATOR_ASYNC_STALL,		/* Stall replicator if not fast enough */
+	DM_REPLICATOR_ASYNC_DROP,		/* Drop sites out of sync */
+	DM_REPLICATOR_ASYNC_FAIL,		/* Fail replicator if slow */
+	NUM_DM_REPLICATOR_MODES			/* Sentinel: count of modes, not a valid mode */
+} dm_replicator_mode_t;
+
+int dm_tree_node_add_replicator_target(struct dm_tree_node *node,
+ uint64_t size,
+ const char *rlog_uuid,
+ const char *rlog_type,
+ unsigned rsite_index,
+ dm_replicator_mode_t mode,
+ uint32_t async_timeout,
+ uint64_t fall_behind_data,
+ uint32_t fall_behind_ios);
+
+int dm_tree_node_add_replicator_dev_target(struct dm_tree_node *node,
+ uint64_t size,
+ const char *replicator_uuid, /* Replicator control device */
+ uint64_t rdevice_index,
+ const char *rdev_uuid, /* Rimage device name/uuid */
+ unsigned rsite_index,
+ const char *slog_uuid,
+ uint32_t slog_flags, /* Mirror log flags */
+ uint32_t slog_region_size);
+/* End of Replicator API */
+
+/*
+ * FIXME: Defines below are based on kernel's dm-thin.c defines
+ * DATA_DEV_BLOCK_SIZE_MIN_SECTORS (64 * 1024 >> SECTOR_SHIFT)
+ * DATA_DEV_BLOCK_SIZE_MAX_SECTORS (1024 * 1024 * 1024 >> SECTOR_SHIFT)
+ */
+#define DM_THIN_MIN_DATA_BLOCK_SIZE (UINT32_C(128))
+#define DM_THIN_MAX_DATA_BLOCK_SIZE (UINT32_C(2097152))
+/*
+ * Max supported size for thin pool metadata device (17112760320 bytes)
+ * Limitation is hardcoded into the kernel and bigger device size
+ * is not accepted.
+ * drivers/md/dm-thin-metadata.h THIN_METADATA_MAX_SECTORS
+ */
+#define DM_THIN_MAX_METADATA_SIZE (UINT64_C(255) * (1 << 14) * (4096 / (1 << 9)) - 256 * 1024)
+
+int dm_tree_node_add_thin_pool_target(struct dm_tree_node *node,
+ uint64_t size,
+ uint64_t transaction_id,
+ const char *metadata_uuid,
+ const char *pool_uuid,
+ uint32_t data_block_size,
+ uint64_t low_water_mark,
+ unsigned skip_block_zeroing);
+
+/* Supported messages for thin provision target */
+/*
+ * Supported messages for thin provision target.
+ * The per-enumerator comments list the arguments each message consumes,
+ * passed as id1/id2 to dm_tree_node_add_thin_pool_message() below.
+ */
+typedef enum {
+	DM_THIN_MESSAGE_CREATE_SNAP,		/* device_id, origin_id */
+	DM_THIN_MESSAGE_CREATE_THIN,		/* device_id */
+	DM_THIN_MESSAGE_DELETE,			/* device_id */
+	DM_THIN_MESSAGE_SET_TRANSACTION_ID,	/* current_id, new_id */
+	DM_THIN_MESSAGE_RESERVE_METADATA_SNAP,	/* target version >= 1.1 */
+	DM_THIN_MESSAGE_RELEASE_METADATA_SNAP,	/* target version >= 1.1 */
+} dm_thin_message_t;
+
+int dm_tree_node_add_thin_pool_message(struct dm_tree_node *node,
+ dm_thin_message_t type,
+ uint64_t id1, uint64_t id2);
+
+/*
+ * Set thin pool discard features
+ * ignore - Disable support for discards
+ * no_passdown - Don't pass discards down to underlying data device,
+ * just remove the mapping
+ * Feature is available since version 1.1 of the thin target.
+ */
+int dm_tree_node_set_thin_pool_discard(struct dm_tree_node *node,
+ unsigned ignore,
+ unsigned no_passdown);
+/*
+ * Set error if no space, instead of queueing for thin pool.
+ */
+int dm_tree_node_set_thin_pool_error_if_no_space(struct dm_tree_node *node,
+ unsigned error_if_no_space);
+/* Start thin pool with metadata in read-only mode */
+int dm_tree_node_set_thin_pool_read_only(struct dm_tree_node *node,
+ unsigned read_only);
+/*
+ * FIXME: Defines below are based on kernel's dm-thin.c defines
+ * MAX_DEV_ID ((1 << 24) - 1)
+ */
+#define DM_THIN_MAX_DEVICE_ID (UINT32_C((1 << 24) - 1))
+int dm_tree_node_add_thin_target(struct dm_tree_node *node,
+ uint64_t size,
+ const char *pool_uuid,
+ uint32_t device_id);
+
+int dm_tree_node_set_thin_external_origin(struct dm_tree_node *node,
+ const char *external_uuid);
+
+void dm_tree_node_set_udev_flags(struct dm_tree_node *node, uint16_t udev_flags);
+
+void dm_tree_node_set_presuspend_node(struct dm_tree_node *node,
+ struct dm_tree_node *presuspend_node);
+
+int dm_tree_node_add_target_area(struct dm_tree_node *node,
+ const char *dev_name,
+ const char *dlid,
+ uint64_t offset);
+
+/*
+ * Only for temporarily-missing raid devices where changes are tracked.
+ */
+int dm_tree_node_add_null_area(struct dm_tree_node *node, uint64_t offset);
+
+/*
+ * Set readahead (in sectors) after loading the node.
+ */
+void dm_tree_node_set_read_ahead(struct dm_tree_node *dnode,
+ uint32_t read_ahead,
+ uint32_t read_ahead_flags);
+
+/*
+ * Set node callback hook before de/activation.
+ * Callback is called before 'activation' of node for activation tree,
+ * or 'deactivation' of node for deactivation tree.
+ */
+typedef enum {
+	DM_NODE_CALLBACK_PRELOADED,	/* Node has preload deps */
+	DM_NODE_CALLBACK_DEACTIVATED,	/* Node is deactivated */
+} dm_node_callback_t;
+/* Invoked with the node, the event type and the caller's cb_data
+ * registered via dm_tree_node_set_callback() below. */
+typedef int (*dm_node_callback_fn) (struct dm_tree_node *node,
+				    dm_node_callback_t type, void *cb_data);
+void dm_tree_node_set_callback(struct dm_tree_node *node,
+ dm_node_callback_fn cb, void *cb_data);
+
+void dm_tree_set_cookie(struct dm_tree_node *node, uint32_t cookie);
+uint32_t dm_tree_get_cookie(struct dm_tree_node *node);
+
+/*****************************************************************************
+ * Library functions
+ *****************************************************************************/
+
+/*******************
+ * Memory management
+ *******************/
+
+/*
+ * Never use these functions directly - use the macros following instead.
+ */
+void *dm_malloc_wrapper(size_t s, const char *file, int line)
+ __attribute__((__malloc__)) __attribute__((__warn_unused_result__));
+void *dm_malloc_aligned_wrapper(size_t s, size_t a, const char *file, int line)
+ __attribute__((__malloc__)) __attribute__((__warn_unused_result__));
+void *dm_zalloc_wrapper(size_t s, const char *file, int line)
+ __attribute__((__malloc__)) __attribute__((__warn_unused_result__));
+void *dm_realloc_wrapper(void *p, unsigned int s, const char *file, int line)
+ __attribute__((__warn_unused_result__));
+void dm_free_wrapper(void *ptr);
+char *dm_strdup_wrapper(const char *s, const char *file, int line)
+ __attribute__((__warn_unused_result__));
+int dm_dump_memory_wrapper(void);
+void dm_bounds_check_wrapper(void);
+
+#define dm_malloc(s) dm_malloc_wrapper((s), __FILE__, __LINE__)
+#define dm_malloc_aligned(s, a) dm_malloc_aligned_wrapper((s), (a), __FILE__, __LINE__)
+#define dm_zalloc(s) dm_zalloc_wrapper((s), __FILE__, __LINE__)
+#define dm_strdup(s) dm_strdup_wrapper((s), __FILE__, __LINE__)
+#define dm_free(p) dm_free_wrapper(p)
+#define dm_realloc(p, s) dm_realloc_wrapper((p), (s), __FILE__, __LINE__)
+#define dm_dump_memory() dm_dump_memory_wrapper()
+#define dm_bounds_check() dm_bounds_check_wrapper()
+
+/*
+ * The pool allocator is useful when you are going to allocate
+ * lots of memory, use the memory for a bit, and then free the
+ * memory in one go. A surprising amount of code has this usage
+ * profile.
+ *
+ * You should think of the pool as an infinite, contiguous chunk
+ * of memory. The front of this chunk of memory contains
+ * allocated objects, the second half is free. dm_pool_alloc grabs
+ * the next 'size' bytes from the free half, in effect moving it
+ * into the allocated half. This operation is very efficient.
+ *
+ * dm_pool_free frees the allocated object *and* all objects
+ * allocated after it. It is important to note this semantic
+ * difference from malloc/free. This is also extremely
+ * efficient, since a single dm_pool_free can dispose of a large
+ * complex object.
+ *
+ * dm_pool_destroy frees all allocated memory.
+ *
+ * eg, If you are building a binary tree in your program, and
+ * know that you are only ever going to insert into your tree,
+ * and not delete (eg, maintaining a symbol table for a
+ * compiler). You can create yourself a pool, allocate the nodes
+ * from it, and when the tree becomes redundant call dm_pool_destroy
+ * (no nasty iterating through the tree to free nodes).
+ *
+ * eg, On the other hand if you wanted to repeatedly insert and
+ * remove objects into the tree, you would be better off
+ * allocating the nodes from a free list; you cannot free a
+ * single arbitrary node with pool.
+ */
+
+struct dm_pool;
+
+/* constructor and destructor */
+struct dm_pool *dm_pool_create(const char *name, size_t chunk_hint)
+ __attribute__((__warn_unused_result__));
+void dm_pool_destroy(struct dm_pool *p);
+
+/* simple allocation/free routines */
+void *dm_pool_alloc(struct dm_pool *p, size_t s)
+ __attribute__((__warn_unused_result__));
+void *dm_pool_alloc_aligned(struct dm_pool *p, size_t s, unsigned alignment)
+ __attribute__((__warn_unused_result__));
+void dm_pool_empty(struct dm_pool *p);
+void dm_pool_free(struct dm_pool *p, void *ptr);
+
+/*
+ * To aid debugging, a pool can be locked. Any modifications made
+ * to the content of the pool while it is locked can be detected.
+ * Default compilation is using a crc checksum to notice modifications.
+ * The pool locking is using the mprotect with the compilation flag
+ * DEBUG_ENFORCE_POOL_LOCKING to enforce the memory protection.
+ */
+/* query pool lock status */
+int dm_pool_locked(struct dm_pool *p);
+/* mark pool as locked */
+int dm_pool_lock(struct dm_pool *p, int crc)
+ __attribute__((__warn_unused_result__));
+/* mark pool as unlocked */
+int dm_pool_unlock(struct dm_pool *p, int crc)
+ __attribute__((__warn_unused_result__));
+
+/*
+ * Object building routines:
+ *
+ * These allow you to 'grow' an object, useful for
+ * building strings, or filling in dynamic
+ * arrays.
+ *
+ * It's probably best explained with an example:
+ *
+ * char *build_string(struct dm_pool *mem)
+ * {
+ * int i;
+ * char buffer[16];
+ *
+ * if (!dm_pool_begin_object(mem, 128))
+ * return NULL;
+ *
+ * for (i = 0; i < 50; i++) {
+ * snprintf(buffer, sizeof(buffer), "%d, ", i);
+ * if (!dm_pool_grow_object(mem, buffer, 0))
+ * goto bad;
+ * }
+ *
+ * // add null
+ * if (!dm_pool_grow_object(mem, "\0", 1))
+ * goto bad;
+ *
+ * return dm_pool_end_object(mem);
+ *
+ * bad:
+ *
+ * dm_pool_abandon_object(mem);
+ * return NULL;
+ *}
+ *
+ * So start an object by calling dm_pool_begin_object
+ * with a guess at the final object size - if in
+ * doubt make the guess too small.
+ *
+ * Then append chunks of data to your object with
+ * dm_pool_grow_object. Finally get your object with
+ * a call to dm_pool_end_object.
+ *
+ * Setting delta to 0 means it will use strlen(extra).
+ */
+int dm_pool_begin_object(struct dm_pool *p, size_t hint);
+int dm_pool_grow_object(struct dm_pool *p, const void *extra, size_t delta);
+void *dm_pool_end_object(struct dm_pool *p);
+void dm_pool_abandon_object(struct dm_pool *p);
+
+/* utilities */
+char *dm_pool_strdup(struct dm_pool *p, const char *str)
+ __attribute__((__warn_unused_result__));
+char *dm_pool_strndup(struct dm_pool *p, const char *str, size_t n)
+ __attribute__((__warn_unused_result__));
+void *dm_pool_zalloc(struct dm_pool *p, size_t s)
+ __attribute__((__warn_unused_result__));
+
+/******************
+ * bitset functions
+ ******************/
+
+typedef uint32_t *dm_bitset_t;
+
+dm_bitset_t dm_bitset_create(struct dm_pool *mem, unsigned num_bits);
+void dm_bitset_destroy(dm_bitset_t bs);
+
+int dm_bitset_equal(dm_bitset_t in1, dm_bitset_t in2);
+
+void dm_bit_and(dm_bitset_t out, dm_bitset_t in1, dm_bitset_t in2);
+void dm_bit_union(dm_bitset_t out, dm_bitset_t in1, dm_bitset_t in2);
+int dm_bit_get_first(dm_bitset_t bs);
+int dm_bit_get_next(dm_bitset_t bs, int last_bit);
+int dm_bit_get_last(dm_bitset_t bs);
+int dm_bit_get_prev(dm_bitset_t bs, int last_bit);
+
+#define DM_BITS_PER_INT (sizeof(int) * CHAR_BIT)
+
+/*
+ * Bit data lives in elements 1..; element 0 holds the bit count.
+ * Use an unsigned mask constant: (0x1 << 31) on a signed int is
+ * undefined behaviour in C.
+ */
+#define dm_bit(bs, i) \
+   ((bs)[((i) / DM_BITS_PER_INT) + 1] & (0x1U << ((i) & (DM_BITS_PER_INT - 1))))
+
+#define dm_bit_set(bs, i) \
+   ((bs)[((i) / DM_BITS_PER_INT) + 1] |= (0x1U << ((i) & (DM_BITS_PER_INT - 1))))
+
+#define dm_bit_clear(bs, i) \
+   ((bs)[((i) / DM_BITS_PER_INT) + 1] &= ~(0x1U << ((i) & (DM_BITS_PER_INT - 1))))
+
+/* (bs) + 1 skips the leading bit-count word; *(bs) is the number of bits. */
+#define dm_bit_set_all(bs) \
+	memset((bs) + 1, -1, ((*(bs) / DM_BITS_PER_INT) + 1) * sizeof(int))
+
+#define dm_bit_clear_all(bs) \
+	memset((bs) + 1, 0, ((*(bs) / DM_BITS_PER_INT) + 1) * sizeof(int))
+
+/* Size is taken from bs2: the destination bs1 must be at least as large. */
+#define dm_bit_copy(bs1, bs2) \
+	memcpy((bs1) + 1, (bs2) + 1, ((*(bs2) / DM_BITS_PER_INT) + 1) * sizeof(int))
+
+/*
+ * Parse a string representation of a bitset into a dm_bitset_t. The
+ * notation used is identical to the kernel bitmap parser (cpuset etc.)
+ * and supports both lists ("1,2,3") and ranges ("1-2,5-8"). If the mem
+ * parameter is NULL memory for the bitset will be allocated using
+ * dm_malloc(). Otherwise the bitset will be allocated using the supplied
+ * dm_pool.
+ */
+dm_bitset_t dm_bitset_parse_list(const char *str, struct dm_pool *mem,
+ size_t min_num_bits);
+
+/* Returns number of set bits (population count of a 32-bit value). */
+static inline unsigned hweight32(uint32_t i)
+{
+	unsigned count = 0;
+
+	/* Kernighan's method: each pass clears the lowest set bit. */
+	while (i) {
+		i &= i - 1;
+		count++;
+	}
+
+	return count;
+}
+
+/****************
+ * hash functions
+ ****************/
+
+struct dm_hash_table;
+struct dm_hash_node;
+
+typedef void (*dm_hash_iterate_fn) (void *data);
+
+struct dm_hash_table *dm_hash_create(unsigned size_hint)
+ __attribute__((__warn_unused_result__));
+void dm_hash_destroy(struct dm_hash_table *t);
+void dm_hash_wipe(struct dm_hash_table *t);
+
+void *dm_hash_lookup(struct dm_hash_table *t, const char *key);
+int dm_hash_insert(struct dm_hash_table *t, const char *key, void *data);
+void dm_hash_remove(struct dm_hash_table *t, const char *key);
+
+void *dm_hash_lookup_binary(struct dm_hash_table *t, const void *key, uint32_t len);
+int dm_hash_insert_binary(struct dm_hash_table *t, const void *key, uint32_t len,
+ void *data);
+void dm_hash_remove_binary(struct dm_hash_table *t, const void *key, uint32_t len);
+
+unsigned dm_hash_get_num_entries(struct dm_hash_table *t);
+void dm_hash_iter(struct dm_hash_table *t, dm_hash_iterate_fn f);
+
+char *dm_hash_get_key(struct dm_hash_table *t, struct dm_hash_node *n);
+void *dm_hash_get_data(struct dm_hash_table *t, struct dm_hash_node *n);
+struct dm_hash_node *dm_hash_get_first(struct dm_hash_table *t);
+struct dm_hash_node *dm_hash_get_next(struct dm_hash_table *t, struct dm_hash_node *n);
+
+/*
+ * dm_hash_insert() replaces the value of an existing
+ * entry with a matching key if one exists. Otherwise
+ * it adds a new entry.
+ *
+ * dm_hash_insert_with_val() inserts a new entry if
+ * another entry with the same key already exists.
+ * val_len is the size of the data being inserted.
+ *
+ * If two entries with the same key exist,
+ * (added using dm_hash_insert_allow_multiple), then:
+ * . dm_hash_lookup() returns the first one it finds, and
+ * dm_hash_lookup_with_val() returns the one with a matching
+ * val_len/val.
+ * . dm_hash_remove() removes the first one it finds, and
+ * dm_hash_remove_with_val() removes the one with a matching
+ * val_len/val.
+ *
+ * If a single entry with a given key exists, and it has
+ * zero val_len, then:
+ * . dm_hash_lookup() returns it
+ * . dm_hash_lookup_with_val(val_len=0) returns it
+ * . dm_hash_remove() removes it
+ * . dm_hash_remove_with_val(val_len=0) removes it
+ *
+ * dm_hash_lookup_with_count() is a single call that will
+ * both lookup a key's value and check if there is more
+ * than one entry with the given key.
+ *
+ * (It is not meant to retrieve all the entries with the
+ * given key. In the common case where a single entry exists
+ * for the key, it is useful to have a single call that will
+ * both look up the value and indicate if multiple values
+ * exist for the key.)
+ *
+ * dm_hash_lookup_with_count:
+ * . If no entries exist, the function returns NULL, and
+ * the count is set to 0.
+ * . If only one entry exists, the value of that entry is
+ * returned and count is set to 1.
+ * . If N entries exists, the value of the first entry is
+ * returned and count is set to N.
+ */
+
+void *dm_hash_lookup_with_val(struct dm_hash_table *t, const char *key,
+ const void *val, uint32_t val_len);
+void dm_hash_remove_with_val(struct dm_hash_table *t, const char *key,
+ const void *val, uint32_t val_len);
+int dm_hash_insert_allow_multiple(struct dm_hash_table *t, const char *key,
+ const void *val, uint32_t val_len);
+void *dm_hash_lookup_with_count(struct dm_hash_table *t, const char *key, int *count);
+
+
+/* Iterate 'v' (a struct dm_hash_node *) over every node of hash table 'h'. */
+#define dm_hash_iterate(v, h) \
+	for (v = dm_hash_get_first((h)); v; \
+	     v = dm_hash_get_next((h), v))
+
+/****************
+ * list functions
+ ****************/
+
+/*
+ * A list consists of a list head plus elements.
+ * Each element has 'next' and 'previous' pointers.
+ * The list head's pointers point to the first and the last element.
+ */
+
+struct dm_list {
+	struct dm_list *n, *p;	/* n = next, p = previous */
+};
+
+/*
+ * String list.
+ */
+struct dm_str_list {
+	struct dm_list list;	/* Link in the containing list */
+	const char *str;	/* The string carried by this element */
+};
+
+/*
+ * Initialise a list before use.
+ * The list head's next and previous pointers point back to itself.
+ */
+/* Static initializer: a head whose pointers refer to itself (empty list). */
+#define DM_LIST_HEAD_INIT(name)	{ &(name), &(name) }
+/* Declare and initialize an empty list head in one statement. */
+#define DM_LIST_INIT(name)	struct dm_list name = DM_LIST_HEAD_INIT(name)
+void dm_list_init(struct dm_list *head);
+
+/*
+ * Insert an element before 'head'.
+ * If 'head' is the list head, this adds an element to the end of the list.
+ */
+void dm_list_add(struct dm_list *head, struct dm_list *elem);
+
+/*
+ * Insert an element after 'head'.
+ * If 'head' is the list head, this adds an element to the front of the list.
+ */
+void dm_list_add_h(struct dm_list *head, struct dm_list *elem);
+
+/*
+ * Delete an element from its list.
+ * Note that this doesn't change the element itself - it may still be safe
+ * to follow its pointers.
+ */
+void dm_list_del(struct dm_list *elem);
+
+/*
+ * Remove an element from existing list and insert before 'head'.
+ */
+void dm_list_move(struct dm_list *head, struct dm_list *elem);
+
+/*
+ * Join 'head1' to the end of 'head'.
+ */
+void dm_list_splice(struct dm_list *head, struct dm_list *head1);
+
+/*
+ * Is the list empty?
+ */
+int dm_list_empty(const struct dm_list *head);
+
+/*
+ * Is this the first element of the list?
+ */
+int dm_list_start(const struct dm_list *head, const struct dm_list *elem);
+
+/*
+ * Is this the last element of the list?
+ */
+int dm_list_end(const struct dm_list *head, const struct dm_list *elem);
+
+/*
+ * Return first element of the list or NULL if empty
+ */
+struct dm_list *dm_list_first(const struct dm_list *head);
+
+/*
+ * Return last element of the list or NULL if empty
+ */
+struct dm_list *dm_list_last(const struct dm_list *head);
+
+/*
+ * Return the previous element of the list, or NULL if we've reached the start.
+ */
+struct dm_list *dm_list_prev(const struct dm_list *head, const struct dm_list *elem);
+
+/*
+ * Return the next element of the list, or NULL if we've reached the end.
+ */
+struct dm_list *dm_list_next(const struct dm_list *head, const struct dm_list *elem);
+
+/*
+ * Given the address v of an instance of 'struct dm_list' called 'head'
+ * contained in a structure of type t, return the containing structure.
+ */
+#define dm_list_struct_base(v, t, head) \
+ ((t *)((const char *)(v) - (const char *)&((t *) 0)->head))
+
+/*
+ * Given the address v of an instance of 'struct dm_list list' contained in
+ * a structure of type t, return the containing structure.
+ */
+#define dm_list_item(v, t) dm_list_struct_base((v), t, list)
+
+/*
+ * Given the address v of one known element e in a known structure of type t,
+ * return another element f.
+ */
+#define dm_struct_field(v, t, e, f) \
+ (((t *)((uintptr_t)(v) - (uintptr_t)&((t *) 0)->e))->f)
+
+/*
+ * Given the address v of a known element e in a known structure of type t,
+ * return the list head 'list'
+ */
+#define dm_list_head(v, t, e) dm_struct_field(v, t, e, list)
+
+/*
+ * Set v to each element of a list in turn.
+ */
+/* Parenthesize 'head' everywhere so complex argument expressions parse safely. */
+#define dm_list_iterate(v, head) \
+	for (v = (head)->n; v != (head); v = v->n)
+
+/*
+ * Set v to each element in a list in turn, starting from the element
+ * in front of 'start'.
+ * You can use this to 'unwind' a list_iterate and back out actions on
+ * already-processed elements.
+ * If 'start' is 'head' it walks the list backwards.
+ */
+/* Parenthesize 'head' so complex argument expressions parse safely. */
+#define dm_list_uniterate(v, head, start) \
+	for (v = (start)->p; v != (head); v = v->p)
+
+/*
+ * A safe way to walk a list and delete and free some elements along
+ * the way.
+ * t must be defined as a temporary variable of the same type as v.
+ */
+/* Parenthesize 'head' so complex argument expressions parse safely. */
+#define dm_list_iterate_safe(v, t, head) \
+	for (v = (head)->n, t = v->n; v != (head); v = t, t = v->n)
+
+/*
+ * Walk a list, setting 'v' in turn to the containing structure of each item.
+ * The containing structure should be the same type as 'v'.
+ * The 'struct dm_list' variable within the containing structure is 'field'.
+ */
+#define dm_list_iterate_items_gen(v, head, field) \
+ for (v = dm_list_struct_base((head)->n, __typeof__(*v), field); \
+ &v->field != (head); \
+ v = dm_list_struct_base(v->field.n, __typeof__(*v), field))
+
+/*
+ * Walk a list, setting 'v' in turn to the containing structure of each item.
+ * The containing structure should be the same type as 'v'.
+ * The list should be 'struct dm_list list' within the containing structure.
+ */
+#define dm_list_iterate_items(v, head) dm_list_iterate_items_gen(v, (head), list)
+
+/*
+ * Walk a list, setting 'v' in turn to the containing structure of each item.
+ * The containing structure should be the same type as 'v'.
+ * The 'struct dm_list' variable within the containing structure is 'field'.
+ * t must be defined as a temporary variable of the same type as v.
+ */
+#define dm_list_iterate_items_gen_safe(v, t, head, field) \
+ for (v = dm_list_struct_base((head)->n, __typeof__(*v), field), \
+ t = dm_list_struct_base(v->field.n, __typeof__(*v), field); \
+ &v->field != (head); \
+ v = t, t = dm_list_struct_base(v->field.n, __typeof__(*v), field))
+/*
+ * Walk a list, setting 'v' in turn to the containing structure of each item.
+ * The containing structure should be the same type as 'v'.
+ * The list should be 'struct dm_list list' within the containing structure.
+ * t must be defined as a temporary variable of the same type as v.
+ */
+#define dm_list_iterate_items_safe(v, t, head) \
+ dm_list_iterate_items_gen_safe(v, t, (head), list)
+
+/*
+ * Walk a list backwards, setting 'v' in turn to the containing structure
+ * of each item.
+ * The containing structure should be the same type as 'v'.
+ * The 'struct dm_list' variable within the containing structure is 'field'.
+ */
+#define dm_list_iterate_back_items_gen(v, head, field) \
+ for (v = dm_list_struct_base((head)->p, __typeof__(*v), field); \
+ &v->field != (head); \
+ v = dm_list_struct_base(v->field.p, __typeof__(*v), field))
+
+/*
+ * Walk a list backwards, setting 'v' in turn to the containing structure
+ * of each item.
+ * The containing structure should be the same type as 'v'.
+ * The list should be 'struct dm_list list' within the containing structure.
+ */
+#define dm_list_iterate_back_items(v, head) dm_list_iterate_back_items_gen(v, (head), list)
+
+/*
+ * Return the number of elements in a list by walking it.
+ */
+unsigned int dm_list_size(const struct dm_list *head);
+
+/*********
+ * selinux
+ *********/
+
+/*
+ * Obtain SELinux security context assigned for the path and set this
+ * context for creating a new file system object. This security context
+ * is global and it is used until reset to default policy behaviour
+ * by calling 'dm_prepare_selinux_context(NULL, 0)'.
+ */
+int dm_prepare_selinux_context(const char *path, mode_t mode);
+/*
+ * Set SELinux context for existing file system object.
+ */
+int dm_set_selinux_context(const char *path, mode_t mode);
+
+/*********************
+ * string manipulation
+ *********************/
+
+/*
+ * Break up the name of a mapped device into its constituent
+ * Volume Group, Logical Volume and Layer (if present).
+ * If mem is supplied, the result is allocated from the mempool.
+ * Otherwise the strings are changed in situ.
+ */
+int dm_split_lvm_name(struct dm_pool *mem, const char *dmname,
+ char **vgname, char **lvname, char **layer);
+
+/*
+ * Destructively split buffer into NULL-separated words in argv.
+ * Returns number of words.
+ */
+int dm_split_words(char *buffer, unsigned max,
+ unsigned ignore_comments, /* Not implemented */
+ char **argv);
+
+/*
+ * Returns -1 if buffer too small
+ */
+int dm_snprintf(char *buf, size_t bufsize, const char *format, ...)
+ __attribute__ ((format(printf, 3, 4)));
+
+/*
+ * Returns pointer to the last component of the path.
+ */
+const char *dm_basename(const char *path);
+
+/*
+ * Returns number of occurrences of 'c' in 'str' of length 'size'.
+ */
+unsigned dm_count_chars(const char *str, size_t len, const int c);
+
+/*
+ * Length of string after escaping double quotes and backslashes.
+ */
+size_t dm_escaped_len(const char *str);
+
+/*
+ * <vg>-<lv>-<layer> or if !layer just <vg>-<lv>.
+ */
+char *dm_build_dm_name(struct dm_pool *mem, const char *vgname,
+ const char *lvname, const char *layer);
+char *dm_build_dm_uuid(struct dm_pool *mem, const char *prefix, const char *lvid, const char *layer);
+
+/*
+ * Copies a string, quoting double quotes with backslashes.
+ */
+char *dm_escape_double_quotes(char *out, const char *src);
+
+/*
+ * Undo quoting in situ.
+ */
+void dm_unescape_double_quotes(char *src);
+
+/*
+ * Unescape colons and "at" signs in situ and save the substrings
+ * starting at the position of the first unescaped colon and the
+ * first unescaped "at" sign. This is normally used to unescape
+ * device names used as PVs.
+ */
+void dm_unescape_colons_and_at_signs(char *src,
+ char **substr_first_unquoted_colon,
+ char **substr_first_unquoted_at_sign);
+
+/*
+ * Replacement for strncpy() function.
+ *
+ * Copies no more than n bytes from the string pointed to by src to the
+ * buffer pointed to by dest, and ensures the result is terminated with '\0'.
+ * Returns 0 if the whole string does not fit.
+ */
+int dm_strncpy(char *dest, const char *src, size_t n);
+
+/*
+ * Recognize unit specifier in the 'units' arg and return a factor
+ * representing that unit. If the 'units' contains a prefix with digits,
+ * the 'units' is considered to be a custom unit.
+ *
+ * Also, set 'unit_type' output arg to the character that represents
+ * the unit specified. For canonical units, the 'unit_type' character is
+ * the same as the unit character recognized in the 'units' arg.
+ * Otherwise, the 'unit_type' character is set to 'U' for custom unit.
+ *
+ * An example for k/K canonical units and 8k/8K custom units:
+ *
+ * units unit_type return value (factor)
+ * k k 1024
+ * K K 1000
+ * 8k U 1024*8
+ * 8K U 1000*8
+ * etc...
+ *
+ * Recognized units:
+ *
+ * h/H - human readable (returns 1 for both)
+ * b/B - byte (returns 1 for both)
+ * s/S - sector (returns 512 for both)
+ * k/K - kilo (returns 1024/1000 respectively)
+ * m/M - mega (returns 1024^2/1000^2 respectively)
+ * g/G - giga (returns 1024^3/1000^3 respectively)
+ * t/T - tera (returns 1024^4/1000^4 respectively)
+ * p/P - peta (returns 1024^5/1000^5 respectively)
+ * e/E - exa (returns 1024^6/1000^6 respectively)
+ *
+ * Only one units character is allowed in the 'units' arg
+ * if strict mode is enabled by 'strict' arg.
+ *
+ * The 'endptr' output arg, if not NULL, saves the pointer
+ * in the 'units' string which follows the unit specifier
+ * recognized (IOW the position where the parsing of the
+ * unit specifier stopped).
+ *
+ * Returns the unit factor or 0 if no unit is recognized.
+ */
+uint64_t dm_units_to_factor(const char *units, char *unit_type,
+ int strict, const char **endptr);
+
+/*
+ * Type of unit specifier used by dm_size_to_string().
+ */
+typedef enum {
+ DM_SIZE_LONG = 0, /* Megabyte */
+ DM_SIZE_SHORT = 1, /* MB or MiB */
+ DM_SIZE_UNIT = 2 /* M or m */
+} dm_size_suffix_t;
+
+/*
+ * Convert a size (in 512-byte sectors) into a printable string using units of unit_type.
+ * An upper-case unit_type indicates output units based on powers of 1000 are
+ * required; a lower-case unit_type indicates powers of 1024.
+ * For correct operation, unit_factor must be one of:
+ * 0 - the correct value will be calculated internally;
+ * or the output from dm_units_to_factor() corresponding to unit_type;
+ * or 'u' or 'U', an arbitrary number of bytes to use as the power base.
+ * Set include_suffix to 1 to include a suffix of suffix_type.
+ * Set use_si_units to 0 for suffixes that don't distinguish between 1000 and 1024.
+ * Set use_si_units to 1 for a suffix that does distinguish.
+ */
+const char *dm_size_to_string(struct dm_pool *mem, uint64_t size,
+ char unit_type, int use_si_units,
+ uint64_t unit_factor, int include_suffix,
+ dm_size_suffix_t suffix_type);
+
+/**************************
+ * file/stream manipulation
+ **************************/
+
+/*
+ * Create a directory (with parent directories if necessary).
+ * Returns 1 on success, 0 on failure.
+ */
+int dm_create_dir(const char *dir);
+
+int dm_is_empty_dir(const char *dir);
+
+/*
+ * Close a stream, with nicer error checking than fclose's.
+ * Derived from gnulib's close-stream.c.
+ *
+ * Close "stream". Return 0 if successful, and EOF (setting errno)
+ * otherwise. Upon failure, set errno to 0 if the error number
+ * cannot be determined. Useful mainly for writable streams.
+ */
+int dm_fclose(FILE *stream);
+
+/*
+ * Returns size of a buffer which is allocated with dm_malloc.
+ * Pointer to the buffer is stored in *buf.
+ * Returns -1 on failure leaving buf undefined.
+ */
+int dm_asprintf(char **buf, const char *format, ...)
+ __attribute__ ((format(printf, 2, 3)));
+int dm_vasprintf(char **buf, const char *format, va_list ap)
+ __attribute__ ((format(printf, 2, 0)));
+
+/*
+ * create lockfile (pidfile) - create and lock a lock file
+ * @lockfile: location of lock file
+ *
+ * Returns: 1 on success, 0 otherwise, errno is handled internally
+ */
+int dm_create_lockfile(const char* lockfile);
+
+/*
+ * Query whether a daemon is running based on its lockfile
+ *
+ * Returns: 1 if running, 0 if not
+ */
+int dm_daemon_is_running(const char* lockfile);
+
+/*********************
+ * regular expressions
+ *********************/
+struct dm_regex;
+
+/*
+ * Initialise an array of num patterns for matching.
+ * Uses memory from mem.
+ */
+struct dm_regex *dm_regex_create(struct dm_pool *mem, const char * const *patterns,
+ unsigned num_patterns);
+
+/*
+ * Match string s against the patterns.
+ * Returns the index of the highest pattern in the array that matches,
+ * or -1 if none match.
+ */
+int dm_regex_match(struct dm_regex *regex, const char *s);
+
+/*
+ * This is useful for regression testing only. The idea is if two
+ * fingerprints are different, then the two dfas are certainly not
+ * isomorphic. If two fingerprints _are_ the same then it's very likely
+ * that the dfas are isomorphic.
+ *
+ * This function must be called before any matching is done.
+ */
+uint32_t dm_regex_fingerprint(struct dm_regex *regex);
+
+/******************
+ * percent handling
+ ******************/
+/*
+ * A fixed-point representation of percent values. One percent equals to
+ * DM_PERCENT_1 as defined below. Values that are not multiples of DM_PERCENT_1
+ * represent fractions, with precision of 1/1000000 of a percent. See
+ * dm_percent_to_float for a conversion to a floating-point representation.
+ *
+ * You should always use dm_make_percent when building dm_percent_t values. The
+ * implementation of dm_make_percent is biased towards the middle: it ensures that
+ * the result is DM_PERCENT_0 or DM_PERCENT_100 if and only if this is the actual
+ * value -- it never rounds any intermediate value (> 0 or < 100) to either 0
+ * or 100.
+*/
+#define DM_PERCENT_CHAR '%'
+
+typedef enum {
+ DM_PERCENT_0 = 0,
+ DM_PERCENT_1 = 1000000,
+ DM_PERCENT_100 = 100 * DM_PERCENT_1,
+ DM_PERCENT_INVALID = -1,
+ DM_PERCENT_FAILED = -2
+} dm_percent_range_t;
+
+typedef int32_t dm_percent_t;
+
+float dm_percent_to_float(dm_percent_t percent);
+/*
+ * Return adjusted/rounded float for better percent value printing.
+ * Function ensures for given precision of digits:
+ * 100.0% returns only when the value is DM_PERCENT_100
+ * for close smaller values rounds to nearest smaller value
+ * 0.0% returns only for value DM_PERCENT_0
+ * for close bigger values rounds to nearest bigger value
+ * In all other cases returns same value as dm_percent_to_float()
+ */
+float dm_percent_to_round_float(dm_percent_t percent, unsigned digits);
+dm_percent_t dm_make_percent(uint64_t numerator, uint64_t denominator);
+
+/********************
+ * timestamp handling
+ ********************/
+
+/*
+ * Create a dm_timestamp object to use with dm_timestamp_get.
+ */
+struct dm_timestamp *dm_timestamp_alloc(void);
+
+/*
+ * Update dm_timestamp object to represent the current time.
+ */
+int dm_timestamp_get(struct dm_timestamp *ts);
+
+/*
+ * Copy a timestamp from ts_old to ts_new.
+ */
+void dm_timestamp_copy(struct dm_timestamp *ts_new, struct dm_timestamp *ts_old);
+
+/*
+ * Compare two timestamps.
+ *
+ * Return: -1 if ts1 is less than ts2
+ * 0 if ts1 is equal to ts2
+ * 1 if ts1 is greater than ts2
+ */
+int dm_timestamp_compare(struct dm_timestamp *ts1, struct dm_timestamp *ts2);
+
+/*
+ * Return the absolute difference in nanoseconds between
+ * the dm_timestamp objects ts1 and ts2.
+ *
+ * Callers that need to know whether ts1 is before, equal to, or after ts2
+ * in addition to the magnitude should use dm_timestamp_compare.
+ */
+uint64_t dm_timestamp_delta(struct dm_timestamp *ts1, struct dm_timestamp *ts2);
+
+/*
+ * Destroy a dm_timestamp object.
+ */
+void dm_timestamp_destroy(struct dm_timestamp *ts);
+
+/*********************
+ * reporting functions
+ *********************/
+
+struct dm_report_object_type {
+ uint32_t id; /* Powers of 2 */
+ const char *desc;
+ const char *prefix; /* field id string prefix (optional) */
+ /* FIXME: convert to proper usage of const pointers here */
+ void *(*data_fn)(void *object); /* callback from report_object() */
+};
+
+struct dm_report_field;
+
+/*
+ * dm_report_field_type flags
+ */
+#define DM_REPORT_FIELD_MASK 0x00000FFF
+#define DM_REPORT_FIELD_ALIGN_MASK 0x0000000F
+#define DM_REPORT_FIELD_ALIGN_LEFT 0x00000001
+#define DM_REPORT_FIELD_ALIGN_RIGHT 0x00000002
+#define DM_REPORT_FIELD_TYPE_MASK 0x00000FF0
+#define DM_REPORT_FIELD_TYPE_NONE 0x00000000
+#define DM_REPORT_FIELD_TYPE_STRING 0x00000010
+#define DM_REPORT_FIELD_TYPE_NUMBER 0x00000020
+#define DM_REPORT_FIELD_TYPE_SIZE 0x00000040
+#define DM_REPORT_FIELD_TYPE_PERCENT 0x00000080
+#define DM_REPORT_FIELD_TYPE_STRING_LIST 0x00000100
+#define DM_REPORT_FIELD_TYPE_TIME 0x00000200
+
+/* For use with reserved values only! */
+#define DM_REPORT_FIELD_RESERVED_VALUE_MASK 0x0000000F
+#define DM_REPORT_FIELD_RESERVED_VALUE_NAMED 0x00000001 /* only named value, less strict form of reservation */
+#define DM_REPORT_FIELD_RESERVED_VALUE_RANGE 0x00000002 /* value is range - low and high value defined */
+#define DM_REPORT_FIELD_RESERVED_VALUE_DYNAMIC_VALUE 0x00000004 /* value is computed in runtime */
+#define DM_REPORT_FIELD_RESERVED_VALUE_FUZZY_NAMES 0x00000008 /* value names are recognized in runtime */
+
+#define DM_REPORT_FIELD_TYPE_ID_LEN 32
+#define DM_REPORT_FIELD_TYPE_HEADING_LEN 32
+
+struct dm_report;
+struct dm_report_field_type {
+ uint32_t type; /* object type id */
+ uint32_t flags; /* DM_REPORT_FIELD_* */
+ uint32_t offset; /* byte offset in the object */
+ int32_t width; /* default width */
+ /* string used to specify the field */
+ const char id[DM_REPORT_FIELD_TYPE_ID_LEN];
+ /* string printed in header */
+ const char heading[DM_REPORT_FIELD_TYPE_HEADING_LEN];
+ int (*report_fn)(struct dm_report *rh, struct dm_pool *mem,
+ struct dm_report_field *field, const void *data,
+ void *private_data);
+ const char *desc; /* description of the field */
+};
+
+/*
+ * Per-field reserved value.
+ */
+struct dm_report_field_reserved_value {
+ /* field_num is the position of the field in 'fields'
+ array passed to dm_report_init_with_selection */
+ uint32_t field_num;
+ /* the value is of the same type as the field
+ identified by field_num */
+ const void *value;
+};
+
+/*
+ * Reserved value is a 'value' that is used directly if any of the 'names' is hit
+ * or in case of fuzzy names, if such fuzzy name matches.
+ *
+ * If type is any of DM_REPORT_FIELD_TYPE_*, the reserved value is recognized
+ * for all fields of that type.
+ *
+ * If type is DM_REPORT_FIELD_TYPE_NONE, the reserved value is recognized
+ * for the exact field specified - hence the type of the value is automatically
+ * the same as the type of the field itself.
+ *
+ * The array of reserved values is used to initialize reporting with
+ * selection enabled (see also dm_report_init_with_selection function).
+ */
+struct dm_report_reserved_value {
+ const uint32_t type; /* DM_REPORT_FIELD_RESERVED_VALUE_* and DM_REPORT_FIELD_TYPE_* */
+ const void *value; /* reserved value:
+ uint64_t for DM_REPORT_FIELD_TYPE_NUMBER
+ uint64_t for DM_REPORT_FIELD_TYPE_SIZE (number of 512-byte sectors)
+ uint64_t for DM_REPORT_FIELD_TYPE_PERCENT
+ const char* for DM_REPORT_FIELD_TYPE_STRING
+ struct dm_report_field_reserved_value for DM_REPORT_FIELD_TYPE_NONE
+ dm_report_reserved_handler* if DM_REPORT_FIELD_RESERVED_VALUE_{DYNAMIC_VALUE,FUZZY_NAMES} is used */
+ const char **names; /* null-terminated array of static names for this reserved value */
+ const char *description; /* description of the reserved value */
+};
+
+/*
+ * Available actions for dm_report_reserved_value_handler.
+ */
+typedef enum {
+ DM_REPORT_RESERVED_PARSE_FUZZY_NAME,
+ DM_REPORT_RESERVED_GET_DYNAMIC_VALUE,
+} dm_report_reserved_action_t;
+
+/*
+ * Generic reserved value handler to process reserved value names and/or values.
+ *
+ * Actions and their input/output:
+ *
+ * DM_REPORT_RESERVED_PARSE_FUZZY_NAME
+ * data_in: const char *fuzzy_name
+ * data_out: const char *canonical_name, NULL if fuzzy_name not recognized
+ *
+ * DM_REPORT_RESERVED_GET_DYNAMIC_VALUE
+ * data_in: const char *canonical_name
+ * data_out: void *value, NULL if canonical_name not recognized
+ *
+ * All actions return:
+ *
+ * -1 if action not implemented
+ * 0 on error
+ * 1 on success
+ */
+typedef int (*dm_report_reserved_handler) (struct dm_report *rh,
+ struct dm_pool *mem,
+ uint32_t field_num,
+ dm_report_reserved_action_t action,
+ const void *data_in,
+ const void **data_out);
+
+/*
+ * The dm_report_value_cache_{set,get} are helper functions to store and retrieve
+ * various values used during reporting (dm_report_field_type.report_fn) and/or
+ * selection processing (dm_report_reserved_handler instances) to avoid
+ * recalculation of these values or to share values among calls.
+ */
+int dm_report_value_cache_set(struct dm_report *rh, const char *name, const void *data);
+const void *dm_report_value_cache_get(struct dm_report *rh, const char *name);
+/*
+ * dm_report_init output_flags
+ */
+#define DM_REPORT_OUTPUT_MASK 0x000000FF
+#define DM_REPORT_OUTPUT_ALIGNED 0x00000001
+#define DM_REPORT_OUTPUT_BUFFERED 0x00000002
+#define DM_REPORT_OUTPUT_HEADINGS 0x00000004
+#define DM_REPORT_OUTPUT_FIELD_NAME_PREFIX 0x00000008
+#define DM_REPORT_OUTPUT_FIELD_UNQUOTED 0x00000010
+#define DM_REPORT_OUTPUT_COLUMNS_AS_ROWS 0x00000020
+#define DM_REPORT_OUTPUT_MULTIPLE_TIMES 0x00000040
+
+struct dm_report *dm_report_init(uint32_t *report_types,
+ const struct dm_report_object_type *types,
+ const struct dm_report_field_type *fields,
+ const char *output_fields,
+ const char *output_separator,
+ uint32_t output_flags,
+ const char *sort_keys,
+ void *private_data);
+struct dm_report *dm_report_init_with_selection(uint32_t *report_types,
+ const struct dm_report_object_type *types,
+ const struct dm_report_field_type *fields,
+ const char *output_fields,
+ const char *output_separator,
+ uint32_t output_flags,
+ const char *sort_keys,
+ const char *selection,
+ const struct dm_report_reserved_value reserved_values[],
+ void *private_data);
+/*
+ * Report an object, pass it through the selection criteria if they
+ * are present and display the result on output if it passes the criteria.
+ */
+int dm_report_object(struct dm_report *rh, void *object);
+/*
+ * The same as dm_report_object, but display the result on output only if
+ * 'do_output' arg is set. Also, save the result of selection in 'selected'
+ * arg if it's not NULL (either 1 if the object passes, otherwise 0).
+ */
+int dm_report_object_is_selected(struct dm_report *rh, void *object, int do_output, int *selected);
+
+/*
+ * Compact report output so that if field value is empty for all rows in
+ * the report, drop the field from output completely (including headers).
+ * Compact output is applicable only if report is buffered, otherwise
+ * this function has no effect.
+ */
+int dm_report_compact_fields(struct dm_report *rh);
+
+/*
+ * The same as dm_report_compact_fields, but for selected fields only.
+ * The "fields" arg is comma separated list of field names (the same format
+ * as used for "output_fields" arg in dm_report_init fn).
+ */
+int dm_report_compact_given_fields(struct dm_report *rh, const char *fields);
+
+/*
+ * Returns 1 if there is no data waiting to be output.
+ */
+int dm_report_is_empty(struct dm_report *rh);
+
+/*
+ * Destroy report content without doing output.
+ */
+void dm_report_destroy_rows(struct dm_report *rh);
+
+int dm_report_output(struct dm_report *rh);
+
+/*
+ * Output the report headings for a columns-based report, even if they
+ * have already been shown. Useful for repeating reports that wish to
+ * issue a periodic reminder of the column headings.
+ */
+int dm_report_column_headings(struct dm_report *rh);
+
+void dm_report_free(struct dm_report *rh);
+
+/*
+ * Prefix added to each field name with DM_REPORT_OUTPUT_FIELD_NAME_PREFIX
+ */
+int dm_report_set_output_field_name_prefix(struct dm_report *rh,
+ const char *report_prefix);
+
+int dm_report_set_selection(struct dm_report *rh, const char *selection);
+
+/*
+ * Report functions are provided for simple data types.
+ * They take care of allocating copies of the data.
+ */
+int dm_report_field_string(struct dm_report *rh, struct dm_report_field *field,
+ const char *const *data);
+int dm_report_field_string_list(struct dm_report *rh, struct dm_report_field *field,
+ const struct dm_list *data, const char *delimiter);
+int dm_report_field_string_list_unsorted(struct dm_report *rh, struct dm_report_field *field,
+ const struct dm_list *data, const char *delimiter);
+int dm_report_field_int32(struct dm_report *rh, struct dm_report_field *field,
+ const int32_t *data);
+int dm_report_field_uint32(struct dm_report *rh, struct dm_report_field *field,
+ const uint32_t *data);
+int dm_report_field_int(struct dm_report *rh, struct dm_report_field *field,
+ const int *data);
+int dm_report_field_uint64(struct dm_report *rh, struct dm_report_field *field,
+ const uint64_t *data);
+int dm_report_field_percent(struct dm_report *rh, struct dm_report_field *field,
+ const dm_percent_t *data);
+
+/*
+ * For custom fields, allocate the data in 'mem' and use
+ * dm_report_field_set_value().
+ * 'sortvalue' may be NULL if it matches 'value'
+ */
+void dm_report_field_set_value(struct dm_report_field *field, const void *value,
+ const void *sortvalue);
+
+/*
+ * Report group support.
+ */
+struct dm_report_group;
+
+typedef enum {
+ DM_REPORT_GROUP_SINGLE,
+ DM_REPORT_GROUP_BASIC,
+ DM_REPORT_GROUP_JSON
+} dm_report_group_type_t;
+
+struct dm_report_group *dm_report_group_create(dm_report_group_type_t type, void *data);
+int dm_report_group_push(struct dm_report_group *group, struct dm_report *report, void *data);
+int dm_report_group_pop(struct dm_report_group *group);
+int dm_report_group_output_and_pop_all(struct dm_report_group *group);
+int dm_report_group_destroy(struct dm_report_group *group);
+
+/*
+ * Stats counter access methods
+ *
+ * Each method returns the corresponding stats counter value from the
+ * supplied dm_stats handle for the specified region_id and area_id.
+ * If either region_id or area_id uses one of the special values
+ * DM_STATS_REGION_CURRENT or DM_STATS_AREA_CURRENT then the region
+ * or area is selected according to the current state of the dm_stats
+ * handle's embedded cursor.
+ *
+ * Two methods are provided to access counter values: a named function
+ * for each available counter field and a single function that accepts
+ * an enum value specifying the required field. New code is encouraged
+ * to use the enum based interface as calls to the named functions are
+ * implemented using the enum method internally.
+ *
+ * See the kernel documentation for complete descriptions of each
+ * counter field:
+ *
+ * Documentation/device-mapper/statistics.txt
+ * Documentation/iostats.txt
+ *
+ * reads: the number of reads completed
+ * reads_merged: the number of reads merged
+ * read_sectors: the number of sectors read
+ * read_nsecs: the number of nanoseconds spent reading
+ * writes: the number of writes completed
+ * writes_merged: the number of writes merged
+ * write_sectors: the number of sectors written
+ * write_nsecs: the number of nanoseconds spent writing
+ * io_in_progress: the number of I/Os currently in progress
+ * io_nsecs: the number of nanoseconds spent doing I/Os
+ * weighted_io_nsecs: the weighted number of nanoseconds spent doing I/Os
+ * total_read_nsecs: the total time spent reading in nanoseconds
+ * total_write_nsecs: the total time spent writing in nanoseconds
+ */
+
+#define DM_STATS_REGION_CURRENT UINT64_MAX
+#define DM_STATS_AREA_CURRENT UINT64_MAX
+
+typedef enum {
+ DM_STATS_READS_COUNT,
+ DM_STATS_READS_MERGED_COUNT,
+ DM_STATS_READ_SECTORS_COUNT,
+ DM_STATS_READ_NSECS,
+ DM_STATS_WRITES_COUNT,
+ DM_STATS_WRITES_MERGED_COUNT,
+ DM_STATS_WRITE_SECTORS_COUNT,
+ DM_STATS_WRITE_NSECS,
+ DM_STATS_IO_IN_PROGRESS_COUNT,
+ DM_STATS_IO_NSECS,
+ DM_STATS_WEIGHTED_IO_NSECS,
+ DM_STATS_TOTAL_READ_NSECS,
+ DM_STATS_TOTAL_WRITE_NSECS,
+ DM_STATS_NR_COUNTERS
+} dm_stats_counter_t;
+
+uint64_t dm_stats_get_counter(const struct dm_stats *dms,
+ dm_stats_counter_t counter,
+ uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_reads(const struct dm_stats *dms,
+ uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_reads_merged(const struct dm_stats *dms,
+ uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_read_sectors(const struct dm_stats *dms,
+ uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_read_nsecs(const struct dm_stats *dms,
+ uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_writes(const struct dm_stats *dms,
+ uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_writes_merged(const struct dm_stats *dms,
+ uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_write_sectors(const struct dm_stats *dms,
+ uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_write_nsecs(const struct dm_stats *dms,
+ uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_io_in_progress(const struct dm_stats *dms,
+ uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_io_nsecs(const struct dm_stats *dms,
+ uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_weighted_io_nsecs(const struct dm_stats *dms,
+ uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_total_read_nsecs(const struct dm_stats *dms,
+ uint64_t region_id, uint64_t area_id);
+
+uint64_t dm_stats_get_total_write_nsecs(const struct dm_stats *dms,
+ uint64_t region_id, uint64_t area_id);
+
+/*
+ * Derived statistics access methods
+ *
+ * Each method returns the corresponding value calculated from the
+ * counters stored in the supplied dm_stats handle for the specified
+ * region_id and area_id. If either region_id or area_id uses one of the
+ * special values DM_STATS_REGION_CURRENT or DM_STATS_AREA_CURRENT then
+ * the region or area is selected according to the current state of the
+ * dm_stats handle's embedded cursor.
+ *
+ * The set of metrics is based on the fields provided by the Linux
+ * iostats program.
+ *
+ * rd_merges_per_sec: the number of reads merged per second
+ * wr_merges_per_sec: the number of writes merged per second
+ * reads_per_sec: the number of reads completed per second
+ * writes_per_sec: the number of writes completed per second
+ * read_sectors_per_sec: the number of sectors read per second
+ * write_sectors_per_sec: the number of sectors written per second
+ * average_request_size: the average size of requests submitted
+ * service_time: the average service time (in ns) for requests issued
+ * average_queue_size: the average queue length
+ * average_wait_time: the average time for requests to be served (in ns)
+ * average_rd_wait_time: the average read wait time
+ * average_wr_wait_time: the average write wait time
+ */
+
+typedef enum {
+ DM_STATS_RD_MERGES_PER_SEC,
+ DM_STATS_WR_MERGES_PER_SEC,
+ DM_STATS_READS_PER_SEC,
+ DM_STATS_WRITES_PER_SEC,
+ DM_STATS_READ_SECTORS_PER_SEC,
+ DM_STATS_WRITE_SECTORS_PER_SEC,
+ DM_STATS_AVERAGE_REQUEST_SIZE,
+ DM_STATS_AVERAGE_QUEUE_SIZE,
+ DM_STATS_AVERAGE_WAIT_TIME,
+ DM_STATS_AVERAGE_RD_WAIT_TIME,
+ DM_STATS_AVERAGE_WR_WAIT_TIME,
+ DM_STATS_SERVICE_TIME,
+ DM_STATS_THROUGHPUT,
+ DM_STATS_UTILIZATION,
+ DM_STATS_NR_METRICS
+} dm_stats_metric_t;
+
+int dm_stats_get_metric(const struct dm_stats *dms, int metric,
+ uint64_t region_id, uint64_t area_id, double *value);
+
+int dm_stats_get_rd_merges_per_sec(const struct dm_stats *dms, double *rrqm,
+ uint64_t region_id, uint64_t area_id);
+
+int dm_stats_get_wr_merges_per_sec(const struct dm_stats *dms, double *rrqm,
+ uint64_t region_id, uint64_t area_id);
+
+int dm_stats_get_reads_per_sec(const struct dm_stats *dms, double *rd_s,
+ uint64_t region_id, uint64_t area_id);
+
+int dm_stats_get_writes_per_sec(const struct dm_stats *dms, double *wr_s,
+ uint64_t region_id, uint64_t area_id);
+
+int dm_stats_get_read_sectors_per_sec(const struct dm_stats *dms,
+ double *rsec_s, uint64_t region_id,
+ uint64_t area_id);
+
+int dm_stats_get_write_sectors_per_sec(const struct dm_stats *dms,
+ double *wr_s, uint64_t region_id,
+ uint64_t area_id);
+
+int dm_stats_get_average_request_size(const struct dm_stats *dms,
+ double *arqsz, uint64_t region_id,
+ uint64_t area_id);
+
+int dm_stats_get_service_time(const struct dm_stats *dms, double *svctm,
+ uint64_t region_id, uint64_t area_id);
+
+int dm_stats_get_average_queue_size(const struct dm_stats *dms, double *qusz,
+ uint64_t region_id, uint64_t area_id);
+
+int dm_stats_get_average_wait_time(const struct dm_stats *dms, double *await,
+ uint64_t region_id, uint64_t area_id);
+
+int dm_stats_get_average_rd_wait_time(const struct dm_stats *dms,
+ double *await, uint64_t region_id,
+ uint64_t area_id);
+
+int dm_stats_get_average_wr_wait_time(const struct dm_stats *dms,
+ double *await, uint64_t region_id,
+ uint64_t area_id);
+
+int dm_stats_get_throughput(const struct dm_stats *dms, double *tput,
+ uint64_t region_id, uint64_t area_id);
+
+int dm_stats_get_utilization(const struct dm_stats *dms, dm_percent_t *util,
+ uint64_t region_id, uint64_t area_id);
+
+/*
+ * Statistics histogram access methods.
+ *
+ * Methods to access latency histograms for regions that have them
+ * enabled. Each histogram contains a configurable number of bins
+ * spanning a user defined latency interval.
+ *
+ * The bin count, upper and lower bin bounds, and bin values are
+ * made available via the following area methods.
+ *
+ * Methods to obtain a simple string representation of the histogram
+ * and its bounds are also provided.
+ */
+
+/*
+ * Retrieve a pointer to the histogram associated with the specified
+ * area. If the area does not have a histogram configured this function
+ * returns NULL.
+ *
+ * The pointer does not need to be freed explicitly by the caller: it
+ * will become invalid following a subsequent dm_stats_list(),
+ * dm_stats_populate() or dm_stats_destroy() of the corresponding
+ * dm_stats handle.
+ *
+ * If region_id or area_id is one of the special values
+ * DM_STATS_REGION_CURRENT or DM_STATS_AREA_CURRENT the current cursor
+ * value is used to select the region or area.
+ */
+struct dm_histogram *dm_stats_get_histogram(const struct dm_stats *dms,
+ uint64_t region_id,
+ uint64_t area_id);
+
+/*
+ * Return the number of bins in the specified histogram handle.
+ */
+int dm_histogram_get_nr_bins(const struct dm_histogram *dmh);
+
+/*
+ * Get the lower bound of the specified bin of the histogram for the
+ * area specified by region_id and area_id. The value is returned in
+ * nanoseconds.
+ */
+uint64_t dm_histogram_get_bin_lower(const struct dm_histogram *dmh, int bin);
+
+/*
+ * Get the upper bound of the specified bin of the histogram for the
+ * area specified by region_id and area_id. The value is returned in
+ * nanoseconds.
+ */
+uint64_t dm_histogram_get_bin_upper(const struct dm_histogram *dmh, int bin);
+
+/*
+ * Get the width of the specified bin of the histogram for the area
+ * specified by region_id and area_id. The width is equal to the bin
+ * upper bound minus the lower bound and yields the range of latency
+ * values covered by this bin. The value is returned in nanoseconds.
+ */
+uint64_t dm_histogram_get_bin_width(const struct dm_histogram *dmh, int bin);
+
+/*
+ * Get the value of the specified bin of the histogram for the area
+ * specified by region_id and area_id.
+ */
+uint64_t dm_histogram_get_bin_count(const struct dm_histogram *dmh, int bin);
+
+/*
+ * Get the percentage (relative frequency) of the specified bin of the
+ * histogram for the area specified by region_id and area_id.
+ */
+dm_percent_t dm_histogram_get_bin_percent(const struct dm_histogram *dmh,
+ int bin);
+
+/*
+ * Return the total observations (sum of bin counts) for the histogram
+ * of the area specified by region_id and area_id.
+ */
+uint64_t dm_histogram_get_sum(const struct dm_histogram *dmh);
+
+/*
+ * Histogram formatting flags for dm_histogram_to_string().
+ */
+#define DM_HISTOGRAM_SUFFIX 0x1
+#define DM_HISTOGRAM_VALUES 0x2
+#define DM_HISTOGRAM_PERCENT 0x4
+#define DM_HISTOGRAM_BOUNDS_LOWER 0x10
+#define DM_HISTOGRAM_BOUNDS_UPPER 0x20
+/* LOWER | UPPER: show the full span of each bin. */
+#define DM_HISTOGRAM_BOUNDS_RANGE 0x30
+
+/*
+ * Return a string representation of the supplied histogram's values and
+ * bin boundaries.
+ *
+ * The bin argument selects the bin to format. If this argument is less
+ * than zero all bins will be included in the resulting string.
+ *
+ * width specifies a minimum width for the field in characters; if it is
+ * zero the width will be determined automatically based on the options
+ * selected for formatting. A value less than zero disables field width
+ * control: bin boundaries and values will be output with a minimum
+ * amount of whitespace.
+ *
+ * flags is a collection of flag arguments that control the string format:
+ *
+ * DM_HISTOGRAM_VALUES - Include bin values in the string.
+ * DM_HISTOGRAM_SUFFIX - Include time unit suffixes when printing bounds.
+ * DM_HISTOGRAM_PERCENT - Format bin values as a percentage.
+ *
+ * DM_HISTOGRAM_BOUNDS_LOWER - Include the lower bound of each bin.
+ * DM_HISTOGRAM_BOUNDS_UPPER - Include the upper bound of each bin.
+ * DM_HISTOGRAM_BOUNDS_RANGE - Show the span of each bin as "lo-up".
+ *
+ * The returned pointer does not need to be freed explicitly by the
+ * caller: it will become invalid following a subsequent
+ * dm_stats_list(), dm_stats_populate() or dm_stats_destroy() of the
+ * corresponding dm_stats handle.
+ */
+const char *dm_histogram_to_string(const struct dm_histogram *dmh, int bin,
+ int width, int flags);
+
+/*************************
+ * config file parse/print
+ *************************/
+typedef enum {
+ DM_CFG_INT,
+ DM_CFG_FLOAT,
+ DM_CFG_STRING,
+ DM_CFG_EMPTY_ARRAY
+} dm_config_value_type_t;
+
+struct dm_config_value {
+ dm_config_value_type_t type;
+
+ union {
+ int64_t i;
+ float f;
+ double d; /* Unused. */
+ const char *str;
+ } v;
+
+ struct dm_config_value *next; /* For arrays */
+ uint32_t format_flags;
+};
+
+/* One node of a parsed config tree: a section or a key = value setting. */
+struct dm_config_node {
+ const char *key; /* section or setting name */
+ struct dm_config_node *parent, *sib, *child; /* tree links: parent, next sibling, first child */
+ struct dm_config_value *v; /* value list for a setting; presumably NULL for a pure section — confirm in libdm-config.c */
+ int id; /* NOTE(review): purpose not evident from this header — see libdm-config.c */
+};
+
+/* A whole parsed config file; trees may be stacked via 'cascade'. */
+struct dm_config_tree {
+ struct dm_config_node *root; /* root of the parsed node tree */
+ struct dm_config_tree *cascade; /* next (lower-priority) tree searched after this one; see dm_config_insert_cascaded_tree */
+ struct dm_pool *mem; /* pool backing node/value allocations — presumably; confirm in libdm-config.c */
+ void *custom; /* opaque user data; see dm_config_get_custom/dm_config_set_custom */
+};
+
+struct dm_config_tree *dm_config_create(void);
+struct dm_config_tree *dm_config_from_string(const char *config_settings);
+int dm_config_parse(struct dm_config_tree *cft, const char *start, const char *end);
+int dm_config_parse_without_dup_node_check(struct dm_config_tree *cft, const char *start, const char *end);
+
+void *dm_config_get_custom(struct dm_config_tree *cft);
+void dm_config_set_custom(struct dm_config_tree *cft, void *custom);
+
+/*
+ * When searching, first_cft is checked before second_cft.
+ */
+struct dm_config_tree *dm_config_insert_cascaded_tree(struct dm_config_tree *first_cft, struct dm_config_tree *second_cft);
+
+/*
+ * If there's a cascaded dm_config_tree, remove the top layer
+ * and return the layer below. Otherwise return NULL.
+ */
+struct dm_config_tree *dm_config_remove_cascaded_tree(struct dm_config_tree *cft);
+
+/*
+ * Create a new, uncascaded config tree equivalent to the input cascade.
+ */
+struct dm_config_tree *dm_config_flatten(struct dm_config_tree *cft);
+
+void dm_config_destroy(struct dm_config_tree *cft);
+
+/* Simple output line by line. */
+typedef int (*dm_putline_fn)(const char *line, void *baton);
+/* More advanced output with config node reference. */
+typedef int (*dm_config_node_out_fn)(const struct dm_config_node *cn, const char *line, void *baton);
+
+/*
+ * Specification for advanced config node output.
+ */
+struct dm_config_node_out_spec {
+ dm_config_node_out_fn prefix_fn; /* called before processing config node lines */
+ dm_config_node_out_fn line_fn; /* called for each config node line */
+ dm_config_node_out_fn suffix_fn; /* called after processing config node lines */
+};
+
+/* Write the node and any subsequent siblings it has. */
+int dm_config_write_node(const struct dm_config_node *cn, dm_putline_fn putline, void *baton);
+int dm_config_write_node_out(const struct dm_config_node *cn, const struct dm_config_node_out_spec *out_spec, void *baton);
+
+/* Write given node only without subsequent siblings. */
+int dm_config_write_one_node(const struct dm_config_node *cn, dm_putline_fn putline, void *baton);
+int dm_config_write_one_node_out(const struct dm_config_node *cn, const struct dm_config_node_out_spec *out_spec, void *baton);
+
+struct dm_config_node *dm_config_find_node(const struct dm_config_node *cn, const char *path);
+int dm_config_has_node(const struct dm_config_node *cn, const char *path);
+int dm_config_remove_node(struct dm_config_node *parent, struct dm_config_node *remove);
+const char *dm_config_find_str(const struct dm_config_node *cn, const char *path, const char *fail);
+const char *dm_config_find_str_allow_empty(const struct dm_config_node *cn, const char *path, const char *fail);
+int dm_config_find_int(const struct dm_config_node *cn, const char *path, int fail);
+int64_t dm_config_find_int64(const struct dm_config_node *cn, const char *path, int64_t fail);
+float dm_config_find_float(const struct dm_config_node *cn, const char *path, float fail);
+
+const struct dm_config_node *dm_config_tree_find_node(const struct dm_config_tree *cft, const char *path);
+const char *dm_config_tree_find_str(const struct dm_config_tree *cft, const char *path, const char *fail);
+const char *dm_config_tree_find_str_allow_empty(const struct dm_config_tree *cft, const char *path, const char *fail);
+int dm_config_tree_find_int(const struct dm_config_tree *cft, const char *path, int fail);
+int64_t dm_config_tree_find_int64(const struct dm_config_tree *cft, const char *path, int64_t fail);
+float dm_config_tree_find_float(const struct dm_config_tree *cft, const char *path, float fail);
+int dm_config_tree_find_bool(const struct dm_config_tree *cft, const char *path, int fail);
+
+/*
+ * Understands (0, ~0), (y, n), (yes, no), (on,
+ * off), (true, false).
+ */
+int dm_config_find_bool(const struct dm_config_node *cn, const char *path, int fail);
+int dm_config_value_is_bool(const struct dm_config_value *v);
+
+int dm_config_get_uint32(const struct dm_config_node *cn, const char *path, uint32_t *result);
+int dm_config_get_uint64(const struct dm_config_node *cn, const char *path, uint64_t *result);
+int dm_config_get_str(const struct dm_config_node *cn, const char *path, const char **result);
+int dm_config_get_list(const struct dm_config_node *cn, const char *path, const struct dm_config_value **result);
+int dm_config_get_section(const struct dm_config_node *cn, const char *path, const struct dm_config_node **result);
+
+unsigned dm_config_maybe_section(const char *str, unsigned len);
+
+const char *dm_config_parent_name(const struct dm_config_node *n);
+
+struct dm_config_node *dm_config_clone_node_with_mem(struct dm_pool *mem, const struct dm_config_node *node, int siblings);
+struct dm_config_node *dm_config_create_node(struct dm_config_tree *cft, const char *key);
+struct dm_config_value *dm_config_create_value(struct dm_config_tree *cft);
+struct dm_config_node *dm_config_clone_node(struct dm_config_tree *cft, const struct dm_config_node *cn, int siblings);
+
+/*
+ * Common formatting flags applicable to all config node types (lower 16 bits).
+ */
+#define DM_CONFIG_VALUE_FMT_COMMON_ARRAY 0x00000001 /* value is array */
+#define DM_CONFIG_VALUE_FMT_COMMON_EXTRA_SPACES 0x00000002 /* add spaces in "key = value" pairs in contrast to "key=value" for better readability */
+
+/*
+ * Type-related config node formatting flags (higher 16 bits).
+ */
+/* int-related formatting flags */
+#define DM_CONFIG_VALUE_FMT_INT_OCTAL 0x00010000 /* print number in octal form */
+
+/* string-related formatting flags */
+#define DM_CONFIG_VALUE_FMT_STRING_NO_QUOTES 0x00010000 /* do not print quotes around string value */
+
+void dm_config_value_set_format_flags(struct dm_config_value *cv, uint32_t format_flags);
+uint32_t dm_config_value_get_format_flags(struct dm_config_value *cv);
+
+struct dm_pool *dm_config_memory(struct dm_config_tree *cft);
+
+/* Udev device directory. */
+#define DM_UDEV_DEV_DIR "/dev/"
+
+/* Cookie prefixes.
+ *
+ * The cookie value consists of a prefix (16 bits) and a base (16 bits).
+ * We can use the prefix to store the flags. These flags are sent to
+ * kernel within given dm task. When returned back to userspace in
+ * DM_COOKIE udev environment variable, we can control several aspects
+ * of udev rules we use by decoding the cookie prefix. When doing the
+ * notification, we replace the cookie prefix with DM_COOKIE_MAGIC,
+ * so we notify the right semaphore.
+ *
+ * It is still possible to use cookies for passing the flags to udev
+ * rules even when udev_sync is disabled. The base part of the cookie
+ * will be zero (there's no notification semaphore) and prefix will be
+ * set then. However, having udev_sync enabled is highly recommended.
+ */
+#define DM_COOKIE_MAGIC 0x0D4D
+#define DM_UDEV_FLAGS_MASK 0xFFFF0000
+#define DM_UDEV_FLAGS_SHIFT 16
+
+/*
+ * DM_UDEV_DISABLE_DM_RULES_FLAG is set in case we need to disable
+ * basic device-mapper udev rules that create symlinks in /dev/<DM_DIR>
+ * directory. However, we can't reliably prevent creating default
+ * nodes by udev (commonly /dev/dm-X, where X is a number).
+ */
+#define DM_UDEV_DISABLE_DM_RULES_FLAG 0x0001
+/*
+ * DM_UDEV_DISABLE_SUBSYSTEM_RULES_FLAG is set in case we need to disable
+ * subsystem udev rules, but still we need the general DM udev rules to
+ * be applied (to create the nodes and symlinks under /dev and /dev/disk).
+ */
+#define DM_UDEV_DISABLE_SUBSYSTEM_RULES_FLAG 0x0002
+/*
+ * DM_UDEV_DISABLE_DISK_RULES_FLAG is set in case we need to disable
+ * general DM rules that set symlinks in /dev/disk directory.
+ */
+#define DM_UDEV_DISABLE_DISK_RULES_FLAG 0x0004
+/*
+ * DM_UDEV_DISABLE_OTHER_RULES_FLAG is set in case we need to disable
+ * all the other rules that are not general device-mapper nor subsystem
+ * related (the rules belong to other software or packages). All foreign
+ * rules should check this flag directly and they should ignore further
+ * rule processing for such event.
+ */
+#define DM_UDEV_DISABLE_OTHER_RULES_FLAG 0x0008
+/*
+ * DM_UDEV_LOW_PRIORITY_FLAG is set in case we need to instruct the
+ * udev rules to give low priority to the device that is currently
+ * processed. For example, this provides a way to select which symlinks
+ * could be overwritten by high priority ones if their names are equal.
+ * Common situation is a name based on FS UUID while using origin and
+ * snapshot devices.
+ */
+#define DM_UDEV_LOW_PRIORITY_FLAG 0x0010
+/*
+ * DM_UDEV_DISABLE_LIBRARY_FALLBACK is set in case we need to disable
+ * libdevmapper's node management. We will rely on udev completely
+ * and there will be no fallback action provided by libdevmapper if
+ * udev does something improperly. Using the library fallback code has
+ * a consequence that you need to take into account: any device node
+ * or symlink created without udev is not recorded in udev database
+ * which other applications may read to get complete list of devices.
+ * For this reason, use of DM_UDEV_DISABLE_LIBRARY_FALLBACK is
+ * recommended on systems where udev is used. Keep library fallback
+ * enabled just for exceptional cases where you need to debug udev-related
+ * problems. If you hit such problems, please contact us through upstream
+ * LVM2 development mailing list (see also README file). This flag is
+ * currently not set by default in libdevmapper so you need to set it
+ * explicitly if you're sure that udev is behaving correctly on your
+ * setups.
+ */
+#define DM_UDEV_DISABLE_LIBRARY_FALLBACK 0x0020
+/*
+ * DM_UDEV_PRIMARY_SOURCE_FLAG is automatically appended by
+ * libdevmapper for all ioctls generating udev uevents. Once used in
+ * udev rules, we know if this is a real "primary sourced" event or not.
+ * We need to distinguish real events originated in libdevmapper from
+ * any spurious events to gather all missing information (e.g. events
+ * generated as a result of "udevadm trigger" command or as a result
+ * of the "watch" udev rule).
+ */
+#define DM_UDEV_PRIMARY_SOURCE_FLAG 0x0040
+
+/*
+ * Udev flags reserved for use by any device-mapper subsystem.
+ */
+#define DM_SUBSYSTEM_UDEV_FLAG0 0x0100
+#define DM_SUBSYSTEM_UDEV_FLAG1 0x0200
+#define DM_SUBSYSTEM_UDEV_FLAG2 0x0400
+#define DM_SUBSYSTEM_UDEV_FLAG3 0x0800
+#define DM_SUBSYSTEM_UDEV_FLAG4 0x1000
+#define DM_SUBSYSTEM_UDEV_FLAG5 0x2000
+#define DM_SUBSYSTEM_UDEV_FLAG6 0x4000
+#define DM_SUBSYSTEM_UDEV_FLAG7 0x8000
+
+int dm_cookie_supported(void);
+
+/*
+ * Udev synchronisation functions.
+ */
+void dm_udev_set_sync_support(int sync_with_udev);
+int dm_udev_get_sync_support(void);
+void dm_udev_set_checking(int checking);
+int dm_udev_get_checking(void);
+
+/*
+ * Default value to get new auto generated cookie created
+ */
+#define DM_COOKIE_AUTO_CREATE 0
+int dm_udev_create_cookie(uint32_t *cookie);
+int dm_udev_complete(uint32_t cookie);
+int dm_udev_wait(uint32_t cookie);
+
+/*
+ * dm_udev_wait_immediate
+ * If *ready is 1 on return, the wait is complete.
+ * If *ready is 0 on return, the wait is incomplete and either
+ * this function or dm_udev_wait() must be called again.
+ * Returns 0 on error, when neither function should be called again.
+ */
+int dm_udev_wait_immediate(uint32_t cookie, int *ready);
+
+#define DM_DEV_DIR_UMASK 0022
+#define DM_CONTROL_NODE_UMASK 0177
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* LIB_DEVICE_MAPPER_H */
diff --git a/device_mapper/libdm-common.c b/device_mapper/libdm-common.c
new file mode 100644
index 000000000..bcf12cbdf
--- /dev/null
+++ b/device_mapper/libdm-common.c
@@ -0,0 +1,2691 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2012 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "device_mapper/misc/dmlib.h"
+#include "ioctl/libdm-targets.h"
+#include "libdm-common.h"
+#include "misc/kdev_t.h"
+#include "misc/dm-ioctl.h"
+
+#include <stdarg.h>
+#include <sys/param.h>
+#include <sys/ioctl.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <unistd.h>
+
+#ifdef UDEV_SYNC_SUPPORT
+# include <sys/types.h>
+# include <sys/ipc.h>
+# include <sys/sem.h>
+# include <libudev.h>
+#endif
+
+#ifdef __linux__
+# include <linux/fs.h>
+#endif
+
+#ifdef HAVE_SELINUX
+# include <selinux/selinux.h>
+#endif
+#ifdef HAVE_SELINUX_LABEL_H
+# include <selinux/label.h>
+#endif
+
+#define DM_DEFAULT_NAME_MANGLING_MODE_ENV_VAR_NAME "DM_DEFAULT_NAME_MANGLING_MODE"
+
+#define DEV_DIR "/dev/"
+
+#ifdef UDEV_SYNC_SUPPORT
+#ifdef _SEM_SEMUN_UNDEFINED
+union semun
+{
+ int val; /* value for SETVAL */
+ struct semid_ds *buf; /* buffer for IPC_STAT & IPC_SET */
+ unsigned short int *array; /* array for GETALL & SETALL */
+ struct seminfo *__buf; /* buffer for IPC_INFO */
+};
+#endif
+#endif
+
+static char _dm_dir[PATH_MAX] = DEV_DIR DM_DIR;
+static char _sysfs_dir[PATH_MAX] = "/sys/";
+static char _path0[PATH_MAX]; /* path buffer, safe 4kB on stack */
+static const char _mountinfo[] = "/proc/self/mountinfo";
+
+#define DM_MAX_UUID_PREFIX_LEN 15
+static char _default_uuid_prefix[DM_MAX_UUID_PREFIX_LEN + 1] = "LVM-";
+
+static int _verbose = 0;
+static int _suspended_dev_counter = 0;
+static dm_string_mangling_t _name_mangling_mode = DEFAULT_DM_NAME_MANGLING;
+
+#ifdef HAVE_SELINUX_LABEL_H
+static struct selabel_handle *_selabel_handle = NULL;
+#endif
+
+static int _udev_disabled = 0;
+
+#ifdef UDEV_SYNC_SUPPORT
+static int _semaphore_supported = -1;
+static int _udev_running = -1;
+static int _sync_with_udev = 1;
+static int _udev_checking = 1;
+#endif
+
+void dm_lib_init(void)
+{
+ const char *env;
+
+ if (getenv("DM_DISABLE_UDEV"))
+ _udev_disabled = 1;
+
+ _name_mangling_mode = DEFAULT_DM_NAME_MANGLING;
+ if ((env = getenv(DM_DEFAULT_NAME_MANGLING_MODE_ENV_VAR_NAME))) {
+ if (!strcasecmp(env, "none"))
+ _name_mangling_mode = DM_STRING_MANGLING_NONE;
+ else if (!strcasecmp(env, "auto"))
+ _name_mangling_mode = DM_STRING_MANGLING_AUTO;
+ else if (!strcasecmp(env, "hex"))
+ _name_mangling_mode = DM_STRING_MANGLING_HEX;
+ }
+}
+
+/*
+ * Library users can provide their own logging
+ * function.
+ */
+
+__attribute__((format(printf, 5, 0)))
+static void _default_log_line(int level, const char *file,
+ int line, int dm_errno_or_class,
+ const char *f, va_list ap)
+{
+ static int _abort_on_internal_errors = -1;
+ static int _debug_with_line_numbers = -1;
+ FILE *out = log_stderr(level) ? stderr : stdout;
+
+ level = log_level(level);
+
+ if (level <= _LOG_WARN || _verbose) {
+ if (level < _LOG_WARN)
+ out = stderr;
+
+ if (_debug_with_line_numbers < 0)
+ /* Set when env DM_DEBUG_WITH_LINE_NUMBERS is not "0" */
+ _debug_with_line_numbers =
+ strcmp(getenv("DM_DEBUG_WITH_LINE_NUMBERS") ? : "0", "0");
+
+ if (_debug_with_line_numbers)
+ fprintf(out, "%s:%d ", file, line);
+
+ vfprintf(out, f, ap);
+ fputc('\n', out);
+ }
+
+ if (_abort_on_internal_errors < 0)
+ /* Set when env DM_ABORT_ON_INTERNAL_ERRORS is not "0" */
+ _abort_on_internal_errors =
+ strcmp(getenv("DM_ABORT_ON_INTERNAL_ERRORS") ? : "0", "0");
+
+ if (_abort_on_internal_errors &&
+ !strncmp(f, INTERNAL_ERROR, sizeof(INTERNAL_ERROR) - 1))
+ abort();
+}
+
+__attribute__((format(printf, 5, 6)))
+static void _default_log_with_errno(int level,
+ const char *file, int line, int dm_errno_or_class,
+ const char *f, ...)
+{
+ va_list ap;
+
+ va_start(ap, f);
+ _default_log_line(level, file, line, dm_errno_or_class, f, ap);
+ va_end(ap);
+}
+
+__attribute__((format(printf, 4, 5)))
+static void _default_log(int level, const char *file,
+ int line, const char *f, ...)
+{
+ va_list ap;
+
+ va_start(ap, f);
+ _default_log_line(level, file, line, 0, f, ap);
+ va_end(ap);
+}
+
+dm_log_fn dm_log = _default_log;
+dm_log_with_errno_fn dm_log_with_errno = _default_log_with_errno;
+
+/*
+ * Wrapper function to reformat new messages to an
+ * old style logging which did not use the errno parameter
+ *
+ * As we cannot simply pass '...' to old function we
+ * need to process arg list locally and just pass '%s' + buffer
+ */
+__attribute__((format(printf, 5, 6)))
+static void _log_to_default_log(int level,
+ const char *file, int line, int dm_errno_or_class,
+ const char *f, ...)
+{
+ int n;
+ va_list ap;
+ char buf[2 * PATH_MAX + 256]; /* big enough for most messages */
+
+ va_start(ap, f);
+ n = vsnprintf(buf, sizeof(buf), f, ap);
+ va_end(ap);
+
+ if (n > 0) /* Could be truncated */
+ dm_log(level, file, line, "%s", buf);
+}
+
+/*
+ * Wrapper function takes an 'old' style message without errno
+ * and log it via new logging function with errno arg
+ *
+ * This minor case may happen if new libdm is used with old
+ * recompiled tool that decided to use new logging,
+ * but still would like to use old binary plugins.
+ */
+__attribute__((format(printf, 4, 5)))
+static void _log_to_default_log_with_errno(int level,
+ const char *file, int line, const char *f, ...)
+{
+ int n;
+ va_list ap;
+ char buf[2 * PATH_MAX + 256]; /* big enough for most messages */
+
+ va_start(ap, f);
+ n = vsnprintf(buf, sizeof(buf), f, ap);
+ va_end(ap);
+
+ if (n > 0) /* Could be truncated */
+ dm_log_with_errno(level, file, line, 0, "%s", buf);
+}
+
+void dm_log_init(dm_log_fn fn)
+{
+ if (fn) {
+ dm_log = fn;
+ dm_log_with_errno = _log_to_default_log;
+ } else {
+ dm_log = _default_log;
+ dm_log_with_errno = _default_log_with_errno;
+ }
+}
+
+int dm_log_is_non_default(void)
+{
+ return (dm_log == _default_log && dm_log_with_errno == _default_log_with_errno) ? 0 : 1;
+}
+
+void dm_log_with_errno_init(dm_log_with_errno_fn fn)
+{
+ if (fn) {
+ dm_log = _log_to_default_log_with_errno;
+ dm_log_with_errno = fn;
+ } else {
+ dm_log = _default_log;
+ dm_log_with_errno = _default_log_with_errno;
+ }
+}
+
+void dm_log_init_verbose(int level)
+{
+ _verbose = level;
+}
+
+static int _build_dev_path(char *buffer, size_t len, const char *dev_name)
+{
+ int r;
+
+ /* If there's a /, assume caller knows what they're doing */
+ if (strchr(dev_name, '/'))
+ r = dm_strncpy(buffer, dev_name, len);
+ else
+ r = (dm_snprintf(buffer, len, "%s/%s",
+ _dm_dir, dev_name) < 0) ? 0 : 1;
+ if (!r)
+ log_error("Failed to build dev path for \"%s\".", dev_name);
+
+ return r;
+}
+
+int dm_get_library_version(char *version, size_t size)
+{
+ return dm_strncpy(version, DM_LIB_VERSION, size);
+}
+
+void inc_suspended(void)
+{
+ _suspended_dev_counter++;
+ log_debug_activation("Suspended device counter increased to %d", _suspended_dev_counter);
+}
+
+void dec_suspended(void)
+{
+ if (!_suspended_dev_counter) {
+ log_error("Attempted to decrement suspended device counter below zero.");
+ return;
+ }
+
+ _suspended_dev_counter--;
+ log_debug_activation("Suspended device counter reduced to %d", _suspended_dev_counter);
+}
+
+int dm_get_suspended_counter(void)
+{
+ return _suspended_dev_counter;
+}
+
+int dm_set_name_mangling_mode(dm_string_mangling_t name_mangling_mode)
+{
+ _name_mangling_mode = name_mangling_mode;
+
+ return 1;
+}
+
+dm_string_mangling_t dm_get_name_mangling_mode(void)
+{
+ return _name_mangling_mode;
+}
+
+struct dm_task *dm_task_create(int type)
+{
+ struct dm_task *dmt = dm_zalloc(sizeof(*dmt));
+
+ if (!dmt) {
+ log_error("dm_task_create: malloc(%" PRIsize_t ") failed",
+ sizeof(*dmt));
+ return NULL;
+ }
+
+ if (!dm_check_version()) {
+ dm_free(dmt);
+ return_NULL;
+ }
+
+ dmt->type = type;
+ dmt->minor = -1;
+ dmt->major = -1;
+ dmt->allow_default_major_fallback = 1;
+ dmt->uid = DM_DEVICE_UID;
+ dmt->gid = DM_DEVICE_GID;
+ dmt->mode = DM_DEVICE_MODE;
+ dmt->no_open_count = 0;
+ dmt->read_ahead = DM_READ_AHEAD_AUTO;
+ dmt->read_ahead_flags = 0;
+ dmt->event_nr = 0;
+ dmt->cookie_set = 0;
+ dmt->query_inactive_table = 0;
+ dmt->new_uuid = 0;
+ dmt->secure_data = 0;
+ dmt->record_timestamp = 0;
+
+ return dmt;
+}
+
+/*
+ * Find the name associated with a given device number by scanning _dm_dir.
+ */
+static int _find_dm_name_of_device(dev_t st_rdev, char *buf, size_t buf_len)
+{
+ const char *name;
+ char path[PATH_MAX];
+ struct dirent *dirent;
+ DIR *d;
+ struct stat st;
+ int r = 0;
+
+ if (!(d = opendir(_dm_dir))) {
+ log_sys_error("opendir", _dm_dir);
+ return 0;
+ }
+
+ while ((dirent = readdir(d))) {
+ name = dirent->d_name;
+
+ if (!strcmp(name, ".") || !strcmp(name, ".."))
+ continue;
+
+ if (dm_snprintf(path, sizeof(path), "%s/%s", _dm_dir,
+ name) == -1) {
+ log_error("Couldn't create path for %s", name);
+ continue;
+ }
+
+ if (stat(path, &st))
+ continue;
+
+ if (st.st_rdev == st_rdev) {
+ strncpy(buf, name, buf_len);
+ r = 1;
+ break;
+ }
+ }
+
+ if (closedir(d))
+ log_sys_error("closedir", _dm_dir);
+
+ return r;
+}
+
+static int _is_whitelisted_char(char c)
+{
+ /*
+ * Actually, DM supports any character in a device name.
+ * This whitelist is just for proper integration with udev.
+ */
+ if ((c >= '0' && c <= '9') ||
+ (c >= 'A' && c <= 'Z') ||
+ (c >= 'a' && c <= 'z') ||
+ strchr("#+-.:=@_", c) != NULL)
+ return 1;
+
+ return 0;
+}
+
+int check_multiple_mangled_string_allowed(const char *str, const char *str_name,
+ dm_string_mangling_t mode)
+{
+ if (mode == DM_STRING_MANGLING_AUTO && strstr(str, "\\x5cx")) {
+ log_error("The %s \"%s\" seems to be mangled more than once. "
+ "This is not allowed in auto mode.", str_name, str);
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * Mangle all characters in the input string which are not on a whitelist
+ * with '\xNN' format where NN is the hex value of the character.
+ */
+int mangle_string(const char *str, const char *str_name, size_t len,
+ char *buf, size_t buf_len, dm_string_mangling_t mode)
+{
+ int need_mangling = -1; /* -1 don't know yet, 0 no, 1 yes */
+ size_t i, j;
+
+ if (!str || !buf)
+ return -1;
+
+ /* Is there anything to do at all? */
+ if (!*str || !len)
+ return 0;
+
+ if (buf_len < DM_NAME_LEN) {
+ log_error(INTERNAL_ERROR "mangle_string: supplied buffer too small");
+ return -1;
+ }
+
+ if (mode == DM_STRING_MANGLING_NONE)
+ mode = DM_STRING_MANGLING_AUTO;
+
+ for (i = 0, j = 0; str[i]; i++) {
+ if (mode == DM_STRING_MANGLING_AUTO) {
+ /*
+ * Detect already mangled part of the string and keep it.
+ * Return error on mixture of mangled/not mangled!
+ */
+ if (str[i] == '\\' && str[i+1] == 'x') {
+ if ((len - i < 4) || (need_mangling == 1))
+ goto bad1;
+ if (buf_len - j < 4)
+ goto bad2;
+
+ memcpy(&buf[j], &str[i], 4);
+ i+=3; j+=4;
+
+ need_mangling = 0;
+ continue;
+ }
+ }
+
+ if (_is_whitelisted_char(str[i])) {
+ /* whitelisted, keep it. */
+ if (buf_len - j < 1)
+ goto bad2;
+ buf[j] = str[i];
+ j++;
+ } else {
+ /*
+ * Not on a whitelist, mangle it.
+ * Return error on mixture of mangled/not mangled
+ * unless DM_STRING_MANGLING_HEX is used.
+ */
+ if ((mode != DM_STRING_MANGLING_HEX) && (need_mangling == 0))
+ goto bad1;
+ if (buf_len - j < 4)
+ goto bad2;
+
+ sprintf(&buf[j], "\\x%02x", (unsigned char) str[i]);
+ j+=4;
+
+ need_mangling = 1;
+ }
+ }
+
+ if (buf_len - j < 1)
+ goto bad2;
+ buf[j] = '\0';
+
+ /* All chars in the string whitelisted? */
+ if (need_mangling == -1)
+ need_mangling = 0;
+
+ return need_mangling;
+
+bad1:
+ log_error("The %s \"%s\" contains mixed mangled and unmangled "
+ "characters or it's already mangled improperly.", str_name, str);
+ return -1;
+bad2:
+ log_error("Mangled form of the %s too long for \"%s\".", str_name, str);
+ return -1;
+}
+
+/*
+ * Try to unmangle supplied string.
+ * Return value: -1 on error, 0 when no unmangling needed, 1 when unmangling applied
+ */
+int unmangle_string(const char *str, const char *str_name, size_t len,
+ char *buf, size_t buf_len, dm_string_mangling_t mode)
+{
+ int strict = mode != DM_STRING_MANGLING_NONE;
+ char str_rest[DM_NAME_LEN];
+ size_t i, j;
+ int code;
+ int r = 0;
+
+ if (!str || !buf)
+ return -1;
+
+ /* Is there anything to do at all? */
+ if (!*str || !len)
+ return 0;
+
+ if (buf_len < DM_NAME_LEN) {
+ log_error(INTERNAL_ERROR "unmangle_string: supplied buffer too small");
+ return -1;
+ }
+
+ for (i = 0, j = 0; str[i]; i++, j++) {
+ if (strict && !(_is_whitelisted_char(str[i]) || str[i]=='\\')) {
+ log_error("The %s \"%s\" should be mangled but "
+ "it contains blacklisted characters.", str_name, str);
+ j=0; r=-1;
+ goto out;
+ }
+
+ if (str[i] == '\\' && str[i+1] == 'x') {
+ if (!sscanf(&str[i+2], "%2x%s", &code, str_rest)) {
+ log_debug_activation("Hex encoding mismatch detected in %s \"%s\" "
+ "while trying to unmangle it.", str_name, str);
+ goto out;
+ }
+ buf[j] = (unsigned char) code;
+
+ /* skip the encoded part we've just decoded! */
+ i+= 3;
+
+ /* unmangling applied */
+ r = 1;
+ } else
+ buf[j] = str[i];
+ }
+
+out:
+ buf[j] = '\0';
+ return r;
+}
+
+static int _dm_task_set_name(struct dm_task *dmt, const char *name,
+ dm_string_mangling_t mangling_mode)
+{
+ char mangled_name[DM_NAME_LEN];
+ int r = 0;
+
+ dm_free(dmt->dev_name);
+ dmt->dev_name = NULL;
+ dm_free(dmt->mangled_dev_name);
+ dmt->mangled_dev_name = NULL;
+
+ if (strlen(name) >= DM_NAME_LEN) {
+ log_error("Name \"%s\" too long.", name);
+ return 0;
+ }
+
+ if (!check_multiple_mangled_string_allowed(name, "name", mangling_mode))
+ return_0;
+
+ if (mangling_mode != DM_STRING_MANGLING_NONE &&
+ (r = mangle_string(name, "name", strlen(name), mangled_name,
+ sizeof(mangled_name), mangling_mode)) < 0) {
+ log_error("Failed to mangle device name \"%s\".", name);
+ return 0;
+ }
+
+ /* Store mangled_dev_name only if it differs from dev_name! */
+ if (r) {
+ log_debug_activation("Device name mangled [%s]: %s --> %s",
+ mangling_mode == DM_STRING_MANGLING_AUTO ? "auto" : "hex",
+ name, mangled_name);
+ if (!(dmt->mangled_dev_name = dm_strdup(mangled_name))) {
+ log_error("_dm_task_set_name: dm_strdup(%s) failed", mangled_name);
+ return 0;
+ }
+ }
+
+ if (!(dmt->dev_name = dm_strdup(name))) {
+ log_error("_dm_task_set_name: strdup(%s) failed", name);
+ return 0;
+ }
+
+ return 1;
+}
+
+static int _dm_task_set_name_from_path(struct dm_task *dmt, const char *path,
+ const char *name)
+{
+ char buf[PATH_MAX];
+ struct stat st1, st2;
+ const char *final_name = NULL;
+ size_t len;
+
+ if (dmt->type == DM_DEVICE_CREATE) {
+ log_error("Name \"%s\" invalid. It contains \"/\".", path);
+ return 0;
+ }
+
+ if (!stat(path, &st1)) {
+ /*
+ * Found directly.
+ * If supplied path points to same device as last component
+ * under /dev/mapper, use that name directly.
+ */
+ if (dm_snprintf(buf, sizeof(buf), "%s/%s", _dm_dir, name) == -1) {
+ log_error("Couldn't create path for %s", name);
+ return 0;
+ }
+
+ if (!stat(buf, &st2) && (st1.st_rdev == st2.st_rdev))
+ final_name = name;
+ } else {
+ /* Not found. */
+ /* If there is exactly one '/' try a prefix of /dev */
+ if ((len = strlen(path)) < 3 || path[0] == '/' ||
+ dm_count_chars(path, len, '/') != 1) {
+ log_error("Device %s not found", path);
+ return 0;
+ }
+ if (dm_snprintf(buf, sizeof(buf), "%s/../%s", _dm_dir, path) == -1) {
+ log_error("Couldn't create /dev path for %s", path);
+ return 0;
+ }
+ if (stat(buf, &st1)) {
+ log_error("Device %s not found", path);
+ return 0;
+ }
+ /* Found */
+ }
+
+ /*
+ * If we don't have the dm name yet, Call _find_dm_name_of_device() to
+ * scan _dm_dir for a match.
+ */
+ if (!final_name) {
+ if (_find_dm_name_of_device(st1.st_rdev, buf, sizeof(buf)))
+ final_name = buf;
+ else {
+ log_error("Device %s not found", name);
+ return 0;
+ }
+ }
+
+ /* This is an already existing path - do not mangle! */
+ return _dm_task_set_name(dmt, final_name, DM_STRING_MANGLING_NONE);
+}
+
+int dm_task_set_name(struct dm_task *dmt, const char *name)
+{
+ char *pos;
+
+ /* Path supplied for existing device? */
+ if ((pos = strrchr(name, '/')))
+ return _dm_task_set_name_from_path(dmt, name, pos + 1);
+
+ return _dm_task_set_name(dmt, name, dm_get_name_mangling_mode());
+}
+
+const char *dm_task_get_name(const struct dm_task *dmt)
+{
+ return (dmt->dmi.v4->name);
+}
+
+static char *_task_get_string_mangled(const char *str, const char *str_name,
+ char *buf, size_t buf_size,
+ dm_string_mangling_t mode)
+{
+ char *rs;
+ int r;
+
+ if ((r = mangle_string(str, str_name, strlen(str), buf, buf_size, mode)) < 0)
+ return NULL;
+
+ if (!(rs = r ? dm_strdup(buf) : dm_strdup(str)))
+ log_error("_task_get_string_mangled: dm_strdup failed");
+
+ return rs;
+}
+
+static char *_task_get_string_unmangled(const char *str, const char *str_name,
+ char *buf, size_t buf_size,
+ dm_string_mangling_t mode)
+{
+ char *rs;
+ int r = 0;
+
+ /*
+ * Unless the mode used is 'none', the string
+ * is *already* unmangled on ioctl return!
+ */
+ if (mode == DM_STRING_MANGLING_NONE &&
+ (r = unmangle_string(str, str_name, strlen(str), buf, buf_size, mode)) < 0)
+ return NULL;
+
+ if (!(rs = r ? dm_strdup(buf) : dm_strdup(str)))
+ log_error("_task_get_string_unmangled: dm_strdup failed");
+
+ return rs;
+}
+
+char *dm_task_get_name_mangled(const struct dm_task *dmt)
+{
+ const char *s = dm_task_get_name(dmt);
+ char buf[DM_NAME_LEN];
+ char *rs;
+
+ if (!(rs = _task_get_string_mangled(s, "name", buf, sizeof(buf), dm_get_name_mangling_mode())))
+ log_error("Failed to mangle device name \"%s\".", s);
+
+ return rs;
+}
+
+char *dm_task_get_name_unmangled(const struct dm_task *dmt)
+{
+ const char *s = dm_task_get_name(dmt);
+ char buf[DM_NAME_LEN];
+ char *rs;
+
+ if (!(rs = _task_get_string_unmangled(s, "name", buf, sizeof(buf), dm_get_name_mangling_mode())))
+ log_error("Failed to unmangle device name \"%s\".", s);
+
+ return rs;
+}
+
+const char *dm_task_get_uuid(const struct dm_task *dmt)
+{
+ return (dmt->dmi.v4->uuid);
+}
+
+char *dm_task_get_uuid_mangled(const struct dm_task *dmt)
+{
+ const char *s = dm_task_get_uuid(dmt);
+ char buf[DM_UUID_LEN];
+ char *rs;
+
+ if (!(rs = _task_get_string_mangled(s, "UUID", buf, sizeof(buf), dm_get_name_mangling_mode())))
+ log_error("Failed to mangle device uuid \"%s\".", s);
+
+ return rs;
+}
+
+char *dm_task_get_uuid_unmangled(const struct dm_task *dmt)
+{
+ const char *s = dm_task_get_uuid(dmt);
+ char buf[DM_UUID_LEN];
+ char *rs;
+
+ if (!(rs = _task_get_string_unmangled(s, "UUID", buf, sizeof(buf), dm_get_name_mangling_mode())))
+ log_error("Failed to unmangle device uuid \"%s\".", s);
+
+ return rs;
+}
+
+int dm_task_set_newname(struct dm_task *dmt, const char *newname)
+{
+ dm_string_mangling_t mangling_mode = dm_get_name_mangling_mode();
+ char mangled_name[DM_NAME_LEN];
+ int r = 0;
+
+ if (strchr(newname, '/')) {
+ log_error("Name \"%s\" invalid. It contains \"/\".", newname);
+ return 0;
+ }
+
+ if (strlen(newname) >= DM_NAME_LEN) {
+ log_error("Name \"%s\" too long", newname);
+ return 0;
+ }
+
+ if (!*newname) {
+ log_error("Non empty new name is required.");
+ return 0;
+ }
+
+ if (!check_multiple_mangled_string_allowed(newname, "new name", mangling_mode))
+ return_0;
+
+ if (mangling_mode != DM_STRING_MANGLING_NONE &&
+ (r = mangle_string(newname, "new name", strlen(newname), mangled_name,
+ sizeof(mangled_name), mangling_mode)) < 0) {
+ log_error("Failed to mangle new device name \"%s\"", newname);
+ return 0;
+ }
+
+ if (r) {
+ log_debug_activation("New device name mangled [%s]: %s --> %s",
+ mangling_mode == DM_STRING_MANGLING_AUTO ? "auto" : "hex",
+ newname, mangled_name);
+ newname = mangled_name;
+ }
+
+ dm_free(dmt->newname);
+ if (!(dmt->newname = dm_strdup(newname))) {
+ log_error("dm_task_set_newname: strdup(%s) failed", newname);
+ return 0;
+ }
+
+ dmt->new_uuid = 0;
+
+ return 1;
+}
+
+int dm_task_set_uuid(struct dm_task *dmt, const char *uuid)
+{
+ char mangled_uuid[DM_UUID_LEN];
+ dm_string_mangling_t mangling_mode = dm_get_name_mangling_mode();
+ int r = 0;
+
+ dm_free(dmt->uuid);
+ dmt->uuid = NULL;
+ dm_free(dmt->mangled_uuid);
+ dmt->mangled_uuid = NULL;
+
+ if (!check_multiple_mangled_string_allowed(uuid, "UUID", mangling_mode))
+ return_0;
+
+ if (mangling_mode != DM_STRING_MANGLING_NONE &&
+ (r = mangle_string(uuid, "UUID", strlen(uuid), mangled_uuid,
+ sizeof(mangled_uuid), mangling_mode)) < 0) {
+ log_error("Failed to mangle device uuid \"%s\".", uuid);
+ return 0;
+ }
+
+ if (r) {
+ log_debug_activation("Device uuid mangled [%s]: %s --> %s",
+ mangling_mode == DM_STRING_MANGLING_AUTO ? "auto" : "hex",
+ uuid, mangled_uuid);
+
+ if (!(dmt->mangled_uuid = dm_strdup(mangled_uuid))) {
+ log_error("dm_task_set_uuid: dm_strdup(%s) failed", mangled_uuid);
+ return 0;
+ }
+ }
+
+ if (!(dmt->uuid = dm_strdup(uuid))) {
+ log_error("dm_task_set_uuid: strdup(%s) failed", uuid);
+ return 0;
+ }
+
+ return 1;
+}
+
/* Select device by major number only; disable fallback to a default major. */
int dm_task_set_major(struct dm_task *dmt, int major)
{
	dmt->major = major;
	dmt->allow_default_major_fallback = 0;

	return 1;
}

/* Select device minor number (used together with a previously set major). */
int dm_task_set_minor(struct dm_task *dmt, int minor)
{
	dmt->minor = minor;

	return 1;
}

/*
 * Select device by major:minor pair; allow_default_major_fallback
 * controls whether a default major number may be tried instead.
 */
int dm_task_set_major_minor(struct dm_task *dmt, int major, int minor,
			    int allow_default_major_fallback)
{
	dmt->major = major;
	dmt->minor = minor;
	dmt->allow_default_major_fallback = allow_default_major_fallback;

	return 1;
}

/* Set the owner uid used when the device node is created. */
int dm_task_set_uid(struct dm_task *dmt, uid_t uid)
{
	dmt->uid = uid;

	return 1;
}

/* Set the group gid used when the device node is created. */
int dm_task_set_gid(struct dm_task *dmt, gid_t gid)
{
	dmt->gid = gid;

	return 1;
}

/* Set the permission bits used when the device node is created. */
int dm_task_set_mode(struct dm_task *dmt, mode_t mode)
{
	dmt->mode = mode;

	return 1;
}

/* Enable additional validity checks for this task. */
int dm_task_enable_checks(struct dm_task *dmt)
{
	dmt->enable_checks = 1;

	return 1;
}
+
+int dm_task_add_target(struct dm_task *dmt, uint64_t start, uint64_t size,
+ const char *ttype, const char *params)
+{
+ struct target *t = create_target(start, size, ttype, params);
+ if (!t)
+ return_0;
+
+ if (!dmt->head)
+ dmt->head = dmt->tail = t;
+ else {
+ dmt->tail->next = t;
+ dmt->tail = t;
+ }
+
+ return 1;
+}
+
#ifdef HAVE_SELINUX
/*
 * Look up the default SELinux security context for path with the given
 * file mode.  Uses the labeling (selabel) API when available, otherwise
 * the legacy matchpathcon() interface.  On success stores the context
 * (caller releases it with freecon()) and returns 1; returns 0 on error.
 */
static int _selabel_lookup(const char *path, mode_t mode,
			   security_context_t *scontext)
{
#ifdef HAVE_SELINUX_LABEL_H
	/* Lazily open one shared handle; released via selinux_release(). */
	if (!_selabel_handle &&
	    !(_selabel_handle = selabel_open(SELABEL_CTX_FILE, NULL, 0))) {
		log_error("selabel_open failed: %s", strerror(errno));
		return 0;
	}

	if (selabel_lookup(_selabel_handle, scontext, path, mode)) {
		log_debug_activation("selabel_lookup failed for %s: %s",
				     path, strerror(errno));
		return 0;
	}
#else
	if (matchpathcon(path, mode, scontext)) {
		log_debug_activation("matchpathcon failed for %s: %s",
				     path, strerror(errno));
		return 0;
	}
#endif
	return 1;
}
#endif

#ifdef HAVE_SELINUX
/* Cache the result of is_selinux_enabled(); queried only once. */
static int _is_selinux_enabled(void)
{
	static int _tested = 0;
	static int _enabled;

	if (!_tested) {
		_tested = 1;
		_enabled = is_selinux_enabled();
	}

	return _enabled;
}
#endif

/*
 * Set the SELinux file-creation context for files created next by this
 * process.  With a path, the context matching path/mode is used; with
 * path == NULL the creation context is reset to the default.
 * Succeeds trivially when SELinux is disabled or not compiled in.
 */
int dm_prepare_selinux_context(const char *path, mode_t mode)
{
#ifdef HAVE_SELINUX
	security_context_t scontext = NULL;

	if (_is_selinux_enabled() <= 0)
		return 1;

	if (path) {
		if (!_selabel_lookup(path, mode, &scontext))
			return_0;

		log_debug_activation("Preparing SELinux context for %s to %s.", path, scontext);
	}
	else
		log_debug_activation("Resetting SELinux context to default value.");

	/* scontext is NULL here in the reset case. */
	if (setfscreatecon(scontext) < 0) {
		log_sys_error("setfscreatecon", (path ? : "SELinux context reset"));
		freecon(scontext);
		return 0;
	}

	freecon(scontext);
#endif
	return 1;
}

/*
 * Apply the default SELinux context to an existing path.
 * ENOTSUP from the filesystem is deliberately ignored.
 * Succeeds trivially when SELinux is disabled or not compiled in.
 */
int dm_set_selinux_context(const char *path, mode_t mode)
{
#ifdef HAVE_SELINUX
	security_context_t scontext = NULL;

	if (_is_selinux_enabled() <= 0)
		return 1;

	if (!_selabel_lookup(path, mode, &scontext))
		return_0;

	log_debug_activation("Setting SELinux context for %s to %s.", path, scontext);

	if ((lsetfilecon(path, scontext) < 0) && (errno != ENOTSUP)) {
		log_sys_error("lsetfilecon", path);
		freecon(scontext);
		return 0;
	}

	freecon(scontext);
#endif
	return 1;
}

/* Release the cached labeling handle opened by _selabel_lookup(). */
void selinux_release(void)
{
#ifdef HAVE_SELINUX_LABEL_H
	if (_selabel_handle)
		selabel_close(_selabel_handle);
	_selabel_handle = NULL;
#endif
}
+
/*
 * Decide whether a fallback warning should be issued: only when the
 * caller requested it and udev sync plus udev checking are both active.
 */
static int _warn_if_op_needed(int warn_if_udev_failed)
{
	if (!warn_if_udev_failed)
		return 0;

	return dm_udev_get_sync_support() && dm_udev_get_checking();
}
+
/*
 * Create the block device node for dev_name with the given device number,
 * ownership and mode, replacing a stale node if one exists at the path.
 * If udev was expected to create the node, a warning may be logged before
 * falling back to direct creation.  Returns 1 on success, 0 on failure.
 */
static int _add_dev_node(const char *dev_name, uint32_t major, uint32_t minor,
			 uid_t uid, gid_t gid, mode_t mode, int warn_if_udev_failed)
{
	char path[PATH_MAX];
	struct stat info;
	dev_t dev = MKDEV((dev_t)major, (dev_t)minor);
	mode_t old_mask;

	if (!_build_dev_path(path, sizeof(path), dev_name))
		return_0;

	if (stat(path, &info) >= 0) {
		if (!S_ISBLK(info.st_mode)) {
			log_error("A non-block device file at '%s' "
				  "is already present", path);
			return 0;
		}

		/* If right inode already exists we don't touch uid etc. */
		if (info.st_rdev == dev)
			return 1;

		/* Wrong device number - remove so it can be recreated. */
		if (unlink(path) < 0) {
			log_error("Unable to unlink device node for '%s'",
				  dev_name);
			return 0;
		}
	} else if (_warn_if_op_needed(warn_if_udev_failed))
		log_warn("%s not set up by udev: Falling back to direct "
			 "node creation.", path);

	/* Create with the right SELinux context and no umask interference. */
	(void) dm_prepare_selinux_context(path, S_IFBLK);
	old_mask = umask(0);

	/* The node may already have been created by udev. So ignore EEXIST. */
	if (mknod(path, S_IFBLK | mode, dev) < 0 && errno != EEXIST) {
		log_error("%s: mknod for %s failed: %s", path, dev_name, strerror(errno));
		umask(old_mask);
		(void) dm_prepare_selinux_context(NULL, 0);
		return 0;
	}
	umask(old_mask);
	(void) dm_prepare_selinux_context(NULL, 0);

	if (chown(path, uid, gid) < 0) {
		log_sys_error("chown", path);
		return 0;
	}

	log_debug_activation("Created %s", path);

	return 1;
}
+
/*
 * Remove the device node for dev_name.  A node that is already gone
 * counts as success (udev may have removed it first).
 */
static int _rm_dev_node(const char *dev_name, int warn_if_udev_failed)
{
	char path[PATH_MAX];
	struct stat info;

	if (!_build_dev_path(path, sizeof(path), dev_name))
		return_0;
	if (lstat(path, &info) < 0)
		return 1;		/* nothing to remove */
	else if (_warn_if_op_needed(warn_if_udev_failed))
		log_warn("Node %s was not removed by udev. "
			 "Falling back to direct node removal.", path);

	/* udev may already have deleted the node. Ignore ENOENT. */
	if (unlink(path) < 0 && errno != ENOENT) {
		log_error("Unable to unlink device node for '%s'", dev_name);
		return 0;
	}

	log_debug_activation("Removed %s", path);

	return 1;
}
+
/*
 * Rename a device node from old_name to new_name, cooperating with udev:
 * an existing new-path node (possibly a symlink to the real node) is
 * validated and removed first, and several udev races are tolerated.
 * Returns 1 on success, 0 on failure.
 */
static int _rename_dev_node(const char *old_name, const char *new_name,
			    int warn_if_udev_failed)
{
	char oldpath[PATH_MAX];
	char newpath[PATH_MAX];
	struct stat info, info2;
	struct stat *info_block_dev;

	if (!_build_dev_path(oldpath, sizeof(oldpath), old_name) ||
	    !_build_dev_path(newpath, sizeof(newpath), new_name))
		return_0;

	if (lstat(newpath, &info) == 0) {
		/* Follow a symlink so the block-device check is done on the target. */
		if (S_ISLNK(info.st_mode)) {
			if (stat(newpath, &info2) == 0)
				info_block_dev = &info2;
			else {
				log_sys_error("stat", newpath);
				return 0;
			}
		} else
			info_block_dev = &info;

		if (!S_ISBLK(info_block_dev->st_mode)) {
			log_error("A non-block device file at '%s' "
				  "is already present", newpath);
			return 0;
		}
		else if (_warn_if_op_needed(warn_if_udev_failed)) {
			if (lstat(oldpath, &info) < 0 &&
			    errno == ENOENT)
				/* assume udev already deleted this */
				return 1;

			log_warn("The node %s should have been renamed to %s "
				 "by udev but old node is still present. "
				 "Falling back to direct old node removal.",
				 oldpath, newpath);
			return _rm_dev_node(old_name, 0);
		}

		if (unlink(newpath) < 0) {
			if (errno == EPERM) {
				/* devfs, entry has already been renamed */
				return 1;
			}
			log_error("Unable to unlink device node for '%s'",
				  new_name);
			return 0;
		}
	}
	else if (_warn_if_op_needed(warn_if_udev_failed))
		log_warn("The node %s should have been renamed to %s "
			 "by udev but new node is not present. "
			 "Falling back to direct node rename.",
			 oldpath, newpath);

	/* udev may already have renamed the node. Ignore ENOENT. */
	/* FIXME: when renaming to target mangling mode "none" with udev
	 * while there are some blacklisted characters in the node name,
	 * udev will remove the old_node, but fails to properly rename
	 * to new_node. The libdevmapper code tries to call
	 * rename(old_node,new_node), but that won't do anything
	 * since the old node is already removed by udev.
	 * For example renaming 'a\x20b' to 'a b':
	 * - udev removes 'a\x20b'
	 * - udev creates 'a' and 'b' (since it considers the ' ' as a delimiter
	 * - libdevmapper checks udev has done the rename properly
	 * - libdevmapper calls stat(new_node) and it does not see it
	 * - libdevmapper calls rename(old_node,new_node)
	 * - the rename is a NOP since the old_node does not exist anymore
	 *
	 * However, this situation is very rare - why would anyone need
	 * to rename to an unsupported mode??? So a fix for this would be
	 * just for completeness.
	 */
	if (rename(oldpath, newpath) < 0 && errno != ENOENT) {
		log_error("Unable to rename device node from '%s' to '%s'",
			  old_name, new_name);
		return 0;
	}

	log_debug_activation("Renamed %s to %s", oldpath, newpath);

	return 1;
}
+
+#ifdef __linux__
+static int _open_dev_node(const char *dev_name)
+{
+ int fd = -1;
+ char path[PATH_MAX];
+
+ if (!_build_dev_path(path, sizeof(path), dev_name))
+ return fd;
+
+ if ((fd = open(path, O_RDONLY, 0)) < 0)
+ log_sys_error("open", path);
+
+ return fd;
+}
+
/*
 * Read the current read-ahead setting (in 512b sectors) of a device.
 * Prefers the sysfs bdi/read_ahead_kb attribute when the device number
 * is known; otherwise falls back to the BLKRAGET ioctl on the node.
 * Returns 1 on success with *read_ahead filled in, 0 on failure.
 */
int get_dev_node_read_ahead(const char *dev_name, uint32_t major, uint32_t minor,
			    uint32_t *read_ahead)
{
	char buf[24];
	int len;
	int r = 1;
	int fd;
	long read_ahead_long;

	/*
	 * If we know the device number, use sysfs if we can.
	 * Otherwise use BLKRAGET ioctl.
	 */
	if (*_sysfs_dir && major != 0) {
		if (dm_snprintf(_path0, sizeof(_path0), "%sdev/block/%" PRIu32
				":%" PRIu32 "/bdi/read_ahead_kb", _sysfs_dir,
				major, minor) < 0) {
			log_error("Failed to build sysfs_path.");
			return 0;
		}

		if ((fd = open(_path0, O_RDONLY, 0)) != -1) {
			/* Reading from sysfs, expecting number\n */
			if ((len = read(fd, buf, sizeof(buf) - 1)) < 1) {
				log_sys_error("read", _path0);
				r = 0;
			} else {
				buf[len] = 0; /* kill \n and ensure \0 */
				/* sysfs value is in kB; convert to 512b sectors */
				*read_ahead = atoi(buf) * 2;
				log_debug_activation("%s (%d:%d): read ahead is %" PRIu32,
						     dev_name, major, minor, *read_ahead);
			}

			if (close(fd))
				log_sys_debug("close", _path0);

			return r;
		}

		log_sys_debug("open", _path0);
		/* Fall back to use dev_name */
	}

	/*
	 * Open/close dev_name may block the process
	 * (i.e. overfilled thin pool volume)
	 */
	if (!*dev_name) {
		log_error("Empty device name passed to BLKRAGET");
		return 0;
	}

	if ((fd = _open_dev_node(dev_name)) < 0)
		return_0;

	if (ioctl(fd, BLKRAGET, &read_ahead_long)) {
		log_sys_error("BLKRAGET", dev_name);
		*read_ahead = 0;
		r = 0;
	} else {
		*read_ahead = (uint32_t) read_ahead_long;
		log_debug_activation("%s: read ahead is %" PRIu32, dev_name, *read_ahead);
	}

	if (close(fd))
		log_sys_debug("close", dev_name);

	return r;
}
+
+static int _set_read_ahead(const char *dev_name, uint32_t major, uint32_t minor,
+ uint32_t read_ahead)
+{
+ char buf[24];
+ int len;
+ int r = 1;
+ int fd;
+ long read_ahead_long = (long) read_ahead;
+
+ log_debug_activation("%s (%d:%d): Setting read ahead to %" PRIu32, dev_name,
+ major, minor, read_ahead);
+
+ /*
+ * If we know the device number, use sysfs if we can.
+ * Otherwise use BLKRASET ioctl. RA is set after resume.
+ */
+ if (*_sysfs_dir && major != 0) {
+ if (dm_snprintf(_path0, sizeof(_path0), "%sdev/block/%" PRIu32
+ ":%" PRIu32 "/bdi/read_ahead_kb",
+ _sysfs_dir, major, minor) < 0) {
+ log_error("Failed to build sysfs_path.");
+ return 0;
+ }
+
+ /* Sysfs is kB based, round up to kB */
+ if ((len = dm_snprintf(buf, sizeof(buf), FMTu32,
+ (read_ahead + 1) / 2)) < 0) {
+ log_error("Failed to build size in kB.");
+ return 0;
+ }
+
+ if ((fd = open(_path0, O_WRONLY, 0)) != -1) {
+ if (write(fd, buf, len) < len) {
+ log_sys_error("write", _path0);
+ r = 0;
+ }
+
+ if (close(fd))
+ log_sys_debug("close", _path0);
+
+ return r;
+ }
+
+ log_sys_debug("open", _path0);
+ /* Fall back to use dev_name */
+ }
+
+ if (!*dev_name) {
+ log_error("Empty device name passed to BLKRAGET");
+ return 0;
+ }
+
+ if ((fd = _open_dev_node(dev_name)) < 0)
+ return_0;
+
+ if (ioctl(fd, BLKRASET, read_ahead_long)) {
+ log_sys_error("BLKRASET", dev_name);
+ r = 0;
+ }
+
+ if (close(fd))
+ log_sys_debug("close", dev_name);
+
+ return r;
+}
+
/*
 * Apply a read-ahead value to a device node, honouring the special
 * values DM_READ_AHEAD_AUTO (leave kernel setting untouched) and
 * DM_READ_AHEAD_NONE (force 0).  With DM_READ_AHEAD_MINIMUM_FLAG the
 * value is only ever raised, never lowered.
 */
static int _set_dev_node_read_ahead(const char *dev_name,
				    uint32_t major, uint32_t minor,
				    uint32_t read_ahead, uint32_t read_ahead_flags)
{
	uint32_t current_read_ahead;

	if (read_ahead == DM_READ_AHEAD_AUTO)
		return 1;

	if (read_ahead == DM_READ_AHEAD_NONE)
		read_ahead = 0;

	if (read_ahead_flags & DM_READ_AHEAD_MINIMUM_FLAG) {
		if (!get_dev_node_read_ahead(dev_name, major, minor, &current_read_ahead))
			return_0;

		/* Keep a larger kernel value in place. */
		if (current_read_ahead >= read_ahead) {
			log_debug_activation("%s: retaining kernel read ahead of %" PRIu32
					     " (requested %" PRIu32 ")",
					     dev_name, current_read_ahead, read_ahead);
			return 1;
		}
	}

	return _set_read_ahead(dev_name, major, minor, read_ahead);
}

#else

/* Non-Linux stub: read ahead is reported as 0. */
int get_dev_node_read_ahead(const char *dev_name, uint32_t *read_ahead)
{
	*read_ahead = 0;

	return 1;
}

/* Non-Linux stub: setting read ahead is a successful no-op. */
static int _set_dev_node_read_ahead(const char *dev_name,
				    uint32_t major, uint32_t minor,
				    uint32_t read_ahead, uint32_t read_ahead_flags)
{
	return 1;
}
#endif

/* Kinds of deferred device-node operations; processed by update_devs(). */
typedef enum {
	NODE_ADD,
	NODE_DEL,
	NODE_RENAME,
	NODE_READ_AHEAD,
	NUM_NODES
} node_op_t;
+
/* Dispatch one deferred node operation to its implementation. */
static int _do_node_op(node_op_t type, const char *dev_name, uint32_t major,
		       uint32_t minor, uid_t uid, gid_t gid, mode_t mode,
		       const char *old_name, uint32_t read_ahead,
		       uint32_t read_ahead_flags, int warn_if_udev_failed)
{
	switch (type) {
	case NODE_ADD:
		return _add_dev_node(dev_name, major, minor, uid, gid,
				     mode, warn_if_udev_failed);
	case NODE_DEL:
		return _rm_dev_node(dev_name, warn_if_udev_failed);
	case NODE_RENAME:
		return _rename_dev_node(old_name, dev_name, warn_if_udev_failed);
	case NODE_READ_AHEAD:
		return _set_dev_node_read_ahead(dev_name, major, minor,
						read_ahead, read_ahead_flags);
	default:
		; /* NOTREACHED */
	}

	return 1;
}
+
/* Stacked (deferred) node operations plus a per-type counter. */
static DM_LIST_INIT(_node_ops);
static int _count_node_ops[NUM_NODES];

/*
 * One queued node operation.  dev_name and old_name point into the
 * trailing names[] storage allocated together with the struct.
 */
struct node_op_parms {
	struct dm_list list;
	node_op_t type;
	char *dev_name;
	uint32_t major;
	uint32_t minor;
	uid_t uid;
	gid_t gid;
	mode_t mode;
	uint32_t read_ahead;
	uint32_t read_ahead_flags;
	char *old_name;
	int warn_if_udev_failed;
	unsigned rely_on_udev;
	char names[0];		/* flexible storage for both name strings */
};
+
/*
 * Copy str into the buffer at *pos, record its new location in *ptr
 * and advance *pos past the terminating NUL.
 */
static void _store_str(char **pos, char **ptr, const char *str)
{
	size_t n = strlen(str) + 1;	/* include the NUL */

	memcpy(*pos, str, n);
	*ptr = *pos;
	*pos += n;
}
+
/* Unlink a stacked node op from the list, update its counter and free it. */
static void _del_node_op(struct node_op_parms *nop)
{
	_count_node_ops[nop->type]--;
	dm_list_del(&nop->list);
	dm_free(nop);

}

/* Return 1 if any node operation other than 'type' is currently stacked. */
static int _other_node_ops(node_op_t type)
{
	unsigned i;

	for (i = 0; i < NUM_NODES; i++)
		if (type != i && _count_node_ops[i])
			return 1;
	return 0;
}
+
/* Log one stacked node op with action_str ("Stacking", "Processing", ...). */
static void _log_node_op(const char *action_str, struct node_op_parms *nop)
{
	const char *rely = nop->rely_on_udev ? " [trust_udev]" : "" ;
	const char *verify = nop->warn_if_udev_failed ? " [verify_udev]" : "";

	switch (nop->type) {
	case NODE_ADD:
		log_debug_activation("%s: %s NODE_ADD (%" PRIu32 ",%" PRIu32 ") %u:%u 0%o%s%s",
				     nop->dev_name, action_str, nop->major, nop->minor, nop->uid, nop->gid, nop->mode,
				     rely, verify);
		break;
	case NODE_DEL:
		log_debug_activation("%s: %s NODE_DEL%s%s", nop->dev_name, action_str, rely, verify);
		break;
	case NODE_RENAME:
		log_debug_activation("%s: %s NODE_RENAME to %s%s%s", nop->old_name, action_str, nop->dev_name, rely, verify);
		break;
	case NODE_READ_AHEAD:
		log_debug_activation("%s: %s NODE_READ_AHEAD %" PRIu32 " (flags=%" PRIu32 ")%s%s",
				     nop->dev_name, action_str, nop->read_ahead, nop->read_ahead_flags, rely, verify);
		break;
	default:
		; /* NOTREACHED */
	}
}
+
/*
 * Queue a node operation for later processing by update_devs(), first
 * cancelling stacked operations that the new one makes redundant:
 *  - DEL cancels all earlier ops on the same node,
 *  - ADD cancels an earlier DEL of the same node,
 *  - RENAME cancels all earlier ops on the old name.
 * Returns 1 on success, 0 on allocation failure.
 */
static int _stack_node_op(node_op_t type, const char *dev_name, uint32_t major,
			  uint32_t minor, uid_t uid, gid_t gid, mode_t mode,
			  const char *old_name, uint32_t read_ahead,
			  uint32_t read_ahead_flags, int warn_if_udev_failed,
			  unsigned rely_on_udev)
{
	struct node_op_parms *nop;
	struct dm_list *noph, *nopht;
	size_t len = strlen(dev_name) + strlen(old_name) + 2;
	char *pos;

	/*
	 * Note: warn_if_udev_failed must have valid content
	 */
	if ((type == NODE_DEL) && _other_node_ops(type))
		/*
		 * Ignore any outstanding operations on the node if deleting it.
		 */
		dm_list_iterate_safe(noph, nopht, &_node_ops) {
			nop = dm_list_item(noph, struct node_op_parms);
			if (!strcmp(dev_name, nop->dev_name)) {
				_log_node_op("Unstacking", nop);
				_del_node_op(nop);
				if (!_other_node_ops(type))
					break; /* no other non DEL ops */
			}
		}
	else if ((type == NODE_ADD) && _count_node_ops[NODE_DEL])
		/*
		 * Ignore previous DEL operation on added node.
		 * (No other operations for this device then DEL could be stacked here).
		 */
		dm_list_iterate_safe(noph, nopht, &_node_ops) {
			nop = dm_list_item(noph, struct node_op_parms);
			if ((nop->type == NODE_DEL) &&
			    !strcmp(dev_name, nop->dev_name)) {
				_log_node_op("Unstacking", nop);
				_del_node_op(nop);
				break; /* no other DEL ops */
			}
		}
	else if (type == NODE_RENAME)
		/*
		 * Ignore any outstanding operations if renaming it.
		 *
		 * Currently RENAME operation happens through 'suspend -> resume'.
		 * On 'resume' device is added with read_ahead settings, so it is
		 * safe to remove any stacked ADD, RENAME, READ_AHEAD operation
		 * There cannot be any DEL operation on the renamed device.
		 */
		dm_list_iterate_safe(noph, nopht, &_node_ops) {
			nop = dm_list_item(noph, struct node_op_parms);
			if (!strcmp(old_name, nop->dev_name)) {
				_log_node_op("Unstacking", nop);
				_del_node_op(nop);
			}
		}
	else if (type == NODE_READ_AHEAD) {
		/* udev doesn't process readahead */
		rely_on_udev = 0;
		warn_if_udev_failed = 0;
	}

	/* Struct and both name strings in one allocation. */
	if (!(nop = dm_malloc(sizeof(*nop) + len))) {
		log_error("Insufficient memory to stack mknod operation");
		return 0;
	}

	pos = nop->names;
	nop->type = type;
	nop->major = major;
	nop->minor = minor;
	nop->uid = uid;
	nop->gid = gid;
	nop->mode = mode;
	nop->read_ahead = read_ahead;
	nop->read_ahead_flags = read_ahead_flags;
	nop->rely_on_udev = rely_on_udev;

	/*
	 * Clear warn_if_udev_failed if rely_on_udev is set. It doesn't get
	 * checked in this case - this just removes the flag from log messages.
	 */
	nop->warn_if_udev_failed = rely_on_udev ? 0 : warn_if_udev_failed;

	_store_str(&pos, &nop->dev_name, dev_name);
	_store_str(&pos, &nop->old_name, old_name);

	_count_node_ops[type]++;
	dm_list_add(&_node_ops, &nop->list);

	_log_node_op("Stacking", nop);

	return 1;
}
+
/* Execute (or skip when udev is trusted) every stacked node operation. */
static void _pop_node_ops(void)
{
	struct dm_list *noph, *nopht;
	struct node_op_parms *nop;

	dm_list_iterate_safe(noph, nopht, &_node_ops) {
		nop = dm_list_item(noph, struct node_op_parms);
		if (!nop->rely_on_udev) {
			_log_node_op("Processing", nop);
			_do_node_op(nop->type, nop->dev_name, nop->major, nop->minor,
				    nop->uid, nop->gid, nop->mode, nop->old_name,
				    nop->read_ahead, nop->read_ahead_flags,
				    nop->warn_if_udev_failed);
		} else
			_log_node_op("Skipping", nop);
		_del_node_op(nop);
	}
}

/* Queue creation of a device node (deferred until update_devs()). */
int add_dev_node(const char *dev_name, uint32_t major, uint32_t minor,
		 uid_t uid, gid_t gid, mode_t mode, int check_udev, unsigned rely_on_udev)
{
	return _stack_node_op(NODE_ADD, dev_name, major, minor, uid,
			      gid, mode, "", 0, 0, check_udev, rely_on_udev);
}

/* Queue renaming of a device node (deferred until update_devs()). */
int rename_dev_node(const char *old_name, const char *new_name, int check_udev, unsigned rely_on_udev)
{
	return _stack_node_op(NODE_RENAME, new_name, 0, 0, 0,
			      0, 0, old_name, 0, 0, check_udev, rely_on_udev);
}

/* Queue removal of a device node (deferred until update_devs()). */
int rm_dev_node(const char *dev_name, int check_udev, unsigned rely_on_udev)
{
	return _stack_node_op(NODE_DEL, dev_name, 0, 0, 0,
			      0, 0, "", 0, 0, check_udev, rely_on_udev);
}

/* Queue a read-ahead change; DM_READ_AHEAD_AUTO is a no-op. */
int set_dev_node_read_ahead(const char *dev_name,
			    uint32_t major, uint32_t minor,
			    uint32_t read_ahead, uint32_t read_ahead_flags)
{
	if (read_ahead == DM_READ_AHEAD_AUTO)
		return 1;

	return _stack_node_op(NODE_READ_AHEAD, dev_name, major, minor, 0, 0,
			      0, "", read_ahead, read_ahead_flags, 0, 0);
}

/* Flush all queued node operations. */
void update_devs(void)
{
	_pop_node_ops();
}
+
/*
 * Store an absolute directory path into dir (capacity max_len),
 * guaranteeing a trailing slash and appending the optional suffix.
 * Returns 1 on success, 0 on a relative path or overflow.
 */
static int _canonicalize_and_set_dir(const char *src, const char *suffix, size_t max_len, char *dir)
{
	size_t len;

	/* Only absolute names are accepted. */
	if (src[0] != '/') {
		log_debug_activation("Invalid directory value, %s: "
				     "not an absolute name.", src);
		return 0;
	}

	/* Add a separating slash only when src lacks one. */
	len = strlen(src);
	if (dm_snprintf(dir, max_len, "%s%s%s", src,
			(src[len - 1] == '/') ? "" : "/",
			suffix ? suffix : "") < 0) {
		log_debug_activation("Invalid directory value, %s: name too long.", src);
		return 0;
	}

	return 1;
}
+
/* Set the device directory; DM_DIR is appended as the subdirectory. */
int dm_set_dev_dir(const char *dev_dir)
{
	return _canonicalize_and_set_dir(dev_dir, DM_DIR, sizeof _dm_dir, _dm_dir);
}

/* Return the configured device-mapper device directory. */
const char *dm_dir(void)
{
	return _dm_dir;
}

/* Set the sysfs mount point; NULL or "" disables sysfs usage. */
int dm_set_sysfs_dir(const char *sysfs_dir)
{
	if (!sysfs_dir || !*sysfs_dir) {
		_sysfs_dir[0] = '\0';
		return 1;
	}

	return _canonicalize_and_set_dir(sysfs_dir, NULL, sizeof _sysfs_dir, _sysfs_dir);
}

/* Return the configured sysfs directory ("" when disabled). */
const char *dm_sysfs_dir(void)
{
	return _sysfs_dir;
}

/*
 * Replace existing uuid_prefix provided it isn't too long.
 */
int dm_set_uuid_prefix(const char *uuid_prefix)
{
	if (!uuid_prefix)
		return_0;

	if (strlen(uuid_prefix) > DM_MAX_UUID_PREFIX_LEN) {
		log_error("New uuid prefix %s too long.", uuid_prefix);
		return 0;
	}

	strcpy(_default_uuid_prefix, uuid_prefix);

	return 1;
}

/* Return the current default uuid prefix. */
const char *dm_uuid_prefix(void)
{
	return _default_uuid_prefix;
}
+
/* Return nonzero when a is an ASCII octal digit ('0'..'7'). */
static int _is_octal(int a)
{
	return a >= '0' && a <= '7';
}
+
/*
 * Convert a mangled mountinfo string into plain ASCII: every "\ooo"
 * octal escape is decoded to the corresponding byte, all other bytes
 * are copied verbatim.  buf must be large enough for the result.
 */
static void _unmangle_mountinfo_string(const char *src, char *buf)
{
	char c;

	while ((c = *src)) {
		if (c == '\\' && _is_octal(src[1]) &&
		    _is_octal(src[2]) && _is_octal(src[3])) {
			/* Decode three octal digits into one byte. */
			*buf++ = (char) (((src[1] & 7) << 6) |
					 ((src[2] & 7) << 3) |
					 (src[3] & 7));
			src += 4;
			continue;
		}
		*buf++ = c;
		src++;
	}
	*buf = '\0';
}
+
+/* Parse one line of mountinfo and unmangled target line */
+static int _mountinfo_parse_line(const char *line, unsigned *maj, unsigned *min, char *buf)
+{
+ char root[PATH_MAX + 1]; /* sscanf needs extra '\0' */
+ char target[PATH_MAX + 1];
+ char *devmapper;
+ struct dm_task *dmt;
+ struct dm_info info;
+ unsigned i;
+
+ /* TODO: maybe detect availability of %ms glib support ? */
+ if (sscanf(line, "%*u %*u %u:%u %" DM_TO_STRING(PATH_MAX)
+ "s %" DM_TO_STRING(PATH_MAX) "s",
+ maj, min, root, target) < 4) {
+ log_error("Failed to parse mountinfo line.");
+ return 0;
+ }
+
+ /* btrfs fakes device numbers, but there is still /dev/mapper name
+ * placed in mountinfo, so try to detect proper major:minor via this */
+ if (*maj == 0 && (devmapper = strstr(line, "/dev/mapper/"))) {
+ if (!(dmt = dm_task_create(DM_DEVICE_INFO))) {
+ log_error("Mount info task creation failed.");
+ return 0;
+ }
+ devmapper += 12; /* skip fixed prefix */
+ for (i = 0; devmapper[i] && devmapper[i] != ' ' && i < sizeof(root); ++i)
+ root[i] = devmapper[i];
+ root[i] = 0;
+ _unmangle_mountinfo_string(root, buf);
+ buf[DM_NAME_LEN] = 0; /* cut away */
+
+ if (dm_task_set_name(dmt, buf) &&
+ dm_task_no_open_count(dmt) &&
+ dm_task_run(dmt) &&
+ dm_task_get_info(dmt, &info)) {
+ log_debug("Replacing mountinfo device (%u:%u) with matching DM device %s (%u:%u).",
+ *maj, *min, buf, info.major, info.minor);
+ *maj = info.major;
+ *min = info.minor;
+ }
+ dm_task_destroy(dmt);
+ }
+
+ _unmangle_mountinfo_string(target, buf);
+
+ return 1;
+}
+
/*
 * Function to operate on individal mountinfo line,
 * minor, major and mount target are parsed and unmangled
 */
int dm_mountinfo_read(dm_mountinfo_line_callback_fn read_fn, void *cb_data)
{
	FILE *minfo;
	char buffer[2 * PATH_MAX];
	char target[PATH_MAX];
	unsigned maj, min;
	int r = 1;

	if (!(minfo = fopen(_mountinfo, "r"))) {
		/* Missing mountinfo is only a debug-level event. */
		if (errno != ENOENT)
			log_sys_error("fopen", _mountinfo);
		else
			log_sys_debug("fopen", _mountinfo);
		return 0;
	}

	/* Stop at the first line that fails to parse or whose callback fails. */
	while (!feof(minfo) && fgets(buffer, sizeof(buffer), minfo))
		if (!_mountinfo_parse_line(buffer, &maj, &min, target) ||
		    !read_fn(buffer, maj, min, target, cb_data)) {
			stack;
			r = 0;
			break;
		}

	if (fclose(minfo))
		log_sys_error("fclose", _mountinfo);

	return r;
}
+
/*
 * Read the DM name of device major:minor from
 * <sysfs>/dev/block/<major>:<minor>/dm/name into buf (capacity
 * buf_size).  Returns 1 on success, 0 on failure.
 */
static int _sysfs_get_dm_name(uint32_t major, uint32_t minor, char *buf, size_t buf_size)
{
	char *sysfs_path, *temp_buf = NULL;
	FILE *fp = NULL;
	int r = 0;
	size_t len;

	if (!(sysfs_path = dm_malloc(PATH_MAX)) ||
	    !(temp_buf = dm_malloc(PATH_MAX))) {
		log_error("_sysfs_get_dm_name: failed to allocate temporary buffers");
		goto bad;
	}

	if (dm_snprintf(sysfs_path, PATH_MAX, "%sdev/block/%" PRIu32 ":%" PRIu32
			"/dm/name", _sysfs_dir, major, minor) < 0) {
		log_error("_sysfs_get_dm_name: dm_snprintf failed");
		goto bad;
	}

	if (!(fp = fopen(sysfs_path, "r"))) {
		if (errno != ENOENT)
			log_sys_error("fopen", sysfs_path);
		else
			log_sys_debug("fopen", sysfs_path);
		goto bad;
	}

	if (!fgets(temp_buf, PATH_MAX, fp)) {
		log_sys_error("fgets", sysfs_path);
		goto bad;
	}

	len = strlen(temp_buf);

	/* Result (len - 1 chars + NUL) must fit the caller's buffer. */
	if (len > buf_size) {
		log_error("_sysfs_get_dm_name: supplied buffer too small");
		goto bad;
	}

	temp_buf[len ? len - 1 : 0] = '\0'; /* \n */
	strcpy(buf, temp_buf);
	r = 1;
bad:
	if (fp && fclose(fp))
		log_sys_error("fclose", sysfs_path);

	dm_free(temp_buf);
	dm_free(sysfs_path);

	return r;
}
+
/*
 * Derive the kernel device name of major:minor by reading the
 * <sysfs>/dev/block/<major>:<minor> symlink and taking its final path
 * component.  Returns 1 on success, 0 on failure.
 */
static int _sysfs_get_kernel_name(uint32_t major, uint32_t minor, char *buf, size_t buf_size)
{
	char *name, *sysfs_path, *temp_buf = NULL;
	ssize_t size;
	size_t len;
	int r = 0;

	if (!(sysfs_path = dm_malloc(PATH_MAX)) ||
	    !(temp_buf = dm_malloc(PATH_MAX))) {
		log_error("_sysfs_get_kernel_name: failed to allocate temporary buffers");
		goto bad;
	}

	if (dm_snprintf(sysfs_path, PATH_MAX, "%sdev/block/%" PRIu32 ":%" PRIu32,
			_sysfs_dir, major, minor) < 0) {
		log_error("_sysfs_get_kernel_name: dm_snprintf failed");
		goto bad;
	}

	if ((size = readlink(sysfs_path, temp_buf, PATH_MAX - 1)) < 0) {
		if (errno != ENOENT)
			log_sys_error("readlink", sysfs_path);
		else
			log_sys_debug("readlink", sysfs_path);
		goto bad;
	}
	/* readlink does not NUL-terminate. */
	temp_buf[size] = '\0';

	if (!(name = strrchr(temp_buf, '/'))) {
		log_error("Could not locate device kernel name in sysfs path %s", temp_buf);
		goto bad;
	}
	name += 1;
	/* len counts the name plus its terminating NUL. */
	len = size - (name - temp_buf) + 1;

	if (len > buf_size) {
		log_error("_sysfs_get_kernel_name: output buffer too small");
		goto bad;
	}

	strcpy(buf, name);
	r = 1;
bad:
	dm_free(temp_buf);
	dm_free(sysfs_path);

	return r;
}
+
+int dm_device_get_name(uint32_t major, uint32_t minor, int prefer_kernel_name,
+ char *buf, size_t buf_size)
+{
+ if (!*_sysfs_dir)
+ return 0;
+
+ /*
+ * device-mapper devices and prefer_kernel_name = 0
+ * get dm name by reading /sys/dev/block/major:minor/dm/name,
+ * fallback to _sysfs_get_kernel_name if not successful
+ */
+ if (dm_is_dm_major(major) && !prefer_kernel_name) {
+ if (_sysfs_get_dm_name(major, minor, buf, buf_size))
+ return 1;
+ else
+ stack;
+ }
+
+ /*
+ * non-device-mapper devices or prefer_kernel_name = 1
+ * get kernel name using readlink /sys/dev/block/major:minor -> .../dm-X
+ */
+ return _sysfs_get_kernel_name(major, minor, buf, buf_size);
+}
+
/*
 * Return 1 if device major:minor has open holders, i.e. its sysfs
 * holders directory exists and is non-empty; 0 otherwise.
 */
int dm_device_has_holders(uint32_t major, uint32_t minor)
{
	char sysfs_path[PATH_MAX];
	struct stat st;

	if (!*_sysfs_dir)
		return 0;

	if (dm_snprintf(sysfs_path, PATH_MAX, "%sdev/block/%" PRIu32
			":%" PRIu32 "/holders", _sysfs_dir, major, minor) < 0) {
		log_warn("WARNING: sysfs_path dm_snprintf failed.");
		return 0;
	}

	if (stat(sysfs_path, &st)) {
		if (errno != ENOENT)
			log_sys_debug("stat", sysfs_path);
		return 0;
	}

	return !dm_is_empty_dir(sysfs_path);
}
+
/*
 * Return 1 if any filesystem listed under <sysfs>/fs has an entry for
 * kernel_dev_name, i.e. a filesystem appears to be using the device.
 */
static int _mounted_fs_on_device(const char *kernel_dev_name)
{
	char sysfs_path[PATH_MAX];
	struct dirent *dirent;
	DIR *d;
	struct stat st;
	int r = 0;

	if (dm_snprintf(sysfs_path, PATH_MAX, "%sfs", _sysfs_dir) < 0) {
		log_warn("WARNING: sysfs_path dm_snprintf failed.");
		return 0;
	}

	if (!(d = opendir(sysfs_path))) {
		if (errno != ENOENT)
			log_sys_debug("opendir", sysfs_path);
		return 0;
	}

	/* Check /sys/fs/<fs_name>/<kernel_dev_name> for each fs type. */
	while ((dirent = readdir(d))) {
		if (!strcmp(dirent->d_name, ".") || !strcmp(dirent->d_name, ".."))
			continue;

		if (dm_snprintf(sysfs_path, PATH_MAX, "%sfs/%s/%s",
				_sysfs_dir, dirent->d_name, kernel_dev_name) < 0) {
			log_warn("WARNING: sysfs_path dm_snprintf failed.");
			break;
		}

		if (!stat(sysfs_path, &st)) {
			/* found! */
			r = 1;
			break;
		}
		else if (errno != ENOENT) {
			log_sys_debug("stat", sysfs_path);
			break;
		}
	}

	if (closedir(d))
		log_sys_debug("closedir", kernel_dev_name);

	return r;
}

/* State shared with the mountinfo callback below. */
struct mountinfo_s {
	unsigned maj;		/* device searched for */
	unsigned min;
	int mounted;		/* set when a matching mount is seen */
};

/*
 * dm_mountinfo_read() callback: flag the device as mounted when a
 * mountinfo line matches the major:minor being looked for.
 * Always returns 1 so the whole file is scanned.
 */
static int _device_has_mounted_fs(char *buffer, unsigned major, unsigned minor,
				  char *target, void *cb_data)
{
	struct mountinfo_s *data = cb_data;
	char kernel_dev_name[PATH_MAX];

	if ((major == data->maj) && (minor == data->min)) {
		/* Name resolution is best-effort, only used for the log line. */
		if (!dm_device_get_name(major, minor, 1, kernel_dev_name,
					sizeof(kernel_dev_name))) {
			stack;
			*kernel_dev_name = '\0';
		}
		log_verbose("Device %s (%u:%u) appears to be mounted on %s.",
			    kernel_dev_name, major, minor, target);
		data->mounted = 1;
	}

	return 1;
}
+
/*
 * Return 1 if device major:minor appears to carry a mounted filesystem,
 * checking mountinfo first and then /sys/fs as a fallback.
 */
int dm_device_has_mounted_fs(uint32_t major, uint32_t minor)
{
	char kernel_dev_name[PATH_MAX];
	struct mountinfo_s data = {
		.maj = major,
		.min = minor,
	};

	if (!dm_mountinfo_read(_device_has_mounted_fs, &data))
		stack;

	if (data.mounted)
		return 1;
	/*
	 * TODO: Verify dm_mountinfo_read() is superset
	 * and remove sysfs check (namespaces)
	 */
	/* Get kernel device name first */
	if (!dm_device_get_name(major, minor, 1, kernel_dev_name, PATH_MAX))
		return 0;

	/* Check /sys/fs/<fs_name>/<kernel_dev_name> presence */
	return _mounted_fs_on_device(kernel_dev_name);
}
+
+int dm_mknodes(const char *name)
+{
+ struct dm_task *dmt;
+ int r = 0;
+
+ if (!(dmt = dm_task_create(DM_DEVICE_MKNODES)))
+ return_0;
+
+ if (name && !dm_task_set_name(dmt, name))
+ goto out;
+
+ if (!dm_task_no_open_count(dmt))
+ goto out;
+
+ r = dm_task_run(dmt);
+
+out:
+ dm_task_destroy(dmt);
+ return r;
+}
+
/*
 * Fetch the device-mapper driver version string into version (capacity
 * size).  Returns 1 on success, 0 on failure.
 */
int dm_driver_version(char *version, size_t size)
{
	struct dm_task *dmt;
	int r = 0;

	if (!(dmt = dm_task_create(DM_DEVICE_VERSION)))
		return_0;

	/* NOTE(review): a run failure only logs here; version retrieval is
	 * still attempted and decides the return value - looks intentional,
	 * confirm before changing. */
	if (!dm_task_run(dmt))
		log_error("Failed to get driver version");

	if (!dm_task_get_driver_version(dmt, version, size))
		goto out;

	r = 1;

out:
	dm_task_destroy(dmt);
	return r;
}
+
/*
 * Store udev control flags in the upper bits of the task's event_nr
 * (shifted by DM_UDEV_FLAGS_SHIFT).  When udev is disabled the flags
 * are rewritten so libdm creates the nodes itself and udev rules stay
 * inactive.  No-op when cookies are unsupported.
 */
static void _set_cookie_flags(struct dm_task *dmt, uint16_t flags)
{
	if (!dm_cookie_supported())
		return;

	if (_udev_disabled) {
		/*
		 * If udev is disabled, hardcode this functionality:
		 * - we want libdm to create the nodes
		 * - we don't want the /dev/mapper and any subsystem
		 *   related content to be created by udev if udev
		 *   rules are installed
		 */
		flags &= ~DM_UDEV_DISABLE_LIBRARY_FALLBACK;
		flags |= DM_UDEV_DISABLE_DM_RULES_FLAG | DM_UDEV_DISABLE_SUBSYSTEM_RULES_FLAG;
	}

	dmt->event_nr = flags << DM_UDEV_FLAGS_SHIFT;
}
+
#ifndef UDEV_SYNC_SUPPORT
/* Udev sync not compiled in: synchronisation support cannot be enabled. */
void dm_udev_set_sync_support(int sync_with_udev)
{
}

/* Udev sync not compiled in: always report it unavailable. */
int dm_udev_get_sync_support(void)
{
	return 0;
}

/* Udev sync not compiled in: checking cannot be enabled. */
void dm_udev_set_checking(int checking)
{
}

/* Udev sync not compiled in: checking is always off. */
int dm_udev_get_checking(void)
{
	return 0;
}

/* Stub cookie: flags are still recorded, but no semaphore is created. */
int dm_task_set_cookie(struct dm_task *dmt, uint32_t *cookie, uint16_t flags)
{
	_set_cookie_flags(dmt, flags);

	*cookie = 0;
	dmt->cookie_set = 1;

	return 1;
}

/* Nothing to complete without udev sync. */
int dm_udev_complete(uint32_t cookie)
{
	return 1;
}

/* Without udev sync, just flush the stacked node operations. */
int dm_udev_wait(uint32_t cookie)
{
	update_devs();

	return 1;
}

/* Non-blocking variant: flush node ops and report readiness at once. */
int dm_udev_wait_immediate(uint32_t cookie, int *ready)
{
	update_devs();
	*ready = 1;

	return 1;
}
+
#else /* UDEV_SYNC_SUPPORT */

/*
 * Probe whether System V semaphores are available in the kernel -
 * they are required for udev synchronisation.
 */
static int _check_semaphore_is_supported(void)
{
	int maxid;
	union semun arg;
	struct seminfo seminfo;

	arg.__buf = &seminfo;
	maxid = semctl(0, 0, SEM_INFO, arg);

	if (maxid < 0) {
		log_warn("Kernel not configured for semaphores (System V IPC). "
			 "Not using udev synchronisation code.");
		return 0;
	}

	return 1;
}

/* Ask libudev whether the udev daemon is active. */
static int _check_udev_is_running(void)
{
	struct udev *udev;
	struct udev_queue *udev_queue;
	int r;

	if (!(udev = udev_new()))
		goto_bad;

	if (!(udev_queue = udev_queue_new(udev))) {
		udev_unref(udev);
		goto_bad;
	}

	if (!(r = udev_queue_get_udev_is_active(udev_queue)))
		log_debug_activation("Udev is not running. "
				     "Not using udev synchronisation code.");

	udev_queue_unref(udev_queue);
	udev_unref(udev);

	return r;

bad:
	log_error("Could not get udev state. Assuming udev is not running.");
	return 0;
}

/* Run the two probes above once and cache the results. */
static void _check_udev_sync_requirements_once(void)
{
	if (_semaphore_supported < 0)
		_semaphore_supported = _check_semaphore_is_supported();

	if (_udev_running < 0) {
		_udev_running = _check_udev_is_running();
		if (_udev_disabled && _udev_running)
			log_warn("Udev is running and DM_DISABLE_UDEV environment variable is set. "
				 "Bypassing udev, device-mapper library will manage device "
				 "nodes in device directory.");
	}
}
+
+void dm_udev_set_sync_support(int sync_with_udev)
+{
+ _check_udev_sync_requirements_once();
+ _sync_with_udev = sync_with_udev;
+}
+
+int dm_udev_get_sync_support(void)
+{
+ _check_udev_sync_requirements_once();
+
+ return !_udev_disabled && _semaphore_supported &&
+ dm_cookie_supported() &&_udev_running && _sync_with_udev;
+}
+
+void dm_udev_set_checking(int checking)
+{
+ if ((_udev_checking = checking))
+ log_debug_activation("DM udev checking enabled");
+ else
+ log_debug_activation("DM udev checking disabled");
+}
+
+int dm_udev_get_checking(void)
+{
+ return _udev_checking;
+}
+
+/*
+ * Translate a notification cookie into its System V semaphore id.
+ *
+ * The top 16 bits of a valid cookie must equal DM_COOKIE_MAGIC; the
+ * full cookie value doubles as the semaphore key.  Returns 1 and
+ * stores the id in *semid on success; returns 0 on bad magic or
+ * semget() failure, logging a specific message per errno.
+ */
+static int _get_cookie_sem(uint32_t cookie, int *semid)
+{
+ if (cookie >> 16 != DM_COOKIE_MAGIC) {
+ log_error("Could not continue to access notification "
+ "semaphore identified by cookie value %"
+ PRIu32 " (0x%x). Incorrect cookie prefix.",
+ cookie, cookie);
+ return 0;
+ }
+
+ if ((*semid = semget((key_t) cookie, 1, 0)) >= 0)
+ return 1;
+
+ switch (errno) {
+ case ENOENT:
+ log_error("Could not find notification "
+ "semaphore identified by cookie "
+ "value %" PRIu32 " (0x%x)",
+ cookie, cookie);
+ break;
+ case EACCES:
+ /* Fixed typo in the message: "notificaton" -> "notification". */
+ log_error("No permission to access "
+ "notification semaphore identified "
+ "by cookie value %" PRIu32 " (0x%x)",
+ cookie, cookie);
+ break;
+ default:
+ log_error("Failed to access notification "
+ "semaphore identified by cookie "
+ "value %" PRIu32 " (0x%x): %s",
+ cookie, cookie, strerror(errno));
+ break;
+ }
+
+ return 0;
+}
+
+/*
+ * Increment the notification semaphore by one (one more party must
+ * call dm_udev_complete before waiters are released).  Returns 1 on
+ * success, 0 on semop/semctl failure.
+ */
+static int _udev_notify_sem_inc(uint32_t cookie, int semid)
+{
+ struct sembuf sb = {0, 1, 0};
+ int val;
+
+ if (semop(semid, &sb, 1) < 0) {
+ log_error("semid %d: semop failed for cookie 0x%" PRIx32 ": %s",
+ semid, cookie, strerror(errno));
+ return 0;
+ }
+
+ /* GETVAL only feeds the debug message below. */
+ if ((val = semctl(semid, 0, GETVAL)) < 0) {
+ log_error("semid %d: sem_ctl GETVAL failed for "
+ "cookie 0x%" PRIx32 ": %s",
+ semid, cookie, strerror(errno));
+ return 0;
+ }
+
+ log_debug_activation("Udev cookie 0x%" PRIx32 " (semid %d) incremented to %d",
+ cookie, semid, val);
+
+ return 1;
+}
+
+/*
+ * Decrement the notification semaphore without blocking (IPC_NOWAIT);
+ * EAGAIN therefore signals an already-zero/incorrect semaphore state.
+ * Returns 1 on success, 0 on failure.
+ */
+static int _udev_notify_sem_dec(uint32_t cookie, int semid)
+{
+ struct sembuf sb = {0, -1, IPC_NOWAIT};
+ int val;
+
+ /* Value is read before the decrement; the log prints val - 1. */
+ if ((val = semctl(semid, 0, GETVAL)) < 0) {
+ log_error("semid %d: sem_ctl GETVAL failed for "
+ "cookie 0x%" PRIx32 ": %s",
+ semid, cookie, strerror(errno));
+ return 0;
+ }
+
+ if (semop(semid, &sb, 1) < 0) {
+ switch (errno) {
+ case EAGAIN:
+ log_error("semid %d: semop failed for cookie "
+ "0x%" PRIx32 ": "
+ "incorrect semaphore state",
+ semid, cookie);
+ break;
+ default:
+ log_error("semid %d: semop failed for cookie "
+ "0x%" PRIx32 ": %s",
+ semid, cookie, strerror(errno));
+ break;
+ }
+ return 0;
+ }
+
+ log_debug_activation("Udev cookie 0x%" PRIx32 " (semid %d) decremented to %d",
+ cookie, semid, val - 1);
+
+ return 1;
+}
+
+/*
+ * Remove the notification semaphore from the system (IPC_RMID).
+ * Returns 1 on success, 0 on failure.
+ */
+static int _udev_notify_sem_destroy(uint32_t cookie, int semid)
+{
+ if (semctl(semid, 0, IPC_RMID, 0) < 0) {
+ log_error("Could not cleanup notification semaphore "
+ "identified by cookie value %" PRIu32 " (0x%x): %s",
+ cookie, cookie, strerror(errno));
+ return 0;
+ }
+
+ log_debug_activation("Udev cookie 0x%" PRIx32 " (semid %d) destroyed", cookie,
+ semid);
+
+ return 1;
+}
+
+/*
+ * Create a fresh notification semaphore keyed by a random cookie.
+ *
+ * A 16-bit random value from /dev/urandom is combined with
+ * DM_COOKIE_MAGIC in the upper half; the loop retries on zero values
+ * and on key collisions (EEXIST) until a unique semaphore is created.
+ * The new semaphore's value is initialised to 1.  On success returns 1
+ * with *cookie/*semid filled in; on failure returns 0 with *cookie
+ * zeroed (and any just-created semaphore destroyed).
+ */
+static int _udev_notify_sem_create(uint32_t *cookie, int *semid)
+{
+ int fd;
+ int gen_semid;
+ int val;
+ uint16_t base_cookie;
+ uint32_t gen_cookie;
+ union semun sem_arg;
+
+ if ((fd = open("/dev/urandom", O_RDONLY)) < 0) {
+ log_error("Failed to open /dev/urandom "
+ "to create random cookie value");
+ *cookie = 0;
+ return 0;
+ }
+
+ /* Generate random cookie value. Be sure it is unique and non-zero. */
+ do {
+ /* FIXME Handle non-error returns from read(). Move _io() into libdm? */
+ if (read(fd, &base_cookie, sizeof(base_cookie)) != sizeof(base_cookie)) {
+ log_error("Failed to initialize notification cookie");
+ goto bad;
+ }
+
+ gen_cookie = DM_COOKIE_MAGIC << 16 | base_cookie;
+
+ if (base_cookie && (gen_semid = semget((key_t) gen_cookie,
+ 1, 0600 | IPC_CREAT | IPC_EXCL)) < 0) {
+ switch (errno) {
+ case EEXIST:
+ /* if the semaphore key exists, we
+ * simply generate another random one */
+ base_cookie = 0;
+ break;
+ case ENOMEM:
+ log_error("Not enough memory to create "
+ "notification semaphore");
+ goto bad;
+ case ENOSPC:
+ log_error("Limit for the maximum number "
+ "of semaphores reached. You can "
+ "check and set the limits in "
+ "/proc/sys/kernel/sem.");
+ goto bad;
+ default:
+ log_error("Failed to create notification "
+ "semaphore: %s", strerror(errno));
+ goto bad;
+ }
+ }
+ } while (!base_cookie);
+
+ log_debug_activation("Udev cookie 0x%" PRIx32 " (semid %d) created",
+ gen_cookie, gen_semid);
+
+ sem_arg.val = 1;
+
+ if (semctl(gen_semid, 0, SETVAL, sem_arg) < 0) {
+ log_error("semid %d: semctl failed: %s", gen_semid, strerror(errno));
+ /* We have to destroy just created semaphore
+ * so it won't stay in the system. */
+ (void) _udev_notify_sem_destroy(gen_cookie, gen_semid);
+ goto bad;
+ }
+
+ /* GETVAL only feeds the debug message below. */
+ if ((val = semctl(gen_semid, 0, GETVAL)) < 0) {
+ log_error("semid %d: sem_ctl GETVAL failed for "
+ "cookie 0x%" PRIx32 ": %s",
+ gen_semid, gen_cookie, strerror(errno));
+ goto bad;
+ }
+
+ log_debug_activation("Udev cookie 0x%" PRIx32 " (semid %d) incremented to %d",
+ gen_cookie, gen_semid, val);
+
+ if (close(fd))
+ stack;
+
+ *semid = gen_semid;
+ *cookie = gen_cookie;
+
+ return 1;
+
+bad:
+ if (close(fd))
+ stack;
+
+ *cookie = 0;
+
+ return 0;
+}
+
+/*
+ * Public entry point: create a new udev notification cookie.
+ * When udev sync is unavailable the cookie is simply zeroed and the
+ * call still succeeds; the throwaway semid is discarded here.
+ */
+int dm_udev_create_cookie(uint32_t *cookie)
+{
+ int semid;
+
+ if (!dm_udev_get_sync_support()) {
+ *cookie = 0;
+ return 1;
+ }
+
+ return _udev_notify_sem_create(cookie, &semid);
+}
+
+/*
+ * Map a DM_DEVICE_* task type to a short human-readable name used in
+ * the cookie-assignment debug message; unrecognised values yield
+ * "unknown".
+ */
+static const char *_task_type_disp(int type)
+{
+ switch (type) {
+ case DM_DEVICE_CREATE: return "CREATE";
+ case DM_DEVICE_RELOAD: return "RELOAD";
+ case DM_DEVICE_REMOVE: return "REMOVE";
+ case DM_DEVICE_REMOVE_ALL: return "REMOVE_ALL";
+ case DM_DEVICE_SUSPEND: return "SUSPEND";
+ case DM_DEVICE_RESUME: return "RESUME";
+ case DM_DEVICE_INFO: return "INFO";
+ case DM_DEVICE_DEPS: return "DEPS";
+ case DM_DEVICE_RENAME: return "RENAME";
+ case DM_DEVICE_VERSION: return "VERSION";
+ case DM_DEVICE_STATUS: return "STATUS";
+ case DM_DEVICE_TABLE: return "TABLE";
+ case DM_DEVICE_WAITEVENT: return "WAITEVENT";
+ case DM_DEVICE_LIST: return "LIST";
+ case DM_DEVICE_CLEAR: return "CLEAR";
+ case DM_DEVICE_MKNODES: return "MKNODES";
+ case DM_DEVICE_LIST_VERSIONS: return "LIST_VERSIONS";
+ case DM_DEVICE_TARGET_MSG: return "TARGET_MSG";
+ case DM_DEVICE_SET_GEOMETRY: return "SET_GEOMETRY";
+ default: return "unknown";
+ }
+}
+
+/*
+ * Attach a udev notification cookie to a task.
+ *
+ * Stores the udev flags in the upper bits of dmt->event_nr, then
+ * either reuses an existing cookie's semaphore (*cookie non-zero) or
+ * creates a fresh one, increments it, and mixes the cookie value into
+ * dmt->event_nr.  With sync support unavailable the cookie is zeroed
+ * and the call still succeeds.  Returns 1 on success; on failure
+ * returns 0 with dmt->event_nr cleared.
+ */
+int dm_task_set_cookie(struct dm_task *dmt, uint32_t *cookie, uint16_t flags)
+{
+ int semid;
+
+ _set_cookie_flags(dmt, flags);
+
+ if (!dm_udev_get_sync_support()) {
+ *cookie = 0;
+ dmt->cookie_set = 1;
+ return 1;
+ }
+
+ if (*cookie) {
+ if (!_get_cookie_sem(*cookie, &semid))
+ goto_bad;
+ } else if (!_udev_notify_sem_create(cookie, &semid))
+ goto_bad;
+
+ if (!_udev_notify_sem_inc(*cookie, semid)) {
+ log_error("Could not set notification semaphore "
+ "identified by cookie value %" PRIu32 " (0x%x)",
+ *cookie, *cookie);
+ goto bad;
+ }
+
+ /* Keep the flag bits, merge in the cookie's low bits. */
+ dmt->event_nr |= ~DM_UDEV_FLAGS_MASK & *cookie;
+ dmt->cookie_set = 1;
+
+ log_debug_activation("Udev cookie 0x%" PRIx32 " (semid %d) assigned to "
+ "%s task(%d) with flags%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s (0x%" PRIx16 ")",
+ *cookie, semid, _task_type_disp(dmt->type), dmt->type,
+ (flags & DM_UDEV_DISABLE_DM_RULES_FLAG) ? " DISABLE_DM_RULES" : "",
+ (flags & DM_UDEV_DISABLE_SUBSYSTEM_RULES_FLAG) ? " DISABLE_SUBSYSTEM_RULES" : "",
+ (flags & DM_UDEV_DISABLE_DISK_RULES_FLAG) ? " DISABLE_DISK_RULES" : "",
+ (flags & DM_UDEV_DISABLE_OTHER_RULES_FLAG) ? " DISABLE_OTHER_RULES" : "",
+ (flags & DM_UDEV_LOW_PRIORITY_FLAG) ? " LOW_PRIORITY" : "",
+ (flags & DM_UDEV_DISABLE_LIBRARY_FALLBACK) ? " DISABLE_LIBRARY_FALLBACK" : "",
+ (flags & DM_UDEV_PRIMARY_SOURCE_FLAG) ? " PRIMARY_SOURCE" : "",
+ (flags & DM_SUBSYSTEM_UDEV_FLAG0) ? " SUBSYSTEM_0" : " ",
+ (flags & DM_SUBSYSTEM_UDEV_FLAG1) ? " SUBSYSTEM_1" : " ",
+ (flags & DM_SUBSYSTEM_UDEV_FLAG2) ? " SUBSYSTEM_2" : " ",
+ (flags & DM_SUBSYSTEM_UDEV_FLAG3) ? " SUBSYSTEM_3" : " ",
+ (flags & DM_SUBSYSTEM_UDEV_FLAG4) ? " SUBSYSTEM_4" : " ",
+ (flags & DM_SUBSYSTEM_UDEV_FLAG5) ? " SUBSYSTEM_5" : " ",
+ (flags & DM_SUBSYSTEM_UDEV_FLAG6) ? " SUBSYSTEM_6" : " ",
+ (flags & DM_SUBSYSTEM_UDEV_FLAG7) ? " SUBSYSTEM_7" : " ",
+ flags);
+
+ return 1;
+
+bad:
+ dmt->event_nr = 0;
+ return 0;
+}
+
+/*
+ * Signal that this party has finished processing for the given cookie
+ * by decrementing its semaphore; a waiter in dm_udev_wait() is
+ * released once the count reaches zero.  A zero cookie or disabled
+ * sync support is a successful no-op.
+ */
+int dm_udev_complete(uint32_t cookie)
+{
+ int semid;
+
+ if (!cookie || !dm_udev_get_sync_support())
+ return 1;
+
+ if (!_get_cookie_sem(cookie, &semid))
+ return_0;
+
+ if (!_udev_notify_sem_dec(cookie, semid)) {
+ log_error("Could not signal waiting process using notification "
+ "semaphore identified by cookie value %" PRIu32 " (0x%x)",
+ cookie, cookie);
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * Wait (blocking on semop wait-for-zero) until all parties have
+ * completed the cookie, then destroy its semaphore.
+ *
+ * If *nowait is set, return immediately leaving it set if the semaphore
+ * is not ready to be decremented to 0. *nowait is cleared if the wait
+ * succeeds.
+ */
+static int _udev_wait(uint32_t cookie, int *nowait)
+{
+ int semid;
+ struct sembuf sb = {0, 0, 0};
+ int val;
+
+ if (!cookie || !dm_udev_get_sync_support())
+ return 1;
+
+ if (!_get_cookie_sem(cookie, &semid))
+ return_0;
+
+ /* Return immediately if the semaphore value exceeds 1? */
+ if (*nowait) {
+ if ((val = semctl(semid, 0, GETVAL)) < 0) {
+ log_error("semid %d: sem_ctl GETVAL failed for "
+ "cookie 0x%" PRIx32 ": %s",
+ semid, cookie, strerror(errno));
+ return 0;
+ }
+
+ if (val > 1)
+ return 1;
+
+ *nowait = 0;
+ }
+
+ /* Drop our own reference; remaining count is the other parties. */
+ if (!_udev_notify_sem_dec(cookie, semid)) {
+ log_error("Failed to set a proper state for notification "
+ "semaphore identified by cookie value %" PRIu32 " (0x%x) "
+ "to initialize waiting for incoming notifications.",
+ cookie, cookie);
+ (void) _udev_notify_sem_destroy(cookie, semid);
+ return 0;
+ }
+
+ log_debug_activation("Udev cookie 0x%" PRIx32 " (semid %d) waiting for zero",
+ cookie, semid);
+
+repeat_wait:
+ /* Retry on signal; EIDRM means the semaphore was already removed,
+ * which counts as a completed wait. */
+ if (semop(semid, &sb, 1) < 0) {
+ if (errno == EINTR)
+ goto repeat_wait;
+ else if (errno == EIDRM)
+ return 1;
+
+ log_error("Could not set wait state for notification semaphore "
+ "identified by cookie value %" PRIu32 " (0x%x): %s",
+ cookie, cookie, strerror(errno));
+ (void) _udev_notify_sem_destroy(cookie, semid);
+ return 0;
+ }
+
+ return _udev_notify_sem_destroy(cookie, semid);
+}
+
+/*
+ * Block until all udev processing for the cookie has completed, then
+ * flush any deferred device-node operations.
+ */
+int dm_udev_wait(uint32_t cookie)
+{
+ int nowait = 0;
+ int r = _udev_wait(cookie, &nowait);
+
+ update_devs();
+
+ return r;
+}
+
+/*
+ * Non-blocking variant of dm_udev_wait: *ready is cleared and the
+ * call returns success if other parties are still outstanding;
+ * otherwise behaves like dm_udev_wait and sets *ready.
+ */
+int dm_udev_wait_immediate(uint32_t cookie, int *ready)
+{
+ int nowait = 1;
+ int r = _udev_wait(cookie, &nowait);
+
+ if (r && nowait) {
+ *ready = 0;
+ return 1;
+ }
+
+ update_devs();
+ *ready = 1;
+
+ return r;
+}
+#endif /* UDEV_SYNC_SUPPORT */
diff --git a/device_mapper/libdm-common.h b/device_mapper/libdm-common.h
new file mode 100644
index 000000000..010d87674
--- /dev/null
+++ b/device_mapper/libdm-common.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2012 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef LIB_DMCOMMON_H
+#define LIB_DMCOMMON_H
+
+#include "libdevmapper.h"
+
+#define DM_DEFAULT_NAME_MANGLING_MODE_ENV_VAR_NAME "DM_DEFAULT_NAME_MANGLING_MODE"
+
+/* Prefer the mangled form of a task's name/uuid when one exists. */
+#define DEV_NAME(dmt) (dmt->mangled_dev_name ? : dmt->dev_name)
+/*
+ * Fixed macro parameter: it was declared as DEV_UUID(DMT) while the
+ * body referenced "dmt", so the macro only worked when the caller's
+ * variable happened to be named dmt.  Now consistent with DEV_NAME.
+ */
+#define DEV_UUID(dmt) (dmt->mangled_uuid ? : dmt->uuid)
+
+/* Name/uuid mangling helpers (see libdm-common.c). */
+int mangle_string(const char *str, const char *str_name, size_t len,
+ char *buf, size_t buf_len, dm_string_mangling_t mode);
+
+int unmangle_string(const char *str, const char *str_name, size_t len,
+ char *buf, size_t buf_len, dm_string_mangling_t mode);
+
+int check_multiple_mangled_string_allowed(const char *str, const char *str_name,
+ dm_string_mangling_t mode);
+
+struct target *create_target(uint64_t start,
+ uint64_t len,
+ const char *type, const char *params);
+
+/* /dev node management helpers shared across the library. */
+int add_dev_node(const char *dev_name, uint32_t minor, uint32_t major,
+ uid_t uid, gid_t gid, mode_t mode, int check_udev, unsigned rely_on_udev);
+int rm_dev_node(const char *dev_name, int check_udev, unsigned rely_on_udev);
+int rename_dev_node(const char *old_name, const char *new_name,
+ int check_udev, unsigned rely_on_udev);
+int get_dev_node_read_ahead(const char *dev_name, uint32_t major, uint32_t minor,
+ uint32_t *read_ahead);
+int set_dev_node_read_ahead(const char *dev_name, uint32_t major, uint32_t minor,
+ uint32_t read_ahead, uint32_t read_ahead_flags);
+void update_devs(void);
+void selinux_release(void);
+
+/* Track the count of suspended devices. */
+void inc_suspended(void);
+void dec_suspended(void);
+
+int parse_thin_pool_status(const char *params, struct dm_status_thin_pool *s);
+
+int get_uname_version(unsigned *major, unsigned *minor, unsigned *release);
+
+#endif
diff --git a/device_mapper/libdm-config.c b/device_mapper/libdm-config.c
new file mode 100644
index 000000000..fd4d929ec
--- /dev/null
+++ b/device_mapper/libdm-config.c
@@ -0,0 +1,1486 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "misc/dmlib.h"
+
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <ctype.h>
+#include <stdarg.h>
+
+#define SECTION_B_CHAR '{'
+#define SECTION_E_CHAR '}'
+
+/* Token types produced by the tokeniser (_get_token). */
+enum {
+ TOK_INT,
+ TOK_FLOAT,
+ TOK_STRING, /* Single quotes */
+ TOK_STRING_ESCAPED, /* Double quotes */
+ TOK_STRING_BARE, /* No quotes */
+ TOK_EQ,
+ TOK_SECTION_B,
+ TOK_SECTION_E,
+ TOK_ARRAY_B,
+ TOK_ARRAY_E,
+ TOK_IDENTIFIER,
+ TOK_COMMA,
+ TOK_EOF
+};
+
+/* Parser state: input window, current token and allocation pool. */
+struct parser {
+ const char *fb, *fe; /* file limits */
+
+ int t; /* token limits and type */
+ const char *tb, *te;
+
+ int line; /* line number we are on */
+
+ struct dm_pool *mem;
+ int no_dup_node_check; /* whether to disable dup node checking */
+};
+
+/* Output sink used while serialising a config tree back to text. */
+struct config_output {
+ struct dm_pool *mem;
+ dm_putline_fn putline;
+ const struct dm_config_node_out_spec *spec;
+ void *baton;
+};
+
+static void _get_token(struct parser *p, int tok_prev);
+static void _eat_space(struct parser *p);
+static struct dm_config_node *_file(struct parser *p);
+static struct dm_config_node *_section(struct parser *p, struct dm_config_node *parent);
+static struct dm_config_value *_value(struct parser *p);
+static struct dm_config_value *_type(struct parser *p);
+static int _match_aux(struct parser *p, int t);
+static struct dm_config_value *_create_value(struct dm_pool *mem);
+static struct dm_config_node *_create_node(struct dm_pool *mem);
+static char *_dup_tok(struct parser *p);
+static char *_dup_token(struct dm_pool *mem, const char *b, const char *e);
+
+/* Path separator used in config lookups, e.g. "section/key". */
+static const int _sep = '/';
+
+#define MAX_INDENT 32
+
+/* Consume the expected token type or fail the enclosing parse function. */
+#define match(t) do {\
+ if (!_match_aux(p, (t))) {\
+ log_error("Parse error at byte %" PRIptrdiff_t " (line %d): unexpected token", \
+ p->tb - p->fb + 1, p->line); \
+ return 0;\
+ } \
+} while(0)
+
+/*
+ * Compare NUL-terminated 'str' against the token bounded by [b, e);
+ * non-zero only when both are exactly the same length and content.
+ */
+static int _tok_match(const char *str, const char *b, const char *e)
+{
+ for (; *str && b != e; str++, b++)
+ if (*str != *b)
+ return 0;
+
+ return !*str && b == e;
+}
+
+/*
+ * Allocate an empty config tree backed by its own memory pool; the
+ * whole tree is freed in one step by dm_config_destroy.  Returns NULL
+ * on allocation failure.
+ */
+struct dm_config_tree *dm_config_create(void)
+{
+ struct dm_config_tree *cft;
+ struct dm_pool *mem = dm_pool_create("config", 10 * 1024);
+
+ if (!mem) {
+ log_error("Failed to allocate config pool.");
+ return 0;
+ }
+
+ if (!(cft = dm_pool_zalloc(mem, sizeof(*cft)))) {
+ log_error("Failed to allocate config tree.");
+ dm_pool_destroy(mem);
+ return 0;
+ }
+ cft->mem = mem;
+
+ return cft;
+}
+
+/* Attach caller-owned opaque data to the tree. */
+void dm_config_set_custom(struct dm_config_tree *cft, void *custom)
+{
+ cft->custom = custom;
+}
+
+/* Retrieve the caller-owned opaque data previously attached. */
+void *dm_config_get_custom(struct dm_config_tree *cft)
+{
+ return cft->custom;
+}
+
+/* Free the tree and everything allocated from its pool. */
+void dm_config_destroy(struct dm_config_tree *cft)
+{
+ dm_pool_destroy(cft->mem);
+}
+
+/*
+ * If there's a cascaded dm_config_tree, remove and return it, otherwise
+ * return NULL.
+ */
+struct dm_config_tree *dm_config_remove_cascaded_tree(struct dm_config_tree *cft)
+{
+ struct dm_config_tree *second_cft;
+
+ if (!cft)
+ return NULL;
+
+ second_cft = cft->cascade;
+ cft->cascade = NULL;
+
+ return second_cft;
+}
+
+/*
+ * When searching, first_cft is checked before second_cft.
+ */
+struct dm_config_tree *dm_config_insert_cascaded_tree(struct dm_config_tree *first_cft, struct dm_config_tree *second_cft)
+{
+ first_cft->cascade = second_cft;
+
+ return first_cft;
+}
+
+/*
+ * Reverse a sibling list in place, recursing into every child list.
+ * The parser links nodes head-first, so this restores file order.
+ */
+static struct dm_config_node *_config_reverse(struct dm_config_node *head)
+{
+ struct dm_config_node *left = head, *middle = NULL, *right = NULL;
+
+ while (left) {
+ right = middle;
+ middle = left;
+ left = left->sib;
+ middle->sib = right;
+ middle->child = _config_reverse(middle->child);
+ }
+
+ return middle;
+}
+
+/*
+ * Parse the text in [start, end) into cft->root.  The parser state is
+ * pool-allocated so it is released together with the tree.  Sibling
+ * lists are built reversed and flipped back afterwards.  Returns 1 on
+ * success, 0 on parse or allocation failure.
+ */
+static int _do_dm_config_parse(struct dm_config_tree *cft, const char *start, const char *end, int no_dup_node_check)
+{
+ /* TODO? if (start == end) return 1; */
+
+ struct parser *p;
+ if (!(p = dm_pool_alloc(cft->mem, sizeof(*p))))
+ return_0;
+
+ p->mem = cft->mem;
+ p->fb = start;
+ p->fe = end;
+ p->tb = p->te = p->fb;
+ p->line = 1;
+ p->no_dup_node_check = no_dup_node_check;
+
+ _get_token(p, TOK_SECTION_E);
+ if (!(cft->root = _file(p)))
+ return_0;
+
+ cft->root = _config_reverse(cft->root);
+
+ return 1;
+}
+
+/* Parse with duplicate-node detection (warns on duplicates). */
+int dm_config_parse(struct dm_config_tree *cft, const char *start, const char *end)
+{
+ return _do_dm_config_parse(cft, start, end, 0);
+}
+
+/* Faster variant that skips the duplicate-node scan entirely. */
+int dm_config_parse_without_dup_node_check(struct dm_config_tree *cft, const char *start, const char *end)
+{
+ return _do_dm_config_parse(cft, start, end, 1);
+}
+
+/* Convenience wrapper: create a tree and parse a NUL-terminated string. */
+struct dm_config_tree *dm_config_from_string(const char *config_settings)
+{
+ struct dm_config_tree *cft;
+
+ if (!(cft = dm_config_create()))
+ return_NULL;
+
+ if (!dm_config_parse(cft, config_settings, config_settings + strlen(config_settings))) {
+ dm_config_destroy(cft);
+ return_NULL;
+ }
+
+ return cft;
+}
+
+/* Begin accumulating one output line as a pool object. */
+static int _line_start(struct config_output *out)
+{
+ if (!dm_pool_begin_object(out->mem, 128)) {
+ log_error("dm_pool_begin_object failed for config line");
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * printf-style append to the line being built.  Formats into a 4 KiB
+ * stack buffer first and only falls back to a heap allocation when
+ * the result does not fit.  Returns 1 on success, 0 on failure.
+ */
+__attribute__ ((format(printf, 2, 3)))
+static int _line_append(struct config_output *out, const char *fmt, ...)
+{
+ char buf[4096];
+ char *dyn_buf = NULL;
+ va_list ap;
+ int n;
+
+ /*
+ * We should be fine with the 4096 char buffer 99% of the time,
+ * but if we need to go beyond that, allocate the buffer dynamically.
+ */
+
+ va_start(ap, fmt);
+ n = vsnprintf(buf, sizeof(buf), fmt, ap);
+ va_end(ap);
+
+ if (n < 0) {
+ log_error("vsnprintf failed for config line");
+ return 0;
+ }
+
+ if (n > (int) sizeof buf - 1) {
+ /*
+ * Fixed size buffer with sizeof buf is not enough,
+ * so try dynamically allocated buffer now...
+ */
+ va_start(ap, fmt);
+ n = dm_vasprintf(&dyn_buf, fmt, ap);
+ va_end(ap);
+
+ if (n < 0) {
+ log_error("dm_vasprintf failed for config line");
+ return 0;
+ }
+ }
+
+ if (!dm_pool_grow_object(out->mem, dyn_buf ? : buf, 0)) {
+ log_error("dm_pool_grow_object failed for config line");
+ dm_free(dyn_buf);
+ return 0;
+ }
+
+ dm_free(dyn_buf);
+
+ return 1;
+}
+
+/* Shorthand used by the writers below; fails the caller on error. */
+#define line_append(args...) do {if (!_line_append(out, args)) {return_0;}} while (0)
+
+/*
+ * Terminate the accumulated line and hand it to the configured sinks
+ * (putline callback and/or out-spec line hook).  Fails if neither
+ * sink is configured.
+ */
+static int _line_end(const struct dm_config_node *cn, struct config_output *out)
+{
+ const char *line;
+
+ if (!dm_pool_grow_object(out->mem, "\0", 1)) {
+ log_error("dm_pool_grow_object failed for config line");
+ return 0;
+ }
+
+ line = dm_pool_end_object(out->mem);
+
+ if (!out->putline && !out->spec)
+ return 0;
+
+ if (out->putline)
+ out->putline(line, out->baton);
+
+ if (out->spec && out->spec->line_fn)
+ out->spec->line_fn(cn, line, out->baton);
+
+ return 1;
+}
+
+/*
+ * Serialise one scalar config value onto the current line, honouring
+ * format_flags (unquoted strings, octal ints, extra array spacing).
+ * NOTE(review): an unknown value type is logged but still returns 1 -
+ * the caller carries on; confirm this leniency is intended.
+ */
+static int _write_value(struct config_output *out, const struct dm_config_value *v)
+{
+ char *buf;
+ const char *s;
+
+ switch (v->type) {
+ case DM_CFG_STRING:
+ buf = alloca(dm_escaped_len(v->v.str));
+ s = (v->format_flags & DM_CONFIG_VALUE_FMT_STRING_NO_QUOTES) ? "" : "\"";
+ line_append("%s%s%s", s, dm_escape_double_quotes(buf, v->v.str), s);
+ break;
+
+ case DM_CFG_FLOAT:
+ line_append("%f", v->v.f);
+ break;
+
+ case DM_CFG_INT:
+ if (v->format_flags & DM_CONFIG_VALUE_FMT_INT_OCTAL)
+ line_append("0%" PRIo64, v->v.i);
+ else
+ line_append(FMTd64, v->v.i);
+ break;
+
+ case DM_CFG_EMPTY_ARRAY:
+ s = (v->format_flags & DM_CONFIG_VALUE_FMT_COMMON_EXTRA_SPACES) ? " " : "";
+ line_append("[%s]", s);
+ break;
+
+ default:
+ log_error("_write_value: Unknown value type: %d", v->type);
+
+ }
+
+ return 1;
+}
+
+/*
+ * Recursively serialise node 'n' (and, unless only_one, its siblings)
+ * at the given nesting level.  Sections are emitted as "key { ... }",
+ * values as "key = value" or "key = [v1, v2, ...]"; keys containing
+ * '#', '"' or '!' are quoted/escaped.  Indentation is one tab per
+ * level, capped at MAX_INDENT.
+ */
+static int _write_config(const struct dm_config_node *n, int only_one,
+ struct config_output *out, int level)
+{
+ const char *extra_space;
+ int format_array;
+ char space[MAX_INDENT + 1];
+ int l = (level < MAX_INDENT) ? level : MAX_INDENT;
+ int i;
+ char *escaped_key = NULL;
+
+ if (!n)
+ return 1;
+
+ for (i = 0; i < l; i++)
+ space[i] = '\t';
+ space[i] = '\0';
+
+ do {
+ extra_space = (n->v && (n->v->format_flags & DM_CONFIG_VALUE_FMT_COMMON_EXTRA_SPACES)) ? " " : "";
+ format_array = (n->v && (n->v->format_flags & DM_CONFIG_VALUE_FMT_COMMON_ARRAY));
+
+ if (out->spec && out->spec->prefix_fn)
+ out->spec->prefix_fn(n, space, out->baton);
+
+ if (!_line_start(out))
+ return_0;
+ /* Keys with special characters must be written quoted. */
+ if (strchr(n->key, '#') || strchr(n->key, '"') || strchr(n->key, '!')) {
+ escaped_key = alloca(dm_escaped_len(n->key) + 2);
+ *escaped_key = '"';
+ dm_escape_double_quotes(escaped_key + 1, n->key);
+ strcat(escaped_key, "\"");
+ }
+ line_append("%s%s", space, escaped_key ? escaped_key : n->key);
+ escaped_key = NULL;
+ if (!n->v) {
+ /* it's a sub section */
+ line_append(" {");
+ if (!_line_end(n, out))
+ return_0;
+ if (!_write_config(n->child, 0, out, level + 1))
+ return_0;
+ if (!_line_start(out))
+ return_0;
+ line_append("%s}", space);
+ } else {
+ /* it's a value */
+ const struct dm_config_value *v = n->v;
+ line_append("%s=%s", extra_space, extra_space);
+ if (v->next) {
+ line_append("[%s", extra_space);
+ while (v && v->type != DM_CFG_EMPTY_ARRAY) {
+ if (!_write_value(out, v))
+ return_0;
+ v = v->next;
+ if (v && v->type != DM_CFG_EMPTY_ARRAY)
+ line_append(",%s", extra_space);
+ }
+ line_append("%s]", extra_space);
+ } else {
+ /* Single value may still be forced into array form. */
+ if (format_array && (v->type != DM_CFG_EMPTY_ARRAY))
+ line_append("[%s", extra_space);
+ if (!_write_value(out, v))
+ return_0;
+ if (format_array && (v->type != DM_CFG_EMPTY_ARRAY))
+ line_append("%s]", extra_space);
+ }
+ }
+ if (!_line_end(n, out))
+ return_0;
+
+ if (out->spec && out->spec->suffix_fn)
+ out->spec->suffix_fn(n, space, out->baton);
+
+ n = n->sib;
+ } while (n && !only_one);
+ /* FIXME: add error checking */
+ return 1;
+}
+
+/*
+ * Common driver for the four public writers: sets up a scratch pool
+ * and output sink, serialises cn (alone or with siblings), and tears
+ * the pool down again.
+ */
+static int _write_node(const struct dm_config_node *cn, int only_one,
+ dm_putline_fn putline,
+ const struct dm_config_node_out_spec *out_spec,
+ void *baton)
+{
+ struct config_output out = {
+ .mem = dm_pool_create("config_output", 1024),
+ .putline = putline,
+ .spec = out_spec,
+ .baton = baton
+ };
+
+ if (!out.mem)
+ return_0;
+
+ if (!_write_config(cn, only_one, &out, 0)) {
+ dm_pool_destroy(out.mem);
+ return_0;
+ }
+ dm_pool_destroy(out.mem);
+ return 1;
+}
+
+/* Write a single node (no siblings) through a putline callback. */
+int dm_config_write_one_node(const struct dm_config_node *cn, dm_putline_fn putline, void *baton)
+{
+ return _write_node(cn, 1, putline, NULL, baton);
+}
+
+/* Write a node and all its siblings through a putline callback. */
+int dm_config_write_node(const struct dm_config_node *cn, dm_putline_fn putline, void *baton)
+{
+ return _write_node(cn, 0, putline, NULL, baton);
+}
+
+/* Write a single node using the richer out-spec hook interface. */
+int dm_config_write_one_node_out(const struct dm_config_node *cn,
+ const struct dm_config_node_out_spec *out_spec,
+ void *baton)
+{
+ return _write_node(cn, 1, NULL, out_spec, baton);
+}
+
+/* Write a node and its siblings using the out-spec hook interface. */
+int dm_config_write_node_out(const struct dm_config_node *cn,
+ const struct dm_config_node_out_spec *out_spec,
+ void *baton)
+{
+ return _write_node(cn, 0, NULL, out_spec, baton);
+}
+
+/*
+ * parser
+ */
+/*
+ * Duplicate the current quoted-string token with the surrounding
+ * quote characters stripped; the token end is restored afterwards.
+ * Returns NULL on an ill-formed (too short) token.
+ */
+static char *_dup_string_tok(struct parser *p)
+{
+ char *str;
+
+ p->tb++, p->te--; /* strip "'s */
+
+ if (p->te < p->tb) {
+ log_error("Parse error at byte %" PRIptrdiff_t " (line %d): "
+ "expected a string token.",
+ p->tb - p->fb + 1, p->line);
+ return NULL;
+ }
+
+ if (!(str = _dup_tok(p)))
+ return_NULL;
+
+ p->te++;
+
+ return str;
+}
+
+/*
+ * Top-level grammar rule: parse sections until EOF under a synthetic
+ * "<root>" node and return the resulting child list.
+ */
+static struct dm_config_node *_file(struct parser *p)
+{
+ struct dm_config_node root = { 0 };
+ root.key = "<root>";
+
+ while (p->t != TOK_EOF)
+ if (!_section(p, &root))
+ return_NULL;
+ return root.child;
+}
+
+/*
+ * Allocate a node whose key is the token [key_b, key_e) and, when a
+ * parent is given, prepend it to the parent's child list (lists are
+ * built reversed; see _config_reverse).
+ */
+static struct dm_config_node *_make_node(struct dm_pool *mem,
+ const char *key_b, const char *key_e,
+ struct dm_config_node *parent)
+{
+ struct dm_config_node *n;
+
+ if (!(n = _create_node(mem)))
+ return_NULL;
+
+ n->key = _dup_token(mem, key_b, key_e);
+ if (parent) {
+ n->parent = parent;
+ n->sib = parent->child;
+ parent->child = n;
+ }
+ return n;
+}
+
+/* when mem is not NULL, we create the path if it doesn't exist yet */
+/*
+ * Walk a '/'-separated path below 'parent', descending one segment at
+ * a time.  With no_dup_node_check the sibling scan is skipped (pure
+ * creation mode); otherwise duplicate keys on a level are detected
+ * and warned about.  Returns the final node, or NULL when it neither
+ * exists nor may be created.
+ */
+static struct dm_config_node *_find_or_make_node(struct dm_pool *mem,
+ struct dm_config_node *parent,
+ const char *path,
+ int no_dup_node_check)
+{
+ const char *e;
+ struct dm_config_node *cn = parent ? parent->child : NULL;
+ struct dm_config_node *cn_found = NULL;
+
+ while (cn || mem) {
+ /* trim any leading slashes */
+ while (*path && (*path == _sep))
+ path++;
+
+ /* find the end of this segment */
+ for (e = path; *e && (*e != _sep); e++) ;
+
+ /* hunt for the node */
+ cn_found = NULL;
+
+ if (!no_dup_node_check) {
+ while (cn) {
+ if (_tok_match(cn->key, path, e)) {
+ /* Inefficient */
+ if (!cn_found)
+ cn_found = cn;
+ else
+ log_warn("WARNING: Ignoring duplicate"
+ " config node: %s ("
+ "seeking %s)", cn->key, path);
+ }
+
+ cn = cn->sib;
+ }
+ }
+
+ if (!cn_found && mem) {
+ if (!(cn_found = _make_node(mem, path, e, parent)))
+ return_NULL;
+ }
+
+ /* More path segments remain: descend into the match. */
+ if (cn_found && *e) {
+ parent = cn_found;
+ cn = cn_found->child;
+ } else
+ return cn_found;
+ path = e;
+ }
+
+ return NULL;
+}
+
+/*
+ * Grammar rule for one section or assignment: an identifier (possibly
+ * quoted) followed either by "{ ... }" (nested sections) or by
+ * "= value".  The node is created/located under 'parent'.  Returns
+ * the node, or NULL on parse error.
+ */
+static struct dm_config_node *_section(struct parser *p, struct dm_config_node *parent)
+{
+ /* IDENTIFIER SECTION_B_CHAR VALUE* SECTION_E_CHAR */
+
+ struct dm_config_node *root;
+ struct dm_config_value *value;
+ char *str;
+
+ if (p->t == TOK_STRING_ESCAPED) {
+ if (!(str = _dup_string_tok(p)))
+ return_NULL;
+ dm_unescape_double_quotes(str);
+
+ match(TOK_STRING_ESCAPED);
+ } else if (p->t == TOK_STRING) {
+ if (!(str = _dup_string_tok(p)))
+ return_NULL;
+
+ match(TOK_STRING);
+ } else {
+ if (!(str = _dup_tok(p)))
+ return_NULL;
+
+ match(TOK_IDENTIFIER);
+ }
+
+ if (!strlen(str)) {
+ log_error("Parse error at byte %" PRIptrdiff_t " (line %d): empty section identifier",
+ p->tb - p->fb + 1, p->line);
+ return NULL;
+ }
+
+ if (!(root = _find_or_make_node(p->mem, parent, str, p->no_dup_node_check)))
+ return_NULL;
+
+ if (p->t == TOK_SECTION_B) {
+ match(TOK_SECTION_B);
+ while (p->t != TOK_SECTION_E) {
+ if (!(_section(p, root)))
+ return_NULL;
+ }
+ match(TOK_SECTION_E);
+ } else {
+ match(TOK_EQ);
+ if (!(value = _value(p)))
+ return_NULL;
+ /* A repeated key keeps its first value; later ones warn. */
+ if (root->v)
+ log_warn("WARNING: Ignoring duplicate"
+ " config value: %s", str);
+ root->v = value;
+ }
+
+ return root;
+}
+
+/*
+ * Grammar rule for a value: either a single scalar or a
+ * comma-separated "[...]" array.  An empty array is represented by a
+ * single DM_CFG_EMPTY_ARRAY value.  Returns the head of the value
+ * list, or NULL on error.
+ */
+static struct dm_config_value *_value(struct parser *p)
+{
+ /* '[' TYPE* ']' | TYPE */
+ struct dm_config_value *h = NULL, *l, *ll = NULL;
+ if (p->t == TOK_ARRAY_B) {
+ match(TOK_ARRAY_B);
+ while (p->t != TOK_ARRAY_E) {
+ if (!(l = _type(p)))
+ return_NULL;
+
+ if (!h)
+ h = l;
+ else
+ ll->next = l;
+ ll = l;
+
+ if (p->t == TOK_COMMA)
+ match(TOK_COMMA);
+ }
+ match(TOK_ARRAY_E);
+ /*
+ * Special case for an empty array.
+ */
+ if (!h) {
+ if (!(h = _create_value(p->mem))) {
+ log_error("Failed to allocate value");
+ return NULL;
+ }
+
+ h->type = DM_CFG_EMPTY_ARRAY;
+ }
+
+ } else
+ if (!(h = _type(p)))
+ return_NULL;
+
+ return h;
+}
+
+/*
+ * Grammar rule for a scalar: integer, float, or one of the three
+ * string token flavours (single-quoted, double-quoted with escapes,
+ * bare).  Returns the new value, or NULL on conversion/parse error.
+ */
+static struct dm_config_value *_type(struct parser *p)
+{
+ /* [+-]{0,1}[0-9]+ | [0-9]*\.[0-9]* | ".*" */
+ struct dm_config_value *v = _create_value(p->mem);
+ char *str;
+
+ if (!v) {
+ log_error("Failed to allocate type value");
+ return NULL;
+ }
+
+ switch (p->t) {
+ case TOK_INT:
+ v->type = DM_CFG_INT;
+ errno = 0;
+ v->v.i = strtoll(p->tb, NULL, 0); /* FIXME: check error */
+ if (errno) {
+ log_error("Failed to read int token.");
+ return NULL;
+ }
+ match(TOK_INT);
+ break;
+
+ case TOK_FLOAT:
+ v->type = DM_CFG_FLOAT;
+ errno = 0;
+ v->v.f = strtod(p->tb, NULL); /* FIXME: check error */
+ if (errno) {
+ log_error("Failed to read float token.");
+ return NULL;
+ }
+ match(TOK_FLOAT);
+ break;
+
+ case TOK_STRING:
+ v->type = DM_CFG_STRING;
+
+ if (!(v->v.str = _dup_string_tok(p)))
+ return_NULL;
+
+ match(TOK_STRING);
+ break;
+
+ case TOK_STRING_BARE:
+ v->type = DM_CFG_STRING;
+
+ if (!(v->v.str = _dup_tok(p)))
+ return_NULL;
+
+ match(TOK_STRING_BARE);
+ break;
+
+ case TOK_STRING_ESCAPED:
+ v->type = DM_CFG_STRING;
+
+ if (!(str = _dup_string_tok(p)))
+ return_NULL;
+ dm_unescape_double_quotes(str);
+ v->v.str = str;
+ match(TOK_STRING_ESCAPED);
+ break;
+
+ default:
+ log_error("Parse error at byte %" PRIptrdiff_t " (line %d): expected a value",
+ p->tb - p->fb + 1, p->line);
+ return NULL;
+ }
+ return v;
+}
+
+/*
+ * Consume the current token if it has the expected type and advance
+ * to the next one; returns 0 (without advancing) on a mismatch.
+ */
+static int _match_aux(struct parser *p, int t)
+{
+ if (p->t != t)
+ return 0;
+
+ _get_token(p, t);
+ return 1;
+}
+
+/*
+ * tokeniser
+ */
+/*
+ * Advance the parser to the next token, classifying it into p->t and
+ * setting the [p->tb, p->te) bounds.  tok_prev is the previously
+ * consumed token: after '=', '[' or ',' the next bare word is treated
+ * as a value (TOK_STRING_BARE, and digits become numbers) rather than
+ * as an identifier.
+ */
+static void _get_token(struct parser *p, int tok_prev)
+{
+ int values_allowed = 0;
+
+ const char *te;
+
+ p->tb = p->te;
+ _eat_space(p);
+ if (p->tb == p->fe || !*p->tb) {
+ p->t = TOK_EOF;
+ return;
+ }
+
+ /* Should next token be interpreted as value instead of identifier? */
+ if (tok_prev == TOK_EQ || tok_prev == TOK_ARRAY_B ||
+ tok_prev == TOK_COMMA)
+ values_allowed = 1;
+
+ p->t = TOK_INT; /* fudge so the fall through for
+ floats works */
+
+ te = p->te;
+ switch (*te) {
+ case SECTION_B_CHAR:
+ p->t = TOK_SECTION_B;
+ te++;
+ break;
+
+ case SECTION_E_CHAR:
+ p->t = TOK_SECTION_E;
+ te++;
+ break;
+
+ case '[':
+ p->t = TOK_ARRAY_B;
+ te++;
+ break;
+
+ case ']':
+ p->t = TOK_ARRAY_E;
+ te++;
+ break;
+
+ case ',':
+ p->t = TOK_COMMA;
+ te++;
+ break;
+
+ case '=':
+ p->t = TOK_EQ;
+ te++;
+ break;
+
+ case '"':
+ /* Double-quoted string; backslash escapes the next char. */
+ p->t = TOK_STRING_ESCAPED;
+ te++;
+ while ((te != p->fe) && (*te) && (*te != '"')) {
+ if ((*te == '\\') && (te + 1 != p->fe) &&
+ *(te + 1))
+ te++;
+ te++;
+ }
+
+ if ((te != p->fe) && (*te))
+ te++;
+ break;
+
+ case '\'':
+ /* Single-quoted string; no escape processing. */
+ p->t = TOK_STRING;
+ te++;
+ while ((te != p->fe) && (*te) && (*te != '\''))
+ te++;
+
+ if ((te != p->fe) && (*te))
+ te++;
+ break;
+
+ case '.':
+ p->t = TOK_FLOAT;
+ /* Fall through */
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ case '+':
+ case '-':
+ if (values_allowed) {
+ /* Scan digits; a single '.' upgrades INT to FLOAT. */
+ while (++te != p->fe) {
+ if (!isdigit((int) *te)) {
+ if (*te == '.') {
+ if (p->t != TOK_FLOAT) {
+ p->t = TOK_FLOAT;
+ continue;
+ }
+ }
+ break;
+ }
+ }
+ break;
+ }
+ /* fall through */
+
+ default:
+ /* Bare word: identifier in key position, string in value position. */
+ p->t = TOK_IDENTIFIER;
+ while ((te != p->fe) && (*te) && !isspace(*te) &&
+ (*te != '#') && (*te != '=') &&
+ (*te != SECTION_B_CHAR) &&
+ (*te != SECTION_E_CHAR))
+ te++;
+ if (values_allowed)
+ p->t = TOK_STRING_BARE;
+ break;
+ }
+
+ p->te = te;
+}
+
+static void _eat_space(struct parser *p)
+{
+ while (p->tb != p->fe) {
+ if (*p->te == '#')
+ while ((p->te != p->fe) && (*p->te != '\n') && (*p->te))
+ ++p->te;
+
+ else if (!isspace(*p->te))
+ break;
+
+ while ((p->te != p->fe) && isspace(*p->te)) {
+ if (*p->te == '\n')
+ ++p->line;
+ ++p->te;
+ }
+
+ p->tb = p->te;
+ }
+}
+
+/*
+ * memory management
+ */
+static struct dm_config_value *_create_value(struct dm_pool *mem)
+{
+ return dm_pool_zalloc(mem, sizeof(struct dm_config_value));
+}
+
+static struct dm_config_node *_create_node(struct dm_pool *mem)
+{
+ return dm_pool_zalloc(mem, sizeof(struct dm_config_node));
+}
+
/* Copy the characters [b, e) into a NUL-terminated pool allocation. */
static char *_dup_token(struct dm_pool *mem, const char *b, const char *e)
{
	size_t len = e - b;
	char *str;

	if (!(str = dm_pool_alloc(mem, len + 1))) {
		log_error("Failed to duplicate token.");
		return NULL;
	}

	memcpy(str, b, len);
	str[len] = '\0';

	return str;
}
+
+static char *_dup_tok(struct parser *p)
+{
+ return _dup_token(p->mem, p->tb, p->te);
+}
+
+/*
+ * Utility functions
+ */
+
+/*
+ * node_lookup_fn is either:
+ * _find_config_node to perform a lookup starting from a given config_node
+ * in a config_tree;
+ * or
+ * _find_first_config_node to find the first config_node in a set of
+ * cascaded trees.
+ */
+typedef const struct dm_config_node *node_lookup_fn(const void *start, const char *path);
+
/* Look up 'path' below config node 'start': wrap 'start' in a dummy parent
 * so _find_or_make_node() can walk from it (NULL mem => lookup only, no
 * node creation). */
static const struct dm_config_node *_find_config_node(const void *start, const char *path) {
	struct dm_config_node dummy = { .child = (void *) start };
	return _find_or_make_node(NULL, &dummy, path, 0);
}
+
+static const struct dm_config_node *_find_first_config_node(const void *start, const char *path)
+{
+ const struct dm_config_tree *cft = start;
+ const struct dm_config_node *cn = NULL;
+
+ while (cft) {
+ if ((cn = _find_config_node(cft->root, path)))
+ return cn;
+ cft = cft->cascade;
+ }
+
+ return NULL;
+}
+
+static const char *_find_config_str(const void *start, node_lookup_fn find_fn,
+ const char *path, const char *fail, int allow_empty)
+{
+ const struct dm_config_node *n = find_fn(start, path);
+
+ /* Empty strings are ignored if allow_empty is set */
+ if (n && n->v) {
+ if ((n->v->type == DM_CFG_STRING) &&
+ (allow_empty || (*n->v->v.str))) {
+ /* log_very_verbose("Setting %s to %s", path, n->v->v.str); */
+ return n->v->v.str;
+ }
+ if ((n->v->type != DM_CFG_STRING) || (!allow_empty && fail))
+ log_warn("WARNING: Ignoring unsupported value for %s.", path);
+ }
+
+ if (fail)
+ log_very_verbose("%s not found in config: defaulting to %s",
+ path, fail);
+ return fail;
+}
+
+const char *dm_config_find_str(const struct dm_config_node *cn,
+ const char *path, const char *fail)
+{
+ return _find_config_str(cn, _find_config_node, path, fail, 0);
+}
+
+const char *dm_config_find_str_allow_empty(const struct dm_config_node *cn,
+ const char *path, const char *fail)
+{
+ return _find_config_str(cn, _find_config_node, path, fail, 1);
+}
+
+static int64_t _find_config_int64(const void *start, node_lookup_fn find,
+ const char *path, int64_t fail)
+{
+ const struct dm_config_node *n = find(start, path);
+
+ if (n && n->v && n->v->type == DM_CFG_INT) {
+ /* log_very_verbose("Setting %s to %" PRId64, path, n->v->v.i); */
+ return n->v->v.i;
+ }
+
+ log_very_verbose("%s not found in config: defaulting to %" PRId64,
+ path, fail);
+ return fail;
+}
+
+static float _find_config_float(const void *start, node_lookup_fn find,
+ const char *path, float fail)
+{
+ const struct dm_config_node *n = find(start, path);
+
+ if (n && n->v && n->v->type == DM_CFG_FLOAT) {
+ /* log_very_verbose("Setting %s to %f", path, n->v->v.f); */
+ return n->v->v.f;
+ }
+
+ log_very_verbose("%s not found in config: defaulting to %f",
+ path, fail);
+
+ return fail;
+
+}
+
/* Case-insensitively test whether 'str' occurs in NULL-terminated 'values'. */
static int _str_in_array(const char *str, const char * const values[])
{
	const char * const *v;

	for (v = values; *v; v++)
		if (!strcasecmp(str, *v))
			return 1;

	return 0;
}
+
+static int _str_to_bool(const char *str, int fail)
+{
+ const char * const _true_values[] = { "y", "yes", "on", "true", NULL };
+ const char * const _false_values[] = { "n", "no", "off", "false", NULL };
+
+ if (_str_in_array(str, _true_values))
+ return 1;
+
+ if (_str_in_array(str, _false_values))
+ return 0;
+
+ return fail;
+}
+
+static int _find_config_bool(const void *start, node_lookup_fn find,
+ const char *path, int fail)
+{
+ const struct dm_config_node *n = find(start, path);
+ const struct dm_config_value *v;
+ int b;
+
+ if (n) {
+ v = n->v;
+
+ switch (v->type) {
+ case DM_CFG_INT:
+ b = v->v.i ? 1 : 0;
+ /* log_very_verbose("Setting %s to %d", path, b); */
+ return b;
+
+ case DM_CFG_STRING:
+ b = _str_to_bool(v->v.str, fail);
+ /* log_very_verbose("Setting %s to %d", path, b); */
+ return b;
+ default:
+ ;
+ }
+ }
+
+ log_very_verbose("%s not found in config: defaulting to %d",
+ path, fail);
+
+ return fail;
+}
+
+/***********************************
+ * node-based lookup
+ **/
+
+struct dm_config_node *dm_config_find_node(const struct dm_config_node *cn,
+ const char *path)
+{
+ return (struct dm_config_node *) _find_config_node(cn, path);
+}
+
+int dm_config_find_int(const struct dm_config_node *cn, const char *path, int fail)
+{
+ /* FIXME Add log_error message on overflow */
+ return (int) _find_config_int64(cn, _find_config_node, path, (int64_t) fail);
+}
+
+int64_t dm_config_find_int64(const struct dm_config_node *cn, const char *path, int64_t fail)
+{
+ return _find_config_int64(cn, _find_config_node, path, fail);
+}
+
+float dm_config_find_float(const struct dm_config_node *cn, const char *path,
+ float fail)
+{
+ return _find_config_float(cn, _find_config_node, path, fail);
+}
+
+int dm_config_find_bool(const struct dm_config_node *cn, const char *path, int fail)
+{
+ return _find_config_bool(cn, _find_config_node, path, fail);
+}
+
+int dm_config_value_is_bool(const struct dm_config_value *v) {
+ if (!v)
+ return 0;
+
+ switch(v->type) {
+ case DM_CFG_INT:
+ return 1;
+ case DM_CFG_STRING:
+ return _str_to_bool(v->v.str, -1) != -1;
+ default:
+ return 0;
+ }
+}
+
+/***********************************
+ * tree-based lookup
+ **/
+
+const struct dm_config_node *dm_config_tree_find_node(const struct dm_config_tree *cft,
+ const char *path)
+{
+ return _find_first_config_node(cft, path);
+}
+
+const char *dm_config_tree_find_str(const struct dm_config_tree *cft, const char *path,
+ const char *fail)
+{
+ return _find_config_str(cft, _find_first_config_node, path, fail, 0);
+}
+
+const char *dm_config_tree_find_str_allow_empty(const struct dm_config_tree *cft, const char *path,
+ const char *fail)
+{
+ return _find_config_str(cft, _find_first_config_node, path, fail, 1);
+}
+
+int dm_config_tree_find_int(const struct dm_config_tree *cft, const char *path, int fail)
+{
+ /* FIXME Add log_error message on overflow */
+ return (int) _find_config_int64(cft, _find_first_config_node, path, (int64_t) fail);
+}
+
+int64_t dm_config_tree_find_int64(const struct dm_config_tree *cft, const char *path, int64_t fail)
+{
+ return _find_config_int64(cft, _find_first_config_node, path, fail);
+}
+
+float dm_config_tree_find_float(const struct dm_config_tree *cft, const char *path,
+ float fail)
+{
+ return _find_config_float(cft, _find_first_config_node, path, fail);
+}
+
+int dm_config_tree_find_bool(const struct dm_config_tree *cft, const char *path, int fail)
+{
+ return _find_config_bool(cft, _find_first_config_node, path, fail);
+}
+
+/************************************/
+
+
+int dm_config_get_uint32(const struct dm_config_node *cn, const char *path,
+ uint32_t *result)
+{
+ const struct dm_config_node *n;
+
+ n = _find_config_node(cn, path);
+
+ if (!n || !n->v || n->v->type != DM_CFG_INT)
+ return 0;
+
+ if (result)
+ *result = n->v->v.i;
+ return 1;
+}
+
+int dm_config_get_uint64(const struct dm_config_node *cn, const char *path,
+ uint64_t *result)
+{
+ const struct dm_config_node *n;
+
+ n = _find_config_node(cn, path);
+
+ if (!n || !n->v || n->v->type != DM_CFG_INT)
+ return 0;
+
+ if (result)
+ *result = (uint64_t) n->v->v.i;
+ return 1;
+}
+
+int dm_config_get_str(const struct dm_config_node *cn, const char *path,
+ const char **result)
+{
+ const struct dm_config_node *n;
+
+ n = _find_config_node(cn, path);
+
+ if (!n || !n->v || n->v->type != DM_CFG_STRING)
+ return 0;
+
+ if (result)
+ *result = n->v->v.str;
+ return 1;
+}
+
+int dm_config_get_list(const struct dm_config_node *cn, const char *path,
+ const struct dm_config_value **result)
+{
+ const struct dm_config_node *n;
+
+ n = _find_config_node(cn, path);
+ /* TODO when we represent single-item lists consistently, add a check
+ * for n->v->next != NULL */
+ if (!n || !n->v)
+ return 0;
+
+ if (result)
+ *result = n->v;
+ return 1;
+}
+
+int dm_config_get_section(const struct dm_config_node *cn, const char *path,
+ const struct dm_config_node **result)
+{
+ const struct dm_config_node *n;
+
+ n = _find_config_node(cn, path);
+ if (!n || n->v)
+ return 0;
+
+ if (result)
+ *result = n;
+ return 1;
+}
+
+int dm_config_has_node(const struct dm_config_node *cn, const char *path)
+{
+ return _find_config_node(cn, path) ? 1 : 0;
+}
+
+/*
+ * Convert a token type to the char it represents.
+ */
+static char _token_type_to_char(int type)
+{
+ switch (type) {
+ case TOK_SECTION_B:
+ return SECTION_B_CHAR;
+ case TOK_SECTION_E:
+ return SECTION_E_CHAR;
+ default:
+ return 0;
+ }
+}
+
/* Count occurrences of the character represented by token 'type' within
 * the first 'len' bytes of 'str'. */
static unsigned _count_tokens(const char *str, unsigned len, int type)
{
	return dm_count_chars(str, len, _token_type_to_char(type));
}
+
+const char *dm_config_parent_name(const struct dm_config_node *n)
+{
+ return (n->parent ? n->parent->key : "(root)");
+}
+/*
+ * Heuristic function to make a quick guess as to whether a text
+ * region probably contains a valid config "section". (Useful for
+ * scanning areas of the disk for old metadata.)
+ * Config sections contain various tokens, may contain other sections
+ * and strings, and are delimited by begin (type 'TOK_SECTION_B') and
+ * end (type 'TOK_SECTION_E') tokens. As a quick heuristic, we just
+ * count the number of begin and end tokens, and see if they are
+ * non-zero and the counts match.
+ * Full validation of the section should be done with another function
+ * (for example, read_config_fd).
+ *
+ * Returns:
+ * 0 - probably is not a valid config section
+ * 1 - probably _is_ a valid config section
+ */
+unsigned dm_config_maybe_section(const char *str, unsigned len)
+{
+ int begin_count;
+ int end_count;
+
+ begin_count = _count_tokens(str, len, TOK_SECTION_B);
+ end_count = _count_tokens(str, len, TOK_SECTION_E);
+
+ if (begin_count && end_count && (begin_count == end_count))
+ return 1;
+ else
+ return 0;
+}
+
/*
 * Deep-copy a config value chain into pool 'mem'.
 * Strings are duplicated into the pool; other payloads are copied by
 * union assignment.  Recurses along v->next.  Returns NULL on
 * allocation failure (partial allocations remain in the pool).
 */
__attribute__((nonnull(1, 2)))
static struct dm_config_value *_clone_config_value(struct dm_pool *mem,
						   const struct dm_config_value *v)
{
	struct dm_config_value *new_cv;

	if (!(new_cv = _create_value(mem))) {
		log_error("Failed to clone config value.");
		return NULL;
	}

	new_cv->type = v->type;
	if (v->type == DM_CFG_STRING) {
		if (!(new_cv->v.str = dm_pool_strdup(mem, v->v.str))) {
			log_error("Failed to clone config string value.");
			return NULL;
		}
	} else
		new_cv->v = v->v;	/* non-string payloads share the union */

	if (v->next && !(new_cv->next = _clone_config_value(mem, v->next)))
		return_NULL;

	return new_cv;
}
+
/*
 * Deep-copy config node 'cn' (key, id, value chain and children) into
 * pool 'mem'.  When 'siblings' is set the entire sibling chain is
 * cloned as well.  Returns NULL on failure; partially cloned nodes are
 * only reclaimed when the pool itself is destroyed.
 */
struct dm_config_node *dm_config_clone_node_with_mem(struct dm_pool *mem, const struct dm_config_node *cn, int siblings)
{
	struct dm_config_node *new_cn;

	if (!cn) {
		log_error("Cannot clone NULL config node.");
		return NULL;
	}

	if (!(new_cn = _create_node(mem))) {
		log_error("Failed to clone config node.");
		return NULL;
	}

	if ((cn->key && !(new_cn->key = dm_pool_strdup(mem, cn->key)))) {
		log_error("Failed to clone config node key.");
		return NULL;
	}

	new_cn->id = cn->id;

	/* Clone value chain, then children, then (optionally) siblings */
	if ((cn->v && !(new_cn->v = _clone_config_value(mem, cn->v))) ||
	    (cn->child && !(new_cn->child = dm_config_clone_node_with_mem(mem, cn->child, 1))) ||
	    (siblings && cn->sib && !(new_cn->sib = dm_config_clone_node_with_mem(mem, cn->sib, siblings))))
		return_NULL; /* 'new_cn' released with mem pool */

	return new_cn;
}
+
+struct dm_config_node *dm_config_clone_node(struct dm_config_tree *cft, const struct dm_config_node *node, int sib)
+{
+ return dm_config_clone_node_with_mem(cft->mem, node, sib);
+}
+
+struct dm_config_node *dm_config_create_node(struct dm_config_tree *cft, const char *key)
+{
+ struct dm_config_node *cn;
+
+ if (!(cn = _create_node(cft->mem))) {
+ log_error("Failed to create config node.");
+ return NULL;
+ }
+ if (!(cn->key = dm_pool_strdup(cft->mem, key))) {
+ log_error("Failed to create config node's key.");
+ return NULL;
+ }
+ cn->parent = NULL;
+ cn->v = NULL;
+
+ return cn;
+}
+
+struct dm_config_value *dm_config_create_value(struct dm_config_tree *cft)
+{
+ return _create_value(cft->mem);
+}
+
+void dm_config_value_set_format_flags(struct dm_config_value *cv, uint32_t format_flags)
+{
+ if (!cv)
+ return;
+
+ cv->format_flags = format_flags;
+}
+
+uint32_t dm_config_value_get_format_flags(struct dm_config_value *cv)
+{
+ if (!cv)
+ return 0;
+
+ return cv->format_flags;
+}
+
+struct dm_pool *dm_config_memory(struct dm_config_tree *cft)
+{
+ return cft->mem;
+}
+
/*
 * _enumerate() callback: copy the value found at 'path' into the target
 * tree 'baton' (a struct dm_config_tree *), creating intermediate nodes
 * as needed.  A dummy parent wrapping cft->root lets
 * _find_or_make_node() update the root pointer when it creates the
 * first top-level child.
 */
static int _override_path(const char *path, struct dm_config_node *node, void *baton)
{
	struct dm_config_tree *cft = baton;
	struct dm_config_node dummy, *target;
	dummy.child = cft->root;
	if (!(target = _find_or_make_node(cft->mem, &dummy, path, 0)))
		return_0;
	if (!(target->v = _clone_config_value(cft->mem, node->v)))
		return_0;
	cft->root = dummy.child;
	return 1;
}
+
/*
 * Depth-first walk over a sibling chain: invoke cb("path/key", node,
 * baton) for each leaf node, recursing into children with "path/key" as
 * the new prefix.  Returns 1 on success, 0 on allocation or callback
 * failure.
 */
static int _enumerate(const char *path, struct dm_config_node *cn, int (*cb)(const char *, struct dm_config_node *, void *), void *baton)
{
	char *sub = NULL;

	while (cn) {
		/* Build "path/key" for this node */
		if (dm_asprintf(&sub, "%s/%s", path, cn->key) < 0)
			return_0;
		if (cn->child) {
			if (!_enumerate(sub, cn->child, cb, baton))
				goto_bad;
		} else
			if (!cb(sub, cn, baton))
				goto_bad;
		dm_free(sub);
		cn = cn->sib;
	}
	return 1;
bad:
	dm_free(sub);
	return 0;
}
+
/*
 * Flatten a cascade of config trees into a single newly created tree.
 * Trees are applied from the bottom of the cascade upwards, so settings
 * nearer the head of the cascade override those further down.
 *
 * NOTE(review): the return value of _enumerate() is ignored, so an
 * allocation failure mid-copy yields a silently incomplete result tree
 * — confirm whether callers rely on this best-effort behaviour.
 */
struct dm_config_tree *dm_config_flatten(struct dm_config_tree *cft)
{
	struct dm_config_tree *res = dm_config_create(), *done = NULL, *current = NULL;

	if (!res)
		return_NULL;

	/* Repeatedly apply the deepest not-yet-processed tree */
	while (done != cft) {
		current = cft;
		while (current->cascade != done)
			current = current->cascade;
		_enumerate("", current->root, _override_path, res);
		done = current;
	}

	return res;
}
+
+int dm_config_remove_node(struct dm_config_node *parent, struct dm_config_node *rem_node)
+{
+ struct dm_config_node *cn = parent->child, *last = NULL;
+ while (cn) {
+ if (cn == rem_node) {
+ if (last)
+ last->sib = cn->sib;
+ else
+ parent->child = cn->sib;
+ return 1;
+ }
+ last = cn;
+ cn = cn->sib;
+ }
+ return 0;
+}
diff --git a/device_mapper/libdm-deptree.c b/device_mapper/libdm-deptree.c
new file mode 100644
index 000000000..fab3e0662
--- /dev/null
+++ b/device_mapper/libdm-deptree.c
@@ -0,0 +1,3853 @@
+/*
+ * Copyright (C) 2005-2017 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "misc/dmlib.h"
+#include "ioctl/libdm-targets.h"
+#include "libdm-common.h"
+#include "misc/kdev_t.h"
+#include "misc/dm-ioctl.h"
+
+#include <stdarg.h>
+#include <sys/param.h>
+#include <sys/utsname.h>
+
+#define MAX_TARGET_PARAMSIZE 500000
+
/* Supported segment types */
/*
 * NOTE: the _dm_segtypes[] table below maps these 1:1 to kernel target
 * names and appends overloaded raid aliases at its end — see the
 * warning there before adding new enum members.
 */
enum {
	SEG_CACHE,
	SEG_CRYPT,
	SEG_ERROR,
	SEG_LINEAR,
	SEG_MIRRORED,
	SEG_SNAPSHOT,
	SEG_SNAPSHOT_ORIGIN,
	SEG_SNAPSHOT_MERGE,
	SEG_STRIPED,
	SEG_ZERO,
	SEG_THIN_POOL,
	SEG_THIN,
	SEG_RAID0,
	SEG_RAID0_META,
	SEG_RAID1,
	SEG_RAID10,
	SEG_RAID4,
	SEG_RAID5_N,
	SEG_RAID5_LA,
	SEG_RAID5_RA,
	SEG_RAID5_LS,
	SEG_RAID5_RS,
	SEG_RAID6_N_6,
	SEG_RAID6_ZR,
	SEG_RAID6_NR,
	SEG_RAID6_NC,
	SEG_RAID6_LS_6,
	SEG_RAID6_RS_6,
	SEG_RAID6_LA_6,
	SEG_RAID6_RA_6,
};
+
/* FIXME Add crypt and multipath support */

/* Mapping of segment type enums to kernel target / type names. */
static const struct {
	unsigned type;
	const char target[16];
} _dm_segtypes[] = {
	{ SEG_CACHE, "cache" },
	{ SEG_CRYPT, "crypt" },
	{ SEG_ERROR, "error" },
	{ SEG_LINEAR, "linear" },
	{ SEG_MIRRORED, "mirror" },
	{ SEG_SNAPSHOT, "snapshot" },
	{ SEG_SNAPSHOT_ORIGIN, "snapshot-origin" },
	{ SEG_SNAPSHOT_MERGE, "snapshot-merge" },
	{ SEG_STRIPED, "striped" },
	{ SEG_ZERO, "zero"},
	{ SEG_THIN_POOL, "thin-pool"},
	{ SEG_THIN, "thin"},
	{ SEG_RAID0, "raid0"},
	{ SEG_RAID0_META, "raid0_meta"},
	{ SEG_RAID1, "raid1"},
	{ SEG_RAID10, "raid10"},
	{ SEG_RAID4, "raid4"},
	{ SEG_RAID5_N, "raid5_n"},
	{ SEG_RAID5_LA, "raid5_la"},
	{ SEG_RAID5_RA, "raid5_ra"},
	{ SEG_RAID5_LS, "raid5_ls"},
	{ SEG_RAID5_RS, "raid5_rs"},
	{ SEG_RAID6_N_6,"raid6_n_6"},
	{ SEG_RAID6_ZR, "raid6_zr"},
	{ SEG_RAID6_NR, "raid6_nr"},
	{ SEG_RAID6_NC, "raid6_nc"},
	{ SEG_RAID6_LS_6, "raid6_ls_6"},
	{ SEG_RAID6_RS_6, "raid6_rs_6"},
	{ SEG_RAID6_LA_6, "raid6_la_6"},
	{ SEG_RAID6_RA_6, "raid6_ra_6"},


	/*
	 * WARNING: Since 'raid' target overloads this 1:1 mapping table
	 * for search do not add new enum elements past them!
	 */
	{ SEG_RAID5_LS, "raid5"}, /* same as "raid5_ls" (default for MD also) */
	{ SEG_RAID6_ZR, "raid6"}, /* same as "raid6_zr" */
	{ SEG_RAID10, "raid10_near"}, /* same as "raid10" */
};
+
/* Some segment types have a list of areas of other devices attached */
struct seg_area {
	struct dm_list list;		/* Membership in load_segment.areas */

	struct dm_tree_node *dev_node;	/* Device supplying this area */

	uint64_t offset;		/* Offset into dev_node — units presumably sectors; TODO confirm */
};
+
/* Payload of one thin-pool message; the union arm in use is selected by 'type'. */
struct dm_thin_message {
	dm_thin_message_t type;
	union {
		struct {
			uint32_t device_id;
			uint32_t origin_id;
		} m_create_snap;
		struct {
			uint32_t device_id;
		} m_create_thin;
		struct {
			uint32_t device_id;
		} m_delete;
		struct {
			uint64_t current_id;
			uint64_t new_id;
		} m_set_transaction_id;
	} u;
};

/* Queued thin-pool message plus the errno its delivery is allowed to return. */
struct thin_message {
	struct dm_list list;		/* Membership in load_segment.thin_messages */
	struct dm_thin_message message;
	int expected_errno;
};
+
/* Per-segment properties */
/* One table line to be loaded for a device; fields are grouped by the
 * segment (target) types that consume them. */
struct load_segment {
	struct dm_list list;		/* Membership in load_properties.segs */

	unsigned type;			/* SEG_* discriminator */

	uint64_t size;			/* Segment length — presumably sectors per dm table convention; TODO confirm */

	unsigned area_count;		/* Linear + Striped + Mirrored + Crypt */
	struct dm_list areas;		/* Linear + Striped + Mirrored + Crypt */

	uint32_t stripe_size;		/* Striped + raid */

	int persistent;			/* Snapshot */
	uint32_t chunk_size;		/* Snapshot */
	struct dm_tree_node *cow;	/* Snapshot */
	struct dm_tree_node *origin;	/* Snapshot + Snapshot origin + Cache */
	struct dm_tree_node *merge;	/* Snapshot */

	struct dm_tree_node *log;	/* Mirror */
	uint32_t region_size;		/* Mirror + raid */
	unsigned clustered;		/* Mirror */
	unsigned mirror_area_count;	/* Mirror */
	uint32_t flags;			/* Mirror + raid + Cache */
	char *uuid;			/* Clustered mirror log */

	const char *policy_name;	/* Cache */
	unsigned policy_argc;		/* Cache */
	struct dm_config_node *policy_settings;	/* Cache */

	const char *cipher;		/* Crypt */
	const char *chainmode;		/* Crypt */
	const char *iv;			/* Crypt */
	uint64_t iv_offset;		/* Crypt */
	const char *key;		/* Crypt */

	int delta_disks;		/* raid reshape number of disks */
	int data_offset;		/* raid reshape data offset on disk to set */
	uint64_t rebuilds[RAID_BITMAP_SIZE];	/* raid */
	uint64_t writemostly[RAID_BITMAP_SIZE];	/* raid */
	uint32_t writebehind;		/* raid */
	uint32_t max_recovery_rate;	/* raid kB/sec/disk */
	uint32_t min_recovery_rate;	/* raid kB/sec/disk */
	uint32_t data_copies;		/* raid10 data_copies */

	struct dm_tree_node *metadata;	/* Thin_pool + Cache */
	struct dm_tree_node *pool;	/* Thin_pool, Thin */
	struct dm_tree_node *external;	/* Thin */
	struct dm_list thin_messages;	/* Thin_pool */
	uint64_t transaction_id;	/* Thin_pool */
	uint64_t low_water_mark;	/* Thin_pool */
	uint32_t data_block_size;       /* Thin_pool + cache */
	unsigned skip_block_zeroing;	/* Thin_pool */
	unsigned ignore_discard;	/* Thin_pool target vsn 1.1 */
	unsigned no_discard_passdown;	/* Thin_pool target vsn 1.1 */
	unsigned error_if_no_space;	/* Thin pool target vsn 1.10 */
	unsigned read_only;		/* Thin pool target vsn 1.3 */
	uint32_t device_id;		/* Thin */

};
+
/* Per-device properties */
/* Everything needed to (re)create and (re)load one device's table. */
struct load_properties {
	int read_only;
	uint32_t major;
	uint32_t minor;

	uint32_t read_ahead;
	uint32_t read_ahead_flags;

	unsigned segment_count;		/* Entries queued on 'segs' */
	int size_changed;
	struct dm_list segs;		/* List of struct load_segment */

	const char *new_name;		/* Pending rename, if any */

	/* If immediate_dev_node is set to 1, try to create the dev node
	 * as soon as possible (e.g. in preload stage even during traversal
	 * and processing of dm tree). This will also flush all stacked dev
	 * node operations, synchronizing with udev.
	 */
	unsigned immediate_dev_node;

	/*
	 * If the device size changed from zero and this is set,
	 * don't resume the device immediately, even if the device
	 * has parents. This works provided the parents do not
	 * validate the device size and is required by pvmove to
	 * avoid starting the mirror resync operation too early.
	 */
	unsigned delay_resume_if_new;

	/*
	 * Preload tree normally only loads and not resume, but there is
	 * automatic resume when target is extended, as it's believed
	 * there can be no i/o flying to this 'new' extended space
	 * from any device above. Reason is that preloaded target above
	 * may actually need to see its bigger subdevice before it
	 * gets suspended. As long as devices are simple linears
	 * there is no problem to resume bigger device in preload (before commit).
	 * However complex targets like thin-pool (raid,cache...)
	 * they shall not be resumed before their commit.
	 */
	unsigned delay_resume_if_extended;

	/*
	 * Call node_send_messages(), set to 2 if there are messages.
	 * When != 0, it validates matching transaction id, thus thin-pools
	 * where transaction_id is passed as 0 are never validated, this
	 * allows external management of thin-pool TID.
	 */
	unsigned send_messages;
	/* Skip suspending node's children, used when sending messages to thin-pool */
	int skip_suspend;
};
+
/* Two of these used to join two nodes with uses and used_by. */
struct dm_tree_link {
	struct dm_list list;		/* Membership in uses/used_by */
	struct dm_tree_node *node;	/* Node at the other end of the edge */
};

/* One device in the dependency tree. */
struct dm_tree_node {
	struct dm_tree *dtree;		/* Owning tree */

	const char *name;
	const char *uuid;
	struct dm_info info;

	struct dm_list uses;	/* Nodes this node uses */
	struct dm_list used_by;	/* Nodes that use this node */

	int activation_priority;	/* 0 gets activated first */
	int implicit_deps;		/* 1 device only implicitly referenced */

	uint16_t udev_flags;	/* Udev control flags */

	void *context;		/* External supplied context */

	struct load_properties props;	/* For creation/table (re)load */

	/*
	 * If presuspend of child node is needed
	 * Note: only direct child is allowed
	 */
	struct dm_tree_node *presuspend_node;

	/* Callback */
	dm_node_callback_fn callback;
	void *callback_data;

	/*
	 * TODO:
	 * Add advanced code which tracks of send ioctls and their
	 * proper revert operation for more advanced recovery
	 * Current code serves mostly only to recovery when
	 * thin pool metadata check fails and command would
	 * have left active thin data and metadata subvolumes.
	 */
	struct dm_list activated;	/* Head of activated nodes for preload revert */
	struct dm_list activated_list;	/* List of activated nodes for preload revert */
};

/* The dependency tree itself: pool-backed, with dev and uuid lookup hashes. */
struct dm_tree {
	struct dm_pool *mem;
	struct dm_hash_table *devs;	/* Nodes keyed by packed dev_t */
	struct dm_hash_table *uuids;	/* Nodes keyed by uuid string */
	struct dm_tree_node root;	/* Synthetic root: links top/bottom level nodes */
	int skip_lockfs;		/* 1 skips lockfs (for non-snapshots) */
	int no_flush;			/* 1 sets noflush (mirrors/multipath) */
	int retry_remove;		/* 1 retries remove if not successful */
	uint32_t cookie;
	char buf[DM_NAME_LEN + 32];	/* print buffer for device_name (major:minor) */
	const char **optional_uuid_suffixes;	/* uuid suffixes ignored when matching */
};
+
+/*
+ * Tree functions.
+ */
+struct dm_tree *dm_tree_create(void)
+{
+ struct dm_pool *dmem;
+ struct dm_tree *dtree;
+
+ if (!(dmem = dm_pool_create("dtree", 1024)) ||
+ !(dtree = dm_pool_zalloc(dmem, sizeof(*dtree)))) {
+ log_error("Failed to allocate dtree.");
+ if (dmem)
+ dm_pool_destroy(dmem);
+ return NULL;
+ }
+
+ dtree->root.dtree = dtree;
+ dm_list_init(&dtree->root.uses);
+ dm_list_init(&dtree->root.used_by);
+ dm_list_init(&dtree->root.activated);
+ dtree->skip_lockfs = 0;
+ dtree->no_flush = 0;
+ dtree->mem = dmem;
+ dtree->optional_uuid_suffixes = NULL;
+
+ if (!(dtree->devs = dm_hash_create(8))) {
+ log_error("dtree hash creation failed");
+ dm_pool_destroy(dtree->mem);
+ return NULL;
+ }
+
+ if (!(dtree->uuids = dm_hash_create(32))) {
+ log_error("dtree uuid hash creation failed");
+ dm_hash_destroy(dtree->devs);
+ dm_pool_destroy(dtree->mem);
+ return NULL;
+ }
+
+ return dtree;
+}
+
+void dm_tree_free(struct dm_tree *dtree)
+{
+ if (!dtree)
+ return;
+
+ dm_hash_destroy(dtree->uuids);
+ dm_hash_destroy(dtree->devs);
+ dm_pool_destroy(dtree->mem);
+}
+
+void dm_tree_set_cookie(struct dm_tree_node *node, uint32_t cookie)
+{
+ node->dtree->cookie = cookie;
+}
+
+uint32_t dm_tree_get_cookie(struct dm_tree_node *node)
+{
+ return node->dtree->cookie;
+}
+
+void dm_tree_skip_lockfs(struct dm_tree_node *dnode)
+{
+ dnode->dtree->skip_lockfs = 1;
+}
+
+void dm_tree_use_no_flush_suspend(struct dm_tree_node *dnode)
+{
+ dnode->dtree->no_flush = 1;
+}
+
+void dm_tree_retry_remove(struct dm_tree_node *dnode)
+{
+ dnode->dtree->retry_remove = 1;
+}
+
+/*
+ * Node functions.
+ */
+static int _nodes_are_linked(const struct dm_tree_node *parent,
+ const struct dm_tree_node *child)
+{
+ struct dm_tree_link *dlink;
+
+ dm_list_iterate_items(dlink, &parent->uses)
+ if (dlink->node == child)
+ return 1;
+
+ return 0;
+}
+
+static int _link(struct dm_list *list, struct dm_tree_node *node)
+{
+ struct dm_tree_link *dlink;
+
+ if (!(dlink = dm_pool_alloc(node->dtree->mem, sizeof(*dlink)))) {
+ log_error("dtree link allocation failed");
+ return 0;
+ }
+
+ dlink->node = node;
+ dm_list_add(list, &dlink->list);
+
+ return 1;
+}
+
+static int _link_nodes(struct dm_tree_node *parent,
+ struct dm_tree_node *child)
+{
+ if (_nodes_are_linked(parent, child))
+ return 1;
+
+ if (!_link(&parent->uses, child))
+ return 0;
+
+ if (!_link(&child->used_by, parent))
+ return 0;
+
+ return 1;
+}
+
+static void _unlink(struct dm_list *list, struct dm_tree_node *node)
+{
+ struct dm_tree_link *dlink;
+
+ dm_list_iterate_items(dlink, list)
+ if (dlink->node == node) {
+ dm_list_del(&dlink->list);
+ break;
+ }
+}
+
+static void _unlink_nodes(struct dm_tree_node *parent,
+ struct dm_tree_node *child)
+{
+ if (!_nodes_are_linked(parent, child))
+ return;
+
+ _unlink(&parent->uses, child);
+ _unlink(&child->used_by, parent);
+}
+
+static int _add_to_toplevel(struct dm_tree_node *node)
+{
+ return _link_nodes(&node->dtree->root, node);
+}
+
+static void _remove_from_toplevel(struct dm_tree_node *node)
+{
+ _unlink_nodes(&node->dtree->root, node);
+}
+
+static int _add_to_bottomlevel(struct dm_tree_node *node)
+{
+ return _link_nodes(node, &node->dtree->root);
+}
+
+static void _remove_from_bottomlevel(struct dm_tree_node *node)
+{
+ _unlink_nodes(node, &node->dtree->root);
+}
+
+static int _link_tree_nodes(struct dm_tree_node *parent, struct dm_tree_node *child)
+{
+ /* Don't link to root node if child already has a parent */
+ if (parent == &parent->dtree->root) {
+ if (dm_tree_node_num_children(child, 1))
+ return 1;
+ } else
+ _remove_from_toplevel(child);
+
+ if (child == &child->dtree->root) {
+ if (dm_tree_node_num_children(parent, 0))
+ return 1;
+ } else
+ _remove_from_bottomlevel(parent);
+
+ return _link_nodes(parent, child);
+}
+
/*
 * Allocate a node for (name, uuid, info) from the tree's pool and
 * register it in the devs hash (keyed by packed dev_t) and, when uuid
 * is non-empty, in the uuids hash.  Returns NULL on failure after
 * undoing any partial registration.
 */
static struct dm_tree_node *_create_dm_tree_node(struct dm_tree *dtree,
						 const char *name,
						 const char *uuid,
						 struct dm_info *info,
						 void *context,
						 uint16_t udev_flags)
{
	struct dm_tree_node *node;
	dev_t dev;

	if (!(node = dm_pool_zalloc(dtree->mem, sizeof(*node))) ||
	    !(node->name = dm_pool_strdup(dtree->mem, name)) ||
	    !(node->uuid = dm_pool_strdup(dtree->mem, uuid))) {
		log_error("_create_dm_tree_node alloc failed.");
		return NULL;
	}

	node->dtree = dtree;
	node->info = *info;
	node->context = context;
	node->udev_flags = udev_flags;

	dm_list_init(&node->uses);
	dm_list_init(&node->used_by);
	dm_list_init(&node->activated);
	dm_list_init(&node->props.segs);

	dev = MKDEV((dev_t)info->major, (dev_t)info->minor);

	if (!dm_hash_insert_binary(dtree->devs, (const char *) &dev,
				   sizeof(dev), node)) {
		log_error("dtree node hash insertion failed");
		dm_pool_free(dtree->mem, node);
		return NULL;
	}

	if (*uuid && !dm_hash_insert(dtree->uuids, uuid, node)) {
		log_error("dtree uuid hash insertion failed");
		/* Roll back the devs hash entry before freeing the node */
		dm_hash_remove_binary(dtree->devs, (const char *) &dev,
				      sizeof(dev));
		dm_pool_free(dtree->mem, node);
		return NULL;
	}

	return node;
}
+
+static struct dm_tree_node *_find_dm_tree_node(struct dm_tree *dtree,
+ uint32_t major, uint32_t minor)
+{
+ dev_t dev = MKDEV((dev_t)major, (dev_t)minor);
+
+ return dm_hash_lookup_binary(dtree->devs, (const char *) &dev,
+ sizeof(dev));
+}
+
+void dm_tree_set_optional_uuid_suffixes(struct dm_tree *dtree, const char **optional_uuid_suffixes)
+{
+ dtree->optional_uuid_suffixes = optional_uuid_suffixes;
+}
+
+static struct dm_tree_node *_find_dm_tree_node_by_uuid(struct dm_tree *dtree,
+ const char *uuid)
+{
+ struct dm_tree_node *node;
+ const char *default_uuid_prefix;
+ size_t default_uuid_prefix_len;
+ const char *suffix, *suffix_position;
+ char uuid_without_suffix[DM_UUID_LEN];
+ unsigned i = 0;
+ const char **suffix_list = dtree->optional_uuid_suffixes;
+
+ if ((node = dm_hash_lookup(dtree->uuids, uuid))) {
+ log_debug("Matched uuid %s in deptree.", uuid);
+ return node;
+ }
+
+ default_uuid_prefix = dm_uuid_prefix();
+ default_uuid_prefix_len = strlen(default_uuid_prefix);
+
+ if (suffix_list && (suffix_position = rindex(uuid, '-'))) {
+ while ((suffix = suffix_list[i++])) {
+ if (strcmp(suffix_position + 1, suffix))
+ continue;
+
+ (void) strncpy(uuid_without_suffix, uuid, sizeof(uuid_without_suffix));
+ uuid_without_suffix[suffix_position - uuid] = '\0';
+
+ if ((node = dm_hash_lookup(dtree->uuids, uuid_without_suffix))) {
+ log_debug("Matched uuid %s (missing suffix -%s) in deptree.", uuid_without_suffix, suffix);
+ return node;
+ }
+
+ break;
+ };
+ }
+
+ if (strncmp(uuid, default_uuid_prefix, default_uuid_prefix_len))
+ return NULL;
+
+ if ((node = dm_hash_lookup(dtree->uuids, uuid + default_uuid_prefix_len))) {
+ log_debug("Matched uuid %s (missing prefix) in deptree.", uuid + default_uuid_prefix_len);
+ return node;
+ }
+
+ log_debug("Not matched uuid %s in deptree.", uuid);
+ return NULL;
+}
+
+/* Return node's device_name (major:minor) for debug messages */
+static const char *_node_name(struct dm_tree_node *dnode)
+{
+ if (dm_snprintf(dnode->dtree->buf, sizeof(dnode->dtree->buf),
+ "%s (" FMTu32 ":" FMTu32 ")",
+ dnode->name ? dnode->name : "",
+ dnode->info.major, dnode->info.minor) < 0) {
+ stack;
+ return dnode->name;
+ }
+
+ return dnode->dtree->buf;
+}
+
+void dm_tree_node_set_udev_flags(struct dm_tree_node *dnode, uint16_t udev_flags)
+
+{
+ if (udev_flags != dnode->udev_flags)
+ log_debug_activation("Resetting %s udev_flags from 0x%x to 0x%x.",
+ _node_name(dnode),
+ dnode->udev_flags, udev_flags);
+ dnode->udev_flags = udev_flags;
+}
+
+/* Record the read-ahead value and flags to be applied when the node is resumed. */
+void dm_tree_node_set_read_ahead(struct dm_tree_node *dnode,
+ uint32_t read_ahead,
+ uint32_t read_ahead_flags)
+{
+ dnode->props.read_ahead = read_ahead;
+ dnode->props.read_ahead_flags = read_ahead_flags;
+}
+
+/* Mark 'presuspend_node' as the node to be suspended before 'node' is deactivated. */
+void dm_tree_node_set_presuspend_node(struct dm_tree_node *node,
+ struct dm_tree_node *presuspend_node)
+{
+ node->presuspend_node = presuspend_node;
+}
+
+/* Return the node's device name, or "" if the device does not exist in the kernel. */
+const char *dm_tree_node_get_name(const struct dm_tree_node *node)
+{
+ if (!node->info.exists)
+ return "";
+
+ return node->name;
+}
+
+/* Return the node's uuid, or "" if the device does not exist in the kernel. */
+const char *dm_tree_node_get_uuid(const struct dm_tree_node *node)
+{
+ if (!node->info.exists)
+ return "";
+
+ return node->uuid;
+}
+
+/* Return a pointer to the node's cached dm_info (valid for the node's lifetime). */
+const struct dm_info *dm_tree_node_get_info(const struct dm_tree_node *node)
+{
+ return &node->info;
+}
+
+/* Return the caller-supplied context pointer stored with the node. */
+void *dm_tree_node_get_context(const struct dm_tree_node *node)
+{
+ return node->context;
+}
+
+/* Return the node's size_changed property (set while loading its table). */
+int dm_tree_node_size_changed(const struct dm_tree_node *dnode)
+{
+ return dnode->props.size_changed;
+}
+
+/*
+ * Count the node's children: devices it uses, or, when 'inverted' is set,
+ * devices using it. Nodes linked directly to the tree root report 0.
+ */
+int dm_tree_node_num_children(const struct dm_tree_node *node, uint32_t inverted)
+{
+ const struct dm_list *children;
+
+ if (inverted) {
+ if (_nodes_are_linked(&node->dtree->root, node))
+ return 0;
+ children = &node->used_by;
+ } else {
+ if (_nodes_are_linked(node, &node->dtree->root))
+ return 0;
+ children = &node->uses;
+ }
+
+ return dm_list_size(children);
+}
+
+/*
+ * Returns 1 if no prefix supplied
+ *
+ * Otherwise returns 1 iff 'uuid' begins with 'uuid_prefix', with one
+ * transitional extra: when the requested prefix itself starts with the
+ * default prefix (dm_uuid_prefix()), an active device whose uuid is
+ * missing that default prefix is also compared against the remainder.
+ */
+static int _uuid_prefix_matches(const char *uuid, const char *uuid_prefix, size_t uuid_prefix_len)
+{
+ const char *default_uuid_prefix = dm_uuid_prefix();
+ size_t default_uuid_prefix_len = strlen(default_uuid_prefix);
+
+ if (!uuid_prefix)
+ return 1;
+
+ if (!strncmp(uuid, uuid_prefix, uuid_prefix_len))
+ return 1;
+
+ /* Handle transition: active device uuids might be missing the prefix */
+ /* NOTE(review): 4 presumably covers prefixes no longer than the default
+ * "LVM-" prefix, for which stripping below cannot apply - confirm. */
+ if (uuid_prefix_len <= 4)
+ return 0;
+
+ /* uuid already carries the default prefix, so the plain mismatch stands. */
+ if (!strncmp(uuid, default_uuid_prefix, default_uuid_prefix_len))
+ return 0;
+
+ /* Only retry when the requested prefix starts with the default prefix. */
+ if (strncmp(uuid_prefix, default_uuid_prefix, default_uuid_prefix_len))
+ return 0;
+
+ /* Compare against the prefix with the default part stripped. */
+ if (!strncmp(uuid, uuid_prefix + default_uuid_prefix_len, uuid_prefix_len - default_uuid_prefix_len))
+ return 1;
+
+ return 0;
+}
+
+/*
+ * Returns 1 if no children.
+ *
+ * Otherwise returns 1 iff every child matching 'uuid_prefix' is suspended,
+ * skipping children this node is responsible for presuspending itself.
+ * 'inverted' walks parents (used_by) instead of dependencies (uses).
+ */
+static int _children_suspended(struct dm_tree_node *node,
+ uint32_t inverted,
+ const char *uuid_prefix,
+ size_t uuid_prefix_len)
+{
+ struct dm_list *list;
+ struct dm_tree_link *dlink;
+ const struct dm_info *dinfo;
+ const char *uuid;
+
+ /* Nodes linked directly to the root have no children in this direction. */
+ if (inverted) {
+ if (_nodes_are_linked(&node->dtree->root, node))
+ return 1;
+ list = &node->used_by;
+ } else {
+ if (_nodes_are_linked(node, &node->dtree->root))
+ return 1;
+ list = &node->uses;
+ }
+
+ dm_list_iterate_items(dlink, list) {
+ if (!(uuid = dm_tree_node_get_uuid(dlink->node))) {
+ stack;
+ continue;
+ }
+
+ /* Ignore if it doesn't belong to this VG */
+ if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len))
+ continue;
+
+ /* Ignore if parent node wants to presuspend this node */
+ if (dlink->node->presuspend_node == node)
+ continue;
+
+ if (!(dinfo = dm_tree_node_get_info(dlink->node)))
+ return_0; /* FIXME Is this normal? */
+
+ if (!dinfo->suspended)
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * Set major and minor to zero for root of tree.
+ */
+struct dm_tree_node *dm_tree_find_node(struct dm_tree *dtree,
+ uint32_t major,
+ uint32_t minor)
+{
+ /* 0:0 denotes the tree's virtual root node. */
+ return (major || minor) ? _find_dm_tree_node(dtree, major, minor)
+ : &dtree->root;
+}
+
+/*
+ * Set uuid to NULL for root of tree.
+ */
+struct dm_tree_node *dm_tree_find_node_by_uuid(struct dm_tree *dtree,
+ const char *uuid)
+{
+ /* NULL or empty uuid denotes the tree's virtual root node. */
+ return (uuid && *uuid) ? _find_dm_tree_node_by_uuid(dtree, uuid)
+ : &dtree->root;
+}
+
+/*
+ * First time set *handle to NULL.
+ * Set inverted to invert the tree.
+ */
+struct dm_tree_node *dm_tree_next_child(void **handle,
+ const struct dm_tree_node *parent,
+ uint32_t inverted)
+{
+ /* The handle is a cursor into the parent's link list. */
+ struct dm_list **dlink = (struct dm_list **) handle;
+ const struct dm_list *use_list = inverted ? &parent->used_by
+ : &parent->uses;
+
+ *dlink = *dlink ? dm_list_next(use_list, *dlink)
+ : dm_list_first(use_list);
+
+ return *dlink ? dm_list_item(*dlink, struct dm_tree_link)->node : NULL;
+}
+
+/*
+ * Fetch name, uuid, info and dependency list for device major:minor via a
+ * DM_DEVICE_DEPS task. For non-dm devices only info->major/minor are filled
+ * in. On success the created task is returned in *dmt (caller destroys it);
+ * the returned *name/*uuid/*deps point into that task. Returns 0 on failure
+ * with *dmt already destroyed.
+ */
+/* NOTE(review): 'mem' is not referenced in this function - presumably kept
+ * for signature symmetry with _info_by_dev(); confirm before removing. */
+static int _deps(struct dm_task **dmt, struct dm_pool *mem, uint32_t major, uint32_t minor,
+ const char **name, const char **uuid, unsigned inactive_table,
+ struct dm_info *info, struct dm_deps **deps)
+{
+ memset(info, 0, sizeof(*info));
+ *name = "";
+ *uuid = "";
+ *deps = NULL;
+
+ /* Not a dm device - nothing to query, just echo the device number back. */
+ if (!dm_is_dm_major(major)) {
+ info->major = major;
+ info->minor = minor;
+ return 1;
+ }
+
+ if (!(*dmt = dm_task_create(DM_DEVICE_DEPS)))
+ return_0;
+
+ if (!dm_task_set_major(*dmt, major) || !dm_task_set_minor(*dmt, minor)) {
+ log_error("_deps: failed to set major:minor for (" FMTu32 ":" FMTu32 ").",
+ major, minor);
+ goto failed;
+ }
+
+ if (inactive_table && !dm_task_query_inactive_table(*dmt)) {
+ log_error("_deps: failed to set inactive table for (%" PRIu32 ":%" PRIu32 ")",
+ major, minor);
+ goto failed;
+ }
+
+ if (!dm_task_run(*dmt)) {
+ log_error("_deps: task run failed for (%" PRIu32 ":%" PRIu32 ")",
+ major, minor);
+ goto failed;
+ }
+
+ if (!dm_task_get_info(*dmt, info)) {
+ log_error("_deps: failed to get info for (%" PRIu32 ":%" PRIu32 ")",
+ major, minor);
+ goto failed;
+ }
+
+ if (info->exists) {
+ /* Sanity-check the kernel answered for the device we asked about. */
+ if (info->major != major) {
+ log_error("Inconsistent dtree major number: %u != %u",
+ major, info->major);
+ goto failed;
+ }
+ if (info->minor != minor) {
+ log_error("Inconsistent dtree minor number: %u != %u",
+ minor, info->minor);
+ goto failed;
+ }
+ *name = dm_task_get_name(*dmt);
+ *uuid = dm_task_get_uuid(*dmt);
+ *deps = dm_task_get_deps(*dmt);
+ }
+
+ return 1;
+
+failed:
+ dm_task_destroy(*dmt);
+ *dmt = NULL;
+
+ return 0;
+}
+
+/*
+ * Query dm_info for device major:minor, optionally including open_count.
+ * If 'name'/'uuid' are non-NULL the device's name/uuid are duplicated into
+ * 'mem'. Returns 0 on failure.
+ */
+static int _info_by_dev(uint32_t major, uint32_t minor, int with_open_count,
+ struct dm_info *info, struct dm_pool *mem,
+ const char **name, const char **uuid)
+{
+ struct dm_task *dmt;
+ int r = 0;
+
+ if (!(dmt = dm_task_create(DM_DEVICE_INFO)))
+ return_0;
+
+ if (!dm_task_set_major(dmt, major) || !dm_task_set_minor(dmt, minor)) {
+ log_error("_info_by_dev: Failed to set device number.");
+ goto out;
+ }
+
+ /* Skipping open_count avoids extra work in the kernel when not needed. */
+ if (!with_open_count && !dm_task_no_open_count(dmt))
+ log_warn("WARNING: Failed to disable open_count.");
+
+ if (!dm_task_run(dmt))
+ goto_out;
+
+ if (!dm_task_get_info(dmt, info))
+ goto_out;
+
+ /* Copy name/uuid out of the task before it is destroyed. */
+ if (name && !(*name = dm_pool_strdup(mem, dm_task_get_name(dmt)))) {
+ log_error("name pool_strdup failed");
+ goto out;
+ }
+
+ if (uuid && !(*uuid = dm_pool_strdup(mem, dm_task_get_uuid(dmt)))) {
+ log_error("uuid pool_strdup failed");
+ goto out;
+ }
+
+ r = 1;
+out:
+ dm_task_destroy(dmt);
+
+ return r;
+}
+
+/*
+ * Return 1 if the device may safely be removed, 0 (with an error logged)
+ * if it is open, held by another device, or carries a mounted filesystem.
+ * Non-existent devices trivially pass.
+ */
+static int _check_device_not_in_use(const char *name, struct dm_info *info)
+{
+ const char *reason;
+
+ if (!info->exists)
+ return 1;
+
+ /* If sysfs is not used, use open_count information only. */
+ if (!*dm_sysfs_dir()) {
+ if (!info->open_count)
+ return 1;
+ reason = "in use";
+ } else if (dm_device_has_holders(info->major, info->minor))
+ reason = "is used by another device";
+ else if (dm_device_has_mounted_fs(info->major, info->minor))
+ /* Fixed typo: "constains" -> "contains" */
+ reason = "contains a filesystem in use";
+ else
+ return 1;
+
+ log_error("Device %s (" FMTu32 ":" FMTu32 ") %s.",
+ name, info->major, info->minor, reason);
+ return 0;
+}
+
+/* Check if all parent nodes of given node have open_count == 0 */
+static int _node_has_closed_parents(struct dm_tree_node *node,
+ const char *uuid_prefix,
+ size_t uuid_prefix_len)
+{
+ struct dm_tree_link *dlink;
+ const struct dm_info *dinfo;
+ struct dm_info info;
+ const char *uuid;
+
+ /* Iterate through parents of this node */
+ dm_list_iterate_items(dlink, &node->used_by) {
+ if (!(uuid = dm_tree_node_get_uuid(dlink->node))) {
+ stack;
+ continue;
+ }
+
+ /* Ignore if it doesn't belong to this VG */
+ if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len))
+ continue;
+
+ if (!(dinfo = dm_tree_node_get_info(dlink->node)))
+ return_0; /* FIXME Is this normal? */
+
+ /* Refresh open_count */
+ /* Cached info may be stale - re-query the kernel for open_count. */
+ if (!_info_by_dev(dinfo->major, dinfo->minor, 1, &info, NULL, NULL, NULL))
+ return_0;
+
+ /* Parent vanished meanwhile - treat as closed. */
+ if (!info.exists)
+ continue;
+
+ if (info.open_count) {
+ log_debug_activation("Node %s %d:%d has open_count %d", uuid_prefix,
+ dinfo->major, dinfo->minor, info.open_count);
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+/*
+ * Remove device major:minor via DM_DEVICE_REMOVE. A udev cookie is set when
+ * 'cookie' is non-NULL; 'retry' enables the kernel's retried remove.
+ * Returns the dm_task_run() result.
+ */
+static int _deactivate_node(const char *name, uint32_t major, uint32_t minor,
+ uint32_t *cookie, uint16_t udev_flags, int retry)
+{
+ struct dm_task *dmt;
+ int r = 0;
+
+ log_verbose("Removing %s (%" PRIu32 ":%" PRIu32 ")", name, major, minor);
+
+ if (!(dmt = dm_task_create(DM_DEVICE_REMOVE))) {
+ log_error("Deactivation dm_task creation failed for %s", name);
+ return 0;
+ }
+
+ if (!dm_task_set_major(dmt, major) || !dm_task_set_minor(dmt, minor)) {
+ log_error("Failed to set device number for %s deactivation", name);
+ goto out;
+ }
+
+ if (!dm_task_no_open_count(dmt))
+ log_warn("WARNING: Failed to disable open_count.");
+
+ if (cookie)
+ if (!dm_task_set_cookie(dmt, cookie, udev_flags))
+ goto out;
+
+ if (retry)
+ dm_task_retry_remove(dmt);
+
+ r = dm_task_run(dmt);
+
+ /* FIXME Until kernel returns actual name so dm-iface.c can handle it */
+ /* Remove the /dev node ourselves unless udev rules are expected to. */
+ rm_dev_node(name, dmt->cookie_set && !(udev_flags & DM_UDEV_DISABLE_DM_RULES_FLAG),
+ dmt->cookie_set && (udev_flags & DM_UDEV_DISABLE_LIBRARY_FALLBACK));
+
+ /* FIXME Remove node from tree or mark invalid? */
+
+out:
+ dm_task_destroy(dmt);
+
+ return r;
+}
+
+/*
+ * Clear the node's inactive table (DM_DEVICE_CLEAR), then remove any devices
+ * that table referenced which are now orphans: not in the tree, unreferenced
+ * and without a live table. Returns 0 on failure.
+ */
+static int _node_clear_table(struct dm_tree_node *dnode, uint16_t udev_flags)
+{
+ struct dm_task *dmt = NULL, *deps_dmt = NULL;
+ struct dm_info *info = &dnode->info, deps_info;
+ struct dm_deps *deps = NULL;
+ const char *name, *uuid, *depname, *depuuid;
+ const char *default_uuid_prefix;
+ size_t default_uuid_prefix_len;
+ uint32_t i;
+ int r = 0;
+
+ if (!(name = dm_tree_node_get_name(dnode))) {
+ log_error("_node_clear_table failed: missing name");
+ return 0;
+ }
+
+ /* Is there a table? */
+ if (!info->exists || !info->inactive_table)
+ return 1;
+
+ /* Get devices used by inactive table that's about to be deleted. */
+ /* Must be captured before the clear, or the references are lost. */
+ if (!_deps(&deps_dmt, dnode->dtree->mem, info->major, info->minor, &depname, &depuuid, 1, info, &deps)) {
+ log_error("Failed to obtain dependencies for %s before clearing table.", name);
+ return 0;
+ }
+
+ log_verbose("Clearing inactive table %s (%" PRIu32 ":%" PRIu32 ")",
+ name, info->major, info->minor);
+
+ if (!(dmt = dm_task_create(DM_DEVICE_CLEAR))) {
+ log_error("Table clear dm_task creation failed for %s", name);
+ goto out;
+ }
+
+ if (!dm_task_set_major(dmt, info->major) ||
+ !dm_task_set_minor(dmt, info->minor)) {
+ log_error("Failed to set device number for %s table clear", name);
+ goto out;
+ }
+
+ r = dm_task_run(dmt);
+
+ /* Refresh the node's cached info after the clear. */
+ if (!dm_task_get_info(dmt, info)) {
+ log_error("_node_clear_table failed: info missing after running task for %s", name);
+ r = 0;
+ }
+
+ if (!r || !deps)
+ goto_out;
+
+ /*
+ * Remove (incomplete) devices that the inactive table referred to but
+ * which are not in the tree, no longer referenced and don't have a live
+ * table.
+ */
+ default_uuid_prefix = dm_uuid_prefix();
+ default_uuid_prefix_len = strlen(default_uuid_prefix);
+
+ for (i = 0; i < deps->count; i++) {
+ /* If already in tree, assume it's under control */
+ if (_find_dm_tree_node(dnode->dtree, MAJOR(deps->device[i]), MINOR(deps->device[i])))
+ continue;
+
+ if (!_info_by_dev(MAJOR(deps->device[i]), MINOR(deps->device[i]), 1,
+ &deps_info, dnode->dtree->mem, &name, &uuid))
+ goto_out;
+
+ /* Proceed if device is an 'orphan' - unreferenced and without a live table. */
+ if (!deps_info.exists || deps_info.live_table || deps_info.open_count)
+ continue;
+
+ /* Only touch devices carrying the default (LVM) uuid prefix. */
+ if (strncmp(uuid, default_uuid_prefix, default_uuid_prefix_len))
+ continue;
+
+ /* Remove device. */
+ if (!_deactivate_node(name, deps_info.major, deps_info.minor, &dnode->dtree->cookie, udev_flags, 0)) {
+ log_error("Failed to deactivate no-longer-used device %s (%"
+ PRIu32 ":%" PRIu32 ")", name, deps_info.major, deps_info.minor);
+ } else if (deps_info.suspended)
+ dec_suspended();
+ }
+
+out:
+ if (dmt)
+ dm_task_destroy(dmt);
+
+ if (deps_dmt)
+ dm_task_destroy(deps_dmt);
+
+ return r;
+}
+
+/*
+ * Add (or update) a device node in the tree by uuid. A new node is attached
+ * between root levels until a table is supplied; an existing node with a
+ * different name is scheduled for rename. Optionally clears any inactive
+ * table. Returns the node or NULL on error.
+ */
+struct dm_tree_node *dm_tree_add_new_dev_with_udev_flags(struct dm_tree *dtree,
+ const char *name,
+ const char *uuid,
+ uint32_t major,
+ uint32_t minor,
+ int read_only,
+ int clear_inactive,
+ void *context,
+ uint16_t udev_flags)
+{
+ struct dm_tree_node *dnode;
+ struct dm_info info = { 0 };
+
+ if (!name || !uuid) {
+ log_error("Cannot add device without name and uuid.");
+ return NULL;
+ }
+
+ /* Do we need to add node to tree? */
+ if (!(dnode = dm_tree_find_node_by_uuid(dtree, uuid))) {
+ if (!(dnode = _create_dm_tree_node(dtree, name, uuid, &info,
+ context, 0)))
+ return_NULL;
+
+ /* Attach to root node until a table is supplied */
+ if (!_add_to_toplevel(dnode) || !_add_to_bottomlevel(dnode))
+ return_NULL;
+
+ dnode->props.major = major;
+ dnode->props.minor = minor;
+ } else if (strcmp(name, dnode->name)) {
+ /* Do we need to rename node? */
+ /* Rename is deferred - recorded here, applied during activation. */
+ if (!(dnode->props.new_name = dm_pool_strdup(dtree->mem, name))) {
+ log_error("name pool_strdup failed");
+ return NULL;
+ }
+ }
+
+ dnode->props.read_only = read_only ? 1 : 0;
+ dnode->props.read_ahead = DM_READ_AHEAD_AUTO;
+ dnode->props.read_ahead_flags = 0;
+
+ if (clear_inactive && !_node_clear_table(dnode, udev_flags))
+ return_NULL;
+
+ dnode->context = context;
+ dnode->udev_flags = udev_flags;
+
+ return dnode;
+}
+
+/* Convenience wrapper: dm_tree_add_new_dev_with_udev_flags() with no udev flags. */
+struct dm_tree_node *dm_tree_add_new_dev(struct dm_tree *dtree, const char *name,
+ const char *uuid, uint32_t major, uint32_t minor,
+ int read_only, int clear_inactive, void *context)
+{
+ return dm_tree_add_new_dev_with_udev_flags(dtree, name, uuid, major, minor,
+ read_only, clear_inactive, context, 0);
+}
+
+/*
+ * Add device major:minor beneath 'parent' in the tree, then recursively add
+ * everything it depends on. Implicitly-added dependencies get udev rules
+ * disabled by default. Returns the node, or NULL on failure.
+ */
+static struct dm_tree_node *_add_dev(struct dm_tree *dtree,
+ struct dm_tree_node *parent,
+ uint32_t major, uint32_t minor,
+ uint16_t udev_flags,
+ int implicit_deps)
+{
+ struct dm_task *dmt = NULL;
+ struct dm_info info;
+ struct dm_deps *deps = NULL;
+ const char *name = NULL;
+ const char *uuid = NULL;
+ struct dm_tree_node *node = NULL;
+ uint32_t i;
+ int new = 0;
+
+ /* Already in tree? */
+ if (!(node = _find_dm_tree_node(dtree, major, minor))) {
+ if (!_deps(&dmt, dtree->mem, major, minor, &name, &uuid, 0, &info, &deps))
+ return_NULL;
+
+ if (!(node = _create_dm_tree_node(dtree, name, uuid, &info,
+ NULL, udev_flags)))
+ goto_out;
+ new = 1;
+ node->implicit_deps = implicit_deps;
+ } else if (!implicit_deps && node->implicit_deps) {
+ /* Previously added only as a dependency - promote to explicit. */
+ node->udev_flags = udev_flags;
+ node->implicit_deps = 0;
+ }
+
+ if (!_link_tree_nodes(parent, node)) {
+ node = NULL;
+ goto_out;
+ }
+
+ /* If node was already in tree, no need to recurse. */
+ if (!new)
+ goto out;
+
+ /* Can't recurse if not a mapped device or there are no dependencies */
+ if (!node->info.exists || !deps || !deps->count) {
+ if (!_add_to_bottomlevel(node)) {
+ stack;
+ node = NULL;
+ }
+ goto out;
+ }
+
+ /* Add dependencies to tree */
+ for (i = 0; i < deps->count; i++)
+ /* Implicit devices are by default temporary */
+ if (!_add_dev(dtree, node, MAJOR(deps->device[i]),
+ MINOR(deps->device[i]), udev_flags |
+ DM_UDEV_DISABLE_SUBSYSTEM_RULES_FLAG |
+ DM_UDEV_DISABLE_DISK_RULES_FLAG |
+ DM_UDEV_DISABLE_OTHER_RULES_FLAG, 1)) {
+ node = NULL;
+ goto_out;
+ }
+
+out:
+ if (dmt)
+ dm_task_destroy(dmt);
+
+ return node;
+}
+
+/* Add device major:minor (and its dependencies) under the tree root. */
+int dm_tree_add_dev(struct dm_tree *dtree, uint32_t major, uint32_t minor)
+{
+ return _add_dev(dtree, &dtree->root, major, minor, 0, 0) ? 1 : 0;
+}
+
+/* As dm_tree_add_dev(), but with explicit udev flags for the new node. */
+int dm_tree_add_dev_with_udev_flags(struct dm_tree *dtree, uint32_t major,
+ uint32_t minor, uint16_t udev_flags)
+{
+ return _add_dev(dtree, &dtree->root, major, minor, udev_flags, 0) ? 1 : 0;
+}
+
+/*
+ * Rename device major:minor from old_name to new_name via DM_DEVICE_RENAME,
+ * registering a udev cookie. Returns the dm_task_run() result.
+ */
+static int _rename_node(const char *old_name, const char *new_name, uint32_t major,
+ uint32_t minor, uint32_t *cookie, uint16_t udev_flags)
+{
+ struct dm_task *dmt;
+ int r = 0;
+
+ log_verbose("Renaming %s (%" PRIu32 ":%" PRIu32 ") to %s", old_name, major, minor, new_name);
+
+ if (!(dmt = dm_task_create(DM_DEVICE_RENAME))) {
+ log_error("Rename dm_task creation failed for %s", old_name);
+ return 0;
+ }
+
+ if (!dm_task_set_name(dmt, old_name)) {
+ log_error("Failed to set name for %s rename.", old_name);
+ goto out;
+ }
+
+ if (!dm_task_set_newname(dmt, new_name))
+ goto_out;
+
+ if (!dm_task_no_open_count(dmt))
+ log_warn("WARNING: Failed to disable open_count.");
+
+ if (!dm_task_set_cookie(dmt, cookie, udev_flags))
+ goto out;
+
+ r = dm_task_run(dmt);
+
+out:
+ dm_task_destroy(dmt);
+
+ return r;
+}
+
+/* FIXME Merge with _suspend_node? */
+/*
+ * Resume device major:minor via DM_DEVICE_RESUME, applying read-ahead and a
+ * udev cookie, then refresh *newinfo. Decrements the suspended-device count
+ * when 'already_suspended' is set. Returns 0 on failure.
+ */
+static int _resume_node(const char *name, uint32_t major, uint32_t minor,
+ uint32_t read_ahead, uint32_t read_ahead_flags,
+ struct dm_info *newinfo, uint32_t *cookie,
+ uint16_t udev_flags, int already_suspended)
+{
+ struct dm_task *dmt;
+ int r = 0;
+
+ log_verbose("Resuming %s (" FMTu32 ":" FMTu32 ").", name, major, minor);
+
+ if (!(dmt = dm_task_create(DM_DEVICE_RESUME))) {
+ /* Fixed copy-paste: message previously said "Suspend". */
+ log_debug_activation("Resume dm_task creation failed for %s.", name);
+ return 0;
+ }
+
+ /* FIXME Kernel should fill in name on return instead */
+ if (!dm_task_set_name(dmt, name)) {
+ log_debug_activation("Failed to set device name for %s resumption.", name);
+ goto out;
+ }
+
+ if (!dm_task_set_major(dmt, major) || !dm_task_set_minor(dmt, minor)) {
+ log_error("Failed to set device number for %s resumption.", name);
+ goto out;
+ }
+
+ if (!dm_task_no_open_count(dmt))
+ log_warn("WARNING: Failed to disable open_count.");
+
+ if (!dm_task_set_read_ahead(dmt, read_ahead, read_ahead_flags))
+ log_warn("WARNING: Failed to set read ahead.");
+
+ if (!dm_task_set_cookie(dmt, cookie, udev_flags))
+ goto_out;
+
+ if (!(r = dm_task_run(dmt)))
+ goto_out;
+
+ if (already_suspended)
+ dec_suspended();
+
+ /* Refresh caller's cached info with post-resume state. */
+ if (!(r = dm_task_get_info(dmt, newinfo)))
+ stack;
+
+out:
+ dm_task_destroy(dmt);
+
+ return r;
+}
+
+/*
+ * Suspend device major:minor via DM_DEVICE_SUSPEND, optionally skipping the
+ * filesystem sync (skip_lockfs) and outstanding-I/O flush (no_flush), then
+ * refresh *newinfo. Returns 0 on failure.
+ */
+static int _suspend_node(const char *name, uint32_t major, uint32_t minor,
+ int skip_lockfs, int no_flush, struct dm_info *newinfo)
+{
+ struct dm_task *dmt;
+ int r = 0;
+
+ log_verbose("Suspending %s (%" PRIu32 ":%" PRIu32 ")%s%s",
+ name, major, minor,
+ skip_lockfs ? "" : " with filesystem sync",
+ no_flush ? "" : " with device flush");
+
+ if (!(dmt = dm_task_create(DM_DEVICE_SUSPEND))) {
+ log_error("Suspend dm_task creation failed for %s", name);
+ return 0;
+ }
+
+ if (!dm_task_set_major(dmt, major) || !dm_task_set_minor(dmt, minor)) {
+ log_error("Failed to set device number for %s suspension.", name);
+ goto out;
+ }
+
+ if (!dm_task_no_open_count(dmt))
+ log_warn("WARNING: Failed to disable open_count.");
+
+ if (skip_lockfs && !dm_task_skip_lockfs(dmt))
+ log_warn("WARNING: Failed to set skip_lockfs flag.");
+
+ if (no_flush && !dm_task_no_flush(dmt))
+ log_warn("WARNING: Failed to set no_flush flag.");
+
+ if ((r = dm_task_run(dmt))) {
+ /* Track the global count of suspended devices. */
+ inc_suspended();
+ r = dm_task_get_info(dmt, newinfo);
+ }
+out:
+ dm_task_destroy(dmt);
+
+ return r;
+}
+
+/*
+ * Read and parse the thin-pool status line for 'dnode' into 's'.
+ * Fails if the device's single target is not "thin-pool".
+ */
+static int _thin_pool_get_status(struct dm_tree_node *dnode,
+ struct dm_status_thin_pool *s)
+{
+ struct dm_task *dmt;
+ int r = 0;
+ uint64_t start, length;
+ char *type = NULL;
+ char *params = NULL;
+
+ if (!(dmt = dm_task_create(DM_DEVICE_STATUS)))
+ return_0;
+
+ if (!dm_task_set_major(dmt, dnode->info.major) ||
+ !dm_task_set_minor(dmt, dnode->info.minor)) {
+ log_error("Failed to set major minor.");
+ goto out;
+ }
+
+ /* Status query should not force a flush of pool I/O. */
+ if (!dm_task_no_flush(dmt))
+ log_warn("WARNING: Can't set no_flush flag."); /* Non fatal */
+
+ if (!dm_task_run(dmt))
+ goto_out;
+
+ dm_get_next_target(dmt, NULL, &start, &length, &type, &params);
+
+ if (!type || (strcmp(type, "thin-pool") != 0)) {
+ log_error("Expected thin-pool target for %s and got %s.",
+ _node_name(dnode), type ? : "no target");
+ goto out;
+ }
+
+ if (!parse_thin_pool_status(params, s))
+ goto_out;
+
+ log_debug_activation("Found transaction id %" PRIu64 " for thin pool %s "
+ "with status line: %s.",
+ s->transaction_id, _node_name(dnode), params);
+
+ r = 1;
+out:
+ dm_task_destroy(dmt);
+
+ return r;
+}
+
+/*
+ * Format one queued thin-pool message as its kernel string form and deliver
+ * it via DM_DEVICE_TARGET_MSG. Returns 0 on failure.
+ */
+static int _thin_pool_node_message(struct dm_tree_node *dnode, struct thin_message *tm)
+{
+ struct dm_task *dmt;
+ struct dm_thin_message *m = &tm->message;
+ char buf[64];
+ int r;
+
+ /* Build the kernel message string for this message type. */
+ switch (m->type) {
+ case DM_THIN_MESSAGE_CREATE_SNAP:
+ r = dm_snprintf(buf, sizeof(buf), "create_snap %u %u",
+ m->u.m_create_snap.device_id,
+ m->u.m_create_snap.origin_id);
+ break;
+ case DM_THIN_MESSAGE_CREATE_THIN:
+ r = dm_snprintf(buf, sizeof(buf), "create_thin %u",
+ m->u.m_create_thin.device_id);
+ break;
+ case DM_THIN_MESSAGE_DELETE:
+ r = dm_snprintf(buf, sizeof(buf), "delete %u",
+ m->u.m_delete.device_id);
+ break;
+ case DM_THIN_MESSAGE_SET_TRANSACTION_ID:
+ r = dm_snprintf(buf, sizeof(buf),
+ "set_transaction_id %" PRIu64 " %" PRIu64,
+ m->u.m_set_transaction_id.current_id,
+ m->u.m_set_transaction_id.new_id);
+ break;
+ case DM_THIN_MESSAGE_RESERVE_METADATA_SNAP: /* target vsn 1.1 */
+ r = dm_snprintf(buf, sizeof(buf), "reserve_metadata_snap");
+ break;
+ case DM_THIN_MESSAGE_RELEASE_METADATA_SNAP: /* target vsn 1.1 */
+ r = dm_snprintf(buf, sizeof(buf), "release_metadata_snap");
+ break;
+ default:
+ r = -1;
+ }
+
+ if (r < 0) {
+ log_error("Failed to prepare message.");
+ return 0;
+ }
+
+ r = 0;
+
+ if (!(dmt = dm_task_create(DM_DEVICE_TARGET_MSG)))
+ return_0;
+
+ if (!dm_task_set_major(dmt, dnode->info.major) ||
+ !dm_task_set_minor(dmt, dnode->info.minor)) {
+ log_error("Failed to set message major minor.");
+ goto out;
+ }
+
+ if (!dm_task_set_message(dmt, buf))
+ goto_out;
+
+ /* Internal functionality of dm_task */
+ /* Some messages legitimately fail with a specific errno; tolerate it. */
+ dmt->expected_errno = tm->expected_errno;
+
+ if (!dm_task_run(dmt)) {
+ log_error("Failed to process thin pool message \"%s\".", buf);
+ goto out;
+ }
+
+ r = 1;
+out:
+ dm_task_destroy(dmt);
+
+ return r;
+}
+
+/* Return the most recently added load segment, or NULL (logged) if none. */
+static struct load_segment *_get_last_load_segment(struct dm_tree_node *node)
+{
+ struct dm_list *tail = dm_list_empty(&node->props.segs)
+ ? NULL : dm_list_last(&node->props.segs);
+
+ if (!tail) {
+ log_error("Node %s is missing a segment.", _node_name(node));
+ return NULL;
+ }
+
+ return dm_list_item(tail, struct load_segment);
+}
+
+/* For preload pass only validate pool's transaction_id */
+/*
+ * For a thin-pool node matching uuid_prefix: verify the kernel transaction_id
+ * against the metadata one and, when 'send' is set and the ids differ by one,
+ * deliver the queued thin messages (re-checking the id after each
+ * set_transaction_id). Returns 1 on success/no-op, 0 on mismatch or failure.
+ */
+static int _node_send_messages(struct dm_tree_node *dnode,
+ const char *uuid_prefix,
+ size_t uuid_prefix_len,
+ int send)
+{
+ struct load_segment *seg;
+ struct thin_message *tmsg;
+ struct dm_status_thin_pool stp;
+ const char *uuid;
+ int have_messages;
+
+ if (!dnode->info.exists)
+ return 1;
+
+ if (!(seg = _get_last_load_segment(dnode)))
+ return_0;
+
+ /* Messages only apply to thin-pool targets. */
+ if (seg->type != SEG_THIN_POOL)
+ return 1;
+
+ if (!(uuid = dm_tree_node_get_uuid(dnode)))
+ return_0;
+
+ if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len)) {
+ log_debug_activation("UUID \"%s\" does not match.", uuid);
+ return 1;
+ }
+
+ if (!_thin_pool_get_status(dnode, &stp))
+ return_0;
+
+ have_messages = !dm_list_empty(&seg->thin_messages) ? 1 : 0;
+ if (stp.transaction_id == seg->transaction_id) {
+ dnode->props.send_messages = 0; /* messages already committed */
+ if (have_messages)
+ log_debug_activation("Thin pool %s transaction_id matches %"
+ PRIu64 ", skipping messages.",
+ _node_name(dnode), stp.transaction_id);
+ return 1;
+ }
+
+ /* Error if there are no stacked messages or id mismatches */
+ /* Messages may only advance the transaction_id by exactly one. */
+ if ((stp.transaction_id + 1) != seg->transaction_id) {
+ log_error("Thin pool %s transaction_id is %" PRIu64 ", while expected %" PRIu64 ".",
+ _node_name(dnode), stp.transaction_id, seg->transaction_id - have_messages);
+ return 0;
+ }
+
+ if (!have_messages || !send)
+ return 1; /* transaction_id is matching */
+
+ dm_list_iterate_items(tmsg, &seg->thin_messages) {
+ if (!(_thin_pool_node_message(dnode, tmsg)))
+ return_0;
+ if (tmsg->message.type == DM_THIN_MESSAGE_SET_TRANSACTION_ID) {
+ /* Confirm the kernel accepted the new transaction_id. */
+ if (!_thin_pool_get_status(dnode, &stp))
+ return_0;
+ if (stp.transaction_id != tmsg->message.u.m_set_transaction_id.new_id) {
+ log_error("Thin pool %s transaction_id is %" PRIu64
+ " and does not match expected %" PRIu64 ".",
+ _node_name(dnode), stp.transaction_id,
+ tmsg->message.u.m_set_transaction_id.new_id);
+ return 0;
+ }
+ }
+ }
+
+ dnode->props.send_messages = 0; /* messages posted */
+
+ return 1;
+}
+
+/*
+ * FIXME Don't attempt to deactivate known internal dependencies.
+ */
+/*
+ * Depth-first deactivation of all children of 'dnode' matching uuid_prefix.
+ * 'level' is 0 at the top-level call; open devices at deeper levels are
+ * assumed internal and skipped. Returns 0 if any deactivation failed.
+ */
+static int _dm_tree_deactivate_children(struct dm_tree_node *dnode,
+ const char *uuid_prefix,
+ size_t uuid_prefix_len,
+ unsigned level)
+{
+ int r = 1;
+ void *handle = NULL;
+ struct dm_tree_node *child = dnode;
+ struct dm_info info;
+ const struct dm_info *dinfo;
+ const char *name;
+ const char *uuid;
+
+ while ((child = dm_tree_next_child(&handle, dnode, 0))) {
+ if (!(dinfo = dm_tree_node_get_info(child))) {
+ stack;
+ continue;
+ }
+
+ if (!(name = dm_tree_node_get_name(child))) {
+ stack;
+ continue;
+ }
+
+ if (!(uuid = dm_tree_node_get_uuid(child))) {
+ stack;
+ continue;
+ }
+
+ /* Ignore if it doesn't belong to this VG */
+ if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len))
+ continue;
+
+ /* Refresh open_count */
+ if (!_info_by_dev(dinfo->major, dinfo->minor, 1, &info, NULL, NULL, NULL))
+ return_0;
+
+ if (!info.exists)
+ continue;
+
+ if (info.open_count) {
+ /* Skip internal non-toplevel opened nodes */
+ if (level)
+ continue;
+
+ /* When retry is not allowed, error */
+ if (!child->dtree->retry_remove) {
+ log_error("Unable to deactivate open %s (" FMTu32 ":"
+ FMTu32 ").", name, info.major, info.minor);
+ r = 0;
+ continue;
+ }
+
+ /* Check toplevel node for holders/mounted fs */
+ if (!_check_device_not_in_use(name, &info)) {
+ stack;
+ r = 0;
+ continue;
+ }
+ /* Go on with retry */
+ }
+
+ /* Also checking open_count in parent nodes of presuspend_node */
+ if ((child->presuspend_node &&
+ !_node_has_closed_parents(child->presuspend_node,
+ uuid_prefix, uuid_prefix_len))) {
+ /* Only report error from (likely non-internal) dependency at top level */
+ if (!level) {
+ log_error("Unable to deactivate open %s (" FMTu32 ":"
+ FMTu32 ").", name, info.major, info.minor);
+ r = 0;
+ }
+ continue;
+ }
+
+ /* Suspend child node first if requested */
+ if (child->presuspend_node &&
+ !dm_tree_suspend_children(child, uuid_prefix, uuid_prefix_len))
+ continue;
+
+ /* Retried remove only at top level; deeper nodes should be free. */
+ if (!_deactivate_node(name, info.major, info.minor,
+ &child->dtree->cookie, child->udev_flags,
+ (level == 0) ? child->dtree->retry_remove : 0)) {
+ log_error("Unable to deactivate %s (" FMTu32 ":"
+ FMTu32 ").", name, info.major, info.minor);
+ r = 0;
+ continue;
+ }
+
+ if (info.suspended && info.live_table)
+ dec_suspended();
+
+ if (child->callback &&
+ !child->callback(child, DM_NODE_CALLBACK_DEACTIVATED,
+ child->callback_data))
+ stack;
+ /* FIXME Deactivation must currently ignore failure
+ * here so that lvremove can continue: we need an
+ * alternative way to handle this state without
+ * setting r=0. Or better, skip calling thin_check
+ * entirely if the device is about to be removed. */
+
+ if (dm_tree_node_num_children(child, 0) &&
+ !_dm_tree_deactivate_children(child, uuid_prefix, uuid_prefix_len, level + 1))
+ return_0;
+ }
+
+ return r;
+}
+
+/* Public entry point: deactivate children of 'dnode' starting at level 0. */
+int dm_tree_deactivate_children(struct dm_tree_node *dnode,
+ const char *uuid_prefix,
+ size_t uuid_prefix_len)
+{
+ return _dm_tree_deactivate_children(dnode, uuid_prefix, uuid_prefix_len, 0);
+}
+
+/*
+ * Suspend all children of 'dnode' matching uuid_prefix: first the nodes at
+ * this level (top-down, parents first), then recurse. Thin-pools with queued
+ * messages get those sent instead and their subtree suspend is skipped.
+ * Returns 0 if any suspend failed.
+ */
+int dm_tree_suspend_children(struct dm_tree_node *dnode,
+ const char *uuid_prefix,
+ size_t uuid_prefix_len)
+{
+ int r = 1;
+ void *handle = NULL;
+ struct dm_tree_node *child = dnode;
+ struct dm_info info, newinfo;
+ const struct dm_info *dinfo;
+ const char *name;
+ const char *uuid;
+
+ /* Suspend nodes at this level of the tree */
+ while ((child = dm_tree_next_child(&handle, dnode, 0))) {
+ if (!(dinfo = dm_tree_node_get_info(child))) {
+ stack;
+ continue;
+ }
+
+ if (!(name = dm_tree_node_get_name(child))) {
+ stack;
+ continue;
+ }
+
+ if (!(uuid = dm_tree_node_get_uuid(child))) {
+ stack;
+ continue;
+ }
+
+ /* Ignore if it doesn't belong to this VG */
+ if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len))
+ continue;
+
+ /* Ensure immediate parents are already suspended */
+ if (!_children_suspended(child, 1, uuid_prefix, uuid_prefix_len))
+ continue;
+
+ if (!_info_by_dev(dinfo->major, dinfo->minor, 0, &info, NULL, NULL, NULL))
+ return_0;
+
+ if (!info.exists || info.suspended)
+ continue;
+
+ /* If child has some real messages send them */
+ /* Sending messages makes suspending the pool's subtree unnecessary. */
+ if ((child->props.send_messages > 1) && r) {
+ if (!(r = _node_send_messages(child, uuid_prefix, uuid_prefix_len, 1)))
+ stack;
+ else {
+ log_debug_activation("Sent messages to thin-pool %s and "
+ "skipping suspend of its children.",
+ _node_name(child));
+ child->props.skip_suspend++;
+ }
+ continue;
+ }
+
+ if (!_suspend_node(name, info.major, info.minor,
+ child->dtree->skip_lockfs,
+ child->dtree->no_flush, &newinfo)) {
+ log_error("Unable to suspend %s (" FMTu32 ":"
+ FMTu32 ")", name, info.major, info.minor);
+ r = 0;
+ continue;
+ }
+
+ /* Update cached info */
+ child->info = newinfo;
+ }
+
+ /* Then suspend any child nodes */
+ handle = NULL;
+
+ while ((child = dm_tree_next_child(&handle, dnode, 0))) {
+ if (child->props.skip_suspend)
+ continue;
+
+ if (!(uuid = dm_tree_node_get_uuid(child))) {
+ stack;
+ continue;
+ }
+
+ /* Ignore if it doesn't belong to this VG */
+ if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len))
+ continue;
+
+ if (dm_tree_node_num_children(child, 0))
+ if (!dm_tree_suspend_children(child, uuid_prefix, uuid_prefix_len))
+ return_0;
+ }
+
+ return r;
+}
+
+/*
+ * _rename_conflict_exists
+ * @parent
+ * @node
+ * @resolvable
+ *
+ * Check whether 'node's pending new name collides with the current name of
+ * any sibling under 'parent'. 'resolvable' is set when the colliding sibling
+ * is itself awaiting a rename, so letting it rename first clears the
+ * conflict.
+ *
+ * Returns: 1 if conflict, 0 otherwise
+ */
+static int _rename_conflict_exists(struct dm_tree_node *parent,
+ struct dm_tree_node *node,
+ int *resolvable)
+{
+ void *iter = NULL;
+ struct dm_tree_node *peer;
+ const char *peer_name;
+ const char *name = dm_tree_node_get_name(node);
+
+ *resolvable = 0;
+
+ if (!name)
+ return_0;
+
+ for (peer = dm_tree_next_child(&iter, parent, 0); peer;
+ peer = dm_tree_next_child(&iter, parent, 0)) {
+ if (peer == node)
+ continue;
+
+ if (!(peer_name = dm_tree_node_get_name(peer))) {
+ stack;
+ continue;
+ }
+
+ if (strcmp(node->props.new_name, peer_name))
+ continue;
+
+ /* Collision found; resolvable if the peer also renames. */
+ if (peer->props.new_name)
+ *resolvable = 1;
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * Activate all children of 'dnode' matching uuid_prefix: recurse bottom-up,
+ * then for each of three activation priority levels apply pending renames
+ * and resume suspended nodes / load inactive tables. Finally send any queued
+ * thin-pool messages. Returns 0 if any resume or message delivery failed.
+ */
+int dm_tree_activate_children(struct dm_tree_node *dnode,
+ const char *uuid_prefix,
+ size_t uuid_prefix_len)
+{
+ int r = 1;
+ int resolvable_name_conflict, awaiting_peer_rename = 0;
+ void *handle = NULL;
+ struct dm_tree_node *child = dnode;
+ const char *name;
+ const char *uuid;
+ int priority;
+
+ /* Activate children first */
+ while ((child = dm_tree_next_child(&handle, dnode, 0))) {
+ if (!(uuid = dm_tree_node_get_uuid(child))) {
+ stack;
+ continue;
+ }
+
+ if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len))
+ continue;
+
+ if (dm_tree_node_num_children(child, 0))
+ if (!dm_tree_activate_children(child, uuid_prefix, uuid_prefix_len))
+ return_0;
+ }
+
+ handle = NULL;
+
+ for (priority = 0; priority < 3; priority++) {
+ awaiting_peer_rename = 0;
+ while ((child = dm_tree_next_child(&handle, dnode, 0))) {
+ if (priority != child->activation_priority)
+ continue;
+
+ if (!(uuid = dm_tree_node_get_uuid(child))) {
+ stack;
+ continue;
+ }
+
+ if (!_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len))
+ continue;
+
+ if (!(name = dm_tree_node_get_name(child))) {
+ stack;
+ continue;
+ }
+
+ /* Rename? */
+ if (child->props.new_name) {
+ /* Defer if a conflicting peer will rename first. */
+ if (_rename_conflict_exists(dnode, child, &resolvable_name_conflict) &&
+ resolvable_name_conflict) {
+ awaiting_peer_rename++;
+ continue;
+ }
+ if (!_rename_node(name, child->props.new_name, child->info.major,
+ child->info.minor, &child->dtree->cookie,
+ child->udev_flags)) {
+ log_error("Failed to rename %s (%" PRIu32
+ ":%" PRIu32 ") to %s", name, child->info.major,
+ child->info.minor, child->props.new_name);
+ return 0;
+ }
+ child->name = child->props.new_name;
+ child->props.new_name = NULL;
+ }
+
+ /* Nothing to do unless a table needs loading or a resume is due. */
+ if (!child->info.inactive_table && !child->info.suspended)
+ continue;
+
+ if (!_resume_node(child->name, child->info.major, child->info.minor,
+ child->props.read_ahead, child->props.read_ahead_flags,
+ &child->info, &child->dtree->cookie, child->udev_flags, child->info.suspended)) {
+ log_error("Unable to resume %s.", _node_name(child));
+ r = 0;
+ continue;
+ }
+ }
+ if (awaiting_peer_rename)
+ priority--; /* redo priority level */
+ }
+
+ /*
+ * FIXME: Implement delayed error reporting
+ * activation should be stopped only in the case,
+ * the submission of transation_id message fails,
+ * resume should continue further, just whole command
+ * has to report failure.
+ */
+ if (r && (dnode->props.send_messages > 1) &&
+ !(r = _node_send_messages(dnode, uuid_prefix, uuid_prefix_len, 1)))
+ stack;
+
+ return r;
+}
+
+/*
+ * Create (but do not load a table into) the device-mapper device for
+ * @dnode via DM_DEVICE_CREATE.  On success the kernel's view of the
+ * new device is stored in dnode->info.  Returns 1 on success.
+ */
+static int _create_node(struct dm_tree_node *dnode)
+{
+ int r = 0;
+ struct dm_task *dmt;
+
+ log_verbose("Creating %s", dnode->name);
+
+ if (!(dmt = dm_task_create(DM_DEVICE_CREATE))) {
+ log_error("Create dm_task creation failed for %s", dnode->name);
+ return 0;
+ }
+
+ if (!dm_task_set_name(dmt, dnode->name)) {
+ log_error("Failed to set device name for %s", dnode->name);
+ goto out;
+ }
+
+ if (!dm_task_set_uuid(dmt, dnode->uuid)) {
+ log_error("Failed to set uuid for %s", dnode->name);
+ goto out;
+ }
+
+ /* Only request a specific device number if one was supplied. */
+ if (dnode->props.major &&
+ (!dm_task_set_major(dmt, dnode->props.major) ||
+ !dm_task_set_minor(dmt, dnode->props.minor))) {
+ log_error("Failed to set device number for %s creation.", dnode->name);
+ goto out;
+ }
+
+ if (dnode->props.read_only && !dm_task_set_ro(dmt)) {
+ log_error("Failed to set read only flag for %s", dnode->name);
+ goto out;
+ }
+
+ if (!dm_task_no_open_count(dmt))
+ log_warn("WARNING: Failed to disable open_count.");
+
+ /* 'r' reflects both the ioctl and the subsequent info retrieval. */
+ if ((r = dm_task_run(dmt))) {
+ if (!(r = dm_task_get_info(dmt, &dnode->info)))
+ /*
+ * This should not be possible to occur. However,
+ * we print an error message anyway for the more
+ * absurd cases (e.g. memory corruption) so there
+ * is never any question as to which one failed.
+ */
+ log_error(INTERNAL_ERROR
+ "Unable to get DM task info for %s.",
+ dnode->name);
+ }
+out:
+ dm_task_destroy(dmt);
+
+ return r;
+}
+
+/*
+ * _remove_node
+ *
+ * Remove a DM device that was created but never received a table.
+ * Used to roll back a failed create+load so the pair stays atomic.
+ * Returns 1 on success (or if the device no longer exists).
+ */
+static int _remove_node(struct dm_tree_node *dnode)
+{
+	/* Nothing to do if the device is already gone. */
+	if (!dnode->info.exists)
+		return 1;
+
+	/* Refuse to remove a device that acquired a table - caller bug. */
+	if (dnode->info.live_table || dnode->info.inactive_table) {
+		log_error(INTERNAL_ERROR
+			  "_remove_node called on device with loaded table(s).");
+		return 0;
+	}
+
+	if (_deactivate_node(dnode->name, dnode->info.major, dnode->info.minor,
+			     &dnode->dtree->cookie, dnode->udev_flags, 0))
+		return 1;
+
+	log_error("Failed to clean-up device with no table: %s.",
+		  _node_name(dnode));
+	return 0;
+}
+
+/*
+ * Format @node's "major:minor" device reference into @devbuf for use
+ * in a kernel table line.  Returns 1 on success, 0 on failure.
+ */
+static int _build_dev_string(char *devbuf, size_t bufsize, struct dm_tree_node *node)
+{
+	if (dm_format_dev(devbuf, bufsize, node->info.major, node->info.minor))
+		return 1;
+
+	log_error("Failed to format %s device number for %s as dm "
+		  "target (%u,%u)",
+		  node->name, node->uuid, node->info.major, node->info.minor);
+	return 0;
+}
+
+/*
+ * Append a formatted string to the caller's 'params' buffer at offset
+ * 'p', advancing 'p' past the written text.  On overflow of
+ * 'paramsize' it makes the *enclosing function* return -1, which
+ * signals _emit_segment() to retry with a larger buffer.
+ * Requires local variables named 'params' and 'paramsize' in scope.
+ */
+#define EMIT_PARAMS(p, str...)\
+do {\
+ int w;\
+ if ((w = dm_snprintf(params + p, paramsize - (size_t) p, str)) < 0) {\
+ stack; /* Out of space */\
+ return -1;\
+ }\
+ p += w;\
+} while (0)
+
+/*
+ * _emit_areas_line
+ *
+ * Append the per-area device list for @seg to 'params'.
+ * RAID segments emit just " <dev>" per area (or " -" for a missing
+ * device slot); all other segment types emit "<dev> <offset>" pairs
+ * separated by single spaces.
+ *
+ * Returns: 1 on success, 0 on failure
+ * (buffer overflow returns -1 via EMIT_PARAMS)
+ */
+static int _emit_areas_line(struct dm_task *dmt __attribute__((unused)),
+ struct load_segment *seg, char *params,
+ size_t paramsize, int *pos)
+{
+ struct seg_area *area;
+ char devbuf[DM_FORMAT_DEV_BUFSIZE];
+ unsigned first_time = 1;
+
+ dm_list_iterate_items(area, &seg->areas) {
+ switch (seg->type) {
+ case SEG_RAID0:
+ case SEG_RAID0_META:
+ case SEG_RAID1:
+ case SEG_RAID10:
+ case SEG_RAID4:
+ case SEG_RAID5_N:
+ case SEG_RAID5_LA:
+ case SEG_RAID5_RA:
+ case SEG_RAID5_LS:
+ case SEG_RAID5_RS:
+ case SEG_RAID6_N_6:
+ case SEG_RAID6_ZR:
+ case SEG_RAID6_NR:
+ case SEG_RAID6_NC:
+ case SEG_RAID6_LS_6:
+ case SEG_RAID6_RS_6:
+ case SEG_RAID6_LA_6:
+ case SEG_RAID6_RA_6:
+ /* "-" tells the raid target this slot has no device. */
+ if (!area->dev_node) {
+ EMIT_PARAMS(*pos, " -");
+ break;
+ }
+ if (!_build_dev_string(devbuf, sizeof(devbuf), area->dev_node))
+ return_0;
+
+ EMIT_PARAMS(*pos, " %s", devbuf);
+ break;
+ default:
+ if (!_build_dev_string(devbuf, sizeof(devbuf), area->dev_node))
+ return_0;
+
+ /* No leading space before the very first area. */
+ EMIT_PARAMS(*pos, "%s%s %" PRIu64, first_time ? "" : " ",
+ devbuf, area->offset);
+ }
+
+ first_time = 0;
+ }
+
+ return 1;
+}
+
+/*
+ * Build the dm-mirror target table line for @seg: the log argument
+ * block ("core"/"disk"/"userspace ..."), sync flags, mirror leg list
+ * and optional "handle_errors" feature.  Kernel-version checks pick
+ * between the legacy "block_on_error" log argument and the newer
+ * "handle_errors" feature, and between clustered-* log types and the
+ * userspace log.
+ *
+ * Returns: 1 on success, 0 on failure
+ * (buffer overflow returns -1 via EMIT_PARAMS)
+ */
+static int _mirror_emit_segment_line(struct dm_task *dmt, struct load_segment *seg,
+ char *params, size_t paramsize)
+{
+ int block_on_error = 0;
+ int handle_errors = 0;
+ int dm_log_userspace = 0;
+ unsigned log_parm_count;
+ int pos = 0;
+ char logbuf[DM_FORMAT_DEV_BUFSIZE];
+ const char *logtype;
+ unsigned kmaj = 0, kmin = 0, krel = 0;
+
+ if (!get_uname_version(&kmaj, &kmin, &krel))
+ return_0;
+
+ if ((seg->flags & DM_BLOCK_ON_ERROR)) {
+ /*
+ * Originally, block_on_error was an argument to the log
+ * portion of the mirror CTR table. It was renamed to
+ * "handle_errors" and now resides in the 'features'
+ * section of the mirror CTR table (i.e. at the end).
+ *
+ * We can identify whether to use "block_on_error" or
+ * "handle_errors" by the dm-mirror module's version
+ * number (>= 1.12) or by the kernel version (>= 2.6.22).
+ */
+ if (KERNEL_VERSION(kmaj, kmin, krel) >= KERNEL_VERSION(2, 6, 22))
+ handle_errors = 1;
+ else
+ block_on_error = 1;
+ }
+
+ if (seg->clustered) {
+ /* Cluster mirrors require a UUID */
+ if (!seg->uuid)
+ return_0;
+
+ /*
+ * Cluster mirrors used to have their own log
+ * types. Now they are accessed through the
+ * userspace log type.
+ *
+ * The dm-log-userspace module was added to the
+ * 2.6.31 kernel.
+ */
+ if (KERNEL_VERSION(kmaj, kmin, krel) >= KERNEL_VERSION(2, 6, 31))
+ dm_log_userspace = 1;
+ }
+
+ /* Region size */
+ log_parm_count = 1;
+
+ /* [no]sync, block_on_error etc. */
+ log_parm_count += hweight32(seg->flags);
+
+ /* "handle_errors" is a feature arg now */
+ if (handle_errors)
+ log_parm_count--;
+
+ /* DM_CORELOG does not count in the param list */
+ if (seg->flags & DM_CORELOG)
+ log_parm_count--;
+
+ if (seg->clustered) {
+ log_parm_count++; /* For UUID */
+
+ if (!dm_log_userspace)
+ EMIT_PARAMS(pos, "clustered-");
+ else
+ /* For clustered-* type field inserted later */
+ log_parm_count++;
+ }
+
+ /* No log device means an in-memory "core" log. */
+ if (!seg->log)
+ logtype = "core";
+ else {
+ logtype = "disk";
+ log_parm_count++;
+ if (!_build_dev_string(logbuf, sizeof(logbuf), seg->log))
+ return_0;
+ }
+
+ if (dm_log_userspace)
+ EMIT_PARAMS(pos, "userspace %u %s clustered-%s",
+ log_parm_count, seg->uuid, logtype);
+ else
+ EMIT_PARAMS(pos, "%s %u", logtype, log_parm_count);
+
+ if (seg->log)
+ EMIT_PARAMS(pos, " %s", logbuf);
+
+ EMIT_PARAMS(pos, " %u", seg->region_size);
+
+ if (seg->clustered && !dm_log_userspace)
+ EMIT_PARAMS(pos, " %s", seg->uuid);
+
+ if ((seg->flags & DM_NOSYNC))
+ EMIT_PARAMS(pos, " nosync");
+ else if ((seg->flags & DM_FORCESYNC))
+ EMIT_PARAMS(pos, " sync");
+
+ if (block_on_error)
+ EMIT_PARAMS(pos, " block_on_error");
+
+ EMIT_PARAMS(pos, " %u ", seg->mirror_area_count);
+
+ if (_emit_areas_line(dmt, seg, params, paramsize, &pos) <= 0)
+ return_0;
+
+ /* Features section goes after the mirror leg list. */
+ if (handle_errors)
+ EMIT_PARAMS(pos, " 1 handle_errors");
+
+ return 1;
+}
+
+/* Parameter-count helper: a set value adds a key/value pair (2 args). */
+static int _2_if_value(unsigned p)
+{
+	if (p)
+		return 2;
+
+	return 0;
+}
+
+/*
+ * Count the table arguments contributed by bitmap @bits
+ * (RAID_BITMAP_SIZE 64-bit words): every set bit adds a keyword plus
+ * an index, i.e. 2 parameters.  Counted 32 bits at a time with
+ * hweight32().
+ */
+static int _get_params_count(const uint64_t *bits)
+{
+	int i, r = 0;
+
+	for (i = 0; i < RAID_BITMAP_SIZE; i++)
+		r += 2 * (hweight32(bits[i] & 0xFFFFFFFF) +
+			  hweight32(bits[i] >> 32));
+
+	return r;
+}
+
+/*
+ * Get target version (major, minor and patchlevel) for @target_name.
+ * If DM_DEVICE_LIST_VERSIONS is unsupported, reports success with
+ * version 0.0.0 so callers can fall back to legacy behaviour.
+ *
+ * FIXME: this function is derived from liblvm.
+ * Integrate with move of liblvm functions
+ * to libdm in future library layer purge
+ * (e.g. expose as API dm_target_version()?)
+ */
+static int _target_version(const char *target_name, uint32_t *maj,
+ uint32_t *min, uint32_t *patchlevel)
+{
+ int r = 0;
+ struct dm_task *dmt;
+ struct dm_versions *target, *last_target = NULL;
+
+ log_very_verbose("Getting target version for %s", target_name);
+ if (!(dmt = dm_task_create(DM_DEVICE_LIST_VERSIONS)))
+ return_0;
+
+ if (!dm_task_run(dmt)) {
+ log_debug_activation("Failed to get %s target versions", target_name);
+ /* Assume this was because LIST_VERSIONS isn't supported */
+ *maj = *min = *patchlevel = 0;
+ r = 1;
+ } else
+ /* Walk the variable-length version list; the final entry
+ points at itself (target == last_target terminates). */
+ for (target = dm_task_get_versions(dmt);
+ target != last_target;
+ last_target = target, target = (struct dm_versions *)((char *) target + target->next))
+ if (!strcmp(target_name, target->name)) {
+ *maj = target->version[0];
+ *min = target->version[1];
+ *patchlevel = target->version[2];
+ log_very_verbose("Found %s target "
+ "v%" PRIu32 ".%" PRIu32 ".%" PRIu32 ".",
+ target_name, *maj, *min, *patchlevel);
+ r = 1;
+ break;
+ }
+
+ dm_task_destroy(dmt);
+
+ return r;
+}
+
+/*
+ * Build the dm-raid target table line for @seg: target name, total
+ * optional-parameter count, stripe size and the optional parameters,
+ * followed by the metadata/data device pair list.  The ordering of
+ * the optional parameters depends on the loaded raid target version
+ * (a flaw existed in versions >= 1.9.0 and < 1.11.0).
+ *
+ * Returns: 1 on success, 0 on failure
+ * (buffer overflow returns -1 via EMIT_PARAMS)
+ */
+static int _raid_emit_segment_line(struct dm_task *dmt, uint32_t major,
+ uint32_t minor, struct load_segment *seg,
+ uint64_t *seg_start, char *params,
+ size_t paramsize)
+{
+ uint32_t i;
+ uint32_t area_count = seg->area_count / 2;
+ uint32_t maj, min, patchlevel;
+ int param_count = 1; /* mandatory 'chunk size'/'stripe size' arg */
+ int pos = 0;
+ unsigned type;
+
+ /* Areas come in metadata/data pairs - an odd count is invalid. */
+ if (seg->area_count % 2)
+ return 0;
+
+ if ((seg->flags & DM_NOSYNC) || (seg->flags & DM_FORCESYNC))
+ param_count++;
+
+ /* Each set value below contributes a key/value pair (2 args). */
+ param_count += _2_if_value(seg->data_offset) +
+ _2_if_value(seg->delta_disks) +
+ _2_if_value(seg->region_size) +
+ _2_if_value(seg->writebehind) +
+ _2_if_value(seg->min_recovery_rate) +
+ _2_if_value(seg->max_recovery_rate) +
+ _2_if_value(seg->data_copies > 1);
+
+ /* rebuilds and writemostly are BITMAP_SIZE * 64 bits */
+ param_count += _get_params_count(seg->rebuilds);
+ param_count += _get_params_count(seg->writemostly);
+
+ if ((seg->type == SEG_RAID1) && seg->stripe_size)
+ log_info("WARNING: Ignoring RAID1 stripe size");
+
+ /* Kernel only expects "raid0", not "raid0_meta" */
+ type = seg->type;
+ if (type == SEG_RAID0_META)
+ type = SEG_RAID0;
+
+ EMIT_PARAMS(pos, "%s %d %u",
+ type == SEG_RAID10 ? "raid10" : _dm_segtypes[type].target,
+ param_count, seg->stripe_size);
+
+ if (!_target_version("raid", &maj, &min, &patchlevel))
+ return_0;
+
+ /*
+ * Target version prior to 1.9.0 and >= 1.11.0 emit
+ * order of parameters as of kernel target documentation
+ */
+ if (maj > 1 || (maj == 1 && (min < 9 || min >= 11))) {
+ if (seg->flags & DM_NOSYNC)
+ EMIT_PARAMS(pos, " nosync");
+ else if (seg->flags & DM_FORCESYNC)
+ EMIT_PARAMS(pos, " sync");
+
+ for (i = 0; i < area_count; i++)
+ if (seg->rebuilds[i/64] & (1ULL << (i%64)))
+ EMIT_PARAMS(pos, " rebuild %u", i);
+
+ if (seg->min_recovery_rate)
+ EMIT_PARAMS(pos, " min_recovery_rate %u",
+ seg->min_recovery_rate);
+
+ if (seg->max_recovery_rate)
+ EMIT_PARAMS(pos, " max_recovery_rate %u",
+ seg->max_recovery_rate);
+
+ for (i = 0; i < area_count; i++)
+ if (seg->writemostly[i/64] & (1ULL << (i%64)))
+ EMIT_PARAMS(pos, " write_mostly %u", i);
+
+ if (seg->writebehind)
+ EMIT_PARAMS(pos, " max_write_behind %u", seg->writebehind);
+
+ if (seg->region_size)
+ EMIT_PARAMS(pos, " region_size %u", seg->region_size);
+
+ if (seg->data_copies > 1 && type == SEG_RAID10)
+ EMIT_PARAMS(pos, " raid10_copies %u", seg->data_copies);
+
+ if (seg->delta_disks)
+ EMIT_PARAMS(pos, " delta_disks %d", seg->delta_disks);
+
+ /* If seg-data_offset == 1, kernel needs a zero offset to adjust to it */
+ if (seg->data_offset)
+ EMIT_PARAMS(pos, " data_offset %d", seg->data_offset == 1 ? 0 : seg->data_offset);
+
+ /* Target version >= 1.9.0 && < 1.11.0 had a table line parameter ordering flaw */
+ } else {
+ if (seg->data_copies > 1 && type == SEG_RAID10)
+ EMIT_PARAMS(pos, " raid10_copies %u", seg->data_copies);
+
+ if (seg->flags & DM_NOSYNC)
+ EMIT_PARAMS(pos, " nosync");
+ else if (seg->flags & DM_FORCESYNC)
+ EMIT_PARAMS(pos, " sync");
+
+ if (seg->region_size)
+ EMIT_PARAMS(pos, " region_size %u", seg->region_size);
+
+ /* If seg-data_offset == 1, kernel needs a zero offset to adjust to it */
+ if (seg->data_offset)
+ EMIT_PARAMS(pos, " data_offset %d", seg->data_offset == 1 ? 0 : seg->data_offset);
+
+ if (seg->delta_disks)
+ EMIT_PARAMS(pos, " delta_disks %d", seg->delta_disks);
+
+ for (i = 0; i < area_count; i++)
+ if (seg->rebuilds[i/64] & (1ULL << (i%64)))
+ EMIT_PARAMS(pos, " rebuild %u", i);
+
+ for (i = 0; i < area_count; i++)
+ if (seg->writemostly[i/64] & (1ULL << (i%64)))
+ EMIT_PARAMS(pos, " write_mostly %u", i);
+
+ if (seg->writebehind)
+ EMIT_PARAMS(pos, " max_write_behind %u", seg->writebehind);
+
+ if (seg->max_recovery_rate)
+ EMIT_PARAMS(pos, " max_recovery_rate %u",
+ seg->max_recovery_rate);
+
+ if (seg->min_recovery_rate)
+ EMIT_PARAMS(pos, " min_recovery_rate %u",
+ seg->min_recovery_rate);
+ }
+
+ /* Print number of metadata/data device pairs */
+ EMIT_PARAMS(pos, " %u", area_count);
+
+ if (_emit_areas_line(dmt, seg, params, paramsize, &pos) <= 0)
+ return_0;
+
+ return 1;
+}
+
+/*
+ * Build the dm-cache target table line for @seg:
+ * metadata dev, cache (data) dev, origin dev, block size, feature
+ * list, policy name and policy key/value arguments.
+ *
+ * Returns: 1 on success, 0 on failure
+ * (buffer overflow returns -1 via EMIT_PARAMS)
+ */
+static int _cache_emit_segment_line(struct dm_task *dmt,
+ struct load_segment *seg,
+ char *params, size_t paramsize)
+{
+ int pos = 0;
+ /* unsigned feature_count; */
+ char data[DM_FORMAT_DEV_BUFSIZE];
+ char metadata[DM_FORMAT_DEV_BUFSIZE];
+ char origin[DM_FORMAT_DEV_BUFSIZE];
+ const char *name;
+ struct dm_config_node *cn;
+
+ /* Cache Dev */
+ if (!_build_dev_string(data, sizeof(data), seg->pool))
+ return_0;
+
+ /* Metadata Dev */
+ if (!_build_dev_string(metadata, sizeof(metadata), seg->metadata))
+ return_0;
+
+ /* Origin Dev */
+ if (!_build_dev_string(origin, sizeof(origin), seg->origin))
+ return_0;
+
+ EMIT_PARAMS(pos, "%s %s %s", metadata, data, origin);
+
+ /* Data block size */
+ EMIT_PARAMS(pos, " %u", seg->data_block_size);
+
+ /* Features */
+ /* feature_count = hweight32(seg->flags); */
+ /* EMIT_PARAMS(pos, " %u", feature_count); */
+ /* Feature count includes the io mode emitted just below;
+ trailing space is deliberate - io mode word is appended. */
+ if (seg->flags & DM_CACHE_FEATURE_METADATA2)
+ EMIT_PARAMS(pos, " 2 metadata2 ");
+ else
+ EMIT_PARAMS(pos, " 1 ");
+
+ if (seg->flags & DM_CACHE_FEATURE_PASSTHROUGH)
+ EMIT_PARAMS(pos, "passthrough");
+ else if (seg->flags & DM_CACHE_FEATURE_WRITEBACK)
+ EMIT_PARAMS(pos, "writeback");
+ else
+ EMIT_PARAMS(pos, "writethrough");
+
+ /* Cache Policy */
+ name = seg->policy_name ? : "default";
+
+ EMIT_PARAMS(pos, " %s", name);
+
+ /* Each policy setting is a key/value pair (2 args). */
+ EMIT_PARAMS(pos, " %u", seg->policy_argc * 2);
+ if (seg->policy_settings)
+ for (cn = seg->policy_settings->child; cn; cn = cn->sib)
+ EMIT_PARAMS(pos, " %s %" PRIu64, cn->key, cn->v->v.i);
+
+ return 1;
+}
+
+/*
+ * Build the thin-pool target table line for @seg: metadata dev, data
+ * dev, block size, low-water mark, then the feature argument count
+ * followed by one keyword per enabled feature.
+ *
+ * Returns: 1 on success, 0 on failure
+ * (buffer overflow returns -1 via EMIT_PARAMS)
+ */
+static int _thin_pool_emit_segment_line(struct dm_task *dmt,
+					struct load_segment *seg,
+					char *params, size_t paramsize)
+{
+	int pos = 0;
+	int features = 0;
+	char pool[DM_FORMAT_DEV_BUFSIZE], metadata[DM_FORMAT_DEV_BUFSIZE];
+
+	/* Every enabled flag contributes exactly one feature keyword. */
+	if (seg->error_if_no_space)
+		features++;
+	if (seg->read_only)
+		features++;
+	if (seg->ignore_discard)
+		features++;
+	if (seg->no_discard_passdown)
+		features++;
+	if (seg->skip_block_zeroing)
+		features++;
+
+	if (!_build_dev_string(metadata, sizeof(metadata), seg->metadata))
+		return_0;
+
+	if (!_build_dev_string(pool, sizeof(pool), seg->pool))
+		return_0;
+
+	EMIT_PARAMS(pos, "%s %s %d %" PRIu64 " %d%s%s%s%s%s", metadata, pool,
+		    seg->data_block_size, seg->low_water_mark, features,
+		    seg->skip_block_zeroing ? " skip_block_zeroing" : "",
+		    seg->ignore_discard ? " ignore_discard" : "",
+		    seg->no_discard_passdown ? " no_discard_passdown" : "",
+		    seg->error_if_no_space ? " error_if_no_space" : "",
+		    seg->read_only ? " read_only" : ""
+		    );
+
+	return 1;
+}
+
+/*
+ * Build the thin target table line for @seg: pool device, device id
+ * and, when present, the external origin device.
+ *
+ * Returns: 1 on success, 0 on failure
+ * (buffer overflow returns -1 via EMIT_PARAMS)
+ */
+static int _thin_emit_segment_line(struct dm_task *dmt,
+				   struct load_segment *seg,
+				   char *params, size_t paramsize)
+{
+	int pos = 0;
+	char pool[DM_FORMAT_DEV_BUFSIZE];
+	char external[DM_FORMAT_DEV_BUFSIZE + 1];
+
+	if (!_build_dev_string(pool, sizeof(pool), seg->pool))
+		return_0;
+
+	if (seg->external) {
+		/* Prepend the separating space so the buffer can be
+		 * appended verbatim below. */
+		external[0] = ' ';
+		if (!_build_dev_string(external + 1, sizeof(external) - 1,
+				       seg->external))
+			return_0;
+	} else
+		external[0] = 0;
+
+	EMIT_PARAMS(pos, "%s %d%s", pool, seg->device_id, external);
+
+	return 1;
+}
+
+/*
+ * Build the parameter string for one segment of (@major:@minor) and
+ * add it to @dmt as a target starting at *@seg_start, advancing
+ * *@seg_start by the segment size.  Complex targets are delegated to
+ * per-type helpers; simple targets get their area list appended here.
+ *
+ * Returns: 1 on success, 0 on failure, -1 when 'params' was too small
+ * (caller retries with a bigger buffer - see _emit_segment()).
+ */
+static int _emit_segment_line(struct dm_task *dmt, uint32_t major,
+ uint32_t minor, struct load_segment *seg,
+ uint64_t *seg_start, char *params,
+ size_t paramsize)
+{
+ int pos = 0;
+ int r;
+ int target_type_is_raid = 0;
+ char originbuf[DM_FORMAT_DEV_BUFSIZE], cowbuf[DM_FORMAT_DEV_BUFSIZE];
+
+ switch(seg->type) {
+ case SEG_ERROR:
+ case SEG_ZERO:
+ case SEG_LINEAR:
+ break;
+ case SEG_MIRRORED:
+ /* Mirrors are pretty complicated - now in separate function */
+ r = _mirror_emit_segment_line(dmt, seg, params, paramsize);
+ /* NOTE(review): a -1 (buffer overflow) return passes this
+ check unreported - confirm whether it should be
+ propagated so the buffer is regrown. */
+ if (!r)
+ return_0;
+ break;
+ case SEG_SNAPSHOT:
+ case SEG_SNAPSHOT_MERGE:
+ if (!_build_dev_string(originbuf, sizeof(originbuf), seg->origin))
+ return_0;
+ if (!_build_dev_string(cowbuf, sizeof(cowbuf), seg->cow))
+ return_0;
+ EMIT_PARAMS(pos, "%s %s %c %d", originbuf, cowbuf,
+ seg->persistent ? 'P' : 'N', seg->chunk_size);
+ break;
+ case SEG_SNAPSHOT_ORIGIN:
+ if (!_build_dev_string(originbuf, sizeof(originbuf), seg->origin))
+ return_0;
+ EMIT_PARAMS(pos, "%s", originbuf);
+ break;
+ case SEG_STRIPED:
+ EMIT_PARAMS(pos, "%u %u ", seg->area_count, seg->stripe_size);
+ break;
+ case SEG_CRYPT:
+ EMIT_PARAMS(pos, "%s%s%s%s%s %s %" PRIu64 " ", seg->cipher,
+ seg->chainmode ? "-" : "", seg->chainmode ?: "",
+ seg->iv ? "-" : "", seg->iv ?: "", seg->key,
+ seg->iv_offset != DM_CRYPT_IV_DEFAULT ?
+ seg->iv_offset : *seg_start);
+ break;
+ case SEG_RAID0:
+ case SEG_RAID0_META:
+ case SEG_RAID1:
+ case SEG_RAID10:
+ case SEG_RAID4:
+ case SEG_RAID5_N:
+ case SEG_RAID5_LA:
+ case SEG_RAID5_RA:
+ case SEG_RAID5_LS:
+ case SEG_RAID5_RS:
+ case SEG_RAID6_N_6:
+ case SEG_RAID6_ZR:
+ case SEG_RAID6_NR:
+ case SEG_RAID6_NC:
+ case SEG_RAID6_LS_6:
+ case SEG_RAID6_RS_6:
+ case SEG_RAID6_LA_6:
+ case SEG_RAID6_RA_6:
+ target_type_is_raid = 1;
+ r = _raid_emit_segment_line(dmt, major, minor, seg, seg_start,
+ params, paramsize);
+ if (!r)
+ return_0;
+
+ break;
+ case SEG_THIN_POOL:
+ if (!_thin_pool_emit_segment_line(dmt, seg, params, paramsize))
+ return_0;
+ break;
+ case SEG_THIN:
+ if (!_thin_emit_segment_line(dmt, seg, params, paramsize))
+ return_0;
+ break;
+ case SEG_CACHE:
+ if (!_cache_emit_segment_line(dmt, seg, params, paramsize))
+ return_0;
+ break;
+ }
+
+ /* Second pass: append the area (device) list for the simple types
+ that did not already emit their devices above. */
+ switch(seg->type) {
+ case SEG_ERROR:
+ case SEG_SNAPSHOT:
+ case SEG_SNAPSHOT_ORIGIN:
+ case SEG_SNAPSHOT_MERGE:
+ case SEG_ZERO:
+ case SEG_THIN_POOL:
+ case SEG_THIN:
+ case SEG_CACHE:
+ break;
+ case SEG_CRYPT:
+ case SEG_LINEAR:
+ case SEG_STRIPED:
+ if ((r = _emit_areas_line(dmt, seg, params, paramsize, &pos)) <= 0) {
+ stack;
+ return r;
+ }
+ if (!params[0]) {
+ log_error("No parameters supplied for %s target "
+ "%u:%u.", _dm_segtypes[seg->type].target,
+ major, minor);
+ return 0;
+ }
+ break;
+ }
+
+ log_debug_activation("Adding target to (%" PRIu32 ":%" PRIu32 "): %" PRIu64
+ " %" PRIu64 " %s %s", major, minor,
+ *seg_start, seg->size, target_type_is_raid ? "raid" :
+ _dm_segtypes[seg->type].target, params);
+
+ if (!dm_task_add_target(dmt, *seg_start, seg->size,
+ target_type_is_raid ? "raid" :
+ _dm_segtypes[seg->type].target, params))
+ return_0;
+
+ *seg_start += seg->size;
+
+ return 1;
+}
+
+#undef EMIT_PARAMS
+
+/*
+ * Build the parameter string for @seg and add it as a target to @dmt.
+ * Starts with a 4k buffer and doubles it each time
+ * _emit_segment_line() reports insufficient space (negative return),
+ * up to MAX_TARGET_PARAMSIZE.
+ */
+static int _emit_segment(struct dm_task *dmt, uint32_t major, uint32_t minor,
+			 struct load_segment *seg, uint64_t *seg_start)
+{
+	char *params;
+	size_t paramsize = 4096; /* FIXME: too small for long RAID lines when > 64 devices supported */
+	int ret;
+
+	for (;;) {
+		if (!(params = dm_malloc(paramsize))) {
+			log_error("Insufficient space for target parameters.");
+			return 0;
+		}
+
+		params[0] = '\0';
+		ret = _emit_segment_line(dmt, major, minor, seg, seg_start,
+					 params, paramsize);
+		dm_free(params);
+
+		if (!ret)
+			stack;
+
+		/* 0 or 1: final answer; negative means buffer too small. */
+		if (ret >= 0)
+			return ret;
+
+		log_debug_activation("Insufficient space in params[%" PRIsize_t
+				     "] for target parameters.", paramsize);
+
+		paramsize *= 2;
+		if (paramsize >= MAX_TARGET_PARAMSIZE)
+			break;
+	}
+
+	log_error("Target parameter size too big. Aborting.");
+	return 0;
+}
+
+/*
+ * Load the table for @dnode into the kernel (DM_DEVICE_RELOAD) from
+ * its queued segments, then record whether the device's size changed
+ * (props.size_changed: -1 shrink, 0 unchanged/ignored, 1 grow) so the
+ * caller can decide whether a resume is needed.  Returns 1 on success.
+ */
+static int _load_node(struct dm_tree_node *dnode)
+{
+ int r = 0;
+ struct dm_task *dmt;
+ struct load_segment *seg;
+ uint64_t seg_start = 0, existing_table_size;
+
+ log_verbose("Loading table for %s.", _node_name(dnode));
+
+ if (!(dmt = dm_task_create(DM_DEVICE_RELOAD))) {
+ log_error("Reload dm_task creation failed for %s.", _node_name(dnode));
+ return 0;
+ }
+
+ if (!dm_task_set_major(dmt, dnode->info.major) ||
+ !dm_task_set_minor(dmt, dnode->info.minor)) {
+ log_error("Failed to set device number for %s reload.", _node_name(dnode));
+ goto out;
+ }
+
+ if (dnode->props.read_only && !dm_task_set_ro(dmt)) {
+ log_error("Failed to set read only flag for %s.", _node_name(dnode));
+ goto out;
+ }
+
+ if (!dm_task_no_open_count(dmt))
+ log_warn("WARNING: Failed to disable open_count.");
+
+ /* Emit each queued segment in order; seg_start tracks the
+ running sector offset and ends as the new table size. */
+ dm_list_iterate_items(seg, &dnode->props.segs)
+ if (!_emit_segment(dmt, dnode->info.major, dnode->info.minor,
+ seg, &seg_start))
+ goto_out;
+
+ if (!dm_task_suppress_identical_reload(dmt))
+ log_warn("WARNING: Failed to suppress reload of identical tables.");
+
+ if ((r = dm_task_run(dmt))) {
+ r = dm_task_get_info(dmt, &dnode->info);
+ if (r && !dnode->info.inactive_table)
+ log_verbose("Suppressed %s identical table reload.",
+ _node_name(dnode));
+
+ existing_table_size = dm_task_get_existing_table_size(dmt);
+ if ((dnode->props.size_changed =
+ (existing_table_size == seg_start) ? 0 :
+ (existing_table_size > seg_start) ? -1 : 1)) {
+ /*
+ * Kernel usually skips size validation on zero-length devices
+ * now so no need to preload them.
+ */
+ /* FIXME In which kernel version did this begin? */
+ if (!existing_table_size && dnode->props.delay_resume_if_new)
+ dnode->props.size_changed = 0;
+
+ log_debug_activation("Table size changed from %" PRIu64 " to %"
+ PRIu64 " for %s.%s", existing_table_size,
+ seg_start, _node_name(dnode),
+ dnode->props.size_changed ? "" : " (Ignoring.)");
+
+ /*
+ * FIXME: code here has known design problem.
+ * LVM2 does NOT resize thin-pool on top of other LV in 2 steps -
+ * where raid would be resized with 1st. transaction
+ * followed by 2nd. thin-pool resize - RHBZ #1285063
+ */
+ if (existing_table_size && dnode->props.delay_resume_if_extended) {
+ log_debug_activation("Resume of table of extended device %s delayed.",
+ _node_name(dnode));
+ dnode->props.size_changed = 0;
+ }
+ }
+ }
+
+ /* Segments were consumed by this load attempt. */
+ dnode->props.segment_count = 0;
+
+out:
+ dm_task_destroy(dmt);
+
+ return r;
+}
+
+/*
+ * Deactivate (recursively) every node that was newly created during
+ * the preload phase.  New nodes are pushed onto the head of the
+ * 'activated' list, so iteration visits the most recently created
+ * devices first.
+ */
+static int _dm_tree_revert_activated(struct dm_tree_node *parent)
+{
+	struct dm_tree_node *node;
+
+	dm_list_iterate_items_gen(node, &parent->activated, activated_list) {
+		log_debug_activation("Reverting %s.", _node_name(node));
+
+		/* The device is going away - its callback must not run. */
+		if (node->callback) {
+			log_debug_activation("Dropping callback for %s.", _node_name(node));
+			node->callback = NULL;
+		}
+
+		if (!_deactivate_node(node->name, node->info.major, node->info.minor,
+				      &node->dtree->cookie, node->udev_flags, 0)) {
+			log_error("Unable to deactivate %s.", _node_name(node));
+			return 0;
+		}
+
+		if (!_dm_tree_revert_activated(node))
+			return_0;
+	}
+
+	return 1;
+}
+
+/*
+ * Create missing devices and load tables for every node in @dnode's
+ * subtree whose uuid matches @uuid_prefix (depth-first).  Devices are
+ * resumed only when they have parents and grew; newly created devices
+ * are tracked on dnode->activated so a later failure can revert them.
+ * Returns 1 on success; per-child failures set r = 0 but processing
+ * of siblings continues.
+ */
+int dm_tree_preload_children(struct dm_tree_node *dnode,
+ const char *uuid_prefix,
+ size_t uuid_prefix_len)
+{
+ int r = 1, node_created = 0;
+ void *handle = NULL;
+ struct dm_tree_node *child;
+ int update_devs_flag = 0;
+
+ /* Preload children first */
+ while ((child = dm_tree_next_child(&handle, dnode, 0))) {
+ /* Propagate delay of resume from parent node */
+ if (dnode->props.delay_resume_if_new > 1)
+ child->props.delay_resume_if_new = dnode->props.delay_resume_if_new;
+
+ /* Skip existing non-device-mapper devices */
+ if (!child->info.exists && child->info.major)
+ continue;
+
+ /* Ignore if it doesn't belong to this VG */
+ if (child->info.exists &&
+ !_uuid_prefix_matches(child->uuid, uuid_prefix, uuid_prefix_len))
+ continue;
+
+ if (dm_tree_node_num_children(child, 0))
+ if (!dm_tree_preload_children(child, uuid_prefix, uuid_prefix_len))
+ return_0;
+
+ /* FIXME Cope if name exists with no uuid? */
+ if (!child->info.exists && !(node_created = _create_node(child)))
+ return_0;
+
+ /* Propagate delayed resume from extended child node */
+ if (child->props.delay_resume_if_extended)
+ dnode->props.delay_resume_if_extended = 1;
+
+ if (!child->info.inactive_table &&
+ child->props.segment_count &&
+ !_load_node(child)) {
+ /*
+ * If the table load does not succeed, we remove the
+ * device in the kernel that would otherwise have an
+ * empty table. This makes the create + load of the
+ * device atomic. However, if other dependencies have
+ * already been created and loaded; this code is
+ * insufficient to remove those - only the node
+ * encountering the table load failure is removed.
+ */
+ if (node_created && !_remove_node(child))
+ return_0;
+ return_0;
+ }
+
+ /* No resume for a device without parents or with unchanged or smaller size */
+ if (!dm_tree_node_num_children(child, 1) || (child->props.size_changed <= 0))
+ continue;
+
+ if (!child->info.inactive_table && !child->info.suspended)
+ continue;
+
+ if (!_resume_node(child->name, child->info.major, child->info.minor,
+ child->props.read_ahead, child->props.read_ahead_flags,
+ &child->info, &child->dtree->cookie, child->udev_flags,
+ child->info.suspended)) {
+ log_error("Unable to resume %s.", _node_name(child));
+ /* If the device was not previously active, we might as well remove this node. */
+ if (!child->info.live_table &&
+ !_deactivate_node(child->name, child->info.major, child->info.minor,
+ &child->dtree->cookie, child->udev_flags, 0))
+ log_error("Unable to deactivate %s.", _node_name(child));
+ r = 0;
+ /* Each child is handled independently */
+ continue;
+ }
+
+ if (node_created) {
+ /* Collect newly introduced devices for revert */
+ dm_list_add_h(&dnode->activated, &child->activated_list);
+
+ /* When creating new node also check transaction_id. */
+ if (child->props.send_messages &&
+ !_node_send_messages(child, uuid_prefix, uuid_prefix_len, 0)) {
+ stack;
+ /* Settle udev before tearing the new devices down. */
+ if (!dm_udev_wait(dm_tree_get_cookie(dnode)))
+ stack;
+ dm_tree_set_cookie(dnode, 0);
+ (void) _dm_tree_revert_activated(dnode);
+ r = 0;
+ continue;
+ }
+ }
+
+ /*
+ * Prepare for immediate synchronization with udev and flush all stacked
+ * dev node operations if requested by immediate_dev_node property. But
+ * finish processing current level in the tree first.
+ */
+ if (child->props.immediate_dev_node)
+ update_devs_flag = 1;
+ }
+
+ if (update_devs_flag ||
+ (r && !dnode->info.exists && dnode->callback)) {
+ if (!dm_udev_wait(dm_tree_get_cookie(dnode)))
+ stack;
+ dm_tree_set_cookie(dnode, 0);
+
+ /* Run the preload callback for a root node that did not
+ previously exist; on callback failure undo everything
+ this preload activated. */
+ if (r && !dnode->info.exists && dnode->callback &&
+ !dnode->callback(dnode, DM_NODE_CALLBACK_PRELOADED,
+ dnode->callback_data))
+ {
+ /* Try to deactivate what has been activated in preload phase */
+ (void) _dm_tree_revert_activated(dnode);
+ return_0;
+ }
+ }
+
+ return r;
+}
+
+/*
+ * Test whether any device in @dnode's subtree has a uuid beginning
+ * with @uuid_prefix.
+ *
+ * Returns 1 on a match, and also 1 if unsure (a node's uuid could not
+ * be obtained); 0 when no match exists.
+ */
+int dm_tree_children_use_uuid(struct dm_tree_node *dnode,
+			      const char *uuid_prefix,
+			      size_t uuid_prefix_len)
+{
+	void *handle = NULL;
+	struct dm_tree_node *child = dnode;
+	const char *uuid;
+
+	while ((child = dm_tree_next_child(&handle, dnode, 0))) {
+		if (!(uuid = dm_tree_node_get_uuid(child))) {
+			log_warn("WARNING: Failed to get uuid for dtree node %s.",
+				 _node_name(child));
+			return 1;
+		}
+
+		if (_uuid_prefix_matches(uuid, uuid_prefix, uuid_prefix_len))
+			return 1;
+
+		/* Propagate matches found deeper in the tree.  (The
+		 * recursive result was previously discarded, so a match
+		 * below the first level was never reported.) */
+		if (dm_tree_node_num_children(child, 0) &&
+		    dm_tree_children_use_uuid(child, uuid_prefix, uuid_prefix_len))
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Target functions
+ */
+
+/*
+ * Allocate a zeroed load_segment of @type/@size from the tree's pool
+ * and append it to @dnode's pending segment list.
+ * Returns NULL on allocation failure.
+ */
+static struct load_segment *_add_segment(struct dm_tree_node *dnode, unsigned type, uint64_t size)
+{
+	struct load_segment *seg;
+
+	seg = dm_pool_zalloc(dnode->dtree->mem, sizeof(*seg));
+	if (!seg) {
+		log_error("dtree node segment allocation failed");
+		return NULL;
+	}
+
+	seg->type = type;
+	seg->size = size;
+	dm_list_init(&seg->areas);
+	dm_list_add(&dnode->props.segs, &seg->list);
+	dnode->props.segment_count++;
+
+	return seg;
+}
+
+/*
+ * Append a snapshot-origin target of @size to @dnode, linking in the
+ * real origin device identified by @origin_uuid.
+ */
+int dm_tree_node_add_snapshot_origin_target(struct dm_tree_node *dnode,
+					    uint64_t size,
+					    const char *origin_uuid)
+{
+	struct dm_tree_node *origin_node;
+	struct load_segment *seg;
+
+	if (!(seg = _add_segment(dnode, SEG_SNAPSHOT_ORIGIN, size)))
+		return_0;
+
+	origin_node = dm_tree_find_node_by_uuid(dnode->dtree, origin_uuid);
+	if (!origin_node) {
+		log_error("Couldn't find snapshot origin uuid %s.", origin_uuid);
+		return 0;
+	}
+
+	seg->origin = origin_node;
+	if (!_link_tree_nodes(dnode, origin_node))
+		return_0;
+
+	/* Resume snapshot origins after new snapshots */
+	dnode->activation_priority = 1;
+
+	/*
+	 * Don't resume the origin immediately in case it is a non-trivial
+	 * target that must not be active more than once concurrently!
+	 */
+	origin_node->props.delay_resume_if_new = 1;
+
+	return 1;
+}
+
+/*
+ * Common implementation for snapshot and snapshot-merge targets.
+ * Links in the origin and COW devices; when @merge_uuid is given the
+ * segment becomes a snapshot-merge and activation priorities are
+ * arranged so the merging snapshot resumes after the acting origin.
+ */
+static int _add_snapshot_target(struct dm_tree_node *node,
+				uint64_t size,
+				const char *origin_uuid,
+				const char *cow_uuid,
+				const char *merge_uuid,
+				int persistent,
+				uint32_t chunk_size)
+{
+	struct load_segment *seg;
+	struct dm_tree_node *origin_node, *cow_node, *merge_node;
+	unsigned seg_type = merge_uuid ? SEG_SNAPSHOT_MERGE : SEG_SNAPSHOT;
+
+	if (!(seg = _add_segment(node, seg_type, size)))
+		return_0;
+
+	if (!(origin_node = dm_tree_find_node_by_uuid(node->dtree, origin_uuid))) {
+		log_error("Couldn't find snapshot origin uuid %s.", origin_uuid);
+		return 0;
+	}
+
+	seg->origin = origin_node;
+	if (!_link_tree_nodes(node, origin_node))
+		return_0;
+
+	if (!(cow_node = dm_tree_find_node_by_uuid(node->dtree, cow_uuid))) {
+		log_error("Couldn't find snapshot COW device uuid %s.", cow_uuid);
+		return 0;
+	}
+
+	seg->cow = cow_node;
+	if (!_link_tree_nodes(node, cow_node))
+		return_0;
+
+	seg->persistent = persistent ? 1 : 0;
+	seg->chunk_size = chunk_size;
+
+	if (merge_uuid) {
+		if (!(merge_node = dm_tree_find_node_by_uuid(node->dtree, merge_uuid)))
+			/* not a pure error, merging snapshot may have been deactivated */
+			log_verbose("Couldn't find merging snapshot uuid %s.", merge_uuid);
+		else
+			/* must not link merging snapshot, would undermine activation_priority below */
+			seg->merge = merge_node;
+
+		/* Resume snapshot-merge (acting origin) after other snapshots */
+		node->activation_priority = 1;
+
+		if (seg->merge)
+			/* Resume merging snapshot after snapshot-merge */
+			seg->merge->activation_priority = 2;
+	}
+
+	return 1;
+}
+
+
+/* Append a plain (non-merging) snapshot target to @node. */
+int dm_tree_node_add_snapshot_target(struct dm_tree_node *node,
+				     uint64_t size,
+				     const char *origin_uuid,
+				     const char *cow_uuid,
+				     int persistent,
+				     uint32_t chunk_size)
+{
+	/* No merge uuid - ordinary snapshot. */
+	return _add_snapshot_target(node, size, origin_uuid, cow_uuid,
+				    NULL, persistent, chunk_size);
+}
+
+/* Append a snapshot-merge target (always persistent) to @node. */
+int dm_tree_node_add_snapshot_merge_target(struct dm_tree_node *node,
+					   uint64_t size,
+					   const char *origin_uuid,
+					   const char *cow_uuid,
+					   const char *merge_uuid,
+					   uint32_t chunk_size)
+{
+	/* Merging snapshots are always persistent. */
+	return _add_snapshot_target(node, size, origin_uuid, cow_uuid,
+				    merge_uuid, 1, chunk_size);
+}
+
+int dm_tree_node_add_error_target(struct dm_tree_node *node,
+ uint64_t size)
+{
+ if (!_add_segment(node, SEG_ERROR, size))
+ return_0;
+
+ return 1;
+}
+
+/* Append a zero target segment of @size to @node's table. */
+int dm_tree_node_add_zero_target(struct dm_tree_node *node,
+				 uint64_t size)
+{
+	if (_add_segment(node, SEG_ZERO, size))
+		return 1;
+
+	return_0;
+}
+
+/* Append a linear target segment of @size to @node's table. */
+int dm_tree_node_add_linear_target(struct dm_tree_node *node,
+				   uint64_t size)
+{
+	if (_add_segment(node, SEG_LINEAR, size))
+		return 1;
+
+	return_0;
+}
+
+/* Append a striped target segment of @size with @stripe_size to @node. */
+int dm_tree_node_add_striped_target(struct dm_tree_node *node,
+				    uint64_t size,
+				    uint32_t stripe_size)
+{
+	struct load_segment *seg = _add_segment(node, SEG_STRIPED, size);
+
+	if (!seg)
+		return_0;
+
+	seg->stripe_size = stripe_size;
+
+	return 1;
+}
+
+int dm_tree_node_add_crypt_target(struct dm_tree_node *node,
+ uint64_t size,
+ const char *cipher,
+ const char *chainmode,
+ const char *iv,
+ uint64_t iv_offset,
+ const char *key)
+{
+ struct load_segment *seg;
+
+ if (!(seg = _add_segment(node, SEG_CRYPT, size)))
+ return_0;
+
+ seg->cipher = cipher;
+ seg->chainmode = chainmode;
+ seg->iv = iv;
+ seg->iv_offset = iv_offset;
+ seg->key = key;
+
+ return 1;
+}
+
+int dm_tree_node_add_mirror_target_log(struct dm_tree_node *node,
+ uint32_t region_size,
+ unsigned clustered,
+ const char *log_uuid,
+ unsigned area_count,
+ uint32_t flags)
+{
+ struct dm_tree_node *log_node = NULL;
+ struct load_segment *seg;
+
+ if (!(seg = _get_last_load_segment(node)))
+ return_0;
+
+ if (log_uuid) {
+ if (!(seg->uuid = dm_pool_strdup(node->dtree->mem, log_uuid))) {
+ log_error("log uuid pool_strdup failed");
+ return 0;
+ }
+ if ((flags & DM_CORELOG))
+ /* For pvmove: immediate resume (for size validation) isn't needed. */
+ /* pvmove flag passed via unused UUID and its suffix */
+ node->props.delay_resume_if_new = strstr(log_uuid, "pvmove") ? 2 : 1;
+ else {
+ if (!(log_node = dm_tree_find_node_by_uuid(node->dtree, log_uuid))) {
+ log_error("Couldn't find mirror log uuid %s.", log_uuid);
+ return 0;
+ }
+
+ if (clustered)
+ log_node->props.immediate_dev_node = 1;
+
+ /* The kernel validates the size of disk logs. */
+ /* FIXME Propagate to any devices below */
+ log_node->props.delay_resume_if_new = 0;
+
+ if (!_link_tree_nodes(node, log_node))
+ return_0;
+ }
+ }
+
+ seg->log = log_node;
+ seg->region_size = region_size;
+ seg->clustered = clustered;
+ seg->mirror_area_count = area_count;
+ seg->flags = flags;
+
+ return 1;
+}
+
+int dm_tree_node_add_mirror_target(struct dm_tree_node *node,
+ uint64_t size)
+{
+ if (!_add_segment(node, SEG_MIRRORED, size))
+ return_0;
+
+ return 1;
+}
+
+int dm_tree_node_add_raid_target_with_params(struct dm_tree_node *node,
+ uint64_t size,
+ const struct dm_tree_node_raid_params *p)
+{
+ unsigned i;
+ struct load_segment *seg = NULL;
+
+ for (i = 0; i < DM_ARRAY_SIZE(_dm_segtypes) && !seg; ++i)
+ if (!strcmp(p->raid_type, _dm_segtypes[i].target))
+ if (!(seg = _add_segment(node,
+ _dm_segtypes[i].type, size)))
+ return_0;
+ if (!seg) {
+ log_error("Unsupported raid type %s.", p->raid_type);
+ return 0;
+ }
+
+ seg->region_size = p->region_size;
+ seg->stripe_size = p->stripe_size;
+ seg->area_count = 0;
+ memset(seg->rebuilds, 0, sizeof(seg->rebuilds));
+ seg->rebuilds[0] = p->rebuilds;
+ memset(seg->writemostly, 0, sizeof(seg->writemostly));
+ seg->writemostly[0] = p->writemostly;
+ seg->writebehind = p->writebehind;
+ seg->min_recovery_rate = p->min_recovery_rate;
+ seg->max_recovery_rate = p->max_recovery_rate;
+ seg->flags = p->flags;
+
+ return 1;
+}
+
+int dm_tree_node_add_raid_target(struct dm_tree_node *node,
+ uint64_t size,
+ const char *raid_type,
+ uint32_t region_size,
+ uint32_t stripe_size,
+ uint64_t rebuilds,
+ uint64_t flags)
+{
+ struct dm_tree_node_raid_params params = {
+ .raid_type = raid_type,
+ .region_size = region_size,
+ .stripe_size = stripe_size,
+ .rebuilds = rebuilds,
+ .flags = flags
+ };
+
+ return dm_tree_node_add_raid_target_with_params(node, size, &params);
+}
+
+/*
+ * Version 2 of dm_tree_node_add_raid_target() allowing for:
+ *
+ * - maximum 253 legs in a raid set (MD kernel limitation)
+ * - delta_disks for disk add/remove reshaping
+ * - data_offset for out-of-place reshaping
+ * - data_copies to cope with odd numbers of raid10 disks
+ */
+int dm_tree_node_add_raid_target_with_params_v2(struct dm_tree_node *node,
+ uint64_t size,
+ const struct dm_tree_node_raid_params_v2 *p)
+{
+ unsigned i;
+ struct load_segment *seg = NULL;
+
+ for (i = 0; i < DM_ARRAY_SIZE(_dm_segtypes) && !seg; ++i)
+ if (!strcmp(p->raid_type, _dm_segtypes[i].target))
+ if (!(seg = _add_segment(node,
+ _dm_segtypes[i].type, size)))
+ return_0;
+ if (!seg) {
+ log_error("Unsupported raid type %s.", p->raid_type);
+ return 0;
+ }
+
+ seg->region_size = p->region_size;
+ seg->stripe_size = p->stripe_size;
+ seg->area_count = 0;
+ seg->delta_disks = p->delta_disks;
+ seg->data_offset = p->data_offset;
+ memcpy(seg->rebuilds, p->rebuilds, sizeof(seg->rebuilds));
+ memcpy(seg->writemostly, p->writemostly, sizeof(seg->writemostly));
+ seg->writebehind = p->writebehind;
+ seg->data_copies = p->data_copies;
+ seg->min_recovery_rate = p->min_recovery_rate;
+ seg->max_recovery_rate = p->max_recovery_rate;
+ seg->flags = p->flags;
+
+ return 1;
+}
+
+int dm_tree_node_add_cache_target(struct dm_tree_node *node,
+				  uint64_t size,
+				  uint64_t feature_flags, /* DM_CACHE_FEATURE_* */
+				  const char *metadata_uuid,
+				  const char *data_uuid,
+				  const char *origin_uuid,
+				  const char *policy_name,
+				  const struct dm_config_node *policy_settings,
+				  uint32_t data_block_size)
+{
+	struct dm_config_node *cn;
+	struct load_segment *seg;
+	static const uint64_t _modemask =
+		DM_CACHE_FEATURE_PASSTHROUGH |
+		DM_CACHE_FEATURE_WRITETHROUGH |
+		DM_CACHE_FEATURE_WRITEBACK;
+
+	/* Detect unknown (bigger) feature bit */
+	if (feature_flags >= (DM_CACHE_FEATURE_METADATA2 * 2)) {
+		log_error("Unsupported cache's feature flags set " FMTu64 ".",
+			  feature_flags);
+		return 0;
+	}
+
+	switch (feature_flags & _modemask) {
+	case DM_CACHE_FEATURE_PASSTHROUGH:
+	case DM_CACHE_FEATURE_WRITEBACK:
+		if (strcmp(policy_name, "cleaner") == 0) {
+			/* Enforce writethrough mode for cleaner policy */
+			/* Clear only the mode bits, keep other feature bits */
+			feature_flags &= ~_modemask;
+			feature_flags |= DM_CACHE_FEATURE_WRITETHROUGH;
+		}
+		/* Fall through */
+	case DM_CACHE_FEATURE_WRITETHROUGH:
+		break;
+	default:
+		log_error("Invalid cache's feature flag " FMTu64 ".",
+			  feature_flags);
+		return 0;
+	}
+
+	if (data_block_size < DM_CACHE_MIN_DATA_BLOCK_SIZE) {
+		log_error("Data block size %u is lower than %u sectors.",
+			  data_block_size, DM_CACHE_MIN_DATA_BLOCK_SIZE);
+		return 0;
+	}
+
+	if (data_block_size > DM_CACHE_MAX_DATA_BLOCK_SIZE) {
+		log_error("Data block size %u is higher than %u sectors.",
+			  data_block_size, DM_CACHE_MAX_DATA_BLOCK_SIZE);
+		return 0;
+	}
+
+	if (!(seg = _add_segment(node, SEG_CACHE, size)))
+		return_0;
+
+	if (!(seg->pool = dm_tree_find_node_by_uuid(node->dtree,
+						    data_uuid))) {
+		log_error("Missing cache's data uuid %s.",
+			  data_uuid);
+		return 0;
+	}
+	if (!_link_tree_nodes(node, seg->pool))
+		return_0;
+
+	if (!(seg->metadata = dm_tree_find_node_by_uuid(node->dtree,
+							metadata_uuid))) {
+		log_error("Missing cache's metadata uuid %s.",
+			  metadata_uuid);
+		return 0;
+	}
+	if (!_link_tree_nodes(node, seg->metadata))
+		return_0;
+
+	if (!(seg->origin = dm_tree_find_node_by_uuid(node->dtree,
+						      origin_uuid))) {
+		log_error("Missing cache's origin uuid %s.",
+			  origin_uuid);
+		return 0;
+	}
+	if (!_link_tree_nodes(node, seg->origin))
+		return_0;
+
+	seg->data_block_size = data_block_size;
+	seg->flags = feature_flags;
+	seg->policy_name = policy_name;
+
+	/* FIXME: better validation missing */
+	if (policy_settings) {
+		if (!(seg->policy_settings = dm_config_clone_node_with_mem(node->dtree->mem, policy_settings, 0)))
+			return_0;
+
+		for (cn = seg->policy_settings->child; cn; cn = cn->sib) {
+			if (!cn->v || (cn->v->type != DM_CFG_INT)) {
+				/* For now only <key> = <int> pairs are supported */
+				log_error("Cache policy parameter %s is without integer value.", cn->key);
+				return 0;
+			}
+			seg->policy_argc++;
+		}
+	}
+
+	return 1;
+}
+
+int dm_tree_node_add_replicator_target(struct dm_tree_node *node,
+ uint64_t size,
+ const char *rlog_uuid,
+ const char *rlog_type,
+ unsigned rsite_index,
+ dm_replicator_mode_t mode,
+ uint32_t async_timeout,
+ uint64_t fall_behind_data,
+ uint32_t fall_behind_ios)
+{
+ log_error("Replicator segment is unsupported.");
+ return 0;
+}
+
+/* Appends device node to Replicator */
+int dm_tree_node_add_replicator_dev_target(struct dm_tree_node *node,
+					   uint64_t size,
+					   const char *replicator_uuid,
+					   uint64_t rdevice_index,
+					   const char *rdev_uuid,
+					   unsigned rsite_index,
+					   const char *slog_uuid,
+					   uint32_t slog_flags,
+					   uint32_t slog_region_size)
+{
+	log_error("Replicator target is unsupported.");
+	return 0;
+}
+
+static struct load_segment *_get_single_load_segment(struct dm_tree_node *node,
+ unsigned type)
+{
+ struct load_segment *seg;
+
+ if (!(seg = _get_last_load_segment(node)))
+ return_NULL;
+
+ /* Never used past _load_node(), so can test segment_count */
+ if (node->props.segment_count != 1) {
+ log_error("Node %s must have only one segment.",
+ _dm_segtypes[type].target);
+ return NULL;
+ }
+
+ if (seg->type != type) {
+ log_error("Node %s has segment type %s.",
+ _dm_segtypes[type].target,
+ _dm_segtypes[seg->type].target);
+ return NULL;
+ }
+
+ return seg;
+}
+
+static int _thin_validate_device_id(uint32_t device_id)
+{
+	if (device_id > DM_THIN_MAX_DEVICE_ID) {
+		log_error("Device id %u is higher than %u.",
+			  device_id, DM_THIN_MAX_DEVICE_ID);
+		return 0;
+	}
+
+	return 1;
+}
+
+int dm_tree_node_add_thin_pool_target(struct dm_tree_node *node,
+				      uint64_t size,
+				      uint64_t transaction_id,
+				      const char *metadata_uuid,
+				      const char *pool_uuid,
+				      uint32_t data_block_size,
+				      uint64_t low_water_mark,
+				      unsigned skip_block_zeroing)
+{
+	struct load_segment *seg, *mseg;
+	uint64_t devsize = 0;
+
+	if (data_block_size < DM_THIN_MIN_DATA_BLOCK_SIZE) {
+		log_error("Data block size %u is lower than %u sectors.",
+			  data_block_size, DM_THIN_MIN_DATA_BLOCK_SIZE);
+		return 0;
+	}
+
+	if (data_block_size > DM_THIN_MAX_DATA_BLOCK_SIZE) {
+		log_error("Data block size %u is higher than %u sectors.",
+			  data_block_size, DM_THIN_MAX_DATA_BLOCK_SIZE);
+		return 0;
+	}
+
+	if (!(seg = _add_segment(node, SEG_THIN_POOL, size)))
+		return_0;
+
+	if (!(seg->metadata = dm_tree_find_node_by_uuid(node->dtree, metadata_uuid))) {
+		log_error("Missing metadata uuid %s.", metadata_uuid);
+		return 0;
+	}
+
+	if (!_link_tree_nodes(node, seg->metadata))
+		return_0;
+
+	/* FIXME: more complex target may need more tweaks */
+	dm_list_iterate_items(mseg, &seg->metadata->props.segs) {
+		devsize += mseg->size;
+		if (devsize > DM_THIN_MAX_METADATA_SIZE) {
+			log_debug_activation("Ignoring %" PRIu64 " of device.",
+					     devsize - DM_THIN_MAX_METADATA_SIZE);
+			mseg->size -= (devsize - DM_THIN_MAX_METADATA_SIZE);
+			devsize = DM_THIN_MAX_METADATA_SIZE;
+			/* FIXME: drop remaining segs */
+		}
+	}
+
+	if (!(seg->pool = dm_tree_find_node_by_uuid(node->dtree, pool_uuid))) {
+		log_error("Missing pool uuid %s.", pool_uuid);
+		return 0;
+	}
+
+	if (!_link_tree_nodes(node, seg->pool))
+		return_0;
+
+	/* Clean flag delay_resume_if_new - so corelog gets resumed */
+	seg->metadata->props.delay_resume_if_new = 0;
+	seg->pool->props.delay_resume_if_new = 0;
+
+	/* Preload must not resume extended running thin-pool before it's committed */
+	node->props.delay_resume_if_extended = 1;
+
+	/* Validate only transaction_id > 0 when activating thin-pool */
+	node->props.send_messages = transaction_id ? 1 : 0;
+	seg->transaction_id = transaction_id;
+	seg->low_water_mark = low_water_mark;
+	seg->data_block_size = data_block_size;
+	seg->skip_block_zeroing = skip_block_zeroing;
+	dm_list_init(&seg->thin_messages);
+
+	return 1;
+}
+
+int dm_tree_node_add_thin_pool_message(struct dm_tree_node *node,
+ dm_thin_message_t type,
+ uint64_t id1, uint64_t id2)
+{
+ struct thin_message *tm;
+ struct load_segment *seg;
+
+ if (!(seg = _get_single_load_segment(node, SEG_THIN_POOL)))
+ return_0;
+
+ if (!(tm = dm_pool_zalloc(node->dtree->mem, sizeof (*tm)))) {
+ log_error("Failed to allocate thin message.");
+ return 0;
+ }
+
+ switch (type) {
+ case DM_THIN_MESSAGE_CREATE_SNAP:
+ /* If the thin origin is active, it must be suspend first! */
+ if (id1 == id2) {
+ log_error("Cannot use same device id for origin and its snapshot.");
+ return 0;
+ }
+ if (!_thin_validate_device_id(id1) ||
+ !_thin_validate_device_id(id2))
+ return_0;
+ tm->message.u.m_create_snap.device_id = id1;
+ tm->message.u.m_create_snap.origin_id = id2;
+ break;
+ case DM_THIN_MESSAGE_CREATE_THIN:
+ if (!_thin_validate_device_id(id1))
+ return_0;
+ tm->message.u.m_create_thin.device_id = id1;
+ tm->expected_errno = EEXIST;
+ break;
+ case DM_THIN_MESSAGE_DELETE:
+ if (!_thin_validate_device_id(id1))
+ return_0;
+ tm->message.u.m_delete.device_id = id1;
+ tm->expected_errno = ENODATA;
+ break;
+ case DM_THIN_MESSAGE_SET_TRANSACTION_ID:
+ if ((id1 + 1) != id2) {
+ log_error("New transaction id must be sequential.");
+ return 0; /* FIXME: Maybe too strict here? */
+ }
+ if (id2 != seg->transaction_id) {
+ log_error("Current transaction id is different from thin pool.");
+ return 0; /* FIXME: Maybe too strict here? */
+ }
+ tm->message.u.m_set_transaction_id.current_id = id1;
+ tm->message.u.m_set_transaction_id.new_id = id2;
+ break;
+ default:
+ log_error("Unsupported message type %d.", (int) type);
+ return 0;
+ }
+
+ tm->message.type = type;
+ dm_list_add(&seg->thin_messages, &tm->list);
+ /* Higher value >1 identifies there are really some messages */
+ node->props.send_messages = 2;
+
+ return 1;
+}
+
+int dm_tree_node_set_thin_pool_discard(struct dm_tree_node *node,
+ unsigned ignore,
+ unsigned no_passdown)
+{
+ struct load_segment *seg;
+
+ if (!(seg = _get_single_load_segment(node, SEG_THIN_POOL)))
+ return_0;
+
+ seg->ignore_discard = ignore;
+ seg->no_discard_passdown = no_passdown;
+
+ return 1;
+}
+
+int dm_tree_node_set_thin_pool_error_if_no_space(struct dm_tree_node *node,
+ unsigned error_if_no_space)
+{
+ struct load_segment *seg;
+
+ if (!(seg = _get_single_load_segment(node, SEG_THIN_POOL)))
+ return_0;
+
+ seg->error_if_no_space = error_if_no_space;
+
+ return 1;
+}
+
+int dm_tree_node_set_thin_pool_read_only(struct dm_tree_node *node,
+ unsigned read_only)
+{
+ struct load_segment *seg;
+
+ if (!(seg = _get_single_load_segment(node, SEG_THIN_POOL)))
+ return_0;
+
+ seg->read_only = read_only;
+
+ return 1;
+}
+
+int dm_tree_node_add_thin_target(struct dm_tree_node *node,
+ uint64_t size,
+ const char *pool_uuid,
+ uint32_t device_id)
+{
+ struct dm_tree_node *pool;
+ struct load_segment *seg;
+
+ if (!(pool = dm_tree_find_node_by_uuid(node->dtree, pool_uuid))) {
+ log_error("Missing thin pool uuid %s.", pool_uuid);
+ return 0;
+ }
+
+ if (!_link_tree_nodes(node, pool))
+ return_0;
+
+ if (!_thin_validate_device_id(device_id))
+ return_0;
+
+ if (!(seg = _add_segment(node, SEG_THIN, size)))
+ return_0;
+
+ seg->pool = pool;
+ seg->device_id = device_id;
+
+ return 1;
+}
+
+int dm_tree_node_set_thin_external_origin(struct dm_tree_node *node,
+ const char *external_uuid)
+{
+ struct dm_tree_node *external;
+ struct load_segment *seg;
+
+ if (!(seg = _get_single_load_segment(node, SEG_THIN)))
+ return_0;
+
+ if (!(external = dm_tree_find_node_by_uuid(node->dtree,
+ external_uuid))) {
+ log_error("Missing thin external origin uuid %s.",
+ external_uuid);
+ return 0;
+ }
+
+ if (!_link_tree_nodes(node, external))
+ return_0;
+
+ seg->external = external;
+
+ return 1;
+}
+
+static int _add_area(struct dm_tree_node *node, struct load_segment *seg, struct dm_tree_node *dev_node, uint64_t offset)
+{
+ struct seg_area *area;
+
+ if (!(area = dm_pool_zalloc(node->dtree->mem, sizeof (*area)))) {
+ log_error("Failed to allocate target segment area.");
+ return 0;
+ }
+
+ area->dev_node = dev_node;
+ area->offset = offset;
+
+ dm_list_add(&seg->areas, &area->list);
+ seg->area_count++;
+
+ return 1;
+}
+
+int dm_tree_node_add_target_area(struct dm_tree_node *node,
+ const char *dev_name,
+ const char *uuid,
+ uint64_t offset)
+{
+ struct load_segment *seg;
+ struct stat info;
+ struct dm_tree_node *dev_node;
+
+ if ((!dev_name || !*dev_name) && (!uuid || !*uuid)) {
+ log_error("dm_tree_node_add_target_area called without device");
+ return 0;
+ }
+
+ if (uuid) {
+ if (!(dev_node = dm_tree_find_node_by_uuid(node->dtree, uuid))) {
+ log_error("Couldn't find area uuid %s.", uuid);
+ return 0;
+ }
+ if (!_link_tree_nodes(node, dev_node))
+ return_0;
+ } else {
+ if (stat(dev_name, &info) < 0) {
+ log_error("Device %s not found.", dev_name);
+ return 0;
+ }
+
+ if (!S_ISBLK(info.st_mode)) {
+ log_error("Device %s is not a block device.", dev_name);
+ return 0;
+ }
+
+ /* FIXME Check correct macro use */
+ if (!(dev_node = _add_dev(node->dtree, node, MAJOR(info.st_rdev),
+ MINOR(info.st_rdev), 0, 0)))
+ return_0;
+ }
+
+ if (!(seg = _get_last_load_segment(node)))
+ return_0;
+
+ if (!_add_area(node, seg, dev_node, offset))
+ return_0;
+
+ return 1;
+}
+
+int dm_tree_node_add_null_area(struct dm_tree_node *node, uint64_t offset)
+{
+ struct load_segment *seg;
+
+ if (!(seg = _get_last_load_segment(node)))
+ return_0;
+
+ switch (seg->type) {
+ case SEG_RAID0:
+ case SEG_RAID0_META:
+ case SEG_RAID1:
+ case SEG_RAID4:
+ case SEG_RAID5_N:
+ case SEG_RAID5_LA:
+ case SEG_RAID5_RA:
+ case SEG_RAID5_LS:
+ case SEG_RAID5_RS:
+ case SEG_RAID6_N_6:
+ case SEG_RAID6_ZR:
+ case SEG_RAID6_NR:
+ case SEG_RAID6_NC:
+ case SEG_RAID6_LS_6:
+ case SEG_RAID6_RS_6:
+ case SEG_RAID6_LA_6:
+ case SEG_RAID6_RA_6:
+ break;
+ default:
+ log_error("dm_tree_node_add_null_area() called on an unsupported segment type");
+ return 0;
+ }
+
+ if (!_add_area(node, seg, NULL, offset))
+ return_0;
+
+ return 1;
+}
+
+void dm_tree_node_set_callback(struct dm_tree_node *dnode,
+ dm_node_callback_fn cb, void *data)
+{
+ dnode->callback = cb;
+ dnode->callback_data = data;
+}
+
+#if defined(__GNUC__)
+/*
+ * Backward compatible implementations.
+ *
+ * Keep these at the end of the file to make sure that
+ * no code in this file accidentally calls it.
+ */
+
+/* Backward compatible dm_tree_node_size_changed() implementations. */
+int dm_tree_node_size_changed_base(const struct dm_tree_node *dnode);
+int dm_tree_node_size_changed_base(const struct dm_tree_node *dnode)
+{
+ /* Base does not make difference between smaller and bigger */
+ return dm_tree_node_size_changed(dnode) ? 1 : 0;
+}
+
+/*
+ * Retain ABI compatibility after adding the DM_CACHE_FEATURE_METADATA2
+ * in version 1.02.138.
+ *
+ * Binaries compiled against version 1.02.138 onwards will use
+ * the new function dm_tree_node_add_cache_target which detects unknown
+ * feature flags and returns error for them.
+ */
+int dm_tree_node_add_cache_target_base(struct dm_tree_node *node,
+ uint64_t size,
+ uint64_t feature_flags, /* DM_CACHE_FEATURE_* */
+ const char *metadata_uuid,
+ const char *data_uuid,
+ const char *origin_uuid,
+ const char *policy_name,
+ const struct dm_config_node *policy_settings,
+ uint32_t data_block_size);
+int dm_tree_node_add_cache_target_base(struct dm_tree_node *node,
+ uint64_t size,
+ uint64_t feature_flags,
+ const char *metadata_uuid,
+ const char *data_uuid,
+ const char *origin_uuid,
+ const char *policy_name,
+ const struct dm_config_node *policy_settings,
+ uint32_t data_block_size)
+{
+ /* Old version supported only these FEATURE bits, others were ignored so masked them */
+ static const uint64_t _mask =
+ DM_CACHE_FEATURE_WRITEBACK |
+ DM_CACHE_FEATURE_WRITETHROUGH |
+ DM_CACHE_FEATURE_PASSTHROUGH;
+
+ return dm_tree_node_add_cache_target(node, size, feature_flags & _mask,
+ metadata_uuid, data_uuid, origin_uuid,
+ policy_name, policy_settings, data_block_size);
+}
+#endif
diff --git a/device_mapper/libdm-file.c b/device_mapper/libdm-file.c
new file mode 100644
index 000000000..5c6d2232d
--- /dev/null
+++ b/device_mapper/libdm-file.c
@@ -0,0 +1,261 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "misc/dmlib.h"
+
+#include <sys/file.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <unistd.h>
+
+static int _is_dir(const char *path)
+{
+ struct stat st;
+
+ if (stat(path, &st) < 0) {
+ log_sys_error("stat", path);
+ return 0;
+ }
+
+ if (!S_ISDIR(st.st_mode)) {
+ log_error("Existing path %s is not "
+ "a directory.", path);
+ return 0;
+ }
+
+ return 1;
+}
+
+static int _create_dir_recursive(const char *dir)
+{
+ char *orig, *s;
+ int rc, r = 0;
+
+ log_verbose("Creating directory \"%s\"", dir);
+ /* Create parent directories */
+ orig = s = dm_strdup(dir);
+ if (!s) {
+ log_error("Failed to duplicate directory name.");
+ return 0;
+ }
+
+ while ((s = strchr(s, '/')) != NULL) {
+ *s = '\0';
+ if (*orig) {
+ rc = mkdir(orig, 0777);
+ if (rc < 0) {
+ if (errno == EEXIST) {
+ if (!_is_dir(orig))
+ goto_out;
+ } else {
+ if (errno != EROFS)
+ log_sys_error("mkdir", orig);
+ goto out;
+ }
+ }
+ }
+ *s++ = '/';
+ }
+
+ /* Create final directory */
+ rc = mkdir(dir, 0777);
+ if (rc < 0) {
+ if (errno == EEXIST) {
+ if (!_is_dir(dir))
+ goto_out;
+ } else {
+ if (errno != EROFS)
+ log_sys_error("mkdir", orig);
+ goto out;
+ }
+ }
+
+ r = 1;
+out:
+ dm_free(orig);
+ return r;
+}
+
+int dm_create_dir(const char *dir)
+{
+ struct stat info;
+
+ if (!*dir)
+ return 1;
+
+ if (stat(dir, &info) == 0 && S_ISDIR(info.st_mode))
+ return 1;
+
+ if (!_create_dir_recursive(dir))
+ return_0;
+
+ return 1;
+}
+
+int dm_is_empty_dir(const char *dir)
+{
+ struct dirent *dirent;
+ DIR *d;
+
+ if (!(d = opendir(dir))) {
+ log_sys_error("opendir", dir);
+ return 0;
+ }
+
+ while ((dirent = readdir(d)))
+ if (strcmp(dirent->d_name, ".") && strcmp(dirent->d_name, ".."))
+ break;
+
+ if (closedir(d))
+ log_sys_error("closedir", dir);
+
+ return dirent ? 0 : 1;
+}
+
+int dm_fclose(FILE *stream)
+{
+ int prev_fail = ferror(stream);
+ int fclose_fail = fclose(stream);
+
+ /* If there was a previous failure, but fclose succeeded,
+ clear errno, since ferror does not set it, and its value
+ may be unrelated to the ferror-reported failure. */
+ if (prev_fail && !fclose_fail)
+ errno = 0;
+
+ return prev_fail || fclose_fail ? EOF : 0;
+}
+
+int dm_create_lockfile(const char *lockfile)
+{
+	int fd, value;
+	size_t bufferlen;
+	ssize_t write_out;
+	struct flock lock;
+	char buffer[50];
+	int retries = 0;
+
+	if ((fd = open(lockfile, O_CREAT | O_WRONLY,
+		       (S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH))) < 0) {
+		log_error("Cannot open lockfile [%s], error was [%s]",
+			  lockfile, strerror(errno));
+		return 0;
+	}
+
+	lock.l_type = F_WRLCK;
+	lock.l_start = 0;
+	lock.l_whence = SEEK_SET;
+	lock.l_len = 0;
+retry_fcntl:
+	if (fcntl(fd, F_SETLK, &lock) < 0) {
+		switch (errno) {
+		case EINTR:
+			goto retry_fcntl;
+		case EACCES:
+		case EAGAIN:
+			if (retries == 20) {
+				log_error("Cannot lock lockfile [%s], error was [%s]",
+					  lockfile, strerror(errno));
+				break;
+			} else {
+				++ retries;
+				usleep(1000);
+				goto retry_fcntl;
+			}
+		default:
+			log_error("process is already running");
+		}
+
+		goto fail_close;
+	}
+
+	if (ftruncate(fd, 0) < 0) {
+		log_error("Cannot truncate pidfile [%s], error was [%s]",
+			  lockfile, strerror(errno));
+
+		goto fail_close_unlink;
+	}
+
+	/* pid_t is signed; cast to int to match the %d conversion */
+	snprintf(buffer, sizeof(buffer), "%d\n", (int) getpid());
+
+	bufferlen = strlen(buffer);
+	write_out = write(fd, buffer, bufferlen);
+
+	if ((write_out < 0) || (write_out == 0 && errno)) {
+		log_error("Cannot write pid to pidfile [%s], error was [%s]",
+			  lockfile, strerror(errno));
+
+		goto fail_close_unlink;
+	}
+
+	if ((write_out == 0) || ((size_t)write_out < bufferlen)) {
+		log_error("Cannot write pid to pidfile [%s], shortwrite of "
+			  "[%" PRIsize_t "] bytes, expected [%" PRIsize_t "]\n",
+			  lockfile, (size_t) write_out, bufferlen);
+
+		goto fail_close_unlink;
+	}
+
+	if ((value = fcntl(fd, F_GETFD, 0)) < 0) {
+		log_error("Cannot get close-on-exec flag from pidfile [%s], "
+			  "error was [%s]", lockfile, strerror(errno));
+
+		goto fail_close_unlink;
+	}
+	value |= FD_CLOEXEC;
+	if (fcntl(fd, F_SETFD, value) < 0) {
+		log_error("Cannot set close-on-exec flag from pidfile [%s], "
+			  "error was [%s]", lockfile, strerror(errno));
+
+		goto fail_close_unlink;
+	}
+
+	return 1;
+
+fail_close_unlink:
+	if (unlink(lockfile))
+		log_sys_debug("unlink", lockfile);
+fail_close:
+	if (close(fd))
+		log_sys_debug("close", lockfile);
+
+	return 0;
+}
+
+int dm_daemon_is_running(const char* lockfile)
+{
+ int fd;
+ struct flock lock;
+
+ if((fd = open(lockfile, O_RDONLY)) < 0)
+ return 0;
+
+ lock.l_type = F_WRLCK;
+ lock.l_start = 0;
+ lock.l_whence = SEEK_SET;
+ lock.l_len = 0;
+ if (fcntl(fd, F_GETLK, &lock) < 0) {
+ log_error("Cannot check lock status of lockfile [%s], error was [%s]",
+ lockfile, strerror(errno));
+ if (close(fd))
+ stack;
+ return 0;
+ }
+
+ if (close(fd))
+ stack;
+
+ return (lock.l_type == F_UNLCK) ? 0 : 1;
+}
diff --git a/device_mapper/libdm-report.c b/device_mapper/libdm-report.c
new file mode 100644
index 000000000..3a48c3f46
--- /dev/null
+++ b/device_mapper/libdm-report.c
@@ -0,0 +1,5104 @@
+/*
+ * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2015 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "misc/dmlib.h"
+
+#include <ctype.h>
+#include <math.h> /* fabs() */
+#include <float.h> /* DBL_EPSILON */
+#include <time.h>
+
+/*
+ * Internal flags
+ */
+#define RH_SORT_REQUIRED 0x00000100
+#define RH_HEADINGS_PRINTED 0x00000200
+#define RH_FIELD_CALC_NEEDED 0x00000400
+#define RH_ALREADY_REPORTED 0x00000800
+
+struct selection {
+ struct dm_pool *mem;
+ struct selection_node *selection_root;
+ int add_new_fields;
+};
+
+struct report_group_item;
+
+struct dm_report {
+ struct dm_pool *mem;
+
+ /**
+ * Cache the first row allocated so that all rows and fields
+ * can be disposed of in a single dm_pool_free() call.
+ */
+ struct row *first_row;
+
+ /* To report all available types */
+#define REPORT_TYPES_ALL UINT32_MAX
+ uint32_t report_types;
+ const char *output_field_name_prefix;
+ const char *field_prefix;
+ uint32_t flags;
+ const char *separator;
+
+ uint32_t keys_count;
+
+ /* Ordered list of fields needed for this report */
+ struct dm_list field_props;
+
+ /* Rows of report data */
+ struct dm_list rows;
+
+ /* Array of field definitions */
+ const struct dm_report_field_type *fields;
+ const char **canonical_field_ids;
+ const struct dm_report_object_type *types;
+
+ /* To store caller private data */
+ void *private;
+
+ /* Selection handle */
+ struct selection *selection;
+
+ /* Null-terminated array of reserved values */
+ const struct dm_report_reserved_value *reserved_values;
+ struct dm_hash_table *value_cache;
+
+ struct report_group_item *group_item;
+};
+
+struct dm_report_group {
+ dm_report_group_type_t type;
+ struct dm_pool *mem;
+ struct dm_list items;
+ int indent;
+};
+
+struct report_group_item {
+ struct dm_list list;
+ struct dm_report_group *group;
+ struct dm_report *report;
+ union {
+ uint32_t orig_report_flags;
+ uint32_t finished_count;
+ } store;
+ struct report_group_item *parent;
+ unsigned output_done:1;
+ unsigned needs_closing:1;
+ void *data;
+};
+
+/*
+ * Internal per-field flags
+ */
+#define FLD_HIDDEN 0x00001000
+#define FLD_SORT_KEY 0x00002000
+#define FLD_ASCENDING 0x00004000
+#define FLD_DESCENDING 0x00008000
+#define FLD_COMPACTED 0x00010000
+#define FLD_COMPACT_ONE 0x00020000
+
+struct field_properties {
+ struct dm_list list;
+ uint32_t field_num;
+ uint32_t sort_posn;
+ int32_t initial_width;
+ int32_t width; /* current width: adjusted by dm_report_object() */
+ const struct dm_report_object_type *type;
+ uint32_t flags;
+ int implicit;
+};
+
+/*
+ * Report selection
+ */
+struct op_def {
+ const char *string;
+ uint32_t flags;
+ const char *desc;
+};
+
+#define FLD_CMP_MASK 0x0FF00000
+#define FLD_CMP_UNCOMPARABLE 0x00100000
+#define FLD_CMP_EQUAL 0x00200000
+#define FLD_CMP_NOT 0x00400000
+#define FLD_CMP_GT 0x00800000
+#define FLD_CMP_LT 0x01000000
+#define FLD_CMP_REGEX 0x02000000
+#define FLD_CMP_NUMBER 0x04000000
+#define FLD_CMP_TIME 0x08000000
+/*
+ * #define FLD_CMP_STRING 0x10000000
+ * We could define FLD_CMP_STRING here for completeness here,
+ * but it's not needed - we can check operator compatibility with
+ * field type by using FLD_CMP_REGEX, FLD_CMP_NUMBER and
+ * FLD_CMP_TIME flags only.
+ */
+
+/*
+ * When defining operators, always define longer one before
+ * shorter one if one is a prefix of another!
+ * (e.g. =~ comes before =)
+*/
+static struct op_def _op_cmp[] = {
+ { "=~", FLD_CMP_REGEX, "Matching regular expression. [regex]" },
+ { "!~", FLD_CMP_REGEX|FLD_CMP_NOT, "Not matching regular expression. [regex]" },
+ { "=", FLD_CMP_EQUAL, "Equal to. [number, size, percent, string, string list, time]" },
+ { "!=", FLD_CMP_NOT|FLD_CMP_EQUAL, "Not equal to. [number, size, percent, string, string_list, time]" },
+ { ">=", FLD_CMP_NUMBER|FLD_CMP_TIME|FLD_CMP_GT|FLD_CMP_EQUAL, "Greater than or equal to. [number, size, percent, time]" },
+ { ">", FLD_CMP_NUMBER|FLD_CMP_TIME|FLD_CMP_GT, "Greater than. [number, size, percent, time]" },
+ { "<=", FLD_CMP_NUMBER|FLD_CMP_TIME|FLD_CMP_LT|FLD_CMP_EQUAL, "Less than or equal to. [number, size, percent, time]" },
+ { "<", FLD_CMP_NUMBER|FLD_CMP_TIME|FLD_CMP_LT, "Less than. [number, size, percent, time]" },
+ { "since", FLD_CMP_TIME|FLD_CMP_GT|FLD_CMP_EQUAL, "Since specified time (same as '>='). [time]" },
+ { "after", FLD_CMP_TIME|FLD_CMP_GT, "After specified time (same as '>'). [time]"},
+ { "until", FLD_CMP_TIME|FLD_CMP_LT|FLD_CMP_EQUAL, "Until specified time (same as '<='). [time]"},
+ { "before", FLD_CMP_TIME|FLD_CMP_LT, "Before specified time (same as '<'). [time]"},
+ { NULL, 0, NULL }
+};
+
+#define SEL_MASK 0x000000FF
+#define SEL_ITEM 0x00000001
+#define SEL_AND 0x00000002
+#define SEL_OR 0x00000004
+
+#define SEL_MODIFIER_MASK 0x00000F00
+#define SEL_MODIFIER_NOT 0x00000100
+
+#define SEL_PRECEDENCE_MASK 0x0000F000
+#define SEL_PRECEDENCE_PS 0x00001000
+#define SEL_PRECEDENCE_PE 0x00002000
+
+#define SEL_LIST_MASK 0x000F0000
+#define SEL_LIST_LS 0x00010000
+#define SEL_LIST_LE 0x00020000
+#define SEL_LIST_SUBSET_LS 0x00040000
+#define SEL_LIST_SUBSET_LE 0x00080000
+
+static struct op_def _op_log[] = {
+ { "&&", SEL_AND, "All fields must match" },
+ { ",", SEL_AND, "All fields must match" },
+ { "||", SEL_OR, "At least one field must match" },
+ { "#", SEL_OR, "At least one field must match" },
+ { "!", SEL_MODIFIER_NOT, "Logical negation" },
+ { "(", SEL_PRECEDENCE_PS, "Left parenthesis" },
+ { ")", SEL_PRECEDENCE_PE, "Right parenthesis" },
+ { "[", SEL_LIST_LS, "List start" },
+ { "]", SEL_LIST_LE, "List end"},
+ { "{", SEL_LIST_SUBSET_LS, "List subset start"},
+ { "}", SEL_LIST_SUBSET_LE, "List subset end"},
+ { NULL, 0, NULL},
+};
+
+struct selection_str_list {
+ struct dm_str_list str_list;
+ unsigned type; /* either SEL_AND or SEL_OR */
+};
+
+struct field_selection_value {
+ union {
+ const char *s;
+ uint64_t i;
+ time_t t;
+ double d;
+ struct dm_regex *r;
+ struct selection_str_list *l;
+ } v;
+ struct field_selection_value *next;
+};
+
+struct field_selection {
+ struct field_properties *fp;
+ uint32_t flags;
+ struct field_selection_value *value;
+};
+
+struct selection_node {
+ struct dm_list list;
+ uint32_t type;
+ union {
+ struct field_selection *item;
+ struct dm_list set;
+ } selection;
+};
+
+struct reserved_value_wrapper {
+ const char *matched_name;
+ const struct dm_report_reserved_value *reserved;
+ const void *value;
+};
+
/*
 * Report data field
 */
struct dm_report_field {
	struct dm_list list;
	struct field_properties *props;

	const char *report_string;	/* Formatted ready for display */
	const void *sort_value;		/* Raw value for sorting */
};

/* One row of the report: the field values for a single reported object. */
struct row {
	struct dm_list list;
	struct dm_report *rh;
	struct dm_list fields;			  /* Fields in display order */
	struct dm_report_field *(*sort_fields)[];	/* Fields in sort order */
	int selected;				/* Set if row passed selection criteria */
	struct dm_report_field *field_sel_status;
};
+
/*
 * Implicit report types and fields.
 */
/* Reserved type bit for the library's own implicit fields. */
#define SPECIAL_REPORT_TYPE 0x80000000
/* Names of the implicit fields: "selected", "help" and its "?" alias. */
#define SPECIAL_FIELD_SELECTED_ID "selected"
#define SPECIAL_FIELD_HELP_ID "help"
#define SPECIAL_FIELD_HELP_ALT_ID "?"
+
/* Object data accessor for the implicit type: it carries no object data. */
static void *_null_returning_fn(void *obj __attribute__((unused)))
{
	return NULL;
}
+
/*
 * No-op field report function used by the implicit "help" fields:
 * nothing is rendered, the field only triggers the help output.
 */
static int _no_report_fn(struct dm_report *rh __attribute__((unused)),
			 struct dm_pool *mem __attribute__((unused)),
			 struct dm_report_field *field __attribute__((unused)),
			 const void *data __attribute__((unused)),
			 void *private __attribute__((unused)))
{
	return 1;
}
+
/* Report the implicit "selected" field: the row's selection status. */
static int _selected_disp(struct dm_report *rh,
			  struct dm_pool *mem __attribute__((unused)),
			  struct dm_report_field *field,
			  const void *data,
			  void *private __attribute__((unused)))
{
	const struct row *row = (const struct row *)data;
	return dm_report_field_int(rh, field, &row->selected);
}
+
/* The implicit "special" report type provided by the library itself. */
static const struct dm_report_object_type _implicit_special_report_types[] = {
	{ SPECIAL_REPORT_TYPE, "Special", "special_", _null_returning_fn },
	{ 0, "", "", NULL }
};

/* Implicit fields available without selection support: help only. */
static const struct dm_report_field_type _implicit_special_report_fields[] = {
	{ SPECIAL_REPORT_TYPE, DM_REPORT_FIELD_TYPE_NUMBER | FLD_CMP_UNCOMPARABLE , 0, 8, SPECIAL_FIELD_HELP_ID, "Help", _no_report_fn, "Show help." },
	{ SPECIAL_REPORT_TYPE, DM_REPORT_FIELD_TYPE_NUMBER | FLD_CMP_UNCOMPARABLE , 0, 8, SPECIAL_FIELD_HELP_ALT_ID, "Help", _no_report_fn, "Show help." },
	{ 0, 0, 0, 0, "", "", 0, 0}
};

/* Implicit fields when selection is enabled: "selected" plus help. */
static const struct dm_report_field_type _implicit_special_report_fields_with_selection[] = {
	{ SPECIAL_REPORT_TYPE, DM_REPORT_FIELD_TYPE_NUMBER, 0, 8, SPECIAL_FIELD_SELECTED_ID, "Selected", _selected_disp, "Set if item passes selection criteria." },
	{ SPECIAL_REPORT_TYPE, DM_REPORT_FIELD_TYPE_NUMBER | FLD_CMP_UNCOMPARABLE , 0, 8, SPECIAL_FIELD_HELP_ID, "Help", _no_report_fn, "Show help." },
	{ SPECIAL_REPORT_TYPE, DM_REPORT_FIELD_TYPE_NUMBER | FLD_CMP_UNCOMPARABLE , 0, 8, SPECIAL_FIELD_HELP_ALT_ID, "Help", _no_report_fn, "Show help." },
	{ 0, 0, 0, 0, "", "", 0, 0}
};

/* Currently active implicit type/field tables. */
static const struct dm_report_object_type *_implicit_report_types = _implicit_special_report_types;
static const struct dm_report_field_type *_implicit_report_fields = _implicit_special_report_fields;
+
+static const struct dm_report_object_type *_find_type(struct dm_report *rh,
+ uint32_t report_type)
+{
+ const struct dm_report_object_type *t;
+
+ for (t = _implicit_report_types; t->data_fn; t++)
+ if (t->id == report_type)
+ return t;
+
+ for (t = rh->types; t->data_fn; t++)
+ if (t->id == report_type)
+ return t;
+
+ return NULL;
+}
+
/*
 * Data-munging functions to prepare each data type for display and sorting
 */

/*
 * Set the field's report and sort value to a pool-allocated copy of the
 * given string.  Returns 1 on success, 0 on allocation failure.
 */
int dm_report_field_string(struct dm_report *rh,
			   struct dm_report_field *field, const char *const *data)
{
	char *repstr;

	if (!(repstr = dm_pool_strdup(rh->mem, *data))) {
		log_error("dm_report_field_string: dm_pool_strdup failed");
		return 0;
	}

	field->report_string = repstr;
	/* Strings sort by the very string that is displayed. */
	field->sort_value = (const void *) field->report_string;

	return 1;
}
+
/*
 * Report a percentage: displayed with two decimal places, sorted by the
 * raw dm_percent_t value.  DM_PERCENT_INVALID is reported as "".
 * Returns 1 on success, 0 on failure.
 */
int dm_report_field_percent(struct dm_report *rh,
			    struct dm_report_field *field,
			    const dm_percent_t *data)
{
	char *repstr;
	uint64_t *sortval;

	if (!(sortval = dm_pool_alloc(rh->mem, sizeof(uint64_t)))) {
		log_error("dm_report_field_percent: dm_pool_alloc failed for sort_value.");
		return 0;
	}

	*sortval = (uint64_t)(*data);

	if (*data == DM_PERCENT_INVALID) {
		dm_report_field_set_value(field, "", sortval);
		return 1;
	}

	/* 8 bytes is enough for "100.00" plus the terminating nul. */
	if (!(repstr = dm_pool_alloc(rh->mem, 8))) {
		dm_pool_free(rh->mem, sortval);
		log_error("dm_report_field_percent: dm_pool_alloc failed for percent report string.");
		return 0;
	}

	if (dm_snprintf(repstr, 7, "%.2f", dm_percent_to_round_float(*data, 2)) < 0) {
		dm_pool_free(rh->mem, sortval);
		log_error("dm_report_field_percent: percentage too large.");
		return 0;
	}

	dm_report_field_set_value(field, repstr, sortval);
	return 1;
}
+
/* Position and length of one list element within the report string. */
struct str_list_sort_value_item {
	unsigned pos;
	size_t len;
};

/*
 * Sort value for a string-list field: the report string plus an array of
 * per-element positions inside it; items[0].len holds the element count.
 */
struct str_list_sort_value {
	const char *value;
	struct str_list_sort_value_item *items;
};

/* Temporary element used while sorting the list for reporting. */
struct str_list_sort_item {
	const char *str;
	struct str_list_sort_value_item item;
};

/* qsort comparator: order list elements by strcmp of their strings. */
static int _str_list_sort_item_cmp(const void *a, const void *b)
{
	const struct str_list_sort_item *slsi_a = (const struct str_list_sort_item *) a;
	const struct str_list_sort_item *slsi_b = (const struct str_list_sort_item *) b;

	return strcmp(slsi_a->str, slsi_b->str);
}
+
+static int _report_field_string_list(struct dm_report *rh,
+ struct dm_report_field *field,
+ const struct dm_list *data,
+ const char *delimiter,
+ int sort)
+{
+ static const char _string_list_grow_object_failed_msg[] = "dm_report_field_string_list: dm_pool_grow_object_failed";
+ struct str_list_sort_value *sort_value = NULL;
+ unsigned int list_size, pos, i;
+ struct str_list_sort_item *arr = NULL;
+ struct dm_str_list *sl;
+ size_t delimiter_len, len;
+ void *object;
+ int r = 0;
+
+ if (!(sort_value = dm_pool_zalloc(rh->mem, sizeof(struct str_list_sort_value)))) {
+ log_error("dm_report_field_string_list: dm_pool_zalloc failed for sort_value");
+ return 0;
+ }
+
+ list_size = dm_list_size(data);
+
+ /*
+ * Sort value stores the pointer to the report_string and then
+ * position and length for each list element withing the report_string.
+ * The first element stores number of elements in 'len' (therefore
+ * list_size + 1 is used below for the extra element).
+ * For example, with this input:
+ * sort = 0; (we don't want to report sorted)
+ * report_string = "abc,xy,defgh"; (this is reported)
+ *
+ * ...we end up with:
+ * sort_value->value = report_string; (we'll use the original report_string for indices)
+ * sort_value->items[0] = {0,3}; (we have 3 items)
+ * sort_value->items[1] = {0,3}; ("abc")
+ * sort_value->items[2] = {7,5}; ("defgh")
+ * sort_value->items[3] = {4,2}; ("xy")
+ *
+ * The items alone are always sorted while in report_string they can be
+ * sorted or not (based on "sort" arg) - it depends on how we prefer to
+ * display the list. Having items sorted internally helps with searching
+ * through them.
+ */
+ if (!(sort_value->items = dm_pool_zalloc(rh->mem, (list_size + 1) * sizeof(struct str_list_sort_value_item)))) {
+ log_error("dm_report_fiel_string_list: dm_pool_zalloc failed for sort value items");
+ goto out;
+ }
+ sort_value->items[0].len = list_size;
+
+ /* zero items */
+ if (!list_size) {
+ sort_value->value = field->report_string = "";
+ field->sort_value = sort_value;
+ return 1;
+ }
+
+ /* one item */
+ if (list_size == 1) {
+ sl = (struct dm_str_list *) dm_list_first(data);
+ if (!sl ||
+ !(sort_value->value = field->report_string = dm_pool_strdup(rh->mem, sl->str))) {
+ log_error("dm_report_field_string_list: dm_pool_strdup failed");
+ goto out;
+ }
+ sort_value->items[1].pos = 0;
+ sort_value->items[1].len = strlen(sl->str);
+ field->sort_value = sort_value;
+ return 1;
+ }
+
+ /* more than one item - sort the list */
+ if (!(arr = dm_malloc(sizeof(struct str_list_sort_item) * list_size))) {
+ log_error("dm_report_field_string_list: dm_malloc failed");
+ goto out;
+ }
+
+ if (!(dm_pool_begin_object(rh->mem, 256))) {
+ log_error(_string_list_grow_object_failed_msg);
+ goto out;
+ }
+
+ if (!delimiter)
+ delimiter = ",";
+ delimiter_len = strlen(delimiter);
+
+ i = pos = len = 0;
+ dm_list_iterate_items(sl, data) {
+ arr[i].str = sl->str;
+ if (!sort) {
+ /* sorted outpud not required - report the list as it is */
+ len = strlen(sl->str);
+ if (!dm_pool_grow_object(rh->mem, arr[i].str, len) ||
+ (i+1 != list_size && !dm_pool_grow_object(rh->mem, delimiter, delimiter_len))) {
+ log_error(_string_list_grow_object_failed_msg);
+ goto out;
+ }
+ arr[i].item.pos = pos;
+ arr[i].item.len = len;
+ pos = i+1 == list_size ? pos+len : pos+len+delimiter_len;
+ }
+ i++;
+ }
+
+ qsort(arr, i, sizeof(struct str_list_sort_item), _str_list_sort_item_cmp);
+
+ for (i = 0, pos = 0; i < list_size; i++) {
+ if (sort) {
+ /* sorted output required - report the list as sorted */
+ len = strlen(arr[i].str);
+ if (!dm_pool_grow_object(rh->mem, arr[i].str, len) ||
+ (i+1 != list_size && !dm_pool_grow_object(rh->mem, delimiter, delimiter_len))) {
+ log_error(_string_list_grow_object_failed_msg);
+ goto out;
+ }
+ /*
+ * Save position and length of the string
+ * element in report_string for sort_value.
+ * Use i+1 here since items[0] stores list size!!!
+ */
+ sort_value->items[i+1].pos = pos;
+ sort_value->items[i+1].len = len;
+ pos = i+1 == list_size ? pos+len : pos+len+delimiter_len;
+ } else {
+ sort_value->items[i+1].pos = arr[i].item.pos;
+ sort_value->items[i+1].len = arr[i].item.len;
+ }
+ }
+
+ if (!dm_pool_grow_object(rh->mem, "\0", 1)) {
+ log_error(_string_list_grow_object_failed_msg);
+ goto out;
+ }
+
+ object = dm_pool_end_object(rh->mem);
+ sort_value->value = object;
+ field->sort_value = sort_value;
+ field->report_string = object;
+ r = 1;
+out:
+ if (!r && sort_value)
+ dm_pool_free(rh->mem, sort_value);
+ dm_free(arr);
+
+ return r;
+}
+
/* Report a string list, sorted for display. */
int dm_report_field_string_list(struct dm_report *rh,
				struct dm_report_field *field,
				const struct dm_list *data,
				const char *delimiter)
{
	return _report_field_string_list(rh, field, data, delimiter, 1);
}
+
/* Report a string list in its original (unsorted) order. */
int dm_report_field_string_list_unsorted(struct dm_report *rh,
					 struct dm_report_field *field,
					 const struct dm_list *data,
					 const char *delimiter)
{
	/*
	 * The raw value is always sorted, just the string reported is unsorted.
	 * Having the raw value always sorted helps when matching selection list
	 * with selection criteria.
	 */
	return _report_field_string_list(rh, field, data, delimiter, 0);
}
+
/*
 * Report a signed int: decimal string for display, the value cast to
 * uint64_t for sorting.  Returns 1 on success, 0 on failure.
 */
int dm_report_field_int(struct dm_report *rh,
			struct dm_report_field *field, const int *data)
{
	const int value = *data;
	uint64_t *sortval;
	char *repstr;

	/* 13 bytes cover "-2147483648" plus the terminating nul. */
	if (!(repstr = dm_pool_zalloc(rh->mem, 13))) {
		log_error("dm_report_field_int: dm_pool_alloc failed");
		return 0;
	}

	if (!(sortval = dm_pool_alloc(rh->mem, sizeof(int64_t)))) {
		log_error("dm_report_field_int: dm_pool_alloc failed");
		return 0;
	}

	if (dm_snprintf(repstr, 12, "%d", value) < 0) {
		log_error("dm_report_field_int: int too big: %d", value);
		return 0;
	}

	*sortval = (uint64_t) value;
	field->sort_value = sortval;
	field->report_string = repstr;

	return 1;
}
+
/*
 * Report a uint32_t: decimal string for display, widened to uint64_t
 * for sorting.  Returns 1 on success, 0 on failure.
 */
int dm_report_field_uint32(struct dm_report *rh,
			   struct dm_report_field *field, const uint32_t *data)
{
	const uint32_t value = *data;
	uint64_t *sortval;
	char *repstr;

	/* 12 bytes cover "4294967295" plus the terminating nul. */
	if (!(repstr = dm_pool_zalloc(rh->mem, 12))) {
		log_error("dm_report_field_uint32: dm_pool_alloc failed");
		return 0;
	}

	if (!(sortval = dm_pool_alloc(rh->mem, sizeof(uint64_t)))) {
		log_error("dm_report_field_uint32: dm_pool_alloc failed");
		return 0;
	}

	if (dm_snprintf(repstr, 11, "%u", value) < 0) {
		log_error("dm_report_field_uint32: uint32 too big: %u", value);
		return 0;
	}

	*sortval = (uint64_t) value;
	field->sort_value = sortval;
	field->report_string = repstr;

	return 1;
}
+
/*
 * Report an int32_t: decimal string for display, the value cast to
 * uint64_t for sorting.  Returns 1 on success, 0 on failure.
 */
int dm_report_field_int32(struct dm_report *rh,
			  struct dm_report_field *field, const int32_t *data)
{
	const int32_t value = *data;
	uint64_t *sortval;
	char *repstr;

	/* 13 bytes cover "-2147483648" plus the terminating nul. */
	if (!(repstr = dm_pool_zalloc(rh->mem, 13))) {
		log_error("dm_report_field_int32: dm_pool_alloc failed");
		return 0;
	}

	if (!(sortval = dm_pool_alloc(rh->mem, sizeof(int64_t)))) {
		log_error("dm_report_field_int32: dm_pool_alloc failed");
		return 0;
	}

	if (dm_snprintf(repstr, 12, "%d", value) < 0) {
		log_error("dm_report_field_int32: int32 too big: %d", value);
		return 0;
	}

	*sortval = (uint64_t) value;
	field->sort_value = sortval;
	field->report_string = repstr;

	return 1;
}
+
/*
 * Report a uint64_t: decimal string for display, the raw value for
 * sorting.  Returns 1 on success, 0 on failure.
 */
int dm_report_field_uint64(struct dm_report *rh,
			   struct dm_report_field *field, const uint64_t *data)
{
	const uint64_t value = *data;
	uint64_t *sortval;
	char *repstr;

	/* 22 bytes cover "18446744073709551615" plus the terminating nul. */
	if (!(repstr = dm_pool_zalloc(rh->mem, 22))) {
		log_error("dm_report_field_uint64: dm_pool_alloc failed");
		return 0;
	}

	if (!(sortval = dm_pool_alloc(rh->mem, sizeof(uint64_t)))) {
		log_error("dm_report_field_uint64: dm_pool_alloc failed");
		return 0;
	}

	if (dm_snprintf(repstr, 21, FMTu64 , value) < 0) {
		log_error("dm_report_field_uint64: uint64 too big: %" PRIu64, value);
		return 0;
	}

	*sortval = value;
	field->sort_value = sortval;
	field->report_string = repstr;

	return 1;
}
+
/*
 * Helper functions for custom report functions
 */
/*
 * Set both display and sort value of a field.  If sortvalue is NULL the
 * display string itself is used for sorting - which is flagged as an
 * internal error for numeric fields, where string order would be wrong.
 */
void dm_report_field_set_value(struct dm_report_field *field, const void *value, const void *sortvalue)
{
	field->report_string = (const char *) value;
	field->sort_value = sortvalue ? : value;

	if ((field->sort_value == value) &&
	    (field->props->flags & DM_REPORT_FIELD_TYPE_NUMBER))
		log_warn(INTERNAL_ERROR "Using string as sort value for numerical field.");
}
+
/* Map a DM_REPORT_FIELD_TYPE_* value to a human-readable name. */
static const char *_get_field_type_name(unsigned field_type)
{
	switch (field_type) {
	case DM_REPORT_FIELD_TYPE_STRING: return "string";
	case DM_REPORT_FIELD_TYPE_NUMBER: return "number";
	case DM_REPORT_FIELD_TYPE_SIZE: return "size";
	case DM_REPORT_FIELD_TYPE_PERCENT: return "percent";
	case DM_REPORT_FIELD_TYPE_TIME: return "time";
	case DM_REPORT_FIELD_TYPE_STRING_LIST: return "string list";
	default: return "unknown";
	}
}
+
+/*
+ * show help message
+ */
+static size_t _get_longest_field_id_len(const struct dm_report_field_type *fields)
+{
+ uint32_t f;
+ size_t id_len = 0;
+
+ for (f = 0; fields[f].report_fn; f++)
+ if (strlen(fields[f].id) > id_len)
+ id_len = strlen(fields[f].id);
+
+ return id_len;
+}
+
/*
 * Print a help listing of 'fields' via log_warn, grouped into sections
 * by report type, optionally including a per-section "all" item and
 * the field data types.
 */
static void _display_fields_more(struct dm_report *rh,
				 const struct dm_report_field_type *fields,
				 size_t id_len, int display_all_fields_item,
				 int display_field_types)
{
	uint32_t f;
	const struct dm_report_object_type *type;
	const char *desc, *last_desc = "";

	/* Widen the id column to the longest field id... */
	for (f = 0; fields[f].report_fn; f++)
		if (strlen(fields[f].id) > id_len)
			id_len = strlen(fields[f].id);

	/* ...or the longest type prefix + "all", whichever is longer. */
	for (type = rh->types; type->data_fn; type++)
		if (strlen(type->prefix) + 3 > id_len)
			id_len = strlen(type->prefix) + 3;

	for (f = 0; fields[f].report_fn; f++) {
		if ((type = _find_type(rh, fields[f].type)) && type->desc)
			desc = type->desc;
		else
			desc = " ";
		/* Start a new section whenever the type description changes. */
		if (desc != last_desc) {
			if (*last_desc)
				log_warn(" ");
			log_warn("%s Fields", desc);
			log_warn("%*.*s", (int) strlen(desc) + 7,
				 (int) strlen(desc) + 7,
				 "-------------------------------------------------------------------------------");
			if (display_all_fields_item && type->id != SPECIAL_REPORT_TYPE)
				log_warn(" %sall%-*s - %s", type->prefix,
					 (int) (id_len - 3 - strlen(type->prefix)), "",
					 "All fields in this section.");
		}
		/* FIXME Add line-wrapping at terminal width (or 80 cols) */
		log_warn(" %-*s - %s%s%s%s%s", (int) id_len, fields[f].id, fields[f].desc,
			 display_field_types ? " [" : "",
			 display_field_types ? fields[f].flags & FLD_CMP_UNCOMPARABLE ? "unselectable " : "" : "",
			 display_field_types ? _get_field_type_name(fields[f].flags & DM_REPORT_FIELD_TYPE_MASK) : "",
			 display_field_types ? "]" : "");
		last_desc = desc;
	}
}
+
/*
 * show help message
 */
/* List the registered fields, then the implicit ones, aligned together. */
static void _display_fields(struct dm_report *rh, int display_all_fields_item,
			    int display_field_types)
{
	size_t tmp, id_len = 0;

	/* Use the longest id across both field tables for alignment. */
	if ((tmp = _get_longest_field_id_len(_implicit_report_fields)) > id_len)
		id_len = tmp;
	if ((tmp = _get_longest_field_id_len(rh->fields)) > id_len)
		id_len = tmp;

	_display_fields_more(rh, rh->fields, id_len, display_all_fields_item,
			     display_field_types);
	log_warn(" ");
	_display_fields_more(rh, _implicit_report_fields, id_len,
			     display_all_fields_item, display_field_types);

}
+
/*
 * Initialise report handle
 */
/*
 * Fill field properties 'dest' from the field definition at 'field_num'
 * in either the implicit or the registered field table.
 * Returns 0 if the field's report type cannot be resolved.
 */
static int _copy_field(struct dm_report *rh, struct field_properties *dest,
		       uint32_t field_num, int implicit)
{
	const struct dm_report_field_type *fields = implicit ? _implicit_report_fields
							     : rh->fields;

	dest->field_num = field_num;
	dest->initial_width = fields[field_num].width;
	dest->width = fields[field_num].width; /* adjusted in _do_report_object() */
	dest->flags = fields[field_num].flags & DM_REPORT_FIELD_MASK;
	dest->implicit = implicit;

	/* set object type method */
	dest->type = _find_type(rh, fields[field_num].type);
	if (!dest->type) {
		log_error("dm_report: field not match: %s",
			  fields[field_num].id);
		return 0;
	}

	return 1;
}
+
/*
 * Allocate properties for field 'field_num' with the extra 'flags' and
 * add them to the report's field list.  Returns NULL on failure.
 */
static struct field_properties * _add_field(struct dm_report *rh,
					    uint32_t field_num, int implicit,
					    uint32_t flags)
{
	struct field_properties *fp;

	if (!(fp = dm_pool_zalloc(rh->mem, sizeof(*fp)))) {
		log_error("dm_report: struct field_properties allocation "
			  "failed");
		return NULL;
	}

	if (!_copy_field(rh, fp, field_num, implicit)) {
		stack;
		dm_pool_free(rh->mem, fp);
		return NULL;
	}

	fp->flags |= flags;

	/*
	 * Place hidden fields at the front so dm_list_end() will
	 * tell us when we've reached the last visible field.
	 */
	if (fp->flags & FLD_HIDDEN)
		dm_list_add_h(&rh->field_props, &fp->list);
	else
		dm_list_add(&rh->field_props, &fp->list);

	return fp;
}
+
/*
 * Copy up to 'flen' characters of 'field' into 'canonical_field' (a
 * buffer of 'fcanonical_len' bytes), dropping all underscores.  If
 * 'differs' is non-NULL it reports whether any character was dropped.
 * Returns 0 (with the buffer cleared) if the name does not fit.
 */
static int _get_canonical_field_name(const char *field,
				     size_t flen,
				     char *canonical_field,
				     size_t fcanonical_len,
				     int *differs)
{
	size_t out = 0;
	int dropped_char = 0;

	while (flen && *field) {
		if (*field == '_')
			dropped_char = 1;
		else {
			if ((out + 1) >= fcanonical_len) {
				canonical_field[0] = '\0';
				log_error("%s: field name too long.", field);
				return 0;
			}
			canonical_field[out++] = *field;
		}
		field++;
		flen--;
	}

	canonical_field[out] = '\0';
	if (differs)
		*differs = dropped_char;
	return 1;
}
+
/*
 * Compare canonical_name1 against canonical_name2 or prefix
 * plus canonical_name2. Canonical name is a name where all
 * superfluous characters are removed (underscores for now).
 * Both names are always null-terminated.
 */
static int _is_same_field(const char *canonical_name1, const char *canonical_name2,
			  const char *prefix)
{
	size_t prefix_len;

	/* Exact match? */
	if (!strcasecmp(canonical_name1, canonical_name2))
		return 1;

	/*
	 * An empty prefix cannot add a match beyond the exact check above.
	 * This also avoids the size_t underflow in "strlen(prefix) - 1"
	 * when the report's field prefix is "".
	 */
	if (!*prefix)
		return 0;

	/* Match including prefix?  (The prefix's last character, an
	 * underscore, is dropped - canonical names contain none.) */
	prefix_len = strlen(prefix) - 1;
	if (!strncasecmp(prefix, canonical_name1, prefix_len) &&
	    !strcasecmp(canonical_name1 + prefix_len, canonical_name2))
		return 1;

	return 0;
}
+
/*
 * Check for a report type prefix + "all" match.
 */
static void _all_match_combine(const struct dm_report_object_type *types,
			       unsigned unprefixed_all_matched,
			       const char *field, size_t flen,
			       uint32_t *report_types)
{
	char field_canon[DM_REPORT_FIELD_TYPE_ID_LEN];
	const struct dm_report_object_type *t;
	size_t prefix_len;

	if (!_get_canonical_field_name(field, flen, field_canon, sizeof(field_canon), NULL))
		return;
	flen = strlen(field_canon);

	for (t = types; t->data_fn; t++) {
		/* Compare without the prefix's trailing underscore. */
		prefix_len = strlen(t->prefix) - 1;

		/* Accept "<prefix>" itself (when an unprefixed "all" already
		 * matched) or "<prefix>all". */
		if (!strncasecmp(t->prefix, field_canon, prefix_len) &&
		    ((unprefixed_all_matched && (flen == prefix_len)) ||
		     (!strncasecmp(field_canon + prefix_len, "all", 3) &&
		      (flen == prefix_len + 3))))
			*report_types |= t->id;
	}
}
+
/*
 * Resolve the report types selected by an "all" (or "<prefix>all")
 * field name.  Returns the matching type mask, 0 if nothing matched.
 */
static uint32_t _all_match(struct dm_report *rh, const char *field, size_t flen)
{
	uint32_t report_types = 0;
	unsigned unprefixed_all_matched = 0;

	if (!strncasecmp(field, "all", 3) && flen == 3) {
		/* If there's no report prefix, match all report types */
		if (!(flen = strlen(rh->field_prefix)))
			return rh->report_types ? : REPORT_TYPES_ALL;

		/* otherwise include all fields beginning with the report prefix. */
		unprefixed_all_matched = 1;
		field = rh->field_prefix;
		report_types = rh->report_types;
	}

	/* Combine all report types that have a matching prefix. */
	_all_match_combine(rh->types, unprefixed_all_matched, field, flen, &report_types);

	return report_types;
}
+
/*
 * Add all fields with a matching type.
 */
static int _add_all_fields(struct dm_report *rh, uint32_t type)
{
	uint32_t f;

	for (f = 0; rh->fields[f].report_fn; f++)
		if ((rh->fields[f].type & type) && !_add_field(rh, f, 0, 0))
			return 0;

	return 1;
}
+
/*
 * Look up a field by (possibly prefixed) name.  On success, store the
 * field's index in *f_ret and whether it is implicit in *implicit.
 * Returns 0 when the field is unknown.
 */
static int _get_field(struct dm_report *rh, const char *field, size_t flen,
		      uint32_t *f_ret, int *implicit)
{
	char field_canon[DM_REPORT_FIELD_TYPE_ID_LEN];
	uint32_t f;

	if (!flen)
		return 0;

	if (!_get_canonical_field_name(field, flen, field_canon, sizeof(field_canon), NULL))
		return_0;

	/* Implicit fields are checked before the registered ones. */
	for (f = 0; _implicit_report_fields[f].report_fn; f++) {
		if (_is_same_field(_implicit_report_fields[f].id, field_canon, rh->field_prefix)) {
			*f_ret = f;
			*implicit = 1;
			return 1;
		}
	}

	for (f = 0; rh->fields[f].report_fn; f++) {
		if (_is_same_field(rh->canonical_field_ids[f], field_canon, rh->field_prefix)) {
			*f_ret = f;
			*implicit = 0;
			return 1;
		}
	}

	return 0;
}
+
/*
 * Process one name from the output field list: register the matching
 * field, or all fields of the matching type(s) for "all".  With
 * report_type_only set, only accumulate report type bits instead.
 * Returns 0 when the name matches nothing.
 */
static int _field_match(struct dm_report *rh, const char *field, size_t flen,
			unsigned report_type_only)
{
	uint32_t f, type;
	int implicit;

	if (!flen)
		return 0;

	if ((_get_field(rh, field, flen, &f, &implicit))) {
		if (report_type_only) {
			rh->report_types |= implicit ? _implicit_report_fields[f].type
						     : rh->fields[f].type;
			return 1;
		}

		return _add_field(rh, f, implicit, 0) ? 1 : 0;
	}

	if ((type = _all_match(rh, field, flen))) {
		if (report_type_only) {
			rh->report_types |= type;
			return 1;
		}

		return _add_all_fields(rh, type);
	}

	return 0;
}
+
/*
 * Mark field 'field_num' as a sort key with the given FLD_ASCENDING/
 * FLD_DESCENDING flags, adding it as a hidden field if it is not
 * already displayed.  Duplicate sort keys are ignored with a warning.
 */
static int _add_sort_key(struct dm_report *rh, uint32_t field_num, int implicit,
			 uint32_t flags, unsigned report_type_only)
{
	struct field_properties *fp, *found = NULL;
	const struct dm_report_field_type *fields = implicit ? _implicit_report_fields
							     : rh->fields;

	dm_list_iterate_items(fp, &rh->field_props) {
		if ((fp->implicit == implicit) && (fp->field_num == field_num)) {
			found = fp;
			break;
		}
	}

	if (!found) {
		if (report_type_only)
			rh->report_types |= fields[field_num].type;
		else if (!(found = _add_field(rh, field_num, implicit, FLD_HIDDEN)))
			return_0;
	}

	if (report_type_only)
		return 1;

	if (found->flags & FLD_SORT_KEY) {
		log_warn("dm_report: Ignoring duplicate sort field: %s.",
			 fields[field_num].id);
		return 1;
	}

	found->flags |= FLD_SORT_KEY;
	found->sort_posn = rh->keys_count++;
	found->flags |= flags;

	return 1;
}
+
/*
 * Process one sort key: an optional '+' (ascending, the default) or
 * '-' (descending) followed by a field name.  Returns 0 on an unknown
 * or missing field name.
 */
static int _key_match(struct dm_report *rh, const char *key, size_t len,
		      unsigned report_type_only)
{
	char key_canon[DM_REPORT_FIELD_TYPE_ID_LEN];
	uint32_t f;
	uint32_t flags;

	if (!len)
		return 0;

	if (*key == '+') {
		key++;
		len--;
		flags = FLD_ASCENDING;
	} else if (*key == '-') {
		key++;
		len--;
		flags = FLD_DESCENDING;
	} else
		flags = FLD_ASCENDING;

	if (!len) {
		log_error("dm_report: Missing sort field name");
		return 0;
	}

	if (!_get_canonical_field_name(key, len, key_canon, sizeof(key_canon), NULL))
		return_0;

	for (f = 0; _implicit_report_fields[f].report_fn; f++)
		if (_is_same_field(_implicit_report_fields[f].id, key_canon, rh->field_prefix))
			return _add_sort_key(rh, f, 1, flags, report_type_only);

	for (f = 0; rh->fields[f].report_fn; f++)
		if (_is_same_field(rh->canonical_field_ids[f], key_canon, rh->field_prefix))
			return _add_sort_key(rh, f, 0, flags, report_type_only);

	return 0;
}
+
/*
 * Parse a comma-separated output field list, registering each field
 * (or just collecting report type bits when report_type_only is set).
 * Prints the field help and fails on an unrecognised name.
 */
static int _parse_fields(struct dm_report *rh, const char *format,
			 unsigned report_type_only)
{
	const char *ws;		  /* Word start */
	const char *we = format;  /* Word end */

	while (*we) {
		/* Allow consecutive commas */
		while (*we && *we == ',')
			we++;

		/* start of the field name */
		ws = we;
		while (*we && *we != ',')
			we++;

		if (!_field_match(rh, ws, (size_t) (we - ws), report_type_only)) {
			_display_fields(rh, 1, 0);
			log_warn(" ");
			log_error("Unrecognised field: %.*s", (int) (we - ws), ws);
			return 0;
		}
	}

	return 1;
}
+
/*
 * Parse a comma-separated sort key list (NULL means no keys).
 * Prints the field help and fails on an unrecognised key name.
 */
static int _parse_keys(struct dm_report *rh, const char *keys,
		       unsigned report_type_only)
{
	const char *ws;		/* Word start */
	const char *we = keys;	/* Word end */

	if (!keys)
		return 1;

	while (*we) {
		/* Allow consecutive commas */
		while (*we && *we == ',')
			we++;
		ws = we;
		while (*we && *we != ',')
			we++;
		if (!_key_match(rh, ws, (size_t) (we - ws), report_type_only)) {
			_display_fields(rh, 1, 0);
			log_warn(" ");
			log_error("dm_report: Unrecognised field: %.*s", (int) (we - ws), ws);
			return 0;
		}
	}

	return 1;
}
+
/*
 * Reject caller-supplied report types whose id bits collide with the
 * ids reserved for the library's implicit types.  Returns 1 on clash.
 */
static int _contains_reserved_report_type(const struct dm_report_object_type *types)
{
	const struct dm_report_object_type *type, *implicit_type;

	for (implicit_type = _implicit_report_types; implicit_type->data_fn; implicit_type++) {
		for (type = types; type->data_fn; type++) {
			if (implicit_type->id & type->id) {
				log_error(INTERNAL_ERROR "dm_report_init: definition of report "
					  "types given contains reserved identifier");
				return 1;
			}
		}
	}

	return 0;
}
+
/* Return the computed report types to the caller, implicit types removed. */
static void _dm_report_init_update_types(struct dm_report *rh, uint32_t *report_types)
{
	const struct dm_report_object_type *type;

	if (!report_types)
		return;

	*report_types = rh->report_types;
	/*
	 * Do not include implicit types as these are not understood by
	 * dm_report_init caller - the caller doesn't know how to check
	 * these types anyway.
	 */
	for (type = _implicit_report_types; type->data_fn; type++)
		*report_types &= ~type->id;
}
+
/* Did the field list include the implicit "help"/"?" pseudo-field? */
static int _help_requested(struct dm_report *rh)
{
	struct field_properties *fp;

	dm_list_iterate_items(fp, &rh->field_props) {
		if (fp->implicit &&
		    (!strcmp(_implicit_report_fields[fp->field_num].id, SPECIAL_FIELD_HELP_ID) ||
		     !strcmp(_implicit_report_fields[fp->field_num].id, SPECIAL_FIELD_HELP_ALT_ID)))
			return 1;
	}

	return 0;
}
+
+static int _canonicalize_field_ids(struct dm_report *rh)
+{
+ size_t registered_field_count = 0, i;
+ char canonical_field[DM_REPORT_FIELD_TYPE_ID_LEN];
+ char *canonical_field_dup;
+ int differs;
+
+ while (*rh->fields[registered_field_count].id)
+ registered_field_count++;
+
+ if (!(rh->canonical_field_ids = dm_pool_alloc(rh->mem, registered_field_count * sizeof(const char *)))) {
+ log_error("_canonicalize_field_ids: dm_pool_alloc failed");
+ return 0;
+ }
+
+ for (i = 0; i < registered_field_count; i++) {
+ if (!_get_canonical_field_name(rh->fields[i].id, strlen(rh->fields[i].id),
+ canonical_field, sizeof(canonical_field), &differs))
+ return_0;
+
+ if (differs) {
+ if (!(canonical_field_dup = dm_pool_strdup(rh->mem, canonical_field))) {
+ log_error("_canonicalize_field_dup: dm_pool_alloc failed.");
+ return 0;
+ }
+ rh->canonical_field_ids[i] = canonical_field_dup;
+ } else
+ rh->canonical_field_ids[i] = rh->fields[i].id;
+ }
+
+ return 1;
+}
+
/*
 * Create a new report handle.
 *
 * report_types: in - initial report type mask; out (if non-NULL) -
 *   the types implied by the requested fields/keys, implicit types
 *   excluded.
 * output_fields: comma-separated field name list.
 * sort_keys: comma-separated list of [+|-]field sort keys.
 * Returns the new handle or NULL on error.
 */
struct dm_report *dm_report_init(uint32_t *report_types,
				 const struct dm_report_object_type *types,
				 const struct dm_report_field_type *fields,
				 const char *output_fields,
				 const char *output_separator,
				 uint32_t output_flags,
				 const char *sort_keys,
				 void *private_data)
{
	struct dm_report *rh;
	const struct dm_report_object_type *type;

	if (_contains_reserved_report_type(types))
		return_NULL;

	if (!(rh = dm_zalloc(sizeof(*rh)))) {
		log_error("dm_report_init: dm_malloc failed");
		return NULL;
	}

	/*
	 * rh->report_types is updated in _parse_fields() and _parse_keys()
	 * to contain all types corresponding to the fields specified by
	 * fields or keys.
	 */
	if (report_types)
		rh->report_types = *report_types;

	rh->separator = output_separator;
	rh->fields = fields;
	rh->types = types;
	rh->private = private_data;

	rh->flags |= output_flags & DM_REPORT_OUTPUT_MASK;

	/* With columns_as_rows we must buffer and not align. */
	if (output_flags & DM_REPORT_OUTPUT_COLUMNS_AS_ROWS) {
		if (!(output_flags & DM_REPORT_OUTPUT_BUFFERED))
			rh->flags |= DM_REPORT_OUTPUT_BUFFERED;
		if (output_flags & DM_REPORT_OUTPUT_ALIGNED)
			rh->flags &= ~DM_REPORT_OUTPUT_ALIGNED;
	}

	if (output_flags & DM_REPORT_OUTPUT_BUFFERED)
		rh->flags |= RH_SORT_REQUIRED;

	rh->flags |= RH_FIELD_CALC_NEEDED;

	dm_list_init(&rh->field_props);
	dm_list_init(&rh->rows);

	/* Field names may carry the prefix of the (single) report type. */
	if ((type = _find_type(rh, rh->report_types)) && type->prefix)
		rh->field_prefix = type->prefix;
	else
		rh->field_prefix = "";

	if (!(rh->mem = dm_pool_create("report", 10 * 1024))) {
		log_error("dm_report_init: allocation of memory pool failed");
		dm_free(rh);
		return NULL;
	}

	if (!_canonicalize_field_ids(rh)) {
		dm_report_free(rh);
		return NULL;
	}

	/*
	 * To keep the code needed to add the "all" field to a minimum, we parse
	 * the field lists twice. The first time we only update the report type.
	 * FIXME Use one pass instead and expand the "all" field afterwards.
	 */
	if (!_parse_fields(rh, output_fields, 1) ||
	    !_parse_keys(rh, sort_keys, 1)) {
		dm_report_free(rh);
		return NULL;
	}

	/* Generate list of fields for output based on format string & flags */
	if (!_parse_fields(rh, output_fields, 0) ||
	    !_parse_keys(rh, sort_keys, 0)) {
		dm_report_free(rh);
		return NULL;
	}

	/*
	 * Return updated types value for further compatibility check by caller.
	 */
	_dm_report_init_update_types(rh, report_types);

	if (_help_requested(rh)) {
		_display_fields(rh, 1, 0);
		log_warn(" ");
		rh->flags |= RH_ALREADY_REPORTED;
	}

	return rh;
}
+
/* Release a report handle and all the memory hanging off it. */
void dm_report_free(struct dm_report *rh)
{
	if (rh->selection)
		dm_pool_destroy(rh->selection->mem);
	if (rh->value_cache)
		dm_hash_destroy(rh->value_cache);
	dm_pool_destroy(rh->mem);
	dm_free(rh);
}
+
/*
 * Upper-case a nul-terminated string in place.
 * Returns the pointer that was passed in.
 */
static char *_toupperstr(char *str)
{
	char *u = str;

	do
		/*
		 * Cast to unsigned char: passing a negative plain char to
		 * toupper() is undefined behaviour where char is signed.
		 */
		*u = toupper((unsigned char) *u);
	while (*u++);

	return str;
}
+
/*
 * Set the prefix prepended to field names in field-name output mode;
 * the prefix is stored upper-cased.  Returns 0 on allocation failure.
 */
int dm_report_set_output_field_name_prefix(struct dm_report *rh, const char *output_field_name_prefix)
{
	char *prefix;

	if (!(prefix = dm_pool_strdup(rh->mem, output_field_name_prefix))) {
		log_error("dm_report_set_output_field_name_prefix: dm_pool_strdup failed");
		return 0;
	}

	rh->output_field_name_prefix = _toupperstr(prefix);

	return 1;
}
+
/*
 * Create a row of data for an object
 */
/*
 * Return a pointer to the raw data of field 'fp' inside 'object':
 * the type's object data pointer plus the field's registered offset.
 * Returns NULL when the type's data function yields no data.
 */
static void *_report_get_field_data(struct dm_report *rh,
				    struct field_properties *fp, void *object)
{
	const struct dm_report_field_type *fields = fp->implicit ? _implicit_report_fields
								 : rh->fields;

	char *ret = fp->type->data_fn(object);

	if (!ret)
		return NULL;

	return (void *)(ret + fields[fp->field_num].offset);
}
+
/* Implicit field data comes from the row itself ("selected" only). */
static void *_report_get_implicit_field_data(struct dm_report *rh __attribute__((unused)),
					     struct field_properties *fp, struct row *row)
{
	if (!strcmp(_implicit_report_fields[fp->field_num].id, SPECIAL_FIELD_SELECTED_ID))
		return row;

	return NULL;
}
+
/*
 * Floating-point comparisons tolerant to DBL_EPSILON rounding error:
 * two doubles closer than DBL_EPSILON are treated as equal.
 */
static int _dbl_equal(double a, double b)
{
	return fabs(b - a) < DBL_EPSILON;
}

static int _dbl_greater(double a, double b)
{
	return !_dbl_equal(a, b) && (a > b);
}

static int _dbl_less(double a, double b)
{
	return !_dbl_equal(a, b) && (a < b);
}

static int _dbl_greater_or_equal(double a, double b)
{
	return _dbl_equal(a, b) || _dbl_greater(a, b);
}

static int _dbl_less_or_equal(double a, double b)
{
	return _dbl_equal(a, b) || _dbl_less(a, b);
}
+
/*
 * Shorthand accessors used by the selection-matching code: dereference
 * an opaque value pointer as a scalar, or index into a range array.
 */
#define _uint64 *(const uint64_t *)
#define _uint64arr(var,index) ((const uint64_t *)(var))[(index)]
#define _str (const char *)
#define _dbl *(const double *)
#define _dblarr(var,index) ((const double *)(var))[(index)]
+
/*
 * Core test for _check_value_is_strictly_reserved: decide whether the
 * actual field value 'val' - or, when 'fs' is given, the selection
 * value stored in 'fs' - hits the reserved value 'res_val'.
 *
 * res_range - res_val is a two-element [min,max] array
 * fs        - optional selection; if its value chain has a 'next',
 *             the selection itself is a range (sel_range)
 *
 * Returns 1 when the reserved value is hit, 0 otherwise.
 * STRING_LIST and TIME have no comparison yet (see FIXMEs below).
 */
static int _do_check_value_is_strictly_reserved(unsigned type, const void *res_val, int res_range,
						const void *val, struct field_selection *fs)
{
	int sel_range = fs ? fs->value->next != NULL : 0;

	switch (type & DM_REPORT_FIELD_TYPE_MASK) {
	case DM_REPORT_FIELD_TYPE_NUMBER:
		if (res_range && sel_range) {
			/* both reserved value and selection value are ranges */
			if (((_uint64 val >= _uint64arr(res_val,0)) && (_uint64 val <= _uint64arr(res_val,1))) ||
			    (fs && ((fs->value->v.i == _uint64arr(res_val,0)) && (fs->value->next->v.i == _uint64arr(res_val,1)))))
				return 1;
		} else if (res_range) {
			/* only reserved value is a range */
			if (((_uint64 val >= _uint64arr(res_val,0)) && (_uint64 val <= _uint64arr(res_val,1))) ||
			    (fs && ((fs->value->v.i >= _uint64arr(res_val,0)) && (fs->value->v.i <= _uint64arr(res_val,1)))))
				return 1;
		} else if (sel_range) {
			/* only selection value is a range */
			/*
			 * NOTE(review): "val >= res && val <= res" only holds for
			 * val == res, and the fs check "sel1 >= res && sel2 <= res"
			 * only holds when the whole range collapses onto res.
			 * Presumably "sel1 <= res && sel2 >= res" (range contains
			 * the reserved value) was intended - confirm upstream.
			 */
			if (((_uint64 val >= _uint64 res_val) && (_uint64 val <= _uint64 res_val)) ||
			    (fs && ((fs->value->v.i >= _uint64 res_val) && (fs->value->next->v.i <= _uint64 res_val))))
				return 1;
		} else {
			/* neither selection value nor reserved value is a range */
			if ((_uint64 val == _uint64 res_val) ||
			    (fs && (fs->value->v.i == _uint64 res_val)))
				return 1;
		}
		break;

	case DM_REPORT_FIELD_TYPE_STRING:
		/* there are no ranges for string type yet */
		if ((!strcmp(_str val, _str res_val)) ||
		    (fs && (!strcmp(fs->value->v.s, _str res_val))))
			return 1;
		break;

	case DM_REPORT_FIELD_TYPE_SIZE:
		if (res_range && sel_range) {
			/* both reserved value and selection value are ranges */
			if ((_dbl_greater_or_equal(_dbl val, _dblarr(res_val,0)) && _dbl_less_or_equal(_dbl val, _dblarr(res_val,1))) ||
			    (fs && (_dbl_equal(fs->value->v.d, _dblarr(res_val,0)) && (_dbl_equal(fs->value->next->v.d, _dblarr(res_val,1))))))
				return 1;
		} else if (res_range) {
			/* only reserved value is a range */
			if ((_dbl_greater_or_equal(_dbl val, _dblarr(res_val,0)) && _dbl_less_or_equal(_dbl val, _dblarr(res_val,1))) ||
			    (fs && (_dbl_greater_or_equal(fs->value->v.d, _dblarr(res_val,0)) && _dbl_less_or_equal(fs->value->v.d, _dblarr(res_val,1)))))
				return 1;
		} else if (sel_range) {
			/* only selection value is a range */
			/* NOTE(review): same suspicion as the NUMBER sel_range branch above - confirm. */
			if ((_dbl_greater_or_equal(_dbl val, _dbl res_val) && (_dbl_less_or_equal(_dbl val, _dbl res_val))) ||
			    (fs && (_dbl_greater_or_equal(fs->value->v.d, _dbl res_val) && _dbl_less_or_equal(fs->value->next->v.d, _dbl res_val))))
				return 1;
		} else {
			/* neither selection value nor reserved value is a range */
			if ((_dbl_equal(_dbl val, _dbl res_val)) ||
			    (fs && (_dbl_equal(fs->value->v.d, _dbl res_val))))
				return 1;
		}
		break;

	case DM_REPORT_FIELD_TYPE_STRING_LIST:
		/* FIXME Add comparison for string list */
		break;
	case DM_REPORT_FIELD_TYPE_TIME:
		/* FIXME Add comparison for time */
		break;
	}

	return 0;
}
+
/*
 * Used to check whether a value of certain type used in selection is reserved.
 */
/*
 * Walk rh->reserved_values (NULL-value terminated array) and test each
 * strict reserved value (NAMED-only reserved values are skipped) against
 * 'val'/'fs' via _do_check_value_is_strictly_reserved.  Entries with type
 * DM_REPORT_FIELD_TYPE_NONE are per-field and only apply to 'field_num';
 * the rest apply per value type.  Returns 1 on a hit, 0 otherwise.
 */
static int _check_value_is_strictly_reserved(struct dm_report *rh, uint32_t field_num, unsigned type,
					     const void *val, struct field_selection *fs)
{
	const struct dm_report_reserved_value *iter = rh->reserved_values;
	const struct dm_report_field_reserved_value *frv;
	int res_range;

	if (!iter)
		return 0;

	while (iter->value) {
		/* Only check strict reserved values, not the weaker form ("named" reserved value). */
		if (!(iter->type & DM_REPORT_FIELD_RESERVED_VALUE_NAMED)) {
			res_range = iter->type & DM_REPORT_FIELD_RESERVED_VALUE_RANGE;
			if ((iter->type & DM_REPORT_FIELD_TYPE_MASK) == DM_REPORT_FIELD_TYPE_NONE) {
				frv = (const struct dm_report_field_reserved_value *) iter->value;
				if (frv->field_num == field_num && _do_check_value_is_strictly_reserved(type, frv->value, res_range, val, fs))
					return 1;
			} else if (iter->type & type && _do_check_value_is_strictly_reserved(type, iter->value, res_range, val, fs))
				return 1;
		}
		iter++;
	}

	return 0;
}
+
/*
 * Compare a NUMBER/PERCENT field value against the selection in fs.
 * The selection is a single value (sel1) or a range [sel1,sel2] when
 * fs->value->next is set.  For the ordered operators, a value that is
 * a strictly-reserved value never matches (reserved values only ever
 * compare equal/unequal).  Returns 1 on match, 0 otherwise.
 */
static int _cmp_field_int(struct dm_report *rh, uint32_t field_num, const char *field_id,
			  uint64_t val, struct field_selection *fs)
{
	int range = fs->value->next != NULL;
	const uint64_t sel1 = fs->value->v.i;
	const uint64_t sel2 = range ? fs->value->next->v.i : 0;

	switch(fs->flags & FLD_CMP_MASK) {
	case FLD_CMP_EQUAL:
		/* "equal to a range" means "inside the range" */
		return range ? ((val >= sel1) && (val <= sel2)) : val == sel1;

	case FLD_CMP_NOT|FLD_CMP_EQUAL:
		return range ? !((val >= sel1) && (val <= sel2)) : val != sel1;

	case FLD_CMP_NUMBER|FLD_CMP_GT:
		if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_NUMBER, &val, fs))
			return 0;
		/* "> range" means above the range's upper bound */
		return range ? val > sel2 : val > sel1;

	case FLD_CMP_NUMBER|FLD_CMP_GT|FLD_CMP_EQUAL:
		if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_NUMBER, &val, fs))
			return 0;
		/* ">= range" is satisfied anywhere at or above the range start */
		return val >= sel1;

	case FLD_CMP_NUMBER|FLD_CMP_LT:
		if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_NUMBER, &val, fs))
			return 0;
		/* "< range" means below the range start */
		return val < sel1;

	case FLD_CMP_NUMBER|FLD_CMP_LT|FLD_CMP_EQUAL:
		if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_NUMBER, &val, fs))
			return 0;
		/* "<= range" is satisfied anywhere at or below the range end */
		return range ? val <= sel2 : val <= sel1;

	default:
		log_error(INTERNAL_ERROR "_cmp_field_int: unsupported number "
			  "comparison type for field %s", field_id);
	}

	return 0;
}
+
/*
 * Compare a SIZE field value against the selection in fs.
 * Same range semantics as _cmp_field_int, but uses the epsilon-based
 * _dbl_* helpers for all double comparisons.  Strictly-reserved values
 * never match the ordered operators.  Returns 1 on match, 0 otherwise.
 */
static int _cmp_field_double(struct dm_report *rh, uint32_t field_num, const char *field_id,
			     double val, struct field_selection *fs)
{
	int range = fs->value->next != NULL;
	double sel1 = fs->value->v.d;
	double sel2 = range ? fs->value->next->v.d : 0;

	switch(fs->flags & FLD_CMP_MASK) {
	case FLD_CMP_EQUAL:
		/* "equal to a range" means "inside the range" */
		return range ? (_dbl_greater_or_equal(val, sel1) && _dbl_less_or_equal(val, sel2))
			     : _dbl_equal(val, sel1);

	case FLD_CMP_NOT|FLD_CMP_EQUAL:
		return range ? !(_dbl_greater_or_equal(val, sel1) && _dbl_less_or_equal(val, sel2))
			     : !_dbl_equal(val, sel1);

	case FLD_CMP_NUMBER|FLD_CMP_GT:
		if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_SIZE, &val, fs))
			return 0;
		/* "> range" means above the range's upper bound */
		return range ? _dbl_greater(val, sel2)
			     : _dbl_greater(val, sel1);

	case FLD_CMP_NUMBER|FLD_CMP_GT|FLD_CMP_EQUAL:
		if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_SIZE, &val, fs))
			return 0;
		/* ">= range" is satisfied anywhere at or above the range start */
		return _dbl_greater_or_equal(val, sel1);

	case FLD_CMP_NUMBER|FLD_CMP_LT:
		if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_SIZE, &val, fs))
			return 0;
		/* "< range" means below the range start */
		return _dbl_less(val, sel1);

	case FLD_CMP_NUMBER|FLD_CMP_LT|FLD_CMP_EQUAL:
		if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_SIZE, &val, fs))
			return 0;
		/* "<= range" is satisfied anywhere at or below the range end */
		return range ? _dbl_less_or_equal(val, sel2) : _dbl_less_or_equal(val, sel1);

	default:
		log_error(INTERNAL_ERROR "_cmp_field_double: unsupported number "
			  "comparison type for selection field %s", field_id);
	}

	return 0;
}
+
+static int _cmp_field_string(struct dm_report *rh __attribute__((unused)),
+ uint32_t field_num, const char *field_id,
+ const char *val, struct field_selection *fs)
+{
+ const char *sel = fs->value->v.s;
+
+ switch (fs->flags & FLD_CMP_MASK) {
+ case FLD_CMP_EQUAL:
+ return !strcmp(val, sel);
+ case FLD_CMP_NOT|FLD_CMP_EQUAL:
+ return strcmp(val, sel);
+ default:
+ log_error(INTERNAL_ERROR "_cmp_field_string: unsupported string "
+ "comparison type for selection field %s", field_id);
+ }
+
+ return 0;
+}
+
+static int _cmp_field_time(struct dm_report *rh,
+ uint32_t field_num, const char *field_id,
+ time_t val, struct field_selection *fs)
+{
+ int range = fs->value->next != NULL;
+ time_t sel1 = fs->value->v.t;
+ time_t sel2 = range ? fs->value->next->v.t : 0;
+
+ switch(fs->flags & FLD_CMP_MASK) {
+ case FLD_CMP_EQUAL:
+ return range ? ((val >= sel1) && (val <= sel2)) : val == sel1;
+ case FLD_CMP_NOT|FLD_CMP_EQUAL:
+ return range ? ((val >= sel1) && (val <= sel2)) : val != sel1;
+ case FLD_CMP_TIME|FLD_CMP_GT:
+ if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_TIME, &val, fs))
+ return 0;
+ return range ? val > sel2 : val > sel1;
+ case FLD_CMP_TIME|FLD_CMP_GT|FLD_CMP_EQUAL:
+ if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_TIME, &val, fs))
+ return 0;
+ return val >= sel1;
+ case FLD_CMP_TIME|FLD_CMP_LT:
+ if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_TIME, &val, fs))
+ return 0;
+ return val < sel1;
+ case FLD_CMP_TIME|FLD_CMP_LT|FLD_CMP_EQUAL:
+ if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_TIME, &val, fs))
+ return 0;
+ return range ? val <= sel2 : val <= sel1;
+ default:
+ log_error(INTERNAL_ERROR "_cmp_field_time: unsupported time "
+ "comparison type for field %s", field_id);
+ }
+
+ return 0;
+}
+
/* Matches if all items from selection string list match list value strictly 1:1. */
/*
 * Layout of str_list_sort_value: items[0].len is the item count (0 for a
 * blank list); items[1..n] hold (pos,len) spans into val->value.  Both the
 * value list and the selection list are kept sorted, so a strict match is
 * a single parallel walk.  Returns 1 on exact match, 0 otherwise.
 */
static int _cmp_field_string_list_strict_all(const struct str_list_sort_value *val,
					     const struct selection_str_list *sel)
{
	unsigned int sel_list_size = dm_list_size(&sel->str_list.list);
	struct dm_str_list *sel_item;
	unsigned int i = 1;

	if (!val->items[0].len) {
		if (sel_list_size == 1) {
			/* match blank string list with selection defined as blank string only */
			sel_item = dm_list_item(dm_list_first(&sel->str_list.list), struct dm_str_list);
			return !strcmp(sel_item->str, "");
		}
		return 0;
	}

	/* if item count differs, it's clear the lists do not match */
	if (val->items[0].len != sel_list_size)
		return 0;

	/* both lists are sorted so they either match 1:1 or not */
	dm_list_iterate_items(sel_item, &sel->str_list.list) {
		if ((strlen(sel_item->str) != val->items[i].len) ||
		    strncmp(sel_item->str, val->value + val->items[i].pos, val->items[i].len))
			return 0;
		i++;
	}

	return 1;
}
+
/* Matches if all items from selection string list match a subset of list value. */
/*
 * Both lists are sorted, so the scan for each selection item resumes at
 * 'last_found' (the index of the previous hit) rather than restarting at 1.
 * Returns 1 when every selection item is found in the value list.
 * NOTE(review): the inner loop keeps scanning after a hit (no break), so
 * 'last_found' ends up at the last matching index - looks like a missed
 * optimization rather than a correctness issue; confirm before changing.
 */
static int _cmp_field_string_list_subset_all(const struct str_list_sort_value *val,
					     const struct selection_str_list *sel)
{
	unsigned int sel_list_size = dm_list_size(&sel->str_list.list);
	struct dm_str_list *sel_item;
	unsigned int i, last_found = 1;
	int r = 0;

	/* items[0].len is the value-list item count; 0 means a blank list */
	if (!val->items[0].len) {
		if (sel_list_size == 1) {
			/* match blank string list with selection defined as blank string only */
			sel_item = dm_list_item(dm_list_first(&sel->str_list.list), struct dm_str_list);
			return !strcmp(sel_item->str, "");
		}
		return 0;
	}

	/* check selection is a subset of the value */
	dm_list_iterate_items(sel_item, &sel->str_list.list) {
		r = 0;
		for (i = last_found; i <= val->items[0].len; i++) {
			if ((strlen(sel_item->str) == val->items[i].len) &&
			    !strncmp(sel_item->str, val->value + val->items[i].pos, val->items[i].len)) {
				last_found = i;
				r = 1;
			}
		}
		if (!r)
			break;
	}

	return r;
}
+
/* Matches if any item from selection string list matches list value. */
/*
 * Returns 1 as soon as one selection item is found among the value
 * list's items (items[0].len = count, items[1..n] = (pos,len) spans
 * into val->value); 0 when none matches.
 */
static int _cmp_field_string_list_any(const struct str_list_sort_value *val,
				      const struct selection_str_list *sel)
{
	struct dm_str_list *sel_item;
	unsigned int i;

	/* match blank string list with selection that contains blank string */
	if (!val->items[0].len) {
		dm_list_iterate_items(sel_item, &sel->str_list.list) {
			if (!strcmp(sel_item->str, ""))
				return 1;
		}
		return 0;
	}

	dm_list_iterate_items(sel_item, &sel->str_list.list) {
		/*
		 * TODO: Optimize this so we don't need to compare the whole lists' content.
		 * Make use of the fact that the lists are sorted!
		 */
		for (i = 1; i <= val->items[0].len; i++) {
			if ((strlen(sel_item->str) == val->items[i].len) &&
			    !strncmp(sel_item->str, val->value + val->items[i].pos, val->items[i].len))
				return 1;
		}
	}

	return 0;
}
+
+static int _cmp_field_string_list(struct dm_report *rh __attribute__((unused)),
+ uint32_t field_num, const char *field_id,
+ const struct str_list_sort_value *val,
+ struct field_selection *fs)
+{
+ const struct selection_str_list *sel = fs->value->v.l;
+ int subset, r;
+
+ switch (sel->type & SEL_LIST_MASK) {
+ case SEL_LIST_LS:
+ subset = 0;
+ break;
+ case SEL_LIST_SUBSET_LS:
+ subset = 1;
+ break;
+ default:
+ log_error(INTERNAL_ERROR "_cmp_field_string_list: unknown list type");
+ return 0;
+ }
+
+ switch (sel->type & SEL_MASK) {
+ case SEL_AND:
+ r = subset ? _cmp_field_string_list_subset_all(val, sel)
+ : _cmp_field_string_list_strict_all(val, sel);
+ break;
+ case SEL_OR:
+ r = _cmp_field_string_list_any(val, sel);
+ break;
+ default:
+ log_error(INTERNAL_ERROR "_cmp_field_string_list: unsupported string "
+ "list type found, expecting either AND or OR list for "
+ "selection field %s", field_id);
+ return 0;
+ }
+
+ return fs->flags & FLD_CMP_NOT ? !r : r;
+}
+
+static int _cmp_field_regex(const char *s, struct field_selection *fs)
+{
+ int match = dm_regex_match(fs->value->v.r, s) >= 0;
+ return fs->flags & FLD_CMP_NOT ? !match : match;
+}
+
/*
 * Evaluate one selection criterion 'fs' against report field 'f',
 * dispatching on the field's declared type via its sort_value.
 * Regex selections bypass the type dispatch entirely.
 * Returns 1 on match, 0 otherwise (or on missing value / unknown type).
 */
static int _compare_selection_field(struct dm_report *rh,
				    struct dm_report_field *f,
				    struct field_selection *fs)
{
	const struct dm_report_field_type *fields = f->props->implicit ? _implicit_report_fields
								       : rh->fields;
	const char *field_id = fields[f->props->field_num].id;
	int r = 0;

	if (!f->sort_value) {
		log_error("_compare_selection_field: field without value :%d",
			  f->props->field_num);
		return 0;
	}

	if (fs->flags & FLD_CMP_REGEX)
		r = _cmp_field_regex((const char *) f->sort_value, fs);
	else {
		switch(f->props->flags & DM_REPORT_FIELD_TYPE_MASK) {
		case DM_REPORT_FIELD_TYPE_PERCENT:
			/*
			 * Check against real percent values only.
			 * That means DM_PERCENT_0 <= percent <= DM_PERCENT_100.
			 */
			if (*(const uint64_t *) f->sort_value > DM_PERCENT_100)
				return 0;
			/* fall through */
		case DM_REPORT_FIELD_TYPE_NUMBER:
			/* percent values compare through the integer path */
			r = _cmp_field_int(rh, f->props->field_num, field_id, *(const uint64_t *) f->sort_value, fs);
			break;
		case DM_REPORT_FIELD_TYPE_SIZE:
			r = _cmp_field_double(rh, f->props->field_num, field_id, *(const double *) f->sort_value, fs);
			break;
		case DM_REPORT_FIELD_TYPE_STRING:
			r = _cmp_field_string(rh, f->props->field_num, field_id, (const char *) f->sort_value, fs);
			break;
		case DM_REPORT_FIELD_TYPE_STRING_LIST:
			r = _cmp_field_string_list(rh, f->props->field_num, field_id, (const struct str_list_sort_value *) f->sort_value, fs);
			break;
		case DM_REPORT_FIELD_TYPE_TIME:
			r = _cmp_field_time(rh, f->props->field_num, field_id, *(const time_t *) f->sort_value, fs);
			break;
		default:
			log_error(INTERNAL_ERROR "_compare_selection_field: unknown field type for field %s", field_id);
		}
	}

	return r;
}
+
+static int _check_selection(struct dm_report *rh, struct selection_node *sn,
+ struct dm_list *fields)
+{
+ int r;
+ struct selection_node *iter_n;
+ struct dm_report_field *f;
+
+ switch (sn->type & SEL_MASK) {
+ case SEL_ITEM:
+ r = 1;
+ dm_list_iterate_items(f, fields) {
+ if (sn->selection.item->fp != f->props)
+ continue;
+ if (!_compare_selection_field(rh, f, sn->selection.item))
+ r = 0;
+ }
+ break;
+ case SEL_OR:
+ r = 0;
+ dm_list_iterate_items(iter_n, &sn->selection.set)
+ if ((r |= _check_selection(rh, iter_n, fields)))
+ break;
+ break;
+ case SEL_AND:
+ r = 1;
+ dm_list_iterate_items(iter_n, &sn->selection.set)
+ if (!(r &= _check_selection(rh, iter_n, fields)))
+ break;
+ break;
+ default:
+ log_error("Unsupported selection type");
+ return 0;
+ }
+
+ return (sn->type & SEL_MODIFIER_NOT) ? !r : r;
+}
+
/*
 * Apply the report's selection criteria to a row's fields.
 * With no selection defined, every row is considered selected.
 */
static int _check_report_selection(struct dm_report *rh, struct dm_list *fields)
{
	if (!rh->selection || !rh->selection->selection_root)
		return 1;

	return _check_selection(rh, rh->selection->selection_root, fields);
}
+
/*
 * Build one report row for 'object': allocate the row, run every field's
 * report_fn over the object's data, apply the report's selection criteria
 * and either buffer the row for later (sorted) output or emit it at once.
 *
 * do_output - when 0, the row is only evaluated (and then discarded) so
 *             the caller can query 'selected' without producing output
 * selected  - optional out-parameter: set to 1 if the row passed selection
 *
 * Returns 1 on success, 0 on error.
 */
static int _do_report_object(struct dm_report *rh, void *object, int do_output, int *selected)
{
	const struct dm_report_field_type *fields;
	struct field_properties *fp;
	struct row *row = NULL;
	struct dm_report_field *field;
	void *data = NULL;
	int r = 0;

	if (!rh) {
		log_error(INTERNAL_ERROR "_do_report_object: dm_report handler is NULL.");
		return 0;
	}

	if (!do_output && !selected) {
		log_error(INTERNAL_ERROR "_do_report_object: output not requested and "
			  "selected output variable is NULL too.");
		return 0;
	}

	/* Once the report has been emitted, further objects are ignored. */
	if (rh->flags & RH_ALREADY_REPORTED)
		return 1;

	if (!(row = dm_pool_zalloc(rh->mem, sizeof(*row)))) {
		log_error("_do_report_object: struct row allocation failed");
		return 0;
	}

	if (!rh->first_row)
		rh->first_row = row;

	row->rh = rh;

	/* One sort-value slot per sort key when the report is sorted. */
	if ((rh->flags & RH_SORT_REQUIRED) &&
	    !(row->sort_fields =
		dm_pool_zalloc(rh->mem, sizeof(struct dm_report_field *) *
			       rh->keys_count))) {
		log_error("_do_report_object: "
			  "row sort value structure allocation failed");
		goto out;
	}

	dm_list_init(&row->fields);
	row->selected = 1;

	/* For each field to be displayed, call its report_fn */
	dm_list_iterate_items(fp, &rh->field_props) {
		if (!(field = dm_pool_zalloc(rh->mem, sizeof(*field)))) {
			log_error("_do_report_object: "
				  "struct dm_report_field allocation failed");
			goto out;
		}

		if (fp->implicit) {
			fields = _implicit_report_fields;
			/* remember the special "selected" field for the selection step below */
			if (!strcmp(fields[fp->field_num].id, SPECIAL_FIELD_SELECTED_ID))
				row->field_sel_status = field;
		} else
			fields = rh->fields;

		field->props = fp;

		data = fp->implicit ? _report_get_implicit_field_data(rh, fp, row)
				    : _report_get_field_data(rh, fp, object);
		if (!data) {
			log_error("_do_report_object: "
				  "no data assigned to field %s",
				  fields[fp->field_num].id);
			goto out;
		}

		if (!fields[fp->field_num].report_fn(rh, rh->mem,
						     field, data,
						     rh->private)) {
			log_error("_do_report_object: "
				  "report function failed for field %s",
				  fields[fp->field_num].id);
			goto out;
		}

		dm_list_add(&row->fields, &field->list);
	}

	r = 1;

	if (!_check_report_selection(rh, &row->fields)) {
		row->selected = 0;

		/*
		 * If the row is not selected, we still keep it for output if either:
		 *   - we're displaying special "selected" field in the row,
		 *   - or the report is supposed to be on output multiple times
		 *     where each output can have a new selection defined.
		 */
		if (!row->field_sel_status && !(rh->flags & DM_REPORT_OUTPUT_MULTIPLE_TIMES))
			goto out;

		if (row->field_sel_status) {
			/*
			 * If field with id "selected" is reported,
			 * report the row although it does not pass
			 * the selection criteria.
			 * The "selected" field reports the result
			 * of the selection.
			 */
			_implicit_report_fields[row->field_sel_status->props->field_num].report_fn(rh,
							rh->mem, row->field_sel_status, row, rh->private);
			/*
			 * If the "selected" field is not displayed, e.g.
			 * because it is part of the sort field list,
			 * skip the display of the row as usual unless
			 * we plan to do the output multiple times.
			 */
			if ((row->field_sel_status->props->flags & FLD_HIDDEN) &&
			    !(rh->flags & DM_REPORT_OUTPUT_MULTIPLE_TIMES))
				goto out;
		}
	}

	if (!do_output)
		goto out;

	dm_list_add(&rh->rows, &row->list);

	/* unbuffered reports emit each row as soon as it is complete */
	if (!(rh->flags & DM_REPORT_OUTPUT_BUFFERED))
		return dm_report_output(rh);
out:
	/* discard the row unless it was buffered successfully for output */
	if (selected)
		*selected = row->selected;
	if (!do_output || !r)
		dm_pool_free(rh->mem, row);
	return r;
}
+
+static int _do_report_compact_fields(struct dm_report *rh, int global)
+{
+ struct dm_report_field *field;
+ struct field_properties *fp;
+ struct row *row;
+
+ if (!rh) {
+ log_error("dm_report_enable_compact_output: dm report handler is NULL.");
+ return 0;
+ }
+
+ if (!(rh->flags & DM_REPORT_OUTPUT_BUFFERED) ||
+ dm_list_empty(&rh->rows))
+ return 1;
+
+ /*
+ * At first, mark all fields with FLD_HIDDEN flag.
+ * Also, mark field with FLD_COMPACTED flag, but only
+ * the ones that didn't have FLD_HIDDEN set before.
+ * This prevents losing the original FLD_HIDDEN flag
+ * in next step...
+ */
+ dm_list_iterate_items(fp, &rh->field_props) {
+ if (fp->flags & FLD_HIDDEN)
+ continue;
+ if (global || (fp->flags & FLD_COMPACT_ONE))
+ fp->flags |= (FLD_COMPACTED | FLD_HIDDEN);
+ }
+
+ /*
+ * ...check each field in a row and if its report value
+ * is not empty, drop the FLD_COMPACTED and FLD_HIDDEN
+ * flag if FLD_COMPACTED flag is set. It's important
+ * to keep FLD_HIDDEN flag for the fields that were
+ * already marked with FLD_HIDDEN before - these don't
+ * have FLD_COMPACTED set - check this condition!
+ */
+ dm_list_iterate_items(row, &rh->rows) {
+ dm_list_iterate_items(field, &row->fields) {
+ if ((field->report_string && *field->report_string) &&
+ field->props->flags & FLD_COMPACTED)
+ field->props->flags &= ~(FLD_COMPACTED | FLD_HIDDEN);
+ }
+ }
+
+ /*
+ * The fields left with FLD_COMPACTED and FLD_HIDDEN flag are
+ * the ones which have blank value in all rows. The FLD_HIDDEN
+ * will cause such field to not be reported on output at all.
+ */
+
+ return 1;
+}
+
/* Compact (hide-if-blank-everywhere) all fields of a buffered report. */
int dm_report_compact_fields(struct dm_report *rh)
{
	return _do_report_compact_fields(rh, 1);
}
+
+static int _field_to_compact_match(struct dm_report *rh, const char *field, size_t flen)
+{
+ struct field_properties *fp;
+ uint32_t f;
+ int implicit;
+
+ if ((_get_field(rh, field, flen, &f, &implicit))) {
+ dm_list_iterate_items(fp, &rh->field_props) {
+ if ((fp->implicit == implicit) && (fp->field_num == f)) {
+ fp->flags |= FLD_COMPACT_ONE;
+ break;
+ }
+ }
+ return 1;
+ }
+
+ return 0;
+}
+
/*
 * Split the comma-separated 'fields' string and mark each named field
 * for compacting.  A NULL string means nothing to do.  Returns 0 (and
 * logs the offending name) on the first unrecognized field, 1 otherwise.
 */
static int _parse_fields_to_compact(struct dm_report *rh, const char *fields)
{
	const char *start;	/* start of the current field name */
	const char *p = fields;	/* scan cursor */

	if (!fields)
		return 1;

	while (*p) {
		/* skip any separating commas */
		while (*p == ',')
			p++;
		start = p;
		/* advance to the end of this field name */
		while (*p && *p != ',')
			p++;
		if (!_field_to_compact_match(rh, start, (size_t) (p - start))) {
			log_error("dm_report: Unrecognized field: %.*s", (int) (p - start), start);
			return 0;
		}
	}

	return 1;
}
+
/* Compact (hide-if-blank-everywhere) only the comma-separated fields named in 'fields'. */
int dm_report_compact_given_fields(struct dm_report *rh, const char *fields)
{
	if (!_parse_fields_to_compact(rh, fields))
		return_0;

	return _do_report_compact_fields(rh, 0);
}
+
/* Report one object: build its row and, for unbuffered reports, emit it immediately. */
int dm_report_object(struct dm_report *rh, void *object)
{
	return _do_report_object(rh, object, 1, NULL);
}
+
/*
 * Like dm_report_object(), but lets the caller learn whether the object
 * passed the report's selection criteria (via 'selected') and optionally
 * suppress any output (do_output = 0).
 */
int dm_report_object_is_selected(struct dm_report *rh, void *object, int do_output, int *selected)
{
	return _do_report_object(rh, object, do_output, selected);
}
+
+/*
+ * Selection parsing
+ */
+
+/*
+ * Other tokens (FIELD, VALUE, STRING, NUMBER, REGEX)
+ * FIELD := <strings of alphabet, number and '_'>
+ * VALUE := NUMBER | STRING
+ * REGEX := <strings quoted by '"', '\'', '(', '{', '[' or unquoted>
+ * NUMBER := <strings of [0-9]> (because sort_value is unsigned)
+ * STRING := <strings quoted by '"', '\'' or unquoted>
+ */
+
/*
 * Advance past any leading whitespace and return the new position.
 * FIX: isspace() takes an int that must be representable as unsigned
 * char (or EOF); passing a plain (possibly signed, negative) char is
 * undefined behaviour for bytes >= 0x80, hence the cast.
 */
static const char * _skip_space(const char *s)
{
	while (*s && isspace((unsigned char) *s))
		s++;
	return s;
}
+
+static int _tok_op(struct op_def *t, const char *s, const char **end,
+ uint32_t expect)
+{
+ size_t len;
+
+ s = _skip_space(s);
+
+ for (; t->string; t++) {
+ if (expect && !(t->flags & expect))
+ continue;
+
+ len = strlen(t->string);
+ if (!strncmp(s, t->string, len)) {
+ if (end)
+ *end = s + len;
+ return t->flags;
+ }
+ }
+
+ if (end)
+ *end = s;
+ return 0;
+}
+
/* Match a logical/grouping operator (from _op_log) at the start of s. */
static int _tok_op_log(const char *s, const char **end, uint32_t expect)
{
	return _tok_op(_op_log, s, end, expect);
}
+
/* Match a comparison operator (from _op_cmp) at the start of s. */
static int _tok_op_cmp(const char *s, const char **end)
{
	return _tok_op(_op_cmp, s, end, 0);
}
+
/*
 * If **s is a single or double quote, consume it and return the quote
 * character; otherwise leave *s untouched and return 0.
 */
static char _get_and_skip_quote_char(char const **s)
{
	char q = **s;

	if (q != '"' && q != '\'')
		return 0;

	(*s)++;
	return q;
}
+
+ /*
+ *
+ * Input:
+ * s - a pointer to the parsed string
+ * Output:
+ * begin - a pointer to the beginning of the token
+ * end - a pointer to the end of the token + 1
+ * or undefined if return value is NULL
+ * return value - a starting point of the next parsing or
+ * NULL if 's' doesn't match with token type
+ * (the parsing should be terminated)
+ */
+static const char *_tok_value_number(const char *s,
+ const char **begin, const char **end)
+
+{
+ int is_float = 0;
+
+ *begin = s;
+ while ((!is_float && (*s == '.') && ++is_float) || isdigit(*s))
+ s++;
+ *end = s;
+
+ if (*begin == *end)
+ return NULL;
+
+ return s;
+}
+
/*
 * Input:
 *   s               - a pointer to the parsed string
 *   endchar         - terminating character
 *   end_op_flags    - terminating operator flags (see _op_log)
 *                     (if endchar is non-zero then endflags is ignored)
 * Output:
 *   begin           - a pointer to the beginning of the token
 *   end             - a pointer to the end of the token + 1
 *   end_op_flag_hit - the flag from endflags hit during parsing
 *   return value    - a starting point of the next parsing
 */
static const char *_tok_value_string(const char *s,
				     const char **begin, const char **end,
				     const char endchar, uint32_t end_op_flags,
				     uint32_t *end_op_flag_hit)
{
	uint32_t flag_hit = 0;

	*begin = s;

	/*
	 * If endchar is defined, scan the string till
	 * the endchar or the end of string is hit.
	 * This is in case the string is quoted and we
	 * know exact character that is the stopper.
	 */
	if (endchar) {
		while (*s && *s != endchar)
			s++;
		if (*s != endchar) {
			/* hit '\0' before the closing quote */
			log_error("Missing end quote.");
			return NULL;
		}
		*end = s;
		/* skip past the closing quote so the caller resumes after it */
		s++;
	} else {
		/*
		 * If endchar is not defined then endchar is/are the
		 * operator/s as defined by 'endflags' arg or space char.
		 * This is in case the string is not quoted and
		 * we don't know which character is the exact stopper.
		 */
		while (*s) {
			if ((flag_hit = _tok_op(_op_log, s, NULL, end_op_flags)) || *s == ' ')
				break;
			s++;
		}
		*end = s;
		/*
		 * If we hit one of the strings as defined by 'endflags'
		 * and if 'endflag_hit' arg is provided, save the exact
		 * string flag that was hit.
		 */
		if (end_op_flag_hit)
			*end_op_flag_hit = flag_hit;
	}

	return s;
}
+
+static const char *_reserved_name(struct dm_report *rh,
+ const struct dm_report_reserved_value *reserved,
+ const struct dm_report_field_reserved_value *frv,
+ uint32_t field_num, const char *s, size_t len)
+{
+ dm_report_reserved_handler handler;
+ const char *canonical_name;
+ const char **name;
+ char *tmp_s;
+ char c;
+ int r;
+
+ name = reserved->names;
+ while (*name) {
+ if ((strlen(*name) == len) && !strncmp(*name, s, len))
+ return *name;
+ name++;
+ }
+
+ if (reserved->type & DM_REPORT_FIELD_RESERVED_VALUE_FUZZY_NAMES) {
+ handler = (dm_report_reserved_handler) (frv ? frv->value : reserved->value);
+ c = s[len];
+ tmp_s = (char *) s;
+ tmp_s[len] = '\0';
+ if ((r = handler(rh, rh->selection->mem, field_num,
+ DM_REPORT_RESERVED_PARSE_FUZZY_NAME,
+ tmp_s, (const void **) &canonical_name)) <= 0) {
+ if (r == -1)
+ log_error(INTERNAL_ERROR "%s reserved value handler for field %s has missing "
+ "implementation of DM_REPORT_RESERVED_PARSE_FUZZY_NAME action",
+ (reserved->type & DM_REPORT_FIELD_TYPE_MASK) ? "type-specific" : "field-specific",
+ rh->fields[field_num].id);
+ else
+ log_error("Error occured while processing %s reserved value handler for field %s",
+ (reserved->type & DM_REPORT_FIELD_TYPE_MASK) ? "type-specific" : "field-specific",
+ rh->fields[field_num].id);
+ }
+ tmp_s[len] = c;
+ if (r && canonical_name)
+ return canonical_name;
+ }
+
+ return NULL;
+}
+
/*
 * Used to replace a string representation of the reserved value
 * found in selection with the exact reserved value of certain type.
 */
/*
 * Scans 's' for a token that names one of rh->reserved_values of the
 * given 'type' (or a per-field value for 'field_num').  On a match the
 * token span is returned via begin/end, rvw is filled with the reserved
 * value and its matched name, and the return value points past the
 * token.  Otherwise 's' is returned unchanged and rvw->reserved stays
 * NULL.  Implicit fields have no reserved values.
 */
static const char *_get_reserved(struct dm_report *rh, unsigned type,
				 uint32_t field_num, int implicit,
				 const char *s, const char **begin, const char **end,
				 struct reserved_value_wrapper *rvw)
{
	const struct dm_report_reserved_value *iter = implicit ? NULL : rh->reserved_values;
	const struct dm_report_field_reserved_value *frv;
	const char *tmp_begin, *tmp_end, *tmp_s = s;
	const char *name = NULL;
	char c;

	rvw->reserved = NULL;

	if (!iter)
		return s;

	/* tokenize a (possibly quoted) candidate name first */
	c = _get_and_skip_quote_char(&tmp_s);
	if (!(tmp_s = _tok_value_string(tmp_s, &tmp_begin, &tmp_end, c, SEL_AND | SEL_OR | SEL_PRECEDENCE_PE, NULL)))
		return s;

	while (iter->value) {
		if (!(iter->type & DM_REPORT_FIELD_TYPE_MASK)) {
			/* DM_REPORT_FIELD_TYPE_NONE - per-field reserved value */
			frv = (const struct dm_report_field_reserved_value *) iter->value;
			if ((frv->field_num == field_num) && (name = _reserved_name(rh, iter, frv, field_num,
										    tmp_begin, tmp_end - tmp_begin)))
				break;
		} else if (iter->type & type) {
			/* DM_REPORT_FIELD_TYPE_* - per-type reserved value */
			if ((name = _reserved_name(rh, iter, NULL, field_num,
						   tmp_begin, tmp_end - tmp_begin)))
				break;
		}
		iter++;
	}

	if (name) {
		/* found! */
		*begin = tmp_begin;
		*end = tmp_end;
		s = tmp_s;
		rvw->reserved = iter;
		rvw->matched_name = name;
	}

	return s;
}
+
/* Convert a dm_percent_t (stored in DM_PERCENT_1 units) to a plain float percentage. */
float dm_percent_to_float(dm_percent_t percent)
{
	/* Add 0.f to prevent returning -0.00 */
	return (float) percent / DM_PERCENT_1 + 0.f;
}
+
/*
 * Convert a dm_percent_t to float, clamped so that a value rounded to
 * 'digits' decimal places never displays as exactly 0 or exactly 100
 * unless it really is DM_PERCENT_0/DM_PERCENT_100: values closer than
 * half the last displayed digit are pushed to the smallest/largest
 * representable non-extreme value instead.
 */
float dm_percent_to_round_float(dm_percent_t percent, unsigned digits)
{
	static const float power10[] = {
		1.f, .1f, .01f, .001f, .0001f, .00001f, .000001f,
		.0000001f, .00000001f, .000000001f,
		.0000000001f
	};
	float r;
	float f = dm_percent_to_float(percent);

	if (digits >= DM_ARRAY_SIZE(power10))
		digits = DM_ARRAY_SIZE(power10) - 1; /* no better precision */

	/* threshold below/above which rounding would hide a non-zero remainder */
	r = DM_PERCENT_1 * power10[digits];

	if ((percent < r) && (percent > DM_PERCENT_0))
		f = power10[digits];
	else if ((percent > (DM_PERCENT_100 - r)) && (percent < DM_PERCENT_100))
		f = (float) (DM_PERCENT_100 - r) / DM_PERCENT_1;

	return f;
}
+
/*
 * Build a dm_percent_t from numerator/denominator.
 * The +/-1 adjustments ensure a non-exact ratio never reads as exactly
 * 0% or exactly 100%: only numerator==0 yields DM_PERCENT_0 and only
 * numerator==denominator yields DM_PERCENT_100.
 * A zero denominator is reported as 100% (see FIXME).
 */
dm_percent_t dm_make_percent(uint64_t numerator, uint64_t denominator)
{
	dm_percent_t percent;

	if (!denominator)
		return DM_PERCENT_100; /* FIXME? */
	if (!numerator)
		return DM_PERCENT_0;
	if (numerator == denominator)
		return DM_PERCENT_100;
	switch (percent = DM_PERCENT_100 * ((double) numerator / (double) denominator)) {
	case DM_PERCENT_100:
		return DM_PERCENT_100 - 1;
	case DM_PERCENT_0:
		return DM_PERCENT_0 + 1;
	default:
		return percent;
	}
}
+
+int dm_report_value_cache_set(struct dm_report *rh, const char *name, const void *data)
+{
+ if (!rh->value_cache && (!(rh->value_cache = dm_hash_create(64)))) {
+ log_error("Failed to create cache for values used during reporting.");
+ return 0;
+ }
+
+ return dm_hash_insert(rh->value_cache, name, (void *) data);
+}
+
+const void *dm_report_value_cache_get(struct dm_report *rh, const char *name)
+{
+ return (rh->value_cache) ? dm_hash_lookup(rh->value_cache, name) : NULL;
+}
+
/*
 * Used to check whether the reserved_values definition passed to
 * dm_report_init_with_selection contains only supported reserved value types.
 */
/*
 * Per-type entries (type mask set) are validated against their own type;
 * per-field entries (DM_REPORT_FIELD_TYPE_NONE) are validated against the
 * declared type of the field they wrap.  STRING does not support ranges.
 * Returns 1 when everything is supported, 0 (with INTERNAL_ERROR log)
 * otherwise.
 */
static int _check_reserved_values_supported(const struct dm_report_field_type fields[],
					    const struct dm_report_reserved_value reserved_values[])
{
	const struct dm_report_reserved_value *iter;
	const struct dm_report_field_reserved_value *field_res;
	const struct dm_report_field_type *field;
	static uint32_t supported_reserved_types = DM_REPORT_FIELD_TYPE_NUMBER |
						   DM_REPORT_FIELD_TYPE_SIZE |
						   DM_REPORT_FIELD_TYPE_PERCENT |
						   DM_REPORT_FIELD_TYPE_STRING |
						   DM_REPORT_FIELD_TYPE_TIME;
	static uint32_t supported_reserved_types_with_range = DM_REPORT_FIELD_RESERVED_VALUE_RANGE |
							      DM_REPORT_FIELD_TYPE_NUMBER |
							      DM_REPORT_FIELD_TYPE_SIZE |
							      DM_REPORT_FIELD_TYPE_PERCENT |
							      DM_REPORT_FIELD_TYPE_TIME;


	if (!reserved_values)
		return 1;

	iter = reserved_values;

	while (iter->value) {
		if (iter->type & DM_REPORT_FIELD_TYPE_MASK) {
			if (!(iter->type & supported_reserved_types) ||
			    ((iter->type & DM_REPORT_FIELD_RESERVED_VALUE_RANGE) &&
			     !(iter->type & supported_reserved_types_with_range))) {
				log_error(INTERNAL_ERROR "_check_reserved_values_supported: "
					  "global reserved value for type 0x%x not supported",
					   iter->type);
				return 0;
			}
		} else {
			field_res = (const struct dm_report_field_reserved_value *) iter->value;
			field = &fields[field_res->field_num];
			if (!(field->flags & supported_reserved_types) ||
			    ((iter->type & DM_REPORT_FIELD_RESERVED_VALUE_RANGE) &&
			     !(iter->type & supported_reserved_types_with_range))) {
				log_error(INTERNAL_ERROR "_check_reserved_values_supported: "
					  "field-specific reserved value of type 0x%x for "
					  "field %s not supported",
					   field->flags & DM_REPORT_FIELD_TYPE_MASK, field->id);
				return 0;
			}
		}
		iter++;
	}

	return 1;
}
+
+/*
+ * Input:
+ * ft - field type for which the value is parsed
+ * s - a pointer to the parsed string
+ * Output:
+ * begin - a pointer to the beginning of the token
+ * end - a pointer to the end of the token + 1
+ * flags - parsing flags
+ */
static const char *_tok_value_regex(struct dm_report *rh,
				    const struct dm_report_field_type *ft,
				    const char *s, const char **begin,
				    const char **end, uint32_t *flags,
				    struct reserved_value_wrapper *rvw)
{
	char c;
	/* Regex operands never match reserved values. */
	rvw->reserved = NULL;

	s = _skip_space(s);

	if (!*s) {
		log_error("Regular expression expected for selection field %s", ft->id);
		return NULL;
	}

	/* Determine the closing delimiter for an enclosed regex;
	 * c == 0 means the regex is unquoted/unbracketed. */
	switch (*s) {
		case '(': c = ')'; break;
		case '{': c = '}'; break;
		case '[': c = ']'; break;
		case '"': /* fall through */
		case '\'': c = *s; break;
		default: c = 0;
	}

	/* Skip the opening delimiter (if any) and tokenize up to the closing
	 * one; an unquoted regex is terminated by a logical operator. */
	if (!(s = _tok_value_string(c ? s + 1 : s, begin, end, c, SEL_AND | SEL_OR | SEL_PRECEDENCE_PE, NULL))) {
		log_error("Failed to parse regex value for selection field %s.", ft->id);
		return NULL;
	}

	/* Regexes are compared as strings. */
	*flags |= DM_REPORT_FIELD_TYPE_STRING;
	return s;
}
+
+static int _str_list_item_cmp(const void *a, const void *b)
+{
+ const struct dm_str_list * const *item_a = (const struct dm_str_list * const *) a;
+ const struct dm_str_list * const *item_b = (const struct dm_str_list * const *) b;
+
+ return strcmp((*item_a)->str, (*item_b)->str);
+}
+
+static int _add_item_to_string_list(struct dm_pool *mem, const char *begin,
+ const char *end, struct dm_list *list)
+{
+ struct dm_str_list *item;
+
+ if (!(item = dm_pool_zalloc(mem, sizeof(*item))) ||
+ !(item->str = begin == end ? "" : dm_pool_strndup(mem, begin, end - begin))) {
+ log_error("_add_item_to_string_list: memory allocation failed for string list item");
+ return 0;
+ }
+ dm_list_add(list, &item->list);
+
+ return 1;
+}
+
+/*
+ * Input:
+ * ft - field type for which the value is parsed
+ * mem - memory pool to allocate from
+ * s - a pointer to the parsed string
+ * Output:
+ * begin - a pointer to the beginning of the token (whole list)
+ * end - a pointer to the end of the token + 1 (whole list)
+ * sel_str_list - the list of strings parsed
+ */
static const char *_tok_value_string_list(const struct dm_report_field_type *ft,
					  struct dm_pool *mem, const char *s,
					  const char **begin, const char **end,
					  struct selection_str_list **sel_str_list)
{
	static const char _str_list_item_parsing_failed[] = "Failed to parse string list value "
							    "for selection field %s.";
	struct selection_str_list *ssl = NULL;
	struct dm_str_list *item;
	const char *begin_item = NULL, *end_item = NULL, *tmp;
	uint32_t op_flags, end_op_flag_expected, end_op_flag_hit = 0;
	struct dm_str_list **arr;
	size_t list_size;
	unsigned int i;
	int list_end = 0;
	char c;

	if (!(ssl = dm_pool_alloc(mem, sizeof(*ssl)))) {
		log_error("_tok_value_string_list: memory allocation failed for selection list");
		goto bad;
	}
	dm_list_init(&ssl->str_list.list);
	ssl->type = 0;
	*begin = s;

	if (!(op_flags = _tok_op_log(s, &tmp, SEL_LIST_LS | SEL_LIST_SUBSET_LS))) {
		/* Only one item - SEL_LIST_{SUBSET_}LS and SEL_LIST_{SUBSET_}LE not used */
		c = _get_and_skip_quote_char(&s);
		if (!(s = _tok_value_string(s, &begin_item, &end_item, c, SEL_AND | SEL_OR | SEL_PRECEDENCE_PE, NULL))) {
			log_error(_str_list_item_parsing_failed, ft->id);
			goto bad;
		}
		if (!_add_item_to_string_list(mem, begin_item, end_item, &ssl->str_list.list))
			goto_bad;
		/* A single bare item behaves like "[item]". */
		ssl->type = SEL_OR | SEL_LIST_LS;
		goto out;
	}

	/* More than one item - items enclosed in SEL_LIST_LS and SEL_LIST_LE
	 * or SEL_LIST_SUBSET_LS and SEL_LIST_SUBSET_LE.
	 * Each element is terminated by AND or OR operator or 'list end'.
	 * The first operator hit is then the one allowed for the whole list,
	 * no mixing allowed!
	 */

	/* Are we using [] or {} for the list? */
	end_op_flag_expected = (op_flags == SEL_LIST_LS) ? SEL_LIST_LE : SEL_LIST_SUBSET_LE;

	/* Operators that may legally terminate each list item. */
	op_flags = SEL_LIST_LE | SEL_LIST_SUBSET_LE | SEL_AND | SEL_OR;
	s++;
	while (*s) {
		s = _skip_space(s);
		c = _get_and_skip_quote_char(&s);
		if (!(s = _tok_value_string(s, &begin_item, &end_item, c, op_flags, NULL))) {
			log_error(_str_list_item_parsing_failed, ft->id);
			goto bad;
		}
		s = _skip_space(s);

		/* Which operator terminated this item? */
		if (!(end_op_flag_hit = _tok_op_log(s, &tmp, op_flags))) {
			log_error("Invalid operator in selection list.");
			goto bad;
		}

		if (end_op_flag_hit & (SEL_LIST_LE | SEL_LIST_SUBSET_LE)) {
			list_end = 1;
			/* Opening and closing bracket styles must match. */
			if (end_op_flag_hit != end_op_flag_expected) {
				for (i = 0; _op_log[i].string; i++)
					if (_op_log[i].flags == end_op_flag_expected)
						break;
				log_error("List ended with incorrect character, "
					  "expecting \'%s\'.", _op_log[i].string);
				goto bad;
			}
		}

		if (ssl->type) {
			/* Subsequent items must use the same AND/OR operator
			 * established by the first separator. */
			if (!list_end && !(ssl->type & end_op_flag_hit)) {
				log_error("Only one type of logical operator allowed "
					  "in selection list at a time.");
				goto bad;
			}
		} else {
			/* First separator (or immediate list end) fixes the
			 * operator: [] defaults to AND, {} to OR. */
			if (list_end)
				ssl->type = end_op_flag_expected == SEL_LIST_LE ? SEL_AND : SEL_OR;
			else
				ssl->type = end_op_flag_hit;
		}

		if (!_add_item_to_string_list(mem, begin_item, end_item, &ssl->str_list.list))
			goto_bad;

		s = tmp;

		if (list_end)
			break;
	}

	if (!(end_op_flag_hit & (SEL_LIST_LE | SEL_LIST_SUBSET_LE))) {
		log_error("Missing list end for selection field %s", ft->id);
		goto bad;
	}

	/* Store information whether [] or {} was used. */
	if (end_op_flag_expected == SEL_LIST_LE)
		ssl->type |= SEL_LIST_LS;
	else
		ssl->type |= SEL_LIST_SUBSET_LS;

	/* Sort the list. */
	if (!(list_size = dm_list_size(&ssl->str_list.list))) {
		log_error(INTERNAL_ERROR "_tok_value_string_list: list has no items");
		goto bad;
	} else if (list_size == 1)
		goto out;
	if (!(arr = dm_malloc(sizeof(item) * list_size))) {
		log_error("_tok_value_string_list: memory allocation failed for sort array");
		goto bad;
	}

	/* Sort via a temporary pointer array, then relink the list in order. */
	i = 0;
	dm_list_iterate_items(item, &ssl->str_list.list)
		arr[i++] = item;
	qsort(arr, list_size, sizeof(item), _str_list_item_cmp);
	dm_list_init(&ssl->str_list.list);
	for (i = 0; i < list_size; i++)
		dm_list_add(&ssl->str_list.list, &arr[i]->list);

	dm_free(arr);
out:
	*end = s;
	if (sel_str_list)
		*sel_str_list = ssl;

	return s;
bad:
	*end = s;
	if (ssl)
		dm_pool_free(mem, ssl);
	/* Caller detects failure via NULL *sel_str_list, not the return value. */
	if (sel_str_list)
		*sel_str_list = NULL;
	return s;
}
+
/* Parsed time operand: a single point in time (t1) or, if range is set,
 * the inclusive interval [t1, t2]. */
struct time_value {
	int range;	/* non-zero => t1..t2 denotes a range */
	time_t t1;
	time_t t2;
};

static const char *_out_of_range_msg = "Field selection value %s out of supported range for field %s.";
+
+/*
+ * Standard formatted date and time - ISO8601.
+ *
+ * date time timezone
+ *
+ * date:
+ * YYYY-MM-DD (or shortly YYYYMMDD)
+ * YYYY-MM (shortly YYYYMM), auto DD=1
+ * YYYY, auto MM=01 and DD=01
+ *
+ * time:
+ * hh:mm:ss (or shortly hhmmss)
+ * hh:mm (or shortly hhmm), auto ss=0
+ * hh, auto mm=0, auto ss=0
+ *
+ * timezone:
+ * +hh:mm or -hh:mm (or shortly +hhmm or -hhmm)
+ * +hh or -hh
+*/
+
#define DELIM_DATE '-'
#define DELIM_TIME ':'

/* Days per month in a non-leap year; February is corrected for leap years
 * by _get_days_in_month(). */
static int _days_in_month[12] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
+
/* Gregorian leap-year rule: every 4th year, except centuries
 * not divisible by 400. */
static int _is_leap_year(long year)
{
	if (year % 400 == 0)
		return 1;
	if (year % 100 == 0)
		return 0;
	return year % 4 == 0;
}
+
+static int _get_days_in_month(long month, long year)
+{
+ return (month == 2 && _is_leap_year(year)) ? _days_in_month[month-1] + 1
+ : _days_in_month[month-1];
+}
+
/*
 * Precision of a parsed date/time: the finest unit the user actually
 * specified.  _get_final_time() widens the match interval by one unit of
 * this granularity (e.g. RANGE_YEAR matches the whole year).
 */
typedef enum {
	RANGE_NONE,
	RANGE_SECOND,
	RANGE_MINUTE,
	RANGE_HOUR,
	RANGE_DAY,
	RANGE_MONTH,
	RANGE_YEAR
} time_range_t;
+
/*
 * Parse the date part (ISO8601: YYYY[-MM[-DD]] or YYYY[MM[DD]]) at the start
 * of str into tm (tm_year/tm_mon/tm_mday) and set *range to the precision
 * given (RANGE_YEAR/RANGE_MONTH/RANGE_DAY).  Returns a pointer just past the
 * date (with trailing spaces skipped) or NULL on error.
 */
static char *_get_date(char *str, struct tm *tm, time_range_t *range)
{
	static const char incorrect_date_format_msg[] = "Incorrect date format.";
	time_range_t tmp_range = RANGE_NONE;
	long n1, n2 = -1, n3 = -1;
	char *s = str, *end;
	size_t len = 0;

	if (!isdigit(*s))
		/* we need a year at least */
		return NULL;

	/* Read up to three '-'-separated numbers.  'len' accumulates the
	 * difference from the canonical YYYY-MM-DD component widths so the
	 * total length can disambiguate the short (undelimited) forms. */
	n1 = strtol(s, &end, 10);
	if (*end == DELIM_DATE) {
		len += (4 - (end - s));	/* diff in length from standard YYYY */
		s = end + 1;
		if (isdigit(*s)) {
			n2 = strtol(s, &end, 10);
			len += (2 - (end - s));	/* diff in length from standard MM */
			if (*end == DELIM_DATE) {
				s = end + 1;
				n3 = strtol(s, &end, 10);
				len += (2 - (end - s));	/* diff in length from standard DD */
			}
		}
	}

	/* normalized total length of the date string */
	len = len + end - str;

	/* variations from standard YYYY-MM-DD */
	if (n3 == -1) {
		if (n2 == -1) {
			if (len == 4) {
				/* YYYY */
				tmp_range = RANGE_YEAR;
				n3 = n2 = 1;	/* default to Jan 1st */
			} else if (len == 6) {
				/* YYYYMM */
				tmp_range = RANGE_MONTH;
				n3 = 1;		/* default to 1st day */
				n2 = n1 % 100;
				n1 = n1 / 100;
			} else if (len == 8) {
				tmp_range = RANGE_DAY;
				/* YYYYMMDD */
				n3 = n1 % 100;
				n2 = (n1 / 100) % 100;
				n1 = n1 / 10000;
			} else {
				log_error(incorrect_date_format_msg);
				return NULL;
			}
		} else {
			if (len == 7) {
				tmp_range = RANGE_MONTH;
				/* YYYY-MM */
				n3 = 1;		/* default to 1st day */
			} else {
				log_error(incorrect_date_format_msg);
				return NULL;
			}
		}
	}

	if (n2 < 1 || n2 > 12) {
		log_error("Specified month out of range.");
		return NULL;
	}

	if (n3 < 1 || n3 > _get_days_in_month(n2, n1)) {
		log_error("Specified day out of range.");
		return NULL;
	}

	if (tmp_range == RANGE_NONE)
		tmp_range = RANGE_DAY;

	/* struct tm conventions: years since 1900, months 0-11. */
	tm->tm_year = n1 - 1900;
	tm->tm_mon = n2 - 1;
	tm->tm_mday = n3;
	*range = tmp_range;

	return (char *) _skip_space(end);
}
+
+static char *_get_time(char *str, struct tm *tm, time_range_t *range)
+{
+ static const char incorrect_time_format_msg[] = "Incorrect time format.";
+ time_range_t tmp_range = RANGE_NONE;
+ long n1, n2 = -1, n3 = -1;
+ char *s = str, *end;
+ size_t len = 0;
+
+ if (!isdigit(*s)) {
+ /* time is not compulsory */
+ tm->tm_hour = tm->tm_min = tm->tm_sec = 0;
+ return (char *) _skip_space(s);
+ }
+
+ n1 = strtol(s, &end, 10);
+ if (*end == DELIM_TIME) {
+ len += (2 - (end - s)); /* diff in length from standard HH */
+ s = end + 1;
+ if (isdigit(*s)) {
+ n2 = strtol(s, &end, 10);
+ len += (2 - (end - s)); /* diff in length from standard MM */
+ if (*end == DELIM_TIME) {
+ s = end + 1;
+ n3 = strtol(s, &end, 10);
+ len += (2 - (end - s)); /* diff in length from standard SS */
+ }
+ }
+ }
+
+ len = len + end - str;
+
+ /* variations from standard HH:MM:SS */
+ if (n3 == -1) {
+ if (n2 == -1) {
+ if (len == 2) {
+ /* HH */
+ tmp_range = RANGE_HOUR;
+ n3 = n2 = 0;
+ } else if (len == 4) {
+ /* HHMM */
+ tmp_range = RANGE_MINUTE;
+ n3 = 0;
+ n2 = n1 % 100;
+ n1 = n1 / 100;
+ } else if (len == 6) {
+ /* HHMMSS */
+ tmp_range = RANGE_SECOND;
+ n3 = n1 % 100;
+ n2 = (n1 / 100) % 100;
+ n1 = n1 / 10000;
+ } else {
+ log_error(incorrect_time_format_msg);
+ return NULL;
+ }
+ } else {
+ if (len == 5) {
+ /* HH:MM */
+ tmp_range = RANGE_MINUTE;
+ n3 = 0;
+ } else {
+ log_error(incorrect_time_format_msg);
+ return NULL;
+ }
+ }
+ }
+
+ if (n1 < 0 || n1 > 23) {
+ log_error("Specified hours out of range.");
+ return NULL;
+ }
+
+ if (n2 < 0 || n2 > 60) {
+ log_error("Specified minutes out of range.");
+ return NULL;
+ }
+
+ if (n3 < 0 || n3 > 60) {
+ log_error("Specified seconds out of range.");
+ return NULL;
+ }
+
+ /* Just time without exact date is incomplete! */
+ if (*range != RANGE_DAY) {
+ log_error("Full date specification needed.");
+ return NULL;
+ }
+
+ tm->tm_hour = n1;
+ tm->tm_min = n2;
+ tm->tm_sec = n3;
+ *range = tmp_range;
+
+ return (char *) _skip_space(end);
+}
+
+/* The offset is always an absolute offset against GMT! */
+static char *_get_tz(char *str, int *tz_supplied, int *offset)
+{
+ long n1, n2 = -1;
+ char *s = str, *end;
+ int sign = 1; /* +HH:MM by default */
+ size_t len = 0;
+
+ *tz_supplied = 0;
+ *offset = 0;
+
+ if (!isdigit(*s)) {
+ if (*s == '+') {
+ sign = 1;
+ s = s + 1;
+ } else if (*s == '-') {
+ sign = -1;
+ s = s + 1;
+ } else
+ return (char *) _skip_space(s);
+ }
+
+ n1 = strtol(s, &end, 10);
+ if (*end == DELIM_TIME) {
+ len = (2 - (end - s)); /* diff in length from standard HH */
+ s = end + 1;
+ if (isdigit(*s)) {
+ n2 = strtol(s, &end, 10);
+ len = (2 - (end - s)); /* diff in length from standard MM */
+ }
+ }
+
+ len = len + end - s;
+
+ /* variations from standard HH:MM */
+ if (n2 == -1) {
+ if (len == 2) {
+ /* HH */
+ n2 = 0;
+ } else if (len == 4) {
+ /* HHMM */
+ n2 = n1 % 100;
+ n1 = n1 / 100;
+ } else
+ return NULL;
+ }
+
+ if (n2 < 0 || n2 > 60)
+ return NULL;
+
+ if (n1 < 0 || n1 > 14)
+ return NULL;
+
+ /* timezone offset in seconds */
+ *offset = sign * ((n1 * 3600) + (n2 * 60));
+ *tz_supplied = 1;
+ return (char *) _skip_space(end);
+}
+
/*
 * Absolute offset (in seconds) of the local timezone from GMT at the
 * moment t_local: reinterpret the GMT broken-down time as local time via
 * mktime() and compare.
 */
static int _local_tz_offset(time_t t_local)
{
	struct tm broken_down_gmt;
	time_t reinterpreted;

	gmtime_r(&t_local, &broken_down_gmt);
	reinterpreted = mktime(&broken_down_gmt);

	/*
	 * mktime() folds the DST correction into its result; subtract that
	 * adjustment back so the returned value is the *absolute* offset of
	 * the local timezone.
	 */
	if (broken_down_gmt.tm_isdst)
		reinterpreted -= 3600;

	return t_local - reinterpreted;
}
+
+static void _get_final_time(time_range_t range, struct tm *tm,
+ int tz_supplied, int offset,
+ struct time_value *tval)
+{
+
+ struct tm tm_up = *tm;
+
+ switch (range) {
+ case RANGE_SECOND:
+ if (tm_up.tm_sec < 59) {
+ tm_up.tm_sec += 1;
+ break;
+ }
+ /* fall through */
+ case RANGE_MINUTE:
+ if (tm_up.tm_min < 59) {
+ tm_up.tm_min += 1;
+ break;
+ }
+ /* fall through */
+ case RANGE_HOUR:
+ if (tm_up.tm_hour < 23) {
+ tm_up.tm_hour += 1;
+ break;
+ }
+ /* fall through */
+ case RANGE_DAY:
+ if (tm_up.tm_mday < _get_days_in_month(tm_up.tm_mon, tm_up.tm_year)) {
+ tm_up.tm_mday += 1;
+ break;
+ }
+ /* fall through */
+ case RANGE_MONTH:
+ if (tm_up.tm_mon < 11) {
+ tm_up.tm_mon += 1;
+ break;
+ }
+ /* fall through */
+ case RANGE_YEAR:
+ tm_up.tm_year += 1;
+ break;
+ case RANGE_NONE:
+ /* nothing to do here */
+ break;
+ }
+
+ tval->range = (range != RANGE_NONE);
+ tval->t1 = mktime(tm);
+ tval->t2 = mktime(&tm_up) - 1;
+
+ if (tz_supplied) {
+ /*
+ * The 'offset' is with respect to the GMT.
+ * Calculate what the offset is with respect
+ * to our local timezone and adjust times
+ * so they represent time in our local timezone.
+ */
+ offset -= _local_tz_offset(tval->t1);
+ tval->t1 -= offset;
+ tval->t2 -= offset;
+ }
+}
+
+static int _parse_formatted_date_time(char *str, struct time_value *tval)
+{
+ time_range_t range = RANGE_NONE;
+ struct tm tm = {0};
+ int gmt_offset;
+ int tz_supplied;
+
+ tm.tm_year = tm.tm_mday = tm.tm_mon = -1;
+ tm.tm_hour = tm.tm_min = tm.tm_sec = -1;
+ tm.tm_isdst = tm.tm_wday = tm.tm_yday = -1;
+
+ if (!(str = _get_date(str, &tm, &range)))
+ return 0;
+
+ if (!(str = _get_time(str, &tm, &range)))
+ return 0;
+
+ if (!(str = _get_tz(str, &tz_supplied, &gmt_offset)))
+ return 0;
+
+ if (*str)
+ return 0;
+
+ _get_final_time(range, &tm, tz_supplied, gmt_offset, tval);
+
+ return 1;
+}
+
+static const char *_tok_value_time(const struct dm_report_field_type *ft,
+ struct dm_pool *mem, const char *s,
+ const char **begin, const char **end,
+ struct time_value *tval)
+{
+ char *time_str = NULL;
+ const char *r = NULL;
+ uint64_t t;
+ char c;
+
+ s = _skip_space(s);
+
+ if (*s == '@') {
+ /* Absolute time value in number of seconds since epoch. */
+ if (!(s = _tok_value_number(s+1, begin, end)))
+ goto_out;
+
+ if (!(time_str = dm_pool_strndup(mem, *begin, *end - *begin))) {
+ log_error("_tok_value_time: dm_pool_strndup failed");
+ goto out;
+ }
+
+ errno = 0;
+ if (((t = strtoull(time_str, NULL, 10)) == ULLONG_MAX) && errno == ERANGE) {
+ log_error(_out_of_range_msg, time_str, ft->id);
+ goto out;
+ }
+
+ tval->range = 0;
+ tval->t1 = (time_t) t;
+ tval->t2 = 0;
+ r = s;
+ } else {
+ c = _get_and_skip_quote_char(&s);
+ if (!(s = _tok_value_string(s, begin, end, c, SEL_AND | SEL_OR | SEL_PRECEDENCE_PE, NULL)))
+ goto_out;
+
+ if (!(time_str = dm_pool_strndup(mem, *begin, *end - *begin))) {
+ log_error("tok_value_time: dm_pool_strndup failed");
+ goto out;
+ }
+
+ if (!_parse_formatted_date_time(time_str, tval))
+ goto_out;
+ r = s;
+ }
+out:
+ if (time_str)
+ dm_pool_free(mem, time_str);
+ return r;
+}
+
+/*
+ * Input:
+ * ft - field type for which the value is parsed
+ * s - a pointer to the parsed string
+ * mem - memory pool to allocate from
+ * Output:
+ * begin - a pointer to the beginning of the token
+ * end - a pointer to the end of the token + 1
+ * flags - parsing flags
+ * custom - custom data specific to token type
+ * (e.g. size unit factor)
+ */
static const char *_tok_value(struct dm_report *rh,
			      const struct dm_report_field_type *ft,
			      uint32_t field_num, int implicit,
			      const char *s,
			      const char **begin, const char **end,
			      uint32_t *flags,
			      struct reserved_value_wrapper *rvw,
			      struct dm_pool *mem, void *custom)
{
	int expected_type = ft->flags & DM_REPORT_FIELD_TYPE_MASK;
	struct selection_str_list **str_list;
	struct time_value *tval;
	uint64_t *factor;
	const char *tmp;
	char c;

	s = _skip_space(s);

	/* Reserved names (e.g. symbolic values) take precedence over
	 * literal values of the expected type. */
	s = _get_reserved(rh, expected_type, field_num, implicit, s, begin, end, rvw);
	if (rvw->reserved) {
		/*
		 * FLD_CMP_NUMBER shares operators with FLD_CMP_TIME,
		 * so adjust flags here based on expected type.
		 */
		if (expected_type == DM_REPORT_FIELD_TYPE_TIME)
			*flags &= ~FLD_CMP_NUMBER;
		else if (expected_type == DM_REPORT_FIELD_TYPE_NUMBER)
			*flags &= ~FLD_CMP_TIME;
		*flags |= expected_type;
		return s;
	}

	switch (expected_type) {

		case DM_REPORT_FIELD_TYPE_STRING:
			c = _get_and_skip_quote_char(&s);
			if (!(s = _tok_value_string(s, begin, end, c, SEL_AND | SEL_OR | SEL_PRECEDENCE_PE, NULL))) {
				log_error("Failed to parse string value "
					  "for selection field %s.", ft->id);
				return NULL;
			}
			*flags |= DM_REPORT_FIELD_TYPE_STRING;
			break;

		case DM_REPORT_FIELD_TYPE_STRING_LIST:
			if (!(str_list = (struct selection_str_list **) custom))
				goto_bad;

			/* Failure is signalled via NULL *str_list, not the
			 * return value (see _tok_value_string_list). */
			s = _tok_value_string_list(ft, mem, s, begin, end, str_list);
			if (!(*str_list)) {
				log_error("Failed to parse string list value "
					  "for selection field %s.", ft->id);
				return NULL;
			}
			*flags |= DM_REPORT_FIELD_TYPE_STRING_LIST;
			break;

		case DM_REPORT_FIELD_TYPE_NUMBER:
			/* fall through */
		case DM_REPORT_FIELD_TYPE_SIZE:
			/* fall through */
		case DM_REPORT_FIELD_TYPE_PERCENT:
			if (!(s = _tok_value_number(s, begin, end))) {
				log_error("Failed to parse numeric value "
					  "for selection field %s.", ft->id);
				return NULL;
			}

			/* A '%' suffix is only valid for PERCENT fields;
			 * a size-unit suffix only for SIZE fields. */
			if (*s == DM_PERCENT_CHAR) {
				s++;
				c = DM_PERCENT_CHAR;
				if (expected_type != DM_REPORT_FIELD_TYPE_PERCENT) {
					log_error("Found percent value but %s value "
						  "expected for selection field %s.",
						  expected_type == DM_REPORT_FIELD_TYPE_NUMBER ?
							"numeric" : "size", ft->id);
					return NULL;
				}
			} else {
				if (!(factor = (uint64_t *) custom))
					goto_bad;

				if ((*factor = dm_units_to_factor(s, &c, 0, &tmp))) {
					s = tmp;
					if (expected_type != DM_REPORT_FIELD_TYPE_SIZE) {
						log_error("Found size unit specifier "
							  "but %s value expected for "
							  "selection field %s.",
							  expected_type == DM_REPORT_FIELD_TYPE_NUMBER ?
							  "numeric" : "percent", ft->id);
						return NULL;
					}
				} else if (expected_type == DM_REPORT_FIELD_TYPE_SIZE) {
					/*
					 * If size unit is not defined in the selection
					 * and the type expected is size, use 'm'
					 * (1 MiB) for the unit by default. This is the
					 * same behaviour as seen in lvcreate -L <size>.
					 */
					*factor = 1024*1024;
				}
			}

			*flags |= expected_type;
			/*
			 * FLD_CMP_NUMBER shares operators with FLD_CMP_TIME,
			 * but we have NUMBER here, so remove FLD_CMP_TIME.
			 */
			*flags &= ~FLD_CMP_TIME;
			break;

		case DM_REPORT_FIELD_TYPE_TIME:
			if (!(tval = (struct time_value *) custom))
				goto_bad;

			if (!(s = _tok_value_time(ft, mem, s, begin, end, tval))) {
				log_error("Failed to parse time value "
					  "for selection field %s.", ft->id);
				return NULL;
			}

			*flags |= DM_REPORT_FIELD_TYPE_TIME;
			/*
			 * FLD_CMP_TIME shares operators with FLD_CMP_NUMBER,
			 * but we have TIME here, so remove FLD_CMP_NUMBER.
			 */
			*flags &= ~FLD_CMP_NUMBER;
			break;
	}

	return s;
bad:
	log_error(INTERNAL_ERROR "Forbidden NULL custom detected.");

	return NULL;
}
+
+/*
+ * Input:
+ * s - a pointer to the parsed string
+ * Output:
+ * begin - a pointer to the beginning of the token
+ * end - a pointer to the end of the token + 1
+ */
+static const char *_tok_field_name(const char *s,
+ const char **begin, const char **end)
+{
+ char c;
+ s = _skip_space(s);
+
+ *begin = s;
+ while ((c = *s) &&
+ (isalnum(c) || c == '_' || c == '-'))
+ s++;
+ *end = s;
+
+ if (*begin == *end)
+ return NULL;
+
+ return s;
+}
+
+static int _get_reserved_value(struct dm_report *rh, uint32_t field_num,
+ struct reserved_value_wrapper *rvw)
+{
+ const void *tmp_value;
+ dm_report_reserved_handler handler;
+ int r;
+
+ if (!rvw->reserved) {
+ rvw->value = NULL;
+ return 1;
+ }
+
+ if (rvw->reserved->type & DM_REPORT_FIELD_TYPE_MASK)
+ /* type reserved value */
+ tmp_value = rvw->reserved->value;
+ else
+ /* per-field reserved value */
+ tmp_value = ((const struct dm_report_field_reserved_value *) rvw->reserved->value)->value;
+
+ if (rvw->reserved->type & (DM_REPORT_FIELD_RESERVED_VALUE_DYNAMIC_VALUE | DM_REPORT_FIELD_RESERVED_VALUE_FUZZY_NAMES)) {
+ handler = (dm_report_reserved_handler) tmp_value;
+ if ((r = handler(rh, rh->selection->mem, field_num,
+ DM_REPORT_RESERVED_GET_DYNAMIC_VALUE,
+ rvw->matched_name, &tmp_value)) <= 0) {
+ if (r == -1)
+ log_error(INTERNAL_ERROR "%s reserved value handler for field %s has missing"
+ "implementation of DM_REPORT_RESERVED_GET_DYNAMIC_VALUE action",
+ (rvw->reserved->type) & DM_REPORT_FIELD_TYPE_MASK ? "type-specific" : "field-specific",
+ rh->fields[field_num].id);
+ else
+ log_error("Error occured while processing %s reserved value handler for field %s",
+ (rvw->reserved->type) & DM_REPORT_FIELD_TYPE_MASK ? "type-specific" : "field-specific",
+ rh->fields[field_num].id);
+ return 0;
+ }
+ }
+
+ rvw->value = tmp_value;
+ return 1;
+}
+
+static struct field_selection *_create_field_selection(struct dm_report *rh,
+ uint32_t field_num,
+ int implicit,
+ const char *v,
+ size_t len,
+ uint32_t flags,
+ struct reserved_value_wrapper *rvw,
+ void *custom)
+{
+ static const char *_field_selection_value_alloc_failed_msg = "dm_report: struct field_selection_value allocation failed for selection field %s";
+ const struct dm_report_field_type *fields = implicit ? _implicit_report_fields
+ : rh->fields;
+ struct field_properties *fp, *found = NULL;
+ struct field_selection *fs;
+ const char *field_id;
+ struct time_value *tval;
+ uint64_t factor;
+ char *s;
+
+ dm_list_iterate_items(fp, &rh->field_props) {
+ if ((fp->implicit == implicit) && (fp->field_num == field_num)) {
+ found = fp;
+ break;
+ }
+ }
+
+ /* The field is neither used in display options nor sort keys. */
+ if (!found) {
+ if (rh->selection->add_new_fields) {
+ if (!(found = _add_field(rh, field_num, implicit, FLD_HIDDEN)))
+ return NULL;
+ rh->report_types |= fields[field_num].type;
+ } else {
+ log_error("Unable to create selection with field \'%s\' "
+ "which is not included in current report.",
+ implicit ? _implicit_report_fields[field_num].id
+ : rh->fields[field_num].id);
+ return NULL;
+ }
+ }
+
+ field_id = fields[found->field_num].id;
+
+ if (!(found->flags & flags & DM_REPORT_FIELD_TYPE_MASK)) {
+ log_error("dm_report: incompatible comparison "
+ "type for selection field %s", field_id);
+ return NULL;
+ }
+
+ /* set up selection */
+ if (!(fs = dm_pool_zalloc(rh->selection->mem, sizeof(struct field_selection)))) {
+ log_error("dm_report: struct field_selection "
+ "allocation failed for selection field %s", field_id);
+ return NULL;
+ }
+
+ if (!(fs->value = dm_pool_zalloc(rh->selection->mem, sizeof(struct field_selection_value)))) {
+ log_error(_field_selection_value_alloc_failed_msg, field_id);
+ goto error;
+ }
+
+ if (((rvw->reserved && (rvw->reserved->type & DM_REPORT_FIELD_RESERVED_VALUE_RANGE)) ||
+ (((flags & DM_REPORT_FIELD_TYPE_MASK) == DM_REPORT_FIELD_TYPE_TIME) &&
+ custom && ((struct time_value *) custom)->range))
+ &&
+ !(fs->value->next = dm_pool_zalloc(rh->selection->mem, sizeof(struct field_selection_value)))) {
+ log_error(_field_selection_value_alloc_failed_msg, field_id);
+ goto error;
+ }
+
+ fs->fp = found;
+ fs->flags = flags;
+
+ if (!_get_reserved_value(rh, field_num, rvw)) {
+ log_error("dm_report: could not get reserved value "
+ "while processing selection field %s", field_id);
+ goto error;
+ }
+
+ /* store comparison operand */
+ if (flags & FLD_CMP_REGEX) {
+ /* REGEX */
+ if (!(s = dm_malloc(len + 1))) {
+ log_error("dm_report: dm_malloc failed to store "
+ "regex value for selection field %s", field_id);
+ goto error;
+ }
+ memcpy(s, v, len);
+ s[len] = '\0';
+
+ fs->value->v.r = dm_regex_create(rh->selection->mem, (const char * const *) &s, 1);
+ dm_free(s);
+ if (!fs->value->v.r) {
+ log_error("dm_report: failed to create regex "
+ "matcher for selection field %s", field_id);
+ goto error;
+ }
+ } else {
+ /* STRING, NUMBER, SIZE, PERCENT, STRING_LIST, TIME */
+ if (!(s = dm_pool_strndup(rh->selection->mem, v, len))) {
+ log_error("dm_report: dm_pool_strndup for value "
+ "of selection field %s", field_id);
+ goto error;
+ }
+
+ switch (flags & DM_REPORT_FIELD_TYPE_MASK) {
+ case DM_REPORT_FIELD_TYPE_STRING:
+ if (rvw->value) {
+ fs->value->v.s = (const char *) rvw->value;
+ if (rvw->reserved->type & DM_REPORT_FIELD_RESERVED_VALUE_RANGE)
+ fs->value->next->v.s = (((const char * const *) rvw->value)[1]);
+ dm_pool_free(rh->selection->mem, s);
+ } else {
+ fs->value->v.s = s;
+ if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_STRING, fs->value->v.s, NULL)) {
+ log_error("String value %s found in selection is reserved.", fs->value->v.s);
+ goto error;
+ }
+ }
+ break;
+ case DM_REPORT_FIELD_TYPE_NUMBER:
+ if (rvw->value) {
+ fs->value->v.i = *(const uint64_t *) rvw->value;
+ if (rvw->reserved->type & DM_REPORT_FIELD_RESERVED_VALUE_RANGE)
+ fs->value->next->v.i = (((const uint64_t *) rvw->value)[1]);
+ } else {
+ errno = 0;
+ if (((fs->value->v.i = strtoull(s, NULL, 10)) == ULLONG_MAX) &&
+ (errno == ERANGE)) {
+ log_error(_out_of_range_msg, s, field_id);
+ goto error;
+ }
+ if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_NUMBER, &fs->value->v.i, NULL)) {
+ log_error("Numeric value %" PRIu64 " found in selection is reserved.", fs->value->v.i);
+ goto error;
+ }
+ }
+ dm_pool_free(rh->selection->mem, s);
+ break;
+ case DM_REPORT_FIELD_TYPE_SIZE:
+ if (rvw->value) {
+ fs->value->v.d = *(const double *) rvw->value;
+ if (rvw->reserved->type & DM_REPORT_FIELD_RESERVED_VALUE_RANGE)
+ fs->value->next->v.d = (((const double *) rvw->value)[1]);
+ } else {
+ errno = 0;
+ fs->value->v.d = strtod(s, NULL);
+ if (errno == ERANGE) {
+ log_error(_out_of_range_msg, s, field_id);
+ goto error;
+ }
+ if (custom && (factor = *((const uint64_t *)custom)))
+ fs->value->v.d *= factor;
+ fs->value->v.d /= 512; /* store size in sectors! */
+ if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_SIZE, &fs->value->v.d, NULL)) {
+ log_error("Size value %f found in selection is reserved.", fs->value->v.d);
+ goto error;
+ }
+ }
+ dm_pool_free(rh->selection->mem, s);
+ break;
+ case DM_REPORT_FIELD_TYPE_PERCENT:
+ if (rvw->value) {
+ fs->value->v.i = *(const uint64_t *) rvw->value;
+ if (rvw->reserved->type & DM_REPORT_FIELD_RESERVED_VALUE_RANGE)
+ fs->value->next->v.i = (((const uint64_t *) rvw->value)[1]);
+ } else {
+ errno = 0;
+ fs->value->v.d = strtod(s, NULL);
+ if ((errno == ERANGE) || (fs->value->v.d < 0) || (fs->value->v.d > 100)) {
+ log_error(_out_of_range_msg, s, field_id);
+ goto error;
+ }
+
+ fs->value->v.i = (dm_percent_t) (DM_PERCENT_1 * fs->value->v.d);
+
+ if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_PERCENT, &fs->value->v.i, NULL)) {
+ log_error("Percent value %s found in selection is reserved.", s);
+ goto error;
+ }
+ }
+ break;
+ case DM_REPORT_FIELD_TYPE_STRING_LIST:
+ if (!custom)
+ goto_bad;
+ fs->value->v.l = *(struct selection_str_list **)custom;
+ if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_STRING_LIST, fs->value->v.l, NULL)) {
+ log_error("String list value found in selection is reserved.");
+ goto error;
+ }
+ break;
+ case DM_REPORT_FIELD_TYPE_TIME:
+ if (rvw->value) {
+ fs->value->v.t = *(const time_t *) rvw->value;
+ if (rvw->reserved->type & DM_REPORT_FIELD_RESERVED_VALUE_RANGE)
+ fs->value->next->v.t = (((const time_t *) rvw->value)[1]);
+ } else {
+ if (!(tval = (struct time_value *) custom))
+ goto_bad;
+ fs->value->v.t = tval->t1;
+ if (tval->range)
+ fs->value->next->v.t = tval->t2;
+ if (_check_value_is_strictly_reserved(rh, field_num, DM_REPORT_FIELD_TYPE_TIME, &fs->value->v.t, NULL)) {
+ log_error("Time value found in selection is reserved.");
+ goto error;
+ }
+ }
+ break;
+ default:
+ log_error(INTERNAL_ERROR "_create_field_selection: "
+ "unknown type of selection field %s", field_id);
+ goto error;
+ }
+ }
+
+ return fs;
+bad:
+ log_error(INTERNAL_ERROR "Forbiden NULL custom detected.");
+error:
+ dm_pool_free(rh->selection->mem, fs);
+
+ return NULL;
+}
+
+static struct selection_node *_alloc_selection_node(struct dm_pool *mem, uint32_t type)
+{
+ struct selection_node *sn;
+
+ if (!(sn = dm_pool_zalloc(mem, sizeof(struct selection_node)))) {
+ log_error("dm_report: struct selection_node allocation failed");
+ return NULL;
+ }
+
+ dm_list_init(&sn->list);
+ sn->type = type;
+ if (!(type & SEL_ITEM))
+ dm_list_init(&sn->selection.set);
+
+ return sn;
+}
+
/*
 * Print selection syntax help to the log: operand kinds, any reserved
 * values registered with this report handle, and the comparison/logical
 * operator tables.
 */
static void _display_selection_help(struct dm_report *rh)
{
	static const char _grow_object_failed_msg[] = "_display_selection_help: dm_pool_grow_object failed";
	struct op_def *t;
	const struct dm_report_reserved_value *rv;
	size_t len_all, len_final = 0;
	const char **rvs;
	char *rvs_all;

	log_warn("Selection operands");
	log_warn("------------------");
	log_warn("  field               - Reporting field.");
	log_warn("  number              - Non-negative integer value.");
	log_warn("  size                - Floating point value with units, 'm' unit used by default if not specified.");
	log_warn("  percent             - Non-negative integer with or without %% suffix.");
	log_warn("  string              - Characters quoted by \' or \" or unquoted.");
	log_warn("  string list         - Strings enclosed by [ ] or { } and elements delimited by either");
	log_warn("                        \"all items must match\" or \"at least one item must match\" operator.");
	log_warn("  regular expression  - Characters quoted by \' or \" or unquoted.");
	log_warn(" ");
	if (rh->reserved_values) {
		log_warn("Reserved values");
		log_warn("---------------");

		/* First pass: find the widest name list so the output
		 * columns line up. */
		for (rv = rh->reserved_values; rv->type; rv++) {
			for (len_all = 0, rvs = rv->names; *rvs; rvs++)
				len_all += strlen(*rvs) + 2;
			if (len_all > len_final)
				len_final = len_all;
		}

		/* Second pass: build "name1, name2, ..." per entry in a pool
		 * object and print it with its description. */
		for (rv = rh->reserved_values; rv->type; rv++) {
			if (!dm_pool_begin_object(rh->mem, 256)) {
				log_error("_display_selection_help: dm_pool_begin_object failed");
				break;
			}
			for (rvs = rv->names; *rvs; rvs++) {
				if (((rvs != rv->names) && !dm_pool_grow_object(rh->mem, ", ", 2)) ||
				    !dm_pool_grow_object(rh->mem, *rvs, strlen(*rvs))) {
					log_error(_grow_object_failed_msg);
					goto out_reserved_values;
				}
			}
			if (!dm_pool_grow_object(rh->mem, "\0", 1)) {
				log_error(_grow_object_failed_msg);
				goto out_reserved_values;
			}
			rvs_all = dm_pool_end_object(rh->mem);

			log_warn("  %-*s - %s [%s]", (int) len_final, rvs_all, rv->description,
				 _get_field_type_name(rv->type));
			dm_pool_free(rh->mem, rvs_all);
		}
		log_warn(" ");
	}
out_reserved_values:
	log_warn("Selection operators");
	log_warn("-------------------");
	log_warn("  Comparison operators:");
	t = _op_cmp;
	for (; t->string; t++)
		log_warn("    %6s  - %s", t->string, t->desc);
	log_warn(" ");
	log_warn("  Logical and grouping operators:");
	t = _op_log;
	for (; t->string; t++)
		log_warn("    %4s  - %s", t->string, t->desc);
	log_warn(" ");
}
+
/* Shared diagnostics used throughout the selection parser below. */
static const char _sel_syntax_error_at_msg[] = "Selection syntax error at '%s'.";
static const char _sel_help_ref_msg[] = "Use \'help\' for selection to get more help.";
+
+/*
+ * Selection parser
+ *
+ * _parse_* functions
+ *
+ * Input:
+ * s - a pointer to the parsed string
+ * Output:
+ * next - a pointer used for next _parse_*'s input,
+ * next == s if return value is NULL
+ * return value - a filter node pointer,
+ * NULL if s doesn't match
+ */
+
+/*
+ * SELECTION := FIELD_NAME OP_CMP STRING |
+ * FIELD_NAME OP_CMP NUMBER |
+ * FIELD_NAME OP_REGEX REGEX
+ */
+/*
+ * Parse one "FIELD op VALUE" term and wrap it in a SEL_ITEM selection node.
+ * On success *next points past the term (trailing space skipped); on failure
+ * the syntax error is reported, *next is reset to s and NULL is returned.
+ */
+static struct selection_node *_parse_selection(struct dm_report *rh,
+ const char *s,
+ const char **next)
+{
+ struct field_selection *fs;
+ struct selection_node *sn;
+ const char *ws, *we; /* field name */
+ const char *vs, *ve; /* value */
+ const char *last;
+ uint32_t flags, field_num;
+ int implicit;
+ const struct dm_report_field_type *ft;
+ struct selection_str_list *str_list;
+ struct reserved_value_wrapper rvw = {0};
+ struct time_value tval;
+ uint64_t factor;
+ void *custom = NULL;
+ char *tmp;
+ char c;
+
+ /* field name */
+ if (!(last = _tok_field_name(s, &ws, &we))) {
+ log_error("Expecting field name");
+ goto bad;
+ }
+
+ /* check if the field with given name exists */
+ if (!_get_field(rh, ws, (size_t) (we - ws), &field_num, &implicit)) {
+ /* Temporarily NUL-terminate the field name in place so it can be
+ * printed, then restore the saved character before bailing out. */
+ c = we[0];
+ tmp = (char *) we;
+ tmp[0] = '\0';
+ _display_fields(rh, 0, 1);
+ log_warn(" ");
+ log_error("Unrecognised selection field: %s", ws);
+ tmp[0] = c;
+ goto bad;
+ }
+
+ if (implicit) {
+ ft = &_implicit_report_fields[field_num];
+ if (ft->flags & FLD_CMP_UNCOMPARABLE) {
+ /* Same temporary NUL-termination trick as above. */
+ c = we[0];
+ tmp = (char *) we;
+ tmp[0] = '\0';
+ _display_fields(rh, 0, 1);
+ log_warn(" ");
+ log_error("Selection field is uncomparable: %s.", ws);
+ tmp[0] = c;
+ goto bad;
+ }
+ } else
+ ft = &rh->fields[field_num];
+
+ /* comparison operator */
+ if (!(flags = _tok_op_cmp(we, &last))) {
+ _display_selection_help(rh);
+ log_error("Unrecognised comparison operator: %s", we);
+ goto bad;
+ }
+ if (!last) {
+ _display_selection_help(rh);
+ log_error("Missing value after operator");
+ goto bad;
+ }
+
+ /* comparison value */
+ if (flags & FLD_CMP_REGEX) {
+ /*
+ * REGEX value
+ */
+ if (!(last = _tok_value_regex(rh, ft, last, &vs, &ve, &flags, &rvw)))
+ goto_bad;
+ } else {
+ /*
+ * STRING, NUMBER, SIZE, PERCENT, STRING_LIST, TIME value
+ */
+ if (flags & FLD_CMP_NUMBER) {
+ if (!(ft->flags & (DM_REPORT_FIELD_TYPE_NUMBER |
+ DM_REPORT_FIELD_TYPE_SIZE |
+ DM_REPORT_FIELD_TYPE_PERCENT |
+ DM_REPORT_FIELD_TYPE_TIME))) {
+ _display_selection_help(rh);
+ log_error("Operator can be used only with number, size, time or percent fields: %s", ws);
+ goto bad;
+ }
+ } else if (flags & FLD_CMP_TIME) {
+ if (!(ft->flags & DM_REPORT_FIELD_TYPE_TIME)) {
+ _display_selection_help(rh);
+ log_error("Operator can be used only with time fields: %s", ws);
+ goto bad;
+ }
+ }
+
+ /* Pick the per-type scratch storage that _tok_value fills in;
+ * its interpretation depends on the field type being parsed. */
+ if (ft->flags == DM_REPORT_FIELD_TYPE_SIZE ||
+ ft->flags == DM_REPORT_FIELD_TYPE_NUMBER ||
+ ft->flags == DM_REPORT_FIELD_TYPE_PERCENT)
+ custom = &factor;
+ else if (ft->flags & DM_REPORT_FIELD_TYPE_TIME)
+ custom = &tval;
+ else if (ft->flags == DM_REPORT_FIELD_TYPE_STRING_LIST)
+ custom = &str_list;
+ else
+ custom = NULL;
+ if (!(last = _tok_value(rh, ft, field_num, implicit,
+ last, &vs, &ve, &flags,
+ &rvw, rh->selection->mem, custom)))
+ goto_bad;
+ }
+
+ *next = _skip_space(last);
+
+ /* create selection */
+ if (!(fs = _create_field_selection(rh, field_num, implicit, vs, (size_t) (ve - vs), flags, &rvw, custom)))
+ return_NULL;
+
+ /* create selection node */
+ if (!(sn = _alloc_selection_node(rh->selection->mem, SEL_ITEM)))
+ return_NULL;
+
+ /* add selection to selection node */
+ sn->selection.item = fs;
+
+ return sn;
+bad:
+ log_error(_sel_syntax_error_at_msg, s);
+ log_error(_sel_help_ref_msg);
+ *next = s;
+ return NULL;
+}
+
+static struct selection_node *_parse_or_ex(struct dm_report *rh,
+ const char *s,
+ const char **next,
+ struct selection_node *or_sn);
+
+/*
+ * EX := '!' '(' OR_EXPRESSION ')' | '(' OR_EXPRESSION ')' | SELECTION
+ *
+ * Parse one parenthesised (optionally negated) sub-expression or a single
+ * selection term. Returns the parsed node, or NULL with *next = s on error.
+ */
+static struct selection_node *_parse_ex(struct dm_report *rh,
+ const char *s,
+ const char **next)
+{
+ static const char _ps_expected_msg[] = "Syntax error: left parenthesis expected at \'%s\'";
+ static const char _pe_expected_msg[] = "Syntax error: right parenthesis expected at \'%s\'";
+ struct selection_node *sn = NULL;
+ uint32_t t;
+ const char *tmp;
+
+ t = _tok_op_log(s, next, SEL_MODIFIER_NOT | SEL_PRECEDENCE_PS);
+ if (t == SEL_MODIFIER_NOT) {
+ /* '!' '(' EXPRESSION ')' */
+ if (!_tok_op_log(*next, &tmp, SEL_PRECEDENCE_PS)) {
+ log_error(_ps_expected_msg, *next);
+ goto error;
+ }
+ if (!(sn = _parse_or_ex(rh, tmp, next, NULL)))
+ goto error;
+ /* Negation is stored as a modifier flag on the sub-tree root. */
+ sn->type |= SEL_MODIFIER_NOT;
+ if (!_tok_op_log(*next, &tmp, SEL_PRECEDENCE_PE)) {
+ log_error(_pe_expected_msg, *next);
+ goto error;
+ }
+ *next = tmp;
+ } else if (t == SEL_PRECEDENCE_PS) {
+ /* '(' EXPRESSION ')' */
+ if (!(sn = _parse_or_ex(rh, *next, &tmp, NULL)))
+ goto error;
+ if (!_tok_op_log(tmp, next, SEL_PRECEDENCE_PE)) {
+ log_error(_pe_expected_msg, *next);
+ goto error;
+ }
+ } else if ((s = _skip_space(s))) {
+ /* SELECTION */
+ sn = _parse_selection(rh, s, next);
+ } else {
+ /* NOTE(review): _skip_space appears to always return non-NULL,
+ * so this branch looks unreachable in practice — confirm. */
+ sn = NULL;
+ *next = s;
+ }
+
+ return sn;
+error:
+ *next = s;
+ return NULL;
+}
+
+/* AND_EXPRESSION := EX (AND_OP AND_EXPRESSION) */
+/*
+ * Parse a chain of EX terms joined by AND, recursing with the accumulating
+ * SEL_AND node. A single term is returned unwrapped; two or more terms are
+ * collected into and_sn's selection set. NULL with *next = s on error.
+ */
+static struct selection_node *_parse_and_ex(struct dm_report *rh,
+ const char *s,
+ const char **next,
+ struct selection_node *and_sn)
+{
+ struct selection_node *n;
+ const char *tmp;
+
+ n = _parse_ex(rh, s, next);
+ if (!n)
+ goto error;
+
+ if (!_tok_op_log(*next, &tmp, SEL_AND)) {
+ /* No further AND operator: finish the chain. */
+ if (!and_sn)
+ return n;
+ dm_list_add(&and_sn->selection.set, &n->list);
+ return and_sn;
+ }
+
+ /* Lazily allocate the AND node on the second term. */
+ if (!and_sn) {
+ if (!(and_sn = _alloc_selection_node(rh->selection->mem, SEL_AND)))
+ goto error;
+ }
+ dm_list_add(&and_sn->selection.set, &n->list);
+
+ return _parse_and_ex(rh, tmp, next, and_sn);
+error:
+ *next = s;
+ return NULL;
+}
+
+/* OR_EXPRESSION := AND_EXPRESSION (OR_OP OR_EXPRESSION) */
+/*
+ * Parse a chain of AND_EXPRESSIONs joined by OR — the selection grammar's
+ * top level. Mirrors _parse_and_ex: single terms are returned unwrapped,
+ * multiple terms accumulate in or_sn's set. NULL with *next = s on error.
+ */
+static struct selection_node *_parse_or_ex(struct dm_report *rh,
+ const char *s,
+ const char **next,
+ struct selection_node *or_sn)
+{
+ struct selection_node *n;
+ const char *tmp;
+
+ n = _parse_and_ex(rh, s, next, NULL);
+ if (!n)
+ goto error;
+
+ if (!_tok_op_log(*next, &tmp, SEL_OR)) {
+ /* No further OR operator: finish the chain. */
+ if (!or_sn)
+ return n;
+ dm_list_add(&or_sn->selection.set, &n->list);
+ return or_sn;
+ }
+
+ /* Lazily allocate the OR node on the second term. */
+ if (!or_sn) {
+ if (!(or_sn = _alloc_selection_node(rh->selection->mem, SEL_OR)))
+ goto error;
+ }
+ dm_list_add(&or_sn->selection.set, &n->list);
+
+ return _parse_or_ex(rh, tmp, next, or_sn);
+error:
+ *next = s;
+ return NULL;
+}
+
+/*
+ * Allocate rh->selection from the report's pool plus a dedicated sub-pool
+ * for selection data. On failure any partial allocation is released and
+ * 0 is returned; 1 on success.
+ */
+static int _alloc_rh_selection(struct dm_report *rh)
+{
+ if (!(rh->selection = dm_pool_zalloc(rh->mem, sizeof(struct selection))) ||
+ !(rh->selection->mem = dm_pool_create("report selection", 10 * 1024))) {
+ log_error("Failed to allocate report selection structure.");
+ /* Only the struct itself needs freeing; the sub-pool failed. */
+ if (rh->selection)
+ dm_pool_free(rh->mem, rh->selection);
+ return 0;
+ }
+
+ return 1;
+}
+
+#define SPECIAL_SELECTION_ALL "all"
+
+/*
+ * (Re)build the selection tree for the report from the given selection
+ * string. An empty/NULL string or "all" clears any previous selection and
+ * succeeds trivially. Returns 1 on success, 0 on parse/allocation failure.
+ */
+static int _report_set_selection(struct dm_report *rh, const char *selection, int add_new_fields)
+{
+ struct selection_node *root = NULL;
+ const char *fin, *next;
+
+ if (rh->selection) {
+ if (rh->selection->selection_root)
+ /* Trash any previous selection. */
+ dm_pool_free(rh->selection->mem, rh->selection->selection_root);
+ rh->selection->selection_root = NULL;
+ } else {
+ if (!_alloc_rh_selection(rh))
+ goto_bad;
+ }
+
+ if (!selection || !selection[0] || !strcasecmp(selection, SPECIAL_SELECTION_ALL))
+ return 1;
+
+ rh->selection->add_new_fields = add_new_fields;
+
+ /* Top of the tree is always an OR node. */
+ if (!(root = _alloc_selection_node(rh->selection->mem, SEL_OR)))
+ return 0;
+
+ if (!_parse_or_ex(rh, selection, &fin, root))
+ goto_bad;
+
+ /* Anything left after the parse means trailing garbage. */
+ next = _skip_space(fin);
+ if (*next) {
+ log_error("Expecting logical operator");
+ log_error(_sel_syntax_error_at_msg, next);
+ log_error(_sel_help_ref_msg);
+ goto bad;
+ }
+
+ rh->selection->selection_root = root;
+ return 1;
+bad:
+ dm_pool_free(rh->selection->mem, root);
+ return 0;
+}
+
+/*
+ * Restore all field widths to their initial values and flag the report so
+ * widths are recalculated before the next output.
+ */
+static void _reset_field_props(struct dm_report *rh)
+{
+ struct field_properties *fp;
+ dm_list_iterate_items(fp, &rh->field_props)
+ fp->width = fp->initial_width;
+ rh->flags |= RH_FIELD_CALC_NEEDED;
+}
+
+/*
+ * Public API: apply a new selection string to an existing report and
+ * re-evaluate the selected state of every buffered row. Returns 1 on
+ * success, 0 on failure.
+ */
+int dm_report_set_selection(struct dm_report *rh, const char *selection)
+{
+ struct row *row;
+
+ if (!_report_set_selection(rh, selection, 0))
+ return_0;
+
+ _reset_field_props(rh);
+
+ dm_list_iterate_items(row, &rh->rows) {
+ row->selected = _check_report_selection(rh, &row->fields);
+ /* Refresh the implicit "selected" status field, if present. */
+ if (row->field_sel_status)
+ _implicit_report_fields[row->field_sel_status->props->field_num].report_fn(rh,
+ rh->mem, row->field_sel_status, row, rh->private);
+ }
+
+ return 1;
+}
+
+/*
+ * Public API: like dm_report_init but additionally parses a selection
+ * string and registers reserved values. A selection of "help"/"?" prints
+ * field and selection help instead of producing a report (the handle is
+ * flagged RH_ALREADY_REPORTED). Returns the report handle or NULL.
+ */
+struct dm_report *dm_report_init_with_selection(uint32_t *report_types,
+ const struct dm_report_object_type *types,
+ const struct dm_report_field_type *fields,
+ const char *output_fields,
+ const char *output_separator,
+ uint32_t output_flags,
+ const char *sort_keys,
+ const char *selection,
+ const struct dm_report_reserved_value reserved_values[],
+ void *private_data)
+{
+ struct dm_report *rh;
+
+ /* Selection support needs the extended implicit field set. */
+ _implicit_report_fields = _implicit_special_report_fields_with_selection;
+
+ if (!(rh = dm_report_init(report_types, types, fields, output_fields,
+ output_separator, output_flags, sort_keys, private_data)))
+ return NULL;
+
+ if (!selection || !selection[0]) {
+ rh->selection = NULL;
+ return rh;
+ }
+
+ /* An unsupported reserved value disables selection but still returns
+ * a usable report handle. */
+ if (!_check_reserved_values_supported(fields, reserved_values)) {
+ log_error(INTERNAL_ERROR "dm_report_init_with_selection: "
+ "trying to register unsupported reserved value type, "
+ "skipping report selection");
+ return rh;
+ }
+ rh->reserved_values = reserved_values;
+
+ if (!strcasecmp(selection, SPECIAL_FIELD_HELP_ID) ||
+ !strcmp(selection, SPECIAL_FIELD_HELP_ALT_ID)) {
+ _display_fields(rh, 0, 1);
+ log_warn(" ");
+ _display_selection_help(rh);
+ rh->flags |= RH_ALREADY_REPORTED;
+ return rh;
+ }
+
+ if (!_report_set_selection(rh, selection, 1))
+ goto_bad;
+
+ _dm_report_init_update_types(rh, report_types);
+
+ return rh;
+bad:
+ dm_report_free(rh);
+ return NULL;
+}
+
+/*
+ * Print row of headings
+ */
+static int _report_headings(struct dm_report *rh)
+{
+ const struct dm_report_field_type *fields;
+ struct field_properties *fp;
+ const char *heading;
+ char *buf = NULL;
+ size_t buf_size = 0;
+
+ /* Mark headings as handled even if output of headings is disabled. */
+ rh->flags |= RH_HEADINGS_PRINTED;
+
+ if (!(rh->flags & DM_REPORT_OUTPUT_HEADINGS))
+ return 1;
+
+ if (!dm_pool_begin_object(rh->mem, 128)) {
+ log_error("dm_report: "
+ "dm_pool_begin_object failed for headings");
+ return 0;
+ }
+
+ /* Size the scratch buffer to the widest field. */
+ dm_list_iterate_items(fp, &rh->field_props) {
+ if ((int) buf_size < fp->width)
+ buf_size = (size_t) fp->width;
+ }
+ /* Including trailing '\0'! */
+ buf_size++;
+
+ if (!(buf = dm_malloc(buf_size))) {
+ log_error("dm_report: Could not allocate memory for heading buffer.");
+ goto bad;
+ }
+
+ /* First heading line */
+ dm_list_iterate_items(fp, &rh->field_props) {
+ if (fp->flags & FLD_HIDDEN)
+ continue;
+
+ fields = fp->implicit ? _implicit_report_fields : rh->fields;
+
+ heading = fields[fp->field_num].heading;
+ if (rh->flags & DM_REPORT_OUTPUT_ALIGNED) {
+ /* Pad/truncate the heading to the field width. */
+ if (dm_snprintf(buf, buf_size, "%-*.*s",
+ fp->width, fp->width, heading) < 0) {
+ log_error("dm_report: snprintf heading failed");
+ goto bad;
+ }
+ if (!dm_pool_grow_object(rh->mem, buf, fp->width)) {
+ log_error("dm_report: Failed to generate report headings for printing");
+ goto bad;
+ }
+ } else if (!dm_pool_grow_object(rh->mem, heading, 0)) {
+ log_error("dm_report: Failed to generate report headings for printing");
+ goto bad;
+ }
+
+ /* Separator between fields, but not after the last one. */
+ if (!dm_list_end(&rh->field_props, &fp->list))
+ if (!dm_pool_grow_object(rh->mem, rh->separator, 0)) {
+ log_error("dm_report: Failed to generate report headings for printing");
+ goto bad;
+ }
+ }
+ if (!dm_pool_grow_object(rh->mem, "\0", 1)) {
+ log_error("dm_report: Failed to generate report headings for printing");
+ goto bad;
+ }
+
+ /* print all headings */
+ heading = (char *) dm_pool_end_object(rh->mem);
+ log_print("%s", heading);
+
+ dm_pool_free(rh->mem, (void *)heading);
+ dm_free(buf);
+
+ return 1;
+
+ bad:
+ dm_free(buf);
+ dm_pool_abandon_object(rh->mem);
+ return 0;
+}
+
+/*
+ * A row is displayed if it carries the implicit selection-status field
+ * (selection state is then reported per row) or if it was selected.
+ */
+static int _should_display_row(struct row *row)
+{
+ if (row->field_sel_status)
+  return 1;
+
+ return row->selected ? 1 : 0;
+}
+
+/*
+ * Walk all buffered rows: rebuild the per-row sort-field arrays (when
+ * sorting is required) and widen each field's column to fit the longest
+ * report string of any displayed row. Clears RH_FIELD_CALC_NEEDED.
+ */
+static void _recalculate_fields(struct dm_report *rh)
+{
+ struct row *row;
+ struct dm_report_field *field;
+ int len;
+
+ dm_list_iterate_items(row, &rh->rows) {
+ dm_list_iterate_items(field, &row->fields) {
+ if ((rh->flags & RH_SORT_REQUIRED) &&
+ (field->props->flags & FLD_SORT_KEY)) {
+ (*row->sort_fields)[field->props->sort_posn] = field;
+ }
+
+ /* Hidden/unselected rows do not influence column width. */
+ if (_should_display_row(row)) {
+ len = (int) strlen(field->report_string);
+ if ((len > field->props->width))
+ field->props->width = len;
+
+ }
+ }
+ }
+
+ rh->flags &= ~RH_FIELD_CALC_NEEDED;
+}
+
+/*
+ * Public API: print the heading row on demand, recalculating field widths
+ * first if needed. A no-op (success) in columns-as-rows mode.
+ */
+int dm_report_column_headings(struct dm_report *rh)
+{
+ /* Columns-as-rows does not use _report_headings. */
+ if (rh->flags & DM_REPORT_OUTPUT_COLUMNS_AS_ROWS)
+ return 1;
+
+ if (rh->flags & RH_FIELD_CALC_NEEDED)
+ _recalculate_fields(rh);
+
+ return _report_headings(rh);
+}
+
+/*
+ * Sort rows of data
+ */
+/*
+ * qsort comparator for rows: compare sort-key fields in key order,
+ * numerically for NUMBER/SIZE/TIME fields (uint64_t sort values) and with
+ * strcmp otherwise, honouring each key's ascending/descending flag.
+ * Equal keys fall through to the next key; 0 means fully identical.
+ */
+static int _row_compare(const void *a, const void *b)
+{
+ const struct row *rowa = *(const struct row * const *) a;
+ const struct row *rowb = *(const struct row * const *) b;
+ const struct dm_report_field *sfa, *sfb;
+ uint32_t cnt;
+
+ for (cnt = 0; cnt < rowa->rh->keys_count; cnt++) {
+ sfa = (*rowa->sort_fields)[cnt];
+ sfb = (*rowb->sort_fields)[cnt];
+ if ((sfa->props->flags & DM_REPORT_FIELD_TYPE_NUMBER) ||
+ (sfa->props->flags & DM_REPORT_FIELD_TYPE_SIZE) ||
+ (sfa->props->flags & DM_REPORT_FIELD_TYPE_TIME)) {
+ const uint64_t numa =
+ *(const uint64_t *) sfa->sort_value;
+ const uint64_t numb =
+ *(const uint64_t *) sfb->sort_value;
+
+ if (numa == numb)
+ continue;
+
+ if (sfa->props->flags & FLD_ASCENDING) {
+ return (numa > numb) ? 1 : -1;
+ } else { /* FLD_DESCENDING */
+ return (numa < numb) ? 1 : -1;
+ }
+ } else {
+ /* DM_REPORT_FIELD_TYPE_STRING
+ * DM_REPORT_FIELD_TYPE_STRING_LIST */
+ const char *stra = (const char *) sfa->sort_value;
+ const char *strb = (const char *) sfb->sort_value;
+ int cmp = strcmp(stra, strb);
+
+ if (!cmp)
+ continue;
+
+ if (sfa->props->flags & FLD_ASCENDING) {
+ return (cmp > 0) ? 1 : -1;
+ } else { /* FLD_DESCENDING */
+ return (cmp < 0) ? 1 : -1;
+ }
+ }
+ }
+
+ return 0; /* Identical */
+}
+
+/*
+ * Sort the report's row list: copy row pointers into a pool-allocated
+ * array, qsort with _row_compare, then relink the list in sorted order
+ * (head-insertion in reverse preserves the sorted order). Returns 1/0.
+ */
+static int _sort_rows(struct dm_report *rh)
+{
+ struct row *(*rows)[];
+ uint32_t count = 0;
+ struct row *row;
+
+ /* sizeof(**rows) is the size of one struct row pointer. */
+ if (!(rows = dm_pool_alloc(rh->mem, sizeof(**rows) *
+ dm_list_size(&rh->rows)))) {
+ log_error("dm_report: sort array allocation failed");
+ return 0;
+ }
+
+ dm_list_iterate_items(row, &rh->rows)
+ (*rows)[count++] = row;
+
+ qsort(rows, count, sizeof(**rows), _row_compare);
+
+ dm_list_init(&rh->rows);
+ while (count--)
+ dm_list_add_h(&rh->rows, &(*rows)[count]->list);
+
+ return 1;
+}
+
+#define STANDARD_QUOTE "\'"
+#define STANDARD_PAIR "="
+
+#define JSON_INDENT_UNIT 4
+#define JSON_SPACE " "
+#define JSON_QUOTE "\""
+#define JSON_PAIR ":"
+#define JSON_SEPARATOR ","
+#define JSON_OBJECT_START "{"
+#define JSON_OBJECT_END "}"
+#define JSON_ARRAY_START "["
+#define JSON_ARRAY_END "]"
+#define JSON_ESCAPE_CHAR "\\"
+
+#define UNABLE_TO_EXTEND_OUTPUT_LINE_MSG "dm_report: Unable to extend output line"
+
+/* Report belongs to a group of DM_REPORT_GROUP_BASIC type? */
+static int _is_basic_report(struct dm_report *rh)
+{
+ const struct report_group_item *item = rh->group_item;
+
+ if (!item)
+  return 0;
+
+ return item->group->type == DM_REPORT_GROUP_BASIC;
+}
+
+/* Report belongs to a group of DM_REPORT_GROUP_JSON type? */
+static int _is_json_report(struct dm_report *rh)
+{
+ const struct report_group_item *item = rh->group_item;
+
+ if (!item)
+  return 0;
+
+ return item->group->type == DM_REPORT_GROUP_JSON;
+}
+
+/*
+ * Produce report output
+ */
+/*
+ * Append one field to the output line being built in rh->mem:
+ * - JSON mode: "id":"value" with embedded quotes escaped,
+ * - field-name-prefix mode: PREFIX_ID='value' (quoting optional),
+ * - aligned mode: value padded/truncated to the field width.
+ * Returns 1 on success, 0 on failure (scratch buffer freed either way).
+ *
+ * Fix: use UNABLE_TO_EXTEND_OUTPUT_LINE_MSG consistently instead of one
+ * stray duplicated string literal of the same message.
+ */
+static int _output_field(struct dm_report *rh, struct dm_report_field *field)
+{
+ const struct dm_report_field_type *fields = field->props->implicit ? _implicit_report_fields
+ : rh->fields;
+ char *field_id;
+ int32_t width;
+ uint32_t align;
+ const char *repstr;
+ const char *p1_repstr, *p2_repstr;
+ char *buf = NULL;
+ size_t buf_size = 0;
+
+ if (_is_json_report(rh)) {
+ /* Emit "id":" — the value and closing quote follow below. */
+ if (!dm_pool_grow_object(rh->mem, JSON_QUOTE, 1) ||
+ !dm_pool_grow_object(rh->mem, fields[field->props->field_num].id, 0) ||
+ !dm_pool_grow_object(rh->mem, JSON_QUOTE, 1) ||
+ !dm_pool_grow_object(rh->mem, JSON_PAIR, 1) ||
+ !dm_pool_grow_object(rh->mem, JSON_QUOTE, 1)) {
+ log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+ return 0;
+ }
+ } else if (rh->flags & DM_REPORT_OUTPUT_FIELD_NAME_PREFIX) {
+ /* Emit PREFIX_ID= with the field id upper-cased. */
+ if (!(field_id = dm_strdup(fields[field->props->field_num].id))) {
+ log_error("dm_report: Failed to copy field name");
+ return 0;
+ }
+
+ if (!dm_pool_grow_object(rh->mem, rh->output_field_name_prefix, 0)) {
+ log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+ dm_free(field_id);
+ return 0;
+ }
+
+ if (!dm_pool_grow_object(rh->mem, _toupperstr(field_id), 0)) {
+ log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+ dm_free(field_id);
+ return 0;
+ }
+
+ dm_free(field_id);
+
+ if (!dm_pool_grow_object(rh->mem, STANDARD_PAIR, 1)) {
+ log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+ return 0;
+ }
+
+ if (!(rh->flags & DM_REPORT_OUTPUT_FIELD_UNQUOTED) &&
+ !dm_pool_grow_object(rh->mem, STANDARD_QUOTE, 1)) {
+ log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+ return 0;
+ }
+ }
+
+ repstr = field->report_string;
+ width = field->props->width;
+ if (!(rh->flags & DM_REPORT_OUTPUT_ALIGNED)) {
+ if (_is_json_report(rh)) {
+ /* Escape any JSON_QUOTE that may appear in reported string. */
+ p1_repstr = repstr;
+ while ((p2_repstr = strstr(p1_repstr, JSON_QUOTE))) {
+ if (p2_repstr > p1_repstr) {
+ if (!dm_pool_grow_object(rh->mem, p1_repstr, p2_repstr - p1_repstr)) {
+ log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+ return 0;
+ }
+ }
+ if (!dm_pool_grow_object(rh->mem, JSON_ESCAPE_CHAR, 1) ||
+ !dm_pool_grow_object(rh->mem, JSON_QUOTE, 1)) {
+ log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+ return 0;
+ }
+ p1_repstr = p2_repstr + 1;
+ }
+
+ /* Remainder after the last quote (or the whole string). */
+ if (!dm_pool_grow_object(rh->mem, p1_repstr, 0)) {
+ log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+ return 0;
+ }
+ } else {
+ if (!dm_pool_grow_object(rh->mem, repstr, 0)) {
+ log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+ return 0;
+ }
+ }
+ } else {
+ /* Default alignment: numbers/sizes right, everything else left. */
+ if (!(align = field->props->flags & DM_REPORT_FIELD_ALIGN_MASK))
+ align = ((field->props->flags & DM_REPORT_FIELD_TYPE_NUMBER) ||
+ (field->props->flags & DM_REPORT_FIELD_TYPE_SIZE)) ?
+ DM_REPORT_FIELD_ALIGN_RIGHT : DM_REPORT_FIELD_ALIGN_LEFT;
+
+ /* Including trailing '\0'! */
+ buf_size = width + 1;
+ if (!(buf = dm_malloc(buf_size))) {
+ log_error("dm_report: Could not allocate memory for output line buffer.");
+ return 0;
+ }
+
+ if (align & DM_REPORT_FIELD_ALIGN_LEFT) {
+ if (dm_snprintf(buf, buf_size, "%-*.*s",
+ width, width, repstr) < 0) {
+ log_error("dm_report: left-aligned snprintf() failed");
+ goto bad;
+ }
+ if (!dm_pool_grow_object(rh->mem, buf, width)) {
+ log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+ goto bad;
+ }
+ } else if (align & DM_REPORT_FIELD_ALIGN_RIGHT) {
+ if (dm_snprintf(buf, buf_size, "%*.*s",
+ width, width, repstr) < 0) {
+ log_error("dm_report: right-aligned snprintf() failed");
+ goto bad;
+ }
+ if (!dm_pool_grow_object(rh->mem, buf, width)) {
+ log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+ goto bad;
+ }
+ }
+ }
+
+ /* Closing quote for the value, where one was opened above. */
+ if (rh->flags & DM_REPORT_OUTPUT_FIELD_NAME_PREFIX) {
+ if (!(rh->flags & DM_REPORT_OUTPUT_FIELD_UNQUOTED)) {
+ if (!dm_pool_grow_object(rh->mem, STANDARD_QUOTE, 1)) {
+ log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+ goto bad;
+ }
+ }
+ } else if (_is_json_report(rh)) {
+ if (!dm_pool_grow_object(rh->mem, JSON_QUOTE, 1)) {
+ log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+ goto bad;
+ }
+ }
+
+ dm_free(buf);
+ return 1;
+
+bad:
+ dm_free(buf);
+ return 0;
+}
+
+/* Drop all buffered rows and reset column widths for the next run. */
+static void _destroy_rows(struct dm_report *rh)
+{
+ /*
+ * free the first row allocated to this report: since this is a
+ * pool allocation this will also free all subsequently allocated
+ * rows from the report and any associated string data.
+ */
+ if (rh->first_row)
+ dm_pool_free(rh->mem, rh->first_row);
+ rh->first_row = NULL;
+ dm_list_init(&rh->rows);
+
+ /* Reset field widths to original values. */
+ _reset_field_props(rh);
+}
+
+/*
+ * Transposed output: one line per FIELD, each line holding that field's
+ * value from every row (optionally prefixed by the field heading).
+ * Consumes the buffered rows. Returns 1 on success, 0 on failure.
+ */
+static int _output_as_rows(struct dm_report *rh)
+{
+ const struct dm_report_field_type *fields;
+ struct field_properties *fp;
+ struct dm_report_field *field;
+ struct row *row;
+
+ dm_list_iterate_items(fp, &rh->field_props) {
+ /* Hidden fields: discard their per-row values and move on. */
+ if (fp->flags & FLD_HIDDEN) {
+ dm_list_iterate_items(row, &rh->rows) {
+ field = dm_list_item(dm_list_first(&row->fields), struct dm_report_field);
+ dm_list_del(&field->list);
+ }
+ continue;
+ }
+
+ fields = fp->implicit ? _implicit_report_fields : rh->fields;
+
+ if (!dm_pool_begin_object(rh->mem, 512)) {
+ log_error("dm_report: Unable to allocate output line");
+ return 0;
+ }
+
+ if ((rh->flags & DM_REPORT_OUTPUT_HEADINGS)) {
+ if (!dm_pool_grow_object(rh->mem, fields[fp->field_num].heading, 0)) {
+ log_error("dm_report: Failed to extend row for field name");
+ goto bad;
+ }
+ if (!dm_pool_grow_object(rh->mem, rh->separator, 0)) {
+ log_error("dm_report: Failed to extend row with separator");
+ goto bad;
+ }
+ }
+
+ /* Each row contributes its first remaining field to this line. */
+ dm_list_iterate_items(row, &rh->rows) {
+ if ((field = dm_list_item(dm_list_first(&row->fields), struct dm_report_field))) {
+ if (!_output_field(rh, field))
+ goto bad;
+ dm_list_del(&field->list);
+ }
+
+ if (!dm_list_end(&rh->rows, &row->list))
+ if (!dm_pool_grow_object(rh->mem, rh->separator, 0)) {
+ log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+ goto bad;
+ }
+ }
+
+ if (!dm_pool_grow_object(rh->mem, "\0", 1)) {
+ log_error("dm_report: Failed to terminate row");
+ goto bad;
+ }
+ log_print("%s", (char *) dm_pool_end_object(rh->mem));
+ }
+
+ _destroy_rows(rh);
+
+ return 1;
+
+ bad:
+ dm_pool_abandon_object(rh->mem);
+ return 0;
+}
+
+/*
+ * Standard output: one line per row, fields separated by rh->separator
+ * (or rendered as a JSON object per row in JSON group mode). Rows are
+ * consumed unless DM_REPORT_OUTPUT_MULTIPLE_TIMES is set. Returns 1/0.
+ */
+static int _output_as_columns(struct dm_report *rh)
+{
+ struct dm_list *fh, *rowh, *ftmp, *rtmp;
+ struct row *row = NULL;
+ struct dm_report_field *field;
+ struct dm_list *last_row;
+ int do_field_delim;
+ char *line;
+
+ /* If headings not printed yet, calculate field widths and print them */
+ if (!(rh->flags & RH_HEADINGS_PRINTED))
+ _report_headings(rh);
+
+ /* Print and clear buffer */
+ last_row = dm_list_last(&rh->rows);
+ dm_list_iterate_safe(rowh, rtmp, &rh->rows) {
+ row = dm_list_item(rowh, struct row);
+
+ if (!_should_display_row(row))
+ continue;
+
+ if (!dm_pool_begin_object(rh->mem, 512)) {
+ log_error("dm_report: Unable to allocate output line");
+ return 0;
+ }
+
+ if (_is_json_report(rh)) {
+ if (!dm_pool_grow_object(rh->mem, JSON_OBJECT_START, 0)) {
+ log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+ goto bad;
+ }
+ }
+
+ /* Delimiter precedes every field except the first visible one. */
+ do_field_delim = 0;
+
+ dm_list_iterate_safe(fh, ftmp, &row->fields) {
+ field = dm_list_item(fh, struct dm_report_field);
+ if (field->props->flags & FLD_HIDDEN)
+ continue;
+
+ if (do_field_delim) {
+ if (_is_json_report(rh)) {
+ if (!dm_pool_grow_object(rh->mem, JSON_SEPARATOR, 0) ||
+ !dm_pool_grow_object(rh->mem, JSON_SPACE, 0)) {
+ log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+ goto bad;
+ }
+ } else {
+ if (!dm_pool_grow_object(rh->mem, rh->separator, 0)) {
+ log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+ goto bad;
+ }
+ }
+ } else
+ do_field_delim = 1;
+
+ if (!_output_field(rh, field))
+ goto bad;
+
+ if (!(rh->flags & DM_REPORT_OUTPUT_MULTIPLE_TIMES))
+ dm_list_del(&field->list);
+ }
+
+ if (_is_json_report(rh)) {
+ if (!dm_pool_grow_object(rh->mem, JSON_OBJECT_END, 0)) {
+ log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+ goto bad;
+ }
+ /* JSON array elements need a comma after all but the last row. */
+ if (rowh != last_row &&
+ !dm_pool_grow_object(rh->mem, JSON_SEPARATOR, 0)) {
+ log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+ goto bad;
+ }
+ }
+
+ if (!dm_pool_grow_object(rh->mem, "\0", 1)) {
+ log_error("dm_report: Unable to terminate output line");
+ goto bad;
+ }
+
+ /* Indent the line when the report is part of a group. */
+ line = (char *) dm_pool_end_object(rh->mem);
+ log_print("%*s", rh->group_item ? rh->group_item->group->indent + (int) strlen(line) : 0, line);
+ if (!(rh->flags & DM_REPORT_OUTPUT_MULTIPLE_TIMES))
+ dm_list_del(&row->list);
+ }
+
+ if (!(rh->flags & DM_REPORT_OUTPUT_MULTIPLE_TIMES))
+ _destroy_rows(rh);
+
+ return 1;
+
+ bad:
+ dm_pool_abandon_object(rh->mem);
+ return 0;
+}
+
+/* Public API: 1 if the report currently holds no buffered rows, else 0. */
+int dm_report_is_empty(struct dm_report *rh)
+{
+ if (dm_list_empty(&rh->rows))
+  return 1;
+
+ return 0;
+}
+
+/*
+ * Return the item at the top of the group's item stack (the list head),
+ * or NULL when there is no group or the group is empty.
+ */
+static struct report_group_item *_get_topmost_report_group_item(struct dm_report_group *group)
+{
+ if (!group || dm_list_empty(&group->items))
+  return NULL;
+
+ return dm_list_item(dm_list_first(&group->items), struct report_group_item);
+}
+
+/*
+ * Emit the opening '{' of the whole JSON document exactly once; a zero
+ * indent is used as the "not started yet" marker.
+ */
+static void _json_output_start(struct dm_report_group *group)
+{
+ if (!group->indent) {
+ log_print(JSON_OBJECT_START);
+ group->indent += JSON_INDENT_UNIT;
+ }
+}
+
+/*
+ * Emit the '"name": [' opening of a named JSON array for a group item,
+ * preceded by a separating comma and/or an anonymous enclosing object when
+ * the nesting requires it. Builds the text in mem (freed before return).
+ * Returns 1 on success, 0 on failure.
+ */
+static int _json_output_array_start(struct dm_pool *mem, struct report_group_item *item)
+{
+ const char *name = (const char *) item->data;
+ char *output;
+
+ if (!dm_pool_begin_object(mem, 32)) {
+ log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+ return 0;
+ }
+
+ if (!dm_pool_grow_object(mem, JSON_QUOTE, 1) ||
+ !dm_pool_grow_object(mem, name, 0) ||
+ !dm_pool_grow_object(mem, JSON_QUOTE JSON_PAIR JSON_SPACE JSON_ARRAY_START, 0) ||
+ !dm_pool_grow_object(mem, "\0", 1) ||
+ !(output = dm_pool_end_object(mem))) {
+ log_error(UNABLE_TO_EXTEND_OUTPUT_LINE_MSG);
+ goto bad;
+ }
+
+ /* A sibling already finished at this level: separate with a comma. */
+ if (item->parent->store.finished_count > 0)
+ log_print("%*s", item->group->indent + (int) sizeof(JSON_SEPARATOR) - 1, JSON_SEPARATOR);
+
+ /* Nested named arrays get wrapped in an anonymous object. */
+ if (item->parent->parent && item->parent->data) {
+ log_print("%*s", item->group->indent + (int) sizeof(JSON_OBJECT_START) - 1, JSON_OBJECT_START);
+ item->group->indent += JSON_INDENT_UNIT;
+ }
+
+ log_print("%*s", item->group->indent + (int) strlen(output), output);
+ item->group->indent += JSON_INDENT_UNIT;
+
+ dm_pool_free(mem, output);
+ return 1;
+bad:
+ dm_pool_abandon_object(mem);
+ return 0;
+}
+
+/*
+ * Open the enclosing JSON structures for a report's output, enforcing that
+ * JSON reports are emitted strictly LIFO (topmost group item only) and not
+ * re-entered while still open. Returns 1 on success, 0 on misuse/failure.
+ */
+static int _prepare_json_report_output(struct dm_report *rh)
+{
+ _json_output_start(rh->group_item->group);
+
+ /* Nothing new to emit and the array was already produced. */
+ if (rh->group_item->output_done && dm_list_empty(&rh->rows))
+ return 1;
+
+ /*
+ * If this report is in JSON group, it must be at the
+ * top of the stack of reports so the output from
+ * different reports do not interleave with each other.
+ */
+ if (_get_topmost_report_group_item(rh->group_item->group) != rh->group_item) {
+ log_error("dm_report: dm_report_output: interleaved reports detected for JSON output");
+ return 0;
+ }
+
+ if (rh->group_item->needs_closing) {
+ log_error("dm_report: dm_report_output: unfinished JSON output detected");
+ return 0;
+ }
+
+ if (!_json_output_array_start(rh->mem, rh->group_item))
+ return_0;
+
+ rh->group_item->needs_closing = 1;
+ return 1;
+}
+
+/*
+ * For a basic report group: print the report's name underlined with '='
+ * (preceded by a blank line when a sibling report already finished).
+ * Returns 1 on success, 0 on allocation failure.
+ */
+static int _print_basic_report_header(struct dm_report *rh)
+{
+ const char *report_name = (const char *) rh->group_item->data;
+ size_t len = strlen(report_name);
+ char *underline;
+
+ /* zalloc gives the trailing NUL; memset fills the visible part. */
+ if (!(underline = dm_pool_zalloc(rh->mem, len + 1)))
+ return_0;
+
+ memset(underline, '=', len);
+
+ if (rh->group_item->parent->store.finished_count > 0)
+ log_print("%s", "");
+ log_print("%s", report_name);
+ log_print("%s", underline);
+
+ dm_pool_free(rh->mem, underline);
+ return 1;
+}
+
+/*
+ * Public API: emit the buffered report — recalculate widths and sort if
+ * needed, print group framing (JSON/basic), then output rows either
+ * transposed (columns-as-rows) or one line per row. Returns 1/0.
+ */
+int dm_report_output(struct dm_report *rh)
+{
+ int r = 0;
+
+ if (_is_json_report(rh) &&
+ !_prepare_json_report_output(rh))
+ return_0;
+
+ /* An empty report is still a success (framing may have been opened). */
+ if (dm_list_empty(&rh->rows)) {
+ r = 1;
+ goto out;
+ }
+
+ if (rh->flags & RH_FIELD_CALC_NEEDED)
+ _recalculate_fields(rh);
+
+ if ((rh->flags & RH_SORT_REQUIRED))
+ _sort_rows(rh);
+
+ if (_is_basic_report(rh) && !_print_basic_report_header(rh))
+ goto_out;
+
+ if ((rh->flags & DM_REPORT_OUTPUT_COLUMNS_AS_ROWS))
+ r = _output_as_rows(rh);
+ else
+ r = _output_as_columns(rh);
+out:
+ if (r && rh->group_item)
+ rh->group_item->output_done = 1;
+ return r;
+}
+
+/* Public API: discard all buffered rows without producing output. */
+void dm_report_destroy_rows(struct dm_report *rh)
+{
+ _destroy_rows(rh);
+}
+
+/*
+ * Public API: create a report group of the given type with its own memory
+ * pool and a root item on the item stack. Returns the group or NULL.
+ *
+ * Fix: corrected error-message typo "faile to allocate".
+ */
+struct dm_report_group *dm_report_group_create(dm_report_group_type_t type, void *data)
+{
+ struct dm_report_group *group;
+ struct dm_pool *mem;
+ struct report_group_item *item;
+
+ if (!(mem = dm_pool_create("report_group", 1024))) {
+ log_error("dm_report: dm_report_init_group: failed to allocate mem pool");
+ return NULL;
+ }
+
+ if (!(group = dm_pool_zalloc(mem, sizeof(*group)))) {
+ log_error("dm_report: failed to allocate report group structure");
+ goto bad;
+ }
+
+ group->mem = mem;
+ group->type = type;
+ dm_list_init(&group->items);
+
+ /* Root item: anchors the stack; has no report and no parent. */
+ if (!(item = dm_pool_zalloc(mem, sizeof(*item)))) {
+ log_error("dm_report: failed to allocate root report group item");
+ goto bad;
+ }
+
+ dm_list_add_h(&group->items, &item->list);
+
+ return group;
+bad:
+ dm_pool_destroy(mem);
+ return NULL;
+}
+
+/*
+ * DM_REPORT_GROUP_SINGLE push hook: only one report may be present in the
+ * group. The item being pushed is already on the list when this runs (see
+ * dm_report_group_push), so a count above 1 means a second report.
+ * The data argument is unused for this group type.
+ */
+static int _report_group_push_single(struct report_group_item *item, void *data)
+{
+ struct report_group_item *item_iter;
+ unsigned count = 0;
+
+ dm_list_iterate_items(item_iter, &item->group->items) {
+ if (item_iter->report)
+ count++;
+ }
+
+ if (count > 1) {
+ log_error("dm_report: unable to add more than one report "
+ "to current report group");
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * DM_REPORT_GROUP_BASIC push hook: unbuffered reports lose the
+ * multiple-times flag; pushing an anonymous non-report item after a
+ * finished sibling prints a separating blank line.
+ */
+static int _report_group_push_basic(struct report_group_item *item, const char *name)
+{
+ if (item->report) {
+ if (!(item->report->flags & DM_REPORT_OUTPUT_BUFFERED))
+ item->report->flags &= ~(DM_REPORT_OUTPUT_MULTIPLE_TIMES);
+ } else {
+ if (!name && item->parent->store.finished_count > 0)
+ log_print("%s", "");
+ }
+
+ return 1;
+}
+
+/*
+ * DM_REPORT_GROUP_JSON push hook: reports are forced into buffered,
+ * unaligned, heading-less row output; non-report items open a named array
+ * or an anonymous object (the latter is invalid at the top level).
+ * Returns 1 on success, 0 on failure.
+ */
+static int _report_group_push_json(struct report_group_item *item, const char *name)
+{
+ if (name && !(item->data = dm_pool_strdup(item->group->mem, name))) {
+ log_error("dm_report: failed to duplicate json item name");
+ return 0;
+ }
+
+ if (item->report) {
+ item->report->flags &= ~(DM_REPORT_OUTPUT_ALIGNED |
+ DM_REPORT_OUTPUT_HEADINGS |
+ DM_REPORT_OUTPUT_COLUMNS_AS_ROWS);
+ item->report->flags |= DM_REPORT_OUTPUT_BUFFERED;
+ } else {
+ _json_output_start(item->group);
+ if (name) {
+ if (!_json_output_array_start(item->group->mem, item))
+ return_0;
+ } else {
+ if (!item->parent->parent) {
+ log_error("dm_report: can't use unnamed object at top level of JSON output");
+ return 0;
+ }
+ /* Comma-separate from a finished sibling, then open '{'. */
+ if (item->parent->store.finished_count > 0)
+ log_print("%*s", item->group->indent + (int) sizeof(JSON_SEPARATOR) - 1, JSON_SEPARATOR);
+ log_print("%*s", item->group->indent + (int) sizeof(JSON_OBJECT_START) - 1, JSON_OBJECT_START);
+ item->group->indent += JSON_INDENT_UNIT;
+ }
+
+ /* Framing was emitted now; pop must close it later. */
+ item->output_done = 1;
+ item->needs_closing = 1;
+ }
+
+ return 1;
+}
+
+/*
+ * Public API: push a report (or a named/anonymous frame when report is
+ * NULL) onto the group's item stack and run the type-specific hook.
+ * A NULL group is a no-op success. Returns 1/0; on hook failure the item
+ * is unlinked and freed again.
+ */
+int dm_report_group_push(struct dm_report_group *group, struct dm_report *report, void *data)
+{
+ struct report_group_item *item, *tmp_item;
+
+ if (!group)
+ return 1;
+
+ if (!(item = dm_pool_zalloc(group->mem, sizeof(*item)))) {
+ log_error("dm_report: dm_report_group_push: group item allocation failed");
+ return 0;
+ }
+
+ if ((item->report = report)) {
+ /* Remember the flags so pop can restore them. */
+ item->store.orig_report_flags = report->flags;
+ report->group_item = item;
+ }
+
+ item->group = group;
+ item->data = data;
+
+ /* Parent is the nearest non-report item already on the stack. */
+ dm_list_iterate_items(tmp_item, &group->items) {
+ if (!tmp_item->report) {
+ item->parent = tmp_item;
+ break;
+ }
+ }
+
+ /* Item must be on the stack before the type hooks run. */
+ dm_list_add_h(&group->items, &item->list);
+
+ switch (group->type) {
+ case DM_REPORT_GROUP_SINGLE:
+ if (!_report_group_push_single(item, data))
+ goto_bad;
+ break;
+ case DM_REPORT_GROUP_BASIC:
+ if (!_report_group_push_basic(item, data))
+ goto_bad;
+ break;
+ case DM_REPORT_GROUP_JSON:
+ if (!_report_group_push_json(item, data))
+ goto_bad;
+ break;
+ default:
+ goto_bad;
+ }
+
+ return 1;
+bad:
+ dm_list_del(&item->list);
+ dm_pool_free(group->mem, item);
+ return 0;
+}
+
+/* DM_REPORT_GROUP_SINGLE pop hook: nothing to clean up. */
+static int _report_group_pop_single(struct report_group_item *item)
+{
+ return 1;
+}
+
+/* DM_REPORT_GROUP_BASIC pop hook: nothing to clean up. */
+static int _report_group_pop_basic(struct report_group_item *item)
+{
+ return 1;
+}
+
+/*
+ * DM_REPORT_GROUP_JSON pop hook: close the ']' of a named array and, for
+ * nested frames, the wrapping '}' opened by _json_output_array_start —
+ * but only if output actually happened and is still open.
+ */
+static int _report_group_pop_json(struct report_group_item *item)
+{
+ if (item->output_done && item->needs_closing) {
+ if (item->data) {
+ item->group->indent -= JSON_INDENT_UNIT;
+ log_print("%*s", item->group->indent + (int) sizeof(JSON_ARRAY_END) - 1, JSON_ARRAY_END);
+ }
+ if (item->parent->data && item->parent->parent) {
+ item->group->indent -= JSON_INDENT_UNIT;
+ log_print("%*s", item->group->indent + (int) sizeof(JSON_OBJECT_END) - 1, JSON_OBJECT_END);
+ }
+ item->needs_closing = 0;
+ }
+
+ return 1;
+}
+
+/*
+ * Public API: pop the topmost item off the group stack, run the
+ * type-specific close hook, restore a popped report's original flags,
+ * bump the parent's finished counter and free the item. Returns 1/0.
+ */
+int dm_report_group_pop(struct dm_report_group *group)
+{
+ struct report_group_item *item;
+
+ if (!group)
+ return 1;
+
+ if (!(item = _get_topmost_report_group_item(group))) {
+ log_error("dm_report: dm_report_group_pop: group has no items");
+ return 0;
+ }
+
+ switch (group->type) {
+ case DM_REPORT_GROUP_SINGLE:
+ if (!_report_group_pop_single(item))
+ return_0;
+ break;
+ case DM_REPORT_GROUP_BASIC:
+ if (!_report_group_pop_basic(item))
+ return_0;
+ break;
+ case DM_REPORT_GROUP_JSON:
+ if (!_report_group_pop_json(item))
+ return_0;
+ break;
+ default:
+ return 0;
+ }
+
+ dm_list_del(&item->list);
+
+ if (item->report) {
+ /* Undo any flag changes the push hook made. */
+ item->report->flags = item->store.orig_report_flags;
+ item->report->group_item = NULL;
+ }
+
+ if (item->parent)
+ item->parent->store.finished_count++;
+
+ dm_pool_free(group->mem, item);
+ return 1;
+}
+
+/*
+ * Public API: flush and pop every non-root item in the group (outputting
+ * any attached reports), reset the root's finished counter, and close the
+ * top-level JSON object for JSON groups. Returns 1/0.
+ */
+int dm_report_group_output_and_pop_all(struct dm_report_group *group)
+{
+ struct report_group_item *item, *tmp_item;
+
+ dm_list_iterate_items_safe(item, tmp_item, &group->items) {
+ /* The root item (no parent) stays; just reset its counter. */
+ if (!item->parent) {
+ item->store.finished_count = 0;
+ continue;
+ }
+ if (item->report && !dm_report_output(item->report))
+ return_0;
+ if (!dm_report_group_pop(group))
+ return_0;
+ }
+
+ if (group->type == DM_REPORT_GROUP_JSON) {
+ /* Ensure the document was opened even if nothing was output. */
+ _json_output_start(group);
+ log_print(JSON_OBJECT_END);
+ group->indent -= JSON_INDENT_UNIT;
+ }
+
+ return 1;
+}
+
+/*
+ * Public API: flush/pop everything, then destroy the group's memory pool
+ * (which frees the group itself). NULL group is a no-op success; returns
+ * 0 only if the final flush failed (the pool is destroyed regardless).
+ */
+int dm_report_group_destroy(struct dm_report_group *group)
+{
+ int r = 1;
+
+ if (!group)
+ return 1;
+
+ if (!dm_report_group_output_and_pop_all(group))
+ r = 0;
+
+ dm_pool_destroy(group->mem);
+ return r;
+}
diff --git a/device_mapper/libdm-stats.c b/device_mapper/libdm-stats.c
new file mode 100644
index 000000000..6cd08a773
--- /dev/null
+++ b/device_mapper/libdm-stats.c
@@ -0,0 +1,5095 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc. All rights reserved.
+ *
+ * _stats_get_extents_for_file() based in part on filefrag_fiemap() from
+ * e2fsprogs/misc/filefrag.c. Copyright 2003 by Theodore Ts'o.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "misc/dmlib.h"
+#include "misc/kdev_t.h"
+
+#include "math.h" /* log10() */
+
+#include <sys/sysmacros.h>
+#include <sys/ioctl.h>
+#include <sys/vfs.h> /* fstatfs */
+#include <unistd.h>
+
+#ifdef __linux__
+ #include <linux/fs.h> /* FS_IOC_FIEMAP */
+#endif
+
+#ifdef HAVE_LINUX_FIEMAP_H
+ #include <linux/fiemap.h> /* fiemap */
+#endif
+
+#ifdef HAVE_LINUX_MAGIC_H
+ #include <linux/magic.h> /* BTRFS_SUPER_MAGIC */
+#endif
+
+#define DM_STATS_REGION_NOT_PRESENT UINT64_MAX
+#define DM_STATS_GROUP_NOT_PRESENT DM_STATS_GROUP_NONE
+
+#define NSEC_PER_USEC 1000L
+#define NSEC_PER_MSEC 1000000L
+#define NSEC_PER_SEC 1000000000L
+
+#define PRECISE_ARG "precise_timestamps"
+#define HISTOGRAM_ARG "histogram:"
+
+#define STATS_ROW_BUF_LEN 4096
+#define STATS_MSG_BUF_LEN 1024
+#define STATS_FIE_BUF_LEN 2048
+
+#define SECTOR_SHIFT 9L
+
+/* One histogram bin; bounds are kept in nanoseconds (values parsed
+ * from the kernel are scaled by the region's timescale on entry). */
+struct dm_histogram_bin {
+ uint64_t upper; /* Upper bound on this bin. */
+ uint64_t count; /* Count value for this bin. */
+};
+
+/* A histogram: header followed by a flexible array of bins. */
+struct dm_histogram {
+ /* The stats handle this histogram belongs to. */
+ const struct dm_stats *dms;
+ /* The region this histogram belongs to. */
+ const struct dm_stats_region *region;
+ uint64_t sum; /* Sum of histogram bin counts. */
+ int nr_bins; /* Number of histogram bins assigned. */
+ struct dm_histogram_bin bins[0];
+};
+
+/*
+ * See Documentation/device-mapper/statistics.txt for full descriptions
+ * of the device-mapper statistics counter fields.
+ */
+struct dm_stats_counters {
+ uint64_t reads; /* Num reads completed */
+ uint64_t reads_merged; /* Num reads merged */
+ uint64_t read_sectors; /* Num sectors read */
+ uint64_t read_nsecs; /* Num milliseconds spent reading */
+ uint64_t writes; /* Num writes completed */
+ uint64_t writes_merged; /* Num writes merged */
+ uint64_t write_sectors; /* Num sectors written */
+ uint64_t write_nsecs; /* Num milliseconds spent writing */
+ uint64_t io_in_progress; /* Num I/Os currently in progress */
+ uint64_t io_nsecs; /* Num milliseconds spent doing I/Os */
+ uint64_t weighted_io_nsecs; /* Weighted num milliseconds doing I/Os */
+ uint64_t total_read_nsecs; /* Total time spent reading in milliseconds */
+ uint64_t total_write_nsecs; /* Total time spent writing in milliseconds */
+ struct dm_histogram *histogram; /* Histogram. */
+};
+
+/* One statistics region as reported by @stats_list / @stats_print. */
+struct dm_stats_region {
+ uint64_t region_id; /* as returned by @stats_list */
+ uint64_t group_id;
+ uint64_t start;
+ uint64_t len;
+ uint64_t step;
+ char *program_id;
+ char *aux_data;
+ uint64_t timescale; /* precise_timestamps is per-region */
+ struct dm_histogram *bounds; /* histogram configuration */
+ struct dm_histogram *histogram; /* aggregate cache */
+ struct dm_stats_counters *counters;
+};
+
+/* A group of regions; membership is tracked as a region-id bitset. */
+struct dm_stats_group {
+ uint64_t group_id;
+ const char *alias;
+ dm_bitset_t regions;
+ struct dm_histogram *histogram;
+};
+
+/* A dm_stats handle: device binding, region/group tables and the
+ * walk cursor used by the dm_stats_foreach_* iteration macros. */
+struct dm_stats {
+ /* device binding */
+ int bind_major; /* device major that this dm_stats object is bound to */
+ int bind_minor; /* device minor that this dm_stats object is bound to */
+ char *bind_name; /* device-mapper device name */
+ char *bind_uuid; /* device-mapper UUID */
+ char *program_id; /* default program_id for this handle */
+ const char *name; /* cached device_name used for reporting */
+ struct dm_pool *mem; /* memory pool for region and counter tables */
+ struct dm_pool *hist_mem; /* separate pool for histogram tables */
+ struct dm_pool *group_mem; /* separate pool for group tables */
+ uint64_t nr_regions; /* total number of present regions */
+ uint64_t max_region; /* size of the regions table */
+ uint64_t interval_ns; /* sampling interval in nanoseconds */
+ uint64_t timescale; /* default sample value multiplier */
+ int precise; /* use precise_timestamps when creating regions */
+ struct dm_stats_region *regions;
+ struct dm_stats_group *groups;
+ /* statistics cursor */
+ uint64_t walk_flags; /* walk control flags */
+ uint64_t cur_flags;
+ uint64_t cur_group;
+ uint64_t cur_region;
+ uint64_t cur_area;
+};
+
+#define PROC_SELF_COMM "/proc/self/comm"
+/*
+ * Derive a default program_id from the current process name read
+ * from /proc/self/comm.  Returns a dm_malloc'd copy (caller frees)
+ * or NULL on error.
+ * NOTE(review): fgets() keeps the trailing '\n' from /proc and it is
+ * duplicated verbatim here — confirm callers expect the newline.
+ */
+static char *_program_id_from_proc(void)
+{
+ FILE *comm = NULL;
+ char buf[STATS_ROW_BUF_LEN];
+
+ if (!(comm = fopen(PROC_SELF_COMM, "r")))
+ return_NULL;
+
+ if (!fgets(buf, sizeof(buf), comm)) {
+ log_error("Could not read from %s", PROC_SELF_COMM);
+ if (fclose(comm))
+ stack;
+ return NULL;
+ }
+
+ if (fclose(comm))
+ stack;
+
+ return dm_strdup(buf);
+}
+
/*
 * Return the number of areas a region of length len is divided into.
 *
 * A zero length or step means the region is a single area.  Otherwise
 * the region holds len/step whole areas, and (as in the kernel's
 * drivers/md/dm-stats.c::message_stats_create()) any partial area at
 * the end of the region is treated as one more complete area.
 *
 * Computed with div/mod rather than (len + step - 1) / step so the
 * sum cannot wrap around for len values close to UINT64_MAX.
 */
static uint64_t _nr_areas(uint64_t len, uint64_t step)
{
	/* Default is one area. */
	if (!len || !step)
		return 1;

	return len / step + ((len % step) ? 1 : 0);
}
+
+/* Number of areas in a region, derived from its length and step. */
+static uint64_t _nr_areas_region(struct dm_stats_region *region)
+{
+ return _nr_areas(region->len, region->step);
+}
+
+/*
+ * Allocate and initialise a dm_stats handle.
+ *
+ * program_id is the default program_id used for regions created and
+ * listed through this handle; NULL or "" falls back to the process
+ * name from /proc/self/comm.
+ *
+ * Returns the new (unbound) handle, or NULL on allocation failure.
+ */
+struct dm_stats *dm_stats_create(const char *program_id)
+{
+ size_t hist_hint = sizeof(struct dm_histogram_bin);
+ size_t group_hint = sizeof(struct dm_stats_group);
+ struct dm_stats *dms = NULL;
+
+ if (!(dms = dm_zalloc(sizeof(*dms))))
+ return_NULL;
+
+ /* FIXME: better hint. */
+ if (!(dms->mem = dm_pool_create("stats_pool", 4096))) {
+ dm_free(dms);
+ return_NULL;
+ }
+
+ if (!(dms->hist_mem = dm_pool_create("histogram_pool", hist_hint)))
+ goto_bad;
+
+ if (!(dms->group_mem = dm_pool_create("group_pool", group_hint)))
+ goto_bad;
+
+ if (!program_id || !strlen(program_id))
+ dms->program_id = _program_id_from_proc();
+ else
+ dms->program_id = dm_strdup(program_id);
+
+ if (!dms->program_id) {
+ log_error("Could not allocate memory for program_id");
+ goto bad;
+ }
+
+ dms->bind_major = -1;
+ dms->bind_minor = -1;
+ dms->bind_name = NULL;
+ dms->bind_uuid = NULL;
+
+ dms->name = NULL;
+
+ /* by default all regions use msec precision */
+ dms->timescale = NSEC_PER_MSEC;
+ dms->precise = 0;
+
+ dms->nr_regions = DM_STATS_REGION_NOT_PRESENT;
+ dms->max_region = DM_STATS_REGION_NOT_PRESENT;
+ dms->regions = NULL;
+
+ /* maintain compatibility with earlier walk version */
+ dms->walk_flags = dms->cur_flags = DM_STATS_WALK_DEFAULT;
+
+ return dms;
+
+bad:
+ /* dms->mem always exists here; the later pools may not. */
+ dm_pool_destroy(dms->mem);
+ if (dms->hist_mem)
+ dm_pool_destroy(dms->hist_mem);
+ if (dms->group_mem)
+ dm_pool_destroy(dms->group_mem);
+ dm_free(dms);
+ return NULL;
+}
+
+/*
+ * Test whether the stats region pointed to by region is present.
+ */
+static int _stats_region_present(const struct dm_stats_region *region)
+{
+ return !(region->region_id == DM_STATS_REGION_NOT_PRESENT);
+}
+
+/*
+ * Test whether the stats group pointed to by group is present.
+ */
+static int _stats_group_present(const struct dm_stats_group *group)
+{
+ return !(group->group_id == DM_STATS_GROUP_NOT_PRESENT);
+}
+
+/*
+ * Test whether a stats group id is present.
+ */
+/*
+ * Return 1 if id names a present group in dms, 0 otherwise.
+ * A NULL handle logs a stack trace (return_0) and counts as
+ * "not present"; so does an unlisted handle (no regions table)
+ * or an id beyond the regions/groups table size.
+ */
+static int _stats_group_id_present(const struct dm_stats *dms, uint64_t id)
+{
+ struct dm_stats_group *group = NULL;
+
+ if (id == DM_STATS_GROUP_NOT_PRESENT)
+ return 0;
+
+ if (!dms)
+ return_0;
+
+ if (!dms->regions)
+ return 0;
+
+ if (id > dms->max_region)
+ return 0;
+
+ group = &dms->groups[id];
+
+ return _stats_group_present(group);
+}
+
+/*
+ * Test whether the given region_id is a member of any group.
+ */
+static uint64_t _stats_region_is_grouped(const struct dm_stats* dms,
+ uint64_t region_id)
+{
+ uint64_t group_id;
+
+ if (region_id == DM_STATS_GROUP_NOT_PRESENT)
+ return 0;
+
+ if (!_stats_region_present(&dms->regions[region_id]))
+ return 0;
+
+ group_id = dms->regions[region_id].group_id;
+
+ return group_id != DM_STATS_GROUP_NOT_PRESENT;
+}
+
+/*
+ * Release all histogram tables belonging to a region's areas.
+ * Histograms are pool-allocated in area order, so freeing back to
+ * the first area's histogram releases every later allocation too.
+ */
+static void _stats_histograms_destroy(struct dm_pool *mem,
+ struct dm_stats_region *region)
+{
+ /* Unpopulated handle. */
+ if (!region->counters)
+ return;
+
+ /*
+ * Free everything in the pool back to the first histogram.
+ */
+ if (region->counters[0].histogram)
+ dm_pool_free(mem, region->counters[0].histogram);
+}
+
+/*
+ * Reset one region slot to the "not present" state, releasing its
+ * dm_malloc'd strings.  Pool-allocated members (counters, bounds)
+ * are only unlinked here; their memory is reclaimed when the whole
+ * regions table is dropped from the pool.
+ */
+static void _stats_region_destroy(struct dm_stats_region *region)
+{
+ if (!_stats_region_present(region))
+ return;
+
+ region->start = region->len = region->step = 0;
+ region->timescale = 0;
+
+ /*
+ * Don't free counters and histogram bounds here: they are
+ * dropped from the pool along with the corresponding
+ * regions table.
+ *
+ * The following objects are all allocated with dm_malloc.
+ */
+
+ region->counters = NULL;
+ region->bounds = NULL;
+
+ dm_free(region->program_id);
+ region->program_id = NULL;
+ dm_free(region->aux_data);
+ region->aux_data = NULL;
+ region->region_id = DM_STATS_REGION_NOT_PRESENT;
+}
+
+/*
+ * Destroy every region in the handle and release the regions table.
+ * The countdown loop relies on uint64_t wraparound: after i == 0,
+ * i-- wraps to UINT64_MAX, which equals the
+ * DM_STATS_REGION_NOT_PRESENT sentinel and terminates the loop.
+ */
+static void _stats_regions_destroy(struct dm_stats *dms)
+{
+ struct dm_pool *mem = dms->mem;
+ uint64_t i;
+
+ if (!dms->regions)
+ return;
+
+ /* walk backwards to obey pool order */
+ for (i = dms->max_region; (i != DM_STATS_REGION_NOT_PRESENT); i--) {
+ _stats_histograms_destroy(dms->hist_mem, &dms->regions[i]);
+ _stats_region_destroy(&dms->regions[i]);
+ }
+
+ dm_pool_free(mem, dms->regions);
+ dms->regions = NULL;
+}
+
+static void _stats_group_destroy(struct dm_stats_group *group)
+{
+ if (!_stats_group_present(group))
+ return;
+
+ group->histogram = NULL;
+
+ if (group->alias) {
+ dm_free((char *) group->alias);
+ group->alias = NULL;
+ }
+ if (group->regions) {
+ dm_bitset_destroy(group->regions);
+ group->regions = NULL;
+ }
+ group->group_id = DM_STATS_GROUP_NOT_PRESENT;
+}
+
+/*
+ * Destroy every group in the handle and release the groups table.
+ * Walks backwards (with the same uint64_t wraparound termination as
+ * _stats_regions_destroy) to respect pool allocation order.
+ */
+static void _stats_groups_destroy(struct dm_stats *dms)
+{
+ uint64_t i;
+
+ if (!dms->groups)
+ return;
+
+ for (i = dms->max_region; (i != DM_STATS_REGION_NOT_PRESENT); i--)
+ _stats_group_destroy(&dms->groups[i]);
+ dm_pool_free(dms->group_mem, dms->groups);
+ dms->groups = NULL;
+}
+
+/*
+ * Copy the handle's device binding (name, uuid, or major/minor, in
+ * that priority order) onto a dm_task.  Returns the dm_task_set_*
+ * result, or 0 with a stack trace if the handle is unbound.
+ * NOTE(review): major must be > 0 to count as bound — confirm that
+ * major 0 can never identify a dm device.
+ */
+static int _set_stats_device(struct dm_stats *dms, struct dm_task *dmt)
+{
+ if (dms->bind_name)
+ return dm_task_set_name(dmt, dms->bind_name);
+ if (dms->bind_uuid)
+ return dm_task_set_uuid(dmt, dms->bind_uuid);
+ if (dms->bind_major > 0)
+ return dm_task_set_major(dmt, dms->bind_major)
+ && dm_task_set_minor(dmt, dms->bind_minor);
+ return_0;
+}
+
+static int _stats_bound(const struct dm_stats *dms)
+{
+ if (dms->bind_major > 0 || dms->bind_name || dms->bind_uuid)
+ return 1;
+ /* %p format specifier expects a void pointer. */
+ log_error("Stats handle at %p is not bound.", dms);
+ return 0;
+}
+
+/*
+ * Drop the handle's current device binding.  bind_name/bind_uuid are
+ * pool allocations; the cached display name was dm_strdup'd and is
+ * freed with dm_free.
+ */
+static void _stats_clear_binding(struct dm_stats *dms)
+{
+ if (dms->bind_name)
+ dm_pool_free(dms->mem, dms->bind_name);
+ if (dms->bind_uuid)
+ dm_pool_free(dms->mem, dms->bind_uuid);
+ dm_free((char *) dms->name);
+
+ dms->bind_name = dms->bind_uuid = NULL;
+ dms->bind_major = dms->bind_minor = -1;
+ dms->name = NULL;
+}
+
+/*
+ * Bind the handle to a device by major/minor number, discarding any
+ * previous binding and all cached region/group tables.
+ * Always returns 1.
+ */
+int dm_stats_bind_devno(struct dm_stats *dms, int major, int minor)
+{
+ _stats_clear_binding(dms);
+ _stats_regions_destroy(dms);
+ _stats_groups_destroy(dms);
+
+ dms->bind_major = major;
+ dms->bind_minor = minor;
+
+ return 1;
+}
+
+/*
+ * Bind the handle to a device by dm name, discarding any previous
+ * binding and all cached region/group tables.
+ * Returns 1 on success, 0 if the name cannot be duplicated.
+ */
+int dm_stats_bind_name(struct dm_stats *dms, const char *name)
+{
+ _stats_clear_binding(dms);
+ _stats_regions_destroy(dms);
+ _stats_groups_destroy(dms);
+
+ if (!(dms->bind_name = dm_pool_strdup(dms->mem, name)))
+ return_0;
+
+ return 1;
+}
+
+/*
+ * Bind the handle to a device by dm UUID, discarding any previous
+ * binding and all cached region/group tables.
+ * Returns 1 on success, 0 if the uuid cannot be duplicated.
+ */
+int dm_stats_bind_uuid(struct dm_stats *dms, const char *uuid)
+{
+ _stats_clear_binding(dms);
+ _stats_regions_destroy(dms);
+ _stats_groups_destroy(dms);
+
+ if (!(dms->bind_uuid = dm_pool_strdup(dms->mem, uuid)))
+ return_0;
+
+ return 1;
+}
+
+int dm_stats_bind_from_fd(struct dm_stats *dms, int fd)
+{
+ int major, minor;
+ struct stat buf;
+
+ if (fstat(fd, &buf)) {
+ log_error("fstat failed for fd %d.", fd);
+ return 0;
+ }
+
+ major = (int) MAJOR(buf.st_dev);
+ minor = (int) MINOR(buf.st_dev);
+
+ if (!dm_stats_bind_devno(dms, major, minor))
+ return_0;
+ return 1;
+}
+
+/*
+ * Return 1 if precise_timestamps is usable.  dms may be NULL (the
+ * public capability wrappers pass NULL); a handle already flagged
+ * precise short-circuits the driver version query.
+ */
+static int _stats_check_precise_timestamps(const struct dm_stats *dms)
+{
+ /* Already checked? */
+ if (dms && dms->precise)
+ return 1;
+
+ return dm_message_supports_precise_timestamps();
+}
+
+/* Report whether the driver supports precise_timestamps regions. */
+int dm_stats_driver_supports_precise(void)
+{
+ return _stats_check_precise_timestamps(NULL);
+}
+
+/*
+ * Report whether the driver supports latency histograms.
+ * NOTE(review): this deliberately reuses the precise_timestamps
+ * check — presumably both features share the same driver version
+ * gate; confirm against the dm statistics driver history.
+ */
+int dm_stats_driver_supports_histogram(void)
+{
+ return _stats_check_precise_timestamps(NULL);
+}
+
+/*
+ * Write the comma-separated list of histogram boundaries into
+ * hist_arg (a buffer of hist_len bytes sized by _get_hist_arg),
+ * dividing each bound by scale.  Returns 1 on success, 0 if a
+ * boundary does not fit.
+ */
+static int _fill_hist_arg(char *hist_arg, size_t hist_len, uint64_t scale,
+ struct dm_histogram *bounds)
+{
+ int i, l, len = 0, nr_bins;
+ char *arg = hist_arg;
+ uint64_t value;
+
+ nr_bins = bounds->nr_bins;
+
+ for (i = 0; i < nr_bins; i++) {
+ value = bounds->bins[i].upper / scale;
+ /* No ',' after the final boundary. */
+ if ((l = dm_snprintf(arg, hist_len - len, FMTu64"%s", value,
+ (i == (nr_bins - 1)) ? "" : ",")) < 0)
+ return_0;
+ len += l;
+ arg += l;
+ }
+ return 1;
+}
+
+/*
+ * Size and allocate the buffer for a histogram boundary string:
+ * one digit-count (via log10 of the scaled bound) per boundary,
+ * plus separators and a terminating NUL.  Stores the length in
+ * *len and returns a zeroed dm_zalloc'd buffer, or NULL.
+ * NOTE(review): a bound of 0 would make log10() diverge — presumably
+ * bounds are validated non-zero upstream; confirm.
+ */
+static void *_get_hist_arg(struct dm_histogram *bounds, uint64_t scale,
+ size_t *len)
+{
+ struct dm_histogram_bin *entry, *bins;
+ size_t hist_len = 1; /* terminating '\0' */
+ double value;
+
+ entry = bins = bounds->bins;
+
+ /* Walk bounds from the last entry down to the first. */
+ entry += bounds->nr_bins - 1;
+ while(entry >= bins) {
+ value = (double) (entry--)->upper;
+ /* Use lround to avoid size_t -> double cast warning. */
+ hist_len += 1 + (size_t) lround(log10(value / scale));
+ if (entry != bins)
+ hist_len++; /* ',' */
+ }
+
+ *len = hist_len;
+
+ return dm_zalloc(hist_len);
+}
+
+static char *_build_histogram_arg(struct dm_histogram *bounds, int *precise)
+{
+ struct dm_histogram_bin *entry, *bins;
+ size_t hist_len;
+ char *hist_arg;
+ uint64_t scale;
+
+ entry = bins = bounds->bins;
+
+ /* Empty histogram is invalid. */
+ if (!bounds->nr_bins) {
+ log_error("Cannot format empty histogram description.");
+ return NULL;
+ }
+
+ /* Validate entries and set *precise if precision < 1ms. */
+ entry += bounds->nr_bins - 1;
+ while (entry >= bins) {
+ if (entry != bins) {
+ if (entry->upper < (entry - 1)->upper) {
+ log_error("Histogram boundaries must be in "
+ "order of increasing magnitude.");
+ return 0;
+ }
+ }
+
+ /*
+ * Only enable precise_timestamps automatically if any
+ * value in the histogram bounds uses precision < 1ms.
+ */
+ if (((entry--)->upper % NSEC_PER_MSEC) && !*precise)
+ *precise = 1;
+ }
+
+ scale = (*precise) ? 1 : NSEC_PER_MSEC;
+
+ /* Calculate hist_len and allocate a character buffer. */
+ if (!(hist_arg = _get_hist_arg(bounds, scale, &hist_len))) {
+ log_error("Could not allocate memory for histogram argument.");
+ return 0;
+ }
+
+ /* Fill hist_arg with boundary strings. */
+ if (!_fill_hist_arg(hist_arg, hist_len, scale, bounds))
+ goto_bad;
+
+ return hist_arg;
+
+bad:
+ log_error("Could not build histogram arguments.");
+ dm_free(hist_arg);
+
+ return NULL;
+}
+
+/*
+ * Send a @stats_* message to the device bound to dms.
+ * Returns the completed dm_task (which the caller must destroy after
+ * reading the response), or NULL on error.
+ */
+static struct dm_task *_stats_send_message(struct dm_stats *dms, char *msg)
+{
+ struct dm_task *dmt;
+
+ if (!(dmt = dm_task_create(DM_DEVICE_TARGET_MSG)))
+ return_0;
+
+ if (!_set_stats_device(dms, dmt))
+ goto_bad;
+
+ if (!dm_task_set_message(dmt, msg))
+ goto_bad;
+
+ if (!dm_task_run(dmt))
+ goto_bad;
+
+ return dmt;
+
+bad:
+ dm_task_destroy(dmt);
+ return NULL;
+}
+
+/*
+ * Cache the dm device_name for the device bound to dms.
+ */
+/*
+ * Resolve and cache the dm device name for the bound device via a
+ * DM_DEVICE_INFO task.  A no-op if the name is already cached.
+ * Returns 1 on success, 0 on error.
+ */
+static int _stats_set_name_cache(struct dm_stats *dms)
+{
+ struct dm_task *dmt;
+
+ if (dms->name)
+ return 1;
+
+ if (!(dmt = dm_task_create(DM_DEVICE_INFO)))
+ return_0;
+
+ if (!_set_stats_device(dms, dmt))
+ goto_bad;
+
+ if (!dm_task_run(dmt))
+ goto_bad;
+
+ /* Keep a private dm_strdup'd copy; the task is destroyed below. */
+ if (!(dms->name = dm_strdup(dm_task_get_name(dmt))))
+ goto_bad;
+
+ dm_task_destroy(dmt);
+
+ return 1;
+
+bad:
+ log_error("Could not retrieve device-mapper name for device.");
+ dm_task_destroy(dmt);
+ return 0;
+}
+
+/*
+ * update region group_id values
+ */
+/*
+ * Propagate group membership onto the regions table: for every
+ * present group, stamp its group_id into each member region.
+ */
+static void _stats_update_groups(struct dm_stats *dms)
+{
+ struct dm_stats_group *group;
+ uint64_t group_id, i;
+
+ for (group_id = 0; group_id < dms->max_region + 1; group_id++) {
+ if (!_stats_group_id_present(dms, group_id))
+ continue;
+
+ group = &dms->groups[group_id];
+
+ /* dm_bit_get_first/next return -1 at end; the uint64_t
+ * conversion makes that match the NOT_PRESENT sentinel. */
+ for (i = dm_bit_get_first(group->regions);
+ i != DM_STATS_GROUP_NOT_PRESENT;
+ i = dm_bit_get_next(group->regions, i))
+ dms->regions[i].group_id = group_id;
+ }
+}
+
+/*
+ * Drop any group members that reference regions which no longer
+ * exist, warning for each stale region_id.
+ * NOTE(review): the "i > 0" bound means a member with region_id 0
+ * (and, if the group leader is region 0, the whole member list) is
+ * never validated — dm_bit_get_first returns -1 for an empty set,
+ * so "i >= 0" may have been intended; confirm.
+ */
+static void _check_group_regions_present(struct dm_stats *dms,
+ struct dm_stats_group *group)
+{
+ dm_bitset_t regions = group->regions;
+ int64_t i, group_id;
+
+ group_id = i = dm_bit_get_first(regions);
+
+ for (; i > 0; i = dm_bit_get_next(regions, i))
+ if (!_stats_region_present(&dms->regions[i])) {
+ log_warn("Group descriptor " FMTd64 " contains "
+ "non-existent region_id " FMTd64 ".",
+ group_id, i);
+ dm_bit_clear(regions, i);
+ }
+}
+
+/*
+ * Parse a DMS_GROUP group descriptor embedded in a region's aux_data.
+ *
+ * DMS_GROUP="ALIAS:MEMBERS"
+ *
+ * ALIAS: group alias
+ * MEMBERS: list of group member region ids.
+ *
+ */
+#define DMS_GROUP_TAG "DMS_GROUP="
+#define DMS_GROUP_TAG_LEN (sizeof(DMS_GROUP_TAG) - 1)
+#define DMS_GROUP_SEP ':'
+#define DMS_AUX_SEP "#"
+
+/*
+ * Parse a DMS_GROUP="alias:members" descriptor from region->aux_data
+ * into *group, strip the descriptor from the region's aux_data and
+ * leave only the user portion behind.  A region without a descriptor
+ * is not an error.  Returns 1 on success, 0 on failure.
+ */
+static int _parse_aux_data_group(struct dm_stats *dms,
+ struct dm_stats_region *region,
+ struct dm_stats_group *group)
+{
+ char *alias, *c, *end;
+ dm_bitset_t regions;
+
+ memset(group, 0, sizeof(*group));
+ group->group_id = DM_STATS_GROUP_NOT_PRESENT;
+
+ /* find start of group tag */
+ c = strstr(region->aux_data, DMS_GROUP_TAG);
+ if (!c)
+ return 1; /* no group is not an error */
+
+ alias = c + strlen(DMS_GROUP_TAG);
+
+ c = strchr(c, DMS_GROUP_SEP);
+
+ if (!c) {
+ log_error("Found malformed group tag while reading aux_data");
+ return 0;
+ }
+
+ /* terminate alias and advance to members */
+ *(c++) = '\0';
+
+ log_debug("Read alias '%s' from aux_data", alias);
+
+ /* Dead check: c came from a non-NULL strchr result and was only
+ * incremented, so it cannot be NULL here. */
+ if (!c) {
+ log_error("Found malformed group descriptor while "
+ "reading aux_data, expected '%c'", DMS_GROUP_SEP);
+ return 0;
+ }
+
+ /* if user aux_data follows make sure we have a terminated
+ * string to pass to dm_bitset_parse_list().
+ */
+ end = strstr(c, DMS_AUX_SEP);
+ if (!end)
+ end = c + strlen(c);
+ *(end++) = '\0';
+
+ if (!(regions = dm_bitset_parse_list(c, NULL, 0))) {
+ log_error("Could not parse member list while "
+ "reading group aux_data");
+ return 0;
+ }
+
+ /* The group leader must be the region carrying the descriptor. */
+ group->group_id = dm_bit_get_first(regions);
+ if (group->group_id != region->region_id) {
+ log_error("Found invalid group descriptor in region " FMTu64
+ " aux_data.", region->region_id);
+ group->group_id = DM_STATS_GROUP_NOT_PRESENT;
+ goto bad;
+ }
+
+ group->regions = regions;
+ group->alias = NULL;
+ if (strlen(alias)) {
+ group->alias = dm_strdup(alias);
+ if (!group->alias) {
+ log_error("Could not allocate memory for group alias");
+ goto bad;
+ }
+ }
+
+ /* separate group tag from user aux_data */
+ if ((strlen(end) > 1) || strncmp(end, "-", 1))
+ c = dm_strdup(end);
+ else
+ c = dm_strdup("");
+
+ if (!c) {
+ log_error("Could not allocate memory for user aux_data");
+ goto bad_alias;
+ }
+
+ dm_free(region->aux_data);
+ region->aux_data = c;
+
+ log_debug("Found group_id " FMTu64 ": alias=\"%s\"", group->group_id,
+ (group->alias) ? group->alias : "");
+
+ return 1;
+
+bad_alias:
+ dm_free((char *) group->alias);
+bad:
+ dm_bitset_destroy(regions);
+ return 0;
+}
+
+/*
+ * Parse a histogram specification returned by the kernel in a
+ * @stats_list response.
+ */
+static int _stats_parse_histogram_spec(struct dm_stats *dms,
+ struct dm_stats_region *region,
+ const char *histogram)
+{
+ static const char _valid_chars[] = "0123456789,";
+ uint64_t scale = region->timescale, this_val = 0;
+ struct dm_pool *mem = dms->hist_mem;
+ struct dm_histogram_bin cur;
+ struct dm_histogram hist;
+ int nr_bins = 1;
+ const char *c, *v, *val_start;
+ char *p, *endptr = NULL;
+
+ /* Advance past "histogram:". */
+ histogram = strchr(histogram, ':');
+ if (!histogram) {
+ log_error("Could not parse histogram description.");
+ return 0;
+ }
+ histogram++;
+
+ /* @stats_list rows are newline terminated. */
+ if ((p = strchr(histogram, '\n')))
+ *p = '\0';
+
+ if (!dm_pool_begin_object(mem, sizeof(cur)))
+ return_0;
+
+ memset(&hist, 0, sizeof(hist));
+
+ hist.nr_bins = 0; /* fix later */
+ hist.region = region;
+ hist.dms = dms;
+
+ if (!dm_pool_grow_object(mem, &hist, sizeof(hist)))
+ goto_bad;
+
+ c = histogram;
+ do {
+ for (v = _valid_chars; *v; v++)
+ if (*c == *v)
+ break;
+ if (!*v) {
+ stack;
+ goto badchar;
+ }
+
+ if (*c == ',') {
+ log_error("Invalid histogram description: %s",
+ histogram);
+ goto bad;
+ } else {
+ val_start = c;
+ endptr = NULL;
+
+ errno = 0;
+ this_val = strtoull(val_start, &endptr, 10);
+ if (errno || !endptr) {
+ log_error("Could not parse histogram boundary.");
+ goto bad;
+ }
+
+ c = endptr; /* Advance to units, comma, or end. */
+
+ if (*c == ',')
+ c++;
+ else if (*c || (*c == ' ')) { /* Expected ',' or NULL. */
+ stack;
+ goto badchar;
+ }
+
+ if (*c == ',')
+ c++;
+
+ cur.upper = scale * this_val;
+ cur.count = 0;
+
+ if (!dm_pool_grow_object(mem, &cur, sizeof(cur)))
+ goto_bad;
+
+ nr_bins++;
+ }
+ } while (*c && (*c != ' '));
+
+ /* final upper bound. */
+ cur.upper = UINT64_MAX;
+ if (!dm_pool_grow_object(mem, &cur, sizeof(cur)))
+ goto_bad;
+
+ region->bounds = dm_pool_end_object(mem);
+
+ if (!region->bounds)
+ return_0;
+
+ region->bounds->nr_bins = nr_bins;
+
+ log_debug("Added region histogram spec with %d entries.", nr_bins);
+ return 1;
+
+badchar:
+ log_error("Invalid character in histogram: '%c' (0x%x)", *c, *c);
+bad:
+ dm_pool_abandon_object(mem);
+ return 0;
+}
+
+/*
+ * Parse one @stats_list row into *region:
+ *
+ * <region_id>: <start>+<len> <step> <program_id> <aux_data> [args]
+ *
+ * Fills in ids, geometry, timescale, optional histogram bounds and
+ * dm_strdup'd program_id/aux_data.  Returns 1 on success, 0 on error.
+ */
+static int _stats_parse_list_region(struct dm_stats *dms,
+ struct dm_stats_region *region, char *line)
+{
+ char *p = NULL, string_data[STATS_ROW_BUF_LEN];
+ char *program_id, *aux_data, *stats_args;
+ char *empty_string = (char *) "";
+ int r;
+
+ memset(string_data, 0, sizeof(string_data));
+
+ /*
+ * Parse fixed fields, line format:
+ *
+ * <region_id>: <start_sector>+<length> <step> <string data>
+ *
+ * Maximum string data size is 4096 - 1 bytes.
+ */
+ r = sscanf(line, FMTu64 ": " FMTu64 "+" FMTu64 " " FMTu64 " %4095c",
+ &region->region_id, &region->start, &region->len,
+ &region->step, string_data);
+
+ if (r != 5)
+ return_0;
+
+ /* program_id is guaranteed to be first. */
+ program_id = string_data;
+
+ /*
+ * FIXME: support embedded '\ ' in string data:
+ * s/strchr/_find_unescaped_space()/
+ */
+ if ((p = strchr(string_data, ' '))) {
+ /* terminate program_id string. */
+ *p = '\0';
+ /* "-" denotes an empty program_id. */
+ if (!strncmp(program_id, "-", 1))
+ program_id = empty_string;
+ aux_data = p + 1;
+ if ((p = strchr(aux_data, ' '))) {
+ /* terminate aux_data string. */
+ *p = '\0';
+ stats_args = p + 1;
+ } else
+ stats_args = empty_string;
+
+ /* no aux_data? */
+ if (!strncmp(aux_data, "-", 1))
+ aux_data = empty_string;
+ else
+ /* remove trailing newline */
+ aux_data[strlen(aux_data) - 1] = '\0';
+ } else
+ aux_data = stats_args = empty_string;
+
+ /* precise regions report times in ns; others in ms. */
+ if (strstr(stats_args, PRECISE_ARG))
+ region->timescale = 1;
+ else
+ region->timescale = NSEC_PER_MSEC;
+
+ if ((p = strstr(stats_args, HISTOGRAM_ARG))) {
+ if (!_stats_parse_histogram_spec(dms, region, p))
+ return_0;
+ } else
+ region->bounds = NULL;
+
+ /* clear aggregate cache */
+ region->histogram = NULL;
+
+ region->group_id = DM_STATS_GROUP_NOT_PRESENT;
+
+ if (!(region->program_id = dm_strdup(program_id)))
+ return_0;
+ if (!(region->aux_data = dm_strdup(aux_data))) {
+ dm_free(region->program_id);
+ return_0;
+ }
+
+ region->counters = NULL;
+ return 1;
+}
+
+/*
+ * Parse a complete @stats_list response into the handle's region and
+ * group tables.  Gaps in the kernel's region_id space are filled
+ * with "not present" placeholder entries so the tables can be
+ * indexed directly by region_id.  Returns 1 on success, 0 on error.
+ */
+static int _stats_parse_list(struct dm_stats *dms, const char *resp)
+{
+ uint64_t max_region = 0, nr_regions = 0;
+ struct dm_stats_region cur, fill;
+ struct dm_stats_group cur_group;
+ struct dm_pool *mem = dms->mem, *group_mem = dms->group_mem;
+ char line[STATS_ROW_BUF_LEN];
+ FILE *list_rows;
+
+ if (!resp) {
+ log_error("Could not parse NULL @stats_list response.");
+ return 0;
+ }
+
+ _stats_regions_destroy(dms);
+ _stats_groups_destroy(dms);
+
+ /* no regions */
+ if (!strlen(resp)) {
+ dms->nr_regions = dms->max_region = 0;
+ dms->regions = NULL;
+ return 1;
+ }
+
+ /*
+ * dm_task_get_message_response() returns a 'const char *' but
+ * since fmemopen also permits "w" it expects a 'char *'.
+ */
+ if (!(list_rows = fmemopen((char *)resp, strlen(resp), "r")))
+ return_0;
+
+ /* begin region table */
+ if (!dm_pool_begin_object(mem, 1024))
+ goto_bad;
+
+ /* begin group table */
+ if (!dm_pool_begin_object(group_mem, 32))
+ goto_bad;
+
+ while(fgets(line, sizeof(line), list_rows)) {
+
+ cur_group.group_id = DM_STATS_GROUP_NOT_PRESENT;
+ cur_group.regions = NULL;
+ cur_group.alias = NULL;
+
+ if (!_stats_parse_list_region(dms, &cur, line))
+ goto_bad;
+
+ /* handle holes in the list of region_ids */
+ if (cur.region_id > max_region) {
+ memset(&fill, 0, sizeof(fill));
+ memset(&cur_group, 0, sizeof(cur_group));
+ fill.region_id = DM_STATS_REGION_NOT_PRESENT;
+ cur_group.group_id = DM_STATS_GROUP_NOT_PRESENT;
+ do {
+ if (!dm_pool_grow_object(mem, &fill, sizeof(fill)))
+ goto_bad;
+ if (!dm_pool_grow_object(group_mem, &cur_group,
+ sizeof(cur_group)))
+ goto_bad;
+ } while (max_region++ < (cur.region_id - 1));
+ }
+
+ /* A bad group descriptor is logged but not fatal. */
+ if (cur.aux_data)
+ if (!_parse_aux_data_group(dms, &cur, &cur_group))
+ log_error("Failed to parse group descriptor "
+ "from region_id " FMTu64 " aux_data:"
+ "'%s'", cur.region_id, cur.aux_data);
+ /* continue */
+
+ if (!dm_pool_grow_object(mem, &cur, sizeof(cur)))
+ goto_bad;
+
+ if (!dm_pool_grow_object(group_mem, &cur_group,
+ sizeof(cur_group)))
+ goto_bad;
+
+ max_region++;
+ nr_regions++;
+ }
+
+ if (!nr_regions)
+ /* no region data read from @stats_list */
+ goto bad;
+
+ dms->nr_regions = nr_regions;
+ dms->max_region = max_region - 1;
+ dms->regions = dm_pool_end_object(mem);
+ dms->groups = dm_pool_end_object(group_mem);
+
+ /* Validate group membership, then stamp group_ids on regions. */
+ dm_stats_foreach_group(dms)
+ _check_group_regions_present(dms, &dms->groups[dms->cur_group]);
+
+ _stats_update_groups(dms);
+
+ if (fclose(list_rows))
+ stack;
+
+ return 1;
+
+bad:
+ if (fclose(list_rows))
+ stack;
+ dm_pool_abandon_object(mem);
+ dm_pool_abandon_object(group_mem);
+
+ return 0;
+}
+
+/*
+ * Send @stats_list to the bound device and (re)build the handle's
+ * region and group tables from the response.  program_id of NULL
+ * uses the handle's default; an empty string lists all regions.
+ * Returns 1 on success, 0 on error.
+ */
+int dm_stats_list(struct dm_stats *dms, const char *program_id)
+{
+ char msg[STATS_MSG_BUF_LEN];
+ struct dm_task *dmt;
+ int r;
+
+ if (!_stats_bound(dms))
+ return_0;
+
+ /* allow zero-length program_id for list */
+ if (!program_id)
+ program_id = dms->program_id;
+
+ if (!_stats_set_name_cache(dms))
+ return_0;
+
+ if (dms->regions)
+ _stats_regions_destroy(dms);
+
+ r = dm_snprintf(msg, sizeof(msg), "@stats_list %s", program_id);
+
+ if (r < 0) {
+ log_error("Failed to prepare stats message.");
+ return 0;
+ }
+
+ if (!(dmt = _stats_send_message(dms, msg)))
+ return_0;
+
+ if (!_stats_parse_list(dms, dm_task_get_message_response(dmt))) {
+ log_error("Could not parse @stats_list response.");
+ goto bad;
+ }
+
+ dm_task_destroy(dmt);
+ return 1;
+
+bad:
+ dm_task_destroy(dmt);
+ return 0;
+}
+
+/*
+ * Parse histogram data returned from a @stats_print operation.
+ */
+static int _stats_parse_histogram(struct dm_pool *mem, char *hist_str,
+ struct dm_histogram **histogram,
+ struct dm_stats_region *region)
+{
+ static const char _valid_chars[] = "0123456789:";
+ struct dm_histogram *bounds = region->bounds;
+ struct dm_histogram hist = {
+ .nr_bins = region->bounds->nr_bins
+ };
+ const char *c, *v, *val_start;
+ struct dm_histogram_bin cur;
+ uint64_t sum = 0, this_val;
+ char *endptr = NULL;
+ int bin = 0;
+
+ c = hist_str;
+
+ if (!dm_pool_begin_object(mem, sizeof(cur)))
+ return_0;
+
+ if (!dm_pool_grow_object(mem, &hist, sizeof(hist)))
+ goto_bad;
+
+ do {
+ memset(&cur, 0, sizeof(cur));
+ for (v = _valid_chars; *v; v++)
+ if (*c == *v)
+ break;
+ if (!*v)
+ goto badchar;
+
+ if (*c == ',')
+ goto badchar;
+ else {
+ val_start = c;
+ endptr = NULL;
+
+ errno = 0;
+ this_val = strtoull(val_start, &endptr, 10);
+ if (errno || !endptr) {
+ log_error("Could not parse histogram value.");
+ goto bad;
+ }
+ c = endptr; /* Advance to colon, or end. */
+
+ if (*c == ':')
+ c++;
+ else if (*c & (*c != '\n'))
+ /* Expected ':', '\n', or NULL. */
+ goto badchar;
+
+ if (*c == ':')
+ c++;
+
+ cur.upper = bounds->bins[bin].upper;
+ cur.count = this_val;
+ sum += this_val;
+
+ if (!dm_pool_grow_object(mem, &cur, sizeof(cur)))
+ goto_bad;
+
+ bin++;
+ }
+ } while (*c && (*c != '\n'));
+
+ log_debug("Added region histogram data with %d entries.", hist.nr_bins);
+
+ *histogram = dm_pool_end_object(mem);
+ (*histogram)->sum = sum;
+
+ return 1;
+
+badchar:
+ log_error("Invalid character in histogram data: '%c' (0x%x)", *c, *c);
+bad:
+ dm_pool_abandon_object(mem);
+ return 0;
+}
+
+/*
+ * Parse a complete @stats_print response into region->counters,
+ * one dm_stats_counters entry per area, scaling time counters by
+ * timescale and attaching per-area histogram data when the region
+ * has histogram bounds.  Derives region->start/len/step from the
+ * first and last rows.  Returns 1 on success, 0 on error.
+ */
+static int _stats_parse_region(struct dm_stats *dms, const char *resp,
+ struct dm_stats_region *region,
+ uint64_t timescale)
+{
+ struct dm_histogram *hist = NULL;
+ struct dm_pool *mem = dms->mem;
+ struct dm_stats_counters cur;
+ FILE *stats_rows = NULL;
+ uint64_t start = 0, len = 0;
+ char row[STATS_ROW_BUF_LEN];
+ int r;
+
+ if (!resp) {
+ log_error("Could not parse empty @stats_print response.");
+ return 0;
+ }
+
+ /* UINT64_MAX marks "no row parsed yet"; see below. */
+ region->start = UINT64_MAX;
+
+ if (!dm_pool_begin_object(mem, 512))
+ goto_bad;
+
+ /*
+ * dm_task_get_message_response() returns a 'const char *' but
+ * since fmemopen also permits "w" it expects a 'char *'.
+ */
+ stats_rows = fmemopen((char *)resp, strlen(resp), "r");
+ if (!stats_rows)
+ goto_bad;
+
+ /*
+ * Output format for each step-sized area of a region:
+ *
+ * <start_sector>+<length> counters
+ *
+ * The first 11 counters have the same meaning as
+ * /sys/block/ * /stat or /proc/diskstats.
+ *
+ * Please refer to Documentation/iostats.txt for details.
+ *
+ * 1. the number of reads completed
+ * 2. the number of reads merged
+ * 3. the number of sectors read
+ * 4. the number of milliseconds spent reading
+ * 5. the number of writes completed
+ * 6. the number of writes merged
+ * 7. the number of sectors written
+ * 8. the number of milliseconds spent writing
+ * 9. the number of I/Os currently in progress
+ * 10. the number of milliseconds spent doing I/Os
+ * 11. the weighted number of milliseconds spent doing I/Os
+ *
+ * Additional counters:
+ * 12. the total time spent reading in milliseconds
+ * 13. the total time spent writing in milliseconds
+ *
+ */
+ while (fgets(row, sizeof(row), stats_rows)) {
+ r = sscanf(row, FMTu64 "+" FMTu64 /* start+len */
+ /* reads */
+ FMTu64 " " FMTu64 " " FMTu64 " " FMTu64 " "
+ /* writes */
+ FMTu64 " " FMTu64 " " FMTu64 " " FMTu64 " "
+ /* in flight & io nsecs */
+ FMTu64 " " FMTu64 " " FMTu64 " "
+ /* tot read/write nsecs */
+ FMTu64 " " FMTu64, &start, &len,
+ &cur.reads, &cur.reads_merged, &cur.read_sectors,
+ &cur.read_nsecs,
+ &cur.writes, &cur.writes_merged, &cur.write_sectors,
+ &cur.write_nsecs,
+ &cur.io_in_progress,
+ &cur.io_nsecs, &cur.weighted_io_nsecs,
+ &cur.total_read_nsecs, &cur.total_write_nsecs);
+ if (r != 15) {
+ log_error("Could not parse @stats_print row.");
+ goto bad;
+ }
+
+ /* scale time values up if needed */
+ if (timescale != 1) {
+ cur.read_nsecs *= timescale;
+ cur.write_nsecs *= timescale;
+ cur.io_nsecs *= timescale;
+ cur.weighted_io_nsecs *= timescale;
+ cur.total_read_nsecs *= timescale;
+ cur.total_write_nsecs *= timescale;
+ }
+
+ if (region->bounds) {
+ /* Find first histogram separator. */
+ char *hist_str = strchr(row, ':');
+ if (!hist_str) {
+ log_error("Could not parse histogram value.");
+ goto bad;
+ }
+ /* Find space preceding histogram. */
+ while (hist_str && *(hist_str - 1) != ' ')
+ hist_str--;
+
+ /* Use a separate pool for histogram objects since we
+ * are growing the area table and each area's histogram
+ * table simultaneously.
+ */
+ if (!_stats_parse_histogram(dms->hist_mem, hist_str,
+ &hist, region))
+ goto_bad;
+ hist->dms = dms;
+ hist->region = region;
+ }
+
+ cur.histogram = hist;
+
+ if (!dm_pool_grow_object(mem, &cur, sizeof(cur)))
+ goto_bad;
+
+ if (region->start == UINT64_MAX) {
+ region->start = start;
+ region->step = len; /* area size is always uniform. */
+ }
+ }
+
+ if (region->start == UINT64_MAX)
+ /* no area data read from @stats_print */
+ goto bad;
+
+ /* Region length spans from the first to the last parsed area. */
+ region->len = (start + len) - region->start;
+ region->timescale = timescale;
+ region->counters = dm_pool_end_object(mem);
+
+ if (fclose(stats_rows))
+ stack;
+
+ return 1;
+
+bad:
+ if (stats_rows)
+ if (fclose(stats_rows))
+ stack;
+ dm_pool_abandon_object(mem);
+
+ return 0;
+}
+
/*
 * Advance the walk cursor to the next present (and non-skipped) object.
 *
 * If an area walk is active and more areas remain in the current region,
 * only the area cursor is advanced. Otherwise the area cursor is reset
 * and the region cursor scans forward to the next region that is present
 * and not excluded by DM_STATS_WALK_SKIP_SINGLE_AREA.
 *
 * cur_g is unused here; it is part of the common cursor signature shared
 * with the other walk helpers.
 */
static void _stats_walk_next_present(const struct dm_stats *dms,
				     uint64_t *flags,
				     uint64_t *cur_r, uint64_t *cur_a,
				     uint64_t *cur_g)
{
	struct dm_stats_region *cur = NULL;

	/* start of walk: region loop advances *cur_r to 0. */
	if (*cur_r != DM_STATS_REGION_NOT_PRESENT)
		cur = &dms->regions[*cur_r];

	/* within current region? */
	if (cur && (*flags & DM_STATS_WALK_AREA)) {
		if (++(*cur_a) < _nr_areas_region(cur))
			return;
		else
			*cur_a = 0;
	}

	/* advance to next present, non-skipped region or end */
	while (++(*cur_r) <= dms->max_region) {
		cur = &dms->regions[*cur_r];
		if (!_stats_region_present(cur))
			continue;
		/* single-area regions are skipped when SKIP_SINGLE_AREA is
		 * set and areas are not being visited individually. */
		if ((*flags & DM_STATS_WALK_SKIP_SINGLE_AREA))
			if (!(*flags & DM_STATS_WALK_AREA))
				if (_nr_areas_region(cur) < 2)
					continue;
		/* matching region found */
		break;
	}
	return;
}
+
/*
 * Take one walk step for the supplied cursor: areas first, then whole
 * regions, then groups, as selected by the active walk flags. Each
 * phase's flag is cleared elsewhere (in _stats_walk_end*) as it is
 * exhausted.
 */
static void _stats_walk_next(const struct dm_stats *dms, uint64_t *flags,
			     uint64_t *cur_r, uint64_t *cur_a, uint64_t *cur_g)
{
	if (!dms || !dms->regions)
		return;

	if (*flags & DM_STATS_WALK_AREA) {
		/* advance to next area, region, or end */
		_stats_walk_next_present(dms, flags, cur_r, cur_a, cur_g);
		return;
	}

	if (*flags & DM_STATS_WALK_REGION) {
		/* enable region aggregation */
		*cur_a = DM_STATS_WALK_REGION;
		_stats_walk_next_present(dms, flags, cur_r, cur_a, cur_g);
		return;
	}

	if (*flags & DM_STATS_WALK_GROUP) {
		/* enable group aggregation */
		*cur_r = *cur_a = DM_STATS_WALK_GROUP;
		while (!_stats_group_id_present(dms, ++(*cur_g))
		       && (*cur_g) < dms->max_region + 1)
			; /* advance to next present group or end */
		return;
	}

	log_error("stats_walk_next called with empty walk flags");
}
+
+static void _group_walk_start(const struct dm_stats *dms, uint64_t *flags,
+ uint64_t *cur_r, uint64_t *cur_a, uint64_t *cur_g)
+{
+ if (!(*flags & DM_STATS_WALK_GROUP))
+ return;
+
+ *cur_a = *cur_r = DM_STATS_WALK_GROUP;
+ *cur_g = 0;
+
+ /* advance to next present group or end */
+ while ((*cur_g) <= dms->max_region) {
+ if (_stats_region_is_grouped(dms, *cur_g))
+ break;
+ (*cur_g)++;
+ }
+
+ if (*cur_g > dms->max_region)
+ /* no groups to walk */
+ *flags &= ~DM_STATS_WALK_GROUP;
+}
+
/*
 * Initialise the supplied cursor for a walk with the given flags and
 * position it on the first object. A group-only walk is delegated to
 * _group_walk_start(); otherwise the cursor starts one step before the
 * region table and is advanced onto the first present region.
 */
static void _stats_walk_start(const struct dm_stats *dms, uint64_t *flags,
			      uint64_t *cur_r, uint64_t *cur_a,
			      uint64_t *cur_g)
{
	log_debug("starting stats walk with %s %s %s %s",
		  (*flags & DM_STATS_WALK_AREA) ? "AREA" : "",
		  (*flags & DM_STATS_WALK_REGION) ? "REGION" : "",
		  (*flags & DM_STATS_WALK_GROUP) ? "GROUP" : "",
		  (*flags & DM_STATS_WALK_SKIP_SINGLE_AREA) ? "SKIP" : "");

	if (!dms->regions)
		return;

	/* group-only walk: no area or region phase */
	if (!(*flags & (DM_STATS_WALK_AREA | DM_STATS_WALK_REGION)))
		return _group_walk_start(dms, flags, cur_r, cur_a, cur_g);

	/* initialise cursor state */
	*cur_a = 0;
	*cur_r = DM_STATS_REGION_NOT_PRESENT;
	*cur_g = DM_STATS_GROUP_NOT_PRESENT;

	/* region-only walk: mark the area cursor as aggregated */
	if (!(*flags & DM_STATS_WALK_AREA))
		*cur_a = DM_STATS_WALK_REGION;

	/* advance to first present, non-skipped region */
	_stats_walk_next_present(dms, flags, cur_r, cur_a, cur_g);
}
+
/* All walk flag bits accepted by dm_stats_walk_init(). */
#define DM_STATS_WALK_MASK (DM_STATS_WALK_AREA \
			    | DM_STATS_WALK_REGION \
			    | DM_STATS_WALK_GROUP \
			    | DM_STATS_WALK_SKIP_SINGLE_AREA)
+
+int dm_stats_walk_init(struct dm_stats *dms, uint64_t flags)
+{
+ if (!dms)
+ return_0;
+
+ if (flags & ~DM_STATS_WALK_MASK) {
+ log_error("Unknown value in walk flags: 0x" FMTx64,
+ (uint64_t) (flags & ~DM_STATS_WALK_MASK));
+ return 0;
+ }
+ dms->walk_flags = flags;
+ log_debug("dm_stats_walk_init: initialised flags to " FMTx64, flags);
+ return 1;
+}
+
+void dm_stats_walk_start(struct dm_stats *dms)
+{
+ if (!dms || !dms->regions)
+ return;
+
+ dms->cur_flags = dms->walk_flags;
+
+ _stats_walk_start(dms, &dms->cur_flags,
+ &dms->cur_region, &dms->cur_area,
+ &dms->cur_group);
+}
+
/* Advance the handle's embedded walk cursor by one step. */
void dm_stats_walk_next(struct dm_stats *dms)
{
	_stats_walk_next(dms, &dms->cur_flags,
			 &dms->cur_region, &dms->cur_area,
			 &dms->cur_group);
}
+
/*
 * Skip any remaining areas of the current region and advance the
 * embedded cursor to the next region.
 */
void dm_stats_walk_next_region(struct dm_stats *dms)
{
	/* drop the area phase: continue region-by-region from here */
	dms->cur_flags &= ~DM_STATS_WALK_AREA;
	_stats_walk_next(dms, &dms->cur_flags,
			 &dms->cur_region, &dms->cur_area,
			 &dms->cur_group);
}
+
+/*
+ * Return 1 if any regions remain that are present and not skipped
+ * by the current walk flags or 0 otherwise.
+ */
+static uint64_t _stats_walk_any_unskipped(const struct dm_stats *dms,
+ uint64_t *flags,
+ uint64_t *cur_r, uint64_t *cur_a)
+{
+ struct dm_stats_region *region;
+ uint64_t i;
+
+ if (*cur_r > dms->max_region)
+ return 0;
+
+ for (i = *cur_r; i <= dms->max_region; i++) {
+ region = &dms->regions[i];
+ if (!_stats_region_present(region))
+ continue;
+ if ((*flags & DM_STATS_WALK_SKIP_SINGLE_AREA)
+ && !(*flags & DM_STATS_WALK_AREA))
+ if (_nr_areas_region(region) < 2)
+ continue;
+ return 1;
+ }
+ return 0;
+}
+
/*
 * Handle the end of the area phase of a walk: when no unskipped regions
 * remain, clear DM_STATS_WALK_AREA and transition into the region phase
 * (if requested), and from there into the group phase. The order of
 * flag tests and clears implements the AREA -> REGION -> GROUP state
 * machine and must not be rearranged.
 */
static void _stats_walk_end_areas(const struct dm_stats *dms, uint64_t *flags,
				  uint64_t *cur_r, uint64_t *cur_a,
				  uint64_t *cur_g)
{
	int end = !_stats_walk_any_unskipped(dms, flags, cur_r, cur_a);

	if (!(*flags & DM_STATS_WALK_AREA))
		return;

	if (!end)
		return;

	*flags &= ~DM_STATS_WALK_AREA;
	if (*flags & DM_STATS_WALK_REGION) {
		/* start region walk */
		*cur_a = DM_STATS_WALK_REGION;
		*cur_r = DM_STATS_REGION_NOT_PRESENT;
		_stats_walk_next_present(dms, flags, cur_r, cur_a, cur_g);
		if (!_stats_walk_any_unskipped(dms, flags, cur_r, cur_a)) {
			/* no more regions */
			*flags &= ~DM_STATS_WALK_REGION;
			/* park the cursor at the end of the table */
			if (!(*flags & DM_STATS_WALK_GROUP))
				*cur_r = dms->max_region;
		}
	}

	if (*flags & DM_STATS_WALK_REGION)
		return;

	if (*flags & DM_STATS_WALK_GROUP)
		_group_walk_start(dms, flags, cur_r, cur_a, cur_g);
}
+
/*
 * Test whether the walk for the supplied cursor has finished, performing
 * any pending phase transition (area -> region -> group). Returns 1 when
 * no phase flags remain set (SKIP_SINGLE_AREA alone does not keep a walk
 * alive), 0 while objects remain to visit.
 */
static int _stats_walk_end(const struct dm_stats *dms, uint64_t *flags,
			   uint64_t *cur_r, uint64_t *cur_a, uint64_t *cur_g)
{
	if (*flags & DM_STATS_WALK_AREA) {
		_stats_walk_end_areas(dms, flags, cur_r, cur_a, cur_g);
		goto out;
	}

	if (*flags & DM_STATS_WALK_REGION) {
		if (!_stats_walk_any_unskipped(dms, flags, cur_r, cur_a)) {
			*flags &= ~DM_STATS_WALK_REGION;
			_group_walk_start(dms, flags, cur_r, cur_a, cur_g);
		}
		goto out;
	}

	if (*flags & DM_STATS_WALK_GROUP) {
		/* still within the group table? */
		if (*cur_g <= dms->max_region)
			goto out;
		*flags &= ~DM_STATS_WALK_GROUP;
	}
out:
	return !(*flags & ~DM_STATS_WALK_SKIP_SINGLE_AREA);
}
+
+int dm_stats_walk_end(struct dm_stats *dms)
+{
+ if (!dms)
+ return 1;
+
+ if (_stats_walk_end(dms, &dms->cur_flags,
+ &dms->cur_region, &dms->cur_area,
+ &dms->cur_group)) {
+ dms->cur_flags = dms->walk_flags;
+ return 1;
+ }
+ return 0;
+}
+
/*
 * Classify the object addressed by (region_id, area_id) as a group,
 * an aggregate region, a plain area, or none. The *_CURRENT sentinel
 * values are resolved from the handle's embedded cursor; group and
 * region aggregation are encoded as flag bits within the IDs.
 */
dm_stats_obj_type_t dm_stats_object_type(const struct dm_stats *dms,
					 uint64_t region_id,
					 uint64_t area_id)
{
	uint64_t group_id;

	region_id = (region_id == DM_STATS_REGION_CURRENT)
		     ? dms->cur_region : region_id ;
	area_id = (area_id == DM_STATS_AREA_CURRENT)
		   ? dms->cur_area : area_id ;

	if (region_id == DM_STATS_REGION_NOT_PRESENT)
		/* no region */
		return DM_STATS_OBJECT_TYPE_NONE;

	if (region_id & DM_STATS_WALK_GROUP) {
		if (region_id == DM_STATS_WALK_GROUP)
			/* indirect group_id from cursor */
			group_id = dms->cur_group;
		else
			/* immediate group_id encoded in region_id */
			group_id = region_id & ~DM_STATS_WALK_GROUP;
		if (!_stats_group_id_present(dms, group_id))
			return DM_STATS_OBJECT_TYPE_NONE;
		return DM_STATS_OBJECT_TYPE_GROUP;
	}

	if (region_id > dms->max_region)
		/* end of table */
		return DM_STATS_OBJECT_TYPE_NONE;

	if (area_id & DM_STATS_WALK_REGION)
		/* aggregate region */
		return DM_STATS_OBJECT_TYPE_REGION;

	/* plain region_id and area_id */
	return DM_STATS_OBJECT_TYPE_AREA;
}
+
/* Classify the object the embedded walk cursor currently points at. */
dm_stats_obj_type_t dm_stats_current_object_type(const struct dm_stats *dms)
{
	/* dm_stats_object_type will decode region/area */
	return dm_stats_object_type(dms,
				    DM_STATS_REGION_CURRENT,
				    DM_STATS_AREA_CURRENT);
}
+
+uint64_t dm_stats_get_region_nr_areas(const struct dm_stats *dms,
+ uint64_t region_id)
+{
+ struct dm_stats_region *region = NULL;
+
+ /* groups or aggregate regions cannot be subdivided */
+ if (region_id & DM_STATS_WALK_GROUP)
+ return 1;
+
+ region = &dms->regions[region_id];
+ return _nr_areas_region(region);
+}
+
+uint64_t dm_stats_get_current_nr_areas(const struct dm_stats *dms)
+{
+ /* groups or aggregate regions cannot be subdivided */
+ if (dms->cur_region & DM_STATS_WALK_GROUP)
+ return 1;
+
+ return dm_stats_get_region_nr_areas(dms, dms->cur_region);
+}
+
/*
 * Return the total number of areas across all present regions by
 * performing an area walk with a private cursor.
 *
 * NOTE(review): the walk advances the local cursor variables, but each
 * step sums dm_stats_get_current_nr_areas(dms), which reads the
 * handle's embedded cur_region — verify the two cursors are intended
 * to coincide here.
 */
uint64_t dm_stats_get_nr_areas(const struct dm_stats *dms)
{
	uint64_t nr_areas = 0, flags = DM_STATS_WALK_AREA;
	/* use a separate cursor */
	uint64_t cur_region = 0, cur_area = 0, cur_group = 0;

	/* no regions to visit? */
	if (!dms->regions)
		return 0;

	flags = DM_STATS_WALK_AREA;
	_stats_walk_start(dms, &flags, &cur_region, &cur_area, &cur_group);
	do {
		nr_areas += dm_stats_get_current_nr_areas(dms);
		_stats_walk_next(dms, &flags,
				 &cur_region, &cur_area,
				 &cur_group);
	} while (!_stats_walk_end(dms, &flags,
				  &cur_region, &cur_area,
				  &cur_group));
	return nr_areas;
}
+
/* Return 1 if group_id names a present group in this handle, else 0. */
int dm_stats_group_present(const struct dm_stats *dms, uint64_t group_id)
{
	return _stats_group_id_present(dms, group_id);
}
+
+int dm_stats_get_region_nr_histogram_bins(const struct dm_stats *dms,
+ uint64_t region_id)
+{
+ region_id = (region_id == DM_STATS_REGION_CURRENT)
+ ? dms->cur_region : region_id ;
+
+ /* FIXME: support group histograms if all region bounds match */
+ if (region_id & DM_STATS_WALK_GROUP)
+ return 0;
+
+ if (!dms->regions[region_id].bounds)
+ return 0;
+
+ return dms->regions[region_id].bounds->nr_bins;
+}
+
/*
 * Fill buf with a list of set regions in the regions bitmap. Consecutive
 * ranges of set region IDs are output using "M-N" range notation.
 *
 * The number of bytes consumed is returned or zero on error.
 */
static size_t _stats_group_tag_fill(const struct dm_stats *dms,
				    dm_bitset_t regions,
				    char *buf, size_t buflen)
{
	/* NOTE(review): dms is currently unused; kept for signature
	 * symmetry with _stats_group_tag_len(). */
	int i, j, r, next, last = 0;
	size_t used = 0;

	last = dm_bit_get_last(regions);

	i = dm_bit_get_first(regions);
	for(; i >= 0; i = dm_bit_get_next(regions, i)) {
		/* find range end: walk consecutive set bits */
		j = i;
		do
			next = j + 1;
		while ((j = dm_bit_get_next(regions, j)) == next);

		/* set to last set bit */
		j = next - 1;

		/* handle range vs. single region; omit the trailing comma
		 * after the final entry */
		if (i != j)
			r = dm_snprintf(buf, buflen, FMTu64 "-" FMTu64 "%s",
					(uint64_t) i, (uint64_t) j,
					(j == last) ? "" : ",");
		else
			r = dm_snprintf(buf, buflen, FMTu64 "%s", (uint64_t) i,
					(i == last) ? "" : ",");
		if (r < 0)
			goto_bad;

		i = next; /* skip handled bits if in range */

		buf += r;
		used += r;
	}

	return used;
bad:
	log_error("Could not format group list.");
	return 0;
}
+
/*
 * Calculate the space required to hold a string description of the group
 * described by the regions bitset using comma separated list in range
 * notation ("A,B,C,M-N").
 */
static size_t _stats_group_tag_len(const struct dm_stats *dms,
				   dm_bitset_t regions)
{
	/* NOTE(review): nr_regions is accumulated but never used. */
	int64_t i, j, next, nr_regions = 0;
	size_t buflen = 0, id_len = 0;

	/* check region ids and find last set bit */
	i = dm_bit_get_first(regions);
	for (; i >= 0; i = dm_bit_get_next(regions, i)) {
		/* length of region_id or range start in characters */
		id_len = (i) ? 1 + (size_t) log10(i) : 1;
		buflen += id_len;
		/* find the end of this run of consecutive set bits */
		j = i;
		do
			next = j + 1;
		while ((j = dm_bit_get_next(regions, j)) == next);

		/* set to last set bit */
		j = next - 1;

		nr_regions += j - i + 1;

		/* handle range */
		if (i != j) {
			/* j is always > i, which is always >= 0 */
			id_len = 1 + (size_t) log10(j);
			buflen += id_len + 1; /* range end plus "-" */
		}
		buflen++; /* field separator (",") */
		i = next; /* skip bits if handling range */
	}
	return buflen;
}
+
+/*
+ * Build a DMS_GROUP="..." tag for the group specified by group_id,
+ * to be stored in the corresponding region's aux_data field.
+ */
+static char *_build_group_tag(struct dm_stats *dms, uint64_t group_id)
+{
+ char *aux_string, *buf;
+ dm_bitset_t regions;
+ const char *alias;
+ size_t buflen = 0;
+ int r;
+
+ regions = dms->groups[group_id].regions;
+ alias = dms->groups[group_id].alias;
+
+ buflen = _stats_group_tag_len(dms, regions);
+
+ if (!buflen)
+ return_0;
+
+ buflen += DMS_GROUP_TAG_LEN;
+ buflen += 1 + (alias ? strlen(alias) : 0); /* 'alias:' */
+
+ buf = aux_string = dm_malloc(buflen);
+ if (!buf) {
+ log_error("Could not allocate memory for aux_data string.");
+ return NULL;
+ }
+
+ if (!dm_strncpy(buf, DMS_GROUP_TAG, DMS_GROUP_TAG_LEN + 1))
+ goto_bad;
+
+ buf += DMS_GROUP_TAG_LEN;
+ buflen -= DMS_GROUP_TAG_LEN;
+
+ r = dm_snprintf(buf, buflen, "%s%c", alias ? alias : "", DMS_GROUP_SEP);
+ if (r < 0)
+ goto_bad;
+
+ buf += r;
+ buflen -= r;
+
+ r = _stats_group_tag_fill(dms, regions, buf, buflen);
+ if (!r)
+ goto_bad;
+
+ return aux_string;
+bad:
+ log_error("Could not format group aux_data.");
+ dm_free(aux_string);
+ return NULL;
+}
+
/*
 * Store updated aux_data for a region. The aux_data is passed to the
 * kernel using the @stats_set_aux message. Any required group tag is
 * generated from the current group table and included in the message.
 *
 * Returns 1 on success, 0 on failure.
 */
static int _stats_set_aux(struct dm_stats *dms,
			  uint64_t region_id, const char *aux_data)
{
	const char *group_tag = NULL;
	struct dm_task *dmt = NULL;
	char msg[STATS_MSG_BUF_LEN];

	/* group data required? (only group leaders carry a tag) */
	if (_stats_group_id_present(dms, region_id)) {
		group_tag = _build_group_tag(dms, region_id);
		if (!group_tag) {
			log_error("Could not build group descriptor for "
				  "region ID " FMTu64, region_id);
			goto bad;
		}
	}

	/* empty user aux_data is sent as "-" */
	if (dm_snprintf(msg, sizeof(msg), "@stats_set_aux " FMTu64 " %s%s%s ",
			region_id, (group_tag) ? group_tag : "",
			(group_tag) ? DMS_AUX_SEP : "",
			(strlen(aux_data)) ? aux_data : "-") < 0) {
		log_error("Could not prepare @stats_set_aux message");
		goto bad;
	}

	if (!(dmt = _stats_send_message(dms, msg)))
		goto_bad;

	dm_free((char *) group_tag);

	/* no response to a @stats_set_aux message */
	dm_task_destroy(dmt);

	return 1;
bad:
	dm_free((char *) group_tag);
	return 0;
}
+
/*
 * Maximum length of a "start+end" range string:
 * Two 20 digit uint64_t, '+', and NULL.
 */
#define RANGE_LEN 42
/*
 * Send an @stats_create message for the range [start, start+len) with
 * the given area step, optional precise_timestamps and histogram
 * arguments, program_id and aux_data. On success the new region's ID
 * (parsed from the message response) is stored in *region_id when
 * region_id is non-NULL. Returns 1 on success, 0 on failure.
 */
static int _stats_create_region(struct dm_stats *dms, uint64_t *region_id,
				uint64_t start, uint64_t len, int64_t step,
				int precise, const char *hist_arg,
				const char *program_id, const char *aux_data)
{
	char msg[STATS_MSG_BUF_LEN], range[RANGE_LEN], *endptr = NULL;
	const char *err_fmt = "Could not prepare @stats_create %s.";
	const char *precise_str = PRECISE_ARG;
	const char *resp, *opt_args = NULL;
	struct dm_task *dmt = NULL;
	int r = 0, nr_opt = 0;

	if (!_stats_bound(dms))
		return_0;

	/* an empty program_id falls back to the handle's default */
	if (!program_id || !strlen(program_id))
		program_id = dms->program_id;

	/* range is only referenced below when (start || len) is true */
	if (start || len) {
		if (dm_snprintf(range, sizeof(range), FMTu64 "+" FMTu64,
				start, len) < 0) {
			log_error(err_fmt, "range");
			return 0;
		}
	}

	if (precise < 0)
		precise = dms->precise;

	if (precise)
		nr_opt++;
	else
		precise_str = "";

	if (hist_arg)
		nr_opt++;
	else
		hist_arg = "";

	/* optional args: "<count> [precise_timestamps] [histogram:<arg>]" */
	if (nr_opt) {
		if ((dm_asprintf((char **)&opt_args, "%d %s %s%s", nr_opt,
				 precise_str,
				 (strlen(hist_arg)) ? HISTOGRAM_ARG : "",
				 hist_arg)) < 0) {
			log_error(err_fmt, PRECISE_ARG " option.");
			return 0;
		}
	} else
		opt_args = dm_strdup("");

	/* a negative step requests the "/<nr_areas>" form */
	if (dm_snprintf(msg, sizeof(msg), "@stats_create %s %s" FMTu64
			" %s %s %s", (start || len) ? range : "-",
			(step < 0) ? "/" : "",
			(uint64_t)llabs(step),
			opt_args, program_id, aux_data) < 0) {
		log_error(err_fmt, "message");
		dm_free((void *) opt_args);
		return 0;
	}

	if (!(dmt = _stats_send_message(dms, msg)))
		goto_out;

	resp = dm_task_get_message_response(dmt);
	if (!resp) {
		log_error("Could not parse empty @stats_create response.");
		goto out;
	}

	/* the response is the new region_id in decimal */
	if (region_id) {
		errno = 0;
		*region_id = strtoull(resp, &endptr, 10);
		if (errno || resp == endptr)
			goto_out;
	}

	r = 1;

out:
	if (dmt)
		dm_task_destroy(dmt);
	dm_free((void *) opt_args);

	return r;
}
+
+int dm_stats_create_region(struct dm_stats *dms, uint64_t *region_id,
+ uint64_t start, uint64_t len, int64_t step,
+ int precise, struct dm_histogram *bounds,
+ const char *program_id, const char *user_data)
+{
+ char *hist_arg = NULL;
+ int r = 0;
+
+ /* Nanosecond counters and histograms both need precise_timestamps. */
+ if ((precise || bounds) && !_stats_check_precise_timestamps(dms))
+ return_0;
+
+ if (bounds) {
+ /* _build_histogram_arg enables precise if vals < 1ms. */
+ if (!(hist_arg = _build_histogram_arg(bounds, &precise)))
+ goto_out;
+ }
+
+ r = _stats_create_region(dms, region_id, start, len, step,
+ precise, hist_arg, program_id, user_data);
+ dm_free(hist_arg);
+
+out:
+ return r;
+}
+
+static void _stats_clear_group_regions(struct dm_stats *dms, uint64_t group_id)
+{
+ struct dm_stats_group *group;
+ uint64_t i;
+
+ group = &dms->groups[group_id];
+ for (i = dm_bit_get_first(group->regions);
+ i != DM_STATS_GROUP_NOT_PRESENT;
+ i = dm_bit_get_next(group->regions, i))
+ dms->regions[i].group_id = DM_STATS_GROUP_NOT_PRESENT;
+}
+
+static int _stats_remove_region_id_from_group(struct dm_stats *dms,
+ uint64_t region_id)
+{
+ struct dm_stats_region *region = &dms->regions[region_id];
+ uint64_t group_id = region->group_id;
+ dm_bitset_t regions = dms->groups[group_id].regions;
+
+ if (!_stats_region_is_grouped(dms, region_id))
+ return_0;
+
+ dm_bit_clear(regions, region_id);
+
+ /* removing group leader? */
+ if (region_id == group_id) {
+ _stats_clear_group_regions(dms, group_id);
+ _stats_group_destroy(&dms->groups[group_id]);
+ }
+
+ return _stats_set_aux(dms, group_id, dms->regions[group_id].aux_data);
+}
+
+static int _stats_delete_region(struct dm_stats *dms, uint64_t region_id)
+{
+ char msg[STATS_MSG_BUF_LEN];
+ struct dm_task *dmt;
+
+ if (_stats_region_is_grouped(dms, region_id))
+ if (!_stats_remove_region_id_from_group(dms, region_id)) {
+ log_error("Could not remove region ID " FMTu64 " from "
+ "group ID " FMTu64,
+ region_id, dms->regions[region_id].group_id);
+ return 0;
+ }
+
+ if (dm_snprintf(msg, sizeof(msg), "@stats_delete " FMTu64, region_id) < 0) {
+ log_error("Could not prepare @stats_delete message.");
+ return 0;
+ }
+
+ dmt = _stats_send_message(dms, msg);
+ if (!dmt)
+ return_0;
+ dm_task_destroy(dmt);
+
+ return 1;
+}
+
/*
 * Delete the specified region, transparently listing the handle first
 * if required (and restoring its unlisted state afterwards). Returns 1
 * on success, 0 on failure.
 */
int dm_stats_delete_region(struct dm_stats *dms, uint64_t region_id)
{
	int listed = 0;

	if (!_stats_bound(dms))
		return_0;

	/*
	 * To correctly delete a region, that may be part of a group, a
	 * listed handle is required, since the region may need to be
	 * removed from another region's group descriptor; earlier
	 * versions of the region deletion interface do not have this
	 * requirement since there are no dependencies between regions.
	 *
	 * Listing a previously unlisted handle has numerous
	 * side-effects on other calls and operations (e.g. stats
	 * walks), especially when returning to a function that depends
	 * on the state of the region table, or statistics cursor.
	 *
	 * To avoid changing the semantics of the API, and the need for
	 * a versioned symbol, maintain a flag indicating when a listing
	 * has been carried out, and drop the region table before
	 * returning.
	 *
	 * This ensures compatibility with programs compiled against
	 * earlier versions of libdm.
	 */
	if (!dms->regions && !(listed = dm_stats_list(dms, dms->program_id))) {
		log_error("Could not obtain region list while deleting "
			  "region ID " FMTu64, region_id);
		goto bad;
	}

	if (!dm_stats_get_nr_regions(dms)) {
		log_error("Could not delete region ID " FMTu64 ": "
			  "no regions found", region_id);
		goto bad;
	}

	/* includes invalid and special region_id values */
	if (!dm_stats_region_present(dms, region_id)) {
		log_error("Region ID " FMTu64 " does not exist", region_id);
		goto bad;
	}

	if (!_stats_delete_region(dms, region_id))
		goto bad;

	if (!listed)
		/* wipe region and mark as not present */
		_stats_region_destroy(&dms->regions[region_id]);
	else
		/* return handle to prior state */
		_stats_regions_destroy(dms);

	return 1;
bad:
	if (listed)
		_stats_regions_destroy(dms);

	return 0;
}
+
+int dm_stats_clear_region(struct dm_stats *dms, uint64_t region_id)
+{
+ char msg[STATS_MSG_BUF_LEN];
+ struct dm_task *dmt;
+
+ if (!_stats_bound(dms))
+ return_0;
+
+ if (dm_snprintf(msg, sizeof(msg), "@stats_clear " FMTu64, region_id) < 0) {
+ log_error("Could not prepare @stats_clear message.");
+ return 0;
+ }
+
+ dmt = _stats_send_message(dms, msg);
+
+ if (!dmt)
+ return_0;
+
+ dm_task_destroy(dmt);
+
+ return 1;
+}
+
+static struct dm_task *_stats_print_region(struct dm_stats *dms,
+ uint64_t region_id, unsigned start_line,
+ unsigned num_lines, unsigned clear)
+{
+ /* @stats_print[_clear] <region_id> [<start_line> <num_lines>] */
+ const char *err_fmt = "Could not prepare @stats_print %s.";
+ char msg[STATS_MSG_BUF_LEN], lines[RANGE_LEN];
+ struct dm_task *dmt = NULL;
+
+ if (start_line || num_lines)
+ if (dm_snprintf(lines, sizeof(lines),
+ "%u %u", start_line, num_lines) < 0) {
+ log_error(err_fmt, "row specification");
+ return NULL;
+ }
+
+ if (dm_snprintf(msg, sizeof(msg), "@stats_print%s " FMTu64 " %s",
+ (clear) ? "_clear" : "",
+ region_id, (start_line || num_lines) ? lines : "") < 0) {
+ log_error(err_fmt, "message");
+ return NULL;
+ }
+
+ if (!(dmt = _stats_send_message(dms, msg)))
+ return_NULL;
+
+ return dmt;
+}
+
+char *dm_stats_print_region(struct dm_stats *dms, uint64_t region_id,
+ unsigned start_line, unsigned num_lines,
+ unsigned clear)
+{
+ char *resp = NULL;
+ struct dm_task *dmt = NULL;
+ const char *response;
+
+ if (!_stats_bound(dms))
+ return_0;
+
+ /*
+ * FIXME: 'print' can be emulated for groups or aggregate regions
+ * by populating the handle and emitting aggregate counter data
+ * in the kernel print format.
+ */
+ if (region_id == DM_STATS_WALK_GROUP)
+ return_0;
+
+ dmt = _stats_print_region(dms, region_id,
+ start_line, num_lines, clear);
+
+ if (!dmt)
+ return_0;
+
+ if (!(response = dm_task_get_message_response(dmt)))
+ goto_out;
+
+ if (!(resp = dm_pool_strdup(dms->mem, response)))
+ log_error("Could not allocate memory for response buffer.");
+out:
+ dm_task_destroy(dmt);
+
+ return resp;
+}
+
/* Release a buffer returned by dm_stats_print_region(). */
void dm_stats_buffer_destroy(struct dm_stats *dms, char *buffer)
{
	dm_pool_free(dms->mem, buffer);
}
+
+uint64_t dm_stats_get_nr_regions(const struct dm_stats *dms)
+{
+ if (!dms)
+ return_0;
+
+ if (!dms->regions)
+ return 0;
+
+ return dms->nr_regions;
+}
+
+uint64_t dm_stats_get_nr_groups(const struct dm_stats *dms)
+{
+ uint64_t group_id, nr_groups = 0;
+
+ if (!dms)
+ return_0;
+
+ /* no regions or groups? */
+ if (!dms->regions || !dms->groups)
+ return 0;
+
+ for (group_id = 0; group_id <= dms->max_region; group_id++)
+ if (dms->groups[group_id].group_id
+ != DM_STATS_GROUP_NOT_PRESENT)
+ nr_groups++;
+
+ return nr_groups;
+}
+
+/**
+ * Test whether region_id is present in this set of stats data.
+ */
+int dm_stats_region_present(const struct dm_stats *dms, uint64_t region_id)
+{
+ if (!dms->regions)
+ return_0;
+
+ if (region_id > dms->max_region)
+ return 0;
+
+ return _stats_region_present(&dms->regions[region_id]);
+}
+
+static int _dm_stats_populate_region(struct dm_stats *dms, uint64_t region_id,
+ const char *resp)
+{
+ struct dm_stats_region *region = &dms->regions[region_id];
+
+ if (!_stats_bound(dms))
+ return_0;
+
+ if (!region) {
+ log_error("Cannot populate empty handle before dm_stats_list().");
+ return 0;
+ }
+ if (!_stats_parse_region(dms, resp, region, region->timescale)) {
+ log_error("Could not parse @stats_print message response.");
+ return 0;
+ }
+ region->region_id = region_id;
+ return 1;
+}
+
/*
 * Populate counter data for one region (or all regions when region_id
 * is DM_STATS_REGIONS_ALL), clearing kernel counters as they are read.
 * The handle's region table is (re)built as needed; the walk flags are
 * saved and restored around the internal region walk. Returns 1 on
 * success, 0 on failure (the region table is dropped on failure).
 */
int dm_stats_populate(struct dm_stats *dms, const char *program_id,
		      uint64_t region_id)
{
	int all_regions = (region_id == DM_STATS_REGIONS_ALL);
	struct dm_task *dmt = NULL; /* @stats_print task */
	uint64_t saved_flags; /* saved walk flags */
	const char *resp;

	/*
	 * We are about do destroy and re-create the region table, so it
	 * is safe to use the cursor embedded in the stats handle: just
	 * save a copy of the current walk_flags to restore later.
	 */
	saved_flags = dms->walk_flags;

	if (!_stats_bound(dms))
		return_0;

	/* group aggregates cannot be populated individually */
	if ((!all_regions) && (region_id & DM_STATS_WALK_GROUP)) {
		log_error("Invalid region_id for dm_stats_populate: "
			  "DM_STATS_WALK_GROUP");
		return 0;
	}

	if (!dms->nr_regions) {
		log_error("No regions registered.");
		return 0;
	}

	/* allow zero-length program_id for populate */
	if (!program_id)
		program_id = dms->program_id;

	if (all_regions && !dm_stats_list(dms, program_id)) {
		log_error("Could not parse @stats_list response.");
		goto bad;
	} else if (!_stats_set_name_cache(dms)) {
		goto_bad;
	}

	/* walk region-by-region; area data comes from @stats_print */
	dms->walk_flags = DM_STATS_WALK_REGION;
	dm_stats_walk_start(dms);
	do {
		region_id = (all_regions)
			     ? dm_stats_get_current_region(dms) : region_id;

		/* obtain all lines and clear counter values */
		if (!(dmt = _stats_print_region(dms, region_id, 0, 0, 1)))
			goto_bad;

		resp = dm_task_get_message_response(dmt);
		if (!_dm_stats_populate_region(dms, region_id, resp)) {
			dm_task_destroy(dmt);
			goto_bad;
		}

		dm_task_destroy(dmt);
		dm_stats_walk_next(dms);

	} while (all_regions && !dm_stats_walk_end(dms));

	dms->walk_flags = saved_flags;
	return 1;

bad:
	dms->walk_flags = saved_flags;
	_stats_regions_destroy(dms);
	dms->regions = NULL;
	return 0;
}
+
/**
 * destroy a dm_stats object and all associated regions and counter sets.
 */
void dm_stats_destroy(struct dm_stats *dms)
{
	if (!dms)
		return;

	_stats_regions_destroy(dms);
	_stats_groups_destroy(dms);
	_stats_clear_binding(dms);
	/* release the pools backing region, histogram and group data */
	dm_pool_destroy(dms->mem);
	dm_pool_destroy(dms->hist_mem);
	dm_pool_destroy(dms->group_mem);
	dm_free(dms->program_id);
	/* name is const-qualified in the handle; cast away for free */
	dm_free((char *) dms->name);
	dm_free(dms);
}
+
/*
 * Walk each area that is a member of region_id rid.
 * i is a variable of type int that holds the current area_id.
 * (NOTE(review): callers below use uint64_t loop variables — confirm
 * the intended index type.)
 */
#define _foreach_region_area(dms, rid, i) \
for ((i) = 0; (i) < _nr_areas_region(&dms->regions[(rid)]); (i)++) \

/*
 * Walk each region that is a member of group_id gid.
 * i is a variable of type int that holds the current region_id.
 */
#define _foreach_group_region(dms, gid, i) \
for ((i) = dm_bit_get_first((dms)->groups[(gid)].regions); \
     (i) != DM_STATS_GROUP_NOT_PRESENT; \
     (i) = dm_bit_get_next((dms)->groups[(gid)].regions, (i))) \

/*
 * Walk each region that is a member of group_id gid visiting each
 * area within the region.
 * i is a variable of type int that holds the current region_id.
 * j is a variable of type int variable that holds the current area_id.
 */
#define _foreach_group_area(dms, gid, i, j) \
_foreach_group_region(dms, gid, i) \
	_foreach_region_area(dms, i, j)
+
/*
 * Return the raw value of the named counter from a single area's
 * counter set. An invalid counter selects the error branch and
 * returns 0.
 */
static uint64_t _stats_get_counter(const struct dm_stats *dms,
				   const struct dm_stats_counters *area,
				   dm_stats_counter_t counter)
{
	switch(counter) {
	case DM_STATS_READS_COUNT:
		return area->reads;
	case DM_STATS_READS_MERGED_COUNT:
		return area->reads_merged;
	case DM_STATS_READ_SECTORS_COUNT:
		return area->read_sectors;
	case DM_STATS_READ_NSECS:
		return area->read_nsecs;
	case DM_STATS_WRITES_COUNT:
		return area->writes;
	case DM_STATS_WRITES_MERGED_COUNT:
		return area->writes_merged;
	case DM_STATS_WRITE_SECTORS_COUNT:
		return area->write_sectors;
	case DM_STATS_WRITE_NSECS:
		return area->write_nsecs;
	case DM_STATS_IO_IN_PROGRESS_COUNT:
		return area->io_in_progress;
	case DM_STATS_IO_NSECS:
		return area->io_nsecs;
	case DM_STATS_WEIGHTED_IO_NSECS:
		return area->weighted_io_nsecs;
	case DM_STATS_TOTAL_READ_NSECS:
		return area->total_read_nsecs;
	case DM_STATS_TOTAL_WRITE_NSECS:
		return area->total_write_nsecs;
	case DM_STATS_NR_COUNTERS:
	default:
		log_error("Attempt to read invalid counter: %d", counter);
	}
	return 0;
}
+
+uint64_t dm_stats_get_counter(const struct dm_stats *dms,
+ dm_stats_counter_t counter,
+ uint64_t region_id, uint64_t area_id)
+{
+ uint64_t i, j, sum = 0; /* aggregation */
+ int sum_regions = 0;
+ struct dm_stats_region *region;
+ struct dm_stats_counters *area;
+
+ region_id = (region_id == DM_STATS_REGION_CURRENT)
+ ? dms->cur_region : region_id ;
+ area_id = (area_id == DM_STATS_REGION_CURRENT)
+ ? dms->cur_area : area_id ;
+
+ sum_regions = !!(region_id & DM_STATS_WALK_GROUP);
+
+ if (region_id == DM_STATS_WALK_GROUP)
+ /* group walk using the cursor */
+ region_id = dms->cur_group;
+ else if (region_id & DM_STATS_WALK_GROUP)
+ /* group walk using immediate group_id */
+ region_id &= ~DM_STATS_WALK_GROUP;
+ region = &dms->regions[region_id];
+
+ /*
+ * All statistics aggregation takes place here: aggregate metrics
+ * are calculated as normal using the aggregated counter values
+ * returned for the region or group specified.
+ */
+
+ if (_stats_region_is_grouped(dms, region_id) && (sum_regions)) {
+ /* group */
+ if (area_id & DM_STATS_WALK_GROUP)
+ _foreach_group_area(dms, region->group_id, i, j) {
+ area = &dms->regions[i].counters[j];
+ sum += _stats_get_counter(dms, area, counter);
+ }
+ else
+ _foreach_group_region(dms, region->group_id, i) {
+ area = &dms->regions[i].counters[area_id];
+ sum += _stats_get_counter(dms, area, counter);
+ }
+ } else if (area_id == DM_STATS_WALK_REGION) {
+ /* aggregate region */
+ _foreach_region_area(dms, region_id, j) {
+ area = &dms->regions[region_id].counters[j];
+ sum += _stats_get_counter(dms, area, counter);
+ }
+ } else {
+ /* plain region / area */
+ area = &region->counters[area_id];
+ sum = _stats_get_counter(dms, area, counter);
+ }
+
+ return sum;
+}
+
/*
 * Methods for accessing named counter fields. All methods share the
 * following naming scheme and prototype:
 *
 * uint64_t dm_stats_get_COUNTER(const struct dm_stats *, uint64_t, uint64_t)
 *
 * Where the two integer arguments are the region_id and area_id
 * respectively.
 *
 * name is the name of the counter (lower case)
 * counter is the part of the enum name following DM_STATS_ (upper case)
 */
#define MK_STATS_GET_COUNTER_FN(name, counter) \
uint64_t dm_stats_get_ ## name(const struct dm_stats *dms, \
			       uint64_t region_id, uint64_t area_id) \
{ \
	return dm_stats_get_counter(dms, DM_STATS_ ## counter, \
				    region_id, area_id); \
}

/* one public accessor per counter in the dm_stats_counter_t enum */
MK_STATS_GET_COUNTER_FN(reads, READS_COUNT)
MK_STATS_GET_COUNTER_FN(reads_merged, READS_MERGED_COUNT)
MK_STATS_GET_COUNTER_FN(read_sectors, READ_SECTORS_COUNT)
MK_STATS_GET_COUNTER_FN(read_nsecs, READ_NSECS)
MK_STATS_GET_COUNTER_FN(writes, WRITES_COUNT)
MK_STATS_GET_COUNTER_FN(writes_merged, WRITES_MERGED_COUNT)
MK_STATS_GET_COUNTER_FN(write_sectors, WRITE_SECTORS_COUNT)
MK_STATS_GET_COUNTER_FN(write_nsecs, WRITE_NSECS)
MK_STATS_GET_COUNTER_FN(io_in_progress, IO_IN_PROGRESS_COUNT)
MK_STATS_GET_COUNTER_FN(io_nsecs, IO_NSECS)
MK_STATS_GET_COUNTER_FN(weighted_io_nsecs, WEIGHTED_IO_NSECS)
MK_STATS_GET_COUNTER_FN(total_read_nsecs, TOTAL_READ_NSECS)
MK_STATS_GET_COUNTER_FN(total_write_nsecs, TOTAL_WRITE_NSECS)
#undef MK_STATS_GET_COUNTER_FN
+
+/*
+ * Floating point stats metric functions
+ *
+ * Called from dm_stats_get_metric() to calculate the value of
+ * the requested metric.
+ *
+ * int _metric_name(const struct dm_stats *dms,
+ * struct dm_stats_counters *c,
+ * double *value);
+ *
+ * Calculate a metric value from the counter data for the given
+ * identifiers and store it in the memory pointed to by value,
+ * applying group or region aggregation if enabled.
+ *
+ * Return one on success or zero on failure.
+ *
+ * To add a new metric:
+ *
+ * o Add a new name to the dm_stats_metric_t enum.
+ * o Create a _metric_fn() to calculate the new metric.
+ * o Add _metric_fn to the _metrics function table
+ * (entries in enum order).
+ * o Do not add a new named public function for the metric -
+ * users of new metrics are encouraged to convert to the enum
+ * based metric interface.
+ *
+ */
+
+/*
+ * DM_STATS_RD_MERGES_PER_SEC: read requests merged during the interval,
+ * divided by the interval length in nanoseconds.
+ *
+ * NOTE(review): unlike _reads_per_sec() below, this value is not
+ * multiplied by NSEC_PER_SEC before dividing by interval_ns -- confirm
+ * whether the merge rates are intentionally on a different time scale.
+ */
+static int _rd_merges_per_sec(const struct dm_stats *dms, double *rrqm,
+			      uint64_t region_id, uint64_t area_id)
+{
+	double mrgs;
+	mrgs = (double) dm_stats_get_counter(dms, DM_STATS_READS_MERGED_COUNT,
+					     region_id, area_id);
+
+	*rrqm = mrgs / (double) dms->interval_ns;
+
+	return 1;
+}
+
+/*
+ * DM_STATS_WR_MERGES_PER_SEC: write requests merged during the interval,
+ * divided by the interval length in nanoseconds (see note above on the
+ * missing NSEC_PER_SEC scaling).
+ */
+static int _wr_merges_per_sec(const struct dm_stats *dms, double *wrqm,
+			      uint64_t region_id, uint64_t area_id)
+{
+	double mrgs;
+	mrgs = (double) dm_stats_get_counter(dms, DM_STATS_WRITES_MERGED_COUNT,
+					     region_id, area_id);
+
+	*wrqm = mrgs / (double) dms->interval_ns;
+
+	return 1;
+}
+
+static int _reads_per_sec(const struct dm_stats *dms, double *rd_s,
+ uint64_t region_id, uint64_t area_id)
+{
+ double reads;
+ reads = (double) dm_stats_get_counter(dms, DM_STATS_READS_COUNT,
+ region_id, area_id);
+
+ *rd_s = (reads * NSEC_PER_SEC) / (double) dms->interval_ns;
+
+ return 1;
+}
+
+static int _writes_per_sec(const struct dm_stats *dms, double *wr_s,
+ uint64_t region_id, uint64_t area_id)
+{
+ double writes;
+ writes = (double) dm_stats_get_counter(dms, DM_STATS_WRITES_COUNT,
+ region_id, area_id);
+
+ *wr_s = (writes * NSEC_PER_SEC) / (double) dms->interval_ns;
+
+ return 1;
+}
+
+static int _read_sectors_per_sec(const struct dm_stats *dms, double *rsec_s,
+ uint64_t region_id, uint64_t area_id)
+{
+ double sect;
+ sect = (double) dm_stats_get_counter(dms, DM_STATS_READ_SECTORS_COUNT,
+ region_id, area_id);
+
+ *rsec_s = (sect * (double) NSEC_PER_SEC) / (double) dms->interval_ns;
+
+ return 1;
+}
+
+static int _write_sectors_per_sec(const struct dm_stats *dms, double *wsec_s,
+ uint64_t region_id, uint64_t area_id)
+{
+ double sect;
+ sect = (double) dm_stats_get_counter(dms, DM_STATS_WRITE_SECTORS_COUNT,
+ region_id, area_id);
+
+ *wsec_s = (sect * (double) NSEC_PER_SEC) / (double) dms->interval_ns;
+
+ return 1;
+}
+
+static int _average_request_size(const struct dm_stats *dms, double *arqsz,
+ uint64_t region_id, uint64_t area_id)
+{
+ double ios, sectors;
+
+ ios = (double) (dm_stats_get_counter(dms, DM_STATS_READS_COUNT,
+ region_id, area_id)
+ + dm_stats_get_counter(dms, DM_STATS_WRITES_COUNT,
+ region_id, area_id));
+ sectors = (double) (dm_stats_get_counter(dms, DM_STATS_READ_SECTORS_COUNT,
+ region_id, area_id)
+ + dm_stats_get_counter(dms, DM_STATS_WRITE_SECTORS_COUNT,
+ region_id, area_id));
+
+ if (ios > 0.0)
+ *arqsz = sectors / ios;
+ else
+ *arqsz = 0.0;
+
+ return 1;
+}
+
+static int _average_queue_size(const struct dm_stats *dms, double *qusz,
+ uint64_t region_id, uint64_t area_id)
+{
+ double io_ticks;
+ io_ticks = (double) dm_stats_get_counter(dms, DM_STATS_WEIGHTED_IO_NSECS,
+ region_id, area_id);
+
+ if (io_ticks > 0.0)
+ *qusz = io_ticks / (double) dms->interval_ns;
+ else
+ *qusz = 0.0;
+
+ return 1;
+}
+
+static int _average_wait_time(const struct dm_stats *dms, double *await,
+ uint64_t region_id, uint64_t area_id)
+{
+ uint64_t io_ticks, nr_ios;
+
+ io_ticks = dm_stats_get_counter(dms, DM_STATS_READ_NSECS,
+ region_id, area_id);
+ io_ticks += dm_stats_get_counter(dms, DM_STATS_WRITE_NSECS,
+ region_id, area_id);
+
+ nr_ios = dm_stats_get_counter(dms, DM_STATS_READS_COUNT,
+ region_id, area_id);
+ nr_ios += dm_stats_get_counter(dms, DM_STATS_WRITES_COUNT,
+ region_id, area_id);
+
+ if (nr_ios > 0)
+ *await = (double) io_ticks / (double) nr_ios;
+ else
+ *await = 0.0;
+
+ return 1;
+}
+
+static int _average_rd_wait_time(const struct dm_stats *dms, double *await,
+ uint64_t region_id, uint64_t area_id)
+{
+ uint64_t rd_io_ticks, nr_rd_ios;
+
+ rd_io_ticks = dm_stats_get_counter(dms, DM_STATS_READ_NSECS,
+ region_id, area_id);
+ nr_rd_ios = dm_stats_get_counter(dms, DM_STATS_READS_COUNT,
+ region_id, area_id);
+
+ /*
+ * If rd_io_ticks is > 0 this should imply that nr_rd_ios is
+ * also > 0 (unless a kernel bug exists). Test for both here
+ * before using the IO count as a divisor (Coverity).
+ */
+ if (rd_io_ticks > 0 && nr_rd_ios > 0)
+ *await = (double) rd_io_ticks / (double) nr_rd_ios;
+ else
+ *await = 0.0;
+
+ return 1;
+}
+
+static int _average_wr_wait_time(const struct dm_stats *dms, double *await,
+ uint64_t region_id, uint64_t area_id)
+{
+ uint64_t wr_io_ticks, nr_wr_ios;
+
+ wr_io_ticks = dm_stats_get_counter(dms, DM_STATS_WRITE_NSECS,
+ region_id, area_id);
+ nr_wr_ios = dm_stats_get_counter(dms, DM_STATS_WRITES_COUNT,
+ region_id, area_id);
+
+ /*
+ * If wr_io_ticks is > 0 this should imply that nr_wr_ios is
+ * also > 0 (unless a kernel bug exists). Test for both here
+ * before using the IO count as a divisor (Coverity).
+ */
+ if (wr_io_ticks > 0 && nr_wr_ios > 0)
+ *await = (double) wr_io_ticks / (double) nr_wr_ios;
+ else
+ *await = 0.0;
+
+ return 1;
+}
+
+static int _throughput(const struct dm_stats *dms, double *tput,
+ uint64_t region_id, uint64_t area_id)
+{
+ uint64_t nr_ios;
+
+ nr_ios = dm_stats_get_counter(dms, DM_STATS_READS_COUNT,
+ region_id, area_id);
+ nr_ios += dm_stats_get_counter(dms, DM_STATS_WRITES_COUNT,
+ region_id, area_id);
+
+ *tput = ((double) NSEC_PER_SEC * (double) nr_ios)
+ / (double) (dms->interval_ns);
+
+ return 1;
+}
+
+/*
+ * DM_STATS_UTILIZATION: fraction of the sampling interval during which
+ * the device was busy servicing IO, in the range [0.0..1.0].
+ */
+static int _utilization(const struct dm_stats *dms, double *util,
+			uint64_t region_id, uint64_t area_id)
+{
+	uint64_t io_nsecs, interval_ns = dms->interval_ns;
+
+	/**
+	 * If io_nsecs > interval_ns there is something wrong with the clock
+	 * for the last interval; do not allow a value > 100% utilization
+	 * to be passed to a dm_make_percent() call. We expect to see these
+	 * at startup if counters have not been cleared before the first read.
+	 *
+	 * A zero interval_ns is also an error since metrics cannot be
+	 * calculated without a defined interval - return zero and emit a
+	 * backtrace in this case.
+	 */
+	io_nsecs = dm_stats_get_counter(dms, DM_STATS_IO_NSECS,
+					region_id, area_id);
+
+	/* No sampling interval set: cannot compute a rate. */
+	if (!interval_ns) {
+		*util = 0.0;
+		return_0;
+	}
+
+	/* Clamp at 100% utilization (see comment above). */
+	io_nsecs = ((io_nsecs < interval_ns) ? io_nsecs : interval_ns);
+
+	*util = (double) io_nsecs / (double) interval_ns;
+
+	return 1;
+}
+
+/*
+ * DM_STATS_SERVICE_TIME: average service time per IO for the interval,
+ * derived from utilization and throughput.
+ */
+static int _service_time(const struct dm_stats *dms, double *svctm,
+			 uint64_t region_id, uint64_t area_id)
+{
+	double tput, util;
+
+	if (!_throughput(dms, &tput, region_id, area_id))
+		return 0;
+
+	if (!_utilization(dms, &util, region_id, area_id))
+		return 0;
+
+	/* Scale the [0.0..1.0] utilization up to a percentage. */
+	util *= 100;
+
+	/* avoid NAN with zero counter values */
+	if ( (uint64_t) tput == 0 || (uint64_t) util == 0) {
+		*svctm = 0.0;
+		return 1;
+	}
+
+	/*
+	 * NOTE(review): util is a double here but dm_percent_to_float()
+	 * takes a dm_percent_t; the implicit conversion truncates --
+	 * confirm this is the intended precision.
+	 */
+	*svctm = ((double) NSEC_PER_SEC * dm_percent_to_float(util))
+		  / (100.0 * tput);
+
+	return 1;
+}
+
+/*
+ * Table in enum order:
+ * DM_STATS_RD_MERGES_PER_SEC,
+ * DM_STATS_WR_MERGES_PER_SEC,
+ * DM_STATS_READS_PER_SEC,
+ * DM_STATS_WRITES_PER_SEC,
+ * DM_STATS_READ_SECTORS_PER_SEC,
+ * DM_STATS_WRITE_SECTORS_PER_SEC,
+ * DM_STATS_AVERAGE_REQUEST_SIZE,
+ * DM_STATS_AVERAGE_QUEUE_SIZE,
+ * DM_STATS_AVERAGE_WAIT_TIME,
+ * DM_STATS_AVERAGE_RD_WAIT_TIME,
+ * DM_STATS_AVERAGE_WR_WAIT_TIME,
+ * DM_STATS_SERVICE_TIME,
+ * DM_STATS_THROUGHPUT,
+ * DM_STATS_UTILIZATION
+ *
+*/
+
+/* Metric calculation function: stores the result through the double
+ * pointer for the given region_id / area_id pair. */
+typedef int (*_metric_fn_t)(const struct dm_stats *, double *,
+			    uint64_t, uint64_t);
+
+/* Dispatch table indexed by dm_stats_metric_t; entries must stay in
+ * enum order (see the list above).
+ * NOTE(review): presumably only used from this file -- consider
+ * declaring it static const. */
+_metric_fn_t _metrics[DM_STATS_NR_METRICS] = {
+	_rd_merges_per_sec,
+	_wr_merges_per_sec,
+	_reads_per_sec,
+	_writes_per_sec,
+	_read_sectors_per_sec,
+	_write_sectors_per_sec,
+	_average_request_size,
+	_average_queue_size,
+	_average_wait_time,
+	_average_rd_wait_time,
+	_average_wr_wait_time,
+	_service_time,
+	_throughput,
+	_utilization
+};
+
+/*
+ * Calculate the metric identified by the dm_stats_metric_t value
+ * 'metric' for the given region and area and store it in *value.
+ *
+ * Returns one on success, zero on failure (unset sampling interval or
+ * an out-of-range metric number).
+ */
+int dm_stats_get_metric(const struct dm_stats *dms, int metric,
+			uint64_t region_id, uint64_t area_id, double *value)
+{
+	/* Metrics are rates: meaningless without an interval. */
+	if (!dms->interval_ns)
+		return_0;
+
+	/*
+	 * Decode DM_STATS_{REGION,AREA}_CURRENT here; counters will then
+	 * be returned for the actual current region and area.
+	 *
+	 * DM_STATS_WALK_GROUP is passed through to the counter methods -
+	 * aggregates for the group are returned and used to calculate
+	 * the metric for the group totals.
+	 */
+	region_id = (region_id == DM_STATS_REGION_CURRENT)
+		     ? dms->cur_region : region_id ;
+	/* NOTE(review): DM_STATS_REGION_CURRENT is tested against area_id
+	 * here; presumably it shares its value with DM_STATS_AREA_CURRENT
+	 * -- confirm against the header. */
+	area_id = (area_id == DM_STATS_REGION_CURRENT)
+		   ? dms->cur_area : area_id ;
+
+	if (metric < 0 || metric >= DM_STATS_NR_METRICS) {
+		log_error("Attempt to read invalid metric: %d", metric);
+		return 0;
+	}
+
+	return _metrics[metric](dms, value, region_id, area_id);
+}
+
+/**
+ * Methods for accessing stats metrics. All methods share the
+ * following naming scheme and prototype:
+ *
+ * int dm_stats_get_NAME(const struct dm_stats *dms, double *value,
+ *                       uint64_t region_id, uint64_t area_id)
+ *
+ * Where the two integer arguments are the region_id and area_id
+ * respectively.
+ *
+ * name is the name of the metric (lower case)
+ * metric is the part of the enum name following DM_STATS_ (upper case)
+ */
+#define MK_STATS_GET_METRIC_FN(name, metric, meta) \
+int dm_stats_get_ ## name(const struct dm_stats *dms, double *meta, \
+ uint64_t region_id, uint64_t area_id) \
+{ \
+ return dm_stats_get_metric(dms, DM_STATS_ ## metric, \
+ region_id, area_id, meta); \
+}
+
+MK_STATS_GET_METRIC_FN(rd_merges_per_sec, RD_MERGES_PER_SEC, rrqm)
+MK_STATS_GET_METRIC_FN(wr_merges_per_sec, WR_MERGES_PER_SEC, wrqm)
+MK_STATS_GET_METRIC_FN(reads_per_sec, READS_PER_SEC, rd_s)
+MK_STATS_GET_METRIC_FN(writes_per_sec, WRITES_PER_SEC, wr_s)
+MK_STATS_GET_METRIC_FN(read_sectors_per_sec, READ_SECTORS_PER_SEC, rsec_s)
+MK_STATS_GET_METRIC_FN(write_sectors_per_sec, WRITE_SECTORS_PER_SEC, wsec_s)
+MK_STATS_GET_METRIC_FN(average_request_size, AVERAGE_REQUEST_SIZE, arqsz)
+MK_STATS_GET_METRIC_FN(average_queue_size, AVERAGE_QUEUE_SIZE, qusz)
+MK_STATS_GET_METRIC_FN(average_wait_time, AVERAGE_WAIT_TIME, await)
+MK_STATS_GET_METRIC_FN(average_rd_wait_time, AVERAGE_RD_WAIT_TIME, await)
+MK_STATS_GET_METRIC_FN(average_wr_wait_time, AVERAGE_WR_WAIT_TIME, await)
+MK_STATS_GET_METRIC_FN(service_time, SERVICE_TIME, svctm)
+MK_STATS_GET_METRIC_FN(throughput, THROUGHPUT, tput)
+
+/*
+ * Utilization is an exception since it used the dm_percent_t type in the
+ * original named function based interface: preserve this behaviour for
+ * backwards compatibility with existing users.
+ *
+ * The same metric may be accessed as a double via the enum based metric
+ * interface.
+ */
+int dm_stats_get_utilization(const struct dm_stats *dms, dm_percent_t *util,
+ uint64_t region_id, uint64_t area_id)
+{
+ double _util;
+
+ if (!dm_stats_get_metric(dms, DM_STATS_UTILIZATION,
+ region_id, area_id, &_util))
+ return_0;
+ /* scale up utilization value in the range [0.00..1.00] */
+ *util = dm_make_percent(DM_PERCENT_1 * _util, DM_PERCENT_1);
+ return 1;
+}
+
+void dm_stats_set_sampling_interval_ms(struct dm_stats *dms, uint64_t interval_ms)
+{
+ /* All times use nsecs internally. */
+ dms->interval_ns = interval_ms * NSEC_PER_MSEC;
+}
+
+void dm_stats_set_sampling_interval_ns(struct dm_stats *dms, uint64_t interval_ns)
+{
+ dms->interval_ns = interval_ns;
+}
+
+uint64_t dm_stats_get_sampling_interval_ms(const struct dm_stats *dms)
+{
+ /* All times use nsecs internally. */
+ return (dms->interval_ns / NSEC_PER_MSEC);
+}
+
+uint64_t dm_stats_get_sampling_interval_ns(const struct dm_stats *dms)
+{
+ /* All times use nsecs internally. */
+ return (dms->interval_ns);
+}
+
+int dm_stats_set_program_id(struct dm_stats *dms, int allow_empty,
+ const char *program_id)
+{
+ if (!allow_empty && (!program_id || !strlen(program_id))) {
+ log_error("Empty program_id not permitted without "
+ "allow_empty=1");
+ return 0;
+ }
+
+ if (!program_id)
+ program_id = "";
+
+ dm_free(dms->program_id);
+
+ if (!(dms->program_id = dm_strdup(program_id)))
+ return_0;
+
+ return 1;
+}
+
+uint64_t dm_stats_get_current_region(const struct dm_stats *dms)
+{
+ return dms->cur_region;
+}
+
+uint64_t dm_stats_get_current_area(const struct dm_stats *dms)
+{
+ return dms->cur_area & ~DM_STATS_WALK_ALL;
+}
+
+int dm_stats_get_region_start(const struct dm_stats *dms, uint64_t *start,
+ uint64_t region_id)
+{
+ if (!dms || !dms->regions)
+ return_0;
+
+ /* start is unchanged when aggregating areas */
+ if (region_id & DM_STATS_WALK_REGION)
+ region_id &= ~DM_STATS_WALK_REGION;
+
+ /* use start of first region as group start */
+ if (region_id & DM_STATS_WALK_GROUP) {
+ if (region_id == DM_STATS_WALK_GROUP)
+ region_id = dms->cur_group;
+ else
+ region_id &= ~DM_STATS_WALK_GROUP;
+ }
+
+ *start = dms->regions[region_id].start;
+ return 1;
+}
+
+/*
+ * Return the length (in sectors) of the region, aggregate region, or
+ * group identified by region_id through *len.
+ *
+ * Returns one on success and zero on failure.
+ */
+int dm_stats_get_region_len(const struct dm_stats *dms, uint64_t *len,
+			    uint64_t region_id)
+{
+	uint64_t i;
+	if (!dms || !dms->regions)
+		return_0;
+
+	*len = 0;
+
+	/* length is unchanged when aggregating areas */
+	if (region_id & DM_STATS_WALK_REGION)
+		region_id &= ~DM_STATS_WALK_REGION;
+
+	if (region_id & DM_STATS_WALK_GROUP) {
+		/* decode region / group ID */
+		if (region_id == DM_STATS_WALK_GROUP)
+			region_id = dms->cur_group;
+		else
+			region_id &= ~DM_STATS_WALK_GROUP;
+
+		/* use sum of region sizes as group size */
+		/* NOTE(review): iterates dms->cur_group rather than the
+		 * decoded region_id's group -- confirm these always agree
+		 * when a caller passes an explicit group ID. */
+		if (_stats_region_is_grouped(dms, region_id))
+			_foreach_group_region(dms, dms->cur_group, i)
+				*len += dms->regions[i].len;
+		else {
+			log_error("Group ID " FMTu64 " does not exist",
+				  region_id);
+			return 0;
+		}
+	} else
+		*len = dms->regions[region_id].len;
+
+	return 1;
+}
+
+int dm_stats_get_region_area_len(const struct dm_stats *dms, uint64_t *len,
+ uint64_t region_id)
+{
+ if (!dms || !dms->regions)
+ return_0;
+
+ /* groups are not subdivided - area size equals group size */
+ if (region_id & (DM_STATS_WALK_GROUP | DM_STATS_WALK_REGION))
+ /* get_region_len will decode region_id */
+ return dm_stats_get_region_len(dms, len, region_id);
+
+ *len = dms->regions[region_id].step;
+ return 1;
+}
+
+int dm_stats_get_current_region_start(const struct dm_stats *dms,
+ uint64_t *start)
+{
+ return dm_stats_get_region_start(dms, start, dms->cur_region);
+}
+
+int dm_stats_get_current_region_len(const struct dm_stats *dms,
+ uint64_t *len)
+{
+ return dm_stats_get_region_len(dms, len, dms->cur_region);
+}
+
+int dm_stats_get_current_region_area_len(const struct dm_stats *dms,
+ uint64_t *step)
+{
+ return dm_stats_get_region_area_len(dms, step, dms->cur_region);
+}
+
+int dm_stats_get_area_start(const struct dm_stats *dms, uint64_t *start,
+ uint64_t region_id, uint64_t area_id)
+{
+ struct dm_stats_region *region;
+ if (!dms || !dms->regions)
+ return_0;
+
+ /* group or region area start equals region start */
+ if (region_id & (DM_STATS_WALK_GROUP | DM_STATS_WALK_REGION))
+ return dm_stats_get_region_start(dms, start, region_id);
+
+ region = &dms->regions[region_id];
+ *start = region->start + region->step * area_id;
+ return 1;
+}
+
+int dm_stats_get_area_offset(const struct dm_stats *dms, uint64_t *offset,
+ uint64_t region_id, uint64_t area_id)
+{
+ if (!dms || !dms->regions)
+ return_0;
+
+ /* no areas for groups or aggregate regions */
+ if (region_id & (DM_STATS_WALK_GROUP | DM_STATS_WALK_REGION))
+ *offset = 0;
+ else
+ *offset = dms->regions[region_id].step * area_id;
+
+ return 1;
+}
+
+int dm_stats_get_current_area_start(const struct dm_stats *dms,
+ uint64_t *start)
+{
+ return dm_stats_get_area_start(dms, start,
+ dms->cur_region, dms->cur_area);
+}
+
+int dm_stats_get_current_area_offset(const struct dm_stats *dms,
+ uint64_t *offset)
+{
+ return dm_stats_get_area_offset(dms, offset,
+ dms->cur_region, dms->cur_area);
+}
+
+int dm_stats_get_current_area_len(const struct dm_stats *dms,
+ uint64_t *len)
+{
+ return dm_stats_get_region_area_len(dms, len, dms->cur_region);
+}
+
+const char *dm_stats_get_region_program_id(const struct dm_stats *dms,
+ uint64_t region_id)
+{
+ const char *program_id = NULL;
+
+ if (region_id & DM_STATS_WALK_GROUP)
+ return dms->program_id;
+
+ if (region_id & DM_STATS_WALK_REGION)
+ region_id &= ~DM_STATS_WALK_REGION;
+
+ program_id = dms->regions[region_id].program_id;
+ return (program_id) ? program_id : "";
+}
+
+const char *dm_stats_get_region_aux_data(const struct dm_stats *dms,
+ uint64_t region_id)
+{
+ const char *aux_data = NULL;
+
+ if (region_id & DM_STATS_WALK_GROUP)
+ return "";
+
+ if (region_id & DM_STATS_WALK_REGION)
+ region_id &= ~DM_STATS_WALK_REGION;
+
+ aux_data = dms->regions[region_id].aux_data;
+ return (aux_data) ? aux_data : "" ;
+}
+
+int dm_stats_set_alias(struct dm_stats *dms, uint64_t group_id, const char *alias)
+{
+ struct dm_stats_group *group = NULL;
+ const char *old_alias = NULL;
+
+ if (!dms->regions || !dms->groups || !alias)
+ return_0;
+
+ if (!_stats_region_is_grouped(dms, group_id)) {
+ log_error("Cannot set alias for ungrouped region ID "
+ FMTu64, group_id);
+ return 0;
+ }
+
+ if (group_id & DM_STATS_WALK_GROUP) {
+ if (group_id == DM_STATS_WALK_GROUP)
+ group_id = dms->cur_group;
+ else
+ group_id &= ~DM_STATS_WALK_GROUP;
+ }
+
+ if (group_id != dms->regions[group_id].group_id) {
+ /* dm_stats_set_alias() must be called on the group ID. */
+ log_error("Cannot set alias for group member " FMTu64 ".",
+ group_id);
+ return 0;
+ }
+
+ group = &dms->groups[group_id];
+ old_alias = group->alias;
+
+ group->alias = dm_strdup(alias);
+ if (!group->alias) {
+ log_error("Could not allocate memory for alias.");
+ goto bad;
+ }
+
+ if (!_stats_set_aux(dms, group_id, dms->regions[group_id].aux_data)) {
+ log_error("Could not set new aux_data");
+ goto bad;
+ }
+
+ dm_free((char *) old_alias);
+
+ return 1;
+
+bad:
+ dm_free((char *) group->alias);
+ group->alias = old_alias;
+ return 0;
+}
+
+const char *dm_stats_get_alias(const struct dm_stats *dms, uint64_t id)
+{
+ const struct dm_stats_region *region;
+
+ id = (id == DM_STATS_REGION_CURRENT) ? dms->cur_region : id;
+
+ if (id & DM_STATS_WALK_GROUP) {
+ if (id == DM_STATS_WALK_GROUP)
+ id = dms->cur_group;
+ else
+ id &= ~DM_STATS_WALK_GROUP;
+ }
+
+ region = &dms->regions[id];
+ if (!_stats_region_is_grouped(dms, id)
+ || !dms->groups[region->group_id].alias)
+ return dms->name;
+
+ return dms->groups[region->group_id].alias;
+}
+
+const char *dm_stats_get_current_region_program_id(const struct dm_stats *dms)
+{
+ return dm_stats_get_region_program_id(dms, dms->cur_region);
+}
+
+const char *dm_stats_get_current_region_aux_data(const struct dm_stats *dms)
+{
+ return dm_stats_get_region_aux_data(dms, dms->cur_region);
+}
+
+int dm_stats_get_region_precise_timestamps(const struct dm_stats *dms,
+ uint64_t region_id)
+{
+ struct dm_stats_region *region;
+
+ if (region_id == DM_STATS_REGION_CURRENT)
+ region_id = dms->cur_region;
+
+ if (region_id == DM_STATS_WALK_GROUP)
+ region_id = dms->cur_group;
+ else if (region_id & DM_STATS_WALK_GROUP)
+ region_id &= ~DM_STATS_WALK_GROUP;
+
+ region = &dms->regions[region_id];
+ return region->timescale == 1;
+}
+
+int dm_stats_get_current_region_precise_timestamps(const struct dm_stats *dms)
+{
+ return dm_stats_get_region_precise_timestamps(dms,
+ DM_STATS_REGION_CURRENT);
+}
+
+/*
+ * Histogram access methods.
+ */
+
+static void _sum_histogram_bins(const struct dm_stats *dms,
+ struct dm_histogram *dmh_aggr,
+ uint64_t region_id, uint64_t area_id)
+{
+ struct dm_stats_region *region;
+ struct dm_histogram_bin *bins;
+ struct dm_histogram *dmh_cur;
+ int bin;
+
+ region = &dms->regions[region_id];
+ dmh_cur = region->counters[area_id].histogram;
+ bins = dmh_aggr->bins;
+
+ for (bin = 0; bin < dmh_aggr->nr_bins; bin++)
+ bins[bin].count += dmh_cur->bins[bin].count;
+}
+
+/*
+ * Create an aggregate histogram for a sub-divided region or a group.
+ *
+ * Two modes, selected by area_id:
+ *  - area_id == DM_STATS_WALK_REGION: sum the per-area histograms of
+ *    the single region region_id;
+ *  - otherwise: treat region_id as a group ID and sum the histograms
+ *    of every area of every member region.
+ *
+ * The result is cached on the region (or group) so repeated calls
+ * return the same object; with no counter data the bounds histogram
+ * is returned instead. Returns NULL on error.
+ */
+static struct dm_histogram *_aggregate_histogram(const struct dm_stats *dms,
+						 uint64_t region_id,
+						 uint64_t area_id)
+{
+	struct dm_histogram *dmh_aggr, *dmh_cur, **dmh_cachep;
+	uint64_t group_id = DM_STATS_GROUP_NOT_PRESENT;
+	int bin, nr_bins, group = 1;
+	size_t hist_size;
+
+	if (area_id == DM_STATS_WALK_REGION) {
+		/* region aggregation */
+		group = 0;
+		if (!_stats_region_present(&dms->regions[region_id]))
+			return_NULL;
+
+		if (!dms->regions[region_id].bounds)
+			return_NULL;
+
+		/* no counter data yet: fall back to the bounds histogram */
+		if (!dms->regions[region_id].counters)
+			return dms->regions[region_id].bounds;
+
+		/* cached aggregate from an earlier call */
+		if (dms->regions[region_id].histogram)
+			return dms->regions[region_id].histogram;
+
+		dmh_cur = dms->regions[region_id].counters[0].histogram;
+		dmh_cachep = &dms->regions[region_id].histogram;
+		nr_bins = dms->regions[region_id].bounds->nr_bins;
+	} else {
+		/* group aggregation */
+		group_id = region_id;
+		area_id = DM_STATS_WALK_GROUP;
+		if (!_stats_group_id_present(dms, group_id))
+			return_NULL;
+
+		if (!dms->regions[group_id].bounds)
+			return_NULL;
+
+		if (!dms->regions[group_id].counters)
+			return dms->regions[group_id].bounds;
+
+		if (dms->groups[group_id].histogram)
+			return dms->groups[group_id].histogram;
+
+		dmh_cur = dms->regions[group_id].counters[0].histogram;
+		dmh_cachep = &dms->groups[group_id].histogram;
+		nr_bins = dms->regions[group_id].bounds->nr_bins;
+	}
+
+	hist_size = sizeof(*dmh_aggr)
+		     + nr_bins * sizeof(struct dm_histogram_bin);
+
+	/* returning 0 here is NULL for the pointer return type */
+	if (!(dmh_aggr = dm_pool_zalloc(dms->hist_mem, hist_size))) {
+		log_error("Could not allocate group histogram");
+		return 0;
+	}
+
+	dmh_aggr->nr_bins = dmh_cur->nr_bins;
+	dmh_aggr->dms = dms;
+
+	/* accumulate bin counts across all areas in scope */
+	if (!group)
+		_foreach_region_area(dms, region_id, area_id) {
+			_sum_histogram_bins(dms, dmh_aggr, region_id, area_id);
+		}
+	else {
+		_foreach_group_area(dms, group_id, region_id, area_id) {
+			_sum_histogram_bins(dms, dmh_aggr, region_id, area_id);
+		}
+	}
+
+	/* fill in totals and copy bounds from the first area's histogram */
+	for (bin = 0; bin < nr_bins; bin++) {
+		dmh_aggr->sum += dmh_aggr->bins[bin].count;
+		dmh_aggr->bins[bin].upper = dmh_cur->bins[bin].upper;
+	}
+
+	/* cache aggregate histogram for subsequent access */
+	*dmh_cachep = dmh_aggr;
+
+	return dmh_aggr;
+}
+
+struct dm_histogram *dm_stats_get_histogram(const struct dm_stats *dms,
+ uint64_t region_id,
+ uint64_t area_id)
+{
+ int aggr = 0;
+
+ if (region_id == DM_STATS_REGION_CURRENT) {
+ region_id = dms->cur_region;
+ if (region_id & DM_STATS_WALK_GROUP) {
+ region_id = dms->cur_group;
+ aggr = 1;
+ }
+ } else if (region_id & DM_STATS_WALK_GROUP) {
+ region_id &= ~DM_STATS_WALK_GROUP;
+ aggr = 1;
+ }
+
+ area_id = (area_id == DM_STATS_AREA_CURRENT)
+ ? dms->cur_area : area_id ;
+
+ if (area_id == DM_STATS_WALK_REGION)
+ aggr = 1;
+
+ if (aggr)
+ return _aggregate_histogram(dms, region_id, area_id);
+
+ if (region_id & DM_STATS_WALK_REGION)
+ region_id &= ~DM_STATS_WALK_REGION;
+
+ if (!dms->regions[region_id].counters)
+ return dms->regions[region_id].bounds;
+
+ return dms->regions[region_id].counters[area_id].histogram;
+}
+
+int dm_histogram_get_nr_bins(const struct dm_histogram *dmh)
+{
+ return dmh->nr_bins;
+}
+
+uint64_t dm_histogram_get_bin_lower(const struct dm_histogram *dmh, int bin)
+{
+ return (!bin) ? 0 : dmh->bins[bin - 1].upper;
+}
+
+uint64_t dm_histogram_get_bin_upper(const struct dm_histogram *dmh, int bin)
+{
+ return dmh->bins[bin].upper;
+}
+
+uint64_t dm_histogram_get_bin_width(const struct dm_histogram *dmh, int bin)
+{
+ uint64_t upper, lower;
+ upper = dm_histogram_get_bin_upper(dmh, bin);
+ lower = dm_histogram_get_bin_lower(dmh, bin);
+ return (upper - lower);
+}
+
+uint64_t dm_histogram_get_bin_count(const struct dm_histogram *dmh, int bin)
+{
+ return dmh->bins[bin].count;
+}
+
+uint64_t dm_histogram_get_sum(const struct dm_histogram *dmh)
+{
+ return dmh->sum;
+}
+
+/*
+ * Return the fraction of all counted events that fall into bin 'bin'
+ * as a dm_percent_t, or DM_PERCENT_0 for an empty bin, an empty
+ * histogram, or a zero-width bin.
+ *
+ * NOTE(review): 'width' is only used for the zero guard, and 'val'
+ * round-trips the count through double with no effect on the value.
+ */
+dm_percent_t dm_histogram_get_bin_percent(const struct dm_histogram *dmh,
+					  int bin)
+{
+	uint64_t value = dm_histogram_get_bin_count(dmh, bin);
+	uint64_t width = dm_histogram_get_bin_width(dmh, bin);
+	uint64_t total = dm_histogram_get_sum(dmh);
+
+	double val = (double) value;
+
+	if (!total || !value || !width)
+		return DM_PERCENT_0;
+
+	return dm_make_percent((uint64_t) val, total);
+}
+
+/*
+ * Histogram string helper functions: used to construct histogram and
+ * bin boundary strings from numeric data.
+ */
+
+/*
+ * Allocate an unbound histogram object with nr_bins bins. Only used
+ * for histograms used to hold bounds values as arguments for calls to
+ * dm_stats_create_region().
+ */
+static struct dm_histogram *_alloc_dm_histogram(int nr_bins)
+{
+ /* Allocate space for dm_histogram + nr_entries. */
+ size_t size = sizeof(struct dm_histogram) +
+ (unsigned) nr_bins * sizeof(struct dm_histogram_bin);
+ return dm_zalloc(size);
+}
+
+/*
+ * Parse a histogram bounds string supplied by the user. The string
+ * consists of a list of numbers, "n1,n2,n3,..." with optional 'ns',
+ * 'us', 'ms', or 's' unit suffixes.
+ *
+ * Each value is scaled to nanoseconds according to its optional unit
+ * suffix; a bare number is interpreted as nanoseconds.
+ *
+ * On return bounds contains a pointer to an array of uint64_t
+ * histogram bounds values expressed in units of nanoseconds.
+ */
+struct dm_histogram *dm_histogram_bounds_from_string(const char *bounds_str)
+{
+	/* Characters permitted anywhere in a bounds string: digits,
+	 * the separator, and the unit suffix letters m/u/n/s. */
+	static const char _valid_chars[] = "0123456789,muns";
+	uint64_t this_val = 0, mult = 1;
+	const char *c, *v, *val_start;
+	struct dm_histogram_bin *cur;
+	struct dm_histogram *dmh;
+	int nr_entries = 1;
+	char *endptr;
+
+	c = bounds_str;
+
+	/* Count number of bounds entries. */
+	while(*c)
+		if (*(c++) == ',')
+			nr_entries++;
+
+	c = bounds_str;
+
+	/* NOTE(review): return_0 in a pointer-returning function yields
+	 * NULL; the conventional macro here is return_NULL. */
+	if (!(dmh = _alloc_dm_histogram(nr_entries)))
+		return_0;
+
+	dmh->nr_bins = nr_entries;
+
+	cur = dmh->bins;
+
+	do {
+		/* Reject any character outside the permitted set. */
+		for (v = _valid_chars; *v; v++)
+			if (*c == *v)
+				break;
+
+		if (!*v) {
+			stack;
+			goto badchar;
+		}
+
+		if (*c == ',') {
+			log_error("Empty histogram bin not allowed: %s",
+				  bounds_str);
+			goto bad;
+		} else {
+			val_start = c;
+			endptr = NULL;
+
+			/* NOTE(review): strtoull() always stores through a
+			 * non-NULL endptr argument, so this test can never
+			 * fire; a parse failure would be endptr == val_start
+			 * -- confirm and tighten upstream. */
+			this_val = strtoull(val_start, &endptr, 10);
+			if (!endptr) {
+				log_error("Could not parse histogram bound.");
+				goto bad;
+			}
+			c = endptr; /* Advance to units, comma, or end. */
+
+			if (*c == 's') {
+				mult = NSEC_PER_SEC;
+				c++; /* Advance over 's'. */
+			} else if (*(c + 1) == 's') {
+				if (*c == 'm')
+					mult = NSEC_PER_MSEC;
+				else if (*c == 'u')
+					mult = NSEC_PER_USEC;
+				else if (*c == 'n')
+					mult = 1;
+				else {
+					stack;
+					goto badchar;
+				}
+				c += 2; /* Advance over 'ms', 'us', or 'ns'. */
+			} else if (*c == ',')
+				c++;
+			else if (*c) { /* Expected ',' or NULL. */
+				stack;
+				goto badchar;
+			}
+
+			if (*c == ',')
+				c++;
+			/* Store the boundary scaled to nanoseconds. */
+			this_val *= mult;
+			(cur++)->upper = this_val;
+		}
+	} while (*c);
+
+	/* Bounds histograms have no owner. */
+	dmh->dms = NULL;
+	dmh->region = NULL;
+
+	return dmh;
+
+badchar:
+	log_error("Invalid character in histogram: %c", *c);
+bad:
+	dm_free(dmh);
+	return NULL;
+}
+
+struct dm_histogram *dm_histogram_bounds_from_uint64(const uint64_t *bounds)
+{
+ const uint64_t *entry = bounds;
+ struct dm_histogram_bin *cur;
+ struct dm_histogram *dmh;
+ int nr_entries = 1;
+
+ if (!bounds || !bounds[0]) {
+ log_error("Could not parse empty histogram bounds array");
+ return 0;
+ }
+
+ /* Count number of bounds entries. */
+ while(*entry)
+ if (*(++entry))
+ nr_entries++;
+
+ entry = bounds;
+
+ if (!(dmh = _alloc_dm_histogram(nr_entries)))
+ return_0;
+
+ dmh->nr_bins = nr_entries;
+
+ cur = dmh->bins;
+
+ while (*entry)
+ (cur++)->upper = *(entry++);
+
+ /* Bounds histograms have no owner. */
+ dmh->dms = NULL;
+ dmh->region = NULL;
+
+ return dmh;
+}
+
+void dm_histogram_bounds_destroy(struct dm_histogram *bounds)
+{
+ if (!bounds)
+ return;
+
+ /* Bounds histograms are not bound to any handle or region. */
+ if (bounds->dms || bounds->region) {
+ log_error("Freeing invalid histogram bounds pointer %p.",
+ (void *) bounds);
+ stack;
+ }
+ /* dm_free() expects a (void *). */
+ dm_free((void *) bounds);
+}
+
+/*
+ * Scale a bounds value down from nanoseconds to the largest possible
+ * whole unit suffix.
+ */
+static void _scale_bound_value_to_suffix(uint64_t *bound, const char **suffix)
+{
+ *suffix = "ns";
+ if (!(*bound % NSEC_PER_SEC)) {
+ *bound /= NSEC_PER_SEC;
+ *suffix = "s";
+ } else if (!(*bound % NSEC_PER_MSEC)) {
+ *bound /= NSEC_PER_MSEC;
+ *suffix = "ms";
+ } else if (!(*bound % NSEC_PER_USEC)) {
+ *bound /= NSEC_PER_USEC;
+ *suffix = "us";
+ }
+}
+
+#define DM_HISTOGRAM_BOUNDS_MASK 0x30
+#define BOUNDS_LEN 64
+
+static int _make_bounds_string(char *buf, size_t size, uint64_t lower,
+ uint64_t upper, int flags, int width)
+{
+ char bound_buf[BOUNDS_LEN];
+ const char *l_suff = NULL;
+ const char *u_suff = NULL;
+ const char *sep = "";
+ int bounds = flags & DM_HISTOGRAM_BOUNDS_MASK;
+
+ if (!bounds)
+ return_0;
+
+ *buf = '\0';
+
+ if (flags & DM_HISTOGRAM_SUFFIX) {
+ _scale_bound_value_to_suffix(&lower, &l_suff);
+ _scale_bound_value_to_suffix(&upper, &u_suff);
+ } else
+ l_suff = u_suff = "";
+
+ if (flags & DM_HISTOGRAM_VALUES)
+ sep = ":";
+
+ if (bounds > DM_HISTOGRAM_BOUNDS_LOWER) {
+ /* Handle infinite uppermost bound. */
+ if (upper == UINT64_MAX) {
+ if (dm_snprintf(bound_buf, sizeof(bound_buf),
+ ">" FMTu64 "%s", lower, l_suff) < 0)
+ goto_out;
+ /* Only display an 'upper' string for final bin. */
+ bounds = DM_HISTOGRAM_BOUNDS_UPPER;
+ } else {
+ if (dm_snprintf(bound_buf, sizeof(bound_buf),
+ FMTu64 "%s", upper, u_suff) < 0)
+ goto_out;
+ }
+ } else if (bounds == DM_HISTOGRAM_BOUNDS_LOWER) {
+ if ((dm_snprintf(bound_buf, sizeof(bound_buf), FMTu64 "%s",
+ lower, l_suff)) < 0)
+ goto_out;
+ }
+
+ switch (bounds) {
+ case DM_HISTOGRAM_BOUNDS_LOWER:
+ case DM_HISTOGRAM_BOUNDS_UPPER:
+ return dm_snprintf(buf, size, "%*s%s", width, bound_buf, sep);
+ case DM_HISTOGRAM_BOUNDS_RANGE:
+ return dm_snprintf(buf, size, FMTu64 "%s-%s%s",
+ lower, l_suff, bound_buf, sep);
+ }
+out:
+ return 0;
+}
+
+#define BOUND_WIDTH_NOSUFFIX 10 /* 999999999 nsecs */
+#define BOUND_WIDTH 6 /* bounds string up to 9999xs */
+#define COUNT_WIDTH 6 /* count string: up to 9999 */
+#define PERCENT_WIDTH 6 /* percent string : 0.00-100.00% */
+#define DM_HISTOGRAM_VALUES_MASK 0x06
+
+/*
+ * Format a histogram (or a single bin when bin >= 0) as a string,
+ * built in the handle's histogram memory pool.
+ *
+ * bin: index of a single bin to format, or a negative value for all
+ * bins.
+ * width: minimum column width for each value (<0 or 0 for no padding;
+ * only applied when values are requested via flags).
+ * flags: DM_HISTOGRAM_* flags selecting bounds, values, percent and
+ * time-suffix formatting.
+ *
+ * Returns a pool-allocated string on success, or NULL on error.
+ */
+const char *dm_histogram_to_string(const struct dm_histogram *dmh, int bin,
+				   int width, int flags)
+{
+	char buf[BOUNDS_LEN], bounds_buf[BOUNDS_LEN];
+	int minwidth, bounds, values, start, last;
+	uint64_t lower, upper, val_u64; /* bounds of the current bin. */
+	/* Use the histogram pool for string building. */
+	struct dm_pool *mem = dmh->dms->hist_mem;
+	const char *sep = "";
+	int bounds_width;
+	ssize_t len = 0;
+	float val_flt;
+
+	bounds = flags & DM_HISTOGRAM_BOUNDS_MASK;
+	values = flags & DM_HISTOGRAM_VALUES;
+
+	/* A negative bin selects the full range of bins. */
+	if (bin < 0) {
+		start = 0;
+		last = dmh->nr_bins - 1;
+	} else
+		start = last = bin;
+
+	minwidth = width;
+
+	if (width < 0 || !values)
+		width = minwidth = 0; /* no padding */
+	else if (flags & DM_HISTOGRAM_PERCENT)
+		width = minwidth = (width) ? : PERCENT_WIDTH;
+	else if (flags & DM_HISTOGRAM_VALUES)
+		width = minwidth = (width) ? : COUNT_WIDTH;
+
+	if (values && !width)
+		sep = ":";
+
+	/* Set bounds string to the empty string. */
+	bounds_buf[0] = '\0';
+
+	if (!dm_pool_begin_object(mem, 64))
+		return_0;
+
+	for (bin = start; bin <= last; bin++) {
+		if (bounds) {
+			/* Default bounds width depends on time suffixes. */
+			bounds_width = (!(flags & DM_HISTOGRAM_SUFFIX))
+					? BOUND_WIDTH_NOSUFFIX
+					: BOUND_WIDTH ;
+
+			bounds_width = (!width) ? width : bounds_width;
+
+			lower = dm_histogram_get_bin_lower(dmh, bin);
+			upper = dm_histogram_get_bin_upper(dmh, bin);
+
+			len = sizeof(bounds_buf);
+			len = _make_bounds_string(bounds_buf, len,
+						  lower, upper, flags,
+						  bounds_width);
+			/*
+			 * Comma separates "bounds: value" pairs unless
+			 * --noheadings is used.
+			 */
+			sep = (width || !values) ? "," : ":";
+
+			/* Adjust width by real bounds length if set. */
+			width -= (width) ? (len - (bounds_width + 1)) : 0;
+
+			/* -ve width indicates specified width was overrun. */
+			width = (width > 0) ? width : 0;
+		}
+
+		/* No trailing separator after the final bin. */
+		if (bin == last)
+			sep = "";
+
+		if (flags & DM_HISTOGRAM_PERCENT) {
+			dm_percent_t pr;
+			pr = dm_histogram_get_bin_percent(dmh, bin);
+			val_flt = dm_percent_to_float(pr);
+			len = dm_snprintf(buf, sizeof(buf), "%s%*.2f%%%s",
+					  bounds_buf, width, val_flt, sep);
+		} else if (values) {
+			val_u64 = dmh->bins[bin].count;
+			len = dm_snprintf(buf, sizeof(buf), "%s%*"PRIu64"%s",
+					  bounds_buf, width, val_u64, sep);
+		} else if (bounds)
+			len = dm_snprintf(buf, sizeof(buf), "%s%s", bounds_buf,
+					  sep);
+		else {
+			/* neither bounds nor values requested: empty cell */
+			*buf = '\0';
+			len = 0;
+		}
+
+		if (len < 0)
+			goto_bad;
+
+		width = minwidth; /* re-set histogram column width. */
+		if (!dm_pool_grow_object(mem, buf, (size_t) len))
+			goto_bad;
+	}
+
+	/* NUL-terminate the pool object before handing it out. */
+	if (!dm_pool_grow_object(mem, "\0", 1))
+		goto_bad;
+
+	return (const char *) dm_pool_end_object(mem);
+
+bad:
+	dm_pool_abandon_object(mem);
+	return NULL;
+}
+
+/*
+ * A lightweight representation of an extent (region, area, file
+ * system block or extent etc.). A table of extents can be used
+ * to sort and to efficiently find holes or overlaps among a set
+ * of tuples of the form (id, start, len).
+ */
+struct _extent {
+	struct dm_list list;	/* linkage for extent lists */
+	uint64_t id;		/* region ID this extent belongs to */
+	uint64_t start;		/* start sector */
+	uint64_t len;		/* length in sectors */
+};
+
+/* last address in an extent (inclusive: start + len - 1) */
+#define _extent_end(a) ((a)->start + (a)->len - 1)
+
+/*
+ * a and b must be sorted by increasing start sector.
+ *
+ * Since _extent_end() is the inclusive last address, the extents
+ * overlap whenever it reaches b's first address: use ">=" so that
+ * extents sharing a single boundary sector are also detected (">"
+ * misses that one-sector overlap).
+ */
+#define _extents_overlap(a, b) (_extent_end(a) >= (b)->start)
+
+/*
+ * qsort() comparison function: order extents by ascending start
+ * sector.
+ */
+static int _extent_start_compare(const void *p1, const void *p2)
+{
+	const struct _extent *ea = (const struct _extent *) p1;
+	const struct _extent *eb = (const struct _extent *) p2;
+
+	if (ea->start > eb->start)
+		return 1;
+
+	return (ea->start < eb->start) ? -1 : 0;
+}
+
+/*
+ * Install a new group in the handle from the region bitmap 'regions'.
+ * The group ID is the first (lowest) region ID present in the bitmap
+ * and the group takes ownership of the bitmap on success.
+ *
+ * alias: optional user-friendly group name (may be NULL).
+ *
+ * Returns 1 on success and 0 on failure.
+ */
+static int _stats_create_group(struct dm_stats *dms, dm_bitset_t regions,
+			       const char *alias, uint64_t *group_id)
+{
+	struct dm_stats_group *group;
+	*group_id = dm_bit_get_first(regions);
+
+	/* group has no regions? */
+	if (*group_id == DM_STATS_GROUP_NOT_PRESENT)
+		return_0;
+
+	group = &dms->groups[*group_id];
+
+	if (group->regions) {
+		/* Message fixed: the adjacent string literals previously
+		 * lacked separating spaces ("while""creating", "bitmap"FMTu64). */
+		log_error(INTERNAL_ERROR "Unexpected group state while "
+			  "creating group ID bitmap " FMTu64, *group_id);
+		return 0;
+	}
+
+	group->group_id = *group_id;
+	group->regions = regions;
+
+	/* alias is optional: group->alias stays NULL when none is given */
+	if (alias)
+		group->alias = dm_strdup(alias);
+	else
+		group->alias = NULL;
+
+	/* force an update of the group tag stored in aux_data */
+	if (!_stats_set_aux(dms, *group_id, dms->regions[*group_id].aux_data))
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Check whether the regions named in 'regions' overlap: build a table
+ * of (id, start, len) extents from the handle's region table, sort it
+ * by start sector, then repeatedly merge overlapping neighbours until
+ * none remain, warning once per overlapping pair.
+ *
+ * count: number of bits set in 'regions'.
+ *
+ * Returns 1 if no regions overlap and 0 otherwise (warnings have
+ * already been logged for each overlap found).
+ */
+static int _stats_group_check_overlap(const struct dm_stats *dms,
+				      dm_bitset_t regions, int count)
+{
+	struct dm_list ext_list = DM_LIST_HEAD_INIT(ext_list);
+	struct _extent *ext, *tmp, *next, *map = NULL;
+	size_t map_size = (dms->max_region + 1) * sizeof(*map);
+	int i = 0, id, overlap, merged;
+
+	map = dm_pool_alloc(dms->mem, map_size);
+	if (!map) {
+		log_error("Could not allocate memory for region map");
+		return 0;
+	}
+
+	/* build a table of extents in order of region_id */
+	for (id = dm_bit_get_first(regions); id >= 0;
+	     id = dm_bit_get_next(regions, id)) {
+		dm_list_init(&map[i].list);
+		map[i].id = id;
+		map[i].start = dms->regions[id].start;
+		map[i].len = dms->regions[id].len;
+		i++;
+	}
+
+	/* A single region cannot overlap itself. */
+	if (i == 1) {
+		dm_pool_free(dms->mem, map);
+		return 1;
+	}
+
+	/* sort by extent.start */
+	qsort(map, count, sizeof(*map), _extent_start_compare);
+
+	for (i = 0; i < count; i++)
+		dm_list_add(&ext_list, &map[i].list);
+
+	overlap = 0;
+merge:
+	merged = 0;
+	dm_list_iterate_items_safe(ext, tmp, &ext_list) {
+		/* compare each extent with its successor in start order */
+		next = dm_list_item(dm_list_next(&ext_list, &ext->list),
+				    struct _extent);
+		if (!next)
+			continue;
+
+		if (_extents_overlap(ext, next)) {
+			log_warn("WARNING: region IDs " FMTu64 " and "
+				 FMTu64 " overlap. Some events will be "
+				 "counted twice.", ext->id, next->id);
+			/* merge larger extent into smaller */
+			if (_extent_end(ext) > _extent_end(next)) {
+				next->id = ext->id;
+				next->len = ext->len;
+			}
+			if (ext->start < next->start)
+				next->start = ext->start;
+			/* drop the merged-away extent from the list */
+			dm_list_del(&ext->list);
+			overlap = merged = 1;
+		}
+	}
+	/* continue until no merge candidates remain */
+	if (merged)
+		goto merge;
+
+	dm_pool_free(dms->mem, map);
+	return (overlap == 0);
+}
+
+/*
+ * Copy the bin count and each per-bin upper boundary value from
+ * 'from' into 'to'.
+ */
+static void _stats_copy_histogram_bounds(struct dm_histogram *to,
+					 struct dm_histogram *from)
+{
+	int bin;
+
+	to->nr_bins = from->nr_bins;
+
+	for (bin = to->nr_bins - 1; bin >= 0; bin--)
+		to->bins[bin].upper = from->bins[bin].upper;
+}
+
+/*
+ * Compare histogram bounds h1 and h2: return 1 when both are non-NULL
+ * and have the same number of bins with identical per-bin boundary
+ * values, or 0 otherwise.
+ */
+static int _stats_check_histogram_bounds(struct dm_histogram *h1,
+					 struct dm_histogram *h2)
+{
+	int bin;
+
+	if (!h1 || !h2 || (h1->nr_bins != h2->nr_bins))
+		return 0;
+
+	for (bin = 0; bin < h1->nr_bins; bin++)
+		if (h1->bins[bin].upper != h2->bins[bin].upper)
+			return 0;
+
+	return 1;
+}
+
+/*
+ * Create a new group in stats handle dms from the group description
+ * passed in group.
+ *
+ * members: bitset list string (e.g. "0-3,7") naming member region IDs.
+ * alias: optional user-friendly group name (may be NULL).
+ * group_id: set on success to the new group's ID (the lowest member
+ * region ID).
+ *
+ * Returns 1 on success and 0 on failure.
+ */
+int dm_stats_create_group(struct dm_stats *dms, const char *members,
+			  const char *alias, uint64_t *group_id)
+{
+	struct dm_histogram *check = NULL, *bounds;
+	int i, count = 0, precise = 0;
+	dm_bitset_t regions;
+
+	if (!dms->regions || !dms->groups) {
+		log_error("Could not create group: no regions found.");
+		return 0;
+	};
+
+	if (!(regions = dm_bitset_parse_list(members, NULL, 0))) {
+		log_error("Could not parse list: '%s'", members);
+		return 0;
+	}
+
+	/* scratch histogram used to compare members' bounds for equality */
+	if (!(check = dm_pool_zalloc(dms->hist_mem, sizeof(*check)))) {
+		log_error("Could not allocate memory for bounds check");
+		goto bad;
+	}
+
+	/* too many bits? (*regions holds the bitset size in bits) */
+	if ((*regions - 1) > dms->max_region) {
+		log_error("Invalid region ID: %d", *regions - 1);
+		goto bad;
+	}
+
+	/*
+	 * Check that each region_id in the bitmap meets the group
+	 * constraints: present, not already grouped, and if any
+	 * histogram is present that they all have the same bounds.
+	 */
+	for (i = dm_bit_get_first(regions); i >= 0;
+	     i = dm_bit_get_next(regions, i)) {
+		if (!dm_stats_region_present(dms, i)) {
+			log_error("Region ID %d does not exist", i);
+			goto bad;
+		}
+		if (_stats_region_is_grouped(dms, i)) {
+			log_error("Region ID %d already a member of group ID "
+				  FMTu64, i, dms->regions[i].group_id);
+			goto bad;
+		}
+		/* count precise (timescale==1) members to warn on mixtures */
+		if (dms->regions[i].timescale == 1)
+			precise++;
+
+		/* check for matching histogram bounds */
+		bounds = dms->regions[i].bounds;
+		if (bounds && !check->nr_bins)
+			/* first member with bounds becomes the reference */
+			_stats_copy_histogram_bounds(check, bounds);
+		else if (bounds) {
+			if (!_stats_check_histogram_bounds(check, bounds)) {
+				log_error("All region histogram bounds "
+					  "must match exactly");
+				goto bad;
+			}
+		}
+		count++;
+	}
+
+	if (precise && (precise != count))
+		log_warn("WARNING: Grouping regions with different clock resolution: "
+			 "precision may be lost.");
+
+	/* overlap is allowed but double-counts events: warn only */
+	if (!_stats_group_check_overlap(dms, regions, count))
+		log_very_verbose("Creating group with overlapping regions.");
+
+	if (!_stats_create_group(dms, regions, alias, group_id))
+		goto bad;
+
+	dm_pool_free(dms->hist_mem, check);
+	return 1;
+
+bad:
+	/*
+	 * NOTE(review): 'check' may still be NULL here when its
+	 * allocation failed — confirm dm_pool_free() tolerates NULL.
+	 */
+	dm_pool_free(dms->hist_mem, check);
+	dm_bitset_destroy(regions);
+	return 0;
+}
+
+/*
+ * Remove the specified group_id.
+ *
+ * When remove_regions is non-zero the member regions are also deleted
+ * from the device; otherwise members are only ungrouped and the
+ * leader's aux_data is rewritten without the group tag.
+ *
+ * Returns 1 on success and 0 on failure.
+ */
+int dm_stats_delete_group(struct dm_stats *dms, uint64_t group_id,
+			  int remove_regions)
+{
+	struct dm_stats_region *leader;
+	dm_bitset_t regions;
+	uint64_t i;
+
+	if (group_id > dms->max_region) {
+		log_error("Invalid group ID: " FMTu64, group_id);
+		return 0;
+	}
+
+	if (!_stats_group_id_present(dms, group_id)) {
+		log_error("Group ID " FMTu64 " does not exist", group_id);
+		return 0;
+	}
+
+	regions = dms->groups[group_id].regions;
+	/* the group leader is the region whose ID equals the group ID */
+	leader = &dms->regions[group_id];
+
+	/* delete all but the group leader */
+	for (i = (*regions - 1); i > leader->region_id; i--) {
+		if (dm_bit(regions, i)) {
+			dm_bit_clear(regions, i);
+			/* region deletion failure is non-fatal: warn and go on */
+			if (remove_regions && !dm_stats_delete_region(dms, i))
+				log_warn("WARNING: Failed to delete region "
+					 FMTu64 " on %s.", i, dms->name);
+		}
+	}
+
+	/* clear group and mark as not present */
+	_stats_clear_group_regions(dms, group_id);
+	_stats_group_destroy(&dms->groups[group_id]);
+
+	/* delete leader or clear aux_data */
+	if (remove_regions)
+		return dm_stats_delete_region(dms, group_id);
+	else if (!_stats_set_aux(dms, group_id, leader->aux_data))
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Map a region identifier (or the current-region / walk tokens) to
+ * the ID of the group that owns it.
+ */
+uint64_t dm_stats_get_group_id(const struct dm_stats *dms, uint64_t region_id)
+{
+	if (region_id == DM_STATS_REGION_CURRENT)
+		region_id = dms->cur_region;
+
+	if (region_id & DM_STATS_WALK_GROUP)
+		return (region_id == DM_STATS_WALK_GROUP)
+			? dms->cur_group
+			: (region_id & ~DM_STATS_WALK_GROUP);
+
+	if (region_id & DM_STATS_WALK_REGION)
+		region_id &= ~DM_STATS_WALK_REGION;
+
+	return dms->regions[region_id].group_id;
+}
+
+/*
+ * Build the textual member descriptor for group_id into *buf,
+ * allocated from the handle's region table pool.
+ *
+ * Returns 1 on success and 0 on failure.
+ */
+int dm_stats_get_group_descriptor(const struct dm_stats *dms,
+				  uint64_t group_id, char **buf)
+{
+	dm_bitset_t regions = dms->groups[group_id].regions;
+	size_t buflen = _stats_group_tag_len(dms, regions);
+
+	if (!(*buf = dm_pool_alloc(dms->mem, buflen))) {
+		log_error("Could not allocate memory for regions string");
+		return 0;
+	}
+
+	return _stats_group_tag_fill(dms, regions, *buf, buflen) ? 1 : 0;
+}
+
+#ifdef HAVE_LINUX_FIEMAP_H
+/*
+ * Resize the group bitmap corresponding to group_id so that it can
+ * contain at least num_regions members.
+ */
+static int _stats_resize_group(struct dm_stats_group *group,
+			       uint64_t num_regions)
+{
+	uint64_t last_bit = dm_bit_get_last(group->regions);
+	dm_bitset_t new, old;
+
+	/* refuse to shrink below the highest member bit */
+	if (last_bit >= num_regions) {
+		log_error("Cannot resize group bitmap to " FMTu64
+			  " with bit " FMTu64 " set.", num_regions, last_bit);
+		return 0;
+	}
+
+	log_very_verbose("Resizing group bitmap from " FMTu32 " to " FMTu64
+			 " (last_bit: " FMTu64 ").", group->regions[0],
+			 num_regions, last_bit);
+
+	new = dm_bitset_create(NULL, (unsigned) num_regions);
+	if (!new) {
+		log_error("Could not allocate memory for new group bitmap.");
+		return 0;
+	}
+
+	/* copy members to the larger bitmap, then swap and free the old */
+	old = group->regions;
+	dm_bit_copy(new, old);
+	group->regions = new;
+	dm_bitset_destroy(old);
+	return 1;
+}
+
+/*
+ * Group a table of region_ids corresponding to the extents of a file.
+ *
+ * On success, ownership of the 'regions' bitmap passes to the new
+ * group via _stats_create_group().
+ *
+ * Returns 1 on success and 0 on failure.
+ */
+static int _stats_group_file_regions(struct dm_stats *dms, uint64_t *region_ids,
+				     uint64_t count, const char *alias)
+{
+	dm_bitset_t regions = dm_bitset_create(NULL, dms->nr_regions);
+	uint64_t i, group_id = DM_STATS_GROUP_NOT_PRESENT;
+	char *members = NULL;
+	size_t buflen;
+
+	if (!regions) {
+		log_error("Cannot map file: failed to allocate group bitmap.");
+		return 0;
+	}
+
+	for (i = 0; i < count; i++)
+		dm_bit_set(regions, region_ids[i]);
+
+	/* textual group descriptor sized from the member bitmap */
+	buflen = _stats_group_tag_len(dms, regions);
+	members = dm_malloc(buflen);
+
+	if (!members) {
+		log_error("Cannot map file: failed to allocate group "
+			  "descriptor.");
+		dm_bitset_destroy(regions);
+		return 0;
+	}
+
+	if (!_stats_group_tag_fill(dms, regions, members, buflen))
+		goto bad;
+
+	/*
+	 * overlaps should not be possible: overlapping file extents
+	 * returned by FIEMAP imply a kernel bug or a corrupt fs.
+	 */
+	if (!_stats_group_check_overlap(dms, regions, count))
+		log_very_verbose("Creating group with overlapping regions.");
+
+	if (!_stats_create_group(dms, regions, alias, &group_id))
+		goto bad;
+
+	dm_free(members);
+	return 1;
+bad:
+	/*
+	 * NOTE(review): if _stats_create_group() failed after assigning
+	 * the bitmap to the group, destroying 'regions' here may leave a
+	 * dangling group->regions pointer — verify its failure paths.
+	 */
+	dm_bitset_destroy(regions);
+	dm_free(members);
+	return 0;
+}
+
+/*
+ * Append one extent record (id, start, len) to the extent table being
+ * grown in 'mem', converting FIEMAP byte offsets to 512b sectors.
+ *
+ * Returns 1 on success and 0 on failure.
+ */
+static int _stats_add_file_extent(int fd, struct dm_pool *mem, uint64_t id,
+				  struct fiemap_extent *fm_ext)
+{
+	struct _extent ext;
+
+	/* final address of list is unknown */
+	memset(&ext.list, 0, sizeof(ext.list));
+
+	/* convert bytes to dm (512b) sectors */
+	ext.id = id;
+	ext.start = fm_ext->fe_physical >> SECTOR_SHIFT;
+	ext.len = fm_ext->fe_length >> SECTOR_SHIFT;
+
+	log_very_verbose("Extent " FMTu64 " on fd %d at " FMTu64 "+"
+			 FMTu64, ext.id, fd, ext.start, ext.len);
+
+	if (dm_pool_grow_object(mem, &ext, sizeof(ext)))
+		return 1;
+
+	log_error("Cannot map file: failed to grow extent map.");
+	return 0;
+}
+
+/*
+ * Test for the boundary of an extent: true when this extent is not at
+ * the start of the file and its physical address differs from the
+ * expected continuation address 'exp'.
+ *
+ * The expansion is now fully parenthesized so the macro composes
+ * safely in any expression context (the previous form relied on the
+ * precedence of the caller's surrounding operators).
+ */
+#define ext_boundary(ext, exp) \
+(((ext).fe_logical != 0) && \
+ ((ext).fe_physical != (exp)))
+
+/*
+ * Copy fields from fiemap_extent 'from' to the fiemap_extent
+ * pointed to by 'to'.
+ */
+#define ext_copy(to, from) \
+do { \
+	*(to) = *(from); \
+} while (0)
+
+/*
+ * Fold one batch of FIEMAP extents into the extent table being grown
+ * in 'mem'.
+ *
+ * *fm_pending accumulates logically-contiguous extents until a
+ * physical discontinuity (or eof) is seen; *fm_last carries the last
+ * raw extent between successive calls. *eof is set to 1 when the
+ * final extent is seen, and to -1 on error.
+ *
+ * Returns the number of extent records added during this call.
+ */
+static uint64_t _stats_map_extents(int fd, struct dm_pool *mem,
+				   struct fiemap *fiemap,
+				   struct fiemap_extent *fm_ext,
+				   struct fiemap_extent *fm_last,
+				   struct fiemap_extent *fm_pending,
+				   uint64_t next_extent,
+				   int *eof)
+{
+	uint64_t expected = 0, nr_extents = next_extent;
+	unsigned int i;
+
+	/*
+	 * Loop over the returned extents adding the fm_pending extent
+	 * to the table of extents each time a discontinuity (or eof)
+	 * is detected.
+	 *
+	 * We use a pointer to fm_pending in the caller since it is
+	 * possible that logical extents comprising a single physical
+	 * extent are returned by successive FIEMAP calls.
+	 */
+	for (i = 0; i < fiemap->fm_mapped_extents; i++) {
+		expected = fm_last->fe_physical + fm_last->fe_length;
+
+		if (fm_ext[i].fe_flags & FIEMAP_EXTENT_LAST)
+			*eof = 1;
+
+		/* cannot map extents that are not yet allocated. */
+		if (fm_ext[i].fe_flags
+		    & (FIEMAP_EXTENT_UNKNOWN | FIEMAP_EXTENT_DELALLOC))
+			continue;
+
+		/*
+		 * Begin a new extent if the current physical address differs
+		 * from the expected address yielded by fm_last.fe_physical +
+		 * fm_last.fe_length.
+		 *
+		 * A logical discontinuity is seen at the start of the file if
+		 * unwritten space exists before the first extent: do not add
+		 * any extent record until we have accumulated a non-zero length
+		 * in fm_pending.
+		 */
+		if (fm_pending->fe_length &&
+		    ext_boundary(fm_ext[i], expected)) {
+			if (!_stats_add_file_extent(fd, mem, nr_extents,
+						    fm_pending))
+				goto_bad;
+			nr_extents++;
+			/* Begin a new pending extent. */
+			ext_copy(fm_pending, fm_ext + i);
+		} else {
+			expected = 0;
+			/* Begin a new pending extent for extent 0. If there is
+			 * a hole at the start of the file, the first allocated
+			 * extent will have a non-zero fe_logical. Detect this
+			 * case by testing fm_pending->fe_length: if no length
+			 * has been accumulated we are handling the first
+			 * physical extent of the file.
+			 */
+			if (!fm_pending->fe_length || fm_ext[i].fe_logical == 0)
+				ext_copy(fm_pending, fm_ext + i);
+			else
+				/* accumulate this logical extent's length */
+				fm_pending->fe_length += fm_ext[i].fe_length;
+		}
+		*fm_last = fm_ext[i];
+	}
+
+	/*
+	 * If the file only has a single extent, no boundary is ever
+	 * detected to trigger addition of the first extent.
+	 *
+	 * NOTE(review): this block and the fm_ext[i - 1] accesses below
+	 * assume fm_mapped_extents > 0 — the caller guarantees this;
+	 * verify before reusing elsewhere.
+	 */
+	if (*eof || (fm_ext[i - 1].fe_logical == 0)) {
+		_stats_add_file_extent(fd, mem, nr_extents, fm_pending);
+		nr_extents++;
+	}
+
+	/* resume the next FIEMAP call after the last logical extent seen */
+	fiemap->fm_start = (fm_ext[i - 1].fe_logical +
+			    fm_ext[i - 1].fe_length);
+
+	/* return the number of extents found in this call. */
+	return nr_extents - next_extent;
+bad:
+	/* signal mapping error to caller */
+	*eof = -1;
+	return 0;
+}
+
+/*
+ * Read the extents of an open file descriptor into a table of struct _extent.
+ *
+ * Based on e2fsprogs/misc/filefrag.c::filefrag_fiemap().
+ *
+ * Copyright 2003 by Theodore Ts'o.
+ *
+ * On success the pool-built extent table is returned and *count is
+ * set to the number of extents found; on failure NULL is returned and
+ * *count is set to 0.
+ */
+static struct _extent *_stats_get_extents_for_file(struct dm_pool *mem, int fd,
+						   uint64_t *count)
+{
+	struct fiemap_extent fm_last = {0}, fm_pending = {0}, *fm_ext = NULL;
+	struct fiemap *fiemap = NULL;
+	int eof = 0, nr_extents = 0;
+	struct _extent *extents;
+	unsigned long flags = 0;
+	uint64_t *buf;
+
+	/* grow temporary extent table in the pool */
+	if (!dm_pool_begin_object(mem, sizeof(*extents)))
+		return NULL;
+
+	buf = dm_zalloc(STATS_FIE_BUF_LEN);
+	if (!buf) {
+		log_error("Could not allocate memory for FIEMAP buffer.");
+		goto bad;
+	}
+
+	/* initialise pointers into the ioctl buffer. */
+	fiemap = (struct fiemap *) buf;
+	fm_ext = &fiemap->fm_extents[0];
+
+	/* space available per ioctl */
+	*count = (STATS_FIE_BUF_LEN - sizeof(*fiemap))
+		  / sizeof(struct fiemap_extent);
+
+	/* sync the file first so the map reflects its current data */
+	flags = FIEMAP_FLAG_SYNC;
+
+	do {
+		/* start of ioctl loop - zero size and set count to bufsize */
+		fiemap->fm_length = ~0ULL;
+		fiemap->fm_flags = flags;
+		fiemap->fm_extent_count = *count;
+
+		/* get count-sized chunk of extents */
+		if (ioctl(fd, FS_IOC_FIEMAP, (unsigned long) fiemap) < 0) {
+			if (errno == EBADR)
+				log_err_once("FIEMAP failed with unknown "
+					     "flags %x.", fiemap->fm_flags);
+			goto bad;
+		}
+
+		/* If 0 extents are returned, more ioctls are not needed */
+		if (fiemap->fm_mapped_extents == 0)
+			break;
+
+		nr_extents += _stats_map_extents(fd, mem, fiemap, fm_ext,
+						 &fm_last, &fm_pending,
+						 nr_extents, &eof);
+
+		/* check for extent mapping error */
+		if (eof < 0)
+			goto bad;
+
+	} while (eof == 0);
+
+	if (!nr_extents) {
+		log_error("Cannot map file: no allocated extents.");
+		goto bad;
+	}
+
+	/* return total number of extents */
+	*count = nr_extents;
+	extents = dm_pool_end_object(mem);
+
+	/* free FIEMAP buffer. */
+	dm_free(buf);
+
+	return extents;
+
+bad:
+	*count = 0;
+	dm_pool_abandon_object(mem);
+	dm_free(buf);
+	return NULL;
+}
+
+/* true when extent e covers exactly the span (start=s, len=l) */
+#define MATCH_EXTENT(e, s, l) \
+(((e).start == (s)) && ((e).len == (l)))
+
+/*
+ * Linear search for the extent matching (start, len) exactly.
+ * Returns a pointer into 'extents', or NULL when no extent matches.
+ */
+static struct _extent *_find_extent(uint64_t nr_extents, struct _extent *extents,
+				    uint64_t start, uint64_t len)
+{
+	struct _extent *ext = extents, *end = extents + nr_extents;
+
+	for (; ext < end; ext++)
+		if (MATCH_EXTENT(*ext, start, len))
+			return ext;
+
+	return NULL;
+}
+
+/*
+ * Clean up a table of region_id values that were created during a
+ * failed dm_stats_create_regions_from_fd, or dm_stats_update_regions_from_fd
+ * operation.
+ */
+static void _stats_cleanup_region_ids(struct dm_stats *dms, uint64_t *regions,
+				      uint64_t nr_regions)
+{
+	uint64_t i;
+
+	for (i = 0; i < nr_regions; i++)
+		if (!_stats_delete_region(dms, regions[i]))
+			/* report the region ID that failed, not the index */
+			log_error("Could not delete region " FMTu64 ".",
+				  regions[i]);
+}
+
+/*
+ * First update pass: prune no-longer-allocated extents from the group
+ * and build a table of the remaining extents so that their creation
+ * can be skipped in the second pass.
+ *
+ * Returns the number of retained regions on success (0 is also
+ * returned if the extent table cannot be initialised) or -1 on error.
+ * On success *old_extents points at the pool-built table of kept
+ * extents and *regroup is set if the group leader was deleted.
+ */
+static int _stats_unmap_regions(struct dm_stats *dms, uint64_t group_id,
+				struct dm_pool *mem, struct _extent *extents,
+				struct _extent **old_extents, uint64_t *count,
+				int *regroup)
+{
+	struct dm_stats_region *region = NULL;
+	struct dm_stats_group *group = NULL;
+	uint64_t nr_kept, nr_old;
+	struct _extent ext;
+	int64_t i;
+
+	group = &dms->groups[group_id];
+
+	log_very_verbose("Checking for changed file extents in group ID "
+			 FMTu64, group_id);
+
+	if (!dm_pool_begin_object(mem, sizeof(**old_extents))) {
+		log_error("Could not allocate extent table.");
+		return 0;
+	}
+
+	nr_kept = nr_old = 0; /* counts of old and retained extents */
+
+	/*
+	 * First pass: delete de-allocated extents and set regroup=1 if
+	 * deleting the current group leader.
+	 *
+	 * Walk member bits in reverse so deletions do not disturb the walk.
+	 */
+	i = dm_bit_get_last(group->regions);
+	for (; i >= 0; i = dm_bit_get_prev(group->regions, i)) {
+		region = &dms->regions[i];
+		nr_old++;
+
+		if (extents && _find_extent(*count, extents,
+					    region->start, region->len)) {
+			/* extent still allocated: record and keep the region */
+			ext.start = region->start;
+			ext.len = region->len;
+			ext.id = i;
+			nr_kept++;
+
+			if (!dm_pool_grow_object(mem, &ext, sizeof(ext)))
+				goto out;
+
+			/* NOTE(review): 'i' is int64_t but is printed with
+			 * FMTu64 here and below — harmless for the
+			 * non-negative IDs this loop visits. */
+			log_very_verbose("Kept region " FMTu64, i);
+		} else {
+
+			if (i == group_id)
+				*regroup = 1;
+
+			if (!_stats_delete_region(dms, i)) {
+				log_error("Could not remove region ID " FMTu64,
+					  i);
+				goto out;
+			}
+
+			log_very_verbose("Deleted region " FMTu64, i);
+		}
+	}
+
+	*old_extents = dm_pool_end_object(mem);
+	if (!*old_extents) {
+		log_error("Could not finalize region extent table.");
+		goto out;
+	}
+	log_very_verbose("Kept " FMTd64 " of " FMTd64 " old extents",
+			 nr_kept, nr_old);
+	log_very_verbose("Found " FMTu64 " new extents",
+			 *count - nr_kept);
+
+	return (int) nr_kept;
+out:
+	dm_pool_abandon_object(mem);
+	return -1;
+}
+
+/*
+ * Create or update a set of regions representing the extents of a file
+ * and return a table of uint64_t region_id values. The number of regions
+ * created is returned in the memory pointed to by count (which must be
+ * non-NULL).
+ *
+ * If group_id is not equal to DM_STATS_GROUP_NOT_PRESENT, it is assumed
+ * that group_id corresponds to a group containing existing regions that
+ * were mapped to this file at an earlier time: regions will be added or
+ * removed to reflect the current status of the file.
+ *
+ * The returned table is terminated by DM_STATS_REGION_NOT_PRESENT and
+ * must be released with dm_free(); NULL is returned on error.
+ */
+static uint64_t *_stats_map_file_regions(struct dm_stats *dms, int fd,
+					 struct dm_histogram *bounds,
+					 int precise, uint64_t group_id,
+					 uint64_t *count, int *regroup)
+{
+	struct _extent *extents = NULL, *old_extents = NULL;
+	uint64_t *regions = NULL, fail_region, i, num_bits;
+	struct dm_stats_group *group = NULL;
+	struct dm_pool *extent_mem = NULL;
+	struct _extent *old_ext;
+	char *hist_arg = NULL;
+	struct statfs fsbuf;
+	int64_t nr_kept = 0;
+	struct stat buf;
+	int update;
+
+	*count = 0;
+	update = _stats_group_id_present(dms, group_id);
+
+#ifdef BTRFS_SUPER_MAGIC
+	if (fstatfs(fd, &fsbuf)) {
+		log_error("fstatfs failed for fd %d", fd);
+		return 0;
+	}
+
+	/* btrfs FIEMAP data cannot be used to locate physical sectors */
+	if (fsbuf.f_type == BTRFS_SUPER_MAGIC) {
+		log_error("Cannot map file: btrfs does not provide "
+			  "physical FIEMAP extent data.");
+		return 0;
+	}
+#endif
+
+	if (fstat(fd, &buf)) {
+		log_error("fstat failed for fd %d", fd);
+		return 0;
+	}
+
+	if (!(buf.st_mode & S_IFREG)) {
+		log_error("Not a regular file");
+		return 0;
+	}
+
+	if (!dm_is_dm_major(major(buf.st_dev))) {
+		log_error("Cannot map file: not a device-mapper device.");
+		return 0;
+	}
+
+	/*
+	 * If regroup is set here, we are creating a new filemap: otherwise
+	 * we are updating a group with a valid group identifier in group_id.
+	 */
+	if (update)
+		log_very_verbose("Updating extents from fd %d with group ID "
+				 FMTu64 " on (%d:%d)", fd, group_id,
+				 major(buf.st_dev), minor(buf.st_dev));
+	else
+		log_very_verbose("Mapping extents from fd %d on (%d:%d)",
+				 fd, major(buf.st_dev), minor(buf.st_dev));
+
+	/* Use a temporary, private pool for the extent table. This avoids
+	 * hijacking the dms->mem (region table) pool which would lead to
+	 * interleaving temporary allocations with dm_stats_list() data,
+	 * causing complications in the error path.
+	 */
+	if (!(extent_mem = dm_pool_create("extents", sizeof(*extents))))
+		return_NULL;
+
+	if (!(extents = _stats_get_extents_for_file(extent_mem, fd, count))) {
+		/* no extents is only fatal when creating a new mapping */
+		log_very_verbose("No extents found in fd %d", fd);
+		if (!update)
+			goto out;
+	}
+
+	if (update) {
+		group = &dms->groups[group_id];
+		/* prune stale regions; nr_kept counts those retained */
+		if ((nr_kept = _stats_unmap_regions(dms, group_id, extent_mem,
+						    extents, &old_extents,
+						    count, regroup)) < 0)
+			goto_out;
+	}
+
+	if (bounds)
+		if (!(hist_arg = _build_histogram_arg(bounds, &precise)))
+			goto_out;
+
+	/* make space for end-of-table marker */
+	if (!(regions = dm_malloc((1 + *count) * sizeof(*regions)))) {
+		log_error("Could not allocate memory for region IDs.");
+		goto_out;
+	}
+
+	/*
+	 * Second pass (first for non-update case): create regions for
+	 * all extents not retained from the prior mapping, and insert
+	 * retained regions into the table of region_id values.
+	 *
+	 * If a regroup is not scheduled, set group bits for newly
+	 * created regions in the group leader bitmap.
+	 */
+	for (i = 0; i < *count; i++) {
+		if (update) {
+			if ((old_ext = _find_extent((uint64_t) nr_kept,
+						    old_extents,
+						    extents[i].start,
+						    extents[i].len))) {
+				/* extent survived: reuse its region ID */
+				regions[i] = old_ext->id;
+				continue;
+			}
+		}
+		if (!_stats_create_region(dms, regions + i, extents[i].start,
+					  extents[i].len, -1, precise, hist_arg,
+					  dms->program_id, "")) {
+			log_error("Failed to create region " FMTu64 " of "
+				  FMTu64 " at " FMTu64 ".", i, *count,
+				  extents[i].start);
+			goto out_remove;
+		}
+
+		log_very_verbose("Created new region mapping " FMTu64 "+" FMTu64
+				 " with region ID " FMTu64, extents[i].start,
+				 extents[i].len, regions[i]);
+
+		if (!*regroup && update) {
+			/* expand group bitmap (regions[0] holds its size) */
+			if (regions[i] > (group->regions[0] - 1)) {
+				num_bits = regions[i] + *count;
+				if (!_stats_resize_group(group, num_bits)) {
+					log_error("Failed to resize group "
+						  "bitmap.");
+					goto out_remove;
+				}
+			}
+			dm_bit_set(group->regions, regions[i]);
+		}
+
+	}
+	regions[*count] = DM_STATS_REGION_NOT_PRESENT;
+
+	/* Update group leader aux_data for new group members. */
+	if (!*regroup && update)
+		if (!_stats_set_aux(dms, group_id,
+				    dms->regions[group_id].aux_data))
+			log_error("Failed to update group aux_data.");
+
+	if (bounds)
+		dm_free(hist_arg);
+
+	/* the extent table will be empty if the file has been truncated. */
+	if (extents)
+		dm_pool_free(extent_mem, extents);
+
+	dm_pool_destroy(extent_mem);
+
+	return regions;
+
+out_remove:
+	/* New region creation may begin to fail part-way through creating
+	 * a set of file mapped regions: in this case we need to roll back
+	 * the regions that were already created and return the handle to
+	 * a consistent state. A listed handle is required for this: use a
+	 * single list operation and call _stats_delete_region() directly
+	 * to avoid a @stats_list ioctl and list parsing for each region.
+	 */
+	if (!dm_stats_list(dms, NULL))
+		goto out;
+
+	fail_region = i;
+	_stats_cleanup_region_ids(dms, regions, fail_region);
+	*count = 0;
+
+out:
+	dm_pool_destroy(extent_mem);
+	dm_free(hist_arg);
+	dm_free(regions);
+	return NULL;
+}
+
+/*
+ * Create regions mapping the extents of the file open at fd and,
+ * optionally, group them (grouping is required when an alias is
+ * given).
+ *
+ * Returns a DM_STATS_REGION_NOT_PRESENT-terminated table of region
+ * IDs to be released with dm_free(), or NULL on error.
+ */
+uint64_t *dm_stats_create_regions_from_fd(struct dm_stats *dms, int fd,
+					  int group, int precise,
+					  struct dm_histogram *bounds,
+					  const char *alias)
+{
+	uint64_t *regions, count;
+	int regroup = 1;
+
+	if (alias && !group) {
+		log_error("Cannot set alias without grouping regions.");
+		return NULL;
+	}
+
+	if (!(regions = _stats_map_file_regions(dms, fd, bounds, precise,
+						DM_STATS_GROUP_NOT_PRESENT,
+						&count, &regroup)))
+		return NULL;
+
+	if (!group)
+		return regions;
+
+	/* refresh handle so the new regions are visible for grouping */
+	if (!dm_stats_list(dms, NULL))
+		goto_out;
+
+	if (!_stats_group_file_regions(dms, regions, count, alias))
+		goto_out;
+
+	return regions;
+out:
+	/* roll back the regions created above on grouping failure */
+	_stats_cleanup_region_ids(dms, regions, count);
+	dm_free(regions);
+	return NULL;
+}
+
+/*
+ * Update the file mapping of group group_id from the current extents
+ * of the file open at fd, re-creating the group (with its saved alias
+ * and histogram bounds) if the group leader was deleted.
+ *
+ * Returns a DM_STATS_REGION_NOT_PRESENT-terminated region ID table to
+ * be released with dm_free(), or NULL on error.
+ */
+uint64_t *dm_stats_update_regions_from_fd(struct dm_stats *dms, int fd,
+					  uint64_t group_id)
+{
+	struct dm_histogram *bounds = NULL;
+	int nr_bins, precise, regroup;
+	uint64_t *regions, count = 0;
+	const char *alias = NULL;
+
+	/* a listed handle is needed before the group can be inspected */
+	if (!dms->regions || !dm_stats_group_present(dms, group_id)) {
+		if (!dm_stats_list(dms, dms->program_id)) {
+			log_error("Could not obtain region list while "
+				  "updating group " FMTu64 ".", group_id);
+			return NULL;
+		}
+	}
+
+	if (!dm_stats_group_present(dms, group_id)) {
+		log_error("Group ID " FMTu64 " does not exist.", group_id);
+		return NULL;
+	}
+
+	/*
+	 * If the extent corresponding to the group leader's region has been
+	 * deallocated, _stats_map_file_regions() will remove the region and
+	 * the group. In this case, regroup will be set by the call and the
+	 * group will be re-created using saved values.
+	 */
+	regroup = 0;
+
+	/*
+	 * A copy of the alias is needed to re-create the group when regroup=1.
+	 */
+	if (dms->groups[group_id].alias) {
+		alias = dm_strdup(dms->groups[group_id].alias);
+		if (!alias) {
+			log_error("Failed to allocate group alias string.");
+			return NULL;
+		}
+	}
+
+	if (dms->regions[group_id].bounds) {
+		/*
+		 * A copy of the histogram bounds must be passed to
+		 * _stats_map_file_regions() to be used when creating new
+		 * regions: it is not safe to use the copy in the current group
+		 * leader since it may be destroyed during the first group
+		 * update pass.
+		 */
+		nr_bins = dms->regions[group_id].bounds->nr_bins;
+		bounds = _alloc_dm_histogram(nr_bins);
+		if (!bounds) {
+			log_error("Could not allocate memory for group "
+				  "histogram bounds.");
+			goto out;
+		}
+		_stats_copy_histogram_bounds(bounds,
+					     dms->regions[group_id].bounds);
+	}
+
+	precise = (dms->regions[group_id].timescale == 1);
+
+	regions = _stats_map_file_regions(dms, fd, bounds, precise,
+					  group_id, &count, &regroup);
+
+	/*
+	 * NOTE(review): when regions is NULL the 'bad' path passes a NULL
+	 * table to _stats_cleanup_region_ids(); count is 0 on that path
+	 * so its loop never dereferences it — verify this invariant holds
+	 * for every failure return of _stats_map_file_regions().
+	 */
+	if (!regions)
+		goto bad;
+
+	if (!dm_stats_list(dms, NULL))
+		goto bad;
+
+	/* regroup if there are regions to group */
+	if (regroup && (*regions != DM_STATS_REGION_NOT_PRESENT))
+		if (!_stats_group_file_regions(dms, regions, count, alias))
+			goto bad;
+
+	dm_free(bounds);
+	dm_free((char *) alias);
+	return regions;
+bad:
+	_stats_cleanup_region_ids(dms, regions, count);
+	dm_free(bounds);
+	dm_free(regions);
+out:
+	dm_free((char *) alias);
+	return NULL;
+}
+#else /* !HAVE_LINUX_FIEMAP */
+/* Stub: built without FIEMAP support; file mapping always fails. */
+uint64_t *dm_stats_create_regions_from_fd(struct dm_stats *dms, int fd,
+					  int group, int precise,
+					  struct dm_histogram *bounds,
+					  const char *alias)
+{
+	log_error("File mapping requires FIEMAP ioctl support.");
+	return 0;
+}
+
+/* Stub: built without FIEMAP support; file mapping always fails. */
+uint64_t *dm_stats_update_regions_from_fd(struct dm_stats *dms, int fd,
+					  uint64_t group_id)
+{
+	log_error("File mapping requires FIEMAP ioctl support.");
+	return 0;
+}
+#endif /* HAVE_LINUX_FIEMAP */
+
+#ifdef DMFILEMAPD
+/* Mode names, indexed in dm_filemapd_mode_t order; NULL-terminated. */
+static const char *_filemapd_mode_names[] = {
+	"inode",
+	"path",
+	NULL
+};
+
+/*
+ * Parse a dmfilemapd mode string ("inode" or "path") into the
+ * corresponding dm_filemapd_mode_t value. A NULL mode_str yields the
+ * default DM_FILEMAPD_FOLLOW_INODE; an unrecognised string logs an
+ * error and yields DM_FILEMAPD_FOLLOW_NONE.
+ */
+dm_filemapd_mode_t dm_filemapd_mode_from_string(const char *mode_str)
+{
+	const char **name;
+
+	if (!mode_str)
+		return DM_FILEMAPD_FOLLOW_INODE;
+
+	for (name = _filemapd_mode_names; *name; name++)
+		if (!strcmp(*name, mode_str))
+			return DM_FILEMAPD_FOLLOW_INODE
+				+ (name - _filemapd_mode_names);
+
+	log_error("Could not parse dmfilemapd mode: %s", mode_str);
+	return DM_FILEMAPD_FOLLOW_NONE;
+}
+
+#define DM_FILEMAPD "dmfilemapd"
+#define NR_FILEMAPD_ARGS 7 /* includes argv[0] */
+/*
+ * Start dmfilemapd to monitor the specified file descriptor, and to
+ * update the group given by 'group_id' when the file's allocation
+ * changes.
+ *
+ * usage: dmfilemapd <fd> <group_id> <mode> [<foreground>[<log_level>]]
+ *
+ * Returns 1 on success and 0 on failure. When foreground is zero the
+ * daemon is started via fork() + execvp(); in the foreground case this
+ * function returns only if exec fails.
+ */
+int dm_stats_start_filemapd(int fd, uint64_t group_id, const char *path,
+			    dm_filemapd_mode_t mode, unsigned foreground,
+			    unsigned verbose)
+{
+	/*
+	 * NOTE(review): group_str[8] limits the printable group ID to 7
+	 * digits — dm_snprintf() fails (and this function errors out)
+	 * for larger IDs; confirm this bound is intended.
+	 */
+	char fd_str[8], group_str[8], fg_str[2], verb_str[2];
+	const char *mode_str = _filemapd_mode_names[mode];
+	char *args[NR_FILEMAPD_ARGS + 1];
+	pid_t pid = 0;
+	int argc = 0;
+
+	if (fd < 0) {
+		log_error("dmfilemapd file descriptor must be "
+			  "non-negative: %d", fd);
+		return 0;
+	}
+
+	if (path[0] != '/') {
+		log_error("Path argument must specify an absolute path.");
+		return 0;
+	}
+
+	if (mode > DM_FILEMAPD_FOLLOW_PATH) {
+		log_error("Invalid dmfilemapd mode argument: "
+			  "Must be DM_FILEMAPD_FOLLOW_INODE or "
+			  "DM_FILEMAPD_FOLLOW_PATH");
+		return 0;
+	}
+
+	if (foreground > 1) {
+		log_error("Invalid dmfilemapd foreground argument. "
+			  "Must be 0 or 1: %d.", foreground);
+		return 0;
+	}
+
+	if (verbose > 3) {
+		log_error("Invalid dmfilemapd verbose argument. "
+			  "Must be 0..3: %d.", verbose);
+		return 0;
+	}
+
+	/* set argv[0] */
+	args[argc++] = (char *) DM_FILEMAPD;
+
+	/* set <fd> */
+	if ((dm_snprintf(fd_str, sizeof(fd_str), "%d", fd)) < 0) {
+		log_error("Could not format fd argument.");
+		return 0;
+	}
+	args[argc++] = fd_str;
+
+	/* set <group_id> */
+	if ((dm_snprintf(group_str, sizeof(group_str), FMTu64, group_id)) < 0) {
+		log_error("Could not format group_id argument.");
+		return 0;
+	}
+	args[argc++] = group_str;
+
+	/* set <path> */
+	args[argc++] = (char *) path;
+
+	/* set <mode> */
+	args[argc++] = (char *) mode_str;
+
+	/* set <foreground> */
+	if ((dm_snprintf(fg_str, sizeof(fg_str), "%u", foreground)) < 0) {
+		log_error("Could not format foreground argument.");
+		return 0;
+	}
+	args[argc++] = fg_str;
+
+	/* set <verbose> */
+	if ((dm_snprintf(verb_str, sizeof(verb_str), "%u", verbose)) < 0) {
+		log_error("Could not format verbose argument.");
+		return 0;
+	}
+	args[argc++] = verb_str;
+
+	/* terminate args[argc] */
+	args[argc] = NULL;
+
+	log_very_verbose("Spawning daemon as '%s %d " FMTu64 " %s %s %u %u'",
+			 *args, fd, group_id, path, mode_str,
+			 foreground, verbose);
+
+	if (!foreground && ((pid = fork()) < 0)) {
+		log_error("Failed to fork dmfilemapd process.");
+		return 0;
+	}
+
+	/* parent: child is running the daemon; report success */
+	if (pid > 0) {
+		log_very_verbose("Forked dmfilemapd process as pid %d", pid);
+		return 1;
+	}
+
+	/* child (or foreground caller): replace this process image */
+	execvp(args[0], args);
+	log_sys_error("execvp", args[0]);
+	if (!foreground)
+		_exit(127);
+	return 0;
+}
+# else /* !DMFILEMAPD */
+/* Stub used when dmfilemapd support is compiled out. */
+dm_filemapd_mode_t dm_filemapd_mode_from_string(const char *mode_str)
+{
+	return 0;
+}
+
+/* Stub: dmfilemapd support compiled out; always fails. */
+int dm_stats_start_filemapd(int fd, uint64_t group_id, const char *path,
+			    dm_filemapd_mode_t mode, unsigned foreground,
+			    unsigned verbose)
+{
+	log_error("dmfilemapd support disabled.");
+	return 0;
+}
+#endif /* DMFILEMAPD */
+
+/*
+ * Backward compatible dm_stats_create_region() implementations.
+ *
+ * Keep these at the end of the file to avoid adding clutter around the
+ * current dm_stats_create_region() version.
+ */
+
+#if defined(__GNUC__)
+/*
+ * Compat entry point for callers built against 1.02.106 — presumably
+ * bound via symbol versioning; confirm against the version script.
+ */
+int dm_stats_create_region_v1_02_106(struct dm_stats *dms, uint64_t *region_id,
+				     uint64_t start, uint64_t len, int64_t step,
+				     int precise, const char *program_id,
+				     const char *aux_data);
+int dm_stats_create_region_v1_02_106(struct dm_stats *dms, uint64_t *region_id,
+				     uint64_t start, uint64_t len, int64_t step,
+				     int precise, const char *program_id,
+				     const char *aux_data)
+{
+	/* 1.02.106 lacks histogram argument. */
+	return _stats_create_region(dms, region_id, start, len, step, precise,
+				    NULL, program_id, aux_data);
+}
+
+/* Compat entry point for callers built against 1.02.104 (see above). */
+int dm_stats_create_region_v1_02_104(struct dm_stats *dms, uint64_t *region_id,
+				     uint64_t start, uint64_t len, int64_t step,
+				     const char *program_id, const char *aux_data);
+int dm_stats_create_region_v1_02_104(struct dm_stats *dms, uint64_t *region_id,
+				     uint64_t start, uint64_t len, int64_t step,
+				     const char *program_id, const char *aux_data)
+{
+	/* 1.02.104 lacks histogram and precise arguments. */
+	return _stats_create_region(dms, region_id, start, len, step, 0, NULL,
+				    program_id, aux_data);
+}
+#endif
diff --git a/device_mapper/libdm-string.c b/device_mapper/libdm-string.c
new file mode 100644
index 000000000..8bd6c2d15
--- /dev/null
+++ b/device_mapper/libdm-string.c
@@ -0,0 +1,718 @@
+/*
+ * Copyright (C) 2006-2015 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "misc/dmlib.h"
+
+#include <ctype.h>
+#include <stdarg.h>
+#include <math.h> /* fabs() */
+#include <float.h> /* DBL_EPSILON */
+
+/*
+ * Advance over characters while they match the predicate function.
+ * Returns a pointer to the first character failing the predicate
+ * (or to the terminating NUL).
+ */
+static char *_consume(char *buffer, int (*fn) (int))
+{
+	while (*buffer && fn(*buffer))
+		buffer++;
+
+	return buffer;
+}
+
+/* Predicate: true for any non-whitespace character. */
+static int _isword(int c)
+{
+	return !isspace(c);
+}
+
+/*
+ * Split buffer into NUL-separated words in argv.
+ * The buffer is modified in place and argv entries point into it.
+ * At most 'max' words are extracted; 'ignore_comments' is unused here.
+ * Returns number of words found.
+ */
+int dm_split_words(char *buffer, unsigned max,
+		   unsigned ignore_comments __attribute__((unused)),
+		   char **argv)
+{
+	unsigned arg;
+
+	for (arg = 0; arg < max; arg++) {
+		/* Skip leading whitespace before each word. */
+		buffer = _consume(buffer, isspace);
+		if (!*buffer)
+			break;
+
+		argv[arg] = buffer;
+		buffer = _consume(buffer, _isword);
+
+		/* Terminate the word unless we already hit end of string. */
+		if (*buffer) {
+			*buffer = '\0';
+			buffer++;
+		}
+	}
+
+	return arg;
+}
+
+/*
+ * Remove hyphen quoting from a component of a name:
+ * "--" collapses to "-"; a single "-" marks the end of the component.
+ * NUL-terminates the component and returns start of next component.
+ */
+static char *_unquote(char *component)
+{
+	char *c = component;
+	char *o = c;
+	char *r;
+
+	while (*c) {
+		if (*(c + 1)) {
+			if (*c == '-') {
+				if (*(c + 1) == '-')
+					c++;	/* "--" -> keep a single '-' */
+				else
+					break;	/* lone '-' ends the component */
+			}
+		}
+		*o = *c;
+		o++;
+		c++;
+	}
+
+	/* Step over the separating '-' if we stopped on one. */
+	r = (*c) ? c + 1 : c;
+	*o = '\0';
+
+	return r;
+}
+
+/*
+ * Split a hyphen-quoted dm name "vg-lv-layer" into its vg/lv/layer parts.
+ * With a pool, dmname is duplicated into it first.  When mem is NULL,
+ * *vgname must already point at a writable copy of the name (it is
+ * unquoted in place).  Returns 1 on success, 0 on error.
+ */
+int dm_split_lvm_name(struct dm_pool *mem, const char *dmname,
+		      char **vgname, char **lvname, char **layer)
+{
+	if (!vgname || !lvname || !layer) {
+		log_error(INTERNAL_ERROR "dm_split_lvm_name: Forbidden NULL parameter detected.");
+		return 0;
+	}
+
+	if (mem && (!dmname || !(*vgname = dm_pool_strdup(mem, dmname)))) {
+		log_error("Failed to duplicate lvm name.");
+		return 0;
+	} else if (!*vgname) {
+		log_error("Missing lvm name for split.");
+		return 0;
+	}
+
+	/* Each _unquote() terminates one component and returns the next. */
+	_unquote(*layer = _unquote(*lvname = _unquote(*vgname)));
+
+	return 1;
+}
+
+/*
+ * On error, up to glibc 2.0.6, snprintf returned -1 if buffer was too small;
+ * From glibc 2.1 it returns number of chars (excl. trailing null) that would
+ * have been written had there been room.
+ *
+ * dm_snprintf reverts to the old behaviour: returns -1 whenever the
+ * output was truncated or an error occurred, else the length written.
+ */
+int dm_snprintf(char *buf, size_t bufsize, const char *format, ...)
+{
+	int n;
+	va_list ap;
+
+	va_start(ap, format);
+	n = vsnprintf(buf, bufsize, format, ap);
+	va_end(ap);
+
+	/* Treat truncation (n >= bufsize) the same as failure. */
+	if (n < 0 || ((unsigned) n >= bufsize))
+		return -1;
+
+	return n;
+}
+
+/* Return pointer to the filename part of path (after the last '/'). */
+const char *dm_basename(const char *path)
+{
+	const char *p = strrchr(path, '/');
+
+	return p ? p + 1 : path;
+}
+
+/*
+ * vasprintf() equivalent using dm_malloc().
+ * On success stores the allocated string in *result and returns its
+ * length + 1; on allocation failure sets *result to NULL and returns -1.
+ */
+int dm_vasprintf(char **result, const char *format, va_list aq)
+{
+	int i, n, size = 16;
+	va_list ap;
+	char *buf = dm_malloc(size);
+
+	*result = 0;
+
+	if (!buf)
+		return -1;
+
+	/* Grow the buffer until vsnprintf output fits. */
+	for (i = 0;; i++) {
+		va_copy(ap, aq);
+		n = vsnprintf(buf, size, format, ap);
+		va_end(ap);
+
+		if (0 <= n && n < size)
+			break;
+
+		dm_free(buf);
+		/* Up to glibc 2.0.6 returns -1 */
+		size = (n < 0) ? size * 2 : n + 1;
+		if (!(buf = dm_malloc(size)))
+			return -1;
+	}
+
+	if (i > 1) {
+		/* Reallocated more than once? Trim to the exact size. */
+		if (!(*result = dm_strdup(buf))) {
+			dm_free(buf);
+			return -1;
+		}
+		dm_free(buf);
+	} else
+		*result = buf;
+
+	return n + 1;
+}
+
+/* Variadic front-end to dm_vasprintf(); same return convention. */
+int dm_asprintf(char **result, const char *format, ...)
+{
+	int r;
+	va_list ap;
+	va_start(ap, format);
+	r = dm_vasprintf(result, format, ap);
+	va_end(ap);
+	return r;
+}
+
+/*
+ * Count occurrences of 'c1' and 'c2' in 'str' until the NUL terminator.
+ *
+ * Returns:
+ *   len   - incremented for each char we encounter.
+ *   count - incremented for each occurrence of 'c1' or 'c2'.
+ */
+static void _count_chars(const char *str, size_t *len, int *count,
+			 const int c1, const int c2)
+{
+	const char *ptr;
+
+	for (ptr = str; *ptr; ptr++, (*len)++)
+		if (*ptr == c1 || *ptr == c2)
+			(*count)++;
+}
+
+/*
+ * Count occurrences of 'c' in 'str' of length 'len'.
+ * (Does not stop at NUL; scans exactly 'len' bytes.)
+ *
+ * Returns:
+ *   Number of occurrences of 'c'
+ */
+unsigned dm_count_chars(const char *str, size_t len, const int c)
+{
+	size_t i;
+	unsigned count = 0;
+
+	for (i = 0; i < len; i++)
+		if (str[i] == c)
+			count++;
+
+	return count;
+}
+
+/*
+ * Length of string after escaping double quotes and backslashes,
+ * including the terminating NUL (len starts at 1).
+ */
+size_t dm_escaped_len(const char *str)
+{
+	size_t len = 1;
+	int count = 0;
+
+	/* Each '"' or '\' will need one extra escape character. */
+	_count_chars(str, &len, &count, '\"', '\\');
+
+	return count + len;
+}
+
+/*
+ * Copies a string, quoting orig_char with quote_char.
+ * Optionally also quote quote_char itself.
+ * Advances *out past the copied text; does NOT NUL-terminate.
+ */
+static void _quote_characters(char **out, const char *src,
+			      const int orig_char, const int quote_char,
+			      int quote_quote_char)
+{
+	while (*src) {
+		if (*src == orig_char ||
+		    (*src == quote_char && quote_quote_char))
+			*(*out)++ = quote_char;
+
+		*(*out)++ = *src++;
+	}
+}
+
+/*
+ * In-place removal of quote_char quoting before orig_char or quote_char.
+ * The outer loop scans without writing; copying starts only at the first
+ * quote found, so the unmodified common case touches nothing.
+ */
+static void _unquote_one_character(char *src, const char orig_char,
+				   const char quote_char)
+{
+	char *out;
+	char s, n;
+
+	/* Optimise for the common case where no changes are needed. */
+	while ((s = *src++)) {
+		if (s == quote_char &&
+		    ((n = *src) == orig_char || n == quote_char)) {
+			/* First quote found: start compacting from here. */
+			out = src++;
+			*(out - 1) = n;
+
+			while ((s = *src++)) {
+				if (s == quote_char &&
+				    ((n = *src) == orig_char || n == quote_char)) {
+					s = n;
+					src++;
+				}
+				*out = s;
+				out++;
+			}
+
+			*out = '\0';
+			return;
+		}
+	}
+}
+
+/*
+ * Unquote each character given in orig_chars array and unquote quote_char
+ * as well. Also save the first occurrence of each character from orig_chars
+ * that was found unquoted in arr_substr_first_unquoted array. This way we can
+ * process several characters in one go.
+ * Works in place; 'out' trails 'src' as quotes are removed.
+ */
+static void _unquote_characters(char *src, const char *orig_chars,
+				size_t num_orig_chars,
+				const char quote_char,
+				char *arr_substr_first_unquoted[])
+{
+	char *out = src;
+	char c, s, n;
+	unsigned i;
+
+	while ((s = *src++)) {
+		for (i = 0; i < num_orig_chars; i++) {
+			c = orig_chars[i];
+			if (s == quote_char &&
+			    ((n = *src) == c || n == quote_char)) {
+				/* Drop the quote, keep the quoted char. */
+				s = n;
+				src++;
+				break;
+			}
+			/* Remember first unquoted occurrence of each char. */
+			if (arr_substr_first_unquoted && (s == c) &&
+			    !arr_substr_first_unquoted[i])
+				arr_substr_first_unquoted[i] = out;
+		};
+		*out++ = s;
+	}
+
+	*out = '\0';
+}
+
+/*
+ * Copies a string, quoting hyphens with hyphens.
+ */
+static void _quote_hyphens(char **out, const char *src)
+{
+	_quote_characters(out, src, '-', '-', 0);
+}
+
+/*
+ * Build hyphen-quoted dm name <vg>-<lv>-<layer>, or just <vg>-<lv> if
+ * layer is NULL/empty.  Allocated from 'mem'; returns NULL on failure.
+ */
+char *dm_build_dm_name(struct dm_pool *mem, const char *vgname,
+		       const char *lvname, const char *layer)
+{
+	size_t len = 1;
+	int hyphens = 1;
+	char *r, *out;
+
+	/* Each embedded '-' doubles; count extra space needed. */
+	_count_chars(vgname, &len, &hyphens, '-', 0);
+	_count_chars(lvname, &len, &hyphens, '-', 0);
+
+	if (layer && *layer) {
+		_count_chars(layer, &len, &hyphens, '-', 0);
+		hyphens++;
+	}
+
+	len += hyphens;
+
+	if (!(r = dm_pool_alloc(mem, len))) {
+		log_error("build_dm_name: Allocation failed for %" PRIsize_t
+			  " for %s %s %s.", len, vgname, lvname, layer);
+		return NULL;
+	}
+
+	out = r;
+	_quote_hyphens(&out, vgname);
+	*out++ = '-';
+	_quote_hyphens(&out, lvname);
+
+	if (layer && *layer) {
+		/* No hyphen if the layer begins with _ e.g. _mlog */
+		if (*layer != '_')
+			*out++ = '-';
+		_quote_hyphens(&out, layer);
+	}
+	*out = '\0';
+
+	return r;
+}
+
+/*
+ * Build dm UUID <uuid_prefix><lvid>[-<layer>] allocated from 'mem'.
+ * Returns NULL on allocation failure.
+ */
+char *dm_build_dm_uuid(struct dm_pool *mem, const char *uuid_prefix, const char *lvid, const char *layer)
+{
+	char *dmuuid;
+	size_t len;
+
+	if (!layer)
+		layer = "";
+
+	/* +2: optional '-' separator and terminating NUL. */
+	len = strlen(uuid_prefix) + strlen(lvid) + strlen(layer) + 2;
+
+	if (!(dmuuid = dm_pool_alloc(mem, len))) {
+		log_error("build_dm_name: Allocation failed for %" PRIsize_t
+			  " %s %s.", len, lvid, layer);
+		return NULL;
+	}
+
+	sprintf(dmuuid, "%s%s%s%s", uuid_prefix, lvid, (*layer) ? "-" : "", layer);
+
+	return dmuuid;
+}
+
+/*
+ * Copies a string, quoting double quotes with backslashes.
+ * 'out' must be large enough (see dm_escaped_len()); returns 'out'.
+ */
+char *dm_escape_double_quotes(char *out, const char *src)
+{
+	char *buf = out;
+
+	/* Also escape backslash itself (quote_quote_char = 1). */
+	_quote_characters(&buf, src, '\"', '\\', 1);
+	*buf = '\0';
+
+	return out;
+}
+
+/*
+ * Undo quoting in situ.
+ */
+void dm_unescape_double_quotes(char *src)
+{
+	_unquote_one_character(src, '\"', '\\');
+}
+
+/*
+ * Unescape colons and "at" signs in situ and save the substrings
+ * starting at the position of the first unescaped colon and the
+ * first unescaped "at" sign. This is normally used to unescape
+ * device names used as PVs.  Either output pointer may be NULL.
+ */
+void dm_unescape_colons_and_at_signs(char *src,
+				     char **substr_first_unquoted_colon,
+				     char **substr_first_unquoted_at_sign)
+{
+	const char *orig_chars = ":@";
+	char *arr_substr_first_unquoted[] = {NULL, NULL, NULL};
+
+	_unquote_characters(src, orig_chars, 2, '\\', arr_substr_first_unquoted);
+
+	if (substr_first_unquoted_colon)
+		*substr_first_unquoted_colon = arr_substr_first_unquoted[0];
+
+	if (substr_first_unquoted_at_sign)
+		*substr_first_unquoted_at_sign = arr_substr_first_unquoted[1];
+}
+
+/*
+ * Copy src into dest (at most n bytes) always NUL-terminating.
+ * Returns 1 if src fitted completely, 0 if it was truncated.
+ */
+int dm_strncpy(char *dest, const char *src, size_t n)
+{
+	/* memccpy returns non-NULL once the NUL was copied, i.e. it fit. */
+	if (memccpy(dest, src, 0, n))
+		return 1;
+
+	/* Truncated: force termination. */
+	if (n > 0)
+		dest[n - 1] = '\0';
+
+	return 0;
+}
+
+/* Test if the doubles are close enough to be considered equal */
+static int _close_enough(double d1, double d2)
+{
+	return fabs(d1 - d2) < DBL_EPSILON;
+}
+
+/* Indices into size_str[] below: each BASE_* is the row of that group's
+ * largest unit (E); s then selects the prefix within the group. */
+#define BASE_UNKNOWN 0
+#define BASE_SHARED 1
+#define BASE_1024 8
+#define BASE_1000 15
+#define BASE_SPECIAL 21
+#define NUM_UNIT_PREFIXES 6
+#define NUM_SPECIAL 3
+
+#define SIZE_BUF 128
+
+/*
+ * Render a size given in 512-byte sectors as a string allocated from 'mem'.
+ * unit_type selects a fixed unit (or 'h'/'H'/'r'/'R' for human-readable);
+ * suffix_type picks long/short/char suffix column from size_str[].
+ * Returns "" (not NULL) on failure.
+ */
+const char *dm_size_to_string(struct dm_pool *mem, uint64_t size,
+			      char unit_type, int use_si_units,
+			      uint64_t unit_factor, int include_suffix,
+			      dm_size_suffix_t suffix_type)
+{
+	unsigned base = BASE_UNKNOWN;
+	unsigned s;
+	int precision;
+	double d;
+	uint64_t byte = UINT64_C(0);
+	uint64_t units = UINT64_C(1024);
+	char *size_buf = NULL;
+	char new_unit_type = '\0', unit_type_buf[2];
+	const char *prefix = "";
+	const char * const size_str[][3] = {
+		/* BASE_UNKNOWN */
+		{"         ", "   ", " "},	/* [0] */
+
+		/* BASE_SHARED - Used if use_si_units = 0 */
+		{" Exabyte", " EB", "E"},	/* [1] */
+		{" Petabyte", " PB", "P"},	/* [2] */
+		{" Terabyte", " TB", "T"},	/* [3] */
+		{" Gigabyte", " GB", "G"},	/* [4] */
+		{" Megabyte", " MB", "M"},	/* [5] */
+		{" Kilobyte", " KB", "K"},	/* [6] */
+		{" Byte    ", " B", "B"},	/* [7] */
+
+		/* BASE_1024 - Used if use_si_units = 1 */
+		{" Exbibyte", " EiB", "e"},	/* [8] */
+		{" Pebibyte", " PiB", "p"},	/* [9] */
+		{" Tebibyte", " TiB", "t"},	/* [10] */
+		{" Gibibyte", " GiB", "g"},	/* [11] */
+		{" Mebibyte", " MiB", "m"},	/* [12] */
+		{" Kibibyte", " KiB", "k"},	/* [13] */
+		{" Byte    ", " B", "b"},	/* [14] */
+
+		/* BASE_1000 - Used if use_si_units = 1 */
+		{" Exabyte", " EB", "E"},	/* [15] */
+		{" Petabyte", " PB", "P"},	/* [16] */
+		{" Terabyte", " TB", "T"},	/* [17] */
+		{" Gigabyte", " GB", "G"},	/* [18] */
+		{" Megabyte", " MB", "M"},	/* [19] */
+		{" Kilobyte", " kB", "K"},	/* [20] */
+
+		/* BASE_SPECIAL */
+		{" Byte    ", " B ", "B"},	/* [21] (shared with BASE_1000) */
+		{" Units   ", " Un", "U"},	/* [22] */
+		{" Sectors ", " Se", "S"},	/* [23] */
+	};
+
+	if (!(size_buf = dm_pool_alloc(mem, SIZE_BUF))) {
+		log_error("no memory for size display buffer");
+		return "";
+	}
+
+	/* Identify the unit group and prefix index 's' from unit_type. */
+	if (!use_si_units) {
+		/* Case-independent match */
+		for (s = 0; s < NUM_UNIT_PREFIXES; s++)
+			if (toupper((int) unit_type) ==
+			    *size_str[BASE_SHARED + s][2]) {
+				base = BASE_SHARED;
+				break;
+			}
+	} else {
+		/* Case-dependent match for powers of 1000 */
+		for (s = 0; s < NUM_UNIT_PREFIXES; s++)
+			if (unit_type == *size_str[BASE_1000 + s][2]) {
+				base = BASE_1000;
+				break;
+			}
+
+		/* Case-dependent match for powers of 1024 */
+		if (base == BASE_UNKNOWN)
+			for (s = 0; s < NUM_UNIT_PREFIXES; s++)
+				if (unit_type == *size_str[BASE_1024 + s][2]) {
+					base = BASE_1024;
+					break;
+				}
+	}
+
+	if (base == BASE_UNKNOWN)
+		/* Check for special units - s, b or u */
+		for (s = 0; s < NUM_SPECIAL; s++)
+			if (toupper((int) unit_type) ==
+			    *size_str[BASE_SPECIAL + s][2]) {
+				base = BASE_SPECIAL;
+				break;
+			}
+
+	if (size == UINT64_C(0)) {
+		if (base == BASE_UNKNOWN)
+			s = 0;
+		sprintf(size_buf, "0%s", include_suffix ? size_str[base + s][suffix_type] : "");
+		return size_buf;
+	}
+
+	/* Sectors -> bytes. */
+	size *= UINT64_C(512);
+
+	if (base != BASE_UNKNOWN) {
+		if (!unit_factor) {
+			unit_type_buf[0] = unit_type;
+			unit_type_buf[1] = '\0';
+			if (!(unit_factor = dm_units_to_factor(&unit_type_buf[0], &new_unit_type, 1, NULL)) ||
+			    unit_type != new_unit_type) {
+				/* The two functions should match (and unrecognised units get treated like 'h'). */
+				log_error(INTERNAL_ERROR "Inconsistent units: %c and %c.", unit_type, new_unit_type);
+				return "";
+			}
+		}
+		byte = unit_factor;
+	} else {
+		/* Human-readable style */
+		if (unit_type == 'H' || unit_type == 'R') {
+			units = UINT64_C(1000);
+			base = BASE_1000;
+		} else {
+			units = UINT64_C(1024);
+			base = BASE_1024;
+		}
+
+		if (!use_si_units)
+			base = BASE_SHARED;
+
+		/* Start at the largest unit (E) and shrink until it fits. */
+		byte = units * units * units * units * units * units;
+
+		for (s = 0; s < NUM_UNIT_PREFIXES && size < byte; s++)
+			byte /= units;
+
+		if ((s < NUM_UNIT_PREFIXES) &&
+		    ((unit_type == 'R') || (unit_type == 'r'))) {
+			/* When the rounding would cause difference, add '<' prefix
+			 * i.e. 2043M is more than 1.9949G prints <2.00G
+			 * This version is for 2 digits fixed precision */
+			d = 100. * (double) size / byte;
+			if (!_close_enough(floorl(d), nearbyintl(d)))
+				prefix = "<";
+		}
+
+		include_suffix = 1;
+	}
+
+	/* FIXME Make precision configurable */
+	switch (toupper(*size_str[base + s][DM_SIZE_UNIT])) {
+	case 'B':
+	case 'S':
+		precision = 0;
+		break;
+	default:
+		precision = 2;
+	}
+
+	snprintf(size_buf, SIZE_BUF, "%s%.*f%s", prefix, precision,
+		 (double) size / byte, include_suffix ? size_str[base + s][suffix_type] : "");
+
+	return size_buf;
+}
+
+/*
+ * Convert a unit specification (optionally preceded by a numeric
+ * multiplier, e.g. "4m") into a factor in bytes.
+ * Lower-case units are powers of 1024, upper-case powers of 1000
+ * (except shared b/B, s/S sector, h/H r/R pass-through).
+ * Sets *unit_type to the unit char ('U' when a custom multiplier is used)
+ * and, if endptr is given, points it past the consumed text.
+ * Returns 0 on error (also when strict and more than one unit char).
+ */
+uint64_t dm_units_to_factor(const char *units, char *unit_type,
+			    int strict, const char **endptr)
+{
+	char *ptr = NULL;
+	uint64_t v;
+	double custom_value = 0;
+	uint64_t multiplier;
+
+	if (endptr)
+		*endptr = units;
+
+	if (isdigit(*units)) {
+		custom_value = strtod(units, &ptr);
+		if (ptr == units)
+			return 0;
+		v = (uint64_t) strtoull(units, NULL, 10);
+		if (_close_enough((double) v, custom_value))
+			custom_value = 0;	/* Use integer arithmetic */
+		units = ptr;
+	} else
+		v = 1;
+
+	/* Only one units char permitted in strict mode. */
+	if (strict && units[0] && units[1])
+		return 0;
+
+	if (v == 1)
+		*unit_type = *units;
+	else
+		*unit_type = 'U';
+
+	switch (*units) {
+	case 'h':
+	case 'H':
+	case 'r':
+	case 'R':
+		/* Human-readable: no scaling, drop any multiplier. */
+		multiplier = v = UINT64_C(1);
+		*unit_type = *units;
+		break;
+	case 'b':
+	case 'B':
+		multiplier = UINT64_C(1);
+		break;
+#define KILO UINT64_C(1024)
+	case 's':
+	case 'S':
+		multiplier = (KILO/2);	/* 512-byte sectors */
+		break;
+	case 'k':
+		multiplier = KILO;
+		break;
+	case 'm':
+		multiplier = KILO * KILO;
+		break;
+	case 'g':
+		multiplier = KILO * KILO * KILO;
+		break;
+	case 't':
+		multiplier = KILO * KILO * KILO * KILO;
+		break;
+	case 'p':
+		multiplier = KILO * KILO * KILO * KILO * KILO;
+		break;
+	case 'e':
+		multiplier = KILO * KILO * KILO * KILO * KILO * KILO;
+		break;
+#undef KILO
+#define KILO UINT64_C(1000)
+	case 'K':
+		multiplier = KILO;
+		break;
+	case 'M':
+		multiplier = KILO * KILO;
+		break;
+	case 'G':
+		multiplier = KILO * KILO * KILO;
+		break;
+	case 'T':
+		multiplier = KILO * KILO * KILO * KILO;
+		break;
+	case 'P':
+		multiplier = KILO * KILO * KILO * KILO * KILO;
+		break;
+	case 'E':
+		multiplier = KILO * KILO * KILO * KILO * KILO * KILO;
+		break;
+#undef KILO
+	default:
+		return 0;
+	}
+
+	if (endptr)
+		*endptr = units + 1;
+
+	if (_close_enough(custom_value, 0.))
+		return v * multiplier;	/* Use integer arithmetic */
+	else
+		return (uint64_t) (custom_value * multiplier);
+}
diff --git a/device_mapper/libdm-targets.c b/device_mapper/libdm-targets.c
new file mode 100644
index 000000000..5ab4701bb
--- /dev/null
+++ b/device_mapper/libdm-targets.c
@@ -0,0 +1,565 @@
+/*
+ * Copyright (C) 2005-2015 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "misc/dmlib.h"
+#include "libdm-common.h"
+
+/*
+ * Parse snapshot target status 'params' into a dm_status_snapshot
+ * allocated from 'mem'.  Accepts "<used>/<total>[ <metadata>]" or the
+ * literal states "Invalid", "Merge failed", "Overflow".
+ * Returns 1 on success with *status set, else 0.
+ */
+int dm_get_status_snapshot(struct dm_pool *mem, const char *params,
+			   struct dm_status_snapshot **status)
+{
+	struct dm_status_snapshot *s;
+	int r;
+
+	if (!params) {
+		log_error("Failed to parse invalid snapshot params.");
+		return 0;
+	}
+
+	if (!(s = dm_pool_zalloc(mem, sizeof(*s)))) {
+		log_error("Failed to allocate snapshot status structure.");
+		return 0;
+	}
+
+	r = sscanf(params, FMTu64 "/" FMTu64 " " FMTu64,
+		   &s->used_sectors, &s->total_sectors,
+		   &s->metadata_sectors);
+
+	/* Third field (metadata sectors) only present in newer kernels. */
+	if (r == 3 || r == 2)
+		s->has_metadata_sectors = (r == 3);
+	else if (!strcmp(params, "Invalid"))
+		s->invalid = 1;
+	else if (!strcmp(params, "Merge failed"))
+		s->merge_failed = 1;
+	else if (!strcmp(params, "Overflow"))
+		s->overflow = 1;
+	else {
+		dm_pool_free(mem, s);
+		log_error("Failed to parse snapshot params: %s.", params);
+		return 0;
+	}
+
+	*status = s;
+
+	return 1;
+}
+
+/*
+ * Skip nr fields each delimited by a single space.
+ * Returns NULL if fewer than nr delimiters remain.
+ * FIXME Don't assume single space.
+ */
+static const char *_skip_fields(const char *p, unsigned nr)
+{
+	while (p && nr-- && (p = strchr(p, ' ')))
+		p++;
+
+	return p;
+}
+
+/*
+ * Count number of single-space delimited fields.
+ * Number of fields is number of spaces plus one.
+ */
+static unsigned _count_fields(const char *p)
+{
+	unsigned nr = 1;
+
+	if (!p || !*p)
+		return 0;
+
+	while ((p = _skip_fields(p, 1)))
+		nr++;
+
+	return nr;
+}
+
+/*
+ * Various RAID status versions include:
+ * Versions < 1.5.0 (4 fields):
+ *   <raid_type> <#devs> <health_str> <sync_ratio>
+ * Versions 1.5.0+ (6 fields):
+ *   <raid_type> <#devs> <health_str> <sync_ratio> <sync_action> <mismatch_cnt>
+ * Versions 1.9.0+ (7 fields):
+ *   <raid_type> <#devs> <health_str> <sync_ratio> <sync_action> <mismatch_cnt> <data_offset>
+ */
+/*
+ * Parse raid target status 'params' into a dm_status_raid allocated
+ * from 'mem'.  Fields beyond the pre-1.5.0 set are optional; sync_action
+ * stays NULL (and mismatch_count/data_offset 0) when the kernel reports
+ * an older status.  Returns 1 with *status set on success, else 0.
+ */
+int dm_get_status_raid(struct dm_pool *mem, const char *params,
+		       struct dm_status_raid **status)
+{
+	int i;
+	unsigned num_fields;
+	const char *p, *pp, *msg_fields = "";
+	struct dm_status_raid *s = NULL;
+	unsigned a = 0;
+
+	if ((num_fields = _count_fields(params)) < 4)
+		goto_bad;
+
+	/* Second field holds the device count */
+	msg_fields = "<#devs> ";
+	if (!(p = _skip_fields(params, 1)) || (sscanf(p, "%d", &i) != 1))
+		goto_bad;
+
+	msg_fields = "";
+	if (!(s = dm_pool_zalloc(mem, sizeof(struct dm_status_raid))))
+		goto_bad;
+
+	if (!(s->raid_type = dm_pool_zalloc(mem, p - params)))
+		goto_bad; /* memory is freed when pool is destroyed */
+
+	if (!(s->dev_health = dm_pool_zalloc(mem, i + 1))) /* Space for health chars */
+		goto_bad;
+
+	msg_fields = "<raid_type> <#devices> <health_chars> and <sync_ratio> ";
+	if (sscanf(params, "%s %u %s " FMTu64 "/" FMTu64,
+		   s->raid_type,
+		   &s->dev_count,
+		   s->dev_health,
+		   &s->insync_regions,
+		   &s->total_regions) != 5)
+		goto_bad;
+
+	/*
+	 * All pre-1.5.0 version parameters are read.  Now we check
+	 * for additional 1.5.0+ parameters (i.e. num_fields at least 6).
+	 *
+	 * Note that 'sync_action' will be NULL (and mismatch_count
+	 * will be 0) if the kernel returns a pre-1.5.0 status.
+	 */
+	if (num_fields < 6)
+		goto out;
+
+	msg_fields = "<sync_action> and <mismatch_cnt> ";
+
+	/* Skip pre-1.5.0 params */
+	if (!(p = _skip_fields(params, 4)) || !(pp = _skip_fields(p, 1)))
+		goto_bad;
+
+	if (!(s->sync_action = dm_pool_zalloc(mem, pp - p)))
+		goto_bad;
+
+	if (sscanf(p, "%s " FMTu64, s->sync_action, &s->mismatch_count) != 2)
+		goto_bad;
+
+	if (num_fields < 7)
+		goto out;
+
+	/*
+	 * All pre-1.9.0 version parameters are read.  Now we check
+	 * for additional 1.9.0+ parameters (i.e. nr_fields at least 7).
+	 *
+	 * Note that data_offset will be 0 if the
+	 * kernel returns a pre-1.9.0 status.
+	 */
+	msg_fields = "<data_offset>";
+	if (!(p = _skip_fields(params, 6))) /* skip pre-1.9.0 params */
+		goto bad;
+	if (sscanf(p, FMTu64, &s->data_offset) != 1)
+		goto bad;
+
+out:
+	*status = s;
+
+	if (s->insync_regions == s->total_regions) {
+		/* FIXME: kernel gives misleading info here
+		 * Trying to recognize a true state */
+		while (i-- > 0)
+			if (s->dev_health[i] == 'a')
+				a++; /* Count number of 'a' */
+
+		if (a && a < s->dev_count) {
+			/* SOME legs are in 'a' */
+			/* Guard: sync_action is NULL when the kernel gave a
+			 * pre-1.5.0 status (num_fields < 6) - see above. */
+			if (s->sync_action &&
+			    (!strcasecmp(s->sync_action, "recover")
+			     || !strcasecmp(s->sync_action, "idle")))
+				/* Kernel may possibly start some action
+				 * in near-by future, do not report 100% */
+				s->insync_regions--;
+		}
+	}
+
+	return 1;
+
+bad:
+	log_error("Failed to parse %sraid params: %s", msg_fields, params);
+
+	if (s)
+		dm_pool_free(mem, s);
+
+	*status = NULL;
+
+	return 0;
+}
+
+/*
+ * <metadata block size> <#used metadata blocks>/<#total metadata blocks>
+ * <cache block size> <#used cache blocks>/<#total cache blocks>
+ * <#read hits> <#read misses> <#write hits> <#write misses>
+ * <#demotions> <#promotions> <#dirty> <#features> <features>*
+ * <#core args> <core args>* <policy name> <#policy args> <policy args>*
+ *
+ * metadata block size      : Fixed block size for each metadata block in
+ *                            sectors
+ * #used metadata blocks    : Number of metadata blocks used
+ * #total metadata blocks   : Total number of metadata blocks
+ * cache block size         : Configurable block size for the cache device
+ *                            in sectors
+ * #used cache blocks       : Number of blocks resident in the cache
+ * #total cache blocks      : Total number of cache blocks
+ * #read hits               : Number of times a READ bio has been mapped
+ *                            to the cache
+ * #read misses             : Number of times a READ bio has been mapped
+ *                            to the origin
+ * #write hits              : Number of times a WRITE bio has been mapped
+ *                            to the cache
+ * #write misses            : Number of times a WRITE bio has been
+ *                            mapped to the origin
+ * #demotions               : Number of times a block has been removed
+ *                            from the cache
+ * #promotions              : Number of times a block has been moved to
+ *                            the cache
+ * #dirty                   : Number of blocks in the cache that differ
+ *                            from the origin
+ * #feature args            : Number of feature args to follow
+ * feature args             : 'writethrough' (optional)
+ * #core args               : Number of core arguments (must be even)
+ * core args                : Key/value pairs for tuning the core
+ *                            e.g. migration_threshold
+ * *policy name             : Name of the policy
+ * #policy args             : Number of policy arguments to follow (must be even)
+ * policy args              : Key/value pairs
+ *                            e.g. sequential_threshold
+ */
+int dm_get_status_cache(struct dm_pool *mem, const char *params,
+			struct dm_status_cache **status)
+{
+	int i, feature_argc;
+	char *str;
+	const char *p, *pp;
+	struct dm_status_cache *s;
+
+	if (!(s = dm_pool_zalloc(mem, sizeof(struct dm_status_cache))))
+		return_0;
+
+	/* Error/Fail states short-circuit parsing entirely. */
+	if (strstr(params, "Error")) {
+		s->error = 1;
+		s->fail = 1; /* This is also I/O fail state */
+		goto out;
+	}
+
+	if (strstr(params, "Fail")) {
+		s->fail = 1;
+		goto out;
+	}
+
+	/* Read in args that have definitive placement */
+	if (sscanf(params,
+		   " " FMTu32
+		   " " FMTu64 "/" FMTu64
+		   " " FMTu32
+		   " " FMTu64 "/" FMTu64
+		   " " FMTu64 " " FMTu64
+		   " " FMTu64 " " FMTu64
+		   " " FMTu64 " " FMTu64
+		   " " FMTu64
+		   " %d",
+		   &s->metadata_block_size,
+		   &s->metadata_used_blocks, &s->metadata_total_blocks,
+		   &s->block_size, /* AKA, chunk_size */
+		   &s->used_blocks, &s->total_blocks,
+		   &s->read_hits, &s->read_misses,
+		   &s->write_hits, &s->write_misses,
+		   &s->demotions, &s->promotions,
+		   &s->dirty_blocks,
+		   &feature_argc) != 14)
+		goto bad;
+
+	/* Now jump to "features" section (12 space-delimited fields:
+	 * the two x/y pairs each count as one field). */
+	if (!(p = _skip_fields(params, 12)))
+		goto bad;
+
+	/* Read in features */
+	for (i = 0; i < feature_argc; i++) {
+		if (!strncmp(p, "writethrough ", 13))
+			s->feature_flags |= DM_CACHE_FEATURE_WRITETHROUGH;
+		else if (!strncmp(p, "writeback ", 10))
+			s->feature_flags |= DM_CACHE_FEATURE_WRITEBACK;
+		else if (!strncmp(p, "passthrough ", 12))
+			s->feature_flags |= DM_CACHE_FEATURE_PASSTHROUGH;
+		else if (!strncmp(p, "metadata2 ", 10))
+			s->feature_flags |= DM_CACHE_FEATURE_METADATA2;
+		else
+			log_error("Unknown feature in status: %s", params);
+
+		if (!(p = _skip_fields(p, 1)))
+			goto bad;
+	}
+
+	/* Read in core_args. */
+	if (sscanf(p, "%d ", &s->core_argc) != 1)
+		goto bad;
+	if ((s->core_argc > 0) &&
+	    (!(s->core_argv = dm_pool_zalloc(mem, sizeof(char *) * s->core_argc)) ||
+	     !(p = _skip_fields(p, 1)) ||
+	     !(str = dm_pool_strdup(mem, p)) ||
+	     !(p = _skip_fields(p, (unsigned) s->core_argc)) ||
+	     (dm_split_words(str, s->core_argc, 0, s->core_argv) != s->core_argc)))
+		goto bad;
+
+	/* Read in policy args */
+	pp = p;
+	if (!(p = _skip_fields(p, 1)) ||
+	    !(s->policy_name = dm_pool_zalloc(mem, (p - pp))))
+		goto bad;
+	if (sscanf(pp, "%s %d", s->policy_name, &s->policy_argc) != 2)
+		goto bad;
+	if (s->policy_argc &&
+	    (!(s->policy_argv = dm_pool_zalloc(mem, sizeof(char *) * s->policy_argc)) ||
+	     !(p = _skip_fields(p, 1)) ||
+	     !(str = dm_pool_strdup(mem, p)) ||
+	     (dm_split_words(str, s->policy_argc, 0, s->policy_argv) != s->policy_argc)))
+		goto bad;
+
+	/* TODO: improve this parser */
+	if (strstr(p, " ro"))
+		s->read_only = 1;
+
+	if (strstr(p, " needs_check"))
+		s->needs_check = 1;
+out:
+	*status = s;
+	return 1;
+
+bad:
+	log_error("Failed to parse cache params: %s", params);
+	dm_pool_free(mem, s);
+	*status = NULL;
+
+	return 0;
+}
+
+/*
+ * Parse thin-pool target status 'params' into caller-provided 's'
+ * (zeroed first).  "Error"/"Fail" states return success with the
+ * corresponding flags set.  Returns 1 on success, 0 on parse failure.
+ */
+int parse_thin_pool_status(const char *params, struct dm_status_thin_pool *s)
+{
+	int pos;
+
+	memset(s, 0, sizeof(*s));
+
+	if (!params) {
+		log_error("Failed to parse invalid thin params.");
+		return 0;
+	}
+
+	if (strstr(params, "Error")) {
+		s->error = 1;
+		s->fail = 1; /* This is also I/O fail state */
+		return 1;
+	}
+
+	if (strstr(params, "Fail")) {
+		s->fail = 1;
+		return 1;
+	}
+
+	/* FIXME: add support for held metadata root */
+	/* %n records where the fixed fields end; flags follow. */
+	if (sscanf(params, FMTu64 " " FMTu64 "/" FMTu64 " " FMTu64 "/" FMTu64 "%n",
+		   &s->transaction_id,
+		   &s->used_metadata_blocks,
+		   &s->total_metadata_blocks,
+		   &s->used_data_blocks,
+		   &s->total_data_blocks, &pos) < 5) {
+		log_error("Failed to parse thin pool params: %s.", params);
+		return 0;
+	}
+
+	/* New status flags */
+	if (strstr(params + pos, "no_discard_passdown"))
+		s->discards = DM_THIN_DISCARDS_NO_PASSDOWN;
+	else if (strstr(params + pos, "ignore_discard"))
+		s->discards = DM_THIN_DISCARDS_IGNORE;
+	else /* default discard_passdown */
+		s->discards = DM_THIN_DISCARDS_PASSDOWN;
+
+	/* Default is 'writable' (rw) data */
+	if (strstr(params + pos, "out_of_data_space"))
+		s->out_of_data_space = 1;
+	else if (strstr(params + pos, "ro "))
+		s->read_only = 1;
+
+	/* Default is 'queue_if_no_space' */
+	if (strstr(params + pos, "error_if_no_space"))
+		s->error_if_no_space = 1;
+
+	if (strstr(params + pos, "needs_check"))
+		s->needs_check = 1;
+
+	return 1;
+}
+
+/*
+ * Pool-allocating wrapper around parse_thin_pool_status().
+ * Returns 1 with *status set on success, else 0.
+ */
+int dm_get_status_thin_pool(struct dm_pool *mem, const char *params,
+			    struct dm_status_thin_pool **status)
+{
+	struct dm_status_thin_pool *s;
+
+	if (!(s = dm_pool_alloc(mem, sizeof(struct dm_status_thin_pool)))) {
+		log_error("Failed to allocate thin_pool status structure.");
+		return 0;
+	}
+
+	if (!parse_thin_pool_status(params, s)) {
+		dm_pool_free(mem, s);
+		return_0;
+	}
+
+	*status = s;
+
+	return 1;
+}
+
+/*
+ * Parse thin target status 'params' into a dm_status_thin allocated from
+ * 'mem'.  "-" means no mapping yet (zeroed result); "Fail" sets s->fail.
+ * Returns 1 with *status set on success, else 0.
+ */
+int dm_get_status_thin(struct dm_pool *mem, const char *params,
+		       struct dm_status_thin **status)
+{
+	struct dm_status_thin *s;
+
+	if (!(s = dm_pool_zalloc(mem, sizeof(struct dm_status_thin)))) {
+		log_error("Failed to allocate thin status structure.");
+		return 0;
+	}
+
+	if (strchr(params, '-')) {
+		/* nothing to parse */
+	} else if (strstr(params, "Fail")) {
+		s->fail = 1;
+	} else if (sscanf(params, FMTu64 " " FMTu64,
+		   &s->mapped_sectors,
+		   &s->highest_mapped_sector) != 2) {
+		dm_pool_free(mem, s);
+		log_error("Failed to parse thin params: %s.", params);
+		return 0;
+	}
+
+	*status = s;
+
+	return 1;
+}
+
+/*
+ * dm core parms:	    0 409600 mirror
+ * Mirror core parms:	    2 253:4 253:5 400/400
+ * New-style failure params: 1 AA
+ * New-style log params:    3 cluster 253:3 A
+ *			 or 3 disk 253:3 A
+ *			 or 1 core
+ */
+#define DM_MIRROR_MAX_IMAGES 8 /* limited by kernel DM_KCOPYD_MAX_REGIONS */
+
+/*
+ * Parse mirror target status 'params' into a dm_status_mirror allocated
+ * from 'mem', including per-device health chars and log device info.
+ * Returns 1 with *status set on success, else 0.
+ */
+int dm_get_status_mirror(struct dm_pool *mem, const char *params,
+			 struct dm_status_mirror **status)
+{
+	struct dm_status_mirror *s;
+	const char *p, *pos = params;
+	unsigned num_devs, argc, i;
+	int used;
+
+	if (!(s = dm_pool_zalloc(mem, sizeof(*s)))) {
+		log_error("Failed to alloc mem pool to parse mirror status.");
+		return 0;
+	}
+
+	if (sscanf(pos, "%u %n", &num_devs, &used) != 1)
+		goto_out;
+	pos += used;
+
+	if (num_devs > DM_MIRROR_MAX_IMAGES) {
+		/* Fixed grammar in message: "then" -> "than". */
+		log_error(INTERNAL_ERROR "More than " DM_TO_STRING(DM_MIRROR_MAX_IMAGES)
+			  " reported in mirror status.");
+		goto out;
+	}
+
+	if (!(s->devs = dm_pool_alloc(mem, num_devs * sizeof(*(s->devs))))) {
+		log_error("Allocation of devs failed.");
+		goto out;
+	}
+
+	/* major:minor of each mirror image. */
+	for (i = 0; i < num_devs; ++i, pos += used)
+		if (sscanf(pos, "%u:%u %n",
+			   &(s->devs[i].major), &(s->devs[i].minor), &used) != 2)
+			goto_out;
+
+	if (sscanf(pos, FMTu64 "/" FMTu64 "%n",
+		   &s->insync_regions, &s->total_regions, &used) != 2)
+		goto_out;
+	pos += used;
+
+	if (sscanf(pos, "%u %n", &argc, &used) != 1)
+		goto_out;
+	pos += used;
+
+	/* Health chars immediately follow the failure-params count. */
+	for (i = 0; i < num_devs ; ++i)
+		s->devs[i].health = pos[i];
+
+	if (!(pos = _skip_fields(pos, argc)))
+		goto_out;
+
+	if (strncmp(pos, "userspace", 9) == 0) {
+		pos += 9;
+		/* FIXME: support status of userspace mirror implementation */
+	}
+
+	if (sscanf(pos, "%u %n", &argc, &used) != 1)
+		goto_out;
+	pos += used;
+
+	if (argc == 1) {
+		/* core, cluster-core */
+		if (!(s->log_type = dm_pool_strdup(mem, pos))) {
+			log_error("Allocation of log type string failed.");
+			goto out;
+		}
+	} else {
+		if (!(p = _skip_fields(pos, 1)))
+			goto_out;
+
+		/* disk, cluster-disk */
+		if (!(s->log_type = dm_pool_strndup(mem, pos, p - pos - 1))) {
+			log_error("Allocation of log type string failed.");
+			goto out;
+		}
+		pos = p;
+
+		if ((argc > 2) && !strcmp(s->log_type, "disk")) {
+			s->log_count = argc - 2;
+
+			if (!(s->logs = dm_pool_alloc(mem, s->log_count * sizeof(*(s->logs))))) {
+				log_error("Allocation of logs failed.");
+				goto out;
+			}
+
+			for (i = 0; i < s->log_count; ++i, pos += used)
+				if (sscanf(pos, "%u:%u %n",
+					   &s->logs[i].major, &s->logs[i].minor, &used) != 2)
+					goto_out;
+
+			for (i = 0; i < s->log_count; ++i)
+				s->logs[i].health = pos[i];
+		}
+	}
+
+	s->dev_count = num_devs;
+	*status = s;
+
+	return 1;
+out:
+	log_error("Failed to parse mirror status %s.", params);
+	dm_pool_free(mem, s);
+	*status = NULL;
+
+	return 0;
+}
diff --git a/device_mapper/libdm-timestamp.c b/device_mapper/libdm-timestamp.c
new file mode 100644
index 000000000..c2d0ad8d2
--- /dev/null
+++ b/device_mapper/libdm-timestamp.c
@@ -0,0 +1,178 @@
+/*
+ * Copyright (C) 2006 Rackable Systems All rights reserved.
+ * Copyright (C) 2015 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * Abstract out the time methods used so they can be adjusted later -
+ * the results of these routines should stay in-core.
+ */
+
+#include "misc/dmlib.h"
+
+#include <stdlib.h>
+
+#define NSEC_PER_USEC UINT64_C(1000)
+#define NSEC_PER_MSEC UINT64_C(1000000)
+#define NSEC_PER_SEC UINT64_C(1000000000)
+
+/*
+ * The realtime section uses clock_gettime with the CLOCK_MONOTONIC
+ * parameter to prevent issues with time warps
+ * This implementation requires librt.
+ */
+#ifdef HAVE_REALTIME
+
+#include <time.h>
+
+struct dm_timestamp {
+ struct timespec t;
+};
+
+/*
+ * Collapse a timespec-based timestamp into a single uint64_t
+ * nanosecond count: tv_sec scaled by NSEC_PER_SEC plus tv_nsec.
+ */
+static uint64_t _timestamp_to_uint64(struct dm_timestamp *ts)
+{
+ uint64_t stamp = 0;
+
+ stamp += (uint64_t) ts->t.tv_sec * NSEC_PER_SEC;
+ stamp += (uint64_t) ts->t.tv_nsec;
+
+ return stamp;
+}
+
+/*
+ * Allocate a zero-initialised dm_timestamp.
+ * Returns NULL on allocation failure.
+ */
+struct dm_timestamp *dm_timestamp_alloc(void)
+{
+ struct dm_timestamp *ts = NULL;
+
+ if (!(ts = dm_zalloc(sizeof(*ts))))
+ stack;
+
+ return ts;
+}
+
+/*
+ * Read the current CLOCK_MONOTONIC time into *ts.
+ *
+ * Returns 1 on success. Returns 0 if ts is NULL, or if
+ * clock_gettime() fails - in the failure case *ts is reset to zero
+ * so a stale value cannot be mistaken for a valid reading.
+ */
+int dm_timestamp_get(struct dm_timestamp *ts)
+{
+ if (!ts)
+ return 0;
+
+ if (clock_gettime(CLOCK_MONOTONIC, &ts->t)) {
+ log_sys_error("clock_gettime", "get_timestamp");
+ ts->t.tv_sec = 0;
+ ts->t.tv_nsec = 0;
+ return 0;
+ }
+
+ return 1;
+}
+
+#else /* ! HAVE_REALTIME */
+
+/*
+ * The !realtime section just uses gettimeofday and is therefore subject
+ * to ntp-type time warps - it is not clear whether that should be allowed.
+ */
+
+#include <sys/time.h>
+
+struct dm_timestamp {
+ struct timeval t;
+};
+
+/*
+ * Collapse a timeval-based timestamp into a single uint64_t
+ * nanosecond count. gettimeofday() only provides microsecond
+ * resolution, so values produced here are whole multiples of
+ * NSEC_PER_USEC.
+ */
+static uint64_t _timestamp_to_uint64(struct dm_timestamp *ts)
+{
+ uint64_t stamp = 0;
+
+ stamp += ts->t.tv_sec * NSEC_PER_SEC;
+ stamp += ts->t.tv_usec * NSEC_PER_USEC;
+
+ return stamp;
+}
+
+/*
+ * Allocate a dm_timestamp.
+ * Returns NULL on allocation failure.
+ *
+ * Use dm_zalloc() to match the HAVE_REALTIME variant of this
+ * function, so a timestamp that is inspected before the first
+ * dm_timestamp_get() compares as zero rather than as garbage.
+ */
+struct dm_timestamp *dm_timestamp_alloc(void)
+{
+ struct dm_timestamp *ts;
+
+ if (!(ts = dm_zalloc(sizeof(*ts))))
+ stack;
+
+ return ts;
+}
+
+/*
+ * Read the current wall-clock time into *ts via gettimeofday().
+ *
+ * Returns 1 on success. Returns 0 if ts is NULL, or if
+ * gettimeofday() fails - in the failure case *ts is reset to zero
+ * so a stale value cannot be mistaken for a valid reading.
+ */
+int dm_timestamp_get(struct dm_timestamp *ts)
+{
+ if (!ts)
+ return 0;
+
+ if (gettimeofday(&ts->t, NULL)) {
+ log_sys_error("gettimeofday", "get_timestamp");
+ ts->t.tv_sec = 0;
+ ts->t.tv_usec = 0;
+ return 0;
+ }
+
+ return 1;
+}
+
+#endif /* HAVE_REALTIME */
+
+/*
+ * Compare two timestamps.
+ *
+ * Return: -1 if ts1 is less than ts2
+ * 0 if ts1 is equal to ts2
+ * 1 if ts1 is greater than ts2
+ */
+int dm_timestamp_compare(struct dm_timestamp *ts1, struct dm_timestamp *ts2)
+{
+ uint64_t t1, t2;
+
+ /* Both arguments must be non-NULL: no validation is performed here. */
+ t1 = _timestamp_to_uint64(ts1);
+ t2 = _timestamp_to_uint64(ts2);
+
+ if (t2 < t1)
+ return 1;
+
+ if (t1 < t2)
+ return -1;
+
+ return 0;
+}
+
+/*
+ * Return the absolute difference in nanoseconds between
+ * the dm_timestamp objects ts1 and ts2.
+ *
+ * Callers that need to know whether ts1 is before, equal to, or after ts2
+ * in addition to the magnitude should use dm_timestamp_compare.
+ */
+uint64_t dm_timestamp_delta(struct dm_timestamp *ts1, struct dm_timestamp *ts2)
+{
+ uint64_t t1, t2;
+
+ /* Both arguments must be non-NULL: no validation is performed here. */
+ t1 = _timestamp_to_uint64(ts1);
+ t2 = _timestamp_to_uint64(ts2);
+
+ /* Unsigned subtraction: take the larger minus the smaller. */
+ if (t1 > t2)
+ return t1 - t2;
+
+ return t2 - t1;
+}
+
+/* Copy the value of ts_old into ts_new. Both must be non-NULL. */
+void dm_timestamp_copy(struct dm_timestamp *ts_new, struct dm_timestamp *ts_old)
+{
+ *ts_new = *ts_old;
+}
+
+/*
+ * Free a timestamp obtained from dm_timestamp_alloc().
+ * NOTE(review): presumably dm_free() tolerates NULL like free() -
+ * confirm before passing an unchecked pointer.
+ */
+void dm_timestamp_destroy(struct dm_timestamp *ts)
+{
+ dm_free(ts);
+}
diff --git a/device_mapper/misc/dm-ioctl.h b/device_mapper/misc/dm-ioctl.h
new file mode 100644
index 000000000..79f574cd9
--- /dev/null
+++ b/device_mapper/misc/dm-ioctl.h
@@ -0,0 +1,364 @@
+/*
+ * Copyright (C) 2001 - 2003 Sistina Software (UK) Limited.
+ * Copyright (C) 2004 - 2017 Red Hat, Inc. All rights reserved.
+ *
+ * This file is released under the LGPL.
+ */
+
+#ifndef _LINUX_DM_IOCTL_V4_H
+#define _LINUX_DM_IOCTL_V4_H
+
+#ifdef __linux__
+# include <linux/types.h>
+#endif
+
+#define DM_DIR "mapper" /* Slashes not supported */
+#define DM_CONTROL_NODE "control"
+#define DM_MAX_TYPE_NAME 16
+#define DM_NAME_LEN 128
+#define DM_UUID_LEN 129
+
+/*
+ * A traditional ioctl interface for the device mapper.
+ *
+ * Each device can have two tables associated with it, an
+ * 'active' table which is the one currently used by io passing
+ * through the device, and an 'inactive' one which is a table
+ * that is being prepared as a replacement for the 'active' one.
+ *
+ * DM_VERSION:
+ * Just get the version information for the ioctl interface.
+ *
+ * DM_REMOVE_ALL:
+ * Remove all dm devices, destroy all tables. Only really used
+ * for debug.
+ *
+ * DM_LIST_DEVICES:
+ * Get a list of all the dm device names.
+ *
+ * DM_DEV_CREATE:
+ * Create a new device, neither the 'active' or 'inactive' table
+ * slots will be filled. The device will be in suspended state
+ * after creation, however any io to the device will get errored
+ * since it will be out-of-bounds.
+ *
+ * DM_DEV_REMOVE:
+ * Remove a device, destroy any tables.
+ *
+ * DM_DEV_RENAME:
+ * Rename a device or set its uuid if none was previously supplied.
+ *
+ * DM_SUSPEND:
+ * This performs both suspend and resume, depending which flag is
+ * passed in.
+ * Suspend: This command will not return until all pending io to
+ * the device has completed. Further io will be deferred until
+ * the device is resumed.
+ * Resume: It is no longer an error to issue this command on an
+ * unsuspended device. If a table is present in the 'inactive'
+ * slot, it will be moved to the active slot, then the old table
+ * from the active slot will be _destroyed_. Finally the device
+ * is resumed.
+ *
+ * DM_DEV_STATUS:
+ * Retrieves the status for the table in the 'active' slot.
+ *
+ * DM_DEV_WAIT:
+ * Wait for a significant event to occur to the device. This
+ * could either be caused by an event triggered by one of the
+ * targets of the table in the 'active' slot, or a table change.
+ *
+ * DM_TABLE_LOAD:
+ * Load a table into the 'inactive' slot for the device. The
+ * device does _not_ need to be suspended prior to this command.
+ *
+ * DM_TABLE_CLEAR:
+ * Destroy any table in the 'inactive' slot (ie. abort).
+ *
+ * DM_TABLE_DEPS:
+ * Return a set of device dependencies for the 'active' table.
+ *
+ * DM_TABLE_STATUS:
+ * Return the targets status for the 'active' table.
+ *
+ * DM_TARGET_MSG:
+ * Pass a message string to the target at a specific offset of a device.
+ *
+ * DM_DEV_SET_GEOMETRY:
+ * Set the geometry of a device by passing in a string in this format:
+ *
+ * "cylinders heads sectors_per_track start_sector"
+ *
+ * Beware that CHS geometry is nearly obsolete and only provided
+ * for compatibility with dm devices that can be booted by a PC
+ * BIOS. See struct hd_geometry for range limits. Also note that
+ * the geometry is erased if the device size changes.
+ */
+
+/*
+ * All ioctl arguments consist of a single chunk of memory, with
+ * this structure at the start. If a uuid is specified any
+ * lookup (eg. for a DM_INFO) will be done on that, *not* the
+ * name.
+ */
+struct dm_ioctl {
+ /*
+ * The version number is made up of three parts:
+ * major - no backward or forward compatibility,
+ * minor - only backwards compatible,
+ * patch - both backwards and forwards compatible.
+ *
+ * All clients of the ioctl interface should fill in the
+ * version number of the interface that they were
+ * compiled with.
+ *
+ * All recognised ioctl commands (ie. those that don't
+ * return -ENOTTY) fill out this field, even if the
+ * command failed.
+ */
+ uint32_t version[3]; /* in/out */
+ uint32_t data_size; /* total size of data passed in
+ * including this struct */
+
+ uint32_t data_start; /* offset to start of data
+ * relative to start of this struct */
+
+ uint32_t target_count; /* in/out */
+ int32_t open_count; /* out */
+ uint32_t flags; /* in/out */
+
+ /*
+ * event_nr holds either the event number (input and output) or the
+ * udev cookie value (input only).
+ * The DM_DEV_WAIT ioctl takes an event number as input.
+ * The DM_SUSPEND, DM_DEV_REMOVE and DM_DEV_RENAME ioctls
+ * use the field as a cookie to return in the DM_COOKIE
+ * variable with the uevents they issue.
+ * For output, the ioctls return the event number, not the cookie.
+ */
+ uint32_t event_nr; /* in/out */
+ uint32_t padding;
+
+ uint64_t dev; /* in/out */
+
+ char name[DM_NAME_LEN]; /* device name */
+ char uuid[DM_UUID_LEN]; /* unique identifier for
+ * the block device */
+ char data[7]; /* padding or data */
+};
+
+/*
+ * Used to specify tables. These structures appear after the
+ * dm_ioctl.
+ */
+struct dm_target_spec {
+ uint64_t sector_start;
+ uint64_t length;
+ int32_t status; /* used when reading from kernel only */
+
+ /*
+ * Location of the next dm_target_spec.
+ * - When specifying targets on a DM_TABLE_LOAD command, this value is
+ * the number of bytes from the start of the "current" dm_target_spec
+ * to the start of the "next" dm_target_spec.
+ * - When retrieving targets on a DM_TABLE_STATUS command, this value
+ * is the number of bytes from the start of the first dm_target_spec
+ * (that follows the dm_ioctl struct) to the start of the "next"
+ * dm_target_spec.
+ */
+ uint32_t next;
+
+ char target_type[DM_MAX_TYPE_NAME];
+
+ /*
+ * Parameter string starts immediately after this object.
+ * Be careful to add padding after string to ensure correct
+ * alignment of subsequent dm_target_spec.
+ */
+};
+
+/*
+ * Used to retrieve the target dependencies.
+ */
+struct dm_target_deps {
+ uint32_t count; /* Array size */
+ uint32_t padding; /* unused */
+ uint64_t dev[0]; /* out */
+};
+
+/*
+ * Used to get a list of all dm devices.
+ */
+struct dm_name_list {
+ uint64_t dev;
+ uint32_t next; /* offset to the next record from
+ the _start_ of this */
+ char name[0];
+};
+
+/*
+ * Used to retrieve the target versions
+ */
+struct dm_target_versions {
+ uint32_t next;
+ uint32_t version[3];
+
+ char name[0];
+};
+
+/*
+ * Used to pass message to a target
+ */
+struct dm_target_msg {
+ uint64_t sector; /* Device sector */
+
+ char message[0];
+};
+
+/*
+ * If you change this make sure you make the corresponding change
+ * to dm-ioctl.c:lookup_ioctl()
+ */
+enum {
+ /* Top level cmds */
+ DM_VERSION_CMD = 0,
+ DM_REMOVE_ALL_CMD,
+ DM_LIST_DEVICES_CMD,
+
+ /* device level cmds */
+ DM_DEV_CREATE_CMD,
+ DM_DEV_REMOVE_CMD,
+ DM_DEV_RENAME_CMD,
+ DM_DEV_SUSPEND_CMD,
+ DM_DEV_STATUS_CMD,
+ DM_DEV_WAIT_CMD,
+
+ /* Table level cmds */
+ DM_TABLE_LOAD_CMD,
+ DM_TABLE_CLEAR_CMD,
+ DM_TABLE_DEPS_CMD,
+ DM_TABLE_STATUS_CMD,
+
+ /* Added later */
+ DM_LIST_VERSIONS_CMD,
+ DM_TARGET_MSG_CMD,
+ DM_DEV_SET_GEOMETRY_CMD,
+ DM_DEV_ARM_POLL_CMD,
+};
+
+#define DM_IOCTL 0xfd
+
+#define DM_VERSION _IOWR(DM_IOCTL, DM_VERSION_CMD, struct dm_ioctl)
+#define DM_REMOVE_ALL _IOWR(DM_IOCTL, DM_REMOVE_ALL_CMD, struct dm_ioctl)
+#define DM_LIST_DEVICES _IOWR(DM_IOCTL, DM_LIST_DEVICES_CMD, struct dm_ioctl)
+
+#define DM_DEV_CREATE _IOWR(DM_IOCTL, DM_DEV_CREATE_CMD, struct dm_ioctl)
+#define DM_DEV_REMOVE _IOWR(DM_IOCTL, DM_DEV_REMOVE_CMD, struct dm_ioctl)
+#define DM_DEV_RENAME _IOWR(DM_IOCTL, DM_DEV_RENAME_CMD, struct dm_ioctl)
+#define DM_DEV_SUSPEND _IOWR(DM_IOCTL, DM_DEV_SUSPEND_CMD, struct dm_ioctl)
+#define DM_DEV_STATUS _IOWR(DM_IOCTL, DM_DEV_STATUS_CMD, struct dm_ioctl)
+#define DM_DEV_WAIT _IOWR(DM_IOCTL, DM_DEV_WAIT_CMD, struct dm_ioctl)
+#define DM_DEV_ARM_POLL _IOWR(DM_IOCTL, DM_DEV_ARM_POLL_CMD, struct dm_ioctl)
+
+#define DM_TABLE_LOAD _IOWR(DM_IOCTL, DM_TABLE_LOAD_CMD, struct dm_ioctl)
+#define DM_TABLE_CLEAR _IOWR(DM_IOCTL, DM_TABLE_CLEAR_CMD, struct dm_ioctl)
+#define DM_TABLE_DEPS _IOWR(DM_IOCTL, DM_TABLE_DEPS_CMD, struct dm_ioctl)
+#define DM_TABLE_STATUS _IOWR(DM_IOCTL, DM_TABLE_STATUS_CMD, struct dm_ioctl)
+
+#define DM_LIST_VERSIONS _IOWR(DM_IOCTL, DM_LIST_VERSIONS_CMD, struct dm_ioctl)
+
+#define DM_TARGET_MSG _IOWR(DM_IOCTL, DM_TARGET_MSG_CMD, struct dm_ioctl)
+#define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
+
+#define DM_VERSION_MAJOR 4
+#define DM_VERSION_MINOR 36
+#define DM_VERSION_PATCHLEVEL 0
+#define DM_VERSION_EXTRA "-ioctl (2017-06-09)"
+
+/* Status bits */
+#define DM_READONLY_FLAG (1 << 0) /* In/Out */
+#define DM_SUSPEND_FLAG (1 << 1) /* In/Out */
+#define DM_PERSISTENT_DEV_FLAG (1 << 3) /* In */
+
+/*
+ * Flag passed into ioctl STATUS command to get table information
+ * rather than current status.
+ */
+#define DM_STATUS_TABLE_FLAG (1 << 4) /* In */
+
+/*
+ * Flags that indicate whether a table is present in either of
+ * the two table slots that a device has.
+ */
+#define DM_ACTIVE_PRESENT_FLAG (1 << 5) /* Out */
+#define DM_INACTIVE_PRESENT_FLAG (1 << 6) /* Out */
+
+/*
+ * Indicates that the buffer passed in wasn't big enough for the
+ * results.
+ */
+#define DM_BUFFER_FULL_FLAG (1 << 8) /* Out */
+
+/*
+ * This flag is now ignored.
+ */
+#define DM_SKIP_BDGET_FLAG (1 << 9) /* In */
+
+/*
+ * Set this to avoid attempting to freeze any filesystem when suspending.
+ */
+#define DM_SKIP_LOCKFS_FLAG (1 << 10) /* In */
+
+/*
+ * Set this to suspend without flushing queued ios.
+ * Also disables flushing uncommitted changes in the thin target before
+ * generating statistics for DM_TABLE_STATUS and DM_DEV_WAIT.
+ */
+#define DM_NOFLUSH_FLAG (1 << 11) /* In */
+
+/*
+ * If set, any table information returned will relate to the inactive
+ * table instead of the live one. Always check DM_INACTIVE_PRESENT_FLAG
+ * is set before using the data returned.
+ */
+#define DM_QUERY_INACTIVE_TABLE_FLAG (1 << 12) /* In */
+
+/*
+ * If set, a uevent was generated for which the caller may need to wait.
+ */
+#define DM_UEVENT_GENERATED_FLAG (1 << 13) /* Out */
+
+/*
+ * If set, rename changes the uuid not the name. Only permitted
+ * if no uuid was previously supplied: an existing uuid cannot be changed.
+ */
+#define DM_UUID_FLAG (1 << 14) /* In */
+
+/*
+ * If set, all buffers are wiped after use. Use when sending
+ * or requesting sensitive data such as an encryption key.
+ */
+#define DM_SECURE_DATA_FLAG (1 << 15) /* In */
+
+/*
+ * If set, a message generated output data.
+ */
+#define DM_DATA_OUT_FLAG (1 << 16) /* Out */
+
+/*
+ * If set with DM_DEV_REMOVE or DM_REMOVE_ALL this indicates that if
+ * the device cannot be removed immediately because it is still in use
+ * it should instead be scheduled for removal when it gets closed.
+ *
+ * On return from DM_DEV_REMOVE, DM_DEV_STATUS or other ioctls, this
+ * flag indicates that the device is scheduled to be removed when it
+ * gets closed.
+ */
+#define DM_DEFERRED_REMOVE (1 << 17) /* In/Out */
+
+/*
+ * If set, the device is suspended internally.
+ */
+#define DM_INTERNAL_SUSPEND_FLAG (1 << 18) /* Out */
+
+#endif /* _LINUX_DM_IOCTL_V4_H */
diff --git a/device_mapper/misc/dm-log-userspace.h b/device_mapper/misc/dm-log-userspace.h
new file mode 100644
index 000000000..a770ae62e
--- /dev/null
+++ b/device_mapper/misc/dm-log-userspace.h
@@ -0,0 +1,418 @@
+/*
+ * Copyright (C) 2006-2009 Red Hat, Inc.
+ *
+ * This file is released under the LGPL.
+ */
+
+#ifndef __DM_LOG_USERSPACE_H__
+#define __DM_LOG_USERSPACE_H__
+
+#include <inttypes.h>
+
+#include "dm-ioctl.h" /* For DM_UUID_LEN */
+
+/*
+ * The device-mapper userspace log module consists of a kernel component and
+ * a user-space component. The kernel component implements the API defined
+ * in dm-dirty-log.h. Its purpose is simply to pass the parameters and
+ * return values of those API functions between kernel and user-space.
+ *
+ * Below are defined the 'request_types' - DM_ULOG_CTR, DM_ULOG_DTR, etc.
+ * These request types represent the different functions in the device-mapper
+ * dirty log API. Each of these is described in more detail below.
+ *
+ * The user-space program must listen for requests from the kernel (representing
+ * the various API functions) and process them.
+ *
+ * User-space begins by setting up the communication link (error checking
+ * removed for clarity):
+ * fd = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
+ * addr.nl_family = AF_NETLINK;
+ * addr.nl_groups = CN_IDX_DM;
+ * addr.nl_pid = 0;
+ * r = bind(fd, (struct sockaddr *) &addr, sizeof(addr));
+ * opt = addr.nl_groups;
+ * setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &opt, sizeof(opt));
+ *
+ * User-space will then wait to receive requests from the kernel, which it
+ * will process as described below. The requests are received in the form,
+ * ((struct dm_ulog_request) + (additional data)). Depending on the request
+ * type, there may or may not be 'additional data'. In the descriptions below,
+ * you will see 'Payload-to-userspace' and 'Payload-to-kernel'. The
+ * 'Payload-to-userspace' is what the kernel sends in 'additional data' as
+ * necessary parameters to complete the request. The 'Payload-to-kernel' is
+ * the 'additional data' returned to the kernel that contains the necessary
+ * results of the request. The 'data_size' field in the dm_ulog_request
+ * structure denotes the availability and amount of payload data.
+ */
+
+/*
+ * DM_ULOG_CTR corresponds to (found in dm-dirty-log.h):
+ * int (*ctr)(struct dm_dirty_log *log, struct dm_target *ti,
+ * unsigned argc, char **argv);
+ *
+ * Payload-to-userspace:
+ * A single string containing all the argv arguments separated by ' 's
+ * Payload-to-kernel:
+ * The name of the device that is used as the backing store for the log
+ * data. 'dm_get_device' will be called on this device. ('dm_put_device'
+ * will be called on this device automatically after calling DM_ULOG_DTR.)
+ * If there is no device needed for log data, 'data_size' in the
+ * dm_ulog_request struct should be 0.
+ *
+ * The UUID contained in the dm_ulog_request structure is the reference that
+ * will be used by all request types to a specific log. The constructor must
+ * record this association with the instance created.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field, filling the
+ * data field with the log device if necessary, and setting 'data_size'
+ * appropriately.
+ */
+#define DM_ULOG_CTR 1
+
+/*
+ * DM_ULOG_DTR corresponds to (found in dm-dirty-log.h):
+ * void (*dtr)(struct dm_dirty_log *log);
+ *
+ * Payload-to-userspace:
+ * A single string containing all the argv arguments separated by ' 's
+ * Payload-to-kernel:
+ * None. ('data_size' in the dm_ulog_request struct should be 0.)
+ *
+ * The UUID contained in the dm_ulog_request structure is all that is
+ * necessary to identify the log instance being destroyed. There is no
+ * payload data.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and clearing
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_DTR 2
+
+/*
+ * DM_ULOG_PRESUSPEND corresponds to (found in dm-dirty-log.h):
+ * int (*presuspend)(struct dm_dirty_log *log);
+ *
+ * Payload-to-userspace:
+ * None.
+ * Payload-to-kernel:
+ * None.
+ *
+ * The UUID contained in the dm_ulog_request structure is all that is
+ * necessary to identify the log instance being presuspended. There is no
+ * payload data.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_PRESUSPEND 3
+
+/*
+ * DM_ULOG_POSTSUSPEND corresponds to (found in dm-dirty-log.h):
+ * int (*postsuspend)(struct dm_dirty_log *log);
+ *
+ * Payload-to-userspace:
+ * None.
+ * Payload-to-kernel:
+ * None.
+ *
+ * The UUID contained in the dm_ulog_request structure is all that is
+ * necessary to identify the log instance being postsuspended. There is no
+ * payload data.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_POSTSUSPEND 4
+
+/*
+ * DM_ULOG_RESUME corresponds to (found in dm-dirty-log.h):
+ * int (*resume)(struct dm_dirty_log *log);
+ *
+ * Payload-to-userspace:
+ * None.
+ * Payload-to-kernel:
+ * None.
+ *
+ * The UUID contained in the dm_ulog_request structure is all that is
+ * necessary to identify the log instance being resumed. There is no
+ * payload data.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_RESUME 5
+
+/*
+ * DM_ULOG_GET_REGION_SIZE corresponds to (found in dm-dirty-log.h):
+ * uint32_t (*get_region_size)(struct dm_dirty_log *log);
+ *
+ * Payload-to-userspace:
+ * None.
+ * Payload-to-kernel:
+ * uint64_t - contains the region size
+ *
+ * The region size is something that was determined at constructor time.
+ * It is returned in the payload area and 'data_size' is set to
+ * reflect this.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field appropriately.
+ */
+#define DM_ULOG_GET_REGION_SIZE 6
+
+/*
+ * DM_ULOG_IS_CLEAN corresponds to (found in dm-dirty-log.h):
+ * int (*is_clean)(struct dm_dirty_log *log, region_t region);
+ *
+ * Payload-to-userspace:
+ * uint64_t - the region to get clean status on
+ * Payload-to-kernel:
+ * int64_t - 1 if clean, 0 otherwise
+ *
+ * Payload is sizeof(uint64_t) and contains the region for which the clean
+ * status is being made.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - filling the payload with 0 (not clean) or
+ * 1 (clean), setting 'data_size' and 'error' appropriately.
+ */
+#define DM_ULOG_IS_CLEAN 7
+
+/*
+ * DM_ULOG_IN_SYNC corresponds to (found in dm-dirty-log.h):
+ * int (*in_sync)(struct dm_dirty_log *log, region_t region,
+ * int can_block);
+ *
+ * Payload-to-userspace:
+ * uint64_t - the region to get sync status on
+ * Payload-to-kernel:
+ * int64_t - 1 if in-sync, 0 otherwise
+ *
+ * Exactly the same as 'is_clean' above, except this time asking "has the
+ * region been recovered?" vs. "is the region not being modified?"
+ */
+#define DM_ULOG_IN_SYNC 8
+
+/*
+ * DM_ULOG_FLUSH corresponds to (found in dm-dirty-log.h):
+ * int (*flush)(struct dm_dirty_log *log);
+ *
+ * Payload-to-userspace:
+ * None.
+ * Payload-to-kernel:
+ * None.
+ *
+ * No incoming or outgoing payload. Simply flush log state to disk.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and clearing
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_FLUSH 9
+
+/*
+ * DM_ULOG_MARK_REGION corresponds to (found in dm-dirty-log.h):
+ * void (*mark_region)(struct dm_dirty_log *log, region_t region);
+ *
+ * Payload-to-userspace:
+ * uint64_t [] - region(s) to mark
+ * Payload-to-kernel:
+ * None.
+ *
+ * Incoming payload contains the one or more regions to mark dirty.
+ * The number of regions contained in the payload can be determined from
+ * 'data_size/sizeof(uint64_t)'.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and clearing
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_MARK_REGION 10
+
+/*
+ * DM_ULOG_CLEAR_REGION corresponds to (found in dm-dirty-log.h):
+ * void (*clear_region)(struct dm_dirty_log *log, region_t region);
+ *
+ * Payload-to-userspace:
+ * uint64_t [] - region(s) to clear
+ * Payload-to-kernel:
+ * None.
+ *
+ * Incoming payload contains the one or more regions to mark clean.
+ * The number of regions contained in the payload can be determined from
+ * 'data_size/sizeof(uint64_t)'.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and clearing
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_CLEAR_REGION 11
+
+/*
+ * DM_ULOG_GET_RESYNC_WORK corresponds to (found in dm-dirty-log.h):
+ * int (*get_resync_work)(struct dm_dirty_log *log, region_t *region);
+ *
+ * Payload-to-userspace:
+ * None.
+ * Payload-to-kernel:
+ * {
+ * int64_t i; -- 1 if recovery necessary, 0 otherwise
+ * uint64_t r; -- The region to recover if i=1
+ * }
+ * 'data_size' should be set appropriately.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field appropriately.
+ */
+#define DM_ULOG_GET_RESYNC_WORK 12
+
+/*
+ * DM_ULOG_SET_REGION_SYNC corresponds to (found in dm-dirty-log.h):
+ * void (*set_region_sync)(struct dm_dirty_log *log,
+ * region_t region, int in_sync);
+ *
+ * Payload-to-userspace:
+ * {
+ * uint64_t - region to set sync state on
+ * int64_t - 0 if not-in-sync, 1 if in-sync
+ * }
+ * Payload-to-kernel:
+ * None.
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and clearing
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_SET_REGION_SYNC 13
+
+/*
+ * DM_ULOG_GET_SYNC_COUNT corresponds to (found in dm-dirty-log.h):
+ * region_t (*get_sync_count)(struct dm_dirty_log *log);
+ *
+ * Payload-to-userspace:
+ * None.
+ * Payload-to-kernel:
+ * uint64_t - the number of in-sync regions
+ *
+ * No incoming payload. Kernel-bound payload contains the number of
+ * regions that are in-sync (in a size_t).
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_GET_SYNC_COUNT 14
+
+/*
+ * DM_ULOG_STATUS_INFO corresponds to (found in dm-dirty-log.h):
+ * int (*status)(struct dm_dirty_log *log, STATUSTYPE_INFO,
+ * char *result, unsigned maxlen);
+ *
+ * Payload-to-userspace:
+ * None.
+ * Payload-to-kernel:
+ * Character string containing STATUSTYPE_INFO
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_STATUS_INFO 15
+
+/*
+ * DM_ULOG_STATUS_TABLE corresponds to (found in dm-dirty-log.h):
+ * int (*status)(struct dm_dirty_log *log, STATUSTYPE_TABLE,
+ * char *result, unsigned maxlen);
+ *
+ * Payload-to-userspace:
+ * None.
+ * Payload-to-kernel:
+ * Character string containing STATUSTYPE_TABLE
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_STATUS_TABLE 16
+
+/*
+ * DM_ULOG_IS_REMOTE_RECOVERING corresponds to (found in dm-dirty-log.h):
+ * int (*is_remote_recovering)(struct dm_dirty_log *log, region_t region);
+ *
+ * Payload-to-userspace:
+ * uint64_t - region to determine recovery status on
+ * Payload-to-kernel:
+ * {
+ * int64_t is_recovering; -- 0 if no, 1 if yes
+ * uint64_t in_sync_hint; -- lowest region still needing resync
+ * }
+ *
+ * When the request has been processed, user-space must return the
+ * dm_ulog_request to the kernel - setting the 'error' field and
+ * 'data_size' appropriately.
+ */
+#define DM_ULOG_IS_REMOTE_RECOVERING 17
+
+/*
+ * (DM_ULOG_REQUEST_MASK & request_type) to get the request type
+ *
+ * Payload-to-userspace:
+ * A single string containing all the argv arguments separated by ' 's
+ * Payload-to-kernel:
+ * None. ('data_size' in the dm_ulog_request struct should be 0.)
+ *
+ * We are reserving 8 bits of the 32-bit 'request_type' field for the
+ * various request types above. The remaining 24-bits are currently
+ * set to zero and are reserved for future use and compatibility concerns.
+ *
+ * User-space should always use DM_ULOG_REQUEST_TYPE to acquire the
+ * request type from the 'request_type' field to maintain forward compatibility.
+ */
+#define DM_ULOG_REQUEST_MASK 0xFF
+#define DM_ULOG_REQUEST_TYPE(request_type) \
+ (DM_ULOG_REQUEST_MASK & (request_type))
+
+/*
+ * DM_ULOG_REQUEST_VERSION is incremented when there is a
+ * change to the way information is passed between kernel
+ * and userspace. This could be a structure change of
+ * dm_ulog_request or a change in the way requests are
+ * issued/handled. Changes are outlined here:
+ * version 1: Initial implementation
+ * version 2: DM_ULOG_CTR allowed to return a string containing a
+ * device name that is to be registered with DM via
+ * 'dm_get_device'.
+ */
+#define DM_ULOG_REQUEST_VERSION 2
+
+struct dm_ulog_request {
+ /*
+ * The local unique identifier (luid) and the universally unique
+ * identifier (uuid) are used to tie a request to a specific
+ * mirror log. A single machine log could probably make do with
+ * just the 'luid', but a cluster-aware log must use the 'uuid' and
+ * the 'luid'. The uuid is what is required for node to node
+ * communication concerning a particular log, but the 'luid' helps
+ * differentiate between logs that are being swapped and have the
+ * same 'uuid'. (Think "live" and "inactive" device-mapper tables.)
+ */
+ uint64_t luid;
+ char uuid[DM_UUID_LEN];
+ char padding[3]; /* Padding because DM_UUID_LEN = 129 */
+
+ uint32_t version; /* See DM_ULOG_REQUEST_VERSION */
+ int32_t error; /* Used to report back processing errors */
+
+ uint32_t seq; /* Sequence number for request */
+ uint32_t request_type; /* DM_ULOG_* defined above */
+ uint32_t data_size; /* How much data (not including this struct) */
+
+ char data[];
+};
+
+#endif /* __DM_LOG_USERSPACE_H__ */
diff --git a/device_mapper/misc/dm-logging.h b/device_mapper/misc/dm-logging.h
new file mode 100644
index 000000000..a35480e36
--- /dev/null
+++ b/device_mapper/misc/dm-logging.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2016 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef _DM_LOGGING_H
+#define _DM_LOGGING_H
+
+#include "libdevmapper.h"
+
+extern dm_log_with_errno_fn dm_log_with_errno;
+
+#define LOG_MESG(l, f, ln, e, x...) \
+ dm_log_with_errno(l, f, ln, e, ## x)
+
+#define LOG_LINE(l, x...) LOG_MESG(l, __FILE__, __LINE__, 0, ## x)
+#define LOG_LINE_WITH_ERRNO(l, e, x...) LOG_MESG(l, __FILE__, __LINE__, e, ## x)
+
+/* Debug messages may have a type instead of an errno */
+#define LOG_LINE_WITH_CLASS(l, c, x...) LOG_MESG(l, __FILE__, __LINE__, c, ## x)
+
+#include "lib/log/log.h"
+
+#endif
diff --git a/device_mapper/misc/dmlib.h b/device_mapper/misc/dmlib.h
new file mode 100644
index 000000000..ba376bcff
--- /dev/null
+++ b/device_mapper/misc/dmlib.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * This file must be included first by every device-mapper library source file.
+ */
+#ifndef _DM_LIB_H
+#define _DM_LIB_H
+
+// FIXME: get rid of this whole file
+
+#include "configure.h"
+
+#define _REENTRANT
+#define _GNU_SOURCE
+
+#include "libdevmapper.h"
+#include "lib/misc/util.h"
+#include "dm-logging.h"
+
+#endif
diff --git a/device_mapper/misc/kdev_t.h b/device_mapper/misc/kdev_t.h
new file mode 100644
index 000000000..f88bb0ab6
--- /dev/null
+++ b/device_mapper/misc/kdev_t.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of LVM2.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
#ifndef _LIBDM_KDEV_H
#define _LIBDM_KDEV_H

/*
 * Split/compose a dev_t in the expanded kernel encoding visible here:
 * 8+4 bits of major at bits 8..19, minor low byte at bits 0..7 and the
 * remaining minor bits at 20+.
 *
 * FIX: macro arguments are now parenthesised so expressions such as
 * MAJOR(a | b) or MKDEV(m + 1, n) expand correctly.
 */
#define MAJOR(dev) (((dev) & 0xfff00) >> 8)
#define MINOR(dev) (((dev) & 0xff) | (((dev) >> 12) & 0xfff00))
#define MKDEV(ma,mi) (((mi) & 0xff) | ((ma) << 8) | (((mi) & ~0xff) << 12))

#endif
diff --git a/device_mapper/mm/dbg_malloc.c b/device_mapper/mm/dbg_malloc.c
new file mode 100644
index 000000000..a17203c48
--- /dev/null
+++ b/device_mapper/mm/dbg_malloc.c
@@ -0,0 +1,413 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "misc/dmlib.h"
+
+#ifdef VALGRIND_POOL
+#include "memcheck.h"
+#endif
+#include <assert.h>
+#include <stdarg.h>
+#include <unistd.h>
+
/*
 * Forward declarations for the allocators defined below.  The *_debug
 * variants keep every live block on a global linked list with guard bytes
 * so leaks, overruns and double frees can be detected; the plain variants
 * are thin wrappers around malloc()/free().
 */
void *dm_malloc_aux(size_t s, const char *file, int line)
	__attribute__((__malloc__)) __attribute__((__warn_unused_result__));
void *dm_malloc_aux_debug(size_t s, const char *file, int line)
	__attribute__((__malloc__)) __attribute__((__warn_unused_result__));
static void *_dm_malloc_aligned_aux(size_t s, size_t a, const char *file, int line)
	__attribute__((__malloc__)) __attribute__((__warn_unused_result__));
void *dm_zalloc_aux(size_t s, const char *file, int line)
	__attribute__((__malloc__)) __attribute__((__warn_unused_result__));
void *dm_zalloc_aux_debug(size_t s, const char *file, int line)
	__attribute__((__malloc__)) __attribute__((__warn_unused_result__));
void *dm_realloc_aux(void *p, unsigned int s, const char *file, int line)
	__attribute__((__warn_unused_result__));
void dm_free_aux(void *p);
char *dm_strdup_aux(const char *str, const char *file, int line)
	__attribute__((__warn_unused_result__));
int dm_dump_memory_debug(void);
void dm_bounds_check_debug(void);
+
+char *dm_strdup_aux(const char *str, const char *file, int line)
+{
+ char *ret;
+
+ if (!str) {
+ log_error(INTERNAL_ERROR "dm_strdup called with NULL pointer");
+ return NULL;
+ }
+
+ if ((ret = dm_malloc_aux_debug(strlen(str) + 1, file, line)))
+ strcpy(ret, str);
+
+ return ret;
+}
+
/*
 * Per-allocation tracking header.  It sits immediately before the memory
 * returned to the caller; 'magic' stores that user pointer so a bad free
 * can be detected, and a guard word of 'id' bytes follows the payload.
 */
struct memblock {
	struct memblock *prev, *next;	/* All allocated blocks are linked */
	size_t length;		/* Size of the requested block */
	int id;			/* Index of the block */
	const char *file;	/* File that allocated */
	int line;		/* Line that allocated */
	void *magic;		/* Address of this block */
} __attribute__((aligned(8)));

/* Global allocation counters maintained by the *_debug allocators. */
static struct {
	unsigned block_serialno;/* Non-decreasing serialno of block */
	unsigned blocks_allocated; /* Current number of blocks allocated */
	unsigned blocks_max;	/* Max no of concurrently-allocated blocks */
	unsigned int bytes, mbytes;	/* Current and peak byte totals */

} _mem_stats = {
0, 0, 0, 0, 0};

/* Doubly-linked list of all live allocations. */
static struct memblock *_head = 0;
static struct memblock *_tail = 0;
+
/*
 * Debug malloc: allocate 's' user bytes plus a tracking header and a
 * trailing guard word, link the block onto the global list and fill the
 * payload with a 0xbe/0xba pattern so use of uninitialised memory shows up.
 * Returns a pointer just past the header, or 0 on failure.
 */
void *dm_malloc_aux_debug(size_t s, const char *file, int line)
{
	struct memblock *nb;
	size_t tsize = s + sizeof(*nb) + sizeof(unsigned long);

	/* Arbitrary sanity cap: a request this large is treated as a sign
	   of corrupt metadata rather than a genuine allocation. */
	if (s > 50000000) {
		log_error("Huge memory allocation (size %" PRIsize_t
			  ") rejected - metadata corruption?", s);
		return 0;
	}

	if (!(nb = malloc(tsize))) {
		log_error("couldn't allocate any memory, size = %" PRIsize_t,
			  s);
		return 0;
	}

	/* set up the file and line info */
	nb->file = file;
	nb->line = line;

	/* verify guard words of all existing blocks before extending the list */
	dm_bounds_check();

	/* setup fields */
	nb->magic = nb + 1;
	nb->length = s;
	nb->id = ++_mem_stats.block_serialno;
	nb->next = 0;

	/* stomp a pretty pattern across the new memory
	   and fill in the boundary bytes */
	{
		char *ptr = (char *) (nb + 1);
		size_t i;
		for (i = 0; i < s; i++)
			*ptr++ = i & 0x1 ? (char) 0xba : (char) 0xbe;

		/* guard word: one 'id' byte per position, checked on free */
		for (i = 0; i < sizeof(unsigned long); i++)
			*ptr++ = (char) nb->id;
	}

	nb->prev = _tail;

	/* link to tail of the list */
	if (!_head)
		_head = _tail = nb;
	else {
		_tail->next = nb;
		_tail = nb;
	}

	_mem_stats.blocks_allocated++;
	if (_mem_stats.blocks_allocated > _mem_stats.blocks_max)
		_mem_stats.blocks_max = _mem_stats.blocks_allocated;

	_mem_stats.bytes += s;
	if (_mem_stats.bytes > _mem_stats.mbytes)
		_mem_stats.mbytes = _mem_stats.bytes;

	/* log_debug_mem("Allocated: %u %u %u", nb->id, _mem_stats.blocks_allocated,
		      _mem_stats.bytes); */
#ifdef VALGRIND_POOL
	VALGRIND_MAKE_MEM_UNDEFINED(nb + 1, s);
#endif
	return nb + 1;
}
+
/* Debug zalloc: tracked allocation of 's' bytes, zero-filled. */
void *dm_zalloc_aux_debug(size_t s, const char *file, int line)
{
	void *mem = dm_malloc_aux_debug(s, file, line);

	return mem ? memset(mem, 0, s) : NULL;
}
+
+void dm_free_aux(void *p)
+{
+ char *ptr;
+ size_t i;
+ struct memblock *mb = ((struct memblock *) p) - 1;
+ if (!p)
+ return;
+
+ dm_bounds_check();
+
+ /* sanity check */
+ assert(mb->magic == p);
+#ifdef VALGRIND_POOL
+ VALGRIND_MAKE_MEM_DEFINED(p, mb->length);
+#endif
+ /* check data at the far boundary */
+ ptr = (char *) p + mb->length;
+ for (i = 0; i < sizeof(unsigned long); i++)
+ if (ptr[i] != (char) mb->id)
+ assert(!"Damage at far end of block");
+
+ /* have we freed this before ? */
+ assert(mb->id != 0);
+
+ /* unlink */
+ if (mb->prev)
+ mb->prev->next = mb->next;
+ else
+ _head = mb->next;
+
+ if (mb->next)
+ mb->next->prev = mb->prev;
+ else
+ _tail = mb->prev;
+
+ mb->id = 0;
+
+ /* stomp a different pattern across the memory */
+ ptr = p;
+ for (i = 0; i < mb->length; i++)
+ ptr[i] = i & 1 ? (char) 0xde : (char) 0xad;
+
+ assert(_mem_stats.blocks_allocated);
+ _mem_stats.blocks_allocated--;
+ _mem_stats.bytes -= mb->length;
+
+ /* free the memory */
+ free(mb);
+}
+
+void *dm_realloc_aux(void *p, unsigned int s, const char *file, int line)
+{
+ void *r;
+ struct memblock *mb = ((struct memblock *) p) - 1;
+
+ r = dm_malloc_aux_debug(s, file, line);
+
+ if (r && p) {
+ memcpy(r, p, mb->length);
+ dm_free_aux(p);
+ }
+
+ return r;
+}
+
+int dm_dump_memory_debug(void)
+{
+ unsigned long tot = 0;
+ struct memblock *mb;
+ char str[32];
+
+ if (_head)
+ log_very_verbose("You have a memory leak:");
+
+ for (mb = _head; mb; mb = mb->next) {
+#ifdef VALGRIND_POOL
+ /*
+ * We can't look at the memory in case it has had
+ * VALGRIND_MAKE_MEM_NOACCESS called on it.
+ */
+ str[0] = '\0';
+#else
+ size_t c;
+
+ for (c = 0; c < sizeof(str) - 1; c++) {
+ if (c >= mb->length)
+ str[c] = ' ';
+ else if (((char *)mb->magic)[c] == '\0')
+ str[c] = '\0';
+ else if (((char *)mb->magic)[c] < ' ')
+ str[c] = '?';
+ else
+ str[c] = ((char *)mb->magic)[c];
+ }
+ str[sizeof(str) - 1] = '\0';
+#endif
+
+ LOG_MESG(_LOG_INFO, mb->file, mb->line, 0,
+ "block %d at %p, size %" PRIsize_t "\t [%s]",
+ mb->id, mb->magic, mb->length, str);
+ tot += mb->length;
+ }
+
+ if (_head)
+ log_very_verbose("%ld bytes leaked in total", tot);
+
+ return 1;
+}
+
+void dm_bounds_check_debug(void)
+{
+ struct memblock *mb = _head;
+ while (mb) {
+ size_t i;
+ char *ptr = ((char *) (mb + 1)) + mb->length;
+ for (i = 0; i < sizeof(unsigned long); i++)
+ if (*ptr++ != (char) mb->id)
+ assert(!"Memory smash");
+
+ mb = mb->next;
+ }
+}
+
+void *dm_malloc_aux(size_t s, const char *file __attribute__((unused)),
+ int line __attribute__((unused)))
+{
+ if (s > 50000000) {
+ log_error("Huge memory allocation (size %" PRIsize_t
+ ") rejected - metadata corruption?", s);
+ return 0;
+ }
+
+ return malloc(s);
+}
+
+/* Allocate size s with alignment a (or page size if 0) */
+static void *_dm_malloc_aligned_aux(size_t s, size_t a, const char *file __attribute__((unused)),
+ int line __attribute__((unused)))
+{
+ void *memptr;
+ int r;
+
+ if (!a)
+ a = getpagesize();
+
+ if (s > 50000000) {
+ log_error("Huge memory allocation (size %" PRIsize_t
+ ") rejected - metadata corruption?", s);
+ return 0;
+ }
+
+ if ((r = posix_memalign(&memptr, a, s))) {
+ log_error("Failed to allocate %" PRIsize_t " bytes aligned to %" PRIsize_t ": %s", s, a, strerror(r));
+ return 0;
+ }
+
+ return memptr;
+}
+
/* Zero-filled allocation via dm_malloc_aux(). */
void *dm_zalloc_aux(size_t s, const char *file, int line)
{
	void *mem = dm_malloc_aux(s, file, line);

	return mem ? memset(mem, 0, s) : NULL;
}
+
#ifdef DEBUG_MEM

/*
 * Public entry points, debug flavour: everything is routed through the
 * block-tracking allocators above.
 */

void *dm_malloc_wrapper(size_t s, const char *file, int line)
{
	return dm_malloc_aux_debug(s, file, line);
}

void *dm_malloc_aligned_wrapper(size_t s, size_t a, const char *file, int line)
{
	/* FIXME Implement alignment when debugging - currently just ignored */
	/* FIX: the original called the non-existent _dm_malloc_aux_debug(),
	 * which broke compilation with DEBUG_MEM defined. */
	return dm_malloc_aux_debug(s, file, line);
}

void *dm_zalloc_wrapper(size_t s, const char *file, int line)
{
	return dm_zalloc_aux_debug(s, file, line);
}

char *dm_strdup_wrapper(const char *str, const char *file, int line)
{
	return dm_strdup_aux(str, file, line);
}

void dm_free_wrapper(void *ptr)
{
	dm_free_aux(ptr);
}

void *dm_realloc_wrapper(void *p, unsigned int s, const char *file, int line)
{
	return dm_realloc_aux(p, s, file, line);
}

int dm_dump_memory_wrapper(void)
{
	return dm_dump_memory_debug();
}

void dm_bounds_check_wrapper(void)
{
	dm_bounds_check_debug();
}

#else /* !DEBUG_MEM */

/*
 * Public entry points, release flavour: thin shims over the C library;
 * file/line arguments are accepted but unused.
 */

void *dm_malloc_wrapper(size_t s, const char *file, int line)
{
	return dm_malloc_aux(s, file, line);
}

void *dm_malloc_aligned_wrapper(size_t s, size_t a, const char *file, int line)
{
	return _dm_malloc_aligned_aux(s, a, file, line);
}

void *dm_zalloc_wrapper(size_t s, const char *file, int line)
{
	return dm_zalloc_aux(s, file, line);
}

char *dm_strdup_wrapper(const char *str,
			const char *file __attribute__((unused)),
			int line __attribute__((unused)))
{
	return strdup(str);
}

void dm_free_wrapper(void *ptr)
{
	free(ptr);
}

void *dm_realloc_wrapper(void *p, unsigned int s,
			 const char *file __attribute__((unused)),
			 int line __attribute__((unused)))
{
	return realloc(p, s);
}

int dm_dump_memory_wrapper(void)
{
	return 1;
}

void dm_bounds_check_wrapper(void)
{
}

#endif /* DEBUG_MEM */
diff --git a/device_mapper/mm/pool-debug.c b/device_mapper/mm/pool-debug.c
new file mode 100644
index 000000000..c5232386f
--- /dev/null
+++ b/device_mapper/mm/pool-debug.c
@@ -0,0 +1,292 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "dmlib.h"
+#include <assert.h>
+
/* One pool allocation: the header and the payload ('data') are allocated
 * separately so overruns of the payload can be caught by external tools. */
struct block {
	struct block *next;
	size_t size;
	void *data;
};

/* Per-pool counters maintained by _append_block()/_free_blocks(). */
typedef struct {
	unsigned block_serialno;	/* Non-decreasing serialno of block */
	unsigned blocks_allocated;	/* Current number of blocks allocated */
	unsigned blocks_max;	/* Max no of concurrently-allocated blocks */
	unsigned int bytes, maxbytes;	/* Current and peak byte totals */
} pool_stats;

/* Debug implementation of a pool: a singly-linked list of blocks plus an
 * optional in-progress object being grown incrementally. */
struct dm_pool {
	struct dm_list list;	/* Entry in the global _dm_pools registry */
	const char *name;
	void *orig_pool;	/* to pair it with first allocation call */
	unsigned locked;
	long crc;

	int begun;		/* An object build is in progress */
	struct block *object;	/* The partially built object, if any */

	struct block *blocks;	/* Head of the allocation list */
	struct block *tail;	/* Tail, for O(1) append */

	pool_stats stats;
};

/* by default things come out aligned for doubles */
#define DEFAULT_ALIGNMENT __alignof__ (double)
+
+struct dm_pool *dm_pool_create(const char *name, size_t chunk_hint)
+{
+ struct dm_pool *mem = dm_zalloc(sizeof(*mem));
+
+ if (!mem) {
+ log_error("Couldn't create memory pool %s (size %"
+ PRIsize_t ")", name, sizeof(*mem));
+ return NULL;
+ }
+
+ mem->name = name;
+ mem->orig_pool = mem;
+
+#ifdef DEBUG_POOL
+ log_debug_mem("Created mempool %s at %p", name, mem);
+#endif
+
+ dm_list_add(&_dm_pools, &mem->list);
+ return mem;
+}
+
+static void _free_blocks(struct dm_pool *p, struct block *b)
+{
+ struct block *n;
+
+ if (p->locked)
+ log_error(INTERNAL_ERROR "_free_blocks from locked pool %s",
+ p->name);
+
+ while (b) {
+ p->stats.bytes -= b->size;
+ p->stats.blocks_allocated--;
+
+ n = b->next;
+ dm_free(b->data);
+ dm_free(b);
+ b = n;
+ }
+}
+
/*
 * Log the pool's counters, prefixed with 'action', when DEBUG_POOL is
 * defined; otherwise compiles to a no-op (the lone ';' keeps the body
 * non-empty).
 */
static void _pool_stats(struct dm_pool *p, const char *action)
{
#ifdef DEBUG_POOL
	log_debug_mem("%s mempool %s at %p: %u/%u bytes, %u/%u blocks, "
		      "%u allocations)", action, p->name, p, p->stats.bytes,
		      p->stats.maxbytes, p->stats.blocks_allocated,
		      p->stats.blocks_max, p->stats.block_serialno);
#else
	;
#endif
}
+
+void dm_pool_destroy(struct dm_pool *p)
+{
+ _pool_stats(p, "Destroying");
+ _free_blocks(p, p->blocks);
+ dm_list_del(&p->list);
+ dm_free(p);
+}
+
/* Allocate 's' bytes from pool 'p' with the default (double) alignment. */
void *dm_pool_alloc(struct dm_pool *p, size_t s)
{
	return dm_pool_alloc_aligned(p, s, DEFAULT_ALIGNMENT);
}
+
+static void _append_block(struct dm_pool *p, struct block *b)
+{
+ if (p->locked)
+ log_error(INTERNAL_ERROR "_append_blocks to locked pool %s",
+ p->name);
+
+ if (p->tail) {
+ p->tail->next = b;
+ p->tail = b;
+ } else
+ p->blocks = p->tail = b;
+
+ p->stats.block_serialno++;
+ p->stats.blocks_allocated++;
+ if (p->stats.blocks_allocated > p->stats.blocks_max)
+ p->stats.blocks_max = p->stats.blocks_allocated;
+
+ p->stats.bytes += b->size;
+ if (p->stats.bytes > p->stats.maxbytes)
+ p->stats.maxbytes = p->stats.bytes;
+}
+
+static struct block *_new_block(size_t s, unsigned alignment)
+{
+ /* FIXME: I'm currently ignoring the alignment arg. */
+ size_t len = sizeof(struct block) + s;
+ struct block *b = dm_malloc(len);
+
+ /*
+ * Too lazy to implement alignment for debug version, and
+ * I don't think LVM will use anything but default
+ * align.
+ */
+ assert(alignment <= DEFAULT_ALIGNMENT);
+
+ if (!b) {
+ log_error("Out of memory");
+ return NULL;
+ }
+
+ if (!(b->data = dm_malloc(s))) {
+ log_error("Out of memory");
+ dm_free(b);
+ return NULL;
+ }
+
+ b->next = NULL;
+ b->size = s;
+
+ return b;
+}
+
+void *dm_pool_alloc_aligned(struct dm_pool *p, size_t s, unsigned alignment)
+{
+ struct block *b = _new_block(s, alignment);
+
+ if (!b)
+ return_NULL;
+
+ _append_block(p, b);
+
+ return b->data;
+}
+
+void dm_pool_empty(struct dm_pool *p)
+{
+ _pool_stats(p, "Emptying");
+ _free_blocks(p, p->blocks);
+ p->blocks = p->tail = NULL;
+}
+
/*
 * Free 'ptr' and everything allocated from the pool after it.  'ptr' must
 * be the start of a block previously returned by this pool.
 */
void dm_pool_free(struct dm_pool *p, void *ptr)
{
	struct block *b, *prev = NULL;

	_pool_stats(p, "Freeing (before)");

	/* locate the block whose payload is 'ptr' */
	for (b = p->blocks; b; b = b->next) {
		if (b->data == ptr)
			break;
		prev = b;
	}

	/*
	 * If this fires then you tried to free a
	 * pointer that either wasn't from this
	 * pool, or isn't the start of a block.
	 */
	assert(b);

	/* release the found block and every later one */
	_free_blocks(p, b);

	if (prev) {
		p->tail = prev;
		prev->next = NULL;
	} else
		p->blocks = p->tail = NULL;

	_pool_stats(p, "Freeing (after)");
}
+
/*
 * Start building an incremental object.  'init_size' is ignored in this
 * debug implementation; the object is reallocated on every grow.
 * Always returns 1.
 */
int dm_pool_begin_object(struct dm_pool *p, size_t init_size)
{
	assert(!p->begun);
	p->begun = 1;
	return 1;
}
+
/*
 * Append 'delta' bytes of 'extra' to the object under construction
 * (delta == 0 means strlen(extra)).  The debug version allocates a whole
 * new block each time and copies the old contents across, so overruns of
 * the object show up immediately.  Returns 1 on success, 0 on OOM.
 */
int dm_pool_grow_object(struct dm_pool *p, const void *extra, size_t delta)
{
	struct block *new;
	size_t new_size;

	if (p->locked)
		log_error(INTERNAL_ERROR "Grow objects in locked pool %s",
			  p->name);

	if (!delta)
		delta = strlen(extra);

	assert(p->begun);

	if (p->object)
		new_size = delta + p->object->size;
	else
		new_size = delta;

	if (!(new = _new_block(new_size, DEFAULT_ALIGNMENT))) {
		log_error("Couldn't extend object.");
		return 0;
	}

	/* copy the existing partial object, then discard it */
	if (p->object) {
		memcpy(new->data, p->object->data, p->object->size);
		dm_free(p->object->data);
		dm_free(p->object);
	}
	p->object = new;

	/* append the new bytes at the end of the enlarged buffer */
	memcpy((char*)new->data + new_size - delta, extra, delta);

	return 1;
}
+
+void *dm_pool_end_object(struct dm_pool *p)
+{
+ assert(p->begun);
+ _append_block(p, p->object);
+
+ p->begun = 0;
+ p->object = NULL;
+ return p->tail->data;
+}
+
+void dm_pool_abandon_object(struct dm_pool *p)
+{
+ assert(p->begun);
+ dm_free(p->object);
+ p->begun = 0;
+ p->object = NULL;
+}
+
/*
 * CRC and mprotect locking are only implemented by pool-fast; these stubs
 * keep pool.c's dm_pool_lock()/dm_pool_unlock() compiling under DEBUG_POOL.
 */
static long _pool_crc(const struct dm_pool *p)
{
#ifndef DEBUG_ENFORCE_POOL_LOCKING
#warning pool crc not implemented with pool debug
#endif
	return 0;
}

static int _pool_protect(struct dm_pool *p, int prot)
{
#ifdef DEBUG_ENFORCE_POOL_LOCKING
#warning pool mprotect not implemented with pool debug
#endif
	return 1;
}
diff --git a/device_mapper/mm/pool-fast.c b/device_mapper/mm/pool-fast.c
new file mode 100644
index 000000000..895872e4b
--- /dev/null
+++ b/device_mapper/mm/pool-fast.c
@@ -0,0 +1,363 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifdef VALGRIND_POOL
+#include "memcheck.h"
+#endif
+
+#include "misc/dmlib.h"
+#include <stddef.h> /* For musl libc */
+#include <malloc.h>
+
/* One contiguous slab: 'begin' is the bump pointer, 'end' one past the
 * last usable byte; chunks form a singly-linked stack via 'prev'. */
struct chunk {
	char *begin, *end;
	struct chunk *prev;
} __attribute__((aligned(8)));

/* Fast implementation of a pool: bump allocation out of chunks. */
struct dm_pool {
	struct dm_list list;	/* Entry in the global _dm_pools registry */
	struct chunk *chunk, *spare_chunk; /* spare_chunk is a one entry free
					      list to stop 'bobbling' */
	const char *name;
	size_t chunk_size;	/* Default size for newly allocated chunks */
	size_t object_len;	/* Length of the object being built, if any */
	unsigned object_alignment;
	int locked;
	long crc;
};

static void _align_chunk(struct chunk *c, unsigned alignment);
static struct chunk *_new_chunk(struct dm_pool *p, size_t s);
static void _free_chunk(struct chunk *c);

/* by default things come out aligned for doubles */
#define DEFAULT_ALIGNMENT __alignof__ (double)
+
+struct dm_pool *dm_pool_create(const char *name, size_t chunk_hint)
+{
+ size_t new_size = 1024;
+ struct dm_pool *p = dm_zalloc(sizeof(*p));
+
+ if (!p) {
+ log_error("Couldn't create memory pool %s (size %"
+ PRIsize_t ")", name, sizeof(*p));
+ return 0;
+ }
+
+ p->name = name;
+ /* round chunk_hint up to the next power of 2 */
+ p->chunk_size = chunk_hint + sizeof(struct chunk);
+ while (new_size < p->chunk_size)
+ new_size <<= 1;
+ p->chunk_size = new_size;
+ pthread_mutex_lock(&_dm_pools_mutex);
+ dm_list_add(&_dm_pools, &p->list);
+ pthread_mutex_unlock(&_dm_pools_mutex);
+ return p;
+}
+
+void dm_pool_destroy(struct dm_pool *p)
+{
+ struct chunk *c, *pr;
+ _free_chunk(p->spare_chunk);
+ c = p->chunk;
+ while (c) {
+ pr = c->prev;
+ _free_chunk(c);
+ c = pr;
+ }
+
+ pthread_mutex_lock(&_dm_pools_mutex);
+ dm_list_del(&p->list);
+ pthread_mutex_unlock(&_dm_pools_mutex);
+ dm_free(p);
+}
+
/* Allocate 's' bytes from pool 'p' with the default (double) alignment. */
void *dm_pool_alloc(struct dm_pool *p, size_t s)
{
	return dm_pool_alloc_aligned(p, s, DEFAULT_ALIGNMENT);
}
+
/*
 * Bump-allocate 's' bytes with the given alignment from the current
 * chunk, starting a new chunk when the current one is absent or full.
 * Returns NULL only when a new chunk cannot be allocated.
 */
void *dm_pool_alloc_aligned(struct dm_pool *p, size_t s, unsigned alignment)
{
	struct chunk *c = p->chunk;
	void *r;

	/* realign begin */
	if (c)
		_align_chunk(c, alignment);

	/* have we got room ? */
	if (!c || (c->begin > c->end) || ((c->end - c->begin) < (int) s)) {
		/* allocate new chunk */
		size_t needed = s + alignment + sizeof(struct chunk);
		c = _new_chunk(p, (needed > p->chunk_size) ?
			       needed : p->chunk_size);

		if (!c)
			return_NULL;

		_align_chunk(c, alignment);
	}

	/* hand out the aligned front of the chunk and advance past it */
	r = c->begin;
	c->begin += s;

#ifdef VALGRIND_POOL
	VALGRIND_MAKE_MEM_UNDEFINED(r, s);
#endif

	return r;
}
+
+void dm_pool_empty(struct dm_pool *p)
+{
+ struct chunk *c;
+
+ for (c = p->chunk; c && c->prev; c = c->prev)
+ ;
+
+ if (c)
+ dm_pool_free(p, (char *) (c + 1));
+}
+
/*
 * Roll the pool back to 'ptr': every allocation made after it is
 * discarded.  Chunks newer than the one containing 'ptr' are recycled
 * (the most recent is kept as spare_chunk, older spares freed).
 */
void dm_pool_free(struct dm_pool *p, void *ptr)
{
	struct chunk *c = p->chunk;

	while (c) {
		/* does ptr lie inside this chunk? */
		if (((char *) c < (char *) ptr) &&
		    ((char *) c->end > (char *) ptr)) {
			c->begin = ptr;
#ifdef VALGRIND_POOL
			VALGRIND_MAKE_MEM_NOACCESS(c->begin, c->end - c->begin);
#endif
			break;
		}

		/* keep only the newest recycled chunk as the spare */
		if (p->spare_chunk)
			_free_chunk(p->spare_chunk);

		c->begin = (char *) (c + 1);
#ifdef VALGRIND_POOL
		VALGRIND_MAKE_MEM_NOACCESS(c->begin, c->end - c->begin);
#endif

		p->spare_chunk = c;
		c = c->prev;
	}

	if (!c)
		log_error(INTERNAL_ERROR "pool_free asked to free pointer "
			  "not in pool");
	else
		p->chunk = c;
}
+
/*
 * Start building an incremental object; 'hint' reserves space so early
 * grows avoid a chunk switch.  Returns 1 on success, 0 on OOM.
 */
int dm_pool_begin_object(struct dm_pool *p, size_t hint)
{
	struct chunk *c = p->chunk;
	const size_t align = DEFAULT_ALIGNMENT;

	p->object_len = 0;
	p->object_alignment = align;

	if (c)
		_align_chunk(c, align);

	/* make sure the current chunk has at least 'hint' bytes spare */
	if (!c || (c->begin > c->end) || ((c->end - c->begin) < (int) hint)) {
		/* allocate a new chunk */
		c = _new_chunk(p,
			       hint > (p->chunk_size - sizeof(struct chunk)) ?
			       hint + sizeof(struct chunk) + align :
			       p->chunk_size);

		if (!c)
			return 0;

		_align_chunk(c, align);
	}

	return 1;
}
+
/*
 * Append 'delta' bytes of 'extra' to the object being built (delta == 0
 * means strlen(extra)).  If the current chunk cannot hold the enlarged
 * object, the partial object is moved into a fresh, larger chunk first.
 * Returns 1 on success, 0 on OOM.
 */
int dm_pool_grow_object(struct dm_pool *p, const void *extra, size_t delta)
{
	struct chunk *c = p->chunk, *nc;

	if (!delta)
		delta = strlen(extra);

	if ((c->end - (c->begin + p->object_len)) < (int) delta) {
		/* move into a new chunk */
		if (p->object_len + delta > (p->chunk_size / 2))
			nc = _new_chunk(p, (p->object_len + delta) * 2);
		else
			nc = _new_chunk(p, p->chunk_size);

		if (!nc)
			return 0;

		_align_chunk(p->chunk, p->object_alignment);

#ifdef VALGRIND_POOL
		VALGRIND_MAKE_MEM_UNDEFINED(p->chunk->begin, p->object_len);
#endif

		/* copy the partial object from the old chunk to the new */
		memcpy(p->chunk->begin, c->begin, p->object_len);

#ifdef VALGRIND_POOL
		VALGRIND_MAKE_MEM_NOACCESS(c->begin, p->object_len);
#endif

		c = p->chunk;
	}

#ifdef VALGRIND_POOL
	VALGRIND_MAKE_MEM_UNDEFINED(p->chunk->begin + p->object_len, delta);
#endif

	memcpy(c->begin + p->object_len, extra, delta);
	p->object_len += delta;
	return 1;
}
+
+void *dm_pool_end_object(struct dm_pool *p)
+{
+ struct chunk *c = p->chunk;
+ void *r = c->begin;
+ c->begin += p->object_len;
+ p->object_len = 0u;
+ p->object_alignment = DEFAULT_ALIGNMENT;
+ return r;
+}
+
+void dm_pool_abandon_object(struct dm_pool *p)
+{
+#ifdef VALGRIND_POOL
+ VALGRIND_MAKE_MEM_NOACCESS(p->chunk, p->object_len);
+#endif
+ p->object_len = 0;
+ p->object_alignment = DEFAULT_ALIGNMENT;
+}
+
+static void _align_chunk(struct chunk *c, unsigned alignment)
+{
+ c->begin += alignment - ((unsigned long) c->begin & (alignment - 1));
+}
+
/*
 * Obtain a chunk of at least 's' bytes (header included) and push it onto
 * the pool's chunk stack.  Reuses spare_chunk when it is large enough;
 * otherwise allocates (page-aligned under DEBUG_ENFORCE_POOL_LOCKING so
 * mprotect() can be applied).  Returns NULL on OOM.
 */
static struct chunk *_new_chunk(struct dm_pool *p, size_t s)
{
	struct chunk *c;

	if (p->spare_chunk &&
	    ((p->spare_chunk->end - p->spare_chunk->begin) >= (ptrdiff_t)s)) {
		/* reuse old chunk */
		c = p->spare_chunk;
		p->spare_chunk = 0;
	} else {
#ifdef DEBUG_ENFORCE_POOL_LOCKING
		if (!_pagesize) {
			_pagesize = getpagesize(); /* lvm_pagesize(); */
			_pagesize_mask = _pagesize - 1;
		}
		/*
		 * Allocate page aligned size so malloc could work.
		 * Otherwise page fault would happen from pool unrelated
		 * memory writes of internal malloc pointers.
		 */
#  define aligned_malloc(s)	(posix_memalign((void**)&c, _pagesize, \
						ALIGN_ON_PAGE(s)) == 0)
#else
#  define aligned_malloc(s)	(c = dm_malloc(s))
#endif /* DEBUG_ENFORCE_POOL_LOCKING */
		if (!aligned_malloc(s)) {
#undef aligned_malloc
			log_error("Out of memory.  Requested %" PRIsize_t
				  " bytes.", s);
			return NULL;
		}

		/* usable space starts right after the embedded header */
		c->begin = (char *) (c + 1);
		c->end = (char *) c + s;

#ifdef VALGRIND_POOL
		VALGRIND_MAKE_MEM_NOACCESS(c->begin, c->end - c->begin);
#endif
	}

	c->prev = p->chunk;
	p->chunk = c;
	return c;
}
+
/* Release one chunk; a NULL argument is ignored by the free routines. */
static void _free_chunk(struct chunk *c)
{
#ifdef VALGRIND_POOL
#  ifdef DEBUG_MEM
	/* dm_free's debug checks must be able to read the chunk again */
	if (c)
		VALGRIND_MAKE_MEM_UNDEFINED(c + 1, c->end - (char *) (c + 1));
#  endif
#endif
#ifdef DEBUG_ENFORCE_POOL_LOCKING
	/* since DEBUG_MEM is using own memory list */
	free(c); /* for posix_memalign() */
#else
	dm_free(c);
#endif
}
+
+
/**
 * Calc crc/hash from pool's memory chunks with internal pointers
 * (only when mprotect enforcement is off; otherwise mprotect itself
 * catches writes and the checksum is unnecessary).
 */
static long _pool_crc(const struct dm_pool *p)
{
	long crc_hash = 0;
#ifndef DEBUG_ENFORCE_POOL_LOCKING
	const struct chunk *c;
	const long *ptr, *end;

	for (c = p->chunk; c; c = c->prev) {
		/* hash whole words up to the bump pointer (8-byte truncated) */
		end = (const long *) (c->begin < c->end ? (long) c->begin & ~7: (long) c->end);
		ptr = (const long *) c;
#ifdef VALGRIND_POOL
		VALGRIND_MAKE_MEM_DEFINED(ptr, (end - ptr) * sizeof(*end));
#endif
		while (ptr < end) {
			crc_hash += *ptr++;
			crc_hash += (crc_hash << 10);
			crc_hash ^= (crc_hash >> 6);
		}
	}
#endif /* DEBUG_ENFORCE_POOL_LOCKING */

	return crc_hash;
}
+
/*
 * Apply 'prot' (PROT_READ / PROT_READ|PROT_WRITE) to every chunk via
 * mprotect(); compiled to a no-op returning success unless
 * DEBUG_ENFORCE_POOL_LOCKING is defined.
 */
static int _pool_protect(struct dm_pool *p, int prot)
{
#ifdef DEBUG_ENFORCE_POOL_LOCKING
	struct chunk *c;

	for (c = p->chunk; c; c = c->prev) {
		if (mprotect(c, (size_t) ((c->end - (char *) c) - 1), prot) != 0) {
			log_sys_error("mprotect", "");
			return 0;
		}
	}
#endif
	return 1;
}
diff --git a/device_mapper/mm/pool.c b/device_mapper/mm/pool.c
new file mode 100644
index 000000000..a710704ae
--- /dev/null
+++ b/device_mapper/mm/pool.c
@@ -0,0 +1,189 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2011 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "misc/dmlib.h"
+#include <sys/mman.h>
+#include <pthread.h>
+
/* Registry of every live pool, guarded by _dm_pools_mutex. */
static DM_LIST_INIT(_dm_pools);
static pthread_mutex_t _dm_pools_mutex = PTHREAD_MUTEX_INITIALIZER;
void dm_pools_check_leaks(void);

#ifdef DEBUG_ENFORCE_POOL_LOCKING
#ifdef DEBUG_POOL
#error Do not use DEBUG_POOL with DEBUG_ENFORCE_POOL_LOCKING
#endif

/*
 * Use mprotect system call to ensure all locked pages are not writable.
 * Generates segmentation fault with write access to the locked pool.
 *
 * - Implementation is using posix_memalign() to get page aligned
 *   memory blocks (could be implemented also through malloc).
 * - Only pool-fast is properly handled for now.
 * - Checksum is slower compared to mprotect.
 */
static size_t _pagesize = 0;
static size_t _pagesize_mask = 0;
#define ALIGN_ON_PAGE(size) (((size) + (_pagesize_mask)) & ~(_pagesize_mask))
#endif

/* Exactly one pool implementation is textually included here. */
#ifdef DEBUG_POOL
#include "pool-debug.c"
#else
#include "pool-fast.c"
#endif
+
/* Duplicate a NUL-terminated string into the pool; NULL on OOM. */
char *dm_pool_strdup(struct dm_pool *p, const char *str)
{
	size_t bytes = strlen(str) + 1;
	char *dup = dm_pool_alloc(p, bytes);

	if (dup)
		memcpy(dup, str, bytes);

	return dup;
}
+
/* Duplicate at most 'n' characters of 'str' into the pool, always
 * NUL-terminating the result; NULL on OOM. */
char *dm_pool_strndup(struct dm_pool *p, const char *str, size_t n)
{
	char *dup = dm_pool_alloc(p, n + 1);

	if (!dup)
		return NULL;

	strncpy(dup, str, n);
	dup[n] = '\0';

	return dup;
}
+
/* Zero-filled pool allocation; NULL on OOM. */
void *dm_pool_zalloc(struct dm_pool *p, size_t s)
{
	void *mem = dm_pool_alloc(p, s);

	return mem ? memset(mem, 0, s) : NULL;
}
+
/*
 * Report every pool still registered in _dm_pools — each one is a pool
 * that was created but never destroyed.  Intended for shutdown checks.
 */
void dm_pools_check_leaks(void)
{
	struct dm_pool *p;

	pthread_mutex_lock(&_dm_pools_mutex);
	if (dm_list_empty(&_dm_pools)) {
		pthread_mutex_unlock(&_dm_pools_mutex);
		return;
	}

	log_error("You have a memory leak (not released memory pool):");
	dm_list_iterate_items(p, &_dm_pools) {
#ifdef DEBUG_POOL
		/* pool-debug tracks byte totals; pool-fast does not */
		log_error(" [%p] %s (%u bytes)",
			  p->orig_pool,
			  p->name, p->stats.bytes);
#else
		log_error(" [%p] %s", p, p->name);
#endif
	}
	pthread_mutex_unlock(&_dm_pools_mutex);
	log_error(INTERNAL_ERROR "Unreleased memory pool(s) found.");
}
+
/**
 * Status of locked pool.
 *
 * \param p
 * Pool to be tested for lock status.
 *
 * \return
 * 1 when the pool is locked, 0 otherwise.
 */
int dm_pool_locked(struct dm_pool *p)
{
	return p->locked;
}
+
/**
 * Lock memory pool.
 *
 * \param p
 * Pool to be locked.
 *
 * \param crc
 * Bool specifies whether to store the pool crc/hash checksum.
 *
 * \return
 * 1 (success) when the pool was properly locked, 0 otherwise.
 */
int dm_pool_lock(struct dm_pool *p, int crc)
{
	if (p->locked) {
		log_error(INTERNAL_ERROR "Pool %s is already locked.",
			  p->name);
		return 0;
	}

	if (crc)
		p->crc = _pool_crc(p); /* Get crc for pool */

	/* on failure, restore writability before reporting the error */
	if (!_pool_protect(p, PROT_READ)) {
		_pool_protect(p, PROT_READ | PROT_WRITE);
		return_0;
	}

	p->locked = 1;

	log_debug_mem("Pool %s is locked.", p->name);

	return 1;
}
+
/**
 * Unlock memory pool.
 *
 * \param p
 * Pool to be unlocked.
 *
 * \param crc
 * Bool enables compare of the pool crc/hash with the stored value
 * at pool lock. The pool is not properly unlocked if there is a mismatch.
 *
 * \return
 * 1 (success) when the pool was properly unlocked, 0 otherwise.
 */
int dm_pool_unlock(struct dm_pool *p, int crc)
{
	if (!p->locked) {
		log_error(INTERNAL_ERROR "Pool %s is already unlocked.",
			  p->name);
		return 0;
	}

	p->locked = 0;

	if (!_pool_protect(p, PROT_READ | PROT_WRITE))
		return_0;

	log_debug_mem("Pool %s is unlocked.", p->name);

	/* a changed checksum means the pool was modified while locked */
	if (crc && (p->crc != _pool_crc(p))) {
		log_error(INTERNAL_ERROR "Pool %s crc mismatch.", p->name);
		return 0;
	}

	return 1;
}
diff --git a/device_mapper/regex/matcher.c b/device_mapper/regex/matcher.c
new file mode 100644
index 000000000..375c1abdc
--- /dev/null
+++ b/device_mapper/regex/matcher.c
@@ -0,0 +1,575 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2012 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "misc/dmlib.h"
+#include "parse_rx.h"
+#include "ttree.h"
+#include "assert.h"
+
+struct dfa_state {
+ struct dfa_state *next;
+ int final;
+ dm_bitset_t bits;
+ struct dfa_state *lookup[256];
+};
+
+struct dm_regex { /* Instance variables for the lexer */
+ struct dfa_state *start;
+ unsigned num_nodes;
+ unsigned num_charsets;
+ int nodes_entered;
+ struct rx_node **nodes;
+ int charsets_entered;
+ struct rx_node **charsets;
+ struct dm_pool *scratch, *mem;
+
+ /* stuff for on the fly dfa calculation */
+ dm_bitset_t charmap[256];
+ dm_bitset_t dfa_copy;
+ struct ttree *tt;
+ dm_bitset_t bs;
+ struct dfa_state *h, *t;
+};
+
+static int _count_nodes(struct rx_node *rx)
+{
+ int r = 1;
+
+ if (rx->left)
+ r += _count_nodes(rx->left);
+
+ if (rx->right)
+ r += _count_nodes(rx->right);
+
+ return r;
+}
+
+static unsigned _count_charsets(struct rx_node *rx)
+{
+ if (rx->type == CHARSET)
+ return 1;
+
+ return (rx->left ? _count_charsets(rx->left) : 0) +
+ (rx->right ? _count_charsets(rx->right) : 0);
+}
+
+static void _enumerate_charsets_internal(struct rx_node *rx, unsigned *i)
+{
+ if (rx->type == CHARSET)
+ rx->charset_index = (*i)++;
+ else {
+ if (rx->left)
+ _enumerate_charsets_internal(rx->left, i);
+ if (rx->right)
+ _enumerate_charsets_internal(rx->right, i);
+ }
+}
+
+static void _enumerate_charsets(struct rx_node *rx)
+{
+ unsigned i = 0;
+ _enumerate_charsets_internal(rx, &i);
+}
+
+static void _fill_table(struct dm_regex *m, struct rx_node *rx)
+{
+ assert((rx->type != OR) || (rx->left && rx->right));
+
+ if (rx->left)
+ _fill_table(m, rx->left);
+
+ if (rx->right)
+ _fill_table(m, rx->right);
+
+ m->nodes[m->nodes_entered++] = rx;
+ if (rx->type == CHARSET)
+ m->charsets[m->charsets_entered++] = rx;
+}
+
+static int _create_bitsets(struct dm_regex *m)
+{
+ unsigned i;
+ struct rx_node *n;
+
+ for (i = 0; i < m->num_nodes; i++) {
+ n = m->nodes[i];
+ if (!(n->firstpos = dm_bitset_create(m->scratch, m->num_charsets)))
+ return_0;
+ if (!(n->lastpos = dm_bitset_create(m->scratch, m->num_charsets)))
+ return_0;
+ if (!(n->followpos = dm_bitset_create(m->scratch, m->num_charsets)))
+ return_0;
+ }
+
+ return 1;
+}
+
+static void _calc_functions(struct dm_regex *m)
+{
+ unsigned i, j, final = 1;
+ struct rx_node *rx, *c1, *c2;
+
+ for (i = 0; i < m->num_nodes; i++) {
+ rx = m->nodes[i];
+ c1 = rx->left;
+ c2 = rx->right;
+
+ if (rx->type == CHARSET && dm_bit(rx->charset, TARGET_TRANS))
+ rx->final = final++;
+
+ switch (rx->type) {
+ case CAT:
+ if (c1->nullable)
+ dm_bit_union(rx->firstpos,
+ c1->firstpos, c2->firstpos);
+ else
+ dm_bit_copy(rx->firstpos, c1->firstpos);
+
+ if (c2->nullable)
+ dm_bit_union(rx->lastpos,
+ c1->lastpos, c2->lastpos);
+ else
+ dm_bit_copy(rx->lastpos, c2->lastpos);
+
+ rx->nullable = c1->nullable && c2->nullable;
+ break;
+
+ case PLUS:
+ dm_bit_copy(rx->firstpos, c1->firstpos);
+ dm_bit_copy(rx->lastpos, c1->lastpos);
+ rx->nullable = c1->nullable;
+ break;
+
+ case OR:
+ dm_bit_union(rx->firstpos, c1->firstpos, c2->firstpos);
+ dm_bit_union(rx->lastpos, c1->lastpos, c2->lastpos);
+ rx->nullable = c1->nullable || c2->nullable;
+ break;
+
+ case QUEST:
+ case STAR:
+ dm_bit_copy(rx->firstpos, c1->firstpos);
+ dm_bit_copy(rx->lastpos, c1->lastpos);
+ rx->nullable = 1;
+ break;
+
+ case CHARSET:
+ dm_bit_set(rx->firstpos, rx->charset_index);
+ dm_bit_set(rx->lastpos, rx->charset_index);
+ rx->nullable = 0;
+ break;
+
+ default:
+ log_error(INTERNAL_ERROR "Unknown calc node type");
+ }
+
+		/*
+		 * followpos has its own switch
+		 * because PLUS and STAR do the
+		 * same thing.
+		 */
+ switch (rx->type) {
+ case CAT:
+ for (j = 0; j < m->num_charsets; j++) {
+ struct rx_node *n = m->charsets[j];
+ if (dm_bit(c1->lastpos, j))
+ dm_bit_union(n->followpos,
+ n->followpos, c2->firstpos);
+ }
+ break;
+
+ case PLUS:
+ case STAR:
+ for (j = 0; j < m->num_charsets; j++) {
+ struct rx_node *n = m->charsets[j];
+ if (dm_bit(rx->lastpos, j))
+ dm_bit_union(n->followpos,
+ n->followpos, rx->firstpos);
+ }
+ break;
+ }
+ }
+}
+
+static struct dfa_state *_create_dfa_state(struct dm_pool *mem)
+{
+ return dm_pool_zalloc(mem, sizeof(struct dfa_state));
+}
+
+static struct dfa_state *_create_state_queue(struct dm_pool *mem,
+ struct dfa_state *dfa,
+ dm_bitset_t bits)
+{
+ if (!(dfa->bits = dm_bitset_create(mem, bits[0]))) /* first element is the size */
+ return_NULL;
+
+ dm_bit_copy(dfa->bits, bits);
+ dfa->next = 0;
+ dfa->final = -1;
+
+ return dfa;
+}
+
+static int _calc_state(struct dm_regex *m, struct dfa_state *dfa, int a)
+{
+ int set_bits = 0, i;
+ dm_bitset_t dfa_bits = dfa->bits;
+ dm_bit_and(m->dfa_copy, m->charmap[a], dfa_bits);
+
+ /* iterate through all the states in firstpos */
+ for (i = dm_bit_get_first(m->dfa_copy); i >= 0; i = dm_bit_get_next(m->dfa_copy, i)) {
+ if (a == TARGET_TRANS)
+ dfa->final = m->charsets[i]->final;
+
+ dm_bit_union(m->bs, m->bs, m->charsets[i]->followpos);
+ set_bits = 1;
+ }
+
+ if (set_bits) {
+ struct dfa_state *tmp;
+ struct dfa_state *ldfa = ttree_lookup(m->tt, m->bs + 1);
+ if (!ldfa) {
+ /* push */
+ if (!(ldfa = _create_dfa_state(m->mem)))
+ return_0;
+
+ ttree_insert(m->tt, m->bs + 1, ldfa);
+ if (!(tmp = _create_state_queue(m->scratch, ldfa, m->bs)))
+ return_0;
+ if (!m->h)
+ m->h = m->t = tmp;
+ else {
+ m->t->next = tmp;
+ m->t = tmp;
+ }
+ }
+
+ dfa->lookup[a] = ldfa;
+ dm_bit_clear_all(m->bs);
+ }
+
+ return 1;
+}
+
+static int _calc_states(struct dm_regex *m, struct rx_node *rx)
+{
+ unsigned iwidth = (m->num_charsets / DM_BITS_PER_INT) + 1;
+ struct dfa_state *dfa;
+ struct rx_node *n;
+ unsigned i;
+ int a;
+
+ if (!(m->tt = ttree_create(m->scratch, iwidth)))
+ return_0;
+
+ if (!(m->bs = dm_bitset_create(m->scratch, m->num_charsets)))
+ return_0;
+
+ /* build some char maps */
+ for (a = 0; a < 256; a++)
+ if (!(m->charmap[a] = dm_bitset_create(m->scratch, m->num_charsets)))
+ return_0;
+
+ for (i = 0; i < m->num_nodes; i++) {
+ n = m->nodes[i];
+ if (n->type == CHARSET) {
+ for (a = dm_bit_get_first(n->charset);
+ a >= 0; a = dm_bit_get_next(n->charset, a))
+ dm_bit_set(m->charmap[a], n->charset_index);
+ }
+ }
+
+ /* create first state */
+ if (!(dfa = _create_dfa_state(m->mem)))
+ return_0;
+
+ m->start = dfa;
+ ttree_insert(m->tt, rx->firstpos + 1, dfa);
+
+ /* prime the queue */
+ if (!(m->h = m->t = _create_state_queue(m->scratch, dfa, rx->firstpos)))
+ return_0;
+
+ if (!(m->dfa_copy = dm_bitset_create(m->scratch, m->num_charsets)))
+ return_0;
+
+ return 1;
+}
+
+/*
+ * Forces all the dfa states to be calculated up front, ie. what
+ * _calc_states() used to do before we switched to calculating on demand.
+ */
+static int _force_states(struct dm_regex *m)
+{
+ int a;
+
+ /* keep processing until there's nothing in the queue */
+ struct dfa_state *s;
+ while ((s = m->h)) {
+ /* pop state off front of the queue */
+ m->h = m->h->next;
+
+ /* iterate through all the inputs for this state */
+ dm_bit_clear_all(m->bs);
+ for (a = 0; a < 256; a++)
+ if (!_calc_state(m, s, a))
+ return_0;
+ }
+
+ return 1;
+}
+
+struct dm_regex *dm_regex_create(struct dm_pool *mem, const char * const *patterns,
+ unsigned num_patterns)
+{
+ char *all, *ptr;
+ unsigned i;
+ size_t len = 0;
+ struct rx_node *rx;
+ struct dm_regex *m;
+ struct dm_pool *scratch = mem;
+
+ if (!(m = dm_pool_zalloc(mem, sizeof(*m))))
+ return_NULL;
+
+ /* join the regexps together, delimiting with zero */
+ for (i = 0; i < num_patterns; i++)
+ len += strlen(patterns[i]) + 8;
+
+ ptr = all = dm_pool_alloc(scratch, len + 1);
+
+ if (!all)
+ goto_bad;
+
+ for (i = 0; i < num_patterns; i++) {
+ ptr += sprintf(ptr, "(.*(%s)%c)", patterns[i], TARGET_TRANS);
+ if (i < (num_patterns - 1))
+ *ptr++ = '|';
+ }
+
+ /* parse this expression */
+ if (!(rx = rx_parse_tok(scratch, all, ptr))) {
+ log_error("Couldn't parse regex");
+ goto bad;
+ }
+
+ m->mem = mem;
+ m->scratch = scratch;
+ m->num_nodes = _count_nodes(rx);
+ m->num_charsets = _count_charsets(rx);
+ _enumerate_charsets(rx);
+ if (!(m->nodes = dm_pool_alloc(scratch, sizeof(*m->nodes) * m->num_nodes)))
+ goto_bad;
+
+ if (!(m->charsets = dm_pool_alloc(scratch, sizeof(*m->charsets) * m->num_charsets)))
+ goto_bad;
+
+ _fill_table(m, rx);
+
+ if (!_create_bitsets(m))
+ goto_bad;
+
+ _calc_functions(m);
+
+ if (!_calc_states(m, rx))
+ goto_bad;
+
+ return m;
+
+ bad:
+ dm_pool_free(mem, m);
+
+ return NULL;
+}
+
+static struct dfa_state *_step_matcher(struct dm_regex *m, int c, struct dfa_state *cs, int *r)
+{
+ struct dfa_state *ns;
+
+ if (!(ns = cs->lookup[(unsigned char) c])) {
+ if (!_calc_state(m, cs, (unsigned char) c))
+ return_NULL;
+
+ if (!(ns = cs->lookup[(unsigned char) c]))
+ return NULL;
+ }
+
+ // yuck, we have to special case the target trans
+ if ((ns->final == -1) &&
+ !_calc_state(m, ns, TARGET_TRANS))
+ return_NULL;
+
+ if (ns->final && (ns->final > *r))
+ *r = ns->final;
+
+ return ns;
+}
+
+int dm_regex_match(struct dm_regex *regex, const char *s)
+{
+ struct dfa_state *cs = regex->start;
+ int r = 0;
+
+ dm_bit_clear_all(regex->bs);
+ if (!(cs = _step_matcher(regex, HAT_CHAR, cs, &r)))
+ goto out;
+
+ for (; *s; s++)
+ if (!(cs = _step_matcher(regex, *s, cs, &r)))
+ goto out;
+
+ _step_matcher(regex, DOLLAR_CHAR, cs, &r);
+
+ out:
+ /* subtract 1 to get back to zero index */
+ return r - 1;
+}
+
+/*
+ * The next block of code concerns calculating a fingerprint for the dfa.
+ *
+ * We're not calculating a minimal dfa in _calculate_state (maybe a future
+ * improvement). As such it's possible that two non-isomorphic dfas
+ * recognise the same language. This can only really happen if you start
+ * with equivalent, but different regexes (for example the simplifier in
+ * parse_rx.c may have changed).
+ *
+ * The code is inefficient; repeatedly searching a singly linked list for
+ * previously seen nodes. Not worried since this is test code.
+ */
+struct node_list {
+ unsigned node_id;
+ struct dfa_state *node;
+ struct node_list *next;
+};
+
+struct printer {
+ struct dm_pool *mem;
+ struct node_list *pending;
+ struct node_list *processed;
+ unsigned next_index;
+};
+
+static uint32_t _randomise(uint32_t n)
+{
+ /* 2^32 - 5 */
+ uint32_t const prime = (~0) - 4;
+ return n * prime;
+}
+
+static int _seen(struct node_list *n, struct dfa_state *node, uint32_t *i)
+{
+ while (n) {
+ if (n->node == node) {
+ *i = n->node_id;
+ return 1;
+ }
+ n = n->next;
+ }
+
+ return 0;
+}
+
+/*
+ * Push node if it's not been seen before, returning a unique index.
+ */
+static uint32_t _push_node(struct printer *p, struct dfa_state *node)
+{
+ uint32_t i;
+ struct node_list *n;
+
+ if (_seen(p->pending, node, &i) ||
+ _seen(p->processed, node, &i))
+ return i;
+
+ if (!(n = dm_pool_alloc(p->mem, sizeof(*n))))
+ return_0;
+
+ n->node_id = ++p->next_index; /* start from 1, keep 0 as error code */
+ n->node = node;
+ n->next = p->pending;
+ p->pending = n;
+
+ return n->node_id;
+}
+
+/*
+ * Pop the front node, and fill out its previously assigned index.
+ */
+static struct dfa_state *_pop_node(struct printer *p)
+{
+ struct dfa_state *node = NULL;
+ struct node_list *n;
+
+ if (p->pending) {
+ n = p->pending;
+ p->pending = n->next;
+ n->next = p->processed;
+ p->processed = n;
+
+ node = n->node;
+ }
+
+ return node;
+}
+
+static uint32_t _combine(uint32_t n1, uint32_t n2)
+{
+ return ((n1 << 8) | (n1 >> 24)) ^ _randomise(n2);
+}
+
+static uint32_t _fingerprint(struct printer *p)
+{
+ int c;
+ uint32_t result = 0;
+ struct dfa_state *node;
+
+ while ((node = _pop_node(p))) {
+ result = _combine(result, (node->final < 0) ? 0 : node->final);
+ for (c = 0; c < 256; c++)
+ result = _combine(result,
+ _push_node(p, node->lookup[c]));
+ }
+
+ return result;
+}
+
+uint32_t dm_regex_fingerprint(struct dm_regex *regex)
+{
+ struct printer p;
+ uint32_t result = 0;
+ struct dm_pool *mem = dm_pool_create("regex fingerprint", 1024);
+
+ if (!mem)
+ return_0;
+
+ if (!_force_states(regex))
+ goto_out;
+
+ p.mem = mem;
+ p.pending = NULL;
+ p.processed = NULL;
+ p.next_index = 0;
+
+ if (!_push_node(&p, regex->start))
+ goto_out;
+
+ result = _fingerprint(&p);
+out:
+ dm_pool_destroy(mem);
+
+ return result;
+}
diff --git a/device_mapper/regex/parse_rx.c b/device_mapper/regex/parse_rx.c
new file mode 100644
index 000000000..cc83bfe35
--- /dev/null
+++ b/device_mapper/regex/parse_rx.c
@@ -0,0 +1,667 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "misc/dmlib.h"
+#include "parse_rx.h"
+
+#ifdef DEBUG
+#include <ctype.h>
+
+__attribute__ ((__unused__))
+static void _regex_print(struct rx_node *rx, int depth, unsigned show_nodes)
+{
+ int i, numchars;
+
+ if (rx->left) {
+ if (rx->left->type != CHARSET && (show_nodes || (!((rx->type == CAT || rx->type == OR) && rx->left->type == CAT))))
+ printf("(");
+
+ _regex_print(rx->left, depth + 1, show_nodes);
+
+ if (rx->left->type != CHARSET && (show_nodes || (!((rx->type == CAT || rx->type == OR) && rx->left->type == CAT))))
+ printf(")");
+ }
+
+ /* display info about the node */
+ switch (rx->type) {
+ case CAT:
+ break;
+
+ case OR:
+ printf("|");
+ break;
+
+ case STAR:
+ printf("*");
+ break;
+
+ case PLUS:
+ printf("+");
+ break;
+
+ case QUEST:
+ printf("?");
+ break;
+
+ case CHARSET:
+ numchars = 0;
+ for (i = 0; i < 256; i++)
+ if (dm_bit(rx->charset, i) && (isprint(i) || i == HAT_CHAR || i == DOLLAR_CHAR))
+ numchars++;
+ if (numchars == 97) {
+ printf(".");
+ break;
+ }
+ if (numchars > 1)
+ printf("[");
+ for (i = 0; i < 256; i++)
+ if (dm_bit(rx->charset, i)) {
+ if (isprint(i))
+ printf("%c", (char) i);
+ else if (i == HAT_CHAR)
+ printf("^");
+ else if (i == DOLLAR_CHAR)
+ printf("$");
+ }
+ if (numchars > 1)
+ printf("]");
+ break;
+
+ default:
+ fprintf(stderr, "Unknown type");
+ }
+
+ if (rx->right) {
+ if (rx->right->type != CHARSET && (show_nodes || (!(rx->type == CAT && rx->right->type == CAT) && rx->right->right)))
+ printf("(");
+ _regex_print(rx->right, depth + 1, show_nodes);
+ if (rx->right->type != CHARSET && (show_nodes || (!(rx->type == CAT && rx->right->type == CAT) && rx->right->right)))
+ printf(")");
+ }
+
+ if (!depth)
+ printf("\n");
+}
+#endif /* DEBUG */
+
+struct parse_sp { /* scratch pad for the parsing process */
+ struct dm_pool *mem;
+ int type; /* token type, 0 indicates a charset */
+ dm_bitset_t charset; /* The current charset */
+ const char *cursor; /* where we are in the regex */
+	const char *rx_end;	/* one past the end of the expression being parsed */
+};
+
+static struct rx_node *_or_term(struct parse_sp *ps);
+
+static void _single_char(struct parse_sp *ps, unsigned int c, const char *ptr)
+{
+ ps->type = 0;
+ ps->cursor = ptr + 1;
+ dm_bit_clear_all(ps->charset);
+ dm_bit_set(ps->charset, c);
+}
+
+/*
+ * Get the next token from the regular expression.
+ * Returns: 1 success, 0 end of input, -1 error.
+ */
+static int _rx_get_token(struct parse_sp *ps)
+{
+ int neg = 0, range = 0;
+ char c, lc = 0;
+ const char *ptr = ps->cursor;
+ if (ptr == ps->rx_end) { /* end of input ? */
+ ps->type = -1;
+ return 0;
+ }
+
+ switch (*ptr) {
+ /* charsets and ncharsets */
+ case '[':
+ ptr++;
+ if (*ptr == '^') {
+ dm_bit_set_all(ps->charset);
+
+ /* never transition on zero */
+ dm_bit_clear(ps->charset, 0);
+ neg = 1;
+ ptr++;
+
+ } else
+ dm_bit_clear_all(ps->charset);
+
+ while ((ptr < ps->rx_end) && (*ptr != ']')) {
+ if (*ptr == '\\') {
+ /* an escaped character */
+ ptr++;
+ switch (*ptr) {
+ case 'n':
+ c = '\n';
+ break;
+ case 'r':
+ c = '\r';
+ break;
+ case 't':
+ c = '\t';
+ break;
+ default:
+ c = *ptr;
+ }
+ } else if (*ptr == '-' && lc) {
+ /* we've got a range on our hands */
+ range = 1;
+ ptr++;
+ if (ptr == ps->rx_end) {
+					log_error("Incomplete range "
+ "specification");
+ return -1;
+ }
+ c = *ptr;
+ } else
+ c = *ptr;
+
+ if (range) {
+ /* add lc - c into the bitset */
+ if (lc > c) {
+ char tmp = c;
+ c = lc;
+ lc = tmp;
+ }
+
+ for (; lc <= c; lc++) {
+ if (neg)
+ dm_bit_clear(ps->charset, lc);
+ else
+ dm_bit_set(ps->charset, lc);
+ }
+ range = 0;
+ } else {
+ /* add c into the bitset */
+ if (neg)
+ dm_bit_clear(ps->charset, c);
+ else
+ dm_bit_set(ps->charset, c);
+ }
+ ptr++;
+ lc = c;
+ }
+
+ if (ptr >= ps->rx_end) {
+ ps->type = -1;
+ return -1;
+ }
+
+ ps->type = 0;
+ ps->cursor = ptr + 1;
+ break;
+
+ /* These characters are special, we just return their ASCII
+ codes as the type. Sorted into ascending order to help the
+ compiler */
+ case '(':
+ case ')':
+ case '*':
+ case '+':
+ case '?':
+ case '|':
+ ps->type = (int) *ptr;
+ ps->cursor = ptr + 1;
+ break;
+
+ case '^':
+ _single_char(ps, HAT_CHAR, ptr);
+ break;
+
+ case '$':
+ _single_char(ps, DOLLAR_CHAR, ptr);
+ break;
+
+ case '.':
+ /* The 'all but newline' character set */
+ ps->type = 0;
+ ps->cursor = ptr + 1;
+ dm_bit_set_all(ps->charset);
+ dm_bit_clear(ps->charset, (int) '\n');
+ dm_bit_clear(ps->charset, (int) '\r');
+ dm_bit_clear(ps->charset, 0);
+ break;
+
+ case '\\':
+ /* escaped character */
+ ptr++;
+ if (ptr >= ps->rx_end) {
+ log_error("Badly quoted character at end "
+ "of expression");
+ ps->type = -1;
+ return -1;
+ }
+
+ ps->type = 0;
+ ps->cursor = ptr + 1;
+ dm_bit_clear_all(ps->charset);
+ switch (*ptr) {
+ case 'n':
+ dm_bit_set(ps->charset, (int) '\n');
+ break;
+ case 'r':
+ dm_bit_set(ps->charset, (int) '\r');
+ break;
+ case 't':
+ dm_bit_set(ps->charset, (int) '\t');
+ break;
+ default:
+ dm_bit_set(ps->charset, (int) *ptr);
+ }
+ break;
+
+ default:
+ /* add a single character to the bitset */
+ ps->type = 0;
+ ps->cursor = ptr + 1;
+ dm_bit_clear_all(ps->charset);
+ dm_bit_set(ps->charset, (int) (unsigned char) *ptr);
+ break;
+ }
+
+ return 1;
+}
+
+static struct rx_node *_node(struct dm_pool *mem, int type,
+ struct rx_node *l, struct rx_node *r)
+{
+ struct rx_node *n = dm_pool_zalloc(mem, sizeof(*n));
+
+ if (n) {
+ if (type == CHARSET && !(n->charset = dm_bitset_create(mem, 256))) {
+ dm_pool_free(mem, n);
+ return NULL;
+ }
+
+ n->type = type;
+ n->left = l;
+ n->right = r;
+ }
+
+ return n;
+}
+
+static struct rx_node *_term(struct parse_sp *ps)
+{
+ struct rx_node *n;
+
+ switch (ps->type) {
+ case 0:
+ if (!(n = _node(ps->mem, CHARSET, NULL, NULL)))
+ return_NULL;
+
+ dm_bit_copy(n->charset, ps->charset);
+ _rx_get_token(ps); /* match charset */
+ break;
+
+ case '(':
+ _rx_get_token(ps); /* match '(' */
+ n = _or_term(ps);
+ if (ps->type != ')') {
+ log_error("missing ')' in regular expression");
+ return 0;
+ }
+ _rx_get_token(ps); /* match ')' */
+ break;
+
+ default:
+ n = 0;
+ }
+
+ return n;
+}
+
+static struct rx_node *_closure_term(struct parse_sp *ps)
+{
+ struct rx_node *l, *n;
+
+ if (!(l = _term(ps)))
+ return NULL;
+
+ for (;;) {
+ switch (ps->type) {
+ case '*':
+ n = _node(ps->mem, STAR, l, NULL);
+ break;
+
+ case '+':
+ n = _node(ps->mem, PLUS, l, NULL);
+ break;
+
+ case '?':
+ n = _node(ps->mem, QUEST, l, NULL);
+ break;
+
+ default:
+ return l;
+ }
+
+ if (!n)
+ return_NULL;
+
+ _rx_get_token(ps);
+ l = n;
+ }
+
+ return n;
+}
+
+static struct rx_node *_cat_term(struct parse_sp *ps)
+{
+ struct rx_node *l, *r, *n;
+
+ if (!(l = _closure_term(ps)))
+ return NULL;
+
+ if (ps->type == '|')
+ return l;
+
+ if (!(r = _cat_term(ps)))
+ return l;
+
+ if (!(n = _node(ps->mem, CAT, l, r)))
+ stack;
+
+ return n;
+}
+
+static struct rx_node *_or_term(struct parse_sp *ps)
+{
+ struct rx_node *l, *r, *n;
+
+ if (!(l = _cat_term(ps)))
+ return NULL;
+
+ if (ps->type != '|')
+ return l;
+
+ _rx_get_token(ps); /* match '|' */
+
+ if (!(r = _or_term(ps))) {
+ log_error("Badly formed 'or' expression");
+ return NULL;
+ }
+
+ if (!(n = _node(ps->mem, OR, l, r)))
+ stack;
+
+ return n;
+}
+
+/*----------------------------------------------------------------*/
+
+/* Macros for left and right nodes. Inverted if 'leftmost' is set. */
+#define LEFT(a) (leftmost ? (a)->left : (a)->right)
+#define RIGHT(a) (leftmost ? (a)->right : (a)->left)
+
+/*
+ * The optimiser spots common prefixes on either side of an 'or' node, and
+ * lifts them outside the 'or' with a 'cat'.
+ */
+static unsigned _depth(struct rx_node *r, unsigned leftmost)
+{
+ int count = 1;
+
+ while (r->type != CHARSET && LEFT(r) && (leftmost || r->type != OR)) {
+ count++;
+ r = LEFT(r);
+ }
+
+ return count;
+}
+
+/*
+ * FIXME: a unique key could be built up as part of the parse, to make the
+ * comparison quick. Alternatively we could use cons-hashing, and then
+ * this would simply be a pointer comparison.
+ */
+static int _nodes_equal(struct rx_node *l, struct rx_node *r)
+{
+ if (l->type != r->type)
+ return 0;
+
+ switch (l->type) {
+ case CAT:
+ case OR:
+ return _nodes_equal(l->left, r->left) &&
+ _nodes_equal(l->right, r->right);
+
+ case STAR:
+ case PLUS:
+ case QUEST:
+ return _nodes_equal(l->left, r->left);
+
+ case CHARSET:
+ /*
+ * Never change anything containing TARGET_TRANS
+ * used by matcher as boundary marker between concatenated
+ * expressions.
+ */
+ return (!dm_bit(l->charset, TARGET_TRANS) && dm_bitset_equal(l->charset, r->charset));
+ }
+
+ /* NOTREACHED */
+ return_0;
+}
+
+static int _find_leftmost_common(struct rx_node *or,
+ struct rx_node **l,
+ struct rx_node **r,
+ unsigned leftmost)
+{
+ struct rx_node *left = or->left, *right = or->right;
+ unsigned left_depth = _depth(left, leftmost);
+ unsigned right_depth = _depth(right, leftmost);
+
+ while (left_depth > right_depth && left->type != OR) {
+ left = LEFT(left);
+ left_depth--;
+ }
+
+ while (right_depth > left_depth && right->type != OR) {
+ right = LEFT(right);
+ right_depth--;
+ }
+
+ if (left_depth != right_depth)
+ return 0;
+
+ while (left_depth) {
+ if (left->type == CAT && right->type == CAT) {
+ if (_nodes_equal(LEFT(left), LEFT(right))) {
+ *l = left;
+ *r = right;
+ return 1;
+ }
+ }
+ if (left->type == OR || right->type == OR)
+ break;
+ left = LEFT(left);
+ right = LEFT(right);
+ left_depth--;
+ }
+
+ return 0;
+}
+
+/* If top node is OR, rotate (leftmost example) from ((ab)|((ac)|d)) to (((ab)|(ac))|d) */
+static int _rotate_ors(struct rx_node *r, unsigned leftmost)
+{
+ struct rx_node *old_node;
+
+ if (r->type != OR || RIGHT(r)->type != OR)
+ return 0;
+
+ old_node = RIGHT(r);
+
+ if (leftmost) {
+ r->right = RIGHT(old_node);
+ old_node->right = LEFT(old_node);
+ old_node->left = LEFT(r);
+ r->left = old_node;
+ } else {
+ r->left = RIGHT(old_node);
+ old_node->left = LEFT(old_node);
+ old_node->right = LEFT(r);
+ r->right = old_node;
+ }
+
+ return 1;
+}
+
+static struct rx_node *_exchange_nodes(struct dm_pool *mem, struct rx_node *r,
+ struct rx_node *left_cat, struct rx_node *right_cat,
+ unsigned leftmost)
+{
+ struct rx_node *new_r;
+
+ if (leftmost)
+ new_r = _node(mem, CAT, LEFT(left_cat), r);
+ else
+ new_r = _node(mem, CAT, r, LEFT(right_cat));
+
+ if (!new_r)
+ return_NULL;
+
+ memcpy(left_cat, RIGHT(left_cat), sizeof(*left_cat));
+ memcpy(right_cat, RIGHT(right_cat), sizeof(*right_cat));
+
+ return new_r;
+}
+
+static struct rx_node *_pass(struct dm_pool *mem,
+ struct rx_node *r,
+ int *changed)
+{
+ struct rx_node *left, *right;
+
+ /*
+ * walk the tree, optimising every 'or' node.
+ */
+ switch (r->type) {
+ case CAT:
+ if (!(r->left = _pass(mem, r->left, changed)))
+ return_NULL;
+
+ if (!(r->right = _pass(mem, r->right, changed)))
+ return_NULL;
+
+ break;
+
+ case STAR:
+ case PLUS:
+ case QUEST:
+ if (!(r->left = _pass(mem, r->left, changed)))
+ return_NULL;
+
+ break;
+ case OR:
+ /* It's important we optimise sub nodes first */
+ if (!(r->left = _pass(mem, r->left, changed)))
+ return_NULL;
+
+ if (!(r->right = _pass(mem, r->right, changed)))
+ return_NULL;
+ /*
+ * If rotate_ors changes the tree, left and right are stale,
+ * so just set 'changed' to repeat the search.
+ *
+ * FIXME Check we can't 'bounce' between left and right rotations here.
+ */
+ if (_find_leftmost_common(r, &left, &right, 1)) {
+ if (!_rotate_ors(r, 1))
+ r = _exchange_nodes(mem, r, left, right, 1);
+ *changed = 1;
+ } else if (_find_leftmost_common(r, &left, &right, 0)) {
+ if (!_rotate_ors(r, 0))
+ r = _exchange_nodes(mem, r, left, right, 0);
+ *changed = 1;
+ }
+ break;
+
+ case CHARSET:
+ break;
+ }
+
+ return r;
+}
+
+static struct rx_node *_optimise(struct dm_pool *mem, struct rx_node *r)
+{
+ /*
+ * We're looking for (or (... (cat <foo> a)) (... (cat <foo> b)))
+ * and want to turn it into (cat <foo> (or (... a) (... b)))
+ *
+ * (fa)|(fb) becomes f(a|b)
+ */
+
+ /*
+ * Initially done as an inefficient multipass algorithm.
+ */
+ int changed;
+
+ do {
+ changed = 0;
+ r = _pass(mem, r, &changed);
+ } while (r && changed);
+
+ return r;
+}
+
+/*----------------------------------------------------------------*/
+
+struct rx_node *rx_parse_tok(struct dm_pool *mem,
+ const char *begin, const char *end)
+{
+ struct rx_node *r;
+ struct parse_sp *ps = dm_pool_zalloc(mem, sizeof(*ps));
+
+ if (!ps)
+ return_NULL;
+
+ ps->mem = mem;
+ if (!(ps->charset = dm_bitset_create(mem, 256))) {
+ log_error("Regex charset allocation failed");
+ dm_pool_free(mem, ps);
+ return NULL;
+ }
+ ps->cursor = begin;
+ ps->rx_end = end;
+ _rx_get_token(ps); /* load the first token */
+
+ if (!(r = _or_term(ps))) {
+ log_error("Parse error in regex");
+ dm_pool_free(mem, ps);
+ return NULL;
+ }
+
+ if (!(r = _optimise(mem, r))) {
+ log_error("Regex optimisation error");
+ dm_pool_free(mem, ps);
+ return NULL;
+ }
+
+ return r;
+}
+
+struct rx_node *rx_parse_str(struct dm_pool *mem, const char *str)
+{
+ return rx_parse_tok(mem, str, str + strlen(str));
+}
diff --git a/device_mapper/regex/parse_rx.h b/device_mapper/regex/parse_rx.h
new file mode 100644
index 000000000..08970605d
--- /dev/null
+++ b/device_mapper/regex/parse_rx.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef _DM_PARSE_REGEX_H
+#define _DM_PARSE_REGEX_H
+
+enum {
+ CAT,
+ STAR,
+ PLUS,
+ OR,
+ QUEST,
+ CHARSET
+};
+
+/*
+ * We're never going to be running the regex on non-printable
+ * chars, so we can use a couple of these chars to represent the
+ * start and end of a string.
+ */
+#define HAT_CHAR 0x2
+#define DOLLAR_CHAR 0x3
+
+#define TARGET_TRANS '\0'
+
+struct rx_node {
+ int type;
+ dm_bitset_t charset;
+ struct rx_node *left, *right;
+
+	/* used to build the dfa for the tokenizer */
+ unsigned charset_index;
+ int nullable, final;
+ dm_bitset_t firstpos;
+ dm_bitset_t lastpos;
+ dm_bitset_t followpos;
+};
+
+struct rx_node *rx_parse_str(struct dm_pool *mem, const char *str);
+struct rx_node *rx_parse_tok(struct dm_pool *mem,
+ const char *begin, const char *end);
+
+#endif
diff --git a/device_mapper/regex/ttree.c b/device_mapper/regex/ttree.c
new file mode 100644
index 000000000..62c5bf786
--- /dev/null
+++ b/device_mapper/regex/ttree.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "misc/dmlib.h"
+#include "ttree.h"
+
+struct node {
+ unsigned k;
+ struct node *l, *m, *r;
+ void *data;
+};
+
+struct ttree {
+ int klen;
+ struct dm_pool *mem;
+ struct node *root;
+};
+
+__attribute__((nonnull(1)))
+static struct node **_lookup_single(struct node **c, unsigned int k)
+{
+ while (*c) {
+ if (k < (*c)->k)
+ c = &((*c)->l);
+
+ else if (k > (*c)->k)
+ c = &((*c)->r);
+
+ else {
+ c = &((*c)->m);
+ break;
+ }
+ }
+
+ return c;
+}
+
+void *ttree_lookup(struct ttree *tt, unsigned *key)
+{
+ struct node **c = &tt->root;
+ int count = tt->klen;
+
+ while (*c && count) {
+ c = _lookup_single(c, *key++);
+ count--;
+ }
+
+ return *c ? (*c)->data : NULL;
+}
+
+static struct node *_tree_node(struct dm_pool *mem, unsigned int k)
+{
+ struct node *n = dm_pool_zalloc(mem, sizeof(*n));
+
+ if (n)
+ n->k = k;
+
+ return n;
+}
+
+int ttree_insert(struct ttree *tt, unsigned int *key, void *data)
+{
+ struct node **c = &tt->root;
+ int count = tt->klen;
+ unsigned int k;
+
+ do {
+ k = *key++;
+ c = _lookup_single(c, k);
+ count--;
+
+ } while (*c && count);
+
+ if (!*c) {
+ count++;
+
+ while (count--) {
+ if (!(*c = _tree_node(tt->mem, k)))
+ return_0;
+
+ if (count) {
+ k = *key++;
+ c = &((*c)->m);
+ }
+ }
+ }
+ (*c)->data = data;
+
+ return 1;
+}
+
+struct ttree *ttree_create(struct dm_pool *mem, unsigned int klen)
+{
+ struct ttree *tt;
+
+ if (!(tt = dm_pool_zalloc(mem, sizeof(*tt))))
+ return_NULL;
+
+ tt->klen = klen;
+ tt->mem = mem;
+ return tt;
+}
diff --git a/device_mapper/regex/ttree.h b/device_mapper/regex/ttree.h
new file mode 100644
index 000000000..8b62181f4
--- /dev/null
+++ b/device_mapper/regex/ttree.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+ * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
+ *
+ * This file is part of the device-mapper userspace tools.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU Lesser General Public License v.2.1.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef _DM_TTREE_H
+#define _DM_TTREE_H
+
+struct ttree;
+
+struct ttree *ttree_create(struct dm_pool *mem, unsigned int klen);
+
+void *ttree_lookup(struct ttree *tt, unsigned *key);
+int ttree_insert(struct ttree *tt, unsigned *key, void *data);
+
+#endif
diff --git a/device-mapper/vdo/status.c b/device_mapper/vdo/status.c
index 1739a7e70..4bd15e462 100644
--- a/device-mapper/vdo/status.c
+++ b/device_mapper/vdo/status.c
@@ -1,7 +1,7 @@
#include "target.h"
// For DM_ARRAY_SIZE!
-#include "libdm/libdevmapper.h"
+#include "device_mapper/libdevmapper.h"
#include <ctype.h>
#include <stdlib.h>
diff --git a/device-mapper/vdo/target.h b/device_mapper/vdo/target.h
index 3137e2c07..3137e2c07 100644
--- a/device-mapper/vdo/target.h
+++ b/device_mapper/vdo/target.h
diff --git a/lib/config/config.h b/lib/config/config.h
index b797c78d8..146aae152 100644
--- a/lib/config/config.h
+++ b/lib/config/config.h
@@ -16,7 +16,7 @@
#ifndef _LVM_CONFIG_H
#define _LVM_CONFIG_H
-#include "libdm/libdevmapper.h"
+#include "device_mapper/libdevmapper.h"
#include "lib/device/device.h"
/* 16 bits: 3 bits for major, 4 bits for minor, 9 bits for patchlevel */
diff --git a/lib/device/bcache.c b/lib/device/bcache.c
index aa6bb7ace..d213758e7 100644
--- a/lib/device/bcache.c
+++ b/lib/device/bcache.c
@@ -15,7 +15,7 @@
#define _GNU_SOURCE
#include "lib/device/bcache.h"
-#include "libdm/misc/dm-logging.h"
+#include "device_mapper/misc/dm-logging.h"
#include "lib/log/log.h"
#include <errno.h>
diff --git a/lib/device/bcache.h b/lib/device/bcache.h
index cf3d6688d..8f328c76c 100644
--- a/lib/device/bcache.h
+++ b/lib/device/bcache.h
@@ -15,7 +15,7 @@
#ifndef BCACHE_H
#define BCACHE_H
-#include "libdm/libdevmapper.h"
+#include "device_mapper/libdevmapper.h"
#include <linux/fs.h>
#include <stdint.h>
diff --git a/lib/device/dev-cache.c b/lib/device/dev-cache.c
index f3c23260d..38026a381 100644
--- a/lib/device/dev-cache.c
+++ b/lib/device/dev-cache.c
@@ -17,7 +17,7 @@
#include "lib/datastruct/btree.h"
#include "lib/config/config.h"
#include "lib/commands/toolcontext.h"
-#include "libdm/misc/dm-ioctl.h"
+#include "device_mapper/misc/dm-ioctl.h"
#include "lib/misc/lvm-string.h"
#ifdef UDEV_SYNC_SUPPORT
diff --git a/lib/metadata/pv.h b/lib/metadata/pv.h
index 23d2bd759..d5d91ce0f 100644
--- a/lib/metadata/pv.h
+++ b/lib/metadata/pv.h
@@ -16,7 +16,7 @@
#define _LVM_PV_H
#include "lib/uuid/uuid.h"
-#include "libdm/libdevmapper.h"
+#include "device_mapper/libdevmapper.h"
struct device;
struct format_type;
diff --git a/lib/metadata/vg.h b/lib/metadata/vg.h
index b37c01d83..3d24bba7d 100644
--- a/lib/metadata/vg.h
+++ b/lib/metadata/vg.h
@@ -16,7 +16,7 @@
#define _LVM_VG_H
#include "lib/uuid/uuid.h"
-#include "libdm/libdevmapper.h"
+#include "device_mapper/libdevmapper.h"
struct cmd_context;
struct format_instance;
diff --git a/lib/misc/lib.h b/lib/misc/lib.h
index 13c7110f7..3ae3aacc0 100644
--- a/lib/misc/lib.h
+++ b/lib/misc/lib.h
@@ -79,7 +79,7 @@
#include "lib/misc/intl.h"
-#include "libdm/libdevmapper.h"
+#include "device_mapper/libdevmapper.h"
#include "lib/misc/util.h"
#ifdef DM
diff --git a/lib/report/properties.h b/lib/report/properties.h
index f52705e9a..38b61110c 100644
--- a/lib/report/properties.h
+++ b/lib/report/properties.h
@@ -14,7 +14,7 @@
#ifndef _LVM_PROPERTIES_H
#define _LVM_PROPERTIES_H
-#include "libdm/libdevmapper.h"
+#include "device_mapper/libdevmapper.h"
#include "lib/metadata/metadata.h"
#include "lib/report/report.h"
#include "lib/properties/prop_common.h"
diff --git a/libdaemon/client/config-util.c b/libdaemon/client/config-util.c
index 12baa998b..f8526d75c 100644
--- a/libdaemon/client/config-util.c
+++ b/libdaemon/client/config-util.c
@@ -17,7 +17,7 @@
#include "tools/tool.h"
#include "libdaemon/client/daemon-io.h"
-#include "libdm/misc/dm-logging.h"
+#include "device_mapper/misc/dm-logging.h"
#include <math.h> /* fabs() */
#include <float.h> /* DBL_EPSILON */
diff --git a/libdaemon/client/daemon-client.c b/libdaemon/client/daemon-client.c
index b5dfbf130..28d7c04a3 100644
--- a/libdaemon/client/daemon-client.c
+++ b/libdaemon/client/daemon-client.c
@@ -18,7 +18,7 @@
#include "libdaemon/client/daemon-io.h"
#include "libdaemon/client/daemon-client.h"
-#include "libdm/misc/dm-logging.h"
+#include "device_mapper/misc/dm-logging.h"
#include <sys/un.h>
#include <sys/socket.h>
diff --git a/libdm/Makefile.in b/libdm/Makefile.in
index 66ec39513..eeef0801d 100644
--- a/libdm/Makefile.in
+++ b/libdm/Makefile.in
@@ -52,7 +52,7 @@ CFLOW_LIST_TARGET = libdevmapper.cflow
EXPORTED_HEADER = $(srcdir)/libdevmapper.h
EXPORTED_FN_PREFIX = dm
-include $(top_builddir)/make.tmpl
+include $(top_builddir)/libdm/make.tmpl
PROGS_CFLAGS = $(UDEV_CFLAGS)
diff --git a/libdm/make.tmpl.in b/libdm/make.tmpl.in
new file mode 100644
index 000000000..7e3f4a481
--- /dev/null
+++ b/libdm/make.tmpl.in
@@ -0,0 +1,578 @@
+# @configure_input@
+#
+# Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved.
+# Copyright (C) 2004-2014 Red Hat, Inc. All rights reserved.
+#
+# This file is part of LVM2.
+#
+# This copyrighted material is made available to anyone wishing to use,
+# modify, copy, or redistribute it subject to the terms and conditions
+# of the GNU General Public License v.2.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+ifeq ($(V),1)
+ Q=
+else
+ Q=@
+endif
+
+SHELL = @SHELL@
+
+@SET_MAKE@
+
+# Allow environment to override any built-in default value for CC.
+# If there is a built-in default, CC is NOT set to @CC@ here.
+CC ?= @CC@
+
+# If $(CC) holds the usual built-in default value of 'cc' then replace it with
+# the configured value.
+# (To avoid this and force the use of 'cc' from the environment, supply its
+# full path.)
+ifeq ($(CC), cc)
+ CC = @CC@
+endif
+
+RANLIB = @RANLIB@
+INSTALL = @INSTALL@
+MKDIR_P = @MKDIR_P@
+MSGFMT = @MSGFMT@
+LCOV = @LCOV@
+GENHTML = @GENHTML@
+LN_S = @LN_S@
+SED = @SED@
+CFLOW_CMD = @CFLOW_CMD@
+AWK = @AWK@
+CHMOD = @CHMOD@
+EGREP = @EGREP@
+GREP = @GREP@
+SORT = @SORT@
+WC = @WC@
+AR = @AR@
+RM = rm -f
+
+PYTHON2 = @PYTHON2@
+PYTHON3 = @PYTHON3@
+PYCOMPILE = $(top_srcdir)/autoconf/py-compile
+
+LIBS = @LIBS@
+# Extra libraries always linked with static binaries
+STATIC_LIBS = $(SELINUX_LIBS) $(UDEV_LIBS) $(BLKID_LIBS)
+DEFS += @DEFS@
+# FIXME set this only where it's needed, not globally?
+CFLAGS ?= @COPTIMISE_FLAG@ @CFLAGS@
+LDFLAGS ?= @LDFLAGS@
+CLDFLAGS += @CLDFLAGS@
+ELDFLAGS += @ELDFLAGS@
+LDDEPS += @LDDEPS@
+LIB_SUFFIX = @LIB_SUFFIX@
+LVMINTERNAL_LIBS = -llvm-internal $(DMEVENT_LIBS) $(DAEMON_LIBS) $(SYSTEMD_LIBS) $(UDEV_LIBS) $(DL_LIBS) $(BLKID_LIBS)
+DL_LIBS = @DL_LIBS@
+RT_LIBS = @RT_LIBS@
+M_LIBS = @M_LIBS@
+PTHREAD_LIBS = @PTHREAD_LIBS@
+READLINE_LIBS = @READLINE_LIBS@
+SELINUX_LIBS = @SELINUX_LIBS@
+UDEV_CFLAGS = @UDEV_CFLAGS@
+UDEV_LIBS = @UDEV_LIBS@
+BLKID_CFLAGS = @BLKID_CFLAGS@
+BLKID_LIBS = @BLKID_LIBS@
+SYSTEMD_LIBS = @SYSTEMD_LIBS@
+VALGRIND_CFLAGS = @VALGRIND_CFLAGS@
+
+# Setup directory variables
+prefix = @prefix@
+exec_prefix = @exec_prefix@
+udev_prefix = @udev_prefix@
+sysconfdir = @sysconfdir@
+rootdir = $(DESTDIR)/
+bindir = $(DESTDIR)@bindir@
+confdir = $(DESTDIR)@CONFDIR@/lvm
+profiledir = $(confdir)/@DEFAULT_PROFILE_SUBDIR@
+includedir = $(DESTDIR)@includedir@
+libdir = $(DESTDIR)@libdir@
+libexecdir = $(DESTDIR)@libexecdir@
+usrlibdir = $(DESTDIR)@usrlibdir@
+sbindir = $(DESTDIR)@sbindir@
+usrsbindir = $(DESTDIR)@usrsbindir@
+datarootdir = @datarootdir@
+datadir = $(DESTDIR)@datadir@
+infodir = $(DESTDIR)@infodir@
+mandir = $(DESTDIR)@mandir@
+localedir = $(DESTDIR)@localedir@
+staticdir = $(DESTDIR)@STATICDIR@
+udevdir = $(DESTDIR)@udevdir@
+pkgconfigdir = $(usrlibdir)/pkgconfig
+initdir = $(DESTDIR)$(sysconfdir)/rc.d/init.d
+dbusconfdir = $(DESTDIR)$(sysconfdir)/dbus-1/system.d
+dbusservicedir = $(datadir)/dbus-1/system-services
+systemd_unit_dir = $(DESTDIR)@systemdsystemunitdir@
+systemd_generator_dir = $(DESTDIR)$(SYSTEMD_GENERATOR_DIR)
+systemd_dir = $(DESTDIR)@systemdutildir@
+tmpfiles_dir = $(DESTDIR)@tmpfilesdir@
+ocf_scriptdir = $(DESTDIR)@OCFDIR@
+pythonprefix = $(DESTDIR)$(prefix)
+
+# N.B. No $(DESTDIR) prefix here.
+python2dir = @PYTHON2DIR@
+python3dir = @PYTHON3DIR@
+
+USRLIB_RELPATH = $(shell echo $(abspath $(usrlibdir) $(libdir)) | \
+ $(AWK) -f $(top_srcdir)/scripts/relpath.awk)
+
+SYSTEMD_GENERATOR_DIR = @systemdutildir@/system-generators
+DEFAULT_SYS_DIR = @DEFAULT_SYS_DIR@
+DEFAULT_ARCHIVE_DIR = $(DEFAULT_SYS_DIR)/@DEFAULT_ARCHIVE_SUBDIR@
+DEFAULT_BACKUP_DIR = $(DEFAULT_SYS_DIR)/@DEFAULT_BACKUP_SUBDIR@
+DEFAULT_CACHE_DIR = $(DEFAULT_SYS_DIR)/@DEFAULT_CACHE_SUBDIR@
+DEFAULT_PROFILE_DIR = $(DEFAULT_SYS_DIR)/@DEFAULT_PROFILE_SUBDIR@
+DEFAULT_LOCK_DIR = @DEFAULT_LOCK_DIR@
+DEFAULT_RUN_DIR = @DEFAULT_RUN_DIR@
+DEFAULT_PID_DIR = @DEFAULT_PID_DIR@
+DEFAULT_MANGLING = @MANGLING@
+
+# Setup vpath search paths for some suffixes
+vpath %.c $(srcdir)
+vpath %.cpp $(srcdir)
+vpath %.in $(srcdir)
+vpath %.po $(srcdir)
+vpath %.exported_symbols $(srcdir)
+
+interface = @interface@
+interfacebuilddir = $(top_builddir)/libdm/$(interface)
+rpmbuilddir = $(abs_top_builddir)/build
+
+# The number of jobs to run, if blank, defaults to the make standard
+ifndef MAKEFLAGS
+MAKEFLAGS = @JOBS@
+endif
+
+# Handle installation of files
+ifeq ("@WRITE_INSTALL@", "yes")
+# leaving defaults
+M_INSTALL_SCRIPT =
+M_INSTALL_DATA = -m 644
+else
+M_INSTALL_PROGRAM = -m 555
+M_INSTALL_DATA = -m 444
+endif
+INSTALL_PROGRAM = $(INSTALL) $(M_INSTALL_PROGRAM) $(STRIP)
+INSTALL_DATA = $(INSTALL) -p $(M_INSTALL_DATA)
+INSTALL_WDATA = $(INSTALL) -p -m 644
+
+INSTALL_DIR = $(INSTALL) -m 755 -d
+INSTALL_ROOT_DIR = $(INSTALL) -m 700 -d
+INSTALL_ROOT_DATA = $(INSTALL) -m 600
+INSTALL_SCRIPT = $(INSTALL) -p $(M_INSTALL_PROGRAM)
+
+.SUFFIXES:
+.SUFFIXES: .c .cpp .d .o .so .a .po .pot .mo .dylib
+
+ifeq ("$(notdir $(CC))", "gcc")
+WFLAGS +=\
+ -Wall\
+ -Wcast-align\
+ -Wfloat-equal\
+ -Wformat-security\
+ -Winline\
+ -Wmissing-format-attribute\
+ -Wmissing-include-dirs\
+ -Wmissing-noreturn\
+ -Wpointer-arith\
+ -Wredundant-decls\
+ -Wshadow\
+ -Wundef\
+ -Wwrite-strings
+
+WCFLAGS +=\
+ -Wmissing-declarations\
+ -Wmissing-prototypes\
+ -Wnested-externs\
+ -Wold-style-definition\
+ -Wstrict-prototypes\
+ -Wuninitialized
+
+ifeq ("@HAVE_WJUMP@", "yes")
+WCFLAGS += -Wjump-misses-init
+endif
+
+ifeq ("@HAVE_WCLOBBERED@", "yes")
+WFLAGS +=\
+ -Wclobbered\
+ -Wempty-body\
+ -Wignored-qualifiers\
+ -Wlogical-op\
+ -Wtype-limits
+
+WCFLAGS +=\
+ -Wmissing-parameter-type\
+ -Wold-style-declaration\
+ -Woverride-init
+endif
+
+ifeq ("@HAVE_WSYNCNAND@", "yes")
+WFLAGS += -Wsync-nand
+endif
+endif
+
+ifneq ("@STATIC_LINK@", "yes")
+ifeq ("@HAVE_PIE@", "yes")
+ifeq ("@HAVE_FULL_RELRO@", "yes")
+ EXTRA_EXEC_CFLAGS += -fPIE
+ EXTRA_EXEC_LDFLAGS += -Wl,-z,relro,-z,now -pie -fPIE
+ CLDFLAGS += -Wl,-z,relro
+endif
+endif
+endif
+
+#WFLAGS += -W -Wno-sign-compare -Wno-unused-parameter -Wno-missing-field-initializers
+#WFLAGS += -Wsign-compare -Wunused-parameter -Wmissing-field-initializers
+#WFLAGS += -Wconversion -Wbad-function-cast -Wcast-qual -Waggregate-return -Wpacked
+#WFLAGS += -pedantic -std=gnu99
+#DEFS += -DDEBUG_CRC32
+
+#
+# Avoid recursive extension of CFLAGS
+# by checking whether CFLAGS already has fPIC string
+#
+ifeq (,$(findstring fPIC,$(CFLAGS)))
+
+CFLAGS += -fPIC
+
+ifeq ("@DEBUG@", "yes")
+ifeq (,$(findstring -g,$(CFLAGS)))
+ CFLAGS += -g
+endif
+ CFLAGS += -fno-omit-frame-pointer
+ DEFS += -DDEBUG
+ # memory debugging is not thread-safe yet
+ ifneq ("@BUILD_DMEVENTD@", "yes")
+ ifneq ("@BUILD_DMFILEMAPD@", "yes")
+ ifneq ("@BUILD_LVMLOCKD@", "yes")
+ ifneq ("@BUILD_LVMPOLLD@", "yes")
+ ifneq ("@BUILD_LVMETAD@", "yes")
+ ifeq ("@CLVMD@", "none")
+ DEFS += -DDEBUG_MEM
+ endif
+ endif
+ endif
+ endif
+ endif
+ endif
+endif
+
+# end of fPIC protection
+endif
+
+DEFS += -D_BUILDING_LVM
+
+LDFLAGS += -L$(top_builddir)/libdm -L$(top_builddir)/lib
+CLDFLAGS += -L$(top_builddir)/libdm -L$(top_builddir)/lib
+
+DAEMON_LIBS = -ldaemonclient
+LDFLAGS += -L$(top_builddir)/libdaemon/client
+CLDFLAGS += -L$(top_builddir)/libdaemon/client
+
+ifeq ("@BUILD_DMEVENTD@", "yes")
+ DMEVENT_LIBS = -ldevmapper-event
+ LDFLAGS += -L$(top_builddir)/daemons/dmeventd
+ CLDFLAGS += -L$(top_builddir)/daemons/dmeventd
+endif
+
+# Combination of DEBUG_POOL and DEBUG_ENFORCE_POOL_LOCKING is not supported.
+#DEFS += -DDEBUG_POOL
+# Default pool locking uses the crc checksum. With mprotect memory
+# enforcement compiled in, faulty memory writes can easily be found.
+#DEFS += -DDEBUG_ENFORCE_POOL_LOCKING
+#DEFS += -DBOUNDS_CHECK
+
+# LVM is not supposed to use mmap while devices are suspended.
+# This code causes a core dump if it gets called.
+#DEFS += -DDEBUG_MEMLOCK
+
+#CFLAGS += -pg
+#LDFLAGS += -pg
+
+STRIP=
+#STRIP = -s
+
+LVM_VERSION := $(shell cat $(top_srcdir)/VERSION)
+
+LIB_VERSION_LVM := $(shell $(AWK) -F '.' '{printf "%s.%s",$$1,$$2}' $(top_srcdir)/VERSION)
+
+LIB_VERSION_DM := $(shell $(AWK) -F '.' '{printf "%s.%s",$$1,$$2}' $(top_srcdir)/VERSION_DM)
+
+LIB_VERSION_APP := $(shell $(AWK) -F '[(). ]' '{printf "%s.%s",$$1,$$4}' $(top_srcdir)/VERSION)
+
+INCLUDES += -I$(top_srcdir) -I$(srcdir) -I$(top_builddir)/include
+
+DEPS = $(top_builddir)/make.tmpl $(top_srcdir)/VERSION \
+ $(top_builddir)/Makefile
+
+OBJECTS = $(SOURCES:%.c=%.o) $(CXXSOURCES:%.cpp=%.o)
+POTFILES = $(SOURCES:%.c=%.pot)
+
+.PHONY: all pofile distclean clean cleandir cflow device-mapper
+.PHONY: install install_cluster install_device-mapper install_lvm2
+.PHONY: install_dbus_service
+.PHONY: install_lib_shared install_dm_plugin install_lvm2_plugin
+.PHONY: install_ocf install_systemd_generators install_all_man all_man man help
+.PHONY: python_bindings install_python_bindings
+.PHONY: $(SUBDIRS) $(SUBDIRS.install) $(SUBDIRS.clean) $(SUBDIRS.distclean)
+.PHONY: $(SUBDIRS.pofile) $(SUBDIRS.install_cluster) $(SUBDIRS.cflow)
+.PHONY: $(SUBDIRS.device-mapper) $(SUBDIRS.install-device-mapper)
+.PHONY: $(SUBDIRS.generate) generate
+
+SUBDIRS.device-mapper := $(SUBDIRS:=.device-mapper)
+SUBDIRS.install := $(SUBDIRS:=.install)
+SUBDIRS.install_cluster := $(SUBDIRS:=.install_cluster)
+SUBDIRS.install_device-mapper := $(SUBDIRS:=.install_device-mapper)
+SUBDIRS.install_lvm2 := $(SUBDIRS:=.install_lvm2)
+SUBDIRS.install_ocf := $(SUBDIRS:=.install_ocf)
+SUBDIRS.pofile := $(SUBDIRS:=.pofile)
+SUBDIRS.cflow := $(SUBDIRS:=.cflow)
+SUBDIRS.clean := $(SUBDIRS:=.clean)
+SUBDIRS.distclean := $(SUBDIRS:=.distclean)
+
+TARGETS += $(LIB_SHARED) $(LIB_STATIC)
+
+all: $(SUBDIRS) $(TARGETS)
+
+install: all $(SUBDIRS.install)
+install_cluster: all $(SUBDIRS.install_cluster)
+install_device-mapper: $(SUBDIRS.install_device-mapper)
+install_lvm2: $(SUBDIRS.install_lvm2)
+install_ocf: $(SUBDIRS.install_ocf)
+cflow: $(SUBDIRS.cflow)
+
+$(SUBDIRS): $(SUBDIRS.device-mapper)
+ $(MAKE) -C $@
+
+$(SUBDIRS.device-mapper):
+ $(MAKE) -C $(@:.device-mapper=) device-mapper
+
+$(SUBDIRS.install): $(SUBDIRS)
+ $(MAKE) -C $(@:.install=) install
+
+$(SUBDIRS.install_cluster): $(SUBDIRS)
+ $(MAKE) -C $(@:.install_cluster=) install_cluster
+
+$(SUBDIRS.install_device-mapper): device-mapper
+ $(MAKE) -C $(@:.install_device-mapper=) install_device-mapper
+
+$(SUBDIRS.install_lvm2): $(SUBDIRS)
+ $(MAKE) -C $(@:.install_lvm2=) install_lvm2
+
+$(SUBDIRS.install_ocf):
+ $(MAKE) -C $(@:.install_ocf=) install_ocf
+
+$(SUBDIRS.clean):
+ -$(MAKE) -C $(@:.clean=) clean
+
+$(SUBDIRS.distclean):
+ -$(MAKE) -C $(@:.distclean=) distclean
+
+$(SUBDIRS.cflow):
+ $(MAKE) -C $(@:.cflow=) cflow
+
+ifeq ("@INTL@", "yes")
+pofile: $(SUBDIRS.pofile) $(POTFILES)
+
+$(SUBDIRS.pofile):
+ $(MAKE) -C $(@:.pofile=) pofile
+endif
+
+$(SUBDIRS.generate):
+ $(MAKE) -C $(@:.generate=) generate
+
+ifneq ("$(CFLOW_LIST_TARGET)", "")
+CLEAN_CFLOW += $(CFLOW_LIST_TARGET)
+$(CFLOW_LIST_TARGET): $(CFLOW_LIST)
+ echo "CFLOW_SOURCES += $(addprefix \
+ \$$(top_srcdir)$(subst $(top_srcdir),,$(srcdir))/, $(CFLOW_LIST))" > $@
+cflow: $(CFLOW_LIST_TARGET)
+endif
+
+ifneq ("$(CFLOW_TARGET)", "")
+CLEAN_CFLOW += \
+ $(CFLOW_TARGET).cflow \
+ $(CFLOW_TARGET).xref \
+ $(CFLOW_TARGET).tree \
+ $(CFLOW_TARGET).rtree \
+ $(CFLOW_TARGET).rxref
+
+ifneq ("$(CFLOW_CMD)", "")
+CFLOW_FLAGS +=\
+ --cpp="$(CC) -E" \
+ --symbol _ISbit:wrapper \
+ --symbol __attribute__:wrapper \
+ --symbol __const__:wrapper \
+ --symbol __const:type \
+ --symbol __restrict:type \
+ --symbol __extension__:wrapper \
+ --symbol __nonnull:wrapper \
+ --symbol __nothrow__:wrapper \
+ --symbol __pure__:wrapper \
+ --symbol __REDIRECT:wrapper \
+ --symbol __REDIRECT_NTH:wrapper \
+ --symbol __wur:wrapper \
+ -I$(top_srcdir)/libdm \
+ -I$(top_srcdir)/libdm/ioctl \
+ -I$(top_srcdir)/daemons/dmeventd/plugins/lvm2/ \
+ $(INCLUDES) $(DEFS)
+
+$(CFLOW_TARGET).cflow: $(CFLOW_SOURCES)
+ $(CFLOW_CMD) -o$@ $(CFLOW_FLAGS) $(CFLOW_SOURCES)
+$(CFLOW_TARGET).rxref: $(CFLOW_SOURCES)
+ $(CFLOW_CMD) -o$@ $(CFLOW_FLAGS) -r --omit-arguments $(CFLOW_SOURCES)
+$(CFLOW_TARGET).tree: $(CFLOW_SOURCES)
+ $(CFLOW_CMD) -o$@ $(CFLOW_FLAGS) --omit-arguments -T -b $(CFLOW_SOURCES)
+$(CFLOW_TARGET).xref: $(CFLOW_SOURCES)
+ $(CFLOW_CMD) -o$@ $(CFLOW_FLAGS) --omit-arguments -x $(CFLOW_SOURCES)
+#$(CFLOW_TARGET).rtree: $(CFLOW_SOURCES)
+# $(CFLOW_CMD) -o$@ $(CFLOW_FLAGS) -r --omit-arguments -T -b $(CFLOW_SOURCES)
+cflow: $(CFLOW_TARGET).cflow $(CFLOW_TARGET).tree $(CFLOW_TARGET).rxref $(CFLOW_TARGET).xref
+#$(CFLOW_TARGET).rtree
+endif
+endif
+
+.LIBPATTERNS = lib%.so lib%.a
+
+DEPFLAGS=-MT $@ -MMD -MP -MF $*.d
+
+# still needed in 2018 for 32bit builds
+DEFS+=-D_FILE_OFFSET_BITS=64
+
+%.o: %.c
+ @echo " [CC] $<"
+ $(Q) $(CC) $(DEPFLAGS) -c $(INCLUDES) $(VALGRIND_CFLAGS) $(PROGS_CFLAGS) $(DEFS) $(DEFS_$@) $(WFLAGS) $(WCFLAGS) $(CFLAGS) $(CFLAGS_$@) $< -o $@
+
+%.o: %.cpp
+ @echo " [CXX] $<"
+ $(Q) $(CXX) -c $(INCLUDES) $(VALGRIND_CFLAGS) $(DEFS) $(DEFS_$@) $(WFLAGS) $(CXXFLAGS) $(CXXFLAGS_$@) $< -o $@
+
+%.pot: %.c Makefile
+ @echo " [CC] $@"
+ $(Q) $(CC) -E $(INCLUDES) $(VALGRIND_CFLAGS) $(PROGS_CFLAGS) -include $(top_builddir)/include/pogen.h $(DEFS) $(WFLAGS) $(CFLAGS) $< >$@
+
+%.so: %.o
+ @echo " [CC] $<"
+ $(Q) $(CC) -c $(CFLAGS) $(CLDFLAGS) $< $(LIBS) -o $@
+
+ifneq (,$(LIB_SHARED))
+
+TARGETS += $(LIB_SHARED).$(LIB_VERSION)
+$(LIB_SHARED).$(LIB_VERSION): $(OBJECTS) $(LDDEPS)
+ @echo " [CC] $@"
+ifeq ("@LIB_SUFFIX@","so")
+ $(Q) $(CC) -shared -Wl,-soname,$(notdir $@) \
+ $(CFLAGS) $(CLDFLAGS) $(OBJECTS) $(LIBS) -o $@
+endif
+ifeq ("@LIB_SUFFIX@","dylib")
+ $(Q) $(CC) -dynamiclib -dylib_current_version,$(LIB_VERSION) \
+ $(CFLAGS) $(CLDFLAGS) $(OBJECTS) $(LIBS) -o $@
+endif
+
+$(LIB_SHARED): $(LIB_SHARED).$(LIB_VERSION)
+ @echo " [LN] $<"
+ $(Q) $(LN_S) -f $(<F) $@
+
+CLEAN_TARGETS += $(LDDEPS) .exported_symbols_generated
+
+install_lib_shared: $(LIB_SHARED)
+ @echo " [INSTALL] $<"
+ $(Q) $(INSTALL_PROGRAM) -D $< $(libdir)/$(<F).$(LIB_VERSION)
+ $(Q) $(INSTALL_DIR) $(usrlibdir)
+ $(Q) $(LN_S) -f $(USRLIB_RELPATH)$(<F).$(LIB_VERSION) $(usrlibdir)/$(<F)
+
+# FIXME: plugins are installed to subdirs
+# and for compatibility links in libdir are created
+# when the code is fixed links could be removed.
+install_dm_plugin: $(LIB_SHARED)
+ @echo " [INSTALL] $<"
+ $(Q) $(INSTALL_PROGRAM) -D $< $(libdir)/device-mapper/$(<F)
+ $(Q) $(LN_S) -f device-mapper/$(<F) $(libdir)/$(<F)
+
+install_lvm2_plugin: $(LIB_SHARED)
+ @echo " [INSTALL] $<"
+ $(Q) $(INSTALL_PROGRAM) -D $< $(libdir)/lvm2/$(<F)
+ $(Q) $(LN_S) -f lvm2/$(<F) $(libdir)/$(<F)
+ $(Q) $(LN_S) -f $(<F) $(libdir)/$(<F).$(LIB_VERSION)
+endif
+
+$(LIB_STATIC): $(OBJECTS)
+ @echo " [AR] $@"
+ $(Q) $(RM) $@
+ $(Q) $(AR) rsv $@ $(OBJECTS) > /dev/null
+
+%.d:
+.PRECIOUS: %.d
+
+%.mo: %.po
+ @echo " [MSGFMT] $<"
+ $(Q) $(MSGFMT) -o $@ $<
+
+CLEAN_TARGETS += \
+ $(SOURCES:%.c=%.d) $(SOURCES:%.c=%.gcno) $(SOURCES:%.c=%.gcda) \
+ $(SOURCES2:%.c=%.o) $(SOURCES2:%.c=%.d) $(SOURCES2:%.c=%.gcno) $(SOURCES2:%.c=%.gcda) \
+ $(POTFILES) $(CLEAN_CFLOW)
+
+cleandir:
+ifneq (,$(firstword $(CLEAN_DIRS)))
+ $(RM) -r $(CLEAN_DIRS)
+endif
+ $(RM) $(OBJECTS) $(TARGETS) $(CLEAN_TARGETS) core
+
+clean: $(SUBDIRS.clean) cleandir
+
+distclean: cleandir $(SUBDIRS.distclean)
+ifneq (,$(firstword $(DISTCLEAN_DIRS)))
+ $(RM) -r $(DISTCLEAN_DIRS)
+endif
+ $(RM) $(DISTCLEAN_TARGETS) Makefile
+
+.exported_symbols_generated: $(EXPORTED_HEADER) .exported_symbols $(DEPS)
+ $(Q) set -e; \
+ ( cat $(srcdir)/.exported_symbols; \
+ if test -n "$(EXPORTED_HEADER)"; then \
+ $(CC) -E -P $(INCLUDES) $(DEFS) $(EXPORTED_HEADER) | \
+ $(SED) -ne "/^typedef|}/!s/.*[ *]\($(EXPORTED_FN_PREFIX)_[a-z0-9_]*\)(.*/\1/p"; \
+ fi \
+ ) > $@
+
+EXPORTED_UC := $(shell echo $(EXPORTED_FN_PREFIX) | tr '[a-z]' '[A-Z]')
+EXPORTED_SYMBOLS := $(wildcard $(srcdir)/.exported_symbols.Base $(srcdir)/.exported_symbols.$(EXPORTED_UC)_[0-9_]*[0-9])
+
+.export.sym: .exported_symbols_generated $(EXPORTED_SYMBOLS)
+ifeq (,$(firstword $(EXPORTED_SYMBOLS)))
+ $(Q) set -e; (echo "Base {"; echo " global:";\
+ $(SED) "s/^/ /;s/$$/;/" $<;\
+ echo "};";\
+ echo "Local {"; echo " local:"; echo " *;"; echo "};";\
+ ) > $@
+else
+ $(Q) set -e;\
+ R=$$($(SORT) $^ | uniq -u);\
+ test -z "$$R" || { echo "Mismatch between symbols in shared library and lists in .exported_symbols.* files: $$R"; false; } ;\
+ ( for i in $$(echo $(EXPORTED_SYMBOLS) | tr ' ' '\n' | $(SORT) -rnt_ -k5 ); do\
+ echo "$${i##*.} {"; echo " global:";\
+ $(SED) "s/^/ /;s/$$/;/" $$i;\
+ echo "};";\
+ done;\
+ echo "Local {"; echo " local:"; echo " *;"; echo "};";\
+ ) > $@
+endif
+
+ifeq ("@USE_TRACKING@","yes")
+ifeq (,$(findstring $(MAKECMDGOALS),cscope.out cflow clean distclean lcov \
+ help check check_local check_cluster check_lvmetad check_lvmpolld))
+ ifdef SOURCES
+ -include $(SOURCES:.c=.d) $(CXXSOURCES:.cpp=.d)
+ endif
+ ifdef SOURCES2
+ -include $(SOURCES2:.c=.d)
+ endif
+endif
+endif
diff --git a/liblvm/Makefile.in b/liblvm/Makefile.in
index be3049a9e..210ee3760 100644
--- a/liblvm/Makefile.in
+++ b/liblvm/Makefile.in
@@ -43,7 +43,7 @@ LDDEPS += $(top_builddir)/lib/liblvm-internal.a
include $(top_builddir)/make.tmpl
LDFLAGS += -L$(top_builddir)/lib -L$(top_builddir)/daemons/dmeventd
-LIBS += $(LVMINTERNAL_LIBS) -ldevmapper -laio
+LIBS += $(LVMINTERNAL_LIBS) -laio
.PHONY: install_dynamic install_static install_include install_pkgconfig
diff --git a/liblvm/lvm_misc.h b/liblvm/lvm_misc.h
index 62f91ce50..b83a44305 100644
--- a/liblvm/lvm_misc.h
+++ b/liblvm/lvm_misc.h
@@ -14,7 +14,7 @@
#ifndef _LVM2APP_MISC_H
#define _LVM2APP_MISC_H
-#include "libdm/libdevmapper.h"
+#include "device_mapper/libdevmapper.h"
#include "liblvm/lvm2app.h"
#include "lib/metadata/metadata-exported.h"
#include "lib/commands/toolcontext.h"
diff --git a/liblvm/lvm_prop.c b/liblvm/lvm_prop.c
index 199759667..5d274a601 100644
--- a/liblvm/lvm_prop.c
+++ b/liblvm/lvm_prop.c
@@ -13,7 +13,7 @@
*/
#include "lvm_prop.h"
-#include "libdm/libdevmapper.h"
+#include "device_mapper/libdevmapper.h"
#include "lib/metadata/metadata.h"
/* lv create parameters */
diff --git a/make.tmpl.in b/make.tmpl.in
index 7e3f4a481..c4d9d583f 100644
--- a/make.tmpl.in
+++ b/make.tmpl.in
@@ -68,7 +68,9 @@ CLDFLAGS += @CLDFLAGS@
ELDFLAGS += @ELDFLAGS@
LDDEPS += @LDDEPS@
LIB_SUFFIX = @LIB_SUFFIX@
-LVMINTERNAL_LIBS = -llvm-internal $(DMEVENT_LIBS) $(DAEMON_LIBS) $(SYSTEMD_LIBS) $(UDEV_LIBS) $(DL_LIBS) $(BLKID_LIBS)
+LVMINTERNAL_LIBS=\
+ -llvm-internal \
+ $(DMEVENT_LIBS) $(DAEMON_LIBS) $(SYSTEMD_LIBS) $(UDEV_LIBS) $(DL_LIBS) $(BLKID_LIBS)
DL_LIBS = @DL_LIBS@
RT_LIBS = @RT_LIBS@
M_LIBS = @M_LIBS@
@@ -338,7 +340,7 @@ SUBDIRS.distclean := $(SUBDIRS:=.distclean)
TARGETS += $(LIB_SHARED) $(LIB_STATIC)
-all: $(SUBDIRS) $(TARGETS)
+all: $(top_builddir)/device_mapper/libdevice-mapper.a $(SUBDIRS) $(TARGETS)
install: all $(SUBDIRS.install)
install_cluster: all $(SUBDIRS.install_cluster)
@@ -347,7 +349,7 @@ install_lvm2: $(SUBDIRS.install_lvm2)
install_ocf: $(SUBDIRS.install_ocf)
cflow: $(SUBDIRS.cflow)
-$(SUBDIRS): $(SUBDIRS.device-mapper)
+$(SUBDIRS): $(SUBDIRS.device-mapper) $(top_builddir)/device_mapper/libdevice-mapper.a
$(MAKE) -C $@
$(SUBDIRS.device-mapper):
diff --git a/scripts/Makefile.in b/scripts/Makefile.in
index 720ae9f0e..eeac88156 100644
--- a/scripts/Makefile.in
+++ b/scripts/Makefile.in
@@ -23,12 +23,12 @@ endif
include $(top_builddir)/make.tmpl
ifeq ("@APPLIB@", "yes")
- DEPLIBS += $(top_builddir)/liblvm/liblvm2app.so $(top_builddir)/libdm/libdevmapper.so
+ DEPLIBS += $(top_builddir)/liblvm/liblvm2app.so
LDFLAGS += -L$(top_builddir)/liblvm
ifeq ("@BUILD_DMEVENTD@", "yes")
LDFLAGS += -Wl,-rpath-link,$(top_builddir)/daemons/dmeventd
endif
- LVMLIBS = @LVM2APP_LIB@ -ldevmapper -laio
+ LVMLIBS = @LVM2APP_LIB@ -laio
endif
LVM_SCRIPTS = lvmdump.sh lvmconf.sh
diff --git a/test/api/Makefile.in b/test/api/Makefile.in
index e953675a0..6661149d9 100644
--- a/test/api/Makefile.in
+++ b/test/api/Makefile.in
@@ -41,9 +41,12 @@ endif
include $(top_builddir)/make.tmpl
DEFS += -D_REENTRANT
-DEPLIBS += $(top_builddir)/liblvm/liblvm2app.so $(top_builddir)/libdm/libdevmapper.so
-LDFLAGS += -L$(top_builddir)/liblvm -L$(top_builddir)/daemons/dmeventd
-LIBS += @LVM2APP_LIB@ $(DMEVENT_LIBS) -ldevmapper
+DEPLIBS += $(top_builddir)/liblvm/liblvm2app.so
+LDFLAGS+=\
+ -L$(top_builddir)/liblvm \
+ -L$(top_builddir)/daemons/dmeventd \
+ -L$(top_builddir)/device_mapper/libdevice-mapper.a
+LIBS += @LVM2APP_LIB@ $(DMEVENT_LIBS)
%.t: %.o $(DEPLIBS)
$(CC) -o $@ $(<) $(CFLAGS) $(LDFLAGS) $(ELDFLAGS) $(LIBS)
diff --git a/test/unit/Makefile.in b/test/unit/Makefile.in
index 9d1860882..54b7d723c 100644
--- a/test/unit/Makefile.in
+++ b/test/unit/Makefile.in
@@ -12,7 +12,7 @@
UNIT_SOURCE=\
base/data-struct/radix-tree.c \
- device-mapper/vdo/status.c \
+ device_mapper/vdo/status.c \
\
test/unit/bcache_t.c \
test/unit/bcache_utils_t.c \
@@ -32,9 +32,9 @@ UNIT_SOURCE=\
UNIT_DEPENDS=$(subst .c,.d,$(UNIT_SOURCE))
UNIT_OBJECTS=$(UNIT_SOURCE:%.c=%.o)
CLEAN_TARGETS+=$(UNIT_DEPENDS) $(UNIT_OBJECTS)
-UNIT_LDLIBS += $(LVMINTERNAL_LIBS) -ldevmapper -laio
+UNIT_LDLIBS += $(LVMINTERNAL_LIBS) -laio
-test/unit/unit-test: $(UNIT_OBJECTS) libdm/libdevmapper.$(LIB_SUFFIX) lib/liblvm-internal.a
+test/unit/unit-test: $(UNIT_OBJECTS) device_mapper/libdevice-mapper.a lib/liblvm-internal.a
@echo " [LD] $@"
$(Q) $(CC) $(CFLAGS) $(LDFLAGS) $(EXTRA_EXEC_LDFLAGS) -L$(top_builddir)/libdm \
-o $@ $(UNIT_OBJECTS) $(UNIT_LDLIBS)
diff --git a/test/unit/bitset_t.c b/test/unit/bitset_t.c
index 66e0d321d..9b18fcbf0 100644
--- a/test/unit/bitset_t.c
+++ b/test/unit/bitset_t.c
@@ -13,7 +13,7 @@
*/
#include "units.h"
-#include "libdm/libdevmapper.h"
+#include "device_mapper/libdevmapper.h"
enum {
NR_BITS = 137
diff --git a/test/unit/config_t.c b/test/unit/config_t.c
index e988706d2..21af55146 100644
--- a/test/unit/config_t.c
+++ b/test/unit/config_t.c
@@ -13,7 +13,7 @@
*/
#include "units.h"
-#include "libdm/libdevmapper.h"
+#include "device_mapper/libdevmapper.h"
static void *_mem_init(void)
{
diff --git a/test/unit/dmlist_t.c b/test/unit/dmlist_t.c
index 5a4951e1a..8a9948f72 100644
--- a/test/unit/dmlist_t.c
+++ b/test/unit/dmlist_t.c
@@ -13,7 +13,7 @@
*/
#include "units.h"
-#include "libdm/libdevmapper.h"
+#include "device_mapper/libdevmapper.h"
static void test_dmlist_splice(void *fixture)
{
diff --git a/test/unit/dmstatus_t.c b/test/unit/dmstatus_t.c
index 43fb0bf82..f50dd75c4 100644
--- a/test/unit/dmstatus_t.c
+++ b/test/unit/dmstatus_t.c
@@ -13,7 +13,7 @@
*/
#include "units.h"
-#include "libdm/libdevmapper.h"
+#include "device_mapper/libdevmapper.h"
static void *_mem_init(void)
{
diff --git a/test/unit/framework.h b/test/unit/framework.h
index a90a6e2bd..0a8a5f27b 100644
--- a/test/unit/framework.h
+++ b/test/unit/framework.h
@@ -1,7 +1,7 @@
#ifndef TEST_UNIT_FRAMEWORK_H
#define TEST_UNIT_FRAMEWORK_H
-#include "libdm/libdevmapper.h"
+#include "device_mapper/libdevmapper.h"
#include <stdbool.h>
#include <stdint.h>
diff --git a/test/unit/matcher_t.c b/test/unit/matcher_t.c
index 8405a347f..296c78ad1 100644
--- a/test/unit/matcher_t.c
+++ b/test/unit/matcher_t.c
@@ -14,7 +14,7 @@
*/
#include "units.h"
-#include "libdm/libdevmapper.h"
+#include "device_mapper/libdevmapper.h"
#include "matcher_data.h"
diff --git a/test/unit/percent_t.c b/test/unit/percent_t.c
index fc168d4e1..43414809a 100644
--- a/test/unit/percent_t.c
+++ b/test/unit/percent_t.c
@@ -13,7 +13,7 @@
*/
#include "units.h"
-#include "libdm/libdevmapper.h"
+#include "device_mapper/libdevmapper.h"
#include <stdio.h>
#include <string.h>
diff --git a/test/unit/string_t.c b/test/unit/string_t.c
index 74886f0bd..3557247e8 100644
--- a/test/unit/string_t.c
+++ b/test/unit/string_t.c
@@ -13,7 +13,7 @@
*/
#include "units.h"
-#include "libdm/libdevmapper.h"
+#include "device_mapper/libdevmapper.h"
#include <stdio.h>
#include <string.h>
diff --git a/test/unit/vdo_t.c b/test/unit/vdo_t.c
index 21ecd1ad8..9c41887ee 100644
--- a/test/unit/vdo_t.c
+++ b/test/unit/vdo_t.c
@@ -12,7 +12,7 @@
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#include "device-mapper/vdo/target.h"
+#include "device_mapper/vdo/target.h"
#include "framework.h"
#include "units.h"
diff --git a/tools/Makefile.in b/tools/Makefile.in
index 103b76732..d4514a289 100644
--- a/tools/Makefile.in
+++ b/tools/Makefile.in
@@ -95,7 +95,7 @@ ifeq ("@STATIC_LINK@", "yes")
INSTALL_CMDLIB_TARGETS += install_cmdlib_static
endif
-LVMLIBS = $(LVMINTERNAL_LIBS) -ldevmapper -laio
+LVMLIBS = $(LVMINTERNAL_LIBS) -laio
LIB_VERSION = $(LIB_VERSION_LVM)
CLEAN_TARGETS = liblvm2cmd.$(LIB_SUFFIX) $(TARGETS_DM) \
@@ -122,15 +122,15 @@ device-mapper: $(TARGETS_DM)
CFLAGS_dmsetup.o += $(UDEV_CFLAGS) $(EXTRA_EXEC_CFLAGS)
-dmsetup: dmsetup.o $(top_builddir)/libdm/libdevmapper.$(LIB_SUFFIX)
+dmsetup: dmsetup.o $(top_builddir)/device_mapper/libdevice-mapper.a
@echo " [CC] $@"
$(Q) $(CC) $(CFLAGS) $(LDFLAGS) $(EXTRA_EXEC_LDFLAGS) $(ELDFLAGS) \
- -o $@ dmsetup.o -ldevmapper $(LIBS)
+ -o $@ $+ $(LIBS) -lm
-dmsetup.static: dmsetup.o $(interfacebuilddir)/libdevmapper.a
+dmsetup.static: dmsetup.o $(top_builddir)/device_mapper/libdevice-mapper.a
@echo " [CC] $@"
$(Q) $(CC) $(CFLAGS) $(LDFLAGS) -static -L$(interfacebuilddir) \
- -o $@ dmsetup.o -ldevmapper $(M_LIBS) $(PTHREAD_LIBS) $(STATIC_LIBS) $(LIBS)
+ -o $@ $+ $(M_LIBS) $(PTHREAD_LIBS) $(STATIC_LIBS) $(LIBS)
all: device-mapper
@@ -138,10 +138,10 @@ CFLAGS_lvm.o += $(EXTRA_EXEC_CFLAGS)
INCLUDES += -I$(top_builddir)/tools
-lvm: $(OBJECTS) lvm.o $(top_builddir)/lib/liblvm-internal.a
+lvm: $(OBJECTS) lvm.o $(top_builddir)/lib/liblvm-internal.a $(top_builddir)/device_mapper/libdevice-mapper.a
@echo " [CC] $@"
- $(Q) $(CC) $(CFLAGS) $(LDFLAGS) $(EXTRA_EXEC_LDFLAGS) $(ELDFLAGS) -o $@ $(OBJECTS) lvm.o \
- $(LVMLIBS) $(READLINE_LIBS) $(LIBS)
+ $(Q) $(CC) $(CFLAGS) $(LDFLAGS) $(EXTRA_EXEC_LDFLAGS) $(ELDFLAGS) -o $@ $+ \
+ $(LVMLIBS) $(READLINE_LIBS) $(LIBS) -lm
DEFS_man-generator.o += -DMAN_PAGE_GENERATOR
@@ -157,7 +157,7 @@ ifeq ("@BUILD_LVMETAD@", "yes")
lvm: $(top_builddir)/libdaemon/client/libdaemonclient.a
endif
-lvm.static: $(OBJECTS) lvm-static.o $(top_builddir)/lib/liblvm-internal.a $(interfacebuilddir)/libdevmapper.a
+lvm.static: $(OBJECTS) lvm-static.o $(top_builddir)/lib/liblvm-internal.a $(top_builddir)/device_mapper/libdevice-mapper.a
@echo " [CC] $@"
$(Q) $(CC) $(CFLAGS) $(LDFLAGS) -static -L$(interfacebuilddir) -o $@ \
$(OBJECTS) lvm-static.o $(LVMLIBS) $(STATIC_LIBS) $(LIBS)
@@ -222,7 +222,6 @@ $(SOURCES:%.c=%.o) $(SOURCES2:%.c=%.o): command-lines-input.h command-count.h cm
ifneq ("$(CFLOW_CMD)", "")
CFLOW_SOURCES = $(addprefix $(srcdir)/, $(SOURCES))
--include $(top_builddir)/libdm/libdevmapper.cflow
-include $(top_builddir)/lib/liblvm-internal.cflow
endif
diff --git a/tools/dmsetup.c b/tools/dmsetup.c
index d3080bba8..5486ed38a 100644
--- a/tools/dmsetup.c
+++ b/tools/dmsetup.c
@@ -17,7 +17,7 @@
*/
#include "tools/tool.h"
-#include "libdm/misc/dm-logging.h"
+#include "device_mapper/misc/dm-logging.h"
#include <ctype.h>
#include <dirent.h>
diff --git a/tools/tool.h b/tools/tool.h
index 656234c34..51d530c76 100644
--- a/tools/tool.h
+++ b/tools/tool.h
@@ -24,7 +24,7 @@
#include <unistd.h>
-#include "libdm/libdevmapper.h"
+#include "device_mapper/libdevmapper.h"
#include "lib/misc/util.h"
#endif /* _LVM_TOOL_H */